Example usage for org.apache.hadoop.mapreduce MRJobConfig MAPREDUCE_JOB_DIR

List of usage examples for org.apache.hadoop.mapreduce MRJobConfig MAPREDUCE_JOB_DIR

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce MRJobConfig MAPREDUCE_JOB_DIR.

Prototype

String MAPREDUCE_JOB_DIR
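
The field is a String constant on the MRJobConfig interface holding the configuration key ("mapreduce.job.dir") under which the MapReduce submission client records the job's staging directory. The snippet below is a minimal, self-contained sketch (not taken from the sources listed under Usage) of how the constant is typically read; the value is present only when the job was submitted through the standard Hadoop client.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class JobDirLookup {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // The submission client stores the job staging directory under this key;
        // null means the job was not submitted through the standard client.
        String jobDir = conf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        System.out.println(jobDir == null
                ? "No job directory set (job not submitted by the Hadoop client)."
                : "Job directory: " + jobDir);
    }
}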

Usage

From source file: org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2Job.java

License: Apache License

/** {@inheritDoc} */
@Override
public Collection<HadoopInputSplit> input() {
    ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) { // The job was probably not submitted through the Hadoop client.
            // Assume the needed classes are available and generate the input splits ourselves.
            if (jobConf.getUseNewMapper())
                return HadoopV2Splitter.splitJob(jobCtx);
            else
                return HadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try {
            FileSystem fs = fileSystem(jobDir.toUri(), jobConf);

            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs,
                    jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new IgniteCheckedException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<HadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();

                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    HadoopFileBlock block = HadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = HadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new HadoopExternalSplit(hosts, off));
                }

                return res;
            }
        } catch (Throwable e) {
            if (e instanceof Error)
                throw (Error) e;
            else
                throw transformException(e);
        }
    } catch (IgniteCheckedException e) {
        throw new IgniteException(e);
    } finally {
        HadoopCommonUtils.restoreContextClassLoader(oldLdr);
    }
}
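
The method above saves and restores the thread's context class loader around the split computation, so classes referenced by the job configuration resolve against the job's own loader. HadoopCommonUtils.setContextClassLoader/restoreContextClassLoader are Ignite-internal helpers; a generic sketch of the same pattern (an assumption about their behavior, not their actual source) looks like this:

ClassLoader old = Thread.currentThread().getContextClassLoader();
Thread.currentThread().setContextClassLoader(jobClassLoader); // jobClassLoader: the job's loader (assumed available)

try {
    // ... work that must resolve classes against the job's class loader ...
} finally {
    Thread.currentThread().setContextClassLoader(old);
}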

From source file: org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2JobResourceManager.java

License: Apache License

/**
 * Prepares the job resources: resolves the classpath list and downloads the resources if needed.
 *
 * @param download {@code true} if the resources need to be downloaded.
 * @param jobLocDir Work directory for the job.
 * @throws IgniteCheckedException If failed.
 */
public void prepareJobEnvironment(boolean download, File jobLocDir) throws IgniteCheckedException {
    try {
        if (jobLocDir.exists())
            throw new IgniteCheckedException(
                    "Local job directory already exists: " + jobLocDir.getAbsolutePath());

        JobConf cfg = ctx.getJobConf();

        Collection<URL> clsPathUrls = new ArrayList<>();

        String mrDir = cfg.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (mrDir != null) {
            stagingDir = new Path(new URI(mrDir));

            if (download) {
                FileSystem fs = job.fileSystem(stagingDir.toUri(), cfg);

                if (!fs.exists(stagingDir))
                    throw new IgniteCheckedException("Failed to find map-reduce submission "
                            + "directory (does not exist): " + stagingDir);

                if (!FileUtil.copy(fs, stagingDir, jobLocDir, false, cfg))
                    throw new IgniteCheckedException("Failed to copy job submission directory "
                            + "contents to local file system " + "[path=" + stagingDir + ", locDir="
                            + jobLocDir.getAbsolutePath() + ", jobId=" + jobId + ']');
            }

            File jarJobFile = new File(jobLocDir, "job.jar");

            clsPathUrls.add(jarJobFile.toURI().toURL());

            rsrcSet.add(jarJobFile);
            rsrcSet.add(new File(jobLocDir, "job.xml"));
        } else if (!jobLocDir.mkdirs())
            throw new IgniteCheckedException(
                    "Failed to create local job directory: " + jobLocDir.getAbsolutePath());

        processFiles(jobLocDir, ctx.getCacheFiles(), download, false, null, MRJobConfig.CACHE_LOCALFILES);
        processFiles(jobLocDir, ctx.getCacheArchives(), download, true, null, MRJobConfig.CACHE_LOCALARCHIVES);
        processFiles(jobLocDir, ctx.getFileClassPaths(), download, false, clsPathUrls, null);
        processFiles(jobLocDir, ctx.getArchiveClassPaths(), download, true, clsPathUrls, null);

        if (!clsPathUrls.isEmpty())
            clsPath = clsPathUrls.toArray(new URL[clsPathUrls.size()]);

        setLocalFSWorkingDirectory(jobLocDir);
    } catch (URISyntaxException | IOException e) {
        throw new IgniteCheckedException(e);
    }
}
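
The staging-directory handling above can be reduced to the following standalone sketch: read the submission directory from the configuration and copy it to a local work directory with org.apache.hadoop.fs.FileUtil. The local path is illustrative, and the sketch omits the classpath bookkeeping the real method performs.

import java.io.File;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class StagingDirFetch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        String mrDir = conf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (mrDir == null)
            throw new IllegalStateException("Job was not submitted through the Hadoop client.");

        Path stagingDir = new Path(new URI(mrDir));
        FileSystem fs = FileSystem.get(stagingDir.toUri(), conf);

        File jobLocDir = new File("/tmp/job-local"); // illustrative local work directory

        // Copy job.jar, job.xml, split files, etc.; 'false' keeps the source intact.
        if (!FileUtil.copy(fs, stagingDir, jobLocDir, false, conf))
            throw new IllegalStateException("Failed to copy " + stagingDir);
    }
}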

From source file: org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2TaskContext.java

License: Apache License

/**
 * @param split External split.
 * @return Native input split.
 * @throws IgniteCheckedException If failed.
 */
@SuppressWarnings("unchecked")
private Object readExternalSplit(HadoopExternalSplit split) throws IgniteCheckedException {
    Path jobDir = new Path(jobConf().get(MRJobConfig.MAPREDUCE_JOB_DIR));

    FileSystem fs;

    try {
        fs = fileSystemForMrUserWithCaching(jobDir.toUri(), jobConf(), fsMap);
    } catch (IOException e) {
        throw new IgniteCheckedException(e);
    }

    try (FSDataInputStream in = fs.open(JobSubmissionFiles.getJobSplitFile(jobDir))) {

        in.seek(split.offset());

        String clsName = Text.readString(in);

        Class<?> cls = jobConf().getClassByName(clsName);

        assert cls != null;

        Serialization serialization = new SerializationFactory(jobConf()).getSerialization(cls);

        Deserializer deserializer = serialization.getDeserializer(cls);

        deserializer.open(in);

        Object res = deserializer.deserialize(null);

        deserializer.close();

        assert res != null;

        return res;
    } catch (IOException | ClassNotFoundException e) {
        throw new IgniteCheckedException(e);
    }
}
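
readExternalSplit() mirrors the write side in Hadoop's job submission code, which records each split as its class name (Text.writeString) followed by the split body produced by the matching Serializer. Below is a simplified, hedged sketch of that write side; the actual writer, JobSplitWriter, additionally maintains the split meta-info index that supplies the offsets read above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.io.serializer.Serializer;

class SplitWriteSketch {
    /** Writes one split and returns its offset, which the reader later seeks to. */
    static <T> long writeSplit(FSDataOutputStream out, T split, Configuration conf) throws Exception {
        long off = out.getPos();

        // Class name first, so the reader knows which deserializer to build.
        Text.writeString(out, split.getClass().getName());

        @SuppressWarnings("unchecked")
        Serializer<T> ser = new SerializationFactory(conf).getSerializer((Class<T>) split.getClass());

        ser.open(out);
        ser.serialize(split);

        return off;
    }
}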

From source file: org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2Job.java

License: Apache License

/** {@inheritDoc} */
@Override
public Collection<GridHadoopInputSplit> input() throws IgniteCheckedException {
    Thread.currentThread().setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) { // The job was probably not submitted through the Hadoop client.
            // Assume the needed classes are available and generate the input splits ourselves.
            if (jobConf.getUseNewMapper())
                return GridHadoopV2Splitter.splitJob(jobCtx);
            else
                return GridHadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) {
            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs,
                    jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new IgniteCheckedException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<GridHadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();

                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    GridHadoopFileBlock block = GridHadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = GridHadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new GridHadoopExternalSplit(hosts, off));
                }

                return res;
            }
        } catch (Throwable e) {
            throw transformException(e);
        }
    } finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}
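
Note the design difference from the first HadoopV2Job listing above: this older variant obtains the FileSystem via FileSystem.get() and closes it with try-with-resources, while the newer implementation goes through a caching fileSystem() helper and leaves the instance open for reuse.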

From source file: org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2TaskContext.java

License: Apache License

/**
 * @param split External split.
 * @return Native input split.
 * @throws IgniteCheckedException If failed.
 */
@SuppressWarnings("unchecked")
private Object readExternalSplit(GridHadoopExternalSplit split) throws IgniteCheckedException {
    Path jobDir = new Path(jobConf().get(MRJobConfig.MAPREDUCE_JOB_DIR));

    try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf());
            FSDataInputStream in = fs.open(JobSubmissionFiles.getJobSplitFile(jobDir))) {

        in.seek(split.offset());

        String clsName = Text.readString(in);

        Class<?> cls = jobConf().getClassByName(clsName);

        assert cls != null;

        Serialization serialization = new SerializationFactory(jobConf()).getSerialization(cls);

        Deserializer deserializer = serialization.getDeserializer(cls);

        deserializer.open(in);

        Object res = deserializer.deserialize(null);

        deserializer.close();

        assert res != null;

        return res;
    } catch (IOException | ClassNotFoundException e) {
        throw new IgniteCheckedException(e);
    }
}

From source file: org.apache.ignite.internal.processors.hadoop.v2.HadoopV2Job.java

License: Apache License

/** {@inheritDoc} */
@Override
public Collection<HadoopInputSplit> input() throws IgniteCheckedException {
    Thread.currentThread().setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) { // The job was probably not submitted through the Hadoop client.
            // Assume the needed classes are available and generate the input splits ourselves.
            if (jobConf.getUseNewMapper())
                return HadoopV2Splitter.splitJob(jobCtx);
            else
                return HadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) {
            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs,
                    jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new IgniteCheckedException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<HadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();

                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    HadoopFileBlock block = HadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = HadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new HadoopExternalSplit(hosts, off));
                }

                return res;
            }
        } catch (Throwable e) {
            throw transformException(e);
        }
    } finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}

From source file: org.apache.ignite.internal.processors.hadoop.v2.HadoopV2TaskContext.java

License: Apache License

/**
 * @param split External split.
 * @return Native input split.
 * @throws IgniteCheckedException If failed.
 */
@SuppressWarnings("unchecked")
private Object readExternalSplit(HadoopExternalSplit split) throws IgniteCheckedException {
    Path jobDir = new Path(jobConf().get(MRJobConfig.MAPREDUCE_JOB_DIR));

    try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf());
            FSDataInputStream in = fs.open(JobSubmissionFiles.getJobSplitFile(jobDir))) {

        in.seek(split.offset());

        String clsName = Text.readString(in);

        Class<?> cls = jobConf().getClassByName(clsName);

        assert cls != null;

        Serialization serialization = new SerializationFactory(jobConf()).getSerialization(cls);

        Deserializer deserializer = serialization.getDeserializer(cls);

        deserializer.open(in);

        Object res = deserializer.deserialize(null);

        deserializer.close();

        assert res != null;

        return res;
    } catch (IOException | ClassNotFoundException e) {
        throw new IgniteCheckedException(e);
    }
}

From source file: org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2Job.java

License: Open Source License

/** {@inheritDoc} */
@Override
public Collection<GridHadoopInputSplit> input() throws GridException {
    Thread.currentThread().setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) { // The job was probably not submitted through the Hadoop client.
            // Assume the needed classes are available and generate the input splits ourselves.
            if (jobConf.getUseNewMapper())
                return GridHadoopV2Splitter.splitJob(jobCtx);
            else
                return GridHadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) {
            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs,
                    jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new GridException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<GridHadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();

                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    GridHadoopFileBlock block = GridHadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = GridHadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new GridHadoopExternalSplit(hosts, off));
                }

                return res;
            }
        } catch (Throwable e) {
            throw transformException(e);
        }
    } finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}

From source file: org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2TaskContext.java

License: Open Source License

/**
 * @param split External split.
 * @return Native input split.
 * @throws GridException If failed.
 */
@SuppressWarnings("unchecked")
private Object readExternalSplit(GridHadoopExternalSplit split) throws GridException {
    Path jobDir = new Path(jobConf().get(MRJobConfig.MAPREDUCE_JOB_DIR));

    try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf());
            FSDataInputStream in = fs.open(JobSubmissionFiles.getJobSplitFile(jobDir))) {

        in.seek(split.offset());

        String clsName = Text.readString(in);

        Class<?> cls = jobConf().getClassByName(clsName);

        assert cls != null;

        Serialization serialization = new SerializationFactory(jobConf()).getSerialization(cls);

        Deserializer deserializer = serialization.getDeserializer(cls);

        deserializer.open(in);

        Object res = deserializer.deserialize(null);

        deserializer.close();

        assert res != null;

        return res;
    } catch (IOException | ClassNotFoundException e) {
        throw new GridException(e);
    }
}