Example usage for org.apache.hadoop.mapreduce JobSubmissionFiles getJobSplitFile

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.JobSubmissionFiles.getJobSplitFile.

Prototype

public static Path getJobSplitFile(Path jobSubmissionDir) 
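
The method only computes a path under the given job submission directory; it performs no I/O of its own. The sketch below is hypothetical (the SplitFileProbe class name and the command-line staging path are assumptions, not part of Hadoop): it resolves the split file's path and checks whether a submitting client has already written it.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobSubmissionFiles;

public class SplitFileProbe {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Hypothetical staging directory; in a real job this is the value a
        // client stored under MRJobConfig.MAPREDUCE_JOB_DIR.
        Path jobSubmissionDir = new Path(args[0]);

        // getJobSplitFile only builds the conventional path (job.split in
        // stock Hadoop); it does not touch the file system.
        Path splitFile = JobSubmissionFiles.getJobSplitFile(jobSubmissionDir);

        FileSystem fs = jobSubmissionDir.getFileSystem(conf);
        System.out.println(splitFile + (fs.exists(splitFile) ? " exists" : " is missing"));
    }
}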

Usage

From source file: org.apache.tez.mapreduce.hadoop.MRInputHelpers.java

License: Apache License

/**
 * Generate old-api mapred InputFormat splits
 * @param jobConf JobConf required by InputFormat class
 * @param inputSplitDir Directory in which to generate splits information
 *
 * @return InputSplitInfo containing the split files' information and the
 * number of splits generated, which is used to determine the parallelism of
 * the map stage.
 *
 * @throws IOException
 */
private static InputSplitInfoDisk writeOldSplits(JobConf jobConf, Path inputSplitDir) throws IOException {

    org.apache.hadoop.mapred.InputSplit[] splits = generateOldSplits(jobConf, false, 0);

    JobSplitWriter.createSplitFiles(inputSplitDir, jobConf, inputSplitDir.getFileSystem(jobConf), splits);

    List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splits.length);
    for (int i = 0; i < splits.length; ++i) {
        locationHints.add(TaskLocationHint
                .createTaskLocationHint(new HashSet<String>(Arrays.asList(splits[i].getLocations())), null));
    }

    return new InputSplitInfoDisk(JobSubmissionFiles.getJobSplitFile(inputSplitDir),
            JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir), splits.length, locationHints,
            jobConf.getCredentials());
}
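
Note that JobSplitWriter.createSplitFiles writes both the split data and the split meta information into inputSplitDir under Hadoop's conventional file names (job.split and job.splitmetainfo in stock Hadoop), so the returned InputSplitInfoDisk can locate them through the JobSubmissionFiles accessors rather than hard-coding paths.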

From source file: org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2Job.java

License: Open Source License

/** {@inheritDoc} */
@Override
public Collection<GridHadoopInputSplit> input() throws GridException {
    Thread.currentThread().setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) { // The job was probably not submitted by a Hadoop client.
            // Assume the needed classes are available and try to generate input splits ourselves.
            if (jobConf.getUseNewMapper())
                return GridHadoopV2Splitter.splitJob(jobCtx);
            else
                return GridHadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) {
            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs,
                    jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new GridException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<GridHadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();

                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    GridHadoopFileBlock block = GridHadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = GridHadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new GridHadoopExternalSplit(hosts, off));
                }

                return res;
            }
        } catch (Throwable e) {
            throw transformException(e);
        }
    } finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}
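
Here the files produced at submission time are consumed as a pair: SplitMetaInfoReader parses the meta file for each split's start offset and preferred hosts, the reader seeks to that offset inside the file returned by getJobSplitFile, and the class name at the head of each record (written with Text.writeString by Hadoop's JobSplitWriter) tells it how to interpret the bytes that follow.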

From source file: org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2TaskContext.java

License: Open Source License

/**
 * @param split External split.
 * @return Native input split.
 * @throws GridException If failed.
 */
@SuppressWarnings("unchecked")
private Object readExternalSplit(GridHadoopExternalSplit split) throws GridException {
    Path jobDir = new Path(jobConf().get(MRJobConfig.MAPREDUCE_JOB_DIR));

    try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf());
            FSDataInputStream in = fs.open(JobSubmissionFiles.getJobSplitFile(jobDir))) {

        in.seek(split.offset());

        String clsName = Text.readString(in);

        Class<?> cls = jobConf().getClassByName(clsName);

        assert cls != null;

        Serialization serialization = new SerializationFactory(jobConf()).getSerialization(cls);

        Deserializer deserializer = serialization.getDeserializer(cls);

        deserializer.open(in);

        Object res = deserializer.deserialize(null);

        deserializer.close();

        assert res != null;

        return res;
    } catch (IOException | ClassNotFoundException e) {
        throw new GridException(e);
    }
}
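
This is the task-side counterpart of the previous example: given only the offset carried by a GridHadoopExternalSplit, it opens the same split file, seeks to the record, resolves the split class by name, and rehydrates the original split object through Hadoop's SerializationFactory.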