Example usage for org.apache.hadoop.mapreduce JobSubmissionFiles getJobSplitFile

List of usage examples for org.apache.hadoop.mapreduce JobSubmissionFiles getJobSplitFile

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.JobSubmissionFiles.getJobSplitFile, which resolves the path of the job.split file within a job submission directory.

Prototype

public static Path getJobSplitFile(Path jobSubmissionDir) 
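
JobSubmissionFiles.getJobSplitFile resolves the location of the job.split file inside a job submission (staging) directory. Its siblings getJobSplitMetaFile and getJobConfPath, which also appear in the examples below, resolve the split metadata file and job.xml in the same directory. The following minimal sketch (the staging path is hypothetical) shows the three helpers side by side:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobSubmissionFiles;

public class JobSubmissionFilePaths {
    public static void main(String[] args) {
        // Hypothetical job submission (staging) directory.
        Path jobSubmitDir = new Path("hdfs:///user/alice/.staging/job_1400000000000_0001");

        // Resolve the standard submission files relative to that directory.
        Path splitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDir);     // .../job.split
        Path splitMeta = JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir); // .../job.splitmetainfo
        Path jobConf = JobSubmissionFiles.getJobConfPath(jobSubmitDir);        // .../job.xml

        System.out.println(splitFile);
        System.out.println(splitMeta);
        System.out.println(jobConf);
    }
}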


Usage

From source file:com.scaleoutsoftware.soss.hserver.hadoop.SubmittedJob.java

License:Apache License

SubmittedJob(JobID jobID, String jobSubmitDirectory, Credentials credentials, Configuration configuration)
        throws IOException, InterruptedException {
    this.jobID = jobID;
    this.configuration = configuration;
    this.jobSubmitDirectoryPath = new Path(jobSubmitDirectory);
    this.fileSystem = FileSystem.get(configuration);

    JobSplit.TaskSplitMetaInfo splitInfo[] = SplitMetaInfoReader.readSplitMetaInfo(jobID, fileSystem,
            configuration, jobSubmitDirectoryPath);

    Path jobSplitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDirectoryPath);
    FSDataInputStream stream = fileSystem.open(jobSplitFile);

    for (JobSplit.TaskSplitMetaInfo info : splitInfo) {
        Object split = getSplitDetails(stream, info.getStartOffset(), configuration);
        inputSplits.add(split);
        splitLocations.put(split, info.getLocations());
        LOG.info("Adding split for execution. Split = " + split + " Locations: "
                + Arrays.toString(splitLocations.get(split)));
    }

    stream.close();

    jobConfPath = JobSubmissionFiles.getJobConfPath(jobSubmitDirectoryPath);

    if (!fileSystem.exists(jobConfPath)) {
        throw new IOException("Cannot find job.xml. Path = " + jobConfPath);
    }

    //We cannot just use JobConf(Path) constructor,
    //because it does not work for HDFS locations.
    //The comment in Configuration#loadResource() states,
    //for the case when the Path to the resource is provided:
    //"Can't use FileSystem API or we get an infinite loop
    //since FileSystem uses Configuration API.  Use java.io.File instead."
    //
    //Workaround: construct empty Configuration, provide it with
    //input stream and give it to JobConf constructor.
    FSDataInputStream jobConfStream = fileSystem.open(jobConfPath);
    Configuration jobXML = new Configuration(false);
    jobXML.addResource(jobConfStream);

    //The configuration does not actually get read until we access some
    //property. Calling #size() forces the Configuration to read the
    //input stream.
    jobXML.size();

    //We are done with the input stream, so it can be closed now.
    jobConfStream.close();

    jobConf = new JobConf(jobXML);

    newApi = jobConf.getUseNewMapper();

    jobStatus = new JobStatus(jobID, 0f, 0f, 0f, 0f, JobStatus.State.RUNNING, JobPriority.NORMAL,
            UserGroupInformation.getCurrentUser().getUserName(), jobID.toString(), jobConfPath.toString(), "");
}

From source file:ml.shifu.guagua.yarn.GuaguaSplitWriter.java

License:Apache License

public static <T extends InputSplit> void createSplitFiles(Path jobSubmitDir, Configuration conf, FileSystem fs,
        T[] splits) throws IOException, InterruptedException {
    FSDataOutputStream out = createFile(fs, JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
    SplitMetaInfo[] info = writeNewSplits(conf, splits, out);
    out.close();
    writeJobSplitMetaInfo(fs, JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir),
            new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion, info);
}

From source file:ml.shifu.guagua.yarn.GuaguaSplitWriter.java

License:Apache License

public static void createSplitFiles(Path jobSubmitDir, Configuration conf, FileSystem fs,
        org.apache.hadoop.mapred.InputSplit[] splits) throws IOException {
    FSDataOutputStream out = createFile(fs, JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
    SplitMetaInfo[] info = writeOldSplits(splits, out, conf);
    out.close();
    writeJobSplitMetaInfo(fs, JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir),
            new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion, info);
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2Job.java

License:Apache License

/** {@inheritDoc} */
@Override
public Collection<HadoopInputSplit> input() {
    ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) { // The job was probably not submitted by the Hadoop client.
            // Assume that we have the needed classes and try to generate input splits ourselves.
            if (jobConf.getUseNewMapper())
                return HadoopV2Splitter.splitJob(jobCtx);
            else
                return HadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try {
            FileSystem fs = fileSystem(jobDir.toUri(), jobConf);

            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs,
                    jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new IgniteCheckedException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<HadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();

                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    HadoopFileBlock block = HadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = HadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new HadoopExternalSplit(hosts, off));
                }

                return res;
            }
        } catch (Throwable e) {
            if (e instanceof Error)
                throw (Error) e;
            else
                throw transformException(e);
        }
    } catch (IgniteCheckedException e) {
        throw new IgniteException(e);
    } finally {
        HadoopCommonUtils.restoreContextClassLoader(oldLdr);
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2TaskContext.java

License:Apache License

/**
 * @param split External split.
 * @return Native input split.
 * @throws IgniteCheckedException If failed.
 */
@SuppressWarnings("unchecked")
private Object readExternalSplit(HadoopExternalSplit split) throws IgniteCheckedException {
    Path jobDir = new Path(jobConf().get(MRJobConfig.MAPREDUCE_JOB_DIR));

    FileSystem fs;

    try {
        fs = fileSystemForMrUserWithCaching(jobDir.toUri(), jobConf(), fsMap);
    } catch (IOException e) {
        throw new IgniteCheckedException(e);
    }

    try (FSDataInputStream in = fs.open(JobSubmissionFiles.getJobSplitFile(jobDir))) {

        in.seek(split.offset());

        String clsName = Text.readString(in);

        Class<?> cls = jobConf().getClassByName(clsName);

        assert cls != null;

        Serialization serialization = new SerializationFactory(jobConf()).getSerialization(cls);

        Deserializer deserializer = serialization.getDeserializer(cls);

        deserializer.open(in);

        Object res = deserializer.deserialize(null);

        deserializer.close();

        assert res != null;

        return res;
    } catch (IOException | ClassNotFoundException e) {
        throw new IgniteCheckedException(e);
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2Job.java

License:Apache License

/** {@inheritDoc} */
@Override
public Collection<GridHadoopInputSplit> input() throws IgniteCheckedException {
    Thread.currentThread().setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) { // The job was probably not submitted by the Hadoop client.
            // Assume that we have the needed classes and try to generate input splits ourselves.
            if (jobConf.getUseNewMapper())
                return GridHadoopV2Splitter.splitJob(jobCtx);
            else
                return GridHadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) {
            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs,
                    jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new IgniteCheckedException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<GridHadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();

                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    GridHadoopFileBlock block = GridHadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = GridHadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new GridHadoopExternalSplit(hosts, off));
                }

                return res;
            }
        } catch (Throwable e) {
            throw transformException(e);
        }
    } finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2TaskContext.java

License:Apache License

/**
 * @param split External split.
 * @return Native input split.
 * @throws IgniteCheckedException If failed.
 */
@SuppressWarnings("unchecked")
private Object readExternalSplit(GridHadoopExternalSplit split) throws IgniteCheckedException {
    Path jobDir = new Path(jobConf().get(MRJobConfig.MAPREDUCE_JOB_DIR));

    try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf());
            FSDataInputStream in = fs.open(JobSubmissionFiles.getJobSplitFile(jobDir))) {

        in.seek(split.offset());

        String clsName = Text.readString(in);

        Class<?> cls = jobConf().getClassByName(clsName);

        assert cls != null;

        Serialization serialization = new SerializationFactory(jobConf()).getSerialization(cls);

        Deserializer deserializer = serialization.getDeserializer(cls);

        deserializer.open(in);

        Object res = deserializer.deserialize(null);

        deserializer.close();

        assert res != null;

        return res;
    } catch (IOException | ClassNotFoundException e) {
        throw new IgniteCheckedException(e);
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.v2.HadoopV2Job.java

License:Apache License

/** {@inheritDoc} */
@Override
public Collection<HadoopInputSplit> input() throws IgniteCheckedException {
    Thread.currentThread().setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) { // The job was probably not submitted by the Hadoop client.
            // Assume that we have the needed classes and try to generate input splits ourselves.
            if (jobConf.getUseNewMapper())
                return HadoopV2Splitter.splitJob(jobCtx);
            else
                return HadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) {
            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs,
                    jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new IgniteCheckedException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<HadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();

                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    HadoopFileBlock block = HadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = HadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new HadoopExternalSplit(hosts, off));
                }

                return res;
            }
        } catch (Throwable e) {
            throw transformException(e);
        }
    } finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.v2.HadoopV2TaskContext.java

License:Apache License

/**
 * @param split External split.
 * @return Native input split.
 * @throws IgniteCheckedException If failed.
 */
@SuppressWarnings("unchecked")
private Object readExternalSplit(HadoopExternalSplit split) throws IgniteCheckedException {
    Path jobDir = new Path(jobConf().get(MRJobConfig.MAPREDUCE_JOB_DIR));

    try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf());
            FSDataInputStream in = fs.open(JobSubmissionFiles.getJobSplitFile(jobDir))) {

        in.seek(split.offset());

        String clsName = Text.readString(in);

        Class<?> cls = jobConf().getClassByName(clsName);

        assert cls != null;

        Serialization serialization = new SerializationFactory(jobConf()).getSerialization(cls);

        Deserializer deserializer = serialization.getDeserializer(cls);

        deserializer.open(in);

        Object res = deserializer.deserialize(null);

        deserializer.close();

        assert res != null;

        return res;
    } catch (IOException | ClassNotFoundException e) {
        throw new IgniteCheckedException(e);
    }
}

From source file:org.apache.tez.mapreduce.hadoop.MRInputHelpers.java

License:Apache License

/**
 * Generate new-api mapreduce InputFormat splits
 * @param jobContext JobContext required by InputFormat
 * @param inputSplitDir Directory in which to generate splits information
 *
 * @return InputSplitInfo containing the split files' information and the
 * location hints for each generated split, to be used in determining the
 * parallelism of the map stage.
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static InputSplitInfoDisk writeNewSplits(JobContext jobContext, Path inputSplitDir)
        throws IOException, InterruptedException, ClassNotFoundException {

    org.apache.hadoop.mapreduce.InputSplit[] splits = generateNewSplits(jobContext, false, 0);

    Configuration conf = jobContext.getConfiguration();

    JobSplitWriter.createSplitFiles(inputSplitDir, conf, inputSplitDir.getFileSystem(conf), splits);

    List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splits.length);
    for (int i = 0; i < splits.length; ++i) {
        locationHints.add(TaskLocationHint
                .createTaskLocationHint(new HashSet<String>(Arrays.asList(splits[i].getLocations())), null));
    }

    return new InputSplitInfoDisk(JobSubmissionFiles.getJobSplitFile(inputSplitDir),
            JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir), splits.length, locationHints,
            jobContext.getCredentials());
}