Usage examples for org.apache.hadoop.mapreduce.split.SplitMetaInfoReader#readSplitMetaInfo
public static JobSplit.TaskSplitMetaInfo[] readSplitMetaInfo(JobID jobId, FileSystem fs, Configuration conf, Path jobSubmitDir) throws IOException
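Before the per-project examples, here is a minimal, self-contained sketch of calling the method directly. The job ID and staging directory below are hypothetical placeholders, and SplitMetaInfoReader and JobSplit are Hadoop-internal API, so treat this as an illustrative sketch rather than supported usage:

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.split.JobSplit;
import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader;

public class ReadSplitMetaInfoExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical job ID and job submit directory; substitute real values.
        JobID jobId = JobID.forName("job_1400000000000_0001");
        Configuration conf = new Configuration();
        Path jobSubmitDir = new Path("/tmp/hadoop-yarn/staging/job_1400000000000_0001");
        FileSystem fs = jobSubmitDir.getFileSystem(conf);

        // Reads the split meta-info file from the job submit directory.
        JobSplit.TaskSplitMetaInfo[] metaInfos =
                SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, conf, jobSubmitDir);

        for (JobSplit.TaskSplitMetaInfo info : metaInfos) {
            // Each entry carries the split's preferred hosts and the offset of
            // the serialized split in the job.split file.
            System.out.println(Arrays.toString(info.getLocations()) + " @ " + info.getStartOffset());
        }
    }
}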
From source file: com.scaleoutsoftware.soss.hserver.hadoop.SubmittedJob.java
License: Apache License

SubmittedJob(JobID jobID, String jobSubmitDirectory, Credentials credentials,
        Configuration configuration) throws IOException, InterruptedException {
    this.jobID = jobID;
    this.configuration = configuration;
    this.jobSubmitDirectoryPath = new Path(jobSubmitDirectory);
    this.fileSystem = FileSystem.get(configuration);

    JobSplit.TaskSplitMetaInfo[] splitInfo = SplitMetaInfoReader.readSplitMetaInfo(jobID, fileSystem,
            configuration, jobSubmitDirectoryPath);

    Path jobSplitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDirectoryPath);
    FSDataInputStream stream = fileSystem.open(jobSplitFile);

    for (JobSplit.TaskSplitMetaInfo info : splitInfo) {
        Object split = getSplitDetails(stream, info.getStartOffset(), configuration);
        inputSplits.add(split);
        splitLocations.put(split, info.getLocations());
        LOG.info("Adding split for execution. Split = " + split + " Locations: "
                + Arrays.toString(splitLocations.get(split)));
    }

    stream.close();

    jobConfPath = JobSubmissionFiles.getJobConfPath(jobSubmitDirectoryPath);
    if (!fileSystem.exists(jobConfPath)) {
        throw new IOException("Cannot find job.xml. Path = " + jobConfPath);
    }

    // We cannot just use the JobConf(Path) constructor, because it does not work
    // for HDFS locations. The comment in Configuration#loadResource() states, for
    // the case when the Path to the resource is provided:
    // "Can't use FileSystem API or we get an infinite loop
    // since FileSystem uses Configuration API. Use java.io.File instead."
    //
    // Workaround: construct an empty Configuration, feed it the input stream,
    // and give it to the JobConf constructor.
    FSDataInputStream jobConfStream = fileSystem.open(jobConfPath);
    Configuration jobXML = new Configuration(false);
    jobXML.addResource(jobConfStream);

    // The configuration is not actually read until we attempt to read some
    // property. A call to #size() forces the Configuration to read the input stream.
    jobXML.size();

    // We are done with the input stream and can close it now.
    jobConfStream.close();

    jobConf = new JobConf(jobXML);
    newApi = jobConf.getUseNewMapper();

    jobStatus = new JobStatus(jobID, 0f, 0f, 0f, 0f, JobStatus.State.RUNNING, JobPriority.NORMAL,
            UserGroupInformation.getCurrentUser().getUserName(), jobID.toString(), jobConfPath.toString(), "");
}
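A note on the pattern above: readSplitMetaInfo returns only the split metadata (locations plus an offset), while the serialized splits themselves live in the job.split file obtained via JobSubmissionFiles.getJobSplitFile. That is why the constructor opens that file separately and reads each serialized split at info.getStartOffset().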
From source file: org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2Job.java
License: Apache License

/** {@inheritDoc} */
@Override public Collection<HadoopInputSplit> input() {
    ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) {
            // The job was probably not submitted by the Hadoop client.
            // Assume the needed classes are available and try to generate the input splits ourselves.
            if (jobConf.getUseNewMapper())
                return HadoopV2Splitter.splitJob(jobCtx);
            else
                return HadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try {
            FileSystem fs = fileSystem(jobDir.toUri(), jobConf);

            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs,
                jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new IgniteCheckedException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<HadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();
                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    HadoopFileBlock block = HadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = HadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new HadoopExternalSplit(hosts, off));
                }

                return res;
            }
        }
        catch (Throwable e) {
            if (e instanceof Error)
                throw (Error) e;
            else
                throw transformException(e);
        }
    }
    catch (IgniteCheckedException e) {
        throw new IgniteException(e);
    }
    finally {
        HadoopCommonUtils.restoreContextClassLoader(oldLdr);
    }
}
From source file: org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2Job.java
License: Apache License

/** {@inheritDoc} */
@Override public Collection<GridHadoopInputSplit> input() throws IgniteCheckedException {
    Thread.currentThread().setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) {
            // The job was probably not submitted by the Hadoop client.
            // Assume the needed classes are available and try to generate the input splits ourselves.
            if (jobConf.getUseNewMapper())
                return GridHadoopV2Splitter.splitJob(jobCtx);
            else
                return GridHadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) {
            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs,
                jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new IgniteCheckedException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<GridHadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();
                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    GridHadoopFileBlock block = GridHadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = GridHadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new GridHadoopExternalSplit(hosts, off));
                }

                return res;
            }
        }
        catch (Throwable e) {
            throw transformException(e);
        }
    }
    finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}
From source file: org.apache.ignite.internal.processors.hadoop.v2.HadoopV2Job.java
License: Apache License

/** {@inheritDoc} */
@Override public Collection<HadoopInputSplit> input() throws IgniteCheckedException {
    Thread.currentThread().setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) {
            // The job was probably not submitted by the Hadoop client.
            // Assume the needed classes are available and try to generate the input splits ourselves.
            if (jobConf.getUseNewMapper())
                return HadoopV2Splitter.splitJob(jobCtx);
            else
                return HadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) {
            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs,
                jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new IgniteCheckedException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<HadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();
                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    HadoopFileBlock block = HadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = HadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new HadoopExternalSplit(hosts, off));
                }

                return res;
            }
        }
        catch (Throwable e) {
            throw transformException(e);
        }
    }
    finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}
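The three Apache Ignite variants above share the same split-reading logic and differ mainly in housekeeping: the impl.v2 version obtains its FileSystem via fileSystem(jobDir.toUri(), jobConf) without closing it and rethrows Errors as-is, while the two older versions close the FileSystem with try-with-resources and reset the context class loader to null instead of restoring the previous one.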
From source file: org.apache.tez.mapreduce.client.YARNRunner.java
License: Apache License

private List<TaskLocationHint> getMapLocationHintsFromInputSplits(JobID jobId, FileSystem fs,
        Configuration conf, String jobSubmitDir) throws IOException {
    TaskSplitMetaInfo[] splitsInfo = SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, conf,
            new Path(jobSubmitDir));
    int splitsCount = splitsInfo.length;
    List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splitsCount);
    for (int i = 0; i < splitsCount; ++i) {
        TaskLocationHint locationHint = TaskLocationHint.createTaskLocationHint(
                new HashSet<String>(Arrays.asList(splitsInfo[i].getLocations())), null);
        locationHints.add(locationHint);
    }
    return locationHints;
}
From source file: org.apache.tez.mapreduce.hadoop.TestMRInputHelpers.java
License: Apache License

private void verifyLocationHints(Path inputSplitsDir, List<TaskLocationHint> actual) throws Exception {
    JobID jobId = new JobID("dummy", 1);
    JobSplit.TaskSplitMetaInfo[] splitsInfo = SplitMetaInfoReader.readSplitMetaInfo(jobId, remoteFs, conf,
            inputSplitsDir);
    int splitsCount = splitsInfo.length;
    List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splitsCount);
    for (int i = 0; i < splitsCount; ++i) {
        locationHints.add(TaskLocationHint.createTaskLocationHint(
                new HashSet<String>(Arrays.asList(splitsInfo[i].getLocations())), null));
    }
    Assert.assertEquals(locationHints, actual);
}
From source file: org.apache.tez.mapreduce.YARNRunner.java
License: Apache License

private List<TaskLocationHint> getMapLocationHintsFromInputSplits(JobID jobId, FileSystem fs,
        Configuration conf, String jobSubmitDir) throws IOException {
    TaskSplitMetaInfo[] splitsInfo = SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, conf,
            new Path(jobSubmitDir));
    int splitsCount = splitsInfo.length;
    List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splitsCount);
    for (int i = 0; i < splitsCount; ++i) {
        TaskLocationHint locationHint = new TaskLocationHint(
                new HashSet<String>(Arrays.asList(splitsInfo[i].getLocations())), null);
        locationHints.add(locationHint);
    }
    return locationHints;
}
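This is an older revision of the Tez YARNRunner method shown earlier; the only substantive difference is that it constructs TaskLocationHint directly instead of going through the createTaskLocationHint factory method.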
From source file: org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2Job.java
License: Open Source License

/** {@inheritDoc} */
@Override public Collection<GridHadoopInputSplit> input() throws GridException {
    Thread.currentThread().setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) {
            // The job was probably not submitted by the Hadoop client.
            // Assume the needed classes are available and try to generate the input splits ourselves.
            if (jobConf.getUseNewMapper())
                return GridHadoopV2Splitter.splitJob(jobCtx);
            else
                return GridHadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) {
            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs,
                jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new GridException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<GridHadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();
                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    GridHadoopFileBlock block = GridHadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = GridHadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new GridHadoopExternalSplit(hosts, off));
                }

                return res;
            }
        }
        catch (Throwable e) {
            throw transformException(e);
        }
    }
    finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}