Example usage for org.apache.hadoop.mapreduce.split SplitMetaInfoReader readSplitMetaInfo

List of usage examples for org.apache.hadoop.mapreduce.split SplitMetaInfoReader readSplitMetaInfo

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce.split SplitMetaInfoReader readSplitMetaInfo.

Prototype

public static JobSplit.TaskSplitMetaInfo[] readSplitMetaInfo(JobID jobId, FileSystem fs, Configuration conf,
            Path jobSubmitDir) throws IOException 
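
Before the project snippets below, here is a minimal, self-contained sketch of calling readSplitMetaInfo directly. The job id and staging directory used here are placeholder values (assumptions for illustration), and the sketch presumes the submit directory already contains the job.split / job.splitmetainfo files written at job submission time.

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.split.JobSplit;
import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader;

public class SplitMetaInfoExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Placeholder job id and job submit (staging) directory; adjust to your cluster.
        JobID jobId = new JobID("example", 1);
        Path jobSubmitDir = new Path("/tmp/staging/example_job");

        // Reads the split meta info written at submission time and returns one entry
        // per input split: its start offset into the job.split file, its length,
        // and the hosts that hold the underlying data.
        JobSplit.TaskSplitMetaInfo[] metaInfos =
                SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, conf, jobSubmitDir);

        for (JobSplit.TaskSplitMetaInfo info : metaInfos) {
            System.out.println("offset=" + info.getStartOffset()
                    + " length=" + info.getInputDataLength()
                    + " locations=" + Arrays.toString(info.getLocations()));
        }
    }
}

The examples that follow use the offsets returned here to seek into the job.split file and deserialize the actual split objects.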

Usage

From source file:com.scaleoutsoftware.soss.hserver.hadoop.SubmittedJob.java

License:Apache License

SubmittedJob(JobID jobID, String jobSubmitDirectory, Credentials credentials, Configuration configuration)
        throws IOException, InterruptedException {
    this.jobID = jobID;
    this.configuration = configuration;
    this.jobSubmitDirectoryPath = new Path(jobSubmitDirectory);
    this.fileSystem = FileSystem.get(configuration);

    JobSplit.TaskSplitMetaInfo splitInfo[] = SplitMetaInfoReader.readSplitMetaInfo(jobID, fileSystem,
            configuration, jobSubmitDirectoryPath);

    Path jobSplitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDirectoryPath);
    FSDataInputStream stream = fileSystem.open(jobSplitFile);

    for (JobSplit.TaskSplitMetaInfo info : splitInfo) {
        Object split = getSplitDetails(stream, info.getStartOffset(), configuration);
        inputSplits.add(split);
        splitLocations.put(split, info.getLocations());
        LOG.info("Adding split for execution. Split = " + split + " Locations: "
                + Arrays.toString(splitLocations.get(split)));
    }

    stream.close();

    jobConfPath = JobSubmissionFiles.getJobConfPath(jobSubmitDirectoryPath);

    if (!fileSystem.exists(jobConfPath)) {
        throw new IOException("Cannot find job.xml. Path = " + jobConfPath);
    }

    //We cannot just use JobConf(Path) constructor,
    //because it does not work for HDFS locations.
    //The comment in Configuration#loadResource() states,
    //for the case when the Path to the resource is provided:
    //"Can't use FileSystem API or we get an infinite loop
    //since FileSystem uses Configuration API.  Use java.io.File instead."
    //
    //Workaround: construct empty Configuration, provide it with
    //input stream and give it to JobConf constructor.
    FSDataInputStream jobConfStream = fileSystem.open(jobConfPath);
    Configuration jobXML = new Configuration(false);
    jobXML.addResource(jobConfStream);

    //The configuration is not actually read until we attempt to
    //access some property. Calling #size() forces Configuration to
    //read the input stream.
    jobXML.size();

    //We are done with input stream, can close it now.
    jobConfStream.close();

    jobConf = new JobConf(jobXML);

    newApi = jobConf.getUseNewMapper();

    jobStatus = new JobStatus(jobID, 0f, 0f, 0f, 0f, JobStatus.State.RUNNING, JobPriority.NORMAL,
            UserGroupInformation.getCurrentUser().getUserName(), jobID.toString(), jobConfPath.toString(), "");
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2Job.java

License:Apache License

/** {@inheritDoc} */
@Override
public Collection<HadoopInputSplit> input() {
    ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) { // The job was probably not submitted by a hadoop client.
            // Assume we have the needed classes and try to generate input splits ourselves.
            if (jobConf.getUseNewMapper())
                return HadoopV2Splitter.splitJob(jobCtx);
            else
                return HadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try {
            FileSystem fs = fileSystem(jobDir.toUri(), jobConf);

            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs,
                    jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new IgniteCheckedException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<HadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();

                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    HadoopFileBlock block = HadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = HadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new HadoopExternalSplit(hosts, off));
                }

                return res;
            }
        } catch (Throwable e) {
            if (e instanceof Error)
                throw (Error) e;
            else
                throw transformException(e);
        }
    } catch (IgniteCheckedException e) {
        throw new IgniteException(e);
    } finally {
        HadoopCommonUtils.restoreContextClassLoader(oldLdr);
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2Job.java

License:Apache License

/** {@inheritDoc} */
@Override
public Collection<GridHadoopInputSplit> input() throws IgniteCheckedException {
    Thread.currentThread().setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) { // The job was probably not submitted by a hadoop client.
            // Assume we have the needed classes and try to generate input splits ourselves.
            if (jobConf.getUseNewMapper())
                return GridHadoopV2Splitter.splitJob(jobCtx);
            else
                return GridHadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) {
            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs,
                    jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new IgniteCheckedException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<GridHadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();

                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    GridHadoopFileBlock block = GridHadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = GridHadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new GridHadoopExternalSplit(hosts, off));
                }

                return res;
            }
        } catch (Throwable e) {
            throw transformException(e);
        }
    } finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.v2.HadoopV2Job.java

License:Apache License

/** {@inheritDoc} */
@Override
public Collection<HadoopInputSplit> input() throws IgniteCheckedException {
    Thread.currentThread().setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) { // The job was probably not submitted by a hadoop client.
            // Assume we have the needed classes and try to generate input splits ourselves.
            if (jobConf.getUseNewMapper())
                return HadoopV2Splitter.splitJob(jobCtx);
            else
                return HadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) {
            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs,
                    jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new IgniteCheckedException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<HadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();

                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    HadoopFileBlock block = HadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = HadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new HadoopExternalSplit(hosts, off));
                }

                return res;
            }
        } catch (Throwable e) {
            throw transformException(e);
        }
    } finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}

From source file:org.apache.tez.mapreduce.client.YARNRunner.java

License:Apache License

private List<TaskLocationHint> getMapLocationHintsFromInputSplits(JobID jobId, FileSystem fs,
        Configuration conf, String jobSubmitDir) throws IOException {
    TaskSplitMetaInfo[] splitsInfo = SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, conf,
            new Path(jobSubmitDir));
    int splitsCount = splitsInfo.length;
    List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splitsCount);
    for (int i = 0; i < splitsCount; ++i) {
        TaskLocationHint locationHint = TaskLocationHint
                .createTaskLocationHint(new HashSet<String>(Arrays.asList(splitsInfo[i].getLocations())), null);
        locationHints.add(locationHint);
    }
    return locationHints;
}

From source file:org.apache.tez.mapreduce.hadoop.TestMRInputHelpers.java

License:Apache License

private void verifyLocationHints(Path inputSplitsDir, List<TaskLocationHint> actual) throws Exception {
    JobID jobId = new JobID("dummy", 1);
    JobSplit.TaskSplitMetaInfo[] splitsInfo = SplitMetaInfoReader.readSplitMetaInfo(jobId, remoteFs, conf,
            inputSplitsDir);
    int splitsCount = splitsInfo.length;
    List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splitsCount);
    for (int i = 0; i < splitsCount; ++i) {
        locationHints.add(TaskLocationHint.createTaskLocationHint(
                new HashSet<String>(Arrays.asList(splitsInfo[i].getLocations())), null));
    }

    Assert.assertEquals(locationHints, actual);
}

From source file:org.apache.tez.mapreduce.YARNRunner.java

License:Apache License

private List<TaskLocationHint> getMapLocationHintsFromInputSplits(JobID jobId, FileSystem fs,
        Configuration conf, String jobSubmitDir) throws IOException {
    TaskSplitMetaInfo[] splitsInfo = SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, conf,
            new Path(jobSubmitDir));
    int splitsCount = splitsInfo.length;
    List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splitsCount);
    for (int i = 0; i < splitsCount; ++i) {
        TaskLocationHint locationHint = new TaskLocationHint(
                new HashSet<String>(Arrays.asList(splitsInfo[i].getLocations())), null);
        locationHints.add(locationHint);
    }
    return locationHints;
}

From source file:org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2Job.java

License:Open Source License

/** {@inheritDoc} */
@Override
public Collection<GridHadoopInputSplit> input() throws GridException {
    Thread.currentThread().setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) { // The job was probably not submitted by a hadoop client.
            // Assume we have the needed classes and try to generate input splits ourselves.
            if (jobConf.getUseNewMapper())
                return GridHadoopV2Splitter.splitJob(jobCtx);
            else
                return GridHadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) {
            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs,
                    jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new GridException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<GridHadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();

                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    GridHadoopFileBlock block = GridHadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = GridHadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new GridHadoopExternalSplit(hosts, off));
                }

                return res;
            }
        } catch (Throwable e) {
            throw transformException(e);
        }
    } finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}