List of usage examples for org.apache.hadoop.mapreduce.split SplitMetaInfoReader readSplitMetaInfo
public static JobSplit.TaskSplitMetaInfo[] readSplitMetaInfo(JobID jobId, FileSystem fs, Configuration conf, Path jobSubmitDir) throws IOException
From source file:com.scaleoutsoftware.soss.hserver.hadoop.SubmittedJob.java
License:Apache License
SubmittedJob(JobID jobID, String jobSubmitDirectory, Credentials credentials, Configuration configuration)
throws IOException, InterruptedException {
this.jobID = jobID;
this.configuration = configuration;
this.jobSubmitDirectoryPath = new Path(jobSubmitDirectory);
this.fileSystem = FileSystem.get(configuration);
JobSplit.TaskSplitMetaInfo splitInfo[] = SplitMetaInfoReader.readSplitMetaInfo(jobID, fileSystem,
configuration, jobSubmitDirectoryPath);
Path jobSplitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDirectoryPath);
FSDataInputStream stream = fileSystem.open(jobSplitFile);
for (JobSplit.TaskSplitMetaInfo info : splitInfo) {
Object split = getSplitDetails(stream, info.getStartOffset(), configuration);
inputSplits.add(split);/* ww w . j ava 2 s .co m*/
splitLocations.put(split, info.getLocations());
LOG.info("Adding split for execution. Split = " + split + " Locations: "
+ Arrays.toString(splitLocations.get(split)));
}
stream.close();
jobConfPath = JobSubmissionFiles.getJobConfPath(jobSubmitDirectoryPath);
if (!fileSystem.exists(jobConfPath)) {
throw new IOException("Cannot find job.xml. Path = " + jobConfPath);
}
//We cannot just use JobConf(Path) constructor,
//because it does not work for HDFS locations.
//The comment in Configuration#loadResource() states,
//for the case when the Path to the resource is provided:
//"Can't use FileSystem API or we get an infinite loop
//since FileSystem uses Configuration API. Use java.io.File instead."
//
//Workaround: construct empty Configuration, provide it with
//input stream and give it to JobConf constructor.
FSDataInputStream jobConfStream = fileSystem.open(jobConfPath);
Configuration jobXML = new Configuration(false);
jobXML.addResource(jobConfStream);
//The configuration does not actually gets read before we attempt to
//read some property. Call to #size() will make Configuration to
//read the input stream.
jobXML.size();
//We are done with input stream, can close it now.
jobConfStream.close();
jobConf = new JobConf(jobXML);
newApi = jobConf.getUseNewMapper();
jobStatus = new JobStatus(jobID, 0f, 0f, 0f, 0f, JobStatus.State.RUNNING, JobPriority.NORMAL,
UserGroupInformation.getCurrentUser().getUserName(), jobID.toString(), jobConfPath.toString(), "");
}
From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2Job.java
License:Apache License
/** {@inheritDoc} */ @Override/*from ww w . jav a2s . c o m*/ public Collection<HadoopInputSplit> input() { ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(jobConf.getClassLoader()); try { String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR); if (jobDirPath == null) { // Probably job was submitted not by hadoop client. // Assume that we have needed classes and try to generate input splits ourself. if (jobConf.getUseNewMapper()) return HadoopV2Splitter.splitJob(jobCtx); else return HadoopV1Splitter.splitJob(jobConf); } Path jobDir = new Path(jobDirPath); try { FileSystem fs = fileSystem(jobDir.toUri(), jobConf); JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs, jobConf, jobDir); if (F.isEmpty(metaInfos)) throw new IgniteCheckedException("No input splits found."); Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir); try (FSDataInputStream in = fs.open(splitsFile)) { Collection<HadoopInputSplit> res = new ArrayList<>(metaInfos.length); for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) { long off = metaInfo.getStartOffset(); String[] hosts = metaInfo.getLocations(); in.seek(off); String clsName = Text.readString(in); HadoopFileBlock block = HadoopV1Splitter.readFileBlock(clsName, in, hosts); if (block == null) block = HadoopV2Splitter.readFileBlock(clsName, in, hosts); res.add(block != null ? block : new HadoopExternalSplit(hosts, off)); } return res; } } catch (Throwable e) { if (e instanceof Error) throw (Error) e; else throw transformException(e); } } catch (IgniteCheckedException e) { throw new IgniteException(e); } finally { HadoopCommonUtils.restoreContextClassLoader(oldLdr); } }
From source file:org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2Job.java
License:Apache License
/** {@inheritDoc} */ @Override//from ww w .j a v a 2s . c om public Collection<GridHadoopInputSplit> input() throws IgniteCheckedException { Thread.currentThread().setContextClassLoader(jobConf.getClassLoader()); try { String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR); if (jobDirPath == null) { // Probably job was submitted not by hadoop client. // Assume that we have needed classes and try to generate input splits ourself. if (jobConf.getUseNewMapper()) return GridHadoopV2Splitter.splitJob(jobCtx); else return GridHadoopV1Splitter.splitJob(jobConf); } Path jobDir = new Path(jobDirPath); try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) { JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs, jobConf, jobDir); if (F.isEmpty(metaInfos)) throw new IgniteCheckedException("No input splits found."); Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir); try (FSDataInputStream in = fs.open(splitsFile)) { Collection<GridHadoopInputSplit> res = new ArrayList<>(metaInfos.length); for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) { long off = metaInfo.getStartOffset(); String[] hosts = metaInfo.getLocations(); in.seek(off); String clsName = Text.readString(in); GridHadoopFileBlock block = GridHadoopV1Splitter.readFileBlock(clsName, in, hosts); if (block == null) block = GridHadoopV2Splitter.readFileBlock(clsName, in, hosts); res.add(block != null ? block : new GridHadoopExternalSplit(hosts, off)); } return res; } } catch (Throwable e) { throw transformException(e); } } finally { Thread.currentThread().setContextClassLoader(null); } }
From source file:org.apache.ignite.internal.processors.hadoop.v2.HadoopV2Job.java
License:Apache License
/** {@inheritDoc} */ @Override/*from ww w.java 2 s . c o m*/ public Collection<HadoopInputSplit> input() throws IgniteCheckedException { Thread.currentThread().setContextClassLoader(jobConf.getClassLoader()); try { String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR); if (jobDirPath == null) { // Probably job was submitted not by hadoop client. // Assume that we have needed classes and try to generate input splits ourself. if (jobConf.getUseNewMapper()) return HadoopV2Splitter.splitJob(jobCtx); else return HadoopV1Splitter.splitJob(jobConf); } Path jobDir = new Path(jobDirPath); try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) { JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs, jobConf, jobDir); if (F.isEmpty(metaInfos)) throw new IgniteCheckedException("No input splits found."); Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir); try (FSDataInputStream in = fs.open(splitsFile)) { Collection<HadoopInputSplit> res = new ArrayList<>(metaInfos.length); for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) { long off = metaInfo.getStartOffset(); String[] hosts = metaInfo.getLocations(); in.seek(off); String clsName = Text.readString(in); HadoopFileBlock block = HadoopV1Splitter.readFileBlock(clsName, in, hosts); if (block == null) block = HadoopV2Splitter.readFileBlock(clsName, in, hosts); res.add(block != null ? block : new HadoopExternalSplit(hosts, off)); } return res; } } catch (Throwable e) { throw transformException(e); } } finally { Thread.currentThread().setContextClassLoader(null); } }
From source file:org.apache.tez.mapreduce.client.YARNRunner.java
License:Apache License
private List<TaskLocationHint> getMapLocationHintsFromInputSplits(JobID jobId, FileSystem fs, Configuration conf, String jobSubmitDir) throws IOException { TaskSplitMetaInfo[] splitsInfo = SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, conf, new Path(jobSubmitDir)); int splitsCount = splitsInfo.length; List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splitsCount); for (int i = 0; i < splitsCount; ++i) { TaskLocationHint locationHint = TaskLocationHint .createTaskLocationHint(new HashSet<String>(Arrays.asList(splitsInfo[i].getLocations())), null); locationHints.add(locationHint); }//from w w w .j a v a 2s . c o m return locationHints; }
From source file:org.apache.tez.mapreduce.hadoop.TestMRInputHelpers.java
License:Apache License
private void verifyLocationHints(Path inputSplitsDir, List<TaskLocationHint> actual) throws Exception { JobID jobId = new JobID("dummy", 1); JobSplit.TaskSplitMetaInfo[] splitsInfo = SplitMetaInfoReader.readSplitMetaInfo(jobId, remoteFs, conf, inputSplitsDir);/*from w w w. j a v a 2 s. com*/ int splitsCount = splitsInfo.length; List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splitsCount); for (int i = 0; i < splitsCount; ++i) { locationHints.add(TaskLocationHint.createTaskLocationHint( new HashSet<String>(Arrays.asList(splitsInfo[i].getLocations())), null)); } Assert.assertEquals(locationHints, actual); }
From source file:org.apache.tez.mapreduce.YARNRunner.java
License:Apache License
private List<TaskLocationHint> getMapLocationHintsFromInputSplits(JobID jobId, FileSystem fs, Configuration conf, String jobSubmitDir) throws IOException { TaskSplitMetaInfo[] splitsInfo = SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, conf, new Path(jobSubmitDir)); int splitsCount = splitsInfo.length; List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splitsCount); for (int i = 0; i < splitsCount; ++i) { TaskLocationHint locationHint = new TaskLocationHint( new HashSet<String>(Arrays.asList(splitsInfo[i].getLocations())), null); locationHints.add(locationHint); }//from w w w . j av a 2 s .co m return locationHints; }
From source file:org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2Job.java
License:Open Source License
/** {@inheritDoc} */ @Override// w ww .j a v a 2s. c om public Collection<GridHadoopInputSplit> input() throws GridException { Thread.currentThread().setContextClassLoader(jobConf.getClassLoader()); try { String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR); if (jobDirPath == null) { // Probably job was submitted not by hadoop client. // Assume that we have needed classes and try to generate input splits ourself. if (jobConf.getUseNewMapper()) return GridHadoopV2Splitter.splitJob(jobCtx); else return GridHadoopV1Splitter.splitJob(jobConf); } Path jobDir = new Path(jobDirPath); try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) { JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs, jobConf, jobDir); if (F.isEmpty(metaInfos)) throw new GridException("No input splits found."); Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir); try (FSDataInputStream in = fs.open(splitsFile)) { Collection<GridHadoopInputSplit> res = new ArrayList<>(metaInfos.length); for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) { long off = metaInfo.getStartOffset(); String[] hosts = metaInfo.getLocations(); in.seek(off); String clsName = Text.readString(in); GridHadoopFileBlock block = GridHadoopV1Splitter.readFileBlock(clsName, in, hosts); if (block == null) block = GridHadoopV2Splitter.readFileBlock(clsName, in, hosts); res.add(block != null ? block : new GridHadoopExternalSplit(hosts, off)); } return res; } } catch (Throwable e) { throw transformException(e); } } finally { Thread.currentThread().setContextClassLoader(null); } }