List of usage examples for org.apache.hadoop.mapreduce JobSubmissionFiles getJobSplitFile
public static Path getJobSplitFile(Path jobSubmissionDir)
From source file:com.scaleoutsoftware.soss.hserver.hadoop.SubmittedJob.java
License:Apache License
/**
 * Loads a previously submitted job from its submission directory.
 * <p>
 * Reads the split meta information and the serialized splits from the job split file,
 * records each split together with its locations, then loads {@code job.xml} into a
 * {@link JobConf} and initializes the job status as {@code RUNNING}.
 *
 * @param jobID              ID of the submitted job
 * @param jobSubmitDirectory path of the job submission directory
 * @param credentials        job credentials (not read by this constructor)
 * @param configuration      configuration used to obtain the file system and to read the splits
 * @throws IOException          if the split files cannot be read or {@code job.xml} does not exist
 * @throws InterruptedException declared by the original signature; propagated from the calls made here
 */
SubmittedJob(JobID jobID, String jobSubmitDirectory, Credentials credentials, Configuration configuration)
        throws IOException, InterruptedException {
    this.jobID = jobID;
    this.configuration = configuration;
    this.jobSubmitDirectoryPath = new Path(jobSubmitDirectory);
    this.fileSystem = FileSystem.get(configuration);

    JobSplit.TaskSplitMetaInfo splitInfo[] = SplitMetaInfoReader.readSplitMetaInfo(jobID, fileSystem,
            configuration, jobSubmitDirectoryPath);

    Path jobSplitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDirectoryPath);

    // try-with-resources: the split stream is closed even if reading a split fails.
    try (FSDataInputStream stream = fileSystem.open(jobSplitFile)) {
        for (JobSplit.TaskSplitMetaInfo info : splitInfo) {
            Object split = getSplitDetails(stream, info.getStartOffset(), configuration);
            inputSplits.add(split);
            splitLocations.put(split, info.getLocations());
            LOG.info("Adding split for execution. Split = " + split + " Locations: "
                    + Arrays.toString(splitLocations.get(split)));
        }
    }

    jobConfPath = JobSubmissionFiles.getJobConfPath(jobSubmitDirectoryPath);

    if (!fileSystem.exists(jobConfPath)) {
        throw new IOException("Cannot find job.xml. Path = " + jobConfPath);
    }

    // We cannot just use the JobConf(Path) constructor,
    // because it does not work for HDFS locations.
    // The comment in Configuration#loadResource() states,
    // for the case when the Path to the resource is provided:
    // "Can't use FileSystem API or we get an infinite loop
    // since FileSystem uses Configuration API. Use java.io.File instead."
    //
    // Workaround: construct an empty Configuration, provide it with
    // an input stream and give it to the JobConf constructor.
    Configuration jobXML = new Configuration(false);

    try (FSDataInputStream jobConfStream = fileSystem.open(jobConfPath)) {
        jobXML.addResource(jobConfStream);

        // The configuration is not actually read until we attempt to
        // read some property. Calling #size() makes Configuration
        // consume the input stream, so it must happen before the stream is closed.
        jobXML.size();
    }

    jobConf = new JobConf(jobXML);
    newApi = jobConf.getUseNewMapper();

    jobStatus = new JobStatus(jobID, 0f, 0f, 0f, 0f, JobStatus.State.RUNNING, JobPriority.NORMAL,
            UserGroupInformation.getCurrentUser().getUserName(), jobID.toString(), jobConfPath.toString(), "");
}
From source file:ml.shifu.guagua.yarn.GuaguaSplitWriter.java
License:Apache License
/**
 * Writes the job split file and the split meta-info file for new-API input splits.
 *
 * @param jobSubmitDir job submission directory in which both files are created
 * @param conf         configuration passed to the file creation and split writing helpers
 * @param fs           file system on which the files are created
 * @param splits       input splits to serialize
 * @param <T>          concrete input split type
 * @throws IOException          if creating or writing either file fails
 * @throws InterruptedException propagated from writing the splits
 */
public static <T extends InputSplit> void createSplitFiles(Path jobSubmitDir, Configuration conf, FileSystem fs,
        T[] splits) throws IOException, InterruptedException {
    SplitMetaInfo[] info;

    // try-with-resources: the split file stream is closed even if writing the splits fails.
    try (FSDataOutputStream out = createFile(fs, JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf)) {
        info = writeNewSplits(conf, splits, out);
    }

    // The meta-info file is written only after the split file has been fully written and closed.
    writeJobSplitMetaInfo(fs, JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir),
            new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion, info);
}
From source file:ml.shifu.guagua.yarn.GuaguaSplitWriter.java
License:Apache License
public static void createSplitFiles(Path jobSubmitDir, Configuration conf, FileSystem fs, org.apache.hadoop.mapred.InputSplit[] splits) throws IOException { FSDataOutputStream out = createFile(fs, JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf); SplitMetaInfo[] info = writeOldSplits(splits, out, conf); out.close();//from w w w .j av a 2s . com writeJobSplitMetaInfo(fs, JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir), new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion, info); }
From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2Job.java
License:Apache License
/** {@inheritDoc} */ @Override/*from www . j a v a2 s . c om*/ public Collection<HadoopInputSplit> input() { ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(jobConf.getClassLoader()); try { String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR); if (jobDirPath == null) { // Probably job was submitted not by hadoop client. // Assume that we have needed classes and try to generate input splits ourself. if (jobConf.getUseNewMapper()) return HadoopV2Splitter.splitJob(jobCtx); else return HadoopV1Splitter.splitJob(jobConf); } Path jobDir = new Path(jobDirPath); try { FileSystem fs = fileSystem(jobDir.toUri(), jobConf); JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs, jobConf, jobDir); if (F.isEmpty(metaInfos)) throw new IgniteCheckedException("No input splits found."); Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir); try (FSDataInputStream in = fs.open(splitsFile)) { Collection<HadoopInputSplit> res = new ArrayList<>(metaInfos.length); for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) { long off = metaInfo.getStartOffset(); String[] hosts = metaInfo.getLocations(); in.seek(off); String clsName = Text.readString(in); HadoopFileBlock block = HadoopV1Splitter.readFileBlock(clsName, in, hosts); if (block == null) block = HadoopV2Splitter.readFileBlock(clsName, in, hosts); res.add(block != null ? block : new HadoopExternalSplit(hosts, off)); } return res; } } catch (Throwable e) { if (e instanceof Error) throw (Error) e; else throw transformException(e); } } catch (IgniteCheckedException e) { throw new IgniteException(e); } finally { HadoopCommonUtils.restoreContextClassLoader(oldLdr); } }
From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2TaskContext.java
License:Apache License
/** * @param split External split./*from w w w. j a v a2s .co m*/ * @return Native input split. * @throws IgniteCheckedException If failed. */ @SuppressWarnings("unchecked") private Object readExternalSplit(HadoopExternalSplit split) throws IgniteCheckedException { Path jobDir = new Path(jobConf().get(MRJobConfig.MAPREDUCE_JOB_DIR)); FileSystem fs; try { fs = fileSystemForMrUserWithCaching(jobDir.toUri(), jobConf(), fsMap); } catch (IOException e) { throw new IgniteCheckedException(e); } try (FSDataInputStream in = fs.open(JobSubmissionFiles.getJobSplitFile(jobDir))) { in.seek(split.offset()); String clsName = Text.readString(in); Class<?> cls = jobConf().getClassByName(clsName); assert cls != null; Serialization serialization = new SerializationFactory(jobConf()).getSerialization(cls); Deserializer deserializer = serialization.getDeserializer(cls); deserializer.open(in); Object res = deserializer.deserialize(null); deserializer.close(); assert res != null; return res; } catch (IOException | ClassNotFoundException e) { throw new IgniteCheckedException(e); } }
From source file:org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2Job.java
License:Apache License
/** {@inheritDoc} */ @Override/* w w w. j a v a 2 s.c om*/ public Collection<GridHadoopInputSplit> input() throws IgniteCheckedException { Thread.currentThread().setContextClassLoader(jobConf.getClassLoader()); try { String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR); if (jobDirPath == null) { // Probably job was submitted not by hadoop client. // Assume that we have needed classes and try to generate input splits ourself. if (jobConf.getUseNewMapper()) return GridHadoopV2Splitter.splitJob(jobCtx); else return GridHadoopV1Splitter.splitJob(jobConf); } Path jobDir = new Path(jobDirPath); try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) { JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs, jobConf, jobDir); if (F.isEmpty(metaInfos)) throw new IgniteCheckedException("No input splits found."); Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir); try (FSDataInputStream in = fs.open(splitsFile)) { Collection<GridHadoopInputSplit> res = new ArrayList<>(metaInfos.length); for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) { long off = metaInfo.getStartOffset(); String[] hosts = metaInfo.getLocations(); in.seek(off); String clsName = Text.readString(in); GridHadoopFileBlock block = GridHadoopV1Splitter.readFileBlock(clsName, in, hosts); if (block == null) block = GridHadoopV2Splitter.readFileBlock(clsName, in, hosts); res.add(block != null ? block : new GridHadoopExternalSplit(hosts, off)); } return res; } } catch (Throwable e) { throw transformException(e); } } finally { Thread.currentThread().setContextClassLoader(null); } }
From source file:org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2TaskContext.java
License:Apache License
/**
 * Reads a native input split from the job split file at the offset recorded in the
 * given external split.
 *
 * @param split External split.
 * @return Native input split.
 * @throws IgniteCheckedException If failed.
 */
@SuppressWarnings("unchecked")
private Object readExternalSplit(GridHadoopExternalSplit split) throws IgniteCheckedException {
    Path jobDir = new Path(jobConf().get(MRJobConfig.MAPREDUCE_JOB_DIR));

    // NOTE(review): the FileSystem obtained from FileSystem.get() is closed when this block exits;
    // confirm that the instance is not shared with other users.
    try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf())) {
        try (FSDataInputStream in = fs.open(JobSubmissionFiles.getJobSplitFile(jobDir))) {
            // The serialized split starts with its class name at the recorded offset.
            in.seek(split.offset());

            String splitClsName = Text.readString(in);

            Class<?> splitCls = jobConf().getClassByName(splitClsName);

            assert splitCls != null;

            // Raw types are intentional: the split class is only known at run time.
            Serialization serialization = new SerializationFactory(jobConf()).getSerialization(splitCls);

            Deserializer deserializer = serialization.getDeserializer(splitCls);

            deserializer.open(in);

            Object nativeSplit = deserializer.deserialize(null);

            deserializer.close();

            assert nativeSplit != null;

            return nativeSplit;
        }
    }
    catch (IOException | ClassNotFoundException e) {
        throw new IgniteCheckedException(e);
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.v2.HadoopV2Job.java
License:Apache License
/** {@inheritDoc} */ @Override//from www .j av a 2 s . c o m public Collection<HadoopInputSplit> input() throws IgniteCheckedException { Thread.currentThread().setContextClassLoader(jobConf.getClassLoader()); try { String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR); if (jobDirPath == null) { // Probably job was submitted not by hadoop client. // Assume that we have needed classes and try to generate input splits ourself. if (jobConf.getUseNewMapper()) return HadoopV2Splitter.splitJob(jobCtx); else return HadoopV1Splitter.splitJob(jobConf); } Path jobDir = new Path(jobDirPath); try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) { JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs, jobConf, jobDir); if (F.isEmpty(metaInfos)) throw new IgniteCheckedException("No input splits found."); Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir); try (FSDataInputStream in = fs.open(splitsFile)) { Collection<HadoopInputSplit> res = new ArrayList<>(metaInfos.length); for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) { long off = metaInfo.getStartOffset(); String[] hosts = metaInfo.getLocations(); in.seek(off); String clsName = Text.readString(in); HadoopFileBlock block = HadoopV1Splitter.readFileBlock(clsName, in, hosts); if (block == null) block = HadoopV2Splitter.readFileBlock(clsName, in, hosts); res.add(block != null ? block : new HadoopExternalSplit(hosts, off)); } return res; } } catch (Throwable e) { throw transformException(e); } } finally { Thread.currentThread().setContextClassLoader(null); } }
From source file:org.apache.ignite.internal.processors.hadoop.v2.HadoopV2TaskContext.java
License:Apache License
/**
 * Reads a native input split from the job split file at the offset recorded in the
 * given external split.
 *
 * @param split External split.
 * @return Native input split.
 * @throws IgniteCheckedException If failed.
 */
@SuppressWarnings("unchecked")
private Object readExternalSplit(HadoopExternalSplit split) throws IgniteCheckedException {
    Path jobDir = new Path(jobConf().get(MRJobConfig.MAPREDUCE_JOB_DIR));

    // NOTE(review): the FileSystem obtained from FileSystem.get() is closed when this block exits;
    // confirm that the instance is not shared with other users.
    try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf())) {
        try (FSDataInputStream in = fs.open(JobSubmissionFiles.getJobSplitFile(jobDir))) {
            // The serialized split starts with its class name at the recorded offset.
            in.seek(split.offset());

            String splitClsName = Text.readString(in);

            Class<?> splitCls = jobConf().getClassByName(splitClsName);

            assert splitCls != null;

            // Raw types are intentional: the split class is only known at run time.
            Serialization serialization = new SerializationFactory(jobConf()).getSerialization(splitCls);

            Deserializer deserializer = serialization.getDeserializer(splitCls);

            deserializer.open(in);

            Object nativeSplit = deserializer.deserialize(null);

            deserializer.close();

            assert nativeSplit != null;

            return nativeSplit;
        }
    }
    catch (IOException | ClassNotFoundException e) {
        throw new IgniteCheckedException(e);
    }
}
From source file:org.apache.tez.mapreduce.hadoop.MRInputHelpers.java
License:Apache License
/**
 * Generates new-API mapreduce InputFormat splits and writes them to disk.
 *
 * @param jobContext    JobContext required by InputFormat
 * @param inputSplitDir Directory in which to generate splits information
 *
 * @return InputSplitInfo containing the split files' information and the
 *         location hints for each generated split, to be used for determining
 *         the parallelism of the map stage.
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static InputSplitInfoDisk writeNewSplits(JobContext jobContext, Path inputSplitDir)
        throws IOException, InterruptedException, ClassNotFoundException {
    org.apache.hadoop.mapreduce.InputSplit[] splits = generateNewSplits(jobContext, false, 0);
    Configuration conf = jobContext.getConfiguration();

    // Persist the split file and its meta-info file under the split directory.
    JobSplitWriter.createSplitFiles(inputSplitDir, conf, inputSplitDir.getFileSystem(conf), splits);

    // One location hint per split, built from the hosts the split reports; no rack hints.
    List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splits.length);
    for (org.apache.hadoop.mapreduce.InputSplit split : splits) {
        HashSet<String> hosts = new HashSet<String>(Arrays.asList(split.getLocations()));
        locationHints.add(TaskLocationHint.createTaskLocationHint(hosts, null));
    }

    Path splitFile = JobSubmissionFiles.getJobSplitFile(inputSplitDir);
    Path splitMetaFile = JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir);

    return new InputSplitInfoDisk(splitFile, splitMetaFile, splits.length, locationHints,
            jobContext.getCredentials());
}