List of usage examples for org.apache.hadoop.mapreduce JobSubmissionFiles getJobSplitFile
public static Path getJobSplitFile(Path jobSubmissionDir)
From source file:com.scaleoutsoftware.soss.hserver.hadoop.SubmittedJob.java
License:Apache License
SubmittedJob(JobID jobID, String jobSubmitDirectory, Credentials credentials, Configuration configuration) throws IOException, InterruptedException { this.jobID = jobID; this.configuration = configuration; this.jobSubmitDirectoryPath = new Path(jobSubmitDirectory); this.fileSystem = FileSystem.get(configuration); JobSplit.TaskSplitMetaInfo splitInfo[] = SplitMetaInfoReader.readSplitMetaInfo(jobID, fileSystem, configuration, jobSubmitDirectoryPath); Path jobSplitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDirectoryPath); FSDataInputStream stream = fileSystem.open(jobSplitFile); for (JobSplit.TaskSplitMetaInfo info : splitInfo) { Object split = getSplitDetails(stream, info.getStartOffset(), configuration); inputSplits.add(split);//from ww w . j a v a2 s . c om splitLocations.put(split, info.getLocations()); LOG.info("Adding split for execution. Split = " + split + " Locations: " + Arrays.toString(splitLocations.get(split))); } stream.close(); jobConfPath = JobSubmissionFiles.getJobConfPath(jobSubmitDirectoryPath); if (!fileSystem.exists(jobConfPath)) { throw new IOException("Cannot find job.xml. Path = " + jobConfPath); } //We cannot just use JobConf(Path) constructor, //because it does not work for HDFS locations. //The comment in Configuration#loadResource() states, //for the case when the Path to the resource is provided: //"Can't use FileSystem API or we get an infinite loop //since FileSystem uses Configuration API. Use java.io.File instead." // //Workaround: construct empty Configuration, provide it with //input stream and give it to JobConf constructor. FSDataInputStream jobConfStream = fileSystem.open(jobConfPath); Configuration jobXML = new Configuration(false); jobXML.addResource(jobConfStream); //The configuration does not actually gets read before we attempt to //read some property. Call to #size() will make Configuration to //read the input stream. jobXML.size(); //We are done with input stream, can close it now. jobConfStream.close(); jobConf = new JobConf(jobXML); newApi = jobConf.getUseNewMapper(); jobStatus = new JobStatus(jobID, 0f, 0f, 0f, 0f, JobStatus.State.RUNNING, JobPriority.NORMAL, UserGroupInformation.getCurrentUser().getUserName(), jobID.toString(), jobConfPath.toString(), ""); }
From source file:ml.shifu.guagua.yarn.GuaguaSplitWriter.java
License:Apache License
public static <T extends InputSplit> void createSplitFiles(Path jobSubmitDir, Configuration conf, FileSystem fs, T[] splits) throws IOException, InterruptedException { FSDataOutputStream out = createFile(fs, JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf); SplitMetaInfo[] info = writeNewSplits(conf, splits, out); out.close();/*from ww w . j a va 2s . c o m*/ writeJobSplitMetaInfo(fs, JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir), new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion, info); }
From source file:ml.shifu.guagua.yarn.GuaguaSplitWriter.java
License:Apache License
public static void createSplitFiles(Path jobSubmitDir, Configuration conf, FileSystem fs, org.apache.hadoop.mapred.InputSplit[] splits) throws IOException { FSDataOutputStream out = createFile(fs, JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf); SplitMetaInfo[] info = writeOldSplits(splits, out, conf); out.close();//from w w w .j av a 2s . com writeJobSplitMetaInfo(fs, JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir), new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion, info); }
From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2Job.java
License:Apache License
/** {@inheritDoc} */ @Override/*from www . j a v a2 s . c om*/ public Collection<HadoopInputSplit> input() { ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(jobConf.getClassLoader()); try { String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR); if (jobDirPath == null) { // Probably job was submitted not by hadoop client. // Assume that we have needed classes and try to generate input splits ourself. if (jobConf.getUseNewMapper()) return HadoopV2Splitter.splitJob(jobCtx); else return HadoopV1Splitter.splitJob(jobConf); } Path jobDir = new Path(jobDirPath); try { FileSystem fs = fileSystem(jobDir.toUri(), jobConf); JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs, jobConf, jobDir); if (F.isEmpty(metaInfos)) throw new IgniteCheckedException("No input splits found."); Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir); try (FSDataInputStream in = fs.open(splitsFile)) { Collection<HadoopInputSplit> res = new ArrayList<>(metaInfos.length); for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) { long off = metaInfo.getStartOffset(); String[] hosts = metaInfo.getLocations(); in.seek(off); String clsName = Text.readString(in); HadoopFileBlock block = HadoopV1Splitter.readFileBlock(clsName, in, hosts); if (block == null) block = HadoopV2Splitter.readFileBlock(clsName, in, hosts); res.add(block != null ? block : new HadoopExternalSplit(hosts, off)); } return res; } } catch (Throwable e) { if (e instanceof Error) throw (Error) e; else throw transformException(e); } } catch (IgniteCheckedException e) { throw new IgniteException(e); } finally { HadoopCommonUtils.restoreContextClassLoader(oldLdr); } }
From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2TaskContext.java
License:Apache License
/** * @param split External split./*from w w w. j a v a2s .co m*/ * @return Native input split. * @throws IgniteCheckedException If failed. */ @SuppressWarnings("unchecked") private Object readExternalSplit(HadoopExternalSplit split) throws IgniteCheckedException { Path jobDir = new Path(jobConf().get(MRJobConfig.MAPREDUCE_JOB_DIR)); FileSystem fs; try { fs = fileSystemForMrUserWithCaching(jobDir.toUri(), jobConf(), fsMap); } catch (IOException e) { throw new IgniteCheckedException(e); } try (FSDataInputStream in = fs.open(JobSubmissionFiles.getJobSplitFile(jobDir))) { in.seek(split.offset()); String clsName = Text.readString(in); Class<?> cls = jobConf().getClassByName(clsName); assert cls != null; Serialization serialization = new SerializationFactory(jobConf()).getSerialization(cls); Deserializer deserializer = serialization.getDeserializer(cls); deserializer.open(in); Object res = deserializer.deserialize(null); deserializer.close(); assert res != null; return res; } catch (IOException | ClassNotFoundException e) { throw new IgniteCheckedException(e); } }
From source file:org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2Job.java
License:Apache License
/** {@inheritDoc} */ @Override/* w w w. j a v a 2 s.c om*/ public Collection<GridHadoopInputSplit> input() throws IgniteCheckedException { Thread.currentThread().setContextClassLoader(jobConf.getClassLoader()); try { String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR); if (jobDirPath == null) { // Probably job was submitted not by hadoop client. // Assume that we have needed classes and try to generate input splits ourself. if (jobConf.getUseNewMapper()) return GridHadoopV2Splitter.splitJob(jobCtx); else return GridHadoopV1Splitter.splitJob(jobConf); } Path jobDir = new Path(jobDirPath); try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) { JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs, jobConf, jobDir); if (F.isEmpty(metaInfos)) throw new IgniteCheckedException("No input splits found."); Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir); try (FSDataInputStream in = fs.open(splitsFile)) { Collection<GridHadoopInputSplit> res = new ArrayList<>(metaInfos.length); for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) { long off = metaInfo.getStartOffset(); String[] hosts = metaInfo.getLocations(); in.seek(off); String clsName = Text.readString(in); GridHadoopFileBlock block = GridHadoopV1Splitter.readFileBlock(clsName, in, hosts); if (block == null) block = GridHadoopV2Splitter.readFileBlock(clsName, in, hosts); res.add(block != null ? block : new GridHadoopExternalSplit(hosts, off)); } return res; } } catch (Throwable e) { throw transformException(e); } } finally { Thread.currentThread().setContextClassLoader(null); } }
From source file:org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2TaskContext.java
License:Apache License
/** * @param split External split.// www.ja v a2 s . com * @return Native input split. * @throws IgniteCheckedException If failed. */ @SuppressWarnings("unchecked") private Object readExternalSplit(GridHadoopExternalSplit split) throws IgniteCheckedException { Path jobDir = new Path(jobConf().get(MRJobConfig.MAPREDUCE_JOB_DIR)); try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf()); FSDataInputStream in = fs.open(JobSubmissionFiles.getJobSplitFile(jobDir))) { in.seek(split.offset()); String clsName = Text.readString(in); Class<?> cls = jobConf().getClassByName(clsName); assert cls != null; Serialization serialization = new SerializationFactory(jobConf()).getSerialization(cls); Deserializer deserializer = serialization.getDeserializer(cls); deserializer.open(in); Object res = deserializer.deserialize(null); deserializer.close(); assert res != null; return res; } catch (IOException | ClassNotFoundException e) { throw new IgniteCheckedException(e); } }
From source file:org.apache.ignite.internal.processors.hadoop.v2.HadoopV2Job.java
License:Apache License
/** {@inheritDoc} */ @Override//from www .j av a 2 s . c o m public Collection<HadoopInputSplit> input() throws IgniteCheckedException { Thread.currentThread().setContextClassLoader(jobConf.getClassLoader()); try { String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR); if (jobDirPath == null) { // Probably job was submitted not by hadoop client. // Assume that we have needed classes and try to generate input splits ourself. if (jobConf.getUseNewMapper()) return HadoopV2Splitter.splitJob(jobCtx); else return HadoopV1Splitter.splitJob(jobConf); } Path jobDir = new Path(jobDirPath); try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) { JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs, jobConf, jobDir); if (F.isEmpty(metaInfos)) throw new IgniteCheckedException("No input splits found."); Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir); try (FSDataInputStream in = fs.open(splitsFile)) { Collection<HadoopInputSplit> res = new ArrayList<>(metaInfos.length); for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) { long off = metaInfo.getStartOffset(); String[] hosts = metaInfo.getLocations(); in.seek(off); String clsName = Text.readString(in); HadoopFileBlock block = HadoopV1Splitter.readFileBlock(clsName, in, hosts); if (block == null) block = HadoopV2Splitter.readFileBlock(clsName, in, hosts); res.add(block != null ? block : new HadoopExternalSplit(hosts, off)); } return res; } } catch (Throwable e) { throw transformException(e); } } finally { Thread.currentThread().setContextClassLoader(null); } }
From source file:org.apache.ignite.internal.processors.hadoop.v2.HadoopV2TaskContext.java
License:Apache License
/** * @param split External split.// w ww .ja v a2 s. com * @return Native input split. * @throws IgniteCheckedException If failed. */ @SuppressWarnings("unchecked") private Object readExternalSplit(HadoopExternalSplit split) throws IgniteCheckedException { Path jobDir = new Path(jobConf().get(MRJobConfig.MAPREDUCE_JOB_DIR)); try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf()); FSDataInputStream in = fs.open(JobSubmissionFiles.getJobSplitFile(jobDir))) { in.seek(split.offset()); String clsName = Text.readString(in); Class<?> cls = jobConf().getClassByName(clsName); assert cls != null; Serialization serialization = new SerializationFactory(jobConf()).getSerialization(cls); Deserializer deserializer = serialization.getDeserializer(cls); deserializer.open(in); Object res = deserializer.deserialize(null); deserializer.close(); assert res != null; return res; } catch (IOException | ClassNotFoundException e) { throw new IgniteCheckedException(e); } }
From source file:org.apache.tez.mapreduce.hadoop.MRInputHelpers.java
License:Apache License
/** * Generate new-api mapreduce InputFormat splits * @param jobContext JobContext required by InputFormat * @param inputSplitDir Directory in which to generate splits information * * @return InputSplitInfo containing the split files' information and the * location hints for each split generated to be used to determining parallelism of * the map stage.// w w w.j ava 2 s . c o m * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ private static InputSplitInfoDisk writeNewSplits(JobContext jobContext, Path inputSplitDir) throws IOException, InterruptedException, ClassNotFoundException { org.apache.hadoop.mapreduce.InputSplit[] splits = generateNewSplits(jobContext, false, 0); Configuration conf = jobContext.getConfiguration(); JobSplitWriter.createSplitFiles(inputSplitDir, conf, inputSplitDir.getFileSystem(conf), splits); List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splits.length); for (int i = 0; i < splits.length; ++i) { locationHints.add(TaskLocationHint .createTaskLocationHint(new HashSet<String>(Arrays.asList(splits[i].getLocations())), null)); } return new InputSplitInfoDisk(JobSubmissionFiles.getJobSplitFile(inputSplitDir), JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir), splits.length, locationHints, jobContext.getCredentials()); }