Usage examples for org.apache.hadoop.mapreduce.split.SplitMetaInfoReader#readSplitMetaInfo
public static JobSplit.TaskSplitMetaInfo[] readSplitMetaInfo(JobID jobId, FileSystem fs, Configuration conf, Path jobSubmitDir) throws IOException
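Before the per-project examples, here is a minimal, self-contained sketch of calling the method directly. The job ID and staging directory below are hypothetical placeholders, and SplitMetaInfoReader and JobSplit are Hadoop-internal API, so treat this as an illustrative sketch rather than supported usage:

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.split.JobSplit;
import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader;

public class ReadSplitMetaInfoExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical job ID and job submit directory; substitute real values.
        JobID jobId = JobID.forName("job_1400000000000_0001");
        Configuration conf = new Configuration();
        Path jobSubmitDir = new Path("/tmp/hadoop-yarn/staging/job_1400000000000_0001");
        FileSystem fs = jobSubmitDir.getFileSystem(conf);

        // Reads the split meta-info file from the job submit directory.
        JobSplit.TaskSplitMetaInfo[] metaInfos =
                SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, conf, jobSubmitDir);

        for (JobSplit.TaskSplitMetaInfo info : metaInfos) {
            // Each entry carries the split's preferred hosts and the offset of
            // the serialized split in the job.split file.
            System.out.println(Arrays.toString(info.getLocations()) + " @ " + info.getStartOffset());
        }
    }
}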
From source file: com.scaleoutsoftware.soss.hserver.hadoop.SubmittedJob.java
License: Apache License

SubmittedJob(JobID jobID, String jobSubmitDirectory, Credentials credentials,
        Configuration configuration) throws IOException, InterruptedException {
    this.jobID = jobID;
    this.configuration = configuration;
    this.jobSubmitDirectoryPath = new Path(jobSubmitDirectory);
    this.fileSystem = FileSystem.get(configuration);

    JobSplit.TaskSplitMetaInfo[] splitInfo = SplitMetaInfoReader.readSplitMetaInfo(jobID, fileSystem,
            configuration, jobSubmitDirectoryPath);

    Path jobSplitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDirectoryPath);
    FSDataInputStream stream = fileSystem.open(jobSplitFile);

    for (JobSplit.TaskSplitMetaInfo info : splitInfo) {
        Object split = getSplitDetails(stream, info.getStartOffset(), configuration);
        inputSplits.add(split);
        splitLocations.put(split, info.getLocations());
        LOG.info("Adding split for execution. Split = " + split + " Locations: "
                + Arrays.toString(splitLocations.get(split)));
    }

    stream.close();

    jobConfPath = JobSubmissionFiles.getJobConfPath(jobSubmitDirectoryPath);
    if (!fileSystem.exists(jobConfPath)) {
        throw new IOException("Cannot find job.xml. Path = " + jobConfPath);
    }

    // We cannot just use the JobConf(Path) constructor, because it does not work
    // for HDFS locations. The comment in Configuration#loadResource() states, for
    // the case when the Path to the resource is provided:
    // "Can't use FileSystem API or we get an infinite loop
    // since FileSystem uses Configuration API. Use java.io.File instead."
    //
    // Workaround: construct an empty Configuration, feed it the input stream,
    // and give it to the JobConf constructor.
    FSDataInputStream jobConfStream = fileSystem.open(jobConfPath);
    Configuration jobXML = new Configuration(false);
    jobXML.addResource(jobConfStream);

    // The configuration is not actually read until we attempt to read some
    // property. A call to #size() forces the Configuration to read the input stream.
    jobXML.size();

    // We are done with the input stream and can close it now.
    jobConfStream.close();

    jobConf = new JobConf(jobXML);
    newApi = jobConf.getUseNewMapper();

    jobStatus = new JobStatus(jobID, 0f, 0f, 0f, 0f, JobStatus.State.RUNNING, JobPriority.NORMAL,
            UserGroupInformation.getCurrentUser().getUserName(), jobID.toString(), jobConfPath.toString(), "");
}
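A note on the pattern above: readSplitMetaInfo returns only the split metadata (locations plus an offset), while the serialized splits themselves live in the job.split file obtained via JobSubmissionFiles.getJobSplitFile. That is why the constructor opens that file separately and reads each serialized split at info.getStartOffset().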
From source file: org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2Job.java
License: Apache License

/** {@inheritDoc} */
@Override public Collection<HadoopInputSplit> input() {
    ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) {
            // The job was probably not submitted by the Hadoop client.
            // Assume the needed classes are available and try to generate the input splits ourselves.
            if (jobConf.getUseNewMapper())
                return HadoopV2Splitter.splitJob(jobCtx);
            else
                return HadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try {
            FileSystem fs = fileSystem(jobDir.toUri(), jobConf);

            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs,
                jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new IgniteCheckedException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<HadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();
                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    HadoopFileBlock block = HadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = HadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new HadoopExternalSplit(hosts, off));
                }

                return res;
            }
        }
        catch (Throwable e) {
            if (e instanceof Error)
                throw (Error) e;
            else
                throw transformException(e);
        }
    }
    catch (IgniteCheckedException e) {
        throw new IgniteException(e);
    }
    finally {
        HadoopCommonUtils.restoreContextClassLoader(oldLdr);
    }
}
From source file: org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2Job.java
License: Apache License

/** {@inheritDoc} */
@Override public Collection<GridHadoopInputSplit> input() throws IgniteCheckedException {
    Thread.currentThread().setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) {
            // The job was probably not submitted by the Hadoop client.
            // Assume the needed classes are available and try to generate the input splits ourselves.
            if (jobConf.getUseNewMapper())
                return GridHadoopV2Splitter.splitJob(jobCtx);
            else
                return GridHadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) {
            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs,
                jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new IgniteCheckedException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<GridHadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();
                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    GridHadoopFileBlock block = GridHadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = GridHadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new GridHadoopExternalSplit(hosts, off));
                }

                return res;
            }
        }
        catch (Throwable e) {
            throw transformException(e);
        }
    }
    finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}
From source file: org.apache.ignite.internal.processors.hadoop.v2.HadoopV2Job.java
License: Apache License

/** {@inheritDoc} */
@Override public Collection<HadoopInputSplit> input() throws IgniteCheckedException {
    Thread.currentThread().setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) {
            // The job was probably not submitted by the Hadoop client.
            // Assume the needed classes are available and try to generate the input splits ourselves.
            if (jobConf.getUseNewMapper())
                return HadoopV2Splitter.splitJob(jobCtx);
            else
                return HadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) {
            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs,
                jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new IgniteCheckedException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<HadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();
                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    HadoopFileBlock block = HadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = HadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new HadoopExternalSplit(hosts, off));
                }

                return res;
            }
        }
        catch (Throwable e) {
            throw transformException(e);
        }
    }
    finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}
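The three Apache Ignite variants above share the same split-reading logic and differ mainly in housekeeping: the impl.v2 version obtains its FileSystem via fileSystem(jobDir.toUri(), jobConf) without closing it and rethrows Errors as-is, while the two older versions close the FileSystem with try-with-resources and reset the context class loader to null instead of restoring the previous one.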
From source file: org.apache.tez.mapreduce.client.YARNRunner.java
License: Apache License

private List<TaskLocationHint> getMapLocationHintsFromInputSplits(JobID jobId, FileSystem fs,
        Configuration conf, String jobSubmitDir) throws IOException {
    TaskSplitMetaInfo[] splitsInfo = SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, conf,
            new Path(jobSubmitDir));
    int splitsCount = splitsInfo.length;
    List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splitsCount);
    for (int i = 0; i < splitsCount; ++i) {
        TaskLocationHint locationHint = TaskLocationHint.createTaskLocationHint(
                new HashSet<String>(Arrays.asList(splitsInfo[i].getLocations())), null);
        locationHints.add(locationHint);
    }
    return locationHints;
}
From source file: org.apache.tez.mapreduce.hadoop.TestMRInputHelpers.java
License: Apache License

private void verifyLocationHints(Path inputSplitsDir, List<TaskLocationHint> actual) throws Exception {
    JobID jobId = new JobID("dummy", 1);
    JobSplit.TaskSplitMetaInfo[] splitsInfo = SplitMetaInfoReader.readSplitMetaInfo(jobId, remoteFs, conf,
            inputSplitsDir);
    int splitsCount = splitsInfo.length;
    List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splitsCount);
    for (int i = 0; i < splitsCount; ++i) {
        locationHints.add(TaskLocationHint.createTaskLocationHint(
                new HashSet<String>(Arrays.asList(splitsInfo[i].getLocations())), null));
    }
    Assert.assertEquals(locationHints, actual);
}
From source file: org.apache.tez.mapreduce.YARNRunner.java
License: Apache License

private List<TaskLocationHint> getMapLocationHintsFromInputSplits(JobID jobId, FileSystem fs,
        Configuration conf, String jobSubmitDir) throws IOException {
    TaskSplitMetaInfo[] splitsInfo = SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, conf,
            new Path(jobSubmitDir));
    int splitsCount = splitsInfo.length;
    List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splitsCount);
    for (int i = 0; i < splitsCount; ++i) {
        TaskLocationHint locationHint = new TaskLocationHint(
                new HashSet<String>(Arrays.asList(splitsInfo[i].getLocations())), null);
        locationHints.add(locationHint);
    }
    return locationHints;
}
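This is an older revision of the Tez YARNRunner method shown earlier; the only substantive difference is that it constructs TaskLocationHint directly instead of going through the createTaskLocationHint factory method.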
From source file: org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2Job.java
License: Open Source License

/** {@inheritDoc} */
@Override public Collection<GridHadoopInputSplit> input() throws GridException {
    Thread.currentThread().setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) {
            // The job was probably not submitted by the Hadoop client.
            // Assume the needed classes are available and try to generate the input splits ourselves.
            if (jobConf.getUseNewMapper())
                return GridHadoopV2Splitter.splitJob(jobCtx);
            else
                return GridHadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) {
            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs,
                jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new GridException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<GridHadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();
                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    GridHadoopFileBlock block = GridHadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = GridHadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new GridHadoopExternalSplit(hosts, off));
                }

                return res;
            }
        }
        catch (Throwable e) {
            throw transformException(e);
        }
    }
    finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}