List of usage examples for org.apache.hadoop.mapred.FileOutputFormat.getWorkOutputPath
public static Path getWorkOutputPath(JobConf conf)
Note: The following is valid only if the OutputCommitter is FileOutputCommitter.
From source file:com.facebook.hive.orc.OrcOutputFormat.java
License:Open Source License
@Override public RecordWriter<NullWritable, OrcSerdeRow> getRecordWriter(FileSystem fileSystem, JobConf conf, String name, Progressable reporter) throws IOException { ReaderWriterProfiler.setProfilerOptions(conf); // To be compatible with older file formats like Sequence and RC // Only works if mapred.work.output.dir is set in the conf Path workOutputPath = FileOutputFormat.getWorkOutputPath(conf); Path outputPath = workOutputPath == null ? new Path(name) : new Path(workOutputPath, name); if (fileSystem == null && workOutputPath != null) { fileSystem = workOutputPath.getFileSystem(conf); }// www. ja va2s .c o m return new OrcRecordWriter(fileSystem, outputPath, conf, OrcConf.ConfVars.HIVE_ORC_STRIPE_SIZE.defaultLongVal, OrcConf.ConfVars.HIVE_ORC_COMPRESSION.defaultVal, OrcConf.ConfVars.HIVE_ORC_COMPRESSION_BLOCK_SIZE.defaultIntVal, OrcConf.ConfVars.HIVE_ORC_ROW_INDEX_STRIDE.defaultIntVal); }
From source file:com.ibm.bi.dml.runtime.matrix.data.MultipleOutputCommitter.java
License:Open Source License
@Override public void commitTask(TaskAttemptContext context) throws IOException { JobConf conf = context.getJobConf(); TaskAttemptID attemptId = context.getTaskAttemptID(); // get the mapping between index to output filename outputs = MRJobConfiguration.getOutputs(conf); //get temp task output path (compatible with hadoop1 and hadoop2) Path taskOutPath = FileOutputFormat.getWorkOutputPath(conf); FileSystem fs = taskOutPath.getFileSystem(conf); if (!fs.exists(taskOutPath)) throw new IOException("Task output path " + taskOutPath.toString() + "does not exist."); // Move the task outputs to their final places context.getProgressible().progress(); moveFinalTaskOutputs(context, fs, taskOutPath); // Delete the temporary task-specific output directory if (!fs.delete(taskOutPath, true)) LOG.debug(// ww w .j a va 2 s . com "Failed to delete the temporary output directory of task: " + attemptId + " - " + taskOutPath); }
From source file:de.tudarmstadt.ukp.dkpro.bigdata.hadoop.UIMAMapReduceBase.java
License:Open Source License
@Override public void close() throws IOException { try {/*from ww w.j a v a2 s .c o m*/ // notify uima of the end of this collection this.engine.batchProcessComplete(); this.engine.collectionProcessComplete(); // copy back data copyDir(this.results_dir, FileOutputFormat.getWorkOutputPath(this.job)); } catch (final AnalysisEngineProcessException e) { throw new IOException(e); } this.engine.destroy(); }
From source file:org.apache.pig.backend.hadoop.executionengine.mapreduceExec.PigOutputFormat.java
License:Apache License
/**
 * Creates a record writer for {@code name}, delegating to the overload that takes an
 * explicit output directory and passing the job's work output path as that directory.
 *
 * @throws IOException if the delegate fails to create the writer
 */
public RecordWriter getRecordWriter(FileSystem fs, JobConf job, String name, Progressable progress)
        throws IOException {
    return getRecordWriter(fs, job, FileOutputFormat.getWorkOutputPath(job), name, progress);
}
From source file:org.apache.sysml.runtime.matrix.data.MultipleOutputCommitter.java
License:Apache License
@Override public void commitTask(TaskAttemptContext context) throws IOException { JobConf conf = context.getJobConf(); TaskAttemptID attemptId = context.getTaskAttemptID(); // get the mapping between index to output filename outputs = MRJobConfiguration.getOutputs(conf); // get temp task output path (compatible with hadoop1 and hadoop2) Path taskOutPath = FileOutputFormat.getWorkOutputPath(conf); FileSystem fs = taskOutPath.getFileSystem(conf); if (!fs.exists(taskOutPath)) throw new IOException("Task output path " + taskOutPath.toString() + "does not exist."); // move the task outputs to their final places context.getProgressible().progress(); moveFinalTaskOutputs(context, fs, taskOutPath); // delete the temporary task-specific output directory if (!fs.delete(taskOutPath, true)) LOG.debug(//from w ww. j a va 2 s . co m "Failed to delete the temporary output directory of task: " + attemptId + " - " + taskOutPath); }
From source file:org.commoncrawl.mapred.ec2.postprocess.linkCollector.LinkMergerJob.java
License:Open Source License
/**
 * Configures this task from the job configuration and, unless the partition is
 * skipped, opens the per-partition redirect writer in the work output directory.
 *
 * <p>If the {@code "hack"} property is set it is parsed as a JSON array of integers;
 * only partitions whose id appears in that array get a redirect writer, all others
 * are flagged via {@code _skipPartition}.
 *
 * @param job the job configuration
 * @throws IllegalStateException if the file system or the redirect writer cannot be
 *         opened (the {@link IOException} is kept as the cause)
 */
@Override
public void configure(JobConf job) {
    HashSet<Integer> onlyDoPartitions = null;
    String hack = job.get("hack");
    if (hack != null) {
        onlyDoPartitions = new HashSet<Integer>();
        JsonParser parser = new JsonParser();
        JsonArray hackArray = parser.parse(hack).getAsJsonArray();
        for (JsonElement element : hackArray) {
            onlyDoPartitions.add(element.getAsInt());
        }
    }

    _conf = job;
    try {
        _fs = FileSystem.get(_conf);
        int partitionId = _conf.getInt("mapred.task.partition", 0);
        if (onlyDoPartitions == null || onlyDoPartitions.contains(partitionId)) {
            Path redirectPath = new Path(FileOutputFormat.getWorkOutputPath(_conf),
                    "redirect-" + NUMBER_FORMAT.format(partitionId));
            _redirectWriter = SequenceFile.createWriter(_fs, _conf, redirectPath, TextBytes.class,
                    TextBytes.class, CompressionType.BLOCK);
        } else {
            _skipPartition = true;
        }
    } catch (IOException e) {
        // Previously the error was only printed, leaving _redirectWriter unset while the
        // partition was not marked as skipped. Fail the task here with the real cause.
        throw new IllegalStateException("Failed to open redirect writer during configure", e);
    }
}
From source file:org.terrier.indexing.hadoop.Hadoop_BasicSinglePassIndexer.java
License:Mozilla Public License
protected void configureMap() throws Exception { super.init(); Path indexDestination = FileOutputFormat.getWorkOutputPath(jc); Files.mkdir(indexDestination.toString()); mapTaskID = TaskAttemptID.forName(jc.get("mapred.task.id")).getTaskID().toString(); currentIndex = Index.createNewIndex(indexDestination.toString(), mapTaskID); maxMemory = Long.parseLong(ApplicationSetup.getProperty("indexing.singlepass.max.postings.memory", "0")); //during reduce, we dont want to load indices into memory, as we only use //them as streams currentIndex.setIndexProperty("index.preloadIndices.disabled", "true"); RunData = new DataOutputStream( Files.writeFileStream(new Path(indexDestination, mapTaskID + ".runs").toString())); RunData.writeUTF(mapTaskID);/*from w w w . j a va 2 s. com*/ start = true; createMemoryPostings(); super.emptyDocIndexEntry = new SimpleDocumentIndexEntry(); super.docIndexBuilder = new DocumentIndexBuilder(currentIndex, "document"); super.metaBuilder = createMetaIndexBuilder(); emptyDocIndexEntry = (FieldScore.FIELDS_COUNT > 0) ? new FieldDocumentIndexEntry(FieldScore.FIELDS_COUNT) : new SimpleDocumentIndexEntry(); }
From source file:org.terrier.indexing.hadoop.Hadoop_BasicSinglePassIndexer.java
License:Mozilla Public License
protected void configureReduce() throws Exception { super.init(); start = true;/*from www. ja v a 2s. c om*/ //load in the current index final Path indexDestination = FileOutputFormat.getWorkOutputPath(jc); Files.mkdir(path = indexDestination.toString()); final String indexDestinationPrefix = jc.get("indexing.hadoop.prefix", "data"); reduceId = TaskAttemptID.forName(jc.get("mapred.task.id")).getTaskID().getId(); indexDestination.toString(); mutipleIndices = jc.getBoolean("indexing.hadoop.multiple.indices", true); if (jc.getNumReduceTasks() > 1) { //gets the reduce number and suffices this to the index prefix prefix = indexDestinationPrefix + "-" + reduceId; } else { prefix = indexDestinationPrefix; } currentIndex = Index.createNewIndex(path, prefix); super.merger = createtheRunMerger(); reduceStarted = false; }