List of usage examples for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.getWorkOutputPath
public static Path getWorkOutputPath(TaskInputOutputContext<?, ?, ?, ?> context) throws IOException, InterruptedException
Returns the Path to the task's temporary output directory for the MapReduce job. Some applications need to create or write to side-files that differ from the actual job outputs; creating them under this work directory ensures the framework promotes them to the job output directory on task commit and discards them when a failed or speculatively executed task attempt is aborted.
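A minimal sketch of the typical pattern, assuming a plain Mapper that writes one extra side-file (the class name, file name, and key/value types are illustrative, not drawn from the examples below):

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SideFileMapper extends Mapper<LongWritable, Text, Text, Text> {

    private FSDataOutputStream sideFile;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // The task attempt's work directory: files created here are moved to
        // the final output directory only if this attempt commits, so failed
        // or speculative attempts never leave partial side-files behind.
        Path workDir = FileOutputFormat.getWorkOutputPath(context);
        // getUniqueFile derives a name such as "side-m-00000.txt" from the
        // task id, so concurrent tasks cannot collide.
        String name = FileOutputFormat.getUniqueFile(context, "side", ".txt");
        FileSystem fs = workDir.getFileSystem(context.getConfiguration());
        sideFile = fs.create(new Path(workDir, name));
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        sideFile.close();
    }
}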
From source file:com.cloudera.sqoop.mapreduce.SequenceFileImportMapper.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    this.lobLoader = new LargeObjectLoader(context.getConfiguration(),
            FileOutputFormat.getWorkOutputPath(context));
}
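Here the work output path is handed to Sqoop's LargeObjectLoader as the root directory under which large-object (BLOB/CLOB) files are materialized, so they are committed or discarded together with the task's regular output.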
From source file:com.linkedin.cubert.examples.Purge.java
License:Open Source License
@Override
public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props)
        throws IOException, InterruptedException {
    block = input.values().iterator().next();
    conf = PhaseContext.getConf();
    output = TupleFactory.getInstance().newTuple(3);

    purgeFileName = FileCache.get(filesToCache.get(0));
    if (purgeFileName == null) {
        throw new IOException("purgeFileName is null");
    }
    loadMembersToPurge(purgeFileName);

    String columnName = JsonUtils.getText(json.get("args"), "purgeColumnName");
    setColumnName(columnName);

    // Create temp file
    Path root = null;
    String filename = null;
    tempFileName = null;

    if (PhaseContext.isMapper()) {
        root = FileOutputFormat.getWorkOutputPath(PhaseContext.getMapContext());
        filename = FileOutputFormat.getUniqueFile(PhaseContext.getMapContext(), "tempFileForPurge", "");
    } else {
        root = FileOutputFormat.getWorkOutputPath(PhaseContext.getRedContext());
        filename = FileOutputFormat.getUniqueFile(PhaseContext.getRedContext(), "tempFileForPurge", "");
    }

    tempFileName = root + "/" + filename;
}
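Two details are worth noting: the temp file is rooted at the task's work output path, so a failed or speculatively re-executed attempt cannot clobber another attempt's file, and getUniqueFile keys the name to the task id so concurrent tasks never collide. (Joining the two parts with new Path(root, filename) would be more idiomatic than string concatenation.)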
From source file:com.linkedin.cubert.operator.TeeOperator.java
License:Open Source License
@Override
public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props)
        throws IOException, InterruptedException {
    block = input.values().iterator().next();
    String prefix = JsonUtils.getText(json, "prefix");
    BlockSchema teeSchema = new BlockSchema(json.get("teeSchema"));

    if (json.has("generate") && !json.get("generate").isNull()) {
        ObjectNode generateJson = JsonUtils.createObjectNode("name", "GENERATE",
                "input", json.get("input"),
                "output", json.get("input"),
                "outputTuple", json.get("generate"));
        generateOperator = new GenerateOperator();
        BlockProperties generateProps = new BlockProperties("teeGenerate", teeSchema, props);
        generateOperator.setInput(input, generateJson, generateProps);
    }

    Configuration conf = PhaseContext.getConf();
    Path root = null;
    String filename = null;

    if (PhaseContext.isMapper()) {
        root = FileOutputFormat.getWorkOutputPath(PhaseContext.getMapContext());
        filename = FileOutputFormat.getUniqueFile(PhaseContext.getMapContext(), prefix, "");
    } else {
        root = FileOutputFormat.getWorkOutputPath(PhaseContext.getRedContext());
        filename = FileOutputFormat.getUniqueFile(PhaseContext.getRedContext(), prefix, "");
    }

    writer = openedWriters.get(prefix);
    if (writer == null) {
        writer = StorageFactory.get(JsonUtils.getText(json, "type")).getTeeWriter();
        writer.open(conf, json, teeSchema, root, filename);
        openedWriters.put(prefix, writer);
    }

    if (json.has("filter") && json.get("filter") != null && !json.get("filter").isNull()) {
        JsonNode filterJson = json.get("filter");
        filterTree = new FunctionTree(block);
        try {
            filterTree.addFunctionTree(filterJson);
        } catch (PreconditionException e) {
            throw new RuntimeException(e);
        }
    }
}
From source file:com.talis.hadoop.rdf.merge.IndexMergeReducer.java
License:Apache License
@Override
public void setup(Context context) {
    LOG.info("Configuring index merge reducer");
    taskAttemptID = context.getTaskAttemptID();
    try {
        fs = FileSystem.get(FileOutputFormat.getOutputPath(context).toUri(), context.getConfiguration());
        outRemote = FileOutputFormat.getWorkOutputPath(context);
        LOG.debug("Remote output path is {}", outRemote);

        String workDirRoot = context.getConfiguration().get(LOCAL_WORK_ROOT_DIR,
                System.getProperty("java.io.tmpdir"));
        LOG.debug("Local work root directory is {}", workDirRoot);

        localWorkDir = new File(workDirRoot,
                context.getJobName() + "_" + context.getJobID() + "_" + taskAttemptID);
        FileUtils.forceMkdir(localWorkDir);
        LOG.info("Local work directory is {}", localWorkDir);

        localShards = new Path(localWorkDir.getAbsolutePath(), "shards");
        localShardsDir = new File(localShards.toString());
        FileUtils.forceMkdir(localShardsDir);
        LOG.info("Local shards directory is {}", localShardsDir);

        outLocal = new Path(localWorkDir.getAbsolutePath(), "combined");
        File combinedDir = new File(outLocal.toString());
        FileUtils.forceMkdir(combinedDir);
        LOG.info("Local combined index directory is {}", combinedDir);

        optimizeOutput = context.getConfiguration().getBoolean(OPTIMIZE_OUTPUT, true);
        LOG.info("Output optimization is set to {}", optimizeOutput);

        combined = FSDirectory.open(combinedDir);
        writer = new IndexWriter(combined, new StopAnalyzer(Version.LUCENE_29), true,
                IndexWriter.MaxFieldLength.UNLIMITED);
    } catch (Exception e) {
        throw new TDBLoader3Exception(e);
    }
}
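This reducer builds the merged Lucene index on local disk and only captures outRemote here; presumably the finished index is copied from outLocal into outRemote later in the task (not shown in this excerpt), so the committed job output contains the combined index.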
From source file:org.apache.jena.tdbloader4.FourthReducer.java
License:Apache License
@Override
public void setup(Context context) {
    this.taskAttemptID = context.getTaskAttemptID();
    outputs = new HashMap<String, OutputStream>();
    String outputRootDirectory = context.getConfiguration().get(Constants.OPTION_FOURTH_LOCAL_OUTPUT_DIR,
            Constants.OPTION_FOURTH_LOCAL_OUTPUT_DIR_DEFAULT);
    try {
        fs = FileSystem.get(context.getConfiguration());
        outRemote = FileOutputFormat.getWorkOutputPath(context);
        outLocal = new Path(outputRootDirectory,
                context.getJobName() + "_" + context.getJobID() + "_" + taskAttemptID);
        new File(outLocal.toString()).mkdir();
        // TODO: does this make sense?
        fs.setReplication(outLocal, (short) 2);
        fs.startLocalOutput(outRemote, outLocal);
    } catch (Exception e) {
        throw new TDBLoader4Exception(e);
    }
    counters = new Counters(context);
}
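startLocalOutput pairs the task's remote work path with a local staging path; the matching call back is not shown in this excerpt. A minimal sketch of the counterpart, assuming the fs, outRemote, and outLocal fields initialized in the setup above:

@Override
public void cleanup(Context context) throws IOException {
    // Hand the locally written files back to the FileSystem: they are copied
    // into the task's work output path, from where the OutputCommitter
    // promotes them on task commit.
    fs.completeLocalOutput(outRemote, outLocal);
}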
From source file:org.apache.jena.tdbloader4.SecondReducer.java
License:Apache License
@Override
public void setup(Context context) {
    this.taskAttemptID = context.getTaskAttemptID();
    String id = String.valueOf(taskAttemptID.getTaskID().getId());

    log.debug("Loading offsets from DistributedCache...");
    offsets = loadOffsets(context);
    log.debug("Finished loading offsets from DistributedCache.");

    // This is the offset this reducer needs to add (the sum over all of its 'previous' peers).
    for (int i = 0; i < Integer.valueOf(id); i++) {
        offset += offsets.get(i);
    }
    log.debug("Reducer's number {} offset is {}", id, offset);

    try {
        fs = FileSystem.get(context.getConfiguration());
        outRemote = FileOutputFormat.getWorkOutputPath(context);
        log.debug("outRemote is {}", outRemote);
        outLocal = new Path("/tmp", context.getJobName() + "_" + context.getJobID() + "_" + taskAttemptID);
        fs.startLocalOutput(outRemote, outLocal);
    } catch (Exception e) {
        throw new TDBLoader4Exception(e);
    }

    Location location = new Location(outLocal.toString());
    init(location);
    counters = new Counters(context);
}
From source file:org.apache.sqoop.mapreduce.AvroImportMapper.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    schema = AvroJob.getMapOutputSchema(conf);
    lobLoader = new LargeObjectLoader(conf, FileOutputFormat.getWorkOutputPath(context));
    bigDecimalFormatString = conf.getBoolean(ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT,
            ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT_DEFAULT);
}
From source file:org.terrier.indexing.HadoopIndexerMapper.java
License:Mozilla Public License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    TerrierHDFSAdaptor.initialiseHDFSAdaptor(context.getConfiguration());

    proxyIndexer = createIndexer(context);
    currentContext = context;
    splitnum = getSplitNum(context);

    proxyIndexer.setFlushDelegate(this);
    final Path indexDestination = FileOutputFormat.getWorkOutputPath(context);
    indexDestination.getFileSystem(context.getConfiguration()).mkdirs(indexDestination);

    mapTaskID = getTaskID(context);
    proxyIndexer.currentIndex = Index.createNewIndex(indexDestination.toString(), mapTaskID);
    proxyIndexer.maxMemory = Long
            .parseLong(ApplicationSetup.getProperty("indexing.singlepass.max.postings.memory", "0"));

    // During reduce, we don't want to load indices into memory, as we only
    // use them as streams.
    proxyIndexer.currentIndex.setIndexProperty("index.preloadIndices.disabled", "true");

    runData = new DataOutputStream(
            Files.writeFileStream(new Path(indexDestination, mapTaskID + ".runs").toString()));
    runData.writeUTF(mapTaskID);

    proxyIndexer.createMemoryPostings();
    proxyIndexer.docIndexBuilder = new DocumentIndexBuilder(proxyIndexer.currentIndex, "document");
    proxyIndexer.metaBuilder = createMetaIndexBuilder();
    proxyIndexer.emptyDocIndexEntry = (FieldScore.FIELDS_COUNT > 0)
            ? new FieldDocumentIndexEntry(FieldScore.FIELDS_COUNT)
            : new SimpleDocumentIndexEntry();

    final String filterFile = context.getConfiguration().get(HadoopIndexerOptions.DOCUMENT_ID_FILTER_FILE);
    synchronized (HadoopIndexerMapper.class) {
        if (filterFile != null) {
            if (filter == null) {
                logger.warn("Loading filter...");
                final Timer timer = Timer.timer();
                final FileSystem fs = FileSystem.get(context.getConfiguration());
                final Path p = new Path(filterFile);
                filter = new DocListFilter(fs, p);
                logger.warn("Took:" + timer.duration() / 1000f + "s");
            }
        }
    }
}
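Unlike the loaders above that stage output on local disk, this indexer writes the Terrier index structures directly into the work output path, relying on the committer to move the finished index into the job output directory on success.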
From source file:org.terrier.indexing.HadoopIndexerReducer.java
License:Mozilla Public License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    TerrierHDFSAdaptor.initialiseHDFSAdaptor(context.getConfiguration());
    proxyIndexer = createIndexer(context);

    // Load in the current index.
    final Path indexDestination = FileOutputFormat.getWorkOutputPath(context);
    reduceId = context.getTaskAttemptID().getTaskID().getId();
    proxyIndexer.path = indexDestination.toString();
    mutipleIndices = context.getConfiguration().getBoolean("indexing.hadoop.multiple.indices", true);

    if (context.getNumReduceTasks() > 1) {
        // Get the reduce number and suffix it to the index prefix.
        proxyIndexer.prefix = ApplicationSetup.TERRIER_INDEX_PREFIX + "-" + reduceId;
    } else {
        proxyIndexer.prefix = ApplicationSetup.TERRIER_INDEX_PREFIX;
    }

    proxyIndexer.currentIndex = Index.createNewIndex(proxyIndexer.path, proxyIndexer.prefix);
    proxyIndexer.merger = createRunMerger();
}