Example usage for org.apache.hadoop.mapred FileOutputFormat getWorkOutputPath

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred FileOutputFormat getWorkOutputPath.

Prototype

public static Path getWorkOutputPath(JobConf conf) 

Document

Get the Path to the task's temporary output directory for the map-reduce job. See also: Tasks' Side-Effect Files.

Note: The following is valid only if the OutputCommitter is FileOutputCommitter.
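
For orientation, the following is a minimal, self-contained sketch (not taken from any of the source files listed under Usage) of how a task can write a side-effect file under the work output path. It assumes the old org.apache.hadoop.mapred API with the default FileOutputCommitter; the class name and file name are hypothetical.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class SideEffectFileExample {
    public static void writeSideEffectFile(JobConf conf) throws IOException {
        // Task-attempt scoped temporary directory; files created here are
        // promoted into the job output directory when the task commits.
        Path workDir = FileOutputFormat.getWorkOutputPath(conf);
        FileSystem fs = workDir.getFileSystem(conf);

        // Hypothetical file name, purely for illustration.
        Path sideFile = new Path(workDir, "side-effect.txt");
        try (FSDataOutputStream out = fs.create(sideFile)) {
            out.writeUTF("written by task " + conf.get("mapred.task.id"));
        }
    }
}

When the task attempt is committed, FileOutputCommitter moves files created under this directory into the job's final output directory.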

Usage

From source file:com.facebook.hive.orc.OrcOutputFormat.java

License:Open Source License

@Override
public RecordWriter<NullWritable, OrcSerdeRow> getRecordWriter(FileSystem fileSystem, JobConf conf, String name,
        Progressable reporter) throws IOException {
    ReaderWriterProfiler.setProfilerOptions(conf);

    // To be compatible with older file formats like Sequence and RC
    // Only works if mapred.work.output.dir is set in the conf
    Path workOutputPath = FileOutputFormat.getWorkOutputPath(conf);
    Path outputPath = workOutputPath == null ? new Path(name) : new Path(workOutputPath, name);

    if (fileSystem == null && workOutputPath != null) {
        fileSystem = workOutputPath.getFileSystem(conf);
    }

    return new OrcRecordWriter(fileSystem, outputPath, conf,
            OrcConf.ConfVars.HIVE_ORC_STRIPE_SIZE.defaultLongVal,
            OrcConf.ConfVars.HIVE_ORC_COMPRESSION.defaultVal,
            OrcConf.ConfVars.HIVE_ORC_COMPRESSION_BLOCK_SIZE.defaultIntVal,
            OrcConf.ConfVars.HIVE_ORC_ROW_INDEX_STRIDE.defaultIntVal);
}

From source file:com.ibm.bi.dml.runtime.matrix.data.MultipleOutputCommitter.java

License:Open Source License

@Override
public void commitTask(TaskAttemptContext context) throws IOException {
    JobConf conf = context.getJobConf();
    TaskAttemptID attemptId = context.getTaskAttemptID();

    // get the mapping between index to output filename
    outputs = MRJobConfiguration.getOutputs(conf);

    //get temp task output path (compatible with hadoop1 and hadoop2)
    Path taskOutPath = FileOutputFormat.getWorkOutputPath(conf);
    FileSystem fs = taskOutPath.getFileSystem(conf);
    if (!fs.exists(taskOutPath))
        throw new IOException("Task output path " + taskOutPath.toString() + "does not exist.");

    // Move the task outputs to their final places
    context.getProgressible().progress();
    moveFinalTaskOutputs(context, fs, taskOutPath);

    // Delete the temporary task-specific output directory
    if (!fs.delete(taskOutPath, true))
        LOG.debug("Failed to delete the temporary output directory of task: " + attemptId + " - " + taskOutPath);
}

From source file:de.tudarmstadt.ukp.dkpro.bigdata.hadoop.UIMAMapReduceBase.java

License:Open Source License

@Override
public void close() throws IOException {
    try {
        // notify uima of the end of this collection
        this.engine.batchProcessComplete();
        this.engine.collectionProcessComplete();
        // copy back data
        copyDir(this.results_dir, FileOutputFormat.getWorkOutputPath(this.job));
    } catch (final AnalysisEngineProcessException e) {
        throw new IOException(e);
    }
    this.engine.destroy();
}

From source file:org.apache.pig.backend.hadoop.executionengine.mapreduceExec.PigOutputFormat.java

License:Apache License

public RecordWriter getRecordWriter(FileSystem fs, JobConf job, String name, Progressable progress)
        throws IOException {
    Path outputDir = FileOutputFormat.getWorkOutputPath(job);
    return getRecordWriter(fs, job, outputDir, name, progress);
}

From source file:org.apache.sysml.runtime.matrix.data.MultipleOutputCommitter.java

License:Apache License

@Override
public void commitTask(TaskAttemptContext context) throws IOException {
    JobConf conf = context.getJobConf();
    TaskAttemptID attemptId = context.getTaskAttemptID();

    // get the mapping between index to output filename
    outputs = MRJobConfiguration.getOutputs(conf);

    // get temp task output path (compatible with hadoop1 and hadoop2)
    Path taskOutPath = FileOutputFormat.getWorkOutputPath(conf);
    FileSystem fs = taskOutPath.getFileSystem(conf);
    if (!fs.exists(taskOutPath))
        throw new IOException("Task output path " + taskOutPath.toString() + "does not exist.");

    // move the task outputs to their final places
    context.getProgressible().progress();
    moveFinalTaskOutputs(context, fs, taskOutPath);

    // delete the temporary task-specific output directory
    if (!fs.delete(taskOutPath, true))
        LOG.debug("Failed to delete the temporary output directory of task: " + attemptId + " - " + taskOutPath);
}

From source file:org.commoncrawl.mapred.ec2.postprocess.linkCollector.LinkMergerJob.java

License:Open Source License

@Override
public void configure(JobConf job) {

    HashSet<Integer> onlyDoPartitions = null;
    String hack = job.get("hack");
    if (hack != null) {
        onlyDoPartitions = new HashSet<Integer>();
        JsonParser parser = new JsonParser();
        JsonArray hackArray = parser.parse(hack).getAsJsonArray();
        for (JsonElement element : hackArray) {
            onlyDoPartitions.add(element.getAsInt());
        }
    }
    _conf = job;
    try {
        _fs = FileSystem.get(_conf);
        int partitionId = _conf.getInt("mapred.task.partition", 0);
        if (onlyDoPartitions == null || onlyDoPartitions.contains(partitionId)) {
            Path redirectPath = new Path(FileOutputFormat.getWorkOutputPath(_conf),
                    "redirect-" + NUMBER_FORMAT.format(partitionId));
            _redirectWriter = SequenceFile.createWriter(_fs, _conf, redirectPath, TextBytes.class,
                    TextBytes.class, CompressionType.BLOCK);
        } else {
            _skipPartition = true;
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:org.terrier.indexing.hadoop.Hadoop_BasicSinglePassIndexer.java

License:Mozilla Public License

protected void configureMap() throws Exception {
    super.init();
    Path indexDestination = FileOutputFormat.getWorkOutputPath(jc);
    Files.mkdir(indexDestination.toString());
    mapTaskID = TaskAttemptID.forName(jc.get("mapred.task.id")).getTaskID().toString();
    currentIndex = Index.createNewIndex(indexDestination.toString(), mapTaskID);
    maxMemory = Long.parseLong(ApplicationSetup.getProperty("indexing.singlepass.max.postings.memory", "0"));
    // during reduce, we don't want to load indices into memory, as we only use
    // them as streams
    currentIndex.setIndexProperty("index.preloadIndices.disabled", "true");
    RunData = new DataOutputStream(
            Files.writeFileStream(new Path(indexDestination, mapTaskID + ".runs").toString()));
    RunData.writeUTF(mapTaskID);
    start = true;
    createMemoryPostings();
    super.emptyDocIndexEntry = new SimpleDocumentIndexEntry();
    super.docIndexBuilder = new DocumentIndexBuilder(currentIndex, "document");
    super.metaBuilder = createMetaIndexBuilder();
    emptyDocIndexEntry = (FieldScore.FIELDS_COUNT > 0) ? new FieldDocumentIndexEntry(FieldScore.FIELDS_COUNT)
            : new SimpleDocumentIndexEntry();
}

From source file:org.terrier.indexing.hadoop.Hadoop_BasicSinglePassIndexer.java

License:Mozilla Public License

protected void configureReduce() throws Exception {
    super.init();
    start = true;
    //load in the current index
    final Path indexDestination = FileOutputFormat.getWorkOutputPath(jc);
    Files.mkdir(path = indexDestination.toString());
    final String indexDestinationPrefix = jc.get("indexing.hadoop.prefix", "data");
    reduceId = TaskAttemptID.forName(jc.get("mapred.task.id")).getTaskID().getId();
    mutipleIndices = jc.getBoolean("indexing.hadoop.multiple.indices", true);
    if (jc.getNumReduceTasks() > 1) {
        // gets the reduce number and suffixes it to the index prefix
        prefix = indexDestinationPrefix + "-" + reduceId;
    } else {
        prefix = indexDestinationPrefix;
    }

    currentIndex = Index.createNewIndex(path, prefix);

    super.merger = createtheRunMerger();
    reduceStarted = false;
}