List of usage examples for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.getWorkOutputPath
public static Path getWorkOutputPath(TaskInputOutputContext<?, ?, ?, ?> context) throws IOException, InterruptedException
Returns the Path to the task's temporary output directory for the MapReduce job. Some applications need to create or write to side-files that differ from the actual job outputs; creating them under this work directory ensures the framework promotes them to the job output directory on task commit and discards them when a failed or speculatively executed task attempt is aborted.
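A minimal sketch of the typical pattern, assuming a plain Mapper that writes one extra side-file (the class name, file name, and key/value types are illustrative, not drawn from the examples below):

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SideFileMapper extends Mapper<LongWritable, Text, Text, Text> {

    private FSDataOutputStream sideFile;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // The task attempt's work directory: files created here are moved to
        // the final output directory only if this attempt commits, so failed
        // or speculative attempts never leave partial side-files behind.
        Path workDir = FileOutputFormat.getWorkOutputPath(context);
        // getUniqueFile derives a name such as "side-m-00000.txt" from the
        // task id, so concurrent tasks cannot collide.
        String name = FileOutputFormat.getUniqueFile(context, "side", ".txt");
        FileSystem fs = workDir.getFileSystem(context.getConfiguration());
        sideFile = fs.create(new Path(workDir, name));
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        sideFile.close();
    }
}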
From source file:com.cloudera.sqoop.mapreduce.SequenceFileImportMapper.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    this.lobLoader = new LargeObjectLoader(context.getConfiguration(),
            FileOutputFormat.getWorkOutputPath(context));
}
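Here the work output path is handed to Sqoop's LargeObjectLoader as the root directory under which large-object (BLOB/CLOB) files are materialized, so they are committed or discarded together with the task's regular output.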
From source file:com.linkedin.cubert.examples.Purge.java
License:Open Source License
@Override
public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props)
        throws IOException, InterruptedException {
    block = input.values().iterator().next();
    conf = PhaseContext.getConf();
    output = TupleFactory.getInstance().newTuple(3);

    purgeFileName = FileCache.get(filesToCache.get(0));
    if (purgeFileName == null) {
        throw new IOException("purgeFileName is null");
    }
    loadMembersToPurge(purgeFileName);

    String columnName = JsonUtils.getText(json.get("args"), "purgeColumnName");
    setColumnName(columnName);

    // Create temp file
    Path root = null;
    String filename = null;
    tempFileName = null;

    if (PhaseContext.isMapper()) {
        root = FileOutputFormat.getWorkOutputPath(PhaseContext.getMapContext());
        filename = FileOutputFormat.getUniqueFile(PhaseContext.getMapContext(), "tempFileForPurge", "");
    } else {
        root = FileOutputFormat.getWorkOutputPath(PhaseContext.getRedContext());
        filename = FileOutputFormat.getUniqueFile(PhaseContext.getRedContext(), "tempFileForPurge", "");
    }

    tempFileName = root + "/" + filename;
}
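Two details are worth noting: the temp file is rooted at the task's work output path, so a failed or speculatively re-executed attempt cannot clobber another attempt's file, and getUniqueFile keys the name to the task id so concurrent tasks never collide. (Joining the two parts with new Path(root, filename) would be more idiomatic than string concatenation.)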
From source file:com.linkedin.cubert.operator.TeeOperator.java
License:Open Source License
@Override
public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props)
        throws IOException, InterruptedException {
    block = input.values().iterator().next();
    String prefix = JsonUtils.getText(json, "prefix");
    BlockSchema teeSchema = new BlockSchema(json.get("teeSchema"));

    if (json.has("generate") && !json.get("generate").isNull()) {
        ObjectNode generateJson = JsonUtils.createObjectNode("name", "GENERATE",
                "input", json.get("input"),
                "output", json.get("input"),
                "outputTuple", json.get("generate"));
        generateOperator = new GenerateOperator();
        BlockProperties generateProps = new BlockProperties("teeGenerate", teeSchema, props);
        generateOperator.setInput(input, generateJson, generateProps);
    }

    Configuration conf = PhaseContext.getConf();
    Path root = null;
    String filename = null;

    if (PhaseContext.isMapper()) {
        root = FileOutputFormat.getWorkOutputPath(PhaseContext.getMapContext());
        filename = FileOutputFormat.getUniqueFile(PhaseContext.getMapContext(), prefix, "");
    } else {
        root = FileOutputFormat.getWorkOutputPath(PhaseContext.getRedContext());
        filename = FileOutputFormat.getUniqueFile(PhaseContext.getRedContext(), prefix, "");
    }

    writer = openedWriters.get(prefix);
    if (writer == null) {
        writer = StorageFactory.get(JsonUtils.getText(json, "type")).getTeeWriter();
        writer.open(conf, json, teeSchema, root, filename);
        openedWriters.put(prefix, writer);
    }

    if (json.has("filter") && json.get("filter") != null && !json.get("filter").isNull()) {
        JsonNode filterJson = json.get("filter");
        filterTree = new FunctionTree(block);
        try {
            filterTree.addFunctionTree(filterJson);
        } catch (PreconditionException e) {
            throw new RuntimeException(e);
        }
    }
}
From source file:com.talis.hadoop.rdf.merge.IndexMergeReducer.java
License:Apache License
@Override
public void setup(Context context) {
    LOG.info("Configuring index merge reducer");
    taskAttemptID = context.getTaskAttemptID();
    try {
        fs = FileSystem.get(FileOutputFormat.getOutputPath(context).toUri(), context.getConfiguration());
        outRemote = FileOutputFormat.getWorkOutputPath(context);
        LOG.debug("Remote output path is {}", outRemote);

        String workDirRoot = context.getConfiguration().get(LOCAL_WORK_ROOT_DIR,
                System.getProperty("java.io.tmpdir"));
        LOG.debug("Local work root directory is {}", workDirRoot);

        localWorkDir = new File(workDirRoot,
                context.getJobName() + "_" + context.getJobID() + "_" + taskAttemptID);
        FileUtils.forceMkdir(localWorkDir);
        LOG.info("Local work directory is {}", localWorkDir);

        localShards = new Path(localWorkDir.getAbsolutePath(), "shards");
        localShardsDir = new File(localShards.toString());
        FileUtils.forceMkdir(localShardsDir);
        LOG.info("Local shards directory is {}", localShardsDir);

        outLocal = new Path(localWorkDir.getAbsolutePath(), "combined");
        File combinedDir = new File(outLocal.toString());
        FileUtils.forceMkdir(combinedDir);
        LOG.info("Local combined index directory is {}", combinedDir);

        optimizeOutput = context.getConfiguration().getBoolean(OPTIMIZE_OUTPUT, true);
        LOG.info("Output optimization is set to {}", optimizeOutput);

        combined = FSDirectory.open(combinedDir);
        writer = new IndexWriter(combined, new StopAnalyzer(Version.LUCENE_29), true,
                IndexWriter.MaxFieldLength.UNLIMITED);
    } catch (Exception e) {
        throw new TDBLoader3Exception(e);
    }
}
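This reducer builds the merged Lucene index on local disk and only captures outRemote here; presumably the finished index is copied from outLocal into outRemote later in the task (not shown in this excerpt), so the committed job output contains the combined index.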
From source file:org.apache.jena.tdbloader4.FourthReducer.java
License:Apache License
@Override
public void setup(Context context) {
    this.taskAttemptID = context.getTaskAttemptID();
    outputs = new HashMap<String, OutputStream>();
    String outputRootDirectory = context.getConfiguration().get(Constants.OPTION_FOURTH_LOCAL_OUTPUT_DIR,
            Constants.OPTION_FOURTH_LOCAL_OUTPUT_DIR_DEFAULT);
    try {
        fs = FileSystem.get(context.getConfiguration());
        outRemote = FileOutputFormat.getWorkOutputPath(context);
        outLocal = new Path(outputRootDirectory,
                context.getJobName() + "_" + context.getJobID() + "_" + taskAttemptID);
        new File(outLocal.toString()).mkdir();
        // TODO: does this make sense?
        fs.setReplication(outLocal, (short) 2);
        fs.startLocalOutput(outRemote, outLocal);
    } catch (Exception e) {
        throw new TDBLoader4Exception(e);
    }
    counters = new Counters(context);
}
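startLocalOutput pairs the task's remote work path with a local staging path; the matching call back is not shown in this excerpt. A minimal sketch of the counterpart, assuming the fs, outRemote, and outLocal fields initialized in the setup above:

@Override
public void cleanup(Context context) throws IOException {
    // Hand the locally written files back to the FileSystem: they are copied
    // into the task's work output path, from where the OutputCommitter
    // promotes them on task commit.
    fs.completeLocalOutput(outRemote, outLocal);
}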
From source file:org.apache.jena.tdbloader4.SecondReducer.java
License:Apache License
@Override
public void setup(Context context) {
    this.taskAttemptID = context.getTaskAttemptID();
    String id = String.valueOf(taskAttemptID.getTaskID().getId());

    log.debug("Loading offsets from DistributedCache...");
    offsets = loadOffsets(context);
    log.debug("Finished loading offsets from DistributedCache.");

    // This is the offset this reducer needs to add (the sum over all of its 'previous' peers).
    for (int i = 0; i < Integer.valueOf(id); i++) {
        offset += offsets.get(i);
    }
    log.debug("Reducer's number {} offset is {}", id, offset);

    try {
        fs = FileSystem.get(context.getConfiguration());
        outRemote = FileOutputFormat.getWorkOutputPath(context);
        log.debug("outRemote is {}", outRemote);
        outLocal = new Path("/tmp", context.getJobName() + "_" + context.getJobID() + "_" + taskAttemptID);
        fs.startLocalOutput(outRemote, outLocal);
    } catch (Exception e) {
        throw new TDBLoader4Exception(e);
    }

    Location location = new Location(outLocal.toString());
    init(location);
    counters = new Counters(context);
}
From source file:org.apache.sqoop.mapreduce.AvroImportMapper.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    schema = AvroJob.getMapOutputSchema(conf);
    lobLoader = new LargeObjectLoader(conf, FileOutputFormat.getWorkOutputPath(context));
    bigDecimalFormatString = conf.getBoolean(ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT,
            ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT_DEFAULT);
}
From source file:org.terrier.indexing.HadoopIndexerMapper.java
License:Mozilla Public License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    TerrierHDFSAdaptor.initialiseHDFSAdaptor(context.getConfiguration());

    proxyIndexer = createIndexer(context);
    currentContext = context;
    splitnum = getSplitNum(context);

    proxyIndexer.setFlushDelegate(this);
    final Path indexDestination = FileOutputFormat.getWorkOutputPath(context);
    indexDestination.getFileSystem(context.getConfiguration()).mkdirs(indexDestination);

    mapTaskID = getTaskID(context);
    proxyIndexer.currentIndex = Index.createNewIndex(indexDestination.toString(), mapTaskID);
    proxyIndexer.maxMemory = Long
            .parseLong(ApplicationSetup.getProperty("indexing.singlepass.max.postings.memory", "0"));

    // During reduce, we don't want to load indices into memory, as we only
    // use them as streams.
    proxyIndexer.currentIndex.setIndexProperty("index.preloadIndices.disabled", "true");

    runData = new DataOutputStream(
            Files.writeFileStream(new Path(indexDestination, mapTaskID + ".runs").toString()));
    runData.writeUTF(mapTaskID);

    proxyIndexer.createMemoryPostings();
    proxyIndexer.docIndexBuilder = new DocumentIndexBuilder(proxyIndexer.currentIndex, "document");
    proxyIndexer.metaBuilder = createMetaIndexBuilder();
    proxyIndexer.emptyDocIndexEntry = (FieldScore.FIELDS_COUNT > 0)
            ? new FieldDocumentIndexEntry(FieldScore.FIELDS_COUNT)
            : new SimpleDocumentIndexEntry();

    final String filterFile = context.getConfiguration().get(HadoopIndexerOptions.DOCUMENT_ID_FILTER_FILE);
    synchronized (HadoopIndexerMapper.class) {
        if (filterFile != null) {
            if (filter == null) {
                logger.warn("Loading filter...");
                final Timer timer = Timer.timer();
                final FileSystem fs = FileSystem.get(context.getConfiguration());
                final Path p = new Path(filterFile);
                filter = new DocListFilter(fs, p);
                logger.warn("Took:" + timer.duration() / 1000f + "s");
            }
        }
    }
}
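Unlike the loaders above that stage output on local disk, this indexer writes the Terrier index structures directly into the work output path, relying on the committer to move the finished index into the job output directory on success.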
From source file:org.terrier.indexing.HadoopIndexerReducer.java
License:Mozilla Public License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    TerrierHDFSAdaptor.initialiseHDFSAdaptor(context.getConfiguration());
    proxyIndexer = createIndexer(context);

    // Load in the current index.
    final Path indexDestination = FileOutputFormat.getWorkOutputPath(context);
    reduceId = context.getTaskAttemptID().getTaskID().getId();
    proxyIndexer.path = indexDestination.toString();
    mutipleIndices = context.getConfiguration().getBoolean("indexing.hadoop.multiple.indices", true);

    if (context.getNumReduceTasks() > 1) {
        // Get the reduce number and suffix it to the index prefix.
        proxyIndexer.prefix = ApplicationSetup.TERRIER_INDEX_PREFIX + "-" + reduceId;
    } else {
        proxyIndexer.prefix = ApplicationSetup.TERRIER_INDEX_PREFIX;
    }

    proxyIndexer.currentIndex = Index.createNewIndex(proxyIndexer.path, proxyIndexer.prefix);
    proxyIndexer.merger = createRunMerger();
}