Example usage for org.apache.hadoop.mapreduce.lib.output FileOutputFormat getWorkOutputPath

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.getWorkOutputPath.

Prototype

public static Path getWorkOutputPath(TaskInputOutputContext<?, ?, ?, ?> context)
        throws IOException, InterruptedException 

Document

Get the Path to the task's temporary output directory for the map-reduce job.

Tasks' Side-Effect Files

Some applications need to create/write-to side-files, which differ from the actual job-outputs. Writing them under the work output path keeps each task attempt's files isolated; they are promoted to the job's output directory only if the task commits.
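
A minimal sketch of the pattern (not taken from the sources listed under Usage; the class name SideFileMapper, the key/value types and the "sidefile" name are placeholders chosen for illustration): obtain the work output path in setup(), derive a task-unique file name with getUniqueFile, and write a side file next to the task's regular output.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SideFileMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    private FSDataOutputStream sideFile;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Task-scoped scratch directory managed by the output committer
        Path workDir = FileOutputFormat.getWorkOutputPath(context);
        // getUniqueFile returns a name that cannot clash with other tasks of the job
        String name = FileOutputFormat.getUniqueFile(context, "sidefile", ".txt");
        FileSystem fs = workDir.getFileSystem(context.getConfiguration());
        sideFile = fs.create(new Path(workDir, name));
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Side output, written independently of context.write(...)
        sideFile.writeBytes(value.toString() + "\n");
        context.write(value, key);
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        sideFile.close();
    }
}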

Usage

From source file:com.cloudera.sqoop.mapreduce.SequenceFileImportMapper.java

License:Apache License
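
Sqoop's SequenceFile import mapper hands the task's work output path to its LargeObjectLoader, so any large objects spilled to external files end up in the task's temporary output directory.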

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    this.lobLoader = new LargeObjectLoader(context.getConfiguration(),
            FileOutputFormat.getWorkOutputPath(context));
}

From source file:com.linkedin.cubert.examples.Purge.java

License:Open Source License
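
Cubert's Purge operator builds a per-task temporary file by combining getWorkOutputPath with getUniqueFile, picking the map or the reduce context depending on which phase is running.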

@Override
public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props)
        throws IOException, InterruptedException {
    block = input.values().iterator().next();
    conf = PhaseContext.getConf();
    output = TupleFactory.getInstance().newTuple(3);
    purgeFileName = FileCache.get(filesToCache.get(0));

    if (purgeFileName == null) {
        throw new IOException("purgeFileName is null");
    }

    loadMembersToPurge(purgeFileName);

    String columnName = JsonUtils.getText(json.get("args"), "purgeColumnName");
    setColumnName(columnName);

    // Create temp file
    Path root = null;
    String filename = null;
    tempFileName = null;

    if (PhaseContext.isMapper()) {
        root = FileOutputFormat.getWorkOutputPath(PhaseContext.getMapContext());
        filename = FileOutputFormat.getUniqueFile(PhaseContext.getMapContext(), "tempFileForPurge", "");
    } else {
        root = FileOutputFormat.getWorkOutputPath(PhaseContext.getRedContext());
        filename = FileOutputFormat.getUniqueFile(PhaseContext.getRedContext(), "tempFileForPurge", "");
    }

    tempFileName = root + "/" + filename;
}

From source file:com.linkedin.cubert.operator.TeeOperator.java

License:Open Source License
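
TeeOperator follows the same pattern, rooting its tee writer at the work output path and deriving a unique, prefix-based file name for the current task.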

@Override
public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props)
        throws IOException, InterruptedException {
    block = input.values().iterator().next();
    String prefix = JsonUtils.getText(json, "prefix");

    BlockSchema teeSchema = new BlockSchema(json.get("teeSchema"));

    if (json.has("generate") && !json.get("generate").isNull()) {
        ObjectNode generateJson = JsonUtils.createObjectNode("name", "GENERATE", "input", json.get("input"),
                "output", json.get("input"), "outputTuple", json.get("generate"));

        generateOperator = new GenerateOperator();

        BlockProperties generateProps = new BlockProperties("teeGenerate", teeSchema, props);
        generateOperator.setInput(input, generateJson, generateProps);
    }

    Configuration conf = PhaseContext.getConf();

    Path root = null;
    String filename = null;

    if (PhaseContext.isMapper()) {
        root = FileOutputFormat.getWorkOutputPath(PhaseContext.getMapContext());
        filename = FileOutputFormat.getUniqueFile(PhaseContext.getMapContext(), prefix, "");
    } else {
        root = FileOutputFormat.getWorkOutputPath(PhaseContext.getRedContext());
        filename = FileOutputFormat.getUniqueFile(PhaseContext.getRedContext(), prefix, "");
    }

    writer = openedWriters.get(prefix);

    if (writer == null) {
        writer = StorageFactory.get(JsonUtils.getText(json, "type")).getTeeWriter();
        writer.open(conf, json, teeSchema, root, filename);
        openedWriters.put(prefix, writer);
    }

    if (json.has("filter") && json.get("filter") != null && !json.get("filter").isNull()) {
        JsonNode filterJson = json.get("filter");
        filterTree = new FunctionTree(block);
        try {
            filterTree.addFunctionTree(filterJson);
        } catch (PreconditionException e) {
            throw new RuntimeException(e);
        }

    }
}

From source file:com.talis.hadoop.rdf.merge.IndexMergeReducer.java

License:Apache License
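
This reducer records the remote work output path, then prepares local working, shard and combined-index directories for merging Lucene indexes.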

@Override
public void setup(Context context) {
    LOG.info("Configuring index merge reducer");
    taskAttemptID = context.getTaskAttemptID();
    try {
        fs = FileSystem.get(FileOutputFormat.getOutputPath(context).toUri(), context.getConfiguration());
        outRemote = FileOutputFormat.getWorkOutputPath(context);
        LOG.debug("Remote output path is {}", outRemote);

        String workDirRoot = context.getConfiguration().get(LOCAL_WORK_ROOT_DIR,
                System.getProperty("java.io.tmpdir"));
        LOG.debug("Local work root directory is {}", workDirRoot);

        localWorkDir = new File(workDirRoot,
                context.getJobName() + "_" + context.getJobID() + "_" + taskAttemptID);
        FileUtils.forceMkdir(localWorkDir);
        LOG.info("Local work directory is {}", localWorkDir);

        localShards = new Path(localWorkDir.getAbsolutePath(), "shards");
        localShardsDir = new File(localShards.toString());
        FileUtils.forceMkdir(localShardsDir);
        LOG.info("Local shards directory is {}", localShardsDir);

        outLocal = new Path(localWorkDir.getAbsolutePath(), "combined");
        File combinedDir = new File(outLocal.toString());
        FileUtils.forceMkdir(combinedDir);
        LOG.info("Local combined index directory is {}", combinedDir);

        optimizeOutput = context.getConfiguration().getBoolean(OPTIMIZE_OUTPUT, true);
        LOG.info("Output optimization false is set to {}", optimizeOutput);

        combined = FSDirectory.open(combinedDir);
        writer = new IndexWriter(combined, new StopAnalyzer(Version.LUCENE_29), true,
                IndexWriter.MaxFieldLength.UNLIMITED);

    } catch (Exception e) {
        throw new TDBLoader3Exception(e);
    }
}

From source file:org.apache.jena.tdbloader4.FourthReducer.java

License:Apache License
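
FourthReducer pairs the remote work output path with a local directory via FileSystem.startLocalOutput, so output produced locally can later be copied back to the task's temporary output directory.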

@Override
public void setup(Context context) {
    this.taskAttemptID = context.getTaskAttemptID();
    outputs = new HashMap<String, OutputStream>();
    String outputRootDirectory = context.getConfiguration().get(Constants.OPTION_FOURTH_LOCAL_OUTPUT_DIR,
            Constants.OPTION_FOURTH_LOCAL_OUTPUT_DIR_DEFAULT);
    try {
        fs = FileSystem.get(context.getConfiguration());
        outRemote = FileOutputFormat.getWorkOutputPath(context);
        outLocal = new Path(outputRootDirectory,
                context.getJobName() + "_" + context.getJobID() + "_" + taskAttemptID);
        new File(outLocal.toString()).mkdir();
        // TODO: does this make sense?
        fs.setReplication(outLocal, (short) 2);
        fs.startLocalOutput(outRemote, outLocal);
    } catch (Exception e) {
        throw new TDBLoader4Exception(e);
    }
    counters = new Counters(context);
}

From source file:org.apache.jena.tdbloader4.SecondReducer.java

License:Apache License
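
SecondReducer does the same after computing its record offset from values held in the DistributedCache; the local path under /tmp backs the TDB Location it initialises.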

@Override
public void setup(Context context) {
    this.taskAttemptID = context.getTaskAttemptID();
    String id = String.valueOf(taskAttemptID.getTaskID().getId());

    log.debug("Loading offsets from DistributedCache...");
    offsets = loadOffsets(context);
    log.debug("Finished loading offsets from DistributedCache.");

    // this is the offset this reducer needs to add (the sum of all of its 'previous' peers)
    for (int i = 0; i < Integer.valueOf(id); i++) {
        offset += offsets.get(i);
    }
    log.debug("Reducer's number {} offset is {}", id, offset);

    try {
        fs = FileSystem.get(context.getConfiguration());
        outRemote = FileOutputFormat.getWorkOutputPath(context);
        log.debug("outRemote is {}", outRemote);
        outLocal = new Path("/tmp", context.getJobName() + "_" + context.getJobID() + "_" + taskAttemptID);
        fs.startLocalOutput(outRemote, outLocal);
    } catch (Exception e) {
        throw new TDBLoader4Exception(e);
    }
    Location location = new Location(outLocal.toString());
    init(location);

    counters = new Counters(context);
}

From source file:org.apache.sqoop.mapreduce.AvroImportMapper.java

License:Apache License
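
The Avro import mapper mirrors the SequenceFile example above, again passing the work output path to a LargeObjectLoader.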

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    schema = AvroJob.getMapOutputSchema(conf);
    lobLoader = new LargeObjectLoader(conf, FileOutputFormat.getWorkOutputPath(context));
    bigDecimalFormatString = conf.getBoolean(ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT,
            ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT_DEFAULT);
}

From source file:org.terrier.indexing.HadoopIndexerMapper.java

License:Mozilla Public License
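
Terrier's indexer mapper creates its per-task index directly under the work output path, creating the directory first and writing its run metadata alongside the index.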

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    TerrierHDFSAdaptor.initialiseHDFSAdaptor(context.getConfiguration());

    proxyIndexer = createIndexer(context);

    currentContext = context;
    splitnum = getSplitNum(context);

    proxyIndexer.setFlushDelegate(this);

    final Path indexDestination = FileOutputFormat.getWorkOutputPath(context);
    indexDestination.getFileSystem(context.getConfiguration()).mkdirs(indexDestination);

    mapTaskID = getTaskID(context);
    proxyIndexer.currentIndex = Index.createNewIndex(indexDestination.toString(), mapTaskID);
    proxyIndexer.maxMemory = Long
            .parseLong(ApplicationSetup.getProperty("indexing.singlepass.max.postings.memory", "0"));

    // during reduce, we don't want to load indices into memory, as we only
    // use them as streams
    proxyIndexer.currentIndex.setIndexProperty("index.preloadIndices.disabled", "true");
    runData = new DataOutputStream(
            Files.writeFileStream(new Path(indexDestination, mapTaskID + ".runs").toString()));
    runData.writeUTF(mapTaskID);

    proxyIndexer.createMemoryPostings();
    proxyIndexer.docIndexBuilder = new DocumentIndexBuilder(proxyIndexer.currentIndex, "document");
    proxyIndexer.metaBuilder = createMetaIndexBuilder();
    proxyIndexer.emptyDocIndexEntry = (FieldScore.FIELDS_COUNT > 0)
            ? new FieldDocumentIndexEntry(FieldScore.FIELDS_COUNT)
            : new SimpleDocumentIndexEntry();

    final String filterFile = context.getConfiguration().get(HadoopIndexerOptions.DOCUMENT_ID_FILTER_FILE);
    synchronized (HadoopIndexerMapper.class) {
        if (filterFile != null) {
            if (filter == null) {
                logger.warn("Loading filter...");
                final Timer timer = Timer.timer();
                final FileSystem fs = FileSystem.get(context.getConfiguration());
                final Path p = new Path(filterFile);
                filter = new DocListFilter(fs, p);
                logger.warn("Took:" + timer.duration() / 1000f + "s");
            }
        }
    }
}

From source file:org.terrier.indexing.HadoopIndexerReducer.java

License:Mozilla Public License
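
The matching reducer also builds its merged index under the work output path, choosing a per-reducer index prefix when the job runs with more than one reduce task.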

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    TerrierHDFSAdaptor.initialiseHDFSAdaptor(context.getConfiguration());

    proxyIndexer = createIndexer(context);

    //load in the current index
    final Path indexDestination = FileOutputFormat.getWorkOutputPath(context);

    reduceId = context.getTaskAttemptID().getTaskID().getId();
    proxyIndexer.path = indexDestination.toString();
    mutipleIndices = context.getConfiguration().getBoolean("indexing.hadoop.multiple.indices", true);

    if (context.getNumReduceTasks() > 1) {
        //gets the reduce number and suffixes it to the index prefix
        proxyIndexer.prefix = ApplicationSetup.TERRIER_INDEX_PREFIX + "-" + reduceId;
    } else {
        proxyIndexer.prefix = ApplicationSetup.TERRIER_INDEX_PREFIX;
    }

    proxyIndexer.currentIndex = Index.createNewIndex(proxyIndexer.path, proxyIndexer.prefix);

    proxyIndexer.merger = createRunMerger();
}