Example usage for org.apache.hadoop.mapreduce.lib.output FileOutputFormat getPathForWorkFile

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.getPathForWorkFile.

Prototype

public static Path getPathForWorkFile(TaskInputOutputContext<?, ?, ?, ?> context, String name, String extension)
        throws IOException, InterruptedException 

Document

Helper function to generate a Path for a file that is unique for the task within the job output directory.
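
The returned path lives under the task's temporary work directory and carries a task-unique file name, so concurrent task attempts never collide and the side file is promoted into the job output directory only when the task commits. Below is a minimal sketch of the pattern; the SideFileMapper class and the "part"/".count" file names are illustrative assumptions, not part of the Hadoop API.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Illustrative mapper that writes a per-task record count as a side file
// next to its regular output.
public class SideFileMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    private long recordCount = 0;

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        recordCount++;
        context.write(value, key);
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // Mapper.Context is a TaskInputOutputContext, so it can be passed directly.
        // This resolves to a task-unique file such as part-m-00000.count inside
        // the attempt's work directory; the output committer moves it into the
        // job output directory when the task commits.
        Path side = FileOutputFormat.getPathForWorkFile(context, "part", ".count");
        FileSystem fs = side.getFileSystem(context.getConfiguration());
        try (FSDataOutputStream out = fs.create(side, false)) {
            out.writeBytes(recordCount + "\n");
        }
    }
}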

Usage

From source file: simsql.runtime.MapperWithPipeNetwork.java

License: Apache License

public void run(Context context) throws IOException, InterruptedException {

    // first we figure out which mapper this is; the task attempt ID has the
    // form "attempt_<cluster>_<jobseq>_m_<tasknum>_<attemptnum>", so splitting
    // on "_" puts the task number at index 4
    String taskId = context.getConfiguration().get("mapred.task.id");
    String[] parts = taskId.split("_");
    int whichMapper = Integer.parseInt(parts[4]);

    // now we figure out how many mappers there are going to be
    String fileToMap = context.getConfiguration().get("simsql.fileToMap", "noFile");
    int numMappers;
    if (fileToMap.equals("noFile"))
        numMappers = -1;
    else {
        Path path = new Path(fileToMap);
        FileSystem fs = FileSystem.get(context.getConfiguration());
        FileStatus[] fstatus = fs.listStatus(path, new TableFileFilter());
        numMappers = fstatus.length;
    }

    // set up the pipe network
    myNetwork = new NetworkProcessor("PipeNetwork.obj", whichMapper, numMappers);

    // now, check if this is a map-only job.
    Context myHookContext = context;
    if (context.getNumReduceTasks() == 0) {

        // if so, then we need to put some hooks on our context.
        myHookContext = new Wrapped<Nothing, Record, WritableKey, WritableValue>().getMapContext(context);

        // and check if there's statistics collection to do here.
        collectStats = context.getConfiguration().getBoolean("simsql.collectStats", false);
    }

    // set things up for the context
    setup(myHookContext);

    // loop through and process all of the input records
    while (context.nextKeyValue()) {
        pushOneRecord(myHookContext);
    }

    // clean everything up
    cleanup(myHookContext);

    // clean up the network and pass any remaining records along
    Record r;
    Nothing garbage = new Nothing();
    while ((r = myNetwork.cleanupNetwork()) != null) {
        context.write(garbage, r);
    }

    if (collectStats) {
        coll.save(FileOutputFormat.getPathForWorkFile(context, "part", ".stats").toUri().getPath());
    }
}
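
Here getPathForWorkFile(context, "part", ".stats") resolves to a task-unique file of the form part-m-NNNNN.stats under the task attempt's work directory; the toUri().getPath() call strips the scheme and authority so the statistics collector can save to a plain path string.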

From source file: simsql.runtime.ReducerWithPostProcessing.java

License: Apache License

public void run(Context context) throws IOException, InterruptedException {

    int numReducers = context.getNumReduceTasks();
    int whichReducer = context.getTaskAttemptID().getTaskID().getId();

    // set up the pipe network
    myNetwork = new NetworkProcessor("PipeNetwork.obj", whichReducer, numReducers);

    // set up our 'hook' context.
    Context myHookContext = new Wrapped<RecordKey, RecordWrapper, simsql.runtime.WritableKey, simsql.runtime.WritableValue>()
            .getReducerContext(context);

    // get ready to collect the stats
    collectStats = context.getConfiguration().getBoolean("simsql.collectStats", false);

    // do all of the reducing
    setup(myHookContext);
    while (context.nextKey()) {
        reduce(context.getCurrentKey(), context.getValues(), myHookContext);
    }

    // clean everything up 
    cleanup(myHookContext);

    // send any extra junk from the network through the reducer
    Nothing garbage = new Nothing();
    Record r;
    while ((r = myNetwork.cleanupNetwork()) != null) {
        context.write(garbage, r);
    }

    if (collectStats) {
        coll.save(FileOutputFormat.getPathForWorkFile(context, "part", ".stats").toUri().getPath());
    }
}
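
The reducer makes the identical call, which works because Reducer.Context, like Mapper.Context, implements TaskInputOutputContext; that shared interface is why the method's first parameter is typed TaskInputOutputContext<?, ?, ?, ?>. In a reduce task the generated side file takes the form part-r-NNNNN.stats instead.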