Example usage for org.apache.hadoop.mapreduce.lib.output FileOutputFormat getPathForWorkFile

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.getPathForWorkFile.

Prototype

public static Path getPathForWorkFile(TaskInputOutputContext<?, ?, ?, ?> context, String name, String extension)
        throws IOException, InterruptedException 

Document

Helper function to generate a Path for a file that is unique for the task within the job output directory.
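
The returned path lives under the task's temporary work directory and carries a task-unique file name, so concurrent task attempts never collide and the side file is promoted into the job output directory only when the task commits. Below is a minimal sketch of the pattern; the SideFileMapper class and the "part"/".count" file names are illustrative assumptions, not part of the Hadoop API.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Illustrative mapper that writes a per-task record count as a side file
// next to its regular output.
public class SideFileMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    private long recordCount = 0;

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        recordCount++;
        context.write(value, key);
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // Mapper.Context is a TaskInputOutputContext, so it can be passed directly.
        // This resolves to a task-unique file such as part-m-00000.count inside
        // the attempt's work directory; the output committer moves it into the
        // job output directory when the task commits.
        Path side = FileOutputFormat.getPathForWorkFile(context, "part", ".count");
        FileSystem fs = side.getFileSystem(context.getConfiguration());
        try (FSDataOutputStream out = fs.create(side, false)) {
            out.writeBytes(recordCount + "\n");
        }
    }
}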

Usage

From source file: simsql.runtime.MapperWithPipeNetwork.java

License: Apache License

public void run(Context context) throws IOException, InterruptedException {

    // first we figure out which mapper this is; the task attempt ID has the
    // form "attempt_<cluster>_<jobseq>_m_<tasknum>_<attemptnum>", so splitting
    // on "_" puts the task number at index 4
    String taskId = context.getConfiguration().get("mapred.task.id");
    String[] parts = taskId.split("_");
    int whichMapper = Integer.parseInt(parts[4]);

    // now we figure out how many mappers there are going to be
    String fileToMap = context.getConfiguration().get("simsql.fileToMap", "noFile");
    int numMappers;
    if (fileToMap.equals("noFile"))
        numMappers = -1;
    else {
        Path path = new Path(fileToMap);
        FileSystem fs = FileSystem.get(context.getConfiguration());
        FileStatus[] fstatus = fs.listStatus(path, new TableFileFilter());
        numMappers = fstatus.length;
    }

    // set up the pipe network
    myNetwork = new NetworkProcessor("PipeNetwork.obj", whichMapper, numMappers);

    // now, check if this is a map-only job.
    Context myHookContext = context;
    if (context.getNumReduceTasks() == 0) {

        // if so, then we need to put some hooks on our context.
        myHookContext = new Wrapped<Nothing, Record, WritableKey, WritableValue>().getMapContext(context);

        // and check if there's statistics collection to do here.
        collectStats = context.getConfiguration().getBoolean("simsql.collectStats", false);
    }

    // set things up for the context
    setup(myHookContext);

    // loop through and process all of the input records
    while (context.nextKeyValue()) {
        pushOneRecord(myHookContext);
    }

    // clean everything up
    cleanup(myHookContext);

    // clean up the network and pass any remaining records along
    Record r;
    Nothing garbage = new Nothing();
    while ((r = myNetwork.cleanupNetwork()) != null) {
        context.write(garbage, r);
    }

    if (collectStats) {
        coll.save(FileOutputFormat.getPathForWorkFile(context, "part", ".stats").toUri().getPath());
    }
}
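
Here getPathForWorkFile(context, "part", ".stats") resolves to a task-unique file of the form part-m-NNNNN.stats under the task attempt's work directory; the toUri().getPath() call strips the scheme and authority so the statistics collector can save to a plain path string.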

From source file: simsql.runtime.ReducerWithPostProcessing.java

License: Apache License

public void run(Context context) throws IOException, InterruptedException {

    int numReducers = context.getNumReduceTasks();
    int whichReducer = context.getTaskAttemptID().getTaskID().getId();

    // set up the pipe network
    myNetwork = new NetworkProcessor("PipeNetwork.obj", whichReducer, numReducers);

    // set up our 'hook' context.
    Context myHookContext = new Wrapped<RecordKey, RecordWrapper, simsql.runtime.WritableKey, simsql.runtime.WritableValue>()
            .getReducerContext(context);

    // get ready to collect the stats
    collectStats = context.getConfiguration().getBoolean("simsql.collectStats", false);

    // do all of the reducing
    setup(myHookContext);
    while (context.nextKey()) {
        reduce(context.getCurrentKey(), context.getValues(), myHookContext);
    }

    // clean everything up 
    cleanup(myHookContext);

    // send any extra junk from the network through the reducer
    Nothing garbage = new Nothing();
    Record r;
    while ((r = myNetwork.cleanupNetwork()) != null) {
        context.write(garbage, r);
    }

    if (collectStats) {
        coll.save(FileOutputFormat.getPathForWorkFile(context, "part", ".stats").toUri().getPath());
    }
}
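
The reducer makes the identical call, which works because Reducer.Context, like Mapper.Context, implements TaskInputOutputContext; that shared interface is why the method's first parameter is typed TaskInputOutputContext<?, ?, ?, ?>. In a reduce task the generated side file takes the form part-r-NNNNN.stats instead.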