List of usage examples for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat#getPathForWorkFile
public static Path getPathForWorkFile(TaskInputOutputContext<?, ?, ?, ?> context, String name, String extension) throws IOException, InterruptedException
From source file:simsql.runtime.MapperWithPipeNetwork.java
License:Apache License
public void run(Context context) throws IOException, InterruptedException { // first we figure out which mapper this is String taskId = context.getConfiguration().get("mapred.task.id"); String[] parts = taskId.split("_"); int whichMapper = Integer.parseInt(parts[4]); // now we figure out the number of mappers that there are gonna be String fileToMap = context.getConfiguration().get("simsql.fileToMap", "noFile"); int numMappers; if (fileToMap.equals("noFile")) numMappers = -1;/*from w w w .j a v a 2 s .co m*/ else { Path path = new Path(fileToMap); FileSystem fs = FileSystem.get(context.getConfiguration()); FileStatus[] fstatus = fs.listStatus(path, new TableFileFilter()); numMappers = fstatus.length; } // set up the pipe network myNetwork = new NetworkProcessor("PipeNetwork.obj", whichMapper, numMappers); // now, check if this is a map-only job. Context myHookContext = context; if (context.getNumReduceTasks() == 0) { // if so, then we need to put some hooks on our context. myHookContext = new Wrapped<Nothing, Record, WritableKey, WritableValue>().getMapContext(context); // and check if there's statistics collection to do here. collectStats = context.getConfiguration().getBoolean("simsql.collectStats", false); } // set things up for the context setup(myHookContext); // loop through and process all of the input records while (context.nextKeyValue()) { pushOneRecord(myHookContext); } // clean everything up cleanup(myHookContext); // cleanup the network and send everyone on Record r; Nothing garbage = new Nothing(); while ((r = myNetwork.cleanupNetwork()) != null) { context.write(garbage, r); } if (collectStats) { coll.save(FileOutputFormat.getPathForWorkFile(context, "part", ".stats").toUri().getPath()); } }
From source file:simsql.runtime.ReducerWithPostProcessing.java
License:Apache License
public void run(Context context) throws IOException, InterruptedException { int numReducers = context.getNumReduceTasks(); int whichReducer = context.getTaskAttemptID().getTaskID().getId(); // set up the pipe network myNetwork = new NetworkProcessor("PipeNetwork.obj", whichReducer, numReducers); // set up our 'hook' context. Context myHookContext = new Wrapped<RecordKey, RecordWrapper, simsql.runtime.WritableKey, simsql.runtime.WritableValue>() .getReducerContext(context); // get ready to collect the stats collectStats = context.getConfiguration().getBoolean("simsql.collectStats", false); // do all of the reducing setup(myHookContext);//from w w w. ja v a 2 s . c o m while (context.nextKey()) { reduce(context.getCurrentKey(), context.getValues(), myHookContext); } // clean everything up cleanup(myHookContext); // send any extra junk from the netowrk thru the reducer Nothing garbage = new Nothing(); Record r; while ((r = myNetwork.cleanupNetwork()) != null) { context.write(garbage, r); } if (collectStats) { coll.save(FileOutputFormat.getPathForWorkFile(context, "part", ".stats").toUri().getPath()); } }