List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.progress()
public void progress();
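TaskAttemptContext extends Progressable, and progress() simply tells the framework that the task attempt is still alive. A task that reports no progress for longer than mapreduce.task.timeout (600 seconds by default) is killed as hung, so code that does long stretches of work without emitting records -- such as the copy loops and output committers below -- calls progress() periodically. A minimal sketch of the pattern, assuming an illustrative mapper whose per-record work is slow (SlowRecordMapper and expensiveWork are hypothetical names, not taken from the examples that follow):

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class SlowRecordMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    private long records = 0;

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        expensiveWork(value); // stand-in for long-running per-record work
        // Mapper.Context implements TaskAttemptContext; report liveness
        // every 10000 records so the attempt is not killed as hung.
        if (++records % 10000 == 0) {
            context.progress();
        }
    }

    private void expensiveWork(Text value) {
        // hypothetical CPU-heavy processing that emits no output
    }
}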
From source file:com.flipkart.fdp.migration.distcp.core.MirrorUtils.java
License:Apache License
public static MD5Digester copy(InputStream input, OutputStream result, TaskAttemptContext context)
        throws IOException {
    byte[] buffer = new byte[65536]; // 64 KB copy buffer
    long count = 0L;
    int n;
    long sts = System.currentTimeMillis();
    MD5Digester digester = new MD5Digester();
    while (-1 != (n = input.read(buffer))) {
        result.write(buffer, 0, n);
        digester.updateMd5digester(buffer, 0, n);
        count += n;
        // Report progress once per 64 MB copied. Note: this check assumes reads
        // fill the buffer so that count stays a multiple of 64 KB; a short read
        // can step past the 64 MB boundary and skip a report.
        if (count % 67108864 == 0) {
            System.out.println("Wrote 64M Data to Destination Total: " + count + ", Time Taken(ms): "
                    + (System.currentTimeMillis() - sts));
            sts = System.currentTimeMillis();
            context.progress();
        }
    }
    System.out.println(
            "Transfer Complete Total: " + count + ", Time Taken(ms): " + (System.currentTimeMillis() - sts));
    return digester;
}
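A single map-side copy here can move many gigabytes without emitting a single record, so the per-64 MB heartbeat keeps the attempt alive while staying cheap relative to the I/O; without it the task would be killed once the progress timeout elapsed.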
From source file:com.linkedin.json.JsonSequenceFileOutputFormat.java
License:Apache License
@Override
public RecordWriter<Object, Object> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Shamelessly copied from Hadoop code to allow us to set the metadata with our schema
    Configuration conf = context.getConfiguration();
    CompressionCodec codec = null;
    CompressionType compressionType = CompressionType.NONE;
    if (getCompressOutput(context)) {
        // find the kind of compression to do
        compressionType = SequenceFileOutputFormat.getOutputCompressionType(context);
        // find the right codec
        Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    }
    // get the path of the temporary output file
    Path file = getDefaultWorkFile(context, "");
    FileSystem fs = file.getFileSystem(conf);

    final String keySchema = getSchema("output.key.schema", conf);
    final String valueSchema = getSchema("output.value.schema", conf);

    /* begin cheddar's stealing of jay's code */
    SequenceFile.Metadata meta = new SequenceFile.Metadata();
    meta.set(new Text("key.schema"), new Text(keySchema));
    meta.set(new Text("value.schema"), new Text(valueSchema));

    final SequenceFile.Writer out = SequenceFile.createWriter(fs, conf, file, context.getOutputKeyClass(),
            context.getOutputValueClass(), compressionType, codec, context, meta);
    /* end cheddar's stealing of jay's code */

    final JsonTypeSerializer keySerializer = new JsonTypeSerializer(keySchema);
    final JsonTypeSerializer valueSerializer = new JsonTypeSerializer(valueSchema);

    return new RecordWriter<Object, Object>() {
        public void write(Object key, Object value) throws IOException {
            out.append(new BytesWritable(keySerializer.toBytes(key)),
                    new BytesWritable(valueSerializer.toBytes(value)));
            context.progress();
        }

        public void close(TaskAttemptContext context) throws IOException {
            out.close();
        }
    };
}
From source file:com.netflix.aegisthus.tools.Utils.java
License:Apache License
public static void copy(Path from, Path to, boolean snappy, TaskAttemptContext ctx) throws IOException {
    FileSystem fromFs = from.getFileSystem(ctx.getConfiguration());
    FileSystem toFs = to.getFileSystem(ctx.getConfiguration());
    if (!to.isAbsolute()) {
        to = new Path(ctx.getConfiguration().get("mapred.working.dir"), to);
    }
    if (!snappy && onSameHdfs(ctx.getConfiguration(), from, to)) {
        LOG.info(String.format("renaming %s to %s", from, to));
        toFs.mkdirs(to.getParent());
        toFs.rename(from, to);
        return;
    }
    InputStream in = fromFs.open(from);
    OutputStream out = toFs.create(to, false);
    try {
        if (snappy) {
            in = new SnappyInputStream2(in);
        }
        byte[] buffer = new byte[65536];
        int bytesRead;
        int count = 0;
        while ((bytesRead = in.read(buffer)) >= 0) {
            if (bytesRead > 0) {
                out.write(buffer, 0, bytesRead);
            }
            if (count++ % 50 == 0) {
                ctx.progress();
            }
        }
    } finally {
        in.close();
        out.close();
    }
}
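This variant throttles by iteration count rather than bytes: with a 64 KB buffer, reporting on every 50th read works out to roughly once per 3.2 MB copied. The exact interval matters less than bounding the overhead of the heartbeat.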
From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java
License:Apache License
/**
 * Move the files from the work directory to the job output directory
 * @param context the task context
 */
public void commitTask(TaskAttemptContext context) throws IOException {
    if (!fake || (committers.size() == 0)) {
        TaskAttemptID attemptId = context.getTaskAttemptID();
        if (workPath != null) {
            context.progress();
            if (outputFileSystem.exists(workPath)) {
                // Move the task outputs to their final place
                moveTaskOutputs(context, outputFileSystem, outputPath, workPath);
                // Delete the temporary task-specific output directory
                if (!outputFileSystem.delete(workPath, true)) {
                    LOG.warn("Failed to delete the temporary output" + " directory of task: " + attemptId
                            + " - " + workPath);
                }
                LOG.info("Saved output of task '" + attemptId + "' to " + outputPath);
            }
        }
        // commitJob(context);
    } else {
        for (FileOutputCommitter c : committers.values()) {
            c.commitTask(context);
        }
    }
}
From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java
License:Apache License
/**
 * Move all of the files from the work directory to the final output
 * @param context the task context
 * @param fs the output file system
 * @param jobOutputDir the final output directory
 * @param taskOutput the work path
 * @throws IOException
 */
private void moveTaskOutputs(TaskAttemptContext context, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    TaskAttemptID attemptId = context.getTaskAttemptID();
    context.progress();
    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, workPath);
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true)) {
                throw new IOException("Failed to delete earlier output of task: " + attemptId);
            }
            if (!fs.rename(taskOutput, finalOutputPath)) {
                throw new IOException("Failed to save output of task: " + attemptId);
            }
        }
        LOG.debug("Moved " + taskOutput + " to " + finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, workPath);
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                moveTaskOutputs(context, fs, jobOutputDir, path.getPath());
            }
        }
    }
}
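Because moveTaskOutputs recurses once per file or directory, the context.progress() call at the top of each invocation keeps renewing the task's lease throughout a commit that walks a large output tree, not just once at the start.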
From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java
License:Apache License
/**
 * Delete the work directory
 */
@Override
public void abortTask(TaskAttemptContext context) {
    if (!fake || (committers.size() == 0)) {
        try {
            if (workPath != null) {
                context.progress();
                outputFileSystem.delete(workPath, true);
            }
        } catch (IOException ie) {
            LOG.warn("Error discarding output" + StringUtils.stringifyException(ie));
        }
    } else {
        for (FileOutputCommitter c : committers.values()) {
            c.abortTask(context);
        }
    }
}
From source file:com.zjy.mongo.output.MongoOutputCommitter.java
License:Apache License
@Override
public void commitTask(final TaskAttemptContext taskContext) throws IOException {
    LOG.info("Committing task.");

    collections = MongoConfigUtil.getOutputCollections(taskContext.getConfiguration());
    numberOfHosts = collections.size();

    // Get temporary file.
    Path tempFilePath = getTaskAttemptPath(taskContext);
    LOG.info("Committing from temporary file: " + tempFilePath.toString());
    long filePos = 0, fileLen;
    FSDataInputStream inputStream = null;
    try {
        FileSystem fs = FileSystem.get(taskContext.getConfiguration());
        inputStream = fs.open(tempFilePath);
        fileLen = fs.getFileStatus(tempFilePath).getLen();
    } catch (IOException e) {
        LOG.error("Could not open temporary file for committing", e);
        cleanupAfterCommit(inputStream, taskContext);
        throw e;
    }

    int maxDocs = MongoConfigUtil.getBatchSize(taskContext.getConfiguration());
    int curBatchSize = 0;
    DBCollection coll = getDbCollectionByRoundRobin();
    BulkWriteOperation bulkOp = coll.initializeOrderedBulkOperation();

    // Read Writables out of the temporary file.
    BSONWritable bw = new BSONWritable();
    MongoUpdateWritable muw = new MongoUpdateWritable();
    while (filePos < fileLen) {
        try {
            // Determine writable type, and perform corresponding operation
            // on MongoDB.
            int mwType = inputStream.readInt();
            if (MongoWritableTypes.BSON_WRITABLE == mwType) {
                bw.readFields(inputStream);
                bulkOp.insert(new BasicDBObject(bw.getDoc().toMap()));
            } else if (MongoWritableTypes.MONGO_UPDATE_WRITABLE == mwType) {
                muw.readFields(inputStream);
                DBObject query = new BasicDBObject(muw.getQuery().toMap());
                DBObject modifiers = new BasicDBObject(muw.getModifiers().toMap());
                if (muw.isMultiUpdate()) {
                    if (muw.isUpsert()) {
                        bulkOp.find(query).upsert().update(modifiers);
                    } else {
                        bulkOp.find(query).update(modifiers);
                    }
                } else {
                    if (muw.isUpsert()) {
                        bulkOp.find(query).upsert().updateOne(modifiers);
                    } else {
                        bulkOp.find(query).updateOne(modifiers);
                    }
                }
            } else {
                throw new IOException("Unrecognized type: " + mwType);
            }
            filePos = inputStream.getPos();
            // Write to MongoDB if the batch is full, or if this is the last
            // operation to be performed for the Task.
            if (++curBatchSize >= maxDocs || filePos >= fileLen) {
                try {
                    bulkOp.execute();
                } catch (MongoException e) {
                    LOG.error("Could not write to MongoDB", e);
                    throw e;
                }
                coll = getDbCollectionByRoundRobin();
                bulkOp = coll.initializeOrderedBulkOperation();
                curBatchSize = 0;

                // Signal progress back to Hadoop framework so that we
                // don't time out.
                taskContext.progress();
            }
        } catch (IOException e) {
            LOG.error("Error reading from temporary file", e);
            throw e;
        }
    }

    cleanupAfterCommit(inputStream, taskContext);
}
From source file:edu.arizona.cs.hadoop.fs.irods.output.HirodsFileOutputCommitter.java
License:Apache License
/**
 * Move the files from the work directory to the job output directory
 *
 * @param context the task context
 */
public void commitTask(TaskAttemptContext context) throws IOException {
    TaskAttemptID attemptId = context.getTaskAttemptID();
    if (this.workPath != null) {
        context.progress();
        if (this.workFileSystem.exists(this.workPath)) {
            // Move the task outputs to their final place
            moveTaskOutputsToIRODS(context, this.outputFileSystem, this.outputPath, this.workFileSystem,
                    this.workPath);
            // Delete the temporary task-specific output directory
            if (!this.workFileSystem.delete(this.workPath, true)) {
                LOG.warn("Failed to delete the temporary output" + " directory of task: " + attemptId + " - "
                        + this.workPath);
            }
            LOG.info("Saved output of task '" + attemptId + "' to " + this.outputPath);
        }
    }
}
From source file:edu.arizona.cs.hadoop.fs.irods.output.HirodsFileOutputCommitter.java
License:Apache License
private void moveTaskOutputsToIRODS(TaskAttemptContext context, FileSystem outfs, Path outDir,
        FileSystem workfs, Path workOutput) throws IOException {
    context.progress();
    if (workfs.isFile(workOutput)) {
        Path finalOutputPath = getFinalPath(outDir, workOutput, this.workPath);
        FSDataOutputStream irods_os = null;
        FSDataInputStream temp_is = null;
        try {
            // commit to iRODS
            irods_os = outfs.create(finalOutputPath, true);
            temp_is = workfs.open(workOutput);

            byte[] buffer = new byte[100 * 1024];
            int bytes_read = 0;
            while ((bytes_read = temp_is.read(buffer)) != -1) {
                irods_os.write(buffer, 0, bytes_read);
            }
        } finally {
            if (temp_is != null) {
                try {
                    temp_is.close();
                } catch (IOException ex) {
                    // ignore exceptions
                }
            }
            // remove temporary file
            try {
                workfs.delete(workOutput, true);
            } catch (IOException ex) {
                // ignore exceptions
            }
            if (irods_os != null) {
                irods_os.close();
            }
        }
        LOG.debug("Moved " + workOutput + " to " + finalOutputPath);
    } else if (workfs.getFileStatus(workOutput).isDir()) {
        FileStatus[] paths = workfs.listStatus(workOutput);
        Path finalOutputPath = getFinalPath(outDir, workOutput, this.workPath);
        outfs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                moveTaskOutputsToIRODS(context, outfs, outDir, workfs, path.getPath());
            }
        }
    }
}
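Unlike the HDFS committer above, this one cannot rename across file systems, so each file is byte-copied into iRODS, and progress() fires once per file or directory visited. Note that a single very large file could still outlast the task timeout here, since the inner copy loop itself never reports progress.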
From source file:edu.arizona.cs.hadoop.fs.irods.output.HirodsFileOutputCommitter.java
License:Apache License
/**
 * Delete the work directory
 */
@Override
public void abortTask(TaskAttemptContext context) {
    try {
        if (this.workPath != null) {
            context.progress();
            this.workFileSystem.delete(this.workPath, true);
        }
    } catch (IOException ie) {
        LOG.warn("Error discarding output" + StringUtils.stringifyException(ie));
    }
}