Example usage for org.apache.hadoop.mapreduce TaskAttemptContext progress

Introduction

This page collects example usages of org.apache.hadoop.mapreduce TaskAttemptContext progress.

Prototype

public void progress();

Document

Report progress to the Hadoop framework.
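
Hadoop kills a task attempt that neither emits output nor reports status within mapreduce.task.timeout (600000 ms by default), so any long-running loop should call progress() periodically. The sketch below is a minimal illustration of that pattern, not taken from any of the sources that follow; the expensiveWork step is hypothetical, and Mapper.Context extends TaskAttemptContext, so progress() is available on it:

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class SlowMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        for (int i = 0; i < 1000000; i++) {
            expensiveWork(value); // hypothetical long-running step that emits no records
            if (i % 10000 == 0) {
                // Keep the task attempt alive without writing any output
                // or updating any counters.
                context.progress();
            }
        }
    }

    private void expensiveWork(Text value) {
        // placeholder for CPU- or IO-bound work
    }
}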

Usage

From source file: com.flipkart.fdp.migration.distcp.core.MirrorUtils.java

License: Apache License

public static MD5Digester copy(InputStream input, OutputStream result, TaskAttemptContext context)
        throws IOException {

    byte[] buffer = new byte[65536]; // 64K copy buffer
    long count = 0L;
    long sinceLastReport = 0L;
    int n;

    long sts = System.currentTimeMillis();
    MD5Digester digester = new MD5Digester();

    while (-1 != (n = input.read(buffer))) {

        result.write(buffer, 0, n);

        digester.updateMd5digester(buffer, 0, n);
        count += n;
        sinceLastReport += n;

        // Report progress roughly every 64 MB of output; counting bytes
        // since the last report works even when read() returns a partial
        // buffer, which an exact modulo check on count would skip.
        if (sinceLastReport >= 67108864) {
            System.out.println("Wrote 64M Data to Destination Total: " + count + ", Time Taken(ms): "
                    + (System.currentTimeMillis() - sts));

            sts = System.currentTimeMillis();
            context.progress();
            sinceLastReport = 0L;
        }
    }
    }

    System.out.println(
            "Transfer Complete Total: " + count + ", Time Taken(ms): " + (System.currentTimeMillis() - sts));
    return digester;
}
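
A minimal caller sketch for the method above, assuming hypothetical HDFS paths and a live TaskAttemptContext named context:

// Hypothetical caller: stream one HDFS file into another; copy() digests
// the bytes and reports progress to the framework every ~64 MB.
FileSystem fs = FileSystem.get(context.getConfiguration());
try (InputStream in = fs.open(new Path("/data/src/part-00000"));
     OutputStream out = fs.create(new Path("/data/dst/part-00000"), true)) {
    MD5Digester digester = MirrorUtils.copy(in, out, context);
    // digester now holds the MD5 of everything that was streamed
}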

From source file: com.linkedin.json.JsonSequenceFileOutputFormat.java

License: Apache License

@Override
public RecordWriter<Object, Object> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Shamelessly copied from Hadoop code so we can set the metadata with our schema

    Configuration conf = context.getConfiguration();

    CompressionCodec codec = null;
    CompressionType compressionType = CompressionType.NONE;
    if (getCompressOutput(context)) {
        // find the kind of compression to do
        compressionType = SequenceFileOutputFormat.getOutputCompressionType(context);

        // find the right codec
        Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    }
    // get the path of the temporary output file
    Path file = getDefaultWorkFile(context, "");
    FileSystem fs = file.getFileSystem(conf);

    final String keySchema = getSchema("output.key.schema", conf);
    final String valueSchema = getSchema("output.value.schema", conf);

    /* begin cheddar's stealing of jay's code */
    SequenceFile.Metadata meta = new SequenceFile.Metadata();

    meta.set(new Text("key.schema"), new Text(keySchema));
    meta.set(new Text("value.schema"), new Text(valueSchema));

    final SequenceFile.Writer out = SequenceFile.createWriter(fs, conf, file, context.getOutputKeyClass(),
            context.getOutputValueClass(), compressionType, codec, context, meta);
    /* end cheddar's stealing of jay's code */

    final JsonTypeSerializer keySerializer = new JsonTypeSerializer(keySchema);
    final JsonTypeSerializer valueSerializer = new JsonTypeSerializer(valueSchema);

    return new RecordWriter<Object, Object>() {

        public void write(Object key, Object value) throws IOException {

            out.append(new BytesWritable(keySerializer.toBytes(key)),
                    new BytesWritable(valueSerializer.toBytes(value)));
            context.progress();
        }

        public void close(TaskAttemptContext context) throws IOException {
            out.close();
        }
    };
}

From source file: com.netflix.aegisthus.tools.Utils.java

License: Apache License

public static void copy(Path from, Path to, boolean snappy, TaskAttemptContext ctx) throws IOException {
    FileSystem fromFs = from.getFileSystem(ctx.getConfiguration());
    FileSystem toFs = to.getFileSystem(ctx.getConfiguration());

    if (!to.isAbsolute()) {
        to = new Path(ctx.getConfiguration().get("mapred.working.dir"), to);
    }
    if (!snappy && onSameHdfs(ctx.getConfiguration(), from, to)) {
        LOG.info(String.format("renaming %s to %s", from, to));
        toFs.mkdirs(to.getParent());
        toFs.rename(from, to);
        return;
    }

    InputStream in = fromFs.open(from);
    OutputStream out = toFs.create(to, false);
    try {
        if (snappy) {
            in = new SnappyInputStream2(in);
        }
        byte[] buffer = new byte[65536];
        int bytesRead;
        int count = 0;
        while ((bytesRead = in.read(buffer)) >= 0) {
            if (bytesRead > 0) {
                out.write(buffer, 0, bytesRead);
            }
            if (count++ % 50 == 0) {
                ctx.progress();
            }
        }
    } finally {
        in.close();
        out.close();
    }
}

From source file: com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java

License: Apache License

/**
 * Move the files from the work directory to the job output directory
 * @param context the task context
 */
public void commitTask(TaskAttemptContext context) throws IOException {
    if (!fake || (committers.size() == 0)) {
        TaskAttemptID attemptId = context.getTaskAttemptID();
        if (workPath != null) {
            context.progress();
            if (outputFileSystem.exists(workPath)) {
                // Move the task outputs to their final place
                moveTaskOutputs(context, outputFileSystem, outputPath, workPath);
                // Delete the temporary task-specific output directory
                if (!outputFileSystem.delete(workPath, true)) {
                    LOG.warn("Failed to delete the temporary output" + " directory of task: " + attemptId
                            + " - " + workPath);
                }
                LOG.info("Saved output of task '" + attemptId + "' to " + outputPath);
            }
        }
        //        commitJob(context);
    } else {
        for (FileOutputCommitter c : committers.values()) {
            c.commitTask(context);
        }
    }
}

From source file: com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java

License: Apache License

/**
 * Move all of the files from the work directory to the final output
 * @param context the task context
 * @param fs the output file system
 * @param jobOutputDir the final output directory
 * @param taskOutput the work path
 * @throws IOException
 */
private void moveTaskOutputs(TaskAttemptContext context, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    TaskAttemptID attemptId = context.getTaskAttemptID();
    context.progress();
    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, workPath);
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true)) {
                throw new IOException("Failed to delete earlier output of task: " + attemptId);
            }
            if (!fs.rename(taskOutput, finalOutputPath)) {
                throw new IOException("Failed to save output of task: " + attemptId);
            }
        }
        LOG.debug("Moved " + taskOutput + " to " + finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, workPath);
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                moveTaskOutputs(context, fs, jobOutputDir, path.getPath());
            }
        }
    }
}

From source file: com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java

License: Apache License

/**
 * Delete the work directory
 */
@Override
public void abortTask(TaskAttemptContext context) {
    if (!fake || (committers.size() == 0)) {
        try {
            if (workPath != null) {
                context.progress();
                outputFileSystem.delete(workPath, true);
            }
        } catch (IOException ie) {
            LOG.warn("Error discarding output" + StringUtils.stringifyException(ie));
        }
    } else {
        for (FileOutputCommitter c : committers.values()) {
            c.abortTask(context);
        }
    }
}

From source file: com.zjy.mongo.output.MongoOutputCommitter.java

License: Apache License

@Override
public void commitTask(final TaskAttemptContext taskContext) throws IOException {
    LOG.info("Committing task.");

    collections = MongoConfigUtil.getOutputCollections(taskContext.getConfiguration());
    numberOfHosts = collections.size();

    // Get temporary file.
    Path tempFilePath = getTaskAttemptPath(taskContext);
    LOG.info("Committing from temporary file: " + tempFilePath.toString());
    long filePos = 0, fileLen;
    FSDataInputStream inputStream = null;
    try {
        FileSystem fs = FileSystem.get(taskContext.getConfiguration());
        inputStream = fs.open(tempFilePath);
        fileLen = fs.getFileStatus(tempFilePath).getLen();
    } catch (IOException e) {
        LOG.error("Could not open temporary file for committing", e);
        cleanupAfterCommit(inputStream, taskContext);
        throw e;
    }

    int maxDocs = MongoConfigUtil.getBatchSize(taskContext.getConfiguration());
    int curBatchSize = 0;
    DBCollection coll = getDbCollectionByRoundRobin();
    BulkWriteOperation bulkOp = coll.initializeOrderedBulkOperation();

    // Read Writables out of the temporary file.
    BSONWritable bw = new BSONWritable();
    MongoUpdateWritable muw = new MongoUpdateWritable();
    while (filePos < fileLen) {
        try {
            // Determine writable type, and perform corresponding operation
            // on MongoDB.
            int mwType = inputStream.readInt();
            if (MongoWritableTypes.BSON_WRITABLE == mwType) {
                bw.readFields(inputStream);
                bulkOp.insert(new BasicDBObject(bw.getDoc().toMap()));
            } else if (MongoWritableTypes.MONGO_UPDATE_WRITABLE == mwType) {
                muw.readFields(inputStream);
                DBObject query = new BasicDBObject(muw.getQuery().toMap());
                DBObject modifiers = new BasicDBObject(muw.getModifiers().toMap());
                if (muw.isMultiUpdate()) {
                    if (muw.isUpsert()) {
                        bulkOp.find(query).upsert().update(modifiers);
                    } else {
                        bulkOp.find(query).update(modifiers);
                    }
                } else {
                    if (muw.isUpsert()) {
                        bulkOp.find(query).upsert().updateOne(modifiers);
                    } else {
                        bulkOp.find(query).updateOne(modifiers);
                    }
                }
            } else {
                throw new IOException("Unrecognized type: " + mwType);
            }
            filePos = inputStream.getPos();
            // Write to MongoDB if the batch is full, or if this is the last
            // operation to be performed for the Task.
            if (++curBatchSize >= maxDocs || filePos >= fileLen) {
                try {
                    bulkOp.execute();
                } catch (MongoException e) {
                    LOG.error("Could not write to MongoDB", e);
                    throw e;
                }
                coll = getDbCollectionByRoundRobin();
                bulkOp = coll.initializeOrderedBulkOperation();
                curBatchSize = 0;

                // Signal progress back to Hadoop framework so that we
                // don't time out.
                taskContext.progress();
            }
        } catch (IOException e) {
            LOG.error("Error reading from temporary file", e);
            throw e;
        }
    }

    cleanupAfterCommit(inputStream, taskContext);
}

From source file: edu.arizona.cs.hadoop.fs.irods.output.HirodsFileOutputCommitter.java

License: Apache License

/**
 * Move the files from the work directory to the job output directory
 *
 * @param context the task context
 */
public void commitTask(TaskAttemptContext context) throws IOException {
    TaskAttemptID attemptId = context.getTaskAttemptID();
    if (this.workPath != null) {
        context.progress();
        if (this.workFileSystem.exists(this.workPath)) {
            // Move the task outputs to their final place
            moveTaskOutputsToIRODS(context, this.outputFileSystem, this.outputPath, this.workFileSystem,
                    this.workPath);
            // Delete the temporary task-specific output directory
            if (!this.workFileSystem.delete(this.workPath, true)) {
                LOG.warn("Failed to delete the temporary output" + " directory of task: " + attemptId + " - "
                        + this.workPath);
            }
            LOG.info("Saved output of task '" + attemptId + "' to " + this.outputPath);
        }
    }
}

From source file: edu.arizona.cs.hadoop.fs.irods.output.HirodsFileOutputCommitter.java

License: Apache License

private void moveTaskOutputsToIRODS(TaskAttemptContext context, FileSystem outfs, Path outDir,
        FileSystem workfs, Path workOutput) throws IOException {
    context.progress();
    if (workfs.isFile(workOutput)) {
        Path finalOutputPath = getFinalPath(outDir, workOutput, this.workPath);
        FSDataOutputStream irods_os = null;
        FSDataInputStream temp_is = null;
        try {
            // commit to iRODS
            irods_os = outfs.create(finalOutputPath, true);
            temp_is = workfs.open(workOutput);

            byte[] buffer = new byte[100 * 1024];
            int bytes_read = 0;

            while ((bytes_read = temp_is.read(buffer)) != -1) {
                irods_os.write(buffer, 0, bytes_read);
            }
        } finally {
            if (temp_is != null) {
                try {
                    temp_is.close();
                } catch (IOException ex) {
                    // ignore exceptions
                }
            }

            // remove temporary file
            try {
                workfs.delete(workOutput, true);
            } catch (IOException ex) {
                // ignore exceptions
            }

            if (irods_os != null) {
                irods_os.close();
            }
        }

        LOG.debug("Moved " + workOutput + " to " + finalOutputPath);
    } else if (workfs.getFileStatus(workOutput).isDir()) {
        FileStatus[] paths = workfs.listStatus(workOutput);
        Path finalOutputPath = getFinalPath(outDir, workOutput, this.workPath);
        outfs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                moveTaskOutputsToIRODS(context, outfs, outDir, workfs, path.getPath());
            }
        }
    }
}

From source file: edu.arizona.cs.hadoop.fs.irods.output.HirodsFileOutputCommitter.java

License: Apache License

/**
 * Delete the work directory
 */
@Override
public void abortTask(TaskAttemptContext context) {
    try {
        if (this.workPath != null) {
            context.progress();
            this.workFileSystem.delete(this.workPath, true);
        }
    } catch (IOException ie) {
        LOG.warn("Error discarding output" + StringUtils.stringifyException(ie));
    }
}