List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.progress()
public void progress();
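TaskAttemptContext extends Progressable, and progress() simply tells the framework that the task attempt is still alive. A task that reports no progress for longer than mapreduce.task.timeout (600 seconds by default) is killed as hung, so code that does long stretches of work without emitting records -- such as the copy loops and output committers below -- calls progress() periodically. A minimal sketch of the pattern, assuming an illustrative mapper whose per-record work is slow (SlowRecordMapper and expensiveWork are hypothetical names, not taken from the examples that follow):

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class SlowRecordMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    private long records = 0;

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        expensiveWork(value); // stand-in for long-running per-record work
        // Mapper.Context implements TaskAttemptContext; report liveness
        // every 10000 records so the attempt is not killed as hung.
        if (++records % 10000 == 0) {
            context.progress();
        }
    }

    private void expensiveWork(Text value) {
        // hypothetical CPU-heavy processing that emits no output
    }
}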
From source file:com.flipkart.fdp.migration.distcp.core.MirrorUtils.java
License:Apache License
public static MD5Digester copy(InputStream input, OutputStream result, TaskAttemptContext context)
        throws IOException {
    byte[] buffer = new byte[65536]; // 64 KB copy buffer
    long count = 0L;
    int n;
    long sts = System.currentTimeMillis();
    MD5Digester digester = new MD5Digester();
    while (-1 != (n = input.read(buffer))) {
        result.write(buffer, 0, n);
        digester.updateMd5digester(buffer, 0, n);
        count += n;
        // Report progress once per 64 MB copied. Note: this check assumes reads
        // fill the buffer so that count stays a multiple of 64 KB; a short read
        // can step past the 64 MB boundary and skip a report.
        if (count % 67108864 == 0) {
            System.out.println("Wrote 64M Data to Destination Total: " + count + ", Time Taken(ms): "
                    + (System.currentTimeMillis() - sts));
            sts = System.currentTimeMillis();
            context.progress();
        }
    }
    System.out.println(
            "Transfer Complete Total: " + count + ", Time Taken(ms): " + (System.currentTimeMillis() - sts));
    return digester;
}
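A single map-side copy here can move many gigabytes without emitting a single record, so the per-64 MB heartbeat keeps the attempt alive while staying cheap relative to the I/O; without it the task would be killed once the progress timeout elapsed.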
From source file:com.linkedin.json.JsonSequenceFileOutputFormat.java
License:Apache License
@Override
public RecordWriter<Object, Object> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Shamelessly copied from Hadoop code to allow us to set the metadata with our schema
    Configuration conf = context.getConfiguration();
    CompressionCodec codec = null;
    CompressionType compressionType = CompressionType.NONE;
    if (getCompressOutput(context)) {
        // find the kind of compression to do
        compressionType = SequenceFileOutputFormat.getOutputCompressionType(context);
        // find the right codec
        Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    }
    // get the path of the temporary output file
    Path file = getDefaultWorkFile(context, "");
    FileSystem fs = file.getFileSystem(conf);

    final String keySchema = getSchema("output.key.schema", conf);
    final String valueSchema = getSchema("output.value.schema", conf);

    /* begin cheddar's stealing of jay's code */
    SequenceFile.Metadata meta = new SequenceFile.Metadata();
    meta.set(new Text("key.schema"), new Text(keySchema));
    meta.set(new Text("value.schema"), new Text(valueSchema));

    final SequenceFile.Writer out = SequenceFile.createWriter(fs, conf, file, context.getOutputKeyClass(),
            context.getOutputValueClass(), compressionType, codec, context, meta);
    /* end cheddar's stealing of jay's code */

    final JsonTypeSerializer keySerializer = new JsonTypeSerializer(keySchema);
    final JsonTypeSerializer valueSerializer = new JsonTypeSerializer(valueSchema);

    return new RecordWriter<Object, Object>() {
        public void write(Object key, Object value) throws IOException {
            out.append(new BytesWritable(keySerializer.toBytes(key)),
                    new BytesWritable(valueSerializer.toBytes(value)));
            context.progress();
        }

        public void close(TaskAttemptContext context) throws IOException {
            out.close();
        }
    };
}
From source file:com.netflix.aegisthus.tools.Utils.java
License:Apache License
public static void copy(Path from, Path to, boolean snappy, TaskAttemptContext ctx) throws IOException {
    FileSystem fromFs = from.getFileSystem(ctx.getConfiguration());
    FileSystem toFs = to.getFileSystem(ctx.getConfiguration());
    if (!to.isAbsolute()) {
        to = new Path(ctx.getConfiguration().get("mapred.working.dir"), to);
    }
    if (!snappy && onSameHdfs(ctx.getConfiguration(), from, to)) {
        LOG.info(String.format("renaming %s to %s", from, to));
        toFs.mkdirs(to.getParent());
        toFs.rename(from, to);
        return;
    }
    InputStream in = fromFs.open(from);
    OutputStream out = toFs.create(to, false);
    try {
        if (snappy) {
            in = new SnappyInputStream2(in);
        }
        byte[] buffer = new byte[65536];
        int bytesRead;
        int count = 0;
        while ((bytesRead = in.read(buffer)) >= 0) {
            if (bytesRead > 0) {
                out.write(buffer, 0, bytesRead);
            }
            if (count++ % 50 == 0) {
                ctx.progress();
            }
        }
    } finally {
        in.close();
        out.close();
    }
}
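This variant throttles by iteration count rather than bytes: with a 64 KB buffer, reporting on every 50th read works out to roughly once per 3.2 MB copied. The exact interval matters less than bounding the overhead of the heartbeat.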
From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java
License:Apache License
/**
 * Move the files from the work directory to the job output directory
 * @param context the task context
 */
public void commitTask(TaskAttemptContext context) throws IOException {
    if (!fake || (committers.size() == 0)) {
        TaskAttemptID attemptId = context.getTaskAttemptID();
        if (workPath != null) {
            context.progress();
            if (outputFileSystem.exists(workPath)) {
                // Move the task outputs to their final place
                moveTaskOutputs(context, outputFileSystem, outputPath, workPath);
                // Delete the temporary task-specific output directory
                if (!outputFileSystem.delete(workPath, true)) {
                    LOG.warn("Failed to delete the temporary output" + " directory of task: " + attemptId
                            + " - " + workPath);
                }
                LOG.info("Saved output of task '" + attemptId + "' to " + outputPath);
            }
        }
        // commitJob(context);
    } else {
        for (FileOutputCommitter c : committers.values()) {
            c.commitTask(context);
        }
    }
}
From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java
License:Apache License
/**
 * Move all of the files from the work directory to the final output
 * @param context the task context
 * @param fs the output file system
 * @param jobOutputDir the final output directory
 * @param taskOutput the work path
 * @throws IOException
 */
private void moveTaskOutputs(TaskAttemptContext context, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    TaskAttemptID attemptId = context.getTaskAttemptID();
    context.progress();
    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, workPath);
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true)) {
                throw new IOException("Failed to delete earlier output of task: " + attemptId);
            }
            if (!fs.rename(taskOutput, finalOutputPath)) {
                throw new IOException("Failed to save output of task: " + attemptId);
            }
        }
        LOG.debug("Moved " + taskOutput + " to " + finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, workPath);
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                moveTaskOutputs(context, fs, jobOutputDir, path.getPath());
            }
        }
    }
}
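Because moveTaskOutputs recurses once per file or directory, the context.progress() call at the top of each invocation keeps renewing the task's lease throughout a commit that walks a large output tree, not just once at the start.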
From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java
License:Apache License
/**
 * Delete the work directory
 */
@Override
public void abortTask(TaskAttemptContext context) {
    if (!fake || (committers.size() == 0)) {
        try {
            if (workPath != null) {
                context.progress();
                outputFileSystem.delete(workPath, true);
            }
        } catch (IOException ie) {
            LOG.warn("Error discarding output" + StringUtils.stringifyException(ie));
        }
    } else {
        for (FileOutputCommitter c : committers.values()) {
            c.abortTask(context);
        }
    }
}
From source file:com.zjy.mongo.output.MongoOutputCommitter.java
License:Apache License
@Override
public void commitTask(final TaskAttemptContext taskContext) throws IOException {
    LOG.info("Committing task.");

    collections = MongoConfigUtil.getOutputCollections(taskContext.getConfiguration());
    numberOfHosts = collections.size();

    // Get temporary file.
    Path tempFilePath = getTaskAttemptPath(taskContext);
    LOG.info("Committing from temporary file: " + tempFilePath.toString());
    long filePos = 0, fileLen;
    FSDataInputStream inputStream = null;
    try {
        FileSystem fs = FileSystem.get(taskContext.getConfiguration());
        inputStream = fs.open(tempFilePath);
        fileLen = fs.getFileStatus(tempFilePath).getLen();
    } catch (IOException e) {
        LOG.error("Could not open temporary file for committing", e);
        cleanupAfterCommit(inputStream, taskContext);
        throw e;
    }

    int maxDocs = MongoConfigUtil.getBatchSize(taskContext.getConfiguration());
    int curBatchSize = 0;
    DBCollection coll = getDbCollectionByRoundRobin();
    BulkWriteOperation bulkOp = coll.initializeOrderedBulkOperation();

    // Read Writables out of the temporary file.
    BSONWritable bw = new BSONWritable();
    MongoUpdateWritable muw = new MongoUpdateWritable();
    while (filePos < fileLen) {
        try {
            // Determine writable type, and perform corresponding operation
            // on MongoDB.
            int mwType = inputStream.readInt();
            if (MongoWritableTypes.BSON_WRITABLE == mwType) {
                bw.readFields(inputStream);
                bulkOp.insert(new BasicDBObject(bw.getDoc().toMap()));
            } else if (MongoWritableTypes.MONGO_UPDATE_WRITABLE == mwType) {
                muw.readFields(inputStream);
                DBObject query = new BasicDBObject(muw.getQuery().toMap());
                DBObject modifiers = new BasicDBObject(muw.getModifiers().toMap());
                if (muw.isMultiUpdate()) {
                    if (muw.isUpsert()) {
                        bulkOp.find(query).upsert().update(modifiers);
                    } else {
                        bulkOp.find(query).update(modifiers);
                    }
                } else {
                    if (muw.isUpsert()) {
                        bulkOp.find(query).upsert().updateOne(modifiers);
                    } else {
                        bulkOp.find(query).updateOne(modifiers);
                    }
                }
            } else {
                throw new IOException("Unrecognized type: " + mwType);
            }
            filePos = inputStream.getPos();
            // Write to MongoDB if the batch is full, or if this is the last
            // operation to be performed for the Task.
            if (++curBatchSize >= maxDocs || filePos >= fileLen) {
                try {
                    bulkOp.execute();
                } catch (MongoException e) {
                    LOG.error("Could not write to MongoDB", e);
                    throw e;
                }
                coll = getDbCollectionByRoundRobin();
                bulkOp = coll.initializeOrderedBulkOperation();
                curBatchSize = 0;

                // Signal progress back to Hadoop framework so that we
                // don't time out.
                taskContext.progress();
            }
        } catch (IOException e) {
            LOG.error("Error reading from temporary file", e);
            throw e;
        }
    }

    cleanupAfterCommit(inputStream, taskContext);
}
From source file:edu.arizona.cs.hadoop.fs.irods.output.HirodsFileOutputCommitter.java
License:Apache License
/**
 * Move the files from the work directory to the job output directory
 *
 * @param context the task context
 */
public void commitTask(TaskAttemptContext context) throws IOException {
    TaskAttemptID attemptId = context.getTaskAttemptID();
    if (this.workPath != null) {
        context.progress();
        if (this.workFileSystem.exists(this.workPath)) {
            // Move the task outputs to their final place
            moveTaskOutputsToIRODS(context, this.outputFileSystem, this.outputPath, this.workFileSystem,
                    this.workPath);
            // Delete the temporary task-specific output directory
            if (!this.workFileSystem.delete(this.workPath, true)) {
                LOG.warn("Failed to delete the temporary output" + " directory of task: " + attemptId + " - "
                        + this.workPath);
            }
            LOG.info("Saved output of task '" + attemptId + "' to " + this.outputPath);
        }
    }
}
From source file:edu.arizona.cs.hadoop.fs.irods.output.HirodsFileOutputCommitter.java
License:Apache License
private void moveTaskOutputsToIRODS(TaskAttemptContext context, FileSystem outfs, Path outDir,
        FileSystem workfs, Path workOutput) throws IOException {
    context.progress();
    if (workfs.isFile(workOutput)) {
        Path finalOutputPath = getFinalPath(outDir, workOutput, this.workPath);
        FSDataOutputStream irods_os = null;
        FSDataInputStream temp_is = null;
        try {
            // commit to iRODS
            irods_os = outfs.create(finalOutputPath, true);
            temp_is = workfs.open(workOutput);

            byte[] buffer = new byte[100 * 1024];
            int bytes_read = 0;
            while ((bytes_read = temp_is.read(buffer)) != -1) {
                irods_os.write(buffer, 0, bytes_read);
            }
        } finally {
            if (temp_is != null) {
                try {
                    temp_is.close();
                } catch (IOException ex) {
                    // ignore exceptions
                }
            }
            // remove temporary file
            try {
                workfs.delete(workOutput, true);
            } catch (IOException ex) {
                // ignore exceptions
            }
            if (irods_os != null) {
                irods_os.close();
            }
        }
        LOG.debug("Moved " + workOutput + " to " + finalOutputPath);
    } else if (workfs.getFileStatus(workOutput).isDir()) {
        FileStatus[] paths = workfs.listStatus(workOutput);
        Path finalOutputPath = getFinalPath(outDir, workOutput, this.workPath);
        outfs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                moveTaskOutputsToIRODS(context, outfs, outDir, workfs, path.getPath());
            }
        }
    }
}
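Unlike the HDFS committer above, this one cannot rename across file systems, so each file is byte-copied into iRODS, and progress() fires once per file or directory visited. Note that a single very large file could still outlast the task timeout here, since the inner copy loop itself never reports progress.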
From source file:edu.arizona.cs.hadoop.fs.irods.output.HirodsFileOutputCommitter.java
License:Apache License
/**
 * Delete the work directory
 */
@Override
public void abortTask(TaskAttemptContext context) {
    try {
        if (this.workPath != null) {
            context.progress();
            this.workFileSystem.delete(this.workPath, true);
        }
    } catch (IOException ie) {
        LOG.warn("Error discarding output" + StringUtils.stringifyException(ie));
    }
}