Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

List of usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

On this page you can find usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
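
Before the library-specific examples below, here is a minimal sketch of the most common pattern: a RecordReader pulls the job Configuration out of the TaskAttemptContext in initialize() to read job settings and open a FileSystem. The class name ExampleRecordReader and the property example.input.encoding are placeholders used only for illustration; they are not part of Hadoop or of the libraries shown below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public abstract class ExampleRecordReader<K, V> extends RecordReader<K, V> {

    private Configuration conf;
    private FileSystem fs;
    private String encoding;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The job's Configuration travels with the TaskAttemptContext;
        // getConfiguration() is the standard way to reach it from a running task.
        conf = context.getConfiguration();

        // Typical uses: read a job-level setting and open the default FileSystem.
        encoding = conf.get("example.input.encoding", "UTF-8");
        fs = FileSystem.get(conf);
    }

    // Remaining RecordReader methods (nextKeyValue, getCurrentKey, ...) are
    // omitted; the class is declared abstract so this sketch compiles on its own.
}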

Usage

From source file: com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormat.java

License: Apache License

@Override
public RecordReader<LongWritable, T> createRecordReader(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    setConf(context.getConfiguration());

    @SuppressWarnings("unchecked")
    Class<T> inputClass = (Class<T>) (dbConf.getInputClass());
    return new DynamoDBQueryRecordReader<T>((DynamoDBQueryInputSplit) inputSplit, inputClass,
            context.getConfiguration(), dbConf.getAmazonDynamoDBClient(), dbConf, tableName);
}

From source file: com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBScanInputFormat.java

License: Apache License

@Override
public RecordReader<LongWritable, T> createRecordReader(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    setConf(context.getConfiguration());

    @SuppressWarnings("unchecked")
    Class<T> inputClass = (Class<T>) (dbConf.getInputClass());
    return new DynamoDBScanRecordReader<T>((DynamoDBInputSplit) inputSplit, inputClass,
            context.getConfiguration(), dbConf.getAmazonDynamoDBClient(), dbConf, tableName);
}

From source file: com.yahoo.druid.hadoop.DruidRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    ObjectMapper jsonMapper = DruidInitialization.getInstance().getObjectMapper();
    SegmentLoadSpec spec = readSegmentJobSpec(context.getConfiguration(), jsonMapper);

    final List<String> dimensions = spec.getDimensions();
    final List<String> metrics = spec.getMetrics();
    final DimFilter filter = spec.getFilter();
    final Interval interval = new Interval(
            context.getConfiguration().get(DruidInputFormat.CONF_DRUID_INTERVAL));

    String hdfsPath = ((DruidInputSplit) split).getPath();
    logger.info("Reading segment from " + hdfsPath);

    segmentDir = Files.createTempDir();
    logger.info("segment dir: " + segmentDir);

    FileSystem fs = FileSystem.get(context.getConfiguration());
    getSegmentFiles(hdfsPath, segmentDir, fs);
    logger.info("finished getting segment files");

    QueryableIndex index = IndexIO.loadIndex(segmentDir);
    StorageAdapter adapter = new QueryableIndexStorageAdapter(index);
    List<StorageAdapter> adapters = Lists.newArrayList(adapter);
    rowYielder = new IngestSegmentFirehose(adapters, dimensions, metrics, filter, interval,
            QueryGranularity.NONE);
}

From source file: com.zjy.mongo.input.BSONFileRecordReader.java

License: Apache License

@Override
public void initialize(final InputSplit inputSplit, final TaskAttemptContext context)
        throws IOException, InterruptedException {
    fileSplit = (FileSplit) inputSplit;
    configuration = context.getConfiguration();
    if (LOG.isDebugEnabled()) {
        LOG.debug("reading split " + fileSplit);
    }
    Path file = fileSplit.getPath();
    FileSystem fs = file.getFileSystem(configuration);
    in = fs.open(file, 16 * 1024 * 1024); // 16 MB read buffer
    in.seek(startingPosition == BSON_RR_POSITION_NOT_GIVEN ? fileSplit.getStart() : startingPosition);

    if (MongoConfigUtil.getLazyBSON(configuration)) {
        callback = new LazyBSONCallback();
        decoder = new LazyBSONDecoder();
    } else {
        callback = new BasicBSONCallback();
        decoder = new BasicBSONDecoder();
    }
}

From source file: com.zjy.mongo.MongoOutputFormat.java

License: Apache License

/**
 * Get the record writer that points to the output collection.
 */
public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext context) {
    return new MongoRecordWriter<K, V>(MongoConfigUtil.getOutputCollections(context.getConfiguration()),
            context);
}

From source file: com.zjy.mongo.output.MongoOutputCommitter.java

License: Apache License

/**
 * Get the Path to where temporary files should be stored for a
 * TaskAttempt, whose TaskAttemptContext is provided.
 *
 * @param context the TaskAttemptContext.
 * @return the Path to the temporary file for the TaskAttempt.
 */
public static Path getTaskAttemptPath(final TaskAttemptContext context) {
    Configuration config = context.getConfiguration();
    // Try to use the following base temporary directories, in this order:
    // 1. New-style option for task tmp dir
    // 2. Old-style option for task tmp dir
    // 3. Hadoop system-wide tmp dir
    // 4. /tmp
    String basePath = config.get("mapreduce.task.tmp.dir",
            config.get("mapred.child.tmp", config.get("hadoop.tmp.dir", "/tmp")));
    // Hadoop Paths always use "/" as a directory separator.
    return new Path(
            String.format("%s/%s/%s/_out", basePath, context.getTaskAttemptID().toString(), TEMP_DIR_NAME));
}

From source file: com.zjy.mongo.output.MongoOutputCommitter.java

License: Apache License

@Override
public boolean needsTaskCommit(final TaskAttemptContext taskContext) throws IOException {
    try {
        FileSystem fs = FileSystem.get(taskContext.getConfiguration());
        // Commit is only necessary if there was any output.
        return fs.exists(getTaskAttemptPath(taskContext));
    } catch (IOException e) {
        LOG.error("Could not open filesystem", e);
        throw e;
    }
}

From source file: com.zjy.mongo.output.MongoOutputCommitter.java

License: Apache License

@Override
public void commitTask(final TaskAttemptContext taskContext) throws IOException {
    LOG.info("Committing task.");

    collections = MongoConfigUtil.getOutputCollections(taskContext.getConfiguration());
    numberOfHosts = collections.size();

    // Get temporary file.
    Path tempFilePath = getTaskAttemptPath(taskContext);
    LOG.info("Committing from temporary file: " + tempFilePath.toString());
    long filePos = 0, fileLen;
    FSDataInputStream inputStream = null;
    try {
        FileSystem fs = FileSystem.get(taskContext.getConfiguration());
        inputStream = fs.open(tempFilePath);
        fileLen = fs.getFileStatus(tempFilePath).getLen();
    } catch (IOException e) {
        LOG.error("Could not open temporary file for committing", e);
        cleanupAfterCommit(inputStream, taskContext);
        throw e;
    }

    int maxDocs = MongoConfigUtil.getBatchSize(taskContext.getConfiguration());
    int curBatchSize = 0;
    DBCollection coll = getDbCollectionByRoundRobin();
    BulkWriteOperation bulkOp = coll.initializeOrderedBulkOperation();

    // Read Writables out of the temporary file.
    BSONWritable bw = new BSONWritable();
    MongoUpdateWritable muw = new MongoUpdateWritable();
    while (filePos < fileLen) {
        try {
            // Determine writable type, and perform corresponding operation
            // on MongoDB.
            int mwType = inputStream.readInt();
            if (MongoWritableTypes.BSON_WRITABLE == mwType) {
                bw.readFields(inputStream);
                bulkOp.insert(new BasicDBObject(bw.getDoc().toMap()));
            } else if (MongoWritableTypes.MONGO_UPDATE_WRITABLE == mwType) {
                muw.readFields(inputStream);
                DBObject query = new BasicDBObject(muw.getQuery().toMap());
                DBObject modifiers = new BasicDBObject(muw.getModifiers().toMap());
                if (muw.isMultiUpdate()) {
                    if (muw.isUpsert()) {
                        bulkOp.find(query).upsert().update(modifiers);
                    } else {
                        bulkOp.find(query).update(modifiers);
                    }
                } else {
                    if (muw.isUpsert()) {
                        bulkOp.find(query).upsert().updateOne(modifiers);
                    } else {
                        bulkOp.find(query).updateOne(modifiers);
                    }
                }
            } else {
                throw new IOException("Unrecognized type: " + mwType);
            }
            filePos = inputStream.getPos();
            // Write to MongoDB if the batch is full, or if this is the last
            // operation to be performed for the Task.
            if (++curBatchSize >= maxDocs || filePos >= fileLen) {
                try {
                    bulkOp.execute();
                } catch (MongoException e) {
                    LOG.error("Could not write to MongoDB", e);
                    throw e;
                }
                coll = getDbCollectionByRoundRobin();
                bulkOp = coll.initializeOrderedBulkOperation();
                curBatchSize = 0;

                // Signal progress back to Hadoop framework so that we
                // don't time out.
                taskContext.progress();
            }
        } catch (IOException e) {
            LOG.error("Error reading from temporary file", e);
            throw e;
        }
    }

    cleanupAfterCommit(inputStream, taskContext);
}

From source file: com.zjy.mongo.output.MongoOutputCommitter.java

License: Apache License

private void cleanupTemporaryFiles(final TaskAttemptContext taskContext) throws IOException {
    Path tempPath = getTaskAttemptPath(taskContext);
    try {
        FileSystem fs = FileSystem.get(taskContext.getConfiguration());
        fs.delete(tempPath, true);
    } catch (IOException e) {
        LOG.error("Could not delete temporary file " + tempPath, e);
        throw e;
    }
}

From source file: com.zjy.mongo.output.MongoRecordWriter.java

License: Apache License

/**
 * Create a MongoRecordWriter that targets multiple DBCollections.
 * @param c a list of DBCollections
 * @param ctx the TaskAttemptContext
 */
public MongoRecordWriter(final List<DBCollection> c, final TaskAttemptContext ctx) {
    collections = new ArrayList<DBCollection>(c);
    context = ctx;
    bsonWritable = new BSONWritable();

    // Initialize output stream.
    try {
        FileSystem fs = FileSystem.get(ctx.getConfiguration());
        Path outputPath = MongoOutputCommitter.getTaskAttemptPath(ctx);
        LOG.info("Writing to temporary file: " + outputPath.toString());
        outputStream = fs.create(outputPath, true);
    } catch (IOException e) {
        LOG.error("Could not open temporary file for buffering Mongo output", e);
    }
}