List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
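All of the examples below share the same idiom: the record reader, record writer, or output committer pulls the job's Configuration out of the TaskAttemptContext and uses it to look up job properties or to open a FileSystem. A minimal sketch of that idiom follows; the class name, method, and the "example.input.encoding" property key are illustrative and are not taken from any of the source files listed here.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

// Illustrative helper: obtain the job Configuration from the TaskAttemptContext,
// read a job property, and open the FileSystem that holds the split's path.
public class GetConfigurationSketch {

    public static void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
        Configuration conf = context.getConfiguration();
        // "example.input.encoding" is a made-up property name used only for illustration.
        String encoding = conf.get("example.input.encoding", "UTF-8");
        FileSystem fs = FileSystem.get(conf);
        Path path = ((FileSplit) split).getPath();
        // ... open path with fs and decode records using the configured encoding
    }
}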
From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormat.java
License:Apache License
@Override public RecordReader<LongWritable, T> createRecordReader(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException { setConf(context.getConfiguration()); @SuppressWarnings("unchecked") Class<T> inputClass = (Class<T>) (dbConf.getInputClass()); return new DynamoDBQueryRecordReader<T>((DynamoDBQueryInputSplit) inputSplit, inputClass, context.getConfiguration(), dbConf.getAmazonDynamoDBClient(), dbConf, tableName); }
From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBScanInputFormat.java
License:Apache License
@Override public RecordReader<LongWritable, T> createRecordReader(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException { setConf(context.getConfiguration()); @SuppressWarnings("unchecked") Class<T> inputClass = (Class<T>) (dbConf.getInputClass()); return new DynamoDBScanRecordReader<T>((DynamoDBInputSplit) inputSplit, inputClass, context.getConfiguration(), dbConf.getAmazonDynamoDBClient(), dbConf, tableName); }
From source file:com.yahoo.druid.hadoop.DruidRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    ObjectMapper jsonMapper = DruidInitialization.getInstance().getObjectMapper();
    SegmentLoadSpec spec = readSegmentJobSpec(context.getConfiguration(), jsonMapper);
    final List<String> dimensions = spec.getDimensions();
    final List<String> metrics = spec.getMetrics();
    final DimFilter filter = spec.getFilter();
    final Interval interval = new Interval(context.getConfiguration().get(DruidInputFormat.CONF_DRUID_INTERVAL));

    String hdfsPath = ((DruidInputSplit) split).getPath();
    logger.info("Reading segment from " + hdfsPath);

    segmentDir = Files.createTempDir();
    logger.info("segment dir: " + segmentDir);

    FileSystem fs = FileSystem.get(context.getConfiguration());
    getSegmentFiles(hdfsPath, segmentDir, fs);
    logger.info("finished getting segment files");

    QueryableIndex index = IndexIO.loadIndex(segmentDir);
    StorageAdapter adapter = new QueryableIndexStorageAdapter(index);
    List<StorageAdapter> adapters = Lists.newArrayList(adapter);
    rowYielder = new IngestSegmentFirehose(adapters, dimensions, metrics, filter, interval, QueryGranularity.NONE);
}
From source file:com.zjy.mongo.input.BSONFileRecordReader.java
License:Apache License
@Override
public void initialize(final InputSplit inputSplit, final TaskAttemptContext context)
        throws IOException, InterruptedException {
    fileSplit = (FileSplit) inputSplit;
    configuration = context.getConfiguration();
    if (LOG.isDebugEnabled()) {
        LOG.debug("reading split " + fileSplit);
    }
    Path file = fileSplit.getPath();
    FileSystem fs = file.getFileSystem(configuration);
    in = fs.open(file, 16 * 1024 * 1024);
    in.seek(startingPosition == BSON_RR_POSITION_NOT_GIVEN ? fileSplit.getStart() : startingPosition);
    if (MongoConfigUtil.getLazyBSON(configuration)) {
        callback = new LazyBSONCallback();
        decoder = new LazyBSONDecoder();
    } else {
        callback = new BasicBSONCallback();
        decoder = new BasicBSONDecoder();
    }
}
From source file:com.zjy.mongo.MongoOutputFormat.java
License:Apache License
/**
 * Get the record writer that points to the output collection.
 */
public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext context) {
    return new MongoRecordWriter<K, V>(MongoConfigUtil.getOutputCollections(context.getConfiguration()),
            context);
}
From source file:com.zjy.mongo.output.MongoOutputCommitter.java
License:Apache License
/**
 * Get the Path to where temporary files should be stored for a
 * TaskAttempt, whose TaskAttemptContext is provided.
 *
 * @param context the TaskAttemptContext.
 * @return the Path to the temporary file for the TaskAttempt.
 */
public static Path getTaskAttemptPath(final TaskAttemptContext context) {
    Configuration config = context.getConfiguration();
    // Try to use the following base temporary directories, in this order:
    // 1. New-style option for task tmp dir
    // 2. Old-style option for task tmp dir
    // 3. Hadoop system-wide tmp dir
    // 4. /tmp
    String basePath = config.get("mapreduce.task.tmp.dir",
            config.get("mapred.child.tmp", config.get("hadoop.tmp.dir", "/tmp")));
    // Hadoop Paths always use "/" as a directory separator.
    return new Path(
            String.format("%s/%s/%s/_out", basePath, context.getTaskAttemptID().toString(), TEMP_DIR_NAME));
}
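Because of the fallback chain above, a job can control where the committer buffers task output by setting the new-style property on its Configuration before submission. A minimal sketch under that assumption follows; the class name and directory path are illustrative.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

// Hypothetical driver fragment: override the new-style temp-dir property that
// getTaskAttemptPath() consults first. The path is illustrative, not a default.
public class TempDirConfigSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("mapreduce.task.tmp.dir", "/data/tmp/mongo-output");
        Job job = Job.getInstance(conf, "mongo-output-example");
        // ... configure input/output formats and submit the job as usual
    }
}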
From source file:com.zjy.mongo.output.MongoOutputCommitter.java
License:Apache License
@Override
public boolean needsTaskCommit(final TaskAttemptContext taskContext) throws IOException {
    try {
        FileSystem fs = FileSystem.get(taskContext.getConfiguration());
        // Commit is only necessary if there was any output.
        return fs.exists(getTaskAttemptPath(taskContext));
    } catch (IOException e) {
        LOG.error("Could not open filesystem", e);
        throw e;
    }
}
From source file:com.zjy.mongo.output.MongoOutputCommitter.java
License:Apache License
@Override
public void commitTask(final TaskAttemptContext taskContext) throws IOException {
    LOG.info("Committing task.");

    collections = MongoConfigUtil.getOutputCollections(taskContext.getConfiguration());
    numberOfHosts = collections.size();

    // Get temporary file.
    Path tempFilePath = getTaskAttemptPath(taskContext);
    LOG.info("Committing from temporary file: " + tempFilePath.toString());
    long filePos = 0, fileLen;
    FSDataInputStream inputStream = null;
    try {
        FileSystem fs = FileSystem.get(taskContext.getConfiguration());
        inputStream = fs.open(tempFilePath);
        fileLen = fs.getFileStatus(tempFilePath).getLen();
    } catch (IOException e) {
        LOG.error("Could not open temporary file for committing", e);
        cleanupAfterCommit(inputStream, taskContext);
        throw e;
    }

    int maxDocs = MongoConfigUtil.getBatchSize(taskContext.getConfiguration());
    int curBatchSize = 0;
    DBCollection coll = getDbCollectionByRoundRobin();
    BulkWriteOperation bulkOp = coll.initializeOrderedBulkOperation();

    // Read Writables out of the temporary file.
    BSONWritable bw = new BSONWritable();
    MongoUpdateWritable muw = new MongoUpdateWritable();
    while (filePos < fileLen) {
        try {
            // Determine writable type, and perform corresponding operation
            // on MongoDB.
            int mwType = inputStream.readInt();
            if (MongoWritableTypes.BSON_WRITABLE == mwType) {
                bw.readFields(inputStream);
                bulkOp.insert(new BasicDBObject(bw.getDoc().toMap()));
            } else if (MongoWritableTypes.MONGO_UPDATE_WRITABLE == mwType) {
                muw.readFields(inputStream);
                DBObject query = new BasicDBObject(muw.getQuery().toMap());
                DBObject modifiers = new BasicDBObject(muw.getModifiers().toMap());
                if (muw.isMultiUpdate()) {
                    if (muw.isUpsert()) {
                        bulkOp.find(query).upsert().update(modifiers);
                    } else {
                        bulkOp.find(query).update(modifiers);
                    }
                } else {
                    if (muw.isUpsert()) {
                        bulkOp.find(query).upsert().updateOne(modifiers);
                    } else {
                        bulkOp.find(query).updateOne(modifiers);
                    }
                }
            } else {
                throw new IOException("Unrecognized type: " + mwType);
            }
            filePos = inputStream.getPos();
            // Write to MongoDB if the batch is full, or if this is the last
            // operation to be performed for the Task.
            if (++curBatchSize >= maxDocs || filePos >= fileLen) {
                try {
                    bulkOp.execute();
                } catch (MongoException e) {
                    LOG.error("Could not write to MongoDB", e);
                    throw e;
                }
                coll = getDbCollectionByRoundRobin();
                bulkOp = coll.initializeOrderedBulkOperation();
                curBatchSize = 0;
                // Signal progress back to Hadoop framework so that we
                // don't time out.
                taskContext.progress();
            }
        } catch (IOException e) {
            LOG.error("Error reading from temporary file", e);
            throw e;
        }
    }

    cleanupAfterCommit(inputStream, taskContext);
}
From source file:com.zjy.mongo.output.MongoOutputCommitter.java
License:Apache License
private void cleanupTemporaryFiles(final TaskAttemptContext taskContext) throws IOException {
    Path tempPath = getTaskAttemptPath(taskContext);
    try {
        FileSystem fs = FileSystem.get(taskContext.getConfiguration());
        fs.delete(tempPath, true);
    } catch (IOException e) {
        LOG.error("Could not delete temporary file " + tempPath, e);
        throw e;
    }
}
From source file:com.zjy.mongo.output.MongoRecordWriter.java
License:Apache License
/**
 * Create a MongoRecordWriter that targets multiple DBCollections.
 *
 * @param c   a list of DBCollections
 * @param ctx the TaskAttemptContext
 */
public MongoRecordWriter(final List<DBCollection> c, final TaskAttemptContext ctx) {
    collections = new ArrayList<DBCollection>(c);
    context = ctx;
    bsonWritable = new BSONWritable();

    // Initialize output stream.
    try {
        FileSystem fs = FileSystem.get(ctx.getConfiguration());
        Path outputPath = MongoOutputCommitter.getTaskAttemptPath(ctx);
        LOG.info("Writing to temporary file: " + outputPath.toString());
        outputStream = fs.create(outputPath, true);
    } catch (IOException e) {
        LOG.error("Could not open temporary file for buffering Mongo output", e);
    }
}