List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
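All of the examples below share the same idiom: the record reader, record writer, or output committer pulls the job's Configuration out of the TaskAttemptContext and uses it to look up job properties or to open a FileSystem. A minimal sketch of that idiom follows; the class name, method, and the "example.input.encoding" property key are illustrative and are not taken from any of the source files listed here.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

// Illustrative helper: obtain the job Configuration from the TaskAttemptContext,
// read a job property, and open the FileSystem that holds the split's path.
public class GetConfigurationSketch {

    public static void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
        Configuration conf = context.getConfiguration();
        // "example.input.encoding" is a made-up property name used only for illustration.
        String encoding = conf.get("example.input.encoding", "UTF-8");
        FileSystem fs = FileSystem.get(conf);
        Path path = ((FileSplit) split).getPath();
        // ... open path with fs and decode records using the configured encoding
    }
}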
From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormat.java
License:Apache License
@Override public RecordReader<LongWritable, T> createRecordReader(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException { setConf(context.getConfiguration()); @SuppressWarnings("unchecked") Class<T> inputClass = (Class<T>) (dbConf.getInputClass()); return new DynamoDBQueryRecordReader<T>((DynamoDBQueryInputSplit) inputSplit, inputClass, context.getConfiguration(), dbConf.getAmazonDynamoDBClient(), dbConf, tableName); }
From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBScanInputFormat.java
License:Apache License
@Override public RecordReader<LongWritable, T> createRecordReader(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException { setConf(context.getConfiguration()); @SuppressWarnings("unchecked") Class<T> inputClass = (Class<T>) (dbConf.getInputClass()); return new DynamoDBScanRecordReader<T>((DynamoDBInputSplit) inputSplit, inputClass, context.getConfiguration(), dbConf.getAmazonDynamoDBClient(), dbConf, tableName); }
From source file:com.yahoo.druid.hadoop.DruidRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    ObjectMapper jsonMapper = DruidInitialization.getInstance().getObjectMapper();
    SegmentLoadSpec spec = readSegmentJobSpec(context.getConfiguration(), jsonMapper);
    final List<String> dimensions = spec.getDimensions();
    final List<String> metrics = spec.getMetrics();
    final DimFilter filter = spec.getFilter();
    final Interval interval = new Interval(context.getConfiguration().get(DruidInputFormat.CONF_DRUID_INTERVAL));

    String hdfsPath = ((DruidInputSplit) split).getPath();
    logger.info("Reading segment from " + hdfsPath);

    segmentDir = Files.createTempDir();
    logger.info("segment dir: " + segmentDir);

    FileSystem fs = FileSystem.get(context.getConfiguration());
    getSegmentFiles(hdfsPath, segmentDir, fs);
    logger.info("finished getting segment files");

    QueryableIndex index = IndexIO.loadIndex(segmentDir);
    StorageAdapter adapter = new QueryableIndexStorageAdapter(index);
    List<StorageAdapter> adapters = Lists.newArrayList(adapter);
    rowYielder = new IngestSegmentFirehose(adapters, dimensions, metrics, filter, interval, QueryGranularity.NONE);
}
From source file:com.zjy.mongo.input.BSONFileRecordReader.java
License:Apache License
@Override
public void initialize(final InputSplit inputSplit, final TaskAttemptContext context)
        throws IOException, InterruptedException {
    fileSplit = (FileSplit) inputSplit;
    configuration = context.getConfiguration();
    if (LOG.isDebugEnabled()) {
        LOG.debug("reading split " + fileSplit);
    }
    Path file = fileSplit.getPath();
    FileSystem fs = file.getFileSystem(configuration);
    in = fs.open(file, 16 * 1024 * 1024);
    in.seek(startingPosition == BSON_RR_POSITION_NOT_GIVEN ? fileSplit.getStart() : startingPosition);
    if (MongoConfigUtil.getLazyBSON(configuration)) {
        callback = new LazyBSONCallback();
        decoder = new LazyBSONDecoder();
    } else {
        callback = new BasicBSONCallback();
        decoder = new BasicBSONDecoder();
    }
}
From source file:com.zjy.mongo.MongoOutputFormat.java
License:Apache License
/**
 * Get the record writer that points to the output collection.
 */
public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext context) {
    return new MongoRecordWriter<K, V>(MongoConfigUtil.getOutputCollections(context.getConfiguration()),
            context);
}
From source file:com.zjy.mongo.output.MongoOutputCommitter.java
License:Apache License
/**
 * Get the Path to where temporary files should be stored for a
 * TaskAttempt, whose TaskAttemptContext is provided.
 *
 * @param context the TaskAttemptContext.
 * @return the Path to the temporary file for the TaskAttempt.
 */
public static Path getTaskAttemptPath(final TaskAttemptContext context) {
    Configuration config = context.getConfiguration();
    // Try to use the following base temporary directories, in this order:
    // 1. New-style option for task tmp dir
    // 2. Old-style option for task tmp dir
    // 3. Hadoop system-wide tmp dir
    // 4. /tmp
    String basePath = config.get("mapreduce.task.tmp.dir",
            config.get("mapred.child.tmp", config.get("hadoop.tmp.dir", "/tmp")));
    // Hadoop Paths always use "/" as a directory separator.
    return new Path(
            String.format("%s/%s/%s/_out", basePath, context.getTaskAttemptID().toString(), TEMP_DIR_NAME));
}
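Because of the fallback chain above, a job can control where the committer buffers task output by setting the new-style property on its Configuration before submission. A minimal sketch under that assumption follows; the class name and directory path are illustrative.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

// Hypothetical driver fragment: override the new-style temp-dir property that
// getTaskAttemptPath() consults first. The path is illustrative, not a default.
public class TempDirConfigSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("mapreduce.task.tmp.dir", "/data/tmp/mongo-output");
        Job job = Job.getInstance(conf, "mongo-output-example");
        // ... configure input/output formats and submit the job as usual
    }
}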
From source file:com.zjy.mongo.output.MongoOutputCommitter.java
License:Apache License
@Override
public boolean needsTaskCommit(final TaskAttemptContext taskContext) throws IOException {
    try {
        FileSystem fs = FileSystem.get(taskContext.getConfiguration());
        // Commit is only necessary if there was any output.
        return fs.exists(getTaskAttemptPath(taskContext));
    } catch (IOException e) {
        LOG.error("Could not open filesystem", e);
        throw e;
    }
}
From source file:com.zjy.mongo.output.MongoOutputCommitter.java
License:Apache License
@Override
public void commitTask(final TaskAttemptContext taskContext) throws IOException {
    LOG.info("Committing task.");

    collections = MongoConfigUtil.getOutputCollections(taskContext.getConfiguration());
    numberOfHosts = collections.size();

    // Get temporary file.
    Path tempFilePath = getTaskAttemptPath(taskContext);
    LOG.info("Committing from temporary file: " + tempFilePath.toString());
    long filePos = 0, fileLen;
    FSDataInputStream inputStream = null;
    try {
        FileSystem fs = FileSystem.get(taskContext.getConfiguration());
        inputStream = fs.open(tempFilePath);
        fileLen = fs.getFileStatus(tempFilePath).getLen();
    } catch (IOException e) {
        LOG.error("Could not open temporary file for committing", e);
        cleanupAfterCommit(inputStream, taskContext);
        throw e;
    }

    int maxDocs = MongoConfigUtil.getBatchSize(taskContext.getConfiguration());
    int curBatchSize = 0;
    DBCollection coll = getDbCollectionByRoundRobin();
    BulkWriteOperation bulkOp = coll.initializeOrderedBulkOperation();

    // Read Writables out of the temporary file.
    BSONWritable bw = new BSONWritable();
    MongoUpdateWritable muw = new MongoUpdateWritable();
    while (filePos < fileLen) {
        try {
            // Determine writable type, and perform corresponding operation
            // on MongoDB.
            int mwType = inputStream.readInt();
            if (MongoWritableTypes.BSON_WRITABLE == mwType) {
                bw.readFields(inputStream);
                bulkOp.insert(new BasicDBObject(bw.getDoc().toMap()));
            } else if (MongoWritableTypes.MONGO_UPDATE_WRITABLE == mwType) {
                muw.readFields(inputStream);
                DBObject query = new BasicDBObject(muw.getQuery().toMap());
                DBObject modifiers = new BasicDBObject(muw.getModifiers().toMap());
                if (muw.isMultiUpdate()) {
                    if (muw.isUpsert()) {
                        bulkOp.find(query).upsert().update(modifiers);
                    } else {
                        bulkOp.find(query).update(modifiers);
                    }
                } else {
                    if (muw.isUpsert()) {
                        bulkOp.find(query).upsert().updateOne(modifiers);
                    } else {
                        bulkOp.find(query).updateOne(modifiers);
                    }
                }
            } else {
                throw new IOException("Unrecognized type: " + mwType);
            }
            filePos = inputStream.getPos();
            // Write to MongoDB if the batch is full, or if this is the last
            // operation to be performed for the Task.
            if (++curBatchSize >= maxDocs || filePos >= fileLen) {
                try {
                    bulkOp.execute();
                } catch (MongoException e) {
                    LOG.error("Could not write to MongoDB", e);
                    throw e;
                }
                coll = getDbCollectionByRoundRobin();
                bulkOp = coll.initializeOrderedBulkOperation();
                curBatchSize = 0;
                // Signal progress back to Hadoop framework so that we
                // don't time out.
                taskContext.progress();
            }
        } catch (IOException e) {
            LOG.error("Error reading from temporary file", e);
            throw e;
        }
    }

    cleanupAfterCommit(inputStream, taskContext);
}
From source file:com.zjy.mongo.output.MongoOutputCommitter.java
License:Apache License
private void cleanupTemporaryFiles(final TaskAttemptContext taskContext) throws IOException {
    Path tempPath = getTaskAttemptPath(taskContext);
    try {
        FileSystem fs = FileSystem.get(taskContext.getConfiguration());
        fs.delete(tempPath, true);
    } catch (IOException e) {
        LOG.error("Could not delete temporary file " + tempPath, e);
        throw e;
    }
}
From source file:com.zjy.mongo.output.MongoRecordWriter.java
License:Apache License
/**
 * Create a MongoRecordWriter that targets multiple DBCollections.
 *
 * @param c   a list of DBCollections
 * @param ctx the TaskAttemptContext
 */
public MongoRecordWriter(final List<DBCollection> c, final TaskAttemptContext ctx) {
    collections = new ArrayList<DBCollection>(c);
    context = ctx;
    bsonWritable = new BSONWritable();

    // Initialize output stream.
    try {
        FileSystem fs = FileSystem.get(ctx.getConfiguration());
        Path outputPath = MongoOutputCommitter.getTaskAttemptPath(ctx);
        LOG.info("Writing to temporary file: " + outputPath.toString());
        outputStream = fs.create(outputPath, true);
    } catch (IOException e) {
        LOG.error("Could not open temporary file for buffering Mongo output", e);
    }
}