Usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
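TaskAttemptContext.getConfiguration() exposes the job's Configuration to per-task code such as mappers, record readers, record writers, and output committers. A minimal sketch of the most common pattern, reading a job setting in Mapper.setup(); the "example.separator" key is made up for illustration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ConfigAwareMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
  private String separator;

  @Override
  protected void setup(Context context) {
    // Mapper.Context extends TaskAttemptContext, so this is the same
    // getConfiguration() call shown in the examples below.
    Configuration conf = context.getConfiguration();
    separator = conf.get("example.separator", "\t"); // hypothetical key
  }
}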
From source file:com.twitter.algebra.matrix.format.MatrixOutputFormat.java
License:Apache License
@Override
public RecordWriter<WritableComparable<?>, Writable> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  if (baseOut == null) {
    getBaseOutputFormat(context.getConfiguration());
  }
  return new LazyRecordWriter<WritableComparable<?>, Writable>(baseOut, context);
}
From source file:com.twitter.algebra.matrix.format.MatrixOutputFormat.java
License:Apache License
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  if (baseOut == null) {
    getBaseOutputFormat(context.getConfiguration());
  }
  return super.getOutputCommitter(context);
}
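Both MatrixOutputFormat methods above lazily resolve the wrapped output format from the configuration before delegating, which is the same pattern Hadoop's LazyOutputFormat uses. A sketch of what such a lookup might look like, assuming (as LazyOutputFormat does) that the delegate class name is stored under a configuration key; the key and the baseOut field shown here are assumptions about MatrixOutputFormat's internals:

// Sketch of a lazy delegate lookup, modeled on Hadoop's LazyOutputFormat;
// the configuration key is an assumption, not confirmed from this source.
@SuppressWarnings("unchecked")
private void getBaseOutputFormat(Configuration conf) throws IOException {
  Class<? extends OutputFormat> delegate =
      conf.getClass("mapreduce.output.lazyoutputformat.outputformat", null, OutputFormat.class);
  if (delegate == null) {
    throw new IOException("Output format not set for MatrixOutputFormat");
  }
  baseOut = ReflectionUtils.newInstance(delegate, conf);
}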
From source file:com.twitter.elephanttwin.retrieval.BlockIndexedFileInputFormat.java
License:Apache License
/**
 * For indexing jobs, which need to read all data, we just use the real
 * RecordReader returned by the real InputFormat class.
 * <p>
 * For searching jobs, we have to use a "wrapped" RecordReader to do the
 * filtering ourselves, since we index at the block level, not the
 * record/line level.
 * <p>
 * Also, Pig would not do the filtering once filtering conditions are
 * pushed to data sources.
 */
@Override
public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  RecordReader<K, V> realRecordReader = getRealRecordReader(split, context);
  if (getFilterCondition(context) != null) {
    String valueClass = context.getConfiguration().get(BlockIndexedFileInputFormat.VALUECLASS);
    BinaryExpression filter = getFilterCondition(context);
    HashMap<String, Method> columnName2Method = new HashMap<String, Method>();
    Set<String> allColumns = getAllColumnNames(filter);
    for (String colName : allColumns) {
      Method method = null;
      Class<?> c = getValueClassByName(valueClass);
      String methodName = getCamelCaseMethodName(colName, c);
      try {
        method = c.getMethod(methodName);
        columnName2Method.put(colName, method);
      } catch (Exception e) {
        throw new IOException("couldn't get Method from column name", e);
      }
    }
    LOG.info("using IndexedFilterRecordReader, filter: " + filter);
    return getIndexedFilterRecordReader(filter, columnName2Method);
  } else {
    // use the real RecordReader to read everything
    return realRecordReader;
  }
}
From source file:com.twitter.elephanttwin.retrieval.BlockIndexedFileInputFormat.java
License:Apache License
/**
 * Returns the real RecordReader based on the input format and value class
 * information stored in the job's configuration.
 */
public RecordReader<K, V> getRealRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  String inputformat = context.getConfiguration().get(BlockIndexedFileInputFormat.REALINPUTFORMAT);
  String valueClass = context.getConfiguration().get(BlockIndexedFileInputFormat.VALUECLASS);
  FileInputFormat<K, V> realInputFormat = getInputFormatClass(inputformat, valueClass);
  return realInputFormat.createRecordReader(split, context);
}
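The two keys read above have to be planted in the job configuration by the driver. A hypothetical driver-side counterpart: the REALINPUTFORMAT and VALUECLASS constants come from the snippet, while the job name and the TextInputFormat/Text choices are illustrative assumptions.

// Hypothetical driver setup: store the real input format and value class
// in the configuration so getRealRecordReader() can reconstruct them per task.
Job job = Job.getInstance(new Configuration(), "indexed-scan");
Configuration conf = job.getConfiguration();
conf.set(BlockIndexedFileInputFormat.REALINPUTFORMAT, TextInputFormat.class.getName());
conf.set(BlockIndexedFileInputFormat.VALUECLASS, Text.class.getName());
job.setInputFormatClass(BlockIndexedFileInputFormat.class);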
From source file:com.vertica.hadoop.VerticaOutputFormat.java
License:Apache License
/** {@inheritDoc} */
public RecordWriter<Text, VerticaRecord> getRecordWriter(TaskAttemptContext context) throws IOException {
  VerticaConfiguration config = new VerticaConfiguration(context.getConfiguration());
  String table = config.getOutputTableName();
  try {
    return new VerticaRecordWriter(getConnection(context.getConfiguration()), table, config.getBatchSize());
  } catch (SQLException e) {
    throw new IOException(e);
  }
}
From source file:com.vertica.hadoop.VerticaOutputFormat.java
License:Apache License
/** {@inheritDoc} */
public OutputCommitter getOutputCommitter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  VerticaConfiguration config = new VerticaConfiguration(context.getConfiguration());
  try {
    Class outputCommitterClass = config.getOutputCommitterClass();
    LOG.info("Initializing OutputCommitter class " + outputCommitterClass.getCanonicalName());
    return initializeOutputCommitter(outputCommitterClass, context.getConfiguration());
  } catch (ClassNotFoundException e) {
    throw new IOException("Could not find OutputCommitter class configured as "
        + VerticaConfiguration.OUTPUT_COMMITTER_CLASS_PARAM, e);
  }
}
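getOutputCommitter() above resolves its committer class through VerticaConfiguration, so the class has to be named in the job configuration first. A minimal sketch, assuming OUTPUT_COMMITTER_CLASS_PARAM (seen in the snippet) holds the fully qualified class name; VerticaTaskOutputCommitter from the next examples is used as a plausible value.

// Hypothetical: select the committer that getOutputCommitter() will instantiate.
Configuration conf = job.getConfiguration();
conf.set(VerticaConfiguration.OUTPUT_COMMITTER_CLASS_PARAM,
    VerticaTaskOutputCommitter.class.getName());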
From source file:com.vertica.hadoop.VerticaTaskOutputCommitter.java
License:Apache License
/**
 * This method is called upon successful task execution. Per the
 * {@link OutputCommitter} javadocs, under very rare circumstances this may
 * be called multiple times for the same task, but for different task attempts.
 */
@Override
public void commitTask(TaskAttemptContext taskAttemptContext) throws IOException {
  log.info("Task complete - committing database connection");
  sqlCommit(getConnection(taskAttemptContext.getConfiguration()));
}
From source file:com.vertica.hadoop.VerticaTaskOutputCommitter.java
License:Apache License
/**
 * This method is called upon aborted task execution. This may be called
 * multiple times for the same task, but for different task attempts.
 */
@Override
public void abortTask(TaskAttemptContext taskAttemptContext) throws IOException {
  log.warn("Task aborted - rolling back database connection");
  sqlRollback(getConnection(taskAttemptContext.getConfiguration()));
}
From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBOutputFormat.java
License:Apache License
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  DynamoDBConfiguration dbConf = new DynamoDBConfiguration(context.getConfiguration());
  return new DynamoDBRecordWriter(dbConf.getAmazonDynamoDBClient(), dbConf.getOutputTableName());
}
From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBOutputFormatTest.java
License:Apache License
@Test
public void testGetRecordWriter() throws IOException, InterruptedException {
  TaskAttemptContext context = createMock(TaskAttemptContext.class);
  Configuration conf = createMock(Configuration.class);
  DynamoDBOutputFormat<MyTable, NullWritable> outputFormat =
      new DynamoDBOutputFormat<MyTable, NullWritable>();

  expect(context.getConfiguration()).andReturn(conf);
  expect(conf.get(DynamoDBConfiguration.ACCESS_KEY_PROPERTY)).andReturn(ACCESS_KEY);
  expect(conf.get(DynamoDBConfiguration.SECRET_KEY_PROPERTY)).andReturn(SECRET_KEY);
  expect(conf.get(DynamoDBConfiguration.OUTPUT_TABLE_NAME_PROPERTY)).andReturn(TABLE_NAME);
  expect(conf.get(DynamoDBConfiguration.DYNAMODB_ENDPOINT)).andReturn("test");
  replay(context);
  replay(conf);

  DynamoDBOutputFormat<MyTable, NullWritable>.DynamoDBRecordWriter writer =
      (DynamoDBOutputFormat<MyTable, NullWritable>.DynamoDBRecordWriter) outputFormat.getRecordWriter(context);

  assertEquals(TABLE_NAME, writer.getTableName());
  verify(context);
  verify(conf);
}