Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
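As a quick orientation before the examples below, here is a minimal sketch (not taken from any of the projects listed on this page) of the usual pattern: the driver puts a value into the job's Configuration, and a task reads it back through TaskAttemptContext getConfiguration, here inside a Mapper's setup method (Mapper.Context extends TaskAttemptContext). The property name my.example.separator is made up for illustration.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class SeparatorMapper extends Mapper<LongWritable, Text, Text, Text> {

    private String separator;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Mapper.Context is a TaskAttemptContext, so getConfiguration()
        // returns the job's Configuration as set up by the driver.
        Configuration conf = context.getConfiguration();
        separator = conf.get("my.example.separator", ",");  // hypothetical property
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split each input line on the configured separator and emit the first field as the key.
        String[] fields = value.toString().split(separator);
        if (fields.length > 0) {
            context.write(new Text(fields[0]), value);
        }
    }
}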

Usage

From source file:com.twitter.algebra.matrix.format.MatrixOutputFormat.java

License:Apache License

@Override
public RecordWriter<WritableComparable<?>, Writable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    if (baseOut == null) {
        getBaseOutputFormat(context.getConfiguration());
    }
    return new LazyRecordWriter<WritableComparable<?>, Writable>(baseOut, context);
}

From source file:com.twitter.algebra.matrix.format.MatrixOutputFormat.java

License:Apache License

@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {
    if (baseOut == null) {
        getBaseOutputFormat(context.getConfiguration());
    }
    return super.getOutputCommitter(context);
}

From source file:com.twitter.elephanttwin.retrieval.BlockIndexedFileInputFormat.java

License:Apache License

/**
 * For indexing jobs, which need to read all data, we just use the real
 * RecordReader returned by the real InputFormat class. <p>
 *
 * For searching jobs, we have to use a "wrapped" RecordReader to do the
 * filtering ourselves, since we are indexing at the block level, not the
 * record/line level. <p>
 *
 * Also, Pig will not do the filtering once the filter conditions have been
 * pushed to the data source.
 * */
@Override
public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    RecordReader<K, V> realRecordReader = getRealRecordReader(split, context);

    if (getFilterCondition(context) != null) {
        String valueClass = context.getConfiguration().get(BlockIndexedFileInputFormat.VALUECLASS);

        BinaryExpression filter = getFilterCondition(context);
        HashMap<String, Method> columnName2Method = new HashMap<String, Method>();
        Set<String> allColumns = getAllColumnNames(filter);
        for (String colName : allColumns) {
            Class<?> c = getValueClassByName(valueClass);
            String methodName = getCamelCaseMethodName(colName, c);
            try {
                Method method = c.getMethod(methodName);
                columnName2Method.put(colName, method);
            } catch (Exception e) {
                throw new IOException("couldn't get Method from column name", e);
            }
        }
        LOG.info("using IndexedFilterRecordReader, filter: " + filter);
        return getIndexedFilterRecordReader(filter, columnName2Method);
    } else {
        // use the real RecordReader to read everything
        return realRecordReader;
    }
}
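The wrapped record reader above evaluates the pushed-down filter by resolving, for each column named in the filter, a getter method on the value class via reflection. The following stand-alone sketch is only illustrative: it shows the kind of column-name-to-getter lookup that the columnName2Method map captures; the exact naming rules live in getCamelCaseMethodName and are not shown on this page, and the UserRecord class and userId column are made up.

import java.lang.reflect.Method;

public class GetterLookupSketch {

    // Stand-in for a generated value class (hypothetical).
    public static class UserRecord {
        private final String userId = "u42";
        public String getUserId() { return userId; }
    }

    public static void main(String[] args) throws Exception {
        String columnName = "userId"; // hypothetical filter column
        Class<?> valueClass = UserRecord.class;

        // Naive camel-case getter derivation: "userId" -> "getUserId".
        // The real mapping is done by getCamelCaseMethodName and may differ.
        String methodName = "get" + Character.toUpperCase(columnName.charAt(0))
                + columnName.substring(1);
        Method getter = valueClass.getMethod(methodName);

        // The wrapped record reader would invoke such a getter per record
        // when evaluating the pushed-down filter expression.
        Object value = getter.invoke(new UserRecord());
        System.out.println(columnName + " -> " + value); // prints: userId -> u42
    }
}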

From source file:com.twitter.elephanttwin.retrieval.BlockIndexedFileInputFormat.java

License:Apache License

/**
 * Return the real RecordReader based on the input format and value class
 * information stored in the job's configuration.
 */
public RecordReader<K, V> getRealRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {

    String inputformat = context.getConfiguration().get(BlockIndexedFileInputFormat.REALINPUTFORMAT);
    String valueClass = context.getConfiguration().get(BlockIndexedFileInputFormat.VALUECLASS);

    FileInputFormat<K, V> realInputFormat = getInputFormatClass(inputformat, valueClass);
    return realInputFormat.createRecordReader(split, context);
}
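Since getRealRecordReader reads the delegate input format and value class out of the job's configuration, those keys must have been set on the driver side before the tasks run. The following is only a hypothetical sketch of that setup, written directly against the REALINPUTFORMAT and VALUECLASS constants used above; the project may instead provide its own helper for configuring the job, and the delegate classes named below are placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

import com.twitter.elephanttwin.retrieval.BlockIndexedFileInputFormat;

public class IndexedReadJobSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "indexed-read-sketch");

        // The same keys that getRealRecordReader() later reads back via
        // context.getConfiguration().get(...). The class names are placeholders
        // for the real delegate input format and its value class.
        job.getConfiguration().set(BlockIndexedFileInputFormat.REALINPUTFORMAT,
                "org.apache.hadoop.mapreduce.lib.input.TextInputFormat");
        job.getConfiguration().set(BlockIndexedFileInputFormat.VALUECLASS,
                "org.apache.hadoop.io.Text");

        // Use BlockIndexedFileInputFormat (or a concrete subclass of it) as the
        // job's input format so the createRecordReader() shown above is invoked.
        job.setInputFormatClass(BlockIndexedFileInputFormat.class);

        // ... remaining job setup (mapper, reducer, paths, output) omitted ...
    }
}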

From source file:com.vertica.hadoop.VerticaOutputFormat.java

License:Apache License

/** {@inheritDoc} */
public RecordWriter<Text, VerticaRecord> getRecordWriter(TaskAttemptContext context) throws IOException {

    VerticaConfiguration config = new VerticaConfiguration(context.getConfiguration());

    String name = context.getJobName();
    String table = config.getOutputTableName();
    try {
        return new VerticaRecordWriter(getConnection(context.getConfiguration()), table, config.getBatchSize());
    } catch (SQLException e) {
        throw new IOException(e);
    }
}

From source file:com.vertica.hadoop.VerticaOutputFormat.java

License:Apache License

/** {@inheritDoc} */
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {

    VerticaConfiguration config = new VerticaConfiguration(context.getConfiguration());

    try {
        Class outputCommitterClass = config.getOutputCommitterClass();
        LOG.info("Initializing OutputCommitter class " + outputCommitterClass.getCanonicalName());
        return initializeOutputCommitter(outputCommitterClass, context.getConfiguration());
    } catch (ClassNotFoundException e) {
        throw new IOException("Could not find OutputCommitter class confiugured as "
                + VerticaConfiguration.OUTPUT_COMMITTER_CLASS_PARAM, e);
    }
}

From source file:com.vertica.hadoop.VerticaTaskOutputCommitter.java

License:Apache License

/**
 * This method is called upon successful task execution. From the {@link OutputCommitter} javadocs,
 * under very rare circumstances this may be called multiple times for the same task, but for
 * different task attempts.
 */
@Override
public void commitTask(TaskAttemptContext taskAttemptContext) throws IOException {
    log.info("Task complete - committing database connection");
    sqlCommit(getConnection(taskAttemptContext.getConfiguration()));
}

From source file:com.vertica.hadoop.VerticaTaskOutputCommitter.java

License:Apache License

/**
 * This method is called upon aborted task execution. This may be called multiple times for the
 * same task, but for different task attempts.
 */
@Override
public void abortTask(TaskAttemptContext taskAttemptContext) throws IOException {
    log.warn("Task aborted - rolling back database connection");
    sqlRollback(getConnection(taskAttemptContext.getConfiguration()));
}

From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBOutputFormat.java

License:Apache License

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
    DynamoDBConfiguration dbConf = new DynamoDBConfiguration(context.getConfiguration());
    return new DynamoDBRecordWriter(dbConf.getAmazonDynamoDBClient(), dbConf.getOutputTableName());
}

From source file:com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBOutputFormatTest.java

License:Apache License

@Test
public void testGetRecordWriter() throws IOException, InterruptedException {
    TaskAttemptContext context = createMock(TaskAttemptContext.class);
    Configuration conf = createMock(Configuration.class);
    DynamoDBOutputFormat<MyTable, NullWritable> outputFormat = new DynamoDBOutputFormat<MyTable, NullWritable>();

    expect(context.getConfiguration()).andReturn(conf);
    expect(conf.get(DynamoDBConfiguration.ACCESS_KEY_PROPERTY)).andReturn(ACCESS_KEY);
    expect(conf.get(DynamoDBConfiguration.SECRET_KEY_PROPERTY)).andReturn(SECRET_KEY);
    expect(conf.get(DynamoDBConfiguration.OUTPUT_TABLE_NAME_PROPERTY)).andReturn(TABLE_NAME);
    expect(conf.get(DynamoDBConfiguration.DYNAMODB_ENDPOINT)).andReturn("test");

    replay(context);
    replay(conf);

    DynamoDBOutputFormat<MyTable, NullWritable>.DynamoDBRecordWriter writer = (DynamoDBOutputFormat<MyTable, NullWritable>.DynamoDBRecordWriter) outputFormat
            .getRecordWriter(context);
    assertEquals(TABLE_NAME, writer.getTableName());

    verify(context);
    verify(conf);
}