Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
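
As a minimal sketch of the pattern shared by the examples below (not taken from any of the listed source files; the configuration key and field names are made up for illustration), getConfiguration() is typically called inside a RecordReader's initialize() method to read job settings and to obtain a FileSystem:

// Minimal sketch; the key "example.max.record.length" and the fields are hypothetical.
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // The Configuration built at job-submission time is available through the task context.
    Configuration conf = context.getConfiguration();

    // Read a custom job setting (with a default) and obtain the file system.
    int maxRecordLength = conf.getInt("example.max.record.length", 1024 * 1024);
    FileSystem fs = FileSystem.get(conf);

    // Open the file for this split and seek to its start.
    FileSplit fileSplit = (FileSplit) split;
    this.in = fs.open(fileSplit.getPath());
    this.in.seek(fileSplit.getStart());
}

Values set on the Job's configuration before submission (for example with job.getConfiguration().set(...)) are the ones returned here inside the running task.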

Usage

From source file:com.facebook.hiveio.output.HiveApiOutputFormat.java

License:Apache License

@Override
public HiveApiOutputCommitter getOutputCommitter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    HadoopUtils.setWorkOutputDir(taskAttemptContext);
    Configuration conf = taskAttemptContext.getConfiguration();
    JobConf jobConf = new JobConf(conf);
    OutputCommitter baseCommitter = jobConf.getOutputCommitter();
    LOG.info("Getting output committer with base output committer {}",
            baseCommitter.getClass().getSimpleName());
    return new HiveApiOutputCommitter(new HackOutputCommitter(baseCommitter, jobConf), myProfileId);
}

From source file:com.fullcontact.sstable.hadoop.mapreduce.SSTableRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    this.split = (SSTableSplit) inputSplit;

    final FileSystem fileSystem = FileSystem.get(context.getConfiguration());
    final CompressionMetadata compressionMetadata = CompressionMetadata.create(split.getPath().toString(),
            fileSystem);
    if (compressionMetadata == null) {
        throw new IOException("Compression metadata for file " + split.getPath() + " not found, cannot run");
    }

    // open the file and seek to the start of the split
    this.reader = CompressedRandomAccessReader.open(split.getPath(), compressionMetadata, false, fileSystem);
    this.reader.seek(split.getStart());

    this.cfMetaData = initializeCfMetaData(context);
}

From source file:com.fullcontact.sstable.hadoop.mapreduce.SSTableRecordReader.java

License:Apache License

private static CFMetaData initializeCfMetaData(TaskAttemptContext context) {
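    // Rebuild the Cassandra CFMetaData from the CQL CREATE statement and names stored in the job configuration.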
    final String cql = context.getConfiguration().get(HadoopSSTableConstants.HADOOP_SSTABLE_CQL);
    Preconditions.checkNotNull(cql, "Cannot proceed without CQL definition.");

    final CreateColumnFamilyStatement statement = getCreateColumnFamilyStatement(cql);

    final String keyspace = context.getConfiguration().get(HadoopSSTableConstants.HADOOP_SSTABLE_KEYSPACE,
            "default");
    final String columnFamily = context.getConfiguration()
            .get(HadoopSSTableConstants.HADOOP_SSTABLE_COLUMN_FAMILY_NAME, "default");
    final CFMetaData cfMetaData = new CFMetaData(keyspace, columnFamily, ColumnFamilyType.Standard,
            statement.comparator, null);

    try {
        statement.applyPropertiesTo(cfMetaData);
    } catch (RequestValidationException e) {
        // Cannot proceed if an error occurs
        throw new RuntimeException("Error configuring SSTable reader. Cannot proceed", e);
    }

    return cfMetaData;
}

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.AbstractGFRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    CombineFileSplit cSplit = (CombineFileSplit) split;
    Path[] path = cSplit.getPaths();
    long[] start = cSplit.getStartOffsets();
    long[] len = cSplit.getLengths();

    Configuration conf = context.getConfiguration();
    FileSystem fs = cSplit.getPath(0).getFileSystem(conf);

    this.splitIterator = HDFSSplitIterator.newInstance(fs, path, start, len, 0L, 0L);
}

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.GFOutputFormat.java

License:Apache License

@Override
public RecordWriter<Object, Object> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
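    // Use the task's configuration to obtain the GemFire client cache and build the record writer.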
    Configuration conf = context.getConfiguration();
    ClientCache cache = getClientCacheInstance(conf);
    return new GFRecordWriter(cache, context.getConfiguration());
}

From source file:com.geneix.bottle.WordRecordReader.java

License:Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    if (LOG.isInfoEnabled()) {
        LOG.info("Initializing WordRecordReader");
    }
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxWordLength = job.getInt(MAX_WORD_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);

    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        throw new IOException("Cannot handle compressed files right now");
    } else {
        fileIn.seek(start);
        in = new WordReader(fileIn, job);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readWord(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}

From source file:com.github.bskaggs.avro_json_hadoop.AvroAsJsonRecordReader.java

License:Apache License

/** {@inheritDoc} */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    if (!(inputSplit instanceof FileSplit)) {
        throw new IllegalArgumentException("Only compatible with FileSplits.");
    }
    FileSplit fileSplit = (FileSplit) inputSplit;

    // Open a seekable input stream to the Avro container file.
    SeekableInput seekableFileInput = createSeekableInput(context.getConfiguration(), fileSplit.getPath());

    // Wrap the seekable input stream in an Avro DataFileReader.
    Configuration conf = context.getConfiguration();
    GenericData dataModel = AvroSerialization.createDataModel(conf);

    GenericDatumReader<Object> reader = new GenericDatumReader<Object>();

    //Figure out the schema
    Path path = fileSplit.getPath();
    FSDataInputStream schemaStream = path.getFileSystem(conf).open(path);
    DataFileStream<Object> streamReader = new DataFileStream<Object>(schemaStream, reader);
    Schema mReaderSchema = streamReader.getSchema();
    streamReader.close();

    //Set up writer and encoder for json
    writer = new GenericDatumWriter<Object>(mReaderSchema);
    encoder = new TerseJsonEncoder(mReaderSchema, bout);

    @SuppressWarnings("unchecked")
    DatumReader<Object> datumReader = dataModel.createDatumReader(mReaderSchema);
    mAvroFileReader = createAvroFileReader(seekableFileInput, datumReader);

    // Initialize the start and end offsets into the file based on the boundaries of the
    // input split we're responsible for.  We will read the first block that begins
    // after the input split start boundary.  We will read up to but not including the
    // first block that starts after input split end boundary.

    // Sync to the closest block/record boundary just after beginning of our input split.
    mAvroFileReader.sync(fileSplit.getStart());

    // Initialize the start position to the beginning of the first block of the input split.
    mStartPosition = mAvroFileReader.previousSync();

    // Initialize the end position to the end of the input split (this isn't necessarily
    // on a block boundary, so using this for reporting progress will be approximate).
    mEndPosition = fileSplit.getStart() + fileSplit.getLength();
}

From source file:com.github.bskaggs.mapreduce.flowfile.AbstractFlowFileV3RecordReader.java

License:Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) split;

    Path file = fileSplit.getPath();
    FileSystem fs = file.getFileSystem(context.getConfiguration());
    fileStream = fs.open(file);

    startPos = fileSplit.getStart();
    nextPos = startPos;
    length = fileSplit.getLength();
    lastPos = nextPos + length;
}

From source file:com.google.appengine.tools.mapreduce.BlobstoreRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    Preconditions.checkNotNull(split);
    if (!(split instanceof BlobstoreInputSplit)) {
        throw new IOException(getClass().getName() + " initialized with non-BlobstoreInputSplit");
    }

    this.split = (BlobstoreInputSplit) split;
    int intTerminator = context.getConfiguration().getInt(BlobstoreInputFormat.TERMINATOR, DEFAULT_TERMINATOR);
    Preconditions.checkState(Byte.MIN_VALUE <= intTerminator && intTerminator <= Byte.MAX_VALUE,
            BlobstoreInputFormat.TERMINATOR + " is not in [" + Byte.MIN_VALUE + ", " + Byte.MAX_VALUE
                    + "] range.");
    terminator = (byte) intTerminator;
    input = getInputStream(this.split, offset);
    recordIterator = getInputStreamIterator(input, this.split, offset, terminator);
}

From source file:com.hadoop.mapreduce.LzoLineRecordReader.java

License:Open Source License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    Configuration job = context.getConfiguration();

    FileSystem fs = file.getFileSystem(job);
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (codec == null) {
        throw new IOException("No codec for file " + file + " not found, cannot run");
    }

    // open the file and seek to the start of the split
    fileIn = fs.open(split.getPath());

    // creates input stream and also reads the file header
    in = new LineReader(codec.createInputStream(fileIn), job);

    if (start != 0) {
        fileIn.seek(start);

        // read and ignore the first line
        in.readLine(new Text());
        start = fileIn.getPos();
    }

    this.pos = start;
}