List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext getConfiguration
public Configuration getConfiguration();
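Before the collected examples, here is a minimal sketch of the pattern they share: inside RecordReader.initialize() (or an OutputFormat/OutputCommitter method), call getConfiguration() on the TaskAttemptContext to read job properties and to resolve the FileSystem for the split's path. The class name ExampleRecordReader and the property "example.max.record.length" are hypothetical placeholders, not taken from the source files listed below.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class ExampleRecordReader extends RecordReader<LongWritable, Text> {
    private FSDataInputStream in;
    private int maxRecordLength;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
        // The per-task Configuration carries job properties set by the driver.
        Configuration conf = context.getConfiguration();
        maxRecordLength = conf.getInt("example.max.record.length", Integer.MAX_VALUE);

        // The same Configuration is used to resolve the FileSystem for the split's path.
        FileSplit fileSplit = (FileSplit) split;
        Path path = fileSplit.getPath();
        FileSystem fs = path.getFileSystem(conf);
        in = fs.open(path);
        in.seek(fileSplit.getStart());
    }

    @Override
    public boolean nextKeyValue() { return false; } // record parsing omitted in this sketch

    @Override
    public LongWritable getCurrentKey() { return null; }

    @Override
    public Text getCurrentValue() { return null; }

    @Override
    public float getProgress() { return 0f; }

    @Override
    public void close() throws IOException { if (in != null) { in.close(); } }
}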
From source file:com.facebook.hiveio.output.HiveApiOutputFormat.java
License:Apache License
@Override
public HiveApiOutputCommitter getOutputCommitter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    HadoopUtils.setWorkOutputDir(taskAttemptContext);
    Configuration conf = taskAttemptContext.getConfiguration();
    JobConf jobConf = new JobConf(conf);
    OutputCommitter baseCommitter = jobConf.getOutputCommitter();
    LOG.info("Getting output committer with base output committer {}",
            baseCommitter.getClass().getSimpleName());
    return new HiveApiOutputCommitter(new HackOutputCommitter(baseCommitter, jobConf), myProfileId);
}
From source file:com.fullcontact.sstable.hadoop.mapreduce.SSTableRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    this.split = (SSTableSplit) inputSplit;

    final FileSystem fileSystem = FileSystem.get(context.getConfiguration());
    final CompressionMetadata compressionMetadata =
            CompressionMetadata.create(split.getPath().toString(), fileSystem);
    if (compressionMetadata == null) {
        throw new IOException("Compression metadata for file " + split.getPath() + " not found, cannot run");
    }

    // open the file and seek to the start of the split
    this.reader = CompressedRandomAccessReader.open(split.getPath(), compressionMetadata, false, fileSystem);
    this.reader.seek(split.getStart());

    this.cfMetaData = initializeCfMetaData(context);
}
From source file:com.fullcontact.sstable.hadoop.mapreduce.SSTableRecordReader.java
License:Apache License
private static CFMetaData initializeCfMetaData(TaskAttemptContext context) {
    final String cql = context.getConfiguration().get(HadoopSSTableConstants.HADOOP_SSTABLE_CQL);
    Preconditions.checkNotNull(cql, "Cannot proceed without CQL definition.");

    final CreateColumnFamilyStatement statement = getCreateColumnFamilyStatement(cql);

    final String keyspace = context.getConfiguration().get(HadoopSSTableConstants.HADOOP_SSTABLE_KEYSPACE, "default");
    final String columnFamily = context.getConfiguration()
            .get(HadoopSSTableConstants.HADOOP_SSTABLE_COLUMN_FAMILY_NAME, "default");
    final CFMetaData cfMetaData =
            new CFMetaData(keyspace, columnFamily, ColumnFamilyType.Standard, statement.comparator, null);

    try {
        statement.applyPropertiesTo(cfMetaData);
    } catch (RequestValidationException e) {
        // Cannot proceed if an error occurs
        throw new RuntimeException("Error configuring SSTable reader. Cannot proceed", e);
    }

    return cfMetaData;
}
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.AbstractGFRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    CombineFileSplit cSplit = (CombineFileSplit) split;
    Path[] path = cSplit.getPaths();
    long[] start = cSplit.getStartOffsets();
    long[] len = cSplit.getLengths();

    Configuration conf = context.getConfiguration();
    FileSystem fs = cSplit.getPath(0).getFileSystem(conf);

    this.splitIterator = HDFSSplitIterator.newInstance(fs, path, start, len, 0L, 0L);
}
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.GFOutputFormat.java
License:Apache License
@Override
public RecordWriter<Object, Object> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    ClientCache cache = getClientCacheInstance(conf);
    return new GFRecordWriter(cache, context.getConfiguration());
}
From source file:com.geneix.bottle.WordRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    if (LOG.isInfoEnabled()) {
        LOG.info("Initializing WordRecordReader");
    }
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxWordLength = job.getInt(MAX_WORD_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        throw new IOException("Cannot handle compressed files right now");
    } else {
        fileIn.seek(start);
        in = new WordReader(fileIn, job);
        filePosition = fileIn;
    }

    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readWord(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
From source file:com.github.bskaggs.avro_json_hadoop.AvroAsJsonRecordReader.java
License:Apache License
/** {@inheritDoc} */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    if (!(inputSplit instanceof FileSplit)) {
        throw new IllegalArgumentException("Only compatible with FileSplits.");
    }
    FileSplit fileSplit = (FileSplit) inputSplit;

    // Open a seekable input stream to the Avro container file.
    SeekableInput seekableFileInput = createSeekableInput(context.getConfiguration(), fileSplit.getPath());

    // Wrap the seekable input stream in an Avro DataFileReader.
    Configuration conf = context.getConfiguration();
    GenericData dataModel = AvroSerialization.createDataModel(conf);
    GenericDatumReader<Object> reader = new GenericDatumReader<Object>();

    // Figure out the schema
    Path path = fileSplit.getPath();
    FSDataInputStream schemaStream = path.getFileSystem(conf).open(path);
    DataFileStream<Object> streamReader = new DataFileStream<Object>(schemaStream, reader);
    Schema mReaderSchema = streamReader.getSchema();
    streamReader.close();

    // Set up writer and encoder for json
    writer = new GenericDatumWriter<Object>(mReaderSchema);
    encoder = new TerseJsonEncoder(mReaderSchema, bout);

    @SuppressWarnings("unchecked")
    DatumReader<Object> datumReader = dataModel.createDatumReader(mReaderSchema);
    mAvroFileReader = createAvroFileReader(seekableFileInput, datumReader);

    // Initialize the start and end offsets into the file based on the boundaries of the
    // input split we're responsible for. We will read the first block that begins
    // after the input split start boundary. We will read up to but not including the
    // first block that starts after input split end boundary.

    // Sync to the closest block/record boundary just after beginning of our input split.
    mAvroFileReader.sync(fileSplit.getStart());

    // Initialize the start position to the beginning of the first block of the input split.
    mStartPosition = mAvroFileReader.previousSync();

    // Initialize the end position to the end of the input split (this isn't necessarily
    // on a block boundary, so using this for reporting progress will be approximate).
    mEndPosition = fileSplit.getStart() + fileSplit.getLength();
}
From source file:com.github.bskaggs.mapreduce.flowfile.AbstractFlowFileV3RecordReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) split;
    Path file = fileSplit.getPath();
    FileSystem fs = file.getFileSystem(context.getConfiguration());
    fileStream = fs.open(file);

    startPos = fileSplit.getStart();
    nextPos = startPos;
    length = fileSplit.getLength();
    lastPos = nextPos + length;
}
From source file:com.google.appengine.tools.mapreduce.BlobstoreRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Preconditions.checkNotNull(split);
    if (!(split instanceof BlobstoreInputSplit)) {
        throw new IOException(getClass().getName() + " initialized with non-BlobstoreInputSplit");
    }

    this.split = (BlobstoreInputSplit) split;
    int intTerminator = context.getConfiguration().getInt(BlobstoreInputFormat.TERMINATOR, DEFAULT_TERMINATOR);
    Preconditions.checkState(Byte.MIN_VALUE <= intTerminator && intTerminator <= Byte.MAX_VALUE,
            BlobstoreInputFormat.TERMINATOR + " is not in [" + Byte.MIN_VALUE + ", " + Byte.MAX_VALUE + "] range.");
    terminator = (byte) intTerminator;
    input = getInputStream(this.split, offset);
    recordIterator = getInputStreamIterator(input, this.split, offset, terminator);
}
From source file:com.hadoop.mapreduce.LzoLineRecordReader.java
License:Open Source License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    Configuration job = context.getConfiguration();
    FileSystem fs = file.getFileSystem(job);
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (codec == null) {
        throw new IOException("No codec for file " + file + " found, cannot run");
    }

    // open the file and seek to the start of the split
    fileIn = fs.open(split.getPath());

    // creates input stream and also reads the file header
    in = new LineReader(codec.createInputStream(fileIn), job);

    if (start != 0) {
        fileIn.seek(start);

        // read and ignore the first line
        in.readLine(new Text());
        start = fileIn.getPos();
    }
    this.pos = start;
}