Usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext#getConfiguration()
public Configuration getConfiguration();
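A typical call site is a Mapper's setup() method, where getConfiguration() exposes job-level settings before any records are processed. A minimal sketch, assuming a hypothetical property name my.custom.threshold:

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    public class ThresholdMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        private int threshold;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            // Mapper.Context extends TaskAttemptContext, so getConfiguration() is available here
            Configuration conf = context.getConfiguration();
            threshold = conf.getInt("my.custom.threshold", 10); // hypothetical key with a default
        }

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            if (value.getLength() > threshold) {
                context.write(new Text("long-line"), new LongWritable(1));
            }
        }
    }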
From source file: com.dinglicom.clouder.mapreduce.input.LineRecordReader.java
License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    System.out.println("-------------------length:" + split.getLength() + "\tposition:" + split.getStart());
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    key = new Text(FileToCDRType.getTypeByPath(file.getName()));
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(cIn, job);
            } else {
                in = new LineReader(cIn, job, this.recordDelimiterBytes);
            }
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            } else {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job,
                        this.recordDelimiterBytes);
            }
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(fileIn, job);
        } else {
            in = new LineReader(fileIn, job, this.recordDelimiterBytes);
        }
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away the first record,
    // because we always (except for the last split) read one extra line in
    // the next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
From source file: com.dinglicom.clouder.mapreduce.input.TextInputFormat.java
License: Apache License

@Override
public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    String delimiter = context.getConfiguration().get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter) {
        recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    }
    return new LineRecordReader(recordDelimiterBytes);
}
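The delimiter read above comes from the standard Hadoop property textinputformat.record.delimiter, which the job driver can set before submission. A minimal driver sketch; the job name and the blank-line delimiter are illustrative choices:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.Job;

    public class DelimiterJobDriver {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            // Split records on blank lines rather than single newlines
            conf.set("textinputformat.record.delimiter", "\n\n");
            Job job = Job.getInstance(conf, "custom-delimiter-job");
            // ... configure input/output formats, paths, and mapper/reducer classes, then submit
            job.waitForCompletion(true);
        }
    }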
From source file: com.edwardsit.spark4n6.EWFRecordReader.java
License: Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) split;
    Configuration conf = context.getConfiguration();
    file = fileSplit.getPath();
    start = fileSplit.getStart();
    end = start + fileSplit.getLength();
    fs = file.getFileSystem(conf);
    stream = new EWFFileReader(fs, getFirstFile());
    chunkSize = new EWFSegmentFileReader(fs).DEFAULT_CHUNK_SIZE;
    log.setLevel(Level.DEBUG);
}
From source file: com.ery.hadoop.mrddx.hbase.HbaseOutputFormat.java
License: Apache License

@Override
public RecordWriter<K, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Legacy mapred signature this replaces:
    // public RecordWriter<K, NullWritable> getRecordWriter(FileSystem ignored, JobConf job,
    //         String name, Progressable progress) throws IOException
    HbaseConfiguration dbConf = new HbaseConfiguration(context.getConfiguration(),
            HbaseConfiguration.FLAG_HBASE_OUTPUT);
    HTable table = new HTable(dbConf.getConf(), dbConf.getOutputHBaseTableName());
    table.setAutoFlush(false, true);
    table.setWriteBufferSize(dbConf.getOutputHBaseWriteBuffersize());
    return new HbaseRecordWriter<K, NullWritable>(dbConf, table);
}
From source file: com.ery.hadoop.mrddx.hFile.LineRecordReader.java
License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            // .tar.gz files are wrapped in a TarInputStream, e.g.
            // new TarInputStream(codec.createInputStream(fileIn, decompressor))
            String filename = file.getName();
            if (filename.endsWith(".tar.gz")) {
                in = new LineReader(new TarInputStream(cIn), job);
            } else {
                in = new LineReader(cIn, job);
            }
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            String filename = file.getName();
            if (filename.endsWith(".tar.gz")) {
                in = new LineReader(new TarInputStream(codec.createInputStream(fileIn, decompressor)), job);
            } else {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            }
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new LineReader(fileIn, job);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away the first record,
    // because we always (except for the last split) read one extra line in
    // the next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
From source file: com.explorys.apothecary.hbase.mr.inputformat.MergedStoreFileRecordReader.java
License: Apache License

/**
 * In initialize we check the number of files in the table directory.
 * This is an invariant that will cause us to fail the job if it changes, because when it
 * changes it is likely that the number of regions has changed as a result of a split.
 *
 * We also initialize a scanner that handles merging KeyValues from storefiles
 * (or the Memstore, although that is not currently accessible via the API).
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    scanner = new CachedPeekHFilesScanner(region, scan);
    tableDir = region.getTableDir();
    fs = FileSystem.get(context.getConfiguration());

    int numFiles = HRegionUtil.lsRecursive(fs, tableDir).size();
    int numFilesConf = context.getConfiguration().getInt(HFileResultInputFormat.NUMBER_TABLE_FILES, 0);

    if (numFiles != numFilesConf) {
        throw new IllegalStateException(
                "Number of files has shifted underneath the job. Possible region split?");
    }
    numberTableFiles = numFiles;
    scanner.initialize();
}
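For the invariant check above to pass, the submitting side has to record the file count under HFileResultInputFormat.NUMBER_TABLE_FILES before the job runs. A hypothetical driver-side counterpart; countFiles is a stand-in for HRegionUtil.lsRecursive(fs, dir).size() from this project, and the literal key string is an assumption for illustration:

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.Job;

    public class TableFileCountSetup {
        // Stand-in for HRegionUtil.lsRecursive(fs, dir).size()
        static int countFiles(FileSystem fs, Path dir) throws IOException {
            int count = 0;
            for (FileStatus status : fs.listStatus(dir)) {
                count += status.isDirectory() ? countFiles(fs, status.getPath()) : 1;
            }
            return count;
        }

        public static void configure(Job job, Path tableDir) throws IOException {
            Configuration conf = job.getConfiguration();
            FileSystem fs = tableDir.getFileSystem(conf);
            // The reader compares this value against a fresh listing at task start;
            // the key string below is a hypothetical stand-in for
            // HFileResultInputFormat.NUMBER_TABLE_FILES
            conf.setInt("hfile.result.input.format.number.table.files", countFiles(fs, tableDir));
        }
    }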
From source file: com.facebook.hiveio.common.HadoopUtils.java
License: Apache License

/**
 * Set worker output directory
 * @param context Task context
 * @throws IOException I/O errors
 */
public static void setWorkOutputDir(TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    String outputPath = getOutputDir(conf);
    // we need to do this to get the task path and set it for the mapred
    // implementation, since it can't be done automatically because of
    // the mapreduce->mapred abstraction
    if (outputPath != null) {
        FileOutputCommitter foc = new FileOutputCommitter(getOutputPath(conf), context);
        Path path = foc.getWorkPath();
        FileSystem fs = path.getFileSystem(conf);
        fs.mkdirs(path);
        conf.set("mapred.work.output.dir", path.toString());
        LOG.info("Setting mapred.work.output.dir to {}", path.toString());
    }
}
From source file: com.facebook.hiveio.input.HiveApiInputFormat.java
License: Apache License

@Override
public RecordReaderImpl createRecordReader(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    JobConf jobConf = new JobConf(conf);

    HInputSplit split = (HInputSplit) inputSplit;
    split.setConf(jobConf);

    int[] columnIds = split.getColumnIds();
    HiveUtils.setReadColumnIds(jobConf, columnIds);

    // CHECKSTYLE: stop LineLength
    org.apache.hadoop.mapred.RecordReader<WritableComparable, Writable> baseRecordReader =
            split.getBaseRecordReader(jobConf, context);
    // CHECKSTYLE: resume LineLength

    RecordParser<Writable> recordParser = getParser(baseRecordReader.createValue(), split, columnIds, conf);

    RecordReaderImpl reader = new RecordReaderImpl(baseRecordReader, recordParser);
    reader.setObserver(observer);
    return reader;
}
From source file: com.facebook.hiveio.output.HiveApiOutputFormat.java
License: Apache License

@Override
public RecordWriterImpl getRecordWriter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    HadoopUtils.setWorkOutputDir(taskAttemptContext);

    Configuration conf = taskAttemptContext.getConfiguration();
    OutputConf outputConf = new OutputConf(conf, myProfileId);
    OutputInfo oti = outputConf.readOutputTableInfo();

    HiveUtils.setRCileNumColumns(conf, oti.getColumnInfo().size());
    HadoopUtils.setOutputKeyWritableClass(conf, NullWritable.class);

    Serializer serializer = oti.createSerializer(conf);
    HadoopUtils.setOutputValueWritableClass(conf, serializer.getSerializedClass());

    org.apache.hadoop.mapred.OutputFormat baseOutputFormat =
            ReflectionUtils.newInstance(oti.getOutputFormatClass(), conf);
    // CHECKSTYLE: stop LineLength
    org.apache.hadoop.mapred.RecordWriter<WritableComparable, Writable> baseWriter =
            getBaseRecordWriter(taskAttemptContext, baseOutputFormat);
    // CHECKSTYLE: resume LineLength

    StructObjectInspector soi = Inspectors.createFor(oti.getColumnInfo());

    if (!outputConf.shouldResetSlowWrites()) {
        return new RecordWriterImpl(baseWriter, serializer, soi);
    } else {
        long writeTimeout = outputConf.getWriteResetTimeout();
        return new ResettableRecordWriterImpl(baseWriter, serializer, soi, taskAttemptContext,
                baseOutputFormat, writeTimeout);
    }
}
From source file: com.facebook.hiveio.output.HiveApiOutputFormat.java
License: Apache License

/**
 * Get the base Hadoop RecordWriter.
 * @param taskAttemptContext TaskAttemptContext
 * @param baseOutputFormat Hadoop OutputFormat
 * @return RecordWriter
 * @throws IOException Hadoop issues
 */
// CHECKSTYLE: stop LineLengthCheck
protected static org.apache.hadoop.mapred.RecordWriter<WritableComparable, Writable> getBaseRecordWriter(
        TaskAttemptContext taskAttemptContext, org.apache.hadoop.mapred.OutputFormat baseOutputFormat)
        throws IOException {
    // CHECKSTYLE: resume LineLengthCheck
    HadoopUtils.setWorkOutputDir(taskAttemptContext);
    JobConf jobConf = new JobConf(taskAttemptContext.getConfiguration());
    int fileId = CREATED_FILES_COUNTER.incrementAndGet();
    String name = FileOutputFormat.getUniqueName(jobConf, "part-" + fileId);
    Reporter reporter = new ProgressReporter(taskAttemptContext);
    org.apache.hadoop.mapred.RecordWriter<WritableComparable, Writable> baseWriter =
            baseOutputFormat.getRecordWriter(null, jobConf, name, reporter);
    LOG.info("getBaseRecordWriter: Created new {} with file {}", baseWriter, name);
    return baseWriter;
}