Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

List of usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
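
The method is typically called inside a RecordReader, InputFormat, or OutputFormat to read job-level settings for the current task attempt. Below is a minimal sketch of that pattern (not taken from the examples that follow); the class name ConfiguredRecordReader is illustrative, and the property shown is the Hadoop 2 line-length setting.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public abstract class ConfiguredRecordReader<K, V> extends RecordReader<K, V> {
    protected int maxLineLength;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The task attempt context exposes the job-wide Configuration
        Configuration conf = context.getConfiguration();
        // Read a job-level property; the default mirrors LineRecordReader's behaviour
        maxLineLength = conf.getInt("mapreduce.input.linerecordreader.line.maxlength",
                Integer.MAX_VALUE);
    }
}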

Usage

From source file:com.dinglicom.clouder.mapreduce.input.LineRecordReader.java

License:Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    System.out.println("-------------------length:" + split.getLength() + "\tposition:" + split.getStart());
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    key = new Text(FileToCDRType.getTypeByPath(file.getName()));
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(cIn, job);
            } else {
                in = new LineReader(cIn, job, this.recordDelimiterBytes);
            }

            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            } else {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job,
                        this.recordDelimiterBytes);
            }
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(fileIn, job);
        } else {
            in = new LineReader(fileIn, job, this.recordDelimiterBytes);
        }

        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}

From source file:com.dinglicom.clouder.mapreduce.input.TextInputFormat.java

License:Apache License

@Override
public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    String delimiter = context.getConfiguration().get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter)
        recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    return new LineRecordReader(recordDelimiterBytes);
}
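
For context, a hedged driver-side sketch (not part of the source file above) showing how the textinputformat.record.delimiter property would be set so that createRecordReader receives it through context.getConfiguration(); the class name, job name, and delimiter value are illustrative only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class DelimiterJobDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Use a blank line as the record delimiter (illustrative value); the
        // createRecordReader(...) above reads it back through
        // context.getConfiguration().get("textinputformat.record.delimiter")
        conf.set("textinputformat.record.delimiter", "\n\n");
        Job job = Job.getInstance(conf, "custom-delimiter-job");
        // ... configure input/output paths, mapper, reducer, then job.waitForCompletion(true)
    }
}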

From source file:com.edwardsit.spark4n6.EWFRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) split;
    Configuration conf = context.getConfiguration();
    file = fileSplit.getPath();
    start = fileSplit.getStart();
    end = start + fileSplit.getLength();
    fs = file.getFileSystem(conf);
    stream = new EWFFileReader(fs, getFirstFile());
    chunkSize = new EWFSegmentFileReader(fs).DEFAULT_CHUNK_SIZE;
    log.setLevel(Level.DEBUG);
}

From source file:com.ery.hadoop.mrddx.hbase.HbaseOutputFormat.java

License:Apache License

@Override
public RecordWriter<K, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    // public RecordWriter<K, NullWritable> getRecordWriter(FileSystem
    // ignored, JobConf job, String name,
    // Progressable progress) throws IOException {
    HbaseConfiguration dbConf = new HbaseConfiguration(context.getConfiguration(),
            HbaseConfiguration.FLAG_HBASE_OUTPUT);
    HTable table = new HTable(dbConf.getConf(), dbConf.getOutputHBaseTableName());
    table.setAutoFlush(false, true);
    table.setWriteBufferSize(dbConf.getOutputHBaseWriteBuffersize());
    return new HbaseRecordWriter<K, NullWritable>(dbConf, table);
}

From source file:com.ery.hadoop.mrddx.hFile.LineRecordReader.java

License:Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());

    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            // For .tar.gz files, wrap the stream in a TarInputStream, e.g.
            // new TarInputStream(codec.createInputStream(fileIn, decompressor))
            String filename = file.getName();
            if (filename.endsWith(".tar.gz")) {
                in = new LineReader(new TarInputStream(cIn), job);
            } else {
                in = new LineReader(cIn, job);
            }
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            String filename = file.getName();
            if (filename.endsWith(".tar.gz")) {
                in = new LineReader(new TarInputStream(codec.createInputStream(fileIn, decompressor)), job);
            } else {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            }
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new LineReader(fileIn, job);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}

From source file:com.explorys.apothecary.hbase.mr.inputformat.MergedStoreFileRecordReader.java

License:Apache License

/**
 * In initialize we check the number of files in the table directory.
 * This is an invariant that will cause us to fail the job if it changes, because it is
 * likely that when this changes, the number of regions has changed as a result of a split.
 *
 * We also initialize a scanner that handles merging KeyValues from storefiles 
 * (or Memstore although this is not currently accessible via the API)
 *  
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {

    scanner = new CachedPeekHFilesScanner(region, scan);
    tableDir = region.getTableDir();
    fs = FileSystem.get(context.getConfiguration());
    int numFiles = HRegionUtil.lsRecursive(fs, tableDir).size();

    int numFilesConf = context.getConfiguration().getInt(HFileResultInputFormat.NUMBER_TABLE_FILES, 0);
    if (numFiles != numFilesConf) {
        throw new IllegalStateException(
                "Number of files has shifted underneath the job. Possible region split?");
    }
    numberTableFiles = numFiles;
    scanner.initialize();
}

From source file:com.facebook.hiveio.common.HadoopUtils.java

License:Apache License

/**
 * Set worker output directory
 * @param context Task context
 * @throws IOException I/O errors
 */
public static void setWorkOutputDir(TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    String outputPath = getOutputDir(conf);
    // we need to do this to get the task path and set it for mapred
    // implementation since it can't be done automatically because of
    // mapreduce->mapred abstraction
    if (outputPath != null) {
        FileOutputCommitter foc = new FileOutputCommitter(getOutputPath(conf), context);
        Path path = foc.getWorkPath();
        FileSystem fs = path.getFileSystem(conf);
        fs.mkdirs(path);
        conf.set("mapred.work.output.dir", path.toString());
        LOG.info("Setting mapred.work.output.dir to {}", path.toString());
    }
}

From source file:com.facebook.hiveio.input.HiveApiInputFormat.java

License:Apache License

@Override
public RecordReaderImpl createRecordReader(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    JobConf jobConf = new JobConf(conf);

    HInputSplit split = (HInputSplit) inputSplit;
    split.setConf(jobConf);

    int[] columnIds = split.getColumnIds();
    HiveUtils.setReadColumnIds(jobConf, columnIds);

    // CHECKSTYLE: stop LineLength
    org.apache.hadoop.mapred.RecordReader<WritableComparable, Writable> baseRecordReader = split
            .getBaseRecordReader(jobConf, context);
    // CHECKSTYLE: resume LineLength

    RecordParser<Writable> recordParser = getParser(baseRecordReader.createValue(), split, columnIds, conf);

    RecordReaderImpl reader = new RecordReaderImpl(baseRecordReader, recordParser);
    reader.setObserver(observer);

    return reader;
}

From source file:com.facebook.hiveio.output.HiveApiOutputFormat.java

License:Apache License

@Override
public RecordWriterImpl getRecordWriter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    HadoopUtils.setWorkOutputDir(taskAttemptContext);

    Configuration conf = taskAttemptContext.getConfiguration();
    OutputConf outputConf = new OutputConf(conf, myProfileId);

    OutputInfo oti = outputConf.readOutputTableInfo();

    HiveUtils.setRCileNumColumns(conf, oti.getColumnInfo().size());
    HadoopUtils.setOutputKeyWritableClass(conf, NullWritable.class);

    Serializer serializer = oti.createSerializer(conf);
    HadoopUtils.setOutputValueWritableClass(conf, serializer.getSerializedClass());

    org.apache.hadoop.mapred.OutputFormat baseOutputFormat = ReflectionUtils
            .newInstance(oti.getOutputFormatClass(), conf);
    // CHECKSTYLE: stop LineLength
    org.apache.hadoop.mapred.RecordWriter<WritableComparable, Writable> baseWriter = getBaseRecordWriter(
            taskAttemptContext, baseOutputFormat);
    // CHECKSTYLE: resume LineLength

    StructObjectInspector soi = Inspectors.createFor(oti.getColumnInfo());

    if (!outputConf.shouldResetSlowWrites()) {
        return new RecordWriterImpl(baseWriter, serializer, soi);
    } else {
        long writeTimeout = outputConf.getWriteResetTimeout();
        return new ResettableRecordWriterImpl(baseWriter, serializer, soi, taskAttemptContext, baseOutputFormat,
                writeTimeout);
    }
}

From source file:com.facebook.hiveio.output.HiveApiOutputFormat.java

License:Apache License

/**
 * Get the base Hadoop RecordWriter.
 * @param taskAttemptContext TaskAttemptContext
 * @param baseOutputFormat Hadoop OutputFormat
 * @return RecordWriter
 * @throws IOException Hadoop issues
 */
// CHECKSTYLE: stop LineLengthCheck
protected static org.apache.hadoop.mapred.RecordWriter<WritableComparable, Writable> getBaseRecordWriter(
        TaskAttemptContext taskAttemptContext, org.apache.hadoop.mapred.OutputFormat baseOutputFormat)
        throws IOException {
    // CHECKSTYLE: resume LineLengthCheck
    HadoopUtils.setWorkOutputDir(taskAttemptContext);
    JobConf jobConf = new JobConf(taskAttemptContext.getConfiguration());
    int fileId = CREATED_FILES_COUNTER.incrementAndGet();
    String name = FileOutputFormat.getUniqueName(jobConf, "part-" + fileId);
    Reporter reporter = new ProgressReporter(taskAttemptContext);
    org.apache.hadoop.mapred.RecordWriter<WritableComparable, Writable> baseWriter = baseOutputFormat
            .getRecordWriter(null, jobConf, name, reporter);
    LOG.info("getBaseRecordWriter: Created new {} with file {}", baseWriter, name);
    return baseWriter;
}