Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
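
A minimal sketch of how getConfiguration() is typically called on the task side (here from a custom Mapper's setup() method; the key "example.max.line.length" is hypothetical and used only for illustration):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ExampleMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private int maxLineLength;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Mapper.Context extends TaskAttemptContext, so the Configuration set at
        // job submission time is available here through getConfiguration().
        Configuration conf = context.getConfiguration();
        // "example.max.line.length" is a made-up key, shown only to illustrate
        // reading a job-level setting inside a task.
        this.maxLineLength = conf.getInt("example.max.line.length", Integer.MAX_VALUE);
    }
}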

Usage

From source file:fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.FastqLineRecordReader.java

License:Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);

    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new CompressedSplitFastqLineReader(cIn, job, this.recordDelimiterBytes);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job,
                    this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}

From source file:fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.FastqOutputFormat.java

License:LGPL

@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    Configuration conf = context.getConfiguration();
    boolean isCompressed = getCompressOutput(context);

    CompressionCodec codec = null;
    String extension = "";

    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }

    // Get the output file path
    final Path file = getDefaultWorkFile(context, extension);

    final FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {

        FSDataOutputStream fileOut = fs.create(file, false);
        return new FastqRecordWriter(context, fileOut);
    } else {

        FSDataOutputStream fileOut = fs.create(file, false);
        return new FastqRecordWriter(context, new DataOutputStream(codec.createOutputStream(fileOut)));
    }
}

From source file:fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.SAMOutputFormat.java

License:LGPL

@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    Configuration conf = context.getConfiguration();
    boolean isCompressed = getCompressOutput(context);

    CompressionCodec codec = null;
    String extension = "";

    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }

    // Get the output file path
    final Path file = getDefaultWorkFile(context, extension);

    final FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {

        FSDataOutputStream fileOut = fs.create(file, false);
        return new SAMRecordWriter(context, fileOut);
    } else {

        FSDataOutputStream fileOut = fs.create(file, false);
        return new SAMRecordWriter(context, new DataOutputStream(codec.createOutputStream(fileOut)));
    }
}

From source file:gaffer.accumulo.inputformat.BatchScannerElementInputFormat.java

License:Apache License

@Override
public RecordReader<GraphElement, SetOfStatistics> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    log.setLevel(getLogLevel(context));
    String serialisedPostRollUpTransform = context.getConfiguration().get(POST_ROLL_UP_TRANSFORM);
    if (serialisedPostRollUpTransform != null) {
        try {
            Transform transform = (Transform) WritableToStringConverter
                    .deserialiseFromString(serialisedPostRollUpTransform);
            return new BatchScannerRecordReader(transform);
        } catch (IOException e) {
            throw new IllegalArgumentException(
                    "Unable to deserialise a Transform from the string: " + serialisedPostRollUpTransform);
        } catch (ClassCastException e) {
            throw new IllegalArgumentException(
                    "Unable to deserialise a Transform from the string: " + serialisedPostRollUpTransform);
        }
    }
    return new BatchScannerRecordReader();
}

From source file:gaffer.accumulo.inputformat.ElementInputFormat.java

License:Apache License

@Override
public RecordReader<GraphElement, SetOfStatistics> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    log.setLevel(getLogLevel(context));
    String serialisedPostRollUpTransform = context.getConfiguration().get(POST_ROLL_UP_TRANSFORM);
    if (serialisedPostRollUpTransform != null) {
        try {
            Transform transform = (Transform) WritableToStringConverter
                    .deserialiseFromString(serialisedPostRollUpTransform);
            return new ElementWithStatisticsRecordReader(transform);
        } catch (IOException e) {
            throw new IllegalArgumentException(
                    "Unable to deserialise a Transform from the string: " + serialisedPostRollUpTransform);
        } catch (ClassCastException e) {
            throw new IllegalArgumentException(
                    "Unable to deserialise a Transform from the string: " + serialisedPostRollUpTransform);
        }
    }
    return new ElementWithStatisticsRecordReader();
}

From source file:gaffer.accumulostore.inputformat.ElementInputFormat.java

License:Apache License

@Override
public RecordReader<Element, NullWritable> createRecordReader(final InputSplit split,
        final TaskAttemptContext context) throws IOException, InterruptedException {
    log.setLevel(getLogLevel(context));
    final Configuration conf = context.getConfiguration();
    final String keyPackageClass = conf.get(KEY_PACKAGE);
    final Schema schema = Schema.fromJson(conf.get(SCHEMA).getBytes(CommonConstants.UTF_8));
    final View view = View.fromJson(conf.get(VIEW).getBytes(CommonConstants.UTF_8));
    try {
        return new ElementWithPropertiesRecordReader(keyPackageClass, schema, view);
    } catch (final StoreException | SchemaException | SerialisationException e) {
        throw new IOException("Exception creating RecordReader", e);
    }
}

From source file:gobblin.compaction.mapreduce.avro.AvroKeyCombineFileRecordReader.java

License:Apache License

private static Schema getSchema(CombineFileSplit split, TaskAttemptContext cx, Integer idx) throws IOException {
    Schema schema = AvroJob.getInputKeySchema(cx.getConfiguration());
    if (schema != null) {
        return schema;
    }

    Path path = split.getPath(idx);
    FileSystem fs = path.getFileSystem(cx.getConfiguration());
    return AvroUtils.getSchemaFromDataFile(path, fs);
}

From source file:gobblin.compaction.mapreduce.avro.AvroKeyCompactorOutputCommitter.java

License:Apache License

/**
 * Commits the task, moving files to their final committed location by delegating to
 * {@link FileOutputCommitter} to perform the actual moving. First, renames the
 * files to include the count of records contained within the file and a timestamp,
 * in the form {recordCount}.{timestamp}.avro. Then, the files are moved to their
 * committed location.
 */
@Override
public void commitTask(TaskAttemptContext context) throws IOException {
    Path workPath = getWorkPath();
    FileSystem fs = workPath.getFileSystem(context.getConfiguration());

    if (fs.exists(workPath)) {
        long recordCount = getRecordCountFromCounter(context, AvroKeyDedupReducer.EVENT_COUNTER.RECORD_COUNT);
        String fileNamePrefix;
        if (recordCount == 0) {

            // recordCount == 0 indicates that it is a map-only, non-dedup job, and thus record count should
            // be obtained from mapper counter.
            fileNamePrefix = CompactionRecordCountProvider.M_OUTPUT_FILE_PREFIX;
            recordCount = getRecordCountFromCounter(context, AvroKeyMapper.EVENT_COUNTER.RECORD_COUNT);
        } else {
            fileNamePrefix = CompactionRecordCountProvider.MR_OUTPUT_FILE_PREFIX;
        }
        String fileName = CompactionRecordCountProvider.constructFileName(fileNamePrefix, recordCount);

        for (FileStatus status : fs.listStatus(workPath, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return FilenameUtils.isExtension(path.getName(), "avro");
            }
        })) {
            Path newPath = new Path(status.getPath().getParent(), fileName);
            LOG.info(String.format("Renaming %s to %s", status.getPath(), newPath));
            fs.rename(status.getPath(), newPath);
        }
    }

    super.commitTask(context);
}

From source file:gov.jgi.meta.hadoop.input.FastaBlockRecordReader.java

License:Open Source License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {

    LOG.info("initializing FastaBlockRecordReader");

    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new FastaBlockLineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = false; // don't do this!
            //--start;                      or this
            fileIn.seek(start);
        }
        in = new FastaBlockLineReader(fileIn, job);
    }
    this.pos = start;
}

From source file:gov.jgi.meta.hadoop.input.FastaRecordReader.java

License:Open Source License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new FastaLineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = false; // don't do this!
            //--start;                      or this
            fileIn.seek(start);
        }
        in = new FastaLineReader(fileIn, job);
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}