List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
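TaskAttemptContext extends JobContext, so getConfiguration() is available inside mappers, reducers, record readers/writers, and output committers; the common pattern in the examples below is to read job properties from the returned Configuration during setup or initialization. A minimal sketch of that pattern (the DemoMapper class and the demo.max.records property are hypothetical, chosen only for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class DemoMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    private int maxRecords;

    @Override
    protected void setup(Context context) {
        // Mapper.Context is a TaskAttemptContext, so the job Configuration
        // set by the driver is available here.
        Configuration conf = context.getConfiguration();
        maxRecords = conf.getInt("demo.max.records", Integer.MAX_VALUE); // hypothetical property
    }
}

The driver side would set the same property on the job's configuration before submission, e.g. job.getConfiguration().setInt("demo.max.records", 1000).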
From source file:fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.FastqLineRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);

    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new CompressedSplitFastqLineReader(cIn, job, this.recordDelimiterBytes);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job,
                    this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
        filePosition = fileIn;
    }

    // If this is not the first split, we always throw away the first record,
    // because we always (except for the last split) read one extra line in
    // the next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
From source file:fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.FastqOutputFormat.java
License:LGPL
@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    Configuration conf = context.getConfiguration();

    boolean isCompressed = getCompressOutput(context);
    CompressionCodec codec = null;
    String extension = "";

    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }

    // Get the output file path
    final Path file = getDefaultWorkFile(context, extension);
    final FileSystem fs = file.getFileSystem(conf);

    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new FastqRecordWriter(context, fileOut);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new FastqRecordWriter(context, new DataOutputStream(codec.createOutputStream(fileOut)));
    }
}
From source file:fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.SAMOutputFormat.java
License:LGPL
@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    Configuration conf = context.getConfiguration();

    boolean isCompressed = getCompressOutput(context);
    CompressionCodec codec = null;
    String extension = "";

    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }

    // Get the output file path
    final Path file = getDefaultWorkFile(context, extension);
    final FileSystem fs = file.getFileSystem(conf);

    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new SAMRecordWriter(context, fileOut);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new SAMRecordWriter(context, new DataOutputStream(codec.createOutputStream(fileOut)));
    }
}
From source file:gaffer.accumulo.inputformat.BatchScannerElementInputFormat.java
License:Apache License
@Override
public RecordReader<GraphElement, SetOfStatistics> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    log.setLevel(getLogLevel(context));
    String serialisedPostRollUpTransform = context.getConfiguration().get(POST_ROLL_UP_TRANSFORM);
    if (serialisedPostRollUpTransform != null) {
        try {
            Transform transform = (Transform) WritableToStringConverter
                    .deserialiseFromString(serialisedPostRollUpTransform);
            return new BatchScannerRecordReader(transform);
        } catch (IOException e) {
            throw new IllegalArgumentException(
                    "Unable to deserialise a Transform from the string: " + serialisedPostRollUpTransform);
        } catch (ClassCastException e) {
            throw new IllegalArgumentException(
                    "Unable to deserialise a Transform from the string: " + serialisedPostRollUpTransform);
        }
    }
    return new BatchScannerRecordReader();
}
From source file:gaffer.accumulo.inputformat.ElementInputFormat.java
License:Apache License
@Override
public RecordReader<GraphElement, SetOfStatistics> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    log.setLevel(getLogLevel(context));
    String serialisedPostRollUpTransform = context.getConfiguration().get(POST_ROLL_UP_TRANSFORM);
    if (serialisedPostRollUpTransform != null) {
        try {
            Transform transform = (Transform) WritableToStringConverter
                    .deserialiseFromString(serialisedPostRollUpTransform);
            return new ElementWithStatisticsRecordReader(transform);
        } catch (IOException e) {
            throw new IllegalArgumentException(
                    "Unable to deserialise a Transform from the string: " + serialisedPostRollUpTransform);
        } catch (ClassCastException e) {
            throw new IllegalArgumentException(
                    "Unable to deserialise a Transform from the string: " + serialisedPostRollUpTransform);
        }
    }
    return new ElementWithStatisticsRecordReader();
}
From source file:gaffer.accumulostore.inputformat.ElementInputFormat.java
License:Apache License
@Override
public RecordReader<Element, NullWritable> createRecordReader(final InputSplit split,
        final TaskAttemptContext context) throws IOException, InterruptedException {
    log.setLevel(getLogLevel(context));
    final Configuration conf = context.getConfiguration();
    final String keyPackageClass = conf.get(KEY_PACKAGE);
    final Schema schema = Schema.fromJson(conf.get(SCHEMA).getBytes(CommonConstants.UTF_8));
    final View view = View.fromJson(conf.get(VIEW).getBytes(CommonConstants.UTF_8));
    try {
        return new ElementWithPropertiesRecordReader(keyPackageClass, schema, view);
    } catch (final StoreException | SchemaException | SerialisationException e) {
        throw new IOException("Exception creating RecordReader", e);
    }
}
From source file:gobblin.compaction.mapreduce.avro.AvroKeyCombineFileRecordReader.java
License:Apache License
private static Schema getSchema(CombineFileSplit split, TaskAttemptContext cx, Integer idx)
        throws IOException {
    Schema schema = AvroJob.getInputKeySchema(cx.getConfiguration());
    if (schema != null) {
        return schema;
    }
    Path path = split.getPath(idx);
    FileSystem fs = path.getFileSystem(cx.getConfiguration());
    return AvroUtils.getSchemaFromDataFile(path, fs);
}
From source file:gobblin.compaction.mapreduce.avro.AvroKeyCompactorOutputCommitter.java
License:Apache License
/**
 * Commits the task, moving files to their final committed location by delegating to
 * {@link FileOutputCommitter} to perform the actual moving. First, renames the
 * files to include the count of records contained within the file and a timestamp,
 * in the form {recordCount}.{timestamp}.avro. Then, the files are moved to their
 * committed location.
 */
@Override
public void commitTask(TaskAttemptContext context) throws IOException {
    Path workPath = getWorkPath();
    FileSystem fs = workPath.getFileSystem(context.getConfiguration());

    if (fs.exists(workPath)) {
        long recordCount = getRecordCountFromCounter(context, AvroKeyDedupReducer.EVENT_COUNTER.RECORD_COUNT);
        String fileNamePrefix;
        if (recordCount == 0) {
            // recordCount == 0 indicates that it is a map-only, non-dedup job, and thus record count
            // should be obtained from mapper counter.
            fileNamePrefix = CompactionRecordCountProvider.M_OUTPUT_FILE_PREFIX;
            recordCount = getRecordCountFromCounter(context, AvroKeyMapper.EVENT_COUNTER.RECORD_COUNT);
        } else {
            fileNamePrefix = CompactionRecordCountProvider.MR_OUTPUT_FILE_PREFIX;
        }
        String fileName = CompactionRecordCountProvider.constructFileName(fileNamePrefix, recordCount);

        for (FileStatus status : fs.listStatus(workPath, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return FilenameUtils.isExtension(path.getName(), "avro");
            }
        })) {
            Path newPath = new Path(status.getPath().getParent(), fileName);
            LOG.info(String.format("Renaming %s to %s", status.getPath(), newPath));
            fs.rename(status.getPath(), newPath);
        }
    }

    super.commitTask(context);
}
From source file:gov.jgi.meta.hadoop.input.FastaBlockRecordReader.java
License:Open Source License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    LOG.info("initializing FastaBlockRecordReader");

    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new FastaBlockLineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = false; // don't do this!
            //--start; or this
            fileIn.seek(start);
        }
        in = new FastaBlockLineReader(fileIn, job);
    }
    this.pos = start;
}
From source file:gov.jgi.meta.hadoop.input.FastaRecordReader.java
License:Open Source License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new FastaLineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = false; // don't do this!
            //--start; or this
            fileIn.seek(start);
        }
        in = new FastaLineReader(fileIn, job);
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), new Text(), 0,
                (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}