Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

This page collects usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration(), drawn from the source files listed below.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
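
Because Mapper.Context and Reducer.Context are themselves TaskAttemptContext instances, the same call also works inside ordinary map and reduce tasks. The following is a minimal sketch of that use, assuming a hypothetical custom property "example.greeting" that the driver sets with conf.set(...) before submission; the class and property names are illustrative and not taken from any of the projects below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper: Mapper.Context extends TaskAttemptContext, so
// getConfiguration() returns the Configuration the driver submitted the job with.
public class GreetingMapper extends Mapper<LongWritable, Text, Text, Text> {

    private String greeting;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        // Assumed custom property; falls back to "hello" if the driver did not set it.
        greeting = conf.get("example.greeting", "hello");
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        context.write(new Text(greeting), value);
    }
}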

Usage

From source file:brush.InterleavedFastqInputFormat.java

License:Apache License

/**
 * Creates the new record reader that underlies this input format.
 *
 * @param genericSplit The split that the record reader should read.
 * @param context The Hadoop task context.
 * @return Returns the interleaved FASTQ record reader.
 */
public RecordReader<Void, Text> createRecordReader(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    context.setStatus(genericSplit.toString());

    // cast as per example in TextInputFormat
    return new InterleavedFastqRecordReader(context.getConfiguration(), (FileSplit) genericSplit);
}
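
In this pattern the Configuration taken from the task context is handed straight to the RecordReader constructor, so the reader can later resolve the FileSystem for the split's path and pick up any codec or buffer settings from the job; the cast to FileSplit follows the same convention as TextInputFormat, as the inline comment notes.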

From source file:ca.sparkera.adapters.mapreduce.MainframeVBRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    final Path file = split.getPath();
    initialize(job, split.getStart(), split.getLength(), file);
}

From source file:chaohBIM.ZipFileRecordReader.java

License:Apache License

/**
 * Initialise and open the ZIP file from the FileSystem
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) inputSplit;
    Configuration conf = taskAttemptContext.getConfiguration();
    Path path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);

    // Open the stream
    fsin = fs.open(path);
    zip = new ZipInputStream(fsin);

    // strip the ".zip" extension; escape the dot so it is matched literally
    zipfilename = path.getName().replaceAll("\\.zip$", "");
    //System.out.println(zipfilename);
}
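
Here the Configuration obtained from the TaskAttemptContext is what Path.getFileSystem(conf) uses to resolve the correct file system (HDFS, the local file system, and so on) before the opened stream is wrapped in a ZipInputStream. Since a ZIP archive cannot be split at arbitrary byte offsets, a reader like this one typically consumes the whole archive from a single, non-split input.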

From source file:cloudbase.core.client.mapreduce.CloudbaseOutputFormatShim.java

License:Apache License

@Override
public RecordWriter<Text, Mutation> getRecordWriter(TaskAttemptContext attempt) throws IOException {
    if (isMock(attempt.getConfiguration())) {
        try {
            return new MockCloudbaseRecordWriter(attempt);
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
    return super.getRecordWriter(attempt);
}
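
The configuration is consulted here only to decide whether the job is running against a mock instance; if so, an in-memory record writer is substituted, otherwise the call falls through to the parent output format.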

From source file:cn.edu.hfut.dmic.webcollector.fetcher.FetcherOutputFormat.java

@Override
public RecordWriter<Text, Writable> getRecordWriter(TaskAttemptContext tac)
        throws IOException, InterruptedException {
    Configuration conf = tac.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    String outputPath = conf.get("mapred.output.dir");

    Path fetchPath = new Path(outputPath, "fetch/info");
    Path contentPath = new Path(outputPath, "content/info");
    Path parseDataPath = new Path(outputPath, "parse/info");
    Path redirectPath = new Path(outputPath, "redirect/info");
    final SequenceFile.Writer fetchOut = new SequenceFile.Writer(fs, conf, fetchPath, Text.class,
            CrawlDatum.class);
    final SequenceFile.Writer contentOut = new SequenceFile.Writer(fs, conf, contentPath, Text.class,
            Content.class);
    final SequenceFile.Writer parseDataOut = new SequenceFile.Writer(fs, conf, parseDataPath, Text.class,
            CrawlDatum.class);
    final SequenceFile.Writer redirectOut = new SequenceFile.Writer(fs, conf, redirectPath, CrawlDatum.class,
            Text.class);

    return new RecordWriter<Text, Writable>() {

        @Override
        public void write(Text k, Writable v) throws IOException, InterruptedException {
            if (v instanceof CrawlDatum) {
                fetchOut.append(k, v);
            } else if (v instanceof Content) {
                contentOut.append(k, v);
            } else if (v instanceof ParseData) {

                ParseData parseData = (ParseData) v;
                CrawlDatums next = parseData.next;
                for (CrawlDatum datum : next) {
                    parseDataOut.append(new Text(datum.getKey()), datum);
                }

            } else if (v instanceof Redirect) {
                Redirect redirect = (Redirect) v;
                redirectOut.append(redirect.datum, new Text(redirect.realUrl));
            }
        }

        @Override
        public void close(TaskAttemptContext tac) throws IOException, InterruptedException {
            fetchOut.close();
            contentOut.close();
            parseDataOut.close();
            redirectOut.close();
        }
    };

}
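
This writer demultiplexes records by value type into four SequenceFile outputs under the job's output directory, read here from the legacy "mapred.output.dir" key. In current Hadoop releases that key is a deprecated alias for "mapreduce.output.fileoutputformat.outputdir", and the SequenceFile.Writer constructor used above is likewise deprecated; a sketch of the non-deprecated equivalents for one of the writers, assuming the same imports plus org.apache.hadoop.mapreduce.lib.output.FileOutputFormat, might look like this:

Path outputDir = FileOutputFormat.getOutputPath(tac);
SequenceFile.Writer fetchOut = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(new Path(outputDir, "fetch/info")),
        SequenceFile.Writer.keyClass(Text.class),
        SequenceFile.Writer.valueClass(CrawlDatum.class));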

From source file:cn.uc.hadoop.mapreduce.lib.input.FileNameLineRecordReader.java

License:Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    // ADD by qiujw: use the file name as the key
    key = new Text(file.getName());

    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(cIn, job);
            } else {
                in = new LineReader(cIn, job, this.recordDelimiterBytes);
            }

            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            } else {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job,
                        this.recordDelimiterBytes);
            }
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(fileIn, job);
        } else {
            in = new LineReader(fileIn, job, this.recordDelimiterBytes);
        }

        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
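
Beyond opening the file, the Configuration fetched at the top of initialize() drives most of this reader's behaviour: it supplies the maximum line length (MAX_LINE_LENGTH), the CompressionCodecFactory used to detect a codec for the file, and the FileSystem for the split's path. The key emitted by this reader is the bare file name.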

From source file:cn.uc.hadoop.mapreduce.lib.input.FileNameTextInputFormat.java

License:Apache License

@Override
public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    String delimiter = context.getConfiguration().get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter)
        recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    return new FileNameLineRecordReader(recordDelimiterBytes);
}
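
This input format (and FilePathTextInputFormat below) reads the "textinputformat.record.delimiter" property, the same key the stock TextInputFormat honours for custom record delimiters. A driver wanting records terminated by a custom string could set it before submitting the job; the sketch below is illustrative and assumes this input format is available on the job classpath.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import cn.uc.hadoop.mapreduce.lib.input.FileNameTextInputFormat;

public class DelimiterJobDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Treat blank-line-separated paragraphs as records instead of single lines;
        // the record reader above picks this value up through context.getConfiguration().
        conf.set("textinputformat.record.delimiter", "\n\n");

        Job job = Job.getInstance(conf, "custom-delimiter example");
        job.setJarByClass(DelimiterJobDriver.class);
        job.setInputFormatClass(FileNameTextInputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}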

From source file:cn.uc.hadoop.mapreduce.lib.input.FilePathLineRecordReader.java

License:Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    // ADD by qiujw: use the full file path as the key
    key = new Text(file.toString());

    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(cIn, job);
            } else {
                in = new LineReader(cIn, job, this.recordDelimiterBytes);
            }

            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            } else {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job,
                        this.recordDelimiterBytes);
            }
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(fileIn, job);
        } else {
            in = new LineReader(fileIn, job, this.recordDelimiterBytes);
        }

        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
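
This reader is identical to FileNameLineRecordReader above except for the key: here it carries the full path (file.toString()) rather than just the file name.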

From source file:cn.uc.hadoop.mapreduce.lib.input.FilePathTextInputFormat.java

License:Apache License

@Override
public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    String delimiter = context.getConfiguration().get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter)
        recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    return new FilePathLineRecordReader(recordDelimiterBytes);
}

From source file:co.cask.cdap.data.stream.AbstractStreamInputFormat.java

License:Apache License

@Override
public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new StreamRecordReader<>(createStreamEventDecoder(context.getConfiguration()),
            getAuthorizationEnforcer(context), getAuthenticationContext(context),
            GSON.fromJson(context.getConfiguration().get(STREAM_ID), StreamId.class));
}