Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

This page lists example usages of org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
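
A minimal sketch of the typical call site (the property name "example.buffer.size" and its default are illustrative, not taken from the examples below): a RecordReader obtains the job Configuration from the context during initialization and reads its settings from there.

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    // The context hands back the Configuration that was set on the job at submission time.
    Configuration conf = context.getConfiguration();
    // Hypothetical property; any key set by the job driver is visible here.
    int bufferSize = conf.getInt("example.buffer.size", 4096);
}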

Usage

From source file: nl.cwi.kba.thrift.bin.ThriftRecordReader.java

License: Apache License

/** Boilerplate initialization code for file input streams. */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {

    conf = context.getConfiguration();
    fileSplit = (FileSplit) split;
    start = fileSplit.getStart();
    length = fileSplit.getLength();
    position = 0;

    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(conf);
    in = fs.open(path);

    tp = new TBinaryProtocol.Factory().getProtocol(new TIOStreamTransport(in));

}

From source file: nl.cwi.kba2013.thrift.bin.ThriftRecordReader.java

License: Apache License

/** Boilerplate initialization code for file input streams. */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {

    conf = context.getConfiguration();
    fileSplit = (FileSplit) split;
    start = fileSplit.getStart();
    length = fileSplit.getLength();
    position = start;

    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(path);

    compressionCodecs = new CompressionCodecFactory(conf);
    codec = compressionCodecs.getCodec(path);

    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        in = new DataInputStream(codec.createInputStream(fileIn, decompressor));
        filePosition = fileIn;
        //LOG.info("Successfully initialized input stream for compressed data.");
    } else {
        fileIn.seek(start);
        in = fileIn;
        filePosition = fileIn;
    }

    tp = new TBinaryProtocol.Factory().getProtocol(new TIOStreamTransport(in));
}

From source file: nl.cwi.wikilink.thrift.bin.ThriftFileInputFormat.java

License: Apache License

@Override
public RecordReader<Text, WikiLinkItemWritable> createRecordReader(InputSplit split, TaskAttemptContext tac)
        throws IOException, InterruptedException {
    return new ThriftRecordReader((FileSplit) split, tac.getConfiguration());
}

From source file: nl.surfsara.warcutils.WarcRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) inputSplit;
    Configuration conf = context.getConfiguration();
    final Path file = split.getPath();

    start = split.getStart();
    end = start + split.getLength();
    compressionCodecs = new CompressionCodecFactory(conf);
    codec = compressionCodecs.getCodec(file);

    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(file);

    if (isCompressedInput()) {
        // Obtain a decompressor from the pool before wrapping the compressed stream.
        decompressor = CodecPool.getDecompressor(codec);
        in = new DataInputStream(codec.createInputStream(fileIn, decompressor));
        filePosition = fileIn;
    } else {
        fileIn.seek(start);
        in = fileIn;
        filePosition = fileIn;
    }

    warcReader = WarcReaderFactory.getReaderUncompressed(in);

    warcReader.setWarcTargetUriProfile(WarcIOConstants.URIPROFILE);
    warcReader.setBlockDigestEnabled(WarcIOConstants.BLOCKDIGESTENABLED);
    warcReader.setPayloadDigestEnabled(WarcIOConstants.PAYLOADDIGESTENABLED);
    warcReader.setRecordHeaderMaxSize(WarcIOConstants.HEADERMAXSIZE);
    warcReader.setPayloadHeaderMaxSize(WarcIOConstants.PAYLOADHEADERMAXSIZE);

    this.pos = start;
}

From source file: nl.surfsara.warcutils.WarcSequenceFileRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) inputSplit;
    Configuration conf = context.getConfiguration();
    final Path path = split.getPath();

    Option optPath = SequenceFile.Reader.file(path);
    in = new SequenceFile.Reader(conf, optPath);

    this.end = split.getStart() + split.getLength();
    if (split.getStart() > in.getPosition()) {
        in.sync(split.getStart());
    }
    start = in.getPosition();
    done = start >= end;
}

From source file: nyu.cs.webgraph.MRhelpers.LzoTabSeperatedLineRecordReader.java

License: Open Source License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    Configuration job = context.getConfiguration();

    FileSystem fs = file.getFileSystem(job);
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (codec == null) {
        throw new IOException("Codec for file " + file + " not found, cannot run");
    }

    // open the file and seek to the start of the split
    fileIn = fs.open(file);

    // creates input stream and also reads the file header
    in = new LineReader(codec.createInputStream(fileIn), job);

    if (start != 0) {
        fileIn.seek(start);

        // read and ignore the first line
        in.readLine(new Text());
        start = fileIn.getPos();
    }

    this.pos = start;
}

From source file: org.acaro.graffiti.processing.GraffitiReader.java

License: Apache License

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    super.initialize(inputSplit, context);

    try {

        String queryString = context.getConfiguration().get(Graffiti.QUERY);
        Query query = new QueryParser(queryString).parse();
        msgList = new ArrayList<Message>(1);
        msgList.add(new Message(query, new ResultSet()));

    } catch (RecognitionException e) {
        e.printStackTrace();
        throw new ParseError("cannot parse query");
    }
}

From source file: org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat.java

License: Apache License

@Override
public RecordWriter<Key, Value> getRecordWriter(TaskAttemptContext context) throws IOException {
    // get the path of the temporary output file
    final Configuration conf = context.getConfiguration();
    final AccumuloConfiguration acuConf = FileOutputConfigurator.getAccumuloConfiguration(CLASS, conf);

    final String extension = acuConf.get(Property.TABLE_FILE_TYPE);
    final Path file = this.getDefaultWorkFile(context, "." + extension);
    final int visCacheSize = ConfiguratorBase.getVisibilityCacheSize(conf);

    return new RecordWriter<Key, Value>() {
        RFileWriter out = null;

        @Override
        public void close(TaskAttemptContext context) throws IOException {
            if (out != null)
                out.close();
        }

        @Override
        public void write(Key key, Value value) throws IOException {
            if (out == null) {
                out = RFile.newWriter().to(file.toString()).withFileSystem(file.getFileSystem(conf))
                        .withTableProperties(acuConf).withVisibilityCacheSize(visCacheSize).build();
                out.startDefaultLocalityGroup();
            }
            out.append(key, value);
        }
    };
}

From source file: org.apache.accumulo.examples.wikisearch.output.SortingRFileOutputFormat.java

License: Apache License

@Override
public RecordWriter<Text, Mutation> getRecordWriter(TaskAttemptContext attempt)
        throws IOException, InterruptedException {

    // grab the configuration
    final Configuration conf = attempt.getConfiguration();
    // grab the max size
    final long maxSize = getMaxBufferSize(conf);

    return new BufferingRFileRecordWriter(maxSize, conf);
}

From source file: org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    super.initialize(((WikipediaInputSplit) genericSplit).getFileSplit(), context);
    this.startToken = WikipediaConfiguration.isNull(context.getConfiguration(), START_TOKEN, String.class);
    this.endToken = WikipediaConfiguration.isNull(context.getConfiguration(), END_TOKEN, String.class);
    this.returnPartialMatches = context.getConfiguration().getBoolean(RETURN_PARTIAL_MATCHES, false);

    /*
     * Text-appending works almost exactly like the + operator on Strings: it creates a byte array
     * exactly the size of [prefix + suffix] and dumps the bytes into the new array. This module
     * works by doing lots of little additions, one line at a time. With most XML, the documents are
     * partitioned on line boundaries, so we will generally have lots of additions. Setting a large
     * default byte array for a text object can avoid this and give us StringBuilder-like
     * functionality for Text objects.
     */
    byte[] txtBuffer = new byte[2048];
    aggValue.set(txtBuffer);
}
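
The comment in the last example deserves a concrete illustration. Below is a minimal sketch of the pre-sizing trick, assuming only org.apache.hadoop.io.Text (the helper name and the 2048-byte capacity are illustrative, not from the example above):

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.Text;

// Hypothetical helper: grow Text's backing array once, then reset its length.
static Text newPreSizedText(int capacity) {
    Text value = new Text();
    value.set(new byte[capacity]); // grows the internal byte array to `capacity`
    value.clear();                 // length back to 0; the backing array is retained
    return value;
}

// Repeated appends now reuse the pre-sized buffer instead of reallocating per line.
Text agg = newPreSizedText(2048);
byte[] line = "<page>...</page>".getBytes(StandardCharsets.UTF_8);
agg.append(line, 0, line.length);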