Example usage for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Source Link

Document

Return the configuration for the job.

Usage

From source file:nl.cwi.kba.thrift.bin.ThriftRecordReader.java

License:Apache License

/**
 * Opens the file behind the given split and wraps it in a Thrift binary protocol.
 *
 * <p>Stores the job configuration, the split, and the split's start offset and length in
 * fields, sets {@code position} to zero, opens the split's file through the file system
 * resolved from its path, and builds {@code tp} on top of the opened stream. No seek is
 * performed here.
 *
 * @param split the input split; cast to {@code FileSplit}
 * @param context task attempt context supplying the job configuration
 * @throws IOException if resolving the file system or opening the file fails
 * @throws InterruptedException declared in the signature; not thrown explicitly by this body
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    conf = context.getConfiguration();

    fileSplit = (FileSplit) split;
    start = fileSplit.getStart();
    length = fileSplit.getLength();
    position = 0;

    Path splitPath = fileSplit.getPath();
    in = splitPath.getFileSystem(conf).open(splitPath);

    TIOStreamTransport transport = new TIOStreamTransport(in);
    tp = new TBinaryProtocol.Factory().getProtocol(transport);
}

From source file:nl.cwi.kba2013.thrift.bin.ThriftRecordReader.java

License:Apache License

/**
 * Opens the split's file, through a decompressing stream when a codec applies, and wraps the
 * result in a Thrift binary protocol.
 *
 * <p>Stores the job configuration, the split, and its start offset and length in fields, and
 * sets {@code position} to the split start. The codec is looked up from the file path. For
 * uncompressed input the raw stream is positioned at {@code start}; for compressed input a
 * decompressor is borrowed from {@code CodecPool} and no seek is done. In both cases
 * {@code filePosition} refers to the raw file stream.
 *
 * @param split the input split; cast to {@code FileSplit}
 * @param context task attempt context supplying the job configuration
 * @throws IOException if the file cannot be opened, positioned, or wrapped by the codec
 * @throws InterruptedException declared in the signature; not thrown explicitly by this body
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    conf = context.getConfiguration();
    fileSplit = (FileSplit) split;
    start = fileSplit.getStart();
    length = fileSplit.getLength();
    position = start;

    Path splitPath = fileSplit.getPath();
    FSDataInputStream rawStream = splitPath.getFileSystem(conf).open(splitPath);

    compressionCodecs = new CompressionCodecFactory(conf);
    codec = compressionCodecs.getCodec(splitPath);

    if (!isCompressedInput()) {
        // Plain file: jump straight to the beginning of this split.
        rawStream.seek(start);
        in = rawStream;
    } else {
        // Compressed file: read through the codec using a pooled decompressor.
        decompressor = CodecPool.getDecompressor(codec);
        in = new DataInputStream(codec.createInputStream(rawStream, decompressor));
    }
    // Either way, the raw file stream is kept in filePosition.
    filePosition = rawStream;

    TIOStreamTransport transport = new TIOStreamTransport(in);
    tp = new TBinaryProtocol.Factory().getProtocol(transport);
}

From source file:nl.cwi.wikilink.thrift.bin.ThriftFileInputFormat.java

License:Apache License

/**
 * Builds the record reader for a split.
 *
 * @param split the input split; cast to {@code FileSplit}
 * @param tac task attempt context whose configuration is handed to the reader
 * @return a new {@code ThriftRecordReader} for the given split and configuration
 * @throws IOException declared in the signature; may originate from the reader's constructor
 * @throws InterruptedException declared in the signature
 */
@Override
public RecordReader<Text, WikiLinkItemWritable> createRecordReader(InputSplit split, TaskAttemptContext tac)
        throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) split;
    return new ThriftRecordReader(fileSplit, tac.getConfiguration());
}

From source file:nl.surfsara.warcutils.WarcRecordReader.java

License:Apache License

/**
 * Opens the split's file and configures a WARC reader on top of it.
 *
 * <p>Records the split boundaries in {@code start} and {@code end}, looks up a compression
 * codec from the file path, and opens the file. Uncompressed input is positioned at
 * {@code start}; compressed input is wrapped in the codec's input stream without seeking.
 * The WARC reader is then created over the resulting stream and configured from
 * {@code WarcIOConstants}. Finally {@code pos} is set to {@code start}.
 *
 * @param inputSplit the input split; cast to {@code FileSplit}
 * @param context task attempt context supplying the job configuration
 * @throws IOException if the file cannot be opened, positioned, or wrapped by the codec
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException {
    final FileSplit fileSplit = (FileSplit) inputSplit;
    final Configuration jobConf = context.getConfiguration();
    final Path warcPath = fileSplit.getPath();

    start = fileSplit.getStart();
    end = start + fileSplit.getLength();

    compressionCodecs = new CompressionCodecFactory(jobConf);
    codec = compressionCodecs.getCodec(warcPath);

    FSDataInputStream rawStream = warcPath.getFileSystem(jobConf).open(fileSplit.getPath());

    if (!isCompressedInput()) {
        rawStream.seek(start);
        in = rawStream;
    } else {
        // NOTE(review): decompressor is not assigned anywhere in this method - confirm it is
        // initialised elsewhere, or that the codec accepts a null decompressor.
        in = new DataInputStream(codec.createInputStream(rawStream, decompressor));
    }
    // Either way, the raw file stream is kept in filePosition.
    filePosition = rawStream;

    // The "uncompressed" factory method is used for both branches; in the compressed branch
    // the codec stream sits between the file and the reader.
    warcReader = WarcReaderFactory.getReaderUncompressed(in);

    warcReader.setWarcTargetUriProfile(WarcIOConstants.URIPROFILE);
    warcReader.setBlockDigestEnabled(WarcIOConstants.BLOCKDIGESTENABLED);
    warcReader.setPayloadDigestEnabled(WarcIOConstants.PAYLOADDIGESTENABLED);
    warcReader.setRecordHeaderMaxSize(WarcIOConstants.HEADERMAXSIZE);
    warcReader.setPayloadHeaderMaxSize(WarcIOConstants.PAYLOADHEADERMAXSIZE);

    this.pos = start;
}

From source file:nl.surfsara.warcutils.WarcSequenceFileRecordReader.java

License:Apache License

/**
 * Opens a sequence-file reader for the split and aligns it with the split start.
 *
 * <p>Creates a {@code SequenceFile.Reader} for the split's path, sets {@code end} to the
 * split start plus the split length, and calls {@code sync} with the split start when the
 * reader's position is before it. {@code start} is then taken from the reader's position,
 * and {@code done} is set when that position is already at or past {@code end}.
 *
 * @param inputSplit the input split; cast to {@code FileSplit}
 * @param context task attempt context supplying the job configuration
 * @throws IOException if the reader cannot be opened or synced
 * @throws InterruptedException declared in the signature
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    final FileSplit fileSplit = (FileSplit) inputSplit;
    final Configuration jobConf = context.getConfiguration();

    Option fileOption = SequenceFile.Reader.file(fileSplit.getPath());
    in = new SequenceFile.Reader(jobConf, fileOption);

    final long splitStart = fileSplit.getStart();
    this.end = splitStart + inputSplit.getLength();

    // Only sync when the reader has not yet reached the split start.
    if (in.getPosition() < splitStart) {
        in.sync(splitStart);
    }

    start = in.getPosition();
    done = start >= end;
}

From source file:nyu.cs.webgraph.MRhelpers.LzoTabSeperatedLineRecordReader.java

License:Open Source License

/**
 * Opens the split's compressed file and prepares a line reader over it.
 *
 * <p>Records the split boundaries in {@code start} and {@code end}, then requires that a
 * compression codec can be found for the file. The file is opened and wrapped in a
 * {@code LineReader} over the codec's input stream. When the split does not begin at offset
 * zero, the raw stream is moved to {@code start}, one line is read and discarded, and
 * {@code start} is updated to the raw stream's new position. {@code pos} ends up equal to
 * {@code start}.
 *
 * @param genericSplit the input split; cast to {@code FileSplit}
 * @param context task attempt context supplying the job configuration
 * @throws IOException if no codec is found for the file, or if opening or reading fails
 * @throws InterruptedException declared in the signature; not thrown explicitly by this body
 */
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    final FileSplit fileSplit = (FileSplit) genericSplit;
    start = fileSplit.getStart();
    end = start + fileSplit.getLength();
    final Path inputPath = fileSplit.getPath();
    final Configuration jobConf = context.getConfiguration();

    final FileSystem fileSystem = inputPath.getFileSystem(jobConf);
    final CompressionCodec fileCodec = new CompressionCodecFactory(jobConf).getCodec(inputPath);
    if (fileCodec == null) {
        throw new IOException("Codec for file " + inputPath + " not found, cannot run");
    }

    // Open the raw file first; the codec stream is layered on top of it.
    fileIn = fileSystem.open(fileSplit.getPath());
    in = new LineReader(fileCodec.createInputStream(fileIn), jobConf);

    if (start != 0) {
        fileIn.seek(start);

        // Read one line and throw it away, then restart from the raw stream's new position.
        Text discarded = new Text();
        in.readLine(discarded);
        start = fileIn.getPos();
    }

    this.pos = start;
}

From source file:org.acaro.graffiti.processing.GraffitiReader.java

License:Apache License

/**
 * Initializes the parent reader and seeds the message list with the parsed query.
 *
 * <p>After delegating to {@code super.initialize}, reads the query string stored under
 * {@code Graffiti.QUERY} in the job configuration, parses it, and stores a single
 * {@code Message} (the parsed query paired with an empty {@code ResultSet}) in
 * {@code msgList}.
 *
 * @param inputSplit the input split, passed on to the superclass
 * @param context task attempt context supplying the job configuration
 * @throws IOException propagated from the superclass initialization
 * @throws InterruptedException propagated from the superclass initialization
 * @throws ParseError if the query string cannot be parsed; the underlying
 *         {@code RecognitionException} is attached as the cause
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    super.initialize(inputSplit, context);

    String queryString = context.getConfiguration().get(Graffiti.QUERY);
    try {
        Query query = new QueryParser(queryString).parse();
        msgList = new ArrayList<Message>(1);
        msgList.add(new Message(query, new ResultSet()));
    } catch (RecognitionException e) {
        // Attach the parser failure as the cause rather than dumping it to stderr, so the
        // original stack trace travels with the error that callers actually see.
        ParseError failure = new ParseError("cannot parse query ");
        failure.initCause(e);
        throw failure;
    }
}

From source file:org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat.java

License:Apache License

/**
 * Creates a record writer that lazily opens an RFile in the task's work directory.
 *
 * <p>The output file is the default work file with the extension taken from the
 * {@code Property.TABLE_FILE_TYPE} setting. The underlying {@code RFileWriter} is not created
 * until the first key/value pair is written, so closing a writer that never received a
 * record does not open a file.
 *
 * @param context task attempt context supplying the job configuration and work file location
 * @return a writer that appends key/value pairs to the RFile's default locality group
 * @throws IOException if the default work file cannot be determined
 */
@Override
public RecordWriter<Key, Value> getRecordWriter(TaskAttemptContext context) throws IOException {
    final Configuration jobConf = context.getConfiguration();
    final AccumuloConfiguration tableConf = FileOutputConfigurator.getAccumuloConfiguration(CLASS,
            context.getConfiguration());

    final String fileExtension = tableConf.get(Property.TABLE_FILE_TYPE);
    final Path outputFile = this.getDefaultWorkFile(context, "." + fileExtension);
    final int visibilityCacheSize = ConfiguratorBase.getVisibilityCacheSize(jobConf);

    return new RecordWriter<Key, Value>() {
        // Created on the first write; stays null if nothing is ever written.
        RFileWriter writer = null;

        @Override
        public void write(Key key, Value value) throws IOException {
            if (writer == null) {
                writer = RFile.newWriter().to(outputFile.toString())
                        .withFileSystem(outputFile.getFileSystem(jobConf))
                        .withTableProperties(tableConf)
                        .withVisibilityCacheSize(visibilityCacheSize).build();
                writer.startDefaultLocalityGroup();
            }
            writer.append(key, value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException {
            if (writer != null) {
                writer.close();
            }
        }
    };
}

From source file:org.apache.accumulo.examples.wikisearch.output.SortingRFileOutputFormat.java

License:Apache License

/**
 * Creates a buffering RFile record writer for the task attempt.
 *
 * @param attempt task attempt context supplying the job configuration
 * @return a {@code BufferingRFileRecordWriter} built from the value returned by
 *         {@code getMaxBufferSize} and the job configuration
 * @throws IOException declared in the signature
 * @throws InterruptedException declared in the signature
 */
@Override
public RecordWriter<Text, Mutation> getRecordWriter(TaskAttemptContext attempt)
        throws IOException, InterruptedException {
    final Configuration jobConf = attempt.getConfiguration();
    final long bufferLimit = getMaxBufferSize(jobConf);
    return new BufferingRFileRecordWriter(bufferLimit, jobConf);
}

From source file:org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    super.initialize(((WikipediaInputSplit) genericSplit).getFileSplit(), context);
    this.startToken = WikipediaConfiguration.isNull(context.getConfiguration(), START_TOKEN, String.class);
    this.endToken = WikipediaConfiguration.isNull(context.getConfiguration(), END_TOKEN, String.class);
    this.returnPartialMatches = context.getConfiguration().getBoolean(RETURN_PARTIAL_MATCHES, false);

    /*/*www. j av  a 2s . c o m*/
     * Text-appending works almost exactly like the + operator on Strings- it creates a byte array
     * exactly the size of [prefix + suffix] and dumps the bytes into the new array. This module
     * works by doing lots of little additions, one line at a time. With most XML, the documents are
     * partitioned on line boundaries, so we will generally have lots of additions. Setting a large
     * default byte array for a text object can avoid this and give us StringBuilder-like
     * functionality for Text objects.
     */
    byte[] txtBuffer = new byte[2048];
    aggValue.set(txtBuffer);
}