List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
/**
 * Signature of the accessor whose usages are listed below: each example calls it on a
 * {@code TaskAttemptContext} to obtain a {@code Configuration}.
 */
public Configuration getConfiguration();
From source file:nl.cwi.kba.thrift.bin.ThriftRecordReader.java
License:Apache License
/** Boilerplate initialization code for file input streams. */ @Override// w w w .j a va2 s . c om public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { conf = context.getConfiguration(); fileSplit = (FileSplit) split; start = fileSplit.getStart(); length = fileSplit.getLength(); position = 0; Path path = fileSplit.getPath(); FileSystem fs = path.getFileSystem(conf); in = fs.open(path); tp = new TBinaryProtocol.Factory().getProtocol(new TIOStreamTransport(in)); }
From source file:nl.cwi.kba2013.thrift.bin.ThriftRecordReader.java
License:Apache License
/** Boilerplate initialization code for file input streams. */ @Override// ww w . j a v a 2 s . c om public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { conf = context.getConfiguration(); fileSplit = (FileSplit) split; start = fileSplit.getStart(); length = fileSplit.getLength(); position = start; Path path = fileSplit.getPath(); FileSystem fs = path.getFileSystem(conf); FSDataInputStream fileIn = fs.open(path); compressionCodecs = new CompressionCodecFactory(conf); codec = compressionCodecs.getCodec(path); if (isCompressedInput()) { decompressor = CodecPool.getDecompressor(codec); in = new DataInputStream(codec.createInputStream(fileIn, decompressor)); filePosition = fileIn; //LOG.info("Successfully initialized input stream for compressed data."); } else { fileIn.seek(start); in = fileIn; filePosition = fileIn; } tp = new TBinaryProtocol.Factory().getProtocol(new TIOStreamTransport(in)); }
From source file:nl.cwi.wikilink.thrift.bin.ThriftFileInputFormat.java
License:Apache License
/**
 * Creates the record reader for one split of a Thrift file.
 *
 * @param split the split to read; cast to {@link FileSplit}
 * @param tac   the task attempt context whose configuration is handed to the reader
 * @return a new {@code ThriftRecordReader} built from the split and the configuration
 * @throws IOException declared in the signature
 * @throws InterruptedException declared in the signature
 */
@Override
public RecordReader<Text, WikiLinkItemWritable> createRecordReader(InputSplit split,
        TaskAttemptContext tac) throws IOException, InterruptedException {
    final FileSplit fileSplit = (FileSplit) split;
    return new ThriftRecordReader(fileSplit, tac.getConfiguration());
}
From source file:nl.surfsara.warcutils.WarcRecordReader.java
License:Apache License
@Override public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException { FileSplit split = (FileSplit) inputSplit; Configuration conf = context.getConfiguration(); final Path file = split.getPath(); start = split.getStart();//from w w w . j av a 2s. com end = start + split.getLength(); compressionCodecs = new CompressionCodecFactory(conf); codec = compressionCodecs.getCodec(file); FileSystem fs = file.getFileSystem(conf); FSDataInputStream fileIn = fs.open(split.getPath()); if (isCompressedInput()) { in = new DataInputStream(codec.createInputStream(fileIn, decompressor)); filePosition = fileIn; } else { fileIn.seek(start); in = fileIn; filePosition = fileIn; } warcReader = WarcReaderFactory.getReaderUncompressed(in); warcReader.setWarcTargetUriProfile(WarcIOConstants.URIPROFILE); warcReader.setBlockDigestEnabled(WarcIOConstants.BLOCKDIGESTENABLED); warcReader.setPayloadDigestEnabled(WarcIOConstants.PAYLOADDIGESTENABLED); warcReader.setRecordHeaderMaxSize(WarcIOConstants.HEADERMAXSIZE); warcReader.setPayloadHeaderMaxSize(WarcIOConstants.PAYLOADHEADERMAXSIZE); this.pos = start; }
From source file:nl.surfsara.warcutils.WarcSequenceFileRecordReader.java
License:Apache License
@Override public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException { FileSplit split = (FileSplit) inputSplit; Configuration conf = context.getConfiguration(); final Path path = split.getPath(); Option optPath = SequenceFile.Reader.file(path); in = new SequenceFile.Reader(conf, optPath); this.end = split.getStart() + inputSplit.getLength(); if (split.getStart() > in.getPosition()) { in.sync(split.getStart());// ww w . ja v a 2 s. c o m } start = in.getPosition(); done = start >= end; }
From source file:nyu.cs.webgraph.MRhelpers.LzoTabSeperatedLineRecordReader.java
License:Open Source License
@Override public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException, InterruptedException { FileSplit split = (FileSplit) genericSplit; start = split.getStart();//from w ww . ja v a 2 s . c o m end = start + split.getLength(); final Path file = split.getPath(); Configuration job = context.getConfiguration(); FileSystem fs = file.getFileSystem(job); CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job); final CompressionCodec codec = compressionCodecs.getCodec(file); if (codec == null) { throw new IOException("Codec for file " + file + " not found, cannot run"); } // open the file and seek to the start of the split fileIn = fs.open(split.getPath()); // creates input stream and also reads the file header in = new LineReader(codec.createInputStream(fileIn), job); if (start != 0) { fileIn.seek(start); // read and ignore the first line in.readLine(new Text()); start = fileIn.getPos(); } this.pos = start; }
From source file:org.acaro.graffiti.processing.GraffitiReader.java
License:Apache License
@Override public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException { super.initialize(inputSplit, context); try {//from w ww . j a v a 2 s .c om String queryString = context.getConfiguration().get(Graffiti.QUERY); Query query = new QueryParser(queryString).parse(); msgList = new ArrayList<Message>(1); msgList.add(new Message(query, new ResultSet())); } catch (RecognitionException e) { e.printStackTrace(); throw new ParseError("cannot parse query "); } }
From source file:org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat.java
License:Apache License
@Override public RecordWriter<Key, Value> getRecordWriter(TaskAttemptContext context) throws IOException { // get the path of the temporary output file final Configuration conf = context.getConfiguration(); final AccumuloConfiguration acuConf = FileOutputConfigurator.getAccumuloConfiguration(CLASS, context.getConfiguration()); final String extension = acuConf.get(Property.TABLE_FILE_TYPE); final Path file = this.getDefaultWorkFile(context, "." + extension); final int visCacheSize = ConfiguratorBase.getVisibilityCacheSize(conf); return new RecordWriter<Key, Value>() { RFileWriter out = null;// w w w .ja va 2s . c om @Override public void close(TaskAttemptContext context) throws IOException { if (out != null) out.close(); } @Override public void write(Key key, Value value) throws IOException { if (out == null) { out = RFile.newWriter().to(file.toString()).withFileSystem(file.getFileSystem(conf)) .withTableProperties(acuConf).withVisibilityCacheSize(visCacheSize).build(); out.startDefaultLocalityGroup(); } out.append(key, value); } }; }
From source file:org.apache.accumulo.examples.wikisearch.output.SortingRFileOutputFormat.java
License:Apache License
@Override public RecordWriter<Text, Mutation> getRecordWriter(TaskAttemptContext attempt) throws IOException, InterruptedException { // grab the configuration final Configuration conf = attempt.getConfiguration(); // grab the max size final long maxSize = getMaxBufferSize(conf); return new BufferingRFileRecordWriter(maxSize, conf); }
From source file:org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader.java
License:Apache License
@Override public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { super.initialize(((WikipediaInputSplit) genericSplit).getFileSplit(), context); this.startToken = WikipediaConfiguration.isNull(context.getConfiguration(), START_TOKEN, String.class); this.endToken = WikipediaConfiguration.isNull(context.getConfiguration(), END_TOKEN, String.class); this.returnPartialMatches = context.getConfiguration().getBoolean(RETURN_PARTIAL_MATCHES, false); /*/*www. j av a 2s . c o m*/ * Text-appending works almost exactly like the + operator on Strings- it creates a byte array * exactly the size of [prefix + suffix] and dumps the bytes into the new array. This module * works by doing lots of little additions, one line at a time. With most XML, the documents are * partitioned on line boundaries, so we will generally have lots of additions. Setting a large * default byte array for a text object can avoid this and give us StringBuilder-like * functionality for Text objects. */ byte[] txtBuffer = new byte[2048]; aggValue.set(txtBuffer); }