List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext getConfiguration
public Configuration getConfiguration();
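Before the collected examples, here is a minimal sketch of the pattern they share: inside RecordReader.initialize() (or an OutputFormat/OutputCommitter method), call getConfiguration() on the TaskAttemptContext to read job properties and to resolve the FileSystem for the split's path. The class name ExampleRecordReader and the property "example.max.record.length" are hypothetical placeholders, not taken from the source files listed below.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class ExampleRecordReader extends RecordReader<LongWritable, Text> {
    private FSDataInputStream in;
    private int maxRecordLength;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
        // The per-task Configuration carries job properties set by the driver.
        Configuration conf = context.getConfiguration();
        maxRecordLength = conf.getInt("example.max.record.length", Integer.MAX_VALUE);

        // The same Configuration is used to resolve the FileSystem for the split's path.
        FileSplit fileSplit = (FileSplit) split;
        Path path = fileSplit.getPath();
        FileSystem fs = path.getFileSystem(conf);
        in = fs.open(path);
        in.seek(fileSplit.getStart());
    }

    @Override
    public boolean nextKeyValue() { return false; } // record parsing omitted in this sketch

    @Override
    public LongWritable getCurrentKey() { return null; }

    @Override
    public Text getCurrentValue() { return null; }

    @Override
    public float getProgress() { return 0f; }

    @Override
    public void close() throws IOException { if (in != null) { in.close(); } }
}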
From source file:com.facebook.hiveio.output.HiveApiOutputFormat.java
License:Apache License
@Override
public HiveApiOutputCommitter getOutputCommitter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    HadoopUtils.setWorkOutputDir(taskAttemptContext);
    Configuration conf = taskAttemptContext.getConfiguration();
    JobConf jobConf = new JobConf(conf);
    OutputCommitter baseCommitter = jobConf.getOutputCommitter();
    LOG.info("Getting output committer with base output committer {}",
            baseCommitter.getClass().getSimpleName());
    return new HiveApiOutputCommitter(new HackOutputCommitter(baseCommitter, jobConf), myProfileId);
}
From source file:com.fullcontact.sstable.hadoop.mapreduce.SSTableRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    this.split = (SSTableSplit) inputSplit;

    final FileSystem fileSystem = FileSystem.get(context.getConfiguration());
    final CompressionMetadata compressionMetadata =
            CompressionMetadata.create(split.getPath().toString(), fileSystem);
    if (compressionMetadata == null) {
        throw new IOException("Compression metadata for file " + split.getPath() + " not found, cannot run");
    }

    // open the file and seek to the start of the split
    this.reader = CompressedRandomAccessReader.open(split.getPath(), compressionMetadata, false, fileSystem);
    this.reader.seek(split.getStart());

    this.cfMetaData = initializeCfMetaData(context);
}
From source file:com.fullcontact.sstable.hadoop.mapreduce.SSTableRecordReader.java
License:Apache License
private static CFMetaData initializeCfMetaData(TaskAttemptContext context) {
    final String cql = context.getConfiguration().get(HadoopSSTableConstants.HADOOP_SSTABLE_CQL);
    Preconditions.checkNotNull(cql, "Cannot proceed without CQL definition.");

    final CreateColumnFamilyStatement statement = getCreateColumnFamilyStatement(cql);

    final String keyspace = context.getConfiguration().get(HadoopSSTableConstants.HADOOP_SSTABLE_KEYSPACE, "default");
    final String columnFamily = context.getConfiguration()
            .get(HadoopSSTableConstants.HADOOP_SSTABLE_COLUMN_FAMILY_NAME, "default");
    final CFMetaData cfMetaData =
            new CFMetaData(keyspace, columnFamily, ColumnFamilyType.Standard, statement.comparator, null);

    try {
        statement.applyPropertiesTo(cfMetaData);
    } catch (RequestValidationException e) {
        // Cannot proceed if an error occurs
        throw new RuntimeException("Error configuring SSTable reader. Cannot proceed", e);
    }

    return cfMetaData;
}
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.AbstractGFRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    CombineFileSplit cSplit = (CombineFileSplit) split;
    Path[] path = cSplit.getPaths();
    long[] start = cSplit.getStartOffsets();
    long[] len = cSplit.getLengths();

    Configuration conf = context.getConfiguration();
    FileSystem fs = cSplit.getPath(0).getFileSystem(conf);

    this.splitIterator = HDFSSplitIterator.newInstance(fs, path, start, len, 0L, 0L);
}
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.GFOutputFormat.java
License:Apache License
@Override
public RecordWriter<Object, Object> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    ClientCache cache = getClientCacheInstance(conf);
    return new GFRecordWriter(cache, context.getConfiguration());
}
From source file:com.geneix.bottle.WordRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    if (LOG.isInfoEnabled()) {
        LOG.info("Initializing WordRecordReader");
    }
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxWordLength = job.getInt(MAX_WORD_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        throw new IOException("Cannot handle compressed files right now");
    } else {
        fileIn.seek(start);
        in = new WordReader(fileIn, job);
        filePosition = fileIn;
    }

    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readWord(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
From source file:com.github.bskaggs.avro_json_hadoop.AvroAsJsonRecordReader.java
License:Apache License
/** {@inheritDoc} */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    if (!(inputSplit instanceof FileSplit)) {
        throw new IllegalArgumentException("Only compatible with FileSplits.");
    }
    FileSplit fileSplit = (FileSplit) inputSplit;

    // Open a seekable input stream to the Avro container file.
    SeekableInput seekableFileInput = createSeekableInput(context.getConfiguration(), fileSplit.getPath());

    // Wrap the seekable input stream in an Avro DataFileReader.
    Configuration conf = context.getConfiguration();
    GenericData dataModel = AvroSerialization.createDataModel(conf);
    GenericDatumReader<Object> reader = new GenericDatumReader<Object>();

    // Figure out the schema
    Path path = fileSplit.getPath();
    FSDataInputStream schemaStream = path.getFileSystem(conf).open(path);
    DataFileStream<Object> streamReader = new DataFileStream<Object>(schemaStream, reader);
    Schema mReaderSchema = streamReader.getSchema();
    streamReader.close();

    // Set up writer and encoder for json
    writer = new GenericDatumWriter<Object>(mReaderSchema);
    encoder = new TerseJsonEncoder(mReaderSchema, bout);

    @SuppressWarnings("unchecked")
    DatumReader<Object> datumReader = dataModel.createDatumReader(mReaderSchema);
    mAvroFileReader = createAvroFileReader(seekableFileInput, datumReader);

    // Initialize the start and end offsets into the file based on the boundaries of the
    // input split we're responsible for. We will read the first block that begins
    // after the input split start boundary. We will read up to but not including the
    // first block that starts after input split end boundary.

    // Sync to the closest block/record boundary just after beginning of our input split.
    mAvroFileReader.sync(fileSplit.getStart());

    // Initialize the start position to the beginning of the first block of the input split.
    mStartPosition = mAvroFileReader.previousSync();

    // Initialize the end position to the end of the input split (this isn't necessarily
    // on a block boundary, so using this for reporting progress will be approximate).
    mEndPosition = fileSplit.getStart() + fileSplit.getLength();
}
From source file:com.github.bskaggs.mapreduce.flowfile.AbstractFlowFileV3RecordReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) split;
    Path file = fileSplit.getPath();
    FileSystem fs = file.getFileSystem(context.getConfiguration());
    fileStream = fs.open(file);

    startPos = fileSplit.getStart();
    nextPos = startPos;
    length = fileSplit.getLength();
    lastPos = nextPos + length;
}
From source file:com.google.appengine.tools.mapreduce.BlobstoreRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Preconditions.checkNotNull(split);
    if (!(split instanceof BlobstoreInputSplit)) {
        throw new IOException(getClass().getName() + " initialized with non-BlobstoreInputSplit");
    }

    this.split = (BlobstoreInputSplit) split;
    int intTerminator = context.getConfiguration().getInt(BlobstoreInputFormat.TERMINATOR, DEFAULT_TERMINATOR);
    Preconditions.checkState(Byte.MIN_VALUE <= intTerminator && intTerminator <= Byte.MAX_VALUE,
            BlobstoreInputFormat.TERMINATOR + " is not in [" + Byte.MIN_VALUE + ", " + Byte.MAX_VALUE + "] range.");
    terminator = (byte) intTerminator;
    input = getInputStream(this.split, offset);
    recordIterator = getInputStreamIterator(input, this.split, offset, terminator);
}
From source file:com.hadoop.mapreduce.LzoLineRecordReader.java
License:Open Source License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    Configuration job = context.getConfiguration();
    FileSystem fs = file.getFileSystem(job);
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (codec == null) {
        throw new IOException("No codec for file " + file + " found, cannot run");
    }

    // open the file and seek to the start of the split
    fileIn = fs.open(split.getPath());

    // creates input stream and also reads the file header
    in = new LineReader(codec.createInputStream(fileIn), job);

    if (start != 0) {
        fileIn.seek(start);

        // read and ignore the first line
        in.readLine(new Text());
        start = fileIn.getPos();
    }
    this.pos = start;
}