List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
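getConfiguration() returns the Configuration of the job this task attempt belongs to. As a minimal sketch of the pattern that recurs throughout the examples below, a custom RecordReader can read a job property from the context during initialize(); the class name MinimalRecordReader and the property key "example.max.records" are illustrative, not taken from any source file listed here.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Illustrative sketch: read a job setting from the task's Configuration
// in initialize(), as the examples below do.
public class MinimalRecordReader extends RecordReader<LongWritable, NullWritable> {

    private long maxRecords;
    private long pos;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) {
        // getConfiguration() hands back the job Configuration for this task attempt.
        Configuration conf = context.getConfiguration();
        maxRecords = conf.getLong("example.max.records", Long.MAX_VALUE);
        pos = 0;
    }

    @Override
    public boolean nextKeyValue() {
        if (pos >= maxRecords) {
            return false;
        }
        pos++;
        return true;
    }

    @Override
    public LongWritable getCurrentKey() {
        return new LongWritable(pos - 1);
    }

    @Override
    public NullWritable getCurrentValue() {
        return NullWritable.get();
    }

    @Override
    public float getProgress() {
        return maxRecords == 0 ? 1.0f : Math.min(1.0f, (float) pos / maxRecords);
    }

    @Override
    public void close() {
        // nothing to release in this sketch
    }
}

Reading settings in initialize() rather than in nextKeyValue() keeps per-record work cheap; the real-world readers below follow the same division of labor.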
From source file:cosmos.mapred.LongLineRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new LfLineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LfLineReader(fileIn, job);
    }
    if (skipFirstLine) {
        // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
From source file:crunch.MaxTemperature.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    this.fileSplit = (FileSplit) split;
    this.conf = context.getConfiguration();
}
From source file:cs480a2.yqiu.recSystem.mapreduce.input.SingleBookReader.java
/**
 * @param inputSplit
 * @param context the information about the task
 * @throws IOException
 * @throws InterruptedException
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) inputSplit;
    Configuration configuration = context.getConfiguration();
    Path path = split.getPath();
    filename = path.getName();
    FileSystem fileSystem = path.getFileSystem(configuration);
    FSDataInputStream inputStream = fileSystem.open(path);
    lineReader = new LineReader(inputStream, configuration);

    // initial start point and end point
    start = split.getStart();
    end = start + split.getLength();

    inputStream.seek(start);
    if (start != 0) {
        start += lineReader.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
    }
    start += lineReader.readLine(currentLine);
    prepareToScanBook();
}
From source file:cz.seznam.euphoria.hadoop.input.DataSourceInputFormat.java
License:Apache License
@Override
public RecordReader<NullWritable, V> createRecordReader(InputSplit is, TaskAttemptContext tac)
        throws IOException, InterruptedException {
    initialize(tac.getConfiguration());
    @SuppressWarnings("unchecked")
    SourceSplit<V> split = (SourceSplit<V>) is;
    Reader<V> reader = split.partition.openReader();
    return new RecordReader<NullWritable, V>() {

        V v;

        @Override
        public void initialize(InputSplit is, TaskAttemptContext tac)
                throws IOException, InterruptedException {
            // nop
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (reader.hasNext()) {
                v = reader.next();
                return true;
            }
            return false;
        }

        @Override
        public NullWritable getCurrentKey() throws IOException, InterruptedException {
            return NullWritable.get();
        }

        @Override
        public V getCurrentValue() throws IOException, InterruptedException {
            return v;
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return 0.0f;
        }

        @Override
        public void close() throws IOException {
            reader.close();
        }
    };
}
From source file:cz.seznam.euphoria.hadoop.input.TestDataSourceInputFormat.java
License:Apache License
@Test
public void testDataSource() throws Exception {
    DummySource<Pair<Long, Long>> source = new DummySource<>(() -> Pair
            .of(Math.round(Math.random() * Long.MAX_VALUE), Math.round(Math.random() * Long.MAX_VALUE)));
    Configuration conf = new Configuration();
    TaskAttemptContext tac = mock(TaskAttemptContext.class);
    DataSourceInputFormat.configure(conf, source);
    when(tac.getConfiguration()).thenReturn(conf);

    InputFormat<NullWritable, Pair<Long, Long>> inputFormat = new DataSourceInputFormat<>();
    List<InputSplit> splits = inputFormat.getSplits(tac);
    assertEquals(2, splits.size());

    try (RecordReader<NullWritable, Pair<Long, Long>> reader = inputFormat.createRecordReader(splits.get(0),
            tac)) {
        reader.initialize(splits.get(0), tac);
        assertTrue(reader.nextKeyValue());
        reader.getCurrentKey();
        reader.getCurrentValue();
        assertTrue(reader.nextKeyValue());
        assertFalse(reader.nextKeyValue());
    }
    try (RecordReader<NullWritable, Pair<Long, Long>> reader = inputFormat.createRecordReader(splits.get(1),
            tac)) {
        reader.initialize(splits.get(1), tac);
        assertTrue(reader.nextKeyValue());
        reader.getCurrentKey();
        reader.getCurrentValue();
        assertTrue(reader.nextKeyValue());
        assertTrue(reader.nextKeyValue());
        assertFalse(reader.nextKeyValue());
    }
}
From source file:cz.seznam.euphoria.hadoop.output.TestDataSinkOutputFormat.java
License:Apache License
private TaskAttemptContext mockContext(Configuration conf, int taskId) {
    TaskAttemptContext ret = mock(TaskAttemptContext.class);
    TaskAttemptID mockAttemptId = mock(TaskAttemptID.class);
    TaskID mockTaskId = mock(TaskID.class);
    when(ret.getConfiguration()).thenReturn(conf);
    when(ret.getTaskAttemptID()).thenReturn(mockAttemptId);
    when(mockAttemptId.getTaskID()).thenReturn(mockTaskId);
    when(mockTaskId.getId()).thenReturn(taskId);
    return ret;
}
From source file:datafu.hourglass.avro.AvroKeyValueWithMetadataOutputFormat.java
License:Apache License
/** {@inheritDoc} */
@Override
@SuppressWarnings("unchecked")
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    AvroDatumConverterFactory converterFactory = new AvroDatumConverterFactory(context.getConfiguration());
    AvroDatumConverter<K, ?> keyConverter = converterFactory.create((Class<K>) context.getOutputKeyClass());
    AvroDatumConverter<V, ?> valueConverter = converterFactory.create((Class<V>) context.getOutputValueClass());
    return new AvroKeyValueWithMetadataRecordWriter<K, V>(keyConverter, valueConverter,
            getCompressionCodec(context), getAvroFileOutputStream(context), context.getConfiguration());
}
From source file:datafu.hourglass.avro.AvroKeyWithMetadataOutputFormat.java
License:Apache License
/** {@inheritDoc} */
@Override
@SuppressWarnings("unchecked")
public RecordWriter<AvroKey<T>, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException {
    // Get the writer schema.
    Schema writerSchema = AvroJob.getOutputKeySchema(context.getConfiguration());
    if (null == writerSchema) {
        throw new IOException(
                "AvroKeyOutputFormat requires an output schema. Use AvroJob.setOutputKeySchema().");
    }
    return mRecordWriterFactory.create(writerSchema, getCompressionCodec(context),
            getAvroFileOutputStream(context), context.getConfiguration());
}
From source file:datafu.hourglass.avro.AvroMultipleInputsKeyInputFormat.java
License:Apache License
/** {@inheritDoc} */
@Override
public RecordReader<AvroKey<T>, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Schema readerSchema = AvroMultipleInputsUtil.getInputKeySchemaForSplit(context.getConfiguration(), split);
    if (readerSchema == null) {
        throw new RuntimeException("Could not determine input schema");
    }
    return new AvroKeyRecordReader<T>(readerSchema);
}
From source file:de.gesundkrank.wikipedia.hadoop.inputformat.WikiInputRecordReader.java
License:Open Source License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    this.fs = FileSystem.get(context.getConfiguration());
    this.parser = new Parser();
    openSplit(split);
}