Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

This page lists example usages of org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration(), collected from the open-source files below.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.

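Before the project examples, here is a minimal, self-contained sketch of the typical pattern: fetch the Configuration inside initialize() and read a job property from it. The class and the property name "custom.line.limit" are hypothetical, invented for illustration.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public abstract class ConfiguredRecordReader<K, V> extends RecordReader<K, V> {
    private int lineLimit;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // getConfiguration() returns the Configuration the job was submitted with.
        Configuration conf = context.getConfiguration();
        // "custom.line.limit" is a hypothetical property, shown for illustration only.
        lineLimit = conf.getInt("custom.line.limit", Integer.MAX_VALUE);
    }
}

The examples below apply the same idea in RecordReaders, InputFormats, OutputFormats, and mocked contexts in tests.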
Usage

From source file: cosmos.mapred.LongLineRecordReader.java

License: Apache License

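Here the job configuration supplies the maximum line length ("mapred.linerecordreader.maxlength") and is passed along to the CompressionCodecFactory, the FileSystem lookup, and the LfLineReader.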
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new LfLineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LfLineReader(fileIn, job);
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}

From source file: crunch.MaxTemperature.java

License: Apache License

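A minimal initialize(): the split and the configuration are simply cached in fields for later use.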
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    this.fileSplit = (FileSplit) split;
    this.conf = context.getConfiguration();
}

From source file: cs480a2.yqiu.recSystem.mapreduce.input.SingleBookReader.java

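This reader resolves the split's FileSystem from the configuration, opens the book file, and positions a LineReader at the start of the split.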
/**
 * @param inputSplit the split to read
 * @param context    the information about the task
 * @throws IOException
 * @throws InterruptedException
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {

    FileSplit split = (FileSplit) inputSplit;
    Configuration configuration = context.getConfiguration();
    Path path = split.getPath();
    filename = path.getName();
    FileSystem fileSystem = path.getFileSystem(configuration);
    FSDataInputStream inputStream = fileSystem.open(path);
    lineReader = new LineReader(inputStream, configuration);

    //initial start point and end point
    start = split.getStart();
    end = start + split.getLength();

    inputStream.seek(start);
    if (start != 0) {
        start += lineReader.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
    }

    start += lineReader.readLine(currentLine);

    prepareToScanBook();
}

From source file: cz.seznam.euphoria.hadoop.input.DataSourceInputFormat.java

License: Apache License

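createRecordReader() first initializes the input format from the task's configuration, then wraps the Reader of the split's partition in an anonymous RecordReader.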
@Override
public RecordReader<NullWritable, V> createRecordReader(InputSplit is, TaskAttemptContext tac)
        throws IOException, InterruptedException {

    initialize(tac.getConfiguration());
    @SuppressWarnings("unchecked")
    SourceSplit<V> split = (SourceSplit<V>) is;
    Reader<V> reader = split.partition.openReader();
    return new RecordReader<NullWritable, V>() {

        V v; // value most recently produced by the wrapped reader

        @Override
        public void initialize(InputSplit is, TaskAttemptContext tac) throws IOException, InterruptedException {
            // nop
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (reader.hasNext()) {
                v = reader.next();
                return true;
            }
            return false;
        }

        @Override
        public NullWritable getCurrentKey() throws IOException, InterruptedException {
            return NullWritable.get();
        }

        @Override
        public V getCurrentValue() throws IOException, InterruptedException {
            return v;
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return 0.0f;
        }

        @Override
        public void close() throws IOException {
            reader.close();
        }

    };
}

From source file: cz.seznam.euphoria.hadoop.input.TestDataSourceInputFormat.java

License: Apache License

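In tests, getConfiguration() is easy to stub: a mocked TaskAttemptContext is wired to return a hand-built Configuration.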
@Test
public void testDataSource() throws Exception {
    DummySource<Pair<Long, Long>> source = new DummySource<>(() -> Pair
            .of(Math.round(Math.random() * Long.MAX_VALUE), Math.round(Math.random() * Long.MAX_VALUE)));

    Configuration conf = new Configuration();
    TaskAttemptContext tac = mock(TaskAttemptContext.class);
    DataSourceInputFormat.configure(conf, source);

    when(tac.getConfiguration()).thenReturn(conf);

    InputFormat<NullWritable, Pair<Long, Long>> inputFormat = new DataSourceInputFormat<>();
    List<InputSplit> splits = inputFormat.getSplits(tac);
    assertEquals(2, splits.size());

    try (RecordReader<NullWritable, Pair<Long, Long>> reader = inputFormat.createRecordReader(splits.get(0),
            tac)) {
        reader.initialize(splits.get(0), tac);
        assertTrue(reader.nextKeyValue());
        reader.getCurrentKey();
        reader.getCurrentValue();
        assertTrue(reader.nextKeyValue());
        assertFalse(reader.nextKeyValue());
    }

    try (RecordReader<NullWritable, Pair<Long, Long>> reader = inputFormat.createRecordReader(splits.get(1),
            tac)) {
        reader.initialize(splits.get(1), tac);
        assertTrue(reader.nextKeyValue());
        reader.getCurrentKey();
        reader.getCurrentValue();
        assertTrue(reader.nextKeyValue());
        assertTrue(reader.nextKeyValue());
        assertFalse(reader.nextKeyValue());
    }

}

From source file: cz.seznam.euphoria.hadoop.output.TestDataSinkOutputFormat.java

License: Apache License

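A test helper that builds a mocked TaskAttemptContext returning the given Configuration and a mocked task id chain.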
private TaskAttemptContext mockContext(Configuration conf, int taskId) {
    TaskAttemptContext ret = mock(TaskAttemptContext.class);
    TaskAttemptID mockAttemptId = mock(TaskAttemptID.class);
    TaskID mockTaskId = mock(TaskID.class);
    when(ret.getConfiguration()).thenReturn(conf);
    when(ret.getTaskAttemptID()).thenReturn(mockAttemptId);
    when(mockAttemptId.getTaskID()).thenReturn(mockTaskId);
    when(mockTaskId.getId()).thenReturn(taskId);
    return ret;
}

From source file: datafu.hourglass.avro.AvroKeyValueWithMetadataOutputFormat.java

License: Apache License

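The configuration drives the AvroDatumConverterFactory and is also handed to the record writer it returns.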
/** {@inheritDoc} */
@Override
@SuppressWarnings("unchecked")
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    AvroDatumConverterFactory converterFactory = new AvroDatumConverterFactory(context.getConfiguration());

    AvroDatumConverter<K, ?> keyConverter = converterFactory.create((Class<K>) context.getOutputKeyClass());
    AvroDatumConverter<V, ?> valueConverter = converterFactory.create((Class<V>) context.getOutputValueClass());

    return new AvroKeyValueWithMetadataRecordWriter<K, V>(keyConverter, valueConverter,
            getCompressionCodec(context), getAvroFileOutputStream(context), context.getConfiguration());
}

From source file: datafu.hourglass.avro.AvroKeyWithMetadataOutputFormat.java

License: Apache License

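The writer schema is read from the configuration via AvroJob.getOutputKeySchema(), with a clear error if the job did not set one.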
/** {@inheritDoc} */
@Override
@SuppressWarnings("unchecked")
public RecordWriter<AvroKey<T>, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException {
    // Get the writer schema.
    Schema writerSchema = AvroJob.getOutputKeySchema(context.getConfiguration());
    if (null == writerSchema) {
        throw new IOException(
                "AvroKeyOutputFormat requires an output schema. Use AvroJob.setOutputKeySchema().");
    }

    return mRecordWriterFactory.create(writerSchema, getCompressionCodec(context),
            getAvroFileOutputStream(context), context.getConfiguration());
}

From source file: datafu.hourglass.avro.AvroMultipleInputsKeyInputFormat.java

License: Apache License

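The reader schema for the given split is looked up from the configuration before the record reader is constructed.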
/** {@inheritDoc} */
@Override
public RecordReader<AvroKey<T>, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Schema readerSchema = AvroMultipleInputsUtil.getInputKeySchemaForSplit(context.getConfiguration(), split);
    if (readerSchema == null) {
        throw new RuntimeException("Could not determine input schema");
    }
    return new AvroKeyRecordReader<T>(readerSchema);
}

From source file: de.gesundkrank.wikipedia.hadoop.inputformat.WikiInputRecordReader.java

License: Open Source License

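Here the configuration is used only to obtain the FileSystem before opening the split.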
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    this.fs = FileSystem.get(context.getConfiguration());
    this.parser = new Parser();
    openSplit(split);
}