List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
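getConfiguration() returns the Configuration of the job this task attempt belongs to. As a minimal sketch of the pattern that recurs throughout the examples below, a custom RecordReader can read a job property from the context during initialize(); the class name MinimalRecordReader and the property key "example.max.records" are illustrative, not taken from any source file listed here.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Illustrative sketch: read a job setting from the task's Configuration
// in initialize(), as the examples below do.
public class MinimalRecordReader extends RecordReader<LongWritable, NullWritable> {

    private long maxRecords;
    private long pos;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) {
        // getConfiguration() hands back the job Configuration for this task attempt.
        Configuration conf = context.getConfiguration();
        maxRecords = conf.getLong("example.max.records", Long.MAX_VALUE);
        pos = 0;
    }

    @Override
    public boolean nextKeyValue() {
        if (pos >= maxRecords) {
            return false;
        }
        pos++;
        return true;
    }

    @Override
    public LongWritable getCurrentKey() {
        return new LongWritable(pos - 1);
    }

    @Override
    public NullWritable getCurrentValue() {
        return NullWritable.get();
    }

    @Override
    public float getProgress() {
        return maxRecords == 0 ? 1.0f : Math.min(1.0f, (float) pos / maxRecords);
    }

    @Override
    public void close() {
        // nothing to release in this sketch
    }
}

Reading settings in initialize() rather than in nextKeyValue() keeps per-record work cheap; the real-world readers below follow the same division of labor.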
From source file:cosmos.mapred.LongLineRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new LfLineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LfLineReader(fileIn, job);
    }
    if (skipFirstLine) {
        // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
From source file:crunch.MaxTemperature.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    this.fileSplit = (FileSplit) split;
    this.conf = context.getConfiguration();
}
From source file:cs480a2.yqiu.recSystem.mapreduce.input.SingleBookReader.java
/**
 * @param inputSplit
 * @param context the information about the task
 * @throws IOException
 * @throws InterruptedException
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) inputSplit;
    Configuration configuration = context.getConfiguration();
    Path path = split.getPath();
    filename = path.getName();
    FileSystem fileSystem = path.getFileSystem(configuration);
    FSDataInputStream inputStream = fileSystem.open(path);
    lineReader = new LineReader(inputStream, configuration);

    // initial start point and end point
    start = split.getStart();
    end = start + split.getLength();

    inputStream.seek(start);
    if (start != 0) {
        start += lineReader.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
    }
    start += lineReader.readLine(currentLine);
    prepareToScanBook();
}
From source file:cz.seznam.euphoria.hadoop.input.DataSourceInputFormat.java
License:Apache License
@Override
public RecordReader<NullWritable, V> createRecordReader(InputSplit is, TaskAttemptContext tac)
        throws IOException, InterruptedException {
    initialize(tac.getConfiguration());
    @SuppressWarnings("unchecked")
    SourceSplit<V> split = (SourceSplit<V>) is;
    Reader<V> reader = split.partition.openReader();
    return new RecordReader<NullWritable, V>() {

        V v;

        @Override
        public void initialize(InputSplit is, TaskAttemptContext tac)
                throws IOException, InterruptedException {
            // nop
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (reader.hasNext()) {
                v = reader.next();
                return true;
            }
            return false;
        }

        @Override
        public NullWritable getCurrentKey() throws IOException, InterruptedException {
            return NullWritable.get();
        }

        @Override
        public V getCurrentValue() throws IOException, InterruptedException {
            return v;
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return 0.0f;
        }

        @Override
        public void close() throws IOException {
            reader.close();
        }
    };
}
From source file:cz.seznam.euphoria.hadoop.input.TestDataSourceInputFormat.java
License:Apache License
@Test
public void testDataSource() throws Exception {
    DummySource<Pair<Long, Long>> source = new DummySource<>(() -> Pair
            .of(Math.round(Math.random() * Long.MAX_VALUE), Math.round(Math.random() * Long.MAX_VALUE)));
    Configuration conf = new Configuration();
    TaskAttemptContext tac = mock(TaskAttemptContext.class);
    DataSourceInputFormat.configure(conf, source);
    when(tac.getConfiguration()).thenReturn(conf);

    InputFormat<NullWritable, Pair<Long, Long>> inputFormat = new DataSourceInputFormat<>();
    List<InputSplit> splits = inputFormat.getSplits(tac);
    assertEquals(2, splits.size());

    try (RecordReader<NullWritable, Pair<Long, Long>> reader = inputFormat.createRecordReader(splits.get(0),
            tac)) {
        reader.initialize(splits.get(0), tac);
        assertTrue(reader.nextKeyValue());
        reader.getCurrentKey();
        reader.getCurrentValue();
        assertTrue(reader.nextKeyValue());
        assertFalse(reader.nextKeyValue());
    }
    try (RecordReader<NullWritable, Pair<Long, Long>> reader = inputFormat.createRecordReader(splits.get(1),
            tac)) {
        reader.initialize(splits.get(1), tac);
        assertTrue(reader.nextKeyValue());
        reader.getCurrentKey();
        reader.getCurrentValue();
        assertTrue(reader.nextKeyValue());
        assertTrue(reader.nextKeyValue());
        assertFalse(reader.nextKeyValue());
    }
}
From source file:cz.seznam.euphoria.hadoop.output.TestDataSinkOutputFormat.java
License:Apache License
private TaskAttemptContext mockContext(Configuration conf, int taskId) {
    TaskAttemptContext ret = mock(TaskAttemptContext.class);
    TaskAttemptID mockAttemptId = mock(TaskAttemptID.class);
    TaskID mockTaskId = mock(TaskID.class);
    when(ret.getConfiguration()).thenReturn(conf);
    when(ret.getTaskAttemptID()).thenReturn(mockAttemptId);
    when(mockAttemptId.getTaskID()).thenReturn(mockTaskId);
    when(mockTaskId.getId()).thenReturn(taskId);
    return ret;
}
From source file:datafu.hourglass.avro.AvroKeyValueWithMetadataOutputFormat.java
License:Apache License
/** {@inheritDoc} */
@Override
@SuppressWarnings("unchecked")
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    AvroDatumConverterFactory converterFactory = new AvroDatumConverterFactory(context.getConfiguration());
    AvroDatumConverter<K, ?> keyConverter = converterFactory.create((Class<K>) context.getOutputKeyClass());
    AvroDatumConverter<V, ?> valueConverter = converterFactory.create((Class<V>) context.getOutputValueClass());
    return new AvroKeyValueWithMetadataRecordWriter<K, V>(keyConverter, valueConverter,
            getCompressionCodec(context), getAvroFileOutputStream(context), context.getConfiguration());
}
From source file:datafu.hourglass.avro.AvroKeyWithMetadataOutputFormat.java
License:Apache License
/** {@inheritDoc} */
@Override
@SuppressWarnings("unchecked")
public RecordWriter<AvroKey<T>, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException {
    // Get the writer schema.
    Schema writerSchema = AvroJob.getOutputKeySchema(context.getConfiguration());
    if (null == writerSchema) {
        throw new IOException(
                "AvroKeyOutputFormat requires an output schema. Use AvroJob.setOutputKeySchema().");
    }
    return mRecordWriterFactory.create(writerSchema, getCompressionCodec(context),
            getAvroFileOutputStream(context), context.getConfiguration());
}
From source file:datafu.hourglass.avro.AvroMultipleInputsKeyInputFormat.java
License:Apache License
/** {@inheritDoc} */
@Override
public RecordReader<AvroKey<T>, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Schema readerSchema = AvroMultipleInputsUtil.getInputKeySchemaForSplit(context.getConfiguration(), split);
    if (readerSchema == null) {
        throw new RuntimeException("Could not determine input schema");
    }
    return new AvroKeyRecordReader<T>(readerSchema);
}
From source file:de.gesundkrank.wikipedia.hadoop.inputformat.WikiInputRecordReader.java
License:Open Source License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    this.fs = FileSystem.get(context.getConfiguration());
    this.parser = new Parser();
    openSplit(split);
}