Example usage for org.apache.hadoop.mapreduce.task TaskAttemptContextImpl TaskAttemptContextImpl

Introduction

On this page you can find example usages of the org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl constructor TaskAttemptContextImpl(Configuration, TaskAttemptID).

Prototype

public TaskAttemptContextImpl(Configuration conf, TaskAttemptID taskId) 
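
Below is a minimal, self-contained sketch (not taken from the listings that follow) of the typical way this constructor is invoked: a fresh Configuration plus a default TaskAttemptID, which is the pattern most examples on this page use when they need a context to drive an InputFormat, RecordReader, or RecordWriter outside a real MapReduce task. The example class name is invented for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // A no-argument TaskAttemptID is enough when the context is only used to
        // initialize readers/writers outside of a running MapReduce task.
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
        System.out.println("Task attempt ID: " + context.getTaskAttemptID());
    }
}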

Usage

From source file: org.apache.crunch.io.impl.DefaultFileReaderFactory.java

License: Apache License

@Override
public Iterator<T> read(FileSystem fs, Path path) {
    final Configuration conf = new Configuration(fs.getConf());
    bundle.configure(conf);
    ptype.initialize(conf);

    final InputFormat fmt = ReflectionUtils.newInstance(bundle.getFormatClass(), conf);
    final TaskAttemptContext ctxt = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    try {
        Job job = new Job(conf);
        FileInputFormat.addInputPath(job, path);
        return Iterators.concat(Lists.transform(fmt.getSplits(job), new Function<InputSplit, Iterator<T>>() {
            @Override
            public Iterator<T> apply(InputSplit split) {
                try {
                    RecordReader reader = fmt.createRecordReader(split, ctxt);
                    reader.initialize(split, ctxt);
                    return new RecordReaderIterator<T>(reader, ptype);
                } catch (Exception e) {
                    LOG.error("Error reading split: " + split, e);
                    throw new CrunchRuntimeException(e);
                }
            }
        }).iterator());
    } catch (Exception e) {
        LOG.error("Error reading path: " + path, e);
        throw new CrunchRuntimeException(e);
    }
}

From source file: org.apache.druid.data.input.orc.DruidOrcInputFormatTest.java

License: Apache License

@Test
public void testRead() throws IOException, InterruptedException {
    InputFormat inputFormat = ReflectionUtils.newInstance(OrcNewInputFormat.class, job.getConfiguration());

    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
    RecordReader reader = inputFormat.createRecordReader(split, context);
    InputRowParser<OrcStruct> parser = (InputRowParser<OrcStruct>) config.getParser();

    reader.initialize(split, context);

    reader.nextKeyValue();

    OrcStruct data = (OrcStruct) reader.getCurrentValue();

    MapBasedInputRow row = (MapBasedInputRow) parser.parseBatch(data).get(0);

    Assert.assertTrue(row.getEvent().keySet().size() == 4);
    Assert.assertEquals(DateTimes.of(timestamp), row.getTimestamp());
    Assert.assertEquals(parser.getParseSpec().getDimensionsSpec().getDimensionNames(), row.getDimensions());
    Assert.assertEquals(col1, row.getEvent().get("col1"));
    Assert.assertEquals(Arrays.asList(col2), row.getDimension("col2"));

    reader.close();
}

From source file: org.apache.druid.data.input.orc.DruidOrcInputFormatTest.java

License: Apache License

@Test
public void testReadDateColumn() throws IOException, InterruptedException {
    File testFile2 = makeOrcFileWithDate();
    Path path = new Path(testFile2.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile2.length(), null);

    InputFormat inputFormat = ReflectionUtils.newInstance(OrcNewInputFormat.class, job.getConfiguration());

    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
    RecordReader reader = inputFormat.createRecordReader(split, context);
    InputRowParser<OrcStruct> parser = (InputRowParser<OrcStruct>) config.getParser();

    reader.initialize(split, context);

    reader.nextKeyValue();

    OrcStruct data = (OrcStruct) reader.getCurrentValue();

    MapBasedInputRow row = (MapBasedInputRow) parser.parseBatch(data).get(0);

    Assert.assertTrue(row.getEvent().keySet().size() == 4);
    Assert.assertEquals(DateTimes.of(timestamp), row.getTimestamp());
    Assert.assertEquals(parser.getParseSpec().getDimensionsSpec().getDimensionNames(), row.getDimensions());
    Assert.assertEquals(col1, row.getEvent().get("col1"));
    Assert.assertEquals(Arrays.asList(col2), row.getDimension("col2"));

    reader.close();
}

From source file: org.apache.druid.data.input.orc.OrcHadoopInputRowParserTest.java

License: Apache License

private static OrcStruct getFirstRow(Job job, String orcPath) throws IOException, InterruptedException {
    File testFile = new File(orcPath);
    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);

    InputFormat inputFormat = ReflectionUtils.newInstance(OrcInputFormat.class, job.getConfiguration());
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());

    try (RecordReader reader = inputFormat.createRecordReader(split, context)) {

        reader.initialize(split, context);
        reader.nextKeyValue();
        return (OrcStruct) reader.getCurrentValue();
    }
}

From source file: org.apache.druid.data.input.orc.OrcHadoopInputRowParserTest.java

License: Apache License

private static List<InputRow> getAllRows(HadoopDruidIndexerConfig config)
        throws IOException, InterruptedException {
    Job job = Job.getInstance(new Configuration());
    config.intoConfiguration(job);

    File testFile = new File(((StaticPathSpec) config.getPathSpec()).getPaths());
    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);

    InputFormat inputFormat = ReflectionUtils.newInstance(OrcInputFormat.class, job.getConfiguration());
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());

    try (RecordReader reader = inputFormat.createRecordReader(split, context)) {
        List<InputRow> records = new ArrayList<>();
        InputRowParser parser = config.getParser();

        reader.initialize(split, context);
        while (reader.nextKeyValue()) {
            Object data = reader.getCurrentValue();
            records.add(((List<InputRow>) parser.parseBatch(data)).get(0));
        }

        return records;
    }
}

From source file: org.apache.druid.data.input.parquet.BaseParquetInputTest.java

License: Apache License

static Object getFirstRow(Job job, String parserType, String parquetPath)
        throws IOException, InterruptedException {
    File testFile = new File(parquetPath);
    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);

    InputFormat inputFormat = ReflectionUtils.newInstance(inputFormatClass.get(parserType),
            job.getConfiguration());
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());

    try (RecordReader reader = inputFormat.createRecordReader(split, context)) {

        reader.initialize(split, context);
        reader.nextKeyValue();
        return reader.getCurrentValue();
    }
}

From source file: org.apache.druid.data.input.parquet.BaseParquetInputTest.java

License: Apache License

static List<InputRow> getAllRows(String parserType, HadoopDruidIndexerConfig config)
        throws IOException, InterruptedException {
    Job job = Job.getInstance(new Configuration());
    config.intoConfiguration(job);

    File testFile = new File(((StaticPathSpec) config.getPathSpec()).getPaths());
    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);

    InputFormat inputFormat = ReflectionUtils.newInstance(inputFormatClass.get(parserType),
            job.getConfiguration());
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());

    try (RecordReader reader = inputFormat.createRecordReader(split, context)) {
        List<InputRow> records = new ArrayList<>();
        InputRowParser parser = config.getParser();

        reader.initialize(split, context);
        while (reader.nextKeyValue()) {
            Object data = reader.getCurrentValue();
            records.add(((List<InputRow>) parser.parseBatch(data)).get(0));
        }

        return records;
    }
}

From source file: org.apache.druid.data.input.parquet.DruidParquetInputTest.java

License: Apache License

private List<InputRow> getAllRows(String configPath) throws IOException, InterruptedException {
    HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromFile(new File(configPath));
    Job job = Job.getInstance(new Configuration());
    config.intoConfiguration(job);

    File testFile = new File(((StaticPathSpec) config.getPathSpec()).getPaths());
    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);

    DruidParquetInputFormat inputFormat = ReflectionUtils.newInstance(DruidParquetInputFormat.class,
            job.getConfiguration());
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());

    try (RecordReader reader = inputFormat.createRecordReader(split, context)) {
        List<InputRow> records = Lists.newArrayList();
        InputRowParser parser = config.getParser();

        reader.initialize(split, context);
        while (reader.nextKeyValue()) {
            GenericRecord data = (GenericRecord) reader.getCurrentValue();
            records.add(((List<InputRow>) parser.parseBatch(data)).get(0));
        }

        return records;
    }
}

From source file: org.apache.giraph.job.HadoopUtils.java

License: Apache License

/**
 * Create a TaskAttemptContext, supporting many Hadoops.
 *
 * @param conf Configuration
 * @param taskAttemptID TaskAttemptID to use
 * @return TaskAttemptContext
 */
public static TaskAttemptContext makeTaskAttemptContext(Configuration conf, TaskAttemptID taskAttemptID) {
    TaskAttemptContext context;
    /*if[HADOOP_NON_JOBCONTEXT_IS_INTERFACE]
    context = new TaskAttemptContext(conf, taskAttemptID);
    else[HADOOP_NON_JOBCONTEXT_IS_INTERFACE]*/
    context = new TaskAttemptContextImpl(conf, taskAttemptID);
    /*end[HADOOP_NON_JOBCONTEXT_IS_INTERFACE]*/
    return context;
}
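
A hypothetical caller (not part of the Giraph listing above) could use this helper wherever a context is needed outside of a running task, for example:

    Configuration conf = new Configuration();
    TaskAttemptContext context =
            HadoopUtils.makeTaskAttemptContext(conf, new TaskAttemptID());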

From source file: org.apache.gobblin.compaction.mapreduce.OrcCompactionTaskTest.java

License: Apache License

public void writeOrcRecordsInFile(Path path, TypeDescription schema, List<OrcStruct> orcStructs)
        throws Exception {
    Configuration configuration = new Configuration();
    OrcFile.WriterOptions options = OrcFile.writerOptions(configuration).setSchema(schema);

    Writer writer = OrcFile.createWriter(path, options);
    OrcMapreduceRecordWriter recordWriter = new OrcMapreduceRecordWriter(writer);
    for (OrcStruct orcRecord : orcStructs) {
        recordWriter.write(NullWritable.get(), orcRecord);
    }
    recordWriter.close(new TaskAttemptContextImpl(configuration, new TaskAttemptID()));
}