List of usage examples for org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl
public TaskAttemptContextImpl(Configuration conf, TaskAttemptID taskId)
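All of the examples below share the same pattern: construct a TaskAttemptContextImpl from a Configuration and a (usually freshly created) TaskAttemptID so that an InputFormat, RecordReader, or RecordWriter can be driven outside of a running MapReduce job. Here is a minimal, self-contained sketch of that pattern, assuming the Hadoop 2.x mapreduce API; the input path is hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    // Hypothetical input path, used only to illustrate the pattern.
    FileInputFormat.addInputPath(job, new Path("/tmp/example.txt"));

    TextInputFormat format = new TextInputFormat();
    // A fresh, empty TaskAttemptID is sufficient when no real task attempt exists.
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());

    for (InputSplit split : format.getSplits(job)) {
      RecordReader<LongWritable, Text> reader = format.createRecordReader(split, context);
      reader.initialize(split, context);
      while (reader.nextKeyValue()) {
        System.out.println(reader.getCurrentValue());
      }
      reader.close();
    }
  }
}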
From source file:org.apache.crunch.io.impl.DefaultFileReaderFactory.java
License:Apache License
@Override
public Iterator<T> read(FileSystem fs, Path path) {
  final Configuration conf = new Configuration(fs.getConf());
  bundle.configure(conf);
  ptype.initialize(conf);

  final InputFormat fmt = ReflectionUtils.newInstance(bundle.getFormatClass(), conf);
  final TaskAttemptContext ctxt = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  try {
    Job job = new Job(conf);
    FileInputFormat.addInputPath(job, path);
    return Iterators.concat(Lists.transform(fmt.getSplits(job), new Function<InputSplit, Iterator<T>>() {
      @Override
      public Iterator<T> apply(InputSplit split) {
        try {
          RecordReader reader = fmt.createRecordReader(split, ctxt);
          reader.initialize(split, ctxt);
          return new RecordReaderIterator<T>(reader, ptype);
        } catch (Exception e) {
          LOG.error("Error reading split: " + split, e);
          throw new CrunchRuntimeException(e);
        }
      }
    }).iterator());
  } catch (Exception e) {
    LOG.error("Error reading path: " + path, e);
    throw new CrunchRuntimeException(e);
  }
}
From source file:org.apache.druid.data.input.orc.DruidOrcInputFormatTest.java
License:Apache License
@Test
public void testRead() throws IOException, InterruptedException {
  InputFormat inputFormat = ReflectionUtils.newInstance(OrcNewInputFormat.class, job.getConfiguration());
  TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
  RecordReader reader = inputFormat.createRecordReader(split, context);
  InputRowParser<OrcStruct> parser = (InputRowParser<OrcStruct>) config.getParser();

  reader.initialize(split, context);
  reader.nextKeyValue();
  OrcStruct data = (OrcStruct) reader.getCurrentValue();
  MapBasedInputRow row = (MapBasedInputRow) parser.parseBatch(data).get(0);

  Assert.assertTrue(row.getEvent().keySet().size() == 4);
  Assert.assertEquals(DateTimes.of(timestamp), row.getTimestamp());
  Assert.assertEquals(parser.getParseSpec().getDimensionsSpec().getDimensionNames(), row.getDimensions());
  Assert.assertEquals(col1, row.getEvent().get("col1"));
  Assert.assertEquals(Arrays.asList(col2), row.getDimension("col2"));
  reader.close();
}
From source file:org.apache.druid.data.input.orc.DruidOrcInputFormatTest.java
License:Apache License
@Test
public void testReadDateColumn() throws IOException, InterruptedException {
  File testFile2 = makeOrcFileWithDate();
  Path path = new Path(testFile2.getAbsoluteFile().toURI());
  FileSplit split = new FileSplit(path, 0, testFile2.length(), null);

  InputFormat inputFormat = ReflectionUtils.newInstance(OrcNewInputFormat.class, job.getConfiguration());
  TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
  RecordReader reader = inputFormat.createRecordReader(split, context);
  InputRowParser<OrcStruct> parser = (InputRowParser<OrcStruct>) config.getParser();

  reader.initialize(split, context);
  reader.nextKeyValue();
  OrcStruct data = (OrcStruct) reader.getCurrentValue();
  MapBasedInputRow row = (MapBasedInputRow) parser.parseBatch(data).get(0);

  Assert.assertTrue(row.getEvent().keySet().size() == 4);
  Assert.assertEquals(DateTimes.of(timestamp), row.getTimestamp());
  Assert.assertEquals(parser.getParseSpec().getDimensionsSpec().getDimensionNames(), row.getDimensions());
  Assert.assertEquals(col1, row.getEvent().get("col1"));
  Assert.assertEquals(Arrays.asList(col2), row.getDimension("col2"));
  reader.close();
}
From source file:org.apache.druid.data.input.orc.OrcHadoopInputRowParserTest.java
License:Apache License
private static OrcStruct getFirstRow(Job job, String orcPath) throws IOException, InterruptedException {
  File testFile = new File(orcPath);
  Path path = new Path(testFile.getAbsoluteFile().toURI());
  FileSplit split = new FileSplit(path, 0, testFile.length(), null);

  InputFormat inputFormat = ReflectionUtils.newInstance(OrcInputFormat.class, job.getConfiguration());
  TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
  try (RecordReader reader = inputFormat.createRecordReader(split, context)) {
    reader.initialize(split, context);
    reader.nextKeyValue();
    return (OrcStruct) reader.getCurrentValue();
  }
}
From source file:org.apache.druid.data.input.orc.OrcHadoopInputRowParserTest.java
License:Apache License
private static List<InputRow> getAllRows(HadoopDruidIndexerConfig config) throws IOException, InterruptedException {
  Job job = Job.getInstance(new Configuration());
  config.intoConfiguration(job);

  File testFile = new File(((StaticPathSpec) config.getPathSpec()).getPaths());
  Path path = new Path(testFile.getAbsoluteFile().toURI());
  FileSplit split = new FileSplit(path, 0, testFile.length(), null);

  InputFormat inputFormat = ReflectionUtils.newInstance(OrcInputFormat.class, job.getConfiguration());
  TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
  try (RecordReader reader = inputFormat.createRecordReader(split, context)) {
    List<InputRow> records = new ArrayList<>();
    InputRowParser parser = config.getParser();

    reader.initialize(split, context);
    // nextKeyValue() in the loop condition already advances the reader,
    // so each record is read and parsed exactly once.
    while (reader.nextKeyValue()) {
      Object data = reader.getCurrentValue();
      records.add(((List<InputRow>) parser.parseBatch(data)).get(0));
    }
    return records;
  }
}
From source file:org.apache.druid.data.input.parquet.BaseParquetInputTest.java
License:Apache License
static Object getFirstRow(Job job, String parserType, String parquetPath) throws IOException, InterruptedException {
  File testFile = new File(parquetPath);
  Path path = new Path(testFile.getAbsoluteFile().toURI());
  FileSplit split = new FileSplit(path, 0, testFile.length(), null);

  InputFormat inputFormat = ReflectionUtils.newInstance(inputFormatClass.get(parserType), job.getConfiguration());
  TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
  try (RecordReader reader = inputFormat.createRecordReader(split, context)) {
    reader.initialize(split, context);
    reader.nextKeyValue();
    return reader.getCurrentValue();
  }
}
From source file:org.apache.druid.data.input.parquet.BaseParquetInputTest.java
License:Apache License
static List<InputRow> getAllRows(String parserType, HadoopDruidIndexerConfig config) throws IOException, InterruptedException {
  Job job = Job.getInstance(new Configuration());
  config.intoConfiguration(job);

  File testFile = new File(((StaticPathSpec) config.getPathSpec()).getPaths());
  Path path = new Path(testFile.getAbsoluteFile().toURI());
  FileSplit split = new FileSplit(path, 0, testFile.length(), null);

  InputFormat inputFormat = ReflectionUtils.newInstance(inputFormatClass.get(parserType), job.getConfiguration());
  TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
  try (RecordReader reader = inputFormat.createRecordReader(split, context)) {
    List<InputRow> records = new ArrayList<>();
    InputRowParser parser = config.getParser();

    reader.initialize(split, context);
    // nextKeyValue() in the loop condition already advances the reader,
    // so each record is read and parsed exactly once.
    while (reader.nextKeyValue()) {
      Object data = reader.getCurrentValue();
      records.add(((List<InputRow>) parser.parseBatch(data)).get(0));
    }
    return records;
  }
}
From source file:org.apache.druid.data.input.parquet.DruidParquetInputTest.java
License:Apache License
private List<InputRow> getAllRows(String configPath) throws IOException, InterruptedException {
  HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromFile(new File(configPath));
  Job job = Job.getInstance(new Configuration());
  config.intoConfiguration(job);

  File testFile = new File(((StaticPathSpec) config.getPathSpec()).getPaths());
  Path path = new Path(testFile.getAbsoluteFile().toURI());
  FileSplit split = new FileSplit(path, 0, testFile.length(), null);

  DruidParquetInputFormat inputFormat = ReflectionUtils.newInstance(DruidParquetInputFormat.class, job.getConfiguration());
  TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
  try (RecordReader reader = inputFormat.createRecordReader(split, context)) {
    List<InputRow> records = Lists.newArrayList();
    InputRowParser parser = config.getParser();

    reader.initialize(split, context);
    // nextKeyValue() in the loop condition already advances the reader,
    // so each record is read and parsed exactly once.
    while (reader.nextKeyValue()) {
      GenericRecord data = (GenericRecord) reader.getCurrentValue();
      records.add(((List<InputRow>) parser.parseBatch(data)).get(0));
    }
    return records;
  }
}
From source file:org.apache.giraph.job.HadoopUtils.java
License:Apache License
/**
 * Create a TaskAttemptContext, supporting many Hadoops.
 *
 * @param conf Configuration
 * @param taskAttemptID TaskAttemptID to use
 * @return TaskAttemptContext
 */
public static TaskAttemptContext makeTaskAttemptContext(Configuration conf, TaskAttemptID taskAttemptID) {
  TaskAttemptContext context;
  /*if[HADOOP_NON_JOBCONTEXT_IS_INTERFACE]
  context = new TaskAttemptContext(conf, taskAttemptID);
  else[HADOOP_NON_JOBCONTEXT_IS_INTERFACE]*/
  context = new TaskAttemptContextImpl(conf, taskAttemptID);
  /*end[HADOOP_NON_JOBCONTEXT_IS_INTERFACE]*/
  return context;
}
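The munge-style comment directives above let Giraph compile the same source against older Hadoop versions where TaskAttemptContext is a concrete class and newer ones where it is an interface backed by TaskAttemptContextImpl. A hypothetical call site for this helper; the job identifier and numeric ids are placeholder values:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

// Placeholder identifier and ids, used only to illustrate the call.
Configuration conf = new Configuration();
TaskAttemptID attemptId = new TaskAttemptID("jt", 1, TaskType.MAP, 0, 0);
TaskAttemptContext context = HadoopUtils.makeTaskAttemptContext(conf, attemptId);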
From source file:org.apache.gobblin.compaction.mapreduce.OrcCompactionTaskTest.java
License:Apache License
public void writeOrcRecordsInFile(Path path, TypeDescription schema, List<OrcStruct> orcStructs) throws Exception {
  Configuration configuration = new Configuration();
  OrcFile.WriterOptions options = OrcFile.writerOptions(configuration).setSchema(schema);

  Writer writer = OrcFile.createWriter(path, options);
  OrcMapreduceRecordWriter recordWriter = new OrcMapreduceRecordWriter(writer);
  for (OrcStruct orcRecord : orcStructs) {
    recordWriter.write(NullWritable.get(), orcRecord);
  }
  recordWriter.close(new TaskAttemptContextImpl(configuration, new TaskAttemptID()));
}
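The close(TaskAttemptContext) signature comes from the mapreduce RecordWriter contract, which is why a synthetic TaskAttemptContextImpl is enough to flush and close the underlying ORC writer here. A hypothetical read-back check for the file written above, sketched with the core ORC reader API (path and configuration as in the method); types are fully qualified to avoid clashing with the mapreduce RecordReader:

// Sketch: verify the written file by reading it back with core ORC APIs.
org.apache.orc.Reader orcReader = OrcFile.createReader(path, OrcFile.readerOptions(configuration));
try (org.apache.orc.RecordReader rows = orcReader.rows()) {
  org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch batch = orcReader.getSchema().createRowBatch();
  while (rows.nextBatch(batch)) {
    System.out.println("read " + batch.size + " rows");
  }
}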