List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
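Before the per-project examples, here is a minimal sketch of the pattern they all share: a custom RecordReader retrieves the job's Configuration from the TaskAttemptContext inside initialize() and uses it to read job settings and open the split's file. This is an illustrative sketch, not code from any of the projects below; the configuration key "my.custom.key" is hypothetical.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class SketchRecordReader extends RecordReader<LongWritable, Text> {

  private FSDataInputStream in;

  @Override
  public void initialize(InputSplit split, TaskAttemptContext context)
      throws IOException, InterruptedException {
    // getConfiguration() hands back the job's Configuration, which carries
    // both Hadoop settings and any job-specific keys set at submission time.
    Configuration conf = context.getConfiguration();

    // "my.custom.key" is a made-up key used only for illustration.
    String customSetting = conf.get("my.custom.key", "default-value");

    // The same Configuration is what resolves the FileSystem for the split's path.
    Path path = ((FileSplit) split).getPath();
    FileSystem fs = path.getFileSystem(conf);
    in = fs.open(path);
  }

  @Override
  public boolean nextKeyValue() throws IOException, InterruptedException {
    return false; // record iteration elided in this sketch
  }

  @Override
  public LongWritable getCurrentKey() {
    return null;
  }

  @Override
  public Text getCurrentValue() {
    return null;
  }

  @Override
  public float getProgress() {
    return 0f;
  }

  @Override
  public void close() throws IOException {
    if (in != null) {
      in.close();
    }
  }
}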
From source file: io.druid.data.input.avro.AvroValueInputFormat.java
License: Apache License

/**
 * {@inheritDoc}
 */
@Override
public RecordReader<NullWritable, GenericRecord> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  Schema readerSchema = AvroJob.getInputValueSchema(context.getConfiguration());

  if (readerSchema == null) {
    String schemaFilePath = context.getConfiguration().get(CONF_INPUT_VALUE_SCHEMA_PATH);
    if (StringUtils.isNotBlank(schemaFilePath)) {
      log.info("Using file: %s as reader schema.", schemaFilePath);
      try (FSDataInputStream inputStream = FileSystem.get(context.getConfiguration())
          .open(new Path(schemaFilePath))) {
        readerSchema = new Schema.Parser().parse(inputStream);
      }
    }
  }

  if (null == readerSchema) {
    log.warn("Reader schema was not set. Use AvroJob.setInputValueSchema() if desired.");
    log.info("Using a reader schema equal to the writer schema.");
  }

  return new AvroValueRecordReader(readerSchema);
}
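The example above resolves the reader schema in two steps: first from the schema AvroJob stored in the job configuration, then from a schema file referenced by the CONF_INPUT_VALUE_SCHEMA_PATH key. For context, here is a minimal sketch of the job-submission side that would populate the first source, assuming only the standard avro-mapred API; the job name and inline schema are placeholders.

import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class AvroJobSetupSketch {
  public static void main(String[] args) throws IOException {
    Job job = Job.getInstance(new Configuration(), "avro-ingestion");

    // Register the reader (value) schema; this is what
    // AvroJob.getInputValueSchema(conf) returns in createRecordReader() above.
    // The inline schema is a placeholder for illustration only.
    Schema readerSchema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Example\",\"fields\":"
            + "[{\"name\":\"value\",\"type\":\"string\"}]}");
    AvroJob.setInputValueSchema(job, readerSchema);
  }
}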
From source file: io.druid.indexer.hadoop.DatasourceRecordReader.java
License: Apache License

@Override
public void initialize(InputSplit split, final TaskAttemptContext context)
    throws IOException, InterruptedException {
  spec = readAndVerifyDatasourceIngestionSpec(context.getConfiguration(), HadoopDruidIndexerConfig.jsonMapper);

  List<WindowedDataSegment> segments = ((DatasourceInputSplit) split).getSegments();
  List<WindowedStorageAdapter> adapters = Lists.transform(segments,
      new Function<WindowedDataSegment, WindowedStorageAdapter>() {
        @Override
        public WindowedStorageAdapter apply(WindowedDataSegment segment) {
          try {
            logger.info("Getting storage path for segment [%s]", segment.getSegment().getIdentifier());
            Path path = new Path(JobHelper.getURIFromSegment(segment.getSegment()));

            logger.info("Fetch segment files from [%s]", path);

            File dir = Files.createTempDir();
            tmpSegmentDirs.add(dir);
            logger.info("Locally storing fetched segment at [%s]", dir);

            JobHelper.unzipNoGuava(path, context.getConfiguration(), dir, context);
            logger.info("finished fetching segment files");

            QueryableIndex index = IndexIO.loadIndex(dir);
            indexes.add(index);
            numRows += index.getNumRows();

            return new WindowedStorageAdapter(new QueryableIndexStorageAdapter(index), segment.getInterval());
          } catch (IOException ex) {
            throw Throwables.propagate(ex);
          }
        }
      });

  firehose = new IngestSegmentFirehose(adapters, spec.getDimensions(), spec.getMetrics(), spec.getFilter(),
      spec.getGranularity());
}
From source file: io.druid.indexer.hadoop.DatasourceRecordReaderTest.java
License: Apache License

@Test
public void testSanity() throws Exception {
  DataSegment segment = new DefaultObjectMapper()
      .readValue(this.getClass().getClassLoader().getResource("test-segment/descriptor.json"),
          DataSegment.class)
      .withLoadSpec(ImmutableMap.<String, Object>of("type", "local", "path",
          this.getClass().getClassLoader().getResource("test-segment/index.zip").getPath()));

  InputSplit split = new DatasourceInputSplit(Lists.newArrayList(WindowedDataSegment.of(segment)));

  Configuration config = new Configuration();
  config.set(DatasourceInputFormat.CONF_DRUID_SCHEMA,
      HadoopDruidIndexerConfig.jsonMapper.writeValueAsString(new DatasourceIngestionSpec(
          segment.getDataSource(), segment.getInterval(), null, null, segment.getDimensions(),
          segment.getMetrics())));

  TaskAttemptContext context = EasyMock.createNiceMock(TaskAttemptContext.class);
  EasyMock.expect(context.getConfiguration()).andReturn(config).anyTimes();
  EasyMock.replay(context);

  DatasourceRecordReader rr = new DatasourceRecordReader();
  rr.initialize(split, context);

  Assert.assertEquals(0, rr.getProgress(), 0.0001);

  List<InputRow> rows = Lists.newArrayList();
  while (rr.nextKeyValue()) {
    rows.add(rr.getCurrentValue());
  }
  verifyRows(rows);

  Assert.assertEquals(1, rr.getProgress(), 0.0001);

  rr.close();
}
From source file: io.fluo.mapreduce.FluoInputFormat.java
License: Apache License

@Override
public RecordReader<Bytes, ColumnIterator> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {

  return new RecordReader<Bytes, ColumnIterator>() {

    private Entry<Bytes, ColumnIterator> entry;
    private RowIterator rowIter;
    private Environment env = null;
    private TransactionImpl ti = null;

    @Override
    public void close() throws IOException {
      if (env != null) {
        env.close();
      }
      if (ti != null) {
        ti.close();
      }
    }

    @Override
    public Bytes getCurrentKey() throws IOException, InterruptedException {
      return entry.getKey();
    }

    @Override
    public ColumnIterator getCurrentValue() throws IOException, InterruptedException {
      return entry.getValue();
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
      // TODO Auto-generated method stub
      return 0;
    }

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
      try {
        // TODO this uses non public Accumulo API!
        RangeInputSplit ris = (RangeInputSplit) split;

        Span span = SpanUtil.toSpan(ris.getRange());

        ByteArrayInputStream bais = new ByteArrayInputStream(
            context.getConfiguration().get(PROPS_CONF_KEY).getBytes("UTF-8"));
        PropertiesConfiguration props = new PropertiesConfiguration();
        props.load(bais);

        env = new Environment(new FluoConfiguration(props));

        ti = new TransactionImpl(env, context.getConfiguration().getLong(TIMESTAMP_CONF_KEY, -1));
        ScannerConfiguration sc = new ScannerConfiguration().setSpan(span);

        for (String fam : context.getConfiguration().getStrings(FAMS_CONF_KEY, new String[0])) {
          sc.fetchColumnFamily(Bytes.wrap(fam));
        }

        rowIter = ti.get(sc);
      } catch (Exception e) {
        throw new IOException(e);
      }
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      if (rowIter.hasNext()) {
        entry = rowIter.next();
        return true;
      }
      return false;
    }
  };
}
From source file: io.fluo.mapreduce.FluoOutputFormat.java
License: Apache License

@Override
public RecordWriter<Loader, NullWritable> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {

  ByteArrayInputStream bais = new ByteArrayInputStream(
      context.getConfiguration().get(PROPS_CONF_KEY).getBytes("UTF-8"));

  Properties props = new Properties();
  props.load(bais);

  FluoConfiguration config = new FluoConfiguration(ConfigurationConverter.getConfiguration(props));

  try {
    // Do not use try-with-resources here: the executor must stay open until
    // the returned RecordWriter is closed, not until this method returns.
    final LoaderExecutorImpl lexecutor = new LoaderExecutorImpl(config);
    return new RecordWriter<Loader, NullWritable>() {

      @Override
      public void close(TaskAttemptContext context) throws IOException, InterruptedException {
        lexecutor.close();
      }

      @Override
      public void write(Loader loader, NullWritable nullw) throws IOException, InterruptedException {
        lexecutor.execute(loader);
      }
    };
  } catch (Exception e) {
    throw new IOException(e);
  }
}
From source file: io.fluo.webindex.data.util.WARCFileRecordReader.java
License: Apache License

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
    throws IOException, InterruptedException {
  FileSplit split = (FileSplit) inputSplit;
  Configuration conf = context.getConfiguration();
  Path path = split.getPath();
  FileSystem fs = path.getFileSystem(conf);
  fsin = fs.open(path);
  arPath = path.getName();
  ar = WARCReaderFactory.get(path.getName(), fsin, true);
}
From source file: io.imply.druid.hadoop.DruidRecordReader.java
License: Apache License

@Override
public void initialize(final InputSplit split, final TaskAttemptContext context)
    throws IOException, InterruptedException {
  if (firehose != null) {
    firehose.close();
  }

  if (queryableIndex != null) {
    queryableIndex.close();
  }

  final WindowedDataSegment segment = ((DruidInputSplit) split).getSegment();
  queryableIndex = loadSegment(context, segment);
  firehose = makeFirehose(
      new WindowedStorageAdapter(new QueryableIndexStorageAdapter(queryableIndex), segment.getInterval()),
      DruidInputFormat.getFilter(context.getConfiguration()),
      DruidInputFormat.getColumns(context.getConfiguration()));
}
From source file: io.imply.druid.hadoop.DruidRecordReader.java
License: Apache License

private QueryableIndex loadSegment(final TaskAttemptContext context, final WindowedDataSegment segment)
    throws IOException {
  if (tmpDir == null) {
    tmpDir = Files.createTempDir();
  }

  final Path path = new Path(JobHelper.getURIFromSegment(segment.getSegment()));
  final File segmentDir = new File(tmpDir, segment.getSegment().getIdentifier());
  if (!segmentDir.exists()) {
    log.info("Fetching segment[%s] from[%s] to [%s].", segment.getSegment().getIdentifier(), path,
        segmentDir);
    if (!segmentDir.mkdir()) {
      throw new ISE("Failed to make directory[%s]", segmentDir);
    }
    JobHelper.unzipNoGuava(path, context.getConfiguration(), segmentDir, context);
  }

  final QueryableIndex index = HadoopDruidIndexerConfig.INDEX_IO.loadIndex(segmentDir);
  log.info("Loaded segment[%s].", segment.getSegment().getIdentifier());
  return index;
}
From source file: io.ssc.trackthetrackers.extraction.hadoop.io.ArcRecordReader.java
License: Open Source License

public void initialize(InputSplit insplit, TaskAttemptContext context) throws IOException {
  conf = context.getConfiguration();
  FileSplit split = (FileSplit) insplit;

  if (split.getStart() != 0) {
    String errorMessage = "Invalid ARC file split start " + split.getStart()
        + ": ARC files are not splittable";
    log.error(errorMessage);
    throw new IOException(errorMessage);
  }

  // open the file and seek to the start of the split
  final Path file = split.getPath();

  FileSystem fs = file.getFileSystem(conf);

  fsin = fs.open(file);

  // create a GZIP stream that *does not* automatically read through members
  gzip = new GzipCompressorInputStream(fsin, false);

  fileLength = fs.getFileStatus(file).getLen();

  // First record should be an ARC file header record. Skip it.
  skipRecord();
}
From source file: io.vitess.hadoop.VitessRecordReader.java
License: Apache License

/**
 * Fetch connection parameters from the Configuration and open a VtGate connection.
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  this.split = (VitessInputSplit) split;
  conf = new VitessConf(context.getConfiguration());
  try {
    @SuppressWarnings("unchecked")
    Class<? extends RpcClientFactory> rpcFactoryClass = (Class<? extends RpcClientFactory>) Class
        .forName(conf.getRpcFactoryClass());
    List<String> addressList = Arrays.asList(conf.getHosts().split(","));
    int index = new Random().nextInt(addressList.size());
    RpcClient rpcClient = rpcFactoryClass.newInstance().create(
        Context.getDefault().withDeadlineAfter(Duration.millis(conf.getTimeoutMs())),
        addressList.get(index));
    vtgate = new VTGateBlockingConn(rpcClient);
    includedFields = conf.getIncludedFields();
  } catch (ClassNotFoundException | InstantiationException | IllegalAccessException exc) {
    throw new RuntimeException(exc);
  }
}