Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

List of usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

On this page you can find usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
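
A minimal sketch of the typical call pattern is shown below. It is illustrative only and not taken from any of the projects listed under Usage; the class name MinimalRecordReader and the property key my.custom.property are made up for this sketch. A RecordReader (the same applies to a Mapper or Reducer) obtains the job Configuration from the TaskAttemptContext in initialize() and reads its settings from there.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class MinimalRecordReader extends RecordReader<LongWritable, Text> {

    private Configuration conf;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The task's view of the job configuration.
        conf = context.getConfiguration();

        // Typical uses: read job-level settings and resolve the input file system.
        String customValue = conf.get("my.custom.property", "default");
        Path path = ((FileSplit) split).getPath();
        FileSystem fs = path.getFileSystem(conf);
        // ... open the file and prepare iteration here ...
    }

    // The remaining RecordReader methods are stubbed out to keep the sketch short.
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        return false;
    }

    @Override
    public LongWritable getCurrentKey() {
        return null;
    }

    @Override
    public Text getCurrentValue() {
        return null;
    }

    @Override
    public float getProgress() {
        return 0f;
    }

    @Override
    public void close() throws IOException {
    }
}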

Usage

From source file: io.druid.data.input.avro.AvroValueInputFormat.java

License: Apache License

/**
 * {@inheritDoc}
 */
@Override
public RecordReader<NullWritable, GenericRecord> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    Schema readerSchema = AvroJob.getInputValueSchema(context.getConfiguration());

    if (readerSchema == null) {
        String schemaFilePath = context.getConfiguration().get(CONF_INPUT_VALUE_SCHEMA_PATH);
        if (StringUtils.isNotBlank(schemaFilePath)) {
            log.info("Using file: %s as reader schema.", schemaFilePath);
            try (FSDataInputStream inputStream = FileSystem.get(context.getConfiguration())
                    .open(new Path(schemaFilePath))) {
                readerSchema = new Schema.Parser().parse(inputStream);
            }
        }
    }

    if (null == readerSchema) {
        log.warn("Reader schema was not set. Use AvroJob.setInputKeySchema() if desired.");
        log.info("Using a reader schema equal to the writer schema.");
    }
    return new AvroValueRecordReader(readerSchema);
}

From source file: io.druid.indexer.hadoop.DatasourceRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit split, final TaskAttemptContext context)
        throws IOException, InterruptedException {
    spec = readAndVerifyDatasourceIngestionSpec(context.getConfiguration(),
            HadoopDruidIndexerConfig.jsonMapper);

    List<WindowedDataSegment> segments = ((DatasourceInputSplit) split).getSegments();

    List<WindowedStorageAdapter> adapters = Lists.transform(segments,
            new Function<WindowedDataSegment, WindowedStorageAdapter>() {
                @Override
                public WindowedStorageAdapter apply(WindowedDataSegment segment) {
                    try {
                        logger.info("Getting storage path for segment [%s]",
                                segment.getSegment().getIdentifier());
                        Path path = new Path(JobHelper.getURIFromSegment(segment.getSegment()));

                        logger.info("Fetch segment files from [%s]", path);

                        File dir = Files.createTempDir();
                        tmpSegmentDirs.add(dir);
                        logger.info("Locally storing fetched segment at [%s]", dir);

                        JobHelper.unzipNoGuava(path, context.getConfiguration(), dir, context);
                        logger.info("finished fetching segment files");

                        QueryableIndex index = IndexIO.loadIndex(dir);
                        indexes.add(index);
                        numRows += index.getNumRows();

                        return new WindowedStorageAdapter(new QueryableIndexStorageAdapter(index),
                                segment.getInterval());
                    } catch (IOException ex) {
                        throw Throwables.propagate(ex);
                    }
                }
            });

    firehose = new IngestSegmentFirehose(adapters, spec.getDimensions(), spec.getMetrics(), spec.getFilter(),
            spec.getGranularity());

}

From source file: io.druid.indexer.hadoop.DatasourceRecordReaderTest.java

License: Apache License

@Test
public void testSanity() throws Exception {
    DataSegment segment = new DefaultObjectMapper()
            .readValue(this.getClass().getClassLoader().getResource("test-segment/descriptor.json"),
                    DataSegment.class)
            .withLoadSpec(ImmutableMap.<String, Object>of("type", "local", "path",
                    this.getClass().getClassLoader().getResource("test-segment/index.zip").getPath()));
    InputSplit split = new DatasourceInputSplit(Lists.newArrayList(WindowedDataSegment.of(segment)));

    Configuration config = new Configuration();
    config.set(DatasourceInputFormat.CONF_DRUID_SCHEMA,
            HadoopDruidIndexerConfig.jsonMapper
                    .writeValueAsString(new DatasourceIngestionSpec(segment.getDataSource(),
                            segment.getInterval(), null, null, segment.getDimensions(), segment.getMetrics())));

    TaskAttemptContext context = EasyMock.createNiceMock(TaskAttemptContext.class);
    EasyMock.expect(context.getConfiguration()).andReturn(config).anyTimes();
    EasyMock.replay(context);

    DatasourceRecordReader rr = new DatasourceRecordReader();
    rr.initialize(split, context);

    Assert.assertEquals(0, rr.getProgress(), 0.0001);

    List<InputRow> rows = Lists.newArrayList();
    while (rr.nextKeyValue()) {
        rows.add(rr.getCurrentValue());
    }
    verifyRows(rows);

    Assert.assertEquals(1, rr.getProgress(), 0.0001);

    rr.close();
}

From source file: io.fluo.mapreduce.FluoInputFormat.java

License: Apache License

@Override
public RecordReader<Bytes, ColumnIterator> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {

    return new RecordReader<Bytes, ColumnIterator>() {

        private Entry<Bytes, ColumnIterator> entry;
        private RowIterator rowIter;
        private Environment env = null;
        private TransactionImpl ti = null;

        @Override
        public void close() throws IOException {
            if (env != null) {
                env.close();
            }
            if (ti != null) {
                ti.close();
            }
        }

        @Override
        public Bytes getCurrentKey() throws IOException, InterruptedException {
            return entry.getKey();
        }

        @Override
        public ColumnIterator getCurrentValue() throws IOException, InterruptedException {
            return entry.getValue();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            // TODO Auto-generated method stub
            return 0;
        }

        @Override
        public void initialize(InputSplit split, TaskAttemptContext context)
                throws IOException, InterruptedException {
            try {
                // TODO this uses non public Accumulo API!
                RangeInputSplit ris = (RangeInputSplit) split;

                Span span = SpanUtil.toSpan(ris.getRange());

                ByteArrayInputStream bais = new ByteArrayInputStream(
                        context.getConfiguration().get(PROPS_CONF_KEY).getBytes("UTF-8"));
                PropertiesConfiguration props = new PropertiesConfiguration();
                props.load(bais);

                env = new Environment(new FluoConfiguration(props));

                ti = new TransactionImpl(env, context.getConfiguration().getLong(TIMESTAMP_CONF_KEY, -1));
                ScannerConfiguration sc = new ScannerConfiguration().setSpan(span);

                for (String fam : context.getConfiguration().getStrings(FAMS_CONF_KEY, new String[0]))
                    sc.fetchColumnFamily(Bytes.wrap(fam));

                rowIter = ti.get(sc);
            } catch (Exception e) {
                throw new IOException(e);
            }
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (rowIter.hasNext()) {
                entry = rowIter.next();
                return true;
            }
            return false;
        }
    };

}

From source file: io.fluo.mapreduce.FluoOutputFormat.java

License: Apache License

@Override
public RecordWriter<Loader, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    ByteArrayInputStream bais = new ByteArrayInputStream(
            context.getConfiguration().get(PROPS_CONF_KEY).getBytes("UTF-8"));
    Properties props = new Properties();
    props.load(bais);

    FluoConfiguration config = new FluoConfiguration(ConfigurationConverter.getConfiguration(props));

    try {
        // The executor must remain open for the returned RecordWriter; it is closed in close().
        final LoaderExecutorImpl lexecutor = new LoaderExecutorImpl(config);

        return new RecordWriter<Loader, NullWritable>() {

            @Override
            public void close(TaskAttemptContext context) throws IOException, InterruptedException {
                lexecutor.close();
            }

            @Override
            public void write(Loader loader, NullWritable nullw) throws IOException, InterruptedException {
                lexecutor.execute(loader);
            }
        };
    } catch (Exception e) {
        throw new IOException(e);
    }
}

From source file: io.fluo.webindex.data.util.WARCFileRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) inputSplit;
    Configuration conf = context.getConfiguration();
    Path path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);
    fsin = fs.open(path);
    arPath = path.getName();
    ar = WARCReaderFactory.get(path.getName(), fsin, true);
}

From source file: io.imply.druid.hadoop.DruidRecordReader.java

License: Apache License

@Override
public void initialize(final InputSplit split, final TaskAttemptContext context)
        throws IOException, InterruptedException {
    if (firehose != null) {
        firehose.close();
    }

    if (queryableIndex != null) {
        queryableIndex.close();
    }

    final WindowedDataSegment segment = ((DruidInputSplit) split).getSegment();

    queryableIndex = loadSegment(context, segment);
    firehose = makeFirehose(
            new WindowedStorageAdapter(new QueryableIndexStorageAdapter(queryableIndex), segment.getInterval()),
            DruidInputFormat.getFilter(context.getConfiguration()),
            DruidInputFormat.getColumns(context.getConfiguration()));
}

From source file: io.imply.druid.hadoop.DruidRecordReader.java

License: Apache License

private QueryableIndex loadSegment(final TaskAttemptContext context, final WindowedDataSegment segment)
        throws IOException {
    if (tmpDir == null) {
        tmpDir = Files.createTempDir();
    }

    final Path path = new Path(JobHelper.getURIFromSegment(segment.getSegment()));
    final File segmentDir = new File(tmpDir, segment.getSegment().getIdentifier());
    if (!segmentDir.exists()) {
        log.info("Fetching segment[%s] from[%s] to [%s].", segment.getSegment().getIdentifier(), path,
                segmentDir);
        if (!segmentDir.mkdir()) {
            throw new ISE("Failed to make directory[%s]", segmentDir);
        }
        JobHelper.unzipNoGuava(path, context.getConfiguration(), segmentDir, context);
    }

    final QueryableIndex index = HadoopDruidIndexerConfig.INDEX_IO.loadIndex(segmentDir);
    log.info("Loaded segment[%s].", segment.getSegment().getIdentifier());
    return index;
}

From source file: io.ssc.trackthetrackers.extraction.hadoop.io.ArcRecordReader.java

License: Open Source License

public void initialize(InputSplit insplit, TaskAttemptContext context) throws IOException {

    conf = context.getConfiguration();

    FileSplit split = (FileSplit) insplit;

    if (split.getStart() != 0) {
        String errorMessage = "Invalid ARC file split start " + split.getStart()
                + ": ARC files are not splittable";
        log.error(errorMessage);
        throw new IOException(errorMessage);
    }

    // open the file and seek to the start of the split
    final Path file = split.getPath();

    FileSystem fs = file.getFileSystem(context.getConfiguration());

    fsin = fs.open(file);

    // create a GZIP stream that *does not* automatically read through
    // members
    gzip = new GzipCompressorInputStream(fsin, false);

    fileLength = fs.getFileStatus(file).getLen();

    // First record should be an ARC file header record. Skip it.
    skipRecord();
}

From source file: io.vitess.hadoop.VitessRecordReader.java

License: Apache License

/**
 * Fetch connection parameters from Configuration and open VtGate connection.
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    this.split = (VitessInputSplit) split;
    conf = new VitessConf(context.getConfiguration());
    try {
        @SuppressWarnings("unchecked")
        Class<? extends RpcClientFactory> rpcFactoryClass = (Class<? extends RpcClientFactory>) Class
                .forName(conf.getRpcFactoryClass());
        List<String> addressList = Arrays.asList(conf.getHosts().split(","));
        int index = new Random().nextInt(addressList.size());

        RpcClient rpcClient = rpcFactoryClass.newInstance().create(
                Context.getDefault().withDeadlineAfter(Duration.millis(conf.getTimeoutMs())),
                addressList.get(index));
        vtgate = new VTGateBlockingConn(rpcClient);
        includedFields = conf.getIncludedFields();
    } catch (ClassNotFoundException | InstantiationException | IllegalAccessException exc) {
        throw new RuntimeException(exc);
    }
}