Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

List of usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

On this page you can find usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
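
A minimal sketch of the typical call pattern is shown below. It is illustrative only and not taken from any of the projects listed under Usage; the class name MinimalRecordReader and the property key my.custom.property are made up for this sketch. A RecordReader (the same applies to a Mapper or Reducer) obtains the job Configuration from the TaskAttemptContext in initialize() and reads its settings from there.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class MinimalRecordReader extends RecordReader<LongWritable, Text> {

    private Configuration conf;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The task's view of the job configuration.
        conf = context.getConfiguration();

        // Typical uses: read job-level settings and resolve the input file system.
        String customValue = conf.get("my.custom.property", "default");
        Path path = ((FileSplit) split).getPath();
        FileSystem fs = path.getFileSystem(conf);
        // ... open the file and prepare iteration here ...
    }

    // The remaining RecordReader methods are stubbed out to keep the sketch short.
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        return false;
    }

    @Override
    public LongWritable getCurrentKey() {
        return null;
    }

    @Override
    public Text getCurrentValue() {
        return null;
    }

    @Override
    public float getProgress() {
        return 0f;
    }

    @Override
    public void close() throws IOException {
    }
}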

Usage

From source file: io.druid.data.input.avro.AvroValueInputFormat.java

License: Apache License

/**
 * {@inheritDoc}
 */
@Override
public RecordReader<NullWritable, GenericRecord> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    Schema readerSchema = AvroJob.getInputValueSchema(context.getConfiguration());

    if (readerSchema == null) {
        String schemaFilePath = context.getConfiguration().get(CONF_INPUT_VALUE_SCHEMA_PATH);
        if (StringUtils.isNotBlank(schemaFilePath)) {
            log.info("Using file: %s as reader schema.", schemaFilePath);
            try (FSDataInputStream inputStream = FileSystem.get(context.getConfiguration())
                    .open(new Path(schemaFilePath))) {
                readerSchema = new Schema.Parser().parse(inputStream);
            }
        }
    }

    if (null == readerSchema) {
        log.warn("Reader schema was not set. Use AvroJob.setInputKeySchema() if desired.");
        log.info("Using a reader schema equal to the writer schema.");
    }
    return new AvroValueRecordReader(readerSchema);
}

From source file: io.druid.indexer.hadoop.DatasourceRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit split, final TaskAttemptContext context)
        throws IOException, InterruptedException {
    spec = readAndVerifyDatasourceIngestionSpec(context.getConfiguration(),
            HadoopDruidIndexerConfig.jsonMapper);

    List<WindowedDataSegment> segments = ((DatasourceInputSplit) split).getSegments();

    List<WindowedStorageAdapter> adapters = Lists.transform(segments,
            new Function<WindowedDataSegment, WindowedStorageAdapter>() {
                @Override
                public WindowedStorageAdapter apply(WindowedDataSegment segment) {
                    try {
                        logger.info("Getting storage path for segment [%s]",
                                segment.getSegment().getIdentifier());
                        Path path = new Path(JobHelper.getURIFromSegment(segment.getSegment()));

                        logger.info("Fetch segment files from [%s]", path);

                        File dir = Files.createTempDir();
                        tmpSegmentDirs.add(dir);
                        logger.info("Locally storing fetched segment at [%s]", dir);

                        JobHelper.unzipNoGuava(path, context.getConfiguration(), dir, context);
                        logger.info("finished fetching segment files");

                        QueryableIndex index = IndexIO.loadIndex(dir);
                        indexes.add(index);
                        numRows += index.getNumRows();

                        return new WindowedStorageAdapter(new QueryableIndexStorageAdapter(index),
                                segment.getInterval());
                    } catch (IOException ex) {
                        throw Throwables.propagate(ex);
                    }
                }
            });

    firehose = new IngestSegmentFirehose(adapters, spec.getDimensions(), spec.getMetrics(), spec.getFilter(),
            spec.getGranularity());

}

From source file: io.druid.indexer.hadoop.DatasourceRecordReaderTest.java

License: Apache License

@Test
public void testSanity() throws Exception {
    DataSegment segment = new DefaultObjectMapper()
            .readValue(this.getClass().getClassLoader().getResource("test-segment/descriptor.json"),
                    DataSegment.class)
            .withLoadSpec(ImmutableMap.<String, Object>of("type", "local", "path",
                    this.getClass().getClassLoader().getResource("test-segment/index.zip").getPath()));
    InputSplit split = new DatasourceInputSplit(Lists.newArrayList(WindowedDataSegment.of(segment)));

    Configuration config = new Configuration();
    config.set(DatasourceInputFormat.CONF_DRUID_SCHEMA,
            HadoopDruidIndexerConfig.jsonMapper
                    .writeValueAsString(new DatasourceIngestionSpec(segment.getDataSource(),
                            segment.getInterval(), null, null, segment.getDimensions(), segment.getMetrics())));

    TaskAttemptContext context = EasyMock.createNiceMock(TaskAttemptContext.class);
    EasyMock.expect(context.getConfiguration()).andReturn(config).anyTimes();
    EasyMock.replay(context);

    DatasourceRecordReader rr = new DatasourceRecordReader();
    rr.initialize(split, context);

    Assert.assertEquals(0, rr.getProgress(), 0.0001);

    List<InputRow> rows = Lists.newArrayList();
    while (rr.nextKeyValue()) {
        rows.add(rr.getCurrentValue());
    }
    verifyRows(rows);

    Assert.assertEquals(1, rr.getProgress(), 0.0001);

    rr.close();
}

From source file: io.fluo.mapreduce.FluoInputFormat.java

License: Apache License

@Override
public RecordReader<Bytes, ColumnIterator> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {

    return new RecordReader<Bytes, ColumnIterator>() {

        private Entry<Bytes, ColumnIterator> entry;
        private RowIterator rowIter;
        private Environment env = null;
        private TransactionImpl ti = null;

        @Override
        public void close() throws IOException {
            if (env != null) {
                env.close();
            }
            if (ti != null) {
                ti.close();
            }
        }

        @Override
        public Bytes getCurrentKey() throws IOException, InterruptedException {
            return entry.getKey();
        }

        @Override
        public ColumnIterator getCurrentValue() throws IOException, InterruptedException {
            return entry.getValue();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            // TODO Auto-generated method stub
            return 0;
        }

        @Override
        public void initialize(InputSplit split, TaskAttemptContext context)
                throws IOException, InterruptedException {
            try {
                // TODO this uses non public Accumulo API!
                RangeInputSplit ris = (RangeInputSplit) split;

                Span span = SpanUtil.toSpan(ris.getRange());

                ByteArrayInputStream bais = new ByteArrayInputStream(
                        context.getConfiguration().get(PROPS_CONF_KEY).getBytes("UTF-8"));
                PropertiesConfiguration props = new PropertiesConfiguration();
                props.load(bais);

                env = new Environment(new FluoConfiguration(props));

                ti = new TransactionImpl(env, context.getConfiguration().getLong(TIMESTAMP_CONF_KEY, -1));
                ScannerConfiguration sc = new ScannerConfiguration().setSpan(span);

                for (String fam : context.getConfiguration().getStrings(FAMS_CONF_KEY, new String[0]))
                    sc.fetchColumnFamily(Bytes.wrap(fam));

                rowIter = ti.get(sc);
            } catch (Exception e) {
                throw new IOException(e);
            }
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (rowIter.hasNext()) {
                entry = rowIter.next();
                return true;
            }
            return false;
        }
    };

}

From source file: io.fluo.mapreduce.FluoOutputFormat.java

License: Apache License

@Override
public RecordWriter<Loader, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    ByteArrayInputStream bais = new ByteArrayInputStream(
            context.getConfiguration().get(PROPS_CONF_KEY).getBytes("UTF-8"));
    Properties props = new Properties();
    props.load(bais);

    FluoConfiguration config = new FluoConfiguration(ConfigurationConverter.getConfiguration(props));

    try {
        // The executor must remain open for the returned RecordWriter; it is closed in close().
        final LoaderExecutorImpl lexecutor = new LoaderExecutorImpl(config);

        return new RecordWriter<Loader, NullWritable>() {

            @Override
            public void close(TaskAttemptContext context) throws IOException, InterruptedException {
                lexecutor.close();
            }

            @Override
            public void write(Loader loader, NullWritable nullw) throws IOException, InterruptedException {
                lexecutor.execute(loader);
            }
        };
    } catch (Exception e) {
        throw new IOException(e);
    }
}

From source file: io.fluo.webindex.data.util.WARCFileRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) inputSplit;
    Configuration conf = context.getConfiguration();
    Path path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);
    fsin = fs.open(path);
    arPath = path.getName();
    ar = WARCReaderFactory.get(path.getName(), fsin, true);
}

From source file: io.imply.druid.hadoop.DruidRecordReader.java

License: Apache License

@Override
public void initialize(final InputSplit split, final TaskAttemptContext context)
        throws IOException, InterruptedException {
    if (firehose != null) {
        firehose.close();
    }

    if (queryableIndex != null) {
        queryableIndex.close();
    }

    final WindowedDataSegment segment = ((DruidInputSplit) split).getSegment();

    queryableIndex = loadSegment(context, segment);
    firehose = makeFirehose(
            new WindowedStorageAdapter(new QueryableIndexStorageAdapter(queryableIndex), segment.getInterval()),
            DruidInputFormat.getFilter(context.getConfiguration()),
            DruidInputFormat.getColumns(context.getConfiguration()));
}

From source file: io.imply.druid.hadoop.DruidRecordReader.java

License: Apache License

private QueryableIndex loadSegment(final TaskAttemptContext context, final WindowedDataSegment segment)
        throws IOException {
    if (tmpDir == null) {
        tmpDir = Files.createTempDir();
    }

    final Path path = new Path(JobHelper.getURIFromSegment(segment.getSegment()));
    final File segmentDir = new File(tmpDir, segment.getSegment().getIdentifier());
    if (!segmentDir.exists()) {
        log.info("Fetching segment[%s] from[%s] to [%s].", segment.getSegment().getIdentifier(), path,
                segmentDir);
        if (!segmentDir.mkdir()) {
            throw new ISE("Failed to make directory[%s]", segmentDir);
        }
        JobHelper.unzipNoGuava(path, context.getConfiguration(), segmentDir, context);
    }

    final QueryableIndex index = HadoopDruidIndexerConfig.INDEX_IO.loadIndex(segmentDir);
    log.info("Loaded segment[%s].", segment.getSegment().getIdentifier());
    return index;
}

From source file: io.ssc.trackthetrackers.extraction.hadoop.io.ArcRecordReader.java

License: Open Source License

public void initialize(InputSplit insplit, TaskAttemptContext context) throws IOException {

    conf = context.getConfiguration();

    FileSplit split = (FileSplit) insplit;

    if (split.getStart() != 0) {
        String errorMessage = "Invalid ARC file split start " + split.getStart()
                + ": ARC files are not splittable";
        log.error(errorMessage);
        throw new IOException(errorMessage);
    }

    // open the file and seek to the start of the split
    final Path file = split.getPath();

    FileSystem fs = file.getFileSystem(context.getConfiguration());

    fsin = fs.open(file);

    // create a GZIP stream that *does not* automatically read through
    // members
    gzip = new GzipCompressorInputStream(fsin, false);

    fileLength = fs.getFileStatus(file).getLen();

    // First record should be an ARC file header record. Skip it.
    skipRecord();
}

From source file: io.vitess.hadoop.VitessRecordReader.java

License: Apache License

/**
 * Fetch connection parameters from Configuration and open VtGate connection.
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    this.split = (VitessInputSplit) split;
    conf = new VitessConf(context.getConfiguration());
    try {
        @SuppressWarnings("unchecked")
        Class<? extends RpcClientFactory> rpcFactoryClass = (Class<? extends RpcClientFactory>) Class
                .forName(conf.getRpcFactoryClass());
        List<String> addressList = Arrays.asList(conf.getHosts().split(","));
        int index = new Random().nextInt(addressList.size());

        RpcClient rpcClient = rpcFactoryClass.newInstance().create(
                Context.getDefault().withDeadlineAfter(Duration.millis(conf.getTimeoutMs())),
                addressList.get(index));
        vtgate = new VTGateBlockingConn(rpcClient);
        includedFields = conf.getIncludedFields();
    } catch (ClassNotFoundException | InstantiationException | IllegalAccessException exc) {
        throw new RuntimeException(exc);
    }
}