List of usage examples for org.apache.hadoop.mapreduce.JobContext.getConfiguration()
public Configuration getConfiguration();
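Returns the Configuration for the job. This is the standard way for InputFormats, OutputFormats, and other job-scoped components to read (or, via static helpers, write) job settings. Below is a minimal sketch of the pattern the examples on this page share, for orientation only; the class name ConfiguredInputFormat and the key example.table.name are illustrative placeholders, not taken from any of the sources below:

import java.io.IOException;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class ConfiguredInputFormat extends InputFormat<LongWritable, Text> {

    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException {
        // Every JobContext carries the job's Configuration; read settings from it.
        Configuration conf = context.getConfiguration();
        String table = conf.get("example.table.name", "default_table"); // illustrative key
        // ... build splits based on the configured settings ...
        return Collections.emptyList();
    }

    @Override
    public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
        throw new UnsupportedOperationException("record reading is out of scope for this sketch");
    }
}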
From source file: io.druid.indexer.Utils.java
License: Apache License
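Resolves the output FileSystem from the job's Configuration and, when compressed output is enabled, instantiates the configured compression codec with that same Configuration.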
public static OutputStream makePathAndOutputStream(JobContext job, Path outputPath, boolean deleteExisting)
        throws IOException {
    OutputStream retVal;
    FileSystem fs = outputPath.getFileSystem(job.getConfiguration());
    Class<? extends CompressionCodec> codecClass;
    CompressionCodec codec = null;

    if (FileOutputFormat.getCompressOutput(job)) {
        codecClass = FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        outputPath = new Path(outputPath.toString() + codec.getDefaultExtension());
    }

    if (fs.exists(outputPath)) {
        if (deleteExisting) {
            fs.delete(outputPath, false);
        } else {
            throw new ISE("outputPath[%s] must not exist.", outputPath);
        }
    }

    if (FileOutputFormat.getCompressOutput(job)) {
        retVal = codec.createOutputStream(fs.create(outputPath, false));
    } else {
        retVal = fs.create(outputPath, false);
    }

    return retVal;
}
From source file: io.druid.indexer.Utils.java
License: Apache License
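Looks up the input path's FileSystem via the job's Configuration, then delegates to the three-argument overload shown further below.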
public static InputStream openInputStream(JobContext job, Path inputPath) throws IOException {
    return openInputStream(job, inputPath, inputPath.getFileSystem(job.getConfiguration()));
}
From source file: io.druid.indexer.Utils.java
License: Apache License
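Uses the job's Configuration to instantiate the output compressor so that, when compressed output is enabled, the existence check targets the compressed file name.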
public static boolean exists(JobContext job, FileSystem fs, Path inputPath) throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fs.exists(inputPath);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job,
                GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        return fs.exists(new Path(inputPath.toString() + codec.getDefaultExtension()));
    }
}
From source file: io.druid.indexer.Utils.java
License: Apache License
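When compressed output is enabled, builds the codec from the job's Configuration and wraps the raw file stream in a decompressing one.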
public static InputStream openInputStream(JobContext job, Path inputPath, final FileSystem fileSystem)
        throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fileSystem.open(inputPath);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job,
                GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        inputPath = new Path(inputPath.toString() + codec.getDefaultExtension());
        return codec.createInputStream(fileSystem.open(inputPath));
    }
}
From source file: io.imply.druid.hadoop.DruidInputFormat.java
License: Apache License
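Reads the data source, intervals, and coordinator host out of the JobContext's Configuration, then converts the coordinator's used segments into input splits.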
@Override
public List<InputSplit> getSplits(final JobContext jobContext) throws IOException, InterruptedException {
    final Configuration conf = jobContext.getConfiguration();
    final String dataSource = getDataSource(conf);
    final List<Interval> intervals = getIntervals(conf);
    final List<DataSegment> segments;

    try (final HttpClientHolder httpClient = HttpClientHolder.create()) {
        segments = new DruidMetadataClient(httpClient.get(), objectMapper(), getCoordinatorHost(conf))
                .usedSegments(dataSource, intervals);
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }

    log.info("Got %,d used segments for dataSource[%s], intervals[%s] from coordinator.", segments.size(),
            dataSource, Joiner.on(", ").join(intervals));

    // Window the DataSegments by putting them in a timeline.
    final VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(
            Ordering.natural());
    for (DataSegment segment : segments) {
        timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
    }

    final List<InputSplit> splits = Lists.newArrayList();
    for (Interval interval : intervals) {
        final List<TimelineObjectHolder<String, DataSegment>> lookup = timeline.lookup(interval);
        for (final TimelineObjectHolder<String, DataSegment> holder : lookup) {
            for (final PartitionChunk<DataSegment> chunk : holder.getObject()) {
                final WindowedDataSegment windowedDataSegment = new WindowedDataSegment(chunk.getObject(),
                        holder.getInterval());
                splits.add(DruidInputSplit.create(windowedDataSegment));
            }
        }
    }

    log.info("Found %,d splits for dataSource[%s], intervals[%s].", splits.size(), dataSource,
            Joiner.on(", ").join(intervals));
    return splits;
}
From source file: io.vitess.hadoop.VitessInputFormat.java
License: Apache License
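Wraps the JobContext's Configuration in a VitessConf to obtain the RPC factory class, host list, and split-query parameters.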
@Override
public List<InputSplit> getSplits(JobContext context) {
    VitessConf conf = new VitessConf(context.getConfiguration());
    List<SplitQueryResponse.Part> splitResult;

    try {
        @SuppressWarnings("unchecked")
        Class<? extends RpcClientFactory> rpcFactoryClass = (Class<? extends RpcClientFactory>) Class
                .forName(conf.getRpcFactoryClass());
        List<String> addressList = Arrays.asList(conf.getHosts().split(","));
        int index = new Random().nextInt(addressList.size());
        RpcClient rpcClient = rpcFactoryClass.newInstance().create(
                Context.getDefault().withDeadlineAfter(Duration.millis(conf.getTimeoutMs())),
                addressList.get(index));
        try (VTGateBlockingConn vtgate = new VTGateBlockingConn(rpcClient)) {
            splitResult = vtgate.splitQuery(Context.getDefault(), conf.getKeyspace(), conf.getInputQuery(),
                    null /* bind vars */, conf.getSplitColumns(), conf.getSplitCount(),
                    conf.getNumRowsPerQueryPart(), conf.getAlgorithm());
        }
    } catch (ClassNotFoundException | InstantiationException | IllegalAccessException | SQLException
            | IOException exc) {
        throw new RuntimeException(exc);
    }

    List<InputSplit> splits = Lists.newArrayList();
    for (SplitQueryResponse.Part part : splitResult) {
        splits.add(new VitessInputSplit(part));
    }
    for (InputSplit split : splits) {
        ((VitessInputSplit) split).setLocations(conf.getHosts().split(VitessConf.HOSTS_DELIM));
    }
    return splits;
}
From source file: it.crs4.features.BioImgInputFormat.java
License: Apache License
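Fetches the Configuration once and uses it both to load metadata and to compute per-file splits.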
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    Configuration conf = job.getConfiguration();
    getMetadata(conf);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    int planesPerSplit = getPlanesPerSplit(job);
    for (FileStatus status : listStatus(job)) {
        splits.addAll(getSplitsForFile(status, conf, planesPerSplit));
    }
    return splits;
}
From source file: it.crs4.features.BioImgInputFormat.java
License: Apache License
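Writes the planes-per-split setting into the job's Configuration.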
public static void setPlanesPerSplit(JobContext job, int numPlanes) {
    job.getConfiguration().setInt(PLANES_PER_MAP, numPlanes);
}
From source file: it.crs4.features.BioImgInputFormat.java
License: Apache License
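Reads the planes-per-split setting back from the job's Configuration, defaulting to 0.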
public static int getPlanesPerSplit(JobContext job) {
    return job.getConfiguration().getInt(PLANES_PER_MAP, 0);
}
From source file: it.crs4.features.BioImgInputFormat.java
License: Apache License
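Stores the metadata file path in the job's Configuration.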
public static void setMetadataFile(JobContext job, String pathName) {
    job.getConfiguration().set(META_FN, pathName);
}
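For completeness, a minimal driver-side sketch of how the BioImgInputFormat helpers above might be called before job submission. The job name, plane count, and metadata path are illustrative; Job implements JobContext, so it can be passed wherever these helpers expect one:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import it.crs4.features.BioImgInputFormat;

public class BioImgJobSetup {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "bioimg-example");
        // These helpers read and write the job's Configuration under the hood.
        BioImgInputFormat.setPlanesPerSplit(job, 50);              // illustrative value
        BioImgInputFormat.setMetadataFile(job, "/data/meta.xml"); // illustrative path
        System.out.println(BioImgInputFormat.getPlanesPerSplit(job)); // prints 50
    }
}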