Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce JobContext getConfiguration.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
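For orientation, here is a minimal sketch of how getConfiguration() is typically used inside a custom InputFormat. The class name MyInputFormat and the property key my.custom.property are hypothetical and appear only for illustration; they are not taken from any of the sources listed below.

import java.io.IOException;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;

public abstract class MyInputFormat<K, V> extends InputFormat<K, V> {

    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
        // JobContext.getConfiguration() exposes the job's Configuration object.
        Configuration conf = context.getConfiguration();
        String value = conf.get("my.custom.property", "default-value");
        // A real implementation would build splits based on this value;
        // this sketch simply returns an empty list.
        return Collections.emptyList();
    }
}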

Usage

From source file: io.druid.indexer.Utils.java

License: Apache License

public static OutputStream makePathAndOutputStream(JobContext job, Path outputPath, boolean deleteExisting)
        throws IOException {
    OutputStream retVal;
    FileSystem fs = outputPath.getFileSystem(job.getConfiguration());
    Class<? extends CompressionCodec> codecClass;
    CompressionCodec codec = null;

    if (FileOutputFormat.getCompressOutput(job)) {
        codecClass = FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        outputPath = new Path(outputPath.toString() + codec.getDefaultExtension());
    }

    if (fs.exists(outputPath)) {
        if (deleteExisting) {
            fs.delete(outputPath, false);
        } else {
            throw new ISE("outputPath[%s] must not exist.", outputPath);
        }
    }

    if (FileOutputFormat.getCompressOutput(job)) {
        retVal = codec.createOutputStream(fs.create(outputPath, false));
    } else {
        retVal = fs.create(outputPath, false);
    }
    return retVal;
}
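As a rough usage sketch (not taken from the Druid sources), a caller with a JobContext in scope might write output through this helper as follows; the output path is hypothetical.

// Hedged sketch: 'context' is assumed to be an in-scope JobContext,
// and the output path is purely illustrative.
try (OutputStream out = Utils.makePathAndOutputStream(context, new Path("/tmp/example/output.json"), true)) {
    out.write("{}".getBytes(StandardCharsets.UTF_8));
}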

From source file: io.druid.indexer.Utils.java

License: Apache License

public static InputStream openInputStream(JobContext job, Path inputPath) throws IOException {
    return openInputStream(job, inputPath, inputPath.getFileSystem(job.getConfiguration()));
}

From source file: io.druid.indexer.Utils.java

License: Apache License

public static boolean exists(JobContext job, FileSystem fs, Path inputPath) throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fs.exists(inputPath);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job,
                GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        return fs.exists(new Path(inputPath.toString() + codec.getDefaultExtension()));
    }
}

From source file: io.druid.indexer.Utils.java

License: Apache License

public static InputStream openInputStream(JobContext job, Path inputPath, final FileSystem fileSystem)
        throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fileSystem.open(inputPath);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job,
                GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        inputPath = new Path(inputPath.toString() + codec.getDefaultExtension());

        return codec.createInputStream(fileSystem.open(inputPath));
    }
}

From source file: io.imply.druid.hadoop.DruidInputFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(final JobContext jobContext) throws IOException, InterruptedException {
    final Configuration conf = jobContext.getConfiguration();
    final String dataSource = getDataSource(conf);
    final List<Interval> intervals = getIntervals(conf);
    final List<DataSegment> segments;
    try (final HttpClientHolder httpClient = HttpClientHolder.create()) {
        segments = new DruidMetadataClient(httpClient.get(), objectMapper(), getCoordinatorHost(conf))
                .usedSegments(dataSource, intervals);
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }

    log.info("Got %,d used segments for dataSource[%s], intervals[%s] from coordinator.", segments.size(),
            dataSource, Joiner.on(", ").join(intervals));

    // Window the DataSegments by putting them in a timeline.
    final VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(
            Ordering.natural());
    for (DataSegment segment : segments) {
        timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
    }

    final List<InputSplit> splits = Lists.newArrayList();

    for (Interval interval : intervals) {
        final List<TimelineObjectHolder<String, DataSegment>> lookup = timeline.lookup(interval);
        for (final TimelineObjectHolder<String, DataSegment> holder : lookup) {
            for (final PartitionChunk<DataSegment> chunk : holder.getObject()) {
                final WindowedDataSegment windowedDataSegment = new WindowedDataSegment(chunk.getObject(),
                        holder.getInterval());
                splits.add(DruidInputSplit.create(windowedDataSegment));
            }
        }
    }

    log.info("Found %,d splits for dataSource[%s], intervals[%s].", splits.size(), dataSource,
            Joiner.on(", ").join(intervals));

    return splits;
}

From source file: io.vitess.hadoop.VitessInputFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(JobContext context) {
    VitessConf conf = new VitessConf(context.getConfiguration());
    List<SplitQueryResponse.Part> splitResult;
    try {
        @SuppressWarnings("unchecked")
        Class<? extends RpcClientFactory> rpcFactoryClass = (Class<? extends RpcClientFactory>) Class
                .forName(conf.getRpcFactoryClass());
        List<String> addressList = Arrays.asList(conf.getHosts().split(","));
        int index = new Random().nextInt(addressList.size());

        RpcClient rpcClient = rpcFactoryClass.newInstance().create(
                Context.getDefault().withDeadlineAfter(Duration.millis(conf.getTimeoutMs())),
                addressList.get(index));

        try (VTGateBlockingConn vtgate = new VTGateBlockingConn(rpcClient)) {
            splitResult = vtgate.splitQuery(Context.getDefault(), conf.getKeyspace(), conf.getInputQuery(),
                    null /* bind vars */, conf.getSplitColumns(), conf.getSplitCount(),
                    conf.getNumRowsPerQueryPart(), conf.getAlgorithm());
        }
    } catch (ClassNotFoundException | InstantiationException | IllegalAccessException | SQLException
            | IOException exc) {
        throw new RuntimeException(exc);
    }

    List<InputSplit> splits = Lists.newArrayList();
    for (SplitQueryResponse.Part part : splitResult) {
        splits.add(new VitessInputSplit(part));
    }

    for (InputSplit split : splits) {
        ((VitessInputSplit) split).setLocations(conf.getHosts().split(VitessConf.HOSTS_DELIM));
    }
    return splits;
}

From source file: it.crs4.features.BioImgInputFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    Configuration conf = job.getConfiguration();
    getMetadata(conf);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    int planesPerSplit = getPlanesPerSplit(job);
    for (FileStatus status : listStatus(job)) {
        splits.addAll(getSplitsForFile(status, conf, planesPerSplit));
    }
    return splits;
}

From source file: it.crs4.features.BioImgInputFormat.java

License: Apache License

public static void setPlanesPerSplit(JobContext job, int numPlanes) {
    job.getConfiguration().setInt(PLANES_PER_MAP, numPlanes);
}

From source file: it.crs4.features.BioImgInputFormat.java

License: Apache License

public static int getPlanesPerSplit(JobContext job) {
    return job.getConfiguration().getInt(PLANES_PER_MAP, 0);
}

From source file: it.crs4.features.BioImgInputFormat.java

License: Apache License

public static void setMetadataFile(JobContext job, String pathName) {
    job.getConfiguration().set(META_FN, pathName);
}