List of usage examples for org.apache.hadoop.mapreduce.JobContext.getConfiguration()
public Configuration getConfiguration();
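Returns the Configuration for the job. This is the standard way for InputFormats, OutputFormats, and other job-scoped components to read (or, via static helpers, write) job settings. Below is a minimal sketch of the pattern the examples on this page share, for orientation only; the class name ConfiguredInputFormat and the key example.table.name are illustrative placeholders, not taken from any of the sources below:

import java.io.IOException;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class ConfiguredInputFormat extends InputFormat<LongWritable, Text> {

    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException {
        // Every JobContext carries the job's Configuration; read settings from it.
        Configuration conf = context.getConfiguration();
        String table = conf.get("example.table.name", "default_table"); // illustrative key
        // ... build splits based on the configured settings ...
        return Collections.emptyList();
    }

    @Override
    public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
        throw new UnsupportedOperationException("record reading is out of scope for this sketch");
    }
}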
From source file: io.druid.indexer.Utils.java
License: Apache License
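Resolves the output FileSystem from the job's Configuration and, when compressed output is enabled, instantiates the configured compression codec with that same Configuration.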
public static OutputStream makePathAndOutputStream(JobContext job, Path outputPath, boolean deleteExisting)
        throws IOException {
    OutputStream retVal;
    FileSystem fs = outputPath.getFileSystem(job.getConfiguration());
    Class<? extends CompressionCodec> codecClass;
    CompressionCodec codec = null;

    if (FileOutputFormat.getCompressOutput(job)) {
        codecClass = FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        outputPath = new Path(outputPath.toString() + codec.getDefaultExtension());
    }

    if (fs.exists(outputPath)) {
        if (deleteExisting) {
            fs.delete(outputPath, false);
        } else {
            throw new ISE("outputPath[%s] must not exist.", outputPath);
        }
    }

    if (FileOutputFormat.getCompressOutput(job)) {
        retVal = codec.createOutputStream(fs.create(outputPath, false));
    } else {
        retVal = fs.create(outputPath, false);
    }

    return retVal;
}
From source file: io.druid.indexer.Utils.java
License: Apache License
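Looks up the input path's FileSystem via the job's Configuration, then delegates to the three-argument overload shown further below.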
public static InputStream openInputStream(JobContext job, Path inputPath) throws IOException {
    return openInputStream(job, inputPath, inputPath.getFileSystem(job.getConfiguration()));
}
From source file: io.druid.indexer.Utils.java
License: Apache License
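Uses the job's Configuration to instantiate the output compressor so that, when compressed output is enabled, the existence check targets the compressed file name.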
public static boolean exists(JobContext job, FileSystem fs, Path inputPath) throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fs.exists(inputPath);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job,
                GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        return fs.exists(new Path(inputPath.toString() + codec.getDefaultExtension()));
    }
}
From source file: io.druid.indexer.Utils.java
License: Apache License
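When compressed output is enabled, builds the codec from the job's Configuration and wraps the raw file stream in a decompressing one.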
public static InputStream openInputStream(JobContext job, Path inputPath, final FileSystem fileSystem)
        throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fileSystem.open(inputPath);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job,
                GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        inputPath = new Path(inputPath.toString() + codec.getDefaultExtension());
        return codec.createInputStream(fileSystem.open(inputPath));
    }
}
From source file: io.imply.druid.hadoop.DruidInputFormat.java
License: Apache License
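Reads the data source, intervals, and coordinator host out of the JobContext's Configuration, then converts the coordinator's used segments into input splits.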
@Override
public List<InputSplit> getSplits(final JobContext jobContext) throws IOException, InterruptedException {
    final Configuration conf = jobContext.getConfiguration();
    final String dataSource = getDataSource(conf);
    final List<Interval> intervals = getIntervals(conf);
    final List<DataSegment> segments;

    try (final HttpClientHolder httpClient = HttpClientHolder.create()) {
        segments = new DruidMetadataClient(httpClient.get(), objectMapper(), getCoordinatorHost(conf))
                .usedSegments(dataSource, intervals);
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }

    log.info("Got %,d used segments for dataSource[%s], intervals[%s] from coordinator.", segments.size(),
            dataSource, Joiner.on(", ").join(intervals));

    // Window the DataSegments by putting them in a timeline.
    final VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(
            Ordering.natural());
    for (DataSegment segment : segments) {
        timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
    }

    final List<InputSplit> splits = Lists.newArrayList();
    for (Interval interval : intervals) {
        final List<TimelineObjectHolder<String, DataSegment>> lookup = timeline.lookup(interval);
        for (final TimelineObjectHolder<String, DataSegment> holder : lookup) {
            for (final PartitionChunk<DataSegment> chunk : holder.getObject()) {
                final WindowedDataSegment windowedDataSegment = new WindowedDataSegment(chunk.getObject(),
                        holder.getInterval());
                splits.add(DruidInputSplit.create(windowedDataSegment));
            }
        }
    }

    log.info("Found %,d splits for dataSource[%s], intervals[%s].", splits.size(), dataSource,
            Joiner.on(", ").join(intervals));
    return splits;
}
From source file: io.vitess.hadoop.VitessInputFormat.java
License: Apache License
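Wraps the JobContext's Configuration in a VitessConf to obtain the RPC factory class, host list, and split-query parameters.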
@Override
public List<InputSplit> getSplits(JobContext context) {
    VitessConf conf = new VitessConf(context.getConfiguration());
    List<SplitQueryResponse.Part> splitResult;

    try {
        @SuppressWarnings("unchecked")
        Class<? extends RpcClientFactory> rpcFactoryClass = (Class<? extends RpcClientFactory>) Class
                .forName(conf.getRpcFactoryClass());
        List<String> addressList = Arrays.asList(conf.getHosts().split(","));
        int index = new Random().nextInt(addressList.size());
        RpcClient rpcClient = rpcFactoryClass.newInstance().create(
                Context.getDefault().withDeadlineAfter(Duration.millis(conf.getTimeoutMs())),
                addressList.get(index));
        try (VTGateBlockingConn vtgate = new VTGateBlockingConn(rpcClient)) {
            splitResult = vtgate.splitQuery(Context.getDefault(), conf.getKeyspace(), conf.getInputQuery(),
                    null /* bind vars */, conf.getSplitColumns(), conf.getSplitCount(),
                    conf.getNumRowsPerQueryPart(), conf.getAlgorithm());
        }
    } catch (ClassNotFoundException | InstantiationException | IllegalAccessException | SQLException
            | IOException exc) {
        throw new RuntimeException(exc);
    }

    List<InputSplit> splits = Lists.newArrayList();
    for (SplitQueryResponse.Part part : splitResult) {
        splits.add(new VitessInputSplit(part));
    }
    for (InputSplit split : splits) {
        ((VitessInputSplit) split).setLocations(conf.getHosts().split(VitessConf.HOSTS_DELIM));
    }
    return splits;
}
From source file: it.crs4.features.BioImgInputFormat.java
License: Apache License
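Fetches the Configuration once and uses it both to load metadata and to compute per-file splits.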
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    Configuration conf = job.getConfiguration();
    getMetadata(conf);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    int planesPerSplit = getPlanesPerSplit(job);
    for (FileStatus status : listStatus(job)) {
        splits.addAll(getSplitsForFile(status, conf, planesPerSplit));
    }
    return splits;
}
From source file: it.crs4.features.BioImgInputFormat.java
License: Apache License
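Writes the planes-per-split setting into the job's Configuration.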
public static void setPlanesPerSplit(JobContext job, int numPlanes) {
    job.getConfiguration().setInt(PLANES_PER_MAP, numPlanes);
}
From source file: it.crs4.features.BioImgInputFormat.java
License: Apache License
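Reads the planes-per-split setting back from the job's Configuration, defaulting to 0.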
public static int getPlanesPerSplit(JobContext job) {
    return job.getConfiguration().getInt(PLANES_PER_MAP, 0);
}
From source file: it.crs4.features.BioImgInputFormat.java
License: Apache License
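Stores the metadata file path in the job's Configuration.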
public static void setMetadataFile(JobContext job, String pathName) {
    job.getConfiguration().set(META_FN, pathName);
}
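For completeness, a minimal driver-side sketch of how the BioImgInputFormat helpers above might be called before job submission. The job name, plane count, and metadata path are illustrative; Job implements JobContext, so it can be passed wherever these helpers expect one:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import it.crs4.features.BioImgInputFormat;

public class BioImgJobSetup {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "bioimg-example");
        // These helpers read and write the job's Configuration under the hood.
        BioImgInputFormat.setPlanesPerSplit(job, 50);              // illustrative value
        BioImgInputFormat.setMetadataFile(job, "/data/meta.xml"); // illustrative path
        System.out.println(BioImgInputFormat.getPlanesPerSplit(job)); // prints 50
    }
}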