List of usage examples for org.apache.hadoop.io NullWritable get
public static NullWritable get()
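NullWritable.get() returns the shared, immutable singleton instance; it serializes to zero bytes and is the conventional placeholder when a key or value carries no information. A minimal sketch of the basic pattern before the real-world examples below (PassThroughMapper is a hypothetical class, not taken from any of the projects listed here):

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class PassThroughMapper extends Mapper<LongWritable, Text, NullWritable, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // NullWritable.get() always returns the same instance, so no per-record allocation
        // is needed and nothing is written for the key in the output.
        context.write(NullWritable.get(), value);
    }
}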
From source file:edu.umn.cs.spatialHadoop.operations.Skyline.java
License:Open Source License
/**
 * Computes the skyline of an input file using a single machine algorithm.
 * The output is written to the output file. If the output file is null, the
 * output is just thrown away.
 * @param inFile
 * @param outFile
 * @param params
 * @throws IOException
 * @throws InterruptedException
 */
public static void skylineLocal(Path inFile, Path outFile, final OperationsParams params)
        throws IOException, InterruptedException {
    if (params.getBoolean("mem", false))
        MemoryReporter.startReporting();

    // 1- Split the input path/file to get splits that can be processed independently
    final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
    Job job = Job.getInstance(params);
    SpatialInputFormat3.setInputPaths(job, inFile);
    final List<InputSplit> splits = inputFormat.getSplits(job);
    final Direction dir = params.getDirection("dir", Direction.MaxMax);

    // 2- Read all input points in memory
    LOG.info("Reading points from " + splits.size() + " splits");
    List<Point[]> allLists = Parallel.forEach(splits.size(), new RunnableRange<Point[]>() {
        @Override
        public Point[] run(int i1, int i2) {
            try {
                List<Point> finalPoints = new ArrayList<Point>();
                final int MaxSize = 100000;
                Point[] points = new Point[MaxSize];
                int size = 0;
                for (int i = i1; i < i2; i++) {
                    org.apache.hadoop.mapreduce.lib.input.FileSplit fsplit =
                            (org.apache.hadoop.mapreduce.lib.input.FileSplit) splits.get(i);
                    final RecordReader<Rectangle, Iterable<Point>> reader =
                            inputFormat.createRecordReader(fsplit, null);
                    if (reader instanceof SpatialRecordReader3) {
                        ((SpatialRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof RTreeRecordReader3) {
                        ((RTreeRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof HDFRecordReader) {
                        ((HDFRecordReader) reader).initialize(fsplit, params);
                    } else {
                        throw new RuntimeException("Unknown record reader");
                    }
                    while (reader.nextKeyValue()) {
                        Iterable<Point> pts = reader.getCurrentValue();
                        for (Point p : pts) {
                            points[size++] = p.clone();
                            if (size >= points.length) {
                                // Perform Skyline and write the result to finalPoints
                                Point[] skylinePoints = skylineInMemory(points, dir);
                                for (Point skylinePoint : skylinePoints)
                                    finalPoints.add(skylinePoint);
                                size = 0; // reset
                            }
                        }
                    }
                    reader.close();
                }
                while (size-- > 0)
                    finalPoints.add(points[size]);
                return finalPoints.toArray(new Point[finalPoints.size()]);
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            return null;
        }
    }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));

    int totalNumPoints = 0;
    for (Point[] list : allLists)
        totalNumPoints += list.length;

    LOG.info("Read " + totalNumPoints + " points and merging into one list");
    Point[] allPoints = new Point[totalNumPoints];
    int pointer = 0;
    for (Point[] list : allLists) {
        System.arraycopy(list, 0, allPoints, pointer, list.length);
        pointer += list.length;
    }
    allLists.clear(); // To let the GC collect it

    Point[] skyline = skylineInMemory(allPoints, dir);

    if (outFile != null) {
        if (params.getBoolean("overwrite", false)) {
            FileSystem outFs = outFile.getFileSystem(new Configuration());
            outFs.delete(outFile, true);
        }
        GridRecordWriter<Point> out = new GridRecordWriter<Point>(outFile, null, null, null);
        for (Point pt : skyline) {
            out.write(NullWritable.get(), pt);
        }
        out.close(null);
    }
}
From source file:edu.umn.cs.spatialHadoop.RandomSpatialGenerator.java
License:Open Source License
/**
 * Generates random shapes and writes the result to a file.
 * @param outFile - The file name to write to. If null, data is generated
 *   to the standard output
 * @param params - Generation parameters (shape, mbr, size, sindex, seed, ...)
 * @throws IOException
 */
private static void generateFileLocal(Path outFile, OperationsParams params) throws IOException {
    JobConf job = new JobConf(params, RandomSpatialGenerator.class);
    FileSystem outFS = outFile.getFileSystem(params);
    long blocksize = outFS.getDefaultBlockSize(outFile);
    String sindex = params.get("sindex");
    Rectangle mbr = params.getShape("mbr").getMBR();
    long totalSize = params.getSize("size");

    // Calculate the dimensions of each partition based on gindex type
    CellInfo[] cells;
    if (sindex == null) {
        cells = new CellInfo[] { new CellInfo(1, mbr) };
    } else if (sindex.equals("grid")) {
        int num_partitions = Repartition.calculateNumberOfPartitions(params, totalSize, outFS, outFile, blocksize);
        GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2);
        gridInfo.calculateCellDimensions(num_partitions);
        cells = gridInfo.getAllCells();
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    outFS.mkdirs(outFile);

    ShapeRecordWriter<Shape> writer;
    if (sindex == null || sindex.equals("grid")) {
        writer = new GridRecordWriter<Shape>(outFile, job, null, cells);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    int rectSize = params.getInt("rectsize", 100);
    long seed = params.getLong("seed", System.currentTimeMillis());
    float circleThickness = params.getFloat("thickness", 1);
    DistributionType type = SpatialSite.getDistributionType(params, "type", DistributionType.UNIFORM);
    Shape shape = params.getShape("shape");
    long t1 = System.currentTimeMillis();

    RandomShapeGenerator<Shape> generator =
            new RandomShapeGenerator<Shape>(totalSize, mbr, type, rectSize, seed, circleThickness);
    Rectangle key = generator.createKey();

    while (generator.next(key, shape)) {
        // Serialize it to text
        writer.write(NullWritable.get(), shape);
    }
    writer.close(null);

    long t2 = System.currentTimeMillis();
    System.out.println("Generation time: " + (t2 - t1) + " millis");
}
From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.Summarize.java
License:Open Source License
private void doSummary(SummaryGroup group) throws IOException, InterruptedException {
    // The reverseStrand flag is already represented in which group is passed
    // to this method, so there's no need to set it in summary.range.
    summary.rid.set(currentReferenceID);
    summary.range.beg.set((int) (group.sumBeg / group.count));
    summary.range.end.set((int) (group.sumEnd / group.count));
    summary.count.set(group.count);

    mos.write(NullWritable.get(), summary, group.outName);
    group.reset();
}
From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.SummarySort.java
License:Open Source License
@Override
protected void reduce(LongWritable ignored, Iterable<Text> records,
        Reducer<LongWritable, Text, NullWritable, Text>.Context ctx)
        throws IOException, InterruptedException {
    for (Text rec : records)
        ctx.write(NullWritable.get(), rec);
}
From source file:fi.tkk.ics.hadoop.bam.cli.plugins.Sort.java
License:Open Source License
@Override
protected void reduce(LongWritable ignored, Iterable<SAMRecordWritable> records,
        Reducer<LongWritable, SAMRecordWritable, NullWritable, SAMRecordWritable>.Context ctx)
        throws IOException, InterruptedException {
    for (SAMRecordWritable rec : records)
        ctx.write(NullWritable.get(), rec);
}
From source file:fi.tkk.ics.hadoop.bam.cli.plugins.VCFSort.java
License:Open Source License
@Override
protected void reduce(LongWritable ignored, Iterable<VariantContextWritable> records,
        Reducer<LongWritable, VariantContextWritable, NullWritable, VariantContextWritable>.Context ctx)
        throws IOException, InterruptedException {
    for (VariantContextWritable rec : records)
        ctx.write(NullWritable.get(), rec);
}
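The three hadoop-bam reducers above (SummarySort, Sort, VCFSort) share the same identity pattern: records arrive already ordered by a LongWritable sort key, and the reducer discards that key by emitting NullWritable.get() instead. A minimal sketch of the driver-side wiring such a reducer assumes; this is illustrative only and not the actual hadoop-bam driver, and SortDriverSketch is a hypothetical class:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class SortDriverSketch {
    public static Job configure(Configuration conf) throws Exception {
        Job job = Job.getInstance(conf, "sort-by-position");
        // Map output is keyed by the sort position; the reducer drops it.
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        // Final output keeps only the record: NullWritable keys serialize to nothing.
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        return job;
    }
}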
From source file:full_MapReduce.FindBestAttributeMapper.java
License:Open Source License
public void map(Text key, MapWritable value, Context context) throws IOException, InterruptedException {
    TextArrayWritable values = getValues(value);

    Map<Text, Integer> tuple_per_split = getTuplePerSplit(value);
    int tot_tuple = 0;
    for (Integer i : tuple_per_split.values()) {
        tot_tuple += i;
    }

    double global_entropy = global_entropy(value, tot_tuple);
    double gain = gain(global_entropy, tuple_per_split, value, tot_tuple);
    DoubleWritable gain_ratio = new DoubleWritable(gainRatio(gain, tuple_per_split, tot_tuple));

    context.write(NullWritable.get(), new AttributeGainRatioWritable(key, values, gain_ratio));
}
From source file:gobblin.compaction.mapreduce.avro.AvroKeyDedupReducer.java
License:Apache License
@Override
protected void reduce(AvroKey<GenericRecord> key, Iterable<AvroValue<GenericRecord>> values, Context context)
        throws IOException, InterruptedException {
    int numVals = 0;
    AvroValue<GenericRecord> valueToRetain = null;

    for (AvroValue<GenericRecord> value : values) {
        if (valueToRetain == null) {
            valueToRetain = value;
        } else if (this.deltaComparatorOptional.isPresent()) {
            valueToRetain = this.deltaComparatorOptional.get().compare(valueToRetain, value) >= 0
                    ? valueToRetain : value;
        }
        numVals++;
    }
    this.outKey.datum(valueToRetain.datum());

    if (numVals > 1) {
        context.getCounter(EVENT_COUNTER.MORE_THAN_1).increment(1);
        context.getCounter(EVENT_COUNTER.DEDUPED).increment(numVals - 1);
    }
    context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1);

    context.write(this.outKey, NullWritable.get());
}
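Here NullWritable.get() is used as the output value rather than the key: the deduplicated record travels in the Avro key, and the value slot is left empty. A minimal, hypothetical driver sketch of the setup this pattern assumes (DedupDriverSketch and the method name are assumptions; only the Avro MapReduce API calls are standard):

import org.apache.avro.Schema;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;

public class DedupDriverSketch {
    public static Job configure(Configuration conf, Schema recordSchema) throws Exception {
        Job job = Job.getInstance(conf, "avro-dedup");
        // The Avro key carries the record; the schema tells AvroKeyOutputFormat how to write it.
        AvroJob.setOutputKeySchema(job, recordSchema);
        // NullWritable fills the value slot, so only the records themselves land in the output files.
        job.setOutputValueClass(NullWritable.class);
        job.setOutputFormatClass(AvroKeyOutputFormat.class);
        return job;
    }
}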
From source file:gobblin.compaction.mapreduce.avro.AvroKeyMapper.java
License:Apache License
@Override
protected void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
        throws IOException, InterruptedException {
    if (context.getNumReduceTasks() == 0) {
        context.write(key, NullWritable.get());
    } else {
        populateComparableKeyRecord(key.datum(), this.outKey.datum());
        this.outValue.datum(key.datum());
        try {
            context.write(this.outKey, this.outValue);
        } catch (AvroRuntimeException e) {
            final Path[] paths = ((CombineFileSplit) context.getInputSplit()).getPaths();
            throw new IOException("Unable to process paths " + StringUtils.join(paths, ','), e);
        }
    }
    context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1);
}
From source file:gov.va.research.red.ex.hadoop.BioCReducer.java
License:Apache License
@Override
protected void cleanup(Reducer<Text, MatchedElementWritable, Text, NullWritable>.Context context)
        throws IOException, InterruptedException {
    StringWriter sw = new StringWriter();
    BioCCollectionWriter cw = null;
    try {
        cw = biocFactory.createBioCCollectionWriter(sw);
        cw.writeCollection(biocCollection);
    } catch (XMLStreamException e) {
        throw new RuntimeException(e);
    } finally {
        if (cw != null) {
            cw.close();
        }
    }
    Text output = new Text(sw.toString());
    context.write(output, NullWritable.get());
}