Example usage for org.apache.hadoop.io.NullWritable.get()

Introduction

On this page you can find usage examples for org.apache.hadoop.io.NullWritable.get().

Prototype

public static NullWritable get() 

Document

Returns the single instance of this class.
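
Before the collected examples, here is a minimal sketch of the typical pattern: because NullWritable is a singleton, get() is passed wherever a MapReduce key or value slot must be filled but carries no data. The class name PassThroughMapper below is illustrative only and not taken from any of the projects listed.

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/** Keeps only the record text and discards the key by emitting the NullWritable singleton. */
public class PassThroughMapper extends Mapper<Object, Text, NullWritable, Text> {
    @Override
    protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        // NullWritable.get() always returns the same immutable instance,
        // so no per-record object allocation is needed.
        context.write(NullWritable.get(), value);
    }
}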

Usage

From source file:edu.umn.cs.spatialHadoop.operations.Skyline.java

License:Open Source License

/**
 * Computes the skyline of an input file using a single machine algorithm.
 * The output is written to the output file. If output file is null, the
 * output is just thrown away.
 * @param inFile the input file containing the points
 * @param outFile the output file; if null, the result is thrown away
 * @param params additional operation parameters
 * @throws IOException
 * @throws InterruptedException
 */
public static void skylineLocal(Path inFile, Path outFile, final OperationsParams params)
        throws IOException, InterruptedException {
    if (params.getBoolean("mem", false))
        MemoryReporter.startReporting();
    // 1- Split the input path/file to get splits that can be processed
    // independently
    final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
    Job job = Job.getInstance(params);
    SpatialInputFormat3.setInputPaths(job, inFile);
    final List<InputSplit> splits = inputFormat.getSplits(job);
    final Direction dir = params.getDirection("dir", Direction.MaxMax);

    // 2- Read all input points in memory
    LOG.info("Reading points from " + splits.size() + " splits");
    List<Point[]> allLists = Parallel.forEach(splits.size(), new RunnableRange<Point[]>() {
        @Override
        public Point[] run(int i1, int i2) {
            try {
                List<Point> finalPoints = new ArrayList<Point>();
                final int MaxSize = 100000;
                Point[] points = new Point[MaxSize];
                int size = 0;
                for (int i = i1; i < i2; i++) {
                    org.apache.hadoop.mapreduce.lib.input.FileSplit fsplit = (org.apache.hadoop.mapreduce.lib.input.FileSplit) splits
                            .get(i);
                    final RecordReader<Rectangle, Iterable<Point>> reader = inputFormat
                            .createRecordReader(fsplit, null);
                    if (reader instanceof SpatialRecordReader3) {
                        ((SpatialRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof RTreeRecordReader3) {
                        ((RTreeRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof HDFRecordReader) {
                        ((HDFRecordReader) reader).initialize(fsplit, params);
                    } else {
                        throw new RuntimeException("Unknown record reader");
                    }
                    while (reader.nextKeyValue()) {
                        Iterable<Point> pts = reader.getCurrentValue();
                        for (Point p : pts) {
                            points[size++] = p.clone();
                            if (size >= points.length) {
                                // Perform Skyline and write the result to finalPoints
                                Point[] skylinePoints = skylineInMemory(points, dir);
                                for (Point skylinePoint : skylinePoints)
                                    finalPoints.add(skylinePoint);
                                size = 0; // reset
                            }
                        }
                    }
                    reader.close();
                }
                while (size-- > 0)
                    finalPoints.add(points[size]);
                return finalPoints.toArray(new Point[finalPoints.size()]);
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            return null;
        }
    }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));

    int totalNumPoints = 0;
    for (Point[] list : allLists)
        totalNumPoints += list.length;

    LOG.info("Read " + totalNumPoints + " points and merging into one list");
    Point[] allPoints = new Point[totalNumPoints];
    int pointer = 0;

    for (Point[] list : allLists) {
        System.arraycopy(list, 0, allPoints, pointer, list.length);
        pointer += list.length;
    }
    allLists.clear(); // Let the GC collect it

    Point[] skyline = skylineInMemory(allPoints, dir);

    if (outFile != null) {
        if (params.getBoolean("overwrite", false)) {
            FileSystem outFs = outFile.getFileSystem(new Configuration());
            outFs.delete(outFile, true);
        }
        GridRecordWriter<Point> out = new GridRecordWriter<Point>(outFile, null, null, null);
        for (Point pt : skyline) {
            out.write(NullWritable.get(), pt);
        }
        out.close(null);
    }
}

From source file:edu.umn.cs.spatialHadoop.RandomSpatialGenerator.java

License:Open Source License

/**
 * Generates random shapes and writes the result to a file.
 * @param outFile - The output file to write the generated shapes to
 * @param params - Generation parameters, including the MBR to generate in,
 *   the shape type, the total size of the generated file and the block size
 * @throws IOException 
 */
private static void generateFileLocal(Path outFile, OperationsParams params) throws IOException {
    JobConf job = new JobConf(params, RandomSpatialGenerator.class);
    FileSystem outFS = outFile.getFileSystem(params);
    long blocksize = outFS.getDefaultBlockSize(outFile);
    String sindex = params.get("sindex");
    Rectangle mbr = params.getShape("mbr").getMBR();
    long totalSize = params.getSize("size");

    // Calculate the dimensions of each partition based on gindex type
    CellInfo[] cells;
    if (sindex == null) {
        cells = new CellInfo[] { new CellInfo(1, mbr) };
    } else if (sindex.equals("grid")) {
        int num_partitions = Repartition.calculateNumberOfPartitions(params, totalSize, outFS, outFile,
                blocksize);

        GridInfo gridInfo = new GridInfo(mbr.x1, mbr.y1, mbr.x2, mbr.y2);
        gridInfo.calculateCellDimensions(num_partitions);
        cells = gridInfo.getAllCells();
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    outFS.mkdirs(outFile);

    ShapeRecordWriter<Shape> writer;
    if (sindex == null || sindex.equals("grid")) {
        writer = new GridRecordWriter<Shape>(outFile, job, null, cells);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    int rectSize = params.getInt("rectsize", 100);
    long seed = params.getLong("seed", System.currentTimeMillis());
    float circleThickness = params.getFloat("thickness", 1);
    DistributionType type = SpatialSite.getDistributionType(params, "type", DistributionType.UNIFORM);
    Shape shape = params.getShape("shape");
    long t1 = System.currentTimeMillis();

    RandomShapeGenerator<Shape> generator = new RandomShapeGenerator<Shape>(totalSize, mbr, type, rectSize,
            seed, circleThickness);

    Rectangle key = generator.createKey();

    while (generator.next(key, shape)) {
        // Serialize it to text
        writer.write(NullWritable.get(), shape);
    }
    writer.close(null);
    long t2 = System.currentTimeMillis();

    System.out.println("Generation time: " + (t2 - t1) + " millis");
}

From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.Summarize.java

License:Open Source License

private void doSummary(SummaryGroup group) throws IOException, InterruptedException {
    // The reverseStrand flag is already represented in which group is passed
    // to this method, so there's no need to set it in summary.range.
    summary.rid.set(currentReferenceID);
    summary.range.beg.set((int) (group.sumBeg / group.count));
    summary.range.end.set((int) (group.sumEnd / group.count));
    summary.count.set(group.count);
    mos.write(NullWritable.get(), summary, group.outName);

    group.reset();
}

From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.SummarySort.java

License:Open Source License

@Override
protected void reduce(LongWritable ignored, Iterable<Text> records,
        Reducer<LongWritable, Text, NullWritable, Text>.Context ctx) throws IOException, InterruptedException {
    for (Text rec : records)
        ctx.write(NullWritable.get(), rec);
}

From source file:fi.tkk.ics.hadoop.bam.cli.plugins.Sort.java

License:Open Source License

@Override
protected void reduce(LongWritable ignored, Iterable<SAMRecordWritable> records,
        Reducer<LongWritable, SAMRecordWritable, NullWritable, SAMRecordWritable>.Context ctx)
        throws IOException, InterruptedException {
    for (SAMRecordWritable rec : records)
        ctx.write(NullWritable.get(), rec);
}

From source file:fi.tkk.ics.hadoop.bam.cli.plugins.VCFSort.java

License:Open Source License

@Override
protected void reduce(LongWritable ignored, Iterable<VariantContextWritable> records,
        Reducer<LongWritable, VariantContextWritable, NullWritable, VariantContextWritable>.Context ctx)
        throws IOException, InterruptedException {
    for (VariantContextWritable rec : records)
        ctx.write(NullWritable.get(), rec);
}

From source file:full_MapReduce.FindBestAttributeMapper.java

License:Open Source License

public void map(Text key, MapWritable value, Context context) throws IOException, InterruptedException {
    TextArrayWritable values = getValues(value);
    Map<Text, Integer> tuple_per_split = getTuplePerSplit(value);

    int tot_tuple = 0;
    for (Integer i : tuple_per_split.values()) {
        tot_tuple += i;
    }

    double global_entropy = global_entropy(value, tot_tuple);
    double gain = gain(global_entropy, tuple_per_split, value, tot_tuple);
    DoubleWritable gain_ratio = new DoubleWritable(gainRatio(gain, tuple_per_split, tot_tuple));

    context.write(NullWritable.get(), new AttributeGainRatioWritable(key, values, gain_ratio));
}

From source file:gobblin.compaction.mapreduce.avro.AvroKeyDedupReducer.java

License:Apache License

@Override
protected void reduce(AvroKey<GenericRecord> key, Iterable<AvroValue<GenericRecord>> values, Context context)
        throws IOException, InterruptedException {
    int numVals = 0;

    AvroValue<GenericRecord> valueToRetain = null;

    for (AvroValue<GenericRecord> value : values) {
        if (valueToRetain == null) {
            valueToRetain = value;
        } else if (this.deltaComparatorOptional.isPresent()) {
            valueToRetain = this.deltaComparatorOptional.get().compare(valueToRetain, value) >= 0
                    ? valueToRetain
                    : value;
        }
        numVals++;
    }
    this.outKey.datum(valueToRetain.datum());

    if (numVals > 1) {
        context.getCounter(EVENT_COUNTER.MORE_THAN_1).increment(1);
        context.getCounter(EVENT_COUNTER.DEDUPED).increment(numVals - 1);
    }

    context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1);

    context.write(this.outKey, NullWritable.get());
}

From source file:gobblin.compaction.mapreduce.avro.AvroKeyMapper.java

License:Apache License

@Override
protected void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
        throws IOException, InterruptedException {
    if (context.getNumReduceTasks() == 0) {
        context.write(key, NullWritable.get());
    } else {
        populateComparableKeyRecord(key.datum(), this.outKey.datum());
        this.outValue.datum(key.datum());
        try {
            context.write(this.outKey, this.outValue);
        } catch (AvroRuntimeException e) {
            final Path[] paths = ((CombineFileSplit) context.getInputSplit()).getPaths();
            throw new IOException("Unable to process paths " + StringUtils.join(paths, ','), e);
        }
    }
    context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1);
}

From source file:gov.va.research.red.ex.hadoop.BioCReducer.java

License:Apache License

@Override
protected void cleanup(Reducer<Text, MatchedElementWritable, Text, NullWritable>.Context context)
        throws IOException, InterruptedException {
    StringWriter sw = new StringWriter();
    BioCCollectionWriter cw = null;
    try {
        cw = biocFactory.createBioCCollectionWriter(sw);
        cw.writeCollection(biocCollection);
    } catch (XMLStreamException e) {
        throw new RuntimeException(e);
    } finally {
        if (cw != null) {
            cw.close();
        }
    }
    Text output = new Text(sw.toString());
    context.write(output, NullWritable.get());
}