Example usage for org.apache.hadoop.io IntWritable IntWritable

List of usage examples for org.apache.hadoop.io IntWritable IntWritable

Introduction

In this page you can find the example usage for org.apache.hadoop.io IntWritable IntWritable.

Prototype

public IntWritable() 

Source Link

Usage

From source file:edu.umn.cs.spatialHadoop.core.RectangleNN.java

License:Open Source License

/**
 * The general version of self join algorithm which works with arbitrary
 * shapes. First, it performs a filter step where it finds shapes with
 * overlapping MBRs. Second, an optional refine step can be executed to
 * return only shapes which actually overlap.
 * @param R - input set of shapes//  w w w  . j a va 2 s  . co  m
 * @param refine - Whether or not to run a refine step
 * @param output - output collector where the results are reported
 * @return - number of pairs returned by the planesweep algorithm
 * @throws IOException
 */
public static <S extends Shape> int SelfJoin_planeSweep(final S[] R, boolean refine,
        final OutputCollector<S, S> output, Progressable reporter) throws IOException {
    // Use a two-phase filter and refine approach
    // 1- Use MBRs as a first filter
    // 2- Use ConvexHull as a second filter
    // 3- Use the exact shape for refinement
    final RectangleID[] mbrs = new RectangleID[R.length];
    for (int i = 0; i < R.length; i++) {
        mbrs[i] = new RectangleID(i, R[i].getMBR());
    }

    if (refine) {
        final IntWritable count = new IntWritable();
        int filterCount = SelfJoin_rectangles(mbrs, new OutputCollector<RectangleID, RectangleID>() {
            @Override
            public void collect(RectangleID r1, RectangleID r2) throws IOException {
                if (R[r1.id].isIntersected(R[r2.id])) {
                    if (output != null)
                        output.collect(R[r1.id], R[r2.id]);
                    count.set(count.get() + 1);
                }
            }
        }, reporter);

        LOG.debug("Filtered result size " + filterCount + ", refined result size " + count.get());

        return count.get();
    } else {
        return SelfJoin_rectangles(mbrs, new OutputCollector<RectangleID, RectangleID>() {
            @Override
            public void collect(RectangleID r1, RectangleID r2) throws IOException {
                if (output != null)
                    output.collect(R[r1.id], R[r2.id]);
            }
        }, reporter);
    }
}

From source file:edu.umn.cs.spatialHadoop.indexing.Indexer.java

License:Open Source License

private static void indexLocal(Path inPath, final Path outPath, OperationsParams params)
        throws IOException, InterruptedException {
    Job job = Job.getInstance(params);//from w w  w. j  a v a2s  .c om
    final Configuration conf = job.getConfiguration();

    final String sindex = conf.get("sindex");

    // Start reading input file
    List<InputSplit> splits = new ArrayList<InputSplit>();
    final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
    FileSystem inFs = inPath.getFileSystem(conf);
    FileStatus inFStatus = inFs.getFileStatus(inPath);
    if (inFStatus != null && !inFStatus.isDir()) {
        // One file, retrieve it immediately.
        // This is useful if the input is a hidden file which is automatically
        // skipped by FileInputFormat. We need to plot a hidden file for the case
        // of plotting partition boundaries of a spatial index
        splits.add(new FileSplit(inPath, 0, inFStatus.getLen(), new String[0]));
    } else {
        SpatialInputFormat3.setInputPaths(job, inPath);
        for (InputSplit s : inputFormat.getSplits(job))
            splits.add(s);
    }

    // Copy splits to a final array to be used in parallel
    final FileSplit[] fsplits = splits.toArray(new FileSplit[splits.size()]);
    boolean replicate = PartitionerReplicate.get(sindex);

    // Set input file MBR if not already set
    Rectangle inputMBR = (Rectangle) OperationsParams.getShape(conf, "mbr");
    if (inputMBR == null) {
        inputMBR = FileMBR.fileMBR(inPath, new OperationsParams(conf));
        OperationsParams.setShape(conf, "mbr", inputMBR);
    }

    setLocalIndexer(conf, sindex);
    final Partitioner partitioner = createPartitioner(inPath, outPath, conf, sindex);

    final IndexRecordWriter<Shape> recordWriter = new IndexRecordWriter<Shape>(partitioner, replicate, sindex,
            outPath, conf);
    for (FileSplit fsplit : fsplits) {
        RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
        if (reader instanceof SpatialRecordReader3) {
            ((SpatialRecordReader3) reader).initialize(fsplit, conf);
        } else if (reader instanceof RTreeRecordReader3) {
            ((RTreeRecordReader3) reader).initialize(fsplit, conf);
        } else if (reader instanceof HDFRecordReader) {
            ((HDFRecordReader) reader).initialize(fsplit, conf);
        } else {
            throw new RuntimeException("Unknown record reader");
        }

        final IntWritable partitionID = new IntWritable();

        while (reader.nextKeyValue()) {
            Iterable<Shape> shapes = reader.getCurrentValue();
            if (replicate) {
                for (final Shape s : shapes) {
                    partitioner.overlapPartitions(s, new ResultCollector<Integer>() {
                        @Override
                        public void collect(Integer id) {
                            partitionID.set(id);
                            try {
                                recordWriter.write(partitionID, s);
                            } catch (IOException e) {
                                throw new RuntimeException(e);
                            }
                        }
                    });
                }
            } else {
                for (final Shape s : shapes) {
                    int pid = partitioner.overlapPartition(s);
                    if (pid != -1) {
                        partitionID.set(pid);
                        recordWriter.write(partitionID, s);
                    }
                }
            }
        }
        reader.close();
    }
    recordWriter.close(null);

    // Write the WKT formatted master file
    Path masterPath = new Path(outPath, "_master." + sindex);
    FileSystem outFs = outPath.getFileSystem(params);
    Path wktPath = new Path(outPath, "_" + sindex + ".wkt");
    PrintStream wktOut = new PrintStream(outFs.create(wktPath));
    wktOut.println("ID\tBoundaries\tRecord Count\tSize\tFile name");
    Text tempLine = new Text2();
    Partition tempPartition = new Partition();
    LineReader in = new LineReader(outFs.open(masterPath));
    while (in.readLine(tempLine) > 0) {
        tempPartition.fromText(tempLine);
        wktOut.println(tempPartition.toWKT());
    }
    in.close();
    wktOut.close();
}

From source file:edu.umn.cs.spatialHadoop.operations.Indexer.java

License:Open Source License

private static void indexLocal(Path inPath, Path outPath, OperationsParams params) throws IOException {
    JobConf job = new JobConf(params);
    String sindex = params.get("sindex");
    Partitioner partitioner = createPartitioner(inPath, outPath, job, sindex);

    // Start reading input file
    Vector<InputSplit> splits = new Vector<InputSplit>();
    final ShapeIterInputFormat inputFormat = new ShapeIterInputFormat();
    FileSystem inFs = inPath.getFileSystem(params);
    FileStatus inFStatus = inFs.getFileStatus(inPath);
    if (inFStatus != null && !inFStatus.isDir()) {
        // One file, retrieve it immediately.
        // This is useful if the input is a hidden file which is automatically
        // skipped by FileInputFormat. We need to plot a hidden file for the case
        // of plotting partition boundaries of a spatial index
        splits.add(new FileSplit(inPath, 0, inFStatus.getLen(), new String[0]));
    } else {/*from   ww  w  .j  a  v  a2  s . com*/
        ShapeIterInputFormat.addInputPath(job, inPath);
        for (InputSplit s : inputFormat.getSplits(job, 1))
            splits.add(s);
    }

    // Copy splits to a final array to be used in parallel
    final FileSplit[] fsplits = splits.toArray(new FileSplit[splits.size()]);
    boolean replicate = job.getBoolean("replicate", false);

    final IndexRecordWriter<Shape> recordWriter = new IndexRecordWriter<Shape>(partitioner, replicate, sindex,
            outPath, params);

    for (FileSplit fsplit : fsplits) {
        RecordReader<Rectangle, Iterable<? extends Shape>> reader = inputFormat.getRecordReader(fsplit, job,
                null);
        Rectangle partitionMBR = reader.createKey();
        Iterable<? extends Shape> shapes = reader.createValue();

        final IntWritable partitionID = new IntWritable();

        while (reader.next(partitionMBR, shapes)) {
            if (replicate) {
                // Replicate each shape to all overlapping partitions
                for (final Shape s : shapes) {
                    partitioner.overlapPartitions(s, new ResultCollector<Integer>() {
                        @Override
                        public void collect(Integer id) {
                            partitionID.set(id);
                            try {
                                recordWriter.write(partitionID, s);
                            } catch (IOException e) {
                                throw new RuntimeException(e);
                            }
                        }
                    });
                }
            } else {
                for (Shape s : shapes) {
                    partitionID.set(partitioner.overlapPartition(s));
                    recordWriter.write(partitionID, s);
                }
            }
        }
        reader.close();
    }

    recordWriter.close(null);
}