List of usage examples for org.apache.hadoop.io IntWritable IntWritable
public IntWritable()
From source file:edu.umn.cs.spatialHadoop.core.RectangleNN.java
License:Open Source License
/** * The general version of self join algorithm which works with arbitrary * shapes. First, it performs a filter step where it finds shapes with * overlapping MBRs. Second, an optional refine step can be executed to * return only shapes which actually overlap. * @param R - input set of shapes// w w w . j a va 2 s . co m * @param refine - Whether or not to run a refine step * @param output - output collector where the results are reported * @return - number of pairs returned by the planesweep algorithm * @throws IOException */ public static <S extends Shape> int SelfJoin_planeSweep(final S[] R, boolean refine, final OutputCollector<S, S> output, Progressable reporter) throws IOException { // Use a two-phase filter and refine approach // 1- Use MBRs as a first filter // 2- Use ConvexHull as a second filter // 3- Use the exact shape for refinement final RectangleID[] mbrs = new RectangleID[R.length]; for (int i = 0; i < R.length; i++) { mbrs[i] = new RectangleID(i, R[i].getMBR()); } if (refine) { final IntWritable count = new IntWritable(); int filterCount = SelfJoin_rectangles(mbrs, new OutputCollector<RectangleID, RectangleID>() { @Override public void collect(RectangleID r1, RectangleID r2) throws IOException { if (R[r1.id].isIntersected(R[r2.id])) { if (output != null) output.collect(R[r1.id], R[r2.id]); count.set(count.get() + 1); } } }, reporter); LOG.debug("Filtered result size " + filterCount + ", refined result size " + count.get()); return count.get(); } else { return SelfJoin_rectangles(mbrs, new OutputCollector<RectangleID, RectangleID>() { @Override public void collect(RectangleID r1, RectangleID r2) throws IOException { if (output != null) output.collect(R[r1.id], R[r2.id]); } }, reporter); } }
From source file:edu.umn.cs.spatialHadoop.indexing.Indexer.java
License:Open Source License
private static void indexLocal(Path inPath, final Path outPath, OperationsParams params) throws IOException, InterruptedException { Job job = Job.getInstance(params);//from w w w. j a v a2s .c om final Configuration conf = job.getConfiguration(); final String sindex = conf.get("sindex"); // Start reading input file List<InputSplit> splits = new ArrayList<InputSplit>(); final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>(); FileSystem inFs = inPath.getFileSystem(conf); FileStatus inFStatus = inFs.getFileStatus(inPath); if (inFStatus != null && !inFStatus.isDir()) { // One file, retrieve it immediately. // This is useful if the input is a hidden file which is automatically // skipped by FileInputFormat. We need to plot a hidden file for the case // of plotting partition boundaries of a spatial index splits.add(new FileSplit(inPath, 0, inFStatus.getLen(), new String[0])); } else { SpatialInputFormat3.setInputPaths(job, inPath); for (InputSplit s : inputFormat.getSplits(job)) splits.add(s); } // Copy splits to a final array to be used in parallel final FileSplit[] fsplits = splits.toArray(new FileSplit[splits.size()]); boolean replicate = PartitionerReplicate.get(sindex); // Set input file MBR if not already set Rectangle inputMBR = (Rectangle) OperationsParams.getShape(conf, "mbr"); if (inputMBR == null) { inputMBR = FileMBR.fileMBR(inPath, new OperationsParams(conf)); OperationsParams.setShape(conf, "mbr", inputMBR); } setLocalIndexer(conf, sindex); final Partitioner partitioner = createPartitioner(inPath, outPath, conf, sindex); final IndexRecordWriter<Shape> recordWriter = new IndexRecordWriter<Shape>(partitioner, replicate, sindex, outPath, conf); for (FileSplit fsplit : fsplits) { RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null); if (reader instanceof SpatialRecordReader3) { ((SpatialRecordReader3) reader).initialize(fsplit, conf); } else if (reader instanceof RTreeRecordReader3) { ((RTreeRecordReader3) reader).initialize(fsplit, conf); } else if (reader instanceof HDFRecordReader) { ((HDFRecordReader) reader).initialize(fsplit, conf); } else { throw new RuntimeException("Unknown record reader"); } final IntWritable partitionID = new IntWritable(); while (reader.nextKeyValue()) { Iterable<Shape> shapes = reader.getCurrentValue(); if (replicate) { for (final Shape s : shapes) { partitioner.overlapPartitions(s, new ResultCollector<Integer>() { @Override public void collect(Integer id) { partitionID.set(id); try { recordWriter.write(partitionID, s); } catch (IOException e) { throw new RuntimeException(e); } } }); } } else { for (final Shape s : shapes) { int pid = partitioner.overlapPartition(s); if (pid != -1) { partitionID.set(pid); recordWriter.write(partitionID, s); } } } } reader.close(); } recordWriter.close(null); // Write the WKT formatted master file Path masterPath = new Path(outPath, "_master." + sindex); FileSystem outFs = outPath.getFileSystem(params); Path wktPath = new Path(outPath, "_" + sindex + ".wkt"); PrintStream wktOut = new PrintStream(outFs.create(wktPath)); wktOut.println("ID\tBoundaries\tRecord Count\tSize\tFile name"); Text tempLine = new Text2(); Partition tempPartition = new Partition(); LineReader in = new LineReader(outFs.open(masterPath)); while (in.readLine(tempLine) > 0) { tempPartition.fromText(tempLine); wktOut.println(tempPartition.toWKT()); } in.close(); wktOut.close(); }
From source file:edu.umn.cs.spatialHadoop.operations.Indexer.java
License:Open Source License
private static void indexLocal(Path inPath, Path outPath, OperationsParams params) throws IOException { JobConf job = new JobConf(params); String sindex = params.get("sindex"); Partitioner partitioner = createPartitioner(inPath, outPath, job, sindex); // Start reading input file Vector<InputSplit> splits = new Vector<InputSplit>(); final ShapeIterInputFormat inputFormat = new ShapeIterInputFormat(); FileSystem inFs = inPath.getFileSystem(params); FileStatus inFStatus = inFs.getFileStatus(inPath); if (inFStatus != null && !inFStatus.isDir()) { // One file, retrieve it immediately. // This is useful if the input is a hidden file which is automatically // skipped by FileInputFormat. We need to plot a hidden file for the case // of plotting partition boundaries of a spatial index splits.add(new FileSplit(inPath, 0, inFStatus.getLen(), new String[0])); } else {/*from ww w .j a v a2 s . com*/ ShapeIterInputFormat.addInputPath(job, inPath); for (InputSplit s : inputFormat.getSplits(job, 1)) splits.add(s); } // Copy splits to a final array to be used in parallel final FileSplit[] fsplits = splits.toArray(new FileSplit[splits.size()]); boolean replicate = job.getBoolean("replicate", false); final IndexRecordWriter<Shape> recordWriter = new IndexRecordWriter<Shape>(partitioner, replicate, sindex, outPath, params); for (FileSplit fsplit : fsplits) { RecordReader<Rectangle, Iterable<? extends Shape>> reader = inputFormat.getRecordReader(fsplit, job, null); Rectangle partitionMBR = reader.createKey(); Iterable<? extends Shape> shapes = reader.createValue(); final IntWritable partitionID = new IntWritable(); while (reader.next(partitionMBR, shapes)) { if (replicate) { // Replicate each shape to all overlapping partitions for (final Shape s : shapes) { partitioner.overlapPartitions(s, new ResultCollector<Integer>() { @Override public void collect(Integer id) { partitionID.set(id); try { recordWriter.write(partitionID, s); } catch (IOException e) { throw new RuntimeException(e); } } }); } } else { for (Shape s : shapes) { partitionID.set(partitioner.overlapPartition(s)); recordWriter.write(partitionID, s); } } } reader.close(); } recordWriter.close(null); }