List of usage examples for org.apache.hadoop.mapred.Reporter.progress()
public void progress();
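Reporter.progress() reports that the current task is still alive and making progress, which resets the framework's task timeout. The typical use, seen throughout the examples below, is to call it periodically from long-running map, reduce, or record-reader code. A minimal illustrative sketch (not taken from the source files below; the class and field names are assumptions):

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class SlowWorkMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, LongWritable> {

    @Override
    public void map(LongWritable key, Text value,
            OutputCollector<Text, LongWritable> output, Reporter reporter) throws IOException {
        long processed = 0;
        for (String token : value.toString().split("\\s+")) {
            // ... expensive per-token work would go here ...
            output.collect(new Text(token), new LongWritable(1));
            // Signal liveness every 1000 records so the framework does not
            // kill the attempt for inactivity during long-running work.
            if (++processed % 1000 == 0) {
                reporter.progress();
            }
        }
    }
}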
From source file:com.ask.hive.hbase.HiveHBaseTextTableInputFormat.java
License:Apache License
public RecordReader<Text, Text> getRecordReader(InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {
    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getSplit();
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    List<String> hbaseColumnFamilies = new ArrayList<String>();
    List<String> hbaseColumnQualifiers = new ArrayList<String>();
    List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
    List<byte[]> hbaseColumnQualifiersBytes = new ArrayList<byte[]>();
    int iKey;
    try {
        iKey = parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies, hbaseColumnFamiliesBytes,
                hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
    } catch (Exception se) {
        throw new IOException(se);
    }
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);
    if (hbaseColumnFamilies.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }
    boolean addAll = (readColIDs.size() == 0);
    Scan scan = new Scan();
    boolean empty = true;
    if (!addAll) {
        for (int i : readColIDs) {
            if (i == iKey) {
                continue;
            }
            scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            empty = false;
        }
    }
    // The HBase table's row key maps to a Hive table column. In the corner case when only the
    // row key column is selected in Hive, the HBase Scan will be empty, i.e. no column family /
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // table's column projection.
    if (empty) {
        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
            if (i == iKey) {
                continue;
            }
            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }
            if (!addAll) {
                break;
            }
        }
    }
    // Set the start and end time for scanning.
    setTime(jobConf, scan);
    // If Hive's optimizer gave us a filter to process, convert it to the
    // HBase scan form now.
    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);
    setScan(scan);
    Job job = new Job(jobConf);
    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };
    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader =
            createRecordReader(tableSplit, tac);
    return new RecordReader<Text, Text>() {

        @Override
        public void close() throws IOException {
            recordReader.close();
        }

        @Override
        public Text createKey() {
            return new Text();
        }

        @Override
        public Text createValue() {
            return new Text();
        }

        @Override
        public long getPos() throws IOException {
            return 0;
        }

        @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;
            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return progress;
        }

        @Override
        public boolean next(Text rowKey, Text value) throws IOException {
            boolean next = false;
            try {
                next = recordReader.nextKeyValue();
                // Build the value from the column qualifiers and cell values of the current row.
                if (next) {
                    rowKey.set(Bytes.toString(recordReader.getCurrentValue().getRow()));
                    StringBuilder val = new StringBuilder();
                    String prev = "";
                    for (KeyValue kv : recordReader.getCurrentValue().raw()) {
                        String current = new String(kv.getQualifier());
                        char[] col = current.toCharArray();
                        if (val.length() > 0) {
                            if (prev.equals(current))
                                val.append(",");
                            else
                                val.append("\t");
                        }
                        prev = current;
                        val.append(col[0]).append("_");
                        val.append(Bytes.toString(kv.getValue()));
                    }
                    value.set(val.toString());
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return next;
        }
    };
}
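The example above (and several of the examples that follow) bridges the old org.apache.hadoop.mapred API to the new org.apache.hadoop.mapreduce API by wrapping the Reporter in an anonymous TaskAttemptContext whose progress() delegates to Reporter.progress(), so that progress signals from a new-API RecordReader keep the old-API task alive. A minimal sketch of just that bridging pattern, assuming a Hadoop release where org.apache.hadoop.mapreduce.TaskAttemptContext is a concrete, subclassable class (as in the code above); the helper class name is hypothetical:

import java.io.IOException;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;

// Hypothetical helper: wrap an old-API Reporter so that progress() calls made by a
// new-API record reader are forwarded to the old-API framework.
public final class ReporterBridge {

    public static TaskAttemptContext wrap(JobConf jobConf, final Reporter reporter) throws IOException {
        Job job = new Job(jobConf);
        return new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {
            @Override
            public void progress() {
                reporter.progress();  // keep the enclosing mapred task attempt alive
            }
        };
    }
}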
From source file:com.ask.hive.hbase.HiveHBaseTimeTableInputFormat.java
License:Apache License
public RecordReader<ImmutableBytesWritable, Result> getRecordReader(InputSplit split, JobConf jobConf,
        final Reporter reporter) throws IOException {
    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getSplit();
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    List<String> hbaseColumnFamilies = new ArrayList<String>();
    List<String> hbaseColumnQualifiers = new ArrayList<String>();
    List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
    List<byte[]> hbaseColumnQualifiersBytes = new ArrayList<byte[]>();
    int iKey;
    try {
        iKey = HBaseSerDe.parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies, hbaseColumnFamiliesBytes,
                hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
    } catch (SerDeException se) {
        throw new IOException(se);
    }
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);
    if (hbaseColumnFamilies.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }
    boolean addAll = (readColIDs.size() == 0);
    Scan scan = new Scan();
    boolean empty = true;
    if (!addAll) {
        for (int i : readColIDs) {
            if (i == iKey) {
                continue;
            }
            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }
            empty = false;
        }
    }
    // The HBase table's row key maps to a Hive table column. In the corner case when only the
    // row key column is selected in Hive, the HBase Scan will be empty, i.e. no column family /
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // table's column projection.
    if (empty) {
        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
            if (i == iKey) {
                continue;
            }
            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }
            if (!addAll) {
                break;
            }
        }
    }
    // Set the start and end time for scanning.
    setTime(jobConf, scan);
    // If Hive's optimizer gave us a filter to process, convert it to the
    // HBase scan form now.
    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);
    setScan(scan);
    Job job = new Job(jobConf);
    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };
    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader =
            createRecordReader(tableSplit, tac);
    return new RecordReader<ImmutableBytesWritable, Result>() {

        @Override
        public void close() throws IOException {
            recordReader.close();
        }

        @Override
        public ImmutableBytesWritable createKey() {
            return new ImmutableBytesWritable();
        }

        @Override
        public Result createValue() {
            return new Result();
        }

        @Override
        public long getPos() throws IOException {
            return 0;
        }

        @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;
            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return progress;
        }

        @Override
        public boolean next(ImmutableBytesWritable rowKey, Result value) throws IOException {
            boolean next = false;
            try {
                next = recordReader.nextKeyValue();
                if (next) {
                    rowKey.set(recordReader.getCurrentValue().getRow());
                    Writables.copyWritable(recordReader.getCurrentValue(), value);
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return next;
        }
    };
}
From source file:com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyInputFormat.java
License:Apache License
public org.apache.hadoop.mapred.RecordReader<ByteBuffer, SortedMap<ByteBuffer, IColumn>> getRecordReader(
        org.apache.hadoop.mapred.InputSplit split, JobConf jobConf, final Reporter reporter) throws IOException {
    TaskAttemptContext tac = new TaskAttemptContext(jobConf, TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };
    ColumnFamilyRecordReader recordReader = new ColumnFamilyRecordReader(
            jobConf.getInt(CASSANDRA_HADOOP_MAX_KEY_SIZE, CASSANDRA_HADOOP_MAX_KEY_SIZE_DEFAULT));
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}
From source file:com.ebay.erl.mobius.core.mapred.DefaultMobiusCombiner.java
License:Apache License
@Override
public void joinreduce(Tuple key, DataJoinValueGroup<Tuple> values,
        OutputCollector<DataJoinKey, DataJoinValue> output, Reporter reporter) throws IOException {
    if (!reporterSet) {
        for (Projectable p : this._projections) {
            p.setReporter(reporter);
        }
        reporterSet = true;
    }
    if (values.hasNext()) {
        // Reset group function results.
        if (groupFunctionResults.size() > 0) {
            for (GroupFunction func : this.groupFunctionResults.keySet()) {
                this.groupFunctionResults.get(func).clear();
                func.reset();
            }
        }
        Byte datasetID = values.nextDatasetID();
        if (!key.hasSchema()) {
            key.setSchema(this.getKeySchemaByDatasetID(datasetID));
        }
        Iterator<Tuple> tuples = values.next();
        Tuple combinedValue = new Tuple();
        long progress = 0L;
        while (tuples.hasNext()) {
            Tuple aTuple = tuples.next();
            if (++progress % 3000 == 0) {
                reporter.progress();
            }
            aTuple.setSchema(this.getValueSchemaByDatasetID(datasetID));
            for (Projectable p : this.dsToFuncsMapping.get(datasetID)) {
                if (p instanceof GroupFunction) {
                    ((GroupFunction) p).consume(aTuple);
                } else {
                    ExtendFunction func = (ExtendFunction) p;
                    Tuple computedResult = func.getResult(aTuple);
                    String name = func.getInputColumns()[0].getInputColumnName();
                    combinedValue.insert(name, computedResult.get(0));
                }
            }
        }
        for (Projectable p : this.dsToFuncsMapping.get(datasetID)) {
            if (p instanceof GroupFunction) {
                BigTupleList aggregatedResult = ((GroupFunction) p).getResult();
                if (aggregatedResult.size() == 1) {
                    Tuple aggResult = aggregatedResult.getFirst();
                    String name = p.getInputColumns()[0].getInputColumnName();
                    combinedValue.insert(name, aggResult.get(0));
                } else if (aggregatedResult.size() > 1) {
                    throw new IllegalArgumentException(p.toString() + " is a group function that generates "
                            + "more than one rows (" + aggregatedResult.size()
                            + ") per key, so it is not combinable.");
                }
            }
        }
        DataJoinKey outKey = new DataJoinKey(datasetID, key);
        DataJoinValue outValue = new DataJoinValue(datasetID, combinedValue);
        output.collect(outKey, outValue);
    }
}
From source file:com.hyperiongray.ccmr.s3wordcount.WordCountOnlyMapper.java
License:Apache License
public void map(Object key, Text value, OutputCollector<NullWritable, Text> outputCollector, Reporter reporter)
        throws IOException {
    // We're accessing a publicly available bucket, so we don't need to supply credentials.
    ArchiveReader ar;
    try {
        S3Service s3s = new RestS3Service(null);
        // Grab a file out of the CommonCrawl S3 bucket.
        String fn = value.toString();
        logger.info(fn);
        S3Object f = s3s.getObject("aws-publicdatasets", fn, null, null, null, null, null, null);
        // The file name identifies the ArchiveReader and indicates if it should be decompressed.
        ar = WARCReaderFactory.get(fn, f.getDataInputStream(), true);
    } catch (ServiceException e) {
        logger.error("S3 connection Failed", e);
        throw new RuntimeException(e);
    }
    // Once we have an ArchiveReader, we can work through each of the records it contains.
    int i = 0;
    logger.info("Started" + new Date());
    for (ArchiveRecord r : ar) {
        reporter.progress();
        String url = "";
        try {
            // The header contains information such as the record type, size, creation time, and URL.
            url = r.getHeader().getUrl();
            String crawledDate = r.getHeader().getDate();
            if (url == null)
                continue;
            // Read the contents of the record by using the ArchiveRecord as an InputStream and
            // dumping it into a byte array as long as the record's stated length.
            OutputStream os = new ByteArrayOutputStream();
            try {
                r.dump(os);
            } finally {
                try {
                    if (r != null)
                        r.close();
                } catch (Exception e) {
                    logger.error("reading inputstream Failed", e);
                }
            }
            // Note: a potential optimization would be to allocate a large buffer only once.
            // Convert the record body to a string and match it against the configured patterns.
            String content = os.toString();
            Map<String, Integer> matches = contentMatcher.matchContent(content);
            int score = contentMatcher.score(matches);
            if (score > LOWER_SCORE_THRESHOLD) {
                logger.info("****************************************");
                logger.info("URL: " + url + " Score: " + score + " Detail: " + matches);
                // outputCollector.collect(new IntWritable(score), new Text(url));
                outputCollector.collect(NullWritable.get(), new Text(outputParser
                        .parse(contentMatcher.getTitle(content), url, crawledDate, score, matches)));
            }
            logger.debug(Integer.toString(i));
            if (i++ > sampleSize) {
                logger.info("Finished " + new Date());
                break;
            }
        } catch (Exception e) {
            logger.error("url failed " + url, e);
        }
    }
}
From source file:com.liveramp.hank.hadoop.DomainBuilderMapper.java
License:Apache License
public final void map(K key, V value,
        OutputCollector<KeyAndPartitionWritableComparable, ValueWritable> outputCollector, Reporter reporter)
        throws IOException {
    KeyValuePair keyValue = buildHankKeyValue(key, value);
    KeyAndPartitionWritableComparable hankKeyWritableComparable = new KeyAndPartitionWritableComparable(
            storageEngine, partitioner, numPartitions, keyValue.getKey());
    ValueWritable hankValueWritable = new ValueWritable(keyValue.getValue());
    outputCollector.collect(hankKeyWritableComparable, hankValueWritable);
    reporter.progress();
}
From source file:edu.umn.cs.spatialHadoop.core.RTree.java
License:Open Source License
/**
 * Performs a spatial join between records in two R-trees.
 * @param R
 * @param S
 * @param output
 * @return
 * @throws IOException
 * SuppressWarnings("resource") is used because we create LineReaders on the
 * internal data stream of both R and S. We do not want to close the
 * LineReader because it would subsequently close the internal data streams
 * of R and S, which we want to avoid because R and S are not created by
 * this function and it should not free their resources.
 */
protected static <S1 extends Shape, S2 extends Shape> int spatialJoinDisk(final RTree<S1> R, final RTree<S2> S,
        final ResultCollector2<S1, S2> output, final Reporter reporter) throws IOException {
    // Reserve locations for node MBRs and data offsets [start, end)
    final Rectangle[] r_nodes = new Rectangle[R.degree];
    for (int i = 0; i < r_nodes.length; i++)
        r_nodes[i] = new Rectangle();
    final int[] r_data_offset = new int[R.degree + 1];
    final Rectangle[] s_nodes = new Rectangle[S.degree];
    for (int i = 0; i < s_nodes.length; i++)
        s_nodes[i] = new Rectangle();
    final int[] s_data_offset = new int[S.degree + 1];
    PriorityQueue<Long> nodesToJoin = new PriorityQueue<Long>() {
        {
            initialize(R.leafNodeCount + S.leafNodeCount);
        }

        @Override
        protected boolean lessThan(Object a, Object b) {
            return ((Long) a) < ((Long) b);
        }
    };
    nodesToJoin.put(0L);
    LruCache<Integer, Shape[]> r_records_cache = new LruCache<Integer, Shape[]>(R.degree * 2);
    LruCache<Integer, Shape[]> s_records_cache = new LruCache<Integer, Shape[]>(S.degree * R.degree * 4);
    Text line = new Text2();
    int result_count = 0;
    LineReader r_lr = null, s_lr = null;
    // Last offsets read from R and S
    int r_last_offset = 0;
    int s_last_offset = 0;
    while (nodesToJoin.size() > 0) {
        long nodes_to_join = nodesToJoin.pop();
        int r_node = (int) (nodes_to_join >>> 32);
        int s_node = (int) (nodes_to_join & 0xFFFFFFFF);
        // Read all R nodes
        int r_mbrsToTest = r_node == 0 ? 1 : R.degree;
        boolean r_leaf = r_node * R.degree + 1 >= R.nodeCount;
        long nodeOffset = NodeSize * r_node;
        R.structure.seek(nodeOffset);
        for (int i = 0; i < r_mbrsToTest; i++) {
            r_data_offset[i] = R.structure.readInt();
            r_nodes[i].readFields(R.structure);
        }
        r_data_offset[r_mbrsToTest] = (r_node + r_mbrsToTest) == R.nodeCount ? R.treeSize : R.structure.readInt();
        // Read all S nodes
        int s_mbrsToTest = s_node == 0 ? 1 : S.degree;
        boolean s_leaf = s_node * S.degree + 1 >= S.nodeCount;
        if (r_leaf != s_leaf) {
            // This case happens when the two trees are of different heights
            if (r_leaf)
                r_mbrsToTest = 1;
            else
                s_mbrsToTest = 1;
        }
        nodeOffset = NodeSize * s_node;
        S.structure.seek(nodeOffset);
        for (int i = 0; i < s_mbrsToTest; i++) {
            s_data_offset[i] = S.structure.readInt();
            s_nodes[i].readFields(S.structure);
        }
        s_data_offset[s_mbrsToTest] = (s_node + s_mbrsToTest) == S.nodeCount ? S.treeSize : S.structure.readInt();
        // Find overlapping nodes by Cartesian product
        for (int i = 0; i < r_mbrsToTest; i++) {
            for (int j = 0; j < s_mbrsToTest; j++) {
                if (r_nodes[i].isIntersected(s_nodes[j])) {
                    if (r_leaf && s_leaf) {
                        // Reached leaf nodes in both trees. Start comparing records.
                        int r_start_offset = r_data_offset[i];
                        int r_end_offset = r_data_offset[i + 1];
                        int s_start_offset = s_data_offset[j];
                        int s_end_offset = s_data_offset[j + 1];
                        // Read or retrieve r_records
                        Shape[] r_records = r_records_cache.get(r_start_offset);
                        if (r_records == null) {
                            int cache_key = r_start_offset;
                            r_records = r_records_cache.popUnusedEntry();
                            if (r_records == null) {
                                r_records = new Shape[R.degree * 2];
                            }
                            // Need to read it from the stream
                            if (r_last_offset != r_start_offset) {
                                long seekTo = r_start_offset + R.treeStartOffset;
                                R.data.seek(seekTo);
                                r_lr = new LineReader(R.data);
                            }
                            int record_i = 0;
                            while (r_start_offset < r_end_offset) {
                                r_start_offset += r_lr.readLine(line);
                                if (r_records[record_i] == null)
                                    r_records[record_i] = R.stockObject.clone();
                                r_records[record_i].fromText(line);
                                record_i++;
                            }
                            r_last_offset = r_start_offset;
                            // Nullify other records
                            while (record_i < r_records.length)
                                r_records[record_i++] = null;
                            r_records_cache.put(cache_key, r_records);
                        }
                        // Read or retrieve s_records
                        Shape[] s_records = s_records_cache.get(s_start_offset);
                        if (s_records == null) {
                            int cache_key = s_start_offset;
                            // Need to read it from the stream
                            if (s_lr == null || s_last_offset != s_start_offset) {
                                // Need to reposition s_lr (the LineReader of S)
                                long seekTo = s_start_offset + S.treeStartOffset;
                                S.data.seek(seekTo);
                                s_lr = new LineReader(S.data);
                            }
                            s_records = s_records_cache.popUnusedEntry();
                            if (s_records == null) {
                                s_records = new Shape[S.degree * 2];
                            }
                            int record_i = 0;
                            while (s_start_offset < s_end_offset) {
                                s_start_offset += s_lr.readLine(line);
                                if (s_records[record_i] == null)
                                    s_records[record_i] = S.stockObject.clone();
                                s_records[record_i].fromText(line);
                                record_i++;
                            }
                            // Nullify other records
                            while (record_i < s_records.length)
                                s_records[record_i++] = null;
                            // Put in cache
                            s_records_cache.put(cache_key, s_records);
                            s_last_offset = s_start_offset;
                        }
                        // Do a Cartesian product between records to find overlapping pairs
                        for (int i_r = 0; i_r < r_records.length && r_records[i_r] != null; i_r++) {
                            for (int i_s = 0; i_s < s_records.length && s_records[i_s] != null; i_s++) {
                                if (r_records[i_r].isIntersected(s_records[i_s])
                                        && !r_records[i_r].equals(s_records[i_s])) {
                                    result_count++;
                                    if (output != null) {
                                        output.collect((S1) r_records[i_r], (S2) s_records[i_s]);
                                    }
                                }
                            }
                        }
                    } else {
                        // Add a new pair to the node pairs to be tested.
                        // Go down one level if possible.
                        int new_r_node, new_s_node;
                        if (!r_leaf) {
                            new_r_node = (r_node + i) * R.degree + 1;
                        } else {
                            new_r_node = r_node + i;
                        }
                        if (!s_leaf) {
                            new_s_node = (s_node + j) * S.degree + 1;
                        } else {
                            new_s_node = s_node + j;
                        }
                        long new_pair = (((long) new_r_node) << 32) | new_s_node;
                        nodesToJoin.put(new_pair);
                    }
                }
            }
        }
        reporter.progress();
    }
    return result_count;
}
From source file:edu.umn.cs.spatialHadoop.core.RectangleNN.java
License:Open Source License
/**
 * @param R
 * @param S
 * @param output
 * @return
 * @throws IOException
 */
public static <S1 extends Shape, S2 extends Shape> int SpatialJoin_planeSweep(List<S1> R, List<S2> S,
        ResultCollector2<S1, S2> output, Reporter reporter) throws IOException {
    int count = 0;
    Comparator<Shape> comparator = new Comparator<Shape>() {
        @Override
        public int compare(Shape o1, Shape o2) {
            if (o1.getMBR().x1 == o2.getMBR().x1)
                return 0;
            return o1.getMBR().x1 < o2.getMBR().x1 ? -1 : 1;
        }
    };
    long t1 = System.currentTimeMillis();
    LOG.debug("Joining lists " + R.size() + " with " + S.size());
    Collections.sort(R, comparator);
    Collections.sort(S, comparator);
    int i = 0, j = 0;
    try {
        while (i < R.size() && j < S.size()) {
            S1 r;
            S2 s;
            if (comparator.compare(R.get(i), S.get(j)) < 0) {
                r = R.get(i);
                int jj = j;
                while ((jj < S.size()) && ((s = S.get(jj)).getMBR().x1 <= r.getMBR().x2)) {
                    // Check that r and s overlap but are not the same object (for self joins)
                    if (r.isIntersected(s) && !r.equals(s)) {
                        if (output != null)
                            output.collect(r, s);
                        count++;
                    }
                    jj++;
                    if (reporter != null)
                        reporter.progress();
                }
                i++;
            } else {
                s = S.get(j);
                int ii = i;
                while ((ii < R.size()) && ((r = R.get(ii)).getMBR().x1 <= s.getMBR().x2)) {
                    if (r.isIntersected(s) && !r.equals(s)) {
                        if (output != null)
                            output.collect(r, s);
                        count++;
                    }
                    ii++;
                    if (reporter != null)
                        reporter.progress();
                }
                j++;
            }
            if (reporter != null)
                reporter.progress();
        }
    } catch (RuntimeException e) {
        e.printStackTrace();
    }
    long t2 = System.currentTimeMillis();
    LOG.debug("Finished plane sweep in " + (t2 - t1) + " millis and found " + count + " pairs");
    return count;
}
From source file:edu.umn.cs.spatialHadoop.core.RectangleNN.java
License:Open Source License
public static <S1 extends Shape, S2 extends Shape> int SpatialJoin_planeSweepFilterOnly(final S1[] R,
        final S2[] S, ResultCollector2<S1, S2> output, Reporter reporter) {
    int count = 0;
    final Comparator<Shape> comparator = new Comparator<Shape>() {
        @Override
        public int compare(Shape o1, Shape o2) {
            if (o1.getMBR().x1 == o2.getMBR().x1)
                return 0;
            return o1.getMBR().x1 < o2.getMBR().x1 ? -1 : 1;
        }
    };
    long t1 = System.currentTimeMillis();
    LOG.debug("Joining arrays " + R.length + " with " + S.length);
    Arrays.sort(R, comparator);
    Arrays.sort(S, comparator);
    int i = 0, j = 0;
    try {
        while (i < R.length && j < S.length) {
            S1 r;
            S2 s;
            if (comparator.compare(R[i], S[j]) < 0) {
                r = R[i];
                int jj = j;
                while ((jj < S.length) && ((s = S[jj]).getMBR().x1 <= r.getMBR().x2)) {
                    if (r.getMBR().isIntersected(s.getMBR())) {
                        if (output != null)
                            output.collect(r, s);
                        count++;
                    }
                    jj++;
                    if (reporter != null)
                        reporter.progress();
                }
                i++;
            } else {
                s = S[j];
                int ii = i;
                while ((ii < R.length) && ((r = R[ii]).getMBR().x1 <= s.getMBR().x2)) {
                    if (r.getMBR().isIntersected(s.getMBR())) {
                        if (output != null)
                            output.collect(r, s);
                        count++;
                    }
                    ii++;
                }
                j++;
                if (reporter != null)
                    reporter.progress();
            }
            if (reporter != null)
                reporter.progress();
        }
    } catch (RuntimeException e) {
        e.printStackTrace();
    }
    long t2 = System.currentTimeMillis();
    LOG.debug("Finished plane sweep filter only in " + (t2 - t1) + " millis and found " + count + " pairs");
    return count;
}
From source file:edu.umn.cs.spatialHadoop.core.RectangleNN.java
License:Open Source License
public static <S1 extends Shape, S2 extends Shape> int SpatialJoin_planeSweep(final S1[] R, final S2[] S,
        ResultCollector2<S1, S2> output, Reporter reporter) {
    int count = 0;
    final Comparator<Shape> comparator = new Comparator<Shape>() {
        @Override
        public int compare(Shape o1, Shape o2) {
            if (o1.getMBR().x1 == o2.getMBR().x1)
                return 0;
            return o1.getMBR().x1 < o2.getMBR().x1 ? -1 : 1;
        }
    };
    long t1 = System.currentTimeMillis();
    LOG.debug("Joining arrays " + R.length + " with " + S.length);
    Arrays.sort(R, comparator);
    Arrays.sort(S, comparator);
    int i = 0, j = 0;
    try {
        while (i < R.length && j < S.length) {
            S1 r;
            S2 s;
            if (comparator.compare(R[i], S[j]) < 0) {
                r = R[i];
                int jj = j;
                while ((jj < S.length) && ((s = S[jj]).getMBR().x1 <= r.getMBR().x2)) {
                    if (r.isIntersected(s)) {
                        if (output != null)
                            output.collect(r, s);
                        count++;
                    }
                    jj++;
                    if (reporter != null)
                        reporter.progress();
                }
                i++;
            } else {
                s = S[j];
                int ii = i;
                while ((ii < R.length) && ((r = R[ii]).getMBR().x1 <= s.getMBR().x2)) {
                    if (r.isIntersected(s)) {
                        if (output != null)
                            output.collect(r, s);
                        count++;
                    }
                    ii++;
                    if (reporter != null)
                        reporter.progress();
                }
                j++;
            }
            if (reporter != null)
                reporter.progress();
        }
    } catch (RuntimeException e) {
        e.printStackTrace();
    }
    long t2 = System.currentTimeMillis();
    LOG.debug("Finished plane sweep in " + (t2 - t1) + " millis and found " + count + " pairs");
    return count;
}