Example usage for org.apache.hadoop.mapred Reporter progress

Introduction

On this page you can find example usage of org.apache.hadoop.mapred Reporter.progress().

Prototype

public void progress();

Document

Report progress to the Hadoop framework.
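
A typical reason to call progress() is to keep a long-running map or reduce task alive: the framework kills task attempts that neither emit output nor report status within the configured task timeout. The sketch below is a minimal, illustrative mapper, not taken from the usage examples that follow; the class name and the per-token work are assumptions made only to show where reporter.progress() fits.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class SlowRecordMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, LongWritable> {

    public void map(LongWritable key, Text value, OutputCollector<Text, LongWritable> output,
            Reporter reporter) throws IOException {
        // Illustrative: each token stands in for a potentially expensive unit of work.
        for (String token : value.toString().split("\\s+")) {
            output.collect(new Text(token), new LongWritable(1L));
            // Tell the framework the task is still making progress so the
            // attempt is not marked as hung and killed after the task timeout.
            reporter.progress();
        }
    }
}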

Usage

From source file:com.ask.hive.hbase.HiveHBaseTextTableInputFormat.java

License:Apache License

public RecordReader<Text, Text> getRecordReader(InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {

    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getSplit();
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    List<String> hbaseColumnFamilies = new ArrayList<String>();
    List<String> hbaseColumnQualifiers = new ArrayList<String>();
    List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
    List<byte[]> hbaseColumnQualifiersBytes = new ArrayList<byte[]>();

    int iKey;
    try {
        iKey = parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies, hbaseColumnFamiliesBytes,
                hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
    } catch (Exception se) {
        throw new IOException(se);
    }
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

    if (hbaseColumnFamilies.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    boolean addAll = (readColIDs.size() == 0);
    Scan scan = new Scan();
    boolean empty = true;

    if (!addAll) {
        for (int i : readColIDs) {
            if (i == iKey) {
                continue;
            }
            scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            empty = false;
        }
    }

    // The HBase table's row key maps to a Hive table column. In the corner case when only the
    // row key column is selected in Hive, the HBase Scan will be empty i.e. no column family/
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // table's column projection.
    if (empty) {
        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
            if (i == iKey) {
                continue;
            }

            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }

            if (!addAll) {
                break;
            }
        }
    }

    //setting start and end time for scanning
    setTime(jobConf, scan);
    // If Hive's optimizer gave us a filter to process, convert it to the
    // HBase scan form now.
    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);

    setScan(scan);

    Job job = new Job(jobConf);
    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {

        @Override
        public void progress() {
            reporter.progress();
        }
    };

    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader = createRecordReader(
            tableSplit, tac);

    return new RecordReader<Text, Text>() {

        //@Override
        public void close() throws IOException {
            recordReader.close();
        }

        // @Override
        public Text createKey() {
            return new Text();
        }

        // @Override
        public Text createValue() {
            return new Text();
        }

        // @Override
        public long getPos() throws IOException {
            return 0;
        }

        // @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;

            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return progress;
        }

        // @Override
        public boolean next(Text rowKey, Text value) throws IOException {

            boolean next = false;

            try {
                next = recordReader.nextKeyValue();

                // Build the value string from each cell's qualifier prefix and value
                if (next) {
                    rowKey.set(Bytes.toString(recordReader.getCurrentValue().getRow()));
                    StringBuilder val = new StringBuilder();
                    String prev = "";
                    for (KeyValue kv : recordReader.getCurrentValue().raw()) {
                        String current = new String(kv.getQualifier());
                        char[] col = new String(current).toCharArray();
                        if (val.length() > 0) {
                            if (prev.equals(current))
                                val.append(",");
                            else
                                val.append("\t");
                        }
                        prev = current;
                        val.append(col[0]).append("_");
                        val.append(Bytes.toString(kv.getValue()));
                    }
                    value.set(val.toString()); // rowKey.set(Bytes.toString(recordReader.getCurrentValue().getRow()));;
                    // value.set(Bytes.toString(recordReader.getCurrentValue().value()));
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return next;
        }
    };
}

From source file:com.ask.hive.hbase.HiveHBaseTimeTableInputFormat.java

License:Apache License

public RecordReader<ImmutableBytesWritable, Result> getRecordReader(InputSplit split, JobConf jobConf,
        final Reporter reporter) throws IOException {

    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getSplit();
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    List<String> hbaseColumnFamilies = new ArrayList<String>();
    List<String> hbaseColumnQualifiers = new ArrayList<String>();
    List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
    List<byte[]> hbaseColumnQualifiersBytes = new ArrayList<byte[]>();

    int iKey;
    try {
        iKey = HBaseSerDe.parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies, hbaseColumnFamiliesBytes,
                hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
    } catch (SerDeException se) {
        throw new IOException(se);
    }
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

    if (hbaseColumnFamilies.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    boolean addAll = (readColIDs.size() == 0);
    Scan scan = new Scan();
    boolean empty = true;

    if (!addAll) {
        for (int i : readColIDs) {
            if (i == iKey) {
                continue;
            }

            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }

            empty = false;
        }
    }

    // The HBase table's row key maps to a Hive table column. In the corner case when only the
    // row key column is selected in Hive, the HBase Scan will be empty i.e. no column family/
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // table's column projection.
    if (empty) {
        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
            if (i == iKey) {
                continue;
            }

            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }

            if (!addAll) {
                break;
            }
        }
    }

    //setting start and end time for scanning
    setTime(jobConf, scan);
    // If Hive's optimizer gave us a filter to process, convert it to the
    // HBase scan form now.
    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);

    setScan(scan);

    Job job = new Job(jobConf);
    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {

        @Override
        public void progress() {
            reporter.progress();
        }
    };

    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader = createRecordReader(
            tableSplit, tac);

    return new RecordReader<ImmutableBytesWritable, Result>() {

        //@Override
        public void close() throws IOException {
            recordReader.close();
        }

        // @Override
        public ImmutableBytesWritable createKey() {
            return new ImmutableBytesWritable();
        }

        // @Override
        public Result createValue() {
            return new Result();
        }

        // @Override
        public long getPos() throws IOException {
            return 0;
        }

        // @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;

            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return progress;
        }

        // @Override
        public boolean next(ImmutableBytesWritable rowKey, Result value) throws IOException {

            boolean next = false;

            try {
                next = recordReader.nextKeyValue();

                if (next) {
                    rowKey.set(recordReader.getCurrentValue().getRow());
                    Writables.copyWritable(recordReader.getCurrentValue(), value);
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return next;
        }
    };
}

From source file:com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyInputFormat.java

License:Apache License

public org.apache.hadoop.mapred.RecordReader<ByteBuffer, SortedMap<ByteBuffer, IColumn>> getRecordReader(
        org.apache.hadoop.mapred.InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {
    TaskAttemptContext tac = new TaskAttemptContext(jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };

    ColumnFamilyRecordReader recordReader = new ColumnFamilyRecordReader(
            jobConf.getInt(CASSANDRA_HADOOP_MAX_KEY_SIZE, CASSANDRA_HADOOP_MAX_KEY_SIZE_DEFAULT));
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}

From source file:com.ebay.erl.mobius.core.mapred.DefaultMobiusCombiner.java

License:Apache License

@Override
public void joinreduce(Tuple key, DataJoinValueGroup<Tuple> values,
        OutputCollector<DataJoinKey, DataJoinValue> output, Reporter reporter) throws IOException {
    if (!reporterSet) {
        for (Projectable p : this._projections) {
            p.setReporter(reporter);
        }
        reporterSet = true;
    }

    if (values.hasNext()) {
        // reset group function results.
        if (groupFunctionResults.size() > 0) {
            for (GroupFunction func : this.groupFunctionResults.keySet()) {
                this.groupFunctionResults.get(func).clear();
                func.reset();
            }
        }

        Byte datasetID = values.nextDatasetID();

        if (!key.hasSchema()) {
            key.setSchema(this.getKeySchemaByDatasetID(datasetID));
        }

        Iterator<Tuple> tuples = values.next();

        Tuple combinedValue = new Tuple();

        long progress = 0L;
        while (tuples.hasNext()) {
            Tuple aTuple = tuples.next();
            if (++progress % 3000 == 0) {
                reporter.progress();
            }
            aTuple.setSchema(this.getValueSchemaByDatasetID(datasetID));

            for (Projectable p : this.dsToFuncsMapping.get(datasetID)) {
                if (p instanceof GroupFunction) {
                    ((GroupFunction) p).consume(aTuple);
                } else {
                    ExtendFunction func = (ExtendFunction) p;
                    Tuple computedResult = func.getResult(aTuple);

                    String name = func.getInputColumns()[0].getInputColumnName();
                    combinedValue.insert(name, computedResult.get(0));
                }
            }
        }

        for (Projectable p : this.dsToFuncsMapping.get(datasetID)) {
            if (p instanceof GroupFunction) {
                BigTupleList aggregatedResult = ((GroupFunction) p).getResult();
                if (aggregatedResult.size() == 1) {
                    Tuple aggResult = aggregatedResult.getFirst();
                    String name = p.getInputColumns()[0].getInputColumnName();
                    combinedValue.insert(name, aggResult.get(0));
                } else if (aggregatedResult.size() > 1)
                    throw new IllegalArgumentException(
                            p.toString() + " is a group function that generates " + "more than one rows ("
                                    + aggregatedResult.size() + ") per key, so it is not combinable.");
            }
        }

        DataJoinKey outKey = new DataJoinKey(datasetID, key);
        DataJoinValue outValue = new DataJoinValue(datasetID, combinedValue);
        output.collect(outKey, outValue);
    }
}

From source file:com.hyperiongray.ccmr.s3wordcount.WordCountOnlyMapper.java

License:Apache License

public void map(Object key, Text value, OutputCollector<NullWritable, Text> outputCollector, Reporter reporter)
        throws IOException {

    // We're accessing a publicly available bucket so don't need to fill in
    // our credentials
    ArchiveReader ar;
    try {
        S3Service s3s = new RestS3Service(null);

        // Let's grab a file out of the CommonCrawl S3 bucket
        String fn = value.toString();
        logger.info(fn);

        S3Object f = s3s.getObject("aws-publicdatasets", fn, null, null, null, null, null, null);

        // The file name identifies the ArchiveReader and indicates if it
        // should be decompressed
        ar = WARCReaderFactory.get(fn, f.getDataInputStream(), true);

    } catch (ServiceException e) {
        logger.error("S3 connection Failed", e);
        throw new RuntimeException(e);
    }

    // Once we have an ArchiveReader, we can work through each of the
    // records it contains
    int i = 0;
    logger.info("Started " + new Date());
    for (ArchiveRecord r : ar) {

        reporter.progress();
        String url = "";
        try {

            // The header file contains information such as the type of
            // record, size, creation time, and URL
            url = r.getHeader().getUrl();
            String crawledDate = r.getHeader().getDate();
            if (url == null)
                continue;

            // If we want to read the contents of the record, we can use the
            // ArchiveRecord as an InputStream
            // Create a byte array that is as long as all the record's
            // stated length
            OutputStream os = new ByteArrayOutputStream();
            try {
                r.dump(os);
            } finally {
                try {
                    if (r != null)
                        r.close();
                } catch (Exception e) {
                    logger.error("reading inputstream Failed", e);
                }
            }
            // Note: potential optimization would be to have a large buffer
            // only allocated once

            // Why don't we convert it to a string and print the start of
            // it?
            String content = new String(os.toString());

            Map<String, Integer> matches = contentMatcher.matchContent(content);
            int score = contentMatcher.score(matches);

            if (score > LOWER_SCORE_THRESHOLD) {

                logger.info("****************************************");
                logger.info("URL: " + url + " Score: " + score + " Detail: " + matches);
                // outputCollector.collect(new IntWritable(score), new Text(url));
                outputCollector.collect(NullWritable.get(), new Text(outputParser
                        .parse(contentMatcher.getTitle(content), url, crawledDate, score, matches)));
            }

            logger.debug(new Integer(i).toString());

            if (i++ > sampleSize) {
                logger.info("Finished " + new Date());
                break;
            }

        } catch (Exception e) {
            logger.error("url failed " + url, e);
        }
    }

}

From source file:com.liveramp.hank.hadoop.DomainBuilderMapper.java

License:Apache License

public final void map(K key, V value,
        OutputCollector<KeyAndPartitionWritableComparable, ValueWritable> outputCollector, Reporter reporter)
        throws IOException {
    KeyValuePair keyValue = buildHankKeyValue(key, value);
    KeyAndPartitionWritableComparable hankKeyWritableComparable = new KeyAndPartitionWritableComparable(
            storageEngine, partitioner, numPartitions, keyValue.getKey());
    ValueWritable hankValueWritable = new ValueWritable(keyValue.getValue());
    outputCollector.collect(hankKeyWritableComparable, hankValueWritable);
    reporter.progress();
}

From source file:edu.umn.cs.spatialHadoop.core.RTree.java

License:Open Source License

/**
 * Performs a spatial join between records in two R-trees
 * @param R
 * @param S
 * @param output
 * @return
 * @throws IOException
 * SuppressWarnings("resource") is used because we create LineReaders on the
 * internal data stream of both R and S. We do not want to close the
 * LineReader because it will subsequently close the internal data stream
 * of R and S which is something we want to avoid because both R and S are
 * not created by this function and it should not free these resources.
 */
protected static <S1 extends Shape, S2 extends Shape> int spatialJoinDisk(final RTree<S1> R, final RTree<S2> S,
        final ResultCollector2<S1, S2> output, final Reporter reporter) throws IOException {
    // Reserve locations for nodes MBRs and data offset [start, end)
    final Rectangle[] r_nodes = new Rectangle[R.degree];
    for (int i = 0; i < r_nodes.length; i++)
        r_nodes[i] = new Rectangle();
    final int[] r_data_offset = new int[R.degree + 1];

    final Rectangle[] s_nodes = new Rectangle[S.degree];
    for (int i = 0; i < s_nodes.length; i++)
        s_nodes[i] = new Rectangle();
    final int[] s_data_offset = new int[S.degree + 1];

    PriorityQueue<Long> nodesToJoin = new PriorityQueue<Long>() {
        {
            initialize(R.leafNodeCount + S.leafNodeCount);
        }

        @Override
        protected boolean lessThan(Object a, Object b) {
            return ((Long) a) < ((Long) b);
        }
    };

    nodesToJoin.put(0L);

    LruCache<Integer, Shape[]> r_records_cache = new LruCache<Integer, Shape[]>(R.degree * 2);
    LruCache<Integer, Shape[]> s_records_cache = new LruCache<Integer, Shape[]>(S.degree * R.degree * 4);

    Text line = new Text2();

    int result_count = 0;

    LineReader r_lr = null, s_lr = null;
    // Last offset read from r and s
    int r_last_offset = 0;
    int s_last_offset = 0;

    while (nodesToJoin.size() > 0) {
        long nodes_to_join = nodesToJoin.pop();
        int r_node = (int) (nodes_to_join >>> 32);
        int s_node = (int) (nodes_to_join & 0xFFFFFFFF);
        // Read all R nodes
        int r_mbrsToTest = r_node == 0 ? 1 : R.degree;
        boolean r_leaf = r_node * R.degree + 1 >= R.nodeCount;

        long nodeOffset = NodeSize * r_node;
        R.structure.seek(nodeOffset);

        for (int i = 0; i < r_mbrsToTest; i++) {
            r_data_offset[i] = R.structure.readInt();
            r_nodes[i].readFields(R.structure);
        }
        r_data_offset[r_mbrsToTest] = (r_node + r_mbrsToTest) == R.nodeCount ? R.treeSize
                : R.structure.readInt();

        // Read all S nodes
        int s_mbrsToTest = s_node == 0 ? 1 : S.degree;
        boolean s_leaf = s_node * S.degree + 1 >= S.nodeCount;

        if (r_leaf != s_leaf) {
            // This case happens when the two trees are of different heights
            if (r_leaf)
                r_mbrsToTest = 1;
            else
                s_mbrsToTest = 1;
        }

        nodeOffset = NodeSize * s_node;
        S.structure.seek(nodeOffset);

        for (int i = 0; i < s_mbrsToTest; i++) {
            s_data_offset[i] = S.structure.readInt();
            s_nodes[i].readFields(S.structure);
        }
        s_data_offset[s_mbrsToTest] = (s_node + s_mbrsToTest) == S.nodeCount ? S.treeSize
                : S.structure.readInt();

        // Find overlapping nodes by Cartesian product
        for (int i = 0; i < r_mbrsToTest; i++) {
            for (int j = 0; j < s_mbrsToTest; j++) {
                if (r_nodes[i].isIntersected(s_nodes[j])) {
                    if (r_leaf && s_leaf) {
                        // Reached leaf nodes in both trees. Start comparing records
                        int r_start_offset = r_data_offset[i];
                        int r_end_offset = r_data_offset[i + 1];

                        int s_start_offset = s_data_offset[j];
                        int s_end_offset = s_data_offset[j + 1];

                        ///////////////////////////////////////////////////////////////////
                        // Read or retrieve r_records
                        Shape[] r_records = r_records_cache.get(r_start_offset);
                        if (r_records == null) {
                            int cache_key = r_start_offset;
                            r_records = r_records_cache.popUnusedEntry();
                            if (r_records == null) {
                                r_records = new Shape[R.degree * 2];
                            }

                            // Need to read it from stream
                            if (r_last_offset != r_start_offset) {
                                long seekTo = r_start_offset + R.treeStartOffset;
                                R.data.seek(seekTo);
                                r_lr = new LineReader(R.data);
                            }
                            int record_i = 0;
                            while (r_start_offset < r_end_offset) {
                                r_start_offset += r_lr.readLine(line);
                                if (r_records[record_i] == null)
                                    r_records[record_i] = R.stockObject.clone();
                                r_records[record_i].fromText(line);
                                record_i++;
                            }
                            r_last_offset = r_start_offset;
                            // Nullify other records
                            while (record_i < r_records.length)
                                r_records[record_i++] = null;
                            r_records_cache.put(cache_key, r_records);
                        }

                        // Read or retrieve s_records
                        Shape[] s_records = s_records_cache.get(s_start_offset);
                        if (s_records == null) {
                            int cache_key = s_start_offset;

                            // Need to read it from stream
                            if (s_lr == null || s_last_offset != s_start_offset) {
                                // Need to reposition s_lr (LineReader of S)
                                long seekTo = s_start_offset + S.treeStartOffset;
                                S.data.seek(seekTo);
                                s_lr = new LineReader(S.data);
                            }
                            s_records = s_records_cache.popUnusedEntry();
                            if (s_records == null) {
                                s_records = new Shape[S.degree * 2];
                            }
                            int record_i = 0;
                            while (s_start_offset < s_end_offset) {
                                s_start_offset += s_lr.readLine(line);
                                if (s_records[record_i] == null)
                                    s_records[record_i] = S.stockObject.clone();
                                s_records[record_i].fromText(line);
                                record_i++;
                            }
                            // Nullify other records
                            while (record_i < s_records.length)
                                s_records[record_i++] = null;
                            // Put in cache
                            s_records_cache.put(cache_key, s_records);
                            s_last_offset = s_start_offset;
                        }

                        // Do Cartesian product between records to find overlapping pairs
                        for (int i_r = 0; i_r < r_records.length && r_records[i_r] != null; i_r++) {
                            for (int i_s = 0; i_s < s_records.length && s_records[i_s] != null; i_s++) {
                                if (r_records[i_r].isIntersected(s_records[i_s])
                                        && !r_records[i_r].equals(s_records[i_s])) {
                                    result_count++;
                                    if (output != null) {
                                        output.collect((S1) r_records[i_r], (S2) s_records[i_s]);
                                    }
                                }
                            }
                        }
                        ///////////////////////////////////////////////////////////////////

                    } else {
                        // Add a new pair to node pairs to be tested
                        // Go down one level if possible
                        int new_r_node, new_s_node;
                        if (!r_leaf) {
                            new_r_node = (r_node + i) * R.degree + 1;
                        } else {
                            new_r_node = r_node + i;
                        }
                        if (!s_leaf) {
                            new_s_node = (s_node + j) * S.degree + 1;
                        } else {
                            new_s_node = s_node + j;
                        }
                        long new_pair = (((long) new_r_node) << 32) | new_s_node;
                        nodesToJoin.put(new_pair);
                    }
                }
            }
        }
        reporter.progress();
    }
    return result_count;
}

From source file:edu.umn.cs.spatialHadoop.core.RectangleNN.java

License:Open Source License

/**
 * @param R
 * @param S
 * @param output
 * @return
 * @throws IOException
 */
public static <S1 extends Shape, S2 extends Shape> int SpatialJoin_planeSweep(List<S1> R, List<S2> S,
        ResultCollector2<S1, S2> output, Reporter reporter) throws IOException {
    int count = 0;

    Comparator<Shape> comparator = new Comparator<Shape>() {
        @Override
        public int compare(Shape o1, Shape o2) {
            if (o1.getMBR().x1 == o2.getMBR().x1)
                return 0;
            return o1.getMBR().x1 < o2.getMBR().x1 ? -1 : 1;
        }
    };

    long t1 = System.currentTimeMillis();
    LOG.debug("Joining lists " + R.size() + " with " + S.size());
    Collections.sort(R, comparator);
    Collections.sort(S, comparator);

    int i = 0, j = 0;

    try {
        while (i < R.size() && j < S.size()) {
            S1 r;
            S2 s;
            if (comparator.compare(R.get(i), S.get(j)) < 0) {
                r = R.get(i);
                int jj = j;

                while ((jj < S.size()) && ((s = S.get(jj)).getMBR().x1 <= r.getMBR().x2)) {
                    // Check if r and s are overlapping but not the same object
                    // for self join
                    if (r.isIntersected(s) && !r.equals(s)) {
                        if (output != null)
                            output.collect(r, s);
                        count++;
                    }
                    jj++;
                    if (reporter != null)
                        reporter.progress();
                }
                i++;
            } else {
                s = S.get(j);
                int ii = i;

                while ((ii < R.size()) && ((r = R.get(ii)).getMBR().x1 <= s.getMBR().x2)) {
                    if (r.isIntersected(s) && !r.equals(s)) {
                        if (output != null)
                            output.collect(r, s);
                        count++;
                    }
                    ii++;
                    if (reporter != null)
                        reporter.progress();
                }
                j++;
            }
            if (reporter != null)
                reporter.progress();
        }
    } catch (RuntimeException e) {
        e.printStackTrace();
    }
    long t2 = System.currentTimeMillis();
    LOG.debug("Finished plane sweep in " + (t2 - t1) + " millis and found " + count + " pairs");
    return count;
}

From source file:edu.umn.cs.spatialHadoop.core.RectangleNN.java

License:Open Source License

public static <S1 extends Shape, S2 extends Shape> int SpatialJoin_planeSweepFilterOnly(final S1[] R,
        final S2[] S, ResultCollector2<S1, S2> output, Reporter reporter) {
    int count = 0;

    final Comparator<Shape> comparator = new Comparator<Shape>() {
        @Override
        public int compare(Shape o1, Shape o2) {
            if (o1.getMBR().x1 == o2.getMBR().x1)
                return 0;
            return o1.getMBR().x1 < o2.getMBR().x1 ? -1 : 1;
        }
    };

    long t1 = System.currentTimeMillis();
    LOG.debug("Joining arrays " + R.length + " with " + S.length);
    Arrays.sort(R, comparator);
    Arrays.sort(S, comparator);

    int i = 0, j = 0;

    try {
        while (i < R.length && j < S.length) {
            S1 r;
            S2 s;
            if (comparator.compare(R[i], S[j]) < 0) {
                r = R[i];
                int jj = j;

                while ((jj < S.length) && ((s = S[jj]).getMBR().x1 <= r.getMBR().x2)) {
                    if (r.getMBR().isIntersected(s.getMBR())) {
                        if (output != null)
                            output.collect(r, s);
                        count++;
                    }
                    jj++;

                    if (reporter != null)
                        reporter.progress();
                }
                i++;
            } else {
                s = S[j];
                int ii = i;

                while ((ii < R.length) && ((r = R[ii]).getMBR().x1 <= s.getMBR().x2)) {
                    if (r.getMBR().isIntersected(s.getMBR())) {
                        if (output != null)
                            output.collect(r, s);
                        count++;
                    }
                    ii++;
                }
                j++;
                if (reporter != null)
                    reporter.progress();
            }
            if (reporter != null)
                reporter.progress();
        }
    } catch (RuntimeException e) {
        e.printStackTrace();
    }
    long t2 = System.currentTimeMillis();
    LOG.debug("Finished plane sweep filter only in " + (t2 - t1) + " millis and found " + count + " pairs");
    return count;
}

From source file:edu.umn.cs.spatialHadoop.core.RectangleNN.java

License:Open Source License

public static <S1 extends Shape, S2 extends Shape> int SpatialJoin_planeSweep(final S1[] R, final S2[] S,
        ResultCollector2<S1, S2> output, Reporter reporter) {
    int count = 0;

    final Comparator<Shape> comparator = new Comparator<Shape>() {
        @Override
        public int compare(Shape o1, Shape o2) {
            if (o1.getMBR().x1 == o2.getMBR().x1)
                return 0;
            return o1.getMBR().x1 < o2.getMBR().x1 ? -1 : 1;
        }
    };

    long t1 = System.currentTimeMillis();
    LOG.debug("Joining arrays " + R.length + " with " + S.length);
    Arrays.sort(R, comparator);
    Arrays.sort(S, comparator);

    int i = 0, j = 0;

    try {
        while (i < R.length && j < S.length) {
            S1 r;
            S2 s;
            if (comparator.compare(R[i], S[j]) < 0) {
                r = R[i];
                int jj = j;

                while ((jj < S.length) && ((s = S[jj]).getMBR().x1 <= r.getMBR().x2)) {
                    if (r.isIntersected(s)) {
                        if (output != null)
                            output.collect(r, s);
                        count++;
                    }
                    jj++;
                    if (reporter != null)
                        reporter.progress();
                }
                i++;
            } else {
                s = S[j];
                int ii = i;

                while ((ii < R.length) && ((r = R[ii]).getMBR().x1 <= s.getMBR().x2)) {
                    if (r.isIntersected(s)) {
                        if (output != null)
                            output.collect(r, s);
                        count++;
                    }
                    ii++;
                    if (reporter != null)
                        reporter.progress();
                }
                j++;
            }
            if (reporter != null)
                reporter.progress();
        }
    } catch (RuntimeException e) {
        e.printStackTrace();
    }
    long t2 = System.currentTimeMillis();
    LOG.debug("Finished plane sweep in " + (t2 - t1) + " millis and found " + count + " pairs");
    return count;
}