Example usage for org.apache.hadoop.mapred Reporter progress

Introduction

On this page you can find example usage of org.apache.hadoop.mapred Reporter.progress().

Prototype

public void progress();

Document

Report progress to the Hadoop framework.
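
A typical reason to call progress() is to keep a long-running map or reduce task alive: the framework kills task attempts that neither emit output nor report status within the configured task timeout. The sketch below is a minimal, illustrative mapper, not taken from the usage examples that follow; the class name and the per-token work are assumptions made only to show where reporter.progress() fits.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class SlowRecordMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, LongWritable> {

    public void map(LongWritable key, Text value, OutputCollector<Text, LongWritable> output,
            Reporter reporter) throws IOException {
        // Illustrative: each token stands in for a potentially expensive unit of work.
        for (String token : value.toString().split("\\s+")) {
            output.collect(new Text(token), new LongWritable(1L));
            // Tell the framework the task is still making progress so the
            // attempt is not marked as hung and killed after the task timeout.
            reporter.progress();
        }
    }
}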

Usage

From source file:com.ask.hive.hbase.HiveHBaseTextTableInputFormat.java

License:Apache License

public RecordReader<Text, Text> getRecordReader(InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {

    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getSplit();
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    List<String> hbaseColumnFamilies = new ArrayList<String>();
    List<String> hbaseColumnQualifiers = new ArrayList<String>();
    List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
    List<byte[]> hbaseColumnQualifiersBytes = new ArrayList<byte[]>();

    int iKey;
    try {
        iKey = parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies, hbaseColumnFamiliesBytes,
                hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
    } catch (Exception se) {
        throw new IOException(se);
    }
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

    if (hbaseColumnFamilies.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    boolean addAll = (readColIDs.size() == 0);
    Scan scan = new Scan();
    boolean empty = true;

    if (!addAll) {
        for (int i : readColIDs) {
            if (i == iKey) {
                continue;
            }
            scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            empty = false;
        }
    }

    // The HBase table's row key maps to a Hive table column. In the corner case when only the
    // row key column is selected in Hive, the HBase Scan will be empty i.e. no column family/
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // table's column projection.
    if (empty) {
        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
            if (i == iKey) {
                continue;
            }

            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }

            if (!addAll) {
                break;
            }
        }
    }

    //setting start and end time for scanning
    setTime(jobConf, scan);
    // If Hive's optimizer gave us a filter to process, convert it to the
    // HBase scan form now.
    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);

    setScan(scan);

    Job job = new Job(jobConf);
    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {

        @Override
        public void progress() {
            reporter.progress();
        }
    };

    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader = createRecordReader(
            tableSplit, tac);

    return new RecordReader<Text, Text>() {

        //@Override
        public void close() throws IOException {
            recordReader.close();
        }

        // @Override
        public Text createKey() {
            return new Text();
        }

        // @Override
        public Text createValue() {
            return new Text();
        }

        // @Override
        public long getPos() throws IOException {
            return 0;
        }

        // @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;

            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return progress;
        }

        // @Override
        public boolean next(Text rowKey, Text value) throws IOException {

            boolean next = false;

            try {
                next = recordReader.nextKeyValue();

                // Build the value string from each cell's qualifier prefix and value
                if (next) {
                    rowKey.set(Bytes.toString(recordReader.getCurrentValue().getRow()));
                    StringBuilder val = new StringBuilder();
                    String prev = "";
                    for (KeyValue kv : recordReader.getCurrentValue().raw()) {
                        String current = new String(kv.getQualifier());
                        char[] col = new String(current).toCharArray();
                        if (val.length() > 0) {
                            if (prev.equals(current))
                                val.append(",");
                            else
                                val.append("\t");
                        }
                        prev = current;
                        val.append(col[0]).append("_");
                        val.append(Bytes.toString(kv.getValue()));
                    }
                    value.set(val.toString()); // rowKey.set(Bytes.toString(recordReader.getCurrentValue().getRow()));;
                    // value.set(Bytes.toString(recordReader.getCurrentValue().value()));
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return next;
        }
    };
}

From source file:com.ask.hive.hbase.HiveHBaseTimeTableInputFormat.java

License:Apache License

public RecordReader<ImmutableBytesWritable, Result> getRecordReader(InputSplit split, JobConf jobConf,
        final Reporter reporter) throws IOException {

    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getSplit();
    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    List<String> hbaseColumnFamilies = new ArrayList<String>();
    List<String> hbaseColumnQualifiers = new ArrayList<String>();
    List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
    List<byte[]> hbaseColumnQualifiersBytes = new ArrayList<byte[]>();

    int iKey;
    try {
        iKey = HBaseSerDe.parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies, hbaseColumnFamiliesBytes,
                hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
    } catch (SerDeException se) {
        throw new IOException(se);
    }
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

    if (hbaseColumnFamilies.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    boolean addAll = (readColIDs.size() == 0);
    Scan scan = new Scan();
    boolean empty = true;

    if (!addAll) {
        for (int i : readColIDs) {
            if (i == iKey) {
                continue;
            }

            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }

            empty = false;
        }
    }

    // The HBase table's row key maps to a Hive table column. In the corner case when only the
    // row key column is selected in Hive, the HBase Scan will be empty i.e. no column family/
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // table's column projection.
    if (empty) {
        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
            if (i == iKey) {
                continue;
            }

            if (hbaseColumnQualifiers.get(i) == null) {
                scan.addFamily(hbaseColumnFamiliesBytes.get(i));
            } else {
                scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
            }

            if (!addAll) {
                break;
            }
        }
    }

    //setting start and end time for scanning
    setTime(jobConf, scan);
    // If Hive's optimizer gave us a filter to process, convert it to the
    // HBase scan form now.
    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);

    setScan(scan);

    Job job = new Job(jobConf);
    TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {

        @Override
        public void progress() {
            reporter.progress();
        }
    };

    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader = createRecordReader(
            tableSplit, tac);

    return new RecordReader<ImmutableBytesWritable, Result>() {

        //@Override
        public void close() throws IOException {
            recordReader.close();
        }

        // @Override
        public ImmutableBytesWritable createKey() {
            return new ImmutableBytesWritable();
        }

        // @Override
        public Result createValue() {
            return new Result();
        }

        // @Override
        public long getPos() throws IOException {
            return 0;
        }

        // @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;

            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return progress;
        }

        // @Override
        public boolean next(ImmutableBytesWritable rowKey, Result value) throws IOException {

            boolean next = false;

            try {
                next = recordReader.nextKeyValue();

                if (next) {
                    rowKey.set(recordReader.getCurrentValue().getRow());
                    Writables.copyWritable(recordReader.getCurrentValue(), value);
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }

            return next;
        }
    };
}

From source file:com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyInputFormat.java

License:Apache License

public org.apache.hadoop.mapred.RecordReader<ByteBuffer, SortedMap<ByteBuffer, IColumn>> getRecordReader(
        org.apache.hadoop.mapred.InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {
    TaskAttemptContext tac = new TaskAttemptContext(jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };

    ColumnFamilyRecordReader recordReader = new ColumnFamilyRecordReader(
            jobConf.getInt(CASSANDRA_HADOOP_MAX_KEY_SIZE, CASSANDRA_HADOOP_MAX_KEY_SIZE_DEFAULT));
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}

From source file:com.ebay.erl.mobius.core.mapred.DefaultMobiusCombiner.java

License:Apache License

@Override
public void joinreduce(Tuple key, DataJoinValueGroup<Tuple> values,
        OutputCollector<DataJoinKey, DataJoinValue> output, Reporter reporter) throws IOException {
    if (!reporterSet) {
        for (Projectable p : this._projections) {
            p.setReporter(reporter);
        }
        reporterSet = true;
    }

    if (values.hasNext()) {
        // reset group function results.
        if (groupFunctionResults.size() > 0) {
            for (GroupFunction func : this.groupFunctionResults.keySet()) {
                this.groupFunctionResults.get(func).clear();
                func.reset();
            }
        }

        Byte datasetID = values.nextDatasetID();

        if (!key.hasSchema()) {
            key.setSchema(this.getKeySchemaByDatasetID(datasetID));
        }

        Iterator<Tuple> tuples = values.next();

        Tuple combinedValue = new Tuple();

        long progress = 0L;
        while (tuples.hasNext()) {
            Tuple aTuple = tuples.next();
            if (++progress % 3000 == 0) {
                reporter.progress();
            }
            aTuple.setSchema(this.getValueSchemaByDatasetID(datasetID));

            for (Projectable p : this.dsToFuncsMapping.get(datasetID)) {
                if (p instanceof GroupFunction) {
                    ((GroupFunction) p).consume(aTuple);
                } else {
                    ExtendFunction func = (ExtendFunction) p;
                    Tuple computedResult = func.getResult(aTuple);

                    String name = func.getInputColumns()[0].getInputColumnName();
                    combinedValue.insert(name, computedResult.get(0));
                }
            }
        }

        for (Projectable p : this.dsToFuncsMapping.get(datasetID)) {
            if (p instanceof GroupFunction) {
                BigTupleList aggregatedResult = ((GroupFunction) p).getResult();
                if (aggregatedResult.size() == 1) {
                    Tuple aggResult = aggregatedResult.getFirst();
                    String name = p.getInputColumns()[0].getInputColumnName();
                    combinedValue.insert(name, aggResult.get(0));
                } else if (aggregatedResult.size() > 1)
                    throw new IllegalArgumentException(
                            p.toString() + " is a group function that generates " + "more than one rows ("
                                    + aggregatedResult.size() + ") per key, so it is not combinable.");
            }
        }

        DataJoinKey outKey = new DataJoinKey(datasetID, key);
        DataJoinValue outValue = new DataJoinValue(datasetID, combinedValue);
        output.collect(outKey, outValue);
    }
}

From source file:com.hyperiongray.ccmr.s3wordcount.WordCountOnlyMapper.java

License:Apache License

public void map(Object key, Text value, OutputCollector<NullWritable, Text> outputCollector, Reporter reporter)
        throws IOException {

    // We're accessing a publicly available bucket so don't need to fill in
    // our credentials
    ArchiveReader ar;
    try {
        S3Service s3s = new RestS3Service(null);

        // Let's grab a file out of the CommonCrawl S3 bucket
        String fn = value.toString();
        logger.info(fn);

        S3Object f = s3s.getObject("aws-publicdatasets", fn, null, null, null, null, null, null);

        // The file name identifies the ArchiveReader and indicates if it
        // should be decompressed
        ar = WARCReaderFactory.get(fn, f.getDataInputStream(), true);

    } catch (ServiceException e) {
        logger.error("S3 connection Failed", e);
        throw new RuntimeException(e);
    }

    // Once we have an ArchiveReader, we can work through each of the
    // records it contains
    int i = 0;
    logger.info("Started " + new Date());
    for (ArchiveRecord r : ar) {

        reporter.progress();
        String url = "";
        try {

            // The header file contains information such as the type of
            // record, size, creation time, and URL
            url = r.getHeader().getUrl();
            String crawledDate = r.getHeader().getDate();
            if (url == null)
                continue;

            // If we want to read the contents of the record, we can use the
            // ArchiveRecord as an InputStream
            // Create a byte array that is as long as all the record's
            // stated length
            OutputStream os = new ByteArrayOutputStream();
            try {
                r.dump(os);
            } finally {
                try {
                    if (r != null)
                        r.close();
                } catch (Exception e) {
                    logger.error("reading inputstream Failed", e);
                }
            }
            // Note: potential optimization would be to have a large buffer
            // only allocated once

            // Why don't we convert it to a string and print the start of
            // it?
            String content = new String(os.toString());

            Map<String, Integer> matches = contentMatcher.matchContent(content);
            int score = contentMatcher.score(matches);

            if (score > LOWER_SCORE_THRESHOLD) {

                logger.info("****************************************");
                logger.info("URL: " + url + " Score: " + score + " Detail: " + matches);
                // outputCollector.collect(new IntWritable(score), new Text(url));
                outputCollector.collect(NullWritable.get(), new Text(outputParser
                        .parse(contentMatcher.getTitle(content), url, crawledDate, score, matches)));
            }

            logger.debug(new Integer(i).toString());

            if (i++ > sampleSize) {
                logger.info("Finished " + new Date());
                break;
            }

        } catch (Exception e) {
            logger.error("url failed " + url, e);
        }
    }

}

From source file:com.liveramp.hank.hadoop.DomainBuilderMapper.java

License:Apache License

public final void map(K key, V value,
        OutputCollector<KeyAndPartitionWritableComparable, ValueWritable> outputCollector, Reporter reporter)
        throws IOException {
    KeyValuePair keyValue = buildHankKeyValue(key, value);
    KeyAndPartitionWritableComparable hankKeyWritableComparable = new KeyAndPartitionWritableComparable(
            storageEngine, partitioner, numPartitions, keyValue.getKey());
    ValueWritable hankValueWritable = new ValueWritable(keyValue.getValue());
    outputCollector.collect(hankKeyWritableComparable, hankValueWritable);
    reporter.progress();
}

From source file:edu.umn.cs.spatialHadoop.core.RTree.java

License:Open Source License

/**
 * Performs a spatial join between records in two R-trees
 * @param R
 * @param S
 * @param output
 * @return
 * @throws IOException
 * SuppressWarnings("resource") is used because we create LineReaders on the
 * internal data stream of both R and S. We do not want to close the
 * LineReader because it will subsequently close the internal data stream
 * of R and S which is something we want to avoid because both R and S are
 * not created by this function and it should not free these resources.
 */
protected static <S1 extends Shape, S2 extends Shape> int spatialJoinDisk(final RTree<S1> R, final RTree<S2> S,
        final ResultCollector2<S1, S2> output, final Reporter reporter) throws IOException {
    // Reserve locations for nodes MBRs and data offset [start, end)
    final Rectangle[] r_nodes = new Rectangle[R.degree];
    for (int i = 0; i < r_nodes.length; i++)
        r_nodes[i] = new Rectangle();
    final int[] r_data_offset = new int[R.degree + 1];

    final Rectangle[] s_nodes = new Rectangle[S.degree];
    for (int i = 0; i < s_nodes.length; i++)
        s_nodes[i] = new Rectangle();
    final int[] s_data_offset = new int[S.degree + 1];

    PriorityQueue<Long> nodesToJoin = new PriorityQueue<Long>() {
        {
            initialize(R.leafNodeCount + S.leafNodeCount);
        }

        @Override
        protected boolean lessThan(Object a, Object b) {
            return ((Long) a) < ((Long) b);
        }
    };

    nodesToJoin.put(0L);

    LruCache<Integer, Shape[]> r_records_cache = new LruCache<Integer, Shape[]>(R.degree * 2);
    LruCache<Integer, Shape[]> s_records_cache = new LruCache<Integer, Shape[]>(S.degree * R.degree * 4);

    Text line = new Text2();

    int result_count = 0;

    LineReader r_lr = null, s_lr = null;
    // Last offset read from r and s
    int r_last_offset = 0;
    int s_last_offset = 0;

    while (nodesToJoin.size() > 0) {
        long nodes_to_join = nodesToJoin.pop();
        int r_node = (int) (nodes_to_join >>> 32);
        int s_node = (int) (nodes_to_join & 0xFFFFFFFF);
        // Read all R nodes
        int r_mbrsToTest = r_node == 0 ? 1 : R.degree;
        boolean r_leaf = r_node * R.degree + 1 >= R.nodeCount;

        long nodeOffset = NodeSize * r_node;
        R.structure.seek(nodeOffset);

        for (int i = 0; i < r_mbrsToTest; i++) {
            r_data_offset[i] = R.structure.readInt();
            r_nodes[i].readFields(R.structure);
        }
        r_data_offset[r_mbrsToTest] = (r_node + r_mbrsToTest) == R.nodeCount ? R.treeSize
                : R.structure.readInt();

        // Read all S nodes
        int s_mbrsToTest = s_node == 0 ? 1 : S.degree;
        boolean s_leaf = s_node * S.degree + 1 >= S.nodeCount;

        if (r_leaf != s_leaf) {
            // This case happens when the two trees are of different heights
            if (r_leaf)
                r_mbrsToTest = 1;
            else
                s_mbrsToTest = 1;
        }

        nodeOffset = NodeSize * s_node;
        S.structure.seek(nodeOffset);

        for (int i = 0; i < s_mbrsToTest; i++) {
            s_data_offset[i] = S.structure.readInt();
            s_nodes[i].readFields(S.structure);
        }
        s_data_offset[s_mbrsToTest] = (s_node + s_mbrsToTest) == S.nodeCount ? S.treeSize
                : S.structure.readInt();

        // Find overlapping nodes by Cartesian product
        for (int i = 0; i < r_mbrsToTest; i++) {
            for (int j = 0; j < s_mbrsToTest; j++) {
                if (r_nodes[i].isIntersected(s_nodes[j])) {
                    if (r_leaf && s_leaf) {
                        // Reached leaf nodes in both trees. Start comparing records
                        int r_start_offset = r_data_offset[i];
                        int r_end_offset = r_data_offset[i + 1];

                        int s_start_offset = s_data_offset[j];
                        int s_end_offset = s_data_offset[j + 1];

                        ///////////////////////////////////////////////////////////////////
                        // Read or retrieve r_records
                        Shape[] r_records = r_records_cache.get(r_start_offset);
                        if (r_records == null) {
                            int cache_key = r_start_offset;
                            r_records = r_records_cache.popUnusedEntry();
                            if (r_records == null) {
                                r_records = new Shape[R.degree * 2];
                            }

                            // Need to read it from stream
                            if (r_last_offset != r_start_offset) {
                                long seekTo = r_start_offset + R.treeStartOffset;
                                R.data.seek(seekTo);
                                r_lr = new LineReader(R.data);
                            }
                            int record_i = 0;
                            while (r_start_offset < r_end_offset) {
                                r_start_offset += r_lr.readLine(line);
                                if (r_records[record_i] == null)
                                    r_records[record_i] = R.stockObject.clone();
                                r_records[record_i].fromText(line);
                                record_i++;
                            }
                            r_last_offset = r_start_offset;
                            // Nullify other records
                            while (record_i < r_records.length)
                                r_records[record_i++] = null;
                            r_records_cache.put(cache_key, r_records);
                        }

                        // Read or retrieve s_records
                        Shape[] s_records = s_records_cache.get(s_start_offset);
                        if (s_records == null) {
                            int cache_key = s_start_offset;

                            // Need to read it from stream
                            if (s_lr == null || s_last_offset != s_start_offset) {
                                // Need to reposition s_lr (LineReader of S)
                                long seekTo = s_start_offset + S.treeStartOffset;
                                S.data.seek(seekTo);
                                s_lr = new LineReader(S.data);
                            }
                            s_records = s_records_cache.popUnusedEntry();
                            if (s_records == null) {
                                s_records = new Shape[S.degree * 2];
                            }
                            int record_i = 0;
                            while (s_start_offset < s_end_offset) {
                                s_start_offset += s_lr.readLine(line);
                                if (s_records[record_i] == null)
                                    s_records[record_i] = S.stockObject.clone();
                                s_records[record_i].fromText(line);
                                record_i++;
                            }
                            // Nullify other records
                            while (record_i < s_records.length)
                                s_records[record_i++] = null;
                            // Put in cache
                            s_records_cache.put(cache_key, s_records);
                            s_last_offset = s_start_offset;
                        }

                        // Do Cartesian product between records to find overlapping pairs
                        for (int i_r = 0; i_r < r_records.length && r_records[i_r] != null; i_r++) {
                            for (int i_s = 0; i_s < s_records.length && s_records[i_s] != null; i_s++) {
                                if (r_records[i_r].isIntersected(s_records[i_s])
                                        && !r_records[i_r].equals(s_records[i_s])) {
                                    result_count++;
                                    if (output != null) {
                                        output.collect((S1) r_records[i_r], (S2) s_records[i_s]);
                                    }
                                }
                            }
                        }
                        ///////////////////////////////////////////////////////////////////

                    } else {
                        // Add a new pair to node pairs to be tested
                        // Go down one level if possible
                        int new_r_node, new_s_node;
                        if (!r_leaf) {
                            new_r_node = (r_node + i) * R.degree + 1;
                        } else {
                            new_r_node = r_node + i;
                        }
                        if (!s_leaf) {
                            new_s_node = (s_node + j) * S.degree + 1;
                        } else {
                            new_s_node = s_node + j;
                        }
                        long new_pair = (((long) new_r_node) << 32) | new_s_node;
                        nodesToJoin.put(new_pair);
                    }
                }
            }
        }
        reporter.progress();
    }
    return result_count;
}

From source file:edu.umn.cs.spatialHadoop.core.RectangleNN.java

License:Open Source License

/**
 * @param R
 * @param S
 * @param output
 * @return
 * @throws IOException
 */
public static <S1 extends Shape, S2 extends Shape> int SpatialJoin_planeSweep(List<S1> R, List<S2> S,
        ResultCollector2<S1, S2> output, Reporter reporter) throws IOException {
    int count = 0;

    Comparator<Shape> comparator = new Comparator<Shape>() {
        @Override
        public int compare(Shape o1, Shape o2) {
            if (o1.getMBR().x1 == o2.getMBR().x1)
                return 0;
            return o1.getMBR().x1 < o2.getMBR().x1 ? -1 : 1;
        }
    };

    long t1 = System.currentTimeMillis();
    LOG.debug("Joining lists " + R.size() + " with " + S.size());
    Collections.sort(R, comparator);
    Collections.sort(S, comparator);

    int i = 0, j = 0;

    try {
        while (i < R.size() && j < S.size()) {
            S1 r;
            S2 s;
            if (comparator.compare(R.get(i), S.get(j)) < 0) {
                r = R.get(i);
                int jj = j;

                while ((jj < S.size()) && ((s = S.get(jj)).getMBR().x1 <= r.getMBR().x2)) {
                    // Check if r and s are overlapping but not the same object
                    // for self join
                    if (r.isIntersected(s) && !r.equals(s)) {
                        if (output != null)
                            output.collect(r, s);
                        count++;
                    }
                    jj++;
                    if (reporter != null)
                        reporter.progress();
                }
                i++;
            } else {
                s = S.get(j);
                int ii = i;

                while ((ii < R.size()) && ((r = R.get(ii)).getMBR().x1 <= s.getMBR().x2)) {
                    if (r.isIntersected(s) && !r.equals(s)) {
                        if (output != null)
                            output.collect(r, s);
                        count++;
                    }
                    ii++;
                    if (reporter != null)
                        reporter.progress();
                }
                j++;
            }
            if (reporter != null)
                reporter.progress();
        }
    } catch (RuntimeException e) {
        e.printStackTrace();
    }
    long t2 = System.currentTimeMillis();
    LOG.debug("Finished plane sweep in " + (t2 - t1) + " millis and found " + count + " pairs");
    return count;
}

From source file:edu.umn.cs.spatialHadoop.core.RectangleNN.java

License:Open Source License

public static <S1 extends Shape, S2 extends Shape> int SpatialJoin_planeSweepFilterOnly(final S1[] R,
        final S2[] S, ResultCollector2<S1, S2> output, Reporter reporter) {
    int count = 0;

    final Comparator<Shape> comparator = new Comparator<Shape>() {
        @Override
        public int compare(Shape o1, Shape o2) {
            if (o1.getMBR().x1 == o2.getMBR().x1)
                return 0;
            return o1.getMBR().x1 < o2.getMBR().x1 ? -1 : 1;
        }
    };

    long t1 = System.currentTimeMillis();
    LOG.debug("Joining arrays " + R.length + " with " + S.length);
    Arrays.sort(R, comparator);
    Arrays.sort(S, comparator);

    int i = 0, j = 0;

    try {
        while (i < R.length && j < S.length) {
            S1 r;
            S2 s;
            if (comparator.compare(R[i], S[j]) < 0) {
                r = R[i];
                int jj = j;

                while ((jj < S.length) && ((s = S[jj]).getMBR().x1 <= r.getMBR().x2)) {
                    if (r.getMBR().isIntersected(s.getMBR())) {
                        if (output != null)
                            output.collect(r, s);
                        count++;
                    }
                    jj++;

                    if (reporter != null)
                        reporter.progress();
                }
                i++;
            } else {
                s = S[j];
                int ii = i;

                while ((ii < R.length) && ((r = R[ii]).getMBR().x1 <= s.getMBR().x2)) {
                    if (r.getMBR().isIntersected(s.getMBR())) {
                        if (output != null)
                            output.collect(r, s);
                        count++;
                    }
                    ii++;
                }
                j++;
                if (reporter != null)
                    reporter.progress();
            }
            if (reporter != null)
                reporter.progress();
        }
    } catch (RuntimeException e) {
        e.printStackTrace();
    }
    long t2 = System.currentTimeMillis();
    LOG.debug("Finished plane sweep filter only in " + (t2 - t1) + " millis and found " + count + " pairs");
    return count;
}

From source file:edu.umn.cs.spatialHadoop.core.RectangleNN.java

License:Open Source License

public static <S1 extends Shape, S2 extends Shape> int SpatialJoin_planeSweep(final S1[] R, final S2[] S,
        ResultCollector2<S1, S2> output, Reporter reporter) {
    int count = 0;

    final Comparator<Shape> comparator = new Comparator<Shape>() {
        @Override
        public int compare(Shape o1, Shape o2) {
            if (o1.getMBR().x1 == o2.getMBR().x1)
                return 0;
            return o1.getMBR().x1 < o2.getMBR().x1 ? -1 : 1;
        }
    };

    long t1 = System.currentTimeMillis();
    LOG.debug("Joining arrays " + R.length + " with " + S.length);
    Arrays.sort(R, comparator);
    Arrays.sort(S, comparator);

    int i = 0, j = 0;

    try {
        while (i < R.length && j < S.length) {
            S1 r;
            S2 s;
            if (comparator.compare(R[i], S[j]) < 0) {
                r = R[i];
                int jj = j;

                while ((jj < S.length) && ((s = S[jj]).getMBR().x1 <= r.getMBR().x2)) {
                    if (r.isIntersected(s)) {
                        if (output != null)
                            output.collect(r, s);
                        count++;
                    }
                    jj++;
                    if (reporter != null)
                        reporter.progress();
                }
                i++;
            } else {
                s = S[j];
                int ii = i;

                while ((ii < R.length) && ((r = R[ii]).getMBR().x1 <= s.getMBR().x2)) {
                    if (r.isIntersected(s)) {
                        if (output != null)
                            output.collect(r, s);
                        count++;
                    }
                    ii++;
                    if (reporter != null)
                        reporter.progress();
                }
                j++;
            }
            if (reporter != null)
                reporter.progress();
        }
    } catch (RuntimeException e) {
        e.printStackTrace();
    }
    long t2 = System.currentTimeMillis();
    LOG.debug("Finished plane sweep in " + (t2 - t1) + " millis and found " + count + " pairs");
    return count;
}