solr2155.solr.search.function.GeoHashValueSource.java Source code

Java tutorial

Introduction

Here is the source code for solr2155.solr.search.function.GeoHashValueSource.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package solr2155.solr.search.function;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.solr.schema.FieldType;
import org.apache.solr.search.FunctionQParser;
import org.apache.solr.search.SolrIndexReader;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.function.DocValues;
import org.apache.solr.search.function.MultiValueSource;
import org.apache.solr.search.function.ValueSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import solr2155.lucene.TermsEnumCompatibility;
import solr2155.lucene.spatial.geohash.GridNode;
import solr2155.lucene.spatial.geometry.shape.Point2D;
import solr2155.solr.schema.GeoHashField;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * TODO consider moving this to lucene package and remove dependency on Solr.
 * TODO implement single-value data structure differently
 */
/**
 * A {@link MultiValueSource} that loads the geohash terms of one field into memory, decoded to
 * {@link Point2D}s and indexed by document id across the whole index (all Lucene segments).
 * Only terms whose length equals the field's grid reference system precision are loaded.
 * <p>
 * Instances are looked up in / inserted into the searcher cache named by {@code CACHE_NAME}
 * via {@link #getValueSource(String, FunctionQParser)}.
 * <p>
 * TODO consider moving this to lucene package and remove dependency on Solr.
 * TODO implement single-value data structure differently
 */
public class GeoHashValueSource extends MultiValueSource {

    private static final Logger log = LoggerFactory.getLogger(GeoHashValueSource.class);

    // NOTE(review): this is also the conventional name of Solr's built-in field value cache;
    // confirm that no other component stores a different value type under the same field-name
    // key, otherwise the cast in getValueSource() would fail. The commented-out alternative is
    // kept from the original.
    private static final String CACHE_NAME = "fieldValueCache";//"geoHashValues";

    /** Initial capacity of each per-document point list. */
    private static final int DEFAULT_ARRAY_CAPACITY = 5;

    private final String fieldName;

    /**
     * A multi-value cache across the entire index (all Lucene segments).
     * Indexed by doc id, then list of points. The array itself is {@code null} when no matching
     * term had any document; an individual slot is {@code null} when that document has no points.
     * Admittedly the List<Point2D> part isn't particularly memory efficient or kind to the GC.
     */
    private final List<Point2D>[] doc2PointsCache;

    /**
     * Factory method invoked by {@link org.apache.solr.schema.GeoHashField#getValueSource(org.apache.solr.schema.SchemaField, org.apache.solr.search.QParser)}.
     * Returns the instance cached on the request's searcher for {@code fieldName}, building and
     * caching a new one on a miss.
     *
     * @param fieldName name of the geohash field to load
     * @param parser    parser whose request supplies the searcher
     * @return the cached or newly built value source
     * @throws RuntimeException wrapping the {@link IOException} if reading the index fails, or if
     *                          the field is not a {@link GeoHashField}
     */
    public static ValueSource getValueSource(String fieldName, FunctionQParser parser) {
        final SolrIndexSearcher searcher = parser.getReq().getSearcher();
        GeoHashValueSource valueSource = (GeoHashValueSource) searcher.cacheLookup(CACHE_NAME, fieldName);
        if (valueSource == null) {
            try {
                valueSource = new GeoHashValueSource(fieldName, searcher);
            } catch (IOException e) {
                throw new RuntimeException("Failed to load geohash field " + fieldName + " into memory.", e);
            }
            searcher.cacheInsert(CACHE_NAME, fieldName, valueSource);
        }
        return valueSource;
    }

    /**
     * Loads all points of {@code fieldName} from the searcher's index into memory.
     *
     * @param fieldName name of a field whose type must be a {@link GeoHashField}
     * @param searcher  searcher providing the schema and the index reader
     * @throws IOException      if reading the index fails
     * @throws RuntimeException if the field's type is not a {@link GeoHashField}
     */
    GeoHashValueSource(String fieldName, SolrIndexSearcher searcher) throws IOException {
        log.info("Loading geohash field " + fieldName + " into memory.");
        this.fieldName = fieldName;

        //Get gridReferenceSystem
        final FieldType fieldType = searcher.getSchema().getField(fieldName).getType();
        if (!(fieldType instanceof GeoHashField)) {
            throw new RuntimeException(
                    "field " + fieldName + " should be a GeoHashField, not " + fieldType.getTypeName());
        }
        final GridNode.GridReferenceSystem gridReferenceSystem =
                ((GeoHashField) fieldType).getGridReferenceSystem();

        //Traverse the index to load up doc2PointsCache
        this.doc2PointsCache = loadPoints(searcher.getIndexReader(), fieldName, gridReferenceSystem);

        logStatistics();
    }

    /**
     * Walks every term of {@code fieldName} and records its decoded point against each document
     * that contains it.
     *
     * @return array indexed by doc id (length {@code reader.maxDoc()}), or {@code null} if no
     *         matching term had any document
     */
    @SuppressWarnings({ "unchecked" })
    private static List<Point2D>[] loadPoints(IndexReader reader, String fieldName,
            GridNode.GridReferenceSystem gridReferenceSystem) throws IOException {
        List<Point2D>[] doc2Points = null;
        // Nested try/finally so that termsEnum is closed even if termDocs() or termDocs.close() throws.
        // In Lucene 3 these should be closed (not in Lucene 4).
        final TermsEnumCompatibility termsEnum = new TermsEnumCompatibility(reader, fieldName);
        try {
            final TermDocs termDocs = reader.termDocs(); //reused across terms via seek()
            try {
                while (true) {
                    final Term term = termsEnum.next();
                    if (term == null) {
                        break;
                    }
                    // Skip terms that are not at the grid reference system's precision.
                    if (term.text().length() != gridReferenceSystem.getPrecision()) {
                        continue;
                    }
                    final Point2D point = gridReferenceSystem.decodeXY(term.text());
                    termDocs.seek(termsEnum.getTermEnum());
                    while (termDocs.next()) {
                        final int docId = termDocs.doc();
                        if (docId == DocIdSetIterator.NO_MORE_DOCS) {
                            break;
                        }
                        if (doc2Points == null) {
                            // Allocated lazily so an empty field costs no per-document memory.
                            doc2Points = (List<Point2D>[]) new List[reader.maxDoc()];//java generics hack
                        }
                        List<Point2D> points = doc2Points[docId];
                        if (points == null) {
                            points = new ArrayList<Point2D>(DEFAULT_ARRAY_CAPACITY);
                            doc2Points[docId] = points;
                        }
                        points.add(point);
                    }
                }
            } finally {
                termDocs.close();
            }
        } finally {
            termsEnum.close();
        }
        return doc2Points;
    }

    /** Logs min/avg/max number of points per document and the cache length, at INFO level. */
    private void logStatistics() {
        if (!log.isInfoEnabled()) {
            return;
        }
        int min = Integer.MAX_VALUE, sum = 0, max = 0;
        int dlen = 0;
        if (doc2PointsCache != null) {
            dlen = doc2PointsCache.length;
            for (List<Point2D> point2Ds : doc2PointsCache) {
                final int plen = (point2Ds == null ? 0 : point2Ds.size());
                min = Math.min(min, plen);
                max = Math.max(max, plen);
                sum += plen;
            }
        }
        if (min == Integer.MAX_VALUE) {
            min = 0;
        }
        // Guard the empty case so the log shows 0.0 rather than NaN.
        final float avg = (dlen == 0) ? 0f : (float) sum / dlen;
        log.info("field '" + fieldName + "' in RAM: loaded min/avg/max per doc #: (" + min + "," + avg + ","
                + max + ") #" + dlen);
    }

    /** @return 2, the number of dimensions of this value source. */
    @Override
    public int dimension() {
        return 2;
    }

    /** This class is public so that {@link #point2Ds(int)} is exposed. */
    public class GeoHashDocValues extends DocValues {
        /** Offset added to a doc id passed in before indexing the index-wide cache. */
        private final int docIdBase;

        public GeoHashDocValues(int docIdBase) {
            this.docIdBase = docIdBase;
        }

        /** Not implemented here yet; simply delegates to the superclass. */
        @Override
        public void doubleVal(int doc, double[] vals) {
            super.doubleVal(doc, vals);//TODO
        }

        /**
         * Returns the points of the given document.
         * Do NOT modify the returned list!  May return null.
         *
         * @param doc doc id, to which this instance's {@code docIdBase} is added
         * @return the document's points, or null if it has none or nothing was loaded
         */
        public List<Point2D> point2Ds(int doc) {
            //This cache is over the entire index (all Lucene segments).
            final List<Point2D>[] cache = GeoHashValueSource.this.doc2PointsCache;
            if (cache == null) {
                return null;
            }
            return cache[docIdBase + doc];
        }

        /** Renders the document's points as {@code geohash(field)x,y=} followed by "x,y " per point. */
        @Override
        public String toString(int doc) {
            StringBuilder buf = new StringBuilder(100);
            buf.append("geohash(").append(fieldName).append(")x,y=");
            List<Point2D> points = point2Ds(doc);
            if (points != null) {
                for (Point2D point : points) {
                    buf.append(point.getX()).append(',').append(point.getY());
                    buf.append(' ');
                }
            }
            return buf.toString();
        }
    }

    /**
     * Creates doc values for the given reader. When the reader is a {@link SolrIndexReader} its
     * base is used as the doc id offset; otherwise the offset is 0.
     */
    @Override
    public GeoHashDocValues getValues(Map context, IndexReader reader) throws IOException {
        int docIdBase = 0;
        if (reader instanceof SolrIndexReader) {
            docIdBase = ((SolrIndexReader) reader).getBase();
        }
        return new GeoHashDocValues(docIdBase);
    }

    /** Two value sources are equal when they are of the same class and name the same field. */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || !getClass().equals(o.getClass())) {
            return false;
        }
        return fieldName.equals(((GeoHashValueSource) o).fieldName);
    }

    @Override
    public int hashCode() {
        return fieldName.hashCode();
    }

    @Override
    public String description() {
        return "Loads the geohash based field values into memory, in their lat-lon equivalent.";
    }

}