com.berico.clavin.resolver.impl.lucene.LuceneUtils.java Source code

Java tutorial

Introduction

Here is the source code for com.berico.clavin.resolver.impl.lucene.LuceneUtils.java

Source

package com.berico.clavin.resolver.impl.lucene;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;

import com.berico.clavin.extractor.CoordinateOccurrence;
import com.berico.clavin.extractor.LocationOccurrence;
import com.berico.clavin.gazetteer.LatLon;
import com.berico.clavin.gazetteer.Place;
import com.berico.clavin.resolver.ResolvedCoordinate;
import com.berico.clavin.resolver.ResolvedLocation;
import com.berico.clavin.resolver.Vector;
import com.berico.clavin.util.DamerauLevenshtein;
import com.berico.clavin.util.Serializer;
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.distance.DistanceUtils;
import com.spatial4j.core.shape.Point;

/*#####################################################################
 * 
 * CLAVIN (Cartographic Location And Vicinity INdexer)
 * ---------------------------------------------------
 * 
 * Copyright (C) 2012-2013 Berico Technologies
 * http://clavin.bericotechnologies.com
 * 
 * ====================================================================
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *       http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 * 
 * ====================================================================
 * 
 * LuceneUtils.java
 * 
 *###################################################################*/

/**
 * A set of static utilities for turning Lucene search hits into CLAVIN
 * resolution results ({@link ResolvedCoordinate} and {@link ResolvedLocation}).
 */
public class LuceneUtils {

    /**
     * Convert a set of Lucene Document Results into a list of
     * ResolvedCoordinates.
     * <p>
     * For every hit, the stored {@link Place} is deserialized, the stored
     * geometry is read as a point, and a {@link Vector} (distance in
     * kilometers plus direction in degrees) is computed from the place's
     * point to the occurrence's point.
     *
     * @param occurrence the CoordinateOccurrence in the document.
     * @param searcher the Lucene Searcher that retrieved the results.
     * @param results the Search results context
     * @param components LuceneComponents (specifically, the spatial components
     * needed to calculate vectors).
     * @return a List of ResolvedCoordinates, one per hit and in hit order;
     * empty when there are no hits.
     * @throws Exception if any of the underlying lookup or conversion calls fail.
     */
    public static List<ResolvedCoordinate> convertToCoordinate(CoordinateOccurrence<?> occurrence,
            IndexSearcher searcher, TopDocs results, LuceneComponents components) throws Exception {

        // Grab the Lucene spatial context
        SpatialContext spatialContext = components.getSpatialContext();

        int hitCount = results.scoreDocs.length;

        List<ResolvedCoordinate> resolvedCoordinates = new ArrayList<ResolvedCoordinate>(hitCount);

        // Nothing matched: skip converting the occurrence altogether.
        if (hitCount == 0) {
            return resolvedCoordinates;
        }

        // Center coordinate of the occurrence, as a Spatial4j point (x = longitude, y = latitude).
        LatLon center = occurrence.convertToLatLon();
        Point occurrencePoint = spatialContext.makePoint(center.getLongitude(), center.getLatitude());

        for (int i = 0; i < hitCount; i++) {

            // Grab the document from Lucene
            Document doc = searcher.doc(results.scoreDocs[i].doc);

            // Get the Place record
            Place record = dehydrate(doc);

            // Stored geometry of the Place, in the text form understood by the spatial context.
            String positionOfLocation = doc.get(FieldConstants.GEOMETRY);

            // TODO: Spatial4J supposedly has a ShapeReaderWriter implementation
            // that we should be using instead.
            @SuppressWarnings("deprecation")
            Point point = (Point) spatialContext.readShape(positionOfLocation);

            // The distance calculator reports degrees; convert to kilometers.
            double distanceInDegrees = spatialContext.getDistCalc().distance(point, occurrencePoint);
            double distanceInKm = DistanceUtils.degrees2Dist(distanceInDegrees,
                    DistanceUtils.EARTH_MEAN_RADIUS_KM);

            // Direction from the place (center) toward the occurrence (offset).
            double direction = calculateDirection(point.getX(), point.getY(), occurrencePoint.getX(),
                    occurrencePoint.getY());

            resolvedCoordinates
                    .add(new ResolvedCoordinate(occurrence, record, new Vector(distanceInKm, direction)));
        }

        return resolvedCoordinates;
    }

    /**
     * Calculate the direction from a center point "c" to an offset point "o".
     * <p>
     * The result is the planar angle of the vector (o - c), measured
     * counter-clockwise from the positive X axis, normalized to the range
     * (0, 360]. It is computed on raw coordinate differences and is not a
     * great-circle bearing.
     * <p>
     * X values are treated as longitudes in degrees: an X difference larger
     * than half a turn is wrapped so that the direction follows the short way
     * around the globe (e.g. across the antimeridian).
     *
     * @param cx Center X
     * @param cy Center Y
     * @param ox Offset X
     * @param oy Offset Y
     * @return Direction in degrees, in the range (0, 360]; a zero angle is
     * reported as 360.
     */
    private static double calculateDirection(double cx, double cy, double ox, double oy) {

        // Direction of a vector: theta = atan2(y2 - y1, x2 - x1)
        double dy = oy - cy;
        double dx = ox - cx;

        // Two longitudes can differ by up to 360 degrees numerically while
        // being close on the globe (e.g. -179 and 179). Wrap the difference
        // into [-180, 180] so the direction does not point the long way around.
        if (dx > 180.0) {
            dx -= 360.0;
        } else if (dx < -180.0) {
            dx += 360.0;
        }

        // atan2 yields radians in [-PI, PI]; convert to degrees.
        double angle = Math.toDegrees(Math.atan2(dy, dx));

        // atan2 produces negative angles for offsets below the center
        // (quadrants III and IV). Normalize by adding a full turn (2 * PI,
        // i.e. 360 degrees).
        // Note:  we'll let 0 degrees be represented as 360.
        if (angle <= 0) {
            angle += 360;
        }

        return angle;
    }

    /**
     * Convert a set of Lucene Document Results into a list of ResolvedLocations.
     *
     * @param occurrence LocationOccurrence in the document.
     * @param searcher the Lucene Searcher used to find the locations.
     * @param results the results of the Lucene Search
     * @param usingFuzzy whether fuzzy matching was used
     * @return List of ResolvedLocations, one per hit and in hit order; empty
     * when there are no hits.
     * @throws IOException if a hit's document cannot be loaded from the index.
     */
    public static List<ResolvedLocation> convertToLocations(LocationOccurrence occurrence, IndexSearcher searcher,
            TopDocs results, boolean usingFuzzy) throws IOException {

        int hitCount = results.scoreDocs.length;

        List<ResolvedLocation> locations = new ArrayList<ResolvedLocation>(hitCount);

        // The loop simply does not execute when there are no hits.
        for (int i = 0; i < hitCount; i++) {

            Document doc = searcher.doc(results.scoreDocs[i].doc);

            locations.add(convertToLocation(doc, occurrence, usingFuzzy));
        }

        return locations;
    }

    /**
     * Convert a single index entry into a ResolvedLocation.
     * <p>
     * The confidence passed to the ResolvedLocation is the value returned by
     * {@link DamerauLevenshtein#damerauLevenshteinDistanceCaseInsensitive}
     * for the occurrence text and the name stored in the index entry.
     *
     * @param document the index entry
     * @param location the LocationOccurrence within a document
     * @param fuzzy whether fuzzy matching was used.
     * @return ResolvedLocation
     */
    public static ResolvedLocation convertToLocation(Document document, LocationOccurrence location,
            boolean fuzzy) {

        Place place = dehydrate(document);

        // Name under which this entry was indexed (and therefore matched).
        String matchedName = document.get(FieldConstants.NAME);

        float confidence = DamerauLevenshtein.damerauLevenshteinDistanceCaseInsensitive(location.getText(),
                matchedName);

        return new ResolvedLocation(matchedName, place, location, fuzzy, confidence);
    }

    /**
     * Dehydrate a Place object from the Lucene index using the default
     * serializer.
     *
     * @param document Document with the Place field to dehydrate.
     * @return Place object deserialized from the document's stored
     * {@code FieldConstants.PLACE} value.
     */
    public static Place dehydrate(Document document) {

        String serializedPlace = document.get(FieldConstants.PLACE);

        return Serializer.Default.deserialize(serializedPlace, Place.class);
    }
}