com.browseengine.local.service.geocode.SegmentSearcher.java Source code

Java tutorial

Introduction

Here is the source code for com.browseengine.local.service.geocode.SegmentSearcher.java

Source

/**
 * Bobo Browse Engine - High performance faceted/parametric search implementation 
 * that handles various types of semi-structured data.  Written in Java.
 * 
 * Copyright (C) 2006  Spackle
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 * 
 * To contact the project administrators for the bobo-browse project, 
 * please go to https://sourceforge.net/projects/bobo-browse/.
 */

package com.browseengine.local.service.geocode;

import java.io.File;
import java.io.IOException;
import java.util.BitSet;

import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Searcher;

import com.browseengine.local.service.Address;
import com.browseengine.local.service.LocatedAddress;
import com.browseengine.local.service.index.SegmentsFields;

/**
 * @author spackle
 *
 */
public class SegmentSearcher {
    private static final Logger LOGGER = Logger.getLogger(SegmentSearcher.class);

    private Searcher _searcher; // TLID,placeL,stateL,placeR,stateR,startLon,startLat,endLon,endLat
    private IndexReader _reader;
    private int _maxDoc;

    public SegmentSearcher(File f) throws IOException {
        try {
            _reader = IndexReader.open(f);
            _searcher = new IndexSearcher(_reader);
            _maxDoc = _reader.maxDoc();
        } catch (IOException ioe) {
            try {
                close();
            } catch (IOException ioe2) {
                LOGGER.warn("trouble closing SegmentsSearcher, after trouble opening it", ioe2);
            }
            throw ioe;
        }
    }

    public LocatedAddress constructLocalResource(RangeMatch rangeMatch, Address addressSoFar) throws IOException {
        TermDocs termDocs = null;
        try {
            Term term = new Term(SegmentsFields.TLID.getField(), "" + rangeMatch.tlid);
            termDocs = _reader.termDocs(term);
            if (termDocs.next()) {
                int docid = termDocs.doc();
                Document doc = _reader.document(docid);
                String placeL = doc.get(SegmentsFields.PLACEL.getField());
                String stateL = doc.get(SegmentsFields.STATEL.getField());
                String placeR = doc.get(SegmentsFields.PLACER.getField());
                String stateR = doc.get(SegmentsFields.STATER.getField());

                String city;
                String state;
                if (rangeMatch.lrboth == PlaceMatch.LEFT) {
                    if (placeL != null) {
                        city = placeL;
                    } else {
                        city = placeR;
                    }
                    if (stateL != null) {
                        state = stateL;
                    } else {
                        state = stateR;
                    }
                } else {
                    if (placeR != null) {
                        city = placeR;
                    } else {
                        city = placeL;
                    }
                    if (stateR != null) {
                        state = stateR;
                    } else {
                        state = stateL;
                    }
                }
                Address a = addressSoFar;
                Address addr = new Address(a.getNumber(), a.getStreetPrefix(), a.getStreetName(), a.getStreetType(),
                        a.getStreetSuffix(), a.getAptNo(), city, state, a.getZip5(), a.getCountry());
                // linear interpolation
                float percent = rangeMatch.toPercent;
                double startLon = Double.parseDouble(doc.get(SegmentsFields.START_LON.getField()));
                double startLat = Double.parseDouble(doc.get(SegmentsFields.START_LAT.getField()));
                double endLon = Double.parseDouble(doc.get(SegmentsFields.END_LON.getField()));
                double endLat = Double.parseDouble(doc.get(SegmentsFields.END_LAT.getField()));
                double lon = startLon + percent * (endLon - startLon);
                double lat = startLat + percent * (endLat - startLat);
                lon = adjustDegr(lon);
                lat = adjustDegr(lat);
                LocatedAddress localResource = new LocatedAddress(addr, lon, lat);
                return localResource;
            }
            return null;
        } finally {
            try {
                if (termDocs != null) {
                    termDocs.close();
                }
            } catch (IOException ioe) {
                LOGGER.warn("trouble closing termdocs: " + ioe, ioe);
            }
        }

    }

    private static double adjustDegr(double in) {
        return in / 1000000.;
    }

    public PlaceMatch lookupPlace(String city, String state) throws GeoCodingException, IOException {
        if (state.length() != 2) {
            throw new GeoCodingException("improper 2-letter state code: " + state);
        }
        city = GeoCodeImpl.replaceWhitespacesWithASpace(city);
        state = state.toLowerCase();
        city = city.toLowerCase();
        // now, if we search on city,state, we should get back a 
        // set of docids, and whether we matched on left, right, or both

        // could eventually pull stateL and stateR into TernarySets in memory
        // this would cost about 2x8 bits per segment = 2 bytes per segment
        TermDocs td = null;
        try {
            td = _reader.termDocs();
            // right
            BitSet right = cityAndStateMatch(td, city, state, false);
            // left 
            BitSet left = cityAndStateMatch(td, city, state, true);

            PlaceMatch place = new PlaceMatch();
            int nextRight = right.nextSetBit(0);
            int nextLeft = left.nextSetBit(0);
            while (nextLeft >= 0 || nextRight >= 0) {
                if (nextLeft >= 0 && nextRight >= 0) {
                    // we want the smaller one
                    if (nextLeft == nextRight) {
                        // advance both
                        place.tlidSideMap.put(getTLID(nextLeft), PlaceMatch.BOTH);
                        nextLeft = left.nextSetBit(nextLeft + 1);
                        nextRight = right.nextSetBit(nextRight + 1);
                    } else if (nextLeft < nextRight) {
                        // advance left
                        place.tlidSideMap.put(getTLID(nextLeft), PlaceMatch.LEFT);
                        nextLeft = left.nextSetBit(nextLeft + 1);
                    } else {
                        // advance right
                        place.tlidSideMap.put(getTLID(nextRight), PlaceMatch.RIGHT);
                        nextRight = right.nextSetBit(nextRight + 1);
                    }
                } else if (nextLeft >= 0) {
                    // we only have answers remaining on left list
                    place.tlidSideMap.put(getTLID(nextLeft), PlaceMatch.LEFT);
                    nextLeft = left.nextSetBit(nextLeft + 1);
                } else {
                    // we only have answers remaining on right list
                    place.tlidSideMap.put(getTLID(nextRight), PlaceMatch.RIGHT);
                    nextRight = right.nextSetBit(nextRight + 1);
                }
            }
            return place;
        } finally {
            try {
                if (td != null) {
                    td.close();
                }
            } finally {
                //
            }
        }

    }

    private int getTLID(int docid) throws IOException {
        Document d = _reader.document(docid);
        return Integer.parseInt(d.get(SegmentsFields.TLID.getField()));
    }

    private BitSet getCityMatch(TermDocs termDocs, String city, boolean isLeft) throws IOException {
        Term term;
        if (isLeft) {
            term = new Term(SegmentsFields.PLACEL.getField(), city);
        } else {
            term = new Term(SegmentsFields.PLACER.getField(), city);
        }
        BitSet bits = new BitSet(_maxDoc);
        termDocs.seek(term);
        while (termDocs.next()) {
            bits.set(termDocs.doc());
        }
        return bits;
    }

    /**
     * modifies: thisAndState, by clearing any bits not matching the 
     * state.
     * 
     * @param thisAndState
     * @param termDocs
     * @param stateCode
     * @param isLeft
     * @throws IOException
     */
    private BitSet cityAndStateMatch(TermDocs termDocs, String city, String stateCode, boolean isLeft)
            throws IOException {
        BitSet match = getCityMatch(termDocs, city, isLeft);
        if (match.isEmpty()) {
            return match;
        }
        Term term;
        if (isLeft) {
            term = new Term(SegmentsFields.STATEL.getField(), stateCode);
        } else {
            term = new Term(SegmentsFields.STATER.getField(), stateCode);
        }
        termDocs.seek(term);
        int setBit = match.nextSetBit(0);
        while (setBit >= 0 && termDocs.skipTo(setBit)) {
            int doc = termDocs.doc();
            if (doc != setBit) {
                // clear from [setBit,doc-1]
                match.clear(setBit, doc - 1);
            }
            // current bit at doc stays as is
            setBit = (doc + 1 < _maxDoc ? match.nextSetBit(doc + 1) : -1);
        }
        return match;
    }

    private BitSet getStateMatch(TermDocs termDocs, String stateCode, boolean isLeft) throws IOException {
        Term term;
        if (isLeft) {
            term = new Term(SegmentsFields.STATEL.getField(), stateCode);
        } else {
            term = new Term(SegmentsFields.STATER.getField(), stateCode);
        }
        BitSet bits = new BitSet(_maxDoc);
        termDocs.seek(term);
        while (termDocs.next()) {
            bits.set(termDocs.doc());
        }
        return bits;
    }

    public synchronized void close() throws IOException {
        try {
            if (_searcher != null) {
                _searcher.close();
            }
        } finally {
            try {
                if (_reader != null) {
                    _reader.close();
                }
            } finally {
                _searcher = null;
                _reader = null;
            }
        }
    }
}