Java tutorial
/** * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. * * * ************************************************************************** * NOTICE This software was produced for the U. S. Government under Contract No. * W15P7T-12-C-F600, and is subject to the Rights in Noncommercial Computer * Software and Noncommercial Computer Software Documentation Clause * 252.227-7014 (JUN 1995) * * (c) 2012-2015 The MITRE Corporation. All Rights Reserved. * ************************************************************************** * * Continue contributions: * Copyright 2013-2015 The MITRE Corporation. */ package org.opensextant.extractors.geo; import java.io.IOException; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.lang3.StringUtils; import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.opensextant.ConfigException; import org.opensextant.data.Country; import org.opensextant.data.LatLon; import org.opensextant.data.Place; import org.opensextant.util.GeodeticUtility; import org.opensextant.util.GeonamesUtility; import org.opensextant.util.SolrProxy; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Connects to a Solr sever via HTTP and tags place names in document. The * <code>SOLR_HOME</code> environment variable must be set to the location of * the Solr server. * * @author David Smiley - dsmiley@mitre.org * @author Marc Ubaldino - ubaldino@mitre.org */ public class SolrGazetteer { /** * In the interest of optimization we made the Solr instance a static class * attribute that should be thread safe and shareable across instances of * SolrMatcher */ private ModifiableSolrParams params = new ModifiableSolrParams(); private SolrProxy solr = null; /** * fast lookup by ISO2 country code. */ private Map<String, Country> countryCodes = null; /** * Default country code in solr gazetteer is ISO, so if given a FIPS code, * we need a helpful lookup to get ISO code for lookup. */ private Map<String, String> countryFIPS_ISO = new HashMap<String, String>(); /** * Geodetic search parameters. */ private ModifiableSolrParams geoLookup = createGeodeticLookupParams(); /** * Instantiates a new solr gazetteer. * * @throws ConfigException * Signals that a configuration exception has occurred. */ public SolrGazetteer() throws ConfigException { this((String) null); } /** * Instantiates a new solr gazetteer with the specified Solr Home location. * * @param solrHome * the location of solrHome. * @throws ConfigException * Signals that a configuration exception has occurred. */ public SolrGazetteer(String solrHome) throws ConfigException { initialize(solrHome); } public SolrGazetteer(SolrProxy currentIndex) throws ConfigException { // initialize(); solr = currentIndex; try { this.countryCodes = loadCountries(solr.getInternalSolrServer()); } catch (SolrServerException loadErr) { throw new ConfigException("SolrGazetteer is unable to load countries due to Solr error", loadErr); } catch (IOException ioErr) { throw new ConfigException("SolrGazetteer is unable to load countries due to IO/file error", ioErr); } } /** * Returns the SolrProxy used internally. * * @return the solr proxy */ public SolrProxy getSolrProxy() { return solr; } /** * Normalize country name. * * @param c * the c * @return the string */ public static String normalizeCountryName(String c) { return StringUtils.capitalize(c.toLowerCase()); } // * Do Not use. // * // * return solr params // * deprecated DO NOT USE. Keeping this as a reminder of what not to do. // * This will load entire index into memory. // * // @Deprecated // private static ModifiableSolrParams createGeodeticLookupParamsXX() { // /* // * Basic parameters for geospatial lookup. These are reused, and only pt // * and d are set for each lookup. // * // */ // ModifiableSolrParams p = new ModifiableSolrParams(); // p.set(CommonParams.FL, // "id,name,cc,adm1,adm2,feat_class,feat_code," + "geo,place_id,name_bias,id_bias,name_type"); // p.set(CommonParams.ROWS, 25); // p.set(CommonParams.Q, "*:*"); // p.set(CommonParams.FQ, "{!geofilt}"); // p.set("spatial", true); // p.set("sfield", "geo"); // p.set(CommonParams.SORT, "geodist() asc"); // Find closest places first. // return p; // } /** * Creates a generic spatial query for up to first 25 rows. * * @return default params */ protected static ModifiableSolrParams createGeodeticLookupParams() { return createGeodeticLookupParams(25); } /** * For larger areas choose a higher number of Rows to return. If you choose * to use Solr spatial score-by-distance for sorting or anything, then Solr * appears to want to load entire index into memory. So this sort mechanism * is off by default. * * @param rows * rows to include in spatial lookups * @return solr params */ protected static ModifiableSolrParams createGeodeticLookupParams(int rows) { /* * Basic parameters for geospatial lookup. These are reused, and only pt * and d are set for each lookup. * */ ModifiableSolrParams p = new ModifiableSolrParams(); p.set(CommonParams.FL, "id,name,cc,adm1,adm2,feat_class,feat_code," + "geo,place_id,name_bias,id_bias,name_type"); p.set(CommonParams.ROWS, rows); p.set(CommonParams.Q, "{!geofilt sfield=geo}"); // p.set(CommonParams.SORT, "score desc"); p.set("spatial", "true"); return p; } /** * Initialize. Cascading env variables: First use value from constructor, * then opensextant.solr, then solr.solr.home * * @throws ConfigException * Signals that a configuration exception has occurred. */ private void initialize(String solrHome) throws ConfigException { solr = solrHome != null ? new SolrProxy(solrHome, "gazetteer") : new SolrProxy("gazetteer"); params.set(CommonParams.Q, "*:*"); params.set(CommonParams.FL, "id,name,cc,adm1,adm2,feat_class,feat_code,geo,place_id,name_bias,id_bias,name_type"); try { this.countryCodes = loadCountries(solr.getInternalSolrServer()); } catch (SolrServerException loadErr) { throw new ConfigException("SolrGazetteer is unable to load countries due to Solr error", loadErr); } catch (IOException ioErr) { throw new ConfigException("SolrGazetteer is unable to load countries due to IO/file error", ioErr); } } /** * Close or release all resources. */ public void shutdown() { if (solr != null) { solr.close(); } } /** * List all country names, official and variant names. * Distinct territories (whose own ISO codes are unique) are listed as well. * Territories owned by other countries -- their ISO code is their owning nation -- are attached * as Country.territory (call Country.getTerritories() to list them). * * Name aliases are listed as Country.getAliases() * * The hash map returned contains all 260+ country listings keyed by ISO2 and ISO3. * Odd commonly used variant codes are added as well. * * @return the countries */ public Map<String, Country> getCountries() { return countryCodes; } /** The Constant UNK_Country. */ public static final Country UNK_Country = new Country("UNK", "invalid"); /** * Get Country by the default ISO digraph returns the Unknown country if you * are not using an ISO2 code. * * TODO: throw a GazetteerException of some sort. for null query or invalid * code. * * @param isocode * the isocode * @return the country */ public Country getCountry(String isocode) { if (isocode == null) { return null; } if (countryCodes.containsKey(isocode)) { return countryCodes.get(isocode); } return UNK_Country; } /** * Gets the country by fips. * * @param fips * the fips * @return the country by fips */ public Country getCountryByFIPS(String fips) { String isocode = countryFIPS_ISO.get(fips); return getCountry(isocode); } /** * This only returns Country objects that are names; It does not produce any * abbreviation variants. * * TODO: allow caller to get all entries, including abbreviations. * * @param index * solr instance to query * @return country data hash * @throws SolrServerException * the solr server exception * @throws IOException * on err, if country metadata file is not found in classpath */ public static Map<String, Country> loadCountries(SolrServer index) throws SolrServerException, IOException { GeonamesUtility geodataUtil = new GeonamesUtility(); Map<String, Country> countryCodeMap = geodataUtil.getISOCountries(); Logger log = LoggerFactory.getLogger(SolrGazetteer.class); ModifiableSolrParams ctryparams = new ModifiableSolrParams(); ctryparams.set(CommonParams.FL, "id,name,cc,FIPS_cc,ISO3_cc,adm1,adm2,feat_class,feat_code,geo,name_type"); /* TODO: Consider different behaviors for PCLI vs. PCL[DFS] */ ctryparams.set("q", "+feat_class:A +feat_code:(PCLI OR PCLIX OR TERR) +name_type:N"); /* As of 2015 we have 2300+ name variants for countries and territories */ ctryparams.set("rows", 5000); QueryResponse response = index.query(ctryparams); // Process Solr Response // SolrDocumentList docList = response.getResults(); for (SolrDocument gazEntry : docList) { Country C = createCountry(gazEntry); Country existingCountry = countryCodeMap.get(C.getCountryCode()); if (existingCountry != null) { if (existingCountry.ownsTerritory(C.getName())) { // do nothing. } else if (C.isTerritory) { log.debug("{} territory of {}", C, existingCountry); existingCountry.addTerritory(C); } else { log.debug("{} alias of {}", C, existingCountry); existingCountry.addAlias(C.getName()); // all other metadata is same. } continue; } log.info("Unknown country in gazetteer, that is not in flat files. C={}", C); countryCodeMap.put(C.getCountryCode(), C); countryCodeMap.put(C.CC_ISO3, C); } return countryCodeMap; } private static final Country createCountry(SolrDocument gazEntry) { String code = SolrProxy.getString(gazEntry, "cc"); String name = SolrProxy.getString(gazEntry, "name"); String featCode = SolrProxy.getString(gazEntry, "feat_code"); Country C = new Country(code, name); if ("TERR".equals(featCode)) { C.isTerritory = true; // Other conditions? } // Set this once. Yes, indeed we would see this metadata repeated for every country entry. // Geo field is specifically Spatial4J lat,lon format. double[] xy = SolrProxy.getCoordinate(gazEntry, "geo"); C.setLatitude(xy[0]); C.setLongitude(xy[1]); String fips = SolrProxy.getString(gazEntry, "FIPS_cc"); String iso3 = SolrProxy.getString(gazEntry, "ISO3_cc"); C.CC_FIPS = fips; C.CC_ISO3 = iso3; C.setName_type(SolrProxy.getChar(gazEntry, "name_type")); return C; } /** * <pre> * Search the gazetteer using a phrase. * The phrase will be quoted internally as it searches Solr * * e.g., search( "\"Boston City\"" ) * * Solr Gazetteer uses OR as default joiner for clauses. Without quotes * the above search would be "Boston" OR "City" effectively. * * </pre> * * @param place_string * the place_string * @return places List of place entries * @throws SolrServerException * the solr server exception */ public List<Place> search(String place_string) throws SolrServerException { return search(place_string, false); } /** * Instance method that reuses a set of SolrParams for optimized search. * * <pre> * Search the gazetteer using one of the following: * * a name or keyword * a Solr style fielded query, which by default includes bare keyword searches * * search( "\"Boston City\"" ) * * Solr Gazetteer uses OR as default joiner for clauses. * * </pre> * * @param place * the place * @param as_solr * the as_solr * @return places List of place entries * @throws SolrServerException * the solr server exception */ public List<Place> search(String place, boolean as_solr) throws SolrServerException { if (as_solr) { params.set("q", place); } else { // Bare keyword query needs to be quoted as "word word word" params.set("q", "\"" + place + "\""); } return SolrProxy.searchGazetteer(solr.getInternalSolrServer(), params); } /** * Find places located at a particular location. * * @param yx * location * @param withinKM * positive distance radius is required. * @return unsorted list of places near location * @throws SolrServerException * on err */ public List<Place> placesAt(LatLon yx, int withinKM) throws SolrServerException { geoLookup.set("pt", GeodeticUtility.formatLatLon(yx)); geoLookup.set("d", withinKM); return SolrProxy.searchGazetteer(solr.getInternalSolrServer(), geoLookup); } /** * Variation on placesAt(). * * @param yx * location * @param withinKM * distance - required. * @param feature * feature class * @return unsorted list of places near location * @throws SolrServerException * on err */ public List<Place> placesAt(LatLon yx, int withinKM, String feature) throws SolrServerException { /* */ ModifiableSolrParams spatialQuery = createGeodeticLookupParams(); spatialQuery.set(CommonParams.FQ, String.format("feat_class:%s", feature)); // The point in question. spatialQuery.set("pt", GeodeticUtility.formatLatLon(yx)); // Example: Find places within 50KM, but only first N rows returned. spatialQuery.set("d", withinKM); return SolrProxy.searchGazetteer(solr.getInternalSolrServer(), spatialQuery); } /** * Iterate through a list and choose a place closest to the given point * * @param yx * point of interest * @param places * list of places * @return closest place */ public static final Place closest(LatLon yx, List<Place> places) { long dist = 10000000L; Place chosen = null; for (Place p : places) { long currentDist = GeodeticUtility.distanceMeters(yx, p); if (currentDist < dist) { dist = currentDist; chosen = p; } } return chosen; // Is not null. } /** * This is a reasonable guess. CAVEAT: This does not use Solr Spatial * location sorting. * * @param yx * location * @param withinKM * distance in KM * @param feature * feature type * @return closest place to given location. * @throws SolrServerException * on err */ public Place placeAt(LatLon yx, int withinKM, String feature) throws SolrServerException { List<Place> candidates = placesAt(yx, withinKM, feature); if (candidates == null || candidates.isEmpty()) { return null; } return closest(yx, candidates); } }