Java tutorial
package uk.ac.susx.tag.method51.twitter.geocoding.geonames; /* * #%L * GeonamesSPARQLLocationDatabase.java - method51 - University of Sussex - 2,013 * %% * Copyright (C) 2013 - 2014 University of Sussex * %% * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * #L% */ import com.google.common.io.Resources; import org.apache.http.client.HttpClient; import org.apache.http.client.ResponseHandler; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.utils.URIUtils; import org.apache.http.impl.client.BasicResponseHandler; import org.apache.http.impl.client.DefaultHttpClient; import org.json.simple.JSONArray; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import uk.ac.susx.mlcl.lib.io.Files; import uk.ac.susx.tag.method51.core.MiscUtil; import uk.ac.susx.tag.method51.twitter.geocoding.*; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.net.URI; import java.net.URISyntaxException; import java.net.URLEncoder; import java.util.*; /** * User: sw206 * Date: 03/01/2013 * Time: 14:53 */ public class GeonamesSPARQLLocationDatabase implements LocationDatabase { private static final Logger LOG = LoggerFactory.getLogger(GeonamesSPARQLLocationDatabase.class); private final String sparqlQuery; private static final String POPULATION_KEY = "population"; private static final String FEATURE_CODE_KEY = "code"; private static final String LATITUDE_KEY = "lat"; private static final String LONGITUDE_KEY = "lon"; private static final String CODE_PREFIX = "http://www.geonames.org/ontology#"; private final Map<String, Integer> featureCodeScores; private final double populationScore; private final int populationThreshold; private final double defaultScore; private final double distanceTolerance; //km private final String host; private final int port; public GeonamesSPARQLLocationDatabase() throws IOException { this("127.0.0.1", 3030); } public GeonamesSPARQLLocationDatabase(String host, int port) throws IOException { this.host = host; this.port = port; try (BufferedReader reader = new BufferedReader(new InputStreamReader( Resources.getResource(this.getClass(), "location_lookup.rq").openStream()));) { sparqlQuery = Files.getText(reader).toString(); } featureCodeScores = new HashMap<>(); featureCodeScores.put(CODE_PREFIX + "P.PPLL", 3); featureCodeScores.put(CODE_PREFIX + "P.PPL", 4); featureCodeScores.put(CODE_PREFIX + "P.PPLX", 5); featureCodeScores.put(CODE_PREFIX + "P.PPLA4", 6); featureCodeScores.put(CODE_PREFIX + "P.PPLA3", 7); featureCodeScores.put(CODE_PREFIX + "P.PPLA2", 8); featureCodeScores.put(CODE_PREFIX + "P.PPLA", 9); featureCodeScores.put(CODE_PREFIX + "P.PPLC", 10); featureCodeScores.put(CODE_PREFIX + "A.ADM1", 10); featureCodeScores.put(CODE_PREFIX + "A.ADM2", 10); featureCodeScores.put(CODE_PREFIX + "A.ADM3", 10); featureCodeScores.put(CODE_PREFIX + "A.ADM4", 10); featureCodeScores.put(CODE_PREFIX + "A.ADM5", 10); populationScore = 25; double gbScore = 0; defaultScore = 3; populationThreshold = 2000000; distanceTolerance = 20; } /** * Performs a request to the SPARQL server and parses the response. * * @param candidate * @return list of matching entries * @throws java.io.IOException */ @Override public List<LocationMatch> query(LocationCandidate candidate) throws IOException { String query = String.format(sparqlQuery, candidate.getCandidateString(), populationThreshold); List<LocationMatch> matches = null; try { String response = makeRequest(query); //System.out.println(response); matches = parseResponse(response, candidate); collapseCloseMatches(matches); if (matches.size() > 1) { matches = rankMatches(matches); } } catch (URISyntaxException | ParseException | LocationUnresolvedException e) { LOG.error(e.getMessage(), e); } return matches; } public String makeRequest(String query) throws IOException, URISyntaxException { HttpGet get = new HttpGet(); String queryString = "query=" + URLEncoder.encode(query, "UTF-8"); queryString += "&output=json"; String locationLookupURI = "/ds/query"; int locationLookupPort = port; String locationLookupHost = host; URI uri = URIUtils.createURI("http", locationLookupHost, locationLookupPort, locationLookupURI, queryString, null); get.setURI(uri); HttpClient httpClient = new DefaultHttpClient(); ResponseHandler<String> responseHandler = new BasicResponseHandler(); String responseBody = httpClient.execute(get, responseHandler); httpClient.getConnectionManager().shutdown(); return responseBody; } private List<LocationMatch> parseResponse(String response, LocationCandidate candidate) throws ParseException, LocationUnresolvedException { JSONParser jsonParser = new JSONParser(); List<LocationMatch> matches = new LinkedList<>(); JSONObject root = (JSONObject) jsonParser.parse(response); JSONArray bindings = (JSONArray) ((Map) root.get("results")).get("bindings"); for (Object object1 : bindings) { JSONObject binding = (JSONObject) object1; Map<String, String> data = new HashMap<>(); for (Object object2 : binding.entrySet()) { Map.Entry field = (Map.Entry) object2; String key = (String) field.getKey(); String value = (String) ((Map) field.getValue()).get("value"); data.put(key, value); } LocationMatch match = new LocationMatch(); match.data = data; if (candidate != null) { match.matchingText = candidate.getCandidateString(); match.matchBegin = candidate.getBegin(); match.matchEnd = candidate.getEnd(); } String lat = data.get(LATITUDE_KEY); String lon = data.get(LONGITUDE_KEY); if (lat == null || lon == null) { throw new LocationUnresolvedException("null lat or lon in geonames db entry " + data.get("x")); } try { match.lat = Double.parseDouble(lat); match.lon = Double.parseDouble(lon); } catch (NumberFormatException e) { throw new LocationUnresolvedException("could not parse lat / lon to numbers " + lat + " / " + lon); } matches.add(match); //System.out.println((String)entry.get("x")); } return matches; } private void collapseCloseMatches(List<LocationMatch> match) { int i = 0; while (i < match.size()) { LocationMatch mI = match.get(i); int j = i + 1; while (j < match.size()) { LocationMatch mJ = match.get(j); if (euclideanDistance(mI, mJ) < distanceTolerance) { match.remove((mJ)); } ++j; } ++i; } } /** * Scores matches for the same mention by featureCode and population attributes * * @param matches */ public List<LocationMatch> rankMatches(List<LocationMatch> matches) { double[] scores = new double[matches.size()]; Arrays.fill(scores, 0.0); int i = 0; int totalPop = 0; for (LocationMatch match : matches) { try { scores[i] = featureCodeScores.get(match.data.get(FEATURE_CODE_KEY)); } catch (NullPointerException e) { scores[i] = defaultScore; } if (match.data.containsKey(POPULATION_KEY)) { try { totalPop += Integer.parseInt(match.data.get(POPULATION_KEY)); } catch (NumberFormatException e) { LOG.error("Exception caught", e); } } ++i; } i = 0; for (LocationMatch match : matches) { if (match.data.containsKey(POPULATION_KEY)) { String pop = match.data.get(POPULATION_KEY); try { int population = Integer.parseInt(pop); double score = populationScore * (population / (double) totalPop); scores[i] += score; } catch (NumberFormatException e) { LOG.error("POPULATION: Couldn't parse " + pop + " to a number!", e); } } ++i; } //System.out.println(Arrays.toString(scores)); //normalise double total = 0; for (double score : scores) { total += score; } double[] likelihoods = new double[scores.length]; for (i = 0; i < scores.length; ++i) { likelihoods[i] = scores[i] / total; } i = 0; for (LocationMatch match : matches) { match.likelihood = likelihoods[i]; match.score = scores[i]; ++i; } Collections.sort(matches, new LocationMatchLikelihoodComparator()); if (matches.size() > 30) { matches = matches.subList(0, 30); rankMatches(matches); } return matches; } public double euclideanDistance(LocationMatch m1, LocationMatch m2) { double[] p1 = new double[2]; p1[0] = m1.lat; p1[1] = m1.lon; double[] p2 = new double[2]; p2[0] = m2.lat; p2[1] = m2.lon; return euclideanDistance(p1, p2); } public static double euclideanDistance(double[] p1, double[] p2) { double EARTH_RADIUS = 6371; double lat1 = p1[0]; double lon1 = p1[1]; double lat2 = p2[0]; double lon2 = p2[1]; double dLat = Math.toRadians(lat2 - lat1); double dLng = Math.toRadians(lon2 - lon1); double a = Math.sin(dLat / 2) * Math.sin(dLat / 2) + Math.cos(Math.toRadians(lat1)) * Math.cos(Math.toRadians(lat2)) * Math.sin(dLng / 2) * Math.sin(dLng / 2); double c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a)); double x = (lon2 - lon1) * Math.cos((lat1 + lat2) / 2); double y = (lat2 - lat1); //double dist = Math.sqrt( x*x + y*y ) * EARTH_RADIUS; double dist = c * EARTH_RADIUS; return dist; } }