linkipedia.EsorService.java Source code

Java tutorial

Introduction

Here is the source code for linkipedia.EsorService.java

Source

/**
 * Linkipedia, Copyright (c) 2015 Tetherless World Constellation 
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package linkipedia;

import java.io.InputStream;
import java.net.URI;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.HttpClients;
import utils.ConceptItem;
import utils.ConceptMatcher;
import utils.QueryItem;
import org.json.JSONArray;
import org.json.JSONObject;

public class EsorService implements ConceptMatcher {

    private static Log log = LogFactory.getLog(EsorService.class);

    private static final String REST_URL = "http://esor.tw.rpi.edu/annotate/search";

    //temporary server for testing
    //private static final String REST_URL = "http://dataonetwc.tw.rpi.edu/linkipedia/search";

    public List<ConceptItem> getConcepts(String fullText) throws Exception {
        String query = parseTerm(fullText);
        String escapedSpaceQuery = escapeToSpace(query);
        String escapedCommaQuery = escapeToComma(query);

        if (escapedSpaceQuery == escapedCommaQuery) {
            return getConcepts(new QueryItem(escapedSpaceQuery));
        } else {
            List<ConceptItem> res_escapedSpace = getConcepts(new QueryItem(escapedSpaceQuery));
            List<ConceptItem> res_escapedComma = getConcepts(new QueryItem(escapedCommaQuery));
            return mergeRes(res_escapedSpace, res_escapedComma);
        }
        //return getConcepts(new QueryItem(fullText));
    }

    public List<ConceptItem> getConcepts(QueryItem queryItem) throws Exception {
        return lookupEsor(queryItem);
    }

    private static List<ConceptItem> lookupEsor(QueryItem queryItem) throws Exception {

        HttpClient client = HttpClients.createDefault();
        String uriStr = REST_URL + "?query=" + queryItem.toString();
        //System.out.println(uriStr);

        HttpGet method = new HttpGet(uriStr);
        method.setHeader("Accept", "application/json");
        HttpResponse response = client.execute(method);
        int code = response.getStatusLine().getStatusCode();
        if (2 != code / 100) {
            throw new Exception("response code " + code + " for resource at " + uriStr);
        }
        InputStream body = response.getEntity().getContent();
        String jsonStr = convertStreamToString(body);
        //System.out.println(jsonStr);

        JSONObject json = new JSONObject(jsonStr);
        String query = json.getString("query");
        JSONArray results = json.getJSONArray("results");

        //analysis the result and return
        ArrayList<ConceptItem> concepts = new ArrayList<ConceptItem>();
        for (int i = 0; i < results.length(); i++) {
            JSONObject r = results.getJSONObject(i);
            String url = r.getString("url");
            String score = r.getString("score");

            if (url.length() > 0) {
                if (url.contains("http")) {
                    ConceptItem c = new ConceptItem(new URI(url.substring(1, url.length() - 1)),
                            Double.parseDouble(score));
                    concepts.add(c);
                }
            } else {
                //System.out.println(queryItem+":NA");
            }
        }

        return concepts;
    }

    static String convertStreamToString(java.io.InputStream is) {
        java.util.Scanner s = new java.util.Scanner(is).useDelimiter("\\A");
        return s.hasNext() ? s.next() : "";
    }

    private String parseTerm(String str) throws Exception {

        if (str.contains("(")) {
            str = str.substring(0, str.indexOf("("));
        }

        str = str.replaceAll("\\s+$", "");

        str = replaceFromSlashToSpace(replaceFromDotToSpace(replaceFromDashToSpace(str)));
        str = str.replace("%", " percent");
        str = insertSpaceBeforeCapital(str);
        str = URLEncoder.encode(str, "UTF-8").replaceAll("\\+", "%20");
        return str;
    }

    private String replaceFromDotToSpace(String str) {
        return str.replace(".", " ");
    }

    private String replaceFromSlashToSpace(String str) {
        return str.replace("/", " ");
    }

    private String insertSpaceBeforeCapital(String str) {
        char[] charArr = str.toCharArray();
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < charArr.length; i++) {
            if (i > 0 && charArr[i] >= 'A' && charArr[i] <= 'Z' && charArr[i - 1] >= 'a' && charArr[i - 1] <= 'z')
                sb.append(" ");
            sb.append(charArr[i]);
        }
        return sb.toString();
    }

    private String replaceFromDashToSpace(String original) {
        return original.replace("_", " ");
    }

    private String escapeToSpace(String original) {
        return original.replace(" ", "%20");
    }

    private String escapeToComma(String original) {
        return original.replace("%20", ",");
    }

    private List<ConceptItem> mergeRes(List<ConceptItem> res_escapedSpace, List<ConceptItem> res_escapedComma) {
        if (res_escapedSpace.size() == 0)
            return res_escapedComma;
        if (res_escapedComma.size() == 0)
            return res_escapedSpace;

        int indexS = 0;
        int indexC = 0;
        while (indexS < res_escapedSpace.size()) {
            if (indexC < res_escapedComma.size()
                    && res_escapedComma.get(indexC).getWeight() >= res_escapedSpace.get(indexS).getWeight()) {
                res_escapedSpace.add(indexS, res_escapedComma.get(indexC));
                indexS++;
                indexC++;
            } else {
                indexS++;
            }
        }

        for (int i = indexC; i < res_escapedComma.size(); i++) {
            res_escapedSpace.add(res_escapedComma.get(i));
        }

        return res_escapedSpace;
    }
}