org.aksw.simba.cetus.fox.FoxBasedTypeSearcher.java Source code

Java tutorial

Introduction

Here is the source code for org.aksw.simba.cetus.fox.FoxBasedTypeSearcher.java

Source

/**
 * This file is part of Cetus.
 *
 * Cetus is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Cetus is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Cetus.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.aksw.simba.cetus.fox;

import java.io.IOException;
import java.net.URLDecoder;
import java.nio.charset.UnsupportedCharsetException;
import java.util.HashSet;
import java.util.List;

import org.aksw.gerbil.transfer.nif.Document;
import org.aksw.gerbil.transfer.nif.data.NamedEntity;
import org.aksw.gerbil.transfer.nif.data.TypedNamedEntity;
import org.aksw.simba.cetus.annotator.CetusTypeSearcher;
import org.aksw.simba.cetus.datatypes.ExtendedTypedNamedEntity;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.fluent.Request;
import org.apache.http.client.fluent.Response;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class FoxBasedTypeSearcher implements CetusTypeSearcher {

    private static final Logger LOGGER = LoggerFactory.getLogger(FoxBasedTypeSearcher.class);

    private static final String FOX_SERVICE = "http://139.18.2.164:4444/call/ner/entities";

    private static final String PERSON_TYPE_URI = "scmsann:PERSON";
    private static final String LOCATION_TYPE_URI = "scmsann:LOCATION";
    private static final String ORGANIZATION_TYPE_URI = "scmsann:ORGANIZATION";

    private static final String DOLCE_PERSON_TYPE_URI = "http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Person";
    private static final String DOLCE_LOCATION_TYPE_URI = "http://www.ontologydesignpatterns.org/ont/d0.owl#Location";
    private static final String DOLCE_ORGANIZATION_TYPE_URI = "http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Organization";

    @Override
    public TypedNamedEntity getAllTypes(Document document, NamedEntity ne,
            List<ExtendedTypedNamedEntity> surfaceForms) {
        TypedNamedEntity tne = new TypedNamedEntity(ne.getStartPosition(), ne.getLength(), ne.getUris(),
                new HashSet<String>());
        for (ExtendedTypedNamedEntity surfaceForm : surfaceForms) {
            tne.getTypes().addAll(surfaceForm.getUris());
        }

        try {
            // request FOX
            Response response = Request.Post(FOX_SERVICE).addHeader("Content-type", "application/json")
                    .addHeader("Accept-Charset", "UTF-8")
                    .body(new StringEntity(new JSONObject().put("input", document.getText()).put("type", "text")
                            .put("task", "ner").put("output", "JSON-LD").toString(), ContentType.APPLICATION_JSON))
                    .execute();

            HttpResponse httpResponse = response.returnResponse();
            HttpEntity entry = httpResponse.getEntity();

            String content = IOUtils.toString(entry.getContent(), "UTF-8");
            EntityUtils.consume(entry);

            // parse results
            JSONObject outObj = new JSONObject(content);
            if (outObj.has("@graph")) {

                JSONArray graph = outObj.getJSONArray("@graph");
                for (int i = 0; i < graph.length(); i++) {
                    parseType(graph.getJSONObject(i), tne, surfaceForms);
                }
            } else {
                parseType(outObj, tne, surfaceForms);
            }
        } catch (Exception e) {
            LOGGER.error("Got an exception while communicating with the FOX web service.", e);
        }
        return tne;
    }

    protected void parseType(JSONObject entity, TypedNamedEntity ne, List<ExtendedTypedNamedEntity> surfaceForms)
            throws Exception {
        try {

            if (entity != null && entity.has("means") && entity.has("beginIndex") && entity.has("ann:body")) {

                String uri = entity.getString("means");
                String body = entity.getString("ann:body");
                Object begin = entity.get("beginIndex");
                Object typeObject = entity.get("@type");
                String types[];
                if (typeObject instanceof JSONArray) {
                    JSONArray typeArray = (JSONArray) typeObject;
                    types = new String[typeArray.length()];
                    for (int i = 0; i < types.length; ++i) {
                        types[i] = typeArray.getString(i);
                    }
                } else {
                    types = new String[] { typeObject.toString() };
                }
                URLDecoder.decode(uri, "UTF-8");
                if (begin instanceof JSONArray) {
                    // for all indices
                    for (int i = 0; i < ((JSONArray) begin).length(); ++i) {
                        addTypeIfOverlapping(ne, Integer.valueOf(((JSONArray) begin).getString(i)), body.length(),
                                types);
                    }
                } else if (begin instanceof String) {
                    addTypeIfOverlapping(ne, Integer.valueOf((String) begin), body.length(), types);
                } else if (LOGGER.isDebugEnabled())
                    LOGGER.debug("Couldn't find index");
            }
        } catch (Exception e) {
            LOGGER.error("Got an Exception while parsing the response of FOX.", e);
            throw new Exception("Got an Exception while parsing the response of FOX.", e);
        }
    }

    private void addTypeIfOverlapping(TypedNamedEntity ne, int begin, int length, String[] types) {
        int neStart = ne.getStartPosition();
        int neEnd = neStart + ne.getLength();
        int end = begin + length;
        // FIXME We should also check the overlapping with extracted type
        // strings
        if (((neStart <= begin) && (neEnd > begin)) || ((neStart < end) && (neEnd >= end))
                || ((neStart <= begin) && (neEnd >= end)) || ((neStart < end) && (neEnd > begin))) {
            for (int i = 0; i < types.length; ++i) {
                switch (types[i]) {
                case PERSON_TYPE_URI: {
                    ne.getTypes().add(DOLCE_PERSON_TYPE_URI);
                    break;
                }
                case LOCATION_TYPE_URI: {
                    ne.getTypes().add(DOLCE_LOCATION_TYPE_URI);
                    break;
                }
                case ORGANIZATION_TYPE_URI: {
                    ne.getTypes().add(DOLCE_ORGANIZATION_TYPE_URI);
                    break;
                }
                }
            }
        }
    }

    public static void main(String[] args)
            throws UnsupportedCharsetException, ClientProtocolException, JSONException, IOException {
        Response response = Request.Post(FOX_SERVICE).addHeader("Content-type", "application/json")
                .addHeader("Accept-Charset", "UTF-8")
                .body(new StringEntity(new JSONObject().put("input",
                        "Brian Banner is a fictional villain from the Marvel Comics Universe created by Bill Mantlo and Mike Mignola and first appearing in print in late 1985.")
                        .put("type", "text").put("task", "ner").put("output", "JSON-LD").toString(),
                        ContentType.APPLICATION_JSON))
                .execute();

        HttpResponse httpResponse = response.returnResponse();
        HttpEntity entry = httpResponse.getEntity();

        String content = IOUtils.toString(entry.getContent(), "UTF-8");
        System.out.println(content);
    }

}