Example usage for edu.stanford.nlp.pipeline Annotator annotate

Introduction

On this page you can find example usage of the annotate method of edu.stanford.nlp.pipeline.Annotator.

Prototype

void annotate(Annotation annotation);

Source Link

Document

Given an Annotation, perform a task on this Annotation.

Usage

From source file: org.nlp2rdf.implementation.stanfordcorenlp.StanfordWrapper.java

License: Apache License

/**
 * Runs the Stanford annotator built from {@code nifParameters} over the text of {@code context}
 * and writes the results into {@code outputModel}.
 *
 * <p>The method works in three passes:
 * <ol>
 *   <li>collect the character spans of every sentence and token and hand them to
 *       {@code Text2RDF.generateNIFModel} to create the basic model;</li>
 *   <li>for every token already present in the model, attach its lemma (when available) and the
 *       OLiA individuals/categories mapped from its POS tag;</li>
 *   <li>for every sentence that carries collapsed CC-processed dependencies, add one object
 *       property per dependency edge between the governor and dependent individuals.</li>
 * </ol>
 *
 * <p>Problems (unknown POS tags, missing OLiA links, unusable edge labels, missing individuals)
 * are recorded as RLOG entries in {@code outputModel} and the offending item is skipped.
 *
 * @param context       the individual whose {@code isString} literal holds the text to annotate
 * @param inputModel    the model used to resolve the {@code isString} datatype property
 * @param outputModel   the model that receives all generated statements and log entries
 * @param nifParameters supplies the URI prefix, URI scheme, log prefix and annotator configuration
 */
public void process(Individual context, OntModel inputModel, OntModel outputModel,
        NIFParameters nifParameters) {
    String contextString = context
            .getPropertyValue(NIFDatatypeProperties.isString.getDatatypeProperty(inputModel)).asLiteral()
            .getString();
    String prefix = nifParameters.getPrefix();
    URIScheme urischeme = nifParameters.getUriScheme();

    Annotator pipeline = buildAnnotator(nifParameters);

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(contextString);
    // run all Annotators on this text
    pipeline.annotate(document);

    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);

    // First pass: read all sentence and word spans into an intermediate structure.
    // NOTE: this can be greatly optimized of course;
    // for now it is just simple and cheap to implement it like this.
    int wordCount = 0;
    TreeMap<Span, List<Span>> tokenizedText = new TreeMap<Span, List<Span>>();
    for (CoreMap sentence : sentences) {
        Span sentenceSpan = new Span(sentence.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
                sentence.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
        List<Span> wordSpans = new ArrayList<Span>();
        for (CoreLabel coreLabel : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            wordSpans.add(new Span(coreLabel.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
                    coreLabel.get(CoreAnnotations.CharacterOffsetEndAnnotation.class)));
            wordCount++;
        }
        tokenizedText.put(sentenceSpan, wordSpans);
    }

    // Basic model setup: create the individuals for the context, sentences and words.
    Text2RDF text2RDF = new Text2RDF();
    text2RDF.generateNIFModel(prefix, context, urischeme, outputModel, tokenizedText);
    outputModel.add(RLOGSLF4JBinding.log(nifParameters.getLogPrefix(),
            "Finished creating " + tokenizedText.size() + " sentence(s) with " + wordCount + " word(s) ",
            RLOGIndividuals.DEBUG, this.getClass().getCanonicalName(), null, null));
    // text2RDF.addNextAndPreviousProperties(prefix,urischeme,model);

    // Second and third pass, sentence by sentence.
    // a CoreLabel is a CoreMap with additional token-specific methods
    for (CoreMap sentence : sentences) {

        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            Span wordSpan = new Span(token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
                    token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
            // the word should exist already (created by generateNIFModel above)
            String wordUri = urischeme.generate(prefix, contextString, wordSpan);
            Individual wordIndividual = outputModel.getIndividual(wordUri);

            if (wordIndividual == null) {
                log.error("SKIPPING: word was not found in the model: " + wordUri);
                continue;
            }

            /********************************
             * Lemma
             ******/
            String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
            if (lemma != null) {
                wordIndividual.addProperty(NIFDatatypeProperties.lemma.getDatatypeProperty(outputModel),
                        lemma, XSDDatatype.XSDstring);
            }

            /********************************
             * POS tag
             ******/
            outputModel.setNsPrefix("olia", "http://purl.org/olia/olia.owl#");
            // this is the POS tag of the token
            String posTag = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);

            List<String> oliaIndividual = (List<String>) Penn.hasTag.get(posTag);
            if (oliaIndividual != null) {

                for (String s : oliaIndividual) {
                    wordIndividual.addProperty(NIFObjectProperties.oliaLink.getObjectProperty(outputModel),
                            outputModel.createIndividual(s, OWL.Thing));
                    List<String> pennlinks = (List<String>) Penn.links.get(s);
                    if (pennlinks != null) {
                        for (String oc : pennlinks) {
                            wordIndividual.addProperty(
                                    NIFAnnotationProperties.oliaCategory.getAnnotationProperty(outputModel),
                                    outputModel.createClass(oc));
                        }
                    } else {
                        outputModel.add(
                                RLOGSLF4JBinding.log(nifParameters.getLogPrefix(), "missing oliaLinks for " + s,
                                        RLOGIndividuals.ERROR, this.getClass().getCanonicalName(), null, null));
                    }
                }
            } else {
                outputModel.add(
                        RLOGSLF4JBinding.log(nifParameters.getLogPrefix(), "missing oliaLinks for " + posTag,
                                RLOGIndividuals.ERROR, this.getClass().getCanonicalName(), null, null));

            }
        }

        /********************************
         * Dependencies
         ******/
        SemanticGraph dependencies = sentence
                .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);

        if (dependencies != null) {
            //time to add the prefix
            StanfordSimple.addStanfordSimplePrefix(outputModel);

            // create relation annotations for each Stanford dependency
            for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {

                Span govSpan = new Span(
                        stanfordEdge.getGovernor().get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
                        stanfordEdge.getGovernor().get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
                Span depSpan = new Span(
                        stanfordEdge.getDependent().get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
                        stanfordEdge.getDependent().get(CoreAnnotations.CharacterOffsetEndAnnotation.class));

                String[] edgeURIs = StanfordSimple.getURIforEdgeLabel(stanfordEdge.getRelation().toString());
                ObjectProperty relation = null;
                switch (edgeURIs.length) {
                case 1:
                    relation = outputModel.createObjectProperty(edgeURIs[0]);
                    break;
                case 2:
                    relation = outputModel.createObjectProperty(edgeURIs[0]);
                    relation.addSubProperty(outputModel.createObjectProperty(edgeURIs[1]));
                    break;
                default:
                    // Arrays.toString prints the array contents; plain concatenation would only
                    // print the array's identity (e.g. "[Ljava.lang.String;@1a2b3c").
                    String message = "Empty edge label, no URI written: "
                            + java.util.Arrays.toString(edgeURIs);
                    outputModel.add(RLOGSLF4JBinding.log(nifParameters.getLogPrefix(), message,
                            RLOGIndividuals.ERROR, this.getClass().getCanonicalName(), null, null));
                    continue;
                }

                // The relation hierarchy does not depend on the two end points.
                relation.addSuperProperty(NIFObjectProperties.inter.getObjectProperty(outputModel));
                relation.addSuperProperty(NIFObjectProperties.dependency.getObjectProperty(outputModel));

                Individual gov = text2RDF.createCStringIndividual(prefix, context, govSpan, urischeme,
                        outputModel);
                Individual dep = text2RDF.createCStringIndividual(prefix, context, depSpan, urischeme,
                        outputModel);

                // Check BEFORE dereferencing gov: linking with a null end point must be skipped
                // and logged, not fail with a NullPointerException.
                if (gov == null || dep == null) {
                    String message = "SKIPPING Either gov or dep was null for the dependencies\n" + "gov: "
                            + gov + "\ndep: " + dep;
                    outputModel.add(RLOGSLF4JBinding.log(nifParameters.getLogPrefix(), message,
                            RLOGIndividuals.ERROR, this.getClass().getCanonicalName(), null, null));
                    continue;
                }

                gov.addProperty(relation, dep);

                // NOTE: linking dependency relations to OLiA individuals/categories (via the
                // Stanford tag tables) and copying the OLiA class hierarchy into a property
                // hierarchy were removed for 2.0.
            }
        } // end dependencies

        /**************
         * Syntax Tree
         * */
        // NOTE: syntax tree processing (TreeAnnotation / processTree) was removed for 2.0.

    } // end sentences

}