Example usage for edu.stanford.nlp.semgraph SemanticGraphEdge getDependent

List of usage examples for edu.stanford.nlp.semgraph SemanticGraphEdge getDependent

Introduction

In this page you can find the example usage for edu.stanford.nlp.semgraph SemanticGraphEdge getDependent.

Prototype

public IndexedWord getDependent() 

Source Link

Usage

From source file:ca.mcgill.cs.crown.procedure.ParseExtractor.java

License:Creative Commons License

/**
 * Gets the candidate hypernyms from the provided subdefinition.
 *
 * @param dependencies the dependency parse of the subdefinition
 * @param subdef the raw text of the subdefinition
 * @param spos_ the part of speech of the sense being defined
 * @return a mapping from each candidate hypernym to the heuristics that generated it
 */
MultiMap<String, String> getCandidates(SemanticGraph dependencies, String subdef, POS spos_) {

    MultiMap<String, String> candidates = new HashMultiMap<String, String>();
    char sensePos = toChar(spos_);

    Collection<IndexedWord> roots = dependencies.getRoots();
    next_root: for (IndexedWord root : roots) {
        String lemma = root.get(LemmaAnnotation.class);
        String pos = root.get(PartOfSpeechAnnotation.class);
        // First character of the Penn tag, lower-cased: 'n', 'v', 'j', 'd', ...
        char lemmaPos = pos.substring(0, 1).toLowerCase().charAt(0);

        //System.out.println("testing: " + lemma + "/" + pos);

        // If the lemma is a verb, check for phrasal verbal particle (e.g.,
        // "lead on", "edge out") and if present, add them to the lemma
        if (lemmaPos == 'v') {
            List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root);
            for (SemanticGraphEdge e : edges) {
                if (e.getRelation().getShortName().equals("prt")) {
                    IndexedWord dep = e.getDependent();
                    lemma = lemma + " " + dep.get(LemmaAnnotation.class);
                    break;
                }
            }
        }

        // Heuristic 1: root matches exact POS
        if (lemmaPos == sensePos) {

            // Edge case for Heuristics 7: If the lemma is a noun and is
            // saying that this is an instance (e.g., "An instance of ..."),
            // then we take the dependent noun from instance
            //
            // Terrible example:
            //   The second of the two Books of Chronicles and the
            //   fourteenth book of the Old Testament of the Bible.
            //
            boolean foundExistentialDependent = false;
            if (lemma.equals("instance") || lemma.equals("example") || lemma.equals("first")
                    || lemma.equals("second") || lemma.equals("third") || lemma.equals("fourth")
                    || lemma.equals("fifth") || lemma.equals("sixth") || lemma.equals("series")) {
                // Check that there's actually a prepositional phrase
                // attached
                List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root);

                for (SemanticGraphEdge e : edges) {
                    if (e.getRelation().getShortName().equals("prep")) {
                        IndexedWord dep = e.getDependent();
                        String depLemma = dep.get(LemmaAnnotation.class);
                        char depPos = dep.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase()
                                .charAt(0);

                        //System.out.println("HEURISTIC 7");
                        if (depPos == sensePos) {
                            candidates.put(depLemma, "Heuristic-7");
                            addSiblings(dep, candidates, sensePos, dependencies, "Heuristic-7");
                            foundExistentialDependent = true;
                        }
                    }
                }
            }
            // Heuristic 7 replaces Heuristic 1/10 for this root entirely
            if (foundExistentialDependent)
                continue next_root;

            // Heuristic 10: In the case of noun phrases, take the last noun
            // in the phrase, e.g., "Molten material", "springtime snow
            // runoff"
            List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root);
            boolean foundDependent = false;
            for (SemanticGraphEdge e : edges) {
                if (e.getRelation().getShortName().equals("dep")) {
                    IndexedWord dep = e.getDependent();
                    String depLemma = dep.get(LemmaAnnotation.class);
                    char depPos = dep.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase().charAt(0);

                    //System.out.println("HEURISTIC 10");
                    if (depPos == sensePos) {
                        foundDependent = true;
                        candidates.put(depLemma, "Heuristic-10");
                        addSiblings(dep, candidates, sensePos, dependencies, "Heuristic-10");
                    }
                }
            }

            if (!foundDependent) {
                //System.out.println("HEURISTIC 1");
                candidates.put(lemma, "Heuristic-1");
                addSiblings(root, candidates, sensePos, dependencies, "Heuristic-1");
            }
        }

        // Heuristic 2: subdef is either (1) one word or (2) two or more
        // word that *must be connected by a conjunction, and (3) the lemma
        // has the wrong part of speech, but could have the same POS (i.e.,
        // the lemma was probably POS-tagged incorrectly).  
        if (sensePos != lemmaPos) {

            // Only one word in the subdef, which can manifest itself as the
            // graph having no vertices! (size == 0)
            if (dependencies.size() < 1) {
                // System.out.println("HEURISTIC 2a");
                IIndexWord iword = dict.getIndexWord(lemma, spos_);
                if (iword != null)
                    candidates.put(lemma, "Heuristic-2a");
                else {
                    // Sometimes adjectives get lemmatized to a verb form
                    // which is incorrect.  Check to see if the token
                    // matches
                    String token = root.get(TextAnnotation.class);
                    iword = dict.getIndexWord(token, spos_);
                    if (iword != null)
                        candidates.put(token, "Heuristic-2a");
                }
            } else {
                // System.out.println("HEURISTIC 2b");
                Set<IndexedWord> tmp = new HashSet<IndexedWord>();
                List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root);
                for (SemanticGraphEdge e : edges) {
                    // System.out.printf("edge from %s -> %s %s%n", lemma,
                    //                   e.getRelation().getShortName(),
                    //                   e.getRelation().getLongName());
                    if (e.getRelation().getShortName().equals("conj")) {
                        // Include the root itself once we know a conjunction exists
                        if (tmp.size() == 0)
                            tmp.add(root);
                        tmp.add(e.getDependent());
                    }
                }
                if (!tmp.isEmpty()) {
                    for (IndexedWord iw : tmp) {
                        String lem = iw.get(LemmaAnnotation.class);
                        IIndexWord iword = dict.getIndexWord(lem, spos_);
                        if (iword != null)
                            candidates.put(lem, "Heuristic-2b");
                        else {
                            // Sometimes adjectives get lemmatized to a verb
                            // form which is incorrect.  Check to see if
                            // the token matches
                            String token = iw.get(TextAnnotation.class);
                            iword = dict.getIndexWord(token, spos_);
                            if (iword != null)
                                candidates.put(token, "Heuristic-2b");
                        }
                    }
                    //System.out.println(tmp);
                }
            }
        }

        // Heuristics 3: the subdef is phrased as an overly-general description
        // of a person using "one", e.g., "one who does X".  Replace this with
        // "person"
        if (sensePos == 'n' && (lemma.equals("one") || lemma.equals("someone"))) {
            // check the dependency graph for a "who" attachment

            // TODO

            // ... or be lazy and just check for the token
            Matcher m = WHO.matcher(subdef);
            if (m.find()) {
                candidates.put("person", "Heuristic-3: Person");
            }
        }

        // Heuristic 4: if the root lemma is an adjective and the target
        // sense is a noun, look for a modifying a noun or set of nouns,
        // report those
        ///
        // Example: "a small, arched passageway"
        if (sensePos == 'n' && lemmaPos == 'j') {
            //System.out.println("HEURISTIC 4");
            List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root);
            for (SemanticGraphEdge e : edges) {
                // System.out.printf("edge from %s -> %s %s%n", lemma,
                //                   e.getRelation().getShortName(),
                //                   e.getRelation().getLongName());

                if (e.getRelation().getShortName().equals("appos")
                        || e.getRelation().getShortName().equals("dep")) {
                    IndexedWord dep = e.getDependent();
                    String depLemma = dep.get(LemmaAnnotation.class);
                    // System.out.println("!!! " + depLemma);
                    char depPos = dep.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase().charAt(0);

                    if (depPos == sensePos) {
                        candidates.put(depLemma, "Heuristic-4: Head Noun");
                        addSiblings(dep, candidates, sensePos, dependencies, "Heuristic-4: Head Noun");
                    }
                    //break;

                }
            }

        }

        // Heuristic 5: if the root lemma is a verb and the target sense is
        // a noun, look for a subject noun
        if (sensePos == 'n' && lemmaPos == 'v') {
            List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root);
            for (SemanticGraphEdge e : edges) {
                if (e.getRelation().getShortName().equals("nsubj")) {
                    IndexedWord dep = e.getDependent();

                    String depLemma = dep.get(LemmaAnnotation.class);
                    char depPos = dep.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase().charAt(0);

                    if (depPos == sensePos) {
                        candidates.put(depLemma, "Heuristic-5: Subject Noun");
                        addSiblings(dep, candidates, sensePos, dependencies, "Heuristic-5: Subject Noun");
                    }
                    break;

                }
            }
        }

        // Heuristic 6: if the root lemma is an existential quantifier or
        // something like it (e.g., "Any of ...") and
        // the target sense is a noun, look for a subject noun
        if (sensePos == 'n' && lemmaPos == 'd') {
            List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root);
            for (SemanticGraphEdge e : edges) {
                // System.out.printf("edge from %s -> %s %s%n", lemma,
                //                    e.getRelation().getShortName(),
                //                    e.getRelation().getLongName());

                if (e.getRelation().getShortName().equals("prep")
                        || e.getRelation().getShortName().equals("dep")) {
                    IndexedWord dep = e.getDependent();

                    String depLemma = dep.get(LemmaAnnotation.class);
                    char depPos = dep.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase().charAt(0);

                    // System.out.println(depLemma + "/" + depPos);

                    // This should be the common case
                    if (depPos == sensePos) {
                        candidates.put(depLemma, "Heuristic-6: Existential Example");
                        addSiblings(dep, candidates, sensePos, dependencies,
                                "Heuristic-6: Existential Example");
                    }
                    // This is for some really (really) unusually parsed
                    // edge cases
                    else {
                        List<SemanticGraphEdge> depEdges = dependencies.outgoingEdgeList(dep);
                        for (SemanticGraphEdge e2 : depEdges) {

                            if (e2.getRelation().getShortName().equals("rcmod")) {
                                IndexedWord dep2 = e2.getDependent();
                                String depLemma2 = dep2.get(LemmaAnnotation.class);
                                char depPos2 = dep2.get(PartOfSpeechAnnotation.class).substring(0, 1)
                                        .toLowerCase().charAt(0);

                                if (depPos2 == sensePos) {
                                    candidates.put(depLemma2, "Heuristic-6: Existential Example");
                                    addSiblings(dep2, candidates, sensePos, dependencies,
                                            "Heuristic-6: Existential Example");
                                }
                            }
                        }
                    }
                }
            }
        }

        // Heuristic 8: if the root lemma is a verb and the sense is an
        // adjective, but the verb is modified by an adverb, this catches
        // that cases that Heuristics 2 does not
        if (sensePos == 'j' && lemmaPos == 'v') {

            List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root);
            for (SemanticGraphEdge e : edges) {
                // System.out.printf("edge from %s -> %s %s%n", lemma,
                //                   e.getRelation().getShortName(),
                //                   e.getRelation().getLongName());
                if (e.getRelation().getShortName().equals("advmod")) {
                    IIndexWord iword = dict.getIndexWord(lemma, spos_);
                    if (iword != null)
                        candidates.put(lemma, "Heuristic-8: Adv-modified Verb");
                    else {
                        // Sometimes adjectives get lemmatized to a verb
                        // form which is incorrect.  Check to see if
                        // the token matches
                        String token = root.get(TextAnnotation.class);
                        iword = dict.getIndexWord(token, spos_);
                        if (iword != null)
                            candidates.put(token, "Heuristic-8: Adv-modified Verb");
                    }
                }
            }
        }

        // Heuristic 9: if the sense is an adjective and the root lemma
        // begins with with a negative *and* the gloss contains something
        // like "not [x]", then pull out the "x" and use it as the hypernym
        if (sensePos == 'j' && lemma.equals("not")) {
            List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root);
            for (SemanticGraphEdge e : edges) {
                // System.out.printf("edge from %s -> %s %s%n", lemma,
                //                    e.getRelation().getShortName(),
                //                    e.getRelation().getLongName());

                if (e.getRelation().getShortName().equals("dep")) {
                    IndexedWord dep = e.getDependent();

                    String depLemma = dep.get(LemmaAnnotation.class);
                    char depPos = dep.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase().charAt(0);

                    if (depPos == sensePos) {
                        candidates.put(depLemma, "Heuristic-9: negated adj");
                        addSiblings(dep, candidates, sensePos, dependencies, "Heuristic-9: negated adj");
                    }
                    break;

                }
            }
        }

        // Heuristic 11: if the sense is a verb and the root lemma
        // is "to", this is probably a case of mistaken POS-tagging
        if (sensePos == 'v' && lemma.equals("to")) {
            List<SemanticGraphEdge> edges = dependencies.outgoingEdgeList(root);
            for (SemanticGraphEdge e : edges) {
                if (e.getRelation().getShortName().equals("pobj")) {
                    IndexedWord dep = e.getDependent();
                    // NOTE(review): this looks up the root lemma ("to") rather
                    // than the dependent's lemma; possibly intended
                    // dep.get(LemmaAnnotation.class) -- confirm against callers
                    IIndexWord iword = dict.getIndexWord(lemma, spos_);
                    if (iword != null)
                        candidates.put(lemma, "Heuristic-11: verbal infinitive");
                    else {
                        // Sometimes verbs get lemmatized to a noun form
                        // that is incorrect.  Check to see if the token
                        // matches
                        String token = dep.get(TextAnnotation.class);
                        // Fixed label: was mislabeled "Heuristic-9: verbal
                        // infinitive"; this branch belongs to Heuristic 11
                        iword = dict.getIndexWord(token, spos_);
                        if (iword != null)
                            candidates.put(token, "Heuristic-11: verbal infinitive");
                    }
                }
            }
        }

    }
    return candidates;
}

From source file:ca.mcgill.cs.crown.procedure.ParseExtractor.java

License:Creative Commons License

/**
 * If we know we want {@code toAdd}, get all of its siblings that are joined
 * by conjunctions as candidates too/*  w w w.j  a va  2s  .c  om*/
 */
void addSiblings(IndexedWord toAdd, MultiMap<String, String> candidates, char targetPos, SemanticGraph parse,
        String reason) {
    List<SemanticGraphEdge> edges = parse.outgoingEdgeList(toAdd);
    for (SemanticGraphEdge e : edges) {
        if (e.getRelation().getShortName().equals("conj")) {
            IndexedWord dep = e.getDependent();
            String depLemma = dep.get(LemmaAnnotation.class);
            char depPos = dep.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase().charAt(0);
            if (targetPos == depPos) {
                if (targetPos != 'v') {
                    candidates.put(depLemma, reason + " (In conjunction)");
                }
                // Check for phrasal verb particles
                else {
                    List<SemanticGraphEdge> depEdges = parse.outgoingEdgeList(dep);
                    for (SemanticGraphEdge e2 : depEdges) {
                        if (e2.getRelation().getShortName().equals("prt")) {
                            IndexedWord dep2 = e.getDependent();
                            depLemma = depLemma + " " + dep2.get(LemmaAnnotation.class);
                            break;
                        }
                    }
                }
            }
        }
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.internal.CoreNlp2DKPro.java

License:Open Source License

/**
 * Converts the Stanford dependency graph of every sentence into DKPro
 * {@code Dependency} annotations and indexes them in the CAS. Sentences
 * without a dependency graph are skipped.
 */
public static void convertDependencies(JCas aJCas, Annotation document, MappingProvider mappingProvider,
        boolean internStrings) {
    for (CoreMap sentence : document.get(SentencesAnnotation.class)) {
        SemanticGraph graph = sentence.get(CollapsedDependenciesAnnotation.class);
        //SemanticGraph graph = sentence.get(EnhancedDependenciesAnnotation.class);

        // No dependencies for this sentence -- skip it. This might well mean
        // we skip every sentence, since normally either all sentences have
        // dependencies or none do.
        if (graph == null) {
            continue;
        }

        // Emit an explicit ROOT relation for each graph root, with the root
        // token acting as both governor and dependent.
        for (IndexedWord rootWord : graph.getRoots()) {
            Token rootToken = rootWord.get(TokenKey.class);
            Dependency rootDep = new ROOT(aJCas);
            rootDep.setDependencyType("root");
            rootDep.setDependent(rootToken);
            rootDep.setGovernor(rootToken);
            rootDep.setBegin(rootToken.getBegin());
            rootDep.setEnd(rootToken.getEnd());
            rootDep.setFlavor(DependencyFlavor.BASIC);
            rootDep.addToIndexes();
        }

        for (SemanticGraphEdge edge : graph.edgeListSorted()) {
            Token dependent = edge.getDependent().get(TokenKey.class);
            Token governor = edge.getGovernor().get(TokenKey.class);

            // For the type mapping, use getShortName() -- the <specific>
            // suffix does not change the relation type.
            //
            // The nndepparser may produce labels in which the shortName
            // contains a colon; these represent language-specific UD labels,
            // cf: http://universaldependencies.github.io/docs/ext-dep-index.html
            String mappingLabel = StringUtils.substringBefore(edge.getRelation().getShortName(), ":");

            // toString() yields the full "<shortname>_<specific>" label.
            String fullLabel = edge.getRelation().toString();

            Type relType = mappingProvider.getTagType(mappingLabel);
            Dependency rel = (Dependency) aJCas.getCas().createFS(relType);
            rel.setDependencyType(internStrings ? fullLabel.intern() : fullLabel);
            rel.setDependent(dependent);
            rel.setGovernor(governor);
            rel.setBegin(dependent.getBegin());
            rel.setEnd(dependent.getEnd());
            rel.setFlavor(edge.isExtra() ? DependencyFlavor.ENHANCED : DependencyFlavor.BASIC);
            rel.addToIndexes();
        }
    }
}

From source file:edu.cmu.deiis.annotator.StanfordCoreNLPAnnotator.java

License:Open Source License

@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    // Run the full CoreNLP pipeline over the raw document text; everything
    // below converts its output into UIMA annotations.
    Annotation document = this.processor.process(jCas.getDocumentText());

    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);
        token.setPos(pos);
        token.setLemma(lemma);
        token.addToIndexes();

        // hackery to convert token-level named entity tag into phrase-level tag:
        // a run of identical non-"O" tags is merged into one mention that is
        // emitted when the run ends (tag returns to "O" or changes)
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
            ne.setMentionType(lastNETag);
            ne.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {
                NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
                ne.setMentionType(lastNETag);
                ne.addToIndexes();
                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    // flush a mention still open at the end of the document
    if (!lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
    }

    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();

        // add the syntactic tree annotation
        List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
        Tree tree = sentenceAnn.get(TreeAnnotation.class);
        if (tree.children().length != 1) {
            throw new RuntimeException("Expected single root node, found " + tree);
        }
        tree = tree.firstChild();
        tree.indexSpans(0);
        TopTreebankNode root = new TopTreebankNode(jCas);
        root.setTreebankParse(tree.toString());
        // TODO: root.setTerminals(v)
        this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

        // get the dependencies
        SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

        // convert Stanford nodes to UIMA annotations
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
        for (IndexedWord stanfordNode : dependencies.vertexSet()) {
            int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
            int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
            int tokenBegin = tokens.get(indexBegin).getBegin();
            int tokenEnd = tokens.get(indexEnd - 1).getEnd();
            DependencyNode node;
            if (dependencies.getRoots().contains(stanfordNode)) {
                node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
            } else {
                node = new DependencyNode(jCas, tokenBegin, tokenEnd);
            }
            stanfordToUima.put(stanfordNode, node);
        }

        // create relation annotations for each Stanford dependency;
        // headRelations maps a node to the relations in which it is the
        // child (pointing to its heads), childRelations the reverse
        ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create();
        ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create();
        for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
            DependencyRelation relation = new DependencyRelation(jCas);
            DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
            DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
            String relationType = stanfordEdge.getRelation().toString();
            if (head == null || child == null || relationType == null) {
                throw new RuntimeException(String.format(
                        "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation,
                        child, head));
            }
            relation.setHead(head);
            relation.setChild(child);
            relation.setRelation(relationType);
            relation.addToIndexes();
            headRelations.put(child, relation);
            childRelations.put(head, relation);
        }

        // set the relations for each node annotation
        for (DependencyNode node : stanfordToUima.values()) {
            List<DependencyRelation> heads = headRelations.get(node);
            node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
            if (heads != null) {
                FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
            }
            List<DependencyRelation> children = childRelations.get(node);
            node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
            if (children != null) {
                FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
            }
            node.addToIndexes();
        }
    }

    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }

    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
        for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {

            // figure out the character span of the token
            // (sentNum/startIndex are 1-based; endIndex is 1-based exclusive)
            List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
            int begin = tokens.get(corefMention.startIndex - 1).getBegin();
            int end = tokens.get(corefMention.endIndex - 2).getEnd();

            // use an existing named entity mention when possible; otherwise create a new one
            NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
            if (mention == null) {
                mention = new NamedEntityMention(jCas, begin, end);
                mention.addToIndexes();
            }
            mentions.add(mention);
        }

        // create an entity for the mentions
        Collections.sort(mentions, new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
                return m1.getBegin() - m2.getBegin();
            }
        });

        // create mentions and add them to entity
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, mentions.size()));
        int index = 0;
        for (NamedEntityMention mention : mentions) {
            mention.setMentionedEntity(entity);
            entity.setMentions(index, mention);
            index += 1;
        }
        entities.add(entity);
    }

    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        if (mention.getMentionedEntity() == null) {
            NamedEntity entity = new NamedEntity(jCas);
            entity.setMentions(new FSArray(jCas, 1));
            entity.setMentions(0, mention);
            mention.setMentionedEntity(entity);
            // (removed a no-op entity.getMentions() call here)
            entities.add(entity);
        }
    }

    // sort entities by document order
    Collections.sort(entities, new Comparator<NamedEntity>() {
        @Override
        public int compare(NamedEntity o1, NamedEntity o2) {
            return getFirstBegin(o1) - getFirstBegin(o2);
        }

        private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
                if (mention.getBegin() < min) {
                    min = mention.getBegin();
                }
            }
            return min;
        }
    });

    // add entities to document
    for (NamedEntity entity : entities) {
        entity.addToIndexes();
    }

}

From source file:edu.cmu.deiis.annotators.StanfordAnnotator.java

License:Open Source License

/**
 * Runs the wrapped Stanford CoreNLP pipeline over the CAS document text and converts
 * the results into UIMA annotations:
 * <ul>
 *   <li>tokens with POS tags and lemmas,</li>
 *   <li>phrase-level named entity mentions (collapsed from token-level NE tags),</li>
 *   <li>sentences, constituency trees, and dependency nodes/relations,</li>
 *   <li>named entities built from coreference chains, plus singleton entities for
 *       mentions the coreference system did not pick up.</li>
 * </ul>
 *
 * @param jCas CAS providing the document text and receiving all annotations
 * @throws AnalysisEngineProcessException per the UIMA annotator contract
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());

    // Collapse token-level NE tags into phrase-level mentions: a mention is closed
    // whenever the tag returns to "O" or switches to a different entity type.
    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);
        token.setPos(pos);
        token.setLemma(lemma);
        token.addToIndexes();

        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            // entity run just ended: emit the completed mention
            NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
            ne.setMentionType(lastNETag);
            ne.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                // a new entity run starts here
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // same run continues - begin was already set
            } else {
                // entity type changed mid-run: close the old mention, start a new run
                NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
                ne.setMentionType(lastNETag);
                ne.addToIndexes();
                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    // flush an entity run that extends to the end of the document
    if (!lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
    }

    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();

        // add the syntactic tree annotation (skip the artificial ROOT wrapper)
        List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
        Tree tree = sentenceAnn.get(TreeAnnotation.class);
        if (tree.children().length != 1) {
            throw new RuntimeException("Expected single root node, found " + tree);
        }
        tree = tree.firstChild();
        tree.indexSpans(0);
        TopTreebankNode root = new TopTreebankNode(jCas);
        root.setTreebankParse(tree.toString());
        // TODO: root.setTerminals(v)
        this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

        // get the dependencies
        SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

        // convert Stanford nodes to UIMA annotations
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
        for (IndexedWord stanfordNode : dependencies.vertexSet()) {
            int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
            int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
            int tokenBegin = tokens.get(indexBegin).getBegin();
            int tokenEnd = tokens.get(indexEnd - 1).getEnd();
            DependencyNode node;
            if (dependencies.getRoots().contains(stanfordNode)) {
                node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
            } else {
                node = new DependencyNode(jCas, tokenBegin, tokenEnd);
            }
            stanfordToUima.put(stanfordNode, node);
        }

        // create relation annotations for each Stanford dependency
        ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create();
        ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create();
        for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
            DependencyRelation relation = new DependencyRelation(jCas);
            DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
            DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
            String relationType = stanfordEdge.getRelation().toString();
            if (head == null || child == null || relationType == null) {
                throw new RuntimeException(String.format(
                        "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation,
                        child, head));
            }
            relation.setHead(head);
            relation.setChild(child);
            relation.setRelation(relationType);
            relation.addToIndexes();
            headRelations.put(child, relation);
            childRelations.put(head, relation);
        }

        // set the relations for each node annotation
        for (DependencyNode node : stanfordToUima.values()) {
            List<DependencyRelation> heads = headRelations.get(node);
            node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
            if (heads != null) {
                FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
            }
            List<DependencyRelation> children = childRelations.get(node);
            node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
            if (children != null) {
                FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
            }
            node.addToIndexes();
        }
    }

    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }

    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
        for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {

            // figure out the character span of the mention
            // (Stanford sentence/token indices are 1-based; endIndex is exclusive)
            List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
            int begin = tokens.get(corefMention.startIndex - 1).getBegin();
            int end = tokens.get(corefMention.endIndex - 2).getEnd();

            // use an existing named entity mention when possible; otherwise create a new one
            NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
            if (mention == null) {
                mention = new NamedEntityMention(jCas, begin, end);
                mention.addToIndexes();
            }
            mentions.add(mention);
        }

        // sort the chain's mentions into document order
        Collections.sort(mentions, new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
                // Integer.compare avoids the overflow risk of subtracting offsets
                return Integer.compare(m1.getBegin(), m2.getBegin());
            }
        });

        // create the entity and cross-link it with its mentions
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, mentions.size()));
        int index = 0;
        for (NamedEntityMention mention : mentions) {
            mention.setMentionedEntity(entity);
            entity.setMentions(index, mention);
            index += 1;
        }
        entities.add(entity);
    }

    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        if (mention.getMentionedEntity() == null) {
            NamedEntity entity = new NamedEntity(jCas);
            entity.setMentions(new FSArray(jCas, 1));
            entity.setMentions(0, mention);
            mention.setMentionedEntity(entity);
            entities.add(entity);
        }
    }

    // sort entities by document order
    Collections.sort(entities, new Comparator<NamedEntity>() {
        @Override
        public int compare(NamedEntity o1, NamedEntity o2) {
            return Integer.compare(getFirstBegin(o1), getFirstBegin(o2));
        }

        // earliest character offset among the entity's mentions
        private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
                if (mention.getBegin() < min) {
                    min = mention.getBegin();
                }
            }
            return min;
        }
    });

    // add entities to document
    for (NamedEntity entity : entities) {
        entity.addToIndexes();
    }

}

From source file:edu.csupomona.nlp.util.Sentence2Clause.java

/**
 * Annotates the given text with the configured pipeline and prints, for each
 * sentence, its basic dependency graph followed by one line per dependency edge
 * in the form "relation: governor(index) => dependent(index)".
 *
 * @param text raw input text to annotate
 */
public void process(String text) {
    // wrap the raw text and run every configured annotator on it
    Annotation annotated = new Annotation(text);
    pipeline.annotate(annotated);

    for (CoreMap sentence : annotated.get(SentencesAnnotation.class)) {
        // basic (non-collapsed) Stanford dependency graph for this sentence
        SemanticGraph deps = sentence.get(BasicDependenciesAnnotation.class);
        System.out.println(deps.toString("plain"));

        for (SemanticGraphEdge edge : deps.getEdgeSet()) {
            String relation = edge.getRelation().getShortName();
            String governor = edge.getGovernor().value() + "(" + edge.getGovernor().index() + ")";
            String dependent = edge.getDependent().value() + "(" + edge.getDependent().index() + ")";
            System.out.println(relation + ": " + governor + " => " + dependent);
        }

    }
}

From source file:edu.nus.comp.nlp.stanford.UtilParser.java

License:Open Source License

/**
 * Recursively converts a dependency (sub)graph into a Swing tree node. Each
 * node's label is the word's surface form, prefixed with "<-relation- " when
 * the word has an incoming dependency edge in the graph.
 *
 * @param root         word to convert; when null, the graph's first root is used
 * @param dependencies full dependency graph supplying edge labels and children
 * @return tree node mirroring the dependency structure rooted at {@code root}
 */
public static DefaultMutableTreeNode toDMTree(IndexedWord root, SemanticGraph dependencies) {

    if (root == null) {
        root = dependencies.getFirstRoot();
    }

    // prefix the label with the relation on the first incoming edge, if any
    String label = root.value();
    for (SemanticGraphEdge edge : dependencies.edgeIterable()) {
        if (edge.getDependent().equals(root)) {
            label = "<-" + edge.getRelation() + "- " + label;
            break;
        }
    }

    DefaultMutableTreeNode node = new DefaultMutableTreeNode();
    node.setUserObject(label);

    // recurse into each dependent of this word
    for (IndexedWord child : dependencies.getChildList(root)) {
        node.add(toDMTree(child, dependencies));
    }
    return node;
}

From source file:ims.cs.parc.ParcUtils.java

License:Open Source License

/**
 * Compute cached dependency paths using Floyd Warshall
 * @param dependencies/*from w ww.  j  av a 2s .  c  o m*/
 * @return
 */
/**
 * Builds a JGraphT directed graph from the given dependency edges and wraps it
 * in a Floyd-Warshall all-pairs shortest-path helper.
 * <p>
 * NOTE(review): {@link SimpleDirectedGraph} admits at most one edge per ordered
 * vertex pair, so parallel dependencies between the same two words collapse.
 *
 * @param dependencies dependency edges to convert into graph vertices and edges
 * @return shortest-path helper over the constructed graph
 */
public static FloydWarshallShortestPaths computeFloydWarshallSGE(List<SemanticGraphEdge> dependencies) {
    SimpleDirectedGraph<IndexedWord, IndexedEdge> graph = new SimpleDirectedGraph<IndexedWord, IndexedEdge>(
            IndexedEdge.class);
    int edgeId = 0;
    for (SemanticGraphEdge dep : dependencies) {
        graph.addVertex(dep.getGovernor());
        graph.addVertex(dep.getDependent());
        // bug fix: edgeId was never incremented, so every IndexedEdge shared id 0
        graph.addEdge(dep.getGovernor(), dep.getDependent(), new IndexedEdge(dep.getRelation(), edgeId++));
    }
    return new FloydWarshallShortestPaths(graph);
}

From source file:ims.cs.qsample.features.components.SentenceDependencyFeatures.java

License:Open Source License

/**
 * Add features about the child of a token
 * @param pcToken//from  ww w. ja v a 2s .com
 */
/**
 * Adds dependency-child features for a token to its boundary feature set: the
 * bare child relation and/or the relation paired with the child's lemma,
 * depending on the static configuration flags.
 *
 * @param pcToken token whose dependency children are turned into features
 */
private static void addChildFeatures(Token pcToken) {
    List<SemanticGraphEdge> edges = Helper.getDependencyChildrenRels(pcToken);
    if (edges == null) {
        return; // token has no recorded dependency children
    }

    FeatureSet features = pcToken.boundaryFeatureSet;
    for (SemanticGraphEdge edge : edges) {
        // feature: relation label alone
        if (StaticConfig.dependencyChildRel) {
            features.add(CHILD_REL_PREFIX + "=" + edge.getRelation());
        }

        // feature: relation label plus the dependent token's lemma
        if (StaticConfig.dependencyChildRelHead) {
            features.add(CHILD_RELHEAD_PREFIX + "=" + edge.getRelation() + "," + edge.getDependent().lemma());
        }
    }
}

From source file:opendial.bn.values.RelationalVal.java

License:Open Source License

/**
 * Appends all vertices and edges of {@code newGraph} to this value's graph.
 * Vertex copies are re-indexed to consecutive slots at the end of the existing
 * graph; edges are re-targeted by offsetting the original word indices by the
 * pre-append graph size.
 * <p>
 * NOTE(review): vertex copies receive index {@code graph.size()} (a 0-based
 * running count), while edge endpoints use {@code original index + oldGraphSize}.
 * If the source graph's word indices are 1-based (as Stanford sentence indices
 * usually are), those two numbering schemes disagree by one - confirm against
 * how {@code addEdge} resolves indices.
 */
public void addGraph(SemanticGraph newGraph) {
    int oldGraphSize = graph.size();
    for (IndexedWord iw : newGraph.vertexListSorted()) {
        IndexedWord copy = new IndexedWord(iw);
        // re-index the copy to the next free slot in the combined graph
        copy.setIndex(graph.size());
        graph.addVertex(copy);
    }
    for (SemanticGraphEdge edge : newGraph.edgeListSorted()) {
        // shift endpoint indices past the vertices that were already present
        int dep = edge.getDependent().index() + oldGraphSize;
        int gov = edge.getGovernor().index() + oldGraphSize;
        GrammaticalRelation rel = edge.getRelation();
        addEdge(gov, dep, rel.getLongName());
    }
    // invalidate the cached hash since the value's content changed
    cachedHashCode = 0;
}