List of usage examples for edu.stanford.nlp.trees.Tree.label()
@Override
public Label label()
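Before the source-file examples, a minimal self-contained sketch of what label() exposes on each node of a constituency tree; the bracketed parse string and the class name are assumptions made only for illustration:

import edu.stanford.nlp.trees.Tree;

public class TreeLabelDemo {
    public static void main(String[] args) {
        // an assumed Penn Treebank bracketing, just for illustration
        Tree tree = Tree.valueOf("(ROOT (S (NP (DT The) (NN parser)) (VP (VBZ works))))");
        // label() is defined on every node; value() yields the category of an
        // internal node or the surface form of a leaf
        for (Tree node : tree.preOrderNodeList()) {
            System.out.println(node.label().value() + (node.isLeaf() ? "  [leaf]" : ""));
        }
    }
}

Run on the tree above, the pre-order traversal prints ROOT, S, NP, DT, The [leaf], NN, parser [leaf], VP, VBZ, works [leaf].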
From source file:englishparser.EnglishParser.java
private static boolean isNNP(Tree t) {
    // return t.label().value().equals("NNS") || t.label().value().equals("NP") || t.label().value().equals("NN");
    return t.label().value().equals("NNP");
}
From source file:ims.cs.parc.ParcUtils.java
License:Open Source License
/**
 * Find all head verbs in the corpus. The algorithm is taken from Pareti (2015).
 * @param sentence
 */
public static void markHeadVerbs(Sentence sentence) {
    for (Tree tree : sentence.tree.preOrderNodeList()) {
        if (tree.label().value().equals("VP")) {
            boolean valid = true;
            for (Tree child : tree.children()) {
                if (child.label().value().equals("VP")) {
                    valid = false;
                    break;
                }
            }
            if (valid) {
                for (Tree child : tree.children()) {
                    if (child.firstChild().isLeaf() && child.label().value().startsWith("V")) {
                        Token token = sentence.treeLookup.get(child.firstChild());
                        if (token != null)
                            token.isHeadVerb = true;
                    }
                }
            }
        }
    }
}
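The Sentence and Token types above are project-specific and not shown here. A minimal sketch of the same "lowest VP" idea applied directly to a Stanford Tree, printing the verbs under VP nodes that have no VP child (the parse string is an assumed example):

static void printHeadVerbs() {
    Tree tree = Tree.valueOf(
            "(ROOT (S (NP (PRP She)) (VP (VBZ wants) (S (VP (TO to) (VP (VB leave)))))))");
    for (Tree node : tree.preOrderNodeList()) {
        if (!node.label().value().equals("VP")) continue;
        boolean lowest = true;
        for (Tree child : node.children()) {
            if (child.label().value().equals("VP")) { lowest = false; break; }
        }
        if (lowest) {
            for (Tree child : node.children()) {
                // pre-terminal verb: a V* tag whose single child is the word itself
                if (!child.isLeaf() && child.firstChild().isLeaf()
                        && child.label().value().startsWith("V")) {
                    System.out.println("head verb: " + child.firstChild().label().value());
                }
            }
        }
    }
}

For the assumed parse this prints "wants" and "leave", the verbs of the two lowest VPs.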
From source file:ims.cs.qsample.features.components.SentenceConstituentFeatures.java
License:Open Source License
/**
 * Recursion step for tree features
 * @param sentence
 * @param t complete tree
 * @param level current level
 * @param governingLabels list of governing labels
 * @param parent information about direct parent
 * @param isLeftmost is the node the leftmost one in the constituent specified by ancestorWhereLeftmost
 * @param ancestorWhereLeftmost
 */
private static void addTreeFeatures(Sentence sentence, Tree t, int level, List<NodeFeatures> governingLabels,
        NodeFeatures parent, boolean isLeftmost, NodeFeatures ancestorWhereLeftmost) {
    if (t.isLeaf()) {
        /* terminal nodes */
        // get the current token represented by this subtree
        Token pToken = sentence.treeLookup.get(t);

        // check if token is null. this can happen if the token was unaligned previously
        // (e.g., because of a parser error)
        if (pToken == null) {
            if (StaticConfig.verbose)
                System.err.println(sentence.sentenceId + " Dropping tree without associated token: " + t + " ");
            return;
        }

        FeatureSet fs = pToken.boundaryFeatureSet;

        // leftmost feature (see Pareti paper for description)
        if (StaticConfig.constituentLeftmost && isLeftmost)
            fs.add(LEFTMOST_FEATURE);

        // level in tree
        if (StaticConfig.constituentLevel) {
            fs.add(LEVEL_FEATURE + level);
            addLevelBinHeuristic(pToken, LEVEL_FEATURE, level);
        }

        // leftmost feature label
        if (StaticConfig.constituentAncestorL) {
            fs.add(AL_FEATURE + "LBL:" + ancestorWhereLeftmost.label);
            fs.add(AL_FEATURE + "LVL:" + ancestorWhereLeftmost.level);
            addLevelBinHeuristic(pToken, AL_FEATURE + "LVL", ancestorWhereLeftmost.level);
        }

        // parent in constituent tree
        if (StaticConfig.constituentParent) {
            fs.add(PARENT_FEATURE + "LBL:" + parent.label);
        }

        // labels of all ancestors
        if (StaticConfig.constituentGoverning) {
            /* "Ancestor" features in the paper */
            for (NodeFeatures nf : governingLabels) {
                // label with and without depth
                fs.add(GOV_FEATURE + nf.label + "@" + nf.level); /* ambiguous in paper */
                fs.add(GOV_FEATURE + nf.label);
                fs.add(GOV_FEATURE + nf.label + "@-" + (level - nf.level)); /* ambiguous in paper */
                addLevelBinHeuristic(pToken, GOV_FEATURE + nf.label + "@", nf.level);
                addLevelBinHeuristic(pToken, GOV_FEATURE + nf.label + "@-", (level - nf.level));
            }
        }
    } else {
        // non-terminal node
        List<Tree> childList = t.getChildrenAsList();
        String label = t.label().toString();

        // copy governing node features for next recursion step
        List<NodeFeatures> governingLabelsUpdate = new LinkedList<NodeFeatures>(governingLabels);
        governingLabelsUpdate.add(new NodeFeatures(label, level));

        // set leftmost ancestor
        if (ancestorWhereLeftmost == null) {
            ancestorWhereLeftmost = new NodeFeatures(label, level);
        }

        // check for pre-terminals -- otherwise, set the leftmost flag for the first constituent
        if (childList.size() > 1) {
            isLeftmost = true;
        }

        // call function for all children
        for (Tree child : childList) {
            addTreeFeatures(sentence, child, level + 1, governingLabelsUpdate, new NodeFeatures(label, level),
                    isLeftmost, ancestorWhereLeftmost);
            isLeftmost = false;
            ancestorWhereLeftmost = null;
        }
    }
}
From source file:info.mhaas.ma.Evaluation.CCEvaluator.java
private static boolean labelMatches(Tree gold, Tree predicted) {
    Label goldLabel = gold.label();
    Label predLabel = predicted.label();
    return collapseFineGrained(goldLabel) == collapseFineGrained(predLabel);
}
From source file:it.uniud.ailab.dcore.wrappers.external.StanfordBootstrapperAnnotator.java
License:Open Source License
/**
 * Annotate the document by splitting it into sentences, tokenizing it,
 * performing PoS tagging and Named Entity Recognition using the Stanford
 * CoreNLP tools.
 *
 * @param component the component to annotate.
 */
@Override
public void annotate(Blackboard blackboard, DocumentComponent component) {

    if (pipeline == null) {
        // creates a StanfordCoreNLP object, with POS tagging, lemmatization,
        // NER, parsing, and coreference resolution
        Properties props = new Properties();
        props.put("annotators", "tokenize, ssplit, pos, parse, lemma, ner, dcoref");
        pipeline = new StanfordCoreNLP(props);
    }

    // read some text in the text variable
    String text = component.getText();

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline.annotate(document);

    // get the graph for coreference resolution
    Map<Integer, CorefChain> graph = document.get(CorefCoreAnnotations.CorefChainAnnotation.class);

    // prepare the map for the coreference graph of the document
    Map<String, Collection<Set<CorefChain.CorefMention>>> coreferenceGraph = new HashMap<>();

    for (CorefChain corefChain : graph.values()) {

        // get the representative mention, that is, the word recalled in other sentences
        CorefChain.CorefMention cm = corefChain.getRepresentativeMention();

        // eliminate auto-references
        if (corefChain.getMentionMap().size() <= 1) {
            continue;
        }

        // get the stemmed form of the references, so the comparison with
        // grams will be easier
        List<CoreLabel> tks = document.get(SentencesAnnotation.class).get(cm.sentNum - 1)
                .get(TokensAnnotation.class);

        // list of tokens which compose the anaphor
        List<Token> anaphorsTokens = new ArrayList<>();
        for (int i = cm.startIndex - 1; i < cm.endIndex - 1; i++) {
            CoreLabel current = tks.get(i);
            Token t = new Token(current.word());
            t.setPoS(current.tag());
            t.setLemma(current.lemma());
            anaphorsTokens.add(t);
        }

        // the mention n-gram which is formed by the anaphor and a
        // list of references
        Mention mention = new Mention(cm.mentionSpan, anaphorsTokens, cm.mentionSpan);

        // get map of the references to the corefchain obj
        Collection<Set<CorefChain.CorefMention>> mentionMap = corefChain.getMentionMap().values();
        for (Set<CorefChain.CorefMention> mentions : mentionMap) {
            for (CorefChain.CorefMention reference : mentions) {
                // eliminate self-references
                if (reference.mentionSpan.equalsIgnoreCase(cm.mentionSpan)) {
                    continue;
                }
                List<CoreLabel> tokens = document.get(SentencesAnnotation.class).get(reference.sentNum - 1)
                        .get(TokensAnnotation.class);

                // list of tokens which compose the mention
                List<Token> mentionTokens = new ArrayList<>();
                for (int i = reference.startIndex - 1; i < reference.endIndex - 1; i++) {
                    CoreLabel current = tokens.get(i);
                    // set token features
                    Token t = new Token(current.word());
                    t.setPoS(current.tag());
                    t.setLemma(current.lemma());
                    mentionTokens.add(t);
                }
                // add to the mention a new reference
                mention.addReference(reference.mentionSpan, mentionTokens, reference.mentionType.toString());
            }
        }

        // assign to the document a new coreference obj
        // containing the anaphor and its mentions
        blackboard.addGram(mention);
    }

    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and
    // has values with custom types
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);

    // a counter that keeps track of the number of phrases in a sentence
    int phraseCounter = 0;

    for (CoreMap stanfordSentence : sentences) {

        Sentence distilledSentence = new Sentence(stanfordSentence.toString(), "" + sentenceCounter++);

        distilledSentence.setLanguage(Locale.ENGLISH);

        // getting the parse tree of the sentence so as to count the number of phrases.
        // ROOT sentences are the first-level children in the parse tree; every ROOT sentence
        // consists of a group of clauses, which can be principal (main clauses) or not
        // (coordinate and subordinate). We use ROOT sentences as a starting point to find all
        // the phrases present in the sentences themselves, checking for the tag "S".
        Tree sentenceTree = stanfordSentence.get(TreeCoreAnnotations.TreeAnnotation.class);

        for (Tree sub : sentenceTree.subTreeList()) {
            if (sub.label().value().equals("S")) {
                phraseCounter++;
            }
        }

        // annotate the sentence with a new feature counting all the phrases
        // contained in the sentence
        distilledSentence.addAnnotation(new FeatureAnnotation(DefaultAnnotations.PHRASES_COUNT, phraseCounter));

        // traversing the words in the current sentence:
        // for each token in the text, we create a new token and annotate it
        // with the word representing it, its POS tag and its lemma
        for (CoreLabel token : stanfordSentence.get(TokensAnnotation.class)) {
            // this is the text of the token
            Token t = new Token(token.originalText());
            // this is the POS tag of the token
            t.setPoS(token.tag());
            // this is the lemma of the token
            t.setLemma(token.lemma());

            String ner = token.get(NamedEntityTagAnnotation.class);
            if (!ner.equalsIgnoreCase("O")) {
                t.addAnnotation(new NERAnnotation(DefaultAnnotations.IS_NER, ner));
            }
            // add the token to the sentence
            distilledSentence.addToken(t);
        }

        // add the sentence to the document
        ((DocumentComposite) component).addComponent(distilledSentence);
    }
}
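A reduced, self-contained sketch of the phrase-counting step above, assuming only the CoreNLP annotators needed for a constituency parse and an arbitrary example text:

import java.util.Properties;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;

public class ClauseCountDemo {
    public static void main(String[] args) {
        // only the annotators needed to obtain a constituency parse
        Properties props = new Properties();
        props.put("annotators", "tokenize, ssplit, pos, parse");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        Annotation document = new Annotation("The cat sat on the mat because it was tired.");
        pipeline.annotate(document);

        for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
            Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
            int clauseCount = 0;
            // count "S" constituents, as in the example above
            for (Tree sub : tree.subTreeList()) {
                if (sub.label().value().equals("S")) {
                    clauseCount++;
                }
            }
            System.out.println("clause (S) nodes: " + clauseCount);
        }
    }
}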
From source file:knu.univ.lingvo.coref.Mention.java
License:Open Source License
public String lowestNPIncludesHead() {
    String ret = "";
    Tree head = this.contextParseTree.getLeaves().get(this.headIndex);
    Tree lowestNP = head;
    String s;
    while (true) {
        if (lowestNP == null)
            return ret;
        s = ((CoreLabel) lowestNP.label()).get(CoreAnnotations.ValueAnnotation.class);
        if (s.equals("NP") || s.equals("ROOT"))
            break;
        lowestNP = lowestNP.ancestor(1, this.contextParseTree);
    }
    if (s.equals("ROOT"))
        lowestNP = head;
    for (Tree t : lowestNP.getLeaves()) {
        if (!ret.equals(""))
            ret = ret + " ";
        ret = ret + ((CoreLabel) t.label()).get(CoreAnnotations.TextAnnotation.class);
    }
    if (!this.spanToString().contains(ret))
        return this.sentenceWords.get(this.headIndex).get(CoreAnnotations.TextAnnotation.class);
    return ret;
}
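A stripped-down sketch of the same ancestor-climbing pattern, using plain label().value() checks instead of the CoreLabel annotations above; the method name and parameters are assumptions made for illustration:

// climb from a leaf to the lowest dominating NP (or give up at the root)
static Tree lowestNPAbove(Tree tree, int leafIndex) {
    Tree node = tree.getLeaves().get(leafIndex);
    while (node != null && !node.label().value().equals("NP")) {
        // ancestor(1, root) is the node's parent within the given root tree
        node = node.ancestor(1, tree);
    }
    return node; // null if no NP dominates the leaf
}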
From source file:knu.univ.lingvo.coref.MentionExtractor.java
License:Open Source License
protected int getHeadIndex(Tree t) {
    // The trees passed in do not have the CoordinationTransformer
    // applied, but that just means the SemanticHeadFinder results are
    // slightly worse.
    Tree ht = t.headTerminal(headFinder);
    if (ht == null)
        return -1; // temporary: a key which is matched to nothing
    CoreLabel l = (CoreLabel) ht.label();
    return l.get(CoreAnnotations.IndexAnnotation.class);
}
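The index lookup above works only when the tree labels are CoreLabels produced by a CoreNLP pipeline. A minimal sketch of the head-finding part on its own, with CollinsHeadFinder chosen as an assumed stock head finder:

// find the head word of a constituent and print its POS tag and surface form
static void printHead(Tree constituent) {
    HeadFinder headFinder = new CollinsHeadFinder();
    Tree headWord = constituent.headTerminal(headFinder);    // the head leaf
    Tree headTag = constituent.headPreTerminal(headFinder);  // its POS node
    if (headWord != null && headTag != null) {
        System.out.println(headTag.label().value() + " -> " + headWord.label().value());
    }
}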
From source file:knu.univ.lingvo.coref.MentionExtractor.java
License:Open Source License
/**
 * Post-processes the extracted mentions. Here we set the Mention fields required for coref and order mentions by tree-traversal order.
 * @param words List of words in each sentence, in textual order
 * @param trees List of trees, one per sentence
 * @param unorderedMentions List of unordered, unprocessed mentions
 *                          Each mention MUST have startIndex and endIndex set!
 *                          Optionally, if scoring is desired, mentions must have mentionID and originalRef set.
 *                          All the other Mention fields are set here.
 * @return List of mentions ordered according to the tree traversal
 * @throws Exception
 */
public List<List<Mention>> arrange(Annotation anno, List<List<CoreLabel>> words, List<Tree> trees,
        List<List<Mention>> unorderedMentions, boolean doMergeLabels) throws Exception {

    List<List<Mention>> orderedMentionsBySentence = new ArrayList<List<Mention>>();

    //
    // traverse all sentences and process each individual one
    //
    int mentionNumber = 0;
    for (int sent = 0, sz = words.size(); sent < sz; sent++) {
        List<CoreLabel> sentence = words.get(sent);
        Tree tree = trees.get(sent);
        List<Mention> mentions = unorderedMentions.get(sent);
        Map<String, List<Mention>> mentionsToTrees = Generics.newHashMap();

        // merge the parse tree of the entire sentence with the sentence words
        if (doMergeLabels)
            mergeLabels(tree, sentence);

        //
        // set the surface information and the syntactic info in each mention
        // startIndex and endIndex MUST be set before!
        //
        for (Mention mention : mentions) {
            mention.sentenceNumber = sent;
            mention.mentionNumber = mentionNumber++;
            mention.contextParseTree = tree;
            mention.sentenceWords = sentence;
            mention.originalSpan = new ArrayList<CoreLabel>(
                    mention.sentenceWords.subList(mention.startIndex, mention.endIndex));
            if (!((CoreLabel) tree.label()).has(CoreAnnotations.BeginIndexAnnotation.class))
                tree.indexSpans(0);

            if (mention.headWord == null) {
                Tree headTree = ((RuleBasedCorefMentionFinder) mentionFinder).findSyntacticHead(mention, tree,
                        sentence);
                mention.headWord = (CoreLabel) headTree.label();
                mention.headIndex = mention.headWord.get(CoreAnnotations.IndexAnnotation.class) - 1;
            }

            if (mention.mentionSubTree == null) {
                // mentionSubTree = highest NP that has the same head
                Tree headTree = tree.getLeaves().get(mention.headIndex);
                if (headTree == null) {
                    throw new RuntimeException("Missing head tree for a mention!");
                }
                Tree t = headTree;
                while ((t = t.parent(tree)) != null) {
                    if (t.headTerminal(headFinder) == headTree && t.value().equals("NP")) {
                        mention.mentionSubTree = t;
                    } else if (mention.mentionSubTree != null) {
                        break;
                    }
                }
                if (mention.mentionSubTree == null) {
                    mention.mentionSubTree = headTree;
                }
            }

            List<Mention> mentionsForTree = mentionsToTrees.get(treeToKey(mention.mentionSubTree));
            if (mentionsForTree == null) {
                mentionsForTree = new ArrayList<Mention>();
                mentionsToTrees.put(treeToKey(mention.mentionSubTree), mentionsForTree);
            }
            mentionsForTree.add(mention);

            // generates all fields required for coref, such as gender, number, etc.
            mention.process(dictionaries, semantics, this, singletonPredictor);
        }

        //
        // Order all mentions in tree-traversal order
        //
        List<Mention> orderedMentions = new ArrayList<Mention>();
        orderedMentionsBySentence.add(orderedMentions);

        // extract all mentions in tree traversal order (alternative: tree.postOrderNodeList())
        for (Tree t : tree.preOrderNodeList()) {
            List<Mention> lm = mentionsToTrees.get(treeToKey(t));
            if (lm != null) {
                for (Mention m : lm) {
                    orderedMentions.add(m);
                }
            }
        }

        //
        // find appositions, predicate nominatives, relative pronouns in this sentence
        //
        findSyntacticRelations(tree, orderedMentions);
        assert (mentions.size() == orderedMentions.size());
    }

    return orderedMentionsBySentence;
}
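The "highest NP with the same head" search inside arrange() can be distilled to the following sketch; tree, headLeaf and headFinder are assumed inputs:

// walk upward from the head leaf, remembering the last NP whose head terminal is still this leaf
static Tree highestNPWithSameHead(Tree tree, Tree headLeaf, HeadFinder headFinder) {
    Tree best = headLeaf;
    Tree t = headLeaf;
    while ((t = t.parent(tree)) != null) {
        if (t.headTerminal(headFinder) == headLeaf && t.label().value().equals("NP")) {
            best = t;
        } else if (best != headLeaf) {
            break; // once an NP has matched and the head changes, higher nodes no longer qualify
        }
    }
    return best;
}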
From source file:knu.univ.lingvo.coref.MentionExtractor.java
License:Open Source License
private void addFoundPair(Tree np1, Tree np2, Tree t, Set<Pair<Integer, Integer>> foundPairs) {
    Tree head1 = np1.headTerminal(headFinder);
    Tree head2 = np2.headTerminal(headFinder);
    int h1 = ((CoreMap) head1.label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
    int h2 = ((CoreMap) head2.label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
    Pair<Integer, Integer> p = new Pair<Integer, Integer>(h1, h2);
    foundPairs.add(p);
}
From source file:knu.univ.lingvo.coref.sievepasses.DeterministicCorefSieve.java
License:Open Source License
/** Divides a sentence into clauses and sorts the antecedents for pronoun matching. */
private static List<Mention> sortMentionsForPronoun(List<Mention> l, Mention m1, boolean sameSentence) {
    List<Mention> sorted = new ArrayList<Mention>();
    if (sameSentence) {
        Tree tree = m1.contextParseTree;
        Tree current = m1.mentionSubTree;
        while (true) {
            current = current.ancestor(1, tree);
            if (current.label().value().startsWith("S")) {
                for (Mention m : l) {
                    if (!sorted.contains(m) && current.dominates(m.mentionSubTree)) {
                        sorted.add(m);
                    }
                }
            }
            if (current.label().value().equals("ROOT") || current.ancestor(1, tree) == null)
                break;
        }
        if (SieveCoreferenceSystem.logger.isLoggable(Level.FINEST)) {
            if (l.size() != sorted.size()) {
                SieveCoreferenceSystem.logger.finest("sorting failed!!! -> parser error?? \tmentionID: "
                        + m1.mentionID + " " + m1.spanToString());
                sorted = l;
            } else if (!l.equals(sorted)) {
                SieveCoreferenceSystem.logger.finest("sorting succeeded & changed !! \tmentionID: "
                        + m1.mentionID + " " + m1.spanToString());
                for (int i = 0; i < l.size(); i++) {
                    Mention ml = l.get(i);
                    Mention msorted = sorted.get(i);
                    SieveCoreferenceSystem.logger
                            .finest("\t[" + ml.spanToString() + "]\t[" + msorted.spanToString() + "]");
                }
            } else {
                SieveCoreferenceSystem.logger
                        .finest("no changed !! \tmentionID: " + m1.mentionID + " " + m1.spanToString());
            }
        }
    }
    return sorted;
}