List of usage examples for edu.stanford.nlp.trees Tree subTreeList
public List<Tree> subTreeList()
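A minimal, self-contained sketch of the call, not drawn from the source files below (the bracketed sentence and class name are illustrative): it parses a small Penn Treebank-style tree with Tree.valueOf and walks the list returned by subTreeList(), which holds a subtree rooted at each node of the tree, including the tree itself.

import edu.stanford.nlp.trees.Tree;
import java.util.List;

public class SubTreeListDemo {
    public static void main(String[] args) {
        // Parse a small bracketed tree (Penn Treebank notation).
        Tree tree = Tree.valueOf("(ROOT (S (NP (DT The) (NN cat)) (VP (VBZ sleeps))))");

        // subTreeList() returns a subtree rooted at every node, the whole tree included.
        List<Tree> subTrees = tree.subTreeList();
        for (Tree sub : subTrees) {
            System.out.println(sub.label().value() + "  " + sub);
        }
    }
}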
From source file:edu.albany.cubism.util.StanfordChineseParser.java
public void printTree(Tree t) {
    tp.printTree(t);
    tp.printTree(t.headTerminal(new CollinsHeadFinder())); // SemanticHeadFinder()
    // System.out.println("tree label: " + t.label());
    List trees = t.subTreeList();
    for (int i = 0; i < trees.size(); i++) {
        Tree sbt = (Tree) trees.get(i);
        /*
         * if (!sbt.isLeaf()) { trees.addAll(sbt.subTreeList()); }
         */
        // System.out.println("sbt label: " + sbt.label());
    }
    // System.out.println("done");
    List<Tree> leaves = t.getLeaves();
    for (int i = 0; i < leaves.size(); i++) {
        Tree leaf = leaves.get(i);
        // if (leaf.parent() != null) {
        System.out.println(leaf.pennString() + " " + leaf.value());
        // }
    }
    /*
     * Set dependencies = t.dependencies();
     * Iterator it = dependencies.iterator();
     * while (it.hasNext()) {
     *     Dependency dependency = (Dependency) it.next();
     *     System.out.println(dependency.toString());
     *     System.out.println(dependency.name());
     * }
     */
}
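Note that subTreeList() is declared to return List<Tree>, so the raw List and the (Tree) cast in the loop above are only a holdover from older code; with generics the subtrees can be walked with an enhanced for loop, as the next example does.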
From source file:it.uniud.ailab.dcore.wrappers.external.StanfordBootstrapperAnnotator.java
License:Open Source License
/**
 * Annotate the document by splitting and tokenizing it, and by performing
 * PoS tagging and Named Entity Recognition using the Stanford CoreNLP tools.
 *
 * @param component the component to annotate.
 */
@Override
public void annotate(Blackboard blackboard, DocumentComponent component) {
    if (pipeline == null) {
        // creates a StanfordCoreNLP object with POS tagging, lemmatization,
        // NER, parsing, and coreference resolution
        Properties props = new Properties();
        props.put("annotators", "tokenize, ssplit, pos, parse, lemma, ner, dcoref");
        pipeline = new StanfordCoreNLP(props);
    }

    // read some text in the text variable
    String text = component.getText();

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline.annotate(document);

    // get the graph for coreference resolution
    Map<Integer, CorefChain> graph = document.get(CorefCoreAnnotations.CorefChainAnnotation.class);

    // prepare the map for the coreference graph of the document
    Map<String, Collection<Set<CorefChain.CorefMention>>> coreferenceGraph = new HashMap<>();

    for (CorefChain corefChain : graph.values()) {
        // get the representative mention, that is, the word recalled in other sentences
        CorefChain.CorefMention cm = corefChain.getRepresentativeMention();

        // eliminate auto-references
        if (corefChain.getMentionMap().size() <= 1) {
            continue;
        }

        // get the stemmed form of the references, so the comparison with
        // grams will be easier
        List<CoreLabel> tks = document.get(SentencesAnnotation.class).get(cm.sentNum - 1)
                .get(TokensAnnotation.class);

        // list of tokens which compose the anaphor
        List<Token> anaphorsTokens = new ArrayList<>();
        for (int i = cm.startIndex - 1; i < cm.endIndex - 1; i++) {
            CoreLabel current = tks.get(i);
            Token t = new Token(current.word());
            t.setPoS(current.tag());
            t.setLemma(current.lemma());
            anaphorsTokens.add(t);
        }

        // the mention n-gram, which is formed by the anaphor and a
        // list of references
        Mention mention = new Mention(cm.mentionSpan, anaphorsTokens, cm.mentionSpan);

        // get the map of the references to the CorefChain object
        Collection<Set<CorefChain.CorefMention>> mentionMap = corefChain.getMentionMap().values();
        for (Set<CorefChain.CorefMention> mentions : mentionMap) {
            for (CorefChain.CorefMention reference : mentions) {
                // eliminate self-references
                if (reference.mentionSpan.equalsIgnoreCase(cm.mentionSpan)) {
                    continue;
                }
                List<CoreLabel> tokens = document.get(SentencesAnnotation.class).get(reference.sentNum - 1)
                        .get(TokensAnnotation.class);

                // list of tokens which compose the mention
                List<Token> mentionTokens = new ArrayList<>();
                for (int i = reference.startIndex - 1; i < reference.endIndex - 1; i++) {
                    CoreLabel current = tokens.get(i);
                    // set token features
                    Token t = new Token(current.word());
                    t.setPoS(current.tag());
                    t.setLemma(current.lemma());
                    mentionTokens.add(t);
                }
                // add a new reference to the mention
                mention.addReference(reference.mentionSpan, mentionTokens,
                        reference.mentionType.toString());
            }
        }

        // assign to the document a new coreference object
        // containing the anaphor and its mentions
        blackboard.addGram(mention);
    }

    // these are all the sentences in this document;
    // a CoreMap is essentially a Map that uses class objects as keys and
    // has values with custom types
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);

    // a counter that keeps track of the number of phrases in a sentence
    int phraseCounter = 0;

    for (CoreMap stanfordSentence : sentences) {
        Sentence distilledSentence =
                new Sentence(stanfordSentence.toString(), "" + sentenceCounter++);
        distilledSentence.setLanguage(Locale.ENGLISH);

        // getting the parse tree of the sentence so as to count the number of phrases.
        // ROOT sentences are the first-level children in the parse tree; every ROOT sentence
        // is constituted by a group of clauses which can be principal (main clauses) or not
        // (coordinate and subordinate). We use ROOT sentences as a starting point to find
        // all the phrases present in the sentences themselves, checking for the tag "S".
        Tree sentenceTree = stanfordSentence.get(TreeCoreAnnotations.TreeAnnotation.class);

        for (Tree sub : sentenceTree.subTreeList()) {
            if (sub.label().value().equals("S")) {
                phraseCounter++;
            }
        }

        // annotate the sentence with a new feature counting all the phrases
        // contained in the sentence
        distilledSentence.addAnnotation(
                new FeatureAnnotation(DefaultAnnotations.PHRASES_COUNT, phraseCounter));

        // traversing the words in the current sentence:
        // for each token in the text, we create a new token and annotate it
        // with the word representing it, its PoS tag and its lemma
        for (CoreLabel token : stanfordSentence.get(TokensAnnotation.class)) {
            // this is the text of the token
            Token t = new Token(token.originalText());
            // this is the POS tag of the token
            t.setPoS(token.tag());
            // this is the lemma of the token
            t.setLemma(token.lemma());

            String ner = token.get(NamedEntityTagAnnotation.class);
            if (!ner.equalsIgnoreCase("O")) {
                t.addAnnotation(new NERAnnotation(DefaultAnnotations.IS_NER, ner));
            }
            // add the token to the sentence
            distilledSentence.addToken(t);
        }

        // add the sentence to the document
        ((DocumentComposite) component).addComponent(distilledSentence);
    }
}
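The subTreeList() call in the annotator above boils down to counting clause-level nodes in each parse tree. A standalone sketch of just that step, using an illustrative bracketed input in place of a CoreNLP parse (the class name and sentence are assumptions, not part of the project above):

import edu.stanford.nlp.trees.Tree;

public class ClauseCounter {
    public static void main(String[] args) {
        // Illustrative parse with one top-level clause and two coordinated clauses.
        Tree sentenceTree = Tree.valueOf(
                "(ROOT (S (S (NP (PRP I)) (VP (VBD left))) (CC and) (S (NP (PRP she)) (VP (VBD stayed)))))");

        // Count the clause-level ("S") subtrees, as the annotator above does per sentence.
        int phraseCount = 0;
        for (Tree sub : sentenceTree.subTreeList()) {
            if (sub.label().value().equals("S")) {
                phraseCount++;
            }
        }
        System.out.println("Clauses: " + phraseCount); // 3 for this tree
    }
}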