Example usage for edu.stanford.nlp.trees Tree subTreeList

List of usage examples for edu.stanford.nlp.trees Tree subTreeList

Introduction

On this page you can find an example usage for edu.stanford.nlp.trees Tree subTreeList.

Prototype

public List<Tree> subTreeList() 

Source Link

Document

Get the list of all subtrees inside the tree by returning a tree rooted at each node.

Usage

From source file:edu.albany.cubism.util.StanfordChineseParser.java

/**
 * Prints the given parse tree, the head terminal selected by the Collins
 * head-finding rules, and the Penn-bracketed string plus value of every
 * leaf to standard output.
 *
 * @param t the parse tree to print; must not be {@code null}
 */
public void printTree(Tree t) {
    tp.printTree(t);
    // Print the head terminal chosen by the Collins head finder
    // (a SemanticHeadFinder could be substituted here).
    tp.printTree(t.headTerminal(new CollinsHeadFinder()));

    // Echo each leaf's Penn-bracketed form alongside its value.
    List<Tree> leaves = t.getLeaves();
    for (Tree leaf : leaves) {
        System.out.println(leaf.pennString() + " " + leaf.value());
    }
}

From source file:it.uniud.ailab.dcore.wrappers.external.StanfordBootstrapperAnnotator.java

License:Open Source License

/**
 * Annotates the document by splitting it into sentences, tokenizing it,
 * and performing PoS tagging, parsing, lemmatization, Named Entity
 * Recognition and coreference resolution using the Stanford Core NLP
 * pipeline. Coreference mentions are added to the blackboard as grams;
 * each sentence is wrapped in a distilled {@code Sentence} and attached
 * to the given component.
 *
 * @param blackboard the blackboard that collects the detected mentions.
 * @param component the component to annotate.
 */
@Override
public void annotate(Blackboard blackboard, DocumentComponent component) {

    // Lazily build the pipeline once and reuse it on subsequent calls.
    if (pipeline == null) {
        // creates a StanfordCoreNLP object, with POS tagging, lemmatization,
        // NER, parsing, and coreference resolution
        Properties props = new Properties();
        props.put("annotators", "tokenize, ssplit, pos, parse, lemma, ner, dcoref");
        pipeline = new StanfordCoreNLP(props);

    }

    // read some text in the text variable
    String text = component.getText();

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline.annotate(document);

    //get the graph for coreference resolution
    Map<Integer, CorefChain> graph = document.get(CorefCoreAnnotations.CorefChainAnnotation.class);

    //prepare the map for the coreference graph of the document
    // NOTE(review): this map is declared but never populated or read in this
    // method — confirm whether it is still needed.
    Map<String, Collection<Set<CorefChain.CorefMention>>> coreferenceGraph = new HashMap<>();

    for (CorefChain corefChain : graph.values()) {

        //get the representative mention, i.e. the word recalled in other sentences
        CorefChain.CorefMention cm = corefChain.getRepresentativeMention();

        //skip chains that contain only the mention itself (auto-references)
        if (corefChain.getMentionMap().size() <= 1) {
            continue;
        }

        //get the tokens of the sentence holding the representative mention;
        //sentNum is 1-based while the sentence list is 0-based
        List<CoreLabel> tks = document.get(SentencesAnnotation.class).get(cm.sentNum - 1)
                .get(TokensAnnotation.class);
        //list of tokens which compose the anaphor

        List<Token> anaphorsTokens = new ArrayList<>();
        //startIndex/endIndex are 1-based token offsets (endIndex exclusive)
        for (int i = cm.startIndex - 1; i < cm.endIndex - 1; i++) {
            CoreLabel current = tks.get(i);
            Token t = new Token(current.word());
            t.setPoS(current.tag());
            t.setLemma(current.lemma());
            anaphorsTokens.add(t);
        }

        //the mention n-gram which is formed by the anaphor and a
        //list of references
        Mention mention = new Mention(cm.mentionSpan, anaphorsTokens, cm.mentionSpan);

        //get map of the references to the corefchain obj
        Collection<Set<CorefChain.CorefMention>> mentionMap = corefChain.getMentionMap().values();
        for (Set<CorefChain.CorefMention> mentions : mentionMap) {

            for (CorefChain.CorefMention reference : mentions) {
                //eliminate self-references (same surface span as the anaphor)
                if (reference.mentionSpan.equalsIgnoreCase(cm.mentionSpan)) {
                    continue;
                }
                //tokens of the sentence containing this reference
                List<CoreLabel> tokens = document.get(SentencesAnnotation.class).get(reference.sentNum - 1)
                        .get(TokensAnnotation.class);

                //list of tokens which compose the mention
                List<Token> mentionTokens = new ArrayList<>();
                for (int i = reference.startIndex - 1; i < reference.endIndex - 1; i++) {
                    CoreLabel current = tokens.get(i);
                    //set token features
                    Token t = new Token(current.word());
                    t.setPoS(current.tag());
                    t.setLemma(current.lemma());
                    mentionTokens.add(t);
                }
                //add to mention a new reference
                mention.addReference(reference.mentionSpan, mentionTokens, reference.mentionType.toString());
            }
        }

        //assign to the document a new coreference obj
        //containing the anaphor and its mentions
        blackboard.addGram(mention);
    }

    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and
    //has values with custom types
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);

    //A counter that keeps track of the number of phrases in a sentence.
    // NOTE(review): this counter is never reset inside the sentence loop, so
    // each sentence's PHRASES_COUNT includes the phrases of all previous
    // sentences — confirm whether a cumulative count is intended.
    int phraseCounter = 0;

    for (CoreMap stanfordSentence : sentences) {

        //sentenceCounter is an instance field used to give each sentence a unique id
        Sentence distilledSentence = new Sentence(stanfordSentence.toString(), "" + sentenceCounter++);

        distilledSentence.setLanguage(Locale.ENGLISH);

        //getting the dependency graph of the document so to count the number of phrases
        //ROOT sentences are the first level children in the parse tree; every ROOT sentence
        //consists of a group of clauses which can be principal (main clauses) or not
        //(coordinate and subordinate). We use ROOT sentences as a starting point to find out all
        //the phrases present in the sentences themselves, checking for the tag "S".
        Tree sentenceTree = stanfordSentence.get(TreeCoreAnnotations.TreeAnnotation.class);

        //count subtrees labelled "S" (clauses) in this sentence's parse tree
        for (Tree sub : sentenceTree.subTreeList()) {
            if (sub.label().value().equals("S")) {
                phraseCounter++;
            }
        }

        //annotate the sentence with a new feature counting all the phrases
        //contained in the sentence
        distilledSentence.addAnnotation(new FeatureAnnotation(DefaultAnnotations.PHRASES_COUNT, phraseCounter));

        // traversing the words in the current sentence
        // for each token in the text, we create a new token and annotate it
        // with the word representing it, its pos tag and its lemma
        for (CoreLabel token : stanfordSentence.get(TokensAnnotation.class)) {

            // this is the text of the token
            Token t = new Token(token.originalText());

            // this is the POS tag of the token
            t.setPoS(token.tag());

            // this is the lemma of the token
            t.setLemma(token.lemma());

            // NOTE(review): token.get(...) may return null when the NER
            // annotation is absent, which would NPE on equalsIgnoreCase —
            // confirm the "ner" annotator always sets it ("O" for non-entities).
            String ner = token.get(NamedEntityTagAnnotation.class);
            if (!ner.equalsIgnoreCase("O")) {
                t.addAnnotation(new NERAnnotation(DefaultAnnotations.IS_NER, ner));
            }
            //add the token to the sentence
            distilledSentence.addToken(t);
        }

        //add the sentence to document
        ((DocumentComposite) component).addComponent(distilledSentence);
    }
}