Example usage for edu.stanford.nlp.trees Tree label

List of usage examples for edu.stanford.nlp.trees Tree label

Introduction

On this page you can find example usages of the label() method of edu.stanford.nlp.trees.Tree.

Prototype

@Override
public Label label() 

Source Link

Document

Returns the label associated with the current node, or null if there is no label.

Usage

From source file:edu.cmu.ark.SentenceSimplifier.java

License:Open Source License

/**
 * Extracts a sentence from a participial phrase that modifies a verb phrase, e.g.
 * "John studied, hoping to get a good grade." -> "John hoped to get a good grade."
 *
 * <p>For every Tregex match on the input's intermediate tree, a copy of the participial VP
 * has its VBG verb replaced by the surface form for the tense obtained from
 * {@code findTense}; the result is combined with the matched subject into a new sentence
 * tree, which is passed to {@code addIfNovel}.
 *
 * @param extracted collection that receives the newly built questions
 * @param input question whose intermediate tree is searched
 */
private void extractVerbParticipialModifiers(Collection<Question> extracted, Question input) {
    //tense determined by top-most verb
    String pattern = "S=sub $- /,/ !< NP < (VP=participial < VBG=verb) " + " >+(VP) (S|SINV < NP=subj) "
            + " >> (ROOT <<# /VB.*/=tense) ";

    TregexMatcher m = TregexPatternFactory.getPattern(pattern).matcher(input.getIntermediateTree());
    while (m.find()) {
        String targetPOS = findTense(m.getNode("tense"));
        Tree participialCopy = m.getNode("participial").deeperCopy();
        Tree gerund = m.getNode("verb");

        // Re-inflect the participle: lemma of the VBG word, then surface form in the target tense.
        String lemma = AnalysisUtilities.getInstance().getLemma(gerund.getChild(0).label().toString(),
                gerund.label().toString());
        String inflected = AnalysisUtilities.getInstance().getSurfaceForm(lemma, targetPOS);

        // Swap the verb preterminal inside the copied VP, keeping its position.
        int position = participialCopy.indexOf(gerund);
        participialCopy.removeChild(position);
        Tree replacement = AnalysisUtilities.getInstance()
                .readTreeFromString("(" + targetPOS + " " + inflected + ")");
        participialCopy.addChild(position, replacement);

        String bracketed = "(ROOT (S " + m.getNode("subj").toString() + " " + participialCopy.toString()
                + " (. .)))";
        Tree sentence = AnalysisUtilities.getInstance().readTreeFromString(bracketed);
        correctTense(sentence.getChild(0).getChild(0), sentence.getChild(0));
        addQuotationMarksIfNeeded(sentence);

        Question result = input.deeperCopy();
        result.setIntermediateTree(sentence);
        if (GlobalProperties.getComputeFeatures()) {
            result.setFeatureValue("extractedFromParticipial", 1.0);
        }
        if (GlobalProperties.getComputeFeatures()) {
            result.setFeatureValue("extractedFromVerbParticipial", 1.0);
        }
        if (GlobalProperties.getDebug()) {
            System.err.println("extractVerbParticipialModifiers: " + sentence.toString());
        }
        addIfNovel(extracted, result);
    }
}

From source file:edu.cmu.ark.SentenceSimplifier.java

License:Open Source License

/**
 * Extracts clauses that are complements of a verb as stand-alone sentences.
 *
 * <p>For every Tregex match on the input's intermediate tree, the lemma of the governing
 * verb is looked up; the match is skipped unless {@code verbImpliesComplement} accepts that
 * lemma. Otherwise a copy of the matched clause becomes the only child of a fresh ROOT node
 * and is passed to {@code addIfNovel}.
 *
 * @param extracted collection that receives the newly built questions
 * @param input question whose intermediate tree is searched
 */
private void extractComplementClauses(Collection<Question> extracted, Question input) {
    //TODO should also address infinitive complements
    String pattern = "SBAR " + " < (S=sub !< (VP < VBG)) " //not a participial phrase
            + " !> NP|PP " //not part of a noun phrase or PP (other methods for those)
            + " [ $- /^VB.*/=verb | >+(SBAR) (SBAR $- /^VB.*/=verb) ] "; //complement of a VP (follows the verb)

    TregexMatcher m = TregexPatternFactory.getPattern(pattern).matcher(input.getIntermediateTree());
    while (m.find()) {
        Tree root = factory.newTreeNode("ROOT", new ArrayList<Tree>());
        Tree clause = m.getNode("sub");
        Tree governor = m.getNode("verb");
        String lemma = AnalysisUtilities.getInstance().getLemma(governor.yield().toString(),
                governor.label().toString());

        if (verbImpliesComplement(lemma)) {
            root.addChild(clause.deeperCopy());

            AnalysisUtilities.addPeriodIfNeeded(root);
            addQuotationMarksIfNeeded(root);

            Question result = input.deeperCopy();
            result.setIntermediateTree(root);
            if (GlobalProperties.getComputeFeatures()) {
                result.setFeatureValue("extractedFromFiniteClause", 1.0); //old feature name
            }
            if (GlobalProperties.getComputeFeatures()) {
                result.setFeatureValue("extractedFromComplementClause", 1.0);
            }
            if (GlobalProperties.getDebug()) {
                System.err.println("extractComplementClauses: " + root.toString());
            }
            addIfNovel(extracted, result);
        }
    }
}

From source file:edu.cmu.ark.SentenceSimplifier.java

License:Open Source License

/**
 * Extracts copula sentences from appositives, e.g.
 * "Lincoln, the 16th president, was tall." -> "Lincoln was the 16th president."
 * "The meeting, in 1984, was important." -> "The meeting was in 1984."
 *
 * <p>The copula is past tense ("was"/"were") when the matched main verb is tagged VBD and
 * present tense ("is"/"are") otherwise; number follows {@code isPlural} on the noun.
 *
 * @param extracted collection that receives the newly built questions
 * @param input question whose intermediate tree is searched
 */
private void extractAppositives(Collection<Question> extracted, Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    TregexMatcher matcher;

    tregexOpStr = "NP < (NP=noun !$-- NP $+ (/,/ $++ NP|PP=appositive !$ CC|CONJP)) "
            + " >> (ROOT <<# /^VB.*/=mainverb) "; //extract the main verb to capture the verb tense
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());
    while (matcher.find()) {
        Tree verbtree = matcher.getNode("mainverb");
        Tree nountree = matcher.getNode("noun").deeperCopy();
        Tree appositivetree = matcher.getNode("appositive");

        makeDeterminerDefinite(nountree);

        //A filter that skipped the match when both heads were proper nouns
        //(e.g., "Pittsburgh, PA") was disabled here and is not applied.

        //make a new tree for a copula sentence with the noun and appositive
        boolean pastTense = verbtree.label().toString().equals("VBD");
        boolean plural = isPlural(nountree);
        String copula;
        if (pastTense) {
            copula = plural ? "(VBD were)" : "(VBD was)";
        } else {
            //present-tense forms carry present-tense tags (previously mislabeled as VBD)
            copula = plural ? "(VBP are)" : "(VBZ is)";
        }
        Tree newTree = AnalysisUtilities.getInstance().readTreeFromString(
                "(ROOT (S " + nountree + " (VP " + copula + " " + appositivetree + ") (. .)))");

        addQuotationMarksIfNeeded(newTree);
        if (GlobalProperties.getDebug()) {
            System.err.println("extractAppositives: " + newTree.toString());
        }
        Question newTreeWithFeatures = input.deeperCopy();
        newTreeWithFeatures.setIntermediateTree(newTree);
        if (GlobalProperties.getComputeFeatures()) {
            newTreeWithFeatures.setFeatureValue("extractedFromAppositive", 1.0);
        }

        addIfNovel(extracted, newTreeWithFeatures);
    }
}

From source file:edu.cmu.ark.SentenceSimplifier.java

License:Open Source License

/**
 * e.g., John, hoping to get a good grade, studied. -> John hoped to get a good grade.
 *   Walking to the store, John saw Susan -> John was walking to the store.
 *   /*  w  w w  .jav  a 2s  . c  om*/
 *   NOTE: This method produces false positives for sentences like, 
 *            "Broadly speaking, the project was successful."
 *         where the participial phrase does not modify the subject.
 *   
 * @param extracted
 * @param input
 */
private void extractNounParticipialModifiers(Collection<Question> extracted, Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    TregexMatcher matcher;

    tregexOpStr = "ROOT < (S " + " [ << (NP < (NP=subj  $++ (/,/ $+ (VP=modifier <# VBN|VBG|VP=tense )))) " //modifiers that appear after nouns
            + " | < (S !< NP|SBAR < (VP=modifier <# VBN|VBG|VP=tense) $+ (/,/ $+ NP=subj)) " //modifiers before the subject. e.g., Founded by John, the company...
            + " | < (SBAR < (S !< NP|SBAR < (VP=modifier <# VBN|VBG=tense)) $+ (/,/ $+ NP=subj)) " //e.g., While walking to the store, John saw Susan.
            + " | < (PP=modifier !< NP <# VBG=tense $+ (/,/ $+ NP=subj)) ] ) " // e.g., Walking to the store, John saw Susan.
            + " <<# /^VB.*$/=maintense "; //tense determined by top-most verb

    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());
    while (matcher.find()) {
        Tree nountree = matcher.getNode("subj").deeperCopy();
        Tree vptree = matcher.getNode("modifier");
        Tree verb = matcher.getNode("tense");
        makeDeterminerDefinite(nountree);

        if (vptree.label().toString().equals("PP"))
            vptree.label().setValue("VP");
        String verbPOS = findTense(matcher.getNode("maintense"));
        if (vptree == null || nountree == null)
            return;

        String newTreeStr;
        if (verb.label().toString().equals("VBG")) {
            //for present partcipials, change the tense to the tense of the main verb
            //e.g., walking to the store -> walked to the store
            String verbLemma = AnalysisUtilities.getInstance().getLemma(verb.getChild(0).label().toString(),
                    verb.label().toString());
            String newVerb = AnalysisUtilities.getInstance().getSurfaceForm(verbLemma, verbPOS);
            int verbIndex = vptree.indexOf(verb);
            vptree = vptree.deeperCopy();
            vptree.removeChild(verbIndex);
            vptree.addChild(verbIndex,
                    AnalysisUtilities.getInstance().readTreeFromString("(" + verbPOS + " " + newVerb + ")"));
            newTreeStr = "(ROOT (S " + matcher.getNode("subj").toString() + " " + vptree.toString()
                    + " (. .)))";
        } else {
            //for past participials, add a copula
            //e.g., John, exhausted, -> John was exhausted
            //(or for conjunctions, just add the copula---kind of a hack to make the moby dick sentence work out)
            String auxiliary;
            if (verbPOS.equals("VBP") || verbPOS.equals("VBD")) {
                if (isPlural(nountree))
                    auxiliary = "(VBD were)";
                else
                    auxiliary = "(VBD was)";
            } else {
                if (isPlural(nountree))
                    auxiliary = "(VB are)";
                else
                    auxiliary = "(VBZ is)";
            }

            newTreeStr = "(ROOT (S " + nountree + " (VP " + auxiliary + " " + vptree + ") (. .)))";
        }

        Tree newTree = AnalysisUtilities.getInstance().readTreeFromString(newTreeStr);
        correctTense(newTree.getChild(0).getChild(0), newTree.getChild(0));
        addQuotationMarksIfNeeded(newTree);

        if (GlobalProperties.getDebug())
            System.err.println("extractNounParticipialModifiers: " + newTree.toString());
        Question newTreeWithFeatures = input.deeperCopy();
        newTreeWithFeatures.setIntermediateTree(newTree);
        if (GlobalProperties.getComputeFeatures())
            newTreeWithFeatures.setFeatureValue("extractedFromParticipial", 1.0); //old feature name
        if (GlobalProperties.getComputeFeatures())
            newTreeWithFeatures.setFeatureValue("extractedFromNounParticipial", 1.0);
        extracted.add(newTreeWithFeatures);
    }

}

From source file:edu.cmu.cs.in.hoop.visualizers.HoopParseTreeViewer.java

License:Open Source License

/**
 * /*from ww  w.java 2 s.com*/
 */
private void processInput(String aSentence) {
    debug("processInput (" + aSentence + ")");

    Tree thisTree = theLexicalizedParser.apply(aSentence);

    debug("We have a tree!");

    PrintWriter pw = new PrintWriter(System.out, true);

    TreePrint posPrinter = new TreePrint("wordsAndTags");
    posPrinter.printTree(thisTree, pw);

    ArrayList ar = thisTree.taggedYield();
    debug(ar.toString());

    for (Tree subtree : thisTree) {
        if (thisTree.isLeaf() == true) {
            debug("Tree leaf: " + subtree.label().value());
        } else {
            debug("Tree node: " + subtree.label().value());
        }
    }

    treePanel.setTree(thisTree);
}

From source file:edu.cmu.deiis.annotator.StanfordCoreNLPAnnotator.java

License:Open Source License

/**
 * Fills in a treebank node from a parse-tree node and adds it to the indexes.
 *
 * <p>The character span comes from the tokens addressed by the tree label's begin index
 * and (end index - 1). Children are produced by
 * {@code addTreebankNodeChildrenToIndexes}, and the node is marked as a leaf when it ends
 * up with no children.
 *
 * @param node the treebank node to populate
 * @param jCas passed through to the child-building call
 * @param tree the parse-tree node that {@code node} represents
 * @param tokenAnns token annotations, indexed by the label's begin/end indices
 */
private void addTreebankNodeToIndexes(TreebankNode node, JCas jCas, Tree tree, List<CoreLabel> tokenAnns) {
    // Resolve the first and last token covered by this tree node.
    CoreMap treeLabel = (CoreMap) tree.label();
    CoreMap firstToken = tokenAnns.get(treeLabel.get(BeginIndexAnnotation.class));
    CoreMap lastToken = tokenAnns.get(treeLabel.get(EndIndexAnnotation.class) - 1);

    // Both offsets are read before the node is touched.
    int spanStart = firstToken.get(CharacterOffsetBeginAnnotation.class);
    int spanEnd = lastToken.get(CharacterOffsetEndAnnotation.class);

    node.setBegin(spanStart);
    node.setEnd(spanEnd);
    node.setNodeType(tree.value());

    // Children are built via the companion method (mutual recursion).
    node.setChildren(this.addTreebankNodeChildrenToIndexes(node, jCas, tokenAnns, tree));

    boolean childless = node.getChildren().size() == 0;
    node.setLeaf(childless);
    node.addToIndexes();
}

From source file:edu.cornell.law.entitylinking.utils.Utility.java

/**
 * Collects the innermost noun phrases of a paragraph.
 *
 * <p>The paragraph is cut into fragments with a {@link StringTokenizer}, each fragment is
 * annotated with the shared pipeline, and every NP or WHNP subtree that contains no other
 * NP/WHNP contributes its words. A leading article ("the", "a", "an", any case) is
 * removed, and phrases containing " or " are split into their alternatives.
 *
 * @param paragraph text to analyse
 * @return the collected phrases; whatever was gathered so far if memory runs out
 */
public static List<String> getInnerNounPhrases(String paragraph) {
    List<String> nounPhrases = new ArrayList<String>();
    try {
        // StringTokenizer treats every character of the delimiter string literally,
        // so the backslash is a delimiter here, not a regex escape.
        StringTokenizer tokenizer = new StringTokenizer(paragraph, "\\.;?,:");
        while (tokenizer.hasMoreTokens()) {
            Annotation document = new Annotation(tokenizer.nextToken());
            pipeline.annotate(document);
            // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
            List<CoreMap> sentences = document.get(SentencesAnnotation.class);

            for (CoreMap sentence : sentences) {
                // the parse tree of the current sentence
                Tree tree = sentence.get(TreeAnnotation.class);

                for (Tree subtree : tree) {
                    String label = subtree.label().value();
                    if (!label.equals("NP") && !label.equals("WHNP")) {
                        continue;
                    }

                    String words = innermostPhraseWords(subtree.toString());
                    if (words == null) {
                        continue; // contains a nested NP/WHNP
                    }

                    // String.startsWith does not interpret regex flags, so the former
                    // startsWith("(?i)the") checks never matched; anchor the regex instead.
                    String temp = words.replaceFirst("(?i)^(?:the|an|a) ", "");

                    // split returns the whole string when " or " is absent
                    for (String s : temp.split(" or ")) {
                        nounPhrases.add(s);
                    }
                }
            }
        }
    } catch (OutOfMemoryError e) {
        System.out.println("Result too long to read into memory");
    }
    return nounPhrases;
}

/**
 * Returns the space-joined words of a bracketed phrase string, or {@code null} when the
 * phrase contains a further "(NP" or "(WHNP" besides its own label.
 */
private static String innermostPhraseWords(String phrase) {
    StringBuilder words = new StringBuilder();
    for (String token : phrase.split(" ")) {
        if (!token.contains("(")) {
            // a word, possibly followed by closing brackets
            words.append(token.replace(")", "")).append(' ');
            continue;
        }

        String ownLabel;
        if (token.contains("(NP")) {
            ownLabel = "\\(NP";
        } else if (token.contains("(WHNP")) {
            ownLabel = "\\(WHNP";
        } else {
            continue; // any other category label is dropped
        }

        // Remove one occurrence of the label and look for another noun phrase.
        String rest = phrase.replaceFirst(ownLabel, "");
        if (rest.contains("(NP") || rest.contains("(WHNP")) {
            return null;
        }
    }
    return words.toString().trim();
}

From source file:edu.cornell.law.entitylinking.utils.Utility.java

/**
 * Collects all noun phrases of a paragraph, nested ones included.
 *
 * <p>The paragraph is cut into fragments with a {@link StringTokenizer}, each fragment is
 * annotated with the shared pipeline, and every NP or WHNP subtree contributes its words.
 * A leading article ("the", "a", "an", any case) is removed, and phrases containing
 * " or " are split into their alternatives.
 *
 * @param paragraph text to analyse
 * @return the collected phrases; whatever was gathered so far if memory runs out
 */
public static List<String> getAllNounPhrases(String paragraph) {
    List<String> nounPhrases = new ArrayList<String>();
    try {
        // StringTokenizer treats every character of the delimiter string literally,
        // so the backslash is a delimiter here, not a regex escape.
        StringTokenizer tokenizer = new StringTokenizer(paragraph, "\\.;?:,");
        while (tokenizer.hasMoreTokens()) {
            Annotation document = new Annotation(tokenizer.nextToken());
            pipeline.annotate(document);
            List<CoreMap> sentences = document.get(SentencesAnnotation.class);

            for (CoreMap sentence : sentences) {
                // this is the parse tree of the current sentence
                Tree tree = sentence.get(TreeAnnotation.class);

                for (Tree subtree : tree) {
                    String label = subtree.label().value();
                    if (!label.equals("NP") && !label.equals("WHNP")) {
                        continue;
                    }

                    String phraseString = Sentence.listToString(subtree.yieldWords())
                            .replace(" -LRB- ", "(").replace(" -RRB- ", ")");

                    // String.startsWith does not interpret regex flags, so the former
                    // startsWith("(?i)the") checks never matched; anchor the regex instead.
                    String temp = phraseString.trim().replaceFirst("(?i)^(?:the|an|a) ", "");

                    // split returns the whole string when " or " is absent
                    for (String s : temp.split(" or ")) {
                        nounPhrases.add(s);
                    }
                }
            }
        }
    } catch (OutOfMemoryError e) {
        System.out.println("Result too long to read into memory");
    }

    return nounPhrases;
}

From source file:edu.nus.comp.nlp.stanford.UtilParser.java

License:Open Source License

/**
 * Tries to attach a node for the given typed dependency somewhere inside {@code tree}.
 *
 * <p>A new subtree is created from the governor's label and its value is overwritten with
 * the relation name and the dependent's value. The subtree is then placed as follows:
 * <ul>
 * <li>childless tree whose label value is "DUMMYROOT": the subtree becomes its child;</li>
 * <li>any other childless tree: an error is printed and {@code null} is returned;</li>
 * <li>otherwise children whose label index equals the dependent's index are moved under
 * the subtree; failing that, the subtree is added next to a child whose index equals the
 * governor's index, or the search recurses into children that have children.</li>
 * </ul>
 *
 * @param dep the dependency to place
 * @param tree the (sub)tree to search and modify in place
 * @return {@code tree} when the dependency was placed in it or below it, otherwise {@code null}
 */
private static Tree putOnBranch(TypedDependency dep, Tree tree) {
    /*
     * Each node is a tree with a single child
     */
    // NOTE(review): dep.dep() is handed to LinkedList's copy constructor as a collection;
    // presumably the initial children are whatever iterating that node yields -- confirm.
    Tree mySubtree = lstf.newTreeNode(dep.gov().label(), new LinkedList<Tree>(dep.dep()));
    mySubtree.setValue("[<-" + dep.reln() + "-] " + dep.dep().value());//nudge in the dependency relation information

    if (tree.children().length == 0) {
        if (tree.label().value().toString().equals("DUMMYROOT")) {
            // empty dummy root: the new subtree becomes its first child
            tree.addChild(mySubtree);
            return tree;
        } else {
            //Shouldn't happen
            System.err.println("Forgot to add a child earlier.");
            return null;
        }
    } else {
        //         System.err.println(dep.dep().label() +"\t[on]\t" + tree.label());
        for (Tree child : tree.children()) {
            //if dep is child's parent, insert dep between child and its parent
            if (((CoreLabel) child.label()).index() == dep.dep().label().index()) {
                // objectIndexOf locates the child's position so it can be removed by index
                tree.removeChild(tree.objectIndexOf(child));
                mySubtree.addChild(child);
            }
        }
        // more than one child under the new subtree: attach it here and stop
        if (mySubtree.children().length > 1) {
            tree.addChild(mySubtree);
            return tree;
        }

        for (Tree child : tree.children()) {
            //if dep is Child's sibling, or child
            if (((CoreLabel) child.label()).index() == dep.gov().label().index()) {
                tree.addChild(mySubtree);
                return tree;
            }

            // otherwise descend; a non-null result means the dependency was placed below
            if (child.children().length > 0) {
                if (putOnBranch(dep, child) != null) {
                    return tree;
                }
            }
        }
    }
    //          tree.getLeaves() == null
    //all children were checked recursively without finding a place for the dependency
    return null;
}

From source file:edu.rpi.tw.linkipedia.search.nlp.NaturalLanguageProcessor.java

License:Open Source License

/**
 * Lists the noun phrases of a parse tree that contain no further "(NP" in their
 * bracketed string form.
 *
 * <p>Each phrase is rendered as its labeled yield, one "word|tag" entry per word,
 * separated by single spaces.
 *
 * @param parse the parse tree to scan
 * @return the rendered phrases, in iteration order of the tree
 */
private List<String> getNounPhraseFromParseTree(Tree parse) {
    List<String> rendered = new ArrayList<String>();

    for (Tree node : parse) {
        if (!node.label().value().equals("NP")) {
            continue;
        }

        // A second "(NP" in the bracketed form means another NP is nested inside.
        String bracketed = node.toString();
        boolean hasNestedNp = bracketed.lastIndexOf("(NP") != bracketed.indexOf("(NP");
        if (hasNestedNp) {
            continue;
        }

        StringBuilder entry = new StringBuilder();
        for (LabeledWord labeled : node.labeledYield()) {
            entry.append(labeled.word()).append("|").append(labeled.tag()).append(" ");
        }
        rendered.add(entry.toString().trim());
    }

    return rendered;
}