Example usage for edu.stanford.nlp.trees.Tree.yield()

Introduction

On this page you can find example usages of the method edu.stanford.nlp.trees.Tree.yield().

Prototype

public ArrayList<Label> yield()

Source Link

Document

Gets the yield of the tree.

Usage

From source file:com.github.kutschkem.Qgen.annotators.SimpleSentenceDecompositionAnnotator.java

License:Open Source License

/**
 * Decomposes a document into simplified sentences.
 *
 * <p>Each sentence of the document is parsed. For every match of the pattern
 * {@code ROOT << (NP=np $++ VP=vp)} in a parse tree, a copy of the matched NP has the
 * nodes named {@code sbarq}, {@code c1} and {@code c2} (as bound by the second pattern)
 * pruned, and a new tree {@code (S <NP> <VP> (. .))} is built from the pruned NP and the
 * matched VP. The yield of that tree is turned into a string via
 * {@code AnalysisUtilities.orginialSentence}.
 *
 * @param documentText the raw text to split into sentences and decompose
 * @return one string per NP/VP match, in the order the matches were found
 */
private List<String> decompose(String documentText) {
    List<Tree> trees = new ArrayList<Tree>();
    for (String sentence : AnalysisUtilities.getSentences(documentText)) {
        trees.add(AnalysisUtilities.getInstance().parseSentence(sentence).parse);
    }

    // The patterns and the prune operations are the same for every tree and every
    // match, so they are compiled once here instead of once per loop iteration.
    TregexPattern nounVerbPattern = TregexPattern.compile("ROOT << (NP=np $++ VP=vp) ");
    TregexPattern clausePattern = TregexPattern.compile("NP << (/^S.*/=sbarq ?. /,/=c1 ?, /,/=c2)");
    List<TsurgeonPattern> pruneOperations = new ArrayList<TsurgeonPattern>();
    pruneOperations.add(Tsurgeon.parseOperation("prune sbarq"));
    pruneOperations.add(Tsurgeon.parseOperation("prune c1"));
    pruneOperations.add(Tsurgeon.parseOperation("prune c2"));
    TsurgeonPattern pruneClause = Tsurgeon.collectOperations(pruneOperations);

    List<String> result = new ArrayList<String>();

    for (Tree t : trees) {
        TregexMatcher m = nounVerbPattern.matcher(t);
        while (m.find()) {
            Tree vp = m.getNode("vp");

            // Work on a copy so the parse tree being matched is not modified.
            Tree prunedNp = m.getNode("np").deepCopy();
            // The return value is deliberately ignored, as in the original code: the
            // code relies on prunedNp being modified in place.
            Tsurgeon.processPattern(clausePattern, pruneClause, prunedNp);

            Tree newTree = Tree.valueOf("(S " + prunedNp + vp + "(. .))");
            result.add(AnalysisUtilities.orginialSentence(newTree.yield()));
        }
    }

    return result;
}

From source file:com.github.kutschkem.Qgen.annotators.SmithHeilmannTagger.java

License:Open Source License

/**
 * Asks the question generator for questions about every {@code Sentence} annotation in
 * the CAS and adds one {@code Question} annotation per generated question.
 *
 * <p>Each annotation spans its source sentence. Its answer is the string form of the
 * answer phrase tree's yield, or the literal {@code "Yes"} when the generated question
 * has no answer phrase tree. Its text is the string form of the question tree's yield.
 *
 * @param aJCas the CAS to read sentences from and to add question annotations to
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    final List<Sentence> sourceSentences = new ArrayList<Sentence>(JCasUtil.select(aJCas, Sentence.class));

    // Plain sentence texts, index-aligned with sourceSentences.
    final List<String> sentenceTexts = new ArrayList<String>();
    for (int i = 0; i < sourceSentences.size(); i++) {
        sentenceTexts.add(sourceSentences.get(i).getCoveredText());
    }

    for (edu.cmu.ark.Question generated : asker.ask(sentenceTexts)) {
        // The generator reports which input sentence each question came from.
        final Sentence origin = sourceSentences.get(generated.getSourceSentenceNumber());

        final Question annotation = new Question(aJCas);
        annotation.setBegin(origin.getBegin());
        annotation.setEnd(origin.getEnd());

        final Tree answerTree = generated.getAnswerPhraseTree();
        if (answerTree == null) {
            annotation.setAnswer("Yes");
        } else {
            annotation.setAnswer(AnalysisUtilities.orginialSentence(answerTree.yield()));
            // Must run after setAnswer: the relabeler is handed the answer just stored.
            NERelabeler.relabelWH(JCasUtil.selectCovered(NamedEntity.class, origin), annotation.getAnswer(),
                    generated.getTree());
        }

        annotation.setText(AnalysisUtilities.orginialSentence(generated.getTree().yield()));
        annotation.addToIndexes();
    }
}

From source file:com.project.NLP.Requirement.ClassRelationIdentifier.java

/**
 * Looks for class relations expressed by a sentence tree.
 *
 * <p>The tree is first passed to {@code wordStemmer.visitTree}. For every node matching
 * {@code S<(NP.(VP<NP))}, the first verb matching {@code VBZ|VBP>(VP,(NP>S))} inside that
 * match is looked up in {@code verbPhraseList}. If it is listed, two nouns are taken from
 * the first matches of the two noun patterns; when both are contained in
 * {@code documentClass} and differ from each other, a {@code ClassRelation} is recorded:
 * "Generalization" (noun1, noun2) when the verb is {@code "be"}, otherwise "Association"
 * (noun2, noun1).
 *
 * @param tree the sentence parse tree; it is passed to the stemmer before matching
 * @param documentClass the set of known class names to test the nouns against
 * @return the set of relations found; empty when none were found
 */
public HashSet identifyAssociation(Tree tree, Set documentClass) {
    HashSet<ClassRelation> classRelations = new HashSet<ClassRelation>();

    /* Stemming the sentence */
    wordStemmer.visitTree(tree);

    // All four patterns are constant, so compile them once up front rather than on
    // every loop iteration.
    TregexPattern pattern = TregexPattern.compile("S<(NP.(VP<NP))");
    TregexPattern verbPattern = TregexPattern.compile("VBZ|VBP>(VP,(NP>S))");
    TregexPattern firstNounPattern = TregexPattern.compile("NN|NNS>(NP>S)");
    TregexPattern secondNounPattern = TregexPattern.compile("NN|NNS>>(NP,(VBZ|VBP>(VP,NP)))");

    TregexMatcher matcher = pattern.matcher(tree);
    while (matcher.findNextMatchingNode()) {
        Tree match = matcher.getMatch();
        System.out.println("Sentence match : " + Sentence.listToString(match.yield()));

        // Only the first verb of each sentence match is considered.
        TregexMatcher verbMatcher = verbPattern.matcher(match);
        if (!verbMatcher.findNextMatchingNode()) {
            continue;
        }
        String verb = Sentence.listToString(verbMatcher.getMatch().yield());
        System.out.println("Verb match : " + verb);
        if (!verbPhraseList.contains(verb)) {
            continue;
        }
        System.out.println("list contains verb : " + verb);

        // NOTE(review): the noun patterns are matched against the whole tree, not
        // against the current sentence match - kept as in the original; confirm intent.
        TregexMatcher firstNounMatcher = firstNounPattern.matcher(tree);
        if (!firstNounMatcher.findNextMatchingNode()) {
            continue;
        }
        String noun1 = Sentence.listToString(firstNounMatcher.getMatch().yield());
        if (!documentClass.contains(noun1)) {
            continue;
        }
        System.out.println("class list contains noun1 : " + noun1);

        TregexMatcher secondNounMatcher = secondNounPattern.matcher(tree);
        if (!secondNounMatcher.findNextMatchingNode()) {
            continue;
        }
        String noun2 = Sentence.listToString(secondNounMatcher.getMatch().yield());
        if (noun1.equals(noun2) || !documentClass.contains(noun2)) {
            continue;
        }
        System.out.println("class list contains noun2 : " + noun2);

        ClassRelation clr;
        if (verb.equals("be")) {
            clr = new ClassRelation("Generalization", noun1, noun2);
            System.out.println("class generalization");
        } else {
            // Argument order is reversed for associations, as in the original code.
            clr = new ClassRelation("Association", noun2, noun1);
            System.out.println("class association");
        }
        classRelations.add(clr);
    }
    return classRelations;
}

From source file:com.project.NLP.Requirement.MethodIdentifier.java

/**
 * Collects the distinct verbs found in the given sentence trees.
 *
 * <p>Each tree is passed to {@code wordStemmer.visitTree}, then every node matching the
 * Tregex pattern {@code VB|VBN>VP} contributes the string form of its yield. Verbs are
 * kept in first-seen order without duplicates. Trees, matches and the final list are
 * printed to standard output.
 *
 * <p>Pattern syntax reference:
 * http://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/trees/tregex/TregexPattern.html
 *
 * @param sentenceTree the sentence parse trees to scan; each is passed to the stemmer
 * @return the list of distinct verb strings, in order of first occurrence
 */
ArrayList getPhrase(ArrayList<Tree> sentenceTree) {
    // The pattern is constant, so compile it once instead of once per tree.
    TregexPattern verbPattern = TregexPattern.compile("VB|VBN>VP");
    ArrayList<String> vpList = new ArrayList<String>();
    for (Tree tree : sentenceTree) {
        System.out.print("\n---tree_sen----" + tree + "----\n");
        /* Stemming the sentence */
        wordStemmer.visitTree(tree);
        TregexMatcher matcher = verbPattern.matcher(tree);
        while (matcher.findNextMatchingNode()) {
            Tree match = matcher.getMatch();
            String verb = Sentence.listToString(match.yield());

            /* Filter to unique verbs */
            if (!vpList.contains(verb)) {
                vpList.add(verb);
            }
            System.out.print("\n---phrase match----" + match + "----\n");
        }
    }
    System.out.print("\n---VPList----" + vpList + "----\n");
    return vpList;
}

From source file:com.project.NLP.Requirement.MethodIdentifier.java

/**
 * Collects candidate method names (verbs) from an array of sentence trees.
 *
 * <p>Each tree is passed to {@code wordStemmer.visitTree}, then every node matching the
 * Tregex pattern {@code VB|VBN>VP} contributes the string form of its yield to a set.
 * Finally all entries of {@code commonVerbs} are removed from the set. Trees, matches
 * and the final set are printed to standard output.
 *
 * @param tree the sentence parse trees to scan; each is passed to the stemmer
 * @return the set of verb strings found, without the common verbs
 */
HashSet identifyCandidateMethods(Tree[] tree) {
    // The pattern is constant, so compile it once instead of once per tree.
    TregexPattern verbPattern = TregexPattern.compile("VB|VBN>VP");
    HashSet<String> vpList = new HashSet<String>();
    for (Tree childTree : tree) {
        System.out.print("\n---tree_sen----" + childTree + "----\n");
        /* Stemming the sentence */
        wordStemmer.visitTree(childTree);
        TregexMatcher matcher = verbPattern.matcher(childTree);
        while (matcher.findNextMatchingNode()) {
            Tree match = matcher.getMatch();
            // The set itself filters duplicates, so no contains() check is needed.
            vpList.add(Sentence.listToString(match.yield()));
            System.out.print("\n---phrase match----" + match + "----\n");
        }
    }
    vpList.removeAll(commonVerbs);
    System.out.print("\n---VPList----" + vpList + "----\n");
    return vpList;
}

From source file:com.project.NLP.Requirement.MethodIdentifier.java

/**
 * Collects candidate method names (verbs) from a single sentence tree.
 *
 * <p>The tree is passed to {@code wordStemmer.visitTree}; every node matching the Tregex
 * pattern {@code VB|VBN>VP} then contributes the string form of its yield to a set.
 * Entries of {@code commonVerbs} are removed, and the remaining set is passed through
 * {@code removeDesignElements} before being returned. Matches and the intermediate set
 * are printed to standard output.
 *
 * @param tree the sentence parse tree to scan; it is passed to the stemmer
 * @return the result of {@code removeDesignElements} applied to the filtered verb set
 */
HashSet identifyCandidateMethods(Tree tree) {
    final HashSet candidateVerbs = new HashSet();

    /* Stemming the sentence */
    wordStemmer.visitTree(tree);

    final TregexMatcher verbMatcher = TregexPattern.compile("VB|VBN>VP").matcher(tree);
    while (verbMatcher.findNextMatchingNode()) {
        final Tree verbNode = verbMatcher.getMatch();
        // Using a set makes an explicit uniqueness check unnecessary.
        candidateVerbs.add(Sentence.listToString(verbNode.yield()));
        System.out.print("\n---phrase match----" + verbNode + "----\n");
    }

    candidateVerbs.removeAll(commonVerbs);
    System.out.print("\n------VPList----" + candidateVerbs + "----\n");

    return removeDesignElements(candidateVerbs);
}

From source file:conditionalCFG.ConditionalCFGParser.java

License:Open Source License

/**
 * Recursively scores a binarized tree and reports every node whose score exceeds the
 * corresponding entry in {@code iScore} by more than a small epsilon.
 *
 * <p>Leaves score 0. A preterminal is scored by the lexicon for its tag/word pair. A
 * node with one child adds the unary rule score (floored at -10000) to the child's
 * score; a node with two children adds the binary rule score (floored at -10000) to the
 * scores of both children. Violations are only printed to standard output; they do not
 * change the returned score.
 *
 * @param tree the (sub)tree to validate
 * @param start index of the first word covered by {@code tree}
 * @return the score computed for {@code tree}
 */
public double validateBinarizedTree(Tree tree, int start) {
    if (tree.isLeaf()) {
        return 0.0;
    }

    final float epsilon = 0.0001f;
    final Tree[] kids = tree.children();
    final String label = tree.label().value();

    if (tree.isPreTerminal()) {
        final String wordStr = kids[0].label().value();
        final int tag = tagIndex.indexOf(label);
        final int word = wordIndex.indexOf(wordStr);
        final float score = lex.score(new IntTaggedWord(word, tag), start, wordStr, null);
        // A preterminal always covers exactly one word: [start, start + 1).
        final float bound = iScore[start][start + 1][stateIndex.indexOf(label)];
        if (score > bound + epsilon) {
            System.out.println("Invalid tagging:");
            System.out.println("  Tag: " + label);
            System.out.println("  Word: " + wordStr);
            System.out.println("  Score: " + score);
            System.out.println("  Bound: " + bound);
        }
        return score;
    }

    final int parent = stateIndex.indexOf(label);
    final String firstLabel = kids[0].label().value();
    final int firstChild = stateIndex.indexOf(firstLabel);

    if (tree.numChildren() == 1) {
        final double ruleScore = SloppyMath.max(ug.scoreRule(new UnaryRule(parent, firstChild)), -10000.0);
        final double score = ruleScore + validateBinarizedTree(kids[0], start);
        // The span end is the start plus the number of words under this node.
        final int end = start + tree.yield().size();
        final double bound = iScore[start][end][parent];
        if (score > bound + epsilon) {
            System.out.println("Invalid unary:");
            System.out.println("  Parent: " + label);
            System.out.println("  Child: " + firstLabel);
            System.out.println("  Start: " + start);
            System.out.println("  End: " + end);
            System.out.println("  Score: " + score);
            System.out.println("  Bound: " + bound);
        }
        return score;
    }

    final String secondLabel = kids[1].label().value();
    final int secondChild = stateIndex.indexOf(secondLabel);
    final double ruleScore = SloppyMath.max(bg.scoreRule(new BinaryRule(parent, firstChild, secondChild)),
            -10000.0);
    // Left child first, then right child, so any diagnostics print in the same order.
    final double leftScore = validateBinarizedTree(kids[0], start);
    final int split = start + kids[0].yield().size();
    final double rightScore = validateBinarizedTree(kids[1], split);
    final double score = ruleScore + leftScore + rightScore;

    final int end = start + tree.yield().size();
    final double bound = iScore[start][end][parent];
    if (score > bound + epsilon) {
        System.out.println("Invalid binary:");
        System.out.println("  Parent: " + label);
        System.out.println("  LChild: " + firstLabel);
        System.out.println("  RChild: " + secondLabel);
        System.out.println("  Start: " + start);
        System.out.println("  End: " + end);
        System.out.println("  Score: " + score);
        System.out.println("  Bound: " + bound);
    }
    return score;
}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

/**
 * Lower-cases the first character of the first token of the tree, in place.
 *
 * <p>The token is left untouched when the label of its parent node starts with
 * {@code NNP} or when the token is exactly {@code "I"}. The method also does nothing
 * when the first leaf is null, when it has no parent inside {@code inputTree}, or when
 * its text is empty.
 *
 * @param inputTree the tree whose first leaf label is modified
 */
public static void downcaseFirstToken(Tree inputTree) {
    Tree firstWordTree = inputTree.getLeaves().get(0);
    if (firstWordTree == null) {
        return;
    }
    Tree preterm = firstWordTree.parent(inputTree);
    if (preterm == null) {
        // No parent to read a tag from (e.g. inputTree is itself the leaf); without
        // a tag the proper-noun check cannot be made, so leave the token untouched
        // rather than failing with a NullPointerException.
        return;
    }
    String firstWord = firstWordTree.yield().toString();
    if (firstWord.length() == 0) {
        // substring(0, 1) below would throw on an empty token.
        return;
    }
    if (!preterm.label().toString().matches("^NNP.*") && !firstWord.equals("I")) {
        firstWord = firstWord.substring(0, 1).toLowerCase() + firstWord.substring(1);
        firstWordTree.label().setValue(firstWord);
    }
}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

/**
 * Upper-cases the first character of the first token of the tree, in place.
 *
 * <p>Does nothing when the first leaf is null or when its text is empty.
 *
 * @param inputTree the tree whose first leaf label is modified
 */
public static void upcaseFirstToken(Tree inputTree) {
    Tree firstWordTree = inputTree.getLeaves().get(0);
    if (firstWordTree == null) {
        return;
    }

    String firstWord = firstWordTree.yield().toString();
    if (firstWord.length() == 0) {
        // substring(0, 1) below would throw on an empty token.
        return;
    }
    firstWord = firstWord.substring(0, 1).toUpperCase() + firstWord.substring(1);
    firstWordTree.label().setValue(firstWord);
}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

/**
 * Returns the cleaned-up string form of a tree's yield.
 *
 * <p>The yield is taken from a copy of the tree made with {@code deeperCopy()}, converted
 * with {@code toString()}, and passed through {@code cleanUpSentenceString}.
 *
 * @param inputTree the tree whose yield is wanted
 * @return the result of {@code cleanUpSentenceString} on the yield's string form
 */
public static String getCleanedUpYield(Tree inputTree) {
    final String rawYield = inputTree.deeperCopy().yield().toString();
    return cleanUpSentenceString(rawYield);
}