Example usage for edu.stanford.nlp.trees Tree yield

List of usage examples for edu.stanford.nlp.trees Tree yield

Introduction

On this page you can find example usages of the yield() method of edu.stanford.nlp.trees.Tree.

Prototype

public ArrayList<Label> yield() 

Source Link

Documentation

Gets the yield of the tree.

Usage

From source file:com.github.kutschkem.Qgen.annotators.SimpleSentenceDecompositionAnnotator.java

License:Open Source License

/**
 * Breaks a document into simplified sentences built from noun-phrase /
 * verb-phrase pairs found in each sentence's parse tree.
 *
 * <p>Every sentence of {@code documentText} is parsed first. For each match of
 * an {@code NP} with a later sister {@code VP} somewhere under {@code ROOT},
 * a copy of the NP has its nodes labelled {@code S...} (and the optional commas
 * adjacent to them) pruned, and a new {@code (S NP VP (. .))} tree is built
 * whose yield is converted back to text.
 *
 * @param documentText the raw text to split into sentences and decompose
 * @return one string per NP/VP match, in match order; empty if nothing matched
 */
private List<String> decompose(String documentText) {
    List<Tree> trees = new ArrayList<Tree>();
    for (String sentence : AnalysisUtilities.getSentences(documentText)) {
        trees.add(AnalysisUtilities.getInstance().parseSentence(sentence).parse);
    }

    // Both patterns are constants, so compile them once instead of once per
    // tree / once per match as the loops below would otherwise do.
    TregexPattern clausePattern = TregexPattern.compile("ROOT << (NP=np $++ VP=vp) ");
    TregexPattern embeddedClausePattern = TregexPattern
            .compile("NP << (/^S.*/=sbarq ?. /,/=c1 ?, /,/=c2)");

    List<String> result = new ArrayList<String>();

    for (Tree t : trees) {
        TregexMatcher m = clausePattern.matcher(t);
        while (m.find()) {
            Tree vp = m.getNode("vp");

            // Work on a copy so the pruning below never alters the parsed tree
            // that the matcher is still iterating over.
            Tree np = m.getNode("np").deepCopy();

            List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>();
            ps.add(Tsurgeon.parseOperation("prune sbarq"));
            ps.add(Tsurgeon.parseOperation("prune c1"));
            ps.add(Tsurgeon.parseOperation("prune c2"));
            Tsurgeon.processPattern(embeddedClausePattern, Tsurgeon.collectOperations(ps), np);

            Tree newTree = Tree.valueOf("(S " + np + vp + "(. .))");
            result.add(AnalysisUtilities.orginialSentence(newTree.yield()));
        }
    }

    return result;
}

From source file:com.github.kutschkem.Qgen.annotators.SmithHeilmannTagger.java

License:Open Source License

/**
 * Generates questions for every {@code Sentence} annotation in the CAS and
 * indexes one {@code Question} annotation per generated question.
 *
 * <p>Each annotation spans its source sentence. Its answer is the text of the
 * answer phrase tree's yield, or the literal {@code "Yes"} when the generated
 * question carries no answer phrase tree. Its text is the yield of the
 * question tree.
 *
 * @param aJCas the CAS to read sentences from and add question annotations to
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    List<Sentence> sentences = new ArrayList<Sentence>(JCasUtil.select(aJCas, Sentence.class));

    List<String> sentenceTexts = new ArrayList<String>();
    for (int i = 0; i < sentences.size(); i++) {
        sentenceTexts.add(sentences.get(i).getCoveredText());
    }

    for (edu.cmu.ark.Question generated : asker.ask(sentenceTexts)) {
        // The generated question refers back to its sentence by list position.
        Sentence source = sentences.get(generated.getSourceSentenceNumber());

        Question annotation = new Question(aJCas);
        annotation.setBegin(source.getBegin());
        annotation.setEnd(source.getEnd());

        Tree answerTree = generated.getAnswerPhraseTree();
        if (answerTree == null) {
            annotation.setAnswer("Yes");
        } else {
            annotation.setAnswer(AnalysisUtilities.orginialSentence(answerTree.yield()));
            // Must run after setAnswer: the relabeler is handed the answer just stored.
            NERelabeler.relabelWH(JCasUtil.selectCovered(NamedEntity.class, source),
                    annotation.getAnswer(), generated.getTree());
        }

        annotation.setText(AnalysisUtilities.orginialSentence(generated.getTree().yield()));
        annotation.addToIndexes();
    }
}

From source file:com.project.NLP.Requirement.ClassRelationIdentifier.java

/**
 * Looks for "noun verb noun" relations between known classes in a parse tree.
 *
 * <p>The tree is first passed to {@code wordStemmer}. For every node matching
 * {@code S<(NP.(VP<NP))}, the first verb found inside it is checked against
 * {@code verbPhraseList}; if listed, the first subject noun and the first
 * object noun of the whole tree are looked up in {@code documentClass}. When
 * both are known classes and differ, a {@code ClassRelation} is recorded:
 * {@code "Generalization"(noun1, noun2)} for the verb {@code "be"}, otherwise
 * {@code "Association"(noun2, noun1)}. Progress is traced to standard output.
 *
 * @param tree          parse tree of one sentence; it is handed to the stemmer
 * @param documentClass names of the classes already identified in the document
 * @return the relations found, possibly empty
 */
public HashSet identifyAssociation(Tree tree, Set documentClass) {
    HashSet classRelations = new HashSet();
    String phraseNotation = "S<(NP.(VP<NP))";
    String verbPhraseNotation = "VBZ|VBP>(VP,(NP>S))";

    /* Stemming the sentence */
    wordStemmer.visitTree(tree);
    TregexPattern pattern = TregexPattern.compile(phraseNotation);
    TregexMatcher matcher = pattern.matcher(tree);
    TregexPattern verbPattern = TregexPattern.compile(verbPhraseNotation);

    while (matcher.findNextMatchingNode()) {
        Tree sentenceMatch = matcher.getMatch();
        System.out.println("Sentence match : " + Sentence.listToString(sentenceMatch.yield()));

        // Only the first verb of the matched clause is considered.
        TregexMatcher verbMatcher = verbPattern.matcher(sentenceMatch);
        if (!verbMatcher.findNextMatchingNode()) {
            continue;
        }
        String verb = Sentence.listToString(verbMatcher.getMatch().yield());
        System.out.println("Verb match : " + verb);
        if (!verbPhraseList.contains(verb)) {
            continue;
        }
        System.out.println("list contains verb : " + verb);

        // Subject noun: searched in the whole tree, not just the matched clause.
        TregexMatcher subjectMatcher = TregexPattern.compile("NN|NNS>(NP>S)").matcher(tree);
        if (!subjectMatcher.findNextMatchingNode()) {
            continue;
        }
        String noun1 = Sentence.listToString(subjectMatcher.getMatch().yield());
        if (!documentClass.contains(noun1)) {
            continue;
        }

        // Object noun: again searched in the whole tree.
        TregexMatcher objectMatcher = TregexPattern.compile("NN|NNS>>(NP,(VBZ|VBP>(VP,NP)))")
                .matcher(tree);
        System.out.println("class list contains noun1 : " + noun1);
        if (!objectMatcher.findNextMatchingNode()) {
            continue;
        }
        String noun2 = Sentence.listToString(objectMatcher.getMatch().yield());
        if (noun1.equals(noun2) || !documentClass.contains(noun2)) {
            continue;
        }
        System.out.println("class list contains noun2 : " + noun2);

        ClassRelation relation;
        if (verb.equals("be")) {
            relation = new ClassRelation("Generalization", noun1, noun2);
            System.out.println("class generalization");
        } else {
            relation = new ClassRelation("Association", noun2, noun1);
            System.out.println("class association");
        }
        classRelations.add(relation);
    }
    return classRelations;
}

From source file:com.project.NLP.Requirement.MethodIdentifier.java

/**
 * Collects the distinct verbs ({@code VB} or {@code VBN} directly under a
 * {@code VP}) that occur in the given sentence trees.
 *
 * <p>Each tree is passed to {@code wordStemmer} before matching, so the trees
 * supplied by the caller are visited by the stemmer as a side effect. Trees and
 * matches are traced to standard output.
 *
 * @param sentenceTree parse trees of the sentences to scan
 * @return the verbs in first-seen order, without duplicates
 */
ArrayList getPhrase(ArrayList<Tree> sentenceTree) {
    /*ref : patterns -http://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/trees/tregex/TregexPattern.html  */
    String phraseNotation = "VB|VBN>VP";
    // The pattern is the same for every sentence: compile it once, not per tree.
    TregexPattern verbPattern = TregexPattern.compile(phraseNotation);

    ArrayList vpList = new ArrayList();
    for (Tree tree : sentenceTree) {
        System.out.print("\n---tree_sen----" + tree + "----\n");
        /* Stemming the sentence */
        wordStemmer.visitTree(tree);
        TregexMatcher matcher = verbPattern.matcher(tree);
        while (matcher.findNextMatchingNode()) {
            Tree match = matcher.getMatch();
            String verb = Sentence.listToString(match.yield());

            /* Filter to unique verbs  */
            if (!vpList.contains(verb)) {
                vpList.add(verb);
            }
            System.out.print("\n---phrase match----" + match + "----\n");
        }
    }
    System.out.print("\n---VPList----" + vpList + "----\n");
    return vpList;
}

From source file:com.project.NLP.Requirement.MethodIdentifier.java

/**
 * Collects candidate method names from several sentence trees: every verb
 * ({@code VB} or {@code VBN} directly under a {@code VP}) that is not listed in
 * {@code commonVerbs}.
 *
 * <p>Each tree is passed to {@code wordStemmer} before matching, so the trees
 * supplied by the caller are visited by the stemmer as a side effect. Trees and
 * matches are traced to standard output.
 *
 * @param tree parse trees of the sentences to scan
 * @return the set of verbs found, minus {@code commonVerbs}
 */
HashSet identifyCandidateMethods(Tree[] tree) {

    String phraseNotation = "VB|VBN>VP";
    // The pattern is the same for every sentence: compile it once, not per tree.
    TregexPattern verbPattern = TregexPattern.compile(phraseNotation);

    HashSet vpList = new HashSet();
    for (Tree childTree : tree) {
        System.out.print("\n---tree_sen----" + childTree + "----\n");
        /* Stemming the sentence */
        wordStemmer.visitTree(childTree);
        TregexMatcher matcher = verbPattern.matcher(childTree);
        while (matcher.findNextMatchingNode()) {
            Tree match = matcher.getMatch();
            // The set takes care of keeping verbs unique.
            vpList.add(Sentence.listToString(match.yield()));
            System.out.print("\n---phrase match----" + match + "----\n");
        }
    }
    vpList.removeAll(commonVerbs);
    System.out.print("\n---VPList----" + vpList + "----\n");
    return vpList;
}

From source file:com.project.NLP.Requirement.MethodIdentifier.java

/**
 * Collects candidate method names from a single sentence tree.
 *
 * <p>The tree is passed to {@code wordStemmer}, then every {@code VB} or
 * {@code VBN} node directly under a {@code VP} contributes its text. Verbs
 * listed in {@code commonVerbs} are dropped and the remainder is filtered
 * through {@code removeDesignElements}. Matches and the intermediate set are
 * traced to standard output.
 *
 * @param tree parse tree of the sentence to scan; it is handed to the stemmer
 * @return whatever {@code removeDesignElements} returns for the filtered verbs
 */
HashSet identifyCandidateMethods(Tree tree) {
    HashSet candidateVerbs = new HashSet();

    /* Stemming the sentence */
    wordStemmer.visitTree(tree);

    TregexMatcher verbMatcher = TregexPattern.compile("VB|VBN>VP").matcher(tree);
    while (verbMatcher.findNextMatchingNode()) {
        Tree verbNode = verbMatcher.getMatch();
        // The set takes care of keeping verbs unique.
        candidateVerbs.add(Sentence.listToString(verbNode.yield()));
        System.out.print("\n---phrase match----" + verbNode + "----\n");
    }

    candidateVerbs.removeAll(commonVerbs);
    System.out.print("\n------VPList----" + candidateVerbs + "----\n");

    return removeDesignElements(candidateVerbs);
}

From source file:conditionalCFG.ConditionalCFGParser.java

License:Open Source License

/**
 * Recursively scores a tree with at most two children per node and reports,
 * on standard output, every node whose score exceeds the corresponding
 * {@code iScore} entry by more than a small tolerance.
 *
 * <p>Leaves score {@code 0.0}. A preterminal is scored by {@code lex}; a node
 * with one child by {@code ug} plus its child's score; any other node by
 * {@code bg} plus the scores of its first two children. Rule scores are
 * floored at {@code -10000.0}.
 *
 * @param tree  the (sub)tree to validate
 * @param start index of the first word covered by {@code tree}
 * @return the score computed for {@code tree}
 */
public double validateBinarizedTree(Tree tree, int start) {
    if (tree.isLeaf()) {
        return 0.0;
    }
    final float epsilon = 0.0001f;
    // A non-leaf always has a first child, so these lookups are safe here.
    final Tree[] kids = tree.children();
    final String parentLabel = tree.label().value();
    final String firstChildLabel = kids[0].label().value();

    if (tree.isPreTerminal()) {
        // For a preterminal the single child is the word itself.
        int tag = tagIndex.indexOf(parentLabel);
        int word = wordIndex.indexOf(firstChildLabel);
        float score = lex.score(new IntTaggedWord(word, tag), start, firstChildLabel, null);
        float bound = iScore[start][start + 1][stateIndex.indexOf(parentLabel)];
        if (score > bound + epsilon) {
            System.out.println("Invalid tagging:");
            System.out.println("  Tag: " + parentLabel);
            System.out.println("  Word: " + firstChildLabel);
            System.out.println("  Score: " + score);
            System.out.println("  Bound: " + bound);
        }
        return score;
    }

    int parent = stateIndex.indexOf(parentLabel);
    int firstChild = stateIndex.indexOf(firstChildLabel);
    // End of the span covered by this node: start plus the number of leaves.
    int end = start + tree.yield().size();

    if (tree.numChildren() == 1) {
        double score = SloppyMath.max(ug.scoreRule(new UnaryRule(parent, firstChild)), -10000.0)
                + validateBinarizedTree(kids[0], start);
        double bound = iScore[start][end][parent];
        if (score > bound + epsilon) {
            System.out.println("Invalid unary:");
            System.out.println("  Parent: " + parentLabel);
            System.out.println("  Child: " + firstChildLabel);
            System.out.println("  Start: " + start);
            System.out.println("  End: " + end);
            System.out.println("  Score: " + score);
            System.out.println("  Bound: " + bound);
        }
        return score;
    }

    final String secondChildLabel = kids[1].label().value();
    int secondChild = stateIndex.indexOf(secondChildLabel);
    // The right child starts where the left child's words end.
    double score = SloppyMath.max(bg.scoreRule(new BinaryRule(parent, firstChild, secondChild)), -10000.0)
            + validateBinarizedTree(kids[0], start)
            + validateBinarizedTree(kids[1], start + kids[0].yield().size());
    double bound = iScore[start][end][parent];
    if (score > bound + epsilon) {
        System.out.println("Invalid binary:");
        System.out.println("  Parent: " + parentLabel);
        System.out.println("  LChild: " + firstChildLabel);
        System.out.println("  RChild: " + secondChildLabel);
        System.out.println("  Start: " + start);
        System.out.println("  End: " + end);
        System.out.println("  Score: " + score);
        System.out.println("  Bound: " + bound);
    }
    return score;
}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

/**
 * Lower-cases the first character of the first word of a tree, in place,
 * unless that word is tagged as a proper noun ({@code NNP...}) or is the
 * pronoun {@code "I"}.
 *
 * <p>The tree is left untouched when it has no leaves, when the first leaf has
 * no parent inside {@code inputTree} (so its tag is unknown), or when the
 * first word is empty.
 *
 * @param inputTree the tree whose first leaf label may be rewritten
 */
public static void downcaseFirstToken(Tree inputTree) {
    java.util.List<Tree> leaves = inputTree.getLeaves();
    if (leaves.isEmpty()) {
        return; // get(0) would throw here; the old null check never caught this
    }
    Tree firstWordTree = leaves.get(0);
    if (firstWordTree == null) {
        return;
    }
    Tree preterm = firstWordTree.parent(inputTree);
    if (preterm == null) {
        return; // no tag available to decide whether this is a proper noun
    }
    // Read the word from the label itself. yield().toString() is the string
    // form of a list, which is "[word]" when yield() is an ArrayList<Label>.
    String firstWord = firstWordTree.label().value();
    if (firstWord == null || firstWord.isEmpty()) {
        return;
    }
    if (!preterm.label().toString().matches("^NNP.*") && !firstWord.equals("I")) {
        // Locale.ROOT keeps the result independent of the JVM's default locale.
        firstWord = firstWord.substring(0, 1).toLowerCase(java.util.Locale.ROOT) + firstWord.substring(1);
        firstWordTree.label().setValue(firstWord);
    }
}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

/**
 * Upper-cases the first character of the first word of a tree, in place.
 *
 * <p>The tree is left untouched when it has no leaves or when the first word
 * is empty.
 *
 * @param inputTree the tree whose first leaf label may be rewritten
 */
public static void upcaseFirstToken(Tree inputTree) {
    java.util.List<Tree> leaves = inputTree.getLeaves();
    if (leaves.isEmpty()) {
        return; // get(0) would throw here; the old null check never caught this
    }
    Tree firstWordTree = leaves.get(0);
    if (firstWordTree == null) {
        return;
    }

    // Read the word from the label itself. yield().toString() is the string
    // form of a list, which is "[word]" when yield() is an ArrayList<Label>.
    String firstWord = firstWordTree.label().value();
    if (firstWord == null || firstWord.isEmpty()) {
        return;
    }
    // Locale.ROOT keeps the result independent of the JVM's default locale.
    firstWord = firstWord.substring(0, 1).toUpperCase(java.util.Locale.ROOT) + firstWord.substring(1);
    firstWordTree.label().setValue(firstWord);
}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

/**
 * Returns the words of a tree as a single space-separated string, passed
 * through {@code cleanUpSentenceString}.
 *
 * @param inputTree the tree whose words are wanted; it is not modified
 * @return the cleaned-up sentence text
 */
public static String getCleanedUpYield(Tree inputTree) {
    Tree copyTree = inputTree.deeperCopy();

    // Join the leaf words explicitly. yield().toString() is the string form of
    // a list, which is "[a, b, c]" when yield() is an ArrayList<Label>.
    StringBuilder words = new StringBuilder();
    String separator = "";
    for (Tree leaf : copyTree.getLeaves()) {
        words.append(separator).append(leaf.label().value());
        separator = " ";
    }

    return cleanUpSentenceString(words.toString());
}