Example usage for edu.stanford.nlp.trees.Tree yield()

Introduction

On this page you can find example usages of the yield() method of edu.stanford.nlp.trees.Tree.

Prototype

public ArrayList<Label> yield()

Source Link

Document

Gets the yield of the tree.

Usage

From source file:edu.cmu.ark.nlp.question.Question.java

License:Open Source License

/**
 * Collects nodes of the source tree that c-command the source-side counterpart
 * of this question's predicate and do not share a parent with it.
 *
 * The predicate is the head preterminal of the first child of the intermediate
 * tree. Its counterpart is the parent of the first source-tree leaf whose lemma
 * equals the predicate's lemma. Candidate nodes are those matched by the
 * pattern RB|VB|VBD|VBP|VBZ|IN|MD|WRB|WDT|CC.
 *
 * @return the accepted candidate nodes in match order; empty when no
 *         counterpart of the predicate is found in the source tree
 */
public List<Tree> findLogicalWordsAboveIntermediateTree() {
    Tree predicate = intermediateTree.getChild(0).headPreTerminal(this.hf);
    String lemma = QuestionUtil.getLemma(predicate.yield().toString(), predicate.label().toString());

    // Find the parent of the first source leaf that has the same lemma as the predicate.
    Tree sourcePredicate = null;
    for (Tree word : sourceTree.getLeaves()) {
        Tree wordParent = word.parent(sourceTree);
        String sourceLemma = QuestionUtil.getLemma(word.label().toString(), wordParent.label().toString());
        if (sourceLemma.equals(lemma)) {
            sourcePredicate = wordParent;
            break;
        }
    }

    TregexPattern candidatePattern = TregexPatternFactory
            .getPattern("RB|VB|VBD|VBP|VBZ|IN|MD|WRB|WDT|CC=command");
    TregexMatcher candidates = candidatePattern.matcher(sourceTree);

    List<Tree> found = new ArrayList<Tree>();
    while (candidates.find()) {
        if (sourcePredicate == null) {
            break;
        }
        Tree candidate = candidates.getNode("command");
        if (!QuestionUtil.cCommands(sourceTree, candidate, sourcePredicate)) {
            continue;
        }
        // Siblings of the predicate are excluded.
        if (candidate.parent(sourceTree) == sourcePredicate.parent(sourceTree)) {
            continue;
        }
        found.add(candidate);
    }
    return found;
}

From source file:edu.cmu.ark.nlp.question.QuestionUtil.java

License:Open Source License

/**
 * Returns the cleaned-up sentence string for the yield of a tree.
 *
 * The yield is read from a deep copy of {@code inputTree}; its string form is
 * then passed through {@code cleanUpSentenceString}.
 *
 * @param inputTree the tree whose yield is rendered
 * @return the result of {@code cleanUpSentenceString} applied to the yield string
 */
public static String getCleanedUpYield(Tree inputTree) {
    String rawYield = inputTree.deepCopy().yield().toString();
    return cleanUpSentenceString(rawYield);
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Re-inflects the head verb of every VP child of {@code clause} so that it
 * agrees with {@code subject}, e.g., after a conjoined subject has been split:
 * John and Mary like Bill.  -> John LIKES Bill.  Mary LIKES Bill.
 *
 * A VP is left untouched when the tag of the subject's head preterminal ends
 * in "S", or when the VP's head verb is tagged something other than VB, VBP
 * or VBD.
 *
 * @param subject the subject phrase the verb has to agree with
 * @param clause  the clause whose VP children are inspected and modified in place
 */
private void correctTense(Tree subject, Tree clause) {
    for (Tree clauseChild : clause.getChildrenAsList()) {
        String subjectHeadTag = subject.headPreTerminal(this.hf).label().toString();
        boolean isVerbPhrase = clauseChild.label().toString().equals("VP");
        if (!isVerbPhrase || subjectHeadTag.endsWith("S")) {
            continue;
        }

        // Decide which tag the verb should carry; other verb tags are left alone.
        Tree oldVerb = clauseChild.headPreTerminal(this.hf);
        String oldTag = oldVerb.label().toString();
        String targetTag;
        if (oldTag.equals("VB") || oldTag.equals("VBP")) { //the parser confuses VBP with VB
            String subjectWords = subject.yield().toString();
            boolean firstOrSecondPerson = subjectWords.equals("I") || subjectWords.equals("you");
            targetTag = firstOrSecondPerson ? "VBP" : "VBZ";
        } else if (oldTag.equals("VBD")) {
            targetTag = "VBD";
        } else {
            continue;
        }

        // Pick the surface form: "be" is handled by hand, everything else by the conjugator.
        String lemma = QuestionUtil.getLemma(oldVerb.getChild(0).label().toString(),
                oldVerb.label().toString());
        String surface;
        if (lemma.equals("be") && targetTag.equals("VBD")) {
            surface = subject.label().toString().endsWith("S") ? "were" : "was";
        } else if (lemma.equals("be") && subject.yield().toString().equals("I")
                && targetTag.equals("VBP")) {
            surface = "am";
        } else {
            surface = this.conjugator.getSurfaceForm(lemma, targetTag);
        }

        // Swap the old verb preterminal for the newly inflected one at the same position.
        Tree verbParent = oldVerb.parent(clauseChild);
        int position = verbParent.objectIndexOf(oldVerb);
        verbParent.removeChild(position);
        verbParent.addChild(position,
                QuestionUtil.readTreeFromString("(" + targetTag + " " + surface + ")"));
    }
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Determines the tense tag to report for a node.
 *
 * @param node the node whose label and yield are inspected
 * @return "VBD" when the node is labeled MD and its yield string is "would"
 *         or "could"; otherwise the node's label string
 */
private String findTense(Tree node) {
    String tag = node.label().toString();
    // Compare the label's string form: label() returns a Label object, not a
    // String, so the previous node.label().equals("MD") could not match and
    // the modal branch was never taken.
    // NOTE(review): assumes yield().toString() renders a one-word yield as the
    // bare word (no list brackets) - confirm against the Tree version in use.
    if (tag.equals("MD") && node.yield().toString().matches("^(would|could)$")) {
        return "VBD";
    }
    return tag;
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Extracts clauses that follow a verb as its complement and adds each one, as
 * a new question, to {@code extracted}.
 *
 * For every match in the intermediate tree of {@code input}, the matched S
 * node is deep-copied under a fresh ROOT node, provided that
 * {@code verbImpliesComplement} accepts the lemma of the preceding verb.
 *
 * @param extracted the collection that receives the new questions (via {@code addIfNovel})
 * @param input     the question whose intermediate tree is searched
 */
private void extractComplementClauses(Collection<Question> extracted, Question input) {
    //TODO should also address infinitive complements
    String complementQuery = "SBAR " + " < (S=sub !< (VP < VBG)) " + //not a participial phrase
            " !> NP|PP " + //not part of a noun phrase or PP (other methods for those)
            " [ $- /^VB.*/=verb | >+(SBAR) (SBAR $- /^VB.*/=verb) ] "; //complement of a VP (follows the verb)

    TregexMatcher matches = TregexPatternFactory.getPattern(complementQuery)
            .matcher(input.getIntermediateTree());
    while (matches.find()) {
        Tree root = factory.newTreeNode("ROOT", new ArrayList<Tree>());
        Tree clause = matches.getNode("sub");
        Tree verb = matches.getNode("verb");
        String verbLemma = QuestionUtil.getLemma(verb.yield().toString(), verb.label().toString());

        if (verbImpliesComplement(verbLemma)) {
            root.addChild(clause.deepCopy());

            QuestionUtil.addPeriodIfNeeded(root);
            addQuotationMarksIfNeeded(root);

            Question extractedQuestion = input.deeperCopy();
            extractedQuestion.setIntermediateTree(root);
            if (this.getComputeFeatures) {
                extractedQuestion.setFeatureValue("extractedFromFiniteClause", 1.0); //old feature name
                extractedQuestion.setFeatureValue("extractedFromComplementClause", 1.0);
            }
            addIfNovel(extracted, extractedQuestion);
        }
    }
}

From source file:edu.cmu.ark.Question.java

License:Open Source License

/**
 * Collects nodes of the source tree that c-command the source-side counterpart
 * of this question's predicate and do not share a parent with it.
 *
 * The predicate is the head preterminal of the first child of the intermediate
 * tree. Its counterpart is the parent of the first source-tree leaf whose lemma
 * equals the predicate's lemma. Candidate nodes are those matched by the
 * pattern RB|VB|VBD|VBP|VBZ|IN|MD|WRB|WDT|CC.
 *
 * @return the accepted candidate nodes in match order; empty when no
 *         counterpart of the predicate is found in the source tree
 */
public List<Tree> findLogicalWordsAboveIntermediateTree() {
    Tree predicate = intermediateTree.getChild(0)
            .headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder());
    String lemma = AnalysisUtilities.getInstance().getLemma(predicate.yield().toString(),
            predicate.label().toString());

    // Find the parent of the first source leaf that has the same lemma as the predicate.
    Tree sourcePredicate = null;
    for (Tree word : sourceTree.getLeaves()) {
        Tree wordParent = word.parent(sourceTree);
        String sourceLemma = AnalysisUtilities.getInstance().getLemma(word.label().toString(),
                wordParent.label().toString());
        if (sourceLemma.equals(lemma)) {
            sourcePredicate = wordParent;
            break;
        }
    }

    TregexPattern candidatePattern = TregexPatternFactory
            .getPattern("RB|VB|VBD|VBP|VBZ|IN|MD|WRB|WDT|CC=command");
    TregexMatcher candidates = candidatePattern.matcher(sourceTree);

    List<Tree> found = new ArrayList<Tree>();
    while (candidates.find()) {
        if (sourcePredicate == null) {
            break;
        }
        Tree candidate = candidates.getNode("command");
        if (!AnalysisUtilities.cCommands(sourceTree, candidate, sourcePredicate)) {
            continue;
        }
        // Siblings of the predicate are excluded.
        if (candidate.parent(sourceTree) == sourcePredicate.parent(sourceTree)) {
            continue;
        }
        found.add(candidate);
    }
    return found;
}

From source file:edu.cmu.ark.QuestionTransducer.java

License:Open Source License

/**
 * Builds question trees for an answer phrase that is marked as unmovable.
 *
 * Unlike {@code moveWHPhrase}, the answer phrase is not moved: for each WH
 * phrase generated from it, a deeper copy of {@code inputTree} is made and the
 * children of the marked answer node are replaced in place by the WH phrase
 * subtree. Markers are then removed and leading adjuncts are moved.
 *
 * @param inputTree        tree containing a node labeled UNMOVABLE-NP, UNMOVABLE-PP
 *                         or UNMOVABLE-SBAR followed by "-" and {@code i}
 * @param intermediateTree tree whose yield string is handed to the WH phrase generator
 * @param i                index used in the marker label of the answer phrase
 * @param subjectMovement  not read by this method
 * @return one tree per generated WH phrase; empty when {@code inputTree}
 *         contains no matching answer node
 */
private List<Tree> moveWHPhraseUnmovable(Tree inputTree, Tree intermediateTree, int i,
        boolean subjectMovement) {
    List<Tree> res = new ArrayList<Tree>();

    String marker = "/^(UNMOVABLE-NP|UNMOVABLE-PP|UNMOVABLE-SBAR)-" + i + "$/";

    //extract the "answer" phrase and generate a WH phrase from it
    String tregexOpStr = "ROOT=root < (SQ=qclause << " + marker + "=answer < VP=predicate)";
    TregexPattern matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    if (GlobalProperties.getDebug()) {
        System.err.println("moveWHPhrase: inputTree:" + inputTree.toString());
    }
    if (GlobalProperties.getDebug()) {
        System.err.println("moveWHPhrase: tregexOpStr:" + tregexOpStr);
    }
    TregexMatcher matcher = matchPattern.matcher(inputTree);
    // Without a match there is no answer node to work with; previously the
    // result of find() was ignored and a null answer node was used below.
    if (!matcher.find()) {
        return res;
    }
    Tree phraseToMove = matcher.getNode("answer");

    if (printExtractedPhrases) {
        System.out.println("EXTRACTED\t" + phraseToMove.yield().toString());
    }

    whGen.generateWHPhraseSubtrees(removeMarkersFromTree(phraseToMove), intermediateTree.yield().toString());
    List<String> whPhraseSubtrees = whGen.getWHPhraseSubtrees();

    //Now put each WH phrase into the tree in place of the original answer.
    //Operate on the tree directly rather than using tsurgeon
    //because tsurgeon can't parse operations that insert trees with special characters (e.g., ":")
    for (String whPhraseSubtree : whPhraseSubtrees) {
        Tree copyTree2 = inputTree.deeperCopy();

        matcher = matchPattern.matcher(copyTree2);
        if (!matcher.find()) {
            continue;
        }
        Tree answerNode = matcher.getNode("answer");
        if (answerNode == null) {
            continue;
        }

        //replace the contents of the answer node with the wh phrase
        int childCount = answerNode.numChildren();
        for (int c = 0; c < childCount; c++) {
            answerNode.removeChild(0);
        }
        answerNode.addChild(0, AnalysisUtilities.getInstance().readTreeFromString(whPhraseSubtree));

        copyTree2 = removeMarkersFromTree(copyTree2);
        copyTree2 = moveLeadingAdjuncts(copyTree2);

        if (GlobalProperties.getDebug()) {
            System.err.println("moveWHPhrase: " + copyTree2.toString());
        }
        res.add(copyTree2);
    }

    return res;
}

From source file:edu.cmu.ark.QuestionTransducer.java

License:Open Source License

/**
 * Removes the answer phrase from its original position and places a WH phrase
 * generated from it at the front of the main clause.
 *
 * The input tree is first copied and restructured with Tsurgeon: the answer is
 * pruned, a PREPPLACEHOLDER is left at its position, and the question clause is
 * moved under a new SBARQ node that starts with a PLACEHOLDER. For each
 * generated WH phrase, a copy of that tree gets the PLACEHOLDER replaced by the
 * WH phrase and the PREPPLACEHOLDER replaced by the left-over preposition, if any.
 *
 * Note: Tsurgeon operations are perhaps not optimal here.
 * Using the Stanford API to move nodes directly might be simpler...
 *
 * @param inputTree        tree containing a node labeled NP, PP or SBAR followed
 *                         by "-" and {@code i}
 * @param intermediateTree tree whose yield string is handed to the WH phrase generator
 * @param i                index used in the marker label of the answer phrase
 * @param subjectMovement  not read by this method
 * @return one tree per generated WH phrase for which the placeholders were
 *         found; empty when {@code inputTree} contains no matching answer node
 */
private List<Tree> moveWHPhrase(Tree inputTree, Tree intermediateTree, int i, boolean subjectMovement) {
    List<Tree> res = new ArrayList<Tree>();

    String marker = "/^(NP|PP|SBAR)-" + i + "$/";

    //extract the "answer" phrase and generate a WH phrase from it
    String tregexOpStr = "ROOT=root < (SQ=qclause << " + marker + "=answer < VP=predicate)";
    TregexPattern matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    if (GlobalProperties.getDebug()) {
        System.err.println("moveWHPhrase: inputTree:" + inputTree.toString());
    }
    if (GlobalProperties.getDebug()) {
        System.err.println("moveWHPhrase: tregexOpStr:" + tregexOpStr);
    }
    TregexMatcher matcher = matchPattern.matcher(inputTree);
    // Without a match there is no answer node to work with; previously the
    // result of find() was ignored and a null answer node was used below.
    if (!matcher.find()) {
        return res;
    }
    Tree phraseToMove = matcher.getNode("answer");

    if (printExtractedPhrases) {
        System.out.println("EXTRACTED\t" + phraseToMove.yield().toString());
    }

    whGen.generateWHPhraseSubtrees(removeMarkersFromTree(phraseToMove), intermediateTree.yield().toString());
    List<String> whPhraseSubtrees = whGen.getWHPhraseSubtrees();
    List<String> leftOverPrepositions = whGen.getLeftOverPrepositions();

    Tree copyTree = inputTree.deeperCopy();
    //The placeholder is necessary because tsurgeon will complain
    //if an added node has no children. This placeholder is removed below.
    List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>();
    ps.add(Tsurgeon.parseOperation("insert (PREPPLACEHOLDER dummy) $+ answer"));
    ps.add(Tsurgeon.parseOperation("prune answer"));
    ps.add(Tsurgeon.parseOperation("insert (SBARQ=mainclause PLACEHOLDER=placeholder) >0 root"));
    ps.add(Tsurgeon.parseOperation("move qclause >-1 mainclause"));
    TsurgeonPattern p = Tsurgeon.collectOperations(ps);
    List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p));
    Tsurgeon.processPatternsOnTree(ops, copyTree);

    copyTree = removeMarkersFromTree(copyTree);

    // These patterns do not depend on the loop variable, so build them once.
    TregexPattern placeholderPattern = TregexPatternFactory.getPattern(
            "ROOT < (SBARQ=mainclause < PLACEHOLDER=ph1) << (__=ph2Parent < PREPPLACEHOLDER=ph2)");
    TregexPattern prepPlaceholderPattern = TregexPatternFactory.getPattern("PREPPLACEHOLDER=ph2");

    //Now put each WH phrase into the tree and remove the original answer.
    //Operate on the tree directly rather than using tsurgeon
    //because tsurgeon can't parse operations that insert trees with special characters (e.g., ":")
    for (int j = 0; j < whPhraseSubtrees.size(); j++) {
        Tree copyTree2 = copyTree.deeperCopy();
        String whPhraseSubtree = whPhraseSubtrees.get(j);
        String leftOverPreposition = leftOverPrepositions.get(j);

        if (GlobalProperties.getDebug()) {
            System.err.println("moveWHPhrase: whPhraseSubtree:" + whPhraseSubtree);
        }
        matcher = placeholderPattern.matcher(copyTree2);
        if (!matcher.find()) {
            continue;
        }
        Tree mainclauseNode = matcher.getNode("mainclause");
        //replace the wh placeholder with a wh phrase
        mainclauseNode.removeChild(0);
        mainclauseNode.addChild(0, AnalysisUtilities.getInstance().readTreeFromString(whPhraseSubtree));

        //Replace the pp placeholder with the left over preposition.
        //This may happen when the answer phrase was a PP.
        //e.g., John went to the game. -> What did John go to?
        Tree prepPlaceholderParent = matcher.getNode("ph2Parent");
        int index = prepPlaceholderParent.indexOf(matcher.getNode("ph2"));
        if (leftOverPreposition != null && leftOverPreposition.length() > 0) {
            prepPlaceholderParent.addChild(index,
                    AnalysisUtilities.getInstance().readTreeFromString(leftOverPreposition));
        }
        //now remove the left-over-preposition placeholder
        ps.clear();
        ps.add(Tsurgeon.parseOperation("prune ph2"));
        p = Tsurgeon.collectOperations(ps);
        ops.clear();
        ops.add(new Pair<TregexPattern, TsurgeonPattern>(prepPlaceholderPattern, p));
        Tsurgeon.processPatternsOnTree(ops, copyTree2);

        copyTree2 = moveLeadingAdjuncts(copyTree2);

        if (GlobalProperties.getDebug()) {
            System.err.println("moveWHPhrase: " + copyTree2.toString());
        }
        res.add(copyTree2);
    }

    return res;
}

From source file:edu.cmu.ark.QuestionTransducer.java

License:Open Source License

/**
 * Replaces the tensed main verb with its singular form when the subject
 * phrase marked with index 0 is the pronoun "I" or "you" (case-insensitive).
 * Note: this probably isn't necessary for most applications.
 *
 * Affects:
 * I walk -> Who walks? (rather than, Who walk?)
 *
 * Does not affect:
 * He walks -> Who walks?
 *
 * @param inputTree the tree to inspect; its main VP is modified in place
 */
private void ensureVerbAgreementForSubjectWH(Tree inputTree) {
    TregexMatcher subjectMatcher = TregexPatternFactory.getPattern("/^(NP|PP|SBAR)-" + 0 + "$/")
            .matcher(inputTree);
    if (!subjectMatcher.find()) {
        return;
    }

    String subjectText = subjectMatcher.getMatch().yield().toString();
    boolean firstOrSecondPerson = subjectText.equalsIgnoreCase("I") || subjectText.equalsIgnoreCase("you");
    if (!firstOrSecondPerson) {
        return;
    }

    TregexMatcher verbMatcher = TregexPatternFactory
            .getPattern("ROOT=root < (S=mainclause < (VP=verbphrase < (/VB.?/=tensedverb)))")
            .matcher(inputTree);
    if (!verbMatcher.find()) {
        return;
    }

    // Swap the tensed verb for its singular form at the same child position of the VP.
    Tree tensedVerb = verbMatcher.getNode("tensedverb");
    Tree verbPhrase = verbMatcher.getNode("verbphrase");
    Tree singularVerb = AnalysisUtilities.getInstance()
            .readTreeFromString(getSingularFormSubtree(tensedVerb));
    int position = verbPhrase.indexOf(tensedVerb);
    verbPhrase.removeChild(position);
    verbPhrase.addChild(position, singularVerb);
    if (GlobalProperties.getDebug()) {
        System.err.println("ensureVerbAgreementForSubjectWH: " + inputTree.toString());
    }
}

From source file:edu.cmu.ark.SentenceSimplifier.java

License:Open Source License

/**
 * Re-inflects the head verb of every VP child of {@code clause} so that it
 * agrees with {@code subject}, e.g., after a conjoined subject has been split:
 * John and Mary like Bill.  -> John LIKES Bill.  Mary LIKES Bill.
 *
 * A VP is left untouched when the tag of the subject's head preterminal ends
 * in "S", or when the VP's head verb is tagged something other than VB, VBP
 * or VBD.
 *
 * @param subject the subject phrase the verb has to agree with
 * @param clause  the clause whose VP children are inspected and modified in place
 */
private void correctTense(Tree subject, Tree clause) {
    for (Tree clauseChild : clause.getChildrenAsList()) {
        String subjectHeadTag = subject.headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder())
                .label().toString();
        boolean isVerbPhrase = clauseChild.label().toString().equals("VP");
        if (!isVerbPhrase || subjectHeadTag.endsWith("S")) {
            continue;
        }

        // Decide which tag the verb should carry; other verb tags are left alone.
        Tree oldVerb = clauseChild.headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder());
        String oldTag = oldVerb.label().toString();
        String targetTag;
        if (oldTag.equals("VB") || oldTag.equals("VBP")) { //the parser confuses VBP with VB
            String subjectWords = subject.yield().toString();
            boolean firstOrSecondPerson = subjectWords.equals("I") || subjectWords.equals("you");
            targetTag = firstOrSecondPerson ? "VBP" : "VBZ";
        } else if (oldTag.equals("VBD")) {
            targetTag = "VBD";
        } else {
            continue;
        }

        // Pick the surface form: "be" is handled by hand, everything else by the conjugator.
        String lemma = AnalysisUtilities.getInstance().getLemma(
                oldVerb.getChild(0).label().toString(), oldVerb.label().toString());
        String surface;
        if (lemma.equals("be") && targetTag.equals("VBD")) {
            surface = subject.label().toString().endsWith("S") ? "were" : "was";
        } else if (lemma.equals("be") && subject.yield().toString().equals("I")
                && targetTag.equals("VBP")) {
            surface = "am";
        } else {
            surface = AnalysisUtilities.getInstance().getSurfaceForm(lemma, targetTag);
        }

        // Swap the old verb preterminal for the newly inflected one at the same position.
        Tree verbParent = oldVerb.parent(clauseChild);
        int position = verbParent.indexOf(oldVerb);
        verbParent.removeChild(position);
        verbParent.addChild(position,
                AnalysisUtilities.getInstance().readTreeFromString("(" + targetTag + " " + surface + ")"));
    }
}