Example usage for edu.stanford.nlp.trees Tree toString

List of usage examples for edu.stanford.nlp.trees Tree toString

Introduction

On this page you can find example usages of the toString method of edu.stanford.nlp.trees.Tree.

Prototype

@Override
public String toString() 

Source Link

Document

Converts the parse tree to a string in Penn Treebank format.

Usage

From source file:edu.cmu.ark.QuestionTransducer.java

License:Open Source License

/**
 * Changes the inflection of the main verb for questions with
 * first and second person pronouns are the subject.
 * Note: this probably isn't necessary for most applications.
 *
 * E.g.,/*ww w  .ja v  a 2  s.c  om*/
 * Affects:
 * I walk -> Who walks? (rather than, Who walk?)
 *
 * Does not affect:
 * He walks -> Who walks?
 *
 */
private void ensureVerbAgreementForSubjectWH(Tree inputTree) {
    String tregexOpStr;
    TregexMatcher matcher;
    TregexPattern matchPattern;
    Tree subjectTree;
    String subjectString;

    tregexOpStr = "/^(NP|PP|SBAR)-" + 0 + "$/";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(inputTree);
    if (matcher.find()) {
        subjectTree = matcher.getMatch();
        subjectString = subjectTree.yield().toString();
        if (subjectString.equalsIgnoreCase("I") || subjectString.equalsIgnoreCase("you")) {
            tregexOpStr = "ROOT=root < (S=mainclause < (VP=verbphrase < (/VB.?/=tensedverb)))";
            matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
            matcher = matchPattern.matcher(inputTree);
            if (matcher.find()) {
                Tree verbSubtree = matcher.getNode("tensedverb");
                Tree vpSubtree = matcher.getNode("verbphrase");
                Tree singularFormSubtree = AnalysisUtilities.getInstance()
                        .readTreeFromString(getSingularFormSubtree(verbSubtree));
                int index = vpSubtree.indexOf(verbSubtree);
                vpSubtree.removeChild(index);
                vpSubtree.addChild(index, singularFormSubtree);
                if (GlobalProperties.getDebug())
                    System.err.println("ensureVerbAgreementForSubjectWH: " + inputTree.toString());
            }
        }
    }
}

From source file:edu.cmu.ark.QuestionTransducer.java

License:Open Source License

/**
 * Marks UNMOVABLE phrases as possible answers by appending an index to their labels.
 *
 * Works on a deeperCopy() of the input. When the main clause subject pattern
 * matches, that node's label is suffixed with "-0". Every UNMOVABLE-NP node
 * under ROOT is then suffixed with "-" followed by the running count.
 * The field numWHPhrasesUnmovable is reset to 0 first and incremented once
 * per marked node.
 *
 * @param inputTree tree to copy and mark
 * @return the marked copy
 */
private Tree markPossibleAnswerPhrasesUnmovable(Tree inputTree) {
    Tree copyTree = inputTree.deeperCopy();
    numWHPhrasesUnmovable = 0;

    String tregexOpStr;
    TregexPattern matchPattern;
    TregexMatcher matcher;
    Tree tmp;

    //find and mark the main clause subject
    tregexOpStr = "ROOT < (S < (NP|SBAR=subj $+ /,/ !$++ NP|SBAR))";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(copyTree);
    if (matcher.find()) {
        tmp = matcher.getNode("subj");
        tmp.label().setValue(tmp.label().toString() + "-0");
        numWHPhrasesUnmovable++;
    }

    //noun phrases
    tregexOpStr = "ROOT=root << UNMOVABLE-NP=np";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(copyTree);
    while (matcher.find()) {
        tmp = matcher.getNode("np");
        tmp.label().setValue(tmp.label().toString() + "-" + numWHPhrasesUnmovable);
        numWHPhrasesUnmovable++;
    }

    if (GlobalProperties.getDebug()) {
        //label the trace with this method's own name so it can be told apart
        //from the output of markPossibleAnswerPhrases
        System.err.println("markPossibleAnswerPhrasesUnmovable: " + copyTree.toString());
    }
    return copyTree;
}

From source file:edu.cmu.ark.QuestionTransducer.java

License:Open Source License

/**
 * Marks possible answer phrase nodes with indexes for later processing.
 *
 * Works on a deeperCopy() of the input. When the main clause subject pattern
 * matches, that node's label is suffixed with "-0". Every node under ROOT
 * labeled NP, PP, or SBAR is then suffixed with "-" followed by the running
 * count. The field numWHPhrases is reset to 0 first and incremented once per
 * marked node.
 *
 * @param inputTree tree to copy and mark
 * @return the marked copy
 */
private Tree markPossibleAnswerPhrases(Tree inputTree) {
    Tree copyTree = inputTree.deeperCopy();
    numWHPhrases = 0;

    String tregexOpStr;
    TregexPattern matchPattern;
    TregexMatcher matcher;
    Tree tmp;

    //find and mark the main clause subject
    tregexOpStr = "ROOT < (S < (NP|SBAR=subj $+ /,/ !$++ NP|SBAR))";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(copyTree);
    if (matcher.find()) {
        tmp = matcher.getNode("subj");
        tmp.label().setValue(tmp.label().toString() + "-0");
        numWHPhrases++;
    }

    //noun phrases, prepositional phrases, and SBARs
    tregexOpStr = "ROOT=root << NP|PP|SBAR=np";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(copyTree);
    while (matcher.find()) {
        tmp = matcher.getNode("np");
        tmp.label().setValue(tmp.label().toString() + "-" + numWHPhrases);
        numWHPhrases++;
    }

    if (GlobalProperties.getDebug()) {
        System.err.println("markPossibleAnswerPhrases: " + copyTree.toString());
    }
    return copyTree;
}

From source file:edu.cmu.ark.SentenceSimplifier.java

License:Open Source License

/**
 * Produces one sentence per conjunct of a conjoined verb, verb phrase,
 * clause, or SBAR,
 * e.g., John ran and Bill walked.  -> John ran. Bill walked.
 *
 * @param extracted collection handed to addIfNovel for each new sentence
 * @param input     question whose intermediate tree is searched
 */
private void extractConjoinedPhrases(Collection<Question> extracted, Question input) {
    String tregexOpStr = "__ "
            //get the first conjunction, to avoid spurious duplicate matches
            + " [ < (VP < (/VB.*/=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) "
            // verb phrases may be conjoined by commas and adverbs (e.g., "John ran, then walked.")
            + " | < (VP < (VP=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) "
            + " | < (S|SINV < (S|SINV=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) "
            + " | < (S|SINV < (S|SINV=child $ (/:/ < /;/ !$++ /:/))) "
            //alternatives for conjoined ADJP, ADVP, and PP are disabled
            + " | < (SBAR < (SBAR=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) ] "
            //this cannot be nested within a larger conjunction or followed by a conjunction (we recur later to catch this)
            + " !$ (CC|CONJP !< or|nor)"
            + " !< (CC|CONJP !< or|nor) "
            + " !.. (CC|CONJP !< or|nor > NP|PP|S|SBAR|VP) !>> SBAR";

    TregexPattern conjunctionPattern = TregexPatternFactory.getPattern(tregexOpStr);
    Tree sourceTree = input.getIntermediateTree();
    TregexMatcher matcher = conjunctionPattern.matcher(sourceTree);

    while (matcher.find()) {
        // remember the conjunct by node number so it can be found again in the copy
        int conjunctNumber = matcher.getNode("child").nodeNumber(sourceTree);

        //make a copy of the input for this iteration
        Tree reduced = sourceTree.deeperCopy();
        removeConjoinedSiblingsHelper(reduced, conjunctNumber);

        //for conjoined main clauses, add punctuation if necessary
        AnalysisUtilities.addPeriodIfNeeded(reduced);
        addQuotationMarksIfNeeded(reduced);

        //make a new Question object and add it
        Question result = input.deeperCopy();
        result.setIntermediateTree(reduced);
        if (GlobalProperties.getDebug()) {
            System.err.println("extractConjoinedPhrases: " + reduced.toString());
        }
        if (GlobalProperties.getComputeFeatures()) {
            result.setFeatureValue("extractedFromConjoinedPhrases", 1.0); //old feature name
        }
        if (GlobalProperties.getComputeFeatures()) {
            result.setFeatureValue("extractedFromConjoined", 1.0);
        }
        addIfNovel(extracted, result);
    }
}

From source file:edu.cmu.ark.SentenceSimplifier.java

License:Open Source License

/**
 * Replaces the parent of the selected node with the node itself, so that the
 * node's siblings disappear from the tree.
 *
 * @param copy       tree to modify in place
 * @param childindex node number (as used by getNodeNumber) of the node to keep
 */
private void removeConjoinedSiblingsHelper(Tree copy, int childindex) {
    if (GlobalProperties.getDebug()) {
        System.err.println("removeConjoinedSiblingsHelper: " + copy.toString());
    }

    Tree conjunct = copy.getNodeNumber(childindex);
    Tree conjunctParent = conjunct.parent(copy);
    Tree grandparent = conjunctParent.parent(copy);
    int slot = grandparent.indexOf(conjunctParent);

    boolean verbLabel = conjunct.label().toString().startsWith("VB");
    if (!verbLabel) {
        grandparent.setChild(slot, conjunct);
        return;
    }

    //By an annoying PTB convention, some verb phrase conjunctions
    //can conjoin two verb preterminals under a VP,
    //rather than only allowing VP nodes to be conjoined.
    //e.g., John walked and played.
    //So, we add an extra VP node in between if necessary
    Tree wrapper = factory.newTreeNode("VP", new ArrayList<Tree>());
    wrapper.addChild(conjunct);
    grandparent.removeChild(slot);
    grandparent.addChild(slot, wrapper);
}

From source file:edu.cmu.ark.SentenceSimplifier.java

License:Open Source License

/**
 * Turns a participial modifier of a verb phrase into its own sentence,
 * reusing the subject of the enclosing clause, e.g.,
 * John studied, hoping to get a good grade. -> John hoped to get a good grade.
 *
 * @param extracted collection handed to addIfNovel for each new sentence
 * @param input     question whose intermediate tree is searched
 */
private void extractVerbParticipialModifiers(Collection<Question> extracted, Question input) {
    String tregexOpStr = "S=sub $- /,/ !< NP < (VP=participial < VBG=verb) "
            + " >+(VP) (S|SINV < NP=subj) "
            //tense determined by top-most verb
            + " >> (ROOT <<# /VB.*/=tense) ";

    TregexMatcher matcher = TregexPatternFactory.getPattern(tregexOpStr)
            .matcher(input.getIntermediateTree());
    while (matcher.find()) {
        String targetPos = findTense(matcher.getNode("tense"));
        Tree participialCopy = matcher.getNode("participial").deeperCopy();
        Tree gerund = matcher.getNode("verb");

        // re-inflect the VBG verb to the tense found above
        String lemma = AnalysisUtilities.getInstance().getLemma(gerund.getChild(0).label().toString(),
                gerund.label().toString());
        String inflected = AnalysisUtilities.getInstance().getSurfaceForm(lemma, targetPos);

        // swap the gerund in the copied VP for the re-inflected verb
        int gerundSlot = participialCopy.indexOf(gerund);
        participialCopy.removeChild(gerundSlot);
        Tree inflectedNode = AnalysisUtilities.getInstance()
                .readTreeFromString("(" + targetPos + " " + inflected + ")");
        participialCopy.addChild(gerundSlot, inflectedNode);

        // assemble subject + rewritten VP into a new sentence
        String sentenceStr = "(ROOT (S " + matcher.getNode("subj").toString() + " "
                + participialCopy.toString() + " (. .)))";
        Tree sentence = AnalysisUtilities.getInstance().readTreeFromString(sentenceStr);
        correctTense(sentence.getChild(0).getChild(0), sentence.getChild(0));

        addQuotationMarksIfNeeded(sentence);
        Question result = input.deeperCopy();
        result.setIntermediateTree(sentence);
        if (GlobalProperties.getComputeFeatures()) {
            result.setFeatureValue("extractedFromParticipial", 1.0);
        }
        if (GlobalProperties.getComputeFeatures()) {
            result.setFeatureValue("extractedFromVerbParticipial", 1.0);
        }
        if (GlobalProperties.getDebug()) {
            System.err.println("extractVerbParticipialModifiers: " + sentence.toString());
        }
        addIfNovel(extracted, result);
    }
}

From source file:edu.cmu.ark.SentenceSimplifier.java

License:Open Source License

/**
 * Extracts the clause inside a subordinate SBAR as a standalone sentence,
 * e.g., As John slept, I studied. ->  John slept.
 *
 * @param extracted collection handed to addIfNovel for each new sentence
 * @param input     question whose intermediate tree is searched
 */
private void extractSubordinateClauses(Collection<Question> extracted, Question input) {
    String tregexOpStr = " SBAR [ > VP < IN | > S|SINV ]  " //not a complement
            + " !< (IN < if|unless|that)" //not a conditional antecedent
            + " < (S=sub !< (VP < VBG)) " //not a participial phrase
            + " >S|SINV|VP "; //not part of a noun phrase or PP (other methods for those)

    TregexMatcher matcher = TregexPatternFactory.getPattern(tregexOpStr)
            .matcher(input.getIntermediateTree());
    while (matcher.find()) {
        // wrap a copy of the matched clause in a fresh ROOT node
        Tree sentence = factory.newTreeNode("ROOT", new ArrayList<Tree>());
        Tree clause = matcher.getNode("sub");
        sentence.addChild(clause.deeperCopy());

        AnalysisUtilities.addPeriodIfNeeded(sentence);
        addQuotationMarksIfNeeded(sentence);

        Question result = input.deeperCopy();
        result.setIntermediateTree(sentence);
        if (GlobalProperties.getComputeFeatures()) {
            result.setFeatureValue("extractedFromFiniteClause", 1.0); //old feature name
        }
        if (GlobalProperties.getComputeFeatures()) {
            result.setFeatureValue("extractedFromSubordinateClause", 1.0);
        }
        if (GlobalProperties.getDebug()) {
            System.err.println("extractSubordinateClauses: " + sentence.toString());
        }
        addIfNovel(extracted, result);
    }
}

From source file:edu.cmu.ark.SentenceSimplifier.java

License:Open Source License

/**
 * Extracts the clause inside an SBAR that follows a verb as a standalone
 * sentence, but only when verbImpliesComplement accepts the lemma of that verb.
 *
 * @param extracted collection handed to addIfNovel for each new sentence
 * @param input     question whose intermediate tree is searched
 */
private void extractComplementClauses(Collection<Question> extracted, Question input) {
    Tree subord;
    String tregexOpStr;
    TregexPattern matchPattern;
    TregexMatcher matcher;

    //TODO should also address infinitive complements
    tregexOpStr = "SBAR " + " < (S=sub !< (VP < VBG)) " + //not a participial phrase
            " !> NP|PP " + //not part of a noun phrase or PP (other methods for those)
            " [ $- /^VB.*/=verb | >+(SBAR) (SBAR $- /^VB.*/=verb) ] "; //complement of a VP (follows the verb)

    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());
    while (matcher.find()) {
        //check the governing verb first so that rejected matches
        //do not allocate a tree that is immediately thrown away
        Tree verb = matcher.getNode("verb");
        String verbLemma = AnalysisUtilities.getInstance().getLemma(verb.yield().toString(),
                verb.label().toString());
        if (!verbImpliesComplement(verbLemma)) {
            continue;
        }

        //wrap a copy of the complement clause in a fresh ROOT node
        Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>());
        subord = matcher.getNode("sub");
        newTree.addChild(subord.deeperCopy());

        AnalysisUtilities.addPeriodIfNeeded(newTree);
        addQuotationMarksIfNeeded(newTree);
        Question newTreeWithFeatures = input.deeperCopy();
        newTreeWithFeatures.setIntermediateTree(newTree);
        if (GlobalProperties.getComputeFeatures()) {
            newTreeWithFeatures.setFeatureValue("extractedFromFiniteClause", 1.0); //old feature name
        }
        if (GlobalProperties.getComputeFeatures()) {
            newTreeWithFeatures.setFeatureValue("extractedFromComplementClause", 1.0);
        }
        if (GlobalProperties.getDebug()) {
            System.err.println("extractComplementClauses: " + newTree.toString());
        }
        addIfNovel(extracted, newTreeWithFeatures);
    }
}

From source file:edu.cmu.ark.SentenceSimplifier.java

License:Open Source License

/**
 * Turns an appositive into a copula sentence about the noun it modifies,
 * e.g., Lincoln, the 16th president, was tall. -> Lincoln was the 16th president.
 * The meeting, in 1984, was important. -> The meeting was in 1984.
 *
 * The copula is past tense when the main verb of the input is tagged VBD and
 * present tense otherwise; its number follows isPlural on the noun.
 *
 * @param extracted collection handed to addIfNovel for each new sentence
 * @param input     question whose intermediate tree is searched
 */
private void extractAppositives(Collection<Question> extracted, Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    TregexMatcher matcher;

    tregexOpStr = "NP < (NP=noun !$-- NP $+ (/,/ $++ NP|PP=appositive !$ CC|CONJP)) "
            + " >> (ROOT <<# /^VB.*/=mainverb) "; //extract the main verb to capture the verb tense
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());
    while (matcher.find()) {
        Tree verbtree = matcher.getNode("mainverb");
        Tree nountree = matcher.getNode("noun").deeperCopy();
        Tree appositivetree = matcher.getNode("appositive");

        makeDeterminerDefinite(nountree);

        //note: pairs of proper nouns (e.g., "Pittsburgh, PA") are NOT filtered out here;
        //a check that skipped them was disabled in the original code

        //make a new tree for a copula sentence with the noun and appositive
        String pos = verbtree.label().toString();
        boolean plural = isPlural(nountree);
        String copula;
        if (pos.equals("VBD")) {
            copula = plural ? "(VBD were)" : "(VBD was)";
        } else {
            //present-tense forms carry the Penn Treebank present-tense tags
            //(VBP for "are", VBZ for "is"), not the past-tense tag VBD
            copula = plural ? "(VBP are)" : "(VBZ is)";
        }
        Tree newTree = AnalysisUtilities.getInstance().readTreeFromString(
                "(ROOT (S " + nountree + " (VP " + copula + " " + appositivetree + ") (. .)))");

        addQuotationMarksIfNeeded(newTree);
        if (GlobalProperties.getDebug()) {
            System.err.println("extractAppositives: " + newTree.toString());
        }
        Question newTreeWithFeatures = input.deeperCopy();
        newTreeWithFeatures.setIntermediateTree(newTree);
        if (GlobalProperties.getComputeFeatures()) {
            newTreeWithFeatures.setFeatureValue("extractedFromAppositive", 1.0);
        }

        addIfNovel(extracted, newTreeWithFeatures);
    }
}

From source file:edu.cmu.ark.SentenceSimplifier.java

License:Open Source License

/**
 * Builds a standalone sentence from each matched relative clause by putting
 * the modified noun phrase back into the clause,
 * e.g., John, who is a friend of mine, likes Susan. -> John is a friend of mine.
 *
 * @param extracted collection handed to addIfNovel for each new sentence
 * @param input     question whose intermediate tree is searched
 */
private void extractNonRestrictiveRelativeClauses(Collection<Question> extracted, Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    TregexMatcher matcher;
    TregexMatcher matcherclause;

    //an NP containing an SBAR that is preceded by an NP and a comma.
    //Named nodes: np, sbar, relclause, and (depending on the alternative matched)
    //wherecomp, possessive, or preposition.
    tregexOpStr = "NP=np < (SBAR=sbar [ < (WHADVP=wherecomp < (WRB < where)) " + " | < (WHNP !< /WP\\$/) "
            + " | < (WHNP=possessive < /WP\\$/)" //John, whose car was
            + " | < (WHPP < IN|TO=preposition) ] $-- NP $- /,/ " + " < S=relclause  !< WHADJP)";

    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());

    //iterate over all the relative clauses in the input
    //and create an output sentence for each one.
    while (matcher.find()) {
        Tree missingArgumentTree = matcher.getNode("np");
        Tree relclause = matcher.getNode("relclause");
        if (missingArgumentTree == null || relclause == null)
            continue;
        //work on copies so the input tree is not modified by the edits below
        missingArgumentTree = missingArgumentTree.deeperCopy();
        relclause = relclause.deeperCopy();
        Tree possessive = matcher.getNode("possessive");
        Tree sbar = matcher.getNode("sbar").deeperCopy();

        makeDeterminerDefinite(missingArgumentTree);

        if (possessive != null) {
            //"whose X": drop the first child of the WHNP copy and attach its remaining
            //children after the noun phrase plus a possessive 's
            possessive = possessive.deeperCopy();
            possessive.removeChild(0);
            String newTree = "(NP (NP " + missingArgumentTree.toString() + " (POS 's))";
            for (int i = 0; i < possessive.numChildren(); i++)
                newTree += possessive.getChild(i).toString() + " ";
            newTree += ")";
            missingArgumentTree = AnalysisUtilities.getInstance().readTreeFromString(newTree);
        }

        //remove the relative clause and the commas surrounding it from the missing argument tree
        //(only direct children are inspected; the SBAR is found with equals() against the copy)
        for (int i = 0; i < missingArgumentTree.numChildren(); i++) {
            if (missingArgumentTree.getChild(i).equals(sbar)) {
                //remove the relative clause
                missingArgumentTree.removeChild(i);
                //remove the comma after the relative clause
                if (i < missingArgumentTree.numChildren()
                        && missingArgumentTree.getChild(i).label().toString().equals(",")) {
                    missingArgumentTree.removeChild(i);
                }
                //remove the comma before the relative clause
                if (i > 0 && missingArgumentTree.getChild(i - 1).label().toString().equals(",")) {
                    missingArgumentTree.removeChild(i - 1);
                    //one more element was removed in front of position i
                    i--;
                }
                //step back so the loop's i++ lands on the child that shifted into the freed slot
                i--;
            }
        }

        //put the noun in the clause at the topmost place with an opening for a noun. 
        //Note that this may mess up if there are noun phrase adjuncts like "The man I met Tuesday".

        //specifically: 
        //the parent of the noun can be either a clause (S) as in "The man who met me"
        //or a verb phrase as in "The man who I met".
        //for verb phrases, add the noun to the end since it will be an object.
        //for clauses, add the noun to the beginning since it will be the subject.
        tregexOpStr = "S|VP=newparent !< NP < (VP=verb !< TO !$ TO)";
        matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
        matcherclause = matchPattern.matcher(relclause);
        boolean subjectMovement = true;
        if (!matcherclause.find()) {
            //no slot for a subject was found: fall back to looking for an object slot after a verb
            tregexOpStr = "VP=newparent !< VP < /VB.*/=verb !>> (S !< NP) !<< (VP !< VP !< NP)";
            matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
            matcherclause = matchPattern.matcher(relclause);
            subjectMovement = false;
        }

        //reset (so the first match isn't skipped)
        matcherclause = matchPattern.matcher(relclause);

        if (matcherclause.find()) {
            Tree newparenttree = matcherclause.getNode("newparent");
            Tree verbtree = matcherclause.getNode("verb");
            boolean ppRelativeClause = false;

            if (matcher.getNode("wherecomp") != null) {
                //"where" clauses: wrap the noun phrase in a PP headed by "at"
                String tmp = "(PP (IN at) " + missingArgumentTree.toString() + ")";
                missingArgumentTree = AnalysisUtilities.getInstance().readTreeFromString(tmp);
                ppRelativeClause = true;
                subjectMovement = false;
            } else if (matcher.getNode("preposition") != null) {
                //WHPP clauses: wrap the noun phrase in a PP headed by the matched preposition
                String tmp = "(PP (IN " + matcher.getNode("preposition").yield().toString() + ") "
                        + missingArgumentTree.toString() + ")";
                missingArgumentTree = AnalysisUtilities.getInstance().readTreeFromString(tmp);
                ppRelativeClause = true;
            }

            if (subjectMovement) { //subject
                //insert directly before the verb node
                newparenttree.addChild(newparenttree.indexOf(verbtree), missingArgumentTree);
            } else { // newparentlabel is VP   
                if (ppRelativeClause)
                    //PPs go at the end of the parent
                    newparenttree.addChild(newparenttree.numChildren(), missingArgumentTree);
                else
                    //objects go directly after the verb node
                    newparenttree.addChild(newparenttree.indexOf(verbtree) + 1, missingArgumentTree);
            }

            //create a new tree with punctuation
            Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>());
            newTree.addChild(relclause);
            AnalysisUtilities.addPeriodIfNeeded(newTree);

            if (GlobalProperties.getDebug())
                System.err.println("extractRelativeClauses: " + newTree.toString());
            addQuotationMarksIfNeeded(newTree);
            Question newTreeWithFeatures = input.deeperCopy();
            newTreeWithFeatures.setIntermediateTree(newTree);
            if (GlobalProperties.getComputeFeatures())
                newTreeWithFeatures.setFeatureValue("extractedFromRelativeClause", 1.0);
            addIfNovel(extracted, newTreeWithFeatures);
        }
    }
}