Example usage for edu.stanford.nlp.trees Tree addChild

List of usage examples for edu.stanford.nlp.trees Tree addChild

Introduction

On this page you can find example usage for edu.stanford.nlp.trees Tree addChild.

Prototype

public void addChild(int i, Tree t) 

Source Link

Document

Adds the tree t at the index position among the daughters.

Usage

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Extracts one simplified sentence per conjunct of a conjoined noun phrase,
 * e.g., John and James like Susan.  ->  John likes Susan.
 *
 * For every conjunct found by the tregex pattern, a copy of the input is made,
 * the sibling conjuncts (and the conjunctions/punctuation joining them) are removed
 * from the copy, verb agreement is repaired with correctTense, and the copy is
 * added to the output collection.
 *
 * @param extracted collection that receives the newly created questions
 * @param input     question whose intermediate tree is searched for conjoined NPs
 */
private void extractConjoinedNPs(Collection<Question> extracted, Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    Tree conjoinedNode;
    Tree parent;

    TregexMatcher matcher;
    Question newQuestion;

    //only extract conjoined NPs that are arguments or adjuncts of the main verb
    // in the tree, this means the closest S will be the one under the root
    tregexOpStr = "NP=parent < (CONJP|CC !< or|nor [ "
            + " $+ /^(N.*|PRP|SBAR)$/=child $-- /^(N.*|PRP|SBAR)$/ | " //there must be a noun on each side of the conjunction
            + " $-- /^(N.*|PRP|SBAR)$/=child $+ /^(N.*|PRP|SBAR)$/ ] ) " //this avoids extracting from flat NPs such as "the smaller and darker form"
            + " !>> (/.*/ $ (CC|CONJP !< or|nor)) " //this cannot be nested within a larger conjunction or followed by a conjunction (we recur later to catch this) 
            + " !$ (CC|CONJP !< or|nor)" + " !.. (CC|CONJP !< or|nor > NP|PP|S|SBAR|VP) !>> SBAR ";
    //+ " >> (ROOT !< (S <+(VP) (/^VB.*$/ < are|were|be|seem|appear))) " ; //don't break plural predicate nominatives (e.g., "John and Mary are two of my best friends.")
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());

    //Parallel lists: parentIDs.get(k) and nodeIndexes.get(k) always describe the
    //same conjunct (node number of its parent NP, index among that NP's children).
    //Entries are inserted in (parentID, childIndex) order and duplicates are skipped
    //as a pair, so the two lists can never drift out of step.
    List<Integer> nodeIndexes = new ArrayList<Integer>();
    List<Integer> parentIDs = new ArrayList<Integer>();

    while (matcher.find()) {
        parent = matcher.getNode("parent");
        conjoinedNode = matcher.getNode("child");
        int parentID = parent.nodeNumber(input.getIntermediateTree());
        int idx = parent.objectIndexOf(conjoinedNode);

        //find the sorted insertion point, noticing an identical pair on the way
        int pos = 0;
        boolean alreadyRecorded = false;
        while (pos < parentIDs.size()) {
            int otherParentID = parentIDs.get(pos);
            int otherIdx = nodeIndexes.get(pos);
            if (otherParentID == parentID && otherIdx == idx) {
                alreadyRecorded = true;
                break;
            }
            if (otherParentID > parentID || (otherParentID == parentID && otherIdx > idx)) {
                break;
            }
            pos++;
        }
        if (!alreadyRecorded) {
            parentIDs.add(pos, parentID);
            nodeIndexes.add(pos, idx);
        }
    }

    //for each of the conjoined children,
    //create a new tree by removing all the nodes they are conjoined with
    for (int i = 0; i < nodeIndexes.size(); i++) {
        newQuestion = input.deeperCopy();

        //the node number recorded on the input tree is looked up in the copy
        //(assumes deeperCopy preserves the tree shape -- TODO confirm)
        Tree t = newQuestion.getIntermediateTree();
        parent = t.getNodeNumber(parentIDs.get(i));
        Tree gparent = parent.parent(t);
        conjoinedNode = parent.getChild(nodeIndexes.get(i));
        String siblingLabel;

        //Remove all the nodes that are conjoined
        //with the selected noun (or are conjunctions, commas).
        //These can have labels NP, NN, ..., PRP for pronouns, CC, "," for commas, ":" for semi-colons
        for (int j = 0; j < parent.numChildren(); j++) {
            if (parent.getChild(j) == conjoinedNode) {
                continue;
            }
            siblingLabel = parent.getChild(j).label().toString();
            if (siblingLabel.matches("^[NCP,:S].*")) {
                parent.removeChild(j);
                j--; //the next sibling has shifted into slot j
            }
        }

        //if there is a trivial unary "NP -> NP",
        //remove the parent and put the child in its place.
        //The label is compared via toString(): comparing the label object itself
        //to a String is never true, which left this branch dead.
        if (parent.numChildren() == 1 && parent.getChild(0).label().toString().equals("NP")) {
            int tmpIndex = gparent.objectIndexOf(parent);
            gparent.removeChild(tmpIndex);
            gparent.addChild(tmpIndex, parent.getChild(0));
        }

        correctTense(conjoinedNode, gparent);
        addQuotationMarksIfNeeded(newQuestion.getIntermediateTree());

        //if(GlobalProperties.getDebug()) System.err.println("extractConjoinedNPs: "+newQuestion.getIntermediateTree().toString());
        if (this.getComputeFeatures) {
            newQuestion.setFeatureValue("extractedFromConjoinedPhrases", 1.0); //old feature name
            newQuestion.setFeatureValue("extractedFromConjoinedNPs", 1.0);
        }
        extracted.add(newQuestion);
    }
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Repairs subject-verb agreement after a subject has been changed (for example
 * after one conjunct was pulled out of a conjoined subject).
 *
 * e.g., John and Mary like Bill.  -> John likes Bill.  Mary likes Bill.
 * John and I like Bill -> John likes Bill.  I like Bill.
 * John and I are old. -> I am old.  John is old.
 *
 * Every child of clause labeled VP is inspected; if the subject's head word does not
 * carry a tag ending in "S" and the VP's head verb is tagged VB, VBP or VBD, the verb
 * preterminal is replaced with a re-conjugated one.
 *
 * @param subject the (new) subject phrase the verb must agree with
 * @param clause  the node whose VP children are corrected
 */
private void correctTense(Tree subject, Tree clause) {
    //these depend only on the subject, so compute them once instead of per child
    String headPOS = subject.headPreTerminal(this.hf).label().toString();
    String subjectText = subject.yield().toString();
    boolean firstPerson = subjectText.equals("I");
    //case-insensitive: a sentence-initial "You" must still be treated as second person
    boolean secondPerson = subjectText.equalsIgnoreCase("you");

    //correct verb tense when modifying subjects
    for (Tree uncle : clause.getChildrenAsList()) {
        //only a VP sibling of a subject without an "S"-final head tag needs attention
        if (!uncle.label().toString().equals("VP") || headPOS.endsWith("S")) {
            continue;
        }

        Tree verbPreterminal = uncle.headPreTerminal(this.hf);
        String oldVerbPOS = verbPreterminal.label().toString();
        String newVerbPOS;
        if (oldVerbPOS.equals("VB") || oldVerbPOS.equals("VBP")) { //the parser confuses VBP with VB
            //original main verb was plural but the conjoined subject word is singular
            //e.g., John (and Mary) like Bill.  -> John like Bill.
            newVerbPOS = (firstPerson || secondPerson) ? "VBP" : "VBZ";
        } else if (oldVerbPOS.equals("VBD")) {
            newVerbPOS = "VBD";
        } else {
            continue; //any other verb form is left untouched
        }

        String verbLemma = QuestionUtil.getLemma(verbPreterminal.getChild(0).label().toString(), oldVerbPOS);
        String newVerb;
        //special cases
        if (verbLemma.equals("be") && newVerbPOS.equals("VBD")) {
            //"you" takes "were" even though it is not plural-tagged
            if (secondPerson || subject.label().toString().endsWith("S")) {
                newVerb = "were";
            } else {
                newVerb = "was";
            }
        } else if (verbLemma.equals("be") && firstPerson && newVerbPOS.equals("VBP")) {
            newVerb = "am";
        } else { //default
            newVerb = this.conjugator.getSurfaceForm(verbLemma, newVerbPOS);
        }

        //swap the old verb preterminal for the re-conjugated one, keeping its position
        Tree verbParent = verbPreterminal.parent(uncle);
        int tmpIndex = verbParent.objectIndexOf(verbPreterminal);
        verbParent.removeChild(tmpIndex);
        verbParent.addChild(tmpIndex,
                QuestionUtil.readTreeFromString("(" + newVerbPOS + " " + newVerb + ")"));
    }
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Turns a participial clause that modifies a verb phrase into its own sentence,
 * e.g., John studied, hoping to get a good grade. -> John hoped to get a good grade.
 *
 * The participle (VBG) is re-conjugated to the tense found by findTense on the
 * top-most verb, and the result is combined with the subject of the enclosing clause.
 *
 * @param extracted collection that receives the newly created questions (via addIfNovel)
 * @param input     question whose intermediate tree is searched
 */
private void extractVerbParticipialModifiers(Collection<Question> extracted, Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    TregexMatcher matcher;

    tregexOpStr = "S=sub $- /,/ !< NP < (VP=participial < VBG=verb) " + " >+(VP) (S|SINV < NP=subj) "
            + " >> (ROOT <<# /VB.*/=tense) "; //tense determined by top-most verb

    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());
    while (matcher.find()) {
        String verbPOS = findTense(matcher.getNode("tense"));
        Tree participial = matcher.getNode("participial");
        Tree verb = matcher.getNode("verb");

        //objectIndexOf compares by identity, so the index has to be looked up in the
        //ORIGINAL participial node; the copy made below contains different objects
        //and would yield -1 (making removeChild fail).
        int verbIndex = participial.objectIndexOf(verb);
        Tree p = participial.deepCopy();

        String verbLemma = QuestionUtil.getLemma(verb.getChild(0).label().toString(), verb.label().toString());
        String newVerb = this.conjugator.getSurfaceForm(verbLemma, verbPOS);

        //replace the participle in the copy with the re-conjugated verb
        p.removeChild(verbIndex);
        p.addChild(verbIndex, QuestionUtil.readTreeFromString("(" + verbPOS + " " + newVerb + ")"));

        String treeStr = "(ROOT (S " + matcher.getNode("subj").toString() + " " + p.toString() + " (. .)))";
        Tree newTree = QuestionUtil.readTreeFromString(treeStr);
        correctTense(newTree.getChild(0).getChild(0), newTree.getChild(0));

        addQuotationMarksIfNeeded(newTree);
        Question newTreeWithFeatures = input.deeperCopy();
        newTreeWithFeatures.setIntermediateTree(newTree);
        if (this.getComputeFeatures) {
            newTreeWithFeatures.setFeatureValue("extractedFromParticipial", 1.0); //old feature name
            newTreeWithFeatures.setFeatureValue("extractedFromVerbParticipial", 1.0);
            //NOTE(review): this trace is gated on getComputeFeatures rather than a debug flag -- confirm intent
            System.err.println("extractVerbParticipialModifiers: " + newTree.toString());
        }
        addIfNovel(extracted, newTreeWithFeatures);
    }
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * e.g., John, who is a friend of mine, likes Susan. -> John is a friend of mine.
 * /*from ww  w .ja  v  a2  s.  com*/
 */
private void extractNonRestrictiveRelativeClauses(Collection<Question> extracted, Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    TregexMatcher matcher;
    TregexMatcher matcherclause;

    tregexOpStr = "NP=np < (SBAR=sbar [ < (WHADVP=wherecomp < (WRB < where)) " + " | < (WHNP !< /WP\\$/) "
            + " | < (WHNP=possessive < /WP\\$/)" //John, whose car was
            + " | < (WHPP < IN|TO=preposition) ] $-- NP $- /,/ " + " < S=relclause  !< WHADJP)";

    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());

    //iterate over all the relative clauses in the input
    //and create an output sentence for each one.
    while (matcher.find()) {
        Tree missingArgumentTree = matcher.getNode("np");
        Tree relclause = matcher.getNode("relclause");
        if (missingArgumentTree == null || relclause == null)
            continue;
        missingArgumentTree = missingArgumentTree.deepCopy();
        relclause = relclause.deepCopy();
        Tree possessive = matcher.getNode("possessive");
        Tree sbar = matcher.getNode("sbar").deepCopy();

        makeDeterminerDefinite(missingArgumentTree);

        if (possessive != null) {
            possessive = possessive.deepCopy();
            possessive.removeChild(0);
            String newTree = "(NP (NP " + missingArgumentTree.toString() + " (POS 's))";
            for (int i = 0; i < possessive.numChildren(); i++)
                newTree += possessive.getChild(i).toString() + " ";
            newTree += ")";
            missingArgumentTree = QuestionUtil.readTreeFromString(newTree);
        }

        //remove the relative clause and the commas surrounding it from the missing argument tree
        for (int i = 0; i < missingArgumentTree.numChildren(); i++) {
            if (missingArgumentTree.getChild(i).equals(sbar)) {
                //remove the relative clause
                missingArgumentTree.removeChild(i);
                //remove the comma after the relative clause
                if (i < missingArgumentTree.numChildren()
                        && missingArgumentTree.getChild(i).label().toString().equals(",")) {
                    missingArgumentTree.removeChild(i);
                }
                //remove the comma before the relative clause
                if (i > 0 && missingArgumentTree.getChild(i - 1).label().toString().equals(",")) {
                    missingArgumentTree.removeChild(i - 1);
                    i--;
                }
                i--;
            }
        }

        //put the noun in the clause at the topmost place with an opening for a noun. 
        //Note that this may mess up if there are noun phrase adjuncts like "The man I met Tuesday".

        //specifically: 
        //the parent of the noun can be either a clause (S) as in "The man who met me"
        //or a verb phrase as in "The man who I met".
        //for verb phrases, add the noun to the end since it will be an object.
        //for clauses, add the noun to the beginning since it will be the subject.
        tregexOpStr = "S|VP=newparent !< NP < (VP=verb !< TO !$ TO)";
        matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
        matcherclause = matchPattern.matcher(relclause);
        boolean subjectMovement = true;
        if (!matcherclause.find()) {
            tregexOpStr = "VP=newparent !< VP < /VB.*/=verb !>> (S !< NP) !<< (VP !< VP !< NP)";
            matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
            matcherclause = matchPattern.matcher(relclause);
            subjectMovement = false;
        }

        //reset (so the first match isn't skipped)
        matcherclause = matchPattern.matcher(relclause);

        if (matcherclause.find()) {
            Tree newparenttree = matcherclause.getNode("newparent");
            Tree verbtree = matcherclause.getNode("verb");
            boolean ppRelativeClause = false;

            if (matcher.getNode("wherecomp") != null) {
                String tmp = "(PP (IN at) " + missingArgumentTree.toString() + ")";
                missingArgumentTree = QuestionUtil.readTreeFromString(tmp);
                ppRelativeClause = true;
                subjectMovement = false;
            } else if (matcher.getNode("preposition") != null) {
                String tmp = "(PP (IN " + matcher.getNode("preposition").yield().toString() + ") "
                        + missingArgumentTree.toString() + ")";
                missingArgumentTree = QuestionUtil.readTreeFromString(tmp);
                ppRelativeClause = true;
            }

            if (subjectMovement) { //subject
                newparenttree.addChild(newparenttree.objectIndexOf(verbtree), missingArgumentTree);
            } else { // newparentlabel is VP   
                if (ppRelativeClause)
                    newparenttree.addChild(newparenttree.numChildren(), missingArgumentTree);
                else
                    newparenttree.addChild(newparenttree.objectIndexOf(verbtree) + 1, missingArgumentTree);
            }

            //create a new tree with punctuation
            Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>());
            newTree.addChild(relclause);
            QuestionUtil.addPeriodIfNeeded(newTree);

            //if(GlobalProperties.getDebug()) System.err.println("extractRelativeClauses: "+ newTree.toString());
            addQuotationMarksIfNeeded(newTree);
            Question newTreeWithFeatures = input.deeperCopy();
            newTreeWithFeatures.setIntermediateTree(newTree);
            if (this.getComputeFeatures)
                newTreeWithFeatures.setFeatureValue("extractedFromRelativeClause", 1.0);
            addIfNovel(extracted, newTreeWithFeatures);
        }
    }
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Turns a participial phrase that modifies a noun into its own sentence,
 * e.g., John, hoping to get a good grade, studied. -> John hoped to get a good grade.
 *   Walking to the store, John saw Susan -> John was walking to the store.
 *
 *   NOTE: This method produces false positives for sentences like, 
 *            "Broadly speaking, the project was successful."
 *         where the participial phrase does not modify the subject.
 *   
 * @param extracted collection that receives the newly created questions
 * @param input     question whose intermediate tree is searched
 */
private void extractNounParticipialModifiers(Collection<Question> extracted, Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    TregexMatcher matcher;

    tregexOpStr = "ROOT < (S " + " [ << (NP < (NP=subj  $++ (/,/ $+ (VP=modifier <# VBN|VBG|VP=tense )))) " //modifiers that appear after nouns
            + " | < (S !< NP|SBAR < (VP=modifier <# VBN|VBG|VP=tense) $+ (/,/ $+ NP=subj)) " //modifiers before the subject. e.g., Founded by John, the company...
            + " | < (SBAR < (S !< NP|SBAR < (VP=modifier <# VBN|VBG=tense)) $+ (/,/ $+ NP=subj)) " //e.g., While walking to the store, John saw Susan.
            + " | < (PP=modifier !< NP <# VBG=tense $+ (/,/ $+ NP=subj)) ] ) " // e.g., Walking to the store, John saw Susan.
            + " <<# /^VB.*$/=maintense "; //tense determined by top-most verb

    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());
    while (matcher.find()) {
        Tree subj = matcher.getNode("subj");
        Tree vptree = matcher.getNode("modifier");
        //guard before either node is dereferenced; skip only this match
        if (vptree == null || subj == null) {
            continue;
        }

        Tree nountree = subj.deepCopy();
        Tree verb = matcher.getNode("tense");
        makeDeterminerDefinite(nountree);

        //NOTE(review): vptree is still the matched node here (not a copy), so this
        //relabel appears to modify the tree being matched -- confirm this is intended
        if (vptree.label().toString().equals("PP")) {
            vptree.label().setValue("VP");
        }
        String verbPOS = findTense(matcher.getNode("maintense"));

        String newTreeStr;
        if (verb.label().toString().equals("VBG")) {
            //for present partcipials, change the tense to the tense of the main verb
            //e.g., walking to the store -> walked to the store
            String verbLemma = QuestionUtil.getLemma(verb.getChild(0).label().toString(),
                    verb.label().toString());
            String newVerb = this.conjugator.getSurfaceForm(verbLemma, verbPOS);
            //identity-based lookup must use the original node, so it precedes the copy
            int verbIndex = vptree.objectIndexOf(verb);
            vptree = vptree.deepCopy();
            vptree.removeChild(verbIndex);
            vptree.addChild(verbIndex, QuestionUtil.readTreeFromString("(" + verbPOS + " " + newVerb + ")"));
            //NOTE(review): this branch uses the matched subject, not nountree (the copy
            //passed through makeDeterminerDefinite) -- confirm which one is wanted
            newTreeStr = "(ROOT (S " + subj.toString() + " " + vptree.toString()
                    + " (. .)))";
        } else {
            //for past participials, add a copula
            //e.g., John, exhausted, -> John was exhausted
            //(or for conjunctions, just add the copula---kind of a hack to make the moby dick sentence work out)
            String auxiliary;
            boolean plural = isPlural(nountree);
            if (verbPOS.equals("VBP") || verbPOS.equals("VBD")) {
                auxiliary = plural ? "(VBD were)" : "(VBD was)";
            } else {
                auxiliary = plural ? "(VB are)" : "(VBZ is)";
            }

            newTreeStr = "(ROOT (S " + nountree + " (VP " + auxiliary + " " + vptree + ") (. .)))";
        }

        Tree newTree = QuestionUtil.readTreeFromString(newTreeStr);
        correctTense(newTree.getChild(0).getChild(0), newTree.getChild(0));
        addQuotationMarksIfNeeded(newTree);

        //if(GlobalProperties.getDebug()) System.err.println("extractNounParticipialModifiers: "+ newTree.toString());
        Question newTreeWithFeatures = input.deeperCopy();
        newTreeWithFeatures.setIntermediateTree(newTree);
        if (this.getComputeFeatures) {
            newTreeWithFeatures.setFeatureValue("extractedFromParticipial", 1.0); //old feature name
            newTreeWithFeatures.setFeatureValue("extractedFromNounParticipial", 1.0);
        }
        extracted.add(newTreeWithFeatures);
    }

}

From source file:edu.cmu.ark.QuestionTransducer.java

License:Open Source License

/**
 * Builds one question tree per generated WH phrase for an "unmovable" answer phrase.
 *
 * Unlike moveWHPhrase, the answer phrase is not moved: in a copy of the input tree
 * the children of the marked answer node are replaced, in place, by the WH phrase
 * subtree. Markers are then stripped and leading adjuncts are moved.
 *
 * @param inputTree        tree containing the marked answer phrase (UNMOVABLE-NP|PP|SBAR-i)
 * @param intermediateTree tree whose yield is passed to the WH phrase generator
 * @param i                index that identifies the answer phrase marker
 * @param subjectMovement  not used by this method
 * @return the generated trees; empty if the marked answer phrase is not found
 */
private List<Tree> moveWHPhraseUnmovable(Tree inputTree, Tree intermediateTree, int i,
        boolean subjectMovement) {
    List<Tree> res = new ArrayList<Tree>();

    String marker = "/^(UNMOVABLE-NP|UNMOVABLE-PP|UNMOVABLE-SBAR)-" + i + "$/";

    //extract the "answer" phrase and generate a WH phrase from it
    String tregexOpStr = "ROOT=root < (SQ=qclause << " + marker + "=answer < VP=predicate)";
    TregexPattern matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    if (GlobalProperties.getDebug()) {
        System.err.println("moveWHPhrase: inputTree:" + inputTree.toString());
        System.err.println("moveWHPhrase: tregexOpStr:" + tregexOpStr);
    }
    TregexMatcher matcher = matchPattern.matcher(inputTree);
    if (!matcher.find()) {
        //no marked answer phrase: nothing to generate (avoids dereferencing a null node)
        return res;
    }
    Tree phraseToMove = matcher.getNode("answer");

    if (printExtractedPhrases) {
        System.out.println("EXTRACTED\t" + phraseToMove.yield().toString());
    }

    whGen.generateWHPhraseSubtrees(removeMarkersFromTree(phraseToMove), intermediateTree.yield().toString());
    List<String> whPhraseSubtrees = whGen.getWHPhraseSubtrees();

    //Now put each WH phrase into the tree and remove the original answer.
    //Operate on the tree directly rather than using tsurgeon
    //because tsurgeon can't parse operations that insert trees with special characters (e.g., ":")
    for (int j = 0; j < whPhraseSubtrees.size(); j++) {
        Tree copyTree2 = inputTree.deeperCopy();
        String whPhraseSubtree = whPhraseSubtrees.get(j);

        //locate the answer node again, this time inside the fresh copy
        matcher = matchPattern.matcher(copyTree2);
        if (!matcher.find()) {
            continue;
        }
        Tree answerNode = matcher.getNode("answer");
        if (answerNode == null) {
            continue;
        }

        //empty the answer node and put the wh phrase in as its only child
        int cc = answerNode.numChildren();
        for (int c = 0; c < cc; c++) {
            answerNode.removeChild(0);
        }
        answerNode.addChild(0, AnalysisUtilities.getInstance().readTreeFromString(whPhraseSubtree));

        copyTree2 = removeMarkersFromTree(copyTree2);
        copyTree2 = moveLeadingAdjuncts(copyTree2);

        if (GlobalProperties.getDebug()) {
            System.err.println("moveWHPhrase: " + copyTree2.toString());
        }
        res.add(copyTree2);
    }

    return res;
}

From source file:edu.cmu.ark.QuestionTransducer.java

License:Open Source License

/**
 * This method removes the answer phrase from its original position
 * and places a WH phrase at the front of the main clause.
 *
 * A template tree is built once with tsurgeon (answer pruned, placeholders inserted,
 * the SQ clause moved under a new SBARQ); then, for every generated WH phrase, a copy
 * of the template gets the WH phrase and any left-over preposition filled in.
 *
 * Note: Tsurgeon operations are perhaps not optimal here.
 * Using the Stanford API to move nodes directly might be simpler...
 *
 * @param inputTree        tree containing the marked answer phrase (NP|PP|SBAR-i)
 * @param intermediateTree tree whose yield is passed to the WH phrase generator
 * @param i                index that identifies the answer phrase marker
 * @param subjectMovement  not used by this method
 * @return the generated trees; empty if the marked answer phrase is not found
 */
private List<Tree> moveWHPhrase(Tree inputTree, Tree intermediateTree, int i, boolean subjectMovement) {
    List<Tree> res = new ArrayList<Tree>();

    String marker = "/^(NP|PP|SBAR)-" + i + "$/";

    //extract the "answer" phrase and generate a WH phrase from it
    String tregexOpStr = "ROOT=root < (SQ=qclause << " + marker + "=answer < VP=predicate)";
    TregexPattern matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    if (GlobalProperties.getDebug()) {
        System.err.println("moveWHPhrase: inputTree:" + inputTree.toString());
        System.err.println("moveWHPhrase: tregexOpStr:" + tregexOpStr);
    }
    TregexMatcher matcher = matchPattern.matcher(inputTree);
    if (!matcher.find()) {
        //no marked answer phrase: nothing to generate (avoids dereferencing a null node)
        return res;
    }
    Tree phraseToMove = matcher.getNode("answer");

    if (printExtractedPhrases) {
        System.out.println("EXTRACTED\t" + phraseToMove.yield().toString());
    }

    whGen.generateWHPhraseSubtrees(removeMarkersFromTree(phraseToMove), intermediateTree.yield().toString());
    List<String> whPhraseSubtrees = whGen.getWHPhraseSubtrees();
    List<String> leftOverPrepositions = whGen.getLeftOverPrepositions();

    Tree copyTree = inputTree.deeperCopy();
    //The placeholder is necessary because tsurgeon will complain
    //if an added node has no children. This placeholder is removed below.
    List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>();
    ps.add(Tsurgeon.parseOperation("insert (PREPPLACEHOLDER dummy) $+ answer"));
    ps.add(Tsurgeon.parseOperation("prune answer"));
    ps.add(Tsurgeon.parseOperation("insert (SBARQ=mainclause PLACEHOLDER=placeholder) >0 root"));
    ps.add(Tsurgeon.parseOperation("move qclause >-1 mainclause"));
    List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, Tsurgeon.collectOperations(ps)));
    Tsurgeon.processPatternsOnTree(ops, copyTree);

    copyTree = removeMarkersFromTree(copyTree);

    //The placeholder pattern and the "prune ph2" operation are the same for every
    //WH phrase, so they are built once here rather than on each loop iteration.
    TregexPattern placeholderPattern = TregexPatternFactory.getPattern(
            "ROOT < (SBARQ=mainclause < PLACEHOLDER=ph1) << (__=ph2Parent < PREPPLACEHOLDER=ph2)");
    List<TsurgeonPattern> prunePs = new ArrayList<TsurgeonPattern>();
    prunePs.add(Tsurgeon.parseOperation("prune ph2"));
    List<Pair<TregexPattern, TsurgeonPattern>> pruneOps = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    pruneOps.add(new Pair<TregexPattern, TsurgeonPattern>(TregexPatternFactory.getPattern("PREPPLACEHOLDER=ph2"),
            Tsurgeon.collectOperations(prunePs)));

    //Now put each WH phrase into the tree and remove the original answer.
    //Operate on the tree directly rather than using tsurgeon
    //because tsurgeon can't parse operations that insert trees with special characters (e.g., ":")
    for (int j = 0; j < whPhraseSubtrees.size(); j++) {
        Tree copyTree2 = copyTree.deeperCopy();
        String whPhraseSubtree = whPhraseSubtrees.get(j);
        String leftOverPreposition = leftOverPrepositions.get(j);

        if (GlobalProperties.getDebug()) {
            System.err.println("moveWHPhrase: whPhraseSubtree:" + whPhraseSubtree);
        }
        matcher = placeholderPattern.matcher(copyTree2);
        if (!matcher.find()) {
            continue;
        }
        Tree mainclauseNode = matcher.getNode("mainclause");
        //replace the wh placeholder with a wh phrase
        mainclauseNode.removeChild(0);
        mainclauseNode.addChild(0, AnalysisUtilities.getInstance().readTreeFromString(whPhraseSubtree));

        //Replace the pp placeholder with the left over preposition.
        //This may happen when the answer phrase was a PP.
        //e.g., John went to the game. -> What did John go to?
        Tree prepPlaceholderParent = matcher.getNode("ph2Parent");
        int index = prepPlaceholderParent.indexOf(matcher.getNode("ph2"));
        if (leftOverPreposition != null && leftOverPreposition.length() > 0) {
            prepPlaceholderParent.addChild(index,
                    AnalysisUtilities.getInstance().readTreeFromString(leftOverPreposition));
        }
        //now remove the left-over-preposition placeholder
        Tsurgeon.processPatternsOnTree(pruneOps, copyTree2);

        copyTree2 = moveLeadingAdjuncts(copyTree2);

        if (GlobalProperties.getDebug()) {
            System.err.println("moveWHPhrase: " + copyTree2.toString());
        }
        res.add(copyTree2);
    }

    return res;
}

From source file:edu.cmu.ark.QuestionTransducer.java

License:Open Source License

/**
 * This method decomposes the main verb of the sentence
 * for yes-no questions and WH questions where the answer
 * phrase is not the subject.
 *
 * e.g., I met John -> I did meet John.
 * (which would later become "Who did I meet?")
 *
 */
private Tree decomposePredicate(Tree inputTree) {
    Tree copyTree = inputTree.deeperCopy();

    List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    String tregexOpStr;
    List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>();
    TregexPattern matchPattern;
    TsurgeonPattern p;
    TregexMatcher matcher;
    Tree tmpNode;
    //tregexOpStr = "ROOT < (S=mainclause < (VP=predphrase < /VB.?/=tensedverb !< (VP < /VB.?/)))";
    //tregexOpStr = "ROOT < (S=mainclause < (VP=predphrase < (/VB.?/=tensedverb !< is|was|were|am|are|has|have|had|do|does|did)))";

    //This rather complex rule identifies predicates to decompose.
    //There are two cases, separated by a disjunction.
    //One could break it apart into separate rules to make it simpler...
    //
    //The first part of the disjunction
    //(i.e., < (/VB.?/=tensedverb !< is|was|were|am|are|has|have|had|do|does|did) )
    //is for handling basic sentences
    //(e.g., John bought an apple -> What did John buy?),
    //sentences with auxiliaries
    //(e.g., John had bought an apple -> Had John bought an apple?),
    //and sentences with participial phrases
    //(e.g., John seemed finished with the apple -> What did John seem finished with?).
    //
    //The second part of the disjunction
    //(i.e., < /VB.?/=tensedverb !< VP )
    //is for handling sentences that have predicates
    //that can also be auxiliaries (e.g., I have a book).
    //In these cases, we do want to decompose have, has, had, etc.
    //(e.g., What did I have?)
    tregexOpStr = "ROOT < (S=mainclause < (VP=predphrase [ < (/VB.?/=tensedverb !< is|was|were|am|are|has|have|had|do|does|did) | < /VB.?/=tensedverb !< VP ]))";

    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(copyTree);
    if (matcher.find()) {
        Tree subtree = matcher.getNode("tensedverb");
        String lemma = AnalysisUtilities.getInstance().getLemma(subtree.getChild(0).label().toString(),
                subtree.label().toString());
        String aux = getAuxiliarySubtree(subtree);

        if (!lemma.equals("be")) {
            ps.add(Tsurgeon.parseOperation("replace predphrase (MAINVP=newpred PLACEHOLDER)"));
            ps.add(Tsurgeon.parseOperation("insert predphrase >-1 newpred"));
            ps.add(Tsurgeon.parseOperation("insert (VBLEMMA PLACEHOLDER) $+ tensedverb"));
            ps.add(Tsurgeon.parseOperation("delete tensedverb"));
            p = Tsurgeon.collectOperations(ps);
            ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p));
            Tsurgeon.processPatternsOnTree(ops, copyTree);
            matchPattern = TregexPatternFactory.getPattern("MAINVP=mainvp");
            matcher = matchPattern.matcher(copyTree);
            matcher.find();
            tmpNode = matcher.getNode("mainvp");
            tmpNode.removeChild(0);
            tmpNode.label().setValue("VP");
            tmpNode.addChild(0, AnalysisUtilities.getInstance().readTreeFromString(aux));

            matchPattern = TregexPatternFactory.getPattern("VBLEMMA=vblemma");
            matcher = matchPattern.matcher(copyTree);
            matcher.find();
            tmpNode = matcher.getNode("vblemma");
            tmpNode.removeChild(0);
            tmpNode.label().setValue("VB");
            tmpNode.addChild(0, AnalysisUtilities.getInstance().readTreeFromString(lemma));
        }
    }

    if (GlobalProperties.getDebug())
        System.err.println("decomposePredicate: " + copyTree.toString());
    return copyTree;
}

From source file:edu.cmu.ark.QuestionTransducer.java

License:Open Source License

/**
 * Changes the inflection of the main verb for questions with
 * first and second person pronouns are the subject.
 * Note: this probably isn't necessary for most applications.
 *
 * E.g.,/*ww w.j  ava  2s .co m*/
 * Affects:
 * I walk -> Who walks? (rather than, Who walk?)
 *
 * Does not affect:
 * He walks -> Who walks?
 *
 */
private void ensureVerbAgreementForSubjectWH(Tree inputTree) {
    String tregexOpStr;
    TregexMatcher matcher;
    TregexPattern matchPattern;
    Tree subjectTree;
    String subjectString;

    tregexOpStr = "/^(NP|PP|SBAR)-" + 0 + "$/";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(inputTree);
    if (matcher.find()) {
        subjectTree = matcher.getMatch();
        subjectString = subjectTree.yield().toString();
        if (subjectString.equalsIgnoreCase("I") || subjectString.equalsIgnoreCase("you")) {
            tregexOpStr = "ROOT=root < (S=mainclause < (VP=verbphrase < (/VB.?/=tensedverb)))";
            matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
            matcher = matchPattern.matcher(inputTree);
            if (matcher.find()) {
                Tree verbSubtree = matcher.getNode("tensedverb");
                Tree vpSubtree = matcher.getNode("verbphrase");
                Tree singularFormSubtree = AnalysisUtilities.getInstance()
                        .readTreeFromString(getSingularFormSubtree(verbSubtree));
                int index = vpSubtree.indexOf(verbSubtree);
                vpSubtree.removeChild(index);
                vpSubtree.addChild(index, singularFormSubtree);
                if (GlobalProperties.getDebug())
                    System.err.println("ensureVerbAgreementForSubjectWH: " + inputTree.toString());
            }
        }
    }
}

From source file:edu.cmu.ark.SentenceSimplifier.java

License:Open Source License

/**
 * e.g., John and James like Susan.  ->  John likes Susan.
 * /*from  www . j av  a2 s  .  co  m*/
 */
private void extractConjoinedNPs(Collection<Question> extracted, Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    Tree conjoinedNode;
    Tree parent;

    TregexMatcher matcher;
    Question newQuestion;

    //only extract conjoined NPs that are arguments or adjuncts of the main verb
    // in the tree, this means the closest S will be the one under the root
    tregexOpStr = "NP=parent < (CONJP|CC !< or|nor [ "
            + " $+ /^(N.*|PRP|SBAR)$/=child $-- /^(N.*|PRP|SBAR)$/ | " //there must be a noun on each side of the conjunction
            + " $-- /^(N.*|PRP|SBAR)$/=child $+ /^(N.*|PRP|SBAR)$/ ] ) " //this avoids extracting from flat NPs such as "the smaller and darker form"
            + " !>> (/.*/ $ (CC|CONJP !< or|nor)) " //this cannot be nested within a larger conjunction or followed by a conjunction (we recur later to catch this) 
            + " !$ (CC|CONJP !< or|nor)" + " !.. (CC|CONJP !< or|nor > NP|PP|S|SBAR|VP) !>> SBAR ";
    //+ " >> (ROOT !< (S <+(VP) (/^VB.*$/ < are|were|be|seem|appear))) " ; //don't break plural predicate nominatives (e.g., "John and Mary are two of my best friends.")
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());
    List<Integer> nodeIndexes = new ArrayList<Integer>();
    List<Integer> parentIDs = new ArrayList<Integer>();

    while (matcher.find()) {
        //store the parents' IDs (in the tree)
        parent = matcher.getNode("parent");
        parentIDs.add(parent.nodeNumber(input.getIntermediateTree()));

        conjoinedNode = matcher.getNode("child");
        //store the conjoined nodes' index into their parent's list of children
        int idx = parent.indexOf(conjoinedNode);
        if (!nodeIndexes.contains(idx))
            nodeIndexes.add(idx);
    }

    //for each of the conjoined children,
    //create a new tree by removing all the nodes they are conjoined with
    Collections.sort(nodeIndexes);//sort, just to keep them in the original order
    for (int i = 0; i < nodeIndexes.size(); i++) {
        newQuestion = input.deeperCopy();

        Tree t = newQuestion.getIntermediateTree();
        parent = t.getNodeNumber(parentIDs.get(i));
        Tree gparent = parent.parent(t);
        conjoinedNode = parent.getChild(nodeIndexes.get(i));
        String siblingLabel;

        //Remove all the nodes that are conjoined
        //with the selected noun (or are conjunctions, commas).
        //These can have labels NP, NN, ..., PRP for pronouns, CC, "," for commas, ":" for semi-colons
        for (int j = 0; j < parent.numChildren(); j++) {
            if (parent.getChild(j) == conjoinedNode)
                continue;
            siblingLabel = parent.getChild(j).label().toString();
            if (siblingLabel.matches("^[NCP,:S].*")) {
                parent.removeChild(j);
                j--;
            }
        }

        //if there is an trivial unary "NP -> NP",
        //remove the parent and put the child in its place
        if (parent.numChildren() == 1 && parent.getChild(0).label().equals("NP")) {
            int tmpIndex = gparent.indexOf(parent);
            gparent.removeChild(tmpIndex);
            gparent.addChild(tmpIndex, parent.getChild(0));
        }

        correctTense(conjoinedNode, gparent);
        addQuotationMarksIfNeeded(newQuestion.getIntermediateTree());

        if (GlobalProperties.getDebug())
            System.err.println("extractConjoinedNPs: " + newQuestion.getIntermediateTree().toString());
        if (GlobalProperties.getComputeFeatures())
            newQuestion.setFeatureValue("extractedFromConjoinedPhrases", 1.0); //old feature name
        if (GlobalProperties.getComputeFeatures())
            newQuestion.setFeatureValue("extractedFromConjoinedNPs", 1.0);
        extracted.add(newQuestion);
    }
}