Example usage for edu.stanford.nlp.trees.Tree.deepCopy()

Introduction

On this page you can find example usages of edu.stanford.nlp.trees.Tree.deepCopy().

Prototype

public Tree deepCopy()

Source Link

Document

Makes a deep copy of not only the Tree structure but of the labels as well.

Usage

From source file:CollapseUnaryTransformer.java

License:Apache License

/**
 * Builds a new tree from {@code tree} in which single-child chains below an
 * internal node are skipped.
 *
 * <p>Leaves and preterminals are returned as deep copies. For any other node,
 * the method walks down while the current child list holds exactly one child
 * that is not a leaf, then recursively transforms the children it ends up
 * with and attaches them to a fresh node carrying a copy of this node's label.
 *
 * @param tree the tree to transform
 * @return a newly built tree; the nodes of {@code tree} are not reused
 */
public Tree transformTree(Tree tree) {
    // Nothing to collapse at or directly above the words.
    if (tree.isPreTerminal() || tree.isLeaf()) {
        return tree.deepCopy();
    }

    Label copiedLabel = tree.label().labelFactory().newLabel(tree.label());

    // Skip over unary links: keep descending while there is a lone, non-leaf child.
    Tree[] kids = tree.children();
    while (kids.length == 1 && !kids[0].isLeaf()) {
        kids = kids[0].children();
    }

    List<Tree> transformedKids = Generics.newArrayList();
    for (int i = 0; i < kids.length; i++) {
        transformedKids.add(transformTree(kids[i]));
    }
    return tree.treeFactory().newTreeNode(copiedLabel, transformedKids);
}

From source file:com.github.kutschkem.Qgen.annotators.SimpleSentenceDecompositionAnnotator.java

License:Open Source License

/**
 * Splits {@code documentText} into sentences, parses each one, and builds a
 * simple "NP VP ." sentence for every NP/VP pair matched under ROOT.
 *
 * <p>For each match the NP is deep-copied and the nodes named by the pruning
 * pattern ({@code sbarq}, {@code c1}, {@code c2}) are pruned from the copy, so
 * the parsed sentence tree itself is not modified by the pruning.
 *
 * @param documentText raw text to decompose
 * @return one string per NP/VP match, in match order; empty if nothing matched
 */
private List<String> decompose(String documentText) {
    List<Tree> trees = new ArrayList<Tree>();
    for (String sentence : AnalysisUtilities.getSentences(documentText)) {
        trees.add(AnalysisUtilities.getInstance().parseSentence(sentence).parse);
    }

    // The patterns and the pruning operation are the same for every sentence and
    // every match, so they are built once per call instead of once per match.
    TregexPattern clausePattern = TregexPattern.compile("ROOT << (NP=np $++ VP=vp) ");
    TregexPattern prunePattern = TregexPattern.compile("NP << (/^S.*/=sbarq ?. /,/=c1 ?, /,/=c2)");
    List<TsurgeonPattern> pruneSteps = new ArrayList<TsurgeonPattern>();
    pruneSteps.add(Tsurgeon.parseOperation("prune sbarq"));
    pruneSteps.add(Tsurgeon.parseOperation("prune c1"));
    pruneSteps.add(Tsurgeon.parseOperation("prune c2"));
    TsurgeonPattern pruneOperation = Tsurgeon.collectOperations(pruneSteps);

    List<String> result = new ArrayList<String>();

    for (Tree t : trees) {
        TregexMatcher m = clausePattern.matcher(t);
        while (m.find()) {
            // Prune on a copy; the matcher keeps iterating over the original tree.
            Tree np = m.getNode("np").deepCopy();
            Tree vp = m.getNode("vp");

            // The return value is deliberately ignored, as before: the copy is
            // edited in place and then used directly.
            Tsurgeon.processPattern(prunePattern, pruneOperation, np);

            Tree newTree = Tree.valueOf("(S " + np + vp + "(. .))");
            result.add(AnalysisUtilities.orginialSentence(newTree.yield()));
        }
    }

    return result;
}

From source file:edu.albany.cubism.sentiment.Sandbox.java

/**
 * Stores a deep copy of {@code tree} in the list kept for
 * {@code recursion_level}, creating that list first if the level has no
 * entry yet.
 *
 * @param recursion_level key under which the copy is recorded
 * @param tree tree to copy and record
 */
public void addMapping(int recursion_level, Tree tree) {
    // First tree for this level: create its bucket. The raw ArrayList is kept
    // as in the original because the field's declared type is not visible here.
    if (!this.recusion_mapping.containsKey(recursion_level)) {
        this.recusion_mapping.put(recursion_level, new ArrayList());
    }
    this.recusion_mapping.get(recursion_level).add(tree.deepCopy());
}

From source file:edu.cmu.ark.nlp.question.QuestionUtil.java

License:Open Source License

/**
 * Returns the cleaned-up string form of the yield of {@code inputTree}.
 *
 * <p>The yield is taken from a deep copy of the tree, converted with
 * {@code toString()}, and passed through {@code cleanUpSentenceString}.
 *
 * @param inputTree tree whose yield is wanted
 * @return the result of {@code cleanUpSentenceString} on the yield string
 */
public static String getCleanedUpYield(Tree inputTree) {
    final String rawYield = inputTree.deepCopy().yield().toString();
    return cleanUpSentenceString(rawYield);
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Primary method for simplifying sentences.
 * Takes an input sentence in the form of a tree
 * and returns a list of Question objects, which help 
 * to track what operations were performed.
 * /*from  w w w .  j  a  v a 2 s .  c  o m*/
 * @param sentence
 * @param fixCapitalization
 * @return
 */
public List<Question> simplify(Tree sentence, boolean fixCapitalization) {
    List<Question> treeList = new ArrayList<Question>();
    numSimplifyHelperCalls = 0;
    //if(GlobalProperties.getDebug()) System.err.println("simplify input:"+ sentence);
    //add original tree
    Question orig = new Question(this.props);
    orig.setSourceTree(sentence);
    orig.setIntermediateTree(sentence.deepCopy());

    //if the input contains any UCP or other odd nodes, then just return the original sentence 
    //such nodes indicate that the parse failed, or at least that our system will likely produce bad output
    if (uglyParse(sentence)) {
        treeList.add(orig);
        return treeList;
    }

    QuestionUtil.downcaseFirstToken(orig.getIntermediateTree());
    //treeSet.add(originalWithFeatures);
    Question current = orig.deeperCopy();

    List<Question> extracted = new ArrayList<Question>();

    //for each nested element in the INPUT... (nested elements include finite verbs, non-restrictive relative clauses, appositives, conjunction of VPs, conjunction of clauses, participial phrases)
    //transform the nested element into a declarative sentence (preserving tense), removing conjunctions, etc.
    extracted.add(current);
    extractSubordinateClauses(extracted, orig);
    extractNounParticipialModifiers(extracted, orig);
    extractNonRestrictiveRelativeClauses(extracted, orig);
    extractAppositives(extracted, orig);
    extractVerbParticipialModifiers(extracted, orig);
    //extractWITHPartcipialPhrases(extracted, orig); //too rare to worry about
    if (extractFromVerbComplements)
        extractComplementClauses(extracted, orig);

    for (Question q : extracted) {
        addAllIfNovel(treeList, simplifyHelper(q));
    }

    //make sure there is at least one output
    if (treeList.size() == 0) {
        addIfNovel(treeList, current);
    }

    if (fixCapitalization) {
        //upcase the first tokens of all output trees.
        for (Question q : treeList) {
            QuestionUtil.upcaseFirstToken(q.getIntermediateTree());
        }
    }

    //clean up the output
    for (Question q : treeList) {
        QuestionUtil.removeExtraQuotes(q.getIntermediateTree());
    }

    if (this.getComputeFeatures) {
        Question t = treeList.get(0);
        boolean fromMainClause = true;
        String[] nestedExtractionFeatureNames = { "extractedFromParticipial", "extractedFromVerbParticipial",
                "extractedFromFiniteClause", "extractedFromSubordinateClause", "extractedFromComplementClause",
                "extractedFromAppositive", "extractedFromRelativeClause", "extractedFromParticipial",
                "extractedFromNounParticipial" };
        for (String name : nestedExtractionFeatureNames) {
            if (t.getFeatureValue(name) != 0) {
                fromMainClause = false;
                break;
            }
        }
        if (fromMainClause)
            t.setFeatureValue("extractedFromLeftMostMainClause", 1.0);
        //t.setFeatureValue("extractedFromLeftMostMainClause", 1.0);
    }
    //if(GlobalProperties.getDebug()) System.err.println("simplifyHelperCalls:\t"+numSimplifyHelperCalls);

    if (mainClauseOnly) {
        Question tmp = treeList.get(0);
        treeList.clear();
        if (tmp.getFeatureValue("extractedFromLeftMostMainClause") == 1.0)
            treeList.add(tmp);
    }

    return treeList;
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Extracts subordinate clauses as sentences of their own.
 * e.g., As John slept, I studied. ->  John slept.
 *
 * @param extracted collection that receives each new Question (via addIfNovel)
 * @param input Question whose intermediate tree is searched; each result starts
 *        as a deeperCopy of it
 */
private void extractSubordinateClauses(Collection<Question> extracted, Question input) {
    String tregexOpStr = " SBAR [ > VP < IN | > S|SINV ]  " + //not a complement
            " !< (IN < if|unless|that)" + //not a conditional antecedent
            " < (S=sub !< (VP < VBG)) " + //not a participial phrase
            " >S|SINV|VP "; //not part of a noun phrase or PP (other methods for those)

    TregexPattern matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    TregexMatcher matcher = matchPattern.matcher(input.getIntermediateTree());
    while (matcher.find()) {
        //wrap a copy of the matched clause in a fresh ROOT node
        Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>());
        Tree subord = matcher.getNode("sub");
        newTree.addChild(subord.deepCopy());

        QuestionUtil.addPeriodIfNeeded(newTree);
        addQuotationMarksIfNeeded(newTree);

        Question newTreeWithFeatures = input.deeperCopy();
        newTreeWithFeatures.setIntermediateTree(newTree);
        if (this.getComputeFeatures) {
            newTreeWithFeatures.setFeatureValue("extractedFromFiniteClause", 1.0); //old feature name
            newTreeWithFeatures.setFeatureValue("extractedFromSubordinateClause", 1.0);
        }
        addIfNovel(extracted, newTreeWithFeatures);
    }
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Extracts clausal complements of verbs as sentences of their own, but only
 * for verbs accepted by {@code verbImpliesComplement}.
 *
 * @param extracted collection that receives each new Question (via addIfNovel)
 * @param input Question whose intermediate tree is searched; each result starts
 *        as a deeperCopy of it
 */
private void extractComplementClauses(Collection<Question> extracted, Question input) {
    //TODO should also address infinitive complements
    String tregexOpStr = "SBAR " + " < (S=sub !< (VP < VBG)) " + //not a participial phrase
            " !> NP|PP " + //not part of a noun phrase or PP (other methods for those)
            " [ $- /^VB.*/=verb | >+(SBAR) (SBAR $- /^VB.*/=verb) ] "; //complement of a VP (follows the verb)

    TregexPattern matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    TregexMatcher matcher = matchPattern.matcher(input.getIntermediateTree());
    while (matcher.find()) {
        Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>());
        Tree subord = matcher.getNode("sub");
        Tree verb = matcher.getNode("verb");
        String verbLemma = QuestionUtil.getLemma(verb.yield().toString(), verb.label().toString());

        //only verbs that pass verbImpliesComplement yield an output sentence
        if (verbImpliesComplement(verbLemma)) {
            newTree.addChild(subord.deepCopy());

            QuestionUtil.addPeriodIfNeeded(newTree);
            addQuotationMarksIfNeeded(newTree);

            Question newTreeWithFeatures = input.deeperCopy();
            newTreeWithFeatures.setIntermediateTree(newTree);
            if (this.getComputeFeatures) {
                newTreeWithFeatures.setFeatureValue("extractedFromFiniteClause", 1.0); //old feature name
                newTreeWithFeatures.setFeatureValue("extractedFromComplementClause", 1.0);
            }
            addIfNovel(extracted, newTreeWithFeatures);
        }
    }
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Extracts non-restrictive relative clauses as sentences of their own by
 * re-inserting the modified noun phrase into a copy of the clause.
 * e.g., John, who is a friend of mine, likes Susan. -> John is a friend of mine.
 *
 * @param extracted collection that receives each new Question (via addIfNovel)
 * @param input Question whose intermediate tree is searched; each result starts
 *        as a deeperCopy of it
 */
private void extractNonRestrictiveRelativeClauses(Collection<Question> extracted, Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    TregexMatcher matcher;
    TregexMatcher matcherclause;

    //np = the noun phrase containing the relative clause, sbar = the relative clause,
    //relclause = the S inside it; wherecomp/possessive/preposition are bound only
    //for the corresponding kind of relativizer
    tregexOpStr = "NP=np < (SBAR=sbar [ < (WHADVP=wherecomp < (WRB < where)) " + " | < (WHNP !< /WP\\$/) "
            + " | < (WHNP=possessive < /WP\\$/)" //John, whose car was
            + " | < (WHPP < IN|TO=preposition) ] $-- NP $- /,/ " + " < S=relclause  !< WHADJP)";

    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());

    //iterate over all the relative clauses in the input
    //and create an output sentence for each one.
    while (matcher.find()) {
        Tree missingArgumentTree = matcher.getNode("np");
        Tree relclause = matcher.getNode("relclause");
        if (missingArgumentTree == null || relclause == null)
            continue;
        //work on copies so the edits below do not touch the input's tree
        missingArgumentTree = missingArgumentTree.deepCopy();
        relclause = relclause.deepCopy();
        Tree possessive = matcher.getNode("possessive");
        Tree sbar = matcher.getNode("sbar").deepCopy();

        makeDeterminerDefinite(missingArgumentTree);

        if (possessive != null) {
            //drop the first child of the copied WHNP (presumably the WP$ word, e.g. "whose" -- confirm)
            //and rebuild the argument as "<np> 's <remaining WHNP children>"
            possessive = possessive.deepCopy();
            possessive.removeChild(0);
            String newTree = "(NP (NP " + missingArgumentTree.toString() + " (POS 's))";
            for (int i = 0; i < possessive.numChildren(); i++)
                newTree += possessive.getChild(i).toString() + " ";
            newTree += ")";
            missingArgumentTree = QuestionUtil.readTreeFromString(newTree);
        }

        //remove the relative clause and the commas surrounding it from the missing argument tree
        //(only direct children are inspected; the match relies on equals() treating the
        //copied SBAR as equal to the child -- presumably value-based equality, confirm)
        for (int i = 0; i < missingArgumentTree.numChildren(); i++) {
            if (missingArgumentTree.getChild(i).equals(sbar)) {
                //remove the relative clause
                missingArgumentTree.removeChild(i);
                //remove the comma after the relative clause
                //(after the removal above, index i now refers to the node that followed the clause)
                if (i < missingArgumentTree.numChildren()
                        && missingArgumentTree.getChild(i).label().toString().equals(",")) {
                    missingArgumentTree.removeChild(i);
                }
                //remove the comma before the relative clause
                if (i > 0 && missingArgumentTree.getChild(i - 1).label().toString().equals(",")) {
                    missingArgumentTree.removeChild(i - 1);
                    //one more child disappeared in front of position i
                    i--;
                }
                //step back so the loop's i++ revisits the position that was just vacated
                i--;
            }
        }

        //put the noun in the clause at the topmost place with an opening for a noun. 
        //Note that this may mess up if there are noun phrase adjuncts like "The man I met Tuesday".

        //specifically: 
        //the parent of the noun can be either a clause (S) as in "The man who met me"
        //or a verb phrase as in "The man who I met".
        //for verb phrases, add the noun to the end since it will be an object.
        //for clauses, add the noun to the beginning since it will be the subject.
        tregexOpStr = "S|VP=newparent !< NP < (VP=verb !< TO !$ TO)";
        matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
        matcherclause = matchPattern.matcher(relclause);
        boolean subjectMovement = true;
        if (!matcherclause.find()) {
            //no subject gap found: fall back to looking for an object gap inside a VP
            tregexOpStr = "VP=newparent !< VP < /VB.*/=verb !>> (S !< NP) !<< (VP !< VP !< NP)";
            matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
            matcherclause = matchPattern.matcher(relclause);
            subjectMovement = false;
        }

        //reset (so the first match isn't skipped)
        matcherclause = matchPattern.matcher(relclause);

        if (matcherclause.find()) {
            Tree newparenttree = matcherclause.getNode("newparent");
            Tree verbtree = matcherclause.getNode("verb");
            boolean ppRelativeClause = false;

            if (matcher.getNode("wherecomp") != null) {
                //"where" relative clause: re-insert the noun phrase as "at <np>"
                String tmp = "(PP (IN at) " + missingArgumentTree.toString() + ")";
                missingArgumentTree = QuestionUtil.readTreeFromString(tmp);
                ppRelativeClause = true;
                subjectMovement = false;
            } else if (matcher.getNode("preposition") != null) {
                //prepositional relativizer: re-insert the noun phrase under the matched preposition
                String tmp = "(PP (IN " + matcher.getNode("preposition").yield().toString() + ") "
                        + missingArgumentTree.toString() + ")";
                missingArgumentTree = QuestionUtil.readTreeFromString(tmp);
                ppRelativeClause = true;
            }

            if (subjectMovement) { //subject
                //insert directly before the verb phrase
                newparenttree.addChild(newparenttree.objectIndexOf(verbtree), missingArgumentTree);
            } else { // newparentlabel is VP   
                if (ppRelativeClause)
                    //prepositional phrases go at the end of the parent
                    newparenttree.addChild(newparenttree.numChildren(), missingArgumentTree);
                else
                    //objects go directly after the verb
                    newparenttree.addChild(newparenttree.objectIndexOf(verbtree) + 1, missingArgumentTree);
            }

            //create a new tree with punctuation
            Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>());
            newTree.addChild(relclause);
            QuestionUtil.addPeriodIfNeeded(newTree);

            //if(GlobalProperties.getDebug()) System.err.println("extractRelativeClauses: "+ newTree.toString());
            addQuotationMarksIfNeeded(newTree);
            Question newTreeWithFeatures = input.deeperCopy();
            newTreeWithFeatures.setIntermediateTree(newTree);
            if (this.getComputeFeatures)
                newTreeWithFeatures.setFeatureValue("extractedFromRelativeClause", 1.0);
            addIfNovel(extracted, newTreeWithFeatures);
        }
    }
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * e.g., John, hoping to get a good grade, studied. -> John hoped to get a good grade.
 *   Walking to the store, John saw Susan -> John was walking to the store.
 *
 *   NOTE: This method produces false positives for sentences like,
 *            "Broadly speaking, the project was successful."
 *         where the participial phrase does not modify the subject.
 *
 * @param extracted collection to which each newly built Question is added
 * @param input Question whose intermediate tree is searched; each result starts
 *        as a deeperCopy of it
 */
private void extractNounParticipialModifiers(Collection<Question> extracted, Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    TregexMatcher matcher;

    tregexOpStr = "ROOT < (S " + " [ << (NP < (NP=subj  $++ (/,/ $+ (VP=modifier <# VBN|VBG|VP=tense )))) " //modifiers that appear after nouns
            + " | < (S !< NP|SBAR < (VP=modifier <# VBN|VBG|VP=tense) $+ (/,/ $+ NP=subj)) " //modifiers before the subject. e.g., Founded by John, the company...
            + " | < (SBAR < (S !< NP|SBAR < (VP=modifier <# VBN|VBG=tense)) $+ (/,/ $+ NP=subj)) " //e.g., While walking to the store, John saw Susan.
            + " | < (PP=modifier !< NP <# VBG=tense $+ (/,/ $+ NP=subj)) ] ) " // e.g., Walking to the store, John saw Susan.
            + " <<# /^VB.*$/=maintense "; //tense determined by top-most verb

    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());
    while (matcher.find()) {
        Tree subj = matcher.getNode("subj");
        Tree vptree = matcher.getNode("modifier");
        Tree verb = matcher.getNode("tense");
        //Guard before the first dereference. The original checked vptree only
        //after calling vptree.label() and never checked verb, so the check could
        //not prevent a NullPointerException. Skip just this match, as
        //extractNonRestrictiveRelativeClauses does.
        if (subj == null || vptree == null || verb == null) {
            continue;
        }

        Tree nountree = subj.deepCopy();
        makeDeterminerDefinite(nountree);

        //NOTE: vptree has not been copied yet, so this relabels the matched node itself.
        if (vptree.label().toString().equals("PP")) {
            vptree.label().setValue("VP");
        }
        String verbPOS = findTense(matcher.getNode("maintense"));

        String newTreeStr;
        if (verb.label().toString().equals("VBG")) {
            //for present partcipials, change the tense to the tense of the main verb
            //e.g., walking to the store -> walked to the store
            String verbLemma = QuestionUtil.getLemma(verb.getChild(0).label().toString(),
                    verb.label().toString());
            String newVerb = this.conjugator.getSurfaceForm(verbLemma, verbPOS);
            //the index must be looked up before copying: verb is a node of the uncopied vptree
            int verbIndex = vptree.objectIndexOf(verb);
            vptree = vptree.deepCopy();
            vptree.removeChild(verbIndex);
            vptree.addChild(verbIndex, QuestionUtil.readTreeFromString("(" + verbPOS + " " + newVerb + ")"));
            //NOTE(review): this branch uses the matched subject as-is, not nountree
            //(the copy passed to makeDeterminerDefinite) -- confirm this is intended.
            newTreeStr = "(ROOT (S " + subj.toString() + " " + vptree.toString()
                    + " (. .)))";
        } else {
            //for past participials, add a copula
            //e.g., John, exhausted, -> John was exhausted
            //(or for conjunctions, just add the copula---kind of a hack to make the moby dick sentence work out)
            String auxiliary;
            if (verbPOS.equals("VBP") || verbPOS.equals("VBD")) {
                if (isPlural(nountree)) {
                    auxiliary = "(VBD were)";
                } else {
                    auxiliary = "(VBD was)";
                }
            } else {
                if (isPlural(nountree)) {
                    auxiliary = "(VB are)";
                } else {
                    auxiliary = "(VBZ is)";
                }
            }

            newTreeStr = "(ROOT (S " + nountree + " (VP " + auxiliary + " " + vptree + ") (. .)))";
        }

        Tree newTree = QuestionUtil.readTreeFromString(newTreeStr);
        correctTense(newTree.getChild(0).getChild(0), newTree.getChild(0));
        addQuotationMarksIfNeeded(newTree);

        Question newTreeWithFeatures = input.deeperCopy();
        newTreeWithFeatures.setIntermediateTree(newTree);
        if (this.getComputeFeatures) {
            newTreeWithFeatures.setFeatureValue("extractedFromParticipial", 1.0); //old feature name
            newTreeWithFeatures.setFeatureValue("extractedFromNounParticipial", 1.0);
        }
        extracted.add(newTreeWithFeatures);
    }

}

From source file:elkfed.coref.mentions.Mention.java

License:Apache License

/**
 * Takes the sentence tree of this mention and replaces the unlexicalized
 * tokens in the leaves with their discourse ids. Usefull to derive spans
 * based on syntax./*from ww  w .j a  va 2s.  c  om*/
 * 
 * @author samuel
 * @param sentTree
 *            original sentence tree
 * @return Tree with discourse ids in leaves
 */
private Tree getSentenceTreeWithDiscurseIdsInLeaves(Tree sentTree) {
    Tree result = sentTree.deepCopy();
    List<Tree> leaves = result.getLeaves();
    String[] discElements = getSentenceDiscourseElements();
    String[] discIDs = getSentenceDiscourseElementIDs();
    if (discElements.length != leaves.size() || discIDs.length != leaves.size()) {
        throw new RuntimeException("SentenceTree leaves and discourse elements do not match for mention "
                + this.getMarkable().getID());
    }

    /*
     * sometimes tueba-d/z punctuations in the parse tree are out of context
     * appended to the root of the tree hence the corrector part
     * 
     * yv: ... and we need to unescape those words for the semeval2mmax
     * output. duh.
     */
    int corrector = 0;
    for (int i = 0; i < discElements.length; i++) {
        String leafWord = leaves.get(i - corrector).value().toLowerCase();
        leafWord = leafWord.replaceAll("-lrb-", "(");
        leafWord = leafWord.replaceAll("-rrb-", ")");
        leafWord = leafWord.replaceAll("\\\\", "");
        String discWord = discElements[i].replaceAll("\\\\", "");
        if (discWord.equalsIgnoreCase(leafWord)) {
            leaves.get(i - corrector).setValue(discIDs[i]);
        } else {
            System.err.format("non-matching: %s vs %s\n", leafWord, discElements[i]);
            corrector++;
        }
    }
    return result;
}