Example usage for edu.stanford.nlp.trees Tree deepCopy

List of usage examples for edu.stanford.nlp.trees Tree deepCopy

Introduction

On this page you can find example usages of the deepCopy() method of edu.stanford.nlp.trees.Tree.

Prototype

public Tree deepCopy() 

Source Link

Document

Makes a deep copy of not only the Tree structure but of the labels as well.

Usage

From source file:CollapseUnaryTransformer.java

License:Apache License

/**
 * Builds a copy of {@code tree} in which unary chains below each node are collapsed.
 *
 * Leaves and preterminals are returned as deep copies. For any other node, the
 * node's own label is kept (re-created through its label factory) and the method
 * descends through single-child nodes until it reaches a node that has several
 * children or whose only child is a leaf; those children are transformed
 * recursively and attached directly under the kept label.
 *
 * @param tree the tree to transform
 * @return a new tree built with {@code tree.treeFactory()}, or a deep copy for
 *         leaves and preterminals
 */
public Tree transformTree(Tree tree) {
    // Nothing to collapse at the bottom of the tree: hand back an independent copy.
    if (tree.isPreTerminal() || tree.isLeaf()) {
        return tree.deepCopy();
    }

    Label keptLabel = tree.label().labelFactory().newLabel(tree.label());

    // Skip over every intermediate node that has exactly one non-leaf child.
    Tree[] kids = tree.children();
    while (true) {
        boolean singleInternalChild = kids.length == 1 && !kids[0].isLeaf();
        if (!singleInternalChild) {
            break;
        }
        kids = kids[0].children();
    }

    List<Tree> rebuiltKids = Generics.newArrayList();
    for (int i = 0; i < kids.length; i++) {
        rebuiltKids.add(transformTree(kids[i]));
    }
    return tree.treeFactory().newTreeNode(keptLabel, rebuiltKids);
}

From source file:com.github.kutschkem.Qgen.annotators.SimpleSentenceDecompositionAnnotator.java

License:Open Source License

/**
 * Decomposes a document into simple sentences of the form "NP VP .".
 *
 * Every sentence returned by {@code AnalysisUtilities.getSentences} is parsed; for
 * each (NP, VP) pair matched under ROOT, the NP is deep-copied, embedded clauses
 * (labels starting with "S") and their adjacent commas are pruned from the copy,
 * and a new "(S NP VP .)" tree is built whose yield becomes one output string.
 *
 * The Tregex patterns and Tsurgeon operations are constant, so they are compiled
 * once up front instead of once per tree / per match.
 *
 * @param documentText raw text of the document
 * @return one string per (NP, VP) match, in match order; empty if nothing matched
 */
private List<String> decompose(String documentText) {
    List<Tree> trees = new ArrayList<Tree>();
    for (String sentence : AnalysisUtilities.getSentences(documentText)) {
        trees.add(AnalysisUtilities.getInstance().parseSentence(sentence).parse);
    }

    // Loop-invariant: compile the patterns and the pruning script a single time.
    TregexPattern clausePattern = TregexPattern.compile("ROOT << (NP=np $++ VP=vp) ");
    TregexPattern prunePattern = TregexPattern.compile("NP << (/^S.*/=sbarq ?. /,/=c1 ?, /,/=c2)");
    List<TsurgeonPattern> pruneSteps = new ArrayList<TsurgeonPattern>();
    pruneSteps.add(Tsurgeon.parseOperation("prune sbarq"));
    pruneSteps.add(Tsurgeon.parseOperation("prune c1"));
    pruneSteps.add(Tsurgeon.parseOperation("prune c2"));
    TsurgeonPattern pruneScript = Tsurgeon.collectOperations(pruneSteps);

    List<String> result = new ArrayList<String>();

    for (Tree t : trees) {
        TregexMatcher m = clausePattern.matcher(t);
        while (m.find()) {
            Tree vp = m.getNode("vp");

            // Prune on a copy so the tree being matched is not modified mid-iteration.
            Tree np = m.getNode("np").deepCopy();
            // The return value is deliberately ignored, as before: the copy itself is used below.
            Tsurgeon.processPattern(prunePattern, pruneScript, np);

            Tree newTree = Tree.valueOf("(S " + np + vp + "(. .))");
            result.add(AnalysisUtilities.orginialSentence(newTree.yield()));
        }
    }

    return result;
}

From source file:edu.albany.cubism.sentiment.Sandbox.java

/**
 * Stores a deep copy of {@code tree} in the list kept for {@code recursion_level},
 * creating that list the first time the level is seen.
 *
 * @param recursion_level key under which the copy is filed
 * @param tree            tree to copy; the original is not stored
 */
public void addMapping(int recursion_level, Tree tree) {
    boolean levelKnown = this.recusion_mapping.containsKey(recursion_level);
    if (!levelKnown) {
        // First tree at this level: set up its bucket.
        this.recusion_mapping.put(recursion_level, new ArrayList());
    }
    this.recusion_mapping.get(recursion_level).add(tree.deepCopy());
}

From source file:edu.cmu.ark.nlp.question.QuestionUtil.java

License:Open Source License

/**
 * Returns the yield of {@code inputTree} as a string, passed through
 * {@code cleanUpSentenceString}.
 *
 * @param inputTree tree whose yield is wanted
 * @return the cleaned-up yield string
 */
public static String getCleanedUpYield(Tree inputTree) {
    // The yield is read from a deep copy rather than from inputTree itself.
    String rawYield = inputTree.deepCopy().yield().toString();
    return cleanUpSentenceString(rawYield);
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Primary method for simplifying sentences.
 * Takes an input sentence in the form of a tree
 * and returns a list of Question objects, which help 
 * to track what operations were performed.
 *
 * @param sentence parse tree of the input sentence; it is stored as the source tree of
 *                 the original Question, and a deep copy becomes its intermediate tree
 * @param fixCapitalization if true, the first token of every output tree is upcased
 * @return the simplified outputs; only the original when {@code uglyParse(sentence)}
 *         holds, and possibly empty when {@code mainClauseOnly} is set and the first
 *         output was not extracted from the left-most main clause
 */
public List<Question> simplify(Tree sentence, boolean fixCapitalization) {
    List<Question> treeList = new ArrayList<Question>();
    // reset the per-call counter
    numSimplifyHelperCalls = 0;
    //if(GlobalProperties.getDebug()) System.err.println("simplify input:"+ sentence);
    //add original tree
    Question orig = new Question(this.props);
    orig.setSourceTree(sentence);
    // the intermediate tree is a deep copy, so later edits to it leave the source tree alone
    orig.setIntermediateTree(sentence.deepCopy());

    //if the input contains any UCP or other odd nodes, then just return the original sentence 
    //such nodes indicate that the parse failed, or at least that our system will likely produce bad output
    if (uglyParse(sentence)) {
        treeList.add(orig);
        return treeList;
    }

    QuestionUtil.downcaseFirstToken(orig.getIntermediateTree());
    //treeSet.add(originalWithFeatures);
    // 'current' is a copy of the downcased original; it is the first extraction candidate
    // and the fallback output if simplifyHelper yields nothing
    Question current = orig.deeperCopy();

    List<Question> extracted = new ArrayList<Question>();

    //for each nested element in the INPUT... (nested elements include finite verbs, non-restrictive relative clauses, appositives, conjunction of VPs, conjunction of clauses, participial phrases)
    //transform the nested element into a declarative sentence (preserving tense), removing conjunctions, etc.
    extracted.add(current);
    // every extractor below reads from 'orig' and appends its results to 'extracted'
    extractSubordinateClauses(extracted, orig);
    extractNounParticipialModifiers(extracted, orig);
    extractNonRestrictiveRelativeClauses(extracted, orig);
    extractAppositives(extracted, orig);
    extractVerbParticipialModifiers(extracted, orig);
    //extractWITHPartcipialPhrases(extracted, orig); //too rare to worry about
    if (extractFromVerbComplements)
        extractComplementClauses(extracted, orig);

    // run the helper on every candidate and collect the results via addAllIfNovel
    for (Question q : extracted) {
        addAllIfNovel(treeList, simplifyHelper(q));
    }

    //make sure there is at least one output
    if (treeList.size() == 0) {
        addIfNovel(treeList, current);
    }

    if (fixCapitalization) {
        //upcase the first tokens of all output trees.
        for (Question q : treeList) {
            QuestionUtil.upcaseFirstToken(q.getIntermediateTree());
        }
    }

    //clean up the output
    for (Question q : treeList) {
        QuestionUtil.removeExtraQuotes(q.getIntermediateTree());
    }

    if (this.getComputeFeatures) {
        // only the first output is examined for the main-clause feature
        // NOTE(review): get(0) assumes addIfNovel above always adds to an empty list — confirm
        Question t = treeList.get(0);
        boolean fromMainClause = true;
        // NOTE(review): "extractedFromParticipial" is listed twice below; the repeat is redundant for this check
        String[] nestedExtractionFeatureNames = { "extractedFromParticipial", "extractedFromVerbParticipial",
                "extractedFromFiniteClause", "extractedFromSubordinateClause", "extractedFromComplementClause",
                "extractedFromAppositive", "extractedFromRelativeClause", "extractedFromParticipial",
                "extractedFromNounParticipial" };
        // any non-zero extraction feature means the output did not come from the main clause
        for (String name : nestedExtractionFeatureNames) {
            if (t.getFeatureValue(name) != 0) {
                fromMainClause = false;
                break;
            }
        }
        if (fromMainClause)
            t.setFeatureValue("extractedFromLeftMostMainClause", 1.0);
        //t.setFeatureValue("extractedFromLeftMostMainClause", 1.0);
    }
    //if(GlobalProperties.getDebug()) System.err.println("simplifyHelperCalls:\t"+numSimplifyHelperCalls);

    if (mainClauseOnly) {
        // keep at most the first output, and only if it carries the main-clause feature;
        // that feature is set only in the getComputeFeatures branch above
        Question tmp = treeList.get(0);
        treeList.clear();
        if (tmp.getFeatureValue("extractedFromLeftMostMainClause") == 1.0)
            treeList.add(tmp);
    }

    return treeList;
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Turns each matching subordinate clause of the input into a stand-alone sentence,
 * e.g., As John slept, I studied. ->  John slept.
 *
 * @param extracted collection handed to {@code addIfNovel} for every clause found
 * @param input     question whose intermediate tree is searched; it is copied with
 *                  {@code deeperCopy()} for each result
 */
private void extractSubordinateClauses(Collection<Question> extracted, Question input) {
    String tregexOpStr = " SBAR [ > VP < IN | > S|SINV ]  " + //not a complement
            " !< (IN < if|unless|that)" + //not a conditional antecedent
            " < (S=sub !< (VP < VBG)) " + //not a participial phrase
            " >S|SINV|VP "; //not part of a noun phrase or PP (other methods for those)

    TregexPattern clausePattern = TregexPatternFactory.getPattern(tregexOpStr);
    TregexMatcher clauseMatcher = clausePattern.matcher(input.getIntermediateTree());
    while (clauseMatcher.find()) {
        // Wrap a copy of the matched S under a fresh ROOT.
        Tree sentenceRoot = factory.newTreeNode("ROOT", new ArrayList<Tree>());
        Tree matchedClause = clauseMatcher.getNode("sub");
        sentenceRoot.addChild(matchedClause.deepCopy());

        QuestionUtil.addPeriodIfNeeded(sentenceRoot);
        addQuotationMarksIfNeeded(sentenceRoot);

        Question clauseQuestion = input.deeperCopy();
        clauseQuestion.setIntermediateTree(sentenceRoot);
        if (this.getComputeFeatures) {
            clauseQuestion.setFeatureValue("extractedFromFiniteClause", 1.0); //old feature name
            clauseQuestion.setFeatureValue("extractedFromSubordinateClause", 1.0);
        }
        addIfNovel(extracted, clauseQuestion);
    }
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Turns clausal complements of verbs into stand-alone sentences, but only for
 * verbs accepted by {@code verbImpliesComplement}.
 *
 * @param extracted collection handed to {@code addIfNovel} for every accepted clause
 * @param input     question whose intermediate tree is searched; it is copied with
 *                  {@code deeperCopy()} for each result
 */
private void extractComplementClauses(Collection<Question> extracted, Question input) {
    //TODO should also address infinitive complements
    String tregexOpStr = "SBAR " + " < (S=sub !< (VP < VBG)) " + //not a participial phrase
            " !> NP|PP " + //not part of a noun phrase or PP (other methods for those)
            " [ $- /^VB.*/=verb | >+(SBAR) (SBAR $- /^VB.*/=verb) ] "; //complement of a VP (follows the verb)

    TregexPattern complementPattern = TregexPatternFactory.getPattern(tregexOpStr);
    TregexMatcher complementMatcher = complementPattern.matcher(input.getIntermediateTree());
    while (complementMatcher.find()) {
        Tree sentenceRoot = factory.newTreeNode("ROOT", new ArrayList<Tree>());
        Tree complement = complementMatcher.getNode("sub");
        Tree governingVerb = complementMatcher.getNode("verb");
        String verbLemma = QuestionUtil.getLemma(governingVerb.yield().toString(),
                governingVerb.label().toString());

        // Only verbs accepted by verbImpliesComplement produce an output sentence.
        if (verbImpliesComplement(verbLemma)) {
            sentenceRoot.addChild(complement.deepCopy());

            QuestionUtil.addPeriodIfNeeded(sentenceRoot);
            addQuotationMarksIfNeeded(sentenceRoot);

            Question complementQuestion = input.deeperCopy();
            complementQuestion.setIntermediateTree(sentenceRoot);
            if (this.getComputeFeatures) {
                complementQuestion.setFeatureValue("extractedFromFiniteClause", 1.0); //old feature name
                complementQuestion.setFeatureValue("extractedFromComplementClause", 1.0);
            }
            addIfNovel(extracted, complementQuestion);
        }
    }
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Builds a stand-alone sentence from each non-restrictive relative clause in the input,
 * e.g., John, who is a friend of mine, likes Susan. -> John is a friend of mine.
 *
 * For every match, the modified noun phrase (minus the relative clause and its
 * surrounding commas) is inserted into a copy of the relative clause, which is then
 * wrapped under a new ROOT and offered to {@code addIfNovel}.
 *
 * @param extracted collection handed to {@code addIfNovel} for every sentence built
 * @param input     question whose intermediate tree is searched; it is copied with
 *                  {@code deeperCopy()} for each result
 */
private void extractNonRestrictiveRelativeClauses(Collection<Question> extracted, Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    TregexMatcher matcher;
    TregexMatcher matcherclause;

    // optional names bound by the pattern: wherecomp ("where" clauses), possessive ("whose"),
    // preposition (WHPP head); np, sbar and relclause are bound on every match
    tregexOpStr = "NP=np < (SBAR=sbar [ < (WHADVP=wherecomp < (WRB < where)) " + " | < (WHNP !< /WP\\$/) "
            + " | < (WHNP=possessive < /WP\\$/)" //John, whose car was
            + " | < (WHPP < IN|TO=preposition) ] $-- NP $- /,/ " + " < S=relclause  !< WHADJP)";

    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());

    //iterate over all the relative clauses in the input
    //and create an output sentence for each one.
    while (matcher.find()) {
        Tree missingArgumentTree = matcher.getNode("np");
        Tree relclause = matcher.getNode("relclause");
        if (missingArgumentTree == null || relclause == null)
            continue;
        // work on copies from here on so the input's tree is not edited
        missingArgumentTree = missingArgumentTree.deepCopy();
        relclause = relclause.deepCopy();
        Tree possessive = matcher.getNode("possessive");
        Tree sbar = matcher.getNode("sbar").deepCopy();

        makeDeterminerDefinite(missingArgumentTree);

        if (possessive != null) {
            // "whose X": drop the first child of the WHNP copy and rebuild the argument
            // as a possessive NP ("<np> 's <remaining children>") from its string form
            possessive = possessive.deepCopy();
            possessive.removeChild(0);
            String newTree = "(NP (NP " + missingArgumentTree.toString() + " (POS 's))";
            for (int i = 0; i < possessive.numChildren(); i++)
                newTree += possessive.getChild(i).toString() + " ";
            newTree += ")";
            missingArgumentTree = QuestionUtil.readTreeFromString(newTree);
        }

        //remove the relative clause and the commas surrounding it from the missing argument tree
        // NOTE(review): sbar is a deep copy, so this match relies on Tree.equals comparing by
        // content rather than identity — confirm
        // NOTE(review): after the possessive rewrite above, the original NP is nested one level
        // down, so this top-level scan presumably no longer sees the SBAR — confirm
        for (int i = 0; i < missingArgumentTree.numChildren(); i++) {
            if (missingArgumentTree.getChild(i).equals(sbar)) {
                //remove the relative clause
                missingArgumentTree.removeChild(i);
                //remove the comma after the relative clause
                // (after the removal above, index i now holds the element that followed the clause)
                if (i < missingArgumentTree.numChildren()
                        && missingArgumentTree.getChild(i).label().toString().equals(",")) {
                    missingArgumentTree.removeChild(i);
                }
                //remove the comma before the relative clause
                if (i > 0 && missingArgumentTree.getChild(i - 1).label().toString().equals(",")) {
                    missingArgumentTree.removeChild(i - 1);
                    i--;
                }
                // step back so the loop's i++ lands on the element that shifted into this slot
                i--;
            }
        }

        //put the noun in the clause at the topmost place with an opening for a noun. 
        //Note that this may mess up if there are noun phrase adjuncts like "The man I met Tuesday".

        //specifically: 
        //the parent of the noun can be either a clause (S) as in "The man who met me"
        //or a verb phrase as in "The man who I met".
        //for verb phrases, add the noun to the end since it will be an object.
        //for clauses, add the noun to the beginning since it will be the subject.
        tregexOpStr = "S|VP=newparent !< NP < (VP=verb !< TO !$ TO)";
        matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
        matcherclause = matchPattern.matcher(relclause);
        boolean subjectMovement = true;
        // this first find() only decides which pattern applies; if the subject pattern
        // fails, fall back to the object pattern
        if (!matcherclause.find()) {
            tregexOpStr = "VP=newparent !< VP < /VB.*/=verb !>> (S !< NP) !<< (VP !< VP !< NP)";
            matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
            matcherclause = matchPattern.matcher(relclause);
            subjectMovement = false;
        }

        //reset (so the first match isn't skipped)
        matcherclause = matchPattern.matcher(relclause);

        if (matcherclause.find()) {
            Tree newparenttree = matcherclause.getNode("newparent");
            Tree verbtree = matcherclause.getNode("verb");
            boolean ppRelativeClause = false;

            if (matcher.getNode("wherecomp") != null) {
                // "where" clauses: wrap the argument in a PP headed by "at"
                String tmp = "(PP (IN at) " + missingArgumentTree.toString() + ")";
                missingArgumentTree = QuestionUtil.readTreeFromString(tmp);
                ppRelativeClause = true;
                subjectMovement = false;
            } else if (matcher.getNode("preposition") != null) {
                // WHPP clauses: wrap the argument in a PP headed by the matched preposition
                // (subjectMovement is left unchanged in this branch)
                String tmp = "(PP (IN " + matcher.getNode("preposition").yield().toString() + ") "
                        + missingArgumentTree.toString() + ")";
                missingArgumentTree = QuestionUtil.readTreeFromString(tmp);
                ppRelativeClause = true;
            }

            if (subjectMovement) { //subject
                // insert at the verb's position, i.e. immediately before the verb
                newparenttree.addChild(newparenttree.objectIndexOf(verbtree), missingArgumentTree);
            } else { // newparentlabel is VP   
                if (ppRelativeClause)
                    // PPs are appended as the last child
                    newparenttree.addChild(newparenttree.numChildren(), missingArgumentTree);
                else
                    // otherwise insert immediately after the verb
                    newparenttree.addChild(newparenttree.objectIndexOf(verbtree) + 1, missingArgumentTree);
            }

            //create a new tree with punctuation
            Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>());
            newTree.addChild(relclause);
            QuestionUtil.addPeriodIfNeeded(newTree);

            //if(GlobalProperties.getDebug()) System.err.println("extractRelativeClauses: "+ newTree.toString());
            addQuotationMarksIfNeeded(newTree);
            Question newTreeWithFeatures = input.deeperCopy();
            newTreeWithFeatures.setIntermediateTree(newTree);
            if (this.getComputeFeatures)
                newTreeWithFeatures.setFeatureValue("extractedFromRelativeClause", 1.0);
            addIfNovel(extracted, newTreeWithFeatures);
        }
    }
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Builds a stand-alone sentence from each participial modifier of a noun,
 * e.g., John, hoping to get a good grade, studied. -> John hoped to get a good grade.
 *   Walking to the store, John saw Susan -> John was walking to the store.
 *
 *   NOTE: This method produces false positives for sentences like, 
 *            "Broadly speaking, the project was successful."
 *         where the participial phrase does not modify the subject.
 *
 * A match whose subject, modifier, or tense node is unbound is skipped. Previously
 * the null check ran only after those nodes had already been dereferenced.
 *
 * @param extracted collection that receives one new Question per sentence built
 * @param input     question whose intermediate tree is searched; it is copied with
 *                  {@code deeperCopy()} for each result
 */
private void extractNounParticipialModifiers(Collection<Question> extracted, Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    TregexMatcher matcher;

    tregexOpStr = "ROOT < (S " + " [ << (NP < (NP=subj  $++ (/,/ $+ (VP=modifier <# VBN|VBG|VP=tense )))) " //modifiers that appear after nouns
            + " | < (S !< NP|SBAR < (VP=modifier <# VBN|VBG|VP=tense) $+ (/,/ $+ NP=subj)) " //modifiers before the subject. e.g., Founded by John, the company...
            + " | < (SBAR < (S !< NP|SBAR < (VP=modifier <# VBN|VBG=tense)) $+ (/,/ $+ NP=subj)) " //e.g., While walking to the store, John saw Susan.
            + " | < (PP=modifier !< NP <# VBG=tense $+ (/,/ $+ NP=subj)) ] ) " // e.g., Walking to the store, John saw Susan.
            + " <<# /^VB.*$/=maintense "; //tense determined by top-most verb

    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());
    while (matcher.find()) {
        Tree subj = matcher.getNode("subj");
        Tree vptree = matcher.getNode("modifier");
        Tree verb = matcher.getNode("tense");

        // Guard before any dereference; skip this match rather than throw.
        if (subj == null || vptree == null || verb == null)
            continue;

        Tree nountree = subj.deepCopy();
        makeDeterminerDefinite(nountree);

        // NOTE(review): vptree is still the node taken from the matched tree here, so this
        // relabels the input's intermediate tree in place — confirm that is intended
        if (vptree.label().toString().equals("PP"))
            vptree.label().setValue("VP");
        String verbPOS = findTense(matcher.getNode("maintense"));

        String newTreeStr;
        if (verb.label().toString().equals("VBG")) {
            //for present partcipials, change the tense to the tense of the main verb
            //e.g., walking to the store -> walked to the store
            String verbLemma = QuestionUtil.getLemma(verb.getChild(0).label().toString(),
                    verb.label().toString());
            String newVerb = this.conjugator.getSurfaceForm(verbLemma, verbPOS);
            // look up the verb's index on the original node before copying:
            // the copy no longer contains the 'verb' object itself
            int verbIndex = vptree.objectIndexOf(verb);
            vptree = vptree.deepCopy();
            vptree.removeChild(verbIndex);
            vptree.addChild(verbIndex, QuestionUtil.readTreeFromString("(" + verbPOS + " " + newVerb + ")"));
            // NOTE(review): this branch prints the original subject node, not nountree,
            // so the makeDeterminerDefinite result is not used here — confirm intended
            newTreeStr = "(ROOT (S " + subj.toString() + " " + vptree.toString()
                    + " (. .)))";
        } else {
            //for past participials, add a copula
            //e.g., John, exhausted, -> John was exhausted
            //(or for conjunctions, just add the copula---kind of a hack to make the moby dick sentence work out)
            String auxiliary;
            if (verbPOS.equals("VBP") || verbPOS.equals("VBD")) {
                if (isPlural(nountree))
                    auxiliary = "(VBD were)";
                else
                    auxiliary = "(VBD was)";
            } else {
                if (isPlural(nountree))
                    auxiliary = "(VB are)";
                else
                    auxiliary = "(VBZ is)";
            }

            newTreeStr = "(ROOT (S " + nountree + " (VP " + auxiliary + " " + vptree + ") (. .)))";
        }

        Tree newTree = QuestionUtil.readTreeFromString(newTreeStr);
        correctTense(newTree.getChild(0).getChild(0), newTree.getChild(0));
        addQuotationMarksIfNeeded(newTree);

        //if(GlobalProperties.getDebug()) System.err.println("extractNounParticipialModifiers: "+ newTree.toString());
        Question newTreeWithFeatures = input.deeperCopy();
        newTreeWithFeatures.setIntermediateTree(newTree);
        if (this.getComputeFeatures)
            newTreeWithFeatures.setFeatureValue("extractedFromParticipial", 1.0); //old feature name
        if (this.getComputeFeatures)
            newTreeWithFeatures.setFeatureValue("extractedFromNounParticipial", 1.0);
        // added unconditionally (no addIfNovel check in this method)
        extracted.add(newTreeWithFeatures);
    }

}

From source file:elkfed.coref.mentions.Mention.java

License:Apache License

/**
 * Takes the sentence tree of this mention and replaces the unlexicalized
 * tokens in the leaves with their discourse ids. Usefull to derive spans
 * based on syntax./*from ww  w .j a  va 2s.  c  om*/
 * 
 * @author samuel
 * @param sentTree
 *            original sentence tree
 * @return Tree with discourse ids in leaves
 */
private Tree getSentenceTreeWithDiscurseIdsInLeaves(Tree sentTree) {
    Tree result = sentTree.deepCopy();
    List<Tree> leaves = result.getLeaves();
    String[] discElements = getSentenceDiscourseElements();
    String[] discIDs = getSentenceDiscourseElementIDs();
    if (discElements.length != leaves.size() || discIDs.length != leaves.size()) {
        throw new RuntimeException("SentenceTree leaves and discourse elements do not match for mention "
                + this.getMarkable().getID());
    }

    /*
     * sometimes tueba-d/z punctuations in the parse tree are out of context
     * appended to the root of the tree hence the corrector part
     * 
     * yv: ... and we need to unescape those words for the semeval2mmax
     * output. duh.
     */
    int corrector = 0;
    for (int i = 0; i < discElements.length; i++) {
        String leafWord = leaves.get(i - corrector).value().toLowerCase();
        leafWord = leafWord.replaceAll("-lrb-", "(");
        leafWord = leafWord.replaceAll("-rrb-", ")");
        leafWord = leafWord.replaceAll("\\\\", "");
        String discWord = discElements[i].replaceAll("\\\\", "");
        if (discWord.equalsIgnoreCase(leafWord)) {
            leaves.get(i - corrector).setValue(discIDs[i]);
        } else {
            System.err.format("non-matching: %s vs %s\n", leafWord, discElements[i]);
            corrector++;
        }
    }
    return result;
}