Example usage for edu.stanford.nlp.trees Tree label

Introduction

On this page you can find example usages of the label() method of edu.stanford.nlp.trees.Tree.

Prototype

@Override
public Label label()

Source Link

Document

Returns the label associated with the current node, or null if there is no label.

Usage

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Builds copula sentences out of appositive constructions.
 *
 * e.g., Lincoln, the 16th president, was tall. -> Lincoln was the 16th president.
 * The meeting, in 1984, was important. -> The meeting was in 1984.
 *
 * @param extracted collection that receives each newly built question (through addIfNovel)
 * @param input question whose intermediate tree is searched for appositives
 */
private void extractAppositives(Collection<Question> extracted, Question input) {
    //the main verb is captured too, so that the copula can copy its tense
    String tregexOpStr = "NP < (NP=noun !$-- NP $+ (/,/ $++ NP|PP=appositive !$ CC|CONJP)) "
            + " >> (ROOT <<# /^VB.*/=mainverb) ";
    TregexPattern matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    TregexMatcher matcher = matchPattern.matcher(input.getIntermediateTree());

    while (matcher.find()) {
        Tree verbtree = matcher.getNode("mainverb");
        //work on a copy so that making the determiner definite does not touch the input tree
        Tree nountree = matcher.getNode("noun").deepCopy();
        Tree appositivetree = matcher.getNode("appositive");

        makeDeterminerDefinite(nountree);

        //NOTE: a check that skipped the match when both heads were proper nouns
        //(e.g., "Pittsburgh, PA") used to live here; it is disabled.

        //pick a copula that agrees with the noun in number and with the main verb in tense
        boolean pastTense = verbtree.label().toString().equals("VBD");
        boolean plural = isPlural(nountree);
        String copula;
        if (pastTense) {
            copula = plural ? "(VBD were)" : "(VBD was)";
        } else {
            //present-tense forms carry present-tense tags (previously mislabeled as VBD)
            copula = plural ? "(VBP are)" : "(VBZ is)";
        }

        //make a new tree for a copula sentence with the noun and appositive
        Tree newTree = QuestionUtil.readTreeFromString(
                "(ROOT (S " + nountree + " (VP " + copula + " " + appositivetree + ") (. .)))");
        addQuotationMarksIfNeeded(newTree);

        Question newTreeWithFeatures = input.deeperCopy();
        newTreeWithFeatures.setIntermediateTree(newTree);
        if (this.getComputeFeatures) {
            newTreeWithFeatures.setFeatureValue("extractedFromAppositive", 1.0);
        }

        addIfNovel(extracted, newTreeWithFeatures);
    }
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Builds new sentences out of participial phrases that modify a noun.
 *
 * e.g., John, hoping to get a good grade, studied. -> John hoped to get a good grade.
 *   Walking to the store, John saw Susan -> John was walking to the store.
 *
 *   NOTE: This method produces false positives for sentences like,
 *            "Broadly speaking, the project was successful."
 *         where the participial phrase does not modify the subject.
 *
 * @param extracted collection to which each newly built question is added
 * @param input question whose intermediate tree is searched
 */
private void extractNounParticipialModifiers(Collection<Question> extracted, Question input) {
    String tregexOpStr = "ROOT < (S " + " [ << (NP < (NP=subj  $++ (/,/ $+ (VP=modifier <# VBN|VBG|VP=tense )))) " //modifiers that appear after nouns
            + " | < (S !< NP|SBAR < (VP=modifier <# VBN|VBG|VP=tense) $+ (/,/ $+ NP=subj)) " //modifiers before the subject. e.g., Founded by John, the company...
            + " | < (SBAR < (S !< NP|SBAR < (VP=modifier <# VBN|VBG=tense)) $+ (/,/ $+ NP=subj)) " //e.g., While walking to the store, John saw Susan.
            + " | < (PP=modifier !< NP <# VBG=tense $+ (/,/ $+ NP=subj)) ] ) " // e.g., Walking to the store, John saw Susan.
            + " <<# /^VB.*$/=maintense "; //tense determined by top-most verb

    TregexPattern matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    TregexMatcher matcher = matchPattern.matcher(input.getIntermediateTree());
    while (matcher.find()) {
        Tree subjNode = matcher.getNode("subj");
        Tree vptree = matcher.getNode("modifier");
        Tree verb = matcher.getNode("tense");

        //bail out before either node is dereferenced
        //(this check used to sit after the first uses, where it could never trigger)
        if (vptree == null || subjNode == null) {
            return;
        }

        //copy the subject so that making its determiner definite leaves the input tree alone
        Tree nountree = subjNode.deepCopy();
        makeDeterminerDefinite(nountree);

        //NOTE(review): this relabels the matched node itself, i.e. a node of the
        //input's intermediate tree -- confirm that the side effect is intended
        if (vptree.label().toString().equals("PP")) {
            vptree.label().setValue("VP");
        }
        String verbPOS = findTense(matcher.getNode("maintense"));

        String newTreeStr;
        if (verb.label().toString().equals("VBG")) {
            //for present participials, change the tense to the tense of the main verb
            //e.g., walking to the store -> walked to the store
            String verbLemma = QuestionUtil.getLemma(verb.getChild(0).label().toString(),
                    verb.label().toString());
            String newVerb = this.conjugator.getSurfaceForm(verbLemma, verbPOS);
            //look the verb up by identity before copying; the copy holds different node objects
            int verbIndex = vptree.objectIndexOf(verb);
            vptree = vptree.deepCopy();
            vptree.removeChild(verbIndex);
            vptree.addChild(verbIndex, QuestionUtil.readTreeFromString("(" + verbPOS + " " + newVerb + ")"));
            //NOTE(review): this branch uses the original subject node rather than nountree
            //(the copy with the definite determiner) -- confirm whether that is deliberate
            newTreeStr = "(ROOT (S " + subjNode.toString() + " " + vptree.toString() + " (. .)))";
        } else {
            //for past participials, add a copula
            //e.g., John, exhausted, -> John was exhausted
            //(or for conjunctions, just add the copula---kind of a hack to make the moby dick sentence work out)
            boolean plural = isPlural(nountree);
            String auxiliary;
            if (verbPOS.equals("VBP") || verbPOS.equals("VBD")) {
                auxiliary = plural ? "(VBD were)" : "(VBD was)";
            } else {
                //"are" is a non-3rd-person present form, so it is tagged VBP (previously VB)
                auxiliary = plural ? "(VBP are)" : "(VBZ is)";
            }

            newTreeStr = "(ROOT (S " + nountree + " (VP " + auxiliary + " " + vptree + ") (. .)))";
        }

        Tree newTree = QuestionUtil.readTreeFromString(newTreeStr);
        //subject = first child of S, clause = S
        correctTense(newTree.getChild(0).getChild(0), newTree.getChild(0));
        addQuotationMarksIfNeeded(newTree);

        Question newTreeWithFeatures = input.deeperCopy();
        newTreeWithFeatures.setIntermediateTree(newTree);
        if (this.getComputeFeatures) {
            newTreeWithFeatures.setFeatureValue("extractedFromParticipial", 1.0); //old feature name
            newTreeWithFeatures.setFeatureValue("extractedFromNounParticipial", 1.0);
        }
        extracted.add(newTreeWithFeatures);
    }
}

From source file:edu.cmu.ark.Question.java

License:Open Source License

/**
 * Collects the nodes of the source tree that carry one of the tags
 * RB, VB, VBD, VBP, VBZ, IN, MD, WRB, WDT or CC, that c-command the source
 * counterpart of the intermediate tree's head word, and that do not share
 * a parent with it.
 *
 * The counterpart is the first preterminal of the source tree whose word
 * has the same lemma as the head of the intermediate tree's first child.
 *
 * @return the matching nodes; empty when no counterpart is found
 */
public List<Tree> findLogicalWordsAboveIntermediateTree() {
    List<Tree> logicalWords = new ArrayList<Tree>();

    //lemma of the head word of the intermediate tree's first child
    Tree headPreterminal = intermediateTree.getChild(0)
            .headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder());
    String headLemma = AnalysisUtilities.getInstance().getLemma(headPreterminal.yield().toString(),
            headPreterminal.label().toString());

    //first preterminal of the source tree whose word shares that lemma
    Tree sourcePred = null;
    for (Tree leaf : sourceTree.getLeaves()) {
        Tree preterminal = leaf.parent(sourceTree);
        String leafLemma = AnalysisUtilities.getInstance().getLemma(leaf.label().toString(),
                preterminal.label().toString());
        if (leafLemma.equals(headLemma)) {
            sourcePred = preterminal;
            break;
        }
    }

    TregexPattern logicalWordPattern = TregexPatternFactory
            .getPattern("RB|VB|VBD|VBP|VBZ|IN|MD|WRB|WDT|CC=command");
    TregexMatcher matcher = logicalWordPattern.matcher(sourceTree);

    while (matcher.find() && sourcePred != null) {
        Tree candidate = matcher.getNode("command");
        if (AnalysisUtilities.cCommands(sourceTree, candidate, sourcePred)) {
            //siblings of the predicate are not reported
            if (candidate.parent(sourceTree) != sourcePred.parent(sourceTree)) {
                logicalWords.add(candidate);
            }
        }
    }

    return logicalWords;
}

From source file:edu.cmu.ark.QuestionTransducer.java

License:Open Source License

/**
 * Prefixes the label of every node that the given Tregex pattern binds to
 * the name "unmovable" with "UNMOVABLE-". The labels are changed in place.
 *
 * Note: It would probably be easier to use the Tregex operation to find the nodes
 * and then change the labels directly rather than writing a Tsurgeon operation.
 * But, when I wrote the original code, I used Tsurgeon.  Probably not worth refactoring.
 *
 * @param inputTree tree that is searched and whose matching labels are rewritten
 * @param tregexOpStr Tregex pattern; the nodes to mark must be named "unmovable"
 */
private void markNodesAsUnmovableUsingPattern(Tree inputTree, String tregexOpStr) {
    TregexMatcher matcher = TregexPatternFactory.getPattern(tregexOpStr).matcher(inputTree);

    while (matcher.find()) {
        Tree node = matcher.getNode("unmovable");
        node.label().setValue("UNMOVABLE-" + node.label().toString());
    }
}

From source file:edu.cmu.ark.QuestionTransducer.java

License:Open Source License

/**
 * This method decomposes the main verb of the sentence
 * for yes-no questions and WH questions where the answer
 * phrase is not the subject./*from   w  ww  . j a va  2s .  co m*/
 *
 * e.g., I met John -> I did meet John.
 * (which would later become "Who did I meet?")
 *
 */
private Tree decomposePredicate(Tree inputTree) {
    Tree copyTree = inputTree.deeperCopy();

    List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    String tregexOpStr;
    List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>();
    TregexPattern matchPattern;
    TsurgeonPattern p;
    TregexMatcher matcher;
    Tree tmpNode;
    //tregexOpStr = "ROOT < (S=mainclause < (VP=predphrase < /VB.?/=tensedverb !< (VP < /VB.?/)))";
    //tregexOpStr = "ROOT < (S=mainclause < (VP=predphrase < (/VB.?/=tensedverb !< is|was|were|am|are|has|have|had|do|does|did)))";

    //This rather complex rule identifies predicates to decompose.
    //There are two cases, separated by a disjunction.
    //One could break it apart into separate rules to make it simpler...
    //
    //The first part of the disjunction
    //(i.e., < (/VB.?/=tensedverb !< is|was|were|am|are|has|have|had|do|does|did) )
    //is for handling basic sentences
    //(e.g., John bought an apple -> What did John buy?),
    //sentences with auxiliaries
    //(e.g., John had bought an apple -> Had John bought an apple?),
    //and sentences with participial phrases
    //(e.g., John seemed finished with the apple -> What did John seem finished with?).
    //
    //The second part of the disjunction
    //(i.e., < /VB.?/=tensedverb !< VP )
    //is for handling sentences that have predicates
    //that can also be auxiliaries (e.g., I have a book).
    //In these cases, we do want to decompose have, has, had, etc.
    //(e.g., What did I have?)
    tregexOpStr = "ROOT < (S=mainclause < (VP=predphrase [ < (/VB.?/=tensedverb !< is|was|were|am|are|has|have|had|do|does|did) | < /VB.?/=tensedverb !< VP ]))";

    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(copyTree);
    if (matcher.find()) {
        Tree subtree = matcher.getNode("tensedverb");
        String lemma = AnalysisUtilities.getInstance().getLemma(subtree.getChild(0).label().toString(),
                subtree.label().toString());
        String aux = getAuxiliarySubtree(subtree);

        if (!lemma.equals("be")) {
            ps.add(Tsurgeon.parseOperation("replace predphrase (MAINVP=newpred PLACEHOLDER)"));
            ps.add(Tsurgeon.parseOperation("insert predphrase >-1 newpred"));
            ps.add(Tsurgeon.parseOperation("insert (VBLEMMA PLACEHOLDER) $+ tensedverb"));
            ps.add(Tsurgeon.parseOperation("delete tensedverb"));
            p = Tsurgeon.collectOperations(ps);
            ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p));
            Tsurgeon.processPatternsOnTree(ops, copyTree);
            matchPattern = TregexPatternFactory.getPattern("MAINVP=mainvp");
            matcher = matchPattern.matcher(copyTree);
            matcher.find();
            tmpNode = matcher.getNode("mainvp");
            tmpNode.removeChild(0);
            tmpNode.label().setValue("VP");
            tmpNode.addChild(0, AnalysisUtilities.getInstance().readTreeFromString(aux));

            matchPattern = TregexPatternFactory.getPattern("VBLEMMA=vblemma");
            matcher = matchPattern.matcher(copyTree);
            matcher.find();
            tmpNode = matcher.getNode("vblemma");
            tmpNode.removeChild(0);
            tmpNode.label().setValue("VB");
            tmpNode.addChild(0, AnalysisUtilities.getInstance().readTreeFromString(lemma));
        }
    }

    if (GlobalProperties.getDebug())
        System.err.println("decomposePredicate: " + copyTree.toString());
    return copyTree;
}

From source file:edu.cmu.ark.QuestionTransducer.java

License:Open Source License

/**
 * Returns the singular present tense form of a tensed verb, as a
 * bracketed "(VBZ word)" string. A past tense verb (VBD) is returned
 * unchanged, in its own string form.
 *
 * This only affects the output when generating from sentences where
 * first and second person pronouns are the subject.
 *
 * E.g.,
 * Affects:
 * I walk -> Who walks? (rather than, Who walk?)
 *
 * Does not affect:
 * He walks -> Who walks?
 *
 * @param tensedVerbSubtree preterminal of the tensed verb (tag node with the word as its child)
 * @return the bracketed subtree string described above
 */
private String getSingularFormSubtree(Tree tensedVerbSubtree) {
    String verbLemma = AnalysisUtilities.getInstance().getLemma(
            tensedVerbSubtree.getChild(0).label().toString(), tensedVerbSubtree.label().toString());

    //past tense has no separate singular form
    if (tensedVerbSubtree.value().equals("VBD")) {
        return tensedVerbSubtree.toString();
    }
    return "(VBZ " + AnalysisUtilities.getInstance().getSurfaceForm(verbLemma, "VBZ") + ")";
}

From source file:edu.cmu.ark.QuestionTransducer.java

License:Open Source License

/**
 * Also mark UNMOVABLE phrases as possible answers.
 *
 * Works on a copy of the input: the main clause subject (if the subject
 * pattern matches) gets the suffix "-0", and every UNMOVABLE-NP node gets
 * the suffix "-k", where k is the running value of numWHPhrasesUnmovable.
 * That counter is reset to zero on entry and incremented for each node marked.
 *
 * @param inputTree tree to mark; it is copied, not modified
 * @return the marked copy
 */
private Tree markPossibleAnswerPhrasesUnmovable(Tree inputTree) {
    Tree copyTree = inputTree.deeperCopy();
    numWHPhrasesUnmovable = 0;

    TregexPattern matchPattern;
    TregexMatcher matcher;
    Tree tmp;

    //find and mark the main clause subject
    matchPattern = TregexPatternFactory.getPattern("ROOT < (S < (NP|SBAR=subj $+ /,/ !$++ NP|SBAR))");
    matcher = matchPattern.matcher(copyTree);
    if (matcher.find()) {
        tmp = matcher.getNode("subj");
        tmp.label().setValue(tmp.label().toString() + "-0");
        numWHPhrasesUnmovable++;
    }

    //noun phrases
    matchPattern = TregexPatternFactory.getPattern("ROOT=root << UNMOVABLE-NP=np");
    matcher = matchPattern.matcher(copyTree);
    while (matcher.find()) {
        tmp = matcher.getNode("np");
        tmp.label().setValue(tmp.label().toString() + "-" + numWHPhrasesUnmovable);
        numWHPhrasesUnmovable++;
    }

    if (GlobalProperties.getDebug()) {
        System.err.println("markPossibleAnswerPhrasesUnmovable: " + copyTree.toString());
    }
    return copyTree;
}

From source file:edu.cmu.ark.QuestionTransducer.java

License:Open Source License

/**
 * Marks possible answer phrase nodes with indexes for later processing.
 * This step might be easier with the Stanford Parser API's Tree class methods
 * than with Tsurgeon...
 *
 * Works on a copy of the input: the main clause subject (if the subject
 * pattern matches) gets the suffix "-0", and every node labeled NP, PP or
 * SBAR gets the suffix "-k", where k is the running value of numWHPhrases.
 * That counter is reset to zero on entry and incremented for each node marked.
 *
 * @param inputTree tree to mark; it is copied, not modified
 * @return the marked copy
 */
private Tree markPossibleAnswerPhrases(Tree inputTree) {
    Tree copyTree = inputTree.deeperCopy();
    numWHPhrases = 0;

    TregexPattern matchPattern;
    TregexMatcher matcher;
    Tree tmp;

    //find and mark the main clause subject
    matchPattern = TregexPatternFactory.getPattern("ROOT < (S < (NP|SBAR=subj $+ /,/ !$++ NP|SBAR))");
    matcher = matchPattern.matcher(copyTree);
    if (matcher.find()) {
        tmp = matcher.getNode("subj");
        tmp.label().setValue(tmp.label().toString() + "-0");
        numWHPhrases++;
    }

    //noun phrases
    matchPattern = TregexPatternFactory.getPattern("ROOT=root << NP|PP|SBAR=np");
    matcher = matchPattern.matcher(copyTree);
    while (matcher.find()) {
        tmp = matcher.getNode("np");
        tmp.label().setValue(tmp.label().toString() + "-" + numWHPhrases);
        numWHPhrases++;
    }

    if (GlobalProperties.getDebug()) {
        System.err.println("markPossibleAnswerPhrases: " + copyTree.toString());
    }
    return copyTree;
}

From source file:edu.cmu.ark.SentenceSimplifier.java

License:Open Source License

/**
 * Re-inflects the head verb of each VP child of a clause so that it agrees
 * with a subject whose head tag does not end in "S".
 *
 * Examples from the original documentation:
 * e.g., John and Mary like Bill.  -> John LIKES Bill.  Mary LIKES Bill.
 * John and I like Bill -> John LIKES Bill.  I LIKE Bill.
 * John and I are old. -> I IS old. John IS old.
 *
 * @param subject the subject subtree the verb has to agree with
 * @param clause the clause whose VP children are inspected and modified in place
 */
private void correctTense(Tree subject, Tree clause) {
    for (Tree sibling : clause.getChildrenAsList()) {
        String headPOS = subject.headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder()).label()
                .toString();

        //only VPs are of interest, and only when the subject's head tag does not end in "S"
        if (!sibling.label().toString().equals("VP") || headPOS.endsWith("S")) {
            continue;
        }

        Tree verbPreterminal = sibling.headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder());
        String oldVerbPOS = verbPreterminal.label().toString();

        String newVerbPOS;
        if (oldVerbPOS.equals("VB") || oldVerbPOS.equals("VBP")) { //the parser confuses VBP with VB
            //original main verb was plural but the conjoined subject word is singular
            //e.g., John (and Mary) like Bill.  -> John like Bill.
            String subjectText = subject.yield().toString();
            boolean firstOrSecondPerson = subjectText.equals("I") || subjectText.equals("you");
            newVerbPOS = firstOrSecondPerson ? "VBP" : "VBZ";
        } else if (oldVerbPOS.equals("VBD")) {
            newVerbPOS = "VBD";
        } else {
            //any other tag is left as it is
            continue;
        }

        String verbLemma = AnalysisUtilities.getInstance()
                .getLemma(verbPreterminal.getChild(0).label().toString(), oldVerbPOS);

        //"be" needs special handling in the past tense and for the subject "I"
        String newVerb;
        boolean isBe = verbLemma.equals("be");
        if (isBe && newVerbPOS.equals("VBD")) {
            newVerb = subject.label().toString().endsWith("S") ? "were" : "was";
        } else if (isBe && subject.yield().toString().equals("I") && newVerbPOS.equals("VBP")) {
            newVerb = "am";
        } else {
            newVerb = AnalysisUtilities.getInstance().getSurfaceForm(verbLemma, newVerbPOS);
        }

        //swap the old verb preterminal for the re-inflected one, at the same position
        Tree verbParent = verbPreterminal.parent(sibling);
        int verbPosition = verbParent.indexOf(verbPreterminal);
        verbParent.removeChild(verbPosition);
        verbParent.addChild(verbPosition,
                AnalysisUtilities.getInstance().readTreeFromString("(" + newVerbPOS + " " + newVerb + ")"));
    }
}

From source file:edu.cmu.ark.SentenceSimplifier.java

License:Open Source License

/**
 * Replaces the parent of one node by that node itself, which drops the
 * node's siblings from the tree.
 *
 * @param copy tree that is modified in place
 * @param childindex node number (as used by getNodeNumber) of the node to keep
 */
private void removeConjoinedSiblingsHelper(Tree copy, int childindex) {
    if (GlobalProperties.getDebug()) {
        System.err.println("removeConjoinedSiblingsHelper: " + copy.toString());
    }

    Tree keptNode = copy.getNodeNumber(childindex);
    Tree keptParent = keptNode.parent(copy);
    Tree keptGrandparent = keptParent.parent(copy);
    int parentSlot = keptGrandparent.indexOf(keptParent);

    if (!keptNode.label().toString().startsWith("VB")) {
        //ordinary case: the kept node simply takes its parent's place
        keptGrandparent.setChild(parentSlot, keptNode);
        return;
    }

    //By an annoying PTB convention, some verb phrase conjunctions
    //can conjoin two verb preterminals under a VP,
    //rather than only allowing VP nodes to be conjoined.
    //e.g., John walked and played.
    //So, we add an extra VP node in between if necessary
    keptGrandparent.removeChild(parentSlot);
    Tree wrapperVP = factory.newTreeNode("VP", new ArrayList<Tree>());
    wrapperVP.addChild(keptNode);
    keptGrandparent.addChild(parentSlot, wrapperVP);
}