Example usage for edu.stanford.nlp.trees Tree label

Introduction

On this page you can find example usages of the label() method of edu.stanford.nlp.trees.Tree.

Prototype

@Override
public Label label()

Source Link

Document

Returns the label associated with the current node, or null if there is no label.

Usage

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

/**
 * Lower-cases the first character of the first token (leftmost leaf) of
 * {@code inputTree}, modifying the leaf's label in place.
 *
 * The token is left unchanged when its preterminal label starts with "NNP"
 * or when the token is exactly "I". Trees without leaves, leaves without a
 * parent inside {@code inputTree}, and empty tokens are also left unchanged.
 *
 * @param inputTree tree whose first leaf label may be rewritten
 */
public static void downcaseFirstToken(Tree inputTree) {
    List<Tree> leaves = inputTree.getLeaves();
    // get(0) on an empty list throws, so test emptiness instead of relying on a null check afterwards.
    if (leaves.isEmpty() || leaves.get(0) == null) {
        return;
    }
    Tree firstWordTree = leaves.get(0);

    // No parent means there is no POS tag to consult (e.g. the tree is a bare leaf).
    Tree preterm = firstWordTree.parent(inputTree);
    if (preterm == null) {
        return;
    }

    String firstWord = firstWordTree.yield().toString();
    // substring(0, 1) would throw on an empty token.
    if (firstWord.isEmpty()) {
        return;
    }

    if (!preterm.label().toString().matches("^NNP.*") && !firstWord.equals("I")) {
        firstWord = firstWord.substring(0, 1).toLowerCase() + firstWord.substring(1);
        firstWordTree.label().setValue(firstWord);
    }
}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

/**
 * Upper-cases the first character of the first token (leftmost leaf) of
 * {@code inputTree}, modifying the leaf's label in place.
 *
 * Trees without leaves and empty tokens are left unchanged.
 *
 * @param inputTree tree whose first leaf label is rewritten
 */
public static void upcaseFirstToken(Tree inputTree) {
    List<Tree> leaves = inputTree.getLeaves();
    // get(0) on an empty list throws, so test emptiness instead of relying on a null check afterwards.
    if (leaves.isEmpty() || leaves.get(0) == null) {
        return;
    }
    Tree firstWordTree = leaves.get(0);

    String firstWord = firstWordTree.yield().toString();
    // substring(0, 1) would throw on an empty token.
    if (firstWord.isEmpty()) {
        return;
    }

    firstWord = firstWord.substring(0, 1).toUpperCase() + firstWord.substring(1);
    firstWordTree.label().setValue(firstWord);
}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

/**
 * Remove traces and non-terminal decorations (e.g., "-SUBJ" in "NP-SUBJ") from a Penn Treebank-style tree.
 *
 * @param inputTree/*from w  w  w.j  av  a2 s  .co  m*/
 */
public void normalizeTree(Tree inputTree) {
    inputTree.label().setFromString("ROOT");

    List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>();
    String tregexOpStr;
    TregexPattern matchPattern;
    TsurgeonPattern p;
    TregexMatcher matcher;

    tregexOpStr = "/\\-NONE\\-/=emptynode";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(inputTree);
    ps.add(Tsurgeon.parseOperation("prune emptynode"));
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    p = Tsurgeon.collectOperations(ps);
    ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p));
    Tsurgeon.processPatternsOnTree(ops, inputTree);

    Label nonterminalLabel;

    tregexOpStr = "/.+\\-.+/=nonterminal < __";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(inputTree);
    while (matcher.find()) {
        nonterminalLabel = matcher.getNode("nonterminal");
        if (nonterminalLabel == null)
            continue;
        nonterminalLabel.setFromString(tlp.basicCategory(nonterminalLabel.value()));
    }

}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

/**
 * Parses {@code sentence} and returns its leaf words joined by single spaces,
 * skipping every leaf whose parent label is "DT", "CD" or "IN"
 * (determiners, numbers and prepositions).
 *
 * @param sentence the sentence to parse
 * @return the retained words separated by spaces, or {@code sentence} itself
 *         when nothing was retained
 */
public String getContentWords(String sentence) {
    ParseResult parsed = this.parseSentence(sentence);
    StringBuilder kept = new StringBuilder();
    for (Tree leaf : parsed.parse.getLeaves()) {
        String word = leaf.label().toString();
        String pos = leaf.parent(parsed.parse).label().toString();
        boolean unwanted = pos.equals("DT") || pos.equals("CD") || pos.equals("IN");
        if (!unwanted) {
            kept.append(" ").append(word);
        }
    }
    // Drop the leading separator; fall back to the raw sentence when nothing survived.
    return kept.length() > 0 ? kept.substring(1) : sentence;
}

From source file:edu.cmu.ark.nlp.question.Question.java

License:Open Source License

/**
 * Collects nodes of {@code sourceTree} labelled RB, VB, VBD, VBP, VBZ, IN, MD,
 * WRB, WDT or CC for which {@code QuestionUtil.cCommands} holds with respect
 * to the source predicate and whose parent differs from the predicate's parent.
 *
 * The source predicate is the parent of the first leaf of {@code sourceTree}
 * whose lemma equals the lemma of the head preterminal of the first child of
 * {@code intermediateTree}. If no such leaf exists the result is empty.
 *
 * @return the matching nodes, in the order the matcher reports them
 */
public List<Tree> findLogicalWordsAboveIntermediateTree() {
    Tree headPreterm = intermediateTree.getChild(0).headPreTerminal(this.hf);
    String targetLemma = QuestionUtil.getLemma(headPreterm.yield().toString(), headPreterm.label().toString());

    // Find the first preterminal in the source tree whose word shares that lemma.
    Tree sourcePred = null;
    for (Tree leaf : sourceTree.getLeaves()) {
        Tree leafParent = leaf.parent(sourceTree);
        String leafLemma = QuestionUtil.getLemma(leaf.label().toString(), leafParent.label().toString());
        if (leafLemma.equals(targetLemma)) {
            sourcePred = leafParent;
            break;
        }
    }

    List<Tree> found = new ArrayList<Tree>();
    TregexPattern commandPattern = TregexPatternFactory
            .getPattern("RB|VB|VBD|VBP|VBZ|IN|MD|WRB|WDT|CC=command");
    TregexMatcher commandMatcher = commandPattern.matcher(sourceTree);
    if (sourcePred == null) {
        return found;
    }

    while (commandMatcher.find()) {
        Tree candidate = commandMatcher.getNode("command");
        boolean commands = QuestionUtil.cCommands(sourceTree, candidate, sourcePred);
        if (commands && candidate.parent(sourceTree) != sourcePred.parent(sourceTree)) {
            found.add(candidate);
        }
    }
    return found;
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Re-inflects the head verb of each VP child of {@code clause} so that it
 * agrees with {@code subject}; the clause is modified in place.
 *
 * e.g., John and Mary like Bill.  -> John LIKES Bill.  Mary LIKES Bill.
 * John and I like Bill -> John LIKES Bill.  I LIKE Bill.
 * John and I are old. -> John IS old.  I AM old.
 * ("am" comes from the special case below, assuming the lemmatizer maps "are" to "be".)
 *
 * Nothing is changed when the subject's head preterminal tag ends in "S",
 * or when the VP's head verb is tagged anything other than VB, VBP or VBD.
 *
 * @param subject the subject subtree the verb must agree with
 * @param clause  the clause whose VP children are inspected
 */
private void correctTense(Tree subject, Tree clause) {
    for (Tree uncle : clause.getChildrenAsList()) {
        String headPOS = subject.headPreTerminal(this.hf).label().toString();

        // Only a VP sibling of a subject whose head tag does not end in "S" is a candidate.
        if (!uncle.label().toString().equals("VP") || headPOS.endsWith("S")) {
            continue;
        }

        Tree verbPreterminal = uncle.headPreTerminal(this.hf);
        String oldVerbPOS = verbPreterminal.label().toString();

        // Decide the target tag; any other verb tag is left untouched.
        String newVerbPOS;
        if (oldVerbPOS.equals("VB") || oldVerbPOS.equals("VBP")) { //the parser confuses VBP with VB
            String subjectText = subject.yield().toString();
            boolean keepsBaseForm = subjectText.equals("I") || subjectText.equals("you");
            newVerbPOS = keepsBaseForm ? "VBP" : "VBZ";
        } else if (oldVerbPOS.equals("VBD")) {
            newVerbPOS = "VBD";
        } else {
            continue;
        }

        String verbLemma = QuestionUtil.getLemma(verbPreterminal.getChild(0).label().toString(), oldVerbPOS);

        // "be" needs hand-written forms for the past tense and for "I"; everything else is conjugated.
        boolean isBe = verbLemma.equals("be");
        String newVerb;
        if (isBe && newVerbPOS.equals("VBD")) {
            newVerb = subject.label().toString().endsWith("S") ? "were" : "was";
        } else if (isBe && subject.yield().toString().equals("I") && newVerbPOS.equals("VBP")) {
            newVerb = "am";
        } else {
            newVerb = this.conjugator.getSurfaceForm(verbLemma, newVerbPOS);
        }

        // Swap the old verb preterminal for a freshly built one at the same position.
        Tree verbParent = verbPreterminal.parent(uncle);
        int verbSlot = verbParent.objectIndexOf(verbPreterminal);
        verbParent.removeChild(verbSlot);
        verbParent.addChild(verbSlot,
                QuestionUtil.readTreeFromString("(" + newVerbPOS + " " + newVerb + ")"));
    }
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Replaces the parent of the node numbered {@code childindex} in {@code copy}
 * with that node itself, at the parent's position under the grandparent.
 * When the node's label starts with "VB", it is first wrapped in a new "VP" node.
 *
 * @param copy       the tree to modify in place
 * @param childindex node number (as understood by {@code Tree.getNodeNumber}) of the node to promote
 */
private void removeConjoinedSiblingsHelper(Tree copy, int childindex) {
    Tree node = copy.getNodeNumber(childindex);
    Tree nodeParent = node.parent(copy);
    Tree grandparent = nodeParent.parent(copy);
    int slot = grandparent.objectIndexOf(nodeParent);

    if (!node.label().toString().startsWith("VB")) {
        grandparent.setChild(slot, node);
        return;
    }

    // PTB convention lets a conjunction join two verb preterminals directly
    // under a VP (e.g., "John walked and played."), instead of joining VP nodes.
    // In that case a VP node is inserted above the promoted verb.
    grandparent.removeChild(slot);
    Tree vpWrapper = factory.newTreeNode("VP", new ArrayList<Tree>());
    vpWrapper.addChild(node);
    grandparent.addChild(slot, vpWrapper);
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Extracts a new sentence from a participial verb modifier, e.g.
 * John studied, hoping to get a good grade. -> John hoped to get a good grade.
 *
 * For every match in the intermediate tree of {@code input}, the participial VP is
 * copied, its VBG verb is re-inflected to the tense found by the pattern, and the
 * result is combined with the matched subject into a new tree that is added to
 * {@code extracted} via {@code addIfNovel}.
 *
 * @param extracted collection receiving the newly built questions
 * @param input     question whose intermediate tree is searched
 */
private void extractVerbParticipialModifiers(Collection<Question> extracted, Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    TregexMatcher matcher;

    tregexOpStr = "S=sub $- /,/ !< NP < (VP=participial < VBG=verb) " + " >+(VP) (S|SINV < NP=subj) "
            + " >> (ROOT <<# /VB.*/=tense) "; //tense determined by top-most verb

    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());
    while (matcher.find()) {
        String verbPOS = findTense(matcher.getNode("tense"));
        Tree participial = matcher.getNode("participial");
        Tree verb = matcher.getNode("verb");
        String verbLemma = QuestionUtil.getLemma(verb.getChild(0).label().toString(), verb.label().toString());
        String newVerb = this.conjugator.getSurfaceForm(verbLemma, verbPOS);

        // The verb node belongs to the ORIGINAL tree, so its position must be looked up
        // there: objectIndexOf compares by identity and would not find it in the copy.
        // The pattern makes the verb an immediate child of the participial VP, and the
        // deep copy keeps child order, so the same index is valid in the copy.
        int verbIndex = participial.objectIndexOf(verb);
        Tree p = participial.deepCopy();
        p.removeChild(verbIndex);
        p.addChild(verbIndex, QuestionUtil.readTreeFromString("(" + verbPOS + " " + newVerb + ")"));

        String treeStr = "(ROOT (S " + matcher.getNode("subj").toString() + " " + p.toString() + " (. .)))";
        Tree newTree = QuestionUtil.readTreeFromString(treeStr);
        correctTense(newTree.getChild(0).getChild(0), newTree.getChild(0));

        addQuotationMarksIfNeeded(newTree);
        Question newTreeWithFeatures = input.deeperCopy();
        newTreeWithFeatures.setIntermediateTree(newTree);
        if (this.getComputeFeatures) {
            newTreeWithFeatures.setFeatureValue("extractedFromParticipial", 1.0);
            newTreeWithFeatures.setFeatureValue("extractedFromVerbParticipial", 1.0);
            // NOTE(review): this trace is gated on the feature flag rather than a debug flag - confirm intended.
            System.err.println("extractVerbParticipialModifiers: " + newTree.toString());
        }
        addIfNovel(extracted, newTreeWithFeatures);
    }
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Returns the tag to use as the tense for {@code node}.
 *
 * A node labelled "MD" whose yield is exactly "would" or "could" is reported
 * as "VBD"; every other node is reported by its own label string.
 *
 * @param node the node carrying the tense
 * @return "VBD" for the modals "would"/"could", otherwise the node's label string
 */
private String findTense(Tree node) {
    // Compare the label's string form: a Label object never equals a String,
    // so calling equals("MD") on the label itself made this branch unreachable.
    String tag = node.label().toString();
    if (tag.equals("MD") && node.yield().toString().matches("^(would|could)$")) {
        return "VBD";
    }
    return tag;
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Extracts complement clauses from the intermediate tree of {@code input}.
 *
 * For every SBAR matched by the pattern below whose governing verb lemma passes
 * {@code verbImpliesComplement}, the embedded S is copied under a new ROOT node,
 * given a final period and quotation marks if needed, and added to
 * {@code extracted} via {@code addIfNovel}.
 *
 * @param extracted collection receiving the newly built questions
 * @param input     question whose intermediate tree is searched
 */
private void extractComplementClauses(Collection<Question> extracted, Question input) {
    //TODO should also address infinitive complements
    String complementPattern = "SBAR " + " < (S=sub !< (VP < VBG)) " //not a participial phrase
            + " !> NP|PP " //not part of a noun phrase or PP (other methods for those)
            + " [ $- /^VB.*/=verb | >+(SBAR) (SBAR $- /^VB.*/=verb) ] "; //complement of a VP (follows the verb)

    TregexMatcher clauseMatcher = TregexPatternFactory.getPattern(complementPattern)
            .matcher(input.getIntermediateTree());
    while (clauseMatcher.find()) {
        Tree root = factory.newTreeNode("ROOT", new ArrayList<Tree>());
        Tree clause = clauseMatcher.getNode("sub");
        Tree governingVerb = clauseMatcher.getNode("verb");
        String lemma = QuestionUtil.getLemma(governingVerb.yield().toString(), governingVerb.label().toString());

        if (verbImpliesComplement(lemma)) {
            root.addChild(clause.deepCopy());

            QuestionUtil.addPeriodIfNeeded(root);
            addQuotationMarksIfNeeded(root);
            Question result = input.deeperCopy();
            result.setIntermediateTree(root);
            if (this.getComputeFeatures) {
                result.setFeatureValue("extractedFromFiniteClause", 1.0); //old feature name
                result.setFeatureValue("extractedFromComplementClause", 1.0);
            }
            addIfNovel(extracted, result);
        }
    }
}