List of usage examples for edu.stanford.nlp.trees Tree nodeString
public String nodeString()
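Before the project-specific usages below, here is a minimal, self-contained sketch of what nodeString() returns: the label of a single node as a string, i.e. a category such as "S" or "NN" for internal nodes and the word itself for leaves. The class name NodeStringDemo and the example sentence are invented for illustration; the sketch assumes only that the Stanford Parser jar is on the classpath and builds the tree with Tree.valueOf instead of running a parser.

import edu.stanford.nlp.trees.Tree;

public class NodeStringDemo {
    public static void main(String[] args) {
        // Build a small constituency tree from its bracketed (Penn Treebank) form.
        Tree tree = Tree.valueOf("(S (NP (DT the) (NN dog)) (VP (VBZ barks)))");

        // nodeString() gives the label of this single node, not the whole subtree.
        System.out.println(tree.nodeString()); // expected: S

        // For leaves it is the word; the leaf's parent in the tree gives the POS tag.
        for (Tree leaf : tree.getLeaves()) {
            System.out.println(leaf.nodeString() + "/" + leaf.parent(tree).nodeString());
        }
    }
}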
From source file:Ceist.CeistView.java
License:Open Source License
/**
 * Displays the match results in a table with the matched parts formatted.
 *
 * @param m the matcher containing the match results
 * @param matchedTree the tree which was matched
 * @param showTagged whether to show POS tags or not
 * @return the HTML to be displayed in the table row
 */
private String[] getMatcherTableRow(TregexMatcher m, Tree matchedTree, boolean showTagged) {
    //List<Tree> allMatches = new ArrayList<Tree>();

    // Find matches for templates
    String strQuestion = QuestionTemplate.getQuestionString(m, txtQuestionTemplate.getText());
    String strAnswer = AnswerTemplate.getAnswerString(m, txtAnswerTemplate.getText());

    // Display the full tree in which the match was found
    String strMatchAll = "<html>";
    String lastRef = "";

    for (Tree t : matchedTree.getLeaves()) {
        String nodeValue = t.nodeString();

        if (nodeValue.startsWith("{Q")) {
            // This is a match for the question string
            String ref = nodeValue.substring(2, nodeValue.indexOf("}"));
            nodeValue = nodeValue.substring(nodeValue.indexOf("}") + 1);
            t.setValue(nodeValue);

            if (!ref.equals(lastRef))
                lastRef = ref;
            else
                ref = "";

            if (!showTagged)
                strMatchAll += "<sup>" + ref + "</sup><b><font color=green>" + nodeValue + "</font></b> ";
            else
                strMatchAll += "<sup>" + ref + "</sup><b><font color=green>" + nodeValue
                        + "</font><font color=gray>/" + t.parent(matchedTree).nodeString() + "</font></b> ";
        } else if (nodeValue.startsWith("{A")) {
            // This is a match for the answer string
            String ref = nodeValue.substring(2, nodeValue.indexOf("}"));
            nodeValue = nodeValue.substring(nodeValue.indexOf("}") + 1);
            t.setValue(nodeValue);

            if (!ref.equals(lastRef))
                lastRef = ref;
            else
                ref = "";

            if (!showTagged)
                strMatchAll += "<sup>" + ref + "</sup><b>" + nodeValue + "</b> ";
            else
                strMatchAll += "<sup>" + ref + "</sup><b>" + nodeValue + "<font color=gray>/"
                        + t.parent(matchedTree).nodeString() + "</font></b> ";
        } else {
            // Normal unmatched text
            if (!showTagged)
                strMatchAll += nodeValue + " ";
            else
                strMatchAll += nodeValue + "<font color=gray>/" + t.parent(matchedTree).nodeString() + "</font> ";
        }
    }

    strMatchAll += "</html>";

    return new String[] { strMatchAll, strQuestion, strAnswer };
}
From source file:edu.nus.comp.nlp.stanford.UtilParser.java
License:Open Source License
public static DefaultMutableTreeNode toDMTree(DefaultMutableTreeNode root, Tree tree) {
    if (root == null) {
        root = new DefaultMutableTreeNode();
    }
    String nodeContent = tree.nodeString();
    root.setUserObject(nodeContent);
    for (Tree c : tree.children()) {
        DefaultMutableTreeNode n = toDMTree(null, c);
        root.add(n);
    }
    return root;
}
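As a hedged usage sketch (the demo class, frame title, and example sentence are assumptions, not part of the project), the DefaultMutableTreeNode returned by toDMTree can be handed straight to a Swing JTree to browse the parse:

import javax.swing.JFrame;
import javax.swing.JScrollPane;
import javax.swing.JTree;
import javax.swing.tree.DefaultMutableTreeNode;
import javax.swing.tree.DefaultTreeModel;
import edu.nus.comp.nlp.stanford.UtilParser;
import edu.stanford.nlp.trees.Tree;

public class ParseTreeViewerDemo {
    public static void main(String[] args) {
        // Any bracketed parse will do for the demo; a real caller would use parser output.
        Tree parse = Tree.valueOf("(S (NP (PRP It)) (VP (VBZ works)))");

        // Convert the Stanford Tree into a Swing tree model and display it.
        DefaultMutableTreeNode root = UtilParser.toDMTree(null, parse);
        JFrame frame = new JFrame("Parse tree");
        frame.add(new JScrollPane(new JTree(new DefaultTreeModel(root))));
        frame.pack();
        frame.setVisible(true);
    }
}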
From source file:elkfed.coref.discourse_entities.DiscourseEntity.java
License:Open Source License
/**
 * Reads the premodifiers from the input Mention and creates Property objects as attributes
 * for every premodifier not part of an embedded NE. Embedded NEs are to be treated as
 * relations to other Discourse Entities.
 *
 * @param np the NP to be processed
 * @return a Set of Property objects; one for every premodifier (attribute)
 */
private Set<Property> computeAttributes(Mention np) {
    LanguagePlugin lang_plugin = ConfigProperties.getInstance().getLanguagePlugin();
    Set<Property> result = new LinkedHashSet<Property>();
    List<Tree> preModifiers = np.getPremodifiers(); // straight from Mention
    //DEBUG
    //System.out.println("Number of premodifiers of " + np.getMarkableString() + " :" + preModifiers.size());
    char pos = '\0';
    if ((preModifiers != null) && (preModifiers.size() > 0)) {
        for (int i = 0; i < preModifiers.size(); i++) {
            Tree mod = preModifiers.get(i);
            // Expected structure:
            // (NP (DT the) (JJ last) (NN supper))
            if (mod.isLeaf()) {
                // this shouldn't happen
                System.out.println("WARNING: UNEXPECTED LEAF " + mod.nodeString());
                //result.add(new Property(Property.ATTRIBUTE, mod.nodeString()));
                //result.add(new Property(Property.ATTRIBUTE, getSense(mod.nodeString())));
            } else {
                NodeCategory ncat = lang_plugin.labelCat(mod.nodeString());
                if (mod.isPreTerminal()) {
                    if (ncat == NodeCategory.CN || ncat == NodeCategory.ADJ) {
                        if (ncat == NodeCategory.CN) {
                            pos = 'N';
                        }
                        if (ncat == NodeCategory.ADJ) {
                            pos = 'A';
                        }
                        //System.out.println("Pre terminal node " + mod.nodeString());
                        Tree wordNode = mod.firstChild();
                        _logger.fine("Adding attribute " + wordNode.nodeString() + " to entity");
                        result.add(new Property(Property.ATTRIBUTE, wordNode.nodeString(), pos));
                    }
                }
            }
        }
    }
    return result;
}
From source file:elkfed.coref.discourse_entities.DiscourseEntity.java
License:Open Source License
private Set<Property> computeInitialRelations(Mention np) {
    LanguagePlugin lang_plugin = ConfigProperties.getInstance().getLanguagePlugin();
    Set<Property> result = new LinkedHashSet<Property>();
    List<Tree> postModifiers = np.getPostmodifiers(); // straight from Mention
    char pos = '\0';
    //DEBUG
    //System.out.println("Number of postmodifiers of " + np.getMarkableString() + " :" + postModifiers.size());
    if ((postModifiers != null) && (postModifiers.size() > 0)) {
        for (int i = 0; i < postModifiers.size(); i++) {
            Tree mod = postModifiers.get(i);
            // Expected structure:
            // (NP (NN software) (PP from (NP India)))
            if (mod.isLeaf()) {
                // this shouldn't happen
                System.out.println("WARNING: UNEXPECTED LEAF " + mod.nodeString());
                //result.add(new Property(Property.ATTRIBUTE, mod.nodeString()));
                //result.add(new Property(Property.ATTRIBUTE, getSense(mod.nodeString())));
            } else {
                if (mod.isPreTerminal()) {
                    // this shouldn't happen either, but we'll add it to the properties
                    NodeCategory ncat = lang_plugin.labelCat(mod.nodeString());
                    if (ncat == NodeCategory.CN || ncat == NodeCategory.ADJ) {
                        if (ncat == NodeCategory.CN) {
                            pos = 'N';
                        }
                        if (ncat == NodeCategory.ADJ) {
                            pos = 'A';
                        }
                    }
                } else {
                    //System.out.println("Type of postmodifier: " + mod.nodeString());
                    NodeCategory ncat = lang_plugin.labelCat(mod.nodeString());
                    if (ncat == NodeCategory.PP) {
                        if (mod.numChildren() == 2) {
                            // (PP (in from) (NP (nnp India)))
                            Tree prepNode = mod.getChild(0);
                            Tree npNode = mod.getChild(1);
                            Tree npHead = massimoHeadFindHack(npNode);
                            if (npHead != null && prepNode != null) {
                                //DEBUG
                                //System.out.println("Adding relation " + prepNode.firstChild().nodeString()
                                //        + " " + npHead.firstChild().nodeString());
                                /* -- no clue what it means, just fixed so that it doesn't crash (Olga) -- */
                                if (prepNode.numChildren() > 0)
                                    prepNode = prepNode.firstChild();
                                result.add(new Property(prepNode.nodeString(), npHead.firstChild().nodeString()));
                            }
                        }
                    }
                }
            }
        } // end outer loop
    } // end if postmodified
    return result;
}
From source file:elkfed.coref.discourse_entities.DiscourseEntity.java
License:Open Source License
private Tree massimoHeadFindHack(Tree npNode) {
    LanguagePlugin lang_plugin = ConfigProperties.getInstance().getLanguagePlugin();
    /*
     * NOTE (yv):
     * We should really have a decent configurable head finder.
     * The "generic" head finder below probably works, but ...
     * this is ugly enough for English, but making it work for
     * English *and* Italian (and possibly other languages)
     * is only something for very enthusiastic people with
     * slight masochistic tendencies.
     */
    //CollinsHeadFinder hf = new CollinsHeadFinder();
    //ModCollinsHeadFinder hf = new ModCollinsHeadFinder();

    /* -- trivial -- */
    if (npNode.numChildren() == 0)
        return npNode;
    if (npNode.numChildren() == 1) {
        if (npNode.firstChild().numChildren() == 0)
            return npNode;
        return massimoHeadFindHack(npNode.firstChild());
    }

    /* -- coordination -- */
    if (npNode.numChildren() > 2) {
        for (Tree n : npNode.children()) {
            if (lang_plugin.labelCat(n.nodeString()) == NodeCategory.CC)
                return null;
        }
    }

    /* -- last child is a noun (common/proper) -- */
    /* NB: will it work for Italian though? */
    NodeCategory firstpos = lang_plugin.labelCat(npNode.firstChild().nodeString());
    NodeCategory nextpos = lang_plugin.labelCat(npNode.getChild(1).nodeString());
    NodeCategory lastpos = lang_plugin.labelCat(npNode.lastChild().nodeString());
    if (lastpos == NodeCategory.CN)
        return npNode.lastChild();
    if (lastpos == NodeCategory.PN)
        return npNode.lastChild();

    /* -- (NP (NP (DT the) (NN man)) (PP (in from) (NP (NNP UNCLE)))) -- */
    if (firstpos == NodeCategory.NP && nextpos != NodeCategory.CN)
        return massimoHeadFindHack(npNode.firstChild());

    /* -- misc -- */
    Tree found_head = null;
    int state = 0;
    for (Tree n : npNode.children()) {
        NodeCategory ncat = lang_plugin.labelCat(n.nodeString());
        if (ncat == NodeCategory.CN || ncat == NodeCategory.PN || ncat == NodeCategory.PRO) {
            state = 4;
            found_head = n;
        } else if (ncat == NodeCategory.NP && state < 3) {
            state = 3;
            found_head = n;
        } else if (ncat == NodeCategory.ADJ && state < 3) {
            state = 2;
            found_head = n;
        }
    }
    if (found_head != null) {
        if (state == 3) {
            return massimoHeadFindHack(found_head);
        }
        return found_head;
    }
    // if (ConfigProperties.getInstance().getDbgPrint())
    System.err.println("Couldn't find a head for NP:" + npNode.pennString());
    return null;
}
From source file:pltag.parser.semantics.discriminative.ExtractFeatures.java
License:Open Source License
private void extractStringTreeAnalysisFeatures(String inputTree, String stringAnalysisTree, boolean endOfSent,
        IncrementalAnalysis analysis, DiscriminativeFeatureIndexers featureIndexers, boolean train) {
    // System.out.println(inputTree);
    Tree tree = Tree.valueOf(inputTree);
    List<Tree> leaves = tree.getLeaves();
    Tree currentWord = leaves.get(leaves.size() - 1);
    int currentWordIndex = featureIndexers.getWordIndex(currentWord.nodeString(), train);

    // right branch (2): 1. Count number of nodes from root to rightmost non-terminal, 2. Count rest nodes
    // compute domination path from root to rightmost leaf. Subtract 2 from size to exclude root and pre-terminal
    int pathSize = tree.dominationPath(currentWord.parent(tree)).size();
    analysis.setRightBranchSpine(pathSize > 2 ? pathSize - 2 : 0);
    int rest = tree.size() - analysis.getRightBranchSpine() - leaves.size() * 2 - 1;
    analysis.setRightBranchRest(rest > 0 ? rest : 0); // Subtract the number of terminals, pre-terminals (leaves.size()*2) and root symbol

    // get list of rightmost complete non-terminals. We don't compute the whole list of complete non-terminals,
    // but only the ones that have been just completed, hence lie at the rightmost position of the tree.
    // Since we compute the features piecemeal, by the end of the sentence we will have computed all complete
    // non-terminals, depending on the training strategy. Used for features: heavy, neighbours, and edges
    Tree analysisTree = Tree.valueOf(stringAnalysisTree);
    analysisTree.indexLeaves();
    List<Tree> complete = getListOfRightMostCompleteNonTerminals(analysisTree);
    String[] heavyStr = new String[complete.size()];
    String[] neighboursL1Str = new String[complete.size()];
    String[] neighboursL2Str = new String[complete.size()];
    int i = 0;
    for (Tree subTree : complete) {
        // heavy feature
        int categoryIndex = featureIndexers.getCategoryIndex(subTree.nodeString(), train);
        List<Label> yield = subTree.yield();
        String yieldSize = yield.size() > 5 ? "5+" : String.valueOf(yield.size());
        heavyStr[i] = String.format("%s %s %s", categoryIndex, yieldSize, endOfSent ? "y" : "n");

        // neighbours l1, l2 features
        int leftmostLeafId = ((CoreLabel) yield.get(0)).index();
        if (leftmostLeafId > 1) {
            int l1CategoryId = featureIndexers
                    .getCategoryIndex(leaves.get(leftmostLeafId - 2).parent(tree).nodeString(), train);
            if (leftmostLeafId > 2) {
                neighboursL1Str[i] = String.format("%s %s %s", categoryIndex, yieldSize, l1CategoryId);
                int l2CategoryId = featureIndexers
                        .getCategoryIndex(leaves.get(leftmostLeafId - 3).parent(tree).nodeString(), train);
                neighboursL2Str[i] = String.format("%s %s %s %s", categoryIndex, yieldSize, l2CategoryId,
                        l1CategoryId);
            } else {
                neighboursL2Str[i] = String.format("%s %s SoS %s", categoryIndex, yieldSize, l1CategoryId);
            }
        } else { // leftmost leaf is at the beginning of the sentence
            neighboursL1Str[i] = String.format("%s %s SoS", categoryIndex, yieldSize);
            neighboursL2Str[i] = String.format("%s %s SoS SoS", categoryIndex, yieldSize);
        }

        // coPar and coLenPar features
        Tree[] children = subTree.children();
        if (children.length > 2) {
            // found structure: (X (A ...) (CC and/or) (B ...))
            if (children.length == 3 && children[1].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[2]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[2], endOfSent), featureIndexers, train);
            }
            // found structure: ((CC either) (A ...) (CC or) (B ...))
            else if (children.length == 4 && children[0].nodeString().startsWith("CC")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[1], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[1], children[3], endOfSent), featureIndexers, train);
            }
            // found structure: ((A ...) (, ,) (CC but) (B ...))
            else if (children.length == 4 && children[1].nodeString().equals(",")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[3], endOfSent), featureIndexers, train);
            }
        }
        i++;
    }
    analysis.setHeavy(heavyStr, featureIndexers, train);
    analysis.setNeighboursL1(neighboursL1Str, featureIndexers, train);
    analysis.setNeighboursL2(neighboursL2Str, featureIndexers, train);

    // compute word + L=2 ancestor nodes, L=3 ancestor nodes
    Tree preTerminal = currentWord.parent(tree);
    Tree wordL2 = preTerminal.parent(tree);
    if (wordL2 != null) {
        int preTerminalIndex = featureIndexers.getCategoryIndex(preTerminal.nodeString(), train);
        int wordL2Index = featureIndexers.getCategoryIndex(wordL2.nodeString(), train);
        analysis.setWordL2(String.format("%s %s %s", currentWordIndex, preTerminalIndex, wordL2Index),
                featureIndexers, train);
        Tree wordL3 = wordL2.parent(tree);
        if (wordL3 != null) {
            analysis.setWordL3(String.format("%s %s %s %s", currentWordIndex, preTerminalIndex, wordL2Index,
                    featureIndexers.getCategoryIndex(wordL3.nodeString())), featureIndexers, train);
        }
    }

    // get integration point + elem tree (Parent-emulation feature)
    analysis.setIpElemTree(String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTree()),
            featureIndexers, train);
    analysis.setIpElemTreeUnlex(
            String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTreeUnlex()),
            featureIndexers, train);
}
From source file:pltag.parser.semantics.discriminative.ExtractFeatures.java
License:Open Source License
/**
 * Identify the list of rightmost non-terminals that span a complete subtree, i.e., one for which
 * a) the leaf of its rightmost child is a word, OR
 * b) the leaf of its rightmost child is a word AND its index is the last in the yield
 *    (AND this leaf is the last word - optional, as this condition breaches incrementality).
 *
 * @param tree the analysis tree
 * @return the list of rightmost complete non-terminals
 */
private List<Tree> getListOfRightMostCompleteNonTerminals(Tree tree) {
    List<Tree> list = new ArrayList<Tree>();
    List<Tree> leaves = tree.getLeaves();
    // check if the last leaf is a word.
    Tree currentWord = leaves.get(leaves.size() - 1);
    if (currentWord.nodeString().endsWith("<>")) {
        Tree parent = currentWord.parent(tree);
        while (parent != tree) {
            if (parent.isPhrasal()) {
                list.add(parent);
            }
            parent = parent.parent(tree);
        }
        list.add(tree);
    }
    return list;
}
From source file:pltag.parser.semantics.discriminative.ExtractFeatures.java
License:Open Source License
private boolean sameLabelAtLevel(Tree left, Tree right, int level) {
    if (left.isLeaf() || right.isLeaf())
        return false;
    if (level == 0)
        return left.nodeString().equals(right.nodeString());
    Tree[] leftChildren = left.children();
    Tree[] rightChildren = right.children();
    if (leftChildren.length != rightChildren.length)
        return false;
    for (int i = 0; i < leftChildren.length; i++) {
        if (!sameLabelAtLevel(leftChildren[i], rightChildren[i], level - 1))
            return false;
    }
    return true;
}
From source file:tml.utils.StanfordUtils.java
License:Apache License
/**
 * Calculates the typed dependencies from a grammatical tree.
 *
 * @param tree the grammatical tree
 * @return one string per typed dependency, or null if the grammatical structure could not be built
 */
public static List<String> calculateTypedDependencies(Tree tree) {
    double time = System.nanoTime();
    List<String> output = new ArrayList<String>();
    GrammaticalStructure gs = null;
    try {
        gs = getGrammaticalStructureFactory().newGrammaticalStructure(tree);
    } catch (Exception e) {
        logger.error(e);
        return null;
    }
    Collection<TypedDependency> tdl = gs.typedDependenciesCollapsed();

    // Get the POS tag from each word
    Hashtable<String, String> posInfo = new Hashtable<String, String>();
    for (Tree t : tree.getLeaves()) {
        Tree pt = null;
        for (Tree tt : tree.dominationPath(t)) {
            if (tt.isLeaf()) {
                posInfo.put(tt.nodeString(), pt.nodeString());
            }
            pt = tt;
        }
    }

    for (Object obj : tdl.toArray()) {
        TypedDependency dep = (TypedDependency) obj;
        String wordGov = dep.gov().nodeString().split("-")[0];
        String wordDep = dep.dep().nodeString().split("-")[0];
        String posGov = posInfo.get(wordGov);
        String posDep = posInfo.get(wordDep);
        String dependencyString = dep.reln().toString() + "(" + dep.gov().pennString().trim() + "-" + posGov
                + ", " + dep.dep().pennString().trim() + "-" + posDep + ")";
        output.add(dependencyString);
    }

    time = System.nanoTime() - time;
    logger.debug("Typed dependencies obtained in " + time * 1E-6 + " milliseconds"); // ns -> ms
    return output;
}
From source file:wseproject.nlp.StanfordManager.java
private static void printTree(Tree tree, int d) {
    // Indent according to the node's depth, then print its label.
    for (int i = 0; i < d; i++)
        System.out.print("--");
    System.out.println(tree.nodeString());

    // Recurse into the children one level deeper.
    Tree[] children = tree.children();
    for (Tree child : children) {
        //System.out.println("str = " + child.nodeString());
        printTree(child, d + 1);
    }
}