List of usage examples for edu.stanford.nlp.trees Tree toString
@Override
public String toString()
From source file:opennlp.tools.parse_thicket.kernel_interface.TreeExtenderByAnotherLinkedTree.java
License:Apache License
public List<String> buildForestForRSTArcs(ParseThicket pt) { List<String> results = new ArrayList<String>(); for (WordWordInterSentenceRelationArc arc : pt.getArcs()) { // TODO - uncomment // if (!arc.getArcType().getType().startsWith("rst")) // continue; int fromSent = arc.getCodeFrom().getFirst(); int toSent = arc.getCodeTo().getFirst(); String wordFrom = arc.getLemmaFrom(); String wordTo = arc.getLemmaTo(); if (wordFrom == null || wordFrom.length() < 1 || wordTo == null || wordTo.length() < 1) log.severe("Empty lemmas for RST arc " + arc); List<Tree> trees = getASubtreeWithRootAsNodeForWord1(pt.getSentences().get(fromSent), pt.getSentences().get(fromSent), new String[] { wordFrom }); if (trees == null || trees.size() < 1) continue; System.out.println(trees); StringBuilder sb = new StringBuilder(10000); Tree tree = trees.get(0);/*from ww w . j av a 2 s .co m*/ // instead of phrase type for the root of the tree, we want to put // the RST relation name if (arc.getArcType().getType().startsWith("rst")) tree.setValue(arc.getArcType().getSubtype()); toStringBuilderExtenderByAnotherLinkedTree1(sb, pt.getSentences().get(toSent), tree, new String[] { wordTo }); System.out.println(sb.toString()); results.add(sb.toString()); } // if no arcs then orig sentences if (results.isEmpty()) { for (Tree t : pt.getSentences()) { results.add(t.toString()); } } return results; }
From source file:opennlp.tools.parse_thicket.kernel_interface.TreeExtenderByAnotherLinkedTree.java
License:Apache License
/**
 * Serializes tree {@code t} into {@code sb} in Penn-treebank bracket form,
 * splicing {@code treeToInsert} as an extra last child of the first node that
 * has a child whose bracketed text ends with the last word in
 * {@code corefWords}.
 *
 * Once the splice happens, null is passed down for both {@code treeToInsert}
 * and {@code corefWords}, so nothing is inserted again below that node.
 * NOTE(review): sibling subtrees that have not yet matched still carry the
 * original treeToInsert/corefWords, so a second matching node elsewhere in
 * the tree would receive its own copy — confirm that is intended.
 *
 * @param sb           accumulator for the serialized tree
 * @param t            the tree (or subtree) being serialized
 * @param treeToInsert subtree to splice in, or null for plain serialization
 * @param corefWords   match words; only the LAST element is used for matching
 * @return {@code sb}, for call chaining
 */
public StringBuilder toStringBuilderExtenderByAnotherLinkedTree1(StringBuilder sb, Tree t, Tree treeToInsert,
        String[] corefWords) {
    if (t.isLeaf()) {
        if (t.label() != null) {
            sb.append(t.label().value());
        }
        return sb;
    } else {
        sb.append('(');
        if (t.label() != null) {
            if (t.value() != null) {
                sb.append(t.label().value());
            }
        }
        boolean bInsertNow = false;
        Tree[] kids = t.children();
        if (kids != null) {
            // decide whether this node is the splice point: some direct child's
            // bracketed text (with ')' stripped) must end with the last coref word
            for (Tree kid : kids) {
                if (corefWords != null) {
                    String word = corefWords[corefWords.length - 1];
                    String phraseStr = kid.toString();
                    phraseStr = phraseStr.replace(")", "");
                    if (phraseStr.endsWith(word)) {
                        bInsertNow = true;
                    }
                }
            }
            if (bInsertNow) {
                // serialize all original children first, then append the spliced
                // subtree; nulls prevent any further insertion in this branch
                for (Tree kid : kids) {
                    sb.append(' ');
                    toStringBuilderExtenderByAnotherLinkedTree1(sb, kid, null, null);
                }
                sb.append(' ');
                toStringBuilderExtenderByAnotherLinkedTree1(sb, treeToInsert, null, null);
            } else {
                // not the splice point yet: keep searching deeper
                for (Tree kid : kids) {
                    sb.append(' ');
                    toStringBuilderExtenderByAnotherLinkedTree1(sb, kid, treeToInsert, corefWords);
                }
            }
        }
        return sb.append(')');
    }
}
From source file:opennlp.tools.parse_thicket.kernel_interface.TreeExtenderByAnotherLinkedTree.java
License:Apache License
public List<Tree> getASubtreeWithRootAsNodeForWord1(Tree tree, Tree currentSubTree, String[] corefWords) { if (currentSubTree.isLeaf()) { return null; }// w w w . jav a 2 s . co m List<Tree> result = null; Tree[] kids = currentSubTree.children(); if (kids != null) { boolean bFound = false; String word = corefWords[corefWords.length - 1]; for (Tree kid : kids) { if (bFound) { result.add(kid); } else { String phraseStr = kid.toString(); phraseStr = phraseStr.replace(")", ""); if (phraseStr.endsWith(word)) { // found bFound = true; result = new ArrayList<Tree>(); } } } if (bFound) { return result; } // if not a selected node, proceed with iteration for (Tree kid : kids) { List<Tree> ts = getASubtreeWithRootAsNodeForWord1(tree, kid, corefWords); if (ts != null) return ts; } } return null; }
From source file:opennlp.tools.parse_thicket.kernel_interface.TreeExtenderByAnotherLinkedTree.java
License:Apache License
/**
 * Intended to locate a subtree covering all of {@code corefWords}.
 *
 * NOTE(review): as currently written, NO path returns a non-null result —
 * {@code bInsertNow} is computed but never read (its early return is
 * commented out), the commented-out kid-matching block is disabled, and the
 * recursion bottoms out at leaves returning null. Confirm whether the
 * commented-out {@code if(bInsertNow) return ...} was meant to be active.
 * Also contains a leftover System.out.println debug print.
 *
 * @param tree           full sentence tree (passed through recursion)
 * @param currentSubTree subtree currently being examined
 * @param corefWords     words that the selected subtree should cover
 * @return always null in the current implementation (see note above)
 */
public Tree[] getASubtreeWithRootAsNodeForWord(Tree tree, Tree currentSubTree, String[] corefWords) {
    if (currentSubTree.isLeaf()) {
        return null;
    }
    boolean bInsertNow = false;
    /*
     * List<ParseTreeNode> bigTreeNodes =
     * parsePhrase(currentSubTree.label().value()); for(ParseTreeNode
     * smallNode: bigTreeNodes ){ if
     * (bigTreeNodes.get(0).getWord().equals("") ) continue; String word =
     * bigTreeNodes.get(0).getWord(); for(String cWord: corefWords){
     *
     * if (word.equalsIgnoreCase(cWord)) bInsertNow=true; } }
     */
    String nodePhraseStr = currentSubTree.toString();
    System.out.println(nodePhraseStr);
    // strip each coref word from the node's bracketed text
    for (String w : corefWords)
        nodePhraseStr = nodePhraseStr.replace(w, "");
    // all words are covered: only uppercase phrase labels / brackets remain
    if (nodePhraseStr.toUpperCase().equals(nodePhraseStr))
        bInsertNow = true;
    // if(bInsertNow)
    // return currentSubTree;
    Tree[] kids = currentSubTree.children();
    if (kids != null) {
        /*
         * for (Tree kid : kids) { List<ParseTreeNode> bigTreeNodes =
         * parsePhrase(kid.label().value()); if (bigTreeNodes!=null &&
         * bigTreeNodes.size()>0 && bigTreeNodes.get(0)!=null &&
         * bigTreeNodes.get(0).getWord().equalsIgnoreCase(corefWords[0])){
         * bInsertNow=true; return kids; }
         *
         * }
         */
        for (Tree kid : kids) {
            Tree[] t = getASubtreeWithRootAsNodeForWord(tree, kid, corefWords);
            if (t != null)
                return t;
        }
    }
    return null;
}
From source file:opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder.java
License:Apache License
private void navigateR(Tree t, List<ParseTreeNode> sentence, List<List<ParseTreeNode>> phrases) { if (!t.isPreTerminal()) { if (t.label() != null) { if (t.value() != null) { // if ROOT or S, returns empty List<ParseTreeNode> nodes = parsePhrase(t.label().value(), t.toString()); nodes = assignIndexToNodes(nodes, sentence); if (!nodes.isEmpty()) phrases.add(nodes);//from www. j a va 2s .com if (nodes.size() > 0 && nodes.get(0).getId() == null) { if (nodes.size() > 1 && nodes.get(1) != null && nodes.get(1).getId() != null) { try { ParseTreeNode n = nodes.get(0); n.setId(nodes.get(1).getId() - 1); nodes.set(0, n); } catch (Exception e) { e.printStackTrace(); } } else { log.severe("Failed alignment:" + nodes); } } } } Tree[] kids = t.children(); if (kids != null) { for (Tree kid : kids) { navigateR(kid, sentence, phrases); } } return; } }
From source file:opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder.java
License:Apache License
private void navigateR1(Tree t, List<ParseTreeNode> sentence, int l, List<List<ParseTreeNode>> phrases) { if (t.isPreTerminal()) { if (t.label() != null) { List<ParseTreeNode> node = parsePhrase(t.toString()); if (!node.isEmpty()) phrases.add(node);//from ww w. j av a 2s . c o m } return; } else { if (t.label() != null) { if (t.value() != null) { List<ParseTreeNode> node = parsePhrase(t.label().value()); if (!node.isEmpty()) phrases.add(node); } } Tree[] kids = t.children(); if (kids != null) { for (Tree kid : kids) { navigateR1(kid, sentence, l, phrases); } } return; } }
From source file:opennlp.tools.parse_thicket.parse_thicket2graph.GraphFromPTreeBuilder.java
License:Apache License
/**
 * Recursively converts a parse tree into a JGraphT graph: each non-leaf child
 * becomes a ParseGraphNode vertex (labeled with its text or phrase label plus
 * the depth), connected by an edge to {@code currParent}.
 *
 * NOTE(review): a single-child node is first descended into with the SAME
 * parent, and then that child is processed again by the loop below — it looks
 * like the child may be added/visited twice; confirm against expected graph
 * shape. Same concern for children whose label starts with "S" (which also
 * matches SBAR, SQ, etc., not only S).
 *
 * @param tree       subtree being converted
 * @param g          graph under construction
 * @param l          current depth, used only in vertex labels
 * @param currParent vertex acting as parent for this level's children
 */
private void navigate(Tree tree, Graph<ParseGraphNode, DefaultEdge> g, int l, ParseGraphNode currParent) {
    // String currParent = tree.label().value()+" $"+Integer.toString(l);
    // g.addVertex(currParent);
    if (tree.getChildrenAsList().size() == 1)
        navigate(tree.getChildrenAsList().get(0), g, l + 1, currParent);
    else if (tree.getChildrenAsList().size() == 0)
        return;
    for (Tree child : tree.getChildrenAsList()) {
        String currChild = null;
        ParseGraphNode currChildNode = null;
        try {
            if (child.isLeaf())
                continue;
            // "S"-prefixed labels: hop over the node to its first child
            if (child.label().value().startsWith("S"))
                navigate(child.getChildrenAsList().get(0), g, l + 1, currParent);
            // vertex label: full text for terminals/pre-terminals, phrase label otherwise
            if (!child.isPhrasal() || child.isPreTerminal())
                currChild = child.toString() + " #" + Integer.toString(l);
            else
                currChild = child.label().value() + " #" + Integer.toString(l);
            currChildNode = new ParseGraphNode(child, currChild);
            g.addVertex(currChildNode);
            g.addEdge(currParent, currChildNode);
        } catch (Exception e) {
            // best-effort: a malformed child is skipped, not fatal
            e.printStackTrace();
        }
        navigate(child, g, l + 1, currChildNode);
    }
}
From source file:org.textmining.annotator.StanfordCoreNlpAnnotator.java
License:Open Source License
/**
 * Runs the Stanford CoreNLP pipeline over the document text and converts its
 * output into UIMA annotations: tokens (with POS/lemma), phrase-level named
 * entity mentions, sentences, constituency trees, dependency graphs, and
 * coreference-derived entities.
 *
 * @param jCas the CAS whose document text is annotated in place
 * @throws AnalysisEngineProcessException per the UIMA annotator contract
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());
    // running state for merging token-level NE tags into phrase-level mentions
    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {
        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);
        token.setPos(pos);
        token.setLemma(lemma);
        token.addToIndexes();
        // hackery to convert token-level named entity tag into phrase-level tag
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            // an NE run just ended — emit the accumulated mention
            NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
            ne.setMentionType(lastNETag);
            ne.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                // an NE run starts here
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {
                // tag changed mid-run: close the previous mention, start a new run
                NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
                ne.setMentionType(lastNETag);
                ne.addToIndexes();
                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    // flush a mention that runs to the end of the document
    if (!lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
    }
    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {
        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();
        // add the syntactic tree annotation
        List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
        Tree tree = sentenceAnn.get(TreeAnnotation.class);
        if (tree.children().length != 1) {
            throw new RuntimeException("Expected single root node, found " + tree);
        }
        // drop the synthetic ROOT wrapper before indexing spans
        tree = tree.firstChild();
        tree.indexSpans(0);
        TopTreebankNode root = new TopTreebankNode(jCas);
        root.setTreebankParse(tree.toString());
        // TODO: root.setTerminals(v)
        this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);
        // get the dependencies
        SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);
        // convert Stanford nodes to UIMA annotations
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
        for (IndexedWord stanfordNode : dependencies.vertexSet()) {
            int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
            int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
            int tokenBegin = tokens.get(indexBegin).getBegin();
            int tokenEnd = tokens.get(indexEnd - 1).getEnd();
            DependencyNode node;
            if (dependencies.getRoots().contains(stanfordNode)) {
                node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
            } else {
                node = new DependencyNode(jCas, tokenBegin, tokenEnd);
            }
            stanfordToUima.put(stanfordNode, node);
        }
        // create relation annotations for each Stanford dependency
        ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create();
        ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create();
        for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
            DependencyRelation relation = new DependencyRelation(jCas);
            DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
            DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
            String relationType = stanfordEdge.getRelation().toString();
            if (head == null || child == null || relationType == null) {
                throw new RuntimeException(String.format(
                        "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation,
                        child, head));
            }
            relation.setHead(head);
            relation.setChild(child);
            relation.setRelation(relationType);
            relation.addToIndexes();
            headRelations.put(child, relation);
            childRelations.put(head, relation);
        }
        // set the relations for each node annotation
        for (DependencyNode node : stanfordToUima.values()) {
            List<DependencyRelation> heads = headRelations.get(node);
            node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
            if (heads != null) {
                FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
            }
            List<DependencyRelation> children = childRelations.get(node);
            node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
            if (children != null) {
                FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
            }
            node.addToIndexes();
        }
    }
    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }
    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
        for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {
            // figure out the character span of the token
            // (CorefMention indices are 1-based; endIndex is exclusive)
            List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
            int begin = tokens.get(corefMention.startIndex - 1).getBegin();
            int end = tokens.get(corefMention.endIndex - 2).getEnd();
            // use an existing named entity mention when possible; otherwise create a new one
            NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
            if (mention == null) {
                mention = new NamedEntityMention(jCas, begin, end);
                mention.addToIndexes();
            }
            mentions.add(mention);
        }
        // create an entity for the mentions
        Collections.sort(mentions, new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
                return m1.getBegin() - m2.getBegin();
            }
        });
        // create mentions and add them to entity
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, mentions.size()));
        int index = 0;
        for (NamedEntityMention mention : mentions) {
            mention.setMentionedEntity(entity);
            entity.setMentions(index, mention);
            index += 1;
        }
        entities.add(entity);
    }
    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        if (mention.getMentionedEntity() == null) {
            NamedEntity entity = new NamedEntity(jCas);
            entity.setMentions(new FSArray(jCas, 1));
            entity.setMentions(0, mention);
            mention.setMentionedEntity(entity);
            entity.getMentions();
            entities.add(entity);
        }
    }
    // sort entities by document order
    Collections.sort(entities, new Comparator<NamedEntity>() {
        @Override
        public int compare(NamedEntity o1, NamedEntity o2) {
            return getFirstBegin(o1) - getFirstBegin(o2);
        }

        // earliest mention offset of the entity, used as its document position
        private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
                if (mention.getBegin() < min) {
                    min = mention.getBegin();
                }
            }
            return min;
        }
    });
    // add entities to document
    for (NamedEntity entity : entities) {
        entity.addToIndexes();
    }
}
From source file:parsers.CoreNLPSuite.java
/**
 * Parses the input text with the shared CoreNLP pipeline and returns the
 * bracketed constituency parse of each sentence, one per line (each line
 * newline-terminated).
 *
 * Fix: replaced O(n^2) string concatenation in the sentence loop with a
 * StringBuilder.
 *
 * @param input raw text to annotate
 * @return newline-terminated parse strings, empty string for no sentences
 */
public static String parse(String input) {
    StringBuilder out = new StringBuilder();
    Annotation document = new Annotation(input);
    Parsers.pipeline.annotate(document);
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        Tree tree = sentence.get(TreeAnnotation.class);
        out.append(tree.toString()).append('\n');
    }
    return out.toString();
}
From source file:pltag.util.Utils.java
License:Open Source License
/**
 * Truncates a bracketed parse string so that at most {@code numOfLeaves}
 * leaves remain, by chopping the subtrees that span only the extraneous
 * trailing leaves.
 *
 * NOTE(review): assumes the common ancestor of the last valid leaf and the
 * final leaf exists on the path to the root (the while loop would NPE on a
 * malformed tree where parent() returns null) — confirm inputs are always
 * well-formed single-rooted trees.
 *
 * @param inputTree   bracketed Penn-treebank parse string
 * @param numOfLeaves number of leaves to keep
 * @return the truncated tree string, or the input unchanged when it already
 *         has no more than {@code numOfLeaves} leaves
 */
public static String removeSubtreesAfterWord(String inputTree, int numOfLeaves) {
    Tree tree = Tree.valueOf(inputTree);
    List<Tree> leaves = tree.getLeaves();
    if (leaves.size() > numOfLeaves) {
        // find common ancestor between last valid leaf and extraneous leaf
        Tree firstLeaf = leaves.get(numOfLeaves - 1);
        Tree lastLeaf = leaves.get(leaves.size() - 1);
        Tree commonAncestorNode = lastLeaf.parent(tree);
        while (!commonAncestorNode.getLeaves().contains(firstLeaf)) {
            commonAncestorNode = commonAncestorNode.parent(tree);
        }
        // found the common ancestor, now we need to chop the children nodes
        // the span of which is outwith the last valid leaf
        Tree p = lastLeaf.parent(tree);
        while (p != commonAncestorNode) {
            // strip all children of each node on the path up to the ancestor
            int numOfChildren = p.numChildren();
            for (int i = 0; i < numOfChildren; i++)
                p.removeChild(0);
            p = p.parent(tree);
        }
        // remove last leftover parent node of the invalid leaf
        commonAncestorNode.removeChild(commonAncestorNode.numChildren() - 1);
        return tree.toString();
    } else {
        return inputTree;
    }
}