Example usage for edu.stanford.nlp.trees Tree label

List of usage examples for edu.stanford.nlp.trees Tree label

Introduction

On this page you can find example usages of the label() method of edu.stanford.nlp.trees.Tree.

Prototype

@Override
public Label label() 

Source Link

Document

Returns the label associated with the current node, or null if there is no label.

Usage

From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.internal.CoreNlp2DKPro.java

License:Open Source License

/**
 * Recursively converts a node of a Stanford constituent tree into UIMA annotations.
 * <p>
 * For a phrasal node, a {@code Constituent} is created over the character offsets of the first
 * and last token in the node's span, linked to {@code aParentFS} and to the annotations
 * obtained from its children, and added to the indexes. For a pre-terminal node, the
 * {@code Token} stored under {@code TokenKey} on the label of its single terminal child is
 * linked to {@code aParentFS} and returned.
 *
 * @param aJCas
 *            the JCas in which the constituents are created.
 * @param aTreebankLanguagePack
 *            supplies the character that separates the category from the grammatical function
 *            in a node label; must be an {@code AbstractTreebankLanguagePack}.
 * @param aNode
 *            the tree node to convert.
 * @param aParentFS
 *            the annotation to set as parent of the annotation produced for {@code aNode}.
 * @param internStrings
 *            whether the constituent type and syntactic function strings are interned.
 * @param constituentMappingProvider
 *            maps the node category to the UIMA type of the constituent to create.
 * @param tokens
 *            the tokens that the spans of the tree nodes index into.
 * @return the constituent created for a phrasal node, or the token of a pre-terminal node.
 * @throws IllegalArgumentException
 *             if {@code aNode} is neither phrasal nor pre-terminal.
 */
private static org.apache.uima.jcas.tcas.Annotation convertConstituentTreeNode(JCas aJCas,
        TreebankLanguagePack aTreebankLanguagePack, Tree aNode, org.apache.uima.jcas.tcas.Annotation aParentFS,
        boolean internStrings, MappingProvider constituentMappingProvider, List<CoreLabel> tokens) {
    // Get node label
    String nodeLabelValue = aNode.value();

    // Extract syntactic function from node label. The separator is only honored when it is
    // not the first character, so the category part is never empty.
    String syntacticFunction = null;
    AbstractTreebankLanguagePack tlp = (AbstractTreebankLanguagePack) aTreebankLanguagePack;
    int gfIdx = nodeLabelValue.indexOf(tlp.getGfCharacter());
    if (gfIdx > 0) {
        syntacticFunction = nodeLabelValue.substring(gfIdx + 1);
        nodeLabelValue = nodeLabelValue.substring(0, gfIdx);
    }

    // Check if node is a constituent node on sentence or phrase-level
    if (aNode.isPhrasal()) {
        Type constType = constituentMappingProvider.getTagType(nodeLabelValue);

        // The span holds token indexes; translate them to character offsets
        IntPair span = aNode.getSpan();
        int begin = tokens.get(span.getSource()).get(CharacterOffsetBeginAnnotation.class);
        int end = tokens.get(span.getTarget()).get(CharacterOffsetEndAnnotation.class);

        Constituent constituent = (Constituent) aJCas.getCas().createAnnotation(constType, begin, end);
        constituent.setConstituentType(internStrings ? nodeLabelValue.intern() : nodeLabelValue);
        constituent.setSyntacticFunction(
                internStrings && syntacticFunction != null ? syntacticFunction.intern() : syntacticFunction);
        constituent.setParent(aParentFS);

        // Convert the children first; the size of the children array has to be known before
        // it can be created
        List<org.apache.uima.jcas.tcas.Annotation> childAnnotations = new ArrayList<>();
        for (Tree child : aNode.getChildrenAsList()) {
            org.apache.uima.jcas.tcas.Annotation childAnnotation = convertConstituentTreeNode(aJCas,
                    aTreebankLanguagePack, child, constituent, internStrings, constituentMappingProvider,
                    tokens);
            if (childAnnotation != null) {
                childAnnotations.add(childAnnotation);
            }
        }

        // Now that we know how many children we have, link annotation of
        // current node with its children
        constituent.setChildren(FSCollectionFactory.createFSArray(aJCas, childAnnotations));

        constituent.addToIndexes();

        return constituent;
    }
    // Create parent link on token
    else if (aNode.isPreTerminal()) {
        // link token to its parent constituent
        List<Tree> children = aNode.getChildrenAsList();
        assert children.size() == 1;
        Tree terminal = children.get(0);
        CoreLabel label = (CoreLabel) terminal.label();
        Token token = label.get(TokenKey.class);
        token.setParent(aParentFS);
        return token;
    } else {
        throw new IllegalArgumentException("Node must be either phrasal or pre-terminal");
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.internal.DKPro2CoreNlp.java

License:Open Source License

/**
 * Recursively builds a Stanford {@link Tree} from a UIMA constituent structure.
 * <p>
 * A {@code Constituent} that is not a leaf becomes an inner node labelled with its constituent
 * type. Anything else is treated as a {@code Token}: a leaf node is created for it, the
 * whitespace before and after it is stored on the leaf label, and the leaf is wrapped in a
 * pre-terminal node labelled with the token's POS value.
 *
 * @param root
 *            the annotation to convert; a {@code Constituent} or a {@code Token}.
 * @param tFact
 *            the factory used to create tree nodes and leaves.
 * @param aIdxTokens
 *            optional mapping from tokens to the labels used for their leaves; if {@code null},
 *            the covered text of the token is used instead.
 * @return the tree rooted at the node created for {@code root}.
 * @throws IllegalStateException
 *             if the JCas cannot be obtained from the CAS of {@code root}.
 */
public static Tree createStanfordTree(org.apache.uima.jcas.tcas.Annotation root, TreeFactory tFact,
        Map<Token, IndexedWord> aIdxTokens) {
    JCas aJCas;
    try {
        aJCas = root.getCAS().getJCas();
    } catch (CASException e) {
        // Keep the original exception as cause so the reason for the failure is not lost
        throw new IllegalStateException("Unable to get JCas from JCas wrapper", e);
    }

    // define the new (root) node
    Tree rootNode;

    // before we can create a node, we must check if we have any children (we have to know
    // whether to create a node or a leaf - not very dynamic)
    if (root instanceof Constituent && !isLeaf((Constituent) root)) {
        Constituent node = (Constituent) root;
        List<Tree> childNodes = new ArrayList<Tree>();

        // get childNodes from child annotations
        FSArray children = node.getChildren();
        for (int i = 0; i < children.size(); i++) {
            childNodes.add(createStanfordTree(node.getChildren(i), tFact, aIdxTokens));
        }

        // now create the node with its children
        rootNode = tFact.newTreeNode(node.getConstituentType(), childNodes);

    } else {
        // Handle leaf annotations
        // Leaves are always Token annotations
        // We also have to insert a pre-terminal node with the value of the
        // POS annotation on the token
        // because the POS is not directly stored within the tree
        Token wordAnnotation = (Token) root;

        // create leaf-node for the tree
        Tree wordNode;
        if (aIdxTokens != null) {
            wordNode = tFact.newLeaf(aIdxTokens.get(wordAnnotation));
        } else {
            wordNode = tFact.newLeaf(wordAnnotation.getCoveredText());
        }

        // create information about preceding and trailing whitespaces in the leaf node
        StringBuilder preWhitespaces = new StringBuilder();
        StringBuilder trailWhitespaces = new StringBuilder();

        List<Token> precedingTokenList = selectPreceding(aJCas, Token.class, wordAnnotation, 1);
        List<Token> followingTokenList = selectFollowing(aJCas, Token.class, wordAnnotation, 1);

        // The gap between neighbouring tokens is represented as that many space characters
        if (precedingTokenList.size() > 0) {
            Token precedingToken = precedingTokenList.get(0);
            int precedingWhitespaces = wordAnnotation.getBegin() - precedingToken.getEnd();
            for (int i = 0; i < precedingWhitespaces; i++) {
                preWhitespaces.append(" ");
            }
        }
        if (followingTokenList.size() > 0) {
            Token followingToken = followingTokenList.get(0);
            int trailingWhitespaces = followingToken.getBegin() - wordAnnotation.getEnd();
            for (int i = 0; i < trailingWhitespaces; i++) {
                trailWhitespaces.append(" ");
            }
        }

        // write whitespace information as CoreAnnotation.BeforeAnnotation and
        // CoreAnnotation.AfterAnnotation to the label of the leaf node
        CoreLabel wordLabel = (CoreLabel) wordNode.label();
        wordLabel.set(CoreAnnotations.BeforeAnnotation.class, preWhitespaces.toString());
        wordLabel.set(CoreAnnotations.AfterAnnotation.class, trailWhitespaces.toString());

        // get POS-annotation
        // NOTE(review): a token without POS would fail here with a NullPointerException -
        // confirm that callers only pass POS-tagged tokens
        POS pos = wordAnnotation.getPos();

        // create POS-Node in the tree and attach word-node to it
        rootNode = tFact.newTreeNode(pos.getPosValue(), Arrays.asList(wordNode));
    }

    return rootNode;
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.StanfordAnnotator.java

License:Open Source License

/**
 * Recovers annotations from a Stanford Tree-Object, which have been saved within the CoreLabel
 * of the tree.
 * <p>
 * Note:
 * Copying has to be done in batch, because we need to have ALL annotations that should be
 * recovered together when copying them. The reason is that some annotations reference each
 * other, which can cause problem if a referenced annotation has not yet been recovered.
 * <p>
 * The begin and end of every recovered annotation are overwritten with the span of the tree
 * node it was stored on before it is copied. If no node carries any annotation, the method
 * returns without copying anything.
 */
public void recoverAnnotationsFromNodes() {
    // create batch-copy list for recovered annotations
    List<Annotation> annoList = new ArrayList<Annotation>();

    Iterator<Tree> treeIterator = tokenTree.getTree().iterator();
    // The source CAS is taken from the first annotation encountered
    CAS srcCAS = null;

    while (treeIterator.hasNext()) {

        Tree curTree = treeIterator.next();

        // get the collection from the label of the best-fitting node in
        // which we store UIMA annotations
        Collection<Annotation> annotations = ((CoreLabel) curTree.label()).get(UIMAAnnotations.class);

        // do we have any annotations stored in the node?
        if (annotations != null && annotations.size() > 0) {

            // translate values which are now relative to the
            // node-span back to absolute value (depending on the
            // new offset of the node-span within the new CAS)

            IntPair span = tokenTree.getSpan(curTree);
            // iterate over all annotations
            for (Annotation curAnno : annotations) {
                if (srcCAS == null) {
                    srcCAS = curAnno.getCAS();
                }

                // TODO using the SPAN as new annotation index might not
                // be correct in all cases - if not an EXACTLY MATCHING
                // node had been found for the saved annotation, this will
                // be wrong. Find a way to incorporate the anno-index here
                curAnno.setBegin(span.getSource());
                curAnno.setEnd(span.getTarget());

                // add anno to batch-copy list
                annoList.add(curAnno);

            } // endfor iterate over annotations

        } // endif check for annotations in node

    } // endwhile iterate over subtrees

    // Nothing was stored in the tree: there is nothing to copy and no source CAS is known, so
    // do not construct a copier with a null source CAS
    if (annoList.isEmpty()) {
        return;
    }

    /*
     * Now that we have gathered all annotations from the tree, batch-copy them to the new CAS
     */

    // create CasRecoverer (=adapted version of the CasCopier)
    CasCopier copier = new CasCopier(srcCAS, jCas.getCas());

    // now batch-copy the annos
    List<Annotation> copiedAnnos = copier.batchCopyAnnotations(annoList);

    // add copied annos to indexes
    for (Annotation cAnno : copiedAnnos) {
        jCas.addFsToIndexes(cAnno);
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeUtils.java

License:Open Source License

/**
 * Recursively builds a Stanford {@link Tree} from a UIMA constituent structure.
 * <p>
 * A {@code Constituent} that is not a leaf becomes an inner node labelled with its constituent
 * type. Anything else is treated as a {@code Token}: a leaf node holding the covered text is
 * created, the whitespace before and after the token is stored on the leaf label, and the leaf
 * is wrapped in a pre-terminal node labelled with the value of the POS annotation covered by
 * the token.
 *
 * @param root
 *            the annotation to convert; a {@code Constituent} or a {@code Token}.
 * @param tFact
 *            the factory used to create tree nodes and leaves.
 * @return the tree rooted at the node created for {@code root}.
 * @throws IllegalStateException
 *             if the JCas cannot be obtained from the CAS of {@code root}.
 */
public static Tree createStanfordTree(Annotation root, TreeFactory tFact) {
    JCas aJCas;
    try {
        aJCas = root.getCAS().getJCas();
    } catch (CASException e) {
        // Keep the original exception as cause so the reason for the failure is not lost
        throw new IllegalStateException("Unable to get JCas from JCas wrapper", e);
    }

    // define the new (root) node
    Tree rootNode;

    // before we can create a node, we must check if we have any children (we have to know
    // whether to create a node or a leaf - not very dynamic)
    if (root instanceof Constituent && !isLeaf((Constituent) root)) {
        Constituent node = (Constituent) root;
        List<Tree> childNodes = new ArrayList<Tree>();

        // get childNodes from child annotations
        FSArray children = node.getChildren();
        for (int i = 0; i < children.size(); i++) {
            childNodes.add(createStanfordTree(node.getChildren(i), tFact));
        }

        // now create the node with its children
        rootNode = tFact.newTreeNode(node.getConstituentType(), childNodes);

    } else {
        // Handle leaf annotations
        // Leaves are always Token annotations
        // We also have to insert a pre-terminal node with the value of the
        // POS annotation on the token
        // because the POS is not directly stored within the tree
        Token wordAnnotation = (Token) root;

        // create leaf-node for the tree
        Tree wordNode = tFact.newLeaf(wordAnnotation.getCoveredText());

        // create information about preceding and trailing whitespaces in the leaf node
        StringBuilder preWhitespaces = new StringBuilder();
        StringBuilder trailWhitespaces = new StringBuilder();

        List<Token> precedingTokenList = selectPreceding(aJCas, Token.class, wordAnnotation, 1);
        List<Token> followingTokenList = selectFollowing(aJCas, Token.class, wordAnnotation, 1);

        // The gap between neighbouring tokens is represented as that many space characters
        if (precedingTokenList.size() > 0) {
            Token precedingToken = precedingTokenList.get(0);
            int precedingWhitespaces = wordAnnotation.getBegin() - precedingToken.getEnd();
            for (int i = 0; i < precedingWhitespaces; i++) {
                preWhitespaces.append(" ");
            }
        }
        if (followingTokenList.size() > 0) {
            Token followingToken = followingTokenList.get(0);
            int trailingWhitespaces = followingToken.getBegin() - wordAnnotation.getEnd();
            for (int i = 0; i < trailingWhitespaces; i++) {
                trailWhitespaces.append(" ");
            }
        }

        // write whitespace information as CoreAnnotation.BeforeAnnotation and
        // CoreAnnotation.AfterAnnotation to the label of the leaf node
        CoreLabel wordLabel = (CoreLabel) wordNode.label();
        wordLabel.set(CoreAnnotations.BeforeAnnotation.class, preWhitespaces.toString());
        wordLabel.set(CoreAnnotations.AfterAnnotation.class, trailWhitespaces.toString());

        // get POS-annotation
        // select the POS annotations that are covered by the token
        List<POS> coveredPos = JCasUtil.selectCovered(aJCas, POS.class, wordAnnotation);
        // the token should cover exactly one POS
        assert coveredPos.size() == 1;
        POS pos = coveredPos.get(0);

        // create POS-Node in the tree and attach word-node to it
        rootNode = tFact.newTreeNode(pos.getPosValue(), Arrays.asList(wordNode));
    }

    return rootNode;
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeUtils.java

License:Open Source License

/**
 * <p>/*www  .j ava  2  s.  c o m*/
 * Recreates a Stanford Tree from the StanfordParser annotations and saves all
 * non-StanfordParser-Annotations within the scope of the sentence in the label of the best
 * fitting node.
 * </p>
 * 
 * <p>
 * <strong>CAUTION: </strong><i>This method is intended for the use by CAS Multipliers, which
 * create new CASes from this tree. The annotation-spans in the source-CAS will be changed!!!!!!
 * You do NOT want to use the source CAS after this method has been called. The
 * createStanfordTree()-method does not change the CAS, so use this instead, if the annotations
 * do not have to be recovered or accessed in the tree.</i>
 * </p>
 * 
 * <p>
 * TODO: This behavior could be changed by making COPIES of the annotations and changing the
 * copied instead of the originals. However, in order to being able to make copies, a dummy CAS
 * must be introduced to which the annotations can be copied. When they are recovered, they will
 * be copied to the new destination CAS anyway.
 * </p>
 * 
 * @param root
 *            the ROOT annotation
 * @return an {@link Tree} object representing the syntax structure of the sentence
 * @throws CASException if the JCas cannot be accessed.
 */
public static Tree createStanfordTreeWithAnnotations(Annotation root) throws CASException {
    JCas aJCas = root.getCAS().getJCas();

    // Create tree
    Tree tree = createStanfordTree(root);

    // Get all non-parser related annotations
    // and all tokens (needed for span-calculations later on)
    List<Annotation> nonParserAnnotations = new ArrayList<Annotation>();
    List<Token> tokens = new ArrayList<Token>();

    // Using getCoveredAnnotations instead of iterate, because subiterators did not work in all
    // cases
    List<Annotation> annosWithinRoot = JCasUtil.selectCovered(aJCas, Annotation.class, root);

    for (Annotation curAnno : annosWithinRoot) {
        if (!(curAnno instanceof POS) && !(curAnno instanceof Constituent) && !(curAnno instanceof Dependency)
                && !(curAnno instanceof PennTree) && !(curAnno instanceof Lemma) && !(curAnno instanceof Token)
                && !(curAnno instanceof DocumentMetaData)) {
            nonParserAnnotations.add(curAnno);
        } else if (curAnno instanceof Token) {
            tokens.add((Token) curAnno);
        }

    }

    // create wrapper for tree and its tokens
    TreeWithTokens annoTree = new TreeWithTokens(tree, tokens);

    /*
     * Add annotations to the best-fitting nodes. The best-fitting node for an annotation is the
     * deepest node in the tree that still completely contains the annotation.
     */
    for (Annotation curAnno : nonParserAnnotations) {
        // get best fitting node
        Tree bestFittingNode = annoTree.getBestFit(curAnno);

        // Add annotation to node
        if (bestFittingNode != null) {

            // translate annotation span to a value relative to the
            // node-span
            IntPair span = annoTree.getSpan(bestFittingNode);
            curAnno.setBegin(curAnno.getBegin() - span.getSource());
            curAnno.setEnd(curAnno.getEnd() - span.getSource());

            // get the collection from the label of the best-fitting node in which we store UIMA
            // annotations or create it, if it does not exist
            Collection<Annotation> annotations = ((CoreLabel) bestFittingNode.label())
                    .get(UIMAAnnotations.class);
            if (annotations == null) {
                annotations = new ArrayList<Annotation>();
            }

            // add annotation + checksum of annotated text to list and write it back to node
            // label
            annotations.add(curAnno);

            ((CoreLabel) bestFittingNode.label()).set(UIMAAnnotations.class, annotations);
        }
    }

    return tree;
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeUtils.java

License:Open Source License

/**
 * Reconstructs the sentence text from the leaves of a tree.
 * <p>
 * For every leaf, the value stored under {@code CoreAnnotations.ValueAnnotation} is emitted,
 * followed by the whitespace stored under {@code CoreAnnotations.AfterAnnotation}. A leaf
 * without whitespace information contributes a single space instead.
 *
 * @param t
 *            the tree representation of the sentence
 * @return the sentence
 */
public static String tree2Words(Tree t) {
    StringBuilder sentence = new StringBuilder();

    for (Tree leafNode : t.getLeaves()) {
        CoreLabel leafLabel = (CoreLabel) leafNode.label();
        String text = leafLabel.get(CoreAnnotations.ValueAnnotation.class);

        // TODO maybe double check preceding whitespaces: transformations could have left the
        // trailing whitespace of the previous token different from the preceding whitespace
        // of the current one. This would also have to be done in getTokenListFromTree(...)

        // Leaves without whitespace info (this may happen for nodes inserted by TSurgeon
        // operations) are followed by a single space
        String after = leafLabel.get(CoreAnnotations.AfterAnnotation.class);
        String separator = (after != null) ? after : " ";

        sentence.append(text);
        sentence.append(separator);
    }

    return sentence.toString();
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeUtils.java

License:Open Source License

/**
 * Returns a list of Token annotations from a Tree-object
 * /*from  www  .  j  a  v a  2 s  .co m*/
 * @param aJCas
 *            a JCas.
 * @param t
 *            a tree.
 * @return the tokens.
 */
public static List<Token> getTokenListFromTree(JCas aJCas, Tree t) {
    List<Token> tokenList = new ArrayList<Token>();
    int index = 0;
    for (Tree leaf : t.getLeaves()) {

        String word = ((CoreLabel) leaf.label()).get(CoreAnnotations.ValueAnnotation.class);

        tokenList.add(new Token(aJCas, index, index + word.length()));

        // get trailing whitespaces to calculate next index
        String whiteSpaces = ((CoreLabel) leaf.label()).get(CoreAnnotations.AfterAnnotation.class);
        if (whiteSpaces == null) {
            whiteSpaces = " ";
        }

        index += word.length() + whiteSpaces.length();
    }
    return tokenList;
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeUtils.java

License:Open Source License

/**
 * Assigns consecutive indexes to the leaves below the given node, visiting children in order.
 *
 * @param t
 *            the node whose leaves are re-indexed; a leaf must carry a {@code CoreLabel}.
 * @param startIndex
 *            the index given to the first leaf encountered.
 * @return the index following the one assigned to the last leaf; equals {@code startIndex}
 *         when no leaf was visited.
 */
private static int reIndexLeaves(Tree t, int startIndex) {
    if (!t.isLeaf()) {
        // Inner node: thread the running index through the children
        int nextIndex = startIndex;
        for (Tree subtree : t.children()) {
            nextIndex = reIndexLeaves(subtree, nextIndex);
        }
        return nextIndex;
    }

    // Leaf: take the current index and hand the next one back to the caller
    ((CoreLabel) t.label()).setIndex(startIndex);
    return startIndex + 1;
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeWithTokens.java

License:Open Source License

/**
 * Stores the given tree after indexing its leaves.
 * <p>
 * If the label of the tree's root is not a {@code CoreLabel}, a deep copy built with the
 * {@code CoreLabel} factory is stored instead of the tree that was passed in.
 *
 * @param tree
 *            the tree to store.
 */
public void setTree(Tree tree) {
    Tree coreLabelTree = (tree.label() instanceof CoreLabel)
            ? tree
            : tree.deepCopy(tree.treeFactory(), CoreLabel.factory());

    coreLabelTree.indexLeaves();

    this.tree = coreLabelTree;
}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

/**
 * Renders a tree in abbreviated form: the string form of the root label followed by the
 * string forms of the leaf labels, separated by single spaces and enclosed in square brackets.
 *
 * @param tree
 *            the tree to abbreviate.
 * @return the abbreviated representation, in the shape {@code ROOT[leaf1 leaf2 ...]}.
 */
public static String abbrevTree(Tree tree) {
    ArrayList<String> leafTexts = new ArrayList<String>();
    for (Tree leaf : tree.getLeaves()) {
        String leafText = leaf.label().toString();
        leafTexts.add(leafText);
    }

    StringBuilder abbreviated = new StringBuilder(tree.label().toString());
    abbreviated.append("[").append(StringUtils.join(leafTexts, " ")).append("]");
    return abbreviated.toString();
}