List of usage examples for edu.stanford.nlp.trees.Tree.label()
@Override
public Label label()
From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.internal.CoreNlp2DKPro.java
License:Open Source License
private static org.apache.uima.jcas.tcas.Annotation convertConstituentTreeNode(JCas aJCas, TreebankLanguagePack aTreebankLanguagePack, Tree aNode, org.apache.uima.jcas.tcas.Annotation aParentFS, boolean internStrings, MappingProvider constituentMappingProvider, List<CoreLabel> tokens) { // Get node label String nodeLabelValue = aNode.value(); // Extract syntactic function from node label String syntacticFunction = null; AbstractTreebankLanguagePack tlp = (AbstractTreebankLanguagePack) aTreebankLanguagePack; int gfIdx = nodeLabelValue.indexOf(tlp.getGfCharacter()); if (gfIdx > 0) { syntacticFunction = nodeLabelValue.substring(gfIdx + 1); nodeLabelValue = nodeLabelValue.substring(0, gfIdx); }//from w ww . j a v a2 s .co m // Check if node is a constituent node on sentence or phrase-level if (aNode.isPhrasal()) { Type constType = constituentMappingProvider.getTagType(nodeLabelValue); IntPair span = aNode.getSpan(); int begin = tokens.get(span.getSource()).get(CharacterOffsetBeginAnnotation.class); int end = tokens.get(span.getTarget()).get(CharacterOffsetEndAnnotation.class); Constituent constituent = (Constituent) aJCas.getCas().createAnnotation(constType, begin, end); constituent.setConstituentType(internStrings ? nodeLabelValue.intern() : nodeLabelValue); constituent.setSyntacticFunction( internStrings && syntacticFunction != null ? syntacticFunction.intern() : syntacticFunction); constituent.setParent(aParentFS); // Do we have any children? 
List<org.apache.uima.jcas.tcas.Annotation> childAnnotations = new ArrayList<>(); for (Tree child : aNode.getChildrenAsList()) { org.apache.uima.jcas.tcas.Annotation childAnnotation = convertConstituentTreeNode(aJCas, aTreebankLanguagePack, child, constituent, internStrings, constituentMappingProvider, tokens); if (childAnnotation != null) { childAnnotations.add(childAnnotation); } } // Now that we know how many children we have, link annotation of // current node with its children constituent.setChildren(FSCollectionFactory.createFSArray(aJCas, childAnnotations)); constituent.addToIndexes(); return constituent; } // Create parent link on token else if (aNode.isPreTerminal()) { // link token to its parent constituent List<Tree> children = aNode.getChildrenAsList(); assert children.size() == 1; Tree terminal = children.get(0); CoreLabel label = (CoreLabel) terminal.label(); Token token = label.get(TokenKey.class); token.setParent(aParentFS); return token; } else { throw new IllegalArgumentException("Node must be either phrasal nor pre-terminal"); } }
From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.internal.DKPro2CoreNlp.java
License:Open Source License
public static Tree createStanfordTree(org.apache.uima.jcas.tcas.Annotation root, TreeFactory tFact, Map<Token, IndexedWord> aIdxTokens) { JCas aJCas;//from www .ja v a 2 s .c o m try { aJCas = root.getCAS().getJCas(); } catch (CASException e) { throw new IllegalStateException("Unable to get JCas from JCas wrapper"); } // define the new (root) node Tree rootNode; // before we can create a node, we must check if we have any children (we have to know // whether to create a node or a leaf - not very dynamic) if (root instanceof Constituent && !isLeaf((Constituent) root)) { Constituent node = (Constituent) root; List<Tree> childNodes = new ArrayList<Tree>(); // get childNodes from child annotations FSArray children = node.getChildren(); for (int i = 0; i < children.size(); i++) { childNodes.add(createStanfordTree(node.getChildren(i), tFact, aIdxTokens)); } // now create the node with its children rootNode = tFact.newTreeNode(node.getConstituentType(), childNodes); } else { // Handle leaf annotations // Leafs are always Token-annotations // We also have to insert a Preterminal node with the value of the // POS-Annotation on the token // because the POS is not directly stored within the treee Token wordAnnotation = (Token) root; // create leaf-node for the tree Tree wordNode; if (aIdxTokens != null) { wordNode = tFact.newLeaf(aIdxTokens.get(wordAnnotation)); } else { wordNode = tFact.newLeaf(wordAnnotation.getCoveredText()); } // create information about preceding and trailing whitespaces in the leaf node StringBuilder preWhitespaces = new StringBuilder(); StringBuilder trailWhitespaces = new StringBuilder(); List<Token> precedingTokenList = selectPreceding(aJCas, Token.class, wordAnnotation, 1); List<Token> followingTokenList = selectFollowing(aJCas, Token.class, wordAnnotation, 1); if (precedingTokenList.size() > 0) { Token precedingToken = precedingTokenList.get(0); int precedingWhitespaces = wordAnnotation.getBegin() - precedingToken.getEnd(); for (int i = 0; i < 
precedingWhitespaces; i++) { preWhitespaces.append(" "); } } if (followingTokenList.size() > 0) { Token followingToken = followingTokenList.get(0); int trailingWhitespaces = followingToken.getBegin() - wordAnnotation.getEnd(); for (int i = 0; i < trailingWhitespaces; i++) { trailWhitespaces.append(" "); } } // write whitespace information as CoreAnnotation.BeforeAnnotation and // CoreAnnotation.AfterAnnotation to the node add annotation to list and write back to // node label ((CoreLabel) wordNode.label()).set(CoreAnnotations.BeforeAnnotation.class, preWhitespaces.toString()); ((CoreLabel) wordNode.label()).set(CoreAnnotations.AfterAnnotation.class, trailWhitespaces.toString()); // get POS-annotation POS pos = wordAnnotation.getPos(); // create POS-Node in the tree and attach word-node to it rootNode = tFact.newTreeNode(pos.getPosValue(), Arrays.asList((new Tree[] { wordNode }))); } return rootNode; }
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.StanfordAnnotator.java
License:Open Source License
/** * Recovers annotations from a Stanford Tree-Object, which have been saved within the CoreLabel * of the tree.//from www . j av a 2 s . co m *<p> * Note: * Copying has to be done in batch, because we need to have ALL annotations that should be * recovered together when copying them. The reason is that some annotations reference each * other, which can cause problem if a referenced annotation has not yet been recovered. */ public void recoverAnnotationsFromNodes() { // create batch-copy list for recovered annotations List<Annotation> annoList = new ArrayList<Annotation>(); Iterator<Tree> treeIterator = tokenTree.getTree().iterator(); CAS srcCAS = null; while (treeIterator.hasNext()) { Tree curTree = treeIterator.next(); // get the collection from the label of the best-fitting node in // which we store UIMA annotations Collection<Annotation> annotations = ((CoreLabel) curTree.label()).get(UIMAAnnotations.class); // do we have any annotations stored in the node? if (annotations != null && annotations.size() > 0) { // translate values which are now relative to the // node-span back to absolute value (depending on the // new offset of the node-span within the new CAS) IntPair span = tokenTree.getSpan(curTree); // iterate over all annotations for (Annotation curAnno : annotations) { srcCAS = srcCAS == null ? curAnno.getCAS() : srcCAS; // TODO using the SPAN as new annotation index might not // be correct in all cases - if not an EXACTLY MATCHING // node had been found for the saved annotation, this will // be wrong. 
Find a way to incorporate the anno-index here curAnno.setBegin(span.getSource()); curAnno.setEnd(span.getTarget()); // add anno to batch-copy list annoList.add(curAnno); } // endfor iterate over annotations } // endif check for annotations in node } // endwhile iterate over subtrees /* * Now that we have gathered all annotations from the tree, batch-copy them to the new CAS */ // create CasRecoverer (=adapted version of the CasCopier) CasCopier copier = new CasCopier(srcCAS, jCas.getCas()); // now batch-copy the annos List<Annotation> copiedAnnos = copier.batchCopyAnnotations(annoList); // add copied annos to indexes for (Annotation cAnno : copiedAnnos) { jCas.addFsToIndexes(cAnno); } }
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeUtils.java
License:Open Source License
public static Tree createStanfordTree(Annotation root, TreeFactory tFact) { JCas aJCas;/* w ww. ja va 2s. com*/ try { aJCas = root.getCAS().getJCas(); } catch (CASException e) { throw new IllegalStateException("Unable to get JCas from JCas wrapper"); } // define the new (root) node Tree rootNode; // before we can create a node, we must check if we have any children (we have to know // whether to create a node or a leaf - not very dynamic) if (root instanceof Constituent && !isLeaf((Constituent) root)) { Constituent node = (Constituent) root; List<Tree> childNodes = new ArrayList<Tree>(); // get childNodes from child annotations FSArray children = node.getChildren(); for (int i = 0; i < children.size(); i++) { childNodes.add(createStanfordTree(node.getChildren(i), tFact)); } // now create the node with its children rootNode = tFact.newTreeNode(node.getConstituentType(), childNodes); } else { // Handle leaf annotations // Leafs are always Token-annotations // We also have to insert a Preterminal node with the value of the // POS-Annotation on the token // because the POS is not directly stored within the treee Token wordAnnotation = (Token) root; // create leaf-node for the tree Tree wordNode = tFact.newLeaf(wordAnnotation.getCoveredText()); // create information about preceding and trailing whitespaces in the leaf node StringBuilder preWhitespaces = new StringBuilder(); StringBuilder trailWhitespaces = new StringBuilder(); List<Token> precedingTokenList = selectPreceding(aJCas, Token.class, wordAnnotation, 1); List<Token> followingTokenList = selectFollowing(aJCas, Token.class, wordAnnotation, 1); if (precedingTokenList.size() > 0) { Token precedingToken = precedingTokenList.get(0); int precedingWhitespaces = wordAnnotation.getBegin() - precedingToken.getEnd(); for (int i = 0; i < precedingWhitespaces; i++) { preWhitespaces.append(" "); } } if (followingTokenList.size() > 0) { Token followingToken = followingTokenList.get(0); int trailingWhitespaces = 
followingToken.getBegin() - wordAnnotation.getEnd(); for (int i = 0; i < trailingWhitespaces; i++) { trailWhitespaces.append(" "); } } // write whitespace information as CoreAnnotation.BeforeAnnotation and // CoreAnnotation.AfterAnnotation to the node add annotation to list and write back to // node label ((CoreLabel) wordNode.label()).set(CoreAnnotations.BeforeAnnotation.class, preWhitespaces.toString()); ((CoreLabel) wordNode.label()).set(CoreAnnotations.AfterAnnotation.class, trailWhitespaces.toString()); // get POS-annotation // get the token that is covered by the POS List<POS> coveredPos = JCasUtil.selectCovered(aJCas, POS.class, wordAnnotation); // the POS should only cover one token assert coveredPos.size() == 1; POS pos = coveredPos.get(0); // create POS-Node in the tree and attach word-node to it rootNode = tFact.newTreeNode(pos.getPosValue(), Arrays.asList((new Tree[] { wordNode }))); } return rootNode; }
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeUtils.java
License:Open Source License
/** * <p>/*www .j ava 2 s. c o m*/ * Recreates a Stanford Tree from the StanfordParser annotations and saves all * non-StanfordParser-Annotations within the scope of the sentence in the label of the best * fitting node. * </p> * * <p> * <strong>CAUTION: </strong><i>This method is intended for the use by CAS Multipliers, which * create new CASes from this tree. The annotation-spans in the source-CAS will be changed!!!!!! * You do NOT want to use the source CAS after this method has been called. The * createStanfordTree()-method does not change the CAS, so use this instead, if the annotations * do not have to be recovered or accessed in the tree.</i> * </p> * * <p> * TODO: This behavior could be changed by making COPIES of the annotations and changing the * copied instead of the originals. However, in order to being able to make copies, a dummy CAS * must be introduced to which the annotations can be copied. When they are recovered, they will * be copied to the new destination CAS anyway. * </p> * * @param root * the ROOT annotation * @return an {@link Tree} object representing the syntax structure of the sentence * @throws CASException if the JCas cannot be accessed. 
*/ public static Tree createStanfordTreeWithAnnotations(Annotation root) throws CASException { JCas aJCas = root.getCAS().getJCas(); // Create tree Tree tree = createStanfordTree(root); // Get all non-parser related annotations // and all tokens (needed for span-calculations later on) List<Annotation> nonParserAnnotations = new ArrayList<Annotation>(); List<Token> tokens = new ArrayList<Token>(); // Using getCoveredAnnotations instead of iterate, because subiterators did not work in all // cases List<Annotation> annosWithinRoot = JCasUtil.selectCovered(aJCas, Annotation.class, root); for (Annotation curAnno : annosWithinRoot) { if (!(curAnno instanceof POS) && !(curAnno instanceof Constituent) && !(curAnno instanceof Dependency) && !(curAnno instanceof PennTree) && !(curAnno instanceof Lemma) && !(curAnno instanceof Token) && !(curAnno instanceof DocumentMetaData)) { nonParserAnnotations.add(curAnno); } else if (curAnno instanceof Token) { tokens.add((Token) curAnno); } } // create wrapper for tree and its tokens TreeWithTokens annoTree = new TreeWithTokens(tree, tokens); /* * Add annotations to the best-fitting nodes. The best-fitting node for an annotation is the * deepest node in the tree that still completely contains the annotation. 
*/ for (Annotation curAnno : nonParserAnnotations) { // get best fitting node Tree bestFittingNode = annoTree.getBestFit(curAnno); // Add annotation to node if (bestFittingNode != null) { // translate annotation span to a value relative to the // node-span IntPair span = annoTree.getSpan(bestFittingNode); curAnno.setBegin(curAnno.getBegin() - span.getSource()); curAnno.setEnd(curAnno.getEnd() - span.getSource()); // get the collection from the label of the best-fitting node in which we store UIMA // annotations or create it, if it does not exist Collection<Annotation> annotations = ((CoreLabel) bestFittingNode.label()) .get(UIMAAnnotations.class); if (annotations == null) { annotations = new ArrayList<Annotation>(); } // add annotation + checksum of annotated text to list and write it back to node // label annotations.add(curAnno); ((CoreLabel) bestFittingNode.label()).set(UIMAAnnotations.class, annotations); } } return tree; }
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeUtils.java
License:Open Source License
/** * Returns the sentence from its tree representation. * // ww w. j a va 2s . co m * @param t * the tree representation of the sentence * @return the sentence */ public static String tree2Words(Tree t) { StringBuilder buffer = new StringBuilder(); List<Tree> leaves = t.getLeaves(); for (Tree leaf : leaves) { String word = ((CoreLabel) leaf.label()).get(CoreAnnotations.ValueAnnotation.class); // TODO maybe double check preceding whitespaces, because transformations could have // resulted in the situation that the trailing // whitespaces of out last tokens is not the same as the preceding whitespaces of out // current token BUT: This has also to be done in getTokenListFromTree(...) // now add the trailing whitespaces String trailingWhitespaces = ((CoreLabel) leaf.label()).get(CoreAnnotations.AfterAnnotation.class); // if no whitespace-info is available, insert a whitespace this may happen for nodes // inserted by TSurgeon operations if (trailingWhitespaces == null) { trailingWhitespaces = " "; } buffer.append(word).append(trailingWhitespaces); } return buffer.toString(); }
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeUtils.java
License:Open Source License
/** * Returns a list of Token annotations from a Tree-object * /*from www . j a v a 2 s .co m*/ * @param aJCas * a JCas. * @param t * a tree. * @return the tokens. */ public static List<Token> getTokenListFromTree(JCas aJCas, Tree t) { List<Token> tokenList = new ArrayList<Token>(); int index = 0; for (Tree leaf : t.getLeaves()) { String word = ((CoreLabel) leaf.label()).get(CoreAnnotations.ValueAnnotation.class); tokenList.add(new Token(aJCas, index, index + word.length())); // get trailing whitespaces to calculate next index String whiteSpaces = ((CoreLabel) leaf.label()).get(CoreAnnotations.AfterAnnotation.class); if (whiteSpaces == null) { whiteSpaces = " "; } index += word.length() + whiteSpaces.length(); } return tokenList; }
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeUtils.java
License:Open Source License
/**
 * Assigns consecutive indexes to the leaves below (or at) the given node, visiting children in
 * order.
 *
 * @param t
 *            the node whose leaves are re-indexed; leaf labels are cast to CoreLabel.
 * @param startIndex
 *            the index to assign to the first leaf encountered.
 * @return the index following the last one that was assigned.
 */
private static int reIndexLeaves(Tree t, int startIndex) {
    if (!t.isLeaf()) {
        // inner node: thread the running index through all children
        int next = startIndex;
        for (Tree child : t.children()) {
            next = reIndexLeaves(child, next);
        }
        return next;
    }

    CoreLabel leafLabel = (CoreLabel) t.label();
    leafLabel.setIndex(startIndex);
    return startIndex + 1;
}
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeWithTokens.java
License:Open Source License
/**
 * Sets the wrapped tree and indexes its leaves.
 * <p>
 * If the label of the given tree is not a CoreLabel, a deep copy built with the CoreLabel
 * factory is stored instead of the given instance.
 *
 * @param tree
 *            the tree to store.
 */
public void setTree(Tree tree) {
    Tree coreLabelTree = tree;
    if (!(tree.label() instanceof CoreLabel)) {
        coreLabelTree = tree.deepCopy(tree.treeFactory(), CoreLabel.factory());
    }

    coreLabelTree.indexLeaves();
    this.tree = coreLabelTree;
}
From source file:edu.cmu.ark.AnalysisUtilities.java
License:Open Source License
public static String abbrevTree(Tree tree) { ArrayList<String> toks = new ArrayList<String>(); for (Tree L : tree.getLeaves()) { toks.add(L.label().toString()); }//w ww . jav a 2 s. co m return tree.label().toString() + "[" + StringUtils.join(toks, " ") + "]"; }