List of usage examples for the method edu.stanford.nlp.trees.AbstractTreebankLanguagePack.getGfCharacter()
public char getGfCharacter()
From source file: de.tudarmstadt.ukp.dkpro.core.corenlp.internal.CoreNlp2DKPro.java
License: Open Source License
private static org.apache.uima.jcas.tcas.Annotation convertConstituentTreeNode(JCas aJCas, TreebankLanguagePack aTreebankLanguagePack, Tree aNode, org.apache.uima.jcas.tcas.Annotation aParentFS, boolean internStrings, MappingProvider constituentMappingProvider, List<CoreLabel> tokens) { // Get node label String nodeLabelValue = aNode.value(); // Extract syntactic function from node label String syntacticFunction = null; AbstractTreebankLanguagePack tlp = (AbstractTreebankLanguagePack) aTreebankLanguagePack; int gfIdx = nodeLabelValue.indexOf(tlp.getGfCharacter()); if (gfIdx > 0) { syntacticFunction = nodeLabelValue.substring(gfIdx + 1); nodeLabelValue = nodeLabelValue.substring(0, gfIdx); }//w ww. j a v a 2 s . com // Check if node is a constituent node on sentence or phrase-level if (aNode.isPhrasal()) { Type constType = constituentMappingProvider.getTagType(nodeLabelValue); IntPair span = aNode.getSpan(); int begin = tokens.get(span.getSource()).get(CharacterOffsetBeginAnnotation.class); int end = tokens.get(span.getTarget()).get(CharacterOffsetEndAnnotation.class); Constituent constituent = (Constituent) aJCas.getCas().createAnnotation(constType, begin, end); constituent.setConstituentType(internStrings ? nodeLabelValue.intern() : nodeLabelValue); constituent.setSyntacticFunction( internStrings && syntacticFunction != null ? syntacticFunction.intern() : syntacticFunction); constituent.setParent(aParentFS); // Do we have any children? 
List<org.apache.uima.jcas.tcas.Annotation> childAnnotations = new ArrayList<>(); for (Tree child : aNode.getChildrenAsList()) { org.apache.uima.jcas.tcas.Annotation childAnnotation = convertConstituentTreeNode(aJCas, aTreebankLanguagePack, child, constituent, internStrings, constituentMappingProvider, tokens); if (childAnnotation != null) { childAnnotations.add(childAnnotation); } } // Now that we know how many children we have, link annotation of // current node with its children constituent.setChildren(FSCollectionFactory.createFSArray(aJCas, childAnnotations)); constituent.addToIndexes(); return constituent; } // Create parent link on token else if (aNode.isPreTerminal()) { // link token to its parent constituent List<Tree> children = aNode.getChildrenAsList(); assert children.size() == 1; Tree terminal = children.get(0); CoreLabel label = (CoreLabel) terminal.label(); Token token = label.get(TokenKey.class); token.setParent(aParentFS); return token; } else { throw new IllegalArgumentException("Node must be either phrasal nor pre-terminal"); } }
From source file: de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.StanfordAnnotator.java
License: Open Source License
/** * Creates linked constituent annotations + POS annotations * /* w w w .j a v a 2s. co m*/ * @param aTreebankLanguagePack * the language pack. * @param aNode * the source tree * @param aParentFS * the parent annotation * @param aCreatePos * sets whether to create or not to create POS tags * @return the child-structure (needed for recursive call only) */ private Annotation createConstituentAnnotationFromTree(TreebankLanguagePack aTreebankLanguagePack, Tree aNode, Annotation aParentFS, boolean aCreatePos) { String nodeLabelValue = aNode.value(); String syntacticFunction = null; AbstractTreebankLanguagePack tlp = (AbstractTreebankLanguagePack) aTreebankLanguagePack; int gfIdx = nodeLabelValue.indexOf(tlp.getGfCharacter()); if (gfIdx > 0) { syntacticFunction = nodeLabelValue.substring(gfIdx + 1); nodeLabelValue = nodeLabelValue.substring(0, gfIdx); } // calculate span for the current subtree IntPair span = tokenTree.getSpan(aNode); // Check if the node has been marked by a TSurgeon operation. // If so, add a tag-annotation on the constituent if (nodeLabelValue.contains(TAG_SEPARATOR) && !nodeLabelValue.equals(TAG_SEPARATOR)) { int separatorIndex = nodeLabelValue.indexOf(TAG_SEPARATOR); String tag = nodeLabelValue.substring(0, separatorIndex); nodeLabelValue = nodeLabelValue.substring(separatorIndex + 1, nodeLabelValue.length()); createTagAnnotation(span.getSource(), span.getTarget(), tag); } // Check if node is a constituent node on sentence or phrase-level if (aNode.isPhrasal()) { // add annotation to annotation tree Constituent constituent = createConstituentAnnotation(span.getSource(), span.getTarget(), nodeLabelValue, syntacticFunction); // link to parent if (aParentFS != null) { constituent.setParent(aParentFS); } // Do we have any children? 
List<Annotation> childAnnotations = new ArrayList<Annotation>(); for (Tree child : aNode.getChildrenAsList()) { Annotation childAnnotation = createConstituentAnnotationFromTree(aTreebankLanguagePack, child, constituent, aCreatePos); if (childAnnotation != null) { childAnnotations.add(childAnnotation); } } // Now that we know how many children we have, link annotation of // current node with its children FSArray children = new FSArray(jCas, childAnnotations.size()); int curChildNum = 0; for (FeatureStructure child : childAnnotations) { children.set(curChildNum, child); curChildNum++; } constituent.setChildren(children); // write annotation for current node to index jCas.addFsToIndexes(constituent); return constituent; } // If the node is a word-level constituent node (== POS): // create parent link on token and (if not turned off) create POS tag else if (aNode.isPreTerminal()) { // create POS-annotation (annotation over the token) POS pos = createPOSAnnotation(span.getSource(), span.getTarget(), nodeLabelValue); // in any case: get the token that is covered by the POS // TODO how about multi word prepositions etc. (e.g. "such as") List<Token> coveredTokens = JCasUtil.selectCovered(jCas, Token.class, pos); // the POS should only cover one token assert coveredTokens.size() == 1; Token token = coveredTokens.get(0); // only add POS to index if we want POS-tagging if (aCreatePos) { jCas.addFsToIndexes(pos); token.setPos(pos); } // link token to its parent constituent if (aParentFS != null) { token.setParent(aParentFS); } return token; } else { throw new IllegalArgumentException("Node must be either phrasal nor pre-terminal"); } }