List of usage examples for edu.stanford.nlp.trees.Tree#isPhrasal()
public boolean isPhrasal()
From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.internal.CoreNlp2DKPro.java
License:Open Source License
private static org.apache.uima.jcas.tcas.Annotation convertConstituentTreeNode(JCas aJCas, TreebankLanguagePack aTreebankLanguagePack, Tree aNode, org.apache.uima.jcas.tcas.Annotation aParentFS, boolean internStrings, MappingProvider constituentMappingProvider, List<CoreLabel> tokens) { // Get node label String nodeLabelValue = aNode.value(); // Extract syntactic function from node label String syntacticFunction = null; AbstractTreebankLanguagePack tlp = (AbstractTreebankLanguagePack) aTreebankLanguagePack; int gfIdx = nodeLabelValue.indexOf(tlp.getGfCharacter()); if (gfIdx > 0) { syntacticFunction = nodeLabelValue.substring(gfIdx + 1); nodeLabelValue = nodeLabelValue.substring(0, gfIdx); }/*from w w w .j av a 2 s. com*/ // Check if node is a constituent node on sentence or phrase-level if (aNode.isPhrasal()) { Type constType = constituentMappingProvider.getTagType(nodeLabelValue); IntPair span = aNode.getSpan(); int begin = tokens.get(span.getSource()).get(CharacterOffsetBeginAnnotation.class); int end = tokens.get(span.getTarget()).get(CharacterOffsetEndAnnotation.class); Constituent constituent = (Constituent) aJCas.getCas().createAnnotation(constType, begin, end); constituent.setConstituentType(internStrings ? nodeLabelValue.intern() : nodeLabelValue); constituent.setSyntacticFunction( internStrings && syntacticFunction != null ? syntacticFunction.intern() : syntacticFunction); constituent.setParent(aParentFS); // Do we have any children? 
List<org.apache.uima.jcas.tcas.Annotation> childAnnotations = new ArrayList<>(); for (Tree child : aNode.getChildrenAsList()) { org.apache.uima.jcas.tcas.Annotation childAnnotation = convertConstituentTreeNode(aJCas, aTreebankLanguagePack, child, constituent, internStrings, constituentMappingProvider, tokens); if (childAnnotation != null) { childAnnotations.add(childAnnotation); } } // Now that we know how many children we have, link annotation of // current node with its children constituent.setChildren(FSCollectionFactory.createFSArray(aJCas, childAnnotations)); constituent.addToIndexes(); return constituent; } // Create parent link on token else if (aNode.isPreTerminal()) { // link token to its parent constituent List<Tree> children = aNode.getChildrenAsList(); assert children.size() == 1; Tree terminal = children.get(0); CoreLabel label = (CoreLabel) terminal.label(); Token token = label.get(TokenKey.class); token.setParent(aParentFS); return token; } else { throw new IllegalArgumentException("Node must be either phrasal nor pre-terminal"); } }
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.StanfordAnnotator.java
License:Open Source License
/** * Creates linked constituent annotations + POS annotations * /*from w w w . j a va 2 s . co m*/ * @param aTreebankLanguagePack * the language pack. * @param aNode * the source tree * @param aParentFS * the parent annotation * @param aCreatePos * sets whether to create or not to create POS tags * @return the child-structure (needed for recursive call only) */ private Annotation createConstituentAnnotationFromTree(TreebankLanguagePack aTreebankLanguagePack, Tree aNode, Annotation aParentFS, boolean aCreatePos) { String nodeLabelValue = aNode.value(); String syntacticFunction = null; AbstractTreebankLanguagePack tlp = (AbstractTreebankLanguagePack) aTreebankLanguagePack; int gfIdx = nodeLabelValue.indexOf(tlp.getGfCharacter()); if (gfIdx > 0) { syntacticFunction = nodeLabelValue.substring(gfIdx + 1); nodeLabelValue = nodeLabelValue.substring(0, gfIdx); } // calculate span for the current subtree IntPair span = tokenTree.getSpan(aNode); // Check if the node has been marked by a TSurgeon operation. // If so, add a tag-annotation on the constituent if (nodeLabelValue.contains(TAG_SEPARATOR) && !nodeLabelValue.equals(TAG_SEPARATOR)) { int separatorIndex = nodeLabelValue.indexOf(TAG_SEPARATOR); String tag = nodeLabelValue.substring(0, separatorIndex); nodeLabelValue = nodeLabelValue.substring(separatorIndex + 1, nodeLabelValue.length()); createTagAnnotation(span.getSource(), span.getTarget(), tag); } // Check if node is a constituent node on sentence or phrase-level if (aNode.isPhrasal()) { // add annotation to annotation tree Constituent constituent = createConstituentAnnotation(span.getSource(), span.getTarget(), nodeLabelValue, syntacticFunction); // link to parent if (aParentFS != null) { constituent.setParent(aParentFS); } // Do we have any children? 
List<Annotation> childAnnotations = new ArrayList<Annotation>(); for (Tree child : aNode.getChildrenAsList()) { Annotation childAnnotation = createConstituentAnnotationFromTree(aTreebankLanguagePack, child, constituent, aCreatePos); if (childAnnotation != null) { childAnnotations.add(childAnnotation); } } // Now that we know how many children we have, link annotation of // current node with its children FSArray children = new FSArray(jCas, childAnnotations.size()); int curChildNum = 0; for (FeatureStructure child : childAnnotations) { children.set(curChildNum, child); curChildNum++; } constituent.setChildren(children); // write annotation for current node to index jCas.addFsToIndexes(constituent); return constituent; } // If the node is a word-level constituent node (== POS): // create parent link on token and (if not turned off) create POS tag else if (aNode.isPreTerminal()) { // create POS-annotation (annotation over the token) POS pos = createPOSAnnotation(span.getSource(), span.getTarget(), nodeLabelValue); // in any case: get the token that is covered by the POS // TODO how about multi word prepositions etc. (e.g. "such as") List<Token> coveredTokens = JCasUtil.selectCovered(jCas, Token.class, pos); // the POS should only cover one token assert coveredTokens.size() == 1; Token token = coveredTokens.get(0); // only add POS to index if we want POS-tagging if (aCreatePos) { jCas.addFsToIndexes(pos); token.setPos(pos); } // link token to its parent constituent if (aParentFS != null) { token.setParent(aParentFS); } return token; } else { throw new IllegalArgumentException("Node must be either phrasal nor pre-terminal"); } }
From source file:opennlp.tools.parse_thicket.parse_thicket2graph.GraphFromPTreeBuilder.java
License:Apache License
private void navigate(Tree tree, Graph<ParseGraphNode, DefaultEdge> g, int l, ParseGraphNode currParent) { // String currParent = tree.label().value()+" $"+Integer.toString(l); // g.addVertex(currParent); if (tree.getChildrenAsList().size() == 1) navigate(tree.getChildrenAsList().get(0), g, l + 1, currParent); else if (tree.getChildrenAsList().size() == 0) return;//from www. ja va 2 s.c o m for (Tree child : tree.getChildrenAsList()) { String currChild = null; ParseGraphNode currChildNode = null; try { if (child.isLeaf()) continue; if (child.label().value().startsWith("S")) navigate(child.getChildrenAsList().get(0), g, l + 1, currParent); if (!child.isPhrasal() || child.isPreTerminal()) currChild = child.toString() + " #" + Integer.toString(l); else currChild = child.label().value() + " #" + Integer.toString(l); currChildNode = new ParseGraphNode(child, currChild); g.addVertex(currChildNode); g.addEdge(currParent, currChildNode); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } navigate(child, g, l + 1, currChildNode); } }
From source file:pltag.parser.semantics.discriminative.ExtractFeatures.java
License:Open Source License
/** * Identify the list of rightmost non-terminals that span a complete subtree, i.e., one that * a) the leaf of its' rightmost child is a word, OR * b) the index of the leaf of its' rightmost is a word AND is the last in the yield (AND this leaf is the last word - optional, as this condition breeches incrementality). * @param analysisTree/*from w w w. j av a2 s. c om*/ * @return */ private List<Tree> getListOfRightMostCompleteNonTerminals(Tree tree) { List<Tree> list = new ArrayList(); List<Tree> leaves = tree.getLeaves(); // check if the last leaf is a word. Tree currentWord = leaves.get(leaves.size() - 1); if (currentWord.nodeString().endsWith("<>")) { Tree parent = currentWord.parent(tree); while (parent != tree) { if (parent.isPhrasal()) { list.add(parent); } parent = parent.parent(tree); } list.add(tree); } return list; }
From source file:reactivetechnologies.sentigrade.engine.nlp.SentimentAnalyzer.java
License:Apache License
private static void print(Tree tree) { if (LOG.isDebugEnabled()) { Tree t = tree.deepCopy(); //setSentimentLabels(t); LOG.debug("leaf?" + t.isLeaf() + " phrrasal?" + t.isPhrasal() + " preterminal?" + t.isPreTerminal() + " class:" + RNNCoreAnnotations.getPredictedClass(t)); LOG.debug("" + t + ""); }//from w w w . j a va 2 s. c o m }
From source file:reactivetechnologies.sentigrade.engine.nlp.SentimentAnalyzer.java
License:Apache License
private SentimentVector calculatePOSScores(Tree parse) { SentimentVector vector = new SentimentVector(); if (parse.isPhrasal()) { // TregexPattern pattern = TregexPattern.compile("@NP");//noun // TregexPattern pattern = TregexPattern.compile("@VP"); // TregexPattern pattern = TregexPattern.compile("@JJS");//adjective // superlative // TregexPattern pattern = TregexPattern.compile("@CD");//numeric TregexPattern pattern = TregexPattern.compile("@ADJP"); TregexMatcher matcher = pattern.matcher(parse); while (matcher.find()) { Tree match = matcher.getMatch(); vector.adjScore += calcPOSAdj(match.deepCopy()); vector.advScore += calcPOSAdv(match.deepCopy()); vector.nounScore += calcPOSNoun(match.deepCopy()); vector.verbScore += calcPOSVerb(match.deepCopy()); }//from www .ja va 2 s .c om pattern = TregexPattern.compile("@VP"); matcher = pattern.matcher(parse); while (matcher.find()) { Tree match = matcher.getMatch(); vector.adjScore += calcPOSAdj(match.deepCopy()); vector.nounScore += calcPOSNoun(match.deepCopy()); vector.verbScore += calcPOSVerb(match.deepCopy()); vector.advScore += calcPOSAdv(match.deepCopy()); } } return vector; }