List of usage examples for edu.stanford.nlp.trees Tree isPreTerminal
public boolean isPreTerminal()
From source file:CollapseUnaryTransformer.java
License:Apache License
/**
 * Builds a copy of {@code tree} in which chains of single-child nodes are
 * collapsed: while a node has exactly one child and that child is not a leaf,
 * the child is skipped and its children are used instead.
 *
 * <p>Leaves and pre-terminals are returned as deep copies.
 *
 * @param tree the tree to transform
 * @return a tree node created through the input's tree factory, carrying a
 *         copy of the input's label and the recursively transformed children
 */
public Tree transformTree(Tree tree) {
    // Nothing to collapse at or below the part-of-speech level.
    if (tree.isLeaf() || tree.isPreTerminal()) {
        return tree.deepCopy();
    }

    Label labelCopy = tree.label().labelFactory().newLabel(tree.label());

    // Walk down the unary chain, stopping just above a leaf.
    Tree[] kids = tree.children();
    while (kids.length == 1) {
        Tree onlyChild = kids[0];
        if (onlyChild.isLeaf()) {
            break;
        }
        kids = onlyChild.children();
    }

    List<Tree> transformedKids = Generics.newArrayList();
    for (int i = 0; i < kids.length; i++) {
        transformedKids.add(transformTree(kids[i]));
    }
    return tree.treeFactory().newTreeNode(labelCopy, transformedKids);
}
From source file:conditionalCFG.ConditionalCFGParser.java
License:Open Source License
/**
 * Recursively scores a binarized tree and checks every node's score against
 * the bound stored in {@code iScore} for the node's span and state. Whenever
 * a score exceeds its bound by more than a small tolerance, a diagnostic is
 * printed to standard output.
 *
 * <p>Grammar rule scores are clamped from below at {@code -10000.0}.
 *
 * @param tree  the (sub)tree to validate; nodes are expected to have at most
 *              two children
 * @param start index of the first word covered by {@code tree}
 * @return the score computed for {@code tree}; {@code 0.0} for a leaf
 */
public double validateBinarizedTree(Tree tree, int start) {
    if (tree.isLeaf()) {
        return 0.0;
    }

    final float epsilon = 0.0001f;
    final String parentLabel = tree.label().value();
    final Tree[] kids = tree.children();
    final Tree left = kids[0];
    final String leftLabel = left.label().value();

    // Tag-over-word node: score comes from the lexicon.
    if (tree.isPreTerminal()) {
        int tag = tagIndex.indexOf(parentLabel);
        int word = wordIndex.indexOf(leftLabel);
        IntTaggedWord taggedWord = new IntTaggedWord(word, tag);
        float tagScore = lex.score(taggedWord, start, leftLabel, null);
        float tagBound = iScore[start][start + 1][stateIndex.indexOf(parentLabel)];
        if (tagScore > tagBound + epsilon) {
            System.out.println("Invalid tagging:");
            System.out.println(" Tag: " + parentLabel);
            System.out.println(" Word: " + leftLabel);
            System.out.println(" Score: " + tagScore);
            System.out.println(" Bound: " + tagBound);
        }
        return tagScore;
    }

    final int parent = stateIndex.indexOf(parentLabel);
    final int firstChild = stateIndex.indexOf(leftLabel);

    // Unary rewrite: rule score plus the score of the single child.
    if (kids.length == 1) {
        UnaryRule unary = new UnaryRule(parent, firstChild);
        double unaryScore = SloppyMath.max(ug.scoreRule(unary), -10000.0)
                + validateBinarizedTree(left, start);
        int unaryEnd = start + tree.yield().size();
        double unaryBound = iScore[start][unaryEnd][parent];
        if (unaryScore > unaryBound + epsilon) {
            System.out.println("Invalid unary:");
            System.out.println(" Parent: " + parentLabel);
            System.out.println(" Child: " + leftLabel);
            System.out.println(" Start: " + start);
            System.out.println(" End: " + unaryEnd);
            System.out.println(" Score: " + unaryScore);
            System.out.println(" Bound: " + unaryBound);
        }
        return unaryScore;
    }

    // Binary rewrite: rule score plus both children, the right child starting
    // where the left child's yield ends.
    final Tree right = kids[1];
    final String rightLabel = right.label().value();
    int secondChild = stateIndex.indexOf(rightLabel);
    BinaryRule binary = new BinaryRule(parent, firstChild, secondChild);
    double ruleScore = SloppyMath.max(bg.scoreRule(binary), -10000.0);
    double leftScore = validateBinarizedTree(left, start);
    double rightScore = validateBinarizedTree(right, start + left.yield().size());
    double score = ruleScore + leftScore + rightScore;
    int end = start + tree.yield().size();
    double bound = iScore[start][end][parent];
    if (score > bound + epsilon) {
        System.out.println("Invalid binary:");
        System.out.println(" Parent: " + parentLabel);
        System.out.println(" LChild: " + leftLabel);
        System.out.println(" RChild: " + rightLabel);
        System.out.println(" Start: " + start);
        System.out.println(" End: " + end);
        System.out.println(" Score: " + score);
        System.out.println(" Bound: " + bound);
    }
    return score;
}
From source file:conditionalCFG.ConditionalCFGParser.java
License:Open Source License
public double scoreBinarizedTree(Tree tree, int start, int debugLvl) { if (tree.isLeaf()) { return 0.0; }//from ww w . j a va 2s . c o m if (tree.isPreTerminal()) { String wordStr = tree.children()[0].label().value(); int tag = tagIndex.indexOf(tree.label().value()); int word = wordIndex.indexOf(wordStr); IntTaggedWord iTW = new IntTaggedWord(word, tag); // if (lex.score(iTW,(leftmost ? 0 : 1)) == Double.NEGATIVE_INFINITY) { // System.out.println("NO SCORE FOR: "+iTW); // } float score = lex.score(iTW, start, wordStr, null); tree.setScore(score); if (debugLvl > 0) System.out.println(score + " " + tree.getSpan()); return score; } int parent = stateIndex.indexOf(tree.label().value()); int firstChild = stateIndex.indexOf(tree.children()[0].label().value()); if (tree.numChildren() == 1) { UnaryRule ur = new UnaryRule(parent, firstChild); //+ DEBUG // if (ug.scoreRule(ur) < -10000) { // System.out.println("Grammar doesn't have rule: " + ur); // } // return SloppyMath.max(ug.scoreRule(ur), -10000.0) + scoreBinarizedTree(tree.children()[0], leftmost); double score = ug.scoreRule(ur) + scoreBinarizedTree(tree.children()[0], start, debugLvl) + lex.score(ur, start, start + tree.children()[0].yield().size()); tree.setScore(score); if (debugLvl > 0) System.out.println(score + " " + tree.getSpan()); return score; } int secondChild = stateIndex.indexOf(tree.children()[1].label().value()); BinaryRule br = new BinaryRule(parent, firstChild, secondChild); //+ DEBUG // if (bg.scoreRule(br) < -10000) { // System.out.println("Grammar doesn't have rule: " + br); // } // return SloppyMath.max(bg.scoreRule(br), -10000.0) + // scoreBinarizedTree(tree.children()[0], leftmost) + // scoreBinarizedTree(tree.children()[1], false); int sz0 = tree.children()[0].yield().size(); double score = bg.scoreRule(br) + scoreBinarizedTree(tree.children()[0], start, debugLvl) + scoreBinarizedTree(tree.children()[1], start + sz0, debugLvl) + lex.score(br, start, start + sz0 + 
tree.children()[1].yield().size(), start + sz0); tree.setScore(score); if (debugLvl > 0) System.out.println(score + " " + tree.getSpan() + " " + (sz0 + start)); return score; }
From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.internal.CoreNlp2DKPro.java
License:Open Source License
private static org.apache.uima.jcas.tcas.Annotation convertConstituentTreeNode(JCas aJCas, TreebankLanguagePack aTreebankLanguagePack, Tree aNode, org.apache.uima.jcas.tcas.Annotation aParentFS, boolean internStrings, MappingProvider constituentMappingProvider, List<CoreLabel> tokens) { // Get node label String nodeLabelValue = aNode.value(); // Extract syntactic function from node label String syntacticFunction = null; AbstractTreebankLanguagePack tlp = (AbstractTreebankLanguagePack) aTreebankLanguagePack; int gfIdx = nodeLabelValue.indexOf(tlp.getGfCharacter()); if (gfIdx > 0) { syntacticFunction = nodeLabelValue.substring(gfIdx + 1); nodeLabelValue = nodeLabelValue.substring(0, gfIdx); }//from w ww . j a v a2 s . c o m // Check if node is a constituent node on sentence or phrase-level if (aNode.isPhrasal()) { Type constType = constituentMappingProvider.getTagType(nodeLabelValue); IntPair span = aNode.getSpan(); int begin = tokens.get(span.getSource()).get(CharacterOffsetBeginAnnotation.class); int end = tokens.get(span.getTarget()).get(CharacterOffsetEndAnnotation.class); Constituent constituent = (Constituent) aJCas.getCas().createAnnotation(constType, begin, end); constituent.setConstituentType(internStrings ? nodeLabelValue.intern() : nodeLabelValue); constituent.setSyntacticFunction( internStrings && syntacticFunction != null ? syntacticFunction.intern() : syntacticFunction); constituent.setParent(aParentFS); // Do we have any children? 
List<org.apache.uima.jcas.tcas.Annotation> childAnnotations = new ArrayList<>(); for (Tree child : aNode.getChildrenAsList()) { org.apache.uima.jcas.tcas.Annotation childAnnotation = convertConstituentTreeNode(aJCas, aTreebankLanguagePack, child, constituent, internStrings, constituentMappingProvider, tokens); if (childAnnotation != null) { childAnnotations.add(childAnnotation); } } // Now that we know how many children we have, link annotation of // current node with its children constituent.setChildren(FSCollectionFactory.createFSArray(aJCas, childAnnotations)); constituent.addToIndexes(); return constituent; } // Create parent link on token else if (aNode.isPreTerminal()) { // link token to its parent constituent List<Tree> children = aNode.getChildrenAsList(); assert children.size() == 1; Tree terminal = children.get(0); CoreLabel label = (CoreLabel) terminal.label(); Token token = label.get(TokenKey.class); token.setParent(aParentFS); return token; } else { throw new IllegalArgumentException("Node must be either phrasal nor pre-terminal"); } }
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.StanfordAnnotator.java
License:Open Source License
/** * Creates linked constituent annotations + POS annotations * /*from w ww. ja v a 2s . com*/ * @param aTreebankLanguagePack * the language pack. * @param aNode * the source tree * @param aParentFS * the parent annotation * @param aCreatePos * sets whether to create or not to create POS tags * @return the child-structure (needed for recursive call only) */ private Annotation createConstituentAnnotationFromTree(TreebankLanguagePack aTreebankLanguagePack, Tree aNode, Annotation aParentFS, boolean aCreatePos) { String nodeLabelValue = aNode.value(); String syntacticFunction = null; AbstractTreebankLanguagePack tlp = (AbstractTreebankLanguagePack) aTreebankLanguagePack; int gfIdx = nodeLabelValue.indexOf(tlp.getGfCharacter()); if (gfIdx > 0) { syntacticFunction = nodeLabelValue.substring(gfIdx + 1); nodeLabelValue = nodeLabelValue.substring(0, gfIdx); } // calculate span for the current subtree IntPair span = tokenTree.getSpan(aNode); // Check if the node has been marked by a TSurgeon operation. // If so, add a tag-annotation on the constituent if (nodeLabelValue.contains(TAG_SEPARATOR) && !nodeLabelValue.equals(TAG_SEPARATOR)) { int separatorIndex = nodeLabelValue.indexOf(TAG_SEPARATOR); String tag = nodeLabelValue.substring(0, separatorIndex); nodeLabelValue = nodeLabelValue.substring(separatorIndex + 1, nodeLabelValue.length()); createTagAnnotation(span.getSource(), span.getTarget(), tag); } // Check if node is a constituent node on sentence or phrase-level if (aNode.isPhrasal()) { // add annotation to annotation tree Constituent constituent = createConstituentAnnotation(span.getSource(), span.getTarget(), nodeLabelValue, syntacticFunction); // link to parent if (aParentFS != null) { constituent.setParent(aParentFS); } // Do we have any children? 
List<Annotation> childAnnotations = new ArrayList<Annotation>(); for (Tree child : aNode.getChildrenAsList()) { Annotation childAnnotation = createConstituentAnnotationFromTree(aTreebankLanguagePack, child, constituent, aCreatePos); if (childAnnotation != null) { childAnnotations.add(childAnnotation); } } // Now that we know how many children we have, link annotation of // current node with its children FSArray children = new FSArray(jCas, childAnnotations.size()); int curChildNum = 0; for (FeatureStructure child : childAnnotations) { children.set(curChildNum, child); curChildNum++; } constituent.setChildren(children); // write annotation for current node to index jCas.addFsToIndexes(constituent); return constituent; } // If the node is a word-level constituent node (== POS): // create parent link on token and (if not turned off) create POS tag else if (aNode.isPreTerminal()) { // create POS-annotation (annotation over the token) POS pos = createPOSAnnotation(span.getSource(), span.getTarget(), nodeLabelValue); // in any case: get the token that is covered by the POS // TODO how about multi word prepositions etc. (e.g. "such as") List<Token> coveredTokens = JCasUtil.selectCovered(jCas, Token.class, pos); // the POS should only cover one token assert coveredTokens.size() == 1; Token token = coveredTokens.get(0); // only add POS to index if we want POS-tagging if (aCreatePos) { jCas.addFsToIndexes(pos); token.setPos(pos); } // link token to its parent constituent if (aParentFS != null) { token.setParent(aParentFS); } return token; } else { throw new IllegalArgumentException("Node must be either phrasal nor pre-terminal"); } }
From source file:elkfed.coref.discourse_entities.DiscourseEntity.java
License:Open Source License
/** * Reads the premodifiers from the input Mention and creates Property objects as attributes for every premodifier * not part of an embedded NE. Embedded NEs are to be treated as relations to other Discourse Entities. * @param np The NP to be processed//from ww w.j a v a 2 s . c om * @return Set A Set of Property Objects; one for every premodifier (attribute) */ private Set<Property> computeAttributes(Mention np) { LanguagePlugin lang_plugin = ConfigProperties.getInstance().getLanguagePlugin(); Set<Property> result = new LinkedHashSet<Property>(); List<Tree> preModifiers = np.getPremodifiers(); // straight from Mention //DEBUG //System.out.println("Number of premodifiers of "+np.getMarkableString()+" :"+ // preModifiers.size()); char pos = '\0'; if ((preModifiers != null) && (preModifiers.size() > 0)) { for (int i = 0; i < preModifiers.size(); i++) { Tree mod = preModifiers.get(i); // Expected structure: // (NP (DT the) (JJ last) (NN supper)) if (mod.isLeaf()) { // this shouldn't happen' System.out.println("WARNING: UNEXPECTED LEAF " + mod.nodeString()); //result.add(new Property(Property.ATTRIBUTE, mod.nodeString())); //result.add(new Property(Property.ATTRIBUTE, getSense(mod.nodeString()))); } else { NodeCategory ncat = lang_plugin.labelCat(mod.nodeString()); if (mod.isPreTerminal()) { if (ncat == NodeCategory.CN || ncat == NodeCategory.ADJ) { if (ncat == NodeCategory.CN) { pos = 'N'; } if (ncat == NodeCategory.ADJ) { pos = 'A'; } //System.out.println("Pre terminal node "+ mod.nodeString()); Tree wordNode = mod.firstChild(); _logger.fine("Adding attribute " + wordNode.nodeString() + " to entity"); result.add(new Property(Property.ATTRIBUTE, wordNode.nodeString(), pos)); } } } } } return result; }
From source file:elkfed.coref.discourse_entities.DiscourseEntity.java
License:Open Source License
/**
 * Derives relation properties from the postmodifiers of a mention.
 *
 * <p>Only prepositional-phrase postmodifiers with exactly two children, e.g.
 * {@code (PP (in from) (NP (nnp India)))}, contribute: each one yields a
 * property built from the preposition and the head word of the embedded NP.
 * Leaf postmodifiers trigger a warning on standard output; pre-terminal and
 * all other postmodifiers are ignored.
 *
 * @param np the NP to be processed
 * @return the properties in encounter order; empty if the mention has no
 *         postmodifiers
 */
private Set<Property> computeInitialRelations(Mention np) {
    LanguagePlugin lang_plugin = ConfigProperties.getInstance().getLanguagePlugin();
    Set<Property> result = new LinkedHashSet<Property>();

    List<Tree> postModifiers = np.getPostmodifiers(); // straight from Mention
    if (postModifiers == null || postModifiers.isEmpty()) {
        return result;
    }

    // Expected structure: (NP (NN software) (PP from (NP India)))
    for (Tree mod : postModifiers) {
        if (mod.isLeaf()) {
            // this shouldn't happen
            System.out.println("WARNING: UNEXPECTED LEAF " + mod.nodeString());
            continue;
        }

        NodeCategory ncat = lang_plugin.labelCat(mod.nodeString());

        // Pre-terminal postmodifiers are not expected and contribute nothing.
        // The former code derived a part-of-speech character for them that
        // was never read.
        if (mod.isPreTerminal()) {
            continue;
        }
        if (ncat != NodeCategory.PP || mod.numChildren() != 2) {
            continue;
        }

        Tree prepNode = mod.getChild(0);
        Tree npNode = mod.getChild(1);
        Tree npHead = massimoHeadFindHack(npNode);
        if (npHead == null || prepNode == null) {
            continue;
        }

        // Use the word below the preposition node when there is one,
        // otherwise the node itself.
        if (prepNode.numChildren() > 0) {
            prepNode = prepNode.firstChild();
        }
        // Same treatment for the head: a childless head used to cause a
        // NullPointerException here.
        Tree headWord = npHead.numChildren() > 0 ? npHead.firstChild() : npHead;

        result.add(new Property(prepNode.nodeString(), headWord.nodeString()));
    }
    return result;
}
From source file:opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder.java
License:Apache License
private void navigateR(Tree t, List<ParseTreeNode> sentence, List<List<ParseTreeNode>> phrases) { if (!t.isPreTerminal()) { if (t.label() != null) { if (t.value() != null) { // if ROOT or S, returns empty List<ParseTreeNode> nodes = parsePhrase(t.label().value(), t.toString()); nodes = assignIndexToNodes(nodes, sentence); if (!nodes.isEmpty()) phrases.add(nodes);/*from ww w . java 2s . c o m*/ if (nodes.size() > 0 && nodes.get(0).getId() == null) { if (nodes.size() > 1 && nodes.get(1) != null && nodes.get(1).getId() != null) { try { ParseTreeNode n = nodes.get(0); n.setId(nodes.get(1).getId() - 1); nodes.set(0, n); } catch (Exception e) { e.printStackTrace(); } } else { log.severe("Failed alignment:" + nodes); } } } } Tree[] kids = t.children(); if (kids != null) { for (Tree kid : kids) { navigateR(kid, sentence, phrases); } } return; } }
From source file:opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder.java
License:Apache License
/**
 * Walks the tree top-down and collects one phrase per labelled node.
 *
 * <p>A pre-terminal is parsed from its full string form and ends the
 * recursion. Any other node is parsed from its label value, provided the node
 * value is not {@code null}, and its children are then visited in order.
 * Only non-empty parse results are appended to {@code phrases}.
 *
 * @param t        the current (sub)tree
 * @param sentence passed through unchanged to the recursive calls
 * @param l        passed through unchanged to the recursive calls
 * @param phrases  receives the extracted phrases
 */
private void navigateR1(Tree t, List<ParseTreeNode> sentence, int l, List<List<ParseTreeNode>> phrases) {
    final boolean preTerminal = t.isPreTerminal();

    if (t.label() != null) {
        List<ParseTreeNode> parsed = null;
        if (preTerminal) {
            parsed = parsePhrase(t.toString());
        } else if (t.value() != null) {
            parsed = parsePhrase(t.label().value());
        }
        if (parsed != null && !parsed.isEmpty()) {
            phrases.add(parsed);
        }
    }

    if (preTerminal) {
        return;
    }

    Tree[] kids = t.children();
    if (kids == null) {
        return;
    }
    for (int i = 0; i < kids.length; i++) {
        navigateR1(kids[i], sentence, l, phrases);
    }
}
From source file:opennlp.tools.parse_thicket.parse_thicket2graph.GraphFromPTreeBuilder.java
License:Apache License
private void navigate(Tree tree, Graph<ParseGraphNode, DefaultEdge> g, int l, ParseGraphNode currParent) { // String currParent = tree.label().value()+" $"+Integer.toString(l); // g.addVertex(currParent); if (tree.getChildrenAsList().size() == 1) navigate(tree.getChildrenAsList().get(0), g, l + 1, currParent); else if (tree.getChildrenAsList().size() == 0) return;/*from w ww . j a v a2 s.c o m*/ for (Tree child : tree.getChildrenAsList()) { String currChild = null; ParseGraphNode currChildNode = null; try { if (child.isLeaf()) continue; if (child.label().value().startsWith("S")) navigate(child.getChildrenAsList().get(0), g, l + 1, currParent); if (!child.isPhrasal() || child.isPreTerminal()) currChild = child.toString() + " #" + Integer.toString(l); else currChild = child.label().value() + " #" + Integer.toString(l); currChildNode = new ParseGraphNode(child, currChild); g.addVertex(currChildNode); g.addEdge(currParent, currChildNode); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } navigate(child, g, l + 1, currChildNode); } }