List of usage examples for edu.stanford.nlp.trees Tree getChildrenAsList
public List<Tree> getChildrenAsList()
From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.internal.CoreNlp2DKPro.java
License:Open Source License
private static org.apache.uima.jcas.tcas.Annotation convertConstituentTreeNode(JCas aJCas, TreebankLanguagePack aTreebankLanguagePack, Tree aNode, org.apache.uima.jcas.tcas.Annotation aParentFS, boolean internStrings, MappingProvider constituentMappingProvider, List<CoreLabel> tokens) { // Get node label String nodeLabelValue = aNode.value(); // Extract syntactic function from node label String syntacticFunction = null; AbstractTreebankLanguagePack tlp = (AbstractTreebankLanguagePack) aTreebankLanguagePack; int gfIdx = nodeLabelValue.indexOf(tlp.getGfCharacter()); if (gfIdx > 0) { syntacticFunction = nodeLabelValue.substring(gfIdx + 1); nodeLabelValue = nodeLabelValue.substring(0, gfIdx); }//from w ww .ja v a 2 s .c o m // Check if node is a constituent node on sentence or phrase-level if (aNode.isPhrasal()) { Type constType = constituentMappingProvider.getTagType(nodeLabelValue); IntPair span = aNode.getSpan(); int begin = tokens.get(span.getSource()).get(CharacterOffsetBeginAnnotation.class); int end = tokens.get(span.getTarget()).get(CharacterOffsetEndAnnotation.class); Constituent constituent = (Constituent) aJCas.getCas().createAnnotation(constType, begin, end); constituent.setConstituentType(internStrings ? nodeLabelValue.intern() : nodeLabelValue); constituent.setSyntacticFunction( internStrings && syntacticFunction != null ? syntacticFunction.intern() : syntacticFunction); constituent.setParent(aParentFS); // Do we have any children? 
List<org.apache.uima.jcas.tcas.Annotation> childAnnotations = new ArrayList<>(); for (Tree child : aNode.getChildrenAsList()) { org.apache.uima.jcas.tcas.Annotation childAnnotation = convertConstituentTreeNode(aJCas, aTreebankLanguagePack, child, constituent, internStrings, constituentMappingProvider, tokens); if (childAnnotation != null) { childAnnotations.add(childAnnotation); } } // Now that we know how many children we have, link annotation of // current node with its children constituent.setChildren(FSCollectionFactory.createFSArray(aJCas, childAnnotations)); constituent.addToIndexes(); return constituent; } // Create parent link on token else if (aNode.isPreTerminal()) { // link token to its parent constituent List<Tree> children = aNode.getChildrenAsList(); assert children.size() == 1; Tree terminal = children.get(0); CoreLabel label = (CoreLabel) terminal.label(); Token token = label.get(TokenKey.class); token.setParent(aParentFS); return token; } else { throw new IllegalArgumentException("Node must be either phrasal nor pre-terminal"); } }
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.StanfordAnnotator.java
License:Open Source License
/** * Creates linked constituent annotations + POS annotations * /*from w ww . j a va2s . c om*/ * @param aTreebankLanguagePack * the language pack. * @param aNode * the source tree * @param aParentFS * the parent annotation * @param aCreatePos * sets whether to create or not to create POS tags * @return the child-structure (needed for recursive call only) */ private Annotation createConstituentAnnotationFromTree(TreebankLanguagePack aTreebankLanguagePack, Tree aNode, Annotation aParentFS, boolean aCreatePos) { String nodeLabelValue = aNode.value(); String syntacticFunction = null; AbstractTreebankLanguagePack tlp = (AbstractTreebankLanguagePack) aTreebankLanguagePack; int gfIdx = nodeLabelValue.indexOf(tlp.getGfCharacter()); if (gfIdx > 0) { syntacticFunction = nodeLabelValue.substring(gfIdx + 1); nodeLabelValue = nodeLabelValue.substring(0, gfIdx); } // calculate span for the current subtree IntPair span = tokenTree.getSpan(aNode); // Check if the node has been marked by a TSurgeon operation. // If so, add a tag-annotation on the constituent if (nodeLabelValue.contains(TAG_SEPARATOR) && !nodeLabelValue.equals(TAG_SEPARATOR)) { int separatorIndex = nodeLabelValue.indexOf(TAG_SEPARATOR); String tag = nodeLabelValue.substring(0, separatorIndex); nodeLabelValue = nodeLabelValue.substring(separatorIndex + 1, nodeLabelValue.length()); createTagAnnotation(span.getSource(), span.getTarget(), tag); } // Check if node is a constituent node on sentence or phrase-level if (aNode.isPhrasal()) { // add annotation to annotation tree Constituent constituent = createConstituentAnnotation(span.getSource(), span.getTarget(), nodeLabelValue, syntacticFunction); // link to parent if (aParentFS != null) { constituent.setParent(aParentFS); } // Do we have any children? 
List<Annotation> childAnnotations = new ArrayList<Annotation>(); for (Tree child : aNode.getChildrenAsList()) { Annotation childAnnotation = createConstituentAnnotationFromTree(aTreebankLanguagePack, child, constituent, aCreatePos); if (childAnnotation != null) { childAnnotations.add(childAnnotation); } } // Now that we know how many children we have, link annotation of // current node with its children FSArray children = new FSArray(jCas, childAnnotations.size()); int curChildNum = 0; for (FeatureStructure child : childAnnotations) { children.set(curChildNum, child); curChildNum++; } constituent.setChildren(children); // write annotation for current node to index jCas.addFsToIndexes(constituent); return constituent; } // If the node is a word-level constituent node (== POS): // create parent link on token and (if not turned off) create POS tag else if (aNode.isPreTerminal()) { // create POS-annotation (annotation over the token) POS pos = createPOSAnnotation(span.getSource(), span.getTarget(), nodeLabelValue); // in any case: get the token that is covered by the POS // TODO how about multi word prepositions etc. (e.g. "such as") List<Token> coveredTokens = JCasUtil.selectCovered(jCas, Token.class, pos); // the POS should only cover one token assert coveredTokens.size() == 1; Token token = coveredTokens.get(0); // only add POS to index if we want POS-tagging if (aCreatePos) { jCas.addFsToIndexes(pos); token.setPos(pos); } // link token to its parent constituent if (aParentFS != null) { token.setParent(aParentFS); } return token; } else { throw new IllegalArgumentException("Node must be either phrasal nor pre-terminal"); } }
From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.features.ProductionRulesFeature.java
License:Apache License
public static void extractProductionRulesRecursively(Tree tree, FrequencyDistribution<String> rules) { if (tree.getChildrenAsList().size() > 1) { String rule = tree.value() + "->"; for (Tree t : tree.getChildrenAsList()) { rule = rule + t.value() + ","; }/* w w w .j a v a2 s.co m*/ //System.out.println(rule); rules.addSample(rule, 1); } for (Tree t : tree.getChildrenAsList()) { extractProductionRulesRecursively(t, rules); } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * e.g., John and Mary like Bill. -> John LIKES Bill. Mary LIKES Bill. * John and I like Bill -> John LIKES Bill. I LIKE Bill. * John and I are old. -> I IS old. John IS old. *//*from ww w. ja v a2 s .co m*/ private void correctTense(Tree subject, Tree clause) { int tmpIndex; //correct verb tense when modifying subjects for (Tree uncle : clause.getChildrenAsList()) { String newVerbPOS = null; Tree verbPreterminal = null; boolean needToModifyVerb = false; //if the node is a subject (i.e., its uncle is a VP), then check //to see if its tense needs to be changed String headPOS = subject.headPreTerminal(this.hf).label().toString(); if (uncle.label().toString().equals("VP") && !headPOS.endsWith("S")) { verbPreterminal = uncle.headPreTerminal(this.hf); //original main verb was plural but the conjoined subject word is singular //e.g., John (and Mary) like Bill. -> John like Bill. if ((verbPreterminal.label().toString().equals("VB") || verbPreterminal.label().toString().equals("VBP"))) { //the parser confuses VBP with VB if (subject.yield().toString().equals("I") || subject.yield().toString().equals("you")) { newVerbPOS = "VBP"; } else { newVerbPOS = "VBZ"; } needToModifyVerb = true; } else if (verbPreterminal.label().toString().equals("VBD")) { newVerbPOS = "VBD"; needToModifyVerb = true; } } //if needed, change the tense of the verb if (needToModifyVerb) { String verbLemma = QuestionUtil.getLemma(verbPreterminal.getChild(0).label().toString(), verbPreterminal.label().toString()); String newVerb; //special cases if (verbLemma.equals("be") && newVerbPOS.equals("VBD")) { if (subject.label().toString().endsWith("S")) newVerb = "were"; else newVerb = "was"; } else if (verbLemma.equals("be") && subject.yield().toString().equals("I") && newVerbPOS.equals("VBP")) { newVerb = "am"; } else { //default newVerb = this.conjugator.getSurfaceForm(verbLemma, newVerbPOS); } tmpIndex = verbPreterminal.parent(uncle).objectIndexOf(verbPreterminal); Tree verbParent = 
verbPreterminal.parent(uncle); verbParent.removeChild(tmpIndex); verbParent.addChild(tmpIndex, QuestionUtil.readTreeFromString("(" + newVerbPOS + " " + newVerb + ")")); } } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * e.g., John and Mary like Bill. -> John LIKES Bill. Mary LIKES Bill. * John and I like Bill -> John LIKES Bill. I LIKE Bill. * John and I are old. -> I IS old. John IS old. *//*from w w w . j a v a 2 s . c o m*/ private void correctTense(Tree subject, Tree clause) { int tmpIndex; //correct verb tense when modifying subjects for (Tree uncle : clause.getChildrenAsList()) { String newVerbPOS = null; Tree verbPreterminal = null; boolean needToModifyVerb = false; //if the node is a subject (i.e., its uncle is a VP), then check //to see if its tense needs to be changed String headPOS = subject.headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder()).label() .toString(); if (uncle.label().toString().equals("VP") && !headPOS.endsWith("S")) { verbPreterminal = uncle.headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder()); //original main verb was plural but the conjoined subject word is singular //e.g., John (and Mary) like Bill. -> John like Bill. if ((verbPreterminal.label().toString().equals("VB") || verbPreterminal.label().toString().equals("VBP"))) { //the parser confuses VBP with VB if (subject.yield().toString().equals("I") || subject.yield().toString().equals("you")) { newVerbPOS = "VBP"; } else { newVerbPOS = "VBZ"; } needToModifyVerb = true; } else if (verbPreterminal.label().toString().equals("VBD")) { newVerbPOS = "VBD"; needToModifyVerb = true; } } //if needed, change the tense of the verb if (needToModifyVerb) { String verbLemma = AnalysisUtilities.getInstance().getLemma( verbPreterminal.getChild(0).label().toString(), verbPreterminal.label().toString()); String newVerb; //special cases if (verbLemma.equals("be") && newVerbPOS.equals("VBD")) { if (subject.label().toString().endsWith("S")) newVerb = "were"; else newVerb = "was"; } else if (verbLemma.equals("be") && subject.yield().toString().equals("I") && newVerbPOS.equals("VBP")) { newVerb = "am"; } else { //default newVerb = AnalysisUtilities.getInstance().getSurfaceForm(verbLemma, 
newVerbPOS); } tmpIndex = verbPreterminal.parent(uncle).indexOf(verbPreterminal); Tree verbParent = verbPreterminal.parent(uncle); verbParent.removeChild(tmpIndex); verbParent.addChild(tmpIndex, AnalysisUtilities.getInstance().readTreeFromString("(" + newVerbPOS + " " + newVerb + ")")); } } }
From source file:edu.cornell.law.entitylinking.utils.Utility.java
public static List<String> getAllNounPhrases(String paragraph) { List<String> nounPhrases = new ArrayList<String>(); try {//from w w w . ja va 2 s .c om StringTokenizer tokenizer = new StringTokenizer(paragraph, "\\.;?:,"); while (tokenizer.hasMoreTokens()) { Annotation document = new Annotation(tokenizer.nextToken()); pipeline.annotate(document); Tree tree = null; List<CoreMap> sentences = document.get(SentencesAnnotation.class); for (CoreMap sentence : sentences) { // this is the parse tree of the current sentence tree = sentence.get(TreeAnnotation.class); for (Tree subtree : tree) { if ((subtree.label().value().equals("NP")) || (subtree.label().value().equals("WHNP"))) { String phraseString = Sentence.listToString(subtree.yieldWords()) .replace(" -LRB- ", "(").replace(" -RRB- ", ")"); String temp = phraseString.trim(); if (temp.startsWith("(?i)the")) temp = temp.replaceFirst("(?i)the ", ""); else if (temp.startsWith("(?i)a")) temp = temp.replaceFirst("(?i)a ", ""); else if (temp.startsWith("(?i)an")) temp = temp.replaceFirst("(?i)an ", ""); if (subtree.getChildrenAsList().contains(tree.label().value().equals("NN"))) { //System.out.println("PHRASE"); } if (temp.contains(" or ")) { String[] nptokens = temp.split(" or "); for (String s : nptokens) { nounPhrases.add(s); } } else { nounPhrases.add(temp); } } } } } } catch (OutOfMemoryError e) { System.out.println("Result too long to read into memory"); } return nounPhrases; }
From source file:edu.jhu.hlt.concrete.stanford.PreNERCoreMapWrapper.java
License:Open Source License
/** * * @param root/*from www .ja v a 2 s . c om*/ * @param left * @param right * @param n * is the length of the sentence is tokens. * @param p * @param tokenizationUUID * @return The constituent ID * @throws AnalyticException */ private static int constructConstituent(Tree root, int left, int right, int n, Parse p, UUID tokenizationUUID, HeadFinder hf) throws AnalyticException { Constituent constituent = new Constituent(); constituent.setId(p.getConstituentListSize()); constituent.setTag(root.value()); constituent.setStart(left); constituent.setEnding(right); p.addToConstituentList(constituent); Tree headTree = null; if (!root.isLeaf()) { try { headTree = hf.determineHead(root); } catch (java.lang.IllegalArgumentException iae) { LOGGER.warn("Failed to find head, falling back on rightmost constituent."); headTree = root.children()[root.numChildren() - 1]; } } int i = 0, headTreeIdx = -1; int leftPtr = left; for (Tree child : root.getChildrenAsList()) { int width = child.getLeaves().size(); int childId = constructConstituent(child, leftPtr, leftPtr + width, n, p, tokenizationUUID, hf); constituent.addToChildList(childId); leftPtr += width; if (headTree != null && child == headTree) { assert (headTreeIdx < 0); headTreeIdx = i; } i++; } if (headTreeIdx >= 0) constituent.setHeadChildIndex(headTreeIdx); if (!constituent.isSetChildList()) constituent.setChildList(new ArrayList<Integer>()); return constituent.getId(); }
From source file:elkfed.mmax.importer.DetermineMinSpan.java
License:Apache License
/** adds min_ids and min_span attributes so that * BART's chunk-based coref resolution works *//*from ww w .j ava 2s . c om*/ public static void addMinSpan(int start, Tree tree, IMarkable tag, List<String> tokens) { List<Tree> leaves = tree.getLeaves(); Tree startNode; Tree endNode; try { startNode = leaves.get(tag.getLeftmostDiscoursePosition() - start); endNode = leaves.get(tag.getRightmostDiscoursePosition() - start); if (".".equals(endNode.parent(tree).value())) { //System.err.println("Sentence-final dot in "+ // tokens.subList(tag.start, tag.end + 1)+ "removed."); endNode = leaves.get(tag.getRightmostDiscoursePosition() - start - 1); } } catch (IndexOutOfBoundsException ex) { System.out.format("indices not found: %d,%d in %s [wanted: %s] [ctx: %s]", tag.getLeftmostDiscoursePosition() - start, tag.getRightmostDiscoursePosition() - start, leaves, tokens.subList(tag.getLeftmostDiscoursePosition(), tag.getRightmostDiscoursePosition() + 1), tokens.subList(start, tag.getLeftmostDiscoursePosition())); throw ex; } Tree parentNode = startNode; while (parentNode != null && !parentNode.dominates(endNode)) { parentNode = parentNode.parent(tree); } if (parentNode == null) { System.err.println("Could not match tree (1)"); return; } if (startNode.leftCharEdge(tree) != parentNode.leftCharEdge(tree) || endNode.rightCharEdge(tree) != parentNode.rightCharEdge(tree)) { System.err.println("Could not match tree (2)"); return; } Tree oldParent = parentNode; ModCollinsHeadFinder hf = new ModCollinsHeadFinder(); // use the head finder to narrow down the span. 
// stop if (a) the head is no longer an NP or // (b) the NP is a conjunction go_up: while (true) { for (Tree t : parentNode.getChildrenAsList()) { if (t.value().equals("CC")) { break go_up; } } Tree headDtr = hf.determineHead(parentNode); if (headDtr == null || !headDtr.value().equals("NP")) { break; } parentNode = headDtr; } if (parentNode != oldParent) { List<Tree> newLeaves = parentNode.getLeaves(); int newStart = start + find_same(leaves, newLeaves.get(0)); int newEnd = newStart + newLeaves.size() - 1; if (newStart <= tag.getLeftmostDiscoursePosition()) { if (tag.getLeftmostDiscoursePosition() - newStart > 1) { System.err.println("NP node is too big:" + parentNode.toString() + " wanted:" + tokens .subList(tag.getLeftmostDiscoursePosition(), tag.getRightmostDiscoursePosition() + 1) + " in: " + tree); return; } for (int i = newStart - start; i < tag.getLeftmostDiscoursePosition() - start; i++) { System.err.println("additional prefix in syntax:" + leaves.get(i)); } // switch NP boundary and tag boundary // (even [Connie Cheung]) => min_words="Connie Cheung" int tmp = tag.getLeftmostDiscoursePosition(); tag.adjustSpan(newStart, tag.getRightmostDiscoursePosition()); newStart = tmp; } assert newEnd <= tag.getRightmostDiscoursePosition(); // this relies on MiniDiscourse's default word numbering // which is ugly but should generally work... if (newStart == newEnd) { tag.setAttributeValue("min_ids", "word_" + (newStart + 1)); } else { tag.setAttributeValue("min_ids", String.format("word_%d..word_%d", newStart + 1, newEnd + 1)); } StringBuffer buf = new StringBuffer(); for (Tree t : newLeaves) { buf.append(t.toString().toLowerCase()); buf.append(' '); } buf.setLength(buf.length() - 1); tag.setAttributeValue("min_words", buf.toString()); } }
From source file:gate.stanford.Parser.java
License:Open Source License
/**
 * Generate a SyntaxTreeNode Annotation corresponding to this Tree. Work
 * recursively so that the annotations are actually generated from the
 * bottom up, in order to build the consists list of annotation IDs.
 * <p>
 * Childless trees produce no annotation. Child annotations whose type equals
 * {@code inputTokenType} are left out of the consists list.
 *
 * @param annotationSet the set handed on to the method creating the annotation
 * @param stanfordSentence used to translate tree edge positions into offsets
 * @param tree the current subtree
 * @param rootTree the whole sentence, used to find the span of the current subtree
 * @return a GATE Annotation of type "SyntaxTreeNode", or {@code null} if {@code tree} has
 *         no children
 */
protected Annotation annotatePhraseStructureRecursively(AnnotationSet annotationSet,
        StanfordSentence stanfordSentence, Tree tree, Tree rootTree) {
    List<Tree> subtrees = tree.getChildrenAsList();
    if (subtrees.isEmpty()) {
        return null;
    }

    /*
     * tree.getSpan() is avoided on purpose: it was observed to fail with a
     * ClassCastException (edu.stanford.nlp.ling.CategoryWordTag, at
     * edu.stanford.nlp.trees.Tree.getSpan(Tree.java:393)), which looks like a parser
     * bug. The edges are computed relative to the root tree instead.
     */
    Long startOffset = stanfordSentence.startPos2offset(Trees.leftEdge(tree, rootTree));
    Long endOffset = stanfordSentence.endPos2offset(Trees.rightEdge(tree, rootTree));

    // Annotate the children first and remember the IDs of the non-token ones.
    List<Integer> childIds = new ArrayList<Integer>();
    for (Tree subtree : subtrees) {
        Annotation childAnnotation = annotatePhraseStructureRecursively(annotationSet,
                stanfordSentence, subtree, rootTree);
        if (childAnnotation == null) {
            continue;
        }
        if (!childAnnotation.getType().equals(inputTokenType)) {
            childIds.add(childAnnotation.getId());
        }
    }

    return annotatePhraseStructureConstituent(annotationSet, startOffset, endOffset,
            tree.value(), childIds, tree.depth());
}
From source file:ims.cs.qsample.features.components.SentenceConstituentFeatures.java
License:Open Source License
/** * Recursion step for tree featues/* w ww .j a va 2s.c o m*/ * @param sentence * @param t complete tree * @param level current level * @param governingLabels list of governing labels * @param parent information about direct parent * @param isLeftmost is the node the leftmost one in the constituent specified by ancestorWhereLeftmost * @param ancestorWhereLeftmost */ private static void addTreeFeatures(Sentence sentence, Tree t, int level, List<NodeFeatures> governingLabels, NodeFeatures parent, boolean isLeftmost, NodeFeatures ancestorWhereLeftmost) { if (t.isLeaf()) { /* terminal nodes */ // get the current token represented by this subtree Token pToken = sentence.treeLookup.get(t); // check if token is null. this can happen if the token was unaligned previously (e.g., because of // a parser error) if (pToken == null) { if (StaticConfig.verbose) System.err.println(sentence.sentenceId + " Dropping tree without associated token: " + t + " "); return; } FeatureSet fs = pToken.boundaryFeatureSet; // leftmost feature (see Pareti paper for description) if (StaticConfig.constituentLeftmost && isLeftmost) fs.add(LEFTMOST_FEATURE); // level in tree if (StaticConfig.constituentLevel) { fs.add(LEVEL_FEATURE + level); addLevelBinHeuristic(pToken, LEVEL_FEATURE, level); } // leftmost feature label if (StaticConfig.constituentAncestorL) { fs.add(AL_FEATURE + "LBL:" + ancestorWhereLeftmost.label); fs.add(AL_FEATURE + "LVL:" + ancestorWhereLeftmost.level); addLevelBinHeuristic(pToken, AL_FEATURE + "LVL", ancestorWhereLeftmost.level); } // parent in constituent tree if (StaticConfig.constituentParent) { fs.add(PARENT_FEATURE + "LBL:" + parent.label); } // labels of all ancestors if (StaticConfig.constituentGoverning) { /* "Ancestor" features in the paper */ for (NodeFeatures nf : governingLabels) { // label with and without depth fs.add(GOV_FEATURE + nf.label + "@" + nf.level); /* ambiguous in paper */ fs.add(GOV_FEATURE + nf.label); fs.add(GOV_FEATURE + nf.label + "@-" + (level 
- nf.level)); /* ambiguous in paper */ addLevelBinHeuristic(pToken, GOV_FEATURE + nf.label + "@", nf.level); addLevelBinHeuristic(pToken, GOV_FEATURE + nf.label + "@-", (level - nf.level)); } } } else { // non-terminal node List<Tree> childList = t.getChildrenAsList(); String label = t.label().toString(); // copy governing node features for next recursion step List<NodeFeatures> governingLabelsUpdate = new LinkedList<NodeFeatures>(governingLabels); governingLabelsUpdate.add(new NodeFeatures(label, level)); // set leftmost ancestor if (ancestorWhereLeftmost == null) { ancestorWhereLeftmost = new NodeFeatures(label, level); } // check for pre-terminals -- otherwise, set the leftmost flag for the first constituent if (childList.size() > 1) { isLeftmost = true; } // call function for all children for (Tree child : childList) { addTreeFeatures(sentence, child, level + 1, governingLabelsUpdate, new NodeFeatures(label, level), isLeftmost, ancestorWhereLeftmost); isLeftmost = false; ancestorWhereLeftmost = null; } } }