List of usage examples for edu.stanford.nlp.trees Tree value
@Override
public String value()
From source file:edu.jhu.hlt.concrete.stanford.PreNERCoreMapWrapper.java
License:Open Source License
/**
 * Recursively converts a parse (sub)tree into {@code Constituent} entries appended to {@code p}.
 *
 * <p>A constituent for {@code root} is registered first (its id is the size of the parse's
 * constituent list at that moment), then every child is converted in order and its id is
 * recorded in the child list of this constituent.
 *
 * @param root the subtree to convert
 * @param left value stored as the start of this constituent
 * @param right value stored as the ending of this constituent
 * @param n only forwarded unchanged to the recursive calls
 * @param p the parse that collects the constituents
 * @param tokenizationUUID only forwarded unchanged to the recursive calls
 * @param hf head finder used to pick the head child of non-leaf nodes
 * @return the id of the constituent created for {@code root}
 * @throws AnalyticException declared by the signature; no statement in this method throws it
 *         directly
 */
private static int constructConstituent(Tree root, int left, int right, int n, Parse p,
        UUID tokenizationUUID, HeadFinder hf) throws AnalyticException {
    final Constituent node = new Constituent();
    node.setId(p.getConstituentListSize());
    node.setTag(root.value());
    node.setStart(left);
    node.setEnding(right);
    // Register before recursing so that the children receive larger ids than their parent.
    p.addToConstituentList(node);

    Tree head = null;
    if (!root.isLeaf()) {
        try {
            head = hf.determineHead(root);
        } catch (java.lang.IllegalArgumentException iae) {
            LOGGER.warn("Failed to find head, falling back on rightmost constituent.");
            final int lastChild = root.numChildren() - 1;
            head = root.children()[lastChild];
        }
    }

    int headPosition = -1;
    int childStart = left;
    int position = 0;
    for (Tree child : root.getChildrenAsList()) {
        // Each child covers as many positions as it has leaves.
        final int childEnd = childStart + child.getLeaves().size();
        node.addToChildList(
                constructConstituent(child, childStart, childEnd, n, p, tokenizationUUID, hf));
        childStart = childEnd;

        // Identity comparison: the head is one of the child objects themselves.
        if (head != null && head == child) {
            assert (headPosition < 0);
            headPosition = position;
        }
        position++;
    }

    if (headPosition >= 0) {
        node.setHeadChildIndex(headPosition);
    }
    // Leaves never add a child, so give them an explicit empty list.
    if (!node.isSetChildList()) {
        node.setChildList(new ArrayList<Integer>());
    }
    return node.getId();
}
From source file:edu.ucla.cs.scai.qa.questionclassifier.SyntacticTreeNode.java
public SyntacticTreeNode(Tree t, ArrayList<CoreLabel> tokens, SyntacticTreeNode parent) throws Exception { this.parent = parent; value = t.value(); if (t.isLeaf()) { CoreLabel c = tokens.remove(0);/*from w w w .jav a2 s . c om*/ begin = c.beginPosition(); end = c.endPosition(); if (c == null) { throw new Exception("Mapping between TreeNode and CoreLabel not found"); } else { lemma = c.lemma(); ner = c.ner(); //System.out.println(value + " -> " + c.value()); if (!value.equals(c.value())) { throw new Exception("Different words have been matched!"); } } } else { boolean hasNPchildren = false; boolean hasWHNPchildren = false; boolean hasQPchildren = false; begin = Integer.MAX_VALUE; end = Integer.MIN_VALUE; for (Tree c : t.children()) { SyntacticTreeNode child = new SyntacticTreeNode(c, tokens, this); children.add(child); if (child.value.equals("NP")) { hasNPchildren = true; } else if (child.value.equals("QP")) { hasQPchildren = true; } else if (child.value.equals("WHNP")) { hasWHNPchildren = true; } begin = Math.min(begin, child.begin); end = Math.max(end, child.end); } if (value.equals("NP")) { if (hasNPchildren) { npCompound = true; } else if (hasQPchildren) { npQp = true; } else { npSimple = true; } } else if (value.equals("WHNP")) { //can a WHNP node have QP children? if (hasNPchildren || hasWHNPchildren) { whnpCompound = true; } else if (!hasQPchildren) { whnpSimple = true; } } } }
From source file:elkfed.coref.features.pairs.FE_CCommand.java
License:Apache License
public static Boolean getCCommand(PairInstance inst) { // should be in the same sentence if (inst.getAnaphor().getSentId() != inst.getAntecedent().getSentId()) return false; //Ana should not be reflexive or reciprocal pronoun if (inst.getAnaphor().getReflPronoun()) return false; // should have not-null maxnp-trees (otherwise -- problematic mentions) Tree sentenceTree = inst.getAnaphor().getSentenceTree(); Tree AnaTree = inst.getAnaphor().getMaxNPParseTree(); Tree AnteTree = inst.getAntecedent().getMaxNPParseTree(); if (sentenceTree == null) return false; if (AnaTree == null) return false; if (AnteTree == null) return false; // should not dominate each other if (AnaTree.dominates(AnteTree)) return false; if (AnteTree.dominates(AnaTree)) return false; //the first branching node for ante should dominate ana (but not via S-node) AnteTree = AnteTree.parent(sentenceTree); while (AnteTree != null) { if (AnteTree.children().length > 1) { if (!AnteTree.dominates(AnaTree)) return false; while (AnaTree != null && AnaTree != AnteTree) { if (AnaTree.value().toLowerCase().startsWith("s")) return false; AnaTree = AnaTree.parent(sentenceTree); }//w w w. j ava 2 s .c om return true; } AnteTree = AnteTree.parent(sentenceTree); } return false; }
From source file:elkfed.coref.features.pairs.FE_Copula.java
License:Apache License
public static Boolean getCopula(PairInstance inst) { // should be in the same sentence if (inst.getAnaphor().getSentId() != inst.getAntecedent().getSentId()) { return false; }//w ww. j ava 2 s . c om // should have not-null maxnp-trees (otherwise -- problematic mentions) Tree sentenceTree = inst.getAnaphor().getSentenceTree(); Tree AnaTree = inst.getAnaphor().getMaxNPParseTree(); Tree AnteTree = inst.getAntecedent().getMaxNPParseTree(); if (sentenceTree == null) { return false; } if (AnaTree == null) { return false; } if (AnteTree == null) { return false; } // exclude "there is .." (ToDo: exclude other expletives!) if (inst.getAntecedent().getMarkableString().toLowerCase().matches(NONREF_NP)) { return false; } //exclude date and time if (inst.getAnaphor().getSemanticClass() == SemanticClass.TIME) { return false; } if (inst.getAnaphor().getSemanticClass() == SemanticClass.DATE) { return false; } if (inst.getAntecedent().getSemanticClass() == SemanticClass.TIME) { return false; } if (inst.getAntecedent().getSemanticClass() == SemanticClass.DATE) { return false; } // should be subj-obj of the same verb Tree vp = AnaTree.parent(sentenceTree); if (vp == null) { return false; } if (!vp.value().equalsIgnoreCase("vp")) { return false; } while (vp.parent(sentenceTree) != null && vp.parent(sentenceTree).value().equalsIgnoreCase("vp")) { vp = vp.parent(sentenceTree); } if (vp.parent(sentenceTree) == null) { return false; } Boolean foundante = false; Tree[] chldsup = vp.parent(sentenceTree).children(); for (int i = 0; i < chldsup.length; i++) { if (chldsup[i] == AnteTree) { foundante = true; } if (chldsup[i] == vp && foundante == false) { return false; } } vp = AnaTree.parent(sentenceTree); // we do not want to go higher here -- "S is *ing O" fires otherwise // should not contain a modal verb Tree[] chlds = vp.children(); for (int i = 0; i < chlds.length; i++) { if (chlds[i].value().equalsIgnoreCase("rb")) { return false; } if (chlds[i].value().equalsIgnoreCase("md") && 
chlds[i].getLeaves().get(0).value().toLowerCase().matches(MODAL_VERB)) { return false; } } // the verb should be one of the copula verbs for (int i = 0; i < chlds.length; i++) { if (chlds[i].value().equalsIgnoreCase("vbd") || chlds[i].value().equalsIgnoreCase("aux") || chlds[i].value().equalsIgnoreCase("vbn") || chlds[i].value().equalsIgnoreCase("vb") || chlds[i].value().equalsIgnoreCase("vbd") || chlds[i].value().equalsIgnoreCase("vbp") || chlds[i].value().equalsIgnoreCase("vbz") || chlds[i].value().equalsIgnoreCase("vbg")) { if (chlds[i].getLeaves().get(0).value().toLowerCase().matches(COPULA_VERB)) { /* System.out.println("Found positive copula verb (" +chlds[i].getLeaves().get(0).value() + ") for ("+ inst.getAnaphor().getMarkableString()+ "),("+ inst.getAntecedent().getMarkableString()+ ") "); */ return true; } } } return false; }
From source file:elkfed.coref.features.pairs.FE_Span.java
License:Apache License
public static Boolean getSpanEmbed(PairInstance inst) { // should be in the same sentence if (inst.getAnaphor().getSentId() != inst.getAntecedent().getSentId()) return false; // should not be in apposition if (FE_AppositiveParse.getAppositivePrs(inst)) return false; // should not be adjacent (should take from appo_icab, but doesn't work for some reason) Markable m1 = inst.getAntecedent().getMarkable(); Markable m2 = inst.getAnaphor().getMarkable(); int sana = m2.getLeftmostDiscoursePosition(); int sante = m1.getLeftmostDiscoursePosition(); int eana = m2.getRightmostDiscoursePosition(); int eante = m1.getRightmostDiscoursePosition(); MiniDiscourse doc = m1.getMarkableLevel().getDocument(); if (m1.getAttributeValue("min_ids") != null) { String[] spans = MarkableHelper.parseRanges(m1.getAttributeValue("min_ids")); sante = doc.DiscoursePositionFromDiscourseElementID(spans[0]); eante = doc.DiscoursePositionFromDiscourseElementID(spans[spans.length - 1]); }/* w w w.j a v a 2s . com*/ if (m2.getAttributeValue("min_ids") != null) { String[] spans = MarkableHelper.parseRanges(m2.getAttributeValue("min_ids")); sana = doc.DiscoursePositionFromDiscourseElementID(spans[0]); eana = doc.DiscoursePositionFromDiscourseElementID(spans[spans.length - 1]); } if (eana == sante - 1) return false; if (eante == sana - 1) return false; // check for trivial embedding, if maxnps are missing if (inst.getAntecedent().getMaxNPParseTree() == null && sana >= sante && eana <= eante) return true; if (inst.getAnaphor().getMaxNPParseTree() == null && sana <= sante && eana >= eante) return true; // check for maximal np embedding sana -= inst.getAnaphor().getSentenceStart(); sante -= inst.getAntecedent().getSentenceStart(); eana -= inst.getAnaphor().getSentenceStart(); eante -= inst.getAntecedent().getSentenceStart(); // if anaphor does have MaxNPParseTree -- check that it does not span over the antecedent // if it does -- check whether there is an s-node in between if 
(inst.getAnaphor().getMaxNPParseTree() != null) { Tree sentTree = inst.getAntecedent().getSentenceTree(); List<Tree> Leaves = sentTree.getLeaves(); Tree startNode = Leaves.get(sante); if (eante < Leaves.size()) { // check <leaves.size for markables spanning over sentence boundaries Tree endNode = Leaves.get(eante); if (inst.getAnaphor().getMaxNPParseTree().dominates(endNode) && inst.getAnaphor().getMaxNPParseTree().dominates(startNode)) { Boolean sfound = false; Tree t = startNode; while (t != null && t != inst.getAnaphor().getMaxNPParseTree() && !sfound) { if (t.value().toLowerCase().startsWith("s")) sfound = true; t = t.parent(sentTree); } if (!sfound) return true; } } } // same for antecedent if (inst.getAntecedent().getMaxNPParseTree() != null) { Tree sentTree = inst.getAntecedent().getSentenceTree(); List<Tree> Leaves = sentTree.getLeaves(); Tree startNode = Leaves.get(sana); if (eana < Leaves.size()) { Tree endNode = Leaves.get(eana); if (inst.getAntecedent().getMaxNPParseTree().dominates(endNode) && inst.getAntecedent().getMaxNPParseTree().dominates(startNode)) { Boolean sfound = false; Tree t = startNode; while (t != null && t != inst.getAntecedent().getMaxNPParseTree() && !sfound) { if (t.value().toLowerCase().startsWith("s")) sfound = true; t = t.parent(sentTree); } if (!sfound) return true; } } } return false; }
From source file:elkfed.coref.mentions.Mention.java
License:Apache License
public String getRootPath() { Tree top = getSentenceTree();//from w w w. j a v a 2s. c o m Tree here = getHighestProjection(); StringBuffer sb = new StringBuffer(); String lastValue = null; while (here != top) { here = here.parent(top); String val = here.value(); if (!val.equals(lastValue)) sb.append(here.value()).append("."); lastValue = val; } return sb.toString(); }
From source file:elkfed.coref.mentions.Mention.java
License:Apache License
/**
 * Searches a markable subtree, level by level, for the first node that has a direct child
 * labelled {@code postag} (case-insensitive); only children labelled "nx" are descended into.
 * The result holds, for every child of that node carrying the label, the value of that child's
 * first child.
 *
 * @author samuel
 * @param markableSubtree
 *            A subtree of the sentenceTreeWithDiscIds
 * @param postag
 *            Parts of speech tag
 * @return the collected values (disc ids, per the original description), or {@code null} when
 *         no child with the given tag was found
 */
public ArrayList<String> getHighestProjectingPhraseWithPOS(Tree markableSubtree, String postag) {
    final LinkedList<Tree> pending = new LinkedList<Tree>();
    pending.add(markableSubtree);

    Tree current = null;
    boolean tagFound = false;
    while (!tagFound && !pending.isEmpty()) {
        current = pending.removeFirst();
        for (Tree child : current.children()) {
            final String label = child.value();
            if (label.equalsIgnoreCase(postag)) {
                // Stop the whole search; 'current' is the node to harvest from.
                tagFound = true;
                break;
            }
            if (label.equalsIgnoreCase("nx")) {
                pending.add(child);
            }
        }
    }

    // When nothing matched, 'current' is the last node visited and this loop adds nothing.
    final ArrayList<String> result = new ArrayList<String>();
    for (Tree child : current.children()) {
        if (child.value().equalsIgnoreCase(postag)) {
            result.add(child.children()[0].value().toString());
        }
    }
    return result.isEmpty() ? null : result;
}
From source file:elkfed.expletives.EF_Tree.java
License:Apache License
/**
 * Builds a new node labelled {@code <label of node>-PRD}.
 *
 * <p>For a PP with exactly two children whose first child is labelled IN, the new node gets two
 * daughters: that IN child and the head preterminal of the second child. For every other node
 * it gets a single daughter, the head preterminal of {@code node}. Heads are determined with a
 * {@code ModCollinsHeadFinder}.
 *
 * @param node the node to summarize
 * @return the newly created "-PRD" node
 */
public static Tree tree_pred(Tree node) {
    LabelFactory lf = new StringLabelFactory();
    Tree result = new LabeledScoredTreeNode();
    result.setLabel(lf.newLabel(node.value() + "-PRD"));

    Tree[] kids = node.children();
    Tree[] dtrs;
    if (node.value().equals("PP") && kids.length == 2 && kids[0].value().equals("IN")) {
        dtrs = new Tree[2];
        dtrs[0] = kids[0];
        dtrs[1] = kids[1].headPreTerminal(new ModCollinsHeadFinder());
    } else {
        // This used to run unconditionally and silently replaced the PP daughters built above,
        // which made the PP branch dead code.
        dtrs = new Tree[1];
        dtrs[0] = node.headPreTerminal(new ModCollinsHeadFinder());
    }
    result.setChildren(dtrs);
    return result;
}
From source file:elkfed.expletives.EF_Tree.java
License:Apache License
/** constructs a marked subtree for a subclause * outside the path to the pronoun/*from w w w . ja va 2 s . c o m*/ * @param node the starting point * @return a marked subtree for the tree starting with node */ public static Tree tree_pruned(Tree node) { LabelFactory lf = new StringLabelFactory(); Tree result = new LabeledScoredTreeNode(); result.setLabel(lf.newLabel(node.value() + "-X")); List<Tree> dtrs = new ArrayList<Tree>(); boolean cpl_seen = false; if (node.value().matches("S|SBAR|VP")) { for (Tree t : node.children()) { // modals are copied verbatim String cat = t.value(); if (cat.matches("TO|MD|IN")) { dtrs.add(t); cpl_seen = true; } else if (cat.startsWith("WH")) { Tree dtr = tagged_word(cat, "WH"); cpl_seen = true; } else if (t.value().startsWith("VB")) { break; } else if (t.value().matches("S|SBAR|VP")) { if (cpl_seen) { //ignore } else { dtrs.add(tree_pruned(t)); } } } } result.setChildren(dtrs); return result; }
From source file:elkfed.expletives.EF_Tree.java
License:Apache License
private static void dtrs_inside(Tree node, List<Tree> dtrs) { boolean after_be = false; for (Tree t : node.children()) { String cat = t.value(); if (cat.matches("''|``")) { // ignore } else if ((cat.startsWith("VB") || cat.equals("AUX")) && t.children()[0].value().matches("'s|is|was|be|being|been")) { //dtrs.add(t); dtrs.add(tagged_word(cat, "BE")); after_be = true;// w w w. j a v a 2s .com } else if (after_be && cat.matches("NP|ADJP|VP|PP")) { if (cat.equals("VP") && t.children()[0].value().matches("VB[GN]")) { dtrs_inside(t, dtrs); //dtrs.add(tree_outside(t)); } else { dtrs.add(tree_pred(t)); } after_be = false; } else { if (cat.matches("S|SBAR")) { dtrs.add(tree_pruned(t)); } else if (cat.equals("VP")) { dtrs_inside(t, dtrs); //dtrs.add(tree_outside(t)); } else { dtrs.add(tree_outside(t)); } } } }