List of usage examples for edu.stanford.nlp.trees Tree getChild
public Tree getChild(int i)
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * e.g., John and James like Susan. -> John likes Susan. * /*w ww . ja v a2 s .c om*/ */ private void extractConjoinedNPs(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; Tree conjoinedNode; Tree parent; TregexMatcher matcher; Question newQuestion; //only extract conjoined NPs that are arguments or adjuncts of the main verb // in the tree, this means the closest S will be the one under the root tregexOpStr = "NP=parent < (CONJP|CC !< or|nor [ " + " $+ /^(N.*|PRP|SBAR)$/=child $-- /^(N.*|PRP|SBAR)$/ | " //there must be a noun on each side of the conjunction + " $-- /^(N.*|PRP|SBAR)$/=child $+ /^(N.*|PRP|SBAR)$/ ] ) " //this avoids extracting from flat NPs such as "the smaller and darker form" + " !>> (/.*/ $ (CC|CONJP !< or|nor)) " //this cannot be nested within a larger conjunction or followed by a conjunction (we recur later to catch this) + " !$ (CC|CONJP !< or|nor)" + " !.. (CC|CONJP !< or|nor > NP|PP|S|SBAR|VP) !>> SBAR "; //+ " >> (ROOT !< (S <+(VP) (/^VB.*$/ < are|were|be|seem|appear))) " ; //don't break plural predicate nominatives (e.g., "John and Mary are two of my best friends.") matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); List<Integer> nodeIndexes = new ArrayList<Integer>(); List<Integer> parentIDs = new ArrayList<Integer>(); while (matcher.find()) { //store the parents' IDs (in the tree) parent = matcher.getNode("parent"); parentIDs.add(parent.nodeNumber(input.getIntermediateTree())); conjoinedNode = matcher.getNode("child"); //store the conjoined nodes' index into their parent's list of children int idx = parent.indexOf(conjoinedNode); if (!nodeIndexes.contains(idx)) nodeIndexes.add(idx); } //for each of the conjoined children, //create a new tree by removing all the nodes they are conjoined with Collections.sort(nodeIndexes);//sort, just to keep them in the original order for (int i = 0; i < nodeIndexes.size(); i++) { newQuestion = input.deeperCopy(); Tree t = newQuestion.getIntermediateTree(); parent = t.getNodeNumber(parentIDs.get(i)); Tree gparent = parent.parent(t); conjoinedNode = parent.getChild(nodeIndexes.get(i)); String siblingLabel; //Remove all the nodes that are conjoined //with the selected noun (or are conjunctions, commas). //These can have labels NP, NN, ..., PRP for pronouns, CC, "," for commas, ":" for semi-colons for (int j = 0; j < parent.numChildren(); j++) { if (parent.getChild(j) == conjoinedNode) continue; siblingLabel = parent.getChild(j).label().toString(); if (siblingLabel.matches("^[NCP,:S].*")) { parent.removeChild(j); j--; } } //if there is an trivial unary "NP -> NP", //remove the parent and put the child in its place if (parent.numChildren() == 1 && parent.getChild(0).label().equals("NP")) { int tmpIndex = gparent.indexOf(parent); gparent.removeChild(tmpIndex); gparent.addChild(tmpIndex, parent.getChild(0)); } correctTense(conjoinedNode, gparent); addQuotationMarksIfNeeded(newQuestion.getIntermediateTree()); if (GlobalProperties.getDebug()) System.err.println("extractConjoinedNPs: " + newQuestion.getIntermediateTree().toString()); if (GlobalProperties.getComputeFeatures()) newQuestion.setFeatureValue("extractedFromConjoinedPhrases", 1.0); //old feature name if (GlobalProperties.getComputeFeatures()) newQuestion.setFeatureValue("extractedFromConjoinedNPs", 1.0); extracted.add(newQuestion); } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * e.g., John and Mary like Bill. -> John LIKES Bill. Mary LIKES Bill. * John and I like Bill -> John LIKES Bill. I LIKE Bill. * John and I are old. -> I IS old. John IS old. *//* w w w . j a v a 2 s . c o m*/ private void correctTense(Tree subject, Tree clause) { int tmpIndex; //correct verb tense when modifying subjects for (Tree uncle : clause.getChildrenAsList()) { String newVerbPOS = null; Tree verbPreterminal = null; boolean needToModifyVerb = false; //if the node is a subject (i.e., its uncle is a VP), then check //to see if its tense needs to be changed String headPOS = subject.headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder()).label() .toString(); if (uncle.label().toString().equals("VP") && !headPOS.endsWith("S")) { verbPreterminal = uncle.headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder()); //original main verb was plural but the conjoined subject word is singular //e.g., John (and Mary) like Bill. -> John like Bill. if ((verbPreterminal.label().toString().equals("VB") || verbPreterminal.label().toString().equals("VBP"))) { //the parser confuses VBP with VB if (subject.yield().toString().equals("I") || subject.yield().toString().equals("you")) { newVerbPOS = "VBP"; } else { newVerbPOS = "VBZ"; } needToModifyVerb = true; } else if (verbPreterminal.label().toString().equals("VBD")) { newVerbPOS = "VBD"; needToModifyVerb = true; } } //if needed, change the tense of the verb if (needToModifyVerb) { String verbLemma = AnalysisUtilities.getInstance().getLemma( verbPreterminal.getChild(0).label().toString(), verbPreterminal.label().toString()); String newVerb; //special cases if (verbLemma.equals("be") && newVerbPOS.equals("VBD")) { if (subject.label().toString().endsWith("S")) newVerb = "were"; else newVerb = "was"; } else if (verbLemma.equals("be") && subject.yield().toString().equals("I") && newVerbPOS.equals("VBP")) { newVerb = "am"; } else { //default newVerb = AnalysisUtilities.getInstance().getSurfaceForm(verbLemma, newVerbPOS); } tmpIndex = verbPreterminal.parent(uncle).indexOf(verbPreterminal); Tree verbParent = verbPreterminal.parent(uncle); verbParent.removeChild(tmpIndex); verbParent.addChild(tmpIndex, AnalysisUtilities.getInstance().readTreeFromString("(" + newVerbPOS + " " + newVerb + ")")); } } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * //from w w w. j a va 2 s. c o m * John studied, hoping to get a good grade. -> John hoped to get a good grade. * * @param extracted * @param input */ private void extractVerbParticipialModifiers(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; tregexOpStr = "S=sub $- /,/ !< NP < (VP=participial < VBG=verb) " + " >+(VP) (S|SINV < NP=subj) " + " >> (ROOT <<# /VB.*/=tense) "; //tense determined by top-most verb matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { String verbPOS = findTense(matcher.getNode("tense")); Tree p = matcher.getNode("participial").deeperCopy(); Tree verb = matcher.getNode("verb"); String verbLemma = AnalysisUtilities.getInstance().getLemma(verb.getChild(0).label().toString(), verb.label().toString()); String newVerb = AnalysisUtilities.getInstance().getSurfaceForm(verbLemma, verbPOS); int verbIndex = p.indexOf(verb); p.removeChild(verbIndex); p.addChild(verbIndex, AnalysisUtilities.getInstance().readTreeFromString("(" + verbPOS + " " + newVerb + ")")); String treeStr = "(ROOT (S " + matcher.getNode("subj").toString() + " " + p.toString() + " (. .)))"; Tree newTree = AnalysisUtilities.getInstance().readTreeFromString(treeStr); correctTense(newTree.getChild(0).getChild(0), newTree.getChild(0)); addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromParticipial", 1.0); if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromVerbParticipial", 1.0); if (GlobalProperties.getDebug()) System.err.println("extractVerbParticipialModifiers: " + newTree.toString()); addIfNovel(extracted, newTreeWithFeatures); } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * e.g., John, who is a friend of mine, likes Susan. -> John is a friend of mine. * // ww w . j ava 2 s . c o m */ private void extractNonRestrictiveRelativeClauses(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; TregexMatcher matcherclause; tregexOpStr = "NP=np < (SBAR=sbar [ < (WHADVP=wherecomp < (WRB < where)) " + " | < (WHNP !< /WP\\$/) " + " | < (WHNP=possessive < /WP\\$/)" //John, whose car was + " | < (WHPP < IN|TO=preposition) ] $-- NP $- /,/ " + " < S=relclause !< WHADJP)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); //iterate over all the relative clauses in the input //and create an output sentence for each one. while (matcher.find()) { Tree missingArgumentTree = matcher.getNode("np"); Tree relclause = matcher.getNode("relclause"); if (missingArgumentTree == null || relclause == null) continue; missingArgumentTree = missingArgumentTree.deeperCopy(); relclause = relclause.deeperCopy(); Tree possessive = matcher.getNode("possessive"); Tree sbar = matcher.getNode("sbar").deeperCopy(); makeDeterminerDefinite(missingArgumentTree); if (possessive != null) { possessive = possessive.deeperCopy(); possessive.removeChild(0); String newTree = "(NP (NP " + missingArgumentTree.toString() + " (POS 's))"; for (int i = 0; i < possessive.numChildren(); i++) newTree += possessive.getChild(i).toString() + " "; newTree += ")"; missingArgumentTree = AnalysisUtilities.getInstance().readTreeFromString(newTree); } //remove the relative clause and the commas surrounding it from the missing argument tree for (int i = 0; i < missingArgumentTree.numChildren(); i++) { if (missingArgumentTree.getChild(i).equals(sbar)) { //remove the relative clause missingArgumentTree.removeChild(i); //remove the comma after the relative clause if (i < missingArgumentTree.numChildren() && missingArgumentTree.getChild(i).label().toString().equals(",")) { missingArgumentTree.removeChild(i); } //remove the comma before the relative clause if (i > 0 && missingArgumentTree.getChild(i - 1).label().toString().equals(",")) { missingArgumentTree.removeChild(i - 1); i--; } i--; } } //put the noun in the clause at the topmost place with an opening for a noun. //Note that this may mess up if there are noun phrase adjuncts like "The man I met Tuesday". //specifically: //the parent of the noun can be either a clause (S) as in "The man who met me" //or a verb phrase as in "The man who I met". //for verb phrases, add the noun to the end since it will be an object. //for clauses, add the noun to the beginning since it will be the subject. tregexOpStr = "S|VP=newparent !< NP < (VP=verb !< TO !$ TO)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcherclause = matchPattern.matcher(relclause); boolean subjectMovement = true; if (!matcherclause.find()) { tregexOpStr = "VP=newparent !< VP < /VB.*/=verb !>> (S !< NP) !<< (VP !< VP !< NP)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcherclause = matchPattern.matcher(relclause); subjectMovement = false; } //reset (so the first match isn't skipped) matcherclause = matchPattern.matcher(relclause); if (matcherclause.find()) { Tree newparenttree = matcherclause.getNode("newparent"); Tree verbtree = matcherclause.getNode("verb"); boolean ppRelativeClause = false; if (matcher.getNode("wherecomp") != null) { String tmp = "(PP (IN at) " + missingArgumentTree.toString() + ")"; missingArgumentTree = AnalysisUtilities.getInstance().readTreeFromString(tmp); ppRelativeClause = true; subjectMovement = false; } else if (matcher.getNode("preposition") != null) { String tmp = "(PP (IN " + matcher.getNode("preposition").yield().toString() + ") " + missingArgumentTree.toString() + ")"; missingArgumentTree = AnalysisUtilities.getInstance().readTreeFromString(tmp); ppRelativeClause = true; } if (subjectMovement) { //subject newparenttree.addChild(newparenttree.indexOf(verbtree), missingArgumentTree); } else { // newparentlabel is VP if (ppRelativeClause) newparenttree.addChild(newparenttree.numChildren(), missingArgumentTree); else newparenttree.addChild(newparenttree.indexOf(verbtree) + 1, missingArgumentTree); } //create a new tree with punctuation Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>()); newTree.addChild(relclause); AnalysisUtilities.addPeriodIfNeeded(newTree); if (GlobalProperties.getDebug()) System.err.println("extractRelativeClauses: " + newTree.toString()); addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromRelativeClause", 1.0); addIfNovel(extracted, newTreeWithFeatures); } } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * e.g., In January, John wore his winter coat. -> John wore his winter coat in January. * /* w ww.jav a 2 s . c om*/ * @param input * @return */ private void moveLeadingPPsAndQuotes(Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; Tree mainvp = null; Tree subj = null; tregexOpStr = "ROOT < (S|SINV=mainclause < (NP|SBAR=subj !$++ /,/) < VP=mainvp " + " [ < (PP=modifier < NP) " //must be a PP with an NP object + "| < (S=modifier < SBAR|NP <<# VB|VBD|VBP|VBZ) ] ) "; //OR: a quote, which is an S clause with a subject and finite main verb //the modifiers to move must be immediately followed by commas matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); List<Tree> modifiers = new ArrayList<Tree>(); while (matcher.find()) { if (mainvp == null) { mainvp = matcher.getNode("mainvp").deeperCopy(); subj = matcher.getNode("subj").deeperCopy(); } Tree mainclause = matcher.getNode("mainclause"); Tree modifier = matcher.getNode("modifier").deeperCopy(); int idx = mainclause.indexOf(modifier); if (modifiers.contains(modifier)) continue; //just in case the tregex expression catches duplicates //add commas and quotation marks if they appeared in the original if (idx > 0 && mainclause.getChild(idx - 1).label().toString().equals("``")) { modifiers.add(AnalysisUtilities.getInstance().readTreeFromString("(, ,)")); modifiers.add(AnalysisUtilities.getInstance().readTreeFromString("(`` ``)")); Tree sbar = factory.newTreeNode("SBAR", new ArrayList<Tree>()); sbar.addChild(modifier); modifiers.add(sbar); modifiers.add(AnalysisUtilities.getInstance().readTreeFromString("('' '')")); } else { modifiers.add(modifier); } } if (mainvp != null) { //any matches? for (Tree m : modifiers) { mainvp.addChild(m); } Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>()); Tree clause = factory.newTreeNode("S", new ArrayList<Tree>()); newTree.addChild(clause); clause.addChild(subj); clause.addChild(mainvp); AnalysisUtilities.addPeriodIfNeeded(newTree); addQuotationMarksIfNeeded(newTree); if (GlobalProperties.getDebug()) System.err.println("moveLeadingModifiers: " + newTree.toString()); input.setIntermediateTree(newTree); if (GlobalProperties.getComputeFeatures()) input.setFeatureValue("movedLeadingPPs", 1.0); } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * e.g., John, hoping to get a good grade, studied. -> John hoped to get a good grade. * Walking to the store, John saw Susan -> John was walking to the store. * /*from w ww . j a v a2 s . com*/ * NOTE: This method produces false positives for sentences like, * "Broadly speaking, the project was successful." * where the participial phrase does not modify the subject. * * @param extracted * @param input */ private void extractNounParticipialModifiers(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; tregexOpStr = "ROOT < (S " + " [ << (NP < (NP=subj $++ (/,/ $+ (VP=modifier <# VBN|VBG|VP=tense )))) " //modifiers that appear after nouns + " | < (S !< NP|SBAR < (VP=modifier <# VBN|VBG|VP=tense) $+ (/,/ $+ NP=subj)) " //modifiers before the subject. e.g., Founded by John, the company... + " | < (SBAR < (S !< NP|SBAR < (VP=modifier <# VBN|VBG=tense)) $+ (/,/ $+ NP=subj)) " //e.g., While walking to the store, John saw Susan. + " | < (PP=modifier !< NP <# VBG=tense $+ (/,/ $+ NP=subj)) ] ) " // e.g., Walking to the store, John saw Susan. + " <<# /^VB.*$/=maintense "; //tense determined by top-most verb matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { Tree nountree = matcher.getNode("subj").deeperCopy(); Tree vptree = matcher.getNode("modifier"); Tree verb = matcher.getNode("tense"); makeDeterminerDefinite(nountree); if (vptree.label().toString().equals("PP")) vptree.label().setValue("VP"); String verbPOS = findTense(matcher.getNode("maintense")); if (vptree == null || nountree == null) return; String newTreeStr; if (verb.label().toString().equals("VBG")) { //for present partcipials, change the tense to the tense of the main verb //e.g., walking to the store -> walked to the store String verbLemma = AnalysisUtilities.getInstance().getLemma(verb.getChild(0).label().toString(), verb.label().toString()); String newVerb = AnalysisUtilities.getInstance().getSurfaceForm(verbLemma, verbPOS); int verbIndex = vptree.indexOf(verb); vptree = vptree.deeperCopy(); vptree.removeChild(verbIndex); vptree.addChild(verbIndex, AnalysisUtilities.getInstance().readTreeFromString("(" + verbPOS + " " + newVerb + ")")); newTreeStr = "(ROOT (S " + matcher.getNode("subj").toString() + " " + vptree.toString() + " (. .)))"; } else { //for past participials, add a copula //e.g., John, exhausted, -> John was exhausted //(or for conjunctions, just add the copula---kind of a hack to make the moby dick sentence work out) String auxiliary; if (verbPOS.equals("VBP") || verbPOS.equals("VBD")) { if (isPlural(nountree)) auxiliary = "(VBD were)"; else auxiliary = "(VBD was)"; } else { if (isPlural(nountree)) auxiliary = "(VB are)"; else auxiliary = "(VBZ is)"; } newTreeStr = "(ROOT (S " + nountree + " (VP " + auxiliary + " " + vptree + ") (. .)))"; } Tree newTree = AnalysisUtilities.getInstance().readTreeFromString(newTreeStr); correctTense(newTree.getChild(0).getChild(0), newTree.getChild(0)); addQuotationMarksIfNeeded(newTree); if (GlobalProperties.getDebug()) System.err.println("extractNounParticipialModifiers: " + newTree.toString()); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromParticipial", 1.0); //old feature name if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromNounParticipial", 1.0); extracted.add(newTreeWithFeatures); } }
From source file:edu.nus.comp.nlp.stanford.UtilParser.java
License:Open Source License
/** * //from w w w .j av a 2 s .c o m * @param tdl * @param fail set true to return a null tree if the constructiion fails * @param tree null to build a new tree, or you can start with a partial tree * @return * Feb 22. a gov could be missing, which fails the tree construction process. * Use GrammaticalStructure.root() instead, whose POS nodes to be removed. */ public static Tree makeTree(LinkedList<TypedDependency> toAssemble, boolean fail, Tree tree) { if (tree == null) { tree = lstf.newTreeNode("DUMMYROOT", null); } toAssemble.add(null);// int counter = toAssemble.size(); while (toAssemble.size() > 0) { //1. pick the next dep TypedDependency dep = toAssemble.getFirst(); if (dep == null) { toAssemble.poll(); if (counter-- > 0) { toAssemble.add(null); continue; } else { if (toAssemble.size() > 0 && fail) { tree = null; } break; } } //2. assemble it onto the tree Tree newRoot = putOnBranch(dep, tree); //2.1 success -> remove it from the set toAssemble.remove(dep); if (newRoot != null) { tree = newRoot; // System.out.println(tree+" BetterText.makeTree()"); // System.out.println("Added:\t"+dep.gov() +"-->"+dep.dep()); } else { //2.2 fail -> put it back at the tail of the set // System.out.println("Skipd:\t"+dep.gov() +"-->"+dep.dep()); // System.out.print("."); toAssemble.add(toAssemble.size(), dep); } } return tree.getChild(0); }
From source file:elkfed.coref.discourse_entities.DiscourseEntity.java
License:Open Source License
private Set<Property> computeInitialRelations(Mention np) { LanguagePlugin lang_plugin = ConfigProperties.getInstance().getLanguagePlugin(); Set<Property> result = new LinkedHashSet<Property>(); List<Tree> postModifiers = np.getPostmodifiers(); // straight from Mention char pos = '\0'; //DEBUG//from ww w . ja v a 2 s . c om //System.out.println("Number of postmodifiers of "+np.getMarkableString()+" :"+ // postModifiers.size()); if ((postModifiers != null) && (postModifiers.size() > 0)) { for (int i = 0; i < postModifiers.size(); i++) { Tree mod = postModifiers.get(i); // Expected structure: // (NP (NN software) (PP from (NP India)) if (mod.isLeaf()) { // this shouldn't happen' System.out.println("WARNING: UNEXPECTED LEAF " + mod.nodeString()); //result.add(new Property(Property.ATTRIBUTE, mod.nodeString())); //result.add(new Property(Property.ATTRIBUTE, getSense(mod.nodeString()))); } else { if (mod.isPreTerminal()) { // this shouldn't happen either, // but we'll add it to the properties NodeCategory ncat = lang_plugin.labelCat(mod.nodeString()); if (ncat == NodeCategory.CN || ncat == NodeCategory.ADJ) { if (ncat == NodeCategory.CN) { pos = 'N'; } if (ncat == NodeCategory.ADJ) { pos = 'A'; } } } else { //System.out.println("Type of postmodifier: " + mod.nodeString()); NodeCategory ncat = lang_plugin.labelCat(mod.nodeString()); if (ncat == NodeCategory.PP) { if (mod.numChildren() == 2) { // (PP (in from) (NP (nnp India))) Tree prepNode = mod.getChild(0); Tree npNode = mod.getChild(1); Tree npHead = massimoHeadFindHack(npNode); if (npHead != null && prepNode != null) { //DEBUG //System.out.println("Adding relation "+ // prepNode.firstChild().nodeString()+" "+ // npHead.firstChild().nodeString() ); /* -- no clue what it means, just fixed so that it doesn't crash (Olga) -- */ if (prepNode.numChildren() > 0) prepNode = prepNode.firstChild(); result.add( new Property(prepNode.nodeString(), npHead.firstChild().nodeString())); } } } } } } //end outer loop } //end if premodified return result; }
From source file:elkfed.coref.discourse_entities.DiscourseEntity.java
License:Open Source License
private Tree massimoHeadFindHack(Tree npNode) { LanguagePlugin lang_plugin = ConfigProperties.getInstance().getLanguagePlugin(); /*/*from w ww .ja v a2 s . co m*/ * NOTE (yv): * We should really have a decent configurable head finder. * The "generic" head finder below probably works, but ... * this is ugly enough for English, but making it work for * English *and* Italian (and possibly other languages) * is only something for very enthusiastic people with * slight masochistic tendencies. */ //CollinsHeadFinder hf = new CollinsHeadFinder(); //ModCollinsHeadFinder hf = new ModCollinsHeadFinder(); /* -- trivial -- */ if (npNode.numChildren() == 0) return npNode; if (npNode.numChildren() == 1) { if (npNode.firstChild().numChildren() == 0) return npNode; return massimoHeadFindHack(npNode.firstChild()); } /* -- coordination -- */ if (npNode.numChildren() > 2) { for (Tree n : npNode.children()) { if (lang_plugin.labelCat(n.nodeString()) == NodeCategory.CC) return null; } } /* -- last child is a noun (common/proper) --*/ /* NB: will it work for italian though? */ NodeCategory firstpos = lang_plugin.labelCat(npNode.firstChild().nodeString()); NodeCategory nextpos = lang_plugin.labelCat(npNode.getChild(1).nodeString()); NodeCategory lastpos = lang_plugin.labelCat(npNode.lastChild().nodeString()); if (lastpos == NodeCategory.CN) return npNode.lastChild(); if (lastpos == NodeCategory.PN) return npNode.lastChild(); /* -- (NP (NP (DT the) (NN man)) (PP (in from) (NP (NNP UNCLE)))) -- */ if (firstpos == NodeCategory.NP && nextpos != NodeCategory.CN) return massimoHeadFindHack(npNode.firstChild()); /* -- misc -- */ Tree found_head = null; int state = 0; for (Tree n : npNode.children()) { NodeCategory ncat = lang_plugin.labelCat(n.nodeString()); if (ncat == NodeCategory.CN || ncat == NodeCategory.PN || ncat == NodeCategory.PRO) { state = 4; found_head = n; } else if (ncat == NodeCategory.NP && state < 3) { state = 3; found_head = n; } else if (ncat == NodeCategory.ADJ && state < 3) { state = 2; found_head = n; } } if (found_head != null) { if (state == 3) { return massimoHeadFindHack(found_head); } return found_head; } // if (ConfigProperties.getInstance().getDbgPrint()) System.err.println("Couldn't find a head for NP:" + npNode.pennString()); return null; }
From source file:Engines.Test.StanfordParser.TreeHandling.java
License:Open Source License
/** * /*from www. j a va2s .c o m*/ * @param tdl * @param fail set true to return a null tree if the constructiion fails * @param tree null to build a new tree, or you can start with a partial tree * @return * Feb 22. a gov could be missing, which fails the tree construction process. * Use GrammaticalStructure.root() instead, whose POS nodes to be removed. */ public static Tree makeTree(LinkedList<TypedDependency> toAssemble, boolean fail, Tree tree) { if (tree == null) { tree = lstf.newTreeNode("DUMMYROOT", null); } toAssemble.add(null);// int counter = toAssemble.size(); while (toAssemble.size() > 0) { //1. pick the next dep TypedDependency dep = toAssemble.getFirst(); if (dep == null) { toAssemble.poll(); if (counter-- > 0) { toAssemble.add(null); continue; } else { if (toAssemble.size() > 0 && fail) { tree = null; } break; } } //2. assemble it onto the tree Tree newRoot = putOnBranch(dep, tree); //2.1 success -> remove it from the set toAssemble.remove(dep); if (newRoot != null) { tree = newRoot; // System.out.println(tree+" BetterText.makeTree()"); // System.out.println("Added:\t"+dep.gov() +"-->"+dep.dep()); } else { //2.2 fail -> put it back at the tail of the set // System.out.println("Skipd:\t"+dep.gov() +"-->"+dep.dep()); // System.out.print("."); toAssemble.add(toAssemble.size(), dep); } } return tree.getChild(0); }