List of usage examples for edu.stanford.nlp.trees Tree label
@Override
public Label label()
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * //from w w w.ja va 2s . c om * John studied, hoping to get a good grade. -> John hoped to get a good grade. * * @param extracted * @param input */ private void extractVerbParticipialModifiers(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; tregexOpStr = "S=sub $- /,/ !< NP < (VP=participial < VBG=verb) " + " >+(VP) (S|SINV < NP=subj) " + " >> (ROOT <<# /VB.*/=tense) "; //tense determined by top-most verb matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { String verbPOS = findTense(matcher.getNode("tense")); Tree p = matcher.getNode("participial").deeperCopy(); Tree verb = matcher.getNode("verb"); String verbLemma = AnalysisUtilities.getInstance().getLemma(verb.getChild(0).label().toString(), verb.label().toString()); String newVerb = AnalysisUtilities.getInstance().getSurfaceForm(verbLemma, verbPOS); int verbIndex = p.indexOf(verb); p.removeChild(verbIndex); p.addChild(verbIndex, AnalysisUtilities.getInstance().readTreeFromString("(" + verbPOS + " " + newVerb + ")")); String treeStr = "(ROOT (S " + matcher.getNode("subj").toString() + " " + p.toString() + " (. .)))"; Tree newTree = AnalysisUtilities.getInstance().readTreeFromString(treeStr); correctTense(newTree.getChild(0).getChild(0), newTree.getChild(0)); addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromParticipial", 1.0); if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromVerbParticipial", 1.0); if (GlobalProperties.getDebug()) System.err.println("extractVerbParticipialModifiers: " + newTree.toString()); addIfNovel(extracted, newTreeWithFeatures); } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
private void extractComplementClauses(Collection<Question> extracted, Question input) { Tree subord;// w ww .ja v a2 s.c o m String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; //TODO should also address infinitive complements tregexOpStr = "SBAR " + " < (S=sub !< (VP < VBG)) " + //+ //not a participial phrase " !> NP|PP " + //not part of a noun phrase or PP (other methods for those) " [ $- /^VB.*/=verb | >+(SBAR) (SBAR $- /^VB.*/=verb) ] "; //complement of a VP (follows the verb) matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>()); subord = matcher.getNode("sub"); Tree verb = matcher.getNode("verb"); String verbLemma = AnalysisUtilities.getInstance().getLemma(verb.yield().toString(), verb.label().toString()); if (!verbImpliesComplement(verbLemma)) { continue; } newTree.addChild(subord.deeperCopy()); AnalysisUtilities.addPeriodIfNeeded(newTree); addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromFiniteClause", 1.0); //old feature name if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromComplementClause", 1.0); if (GlobalProperties.getDebug()) System.err.println("extractComplementClauses: " + newTree.toString()); addIfNovel(extracted, newTreeWithFeatures); } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * e.g., Lincoln, the 16th president, was tall. -> Lincoln was the 16th president. * The meeting, in 1984, was important. -> The meeting was in 1984. *//*w w w.j a va 2s. com*/ private void extractAppositives(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; tregexOpStr = "NP < (NP=noun !$-- NP $+ (/,/ $++ NP|PP=appositive !$ CC|CONJP)) " + " >> (ROOT <<# /^VB.*/=mainverb) "; //extract the main verb to capture the verb tense matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { Tree verbtree = matcher.getNode("mainverb"); Tree nountree = matcher.getNode("noun").deeperCopy(); Tree appositivetree = matcher.getNode("appositive"); makeDeterminerDefinite(nountree); //if both are proper nouns, do not extract because this is not an appositive(e.g., "Pittsburgh, PA") /*if(nountree.headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder()).label().toString().equals("NNP") && appositivetree.headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder()).label().toString().equals("NNP")) { continue; }*/ //make a new tree for a copula sentence with the noun and appositive String pos = verbtree.label().toString(); String copula; if (pos.equals("VBD")) { if (isPlural(nountree)) { copula = "(VBD were)"; } else { copula = "(VBD was)"; } } else { if (isPlural(nountree)) { copula = "(VBD are)"; } else { copula = "(VBD is)"; } } Tree newTree = AnalysisUtilities.getInstance().readTreeFromString( "(ROOT (S " + nountree + " (VP " + copula + " " + appositivetree + ") (. .)))"); addQuotationMarksIfNeeded(newTree); if (GlobalProperties.getDebug()) System.err.println("extractAppositives: " + newTree.toString()); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromAppositive", 1.0); addIfNovel(extracted, newTreeWithFeatures); } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * e.g., John, hoping to get a good grade, studied. -> John hoped to get a good grade. * Walking to the store, John saw Susan -> John was walking to the store. * /* w w w .jav a 2s . c om*/ * NOTE: This method produces false positives for sentences like, * "Broadly speaking, the project was successful." * where the participial phrase does not modify the subject. * * @param extracted * @param input */ private void extractNounParticipialModifiers(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; tregexOpStr = "ROOT < (S " + " [ << (NP < (NP=subj $++ (/,/ $+ (VP=modifier <# VBN|VBG|VP=tense )))) " //modifiers that appear after nouns + " | < (S !< NP|SBAR < (VP=modifier <# VBN|VBG|VP=tense) $+ (/,/ $+ NP=subj)) " //modifiers before the subject. e.g., Founded by John, the company... + " | < (SBAR < (S !< NP|SBAR < (VP=modifier <# VBN|VBG=tense)) $+ (/,/ $+ NP=subj)) " //e.g., While walking to the store, John saw Susan. + " | < (PP=modifier !< NP <# VBG=tense $+ (/,/ $+ NP=subj)) ] ) " // e.g., Walking to the store, John saw Susan. + " <<# /^VB.*$/=maintense "; //tense determined by top-most verb matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { Tree nountree = matcher.getNode("subj").deeperCopy(); Tree vptree = matcher.getNode("modifier"); Tree verb = matcher.getNode("tense"); makeDeterminerDefinite(nountree); if (vptree.label().toString().equals("PP")) vptree.label().setValue("VP"); String verbPOS = findTense(matcher.getNode("maintense")); if (vptree == null || nountree == null) return; String newTreeStr; if (verb.label().toString().equals("VBG")) { //for present partcipials, change the tense to the tense of the main verb //e.g., walking to the store -> walked to the store String verbLemma = AnalysisUtilities.getInstance().getLemma(verb.getChild(0).label().toString(), verb.label().toString()); String newVerb = AnalysisUtilities.getInstance().getSurfaceForm(verbLemma, verbPOS); int verbIndex = vptree.indexOf(verb); vptree = vptree.deeperCopy(); vptree.removeChild(verbIndex); vptree.addChild(verbIndex, AnalysisUtilities.getInstance().readTreeFromString("(" + verbPOS + " " + newVerb + ")")); newTreeStr = "(ROOT (S " + matcher.getNode("subj").toString() + " " + vptree.toString() + " (. .)))"; } else { //for past participials, add a copula //e.g., John, exhausted, -> John was exhausted //(or for conjunctions, just add the copula---kind of a hack to make the moby dick sentence work out) String auxiliary; if (verbPOS.equals("VBP") || verbPOS.equals("VBD")) { if (isPlural(nountree)) auxiliary = "(VBD were)"; else auxiliary = "(VBD was)"; } else { if (isPlural(nountree)) auxiliary = "(VB are)"; else auxiliary = "(VBZ is)"; } newTreeStr = "(ROOT (S " + nountree + " (VP " + auxiliary + " " + vptree + ") (. .)))"; } Tree newTree = AnalysisUtilities.getInstance().readTreeFromString(newTreeStr); correctTense(newTree.getChild(0).getChild(0), newTree.getChild(0)); addQuotationMarksIfNeeded(newTree); if (GlobalProperties.getDebug()) System.err.println("extractNounParticipialModifiers: " + newTree.toString()); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromParticipial", 1.0); //old feature name if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromNounParticipial", 1.0); extracted.add(newTreeWithFeatures); } }
From source file:edu.cmu.cs.in.hoop.visualizers.HoopParseTreeViewer.java
License:Open Source License
/** * /*from ww w.java 2 s.com*/ */ private void processInput(String aSentence) { debug("processInput (" + aSentence + ")"); Tree thisTree = theLexicalizedParser.apply(aSentence); debug("We have a tree!"); PrintWriter pw = new PrintWriter(System.out, true); TreePrint posPrinter = new TreePrint("wordsAndTags"); posPrinter.printTree(thisTree, pw); ArrayList ar = thisTree.taggedYield(); debug(ar.toString()); for (Tree subtree : thisTree) { if (thisTree.isLeaf() == true) { debug("Tree leaf: " + subtree.label().value()); } else { debug("Tree node: " + subtree.label().value()); } } treePanel.setTree(thisTree); }
From source file:edu.cmu.deiis.annotator.StanfordCoreNLPAnnotator.java
License:Open Source License
private void addTreebankNodeToIndexes(TreebankNode node, JCas jCas, Tree tree, List<CoreLabel> tokenAnns) { // figure out begin and end character offsets CoreMap label = (CoreMap) tree.label(); CoreMap beginToken = tokenAnns.get(label.get(BeginIndexAnnotation.class)); CoreMap endToken = tokenAnns.get(label.get(EndIndexAnnotation.class) - 1); int nodeBegin = beginToken.get(CharacterOffsetBeginAnnotation.class); int nodeEnd = endToken.get(CharacterOffsetEndAnnotation.class); // set span, node type, children (mutual recursion), and add it to the JCas node.setBegin(nodeBegin);/*from ww w . ja v a2 s . c om*/ node.setEnd(nodeEnd); node.setNodeType(tree.value()); node.setChildren(this.addTreebankNodeChildrenToIndexes(node, jCas, tokenAnns, tree)); node.setLeaf(node.getChildren().size() == 0); node.addToIndexes(); }
From source file:edu.cornell.law.entitylinking.utils.Utility.java
public static List<String> getInnerNounPhrases(String paragraph) { List<String> nounPhrases = new ArrayList<String>(); try {/*w w w .ja va 2s . c o m*/ StringTokenizer tokenizer = new StringTokenizer(paragraph, "\\.;?,:"); while (tokenizer.hasMoreTokens()) { Annotation document = new Annotation(tokenizer.nextToken()); pipeline.annotate(document); Tree tree = null; // these are all the sentences in this document // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types List<CoreMap> sentences = document.get(SentencesAnnotation.class); for (CoreMap sentence : sentences) { // the parse tree of the current sentence tree = sentence.get(TreeAnnotation.class); List<Tree> phraseList = new ArrayList<Tree>(); for (Tree subtree : tree) { if ((subtree.label().value().equals("NP")) || (subtree.label().value().equals("WHNP"))) { phraseList.add(subtree); } } if (!phraseList.isEmpty()) { String skipPhrase = "false"; for (Tree subList : phraseList) { StringBuilder phraseString = new StringBuilder(); String phrase = subList.toString(); String[] tokens = phrase.split(" "); for (String token : tokens) { if (token.contains("(")) { if (token.contains("(NP")) { // Check if there are more NP or WHNP in it? String subPhrase = phrase.replaceFirst("\\(NP", ""); if ((subPhrase.contains("(NP")) || (subPhrase.contains("(WHNP"))) { skipPhrase = "true"; break; } } else if (token.contains("(WHNP")) { // Check if there are more NP or WHNP in it? String subPhrase = phrase.replaceFirst("\\(WHNP", ""); if ((subPhrase.contains("(NP")) || (subPhrase.contains("(WHNP"))) { skipPhrase = "true"; break; } } else { // do nothing, just drop the keyword. } } else { token = token.replace(")", ""); phraseString.append(token + " "); skipPhrase = "false"; } } if (!skipPhrase.equals("true")) { String temp = phraseString.toString().trim(); if (temp.startsWith("(?i)the")) temp = temp.replaceFirst("(?i)the ", ""); else if (temp.startsWith("(?i)a")) temp = temp.replaceFirst("(?i)a ", ""); else if (temp.startsWith("(?i)an")) temp = temp.replaceFirst("(?i)an ", ""); if (temp.contains(" or ")) { String[] nptokens = temp.split(" or "); for (String s : nptokens) { nounPhrases.add(s); } } else { nounPhrases.add(temp); } } } } } } } catch (OutOfMemoryError e) { System.out.println("Result too long to read into memory"); } return nounPhrases; }
From source file:edu.cornell.law.entitylinking.utils.Utility.java
public static List<String> getAllNounPhrases(String paragraph) { List<String> nounPhrases = new ArrayList<String>(); try {//from w w w . j av a 2 s. c o m StringTokenizer tokenizer = new StringTokenizer(paragraph, "\\.;?:,"); while (tokenizer.hasMoreTokens()) { Annotation document = new Annotation(tokenizer.nextToken()); pipeline.annotate(document); Tree tree = null; List<CoreMap> sentences = document.get(SentencesAnnotation.class); for (CoreMap sentence : sentences) { // this is the parse tree of the current sentence tree = sentence.get(TreeAnnotation.class); for (Tree subtree : tree) { if ((subtree.label().value().equals("NP")) || (subtree.label().value().equals("WHNP"))) { String phraseString = Sentence.listToString(subtree.yieldWords()) .replace(" -LRB- ", "(").replace(" -RRB- ", ")"); String temp = phraseString.trim(); if (temp.startsWith("(?i)the")) temp = temp.replaceFirst("(?i)the ", ""); else if (temp.startsWith("(?i)a")) temp = temp.replaceFirst("(?i)a ", ""); else if (temp.startsWith("(?i)an")) temp = temp.replaceFirst("(?i)an ", ""); if (subtree.getChildrenAsList().contains(tree.label().value().equals("NN"))) { //System.out.println("PHRASE"); } if (temp.contains(" or ")) { String[] nptokens = temp.split(" or "); for (String s : nptokens) { nounPhrases.add(s); } } else { nounPhrases.add(temp); } } } } } } catch (OutOfMemoryError e) { System.out.println("Result too long to read into memory"); } return nounPhrases; }
From source file:edu.nus.comp.nlp.stanford.UtilParser.java
License:Open Source License
private static Tree putOnBranch(TypedDependency dep, Tree tree) { /*//w w w . ja v a2 s .co m * Each node is a tree with a single child */ Tree mySubtree = lstf.newTreeNode(dep.gov().label(), new LinkedList<Tree>(dep.dep())); mySubtree.setValue("[<-" + dep.reln() + "-] " + dep.dep().value());//nudge in the dependency relation information if (tree.children().length == 0) { if (tree.label().value().toString().equals("DUMMYROOT")) { tree.addChild(mySubtree); return tree; } else { //Shouldn't happen System.err.println("Forgot to add a child earlier."); return null; } } else { // System.err.println(dep.dep().label() +"\t[on]\t" + tree.label()); for (Tree child : tree.children()) { //if dep is child's parent, insert dep between child and its parent if (((CoreLabel) child.label()).index() == dep.dep().label().index()) { tree.removeChild(tree.objectIndexOf(child)); mySubtree.addChild(child); } } if (mySubtree.children().length > 1) { tree.addChild(mySubtree); return tree; } for (Tree child : tree.children()) { //if dep is Child's sibling, or child if (((CoreLabel) child.label()).index() == dep.gov().label().index()) { tree.addChild(mySubtree); return tree; } if (child.children().length > 0) { if (putOnBranch(dep, child) != null) { return tree; } } } } // tree.getLeaves() == null //check its childrens, recurisively. return null; }
From source file:edu.rpi.tw.linkipedia.search.nlp.NaturalLanguageProcessor.java
License:Open Source License
private List<String> getNounPhraseFromParseTree(Tree parse) { List<String> phraseList = new ArrayList<String>(); for (Tree subtree : parse) { if (subtree.label().value().equals("NP")) { String subtreeString = subtree.toString(); if (subtreeString.lastIndexOf("(NP") != subtreeString.indexOf("(NP")) continue; //System.out.println(subtree); List<LabeledWord> words = subtree.labeledYield(); String currentPhrase = ""; for (LabeledWord word : words) { currentPhrase += word.word() + "|" + word.tag() + " "; }// w w w .ja v a2s .c o m currentPhrase = currentPhrase.trim(); phraseList.add(currentPhrase); } } return phraseList; }