Usage examples for the yield() method of edu.stanford.nlp.trees.Tree
public ArrayList<Label> yield()
From source file:com.github.kutschkem.Qgen.annotators.SimpleSentenceDecompositionAnnotator.java
License:Open Source License
private List<String> decompose(String documentText) { List<Tree> trees = new ArrayList<Tree>(); for (String sentence : AnalysisUtilities.getSentences(documentText)) { trees.add(AnalysisUtilities.getInstance().parseSentence(sentence).parse); }//from w w w . ja v a2s . c om List<String> result = new ArrayList<String>(); for (Tree t : trees) { TregexPattern p = TregexPattern.compile("ROOT << (NP=np $++ VP=vp) "); TregexMatcher m = p.matcher(t); while (m.find()) { Tree np = m.getNode("np"); Tree vp = m.getNode("vp"); Tree np2 = np.deepCopy(); TregexPattern p2 = TregexPattern.compile("NP << (/^S.*/=sbarq ?. /,/=c1 ?, /,/=c2)"); List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); ps.add(Tsurgeon.parseOperation("prune sbarq")); ps.add(Tsurgeon.parseOperation("prune c1")); ps.add(Tsurgeon.parseOperation("prune c2")); Tsurgeon.processPattern(p2, Tsurgeon.collectOperations(ps), np2); np = np2; Tree newTree = Tree.valueOf("(S " + np + vp + "(. .))"); result.add(AnalysisUtilities.orginialSentence(newTree.yield())); } } return result; }
From source file:com.github.kutschkem.Qgen.annotators.SmithHeilmannTagger.java
License:Open Source License
/**
 * Generates questions for the {@code Sentence} annotations of the CAS and indexes one
 * {@code Question} annotation per generated question.
 *
 * <p>The covered text of every sentence is handed to {@code asker.ask}. Each returned question is
 * mapped back to its source sentence via {@code getSourceSentenceNumber()}; the new annotation
 * takes that sentence's begin/end offsets, the question text, and either the answer phrase text
 * or the literal {@code "Yes"} when the question has no answer phrase tree.
 *
 * @param aJCas the CAS holding the sentence annotations and receiving the questions
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    List<Sentence> sentenceAnnotations = new ArrayList<Sentence>(JCasUtil.select(aJCas, Sentence.class));

    List<String> sentenceTexts = new ArrayList<String>();
    for (Sentence sentence : sentenceAnnotations) {
        sentenceTexts.add(sentence.getCoveredText());
    }

    for (edu.cmu.ark.Question generated : asker.ask(sentenceTexts)) {
        Sentence source = sentenceAnnotations.get(generated.getSourceSentenceNumber());

        Question annotation = new Question(aJCas);
        annotation.setBegin(source.getBegin());
        annotation.setEnd(source.getEnd());

        Tree answerPhrase = generated.getAnswerPhraseTree();
        if (answerPhrase == null) {
            // No answer phrase tree: the answer is recorded as the fixed string "Yes".
            annotation.setAnswer("Yes");
        } else {
            annotation.setAnswer(AnalysisUtilities.orginialSentence(answerPhrase.yield()));
            NERelabeler.relabelWH(JCasUtil.selectCovered(NamedEntity.class, source), annotation.getAnswer(),
                    generated.getTree());
        }

        // The text is read only after relabelWH has been given the question tree.
        annotation.setText(AnalysisUtilities.orginialSentence(generated.getTree().yield()));
        annotation.addToIndexes();
    }
}
From source file:com.project.NLP.Requirement.ClassRelationIdentifier.java
public HashSet identifyAssociation(Tree tree, Set documentClass) { HashSet classRelations = new HashSet(); String phraseNotation = "S<(NP.(VP<NP))"; String verbPhraseNotation = "VBZ|VBP>(VP,(NP>S))"; /* Stemming the sentence */ wordStemmer.visitTree(tree);// ww w . ja v a 2s . c om TregexPattern pattern = TregexPattern.compile(phraseNotation); TregexMatcher matcher = pattern.matcher((Tree) tree); TregexPattern verbPattern = TregexPattern.compile(verbPhraseNotation); while (matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); System.out.println("Sentence match : " + Sentence.listToString(match.yield())); TregexMatcher verbMatcher = verbPattern.matcher(match); // while(verbMatcher.findNextMatchingNode()){ if (verbMatcher.findNextMatchingNode()) { Tree verbMatch = verbMatcher.getMatch(); String verb = Sentence.listToString(verbMatch.yield()); System.out.println("Verb match : " + verb); if (verbPhraseList.contains(verb)) { System.out.println("list contains verb : " + verb); String noun_1_phraseNotation = "NN|NNS>(NP>S)"; String noun_2_phraseNotation = "NN|NNS>>(NP,(VBZ|VBP>(VP,NP)))"; TregexPattern noun_pattern = TregexPattern.compile(noun_1_phraseNotation); TregexMatcher noun_matcher = noun_pattern.matcher((Tree) tree); if (noun_matcher.findNextMatchingNode()) { Tree nounMatch = noun_matcher.getMatch(); String noun1 = Sentence.listToString(nounMatch.yield()); if (documentClass.contains(noun1)) { noun_pattern = TregexPattern.compile(noun_2_phraseNotation); noun_matcher = noun_pattern.matcher((Tree) tree); System.out.println("class list contains noun1 : " + noun1); if (noun_matcher.findNextMatchingNode()) { nounMatch = noun_matcher.getMatch(); String noun2 = Sentence.listToString(nounMatch.yield()); if (!noun1.equals(noun2) && documentClass.contains(noun2)) { ClassRelation clr; System.out.println("class list contains noun2 : " + noun2); if (verb.equals("be")) { clr = new ClassRelation("Generalization", noun1, noun2); System.out.println("class 
generalization"); } else { clr = new ClassRelation("Association", noun2, noun1); System.out.println("class association"); } classRelations.add(clr); } } } } } } } return classRelations; }
From source file:com.project.NLP.Requirement.MethodIdentifier.java
ArrayList getPhrase(ArrayList<Tree> sentenceTree) { /*ref : patterns -http://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/trees/tregex/TregexPattern.html */ String phraseNotation = "VB|VBN>VP";//@VB>VP" ; //& VBN >VP";//"VP<(VB $++NP)";//"VP:VB";//"@"+"VP"+"! << @"+"VP"; ArrayList vpList = new ArrayList(); for (Tree tree : sentenceTree) { System.out.print("\n---tree_sen----" + tree + "----\n"); /* Stemming the sentence */ wordStemmer.visitTree(tree);/* w ww .j a v a 2 s . co m*/ TregexPattern VBpattern = TregexPattern.compile(phraseNotation); TregexMatcher matcher = VBpattern.matcher((Tree) tree); while (matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); String verb = Sentence.listToString(match.yield()); /* Filter to unique verbs */ //List<String> newList = new ArrayList<String>(new HashSet<String>(oldList)); if (!vpList.contains(verb)) { vpList.add(verb); } System.out.print("\n---phrase match----" + match + "----\n"); } } System.out.print("\n---VPList----" + vpList + "----\n"); return vpList; }
From source file:com.project.NLP.Requirement.MethodIdentifier.java
HashSet identifyCandidateMethods(Tree[] tree) { String phraseNotation = "VB|VBN>VP";//@VB>VP" ; //& VBN >VP";//"VP<(VB $++NP)";//"VP:VB";//"@"+"VP"+"! << @"+"VP"; HashSet vpList = new HashSet(); for (Tree childTree : tree) { System.out.print("\n---tree_sen----" + childTree + "----\n"); /* Stemming the sentence */ wordStemmer.visitTree(childTree); TregexPattern VBpattern = TregexPattern.compile(phraseNotation); TregexMatcher matcher = VBpattern.matcher((Tree) childTree); while (matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); String verb = Sentence.listToString(match.yield()); /* Filter to unique verbs */ //List<String> newList = new ArrayList<String>(new HashSet<String>(oldList)); //if(!vpList.contains(verb)){ vpList.add(verb);/* w w w . j a v a2 s . c om*/ //} System.out.print("\n---phrase match----" + match + "----\n"); } } vpList.removeAll(commonVerbs); System.out.print("\n---VPList----" + vpList + "----\n"); return vpList; }
From source file:com.project.NLP.Requirement.MethodIdentifier.java
HashSet identifyCandidateMethods(Tree tree) { String phraseNotation = "VB|VBN>VP";//@VB>VP" ; //& VBN >VP";//"VP<(VB $++NP)";//"VP:VB";//"@"+"VP"+"! << @"+"VP"; HashSet vpList = new HashSet(); /* Stemming the sentence */ wordStemmer.visitTree(tree);//from ww w . j a v a2s. co m TregexPattern VBpattern = TregexPattern.compile(phraseNotation); TregexMatcher matcher = VBpattern.matcher(tree); while (matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); String verb = Sentence.listToString(match.yield()); /* Filter to unique verbs */ //List<String> newList = new ArrayList<String>(new HashSet<String>(oldList)); //if(!vpList.contains(verb)){ vpList.add(verb); //} System.out.print("\n---phrase match----" + match + "----\n"); } vpList.removeAll(commonVerbs); System.out.print("\n------VPList----" + vpList + "----\n"); vpList = removeDesignElements(vpList); return vpList; }
From source file:conditionalCFG.ConditionalCFGParser.java
License:Open Source License
public double validateBinarizedTree(Tree tree, int start) { if (tree.isLeaf()) { return 0.0; }//from w ww .ja v a2 s. com float epsilon = 0.0001f; if (tree.isPreTerminal()) { String wordStr = tree.children()[0].label().value(); int tag = tagIndex.indexOf(tree.label().value()); int word = wordIndex.indexOf(wordStr); IntTaggedWord iTW = new IntTaggedWord(word, tag); float score = lex.score(iTW, start, wordStr, null); float bound = iScore[start][start + 1][stateIndex.indexOf(tree.label().value())]; if (score > bound + epsilon) { System.out.println("Invalid tagging:"); System.out.println(" Tag: " + tree.label().value()); System.out.println(" Word: " + tree.children()[0].label().value()); System.out.println(" Score: " + score); System.out.println(" Bound: " + bound); } return score; } int parent = stateIndex.indexOf(tree.label().value()); int firstChild = stateIndex.indexOf(tree.children()[0].label().value()); if (tree.numChildren() == 1) { UnaryRule ur = new UnaryRule(parent, firstChild); double score = SloppyMath.max(ug.scoreRule(ur), -10000.0) + validateBinarizedTree(tree.children()[0], start); double bound = iScore[start][start + tree.yield().size()][parent]; if (score > bound + epsilon) { System.out.println("Invalid unary:"); System.out.println(" Parent: " + tree.label().value()); System.out.println(" Child: " + tree.children()[0].label().value()); System.out.println(" Start: " + start); System.out.println(" End: " + (start + tree.yield().size())); System.out.println(" Score: " + score); System.out.println(" Bound: " + bound); } return score; } int secondChild = stateIndex.indexOf(tree.children()[1].label().value()); BinaryRule br = new BinaryRule(parent, firstChild, secondChild); double score = SloppyMath.max(bg.scoreRule(br), -10000.0) + validateBinarizedTree(tree.children()[0], start) + validateBinarizedTree(tree.children()[1], start + tree.children()[0].yield().size()); double bound = iScore[start][start + tree.yield().size()][parent]; if (score > bound + 
epsilon) { System.out.println("Invalid binary:"); System.out.println(" Parent: " + tree.label().value()); System.out.println(" LChild: " + tree.children()[0].label().value()); System.out.println(" RChild: " + tree.children()[1].label().value()); System.out.println(" Start: " + start); System.out.println(" End: " + (start + tree.yield().size())); System.out.println(" Score: " + score); System.out.println(" Bound: " + bound); } return score; }
From source file:edu.cmu.ark.AnalysisUtilities.java
License:Open Source License
public static void downcaseFirstToken(Tree inputTree) { Tree firstWordTree = inputTree.getLeaves().get(0); if (firstWordTree == null) return;//from www . j av a2 s .c o m Tree preterm = firstWordTree.parent(inputTree); String firstWord = firstWordTree.yield().toString(); if (!preterm.label().toString().matches("^NNP.*") && !firstWord.equals("I")) { //if(firstWord.indexOf('-') == -1 && !firstWord.equals("I")){ firstWord = firstWord.substring(0, 1).toLowerCase() + firstWord.substring(1); firstWordTree.label().setValue(firstWord); } //if(QuestionTransducer.DEBUG) System.err.println("downcaseFirstToken: "+inputTree.toString()); }
From source file:edu.cmu.ark.AnalysisUtilities.java
License:Open Source License
public static void upcaseFirstToken(Tree inputTree) { Tree firstWordTree = inputTree.getLeaves().get(0); if (firstWordTree == null) return;//from w w w .ja v a2 s . c om String firstWord = firstWordTree.yield().toString(); firstWord = firstWord.substring(0, 1).toUpperCase() + firstWord.substring(1); firstWordTree.label().setValue(firstWord); //if(QuestionTransducer.DEBUG) System.err.println("upcaseFirstToken: "+inputTree.toString()); }
From source file:edu.cmu.ark.AnalysisUtilities.java
License:Open Source License
public static String getCleanedUpYield(Tree inputTree) { Tree copyTree = inputTree.deeperCopy(); //if(GlobalProperties.getDebug()) System.err.println("yield:"+copyTree.toString()); return cleanUpSentenceString(copyTree.yield().toString()); }