List of usage examples for edu.stanford.nlp.trees.tregex TregexMatcher find
public boolean find()
From source file:Ceist.CeistView.java
License:Open Source License
/** * Begin a search/* w w w .j av a2s. com*/ */ private void runSearch() { //setTregexState(true); Disable buttons while searching Thread searchThread = new Thread() { @Override public void run() { lblSearchStatus.setText("Searching..."); // Initialise search patterns final TregexPattern primary = MatchPattern.getMatchPattern(txtCurrentPattern); if (primary == null) { lblSearchStatus.setText("Bad Pattern!"); return; } SwingUtilities.invokeLater(new Runnable() { public void run() { Treebank treebank = new MemoryTreebank(); // Add the test data set if selected and loaded if (dataSet.testData.isLoaded() && btnUseTestData.isSelected()) treebank.addAll(dataSet.testData.getTreebank()); // Add the development data set if selected and loaded if (dataSet.testData.isLoaded() && btnUseDevelopmentData.isSelected()) treebank.addAll(dataSet.devData.getTreebank()); int treeCount = treebank.size(); int count = 0; // Copy the current matches diffTrees.clear(); diffTrees.addAll(matchedTrees); if (!chkShowPreview.isSelected()) { matchedTrees.clear(); // Clear the table ((DefaultTableModel) tableMatches.getModel()).setRowCount(0); } for (Tree testTree : treebank) { count++; lblSearchStatus.setText(String.format("Searching %d of %d", count, treeCount)); TregexMatcher m = primary.matcher(testTree); //Tree lastMatchingRootNode = null; boolean bMatchFound = false; while (m.find() && !bMatchFound) { matchedTrees.add(testTree); if (chkShowTagged.isSelected()) ((DefaultTableModel) tableMatches.getModel()) .addRow(getMatcherTableRow(m, testTree, true)); else ((DefaultTableModel) tableMatches.getModel()) .addRow(getMatcherTableRow(m, testTree, false)); bMatchFound = true; } } if (matchedTrees.size() > 0) lblSearchStatus.setText(String.format("Found %d matches.", matchedTrees.size())); else lblSearchStatus.setText(String.format("No matches found!")); } }); } }; searchThread.start(); }
From source file:com.github.kutschkem.Qgen.annotators.SimpleSentenceDecompositionAnnotator.java
License:Open Source License
/**
 * Decomposes a document into simple sentences.
 *
 * Every sentence returned by AnalysisUtilities.getSentences is parsed. For
 * each NP/VP pair matched in a parse tree, a copy of the NP is pruned of
 * embedded clause-like nodes and adjacent commas, and a new
 * "(S NP VP (. .))" tree is built. The surface string of each new tree, as
 * produced by AnalysisUtilities.orginialSentence, is collected.
 *
 * @param documentText the raw text to split and parse
 * @return one string per NP/VP match, in the order found
 */
private List<String> decompose(String documentText) {
    List<Tree> trees = new ArrayList<Tree>();
    for (String sentence : AnalysisUtilities.getSentences(documentText)) {
        trees.add(AnalysisUtilities.getInstance().parseSentence(sentence).parse);
    }

    // The patterns and the prune operations are constant, so they are built once
    // here instead of once per tree / once per match.
    TregexPattern clausePattern = TregexPattern.compile("ROOT << (NP=np $++ VP=vp) ");
    // Inside an NP: a node whose label starts with "S", plus the optional commas
    // directly after (c1) and directly before (c2) it.
    TregexPattern prunePattern = TregexPattern.compile("NP << (/^S.*/=sbarq ?. /,/=c1 ?, /,/=c2)");
    List<TsurgeonPattern> pruneSteps = new ArrayList<TsurgeonPattern>();
    pruneSteps.add(Tsurgeon.parseOperation("prune sbarq"));
    pruneSteps.add(Tsurgeon.parseOperation("prune c1"));
    pruneSteps.add(Tsurgeon.parseOperation("prune c2"));
    TsurgeonPattern pruneOperation = Tsurgeon.collectOperations(pruneSteps);

    List<String> result = new ArrayList<String>();
    for (Tree t : trees) {
        TregexMatcher m = clausePattern.matcher(t);
        while (m.find()) {
            // Work on a copy so the tree being matched is not modified.
            Tree np = m.getNode("np").deepCopy();
            Tree vp = m.getNode("vp");

            Tsurgeon.processPattern(prunePattern, pruneOperation, np);

            Tree newTree = Tree.valueOf("(S " + np + vp + "(. .))");
            result.add(AnalysisUtilities.orginialSentence(newTree.yield()));
        }
    }

    return result;
}
From source file:edu.cmu.ark.AnalysisUtilities.java
License:Open Source License
public static void addPeriodIfNeeded(Tree input) { String tregexOpStr = "ROOT < (S=mainclause !< /\\./)"; TregexPattern matchPattern = TregexPatternFactory.getPattern(tregexOpStr); TregexMatcher matcher = matchPattern.matcher(input); if (matcher.find()) { TsurgeonPattern p;//from w w w . j av a 2 s . c o m List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); ps.add(Tsurgeon.parseOperation("insert (. .) >-1 mainclause")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, input); } }
From source file:edu.cmu.ark.AnalysisUtilities.java
License:Open Source License
public static int getNumberOfMatchesInTree(String tregexExpression, Tree t) { int res = 0;// ww w.j a v a 2 s . c om TregexMatcher m = TregexPatternFactory.getPattern(tregexExpression).matcher(t); while (m.find()) { res++; } return res; }
From source file:edu.cmu.ark.AnalysisUtilities.java
License:Open Source License
/** * Remove traces and non-terminal decorations (e.g., "-SUBJ" in "NP-SUBJ") from a Penn Treebank-style tree. * * @param inputTree/*from ww w . j av a2 s . co m*/ */ public void normalizeTree(Tree inputTree) { inputTree.label().setFromString("ROOT"); List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); String tregexOpStr; TregexPattern matchPattern; TsurgeonPattern p; TregexMatcher matcher; tregexOpStr = "/\\-NONE\\-/=emptynode"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(inputTree); ps.add(Tsurgeon.parseOperation("prune emptynode")); matchPattern = TregexPatternFactory.getPattern(tregexOpStr); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, inputTree); Label nonterminalLabel; tregexOpStr = "/.+\\-.+/=nonterminal < __"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(inputTree); while (matcher.find()) { nonterminalLabel = matcher.getNode("nonterminal"); if (nonterminalLabel == null) continue; nonterminalLabel.setFromString(tlp.basicCategory(nonterminalLabel.value())); } }
From source file:edu.cmu.ark.nlp.question.Question.java
License:Open Source License
public List<Tree> findLogicalWordsAboveIntermediateTree() { List<Tree> res = new ArrayList<Tree>(); Tree pred = intermediateTree.getChild(0).headPreTerminal(this.hf); String lemma = QuestionUtil.getLemma(pred.yield().toString(), pred.label().toString()); String tregexOpStr;//from w w w . j av a2 s.c o m TregexPattern matchPattern; TregexMatcher matcher; Tree sourcePred = null; for (Tree leaf : sourceTree.getLeaves()) { Tree tmp = leaf.parent(sourceTree); String sourceLemma = QuestionUtil.getLemma(leaf.label().toString(), tmp.label().toString()); if (sourceLemma.equals(lemma)) { sourcePred = tmp; break; } } tregexOpStr = "RB|VB|VBD|VBP|VBZ|IN|MD|WRB|WDT|CC=command"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(sourceTree); Tree command; while (matcher.find() && sourcePred != null) { command = matcher.getNode("command"); if (QuestionUtil.cCommands(sourceTree, command, sourcePred) && command.parent(sourceTree) != sourcePred.parent(sourceTree)) { res.add(command); } } return res; }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * This is a simple hack to avoid bad output for a few special cases. * Specifically, we want to avoid extracting * from phrases with "according" and "including", * which syntactically look like participial phrases. * //from www .j av a 2 s. co m */ private boolean mainVerbOK(Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; //avoid extracting sentences from "...according to X..." tregexOpStr = "ROOT <+(VP|S) (/VB.*/ < /(accord.*|includ.*)/)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); boolean res = !matcher.find(); return res; }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
private boolean hasBreakableConjunction(Question input) { String tregexOpStr;// w w w . ja v a 2s . co m TregexPattern matchPattern; TregexMatcher matcher; //conjoined VPs, clauses, etc. tregexOpStr = "CONJP|CC !< either|or|neither|nor > S|SBAR|VP" + " [ $ SBAR|S | !>> SBAR ] "; //we can break conjoined SBARs, but not anything else under an SBAR node matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); if (matcher.find()) { return true; } //clauses conjoined by semi-colons tregexOpStr = " S < (S=child $ (/:/ < /;/) !$++ (/:/ < /;/) ) "; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); if (matcher.find()) { return true; } if (breakNPs) { tregexOpStr = "CONJP|CC !< either|or|neither|nor > NP !>> SBAR " + " !> (NP < (/^(N.*|SBAR|PRP)$/ !$ /^(N.*|SBAR|PRP)$/))"; //the latter part is to address special cases of flat NPs in treebank: //we allow NPs like "(NP (JJ eastern) (CC and) (JJ western) (NNS coasts))" //because we can't easily split them matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); if (matcher.find()) { return true; } } return false; }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * Returns whether the input sentence has a subject and a finite main verb. * If it does not, then we do not want to add it to the output. * /*ww w . j a va2s . co m*/ * @param input * @return */ private boolean hasSubjectAndFiniteMainVerb(Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; tregexOpStr = "ROOT " + //main clause dominates... " <+(S) NP|SBAR <+(VP|S) VB|VBD|VBP|VBZ !<+(VP) TO"; //AND also dominates a finite, non-participle verb //allowing VBN would allow participial phrases like "founded by Bill Gates" //" [ < /^(PRP|N.*|SBAR|PP)$/ " + //either PRP for pronoun, N for NP|NN|NNS... //" | < (S < (VP < TO|VBG)) ] " + // or a non-finite verb phrase (e.g., "walking") matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); boolean res = matcher.find(); return res; }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * e.g., John and James like Susan. -> John likes Susan. * /*from w ww .j ava 2 s . c o m*/ */ private void extractConjoinedNPs(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; Tree conjoinedNode; Tree parent; TregexMatcher matcher; Question newQuestion; //only extract conjoined NPs that are arguments or adjuncts of the main verb // in the tree, this means the closest S will be the one under the root tregexOpStr = "NP=parent < (CONJP|CC !< or|nor [ " + " $+ /^(N.*|PRP|SBAR)$/=child $-- /^(N.*|PRP|SBAR)$/ | " //there must be a noun on each side of the conjunction + " $-- /^(N.*|PRP|SBAR)$/=child $+ /^(N.*|PRP|SBAR)$/ ] ) " //this avoids extracting from flat NPs such as "the smaller and darker form" + " !>> (/.*/ $ (CC|CONJP !< or|nor)) " //this cannot be nested within a larger conjunction or followed by a conjunction (we recur later to catch this) + " !$ (CC|CONJP !< or|nor)" + " !.. (CC|CONJP !< or|nor > NP|PP|S|SBAR|VP) !>> SBAR "; //+ " >> (ROOT !< (S <+(VP) (/^VB.*$/ < are|were|be|seem|appear))) " ; //don't break plural predicate nominatives (e.g., "John and Mary are two of my best friends.") matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); List<Integer> nodeIndexes = new ArrayList<Integer>(); List<Integer> parentIDs = new ArrayList<Integer>(); while (matcher.find()) { //store the parents' IDs (in the tree) parent = matcher.getNode("parent"); parentIDs.add(parent.nodeNumber(input.getIntermediateTree())); conjoinedNode = matcher.getNode("child"); //store the conjoined nodes' index into their parent's list of children int idx = parent.objectIndexOf(conjoinedNode); if (!nodeIndexes.contains(idx)) nodeIndexes.add(idx); } //for each of the conjoined children, //create a new tree by removing all the nodes they are conjoined with Collections.sort(nodeIndexes);//sort, just to keep them in the original order for (int i = 0; i < nodeIndexes.size(); 
i++) { newQuestion = input.deeperCopy(); Tree t = newQuestion.getIntermediateTree(); parent = t.getNodeNumber(parentIDs.get(i)); Tree gparent = parent.parent(t); conjoinedNode = parent.getChild(nodeIndexes.get(i)); String siblingLabel; //Remove all the nodes that are conjoined //with the selected noun (or are conjunctions, commas). //These can have labels NP, NN, ..., PRP for pronouns, CC, "," for commas, ":" for semi-colons for (int j = 0; j < parent.numChildren(); j++) { if (parent.getChild(j) == conjoinedNode) continue; siblingLabel = parent.getChild(j).label().toString(); if (siblingLabel.matches("^[NCP,:S].*")) { parent.removeChild(j); j--; } } //if there is an trivial unary "NP -> NP", //remove the parent and put the child in its place if (parent.numChildren() == 1 && parent.getChild(0).label().equals("NP")) { int tmpIndex = gparent.objectIndexOf(parent); gparent.removeChild(tmpIndex); gparent.addChild(tmpIndex, parent.getChild(0)); } correctTense(conjoinedNode, gparent); addQuotationMarksIfNeeded(newQuestion.getIntermediateTree()); //if(GlobalProperties.getDebug()) System.err.println("extractConjoinedNPs: "+newQuestion.getIntermediateTree().toString()); if (this.getComputeFeatures) newQuestion.setFeatureValue("extractedFromConjoinedPhrases", 1.0); //old feature name if (this.getComputeFeatures) newQuestion.setFeatureValue("extractedFromConjoinedNPs", 1.0); extracted.add(newQuestion); } }