List of usage examples for edu.stanford.nlp.trees.tregex.TregexPattern#matcher(Tree)
public TregexMatcher matcher(Tree t)
From source file:Ceist.CeistView.java
License:Open Source License
/** * Begin a search// w w w. jav a 2 s. c om */ private void runSearch() { //setTregexState(true); Disable buttons while searching Thread searchThread = new Thread() { @Override public void run() { lblSearchStatus.setText("Searching..."); // Initialise search patterns final TregexPattern primary = MatchPattern.getMatchPattern(txtCurrentPattern); if (primary == null) { lblSearchStatus.setText("Bad Pattern!"); return; } SwingUtilities.invokeLater(new Runnable() { public void run() { Treebank treebank = new MemoryTreebank(); // Add the test data set if selected and loaded if (dataSet.testData.isLoaded() && btnUseTestData.isSelected()) treebank.addAll(dataSet.testData.getTreebank()); // Add the development data set if selected and loaded if (dataSet.testData.isLoaded() && btnUseDevelopmentData.isSelected()) treebank.addAll(dataSet.devData.getTreebank()); int treeCount = treebank.size(); int count = 0; // Copy the current matches diffTrees.clear(); diffTrees.addAll(matchedTrees); if (!chkShowPreview.isSelected()) { matchedTrees.clear(); // Clear the table ((DefaultTableModel) tableMatches.getModel()).setRowCount(0); } for (Tree testTree : treebank) { count++; lblSearchStatus.setText(String.format("Searching %d of %d", count, treeCount)); TregexMatcher m = primary.matcher(testTree); //Tree lastMatchingRootNode = null; boolean bMatchFound = false; while (m.find() && !bMatchFound) { matchedTrees.add(testTree); if (chkShowTagged.isSelected()) ((DefaultTableModel) tableMatches.getModel()) .addRow(getMatcherTableRow(m, testTree, true)); else ((DefaultTableModel) tableMatches.getModel()) .addRow(getMatcherTableRow(m, testTree, false)); bMatchFound = true; } } if (matchedTrees.size() > 0) lblSearchStatus.setText(String.format("Found %d matches.", matchedTrees.size())); else lblSearchStatus.setText(String.format("No matches found!")); } }); } }; searchThread.start(); }
From source file:com.github.kutschkem.Qgen.annotators.SimpleSentenceDecompositionAnnotator.java
License:Open Source License
private List<String> decompose(String documentText) { List<Tree> trees = new ArrayList<Tree>(); for (String sentence : AnalysisUtilities.getSentences(documentText)) { trees.add(AnalysisUtilities.getInstance().parseSentence(sentence).parse); }//from w w w . jav a2s .c o m List<String> result = new ArrayList<String>(); for (Tree t : trees) { TregexPattern p = TregexPattern.compile("ROOT << (NP=np $++ VP=vp) "); TregexMatcher m = p.matcher(t); while (m.find()) { Tree np = m.getNode("np"); Tree vp = m.getNode("vp"); Tree np2 = np.deepCopy(); TregexPattern p2 = TregexPattern.compile("NP << (/^S.*/=sbarq ?. /,/=c1 ?, /,/=c2)"); List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); ps.add(Tsurgeon.parseOperation("prune sbarq")); ps.add(Tsurgeon.parseOperation("prune c1")); ps.add(Tsurgeon.parseOperation("prune c2")); Tsurgeon.processPattern(p2, Tsurgeon.collectOperations(ps), np2); np = np2; Tree newTree = Tree.valueOf("(S " + np + vp + "(. .))"); result.add(AnalysisUtilities.orginialSentence(newTree.yield())); } } return result; }
From source file:com.mycompany.stanlp.ChildSpeech.java
/** * @param args the command line arguments *///from ww w . ja va 2s . co m public static void main(String[] args) throws IOException { PrintWriter pw = new PrintWriter(new File("out.csv")); StringBuilder sb = new StringBuilder(); // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); TreeMap<String, String[]> tm = new TreeMap<String, String[]>(); String csvFile = "C:\\Users\\steve\\Downloads\\verbsome.csv"; BufferedReader br = null; String line = ""; String cvsSplitBy = ","; try { br = new BufferedReader(new FileReader(csvFile)); while ((line = br.readLine()) != null) { System.out.println("reached"); String[] country = line.split(cvsSplitBy); String[] input = new String[2]; input[0] = country[0]; input[1] = country[5]; tm.put(country[4], input); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (br != null) { try { br.close(); } catch (IOException e) { e.printStackTrace(); } } } for (Map.Entry<String, String[]> entry : tm.entrySet()) { String[] value = entry.getValue(); Annotation document = new Annotation(value[1]); pipeline.annotate(document); List<CoreMap> sentences = document.get(SentencesAnnotation.class); for (CoreMap sentence : sentences) { for (CoreLabel token : sentence.get(TokensAnnotation.class)) { // this is the text of the token String word = token.get(TextAnnotation.class); //ArrayList<CoreLabel> al = new ArrayList(); if (word.equals(value[0])) { Tree tree = sentence.get(TreeAnnotation.class); //TregexPattern patternMW = TregexPattern.compile("VP([ >># VB | >># VBP | >># VBD] <<" + value[0] + // ")"); TregexPattern patternMW = TregexPattern .compile(" VP [ <# VB | <# VBP | <# VBD] & <<" + value[0]); TregexMatcher matcher = patternMW.matcher(tree); while 
(matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); String tempString = tree.toString(); sb.append(entry.getKey()); sb.append(","); sb.append(value[0]); sb.append(","); sb.append(tempString); sb.append(","); if (match.preTerminalYield().size() == 1) { for (Label l : tree.preTerminalYield()) { sb.append(l.toString()); sb.append("&"); } } else { for (Label l : match.preTerminalYield()) { sb.append(l.toString()); sb.append("&"); } } sb.append(","); sb.append(match.toString()); //sb.append(","); //sb.append(token.get(PartOfSpeechAnnotation.class)); sb.append('\n'); } } // this is the POS tag of the token // this is the NER label of the token //String ne = token.get(NamedEntityTagAnnotation.class); } //SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class); } //Map<Integer, edu.stanford.nlp.dcoref.CorefChain> graph = //document.get(CorefChainAnnotation.class); } pw.write(sb.toString()); pw.close(); }
From source file:com.project.NLP.Requirement.ClassRelationIdentifier.java
public HashSet identifyAssociation(Tree tree, Set documentClass) { HashSet classRelations = new HashSet(); String phraseNotation = "S<(NP.(VP<NP))"; String verbPhraseNotation = "VBZ|VBP>(VP,(NP>S))"; /* Stemming the sentence */ wordStemmer.visitTree(tree);/*from ww w . j av a 2s .c o m*/ TregexPattern pattern = TregexPattern.compile(phraseNotation); TregexMatcher matcher = pattern.matcher((Tree) tree); TregexPattern verbPattern = TregexPattern.compile(verbPhraseNotation); while (matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); System.out.println("Sentence match : " + Sentence.listToString(match.yield())); TregexMatcher verbMatcher = verbPattern.matcher(match); // while(verbMatcher.findNextMatchingNode()){ if (verbMatcher.findNextMatchingNode()) { Tree verbMatch = verbMatcher.getMatch(); String verb = Sentence.listToString(verbMatch.yield()); System.out.println("Verb match : " + verb); if (verbPhraseList.contains(verb)) { System.out.println("list contains verb : " + verb); String noun_1_phraseNotation = "NN|NNS>(NP>S)"; String noun_2_phraseNotation = "NN|NNS>>(NP,(VBZ|VBP>(VP,NP)))"; TregexPattern noun_pattern = TregexPattern.compile(noun_1_phraseNotation); TregexMatcher noun_matcher = noun_pattern.matcher((Tree) tree); if (noun_matcher.findNextMatchingNode()) { Tree nounMatch = noun_matcher.getMatch(); String noun1 = Sentence.listToString(nounMatch.yield()); if (documentClass.contains(noun1)) { noun_pattern = TregexPattern.compile(noun_2_phraseNotation); noun_matcher = noun_pattern.matcher((Tree) tree); System.out.println("class list contains noun1 : " + noun1); if (noun_matcher.findNextMatchingNode()) { nounMatch = noun_matcher.getMatch(); String noun2 = Sentence.listToString(nounMatch.yield()); if (!noun1.equals(noun2) && documentClass.contains(noun2)) { ClassRelation clr; System.out.println("class list contains noun2 : " + noun2); if (verb.equals("be")) { clr = new ClassRelation("Generalization", noun1, noun2); System.out.println("class 
generalization"); } else { clr = new ClassRelation("Association", noun2, noun1); System.out.println("class association"); } classRelations.add(clr); } } } } } } } return classRelations; }
From source file:com.project.NLP.Requirement.MethodIdentifier.java
ArrayList getPhrase(ArrayList<Tree> sentenceTree) { /*ref : patterns -http://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/trees/tregex/TregexPattern.html */ String phraseNotation = "VB|VBN>VP";//@VB>VP" ; //& VBN >VP";//"VP<(VB $++NP)";//"VP:VB";//"@"+"VP"+"! << @"+"VP"; ArrayList vpList = new ArrayList(); for (Tree tree : sentenceTree) { System.out.print("\n---tree_sen----" + tree + "----\n"); /* Stemming the sentence */ wordStemmer.visitTree(tree);/*from w w w. j a v a 2 s .co m*/ TregexPattern VBpattern = TregexPattern.compile(phraseNotation); TregexMatcher matcher = VBpattern.matcher((Tree) tree); while (matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); String verb = Sentence.listToString(match.yield()); /* Filter to unique verbs */ //List<String> newList = new ArrayList<String>(new HashSet<String>(oldList)); if (!vpList.contains(verb)) { vpList.add(verb); } System.out.print("\n---phrase match----" + match + "----\n"); } } System.out.print("\n---VPList----" + vpList + "----\n"); return vpList; }
From source file:com.project.NLP.Requirement.MethodIdentifier.java
HashSet identifyCandidateMethods(Tree[] tree) { String phraseNotation = "VB|VBN>VP";//@VB>VP" ; //& VBN >VP";//"VP<(VB $++NP)";//"VP:VB";//"@"+"VP"+"! << @"+"VP"; HashSet vpList = new HashSet(); for (Tree childTree : tree) { System.out.print("\n---tree_sen----" + childTree + "----\n"); /* Stemming the sentence */ wordStemmer.visitTree(childTree); TregexPattern VBpattern = TregexPattern.compile(phraseNotation); TregexMatcher matcher = VBpattern.matcher((Tree) childTree); while (matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); String verb = Sentence.listToString(match.yield()); /* Filter to unique verbs */ //List<String> newList = new ArrayList<String>(new HashSet<String>(oldList)); //if(!vpList.contains(verb)){ vpList.add(verb);/*from ww w . j av a 2s . co m*/ //} System.out.print("\n---phrase match----" + match + "----\n"); } } vpList.removeAll(commonVerbs); System.out.print("\n---VPList----" + vpList + "----\n"); return vpList; }
From source file:com.project.NLP.Requirement.MethodIdentifier.java
HashSet identifyCandidateMethods(Tree tree) { String phraseNotation = "VB|VBN>VP";//@VB>VP" ; //& VBN >VP";//"VP<(VB $++NP)";//"VP:VB";//"@"+"VP"+"! << @"+"VP"; HashSet vpList = new HashSet(); /* Stemming the sentence */ wordStemmer.visitTree(tree);//from w w w . j a v a 2 s. co m TregexPattern VBpattern = TregexPattern.compile(phraseNotation); TregexMatcher matcher = VBpattern.matcher(tree); while (matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); String verb = Sentence.listToString(match.yield()); /* Filter to unique verbs */ //List<String> newList = new ArrayList<String>(new HashSet<String>(oldList)); //if(!vpList.contains(verb)){ vpList.add(verb); //} System.out.print("\n---phrase match----" + match + "----\n"); } vpList.removeAll(commonVerbs); System.out.print("\n------VPList----" + vpList + "----\n"); vpList = removeDesignElements(vpList); return vpList; }
From source file:com.project.NLP.Requirement.PhrasesIdentification.java
/** * method to get the negative characters from the sentence * * @return arrayList of negative words in a sentence which are denoted by RB * and CC// ww w. j av a 2s . co m */ public ArrayList NegativeSentenceDetection() { String phraseNotation = "RB|CC";//@" + phrase + "! << @" + phrase; TregexPattern VBpattern = TregexPattern.compile(phraseNotation); TregexMatcher matcher = VBpattern.matcher(sTree); ArrayList negativeLists = new ArrayList(); while (matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); Tree[] innerChild = match.children(); for (Tree inChild : innerChild) { negativeLists.add(inChild.getLeaves().get(0).yieldWords().get(0).word()); } } return negativeLists; }
From source file:com.project.NLP.Requirement.PhrasesIdentification.java
/** * method to extract the classes from a sentence * * @return ArrayList: arrayList of classes from a sentence *//*from www.j av a2 s .c o m*/ public ArrayList getClassList() { nounList = new ArrayList(); attributeLists = new ArrayList(); int adjectiveExist = 0; int adjectiveNoun = 0; String adj = ""; String storingClass = ""; HashSet classWithAttr = new HashSet(); storingClassWithAttr = new HashMap<String, HashSet>(); List<Tree> leaves; String phraseNotation = "(NP([<NNS|NN|NNP]$VP))";//@" + phrase + "! << @" + phrase; /*For the single Tree */ TregexPattern VBpattern = TregexPattern.compile(phraseNotation); TregexMatcher matcher = VBpattern.matcher(sTree); String tempClass = ""; while (matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); Tree[] innerChild = match.children(); adjectiveExist = 0; adjectiveNoun = 0; int separator = 0; if (innerChild.length > 1) { int count = 1; int loopCount = 1; for (Tree inChild : innerChild) { if (inChild.value().equals("CC")) { separator = 1; } if ((inChild.value().equals("JJ")) || (inChild.value().equals("VBG"))) { adjectiveExist++; leaves = inChild.getLeaves(); adj = leaves.get(0).yieldWords().get(0).word(); if (dictionaryForClassList.contains(adj)) { adj = ""; } } //if adjective exist store the classes and attributes separately if (adjectiveExist == 1) { storeClassesAndAttributesWhenAdjectiveExistToIdentifyClasses(inChild, adjectiveNoun, adj); } else { //storeClassesAndAttributesWhenAdjectiveNotExistToIdentifyClasses(inChild, loopCount, innerChild, separator, tempClass, count); if ((inChild.value().equals("NN")) || (inChild.value().equals("NNS") || (inChild.value().equals("NNP")))) { leaves = inChild.getLeaves(); //leaves correspond to the tokens if (separator == 0) { if (loopCount == innerChild.length) { String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word()); String word = ""; word = stemmingForAWord(identifiedWord); if (!dictionaryForClassList.contains(word)) { nounList.remove(tempClass); 
nounList.add(word); attributeLists.add(tempClass); } } else if (count == 1) { String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word()); /*if the identified word is having underscore skips the stemming part . ex: user_id*/ String word = stemmingForAWord(identifiedWord); nounList.add(word); tempClass = word; storingClass = word; } else { /*if the identified word is having underscore skips the stemming part . ex: user_id*/ if (tempClass.contains("_")) { nounList.remove(tempClass); } else { nounList.remove(morphology.stem(tempClass)); nounList.remove(tempClass); } String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word()); tempClass += " " + identifiedWord; nounList.add(tempClass); storingClass = tempClass; } count++; } else { String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word()); /*if the identified word is having underscore skips the stemming part . ex: user_id*/ String word = stemmingForAWord(identifiedWord); nounList.add(word); tempClass = word; storingClass = word; } } } loopCount++; } } else { for (Tree inChild : innerChild) { if ((inChild.value().equals("NN")) || (inChild.value().equals("NNS")) || (inChild.value().equals("NNP"))) { leaves = inChild.getLeaves(); //leaves correspond to the tokens String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word()); if (!identifiedWord.contains("_")) { nounList.add(morphology.stem(identifiedWord)); } else { nounList.add(identifiedWord); } } if (inChild.value().equals("JJ")) { //leaves correspond to the tokens leaves = inChild.getLeaves(); nounList.add(((leaves.get(0).yieldWords()).get(0).word())); } } } } System.out.println("NOUN LIST :" + nounList); return nounList; }
From source file:com.project.NLP.Requirement.PhrasesIdentification.java
/** * method to identify the attributes using the tokenization * * @return ArrayList: arrayList of attributes *//*w ww .jav a 2s . c o m*/ public ArrayList getAttributeList() { nounList = new ArrayList(); attributeLists = new ArrayList(); ArrayList adjAtt = new ArrayList(); int separator = 0; List<Tree> leaves; String phraseNotation = "NP([<NNS|NN|NNP]![<JJ|VBG])!$VP";// !<VBG";//@" + phrase + "! << @" + phrase; TregexPattern VBpattern = TregexPattern.compile(phraseNotation); TregexMatcher matcher = VBpattern.matcher(sTree); while (matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); Tree[] innerChild = match.children(); int adjectiveExist = 0; String adj = ""; String attribute = ""; String b = ""; if (innerChild.length > 1) { int count = 1; for (Tree inChild : innerChild) { if (inChild.value().equals("CC") || inChild.value().equals(",")) { separator = 1; } if ((inChild.value().equals("JJ")) || (inChild.value().equals("VBG"))) { adjectiveExist++; leaves = inChild.getLeaves(); adj = leaves.get(0).toString(); if (designEleList.contains(adj)) { adj = ""; } } if ((inChild.value().equals("NN")) || (inChild.value().equals("NNS")) || (inChild.value().equals("NNP"))) { leaves = inChild.getLeaves(); //leaves correspond to the tokens if (count == 1) { if (adjectiveExist == 1) { attribute = adj + " " + leaves.get(0).yieldWords().get(0).word(); } else { attribute = leaves.get(0).yieldWords().get(0).word(); } if (!designEleList.contains(attribute)) { String identifiedWord = attribute; if (!identifiedWord.contains("_")) { attributeLists.add(morphology.stem(identifiedWord)); } else { attributeLists.add(identifiedWord); } } } else if (count >= 2 && separator == 0) { if (!attribute.contains("_")) { attributeLists.remove(morphology.stem(attribute)); attributeLists.remove(attribute); } else { attributeLists.remove(attribute); } attribute += " " + (leaves.get(0).yieldWords()).get(0).word(); attributeLists.add(attribute); } else if (count >= 2 && separator == 1) { 
attribute = (leaves.get(0).yieldWords()).get(0).word(); if (!attribute.contains("_")) { attributeLists.add(morphology.stem(attribute)); } else { attributeLists.add(attribute); } separator = 0; } count++; } } } else { for (Tree inChild : innerChild) { if ((inChild.value().equals("NN")) || (inChild.value().equals("NNS"))) { leaves = inChild.getLeaves(); //leaves correspond to the tokens String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word()); if (!identifiedWord.contains("_")) { attributeLists.add(morphology.stem(identifiedWord)); } else { attributeLists.add(identifiedWord); } } } } } adjAtt = getAdjectiveAttribute(); if (!adjAtt.isEmpty()) { String att = ""; for (int i = 0; i < adjAtt.size(); i++) { att = adjAtt.get(i).toString(); if (!att.isEmpty() || !att.equals("") || !(att.equals(" "))) { attributeLists.add(att.trim()); } } } System.out.println("ATTRIBUTE LIST :" + attributeLists); return attributeLists; }