List of usage examples for the method edu.stanford.nlp.trees.tregex.TregexMatcher#findNextMatchingNode()
public boolean findNextMatchingNode()
From source file:com.mycompany.stanlp.ChildSpeech.java
/** * @param args the command line arguments *//*from w w w . ja v a2s .c om*/ public static void main(String[] args) throws IOException { PrintWriter pw = new PrintWriter(new File("out.csv")); StringBuilder sb = new StringBuilder(); // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); TreeMap<String, String[]> tm = new TreeMap<String, String[]>(); String csvFile = "C:\\Users\\steve\\Downloads\\verbsome.csv"; BufferedReader br = null; String line = ""; String cvsSplitBy = ","; try { br = new BufferedReader(new FileReader(csvFile)); while ((line = br.readLine()) != null) { System.out.println("reached"); String[] country = line.split(cvsSplitBy); String[] input = new String[2]; input[0] = country[0]; input[1] = country[5]; tm.put(country[4], input); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (br != null) { try { br.close(); } catch (IOException e) { e.printStackTrace(); } } } for (Map.Entry<String, String[]> entry : tm.entrySet()) { String[] value = entry.getValue(); Annotation document = new Annotation(value[1]); pipeline.annotate(document); List<CoreMap> sentences = document.get(SentencesAnnotation.class); for (CoreMap sentence : sentences) { for (CoreLabel token : sentence.get(TokensAnnotation.class)) { // this is the text of the token String word = token.get(TextAnnotation.class); //ArrayList<CoreLabel> al = new ArrayList(); if (word.equals(value[0])) { Tree tree = sentence.get(TreeAnnotation.class); //TregexPattern patternMW = TregexPattern.compile("VP([ >># VB | >># VBP | >># VBD] <<" + value[0] + // ")"); TregexPattern patternMW = TregexPattern .compile(" VP [ <# VB | <# VBP | <# VBD] & <<" + value[0]); TregexMatcher matcher = patternMW.matcher(tree); while 
(matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); String tempString = tree.toString(); sb.append(entry.getKey()); sb.append(","); sb.append(value[0]); sb.append(","); sb.append(tempString); sb.append(","); if (match.preTerminalYield().size() == 1) { for (Label l : tree.preTerminalYield()) { sb.append(l.toString()); sb.append("&"); } } else { for (Label l : match.preTerminalYield()) { sb.append(l.toString()); sb.append("&"); } } sb.append(","); sb.append(match.toString()); //sb.append(","); //sb.append(token.get(PartOfSpeechAnnotation.class)); sb.append('\n'); } } // this is the POS tag of the token // this is the NER label of the token //String ne = token.get(NamedEntityTagAnnotation.class); } //SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class); } //Map<Integer, edu.stanford.nlp.dcoref.CorefChain> graph = //document.get(CorefChainAnnotation.class); } pw.write(sb.toString()); pw.close(); }
From source file:com.project.NLP.Requirement.ClassRelationIdentifier.java
public HashSet identifyAssociation(Tree tree, Set documentClass) { HashSet classRelations = new HashSet(); String phraseNotation = "S<(NP.(VP<NP))"; String verbPhraseNotation = "VBZ|VBP>(VP,(NP>S))"; /* Stemming the sentence */ wordStemmer.visitTree(tree);/* w w w . ja v a 2 s . c o m*/ TregexPattern pattern = TregexPattern.compile(phraseNotation); TregexMatcher matcher = pattern.matcher((Tree) tree); TregexPattern verbPattern = TregexPattern.compile(verbPhraseNotation); while (matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); System.out.println("Sentence match : " + Sentence.listToString(match.yield())); TregexMatcher verbMatcher = verbPattern.matcher(match); // while(verbMatcher.findNextMatchingNode()){ if (verbMatcher.findNextMatchingNode()) { Tree verbMatch = verbMatcher.getMatch(); String verb = Sentence.listToString(verbMatch.yield()); System.out.println("Verb match : " + verb); if (verbPhraseList.contains(verb)) { System.out.println("list contains verb : " + verb); String noun_1_phraseNotation = "NN|NNS>(NP>S)"; String noun_2_phraseNotation = "NN|NNS>>(NP,(VBZ|VBP>(VP,NP)))"; TregexPattern noun_pattern = TregexPattern.compile(noun_1_phraseNotation); TregexMatcher noun_matcher = noun_pattern.matcher((Tree) tree); if (noun_matcher.findNextMatchingNode()) { Tree nounMatch = noun_matcher.getMatch(); String noun1 = Sentence.listToString(nounMatch.yield()); if (documentClass.contains(noun1)) { noun_pattern = TregexPattern.compile(noun_2_phraseNotation); noun_matcher = noun_pattern.matcher((Tree) tree); System.out.println("class list contains noun1 : " + noun1); if (noun_matcher.findNextMatchingNode()) { nounMatch = noun_matcher.getMatch(); String noun2 = Sentence.listToString(nounMatch.yield()); if (!noun1.equals(noun2) && documentClass.contains(noun2)) { ClassRelation clr; System.out.println("class list contains noun2 : " + noun2); if (verb.equals("be")) { clr = new ClassRelation("Generalization", noun1, noun2); System.out.println("class 
generalization"); } else { clr = new ClassRelation("Association", noun2, noun1); System.out.println("class association"); } classRelations.add(clr); } } } } } } } return classRelations; }
From source file:com.project.NLP.Requirement.MethodIdentifier.java
ArrayList getPhrase(ArrayList<Tree> sentenceTree) { /*ref : patterns -http://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/trees/tregex/TregexPattern.html */ String phraseNotation = "VB|VBN>VP";//@VB>VP" ; //& VBN >VP";//"VP<(VB $++NP)";//"VP:VB";//"@"+"VP"+"! << @"+"VP"; ArrayList vpList = new ArrayList(); for (Tree tree : sentenceTree) { System.out.print("\n---tree_sen----" + tree + "----\n"); /* Stemming the sentence */ wordStemmer.visitTree(tree);/*w ww . j av a 2 s . c om*/ TregexPattern VBpattern = TregexPattern.compile(phraseNotation); TregexMatcher matcher = VBpattern.matcher((Tree) tree); while (matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); String verb = Sentence.listToString(match.yield()); /* Filter to unique verbs */ //List<String> newList = new ArrayList<String>(new HashSet<String>(oldList)); if (!vpList.contains(verb)) { vpList.add(verb); } System.out.print("\n---phrase match----" + match + "----\n"); } } System.out.print("\n---VPList----" + vpList + "----\n"); return vpList; }
From source file:com.project.NLP.Requirement.MethodIdentifier.java
HashSet identifyCandidateMethods(Tree[] tree) { String phraseNotation = "VB|VBN>VP";//@VB>VP" ; //& VBN >VP";//"VP<(VB $++NP)";//"VP:VB";//"@"+"VP"+"! << @"+"VP"; HashSet vpList = new HashSet(); for (Tree childTree : tree) { System.out.print("\n---tree_sen----" + childTree + "----\n"); /* Stemming the sentence */ wordStemmer.visitTree(childTree); TregexPattern VBpattern = TregexPattern.compile(phraseNotation); TregexMatcher matcher = VBpattern.matcher((Tree) childTree); while (matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); String verb = Sentence.listToString(match.yield()); /* Filter to unique verbs */ //List<String> newList = new ArrayList<String>(new HashSet<String>(oldList)); //if(!vpList.contains(verb)){ vpList.add(verb);/*from ww w .j a v a2 s. c om*/ //} System.out.print("\n---phrase match----" + match + "----\n"); } } vpList.removeAll(commonVerbs); System.out.print("\n---VPList----" + vpList + "----\n"); return vpList; }
From source file:com.project.NLP.Requirement.MethodIdentifier.java
HashSet identifyCandidateMethods(Tree tree) { String phraseNotation = "VB|VBN>VP";//@VB>VP" ; //& VBN >VP";//"VP<(VB $++NP)";//"VP:VB";//"@"+"VP"+"! << @"+"VP"; HashSet vpList = new HashSet(); /* Stemming the sentence */ wordStemmer.visitTree(tree);/*from ww w . ja va 2 s . c om*/ TregexPattern VBpattern = TregexPattern.compile(phraseNotation); TregexMatcher matcher = VBpattern.matcher(tree); while (matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); String verb = Sentence.listToString(match.yield()); /* Filter to unique verbs */ //List<String> newList = new ArrayList<String>(new HashSet<String>(oldList)); //if(!vpList.contains(verb)){ vpList.add(verb); //} System.out.print("\n---phrase match----" + match + "----\n"); } vpList.removeAll(commonVerbs); System.out.print("\n------VPList----" + vpList + "----\n"); vpList = removeDesignElements(vpList); return vpList; }
From source file:com.project.NLP.Requirement.PhrasesIdentification.java
/** * method to get the negative characters from the sentence * * @return arrayList of negative words in a sentence which are denoted by RB * and CC/*from w ww. ja v a 2s. c o m*/ */ public ArrayList NegativeSentenceDetection() { String phraseNotation = "RB|CC";//@" + phrase + "! << @" + phrase; TregexPattern VBpattern = TregexPattern.compile(phraseNotation); TregexMatcher matcher = VBpattern.matcher(sTree); ArrayList negativeLists = new ArrayList(); while (matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); Tree[] innerChild = match.children(); for (Tree inChild : innerChild) { negativeLists.add(inChild.getLeaves().get(0).yieldWords().get(0).word()); } } return negativeLists; }
From source file:com.project.NLP.Requirement.PhrasesIdentification.java
/**
 * Extracts the candidate classes (nouns) from the sentence tree {@code sTree}.
 *
 * <p>Resets the fields {@code nounList}, {@code attributeLists} and {@code storingClassWithAttr},
 * then walks the children of every node matched by the Tregex pattern
 * {@code (NP([<NNS|NN|NNP]$VP))}, i.e. NP nodes that have a noun child and a VP sister.
 * Consecutive nouns are joined into one multi-word entry; in a multi-child NP without a separator
 * the last noun becomes the class and the words collected before it are moved to
 * {@code attributeLists}. The resulting list is also printed to standard output.
 *
 * @return ArrayList: arrayList of classes from a sentence (the {@code nounList} field)
 */
public ArrayList getClassList() {
    nounList = new ArrayList();
    attributeLists = new ArrayList();
    int adjectiveExist = 0;
    // NOTE(review): adjectiveNoun is only ever set to 0 in this method - confirm whether the
    // helper below is expected to see another value.
    int adjectiveNoun = 0;
    String adj = "";
    // NOTE(review): storingClass and classWithAttr are written but never read in this method.
    String storingClass = "";
    HashSet classWithAttr = new HashSet();
    storingClassWithAttr = new HashMap<String, HashSet>();
    List<Tree> leaves;
    String phraseNotation = "(NP([<NNS|NN|NNP]$VP))";//@" + phrase + "! << @" + phrase;

    /*For the single Tree */
    TregexPattern VBpattern = TregexPattern.compile(phraseNotation);
    TregexMatcher matcher = VBpattern.matcher(sTree);
    // tempClass is declared outside the loop, so its value carries over from one match to the next.
    String tempClass = "";

    while (matcher.findNextMatchingNode()) {
        Tree match = matcher.getMatch();
        Tree[] innerChild = match.children();
        adjectiveExist = 0;
        adjectiveNoun = 0;
        // becomes 1 once a CC child has been seen in this NP
        int separator = 0;

        if (innerChild.length > 1) {
            // count: 1-based index of the next noun handled in the separator-free branch
            int count = 1;
            // loopCount: 1-based index of the current child among all children
            int loopCount = 1;

            for (Tree inChild : innerChild) {
                if (inChild.value().equals("CC")) {
                    separator = 1;
                }
                if ((inChild.value().equals("JJ")) || (inChild.value().equals("VBG"))) {
                    adjectiveExist++;
                    leaves = inChild.getLeaves();
                    adj = leaves.get(0).yieldWords().get(0).word();
                    // adjectives listed in the dictionary are blanked out
                    if (dictionaryForClassList.contains(adj)) {
                        adj = "";
                    }
                }
                //if adjective exist store the classes and attributes separately
                // (only while exactly one adjective has been seen; this also covers the adjective child itself)
                if (adjectiveExist == 1) {
                    storeClassesAndAttributesWhenAdjectiveExistToIdentifyClasses(inChild, adjectiveNoun, adj);
                } else {
                    //storeClassesAndAttributesWhenAdjectiveNotExistToIdentifyClasses(inChild, loopCount, innerChild, separator, tempClass, count);
                    if ((inChild.value().equals("NN")) || (inChild.value().equals("NNS") || (inChild.value().equals("NNP")))) {
                        leaves = inChild.getLeaves(); //leaves correspond to the tokens
                        if (separator == 0) {
                            if (loopCount == innerChild.length) {
                                // Last child of the NP: it becomes the class, and what was
                                // collected so far is moved to the attribute list.
                                String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());
                                String word = "";
                                word = stemmingForAWord(identifiedWord);
                                if (!dictionaryForClassList.contains(word)) {
                                    nounList.remove(tempClass);
                                    nounList.add(word);
                                    attributeLists.add(tempClass);
                                }
                            } else if (count == 1) {
                                // First noun of the NP: starts a new candidate class.
                                String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());
                                /*if the identified word is having underscore skips the stemming part . ex: user_id*/
                                String word = stemmingForAWord(identifiedWord);
                                nounList.add(word);
                                tempClass = word;
                                storingClass = word;
                            } else {
                                // Further noun: replace the previous entry by the joined multi-word name.
                                /*if the identified word is having underscore skips the stemming part . ex: user_id*/
                                if (tempClass.contains("_")) {
                                    nounList.remove(tempClass);
                                } else {
                                    nounList.remove(morphology.stem(tempClass));
                                    nounList.remove(tempClass);
                                }
                                String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());
                                tempClass += " " + identifiedWord;
                                nounList.add(tempClass);
                                storingClass = tempClass;
                            }
                            count++;
                        } else {
                            // A CC was seen earlier in this NP: every noun is a class of its own.
                            String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());
                            /*if the identified word is having underscore skips the stemming part . ex: user_id*/
                            String word = stemmingForAWord(identifiedWord);
                            nounList.add(word);
                            tempClass = word;
                            storingClass = word;
                        }
                    }
                }
                loopCount++;
            }
        } else {
            // NP with at most one child: nouns are stemmed unless they contain '_',
            // a lone JJ child is added unchanged.
            for (Tree inChild : innerChild) {
                if ((inChild.value().equals("NN")) || (inChild.value().equals("NNS")) || (inChild.value().equals("NNP"))) {
                    leaves = inChild.getLeaves(); //leaves correspond to the tokens
                    String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());
                    if (!identifiedWord.contains("_")) {
                        nounList.add(morphology.stem(identifiedWord));
                    } else {
                        nounList.add(identifiedWord);
                    }
                }
                if (inChild.value().equals("JJ")) {
                    //leaves correspond to the tokens
                    leaves = inChild.getLeaves();
                    nounList.add(((leaves.get(0).yieldWords()).get(0).word()));
                }
            }
        }
    }
    System.out.println("NOUN LIST :" + nounList);
    return nounList;
}
From source file:com.project.NLP.Requirement.PhrasesIdentification.java
/** * method to identify the attributes using the tokenization * * @return ArrayList: arrayList of attributes *///w ww . j av a 2s . co m public ArrayList getAttributeList() { nounList = new ArrayList(); attributeLists = new ArrayList(); ArrayList adjAtt = new ArrayList(); int separator = 0; List<Tree> leaves; String phraseNotation = "NP([<NNS|NN|NNP]![<JJ|VBG])!$VP";// !<VBG";//@" + phrase + "! << @" + phrase; TregexPattern VBpattern = TregexPattern.compile(phraseNotation); TregexMatcher matcher = VBpattern.matcher(sTree); while (matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); Tree[] innerChild = match.children(); int adjectiveExist = 0; String adj = ""; String attribute = ""; String b = ""; if (innerChild.length > 1) { int count = 1; for (Tree inChild : innerChild) { if (inChild.value().equals("CC") || inChild.value().equals(",")) { separator = 1; } if ((inChild.value().equals("JJ")) || (inChild.value().equals("VBG"))) { adjectiveExist++; leaves = inChild.getLeaves(); adj = leaves.get(0).toString(); if (designEleList.contains(adj)) { adj = ""; } } if ((inChild.value().equals("NN")) || (inChild.value().equals("NNS")) || (inChild.value().equals("NNP"))) { leaves = inChild.getLeaves(); //leaves correspond to the tokens if (count == 1) { if (adjectiveExist == 1) { attribute = adj + " " + leaves.get(0).yieldWords().get(0).word(); } else { attribute = leaves.get(0).yieldWords().get(0).word(); } if (!designEleList.contains(attribute)) { String identifiedWord = attribute; if (!identifiedWord.contains("_")) { attributeLists.add(morphology.stem(identifiedWord)); } else { attributeLists.add(identifiedWord); } } } else if (count >= 2 && separator == 0) { if (!attribute.contains("_")) { attributeLists.remove(morphology.stem(attribute)); attributeLists.remove(attribute); } else { attributeLists.remove(attribute); } attribute += " " + (leaves.get(0).yieldWords()).get(0).word(); attributeLists.add(attribute); } else if (count >= 2 && separator == 1) { 
attribute = (leaves.get(0).yieldWords()).get(0).word(); if (!attribute.contains("_")) { attributeLists.add(morphology.stem(attribute)); } else { attributeLists.add(attribute); } separator = 0; } count++; } } } else { for (Tree inChild : innerChild) { if ((inChild.value().equals("NN")) || (inChild.value().equals("NNS"))) { leaves = inChild.getLeaves(); //leaves correspond to the tokens String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word()); if (!identifiedWord.contains("_")) { attributeLists.add(morphology.stem(identifiedWord)); } else { attributeLists.add(identifiedWord); } } } } } adjAtt = getAdjectiveAttribute(); if (!adjAtt.isEmpty()) { String att = ""; for (int i = 0; i < adjAtt.size(); i++) { att = adjAtt.get(i).toString(); if (!att.isEmpty() || !att.equals("") || !(att.equals(" "))) { attributeLists.add(att.trim()); } } } System.out.println("ATTRIBUTE LIST :" + attributeLists); return attributeLists; }
From source file:com.project.NLP.Requirement.PhrasesIdentification.java
/** * method to identify the attributes when the words which are identifies as * nouns are in adjective phrases//www.j ava 2s .c om * * @return ArrayList of adjective attributes */ public ArrayList getAdjectiveAttribute() { adjAttributeList = new ArrayList(); //adjAttributeList = new ArrayList(); int adjectiveExist = 0; int adjectiveNoun = 0; int nnCount = 0; String adj = ""; List<Tree> leaves; String phraseNotation = "NP[<NNS|NN]!$VP";//@" + phrase + "! << @" + phrase; DesignElementClass designEle = new DesignElementClass(); ArrayList designEleList = designEle.getDesignElementsList(); /*For single Tree */ TregexPattern VBpattern = TregexPattern.compile(phraseNotation); TregexMatcher matcher = VBpattern.matcher(sTree); while (matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); Tree[] innerChild = match.children(); String a = ""; boolean separatorExist = false; if (innerChild.length > 1) { int count = 1; adjectiveExist = 0; adjectiveNoun = 0; nnCount = 0; String attribute = ""; adj = ""; for (Tree inChild : innerChild) { //checks whether there are any separators if (inChild.value().equals("CC")) { separatorExist = true; attribute = ""; adjectiveExist = 0; adjectiveNoun = 0; } //checks whether there are adjectives if ((inChild.value().equals("JJ")) || (inChild.value().equals("VBG"))) { adjectiveExist++; leaves = inChild.getLeaves(); adj = leaves.get(0).toString(); if (designEleList.contains(adj)) { adj = ""; } } //if the adjective exist store the attributes if (adjectiveExist == 1) { adjectiveNoun = storeAdjectiveAttribute(inChild, adjectiveNoun, nnCount, adj); } } if (adjectiveExist == 1 && adjectiveNoun == 0 && !adj.isEmpty()) { adjAttributeList.add(stemmingForAWord(adj)); } } } System.out.println("ADJECTVE ATTRIBUTE :" + adjAttributeList); return adjAttributeList; }
From source file:tml.utils.StanfordUtils.java
License:Apache License
/** * @param t a grammar tree to extract the verbs * @return a list of verbs in the tree, an empty list if nothing is found. *///w ww.ja va 2 s . c o m public static List<String> extractVerbs(Tree t) { List<String> verbs = new ArrayList<String>(); if (t == null) return verbs; TregexPattern pattern = null; try { pattern = TregexPattern.compile("/VB.?/"); TregexMatcher matcher = pattern.matcher(t); while (matcher.findNextMatchingNode()) { String content = cleanNodeContent(nodeContent(matcher.getMatch())); if (content.trim().length() > 0) verbs.add(content); } } catch (ParseException e) { logger.error(e); } return verbs; }