Example usage for edu.stanford.nlp.trees.tregex TregexMatcher getMatch

Introduction

This page collects example usages of edu.stanford.nlp.trees.tregex.TregexMatcher.getMatch() taken from open-source projects.

Prototype

public abstract Tree getMatch();

Source Link

Document

Get the last matching tree -- that is, the tree node that matches the root node of the pattern.

Usage

From source file:com.mycompany.stanlp.ChildSpeech.java

/**
 * Reads (target word, text) pairs from a CSV file, parses each text with
 * Stanford CoreNLP and writes one row to {@code out.csv} for every verb phrase
 * matched by the Tregex pattern built below (a VP headed by VB, VBP or VBD
 * that dominates the target word).
 *
 * <p>Input columns used: column 0 is the target word, column 4 is the map key
 * and column 5 is the text to parse. Rows with fewer than six columns are
 * reported on standard error and skipped instead of aborting the run.
 *
 * <p>Output columns: key, target word, full sentence tree, ampersand-terminated
 * pre-terminal labels, matched subtree.
 * NOTE(review): fields are written unquoted, so a tree containing a comma
 * token yields extra columns -- confirm what the consumer of out.csv expects.
 *
 * @param args the command line arguments (unused)
 * @throws IOException if {@code out.csv} cannot be opened for writing
 */
public static void main(String[] args) throws IOException {
    // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution
    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // Sorted by key; a later row with the same key replaces the earlier one.
    TreeMap<String, String[]> tm = new TreeMap<String, String[]>();
    String csvFile = "C:\\Users\\steve\\Downloads\\verbsome.csv";
    String cvsSplitBy = ",";
    BufferedReader br = null;

    try {
        br = new BufferedReader(new FileReader(csvFile));
        String line;
        while ((line = br.readLine()) != null) {
            System.out.println("reached");
            String[] country = line.split(cvsSplitBy);
            // Columns 0, 4 and 5 are read below; a short or blank row would
            // otherwise raise ArrayIndexOutOfBoundsException.
            if (country.length < 6) {
                System.err.println("Skipping row with fewer than 6 columns: " + line);
                continue;
            }
            String[] input = new String[2];
            input[0] = country[0];
            input[1] = country[5];
            tm.put(country[4], input);
        }
    } catch (FileNotFoundException e) {
        // Best effort: a missing input simply produces an empty output file.
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (br != null) {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    StringBuilder sb = new StringBuilder();
    for (Map.Entry<String, String[]> entry : tm.entrySet()) {
        String[] value = entry.getValue();
        Annotation document = new Annotation(value[1]);

        pipeline.annotate(document);
        List<CoreMap> sentences = document.get(SentencesAnnotation.class);

        for (CoreMap sentence : sentences) {
            for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
                // this is the text of the token
                String word = token.get(TextAnnotation.class);
                if (!word.equals(value[0])) {
                    continue;
                }

                Tree tree = sentence.get(TreeAnnotation.class);
                // NOTE(review): the target word is spliced into the pattern
                // unescaped; a word containing Tregex metacharacters will fail
                // to compile.
                TregexPattern patternMW = TregexPattern
                        .compile(" VP  [ <# VB | <# VBP | <# VBD] & <<" + value[0]);
                TregexMatcher matcher = patternMW.matcher(tree);
                while (matcher.findNextMatchingNode()) {
                    Tree match = matcher.getMatch();
                    sb.append(entry.getKey());
                    sb.append(",");
                    sb.append(value[0]);
                    sb.append(",");
                    sb.append(tree.toString());
                    sb.append(",");
                    // A match with a single pre-terminal falls back to the
                    // labels of the whole sentence tree.
                    Tree labelSource = (match.preTerminalYield().size() == 1) ? tree : match;
                    for (Label l : labelSource.preTerminalYield()) {
                        sb.append(l.toString());
                        sb.append("&");
                    }
                    sb.append(",");
                    sb.append(match.toString());
                    sb.append('\n');
                }
            }
        }
    }

    // Open the output only once the content is ready, and always close it.
    PrintWriter pw = new PrintWriter(new File("out.csv"));
    try {
        pw.write(sb.toString());
    } finally {
        pw.close();
    }
}

From source file:com.project.NLP.Requirement.ClassRelationIdentifier.java

/**
 * Looks for "noun - verb - noun" sentence shapes in the given tree and records
 * a {@code ClassRelation} when the verb is in {@code verbPhraseList} and both
 * nouns are distinct members of {@code documentClass}.
 *
 * The verb "be" produces a "Generalization" (noun1, noun2); any other listed
 * verb produces an "Association" (noun2, noun1). Progress is traced on
 * standard output.
 *
 * @param tree          sentence tree; it is passed to the word stemmer first
 * @param documentClass names accepted as classes
 * @return the relations found (possibly empty)
 */
public HashSet identifyAssociation(Tree tree, Set documentClass) {
    final String sentenceShape = "S<(NP.(VP<NP))";
    final String verbShape = "VBZ|VBP>(VP,(NP>S))";
    final String firstNounShape = "NN|NNS>(NP>S)";
    final String secondNounShape = "NN|NNS>>(NP,(VBZ|VBP>(VP,NP)))";

    HashSet relations = new HashSet();

    /* Stemming the sentence */
    wordStemmer.visitTree(tree);
    TregexMatcher sentenceMatcher = TregexPattern.compile(sentenceShape).matcher(tree);
    TregexPattern verbPattern = TregexPattern.compile(verbShape);

    while (sentenceMatcher.findNextMatchingNode()) {
        Tree sentenceMatch = sentenceMatcher.getMatch();
        System.out.println("Sentence match : " + Sentence.listToString(sentenceMatch.yield()));

        // Only the first verb inside the matched sentence is considered.
        TregexMatcher verbMatcher = verbPattern.matcher(sentenceMatch);
        if (!verbMatcher.findNextMatchingNode()) {
            continue;
        }
        String verb = Sentence.listToString(verbMatcher.getMatch().yield());
        System.out.println("Verb match : " + verb);
        if (!verbPhraseList.contains(verb)) {
            continue;
        }
        System.out.println("list contains verb : " + verb);

        // Both nouns are searched in the whole tree, not in the sentence match.
        TregexMatcher firstNounMatcher = TregexPattern.compile(firstNounShape).matcher(tree);
        if (!firstNounMatcher.findNextMatchingNode()) {
            continue;
        }
        String noun1 = Sentence.listToString(firstNounMatcher.getMatch().yield());
        if (!documentClass.contains(noun1)) {
            continue;
        }

        TregexMatcher secondNounMatcher = TregexPattern.compile(secondNounShape).matcher(tree);
        System.out.println("class list contains noun1 : " + noun1);
        if (!secondNounMatcher.findNextMatchingNode()) {
            continue;
        }
        String noun2 = Sentence.listToString(secondNounMatcher.getMatch().yield());
        if (noun1.equals(noun2) || !documentClass.contains(noun2)) {
            continue;
        }

        System.out.println("class list contains noun2 : " + noun2);
        ClassRelation relation;
        if (verb.equals("be")) {
            relation = new ClassRelation("Generalization", noun1, noun2);
            System.out.println("class generalization");
        } else {
            relation = new ClassRelation("Association", noun2, noun1);
            System.out.println("class association");
        }
        relations.add(relation);
    }
    return relations;
}

From source file:com.project.NLP.Requirement.MethodIdentifier.java

/**
 * Collects the distinct verbs found in the given sentence trees, in order of
 * first appearance.
 *
 * A verb is any VB or VBN node that is an immediate child of a VP. Each tree
 * is passed to the word stemmer before matching, so the trees are modified.
 * Trees and matches are traced on standard output.
 *
 * @param sentenceTree sentence trees to scan
 * @return list of verb strings without duplicates
 */
ArrayList getPhrase(ArrayList<Tree> sentenceTree) {
    /*ref : patterns -http://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/trees/tregex/TregexPattern.html  */
    String phraseNotation = "VB|VBN>VP";
    // The pattern is constant, so compile it once rather than once per tree.
    TregexPattern verbPattern = TregexPattern.compile(phraseNotation);
    ArrayList vpList = new ArrayList();
    for (Tree tree : sentenceTree) {
        System.out.print("\n---tree_sen----" + tree + "----\n");
        /* Stemming the sentence */
        wordStemmer.visitTree(tree);
        TregexMatcher matcher = verbPattern.matcher(tree);
        while (matcher.findNextMatchingNode()) {
            Tree match = matcher.getMatch();
            String verb = Sentence.listToString(match.yield());

            /* Filter to unique verbs  */
            if (!vpList.contains(verb)) {
                vpList.add(verb);
            }
            System.out.print("\n---phrase match----" + match + "----\n");
        }
    }
    System.out.print("\n---VPList----" + vpList + "----\n");
    return vpList;
}

From source file:com.project.NLP.Requirement.MethodIdentifier.java

/**
 * Collects candidate method names (verbs) from several sentence trees.
 *
 * A verb is any VB or VBN node that is an immediate child of a VP. Each tree
 * is passed to the word stemmer before matching, so the trees are modified.
 * Everything contained in {@code commonVerbs} is removed from the result.
 * Trees and matches are traced on standard output.
 *
 * @param tree sentence trees to scan
 * @return set of verb strings that are not common verbs
 */
HashSet identifyCandidateMethods(Tree[] tree) {

    String phraseNotation = "VB|VBN>VP";
    // The pattern is constant, so compile it once rather than once per tree.
    TregexPattern verbPattern = TregexPattern.compile(phraseNotation);
    HashSet vpList = new HashSet();
    for (Tree childTree : tree) {
        System.out.print("\n---tree_sen----" + childTree + "----\n");
        /* Stemming the sentence */
        wordStemmer.visitTree(childTree);
        TregexMatcher matcher = verbPattern.matcher(childTree);
        while (matcher.findNextMatchingNode()) {
            Tree match = matcher.getMatch();
            String verb = Sentence.listToString(match.yield());

            // The set itself keeps the verbs unique.
            vpList.add(verb);
            System.out.print("\n---phrase match----" + match + "----\n");
        }
    }
    vpList.removeAll(commonVerbs);
    System.out.print("\n---VPList----" + vpList + "----\n");
    return vpList;
}

From source file:com.project.NLP.Requirement.MethodIdentifier.java

/**
 * Collects candidate method names (verbs) from a single sentence tree.
 *
 * The tree is passed to the word stemmer first, then every VB or VBN node
 * that is an immediate child of a VP contributes its yield. Entries found in
 * {@code commonVerbs} are dropped and the remainder is filtered through
 * {@code removeDesignElements}. Matches are traced on standard output.
 *
 * @param tree sentence tree to scan
 * @return the filtered set of verb strings
 */
HashSet identifyCandidateMethods(Tree tree) {
    final String verbInVerbPhrase = "VB|VBN>VP";
    HashSet candidateVerbs = new HashSet();

    /* Stemming the sentence */
    wordStemmer.visitTree(tree);

    TregexMatcher verbMatcher = TregexPattern.compile(verbInVerbPhrase).matcher(tree);
    while (verbMatcher.findNextMatchingNode()) {
        Tree verbNode = verbMatcher.getMatch();
        // The set itself keeps the verbs unique.
        candidateVerbs.add(Sentence.listToString(verbNode.yield()));
        System.out.print("\n---phrase match----" + verbNode + "----\n");
    }

    candidateVerbs.removeAll(commonVerbs);
    System.out.print("\n------VPList----" + candidateVerbs + "----\n");

    return removeDesignElements(candidateVerbs);
}

From source file:com.project.NLP.Requirement.PhrasesIdentification.java

/**
 * method to get the negative characters from the sentence
 *
 * @return arrayList of negative words in a sentence which are denoted by RB
 * and CC/*  w  w  w.  ja v  a 2  s . c o  m*/
 */
public ArrayList NegativeSentenceDetection() {
    String phraseNotation = "RB|CC";//@" + phrase + "! << @" + phrase;
    TregexPattern VBpattern = TregexPattern.compile(phraseNotation);
    TregexMatcher matcher = VBpattern.matcher(sTree);
    ArrayList negativeLists = new ArrayList();
    while (matcher.findNextMatchingNode()) {
        Tree match = matcher.getMatch();
        Tree[] innerChild = match.children();
        for (Tree inChild : innerChild) {
            negativeLists.add(inChild.getLeaves().get(0).yieldWords().get(0).word());
        }
    }
    return negativeLists;
}

From source file:com.project.NLP.Requirement.PhrasesIdentification.java

/**
 * Extracts candidate class names from the sentence tree {@code sTree}.
 *
 * Matches every NP that has an NN/NNS/NNP child and a VP sister, then walks
 * the NP's children to build the noun list. As a side effect the fields
 * {@code nounList}, {@code attributeLists} and {@code storingClassWithAttr}
 * are re-created, and the resulting noun list is printed to standard output.
 *
 * @return ArrayList: the noun list built for this sentence (same object as
 *         the {@code nounList} field)
 */
public ArrayList getClassList() {
    nounList = new ArrayList();
    attributeLists = new ArrayList();
    int adjectiveExist = 0;
    int adjectiveNoun = 0;
    String adj = "";
    // NOTE(review): storingClass and classWithAttr are assigned/created here but
    // never read inside this method.
    String storingClass = "";
    HashSet classWithAttr = new HashSet();
    storingClassWithAttr = new HashMap<String, HashSet>();

    List<Tree> leaves;
    // NP with an NNS/NN/NNP child that is a sister of a VP.
    String phraseNotation = "(NP([<NNS|NN|NNP]$VP))";//@" + phrase + "! << @" + phrase;

    /*For the single Tree */
    TregexPattern VBpattern = TregexPattern.compile(phraseNotation);
    TregexMatcher matcher = VBpattern.matcher(sTree);
    // tempClass is declared outside the loop, so it carries over from one
    // matched NP to the next.
    String tempClass = "";

    while (matcher.findNextMatchingNode()) {
        Tree match = matcher.getMatch();
        Tree[] innerChild = match.children();
        // Per-NP state: adjective counters and the "CC seen" flag start fresh.
        adjectiveExist = 0;
        adjectiveNoun = 0;
        int separator = 0;

        if (innerChild.length > 1) {
            // count: number of nouns handled so far in this NP (1-based);
            // loopCount: 1-based position of the current child.
            int count = 1;
            int loopCount = 1;
            for (Tree inChild : innerChild) {
                // A conjunction switches the NP into "separate nouns" mode for
                // all remaining children (separator is never reset here).
                if (inChild.value().equals("CC")) {
                    separator = 1;
                }
                if ((inChild.value().equals("JJ")) || (inChild.value().equals("VBG"))) {
                    adjectiveExist++;
                    leaves = inChild.getLeaves();
                    adj = leaves.get(0).yieldWords().get(0).word();
                    // Adjectives listed in the dictionary are blanked out.
                    if (dictionaryForClassList.contains(adj)) {
                        adj = "";
                    }
                }
                //if adjective exist store the classes and attributes separately
                // Once exactly one adjective has been seen, this branch handles
                // every following child as well. adjectiveNoun is always 0 here.
                // NOTE(review): what the helper stores is not visible in this chunk.
                if (adjectiveExist == 1) {
                    storeClassesAndAttributesWhenAdjectiveExistToIdentifyClasses(inChild, adjectiveNoun, adj);
                } else {
                    //storeClassesAndAttributesWhenAdjectiveNotExistToIdentifyClasses(inChild, loopCount, innerChild, separator, tempClass, count);
                    if ((inChild.value().equals("NN"))
                            || (inChild.value().equals("NNS") || (inChild.value().equals("NNP")))) {
                        leaves = inChild.getLeaves(); //leaves correspond to the tokens
                        if (separator == 0) {
                            // No conjunction so far: consecutive nouns form one compound.
                            if (loopCount == innerChild.length) {
                                // Last child of the NP: it replaces the noun(s)
                                // collected so far, which are recorded as an
                                // attribute instead.
                                String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());
                                String word = "";
                                word = stemmingForAWord(identifiedWord);
                                if (!dictionaryForClassList.contains(word)) {
                                    nounList.remove(tempClass);
                                    nounList.add(word);
                                    attributeLists.add(tempClass);

                                }

                            } else if (count == 1) {
                                // First noun of the NP: add it and remember it.
                                String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());
                                /*if the identified word is having underscore skips the stemming part . ex: user_id*/
                                String word = stemmingForAWord(identifiedWord);
                                nounList.add(word);
                                tempClass = word;
                                storingClass = word;

                            } else {
                                // Later noun that is not the last child: drop the
                                // previous entry and re-add it extended by this word.
                                /*if the identified word is having underscore skips the stemming part . ex: user_id*/
                                if (tempClass.contains("_")) {
                                    nounList.remove(tempClass);
                                } else {
                                    nounList.remove(morphology.stem(tempClass));
                                    nounList.remove(tempClass);
                                }
                                String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());

                                // The appended word is used as-is (not stemmed).
                                tempClass += " " + identifiedWord;
                                nounList.add(tempClass);
                                storingClass = tempClass;
                            }

                            count++;
                        } else {
                            // After a conjunction every noun is its own entry.
                            String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());
                            /*if the identified word is having underscore skips the stemming part . ex: user_id*/
                            String word = stemmingForAWord(identifiedWord);
                            nounList.add(word);
                            tempClass = word;
                            storingClass = word;
                        }
                    }

                }
                loopCount++;
            }
        } else {
            // NP with at most one child: add the noun (stemmed unless it contains
            // an underscore) or the adjective as it appears.
            for (Tree inChild : innerChild) {
                if ((inChild.value().equals("NN")) || (inChild.value().equals("NNS"))
                        || (inChild.value().equals("NNP"))) {
                    leaves = inChild.getLeaves(); //leaves correspond to the tokens
                    String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());
                    if (!identifiedWord.contains("_")) {
                        nounList.add(morphology.stem(identifiedWord));
                    } else {
                        nounList.add(identifiedWord);
                    }
                }
                if (inChild.value().equals("JJ")) {
                    //leaves correspond to the tokens
                    leaves = inChild.getLeaves();
                    nounList.add(((leaves.get(0).yieldWords()).get(0).word()));
                }
            }
        }
    }
    System.out.println("NOUN LIST :" + nounList);
    return nounList;
}

From source file:com.project.NLP.Requirement.PhrasesIdentification.java

/**
 * Identifies candidate attributes in the sentence tree {@code sTree}.
 *
 * Matches every NP that has an NN/NNS/NNP child, has no JJ/VBG child and is
 * not the sister of a VP, then walks its children: consecutive nouns are
 * merged into one compound attribute, while nouns following a conjunction or
 * comma start a new attribute. Words without an underscore are stemmed. The
 * non-blank results of {@link #getAdjectiveAttribute()} are appended.
 *
 * As a side effect the fields {@code nounList} and {@code attributeLists} are
 * re-created, and the attribute list is printed to standard output.
 *
 * @return ArrayList: the attributes found (same object as the
 *         {@code attributeLists} field)
 */
public ArrayList getAttributeList() {
    nounList = new ArrayList();
    attributeLists = new ArrayList();
    // Set when a CC or "," child is seen and cleared by the next noun. It is
    // declared outside the loop, so it carries over between matched NPs.
    int separator = 0;
    List<Tree> leaves;
    String phraseNotation = "NP([<NNS|NN|NNP]![<JJ|VBG])!$VP";

    TregexPattern npPattern = TregexPattern.compile(phraseNotation);
    TregexMatcher matcher = npPattern.matcher(sTree);

    while (matcher.findNextMatchingNode()) {
        Tree match = matcher.getMatch();
        Tree[] innerChild = match.children();
        int adjectiveExist = 0;
        String adj = "";
        String attribute = "";

        if (innerChild.length > 1) {
            // 1-based number of the noun currently being handled in this NP.
            int count = 1;

            for (Tree inChild : innerChild) {
                if (inChild.value().equals("CC") || inChild.value().equals(",")) {
                    separator = 1;
                }
                if ((inChild.value().equals("JJ")) || (inChild.value().equals("VBG"))) {
                    adjectiveExist++;
                    leaves = inChild.getLeaves();
                    adj = leaves.get(0).toString();
                    if (designEleList.contains(adj)) {
                        adj = "";
                    }
                }
                if ((inChild.value().equals("NN")) || (inChild.value().equals("NNS"))
                        || (inChild.value().equals("NNP"))) {
                    leaves = inChild.getLeaves(); //leaves correspond to the tokens
                    if (count == 1) {
                        // First noun: optionally prefixed with the single adjective.
                        if (adjectiveExist == 1) {
                            attribute = adj + " " + leaves.get(0).yieldWords().get(0).word();
                        } else {
                            attribute = leaves.get(0).yieldWords().get(0).word();
                        }
                        if (!designEleList.contains(attribute)) {
                            if (!attribute.contains("_")) {
                                attributeLists.add(morphology.stem(attribute));
                            } else {
                                attributeLists.add(attribute);
                            }
                        }

                    } else if (count >= 2 && separator == 0) {
                        // Compound noun: replace the previous entry by the
                        // extended, unstemmed phrase.
                        if (!attribute.contains("_")) {
                            attributeLists.remove(morphology.stem(attribute));
                            attributeLists.remove(attribute);
                        } else {
                            attributeLists.remove(attribute);
                        }

                        attribute += " " + (leaves.get(0).yieldWords()).get(0).word();
                        attributeLists.add(attribute);
                    } else if (count >= 2 && separator == 1) {
                        // Noun after a conjunction/comma: start a new attribute.
                        attribute = (leaves.get(0).yieldWords()).get(0).word();
                        if (!attribute.contains("_")) {
                            attributeLists.add(morphology.stem(attribute));
                        } else {
                            attributeLists.add(attribute);
                        }
                        separator = 0;
                    }
                    count++;
                }
            }
        } else {
            for (Tree inChild : innerChild) {
                if ((inChild.value().equals("NN")) || (inChild.value().equals("NNS"))) {
                    leaves = inChild.getLeaves(); //leaves correspond to the tokens
                    String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());
                    if (!identifiedWord.contains("_")) {
                        attributeLists.add(morphology.stem(identifiedWord));
                    } else {
                        attributeLists.add(identifiedWord);
                    }
                }
            }
        }
    }

    // Append adjective-based attributes, skipping blank entries. The previous
    // guard (!isEmpty() || !equals("") || !equals(" ")) was always true, so
    // empty strings ended up in the result.
    ArrayList adjAtt = getAdjectiveAttribute();
    for (int i = 0; i < adjAtt.size(); i++) {
        String att = adjAtt.get(i).toString().trim();
        if (!att.isEmpty()) {
            attributeLists.add(att);
        }
    }

    System.out.println("ATTRIBUTE LIST :" + attributeLists);
    return attributeLists;

}

From source file:com.project.NLP.Requirement.PhrasesIdentification.java

/**
 * Finds attributes that appear as adjectives inside noun phrases of
 * {@code sTree}.
 *
 * Only NPs that have an NN/NNS child, are not the sister of a VP and have
 * more than one child are examined. Once a single adjective (JJ or VBG) has
 * been seen, every child is handed to {@code storeAdjectiveAttribute}; if no
 * noun was recorded for that adjective, the stemmed adjective itself is added.
 * The field {@code adjAttributeList} is re-created and printed.
 *
 * @return ArrayList of adjective attributes (the {@code adjAttributeList} field)
 */
public ArrayList getAdjectiveAttribute() {
    adjAttributeList = new ArrayList();

    final int nounCount = 0; // always passed as zero to storeAdjectiveAttribute
    ArrayList designElements = new DesignElementClass().getDesignElementsList();

    /*For single Tree  */
    TregexMatcher npMatcher = TregexPattern.compile("NP[<NNS|NN]!$VP").matcher(sTree);

    while (npMatcher.findNextMatchingNode()) {
        Tree[] children = npMatcher.getMatch().children();
        if (children.length <= 1) {
            continue;
        }

        int adjectivesSeen = 0;
        int nounsForAdjective = 0;
        String adjective = "";

        for (int i = 0; i < children.length; i++) {
            Tree child = children[i];
            String tag = child.value();

            // A conjunction restarts the adjective bookkeeping.
            if (tag.equals("CC")) {
                adjectivesSeen = 0;
                nounsForAdjective = 0;
            }
            // Remember the adjective unless it is a design element.
            if (tag.equals("JJ") || tag.equals("VBG")) {
                adjectivesSeen++;
                String text = child.getLeaves().get(0).toString();
                adjective = designElements.contains(text) ? "" : text;
            }
            //if the adjective exist store the attributes
            if (adjectivesSeen == 1) {
                nounsForAdjective = storeAdjectiveAttribute(child, nounsForAdjective, nounCount, adjective);
            }
        }

        boolean loneAdjective = adjectivesSeen == 1 && nounsForAdjective == 0 && !adjective.isEmpty();
        if (loneAdjective) {
            adjAttributeList.add(stemmingForAWord(adjective));
        }
    }

    System.out.println("ADJECTVE ATTRIBUTE :" + adjAttributeList);
    return adjAttributeList;

}

From source file:edu.cmu.ark.QuestionTransducer.java

License:Open Source License

/**
 * Re-inflects the main verb when the subject being replaced by a WH word is
 * a first or second person pronoun.
 * Note: this probably isn't necessary for most applications.
 *
 * Affected:
 * I walk -> Who walks? (rather than, Who walk?)
 *
 * Not affected:
 * He walks -> Who walks?
 *
 * The tree is modified in place: the tensed verb under the main clause's VP
 * is replaced by the subtree produced by {@code getSingularFormSubtree}.
 *
 * @param inputTree tree whose answer phrase is labelled with the "-0" suffix
 */
private void ensureVerbAgreementForSubjectWH(Tree inputTree) {
    // Locate the phrase marked with index 0.
    TregexPattern subjectPattern = TregexPatternFactory.getPattern("/^(NP|PP|SBAR)-" + 0 + "$/");
    TregexMatcher subjectMatcher = subjectPattern.matcher(inputTree);
    if (!subjectMatcher.find()) {
        return;
    }

    String subjectString = subjectMatcher.getMatch().yield().toString();
    boolean firstOrSecondPerson = subjectString.equalsIgnoreCase("I")
            || subjectString.equalsIgnoreCase("you");
    if (!firstOrSecondPerson) {
        return;
    }

    // Locate the tensed verb directly under the main clause's verb phrase.
    TregexPattern verbPattern = TregexPatternFactory
            .getPattern("ROOT=root < (S=mainclause < (VP=verbphrase < (/VB.?/=tensedverb)))");
    TregexMatcher verbMatcher = verbPattern.matcher(inputTree);
    if (!verbMatcher.find()) {
        return;
    }

    Tree tensedVerb = verbMatcher.getNode("tensedverb");
    Tree verbPhrase = verbMatcher.getNode("verbphrase");
    Tree replacement = AnalysisUtilities.getInstance()
            .readTreeFromString(getSingularFormSubtree(tensedVerb));

    // Swap the verb in place so the order of the VP's children is preserved.
    int verbPosition = verbPhrase.indexOf(tensedVerb);
    verbPhrase.removeChild(verbPosition);
    verbPhrase.addChild(verbPosition, replacement);

    if (GlobalProperties.getDebug()) {
        System.err.println("ensureVerbAgreementForSubjectWH: " + inputTree.toString());
    }
}