Example usage for edu.stanford.nlp.trees.tregex TregexPattern matcher

List of usage examples for edu.stanford.nlp.trees.tregex TregexPattern matcher

Introduction

On this page you can find example usages of the matcher method of edu.stanford.nlp.trees.tregex.TregexPattern.

Prototype

public TregexMatcher matcher(Tree t) 

Source Link

Document

Get a TregexMatcher for this pattern on this tree.

Usage

From source file:Ceist.CeistView.java

License:Open Source License

/**
 * Begin a search.
 *
 * <p>Starts a worker thread that obtains the Tregex pattern from
 * {@code txtCurrentPattern}. If no pattern is returned the status label is set
 * to "Bad Pattern!" and nothing else happens. Otherwise the matching work is
 * queued with {@link SwingUtilities#invokeLater(Runnable)}: the selected data
 * sets are collected into one treebank, the previous matches are copied into
 * {@code diffTrees}, and every tree that matches at least once is appended to
 * {@code matchedTrees} and to the matches table (one row per tree).
 */
private void runSearch() {
    //setTregexState(true); Disable buttons while searching

    Thread searchThread = new Thread() {
        @Override
        public void run() {
            // NOTE(review): lblSearchStatus is updated from the worker thread here; if it
            // is a Swing component these calls belong on the event dispatch thread — confirm.
            lblSearchStatus.setText("Searching...");

            // Initialise search patterns
            final TregexPattern primary = MatchPattern.getMatchPattern(txtCurrentPattern);

            if (primary == null) {
                lblSearchStatus.setText("Bad Pattern!");
                return;
            }

            SwingUtilities.invokeLater(new Runnable() {
                @Override
                public void run() {

                    Treebank treebank = new MemoryTreebank();

                    // Add the test data set if selected and loaded
                    if (dataSet.testData.isLoaded() && btnUseTestData.isSelected()) {
                        treebank.addAll(dataSet.testData.getTreebank());
                    }

                    // Add the development data set if selected and loaded.
                    // The guard must look at devData itself: checking testData here
                    // either skipped loaded development data or read an unloaded set.
                    if (dataSet.devData.isLoaded() && btnUseDevelopmentData.isSelected()) {
                        treebank.addAll(dataSet.devData.getTreebank());
                    }

                    int treeCount = treebank.size();
                    int count = 0;

                    // Copy the current matches so they can be diffed against the new ones
                    diffTrees.clear();
                    diffTrees.addAll(matchedTrees);

                    if (!chkShowPreview.isSelected()) {
                        matchedTrees.clear();

                        // Clear the table
                        ((DefaultTableModel) tableMatches.getModel()).setRowCount(0);
                    }

                    for (Tree testTree : treebank) {
                        count++;
                        lblSearchStatus.setText(String.format("Searching %d of %d", count, treeCount));
                        TregexMatcher m = primary.matcher(testTree);

                        // Only the first match of each tree is recorded.
                        if (m.find()) {
                            matchedTrees.add(testTree);
                            ((DefaultTableModel) tableMatches.getModel())
                                    .addRow(getMatcherTableRow(m, testTree, chkShowTagged.isSelected()));
                        }
                    }

                    if (matchedTrees.size() > 0) {
                        lblSearchStatus.setText(String.format("Found %d matches.", matchedTrees.size()));
                    } else {
                        lblSearchStatus.setText("No matches found!");
                    }
                }
            });
        }
    };

    searchThread.start();
}

From source file:com.github.kutschkem.Qgen.annotators.SimpleSentenceDecompositionAnnotator.java

License:Open Source License

/**
 * Decomposes every sentence of the given text into simplified sentences.
 *
 * <p>Each sentence is parsed, then every NP that is followed by a sister VP
 * somewhere below ROOT yields one output sentence: a copy of the NP with
 * embedded S-clauses and the commas adjacent to them pruned, followed by the
 * VP and a final period.
 *
 * @param documentText the raw text to split into sentences and parse
 * @return one reconstructed sentence string per NP/VP match, in match order
 */
private List<String> decompose(String documentText) {
    List<Tree> trees = new ArrayList<Tree>();
    for (String sentence : AnalysisUtilities.getSentences(documentText)) {
        trees.add(AnalysisUtilities.getInstance().parseSentence(sentence).parse);
    }

    // The patterns and the surgery script do not depend on the tree being
    // processed, so build them once instead of once per match.
    TregexPattern clausePattern = TregexPattern.compile("ROOT << (NP=np $++ VP=vp) ");
    TregexPattern prunePattern = TregexPattern.compile("NP << (/^S.*/=sbarq ?. /,/=c1 ?, /,/=c2)");
    List<TsurgeonPattern> pruneSteps = new ArrayList<TsurgeonPattern>();
    pruneSteps.add(Tsurgeon.parseOperation("prune sbarq"));
    pruneSteps.add(Tsurgeon.parseOperation("prune c1"));
    pruneSteps.add(Tsurgeon.parseOperation("prune c2"));
    TsurgeonPattern pruneScript = Tsurgeon.collectOperations(pruneSteps);

    List<String> result = new ArrayList<String>();

    for (Tree t : trees) {
        TregexMatcher m = clausePattern.matcher(t);
        while (m.find()) {
            Tree vp = m.getNode("vp");

            // Prune on a deep copy so the tree being matched is never modified.
            Tree prunedNp = m.getNode("np").deepCopy();
            Tsurgeon.processPattern(prunePattern, pruneScript, prunedNp);

            Tree newTree = Tree.valueOf("(S " + prunedNp + vp + "(. .))");
            result.add(AnalysisUtilities.orginialSentence(newTree.yield()));
        }
    }

    return result;
}

From source file:com.mycompany.stanlp.ChildSpeech.java

/**
 * Reads (word, key, text) triples from a CSV file, annotates each text with
 * Stanford CoreNLP and, for every token equal to the word, writes one CSV row
 * per VP that is headed by VB/VBP/VBD and dominates the word to {@code out.csv}.
 *
 * <p>Input columns used: 0 = word, 4 = key, 5 = text. Rows with fewer than six
 * columns are skipped. Output row: key, word, sentence tree, '&amp;'-terminated
 * preterminal labels, matched VP subtree.
 *
 * @param args the command line arguments (unused)
 * @throws IOException if {@code out.csv} cannot be opened for writing
 */
public static void main(String[] args) throws IOException {

    PrintWriter pw = new PrintWriter(new File("out.csv"));
    try {
        StringBuilder sb = new StringBuilder();
        // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution
        Properties props = new Properties();
        props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        TreeMap<String, String[]> tm = new TreeMap<String, String[]>();
        String csvFile = "C:\\Users\\steve\\Downloads\\verbsome.csv";
        BufferedReader br = null;
        String line;
        String cvsSplitBy = ",";

        try {

            br = new BufferedReader(new FileReader(csvFile));
            while ((line = br.readLine()) != null) {
                System.out.println("reached");
                String[] country = line.split(cvsSplitBy);
                // Columns 0, 4 and 5 are read below; a shorter row (blank line,
                // truncated record) would otherwise abort the whole run.
                if (country.length < 6) {
                    continue;
                }
                String[] input = new String[2];
                input[0] = country[0];
                input[1] = country[5];
                tm.put(country[4], input);

            }

        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (br != null) {
                try {
                    br.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }

        for (Map.Entry<String, String[]> entry : tm.entrySet()) {
            String[] value = entry.getValue();
            Annotation document = new Annotation(value[1]);

            pipeline.annotate(document);
            List<CoreMap> sentences = document.get(SentencesAnnotation.class);

            // The pattern only depends on the entry's word, so it is compiled at most
            // once per entry, and only if some token actually equals that word.
            TregexPattern patternMW = null;

            for (CoreMap sentence : sentences) {

                for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
                    // this is the text of the token
                    String word = token.get(TextAnnotation.class);
                    if (!word.equals(value[0])) {
                        continue;
                    }

                    Tree tree = sentence.get(TreeAnnotation.class);
                    if (patternMW == null) {
                        patternMW = TregexPattern
                                .compile(" VP  [ <# VB | <# VBP | <# VBD] & <<" + value[0]);
                    }
                    String treeText = tree.toString();
                    TregexMatcher matcher = patternMW.matcher(tree);
                    while (matcher.findNextMatchingNode()) {
                        Tree match = matcher.getMatch();
                        sb.append(entry.getKey());
                        sb.append(",");
                        sb.append(value[0]);
                        sb.append(",");
                        sb.append(treeText);
                        sb.append(",");

                        // A single-word VP falls back to the labels of the whole sentence.
                        List<Label> labels = match.preTerminalYield();
                        if (labels.size() == 1) {
                            labels = tree.preTerminalYield();
                        }
                        for (Label l : labels) {
                            sb.append(l.toString());
                            sb.append("&");
                        }

                        sb.append(",");
                        sb.append(match.toString());
                        sb.append('\n');
                    }
                }
            }
        }
        pw.write(sb.toString());
    } finally {
        // Release the output file even when annotation or matching throws.
        pw.close();
    }
}

From source file:com.project.NLP.Requirement.ClassRelationIdentifier.java

/**
 * Looks for class relations expressed by "NP verb NP" clauses in a sentence.
 *
 * <p>The tree is stemmed in place first. For every clause match whose verb is
 * in {@code verbPhraseList}, the first subject-side noun and the first
 * object-side noun of the whole tree are looked up; when both are distinct
 * members of {@code documentClass} a relation is recorded: "Generalization"
 * for the verb "be", "Association" otherwise.
 *
 * @param tree          parse tree of the sentence (modified by stemming)
 * @param documentClass names of the classes already identified
 * @return the set of {@code ClassRelation} objects found (possibly empty)
 */
public HashSet identifyAssociation(Tree tree, Set documentClass) {
    final String clauseNotation = "S<(NP.(VP<NP))";
    final String verbNotation = "VBZ|VBP>(VP,(NP>S))";
    final String subjectNounNotation = "NN|NNS>(NP>S)";
    final String objectNounNotation = "NN|NNS>>(NP,(VBZ|VBP>(VP,NP)))";

    HashSet relations = new HashSet();

    /* Stemming the sentence */
    wordStemmer.visitTree(tree);
    TregexMatcher clauseMatcher = TregexPattern.compile(clauseNotation).matcher(tree);
    TregexPattern verbPattern = TregexPattern.compile(verbNotation);

    while (clauseMatcher.findNextMatchingNode()) {
        Tree clause = clauseMatcher.getMatch();
        System.out.println("Sentence match : " + Sentence.listToString(clause.yield()));

        // Only the first verb of the clause is considered.
        TregexMatcher verbMatcher = verbPattern.matcher(clause);
        if (!verbMatcher.findNextMatchingNode()) {
            continue;
        }
        String verb = Sentence.listToString(verbMatcher.getMatch().yield());
        System.out.println("Verb match : " + verb);
        if (!verbPhraseList.contains(verb)) {
            continue;
        }
        System.out.println("list contains verb : " + verb);

        // The nouns are searched in the whole sentence tree, not just the clause.
        TregexMatcher subjectMatcher = TregexPattern.compile(subjectNounNotation).matcher(tree);
        if (!subjectMatcher.findNextMatchingNode()) {
            continue;
        }
        String noun1 = Sentence.listToString(subjectMatcher.getMatch().yield());
        if (!documentClass.contains(noun1)) {
            continue;
        }

        TregexMatcher objectMatcher = TregexPattern.compile(objectNounNotation).matcher(tree);
        System.out.println("class list contains noun1 : " + noun1);
        if (!objectMatcher.findNextMatchingNode()) {
            continue;
        }
        String noun2 = Sentence.listToString(objectMatcher.getMatch().yield());
        if (noun1.equals(noun2) || !documentClass.contains(noun2)) {
            continue;
        }

        System.out.println("class list contains noun2 : " + noun2);
        ClassRelation relation;
        if (verb.equals("be")) {
            relation = new ClassRelation("Generalization", noun1, noun2);
            System.out.println("class generalization");
        } else {
            relation = new ClassRelation("Association", noun2, noun1);
            System.out.println("class association");
        }
        relations.add(relation);
    }
    return relations;
}

From source file:com.project.NLP.Requirement.MethodIdentifier.java

/**
 * Collects the distinct verbs (VB or VBN nodes directly under a VP) found in
 * the given sentence trees, in order of first occurrence.
 *
 * <p>Each tree is stemmed in place before matching, and every tree, match and
 * the final list are echoed to standard output.
 *
 * @param sentenceTree parse trees of the sentences to scan
 * @return list of unique verb strings
 */
ArrayList getPhrase(ArrayList<Tree> sentenceTree) {
    /* Pattern syntax: see the TregexPattern javadoc of Stanford CoreNLP. */
    final String verbNotation = "VB|VBN>VP";
    ArrayList uniqueVerbs = new ArrayList();

    for (Tree sentence : sentenceTree) {
        System.out.print("\n---tree_sen----" + sentence + "----\n");

        /* Stemming the sentence */
        wordStemmer.visitTree(sentence);

        TregexMatcher verbMatcher = TregexPattern.compile(verbNotation).matcher(sentence);
        while (verbMatcher.findNextMatchingNode()) {
            Tree verbNode = verbMatcher.getMatch();
            String verb = Sentence.listToString(verbNode.yield());

            /* Keep only the first occurrence of each verb */
            boolean alreadySeen = uniqueVerbs.contains(verb);
            if (!alreadySeen) {
                uniqueVerbs.add(verb);
            }
            System.out.print("\n---phrase match----" + verbNode + "----\n");
        }
    }

    System.out.print("\n---VPList----" + uniqueVerbs + "----\n");
    return uniqueVerbs;
}

From source file:com.project.NLP.Requirement.MethodIdentifier.java

/**
 * Collects candidate method names from several sentence trees.
 *
 * <p>Every tree is stemmed in place, all VB/VBN nodes directly under a VP are
 * gathered as verb strings, and the entries of {@code commonVerbs} are removed
 * from the result. Trees, matches and the final set are echoed to standard
 * output.
 *
 * @param tree the sentence trees to scan
 * @return set of verb strings that are not common verbs
 */
HashSet identifyCandidateMethods(Tree[] tree) {
    final String verbNotation = "VB|VBN>VP";
    HashSet candidates = new HashSet();

    for (int i = 0; i < tree.length; i++) {
        Tree sentence = tree[i];
        System.out.print("\n---tree_sen----" + sentence + "----\n");

        /* Stemming the sentence */
        wordStemmer.visitTree(sentence);

        TregexMatcher verbMatcher = TregexPattern.compile(verbNotation).matcher(sentence);
        while (verbMatcher.findNextMatchingNode()) {
            Tree verbNode = verbMatcher.getMatch();
            /* The set keeps the verbs unique */
            candidates.add(Sentence.listToString(verbNode.yield()));
            System.out.print("\n---phrase match----" + verbNode + "----\n");
        }
    }

    candidates.removeAll(commonVerbs);
    System.out.print("\n---VPList----" + candidates + "----\n");
    return candidates;
}

From source file:com.project.NLP.Requirement.MethodIdentifier.java

/**
 * Collects candidate method names from a single sentence tree.
 *
 * <p>The tree is stemmed in place, all VB/VBN nodes directly under a VP are
 * gathered as verb strings, the entries of {@code commonVerbs} are removed and
 * the remainder is passed through {@code removeDesignElements}. Matches and
 * the intermediate set are echoed to standard output.
 *
 * @param tree the sentence tree to scan
 * @return the set returned by {@code removeDesignElements}
 */
HashSet identifyCandidateMethods(Tree tree) {
    final String verbNotation = "VB|VBN>VP";
    HashSet candidates = new HashSet();

    /* Stemming the sentence */
    wordStemmer.visitTree(tree);

    TregexMatcher verbMatcher = TregexPattern.compile(verbNotation).matcher(tree);
    while (verbMatcher.findNextMatchingNode()) {
        Tree verbNode = verbMatcher.getMatch();
        /* The set keeps the verbs unique */
        candidates.add(Sentence.listToString(verbNode.yield()));
        System.out.print("\n---phrase match----" + verbNode + "----\n");
    }

    candidates.removeAll(commonVerbs);
    System.out.print("\n------VPList----" + candidates + "----\n");

    return removeDesignElements(candidates);
}

From source file:com.project.NLP.Requirement.PhrasesIdentification.java

/**
 * Collects the words of the sentence tree {@code sTree} that are tagged RB or CC.
 *
 * <p>For every matching node, the first word below each of its children is
 * added to the result.
 *
 * @return list of the words found under RB and CC nodes, in match order
 */
public ArrayList NegativeSentenceDetection() {
    final String tagNotation = "RB|CC";
    TregexMatcher tagMatcher = TregexPattern.compile(tagNotation).matcher(sTree);

    ArrayList negativeWords = new ArrayList();
    while (tagMatcher.findNextMatchingNode()) {
        for (Tree child : tagMatcher.getMatch().children()) {
            Tree firstLeaf = child.getLeaves().get(0);
            negativeWords.add(firstLeaf.yieldWords().get(0).word());
        }
    }
    return negativeWords;
}

From source file:com.project.NLP.Requirement.PhrasesIdentification.java

/**
 * Extracts candidate class names from the sentence tree {@code sTree}.
 *
 * <p>Resets the fields {@code nounList}, {@code attributeLists} and
 * {@code storingClassWithAttr}, then visits every NP matched by the Tregex
 * pattern below and fills {@code nounList} (and, as a side effect,
 * {@code attributeLists}) from the NP's children. The resulting list is also
 * printed to standard output.
 *
 * @return ArrayList: the {@code nounList} field, holding the class names found
 */
public ArrayList getClassList() {
    nounList = new ArrayList();
    attributeLists = new ArrayList();
    int adjectiveExist = 0;
    // adjectiveNoun is only ever set to 0 in this method; it is passed on unchanged below.
    int adjectiveNoun = 0;
    String adj = "";
    // storingClass and classWithAttr are written here but never read in this method.
    String storingClass = "";
    HashSet classWithAttr = new HashSet();
    storingClassWithAttr = new HashMap<String, HashSet>();

    List<Tree> leaves;
    // NOTE(review): in Tregex, '<' is "immediately dominates" and '$' is "sister of", so this
    // presumably selects NPs with a noun child that have a VP sibling — confirm against the Tregex docs.
    String phraseNotation = "(NP([<NNS|NN|NNP]$VP))";//@" + phrase + "! << @" + phrase;

    /*For the single Tree */
    TregexPattern VBpattern = TregexPattern.compile(phraseNotation);
    TregexMatcher matcher = VBpattern.matcher(sTree);
    // Last class name built so far; declared outside the loop, so it carries over between matches.
    String tempClass = "";

    while (matcher.findNextMatchingNode()) {
        Tree match = matcher.getMatch();
        Tree[] innerChild = match.children();
        // Per-match state: adjective counter and "a CC child has been seen" flag.
        adjectiveExist = 0;
        adjectiveNoun = 0;
        int separator = 0;

        if (innerChild.length > 1) {
            // count: 1-based index of the next noun handled while separator == 0.
            int count = 1;
            // loopCount: 1-based index of the current child among all children.
            int loopCount = 1;
            for (Tree inChild : innerChild) {
                if (inChild.value().equals("CC")) {
                    separator = 1;
                }
                if ((inChild.value().equals("JJ")) || (inChild.value().equals("VBG"))) {
                    adjectiveExist++;
                    leaves = inChild.getLeaves();
                    adj = leaves.get(0).yieldWords().get(0).word();
                    // Adjectives listed in dictionaryForClassList are blanked out.
                    if (dictionaryForClassList.contains(adj)) {
                        adj = "";
                    }
                }
                // With exactly one adjective seen so far, the child is handed to the helper
                // that stores classes and attributes separately.
                if (adjectiveExist == 1) {
                    storeClassesAndAttributesWhenAdjectiveExistToIdentifyClasses(inChild, adjectiveNoun, adj);
                } else {
                    //storeClassesAndAttributesWhenAdjectiveNotExistToIdentifyClasses(inChild, loopCount, innerChild, separator, tempClass, count);
                    if ((inChild.value().equals("NN"))
                            || (inChild.value().equals("NNS") || (inChild.value().equals("NNP")))) {
                        leaves = inChild.getLeaves(); //leaves correspond to the tokens
                        if (separator == 0) {
                            if (loopCount == innerChild.length) {
                                // Noun is the last child: unless it is a dictionary word, it replaces
                                // tempClass as the class and tempClass is recorded as an attribute.
                                String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());
                                String word = "";
                                word = stemmingForAWord(identifiedWord);
                                if (!dictionaryForClassList.contains(word)) {
                                    nounList.remove(tempClass);
                                    nounList.add(word);
                                    attributeLists.add(tempClass);

                                }

                            } else if (count == 1) {
                                // First noun of this NP: stored as a new class name.
                                String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());
                                /* stemmingForAWord is applied to the word (original note: words containing an underscore, e.g. user_id, skip stemming) */
                                String word = stemmingForAWord(identifiedWord);
                                nounList.add(word);
                                tempClass = word;
                                storingClass = word;

                            } else {
                                // Subsequent noun: the previous entry is removed and replaced by the
                                // space-joined compound "tempClass identifiedWord".
                                /* a tempClass containing an underscore (e.g. user_id) is removed as-is; otherwise its stem is removed as well */
                                if (tempClass.contains("_")) {
                                    nounList.remove(tempClass);
                                } else {
                                    nounList.remove(morphology.stem(tempClass));
                                    nounList.remove(tempClass);
                                }
                                String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());

                                tempClass += " " + identifiedWord;
                                nounList.add(tempClass);
                                storingClass = tempClass;
                            }

                            count++;
                        } else {
                            // A CC was seen earlier in this NP: every noun becomes its own class name.
                            String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());
                            /* stemmingForAWord is applied to the word (original note: words containing an underscore, e.g. user_id, skip stemming) */
                            String word = stemmingForAWord(identifiedWord);
                            nounList.add(word);
                            tempClass = word;
                            storingClass = word;
                        }
                    }

                }
                loopCount++;
            }
        } else {
            // NP with at most one child: nouns are added stemmed (unless they contain '_'),
            // a JJ child is added unstemmed.
            for (Tree inChild : innerChild) {
                if ((inChild.value().equals("NN")) || (inChild.value().equals("NNS"))
                        || (inChild.value().equals("NNP"))) {
                    leaves = inChild.getLeaves(); //leaves correspond to the tokens
                    String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());
                    if (!identifiedWord.contains("_")) {
                        nounList.add(morphology.stem(identifiedWord));
                    } else {
                        nounList.add(identifiedWord);
                    }
                }
                if (inChild.value().equals("JJ")) {
                    //leaves correspond to the tokens
                    leaves = inChild.getLeaves();
                    nounList.add(((leaves.get(0).yieldWords()).get(0).word()));
                }
            }
        }
    }
    System.out.println("NOUN LIST :" + nounList);
    return nounList;
}

From source file:com.project.NLP.Requirement.PhrasesIdentification.java

/**
 * Identifies attribute names in the sentence tree {@code sTree}.
 *
 * <p>Resets the fields {@code nounList} and {@code attributeLists}, then
 * visits every NP matched by the Tregex pattern below and builds attribute
 * names from its noun children: consecutive nouns are joined into one
 * space-separated compound, while a CC or comma child starts a new attribute.
 * Finally the non-blank entries returned by {@code getAdjectiveAttribute()}
 * are appended (trimmed). The result is also printed to standard output.
 *
 * @return ArrayList: the {@code attributeLists} field, holding the attributes found
 */
public ArrayList getAttributeList() {
    nounList = new ArrayList();
    attributeLists = new ArrayList();
    // Set when a CC or "," child is seen; declared outside the loop, so a separator that is
    // not consumed by a following noun carries over to the next match.
    int separator = 0;
    List<Tree> leaves;
    // NOTE(review): presumably "NP with a noun child, without a JJ/VBG child, and not a sister
    // of a VP" — confirm against the Tregex docs.
    String phraseNotation = "NP([<NNS|NN|NNP]![<JJ|VBG])!$VP";// !<VBG";//@" + phrase + "! << @" + phrase;

    TregexPattern VBpattern = TregexPattern.compile(phraseNotation);
    TregexMatcher matcher = VBpattern.matcher(sTree);

    while (matcher.findNextMatchingNode()) {
        Tree match = matcher.getMatch();
        Tree[] innerChild = match.children();
        int adjectiveExist = 0;
        String adj = "";
        String attribute = "";

        if (innerChild.length > 1) {
            // 1-based index of the next noun child of this NP.
            int count = 1;

            for (Tree inChild : innerChild) {
                if (inChild.value().equals("CC") || inChild.value().equals(",")) {
                    separator = 1;
                }
                if ((inChild.value().equals("JJ")) || (inChild.value().equals("VBG"))) {
                    adjectiveExist++;
                    leaves = inChild.getLeaves();
                    adj = leaves.get(0).toString();
                    // Adjectives that are design elements are blanked out.
                    if (designEleList.contains(adj)) {
                        adj = "";
                    }
                }
                if ((inChild.value().equals("NN")) || (inChild.value().equals("NNS"))
                        || (inChild.value().equals("NNP"))) {
                    leaves = inChild.getLeaves(); //leaves correspond to the tokens
                    if (count == 1) {
                        // First noun: optionally prefixed by the single adjective seen so far.
                        if (adjectiveExist == 1) {
                            attribute = adj + " " + leaves.get(0).yieldWords().get(0).word();
                        } else {
                            attribute = leaves.get(0).yieldWords().get(0).word();
                        }
                        if (!designEleList.contains(attribute)) {
                            String identifiedWord = attribute;
                            // Words containing '_' (e.g. user_id) are stored unstemmed.
                            if (!identifiedWord.contains("_")) {
                                attributeLists.add(morphology.stem(identifiedWord));
                            } else {
                                attributeLists.add(identifiedWord);
                            }
                        }

                    } else if (count >= 2 && separator == 0) {
                        // Consecutive noun: replace the previous entry by the joined compound.
                        if (!attribute.contains("_")) {

                            attributeLists.remove(morphology.stem(attribute));
                            attributeLists.remove(attribute);
                        } else {
                            attributeLists.remove(attribute);
                        }

                        attribute += " " + (leaves.get(0).yieldWords()).get(0).word();
                        attributeLists.add(attribute);
                    } else if (count >= 2 && separator == 1) {
                        // Noun after a CC or comma: start a new attribute and consume the separator.
                        attribute = (leaves.get(0).yieldWords()).get(0).word();
                        if (!attribute.contains("_")) {
                            attributeLists.add(morphology.stem(attribute));
                        } else {
                            attributeLists.add(attribute);
                        }
                        separator = 0;
                    }
                    count++;
                }
            }
        } else {
            // NP with at most one child: a NN/NNS child is added, stemmed unless it contains '_'.
            for (Tree inChild : innerChild) {
                if ((inChild.value().equals("NN")) || (inChild.value().equals("NNS"))) {
                    leaves = inChild.getLeaves(); //leaves correspond to the tokens
                    String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());
                    if (!identifiedWord.contains("_")) {
                        attributeLists.add(morphology.stem(identifiedWord));
                    } else {
                        attributeLists.add(identifiedWord);
                    }
                }
            }
        }
    }

    // Append the adjective-derived attributes, skipping blank entries. The previous check
    // OR-ed three negated comparisons, which is true for every string, so blank values
    // were added as "".
    ArrayList adjAtt = getAdjectiveAttribute();
    for (int i = 0; i < adjAtt.size(); i++) {
        String att = adjAtt.get(i).toString().trim();
        if (!att.isEmpty()) {
            attributeLists.add(att);
        }
    }

    System.out.println("ATTRIBUTE LIST :" + attributeLists);
    return attributeLists;

}