Example usage for edu.stanford.nlp.trees.tregex TregexPattern compile

List of usage examples for edu.stanford.nlp.trees.tregex TregexPattern compile

Introduction

On this page you can find example usages of edu.stanford.nlp.trees.tregex.TregexPattern.compile.

Prototype

public static TregexPattern compile(String tregex) 

Source Link

Document

Creates a pattern from the given string using the default HeadFinder and BasicCategoryFunction.

Usage

From source file:com.github.kutschkem.Qgen.annotators.SimpleSentenceDecompositionAnnotator.java

License:Open Source License

/**
 * Breaks a document into simplified subject/predicate sentences.
 *
 * <p>Every sentence reported by {@code AnalysisUtilities.getSentences} is parsed. For each
 * match of an {@code NP} that has a following sister {@code VP} somewhere below {@code ROOT},
 * a deep copy of the NP is stripped of its embedded {@code S*} clause (and of the commas
 * directly before/after that clause, when present), and the pruned NP plus the original VP
 * are re-assembled into a new {@code (S ...)} tree whose yield is converted back to text.
 *
 * @param documentText raw text to decompose
 * @return one sentence string per NP/VP match, in the order the matches were found
 */
private List<String> decompose(String documentText) {
    // The patterns and prune operations are constant, so compile/parse them once instead of
    // once per tree (outer pattern) and once per match (inner pattern + operations).
    TregexPattern clausePattern = TregexPattern.compile("ROOT << (NP=np $++ VP=vp) ");
    TregexPattern prunePattern = TregexPattern.compile("NP << (/^S.*/=sbarq ?. /,/=c1 ?, /,/=c2)");
    List<TsurgeonPattern> pruneSteps = new ArrayList<TsurgeonPattern>();
    pruneSteps.add(Tsurgeon.parseOperation("prune sbarq"));
    pruneSteps.add(Tsurgeon.parseOperation("prune c1"));
    pruneSteps.add(Tsurgeon.parseOperation("prune c2"));
    TsurgeonPattern pruneOperation = Tsurgeon.collectOperations(pruneSteps);

    List<Tree> trees = new ArrayList<Tree>();
    for (String sentence : AnalysisUtilities.getSentences(documentText)) {
        trees.add(AnalysisUtilities.getInstance().parseSentence(sentence).parse);
    }

    List<String> result = new ArrayList<String>();

    for (Tree t : trees) {
        TregexMatcher m = clausePattern.matcher(t);
        while (m.find()) {
            Tree vp = m.getNode("vp");

            // Prune on a copy so the tree currently being matched is never modified.
            Tree prunedNp = m.getNode("np").deepCopy();
            Tsurgeon.processPattern(prunePattern, pruneOperation, prunedNp);

            Tree newTree = Tree.valueOf("(S " + prunedNp + vp + "(. .))");
            result.add(AnalysisUtilities.orginialSentence(newTree.yield()));
        }
    }

    return result;
}

From source file:com.mycompany.stanlp.ChildSpeech.java

/**
 * Extracts, for every row of a verb CSV file, the verb phrases that contain the row's target
 * word and writes one CSV line per match to {@code out.csv}.
 *
 * <p>Each usable input row supplies the target word (column 0), a key (column 4) and the text
 * to analyse (column 5). The text is run through a CoreNLP pipeline; for every token equal to
 * the target word, the sentence's parse tree is searched for {@code VP} nodes headed by
 * VB/VBP/VBD that dominate the word. Each match produces the columns: key, word, sentence
 * tree, pre-terminal tags joined by {@code &}, matched subtree.
 *
 * @param args the command line arguments (not used)
 * @throws IOException if {@code out.csv} cannot be opened for writing
 */
public static void main(String[] args) throws IOException {

    // Opened first so an unwritable output location fails before the expensive model loading.
    PrintWriter pw = new PrintWriter(new File("out.csv"));
    try {
        StringBuilder sb = new StringBuilder();
        // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution
        Properties props = new Properties();
        props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        String csvFile = "C:\\Users\\steve\\Downloads\\verbsome.csv";
        TreeMap<String, String[]> tm = readVerbRows(csvFile);

        for (Map.Entry<String, String[]> entry : tm.entrySet()) {
            String[] value = entry.getValue();
            Annotation document = new Annotation(value[1]);

            pipeline.annotate(document);
            List<CoreMap> sentences = document.get(SentencesAnnotation.class);

            for (CoreMap sentence : sentences) {

                for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
                    // this is the text of the token
                    String word = token.get(TextAnnotation.class);
                    if (!word.equals(value[0])) {
                        continue;
                    }
                    Tree tree = sentence.get(TreeAnnotation.class);
                    // NOTE(review): value[0] is spliced into the pattern unescaped; a word
                    // containing Tregex metacharacters would make compile() throw.
                    TregexPattern patternMW = TregexPattern
                            .compile(" VP  [ <# VB | <# VBP | <# VBD] & <<" + value[0]);
                    TregexMatcher matcher = patternMW.matcher(tree);
                    while (matcher.findNextMatchingNode()) {
                        Tree match = matcher.getMatch();
                        String tempString = tree.toString();
                        // NOTE(review): fields are not CSV-quoted; a tree containing a
                        // comma token will shift columns in out.csv.
                        sb.append(entry.getKey());
                        sb.append(",");
                        sb.append(value[0]);
                        sb.append(",");
                        sb.append(tempString);
                        sb.append(",");
                        // A match with a single pre-terminal falls back to the tags of the
                        // whole sentence tree; otherwise the tags of the match are used.
                        Tree tagSource = match.preTerminalYield().size() == 1 ? tree : match;
                        for (Label l : tagSource.preTerminalYield()) {
                            sb.append(l.toString());
                            sb.append("&");
                        }
                        sb.append(",");
                        sb.append(match.toString());
                        sb.append('\n');
                    }
                }
            }
        }
        pw.write(sb.toString());
    } finally {
        // Release the file handle even when annotation or matching throws.
        pw.close();
    }
}

/**
 * Loads the verb table: maps column 4 of every row to the pair {column 0, column 5}.
 *
 * <p>Read failures are reported on stderr and whatever was read so far is returned
 * (best effort). Rows with fewer than six columns are skipped instead of aborting the run
 * with an {@code ArrayIndexOutOfBoundsException}.
 *
 * @param csvFile path of the comma-separated input file
 * @return the rows read, keyed and sorted by column 4
 */
private static TreeMap<String, String[]> readVerbRows(String csvFile) {
    TreeMap<String, String[]> rows = new TreeMap<String, String[]>();
    String cvsSplitBy = ",";
    BufferedReader br = null;

    try {
        br = new BufferedReader(new FileReader(csvFile));
        String line;
        while ((line = br.readLine()) != null) {
            System.out.println("reached");
            String[] country = line.split(cvsSplitBy);
            if (country.length < 6) {
                // String.split drops trailing empty fields, so short rows are a real possibility.
                System.err.println("Skipping malformed row (expected at least 6 columns): " + line);
                continue;
            }
            String[] input = new String[2];
            input[0] = country[0];
            input[1] = country[5];
            rows.put(country[4], input);
        }
    } catch (IOException e) {
        // Covers FileNotFoundException as well; processing continues with the rows read so far.
        e.printStackTrace();
    } finally {
        if (br != null) {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return rows;
}

From source file:com.project.NLP.Requirement.ClassRelationIdentifier.java

/**
 * Looks for class relations expressed as "noun verb noun" clauses in a parse tree.
 *
 * <p>The tree is stemmed in place first. For every {@code S} clause of the shape
 * {@code NP VP(NP)}, the first VBZ/VBP verb is taken; if it is one of the known verb
 * phrases, the first subject noun and the first object noun of the whole tree are looked
 * up. When both nouns are distinct members of {@code documentClass}, a relation is recorded:
 * "Generalization" for the verb "be", "Association" otherwise.
 *
 * @param tree          parse tree of the sentence (stemmed in place by this call)
 * @param documentClass names of the classes already identified in the document
 * @return the {@code ClassRelation}s found, possibly empty
 */
public HashSet identifyAssociation(Tree tree, Set documentClass) {
    HashSet relations = new HashSet();

    /* Stemming the sentence */
    wordStemmer.visitTree(tree);
    TregexMatcher clauseMatcher = TregexPattern.compile("S<(NP.(VP<NP))").matcher(tree);
    TregexPattern verbPattern = TregexPattern.compile("VBZ|VBP>(VP,(NP>S))");

    while (clauseMatcher.findNextMatchingNode()) {
        Tree clause = clauseMatcher.getMatch();
        System.out.println("Sentence match : " + Sentence.listToString(clause.yield()));

        // Only the first verb of the clause is considered.
        TregexMatcher verbMatcher = verbPattern.matcher(clause);
        if (!verbMatcher.findNextMatchingNode()) {
            continue;
        }
        String verb = Sentence.listToString(verbMatcher.getMatch().yield());
        System.out.println("Verb match : " + verb);
        if (!verbPhraseList.contains(verb)) {
            continue;
        }
        System.out.println("list contains verb : " + verb);

        // Subject noun: searched in the whole tree, not just the matched clause.
        TregexMatcher subjectMatcher = TregexPattern.compile("NN|NNS>(NP>S)").matcher(tree);
        if (!subjectMatcher.findNextMatchingNode()) {
            continue;
        }
        String noun1 = Sentence.listToString(subjectMatcher.getMatch().yield());
        if (!documentClass.contains(noun1)) {
            continue;
        }

        // Object noun: again searched in the whole tree.
        TregexMatcher objectMatcher = TregexPattern.compile("NN|NNS>>(NP,(VBZ|VBP>(VP,NP)))")
                .matcher(tree);
        System.out.println("class list contains noun1 : " + noun1);
        if (!objectMatcher.findNextMatchingNode()) {
            continue;
        }
        String noun2 = Sentence.listToString(objectMatcher.getMatch().yield());
        if (noun1.equals(noun2) || !documentClass.contains(noun2)) {
            continue;
        }
        System.out.println("class list contains noun2 : " + noun2);

        ClassRelation relation;
        if (verb.equals("be")) {
            relation = new ClassRelation("Generalization", noun1, noun2);
            System.out.println("class generalization");
        } else {
            // Argument order is (object, subject) for associations.
            relation = new ClassRelation("Association", noun2, noun1);
            System.out.println("class association");
        }
        relations.add(relation);
    }
    return relations;
}

From source file:com.project.NLP.Requirement.MethodIdentifier.java

/**
 * Collects the distinct verbs (VB or VBN nodes directly under a VP) of the given sentences,
 * in order of first appearance.
 *
 * <p>Each tree is stemmed in place before matching. Progress is printed to stdout.
 * Pattern reference:
 * http://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/trees/tregex/TregexPattern.html
 *
 * @param sentenceTree parse trees of the sentences to scan
 * @return list of unique verb strings
 */
ArrayList getPhrase(ArrayList<Tree> sentenceTree) {
    final String verbUnderVp = "VB|VBN>VP";
    ArrayList uniqueVerbs = new ArrayList();

    for (Tree sentence : sentenceTree) {
        System.out.print("\n---tree_sen----" + sentence + "----\n");
        /* Stemming the sentence */
        wordStemmer.visitTree(sentence);

        TregexMatcher verbMatcher = TregexPattern.compile(verbUnderVp).matcher(sentence);
        while (verbMatcher.findNextMatchingNode()) {
            Tree verbNode = verbMatcher.getMatch();
            String verb = Sentence.listToString(verbNode.yield());

            /* Keep only the first occurrence of each verb */
            boolean alreadyKnown = uniqueVerbs.contains(verb);
            if (!alreadyKnown) {
                uniqueVerbs.add(verb);
            }
            System.out.print("\n---phrase match----" + verbNode + "----\n");
        }
    }

    System.out.print("\n---VPList----" + uniqueVerbs + "----\n");
    return uniqueVerbs;
}

From source file:com.project.NLP.Requirement.MethodIdentifier.java

/**
 * Gathers candidate method names from several parse trees: every VB or VBN node directly
 * under a VP contributes its yield, after which the entries of {@code commonVerbs} are
 * removed.
 *
 * <p>Each tree is stemmed in place before matching. Progress is printed to stdout.
 *
 * @param tree parse trees to scan
 * @return set of verb strings that are not common verbs
 */
HashSet identifyCandidateMethods(Tree[] tree) {
    final String verbUnderVp = "VB|VBN>VP";
    HashSet candidates = new HashSet();

    for (int i = 0; i < tree.length; i++) {
        Tree sentence = tree[i];
        System.out.print("\n---tree_sen----" + sentence + "----\n");
        /* Stemming the sentence */
        wordStemmer.visitTree(sentence);

        TregexMatcher verbMatcher = TregexPattern.compile(verbUnderVp).matcher(sentence);
        while (verbMatcher.findNextMatchingNode()) {
            Tree verbNode = verbMatcher.getMatch();
            // The set takes care of uniqueness.
            candidates.add(Sentence.listToString(verbNode.yield()));
            System.out.print("\n---phrase match----" + verbNode + "----\n");
        }
    }

    candidates.removeAll(commonVerbs);
    System.out.print("\n---VPList----" + candidates + "----\n");
    return candidates;
}

From source file:com.project.NLP.Requirement.MethodIdentifier.java

/**
 * Gathers candidate method names from a single parse tree: every VB or VBN node directly
 * under a VP contributes its yield; entries of {@code commonVerbs} are then removed and the
 * remainder is passed through {@code removeDesignElements}.
 *
 * <p>The tree is stemmed in place before matching. Progress is printed to stdout.
 *
 * @param tree parse tree to scan
 * @return the result of {@code removeDesignElements} on the remaining verbs
 */
HashSet identifyCandidateMethods(Tree tree) {
    HashSet candidates = new HashSet();

    /* Stemming the sentence */
    wordStemmer.visitTree(tree);

    TregexMatcher verbMatcher = TregexPattern.compile("VB|VBN>VP").matcher(tree);
    while (verbMatcher.findNextMatchingNode()) {
        Tree verbNode = verbMatcher.getMatch();
        // The set takes care of uniqueness.
        candidates.add(Sentence.listToString(verbNode.yield()));
        System.out.print("\n---phrase match----" + verbNode + "----\n");
    }

    candidates.removeAll(commonVerbs);
    System.out.print("\n------VPList----" + candidates + "----\n");

    return removeDesignElements(candidates);
}

From source file:com.project.NLP.Requirement.PhrasesIdentification.java

/**
 * method to get the negative characters from the sentence
 *
 * @return arrayList of negative words in a sentence which are denoted by RB
 * and CC/*ww  w. j a v a 2 s .co m*/
 */
public ArrayList NegativeSentenceDetection() {
    String phraseNotation = "RB|CC";//@" + phrase + "! << @" + phrase;
    TregexPattern VBpattern = TregexPattern.compile(phraseNotation);
    TregexMatcher matcher = VBpattern.matcher(sTree);
    ArrayList negativeLists = new ArrayList();
    while (matcher.findNextMatchingNode()) {
        Tree match = matcher.getMatch();
        Tree[] innerChild = match.children();
        for (Tree inChild : innerChild) {
            negativeLists.add(inChild.getLeaves().get(0).yieldWords().get(0).word());
        }
    }
    return negativeLists;
}

From source file:com.project.NLP.Requirement.PhrasesIdentification.java

/**
 * Extracts candidate class names (nouns) from the sentence tree {@code sTree}.
 *
 * <p>Resets {@code nounList}, {@code attributeLists} and {@code storingClassWithAttr}, then
 * visits every NP matched by the Tregex pattern below. Consecutive nouns inside one NP are
 * merged into a compound name unless a CC was seen; when an adjective (JJ/VBG) is present
 * the work is delegated to
 * {@code storeClassesAndAttributesWhenAdjectiveExistToIdentifyClasses}.
 * The resulting list is also printed to stdout.
 *
 * @return ArrayList: arrayList of classes from a sentence (the {@code nounList} field)
 */
public ArrayList getClassList() {
    nounList = new ArrayList();
    attributeLists = new ArrayList();
    int adjectiveExist = 0;
    int adjectiveNoun = 0;
    String adj = "";
    // NOTE(review): storingClass is assigned below but never read in this method.
    String storingClass = "";
    // NOTE(review): classWithAttr is never used in this method.
    HashSet classWithAttr = new HashSet();
    storingClassWithAttr = new HashMap<String, HashSet>();

    List<Tree> leaves;
    // As read from the pattern: an NP with an NNS/NN/NNP child that is a sister of a VP.
    String phraseNotation = "(NP([<NNS|NN|NNP]$VP))";

    /* For the single Tree */
    TregexPattern VBpattern = TregexPattern.compile(phraseNotation);
    TregexMatcher matcher = VBpattern.matcher(sTree);
    // Declared outside the loop, so the last class name survives from one NP match to the next.
    String tempClass = "";

    while (matcher.findNextMatchingNode()) {
        Tree match = matcher.getMatch();
        Tree[] innerChild = match.children();
        // Per-NP state: adjective counters and the "CC seen" flag start fresh for each match.
        adjectiveExist = 0;
        adjectiveNoun = 0;
        int separator = 0;

        if (innerChild.length > 1) {
            // count: 1-based index of the next noun handled in the no-separator path.
            int count = 1;
            // loopCount: 1-based index of the current child, used to detect the last child.
            int loopCount = 1;
            for (Tree inChild : innerChild) {
                // A coordinating conjunction switches to "each noun is its own class" mode.
                if (inChild.value().equals("CC")) {
                    separator = 1;
                }
                if ((inChild.value().equals("JJ")) || (inChild.value().equals("VBG"))) {
                    adjectiveExist++;
                    leaves = inChild.getLeaves();
                    adj = leaves.get(0).yieldWords().get(0).word();
                    // Adjectives listed in dictionaryForClassList are blanked out.
                    if (dictionaryForClassList.contains(adj)) {
                        adj = "";
                    }
                }
                //if adjective exist store the classes and attributes separately
                // This branch runs for every child (the adjective itself included) for as
                // long as exactly one JJ/VBG has been seen in this NP.
                // NOTE(review): adjectiveNoun is passed by value and any result of the helper
                // is discarded here — confirm that is intended.
                if (adjectiveExist == 1) {
                    storeClassesAndAttributesWhenAdjectiveExistToIdentifyClasses(inChild, adjectiveNoun, adj);
                } else {
                    //storeClassesAndAttributesWhenAdjectiveNotExistToIdentifyClasses(inChild, loopCount, innerChild, separator, tempClass, count);
                    if ((inChild.value().equals("NN"))
                            || (inChild.value().equals("NNS") || (inChild.value().equals("NNP")))) {
                        leaves = inChild.getLeaves(); //leaves correspond to the tokens
                        if (separator == 0) {
                            if (loopCount == innerChild.length) {
                                // Noun is the last child of the NP: unless it is a dictionary
                                // word, it replaces the class collected so far, and the
                                // previous class name is recorded as an attribute.
                                String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());
                                String word = "";
                                word = stemmingForAWord(identifiedWord);
                                if (!dictionaryForClassList.contains(word)) {
                                    nounList.remove(tempClass);
                                    nounList.add(word);
                                    attributeLists.add(tempClass);

                                }

                            } else if (count == 1) {
                                // First noun of the NP: start a new class name.
                                String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());
                                // presumably stemmingForAWord leaves words with an underscore
                                // (ex: user_id) unstemmed — verify against that helper
                                String word = stemmingForAWord(identifiedWord);
                                nounList.add(word);
                                tempClass = word;
                                storingClass = word;

                            } else {
                                // Further noun, not last: drop the entry added so far (and its
                                // stem, for names without underscore) and extend it into a
                                // space-separated compound name.
                                if (tempClass.contains("_")) {
                                    nounList.remove(tempClass);
                                } else {
                                    nounList.remove(morphology.stem(tempClass));
                                    nounList.remove(tempClass);
                                }
                                String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());

                                tempClass += " " + identifiedWord;
                                nounList.add(tempClass);
                                storingClass = tempClass;
                            }

                            // Only nouns handled without a separator advance the counter.
                            count++;
                        } else {
                            // A CC was seen earlier in this NP: every noun becomes its own class.
                            String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());
                            // presumably stemmingForAWord leaves words with an underscore
                            // (ex: user_id) unstemmed — verify against that helper
                            String word = stemmingForAWord(identifiedWord);
                            nounList.add(word);
                            tempClass = word;
                            storingClass = word;
                        }
                    }

                }
                loopCount++;
            }
        } else {
            // NP with at most one child: add the noun (stemmed unless it contains an
            // underscore) or the adjective word as is.
            for (Tree inChild : innerChild) {
                if ((inChild.value().equals("NN")) || (inChild.value().equals("NNS"))
                        || (inChild.value().equals("NNP"))) {
                    leaves = inChild.getLeaves(); //leaves correspond to the tokens
                    String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());
                    if (!identifiedWord.contains("_")) {
                        nounList.add(morphology.stem(identifiedWord));
                    } else {
                        nounList.add(identifiedWord);
                    }
                }
                if (inChild.value().equals("JJ")) {
                    //leaves correspond to the tokens
                    leaves = inChild.getLeaves();
                    nounList.add(((leaves.get(0).yieldWords()).get(0).word()));
                }
            }
        }
    }
    System.out.println("NOUN LIST :" + nounList);
    return nounList;
}

From source file:com.project.NLP.Requirement.PhrasesIdentification.java

/**
 * Identifies attribute candidates in the sentence tree {@code sTree}.
 *
 * <p>Resets {@code nounList} and {@code attributeLists}, collects the nouns of every NP
 * matched by the pattern below (merging consecutive nouns into one compound attribute unless
 * a CC or comma separated them), then appends the non-blank results of
 * {@link #getAdjectiveAttribute()}. The resulting list is also printed to stdout.
 *
 * @return ArrayList: arrayList of attributes (the {@code attributeLists} field)
 */
public ArrayList getAttributeList() {
    nounList = new ArrayList();
    attributeLists = new ArrayList();
    // NOTE(review): unlike getClassList, this flag is not reset per NP match, so a separator
    // seen in one NP can still be pending for the next one — confirm that is intended.
    int separator = 0;
    List<Tree> leaves;
    // NOTE(review): as read from the pattern, NPs with a JJ/VBG child are excluded, which
    // would make the adjective handling below unreachable for direct children — confirm.
    String phraseNotation = "NP([<NNS|NN|NNP]![<JJ|VBG])!$VP";

    TregexPattern VBpattern = TregexPattern.compile(phraseNotation);
    TregexMatcher matcher = VBpattern.matcher(sTree);

    while (matcher.findNextMatchingNode()) {
        Tree match = matcher.getMatch();
        Tree[] innerChild = match.children();
        int adjectiveExist = 0;
        String adj = "";
        String attribute = "";

        if (innerChild.length > 1) {
            // 1-based index of the next noun within this NP.
            int count = 1;

            for (Tree inChild : innerChild) {
                if (inChild.value().equals("CC") || inChild.value().equals(",")) {
                    separator = 1;
                }
                if ((inChild.value().equals("JJ")) || (inChild.value().equals("VBG"))) {
                    adjectiveExist++;
                    leaves = inChild.getLeaves();
                    adj = leaves.get(0).toString();
                    if (designEleList.contains(adj)) {
                        adj = "";
                    }
                }
                if ((inChild.value().equals("NN")) || (inChild.value().equals("NNS"))
                        || (inChild.value().equals("NNP"))) {
                    leaves = inChild.getLeaves(); //leaves correspond to the tokens
                    if (count == 1) {
                        // First noun: optionally prefixed with the single adjective seen so far.
                        if (adjectiveExist == 1) {
                            attribute = adj + " " + leaves.get(0).yieldWords().get(0).word();
                        } else {
                            attribute = leaves.get(0).yieldWords().get(0).word();
                        }
                        if (!designEleList.contains(attribute)) {
                            String identifiedWord = attribute;
                            if (!identifiedWord.contains("_")) {
                                attributeLists.add(morphology.stem(identifiedWord));
                            } else {
                                attributeLists.add(identifiedWord);
                            }
                        }

                    } else if (count >= 2 && separator == 0) {
                        // Further noun without separator: replace the previous entry by the
                        // space-separated compound.
                        if (!attribute.contains("_")) {

                            attributeLists.remove(morphology.stem(attribute));
                            attributeLists.remove(attribute);
                        } else {
                            attributeLists.remove(attribute);
                        }

                        attribute += " " + (leaves.get(0).yieldWords()).get(0).word();
                        attributeLists.add(attribute);
                    } else if (count >= 2 && separator == 1) {
                        // Further noun after a CC/comma: start a new, independent attribute.
                        attribute = (leaves.get(0).yieldWords()).get(0).word();
                        if (!attribute.contains("_")) {
                            attributeLists.add(morphology.stem(attribute));
                        } else {
                            attributeLists.add(attribute);
                        }
                        separator = 0;
                    }
                    count++;
                }
            }
        } else {
            for (Tree inChild : innerChild) {
                if ((inChild.value().equals("NN")) || (inChild.value().equals("NNS"))) {
                    leaves = inChild.getLeaves(); //leaves correspond to the tokens
                    String identifiedWord = ((leaves.get(0).yieldWords()).get(0).word());
                    if (!identifiedWord.contains("_")) {
                        attributeLists.add(morphology.stem(identifiedWord));
                    } else {
                        attributeLists.add(identifiedWord);
                    }
                }
            }
        }
    }

    // Called after the NP scan, as before.
    ArrayList adjAtt = getAdjectiveAttribute();
    for (int i = 0; i < adjAtt.size(); i++) {
        String att = adjAtt.get(i).toString().trim();
        // The previous check OR-ed three negations and was therefore always true, letting
        // empty/blank entries through; only non-blank attributes are added now.
        if (!att.isEmpty()) {
            attributeLists.add(att);
        }
    }

    System.out.println("ATTRIBUTE LIST :" + attributeLists);
    return attributeLists;

}

From source file:com.project.NLP.Requirement.PhrasesIdentification.java

/**
 * method to identify the attributes when the words which are identifies as
 * nouns are in adjective phrases/*from   w w w .  jav a 2  s  .c o  m*/
 *
 * @return ArrayList of adjective attributes
 */
public ArrayList getAdjectiveAttribute() {
    adjAttributeList = new ArrayList();
    //adjAttributeList = new ArrayList();

    int adjectiveExist = 0;
    int adjectiveNoun = 0;
    int nnCount = 0;
    String adj = "";
    List<Tree> leaves;
    String phraseNotation = "NP[<NNS|NN]!$VP";//@" + phrase + "! << @" + phrase;
    DesignElementClass designEle = new DesignElementClass();
    ArrayList designEleList = designEle.getDesignElementsList();

    /*For single Tree  */
    TregexPattern VBpattern = TregexPattern.compile(phraseNotation);
    TregexMatcher matcher = VBpattern.matcher(sTree);

    while (matcher.findNextMatchingNode()) {
        Tree match = matcher.getMatch();
        Tree[] innerChild = match.children();
        String a = "";
        boolean separatorExist = false;
        if (innerChild.length > 1) {
            int count = 1;
            adjectiveExist = 0;
            adjectiveNoun = 0;
            nnCount = 0;
            String attribute = "";
            adj = "";

            for (Tree inChild : innerChild) {
                //checks whether there are any separators
                if (inChild.value().equals("CC")) {
                    separatorExist = true;
                    attribute = "";
                    adjectiveExist = 0;
                    adjectiveNoun = 0;
                }
                //checks whether there are adjectives
                if ((inChild.value().equals("JJ")) || (inChild.value().equals("VBG"))) {
                    adjectiveExist++;
                    leaves = inChild.getLeaves();
                    adj = leaves.get(0).toString();
                    if (designEleList.contains(adj)) {
                        adj = "";
                    }

                }
                //if the adjective exist store the attributes
                if (adjectiveExist == 1) {
                    adjectiveNoun = storeAdjectiveAttribute(inChild, adjectiveNoun, nnCount, adj);
                }
            }
            if (adjectiveExist == 1 && adjectiveNoun == 0 && !adj.isEmpty()) {
                adjAttributeList.add(stemmingForAWord(adj));

            }
        }
    }

    System.out.println("ADJECTVE ATTRIBUTE :" + adjAttributeList);
    return adjAttributeList;

}