Example usage for edu.stanford.nlp.trees GrammaticalStructure typedDependenciesCollapsed

List of usage examples for edu.stanford.nlp.trees GrammaticalStructure typedDependenciesCollapsed

Introduction

In this page you can find the example usage for edu.stanford.nlp.trees GrammaticalStructure typedDependenciesCollapsed.

Prototype

public Collection<TypedDependency> typedDependenciesCollapsed() 

Source Link

Document

Get the typed dependencies after collapsing them.

Usage

From source file:artinex.TypDep.java

public static void main(String[] args) {
    // Parse a sample sentence, then print each token with its POS tag
    // followed by the collapsed typed dependencies of the parse.
    String sentence = "What is index in array";
    TypDep parser = new TypDep();
    Tree parseTree = parser.parse(sentence);

    // A leaf's POS tag is the label of its pre-terminal parent node.
    for (Tree leaf : parseTree.getLeaves()) {
        Tree preTerminal = leaf.parent(parseTree);
        System.out.print(leaf.label().value() + "-" + preTerminal.label().value() + " ");
    }
    System.out.println();

    // Build the grammatical structure and print the collapsed typed dependencies.
    TreebankLanguagePack languagePack = new PennTreebankLanguagePack();
    GrammaticalStructureFactory factory = languagePack.grammaticalStructureFactory();
    GrammaticalStructure structure = factory.newGrammaticalStructure(parseTree);
    Collection<TypedDependency> dependencies = structure.typedDependenciesCollapsed();
    System.out.println(dependencies);

}

From source file:DependencyParser.ParseDependency.java

/**
 * Extracts aspect/opinion word pairs from a sentence by matching a fixed set
 * of dependency-relation patterns over the collapsed typed dependencies of
 * its parse tree.
 *
 * @param str the raw sentence to analyse
 * @return the list of pairs found; empty when no pattern matches
 */
public List<String> getAspect_OpinionPair(String str) {
    sr = new StringReader(str);
    tkzr = PTBTokenizer.newPTBTokenizer(sr);
    List toks = tkzr.tokenize();
    Tree parse = (Tree) lp.apply(toks);

    // Build the collapsed typed dependencies for the sentence.
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    Collection<TypedDependency> td = gs.typedDependenciesCollapsed();
    System.out.println(td);

    List<String> ls = new ArrayList<String>();
    Object[] list = td.toArray();

    // Pattern 1: nsubj + nn.
    List<String> pair_1 = getPair(list, "nsubj", "nn", 1);
    for (String pair1 : pair_1) {
        if (!pair1.isEmpty()) {
            System.out.println("find pair1: " + pair1);
            ls.add(pair1);
        }
    }

    // Pattern 2: nsubj + xcomp, combined with dobj + nn.
    List<String> pair_2 = getPair(list, "nsubj", "xcomp", 2);
    for (String pair2 : pair_2) {
        if (!pair2.isEmpty()) {
            List<String> pair_22 = getPair(list, "dobj", "nn", 22);
            for (String pair22 : pair_22) {
                if (!pair22.isEmpty()) {
                    System.out.println("find pair2: (" + pair22 + "," + pair2 + ")");
                    ls.add(pair22 + ", " + pair2);
                }
            }
        }
    }

    // Pattern 3: nsubj + dobj.
    List<String> pair_3 = getPair(list, "nsubj", "dobj", 3);
    for (String pair3 : pair_3) {
        if (!pair3.isEmpty()) {
            System.out.println("find pair3: " + pair3);
            ls.add(pair3);
        }
    }

    // Pattern 4: nsubj + acomp.
    List<String> pair_4 = getPair(list, "nsubj", "acomp", 4);
    for (String pair4 : pair_4) {
        if (!pair4.isEmpty()) {
            System.out.println("find pair4: " + pair4);
            ls.add(pair4);
        }
    }

    // Pattern 5: nsubj + acomp again (mode flag 5), combined with rcmod + nn.
    // NOTE(review): same relations as pattern 4 with a different mode flag —
    // presumably getPair() branches on the flag; confirm this is intended.
    List<String> pair_5 = getPair(list, "nsubj", "acomp", 5);
    for (String pair5 : pair_5) {
        if (!pair5.isEmpty()) {
            List<String> pair_55 = getPair(list, "rcmod", "nn", 55);
            for (String pair55 : pair_55) {
                if (!pair55.isEmpty()) {
                    System.out.println("find pair5: " + pair55 + "," + pair5);
                    ls.add(pair55 + "," + pair5);
                }
            }
        }
    }

    // Pattern 6: amod alone.
    List<String> pair_6 = getPair(list, "amod", "", 6);
    for (String pair6 : pair_6) {
        if (!pair6.isEmpty()) {
            System.out.println("find pair6: " + pair6);
            ls.add(pair6);
        }
    }

    // Pattern 7: amod + amod.
    List<String> pair_7 = getPair(list, "amod", "amod", 7);
    for (String pair7 : pair_7) {
        if (!pair7.isEmpty()) {
            System.out.println("find pair7: " + pair7);
            ls.add(pair7);
        }
    }

    // Pattern 7a: amod + conj_and (mode 7).
    // BUG FIX: the original iterated pair_7 here, silently discarding the
    // pair_7a results computed on the previous line.
    List<String> pair_7a = getPair(list, "amod", "conj_and", 7);
    for (String pair7a : pair_7a) {
        if (!pair7a.isEmpty()) {
            System.out.println("find pair7a: " + pair7a);
            ls.add(pair7a);
        }
    }

    // Pattern 8: amod + conj_and (mode 8).
    List<String> pair_8 = getPair(list, "amod", "conj_and", 8);
    for (String pair8 : pair_8) {
        if (!pair8.isEmpty()) {
            // BUG FIX: the original log message said "find pair9".
            System.out.println("find pair8: " + pair8);
            ls.add(pair8);
        }
    }

    // Pattern 10: nsubj + nn (mode 10).
    List<String> pair_10 = getPair(list, "nsubj", "nn", 10);
    for (String pair10 : pair_10) {
        if (!pair10.isEmpty()) {
            System.out.println("find pair10: " + pair10);
            ls.add(pair10);
        }
    }

    // Pattern 11/12: nsubj + prep_with, combined with prep_with + nn.
    List<String> pair_11 = getPair(list, "nsubj", "prep_with", 11);
    for (String pair11a : pair_11) {
        if (!pair11a.isEmpty()) {
            List<String> pair_12 = getPair(list, "prep_with", "nn", 12);
            for (String pair12 : pair_12) {
                if (!pair12.isEmpty()) {
                    // BUG FIX: the original log message said "find paart12".
                    System.out.println("find pair12: " + pair12);
                    ls.add(pair12);
                }
            }
        }
    }

    return ls;
}

From source file:DependencyParser.RunStanfordParser.java

/**
 * Parses each line of the given file with the Stanford lexicalized parser
 * and prints, per line: the tokens, the noun/adjective words with their POS
 * tags, the stemmed words, the collapsed typed dependencies, the penn-format
 * tree, and the noun-phrase candidates found by {@code dfs}.
 *
 * @param filename path to the text file to parse, one sentence per line
 * @throws FileNotFoundException if {@code filename} does not exist
 * @throws IOException on any read failure
 */
public RunStanfordParser(String filename) throws FileNotFoundException, IOException {

    LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");

    WordStemmer ls = new WordStemmer(); // stemmer/lemmatizer, applied to each parse tree in place

    // Noun-phrase shapes matched against the penn-format tree,
    // e.g. (NP (DT a) (JJ temporary) (NN searcher)).
    // NOTE(review): "NN||NNS" contains an empty alternative (which matches at any
    // position) and the top-level " | " separators require literal spaces; this
    // regex probably does not match what was intended. Kept byte-identical here
    // to preserve existing behavior — verify against dfs() before changing.
    String reg = "(\\(DT \\w*\\) \\((NN||NNS||NNP) \\w*\\)) | (\\(DT \\w*\\) \\((JJ||JJR||JJS) \\w*\\) \\((NN||NNS||NNP) \\w*\\)) | (\\((NN||NNS||NNP) \\w*\\) \\((NN||NNS||NNP) \\w*\\)) | (\\(DT \\w*\\) \\((NN||NNS||NNP) \\w*\\) \\((NN||NNS||NNP) \\w*\\))";
    // Hoisted out of the read loop: the pattern is loop-invariant and
    // Pattern.compile is comparatively expensive.
    Pattern patt = Pattern.compile(reg);

    // try-with-resources: the original never closed the stream/reader (leak).
    // The redundant DataInputStream wrapper was also dropped — it added nothing.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(filename)))) {
        String strLine;
        while ((strLine = br.readLine()) != null) {
            System.out.println("Tokenizing and Parsing: " + strLine); // print current line to console

            // Each line needs its own reader because the tokenizer consumes a Reader.
            StringReader sr = new StringReader(strLine);
            PTBTokenizer tkzr = PTBTokenizer.newPTBTokenizer(sr);
            List toks = tkzr.tokenize();
            System.out.println("tokens: " + toks);

            Tree parse = (Tree) lp.apply(toks);

            ArrayList<String> words = new ArrayList();
            ArrayList<String> stems = new ArrayList();
            ArrayList<String> tags = new ArrayList();

            // Collect nouns (N*) and adjectives (J*) with their POS tags.
            // NOTE(review): the stemmer is applied BEFORE this collection (as in
            // the original), so "words" already holds stemmed forms — confirm
            // that is intended.
            ls.visitTree(parse); // apply the stemmer to the tree
            for (TaggedWord tw : parse.taggedYield()) {
                if (tw.tag().startsWith("N") || tw.tag().startsWith("J")) {
                    words.add(tw.word());
                    tags.add(tw.tag());
                }
            }
            System.out.println("Noun and Adjective words: " + words);
            System.out.println("POStags: " + tags);

            // Collect the full stemmed yield (stemming is applied again; kept
            // from the original — visitTree on an already-stemmed tree).
            ls.visitTree(parse);
            for (TaggedWord tw : parse.taggedYield()) {
                stems.add(tw.word());
            }

            // Collapsed typed dependencies of the parse.
            TreebankLanguagePack tlp = new PennTreebankLanguagePack();
            GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
            GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
            Collection tdl = gs.typedDependenciesCollapsed();

            System.out.println("words: " + words);
            System.out.println("POStags: " + tags);
            System.out.println("stemmedWordsAndTags: " + stems);
            System.out.println("typedDependencies: " + tdl);

            // Print the tree in "words,penn" format.
            TreePrint tp = new TreePrint("words,penn");
            tp.printTree(parse);

            System.out.println("Noun Phrases are: -------");
            System.out.println(" Noun Phrase List:..");
            dfs(parse, parse, patt);

            System.out.println(); // separate output lines
        }
    }

}

From source file:knowledgeextraction.EntityAttributeGraph.java

/**
 * Reads the first line of the configured input file, splits it into
 * sentences with CoreNLP, parses each sentence, and prints the best path
 * through the collapsed typed dependencies.
 *
 * @param args unused
 * @throws IOException if the input file cannot be read
 */
public static void main(String[] args) throws IOException {

    // try-with-resources: the original never closed the reader (leak).
    // Only the first line of the file is read; the rest is ignored
    // (behavior kept from the original).
    String text;
    try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) {
        text = reader.readLine();
    }

    Annotation document = new Annotation(text);
    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit");//, pos, lemma, ner, parse, dcoref");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    pipeline.annotate(document);
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);

    for (CoreMap sentence : sentences) {
        String input = sentence.toString();
        System.out.println(input);
        Tree tree = new EntityAttributeGraph().parse(input);
        System.out.println("tree: " + tree.toString());

        // Build the collapsed typed dependencies for this sentence.
        TreebankLanguagePack tlp = new PennTreebankLanguagePack();
        GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
        GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
        Collection<TypedDependency> td = gs.typedDependenciesCollapsed();

        Object[] list = td.toArray();
        PrintBestPath(list);
    }
}

From source file:nlpOperations.RunStanfordParser.java

/**
 * Tokenizes, tags, and stems a sentence, printing tokens, words, POS tags,
 * stems and collapsed typed dependencies along the way.
 *
 * @param sent the sentence to process
 * @return a human-readable report of tokens, words, tags, stems and
 *         typed dependencies, blank-line separated
 */
public static String tagOperations(String sent) {
    StringBuilder report = new StringBuilder();
    WordStemmer stemmer = new WordStemmer();

    // Tokenize with the PTB tokenizer (requires its own Reader per sentence).
    PTBTokenizer tokenizer = PTBTokenizer.newPTBTokenizer(new StringReader(sent));
    List tokens = tokenizer.tokenize();
    System.out.println("tokens: " + tokens);
    report.append("tokens: ").append(tokens).append("\n\n");

    Tree parse = (Tree) lp.apply(tokens);

    ArrayList<String> words = new ArrayList();
    ArrayList<String> stems = new ArrayList();
    ArrayList<String> tags = new ArrayList();

    // Words and POS tags from the untouched parse tree.
    for (TaggedWord tagged : parse.taggedYield()) {
        words.add(tagged.word());
        tags.add(tagged.tag());
    }

    // Stem the tree in place, then read back the stemmed yield.
    stemmer.visitTree(parse);
    for (TaggedWord tagged : parse.taggedYield()) {
        stems.add(tagged.word());
    }

    // Collapsed typed dependencies of the parse.
    TreebankLanguagePack languagePack = new PennTreebankLanguagePack();
    GrammaticalStructureFactory factory = languagePack.grammaticalStructureFactory();
    GrammaticalStructure structure = factory.newGrammaticalStructure(parse);
    Collection dependencies = structure.typedDependenciesCollapsed();

    System.out.println("words: " + words);
    System.out.println("POStags: " + tags);
    System.out.println("stemmedWords: " + stems);
    System.out.println("typedDependencies: " + dependencies);
    report.append("words: ").append(words).append("\n\n");
    report.append("POStags: ").append(tags).append("\n\n");
    report.append("stemmedWords: ").append(stems).append("\n\n");
    report.append("typedDependencies: ").append(dependencies).append("\n\n");

    TreePrint treePrinter = new TreePrint("wordsAndTags,latexTree");
    treePrinter.printTree(parse);
    System.out.println(); // separate output lines
    return report.toString();

}

From source file:nlpOperations.RunStanfordParser.java

/**
 * Tokenizes, tags, and stems a sentence and wraps each token into an
 * {@code ExpandedTerm} (original word, stem, POS tag, stop-word flag).
 *
 * @param sent the sentence to process
 * @return a Vector of ExpandedTerm, one per token, in sentence order
 */
public static Vector taggingStemming(String sent) {
    Vector expandedTerms = new Vector();
    StringBuilder report = new StringBuilder();
    WordStemmer stemmer = new WordStemmer();

    // Tokenize with the PTB tokenizer (requires its own Reader per sentence).
    PTBTokenizer tokenizer = PTBTokenizer.newPTBTokenizer(new StringReader(sent));
    List tokens = tokenizer.tokenize();
    System.out.println("tokens: " + tokens);
    report.append("tokens: ").append(tokens).append("\n\n");

    Tree parse = (Tree) lp.apply(tokens);

    ArrayList<String> words = new ArrayList();
    ArrayList<String> stems = new ArrayList();
    ArrayList<String> tags = new ArrayList();

    // Words and POS tags from the untouched parse tree.
    for (TaggedWord tagged : parse.taggedYield()) {
        words.add(tagged.word());
        tags.add(tagged.tag());
    }

    // Stem the tree in place, then read back the stemmed yield.
    stemmer.visitTree(parse);
    for (TaggedWord tagged : parse.taggedYield()) {
        stems.add(tagged.word());
    }

    // One ExpandedTerm per token: original form, stem, tag, stop-word flag.
    for (int i = 0; i < tokens.size(); i++) {
        ExpandedTerm term = new ExpandedTerm();
        term.setTermOriginWord(tokens.get(i).toString());
        term.setTermStemmedWord(stems.get(i));
        term.setTermTag(tags.get(i));
        term.setIsStopWord(StopWordList.isStopWord(stems.get(i)));
        expandedTerms.add(term);
    }

    // Collapsed typed dependencies of the parse (printed only).
    TreebankLanguagePack languagePack = new PennTreebankLanguagePack();
    GrammaticalStructureFactory factory = languagePack.grammaticalStructureFactory();
    GrammaticalStructure structure = factory.newGrammaticalStructure(parse);
    Collection dependencies = structure.typedDependenciesCollapsed();

    System.out.println("words: " + words);
    System.out.println("POStags: " + tags);
    System.out.println("stemmedWords: " + stems);
    System.out.println("typedDependencies: " + dependencies);
    report.append("words: ").append(words).append("\n\n");
    report.append("POStags: ").append(tags).append("\n\n");
    report.append("stemmedWordsAndTags: ").append(stems).append("\n\n");
    report.append("typedDependencies: ").append(dependencies).append("\n\n");

    TreePrint treePrinter = new TreePrint("wordsAndTags,latexTree");
    treePrinter.printTree(parse);
    System.out.println(); // separate output lines
    return expandedTerms;

}

From source file:nlpOperations.RunStanfordParser.java

/**
 * Batch driver: parses every line of a fixed input file with the Stanford
 * lexicalized parser and prints tokens, words, POS tags, stems, collapsed
 * typed dependencies, and a LaTeX-formatted tree per line.
 *
 * @param args unused
 * @throws Exception if the model cannot be loaded or the file cannot be read
 */
public static void main(String[] args) throws Exception {

    String fileToParse = "E:\\OWL\\test.txt";
    String englishDataUrl = "E:\\phd-project-tools\\q-system\\stanford-parser-full-2014-06-16\\stanford-parser-full-2014-06-16\\englishPCFG.ser.gz";
    LexicalizedParser lp = LexicalizedParser.loadModel(englishDataUrl, "-maxLength", "80",
            "-retainTmpSubcategories");

    WordStemmer ls = new WordStemmer(); // stemmer/lemmatizer, applied to each parse tree in place

    // try-with-resources: the original never closed the stream/reader (leak).
    // The redundant DataInputStream wrapper was also dropped — it added nothing.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(fileToParse)))) {
        String strLine;
        while ((strLine = br.readLine()) != null) {
            System.out.println("Tokenizing and Parsing: " + strLine); // print current line to console

            // Each line needs its own reader because the tokenizer consumes a Reader.
            StringReader sr = new StringReader(strLine);
            PTBTokenizer tkzr = PTBTokenizer.newPTBTokenizer(sr);
            List toks = tkzr.tokenize();
            System.out.println("tokens: " + toks);

            Tree parse = (Tree) lp.apply(toks);

            ArrayList<String> words = new ArrayList();
            ArrayList<String> stems = new ArrayList();
            ArrayList<String> tags = new ArrayList();

            // Words and POS tags from the untouched parse tree.
            for (TaggedWord tw : parse.taggedYield()) {
                words.add(tw.word());
                tags.add(tw.tag());
            }

            // Stem the tree in place, then read back the stemmed yield.
            ls.visitTree(parse);
            for (TaggedWord tw : parse.taggedYield()) {
                stems.add(tw.word());
            }

            // Collapsed typed dependencies of the parse.
            TreebankLanguagePack tlp = new PennTreebankLanguagePack();
            GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
            GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
            Collection tdl = gs.typedDependenciesCollapsed();

            System.out.println("words: " + words);
            System.out.println("POStags: " + tags);
            System.out.println("stemmedWordsAndTags: " + stems);
            System.out.println("typedDependencies: " + tdl);

            // Other TreePrint formats exist ("penn", "wordsAndTags",
            // "typedDependenciesCollapsed", "semanticGraph", ...); only the
            // LaTeX tree is printed here.
            TreePrint tp1 = new TreePrint("wordsAndTags,latexTree");
            tp1.printTree(parse);
            System.out.println(); // separate output lines
        }
    }

}

From source file:nlpOperations.RunStanfordParser.java

/**
 * Extracts candidate phrases from a sentence using the {@code nn} and
 * {@code amod} collapsed typed dependencies. Each phrase is emitted as
 * "dependentWord governorWord:charIndex#", where charIndex is the phrase's
 * position in the original sentence (-1 when the two words are not adjacent
 * in the sentence).
 *
 * @param sent the sentence to process
 * @return a '#'-separated string of "phrase:index" entries; empty when no
 *         nn/amod relation is present
 */
public static String getPhrases(String sent) {

    WordStemmer ls = new WordStemmer();
    // Tokenize with the PTB tokenizer (requires its own Reader per sentence).
    StringReader sr = new StringReader(sent);
    PTBTokenizer tkzr = PTBTokenizer.newPTBTokenizer(sr);
    List toks = tkzr.tokenize();
    System.out.println("tokens: " + toks);

    Tree parse = (Tree) lp.apply(toks);

    ArrayList<String> words = new ArrayList();
    ArrayList<String> stems = new ArrayList();
    ArrayList<String> tags = new ArrayList();

    // Words and POS tags from the untouched parse tree.
    for (TaggedWord tw : parse.taggedYield()) {
        words.add(tw.word());
        tags.add(tw.tag());
    }
    // Stem the tree in place, then read back the stemmed yield.
    ls.visitTree(parse);
    for (TaggedWord tw : parse.taggedYield()) {
        stems.add(tw.word());
    }

    // Collapsed typed dependencies of the parse.
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    Collection tdl = gs.typedDependenciesCollapsed();
    System.out.println("words: " + words);
    System.out.println("POStags: " + tags);
    System.out.println("stemmedWordsAndTags: " + stems);
    System.out.println("typedDependencies: " + tdl);

    // Keep only the nn and amod relations, '#'-separated, e.g.
    // "nn(homework-3, math-2)#amod(car-5, red-4)#".
    String requiredRelations = "";
    Object[] wordRelationsArr = tdl.toArray();
    for (int i = 0; i < wordRelationsArr.length; i++) {
        String oneRelation = wordRelationsArr[i].toString();
        if (oneRelation.trim().startsWith("nn") || (oneRelation.trim().startsWith("amod"))) {
            requiredRelations += oneRelation + "#";
        }
    }

    // Build phrases from the nn relations, then from the amod relations.
    String phrases = collectPhrases(sent, requiredRelations, "nn")
            + collectPhrases(sent, requiredRelations, "amod");

    System.out.println("phrases are  " + phrases);

    TreePrint tp1 = new TreePrint("wordsAndTags,latexTree");
    tp1.printTree(parse);
    System.out.println(); // separate output lines
    return phrases;
}

/**
 * Builds "dependent governor:index#" entries for every relation of the given
 * name inside a '#'-separated relation string like "nn(word1-3, word2-2)#".
 *
 * BUG FIX: the original used {@code String.replace(relName, "")}, which
 * deleted every occurrence of "nn"/"amod" anywhere in the string — including
 * inside the words themselves (e.g. "dinner" became "dier"). Here only the
 * relation-name prefix and the surrounding parentheses are stripped.
 *
 * @param sent              the original sentence, used to locate the phrase
 * @param requiredRelations '#'-separated nn/amod relation strings
 * @param relName           the relation to extract ("nn" or "amod")
 * @return concatenated "phrase:index#" entries for this relation
 */
private static String collectPhrases(String sent, String requiredRelations, String relName) {
    String phrases = "";
    for (String oneRelation : requiredRelations.split("#")) {
        String trimmed = oneRelation.trim();
        // Match the exact relation name, not just a prefix.
        if (!trimmed.startsWith(relName + "(")) {
            continue;
        }
        int open = trimmed.indexOf('(');
        int close = trimmed.lastIndexOf(')');
        if (open < 0 || close <= open) {
            continue; // malformed entry — skip rather than corrupt output
        }
        // Inside the parentheses: "governor-<idx>, dependent-<idx>".
        String[] parts = trimmed.substring(open + 1, close).split(",");
        if (parts.length < 2) {
            continue;
        }
        String w1 = parts[0].split("-")[0].trim();
        String w2 = parts[1].split("-")[0].trim();
        String phrase = w2 + " " + w1;
        int phraseIndex = sent.indexOf(phrase); // -1 when not adjacent in the sentence
        phrases += phrase + ":" + phraseIndex + "#";
    }
    return phrases;
}

From source file:tml.utils.StanfordUtils.java

License:Apache License

/**
 * Calculates the typed dependencies from a grammatical tree
 * @param tree the grammatical tree/*from w  w w .j  av  a 2s . com*/
 */
public static List<String> calculateTypedDependencies(Tree tree) {
    double time = System.nanoTime();
    List<String> output = new ArrayList<String>();
    GrammaticalStructure gs = null;
    try {
        gs = getGrammaticalStructureFactory().newGrammaticalStructure(tree);
    } catch (Exception e) {
        logger.error(e);
        return null;
    }

    Collection<TypedDependency> tdl = gs.typedDependenciesCollapsed();

    // Get the POS tag from each word
    Hashtable<String, String> posInfo = new Hashtable<String, String>();
    for (Tree t : tree.getLeaves()) {
        Tree pt = null;
        for (Tree tt : tree.dominationPath(t)) {
            if (tt.isLeaf()) {
                posInfo.put(tt.nodeString(), pt.nodeString());
            }
            pt = tt;
        }
    }

    for (Object obj : tdl.toArray()) {
        TypedDependency dep = (TypedDependency) obj;

        String wordGov = dep.gov().nodeString().split("-")[0];
        String wordDep = dep.dep().nodeString().split("-")[0];
        String posGov = posInfo.get(wordGov);
        String posDep = posInfo.get(wordDep);
        String dependencyString = dep.reln().toString() + "(" + dep.gov().pennString().trim() + "-" + posGov
                + ", " + dep.dep().pennString().trim() + "-" + posDep + ")";
        output.add(dependencyString);
    }

    time = System.nanoTime() - time;
    logger.debug("Typed dependencies obtained in " + time * 10E-6 + " milliseconds");
    return output;
}