Example usage for edu.stanford.nlp.process PTBTokenizer newPTBTokenizer

Introduction

On this page you can find example usage for edu.stanford.nlp.process PTBTokenizer newPTBTokenizer.

Prototype

public static PTBTokenizer<Word> newPTBTokenizer(Reader r)

Source Link

Document

Constructs a new PTBTokenizer that returns Word tokens and which treats carriage returns as normal whitespace.

Usage

From source file:DependencyParser.ParseDependency.java

/**
 * Tokenizes and parses {@code str}, computes its collapsed typed dependencies, and collects
 * the non-empty strings that {@code getPair} reports for a fixed sequence of relation
 * combinations.
 *
 * <p>The reader and tokenizer are stored in the {@code sr} and {@code tkzr} members, and the
 * parse is produced by the {@code lp} member. The dependency collection and every accepted
 * pair are echoed to standard output.
 *
 * <p>NOTE(review): the meaning of the relation names and of the numeric last argument is
 * defined by {@code getPair}, which is not visible here; confirm against its implementation.
 *
 * @param str the sentence text to analyse
 * @return the collected pair strings, in the order the rules are evaluated
 */
public List<String> getAspect_OpinionPair(String str) {
    sr = new StringReader(str);
    tkzr = PTBTokenizer.newPTBTokenizer(sr);
    List toks = tkzr.tokenize();
    Tree parse = (Tree) lp.apply(toks);
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    Collection<TypedDependency> td = gs.typedDependenciesCollapsed();
    System.out.println(td);
    List<String> ls = new ArrayList<String>();
    Object[] list = td.toArray();

    List<String> pair_1 = getPair(list, "nsubj", "nn", 1);
    for (String pair1 : pair_1) {
        if (!pair1.isEmpty()) {
            System.out.println("find pair1: " + pair1);
            ls.add(pair1);
        }
    }

    // Rule 2 only gates rule 22: for every non-empty rule-2 hit, each rule-22 hit is combined with it.
    List<String> pair_2 = getPair(list, "nsubj", "xcomp", 2);
    for (String pair2 : pair_2) {
        if (!pair2.isEmpty()) {
            List<String> pair_22 = getPair(list, "dobj", "nn", 22);
            for (String pair22 : pair_22) {
                if (!pair22.isEmpty()) {
                    System.out.println("find pair2: (" + pair22 + "," + pair2 + ")");
                    ls.add(pair22 + ", " + pair2);
                }
            }
        }
    }

    List<String> pair_3 = getPair(list, "nsubj", "dobj", 3);
    for (String pair3 : pair_3) {
        if (!pair3.isEmpty()) {
            System.out.println("find pair3: " + pair3);
            ls.add(pair3);
        }
    }

    List<String> pair_4 = getPair(list, "nsubj", "acomp", 4);
    for (String pair4 : pair_4) {
        if (!pair4.isEmpty()) {
            System.out.println("find pair4: " + pair4);
            ls.add(pair4);
        }
    }

    // Same gating shape as rule 2: each rule-55 hit is combined with every non-empty rule-5 hit.
    List<String> pair_5 = getPair(list, "nsubj", "acomp", 5);
    for (String pair5 : pair_5) {
        if (!pair5.isEmpty()) {
            List<String> pair_55 = getPair(list, "rcmod", "nn", 55);
            for (String pair55 : pair_55) {
                if (!pair55.isEmpty()) {
                    System.out.println("find pair5: " + pair55 + "," + pair5);
                    ls.add(pair55 + "," + pair5);
                }
            }
        }
    }

    List<String> pair_6 = getPair(list, "amod", "", 6);
    for (String pair6 : pair_6) {
        if (!pair6.isEmpty()) {
            System.out.println("find pair6: " + pair6);
            ls.add(pair6);
        }
    }

    List<String> pair_7 = getPair(list, "amod", "amod", 7);
    for (String pair7 : pair_7) {
        if (!pair7.isEmpty()) {
            System.out.println("find pair7: " + pair7);
            ls.add(pair7);
        }
    }

    // Iterate the amod/conj_and result itself; looping over pair_7 here would add the
    // amod/amod hits a second time and silently drop the conj_and hits.
    List<String> pair_7a = getPair(list, "amod", "conj_and", 7);
    for (String pair7a : pair_7a) {
        if (!pair7a.isEmpty()) {
            System.out.println("find pair7a: " + pair7a);
            ls.add(pair7a);
        }
    }

    List<String> pair_8 = getPair(list, "amod", "conj_and", 8);
    for (String pair8 : pair_8) {
        if (!pair8.isEmpty()) {
            // NOTE(review): the label reads "pair9" although this is rule 8; kept as-is so
            // existing log output does not change.
            System.out.println("find pair9: " + pair8);
            ls.add(pair8);
        }
    }

    List<String> pair_10 = getPair(list, "nsubj", "nn", 10);
    for (String pair10 : pair_10) {
        if (!pair10.isEmpty()) {
            System.out.println("find pair10: " + pair10);
            ls.add(pair10);
        }
    }

    // Rule 11 only gates rule 12; the rule-11 string itself is not added to the result.
    List<String> pair_11 = getPair(list, "nsubj", "prep_with", 11);
    for (String pair11a : pair_11) {
        if (!pair11a.isEmpty()) {
            List<String> pair_12 = getPair(list, "prep_with", "nn", 12);
            for (String pair12 : pair_12) {
                if (!pair12.isEmpty()) {
                    System.out.println("find paart12: " + pair12);
                    ls.add(pair12);
                }
            }
        }
    }
    return ls;
}

From source file:DependencyParser.RunStanfordParser.java

/**
 * Reads {@code filename} line by line; each line is tokenized, parsed with the English PCFG
 * model, and reported on standard output (tokens, noun/adjective words with their tags, the
 * word list after the stemmer visit, collapsed typed dependencies, a "words,penn" tree print)
 * before the parse is handed to {@code dfs} together with the noun-phrase pattern.
 *
 * @param filename path of the text file to process, one sentence per line
 * @throws FileNotFoundException if the file cannot be opened
 * @throws IOException if reading or closing the file fails
 */
public RunStanfordParser(String filename) throws FileNotFoundException, IOException {
    LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
    WordStemmer ls = new WordStemmer(); // stemmer/lemmatizer object

    // The pattern does not depend on the line being processed, so compile it once.
    // NOTE(review): "||" introduces an empty alternative in each tag group and the spaces
    // around the top-level "|" are literal; the expression is kept byte-for-byte because
    // dfs(...) may rely on the current matching behaviour.
    String reg = "(\\(DT \\w*\\) \\((NN||NNS||NNP) \\w*\\)) | (\\(DT \\w*\\) \\((JJ||JJR||JJS) \\w*\\) \\((NN||NNS||NNP) \\w*\\)) | (\\((NN||NNS||NNP) \\w*\\) \\((NN||NNS||NNP) \\w*\\)) | (\\(DT \\w*\\) \\((NN||NNS||NNP) \\w*\\) \\((NN||NNS||NNP) \\w*\\))";
    Pattern patt = Pattern.compile(reg);

    BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
    try {
        String strLine;
        while ((strLine = br.readLine()) != null) {
            System.out.println("Tokenizing and Parsing: " + strLine); // print current line to console

            // The tokenizer consumes a Reader, so every line gets its own StringReader.
            PTBTokenizer tkzr = PTBTokenizer.newPTBTokenizer(new StringReader(strLine));
            List toks = tkzr.tokenize();
            System.out.println("tokens: " + toks);

            Tree parse = (Tree) lp.apply(toks);

            ArrayList<String> words = new ArrayList<String>();
            ArrayList<String> stems = new ArrayList<String>();
            ArrayList<String> tags = new ArrayList<String>();

            // The stemmer visits the tree before the words are collected, so "words" holds
            // whatever the tree leaves contain after that visit.
            ls.visitTree(parse);
            for (TaggedWord tw : parse.taggedYield()) {
                // Keep only tokens whose tag starts with N or J.
                if (tw.tag().startsWith("N") || tw.tag().startsWith("J")) {
                    words.add(tw.word());
                    tags.add(tw.tag());
                }
            }
            System.out.println("Noun and Ajective words: " + words);
            System.out.println("POStags: " + tags);

            // Second stemmer visit, then every leaf word (not only N/J tokens) goes to "stems".
            ls.visitTree(parse);
            for (TaggedWord tw : parse.taggedYield()) {
                stems.add(tw.word());
            }

            // Collapsed typed dependencies of the parse.
            TreebankLanguagePack tlp = new PennTreebankLanguagePack();
            GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
            GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
            Collection tdl = gs.typedDependenciesCollapsed();

            System.out.println("words: " + words);
            System.out.println("POStags: " + tags);
            System.out.println("stemmedWordsAndTags: " + stems);
            System.out.println("typedDependencies: " + tdl);

            TreePrint tp = new TreePrint("words,penn");
            tp.printTree(parse);

            System.out.println("Noun Phrases are: -------"); //(NP (DT a) (JJ temporary) (NN searcher))
            System.out.println(" Noun Phrase List:..");
            dfs(parse, parse, patt);

            System.out.println(); // separate output lines
        }
    } finally {
        // Release the file handle even when tokenizing or parsing a line fails.
        br.close();
    }
}

From source file:edu.isi.mavuno.nlp.NLProcTools.java

License:Apache License

/**
 * Tokenizes {@code text}, runs the tokens through the tag stripper and returns the output of
 * the sentence detector applied to the stripped tokens.
 *
 * @param text raw document text
 * @return the sentence detector's result for the tag-stripped token list
 */
@SuppressWarnings("unchecked")
public List<List<Word>> getTagStrippedSentences(String text) {
    // Run the PTB tokenizer over the raw text to obtain one flat token list.
    StringReader textReader = new StringReader(text);
    List<Word> tokens = PTBTokenizer.newPTBTokenizer(textReader).tokenize();

    // Strip tags first, then let the sentence detector group what is left.
    return mSentenceDetector.process(mTagStripper.process(tokens));
}

From source file:nlpOperations.RunStanfordParser.java

/**
 * Tokenizes and parses {@code sent} and builds a textual report with four sections: tokens,
 * words, POS tags, the leaf words after the stemmer visit, and the collapsed typed
 * dependencies. The same information is echoed to standard output, followed by a
 * "wordsAndTags,latexTree" tree print.
 *
 * @param sent the sentence to analyse
 * @return the report, each section terminated by a blank line
 */
public static String tagOperations(String sent) {
    WordStemmer stemmer = new WordStemmer();
    StringBuilder report = new StringBuilder();

    // The tokenizer reads from a Reader, so wrap the sentence in one.
    PTBTokenizer tokenizer = PTBTokenizer.newPTBTokenizer(new StringReader(sent));
    List tokens = tokenizer.tokenize();
    System.out.println("tokens: " + tokens);
    report.append("tokens: ").append(tokens).append("\n\n");

    Tree parse = (Tree) lp.apply(tokens);

    // Surface words and tags are captured before the stemmer touches the tree.
    ArrayList<String> words = new ArrayList<String>();
    ArrayList<String> tags = new ArrayList<String>();
    for (TaggedWord tagged : parse.taggedYield()) {
        words.add(tagged.word());
        tags.add(tagged.tag());
    }

    // After the stemmer visit, the leaf words are read again as the stems.
    stemmer.visitTree(parse);
    ArrayList<String> stems = new ArrayList<String>();
    for (TaggedWord tagged : parse.taggedYield()) {
        stems.add(tagged.word());
    }

    // Collapsed typed dependencies of the (now stemmed) parse.
    GrammaticalStructureFactory factory = new PennTreebankLanguagePack().grammaticalStructureFactory();
    Collection dependencies = factory.newGrammaticalStructure(parse).typedDependenciesCollapsed();

    System.out.println("words: " + words);
    System.out.println("POStags: " + tags);
    System.out.println("stemmedWords: " + stems);
    System.out.println("typedDependencies: " + dependencies);
    report.append("words: ").append(words).append("\n\n");
    report.append("POStags: ").append(tags).append("\n\n");
    report.append("stemmedWords: ").append(stems).append("\n\n");
    report.append("typedDependencies: ").append(dependencies).append("\n\n");

    new TreePrint("wordsAndTags,latexTree").printTree(parse);
    System.out.println(); // blank line after the tree print
    return report.toString();
}

From source file:nlpOperations.RunStanfordParser.java

/**
 * Joins the keys of {@code sent} into one space-separated string, tokenizes and parses it,
 * applies the stemmer to the parse and returns the resulting leaf words.
 *
 * @param sent map whose keys are the words to stem; every key must be a {@code String}
 * @return the leaf words after the stemmer visit, each followed by a single space
 * @throws ClassCastException if a key is not a {@code String}
 */
public static String sentStemming(Map sent) {
    // Each key is prefixed with a space, matching the text the tokenizer has always received.
    StringBuilder nounsStr = new StringBuilder();
    for (Object key : sent.keySet()) {
        nounsStr.append(' ').append((String) key);
    }

    WordStemmer ls = new WordStemmer();

    // The tokenizer reads from a Reader, so wrap the joined keys in one.
    PTBTokenizer tkzr = PTBTokenizer.newPTBTokenizer(new StringReader(nounsStr.toString()));
    List toks = tkzr.tokenize();

    Tree parse = (Tree) lp.apply(toks);

    // Only the post-stemming words are returned, so the pre-stemming word and tag lists
    // that used to be collected here (and never read) are gone.
    ls.visitTree(parse);
    StringBuilder outputStr = new StringBuilder();
    for (TaggedWord tw : parse.taggedYield()) {
        outputStr.append(tw.word()).append(' ');
    }
    return outputStr.toString();
}

From source file:nlpOperations.RunStanfordParser.java

/**
 * Tokenizes and parses {@code sent}, applies the stemmer to the parse and returns the
 * resulting leaf words.
 *
 * @param sent the sentence to stem
 * @return the leaf words after the stemmer visit, each followed by a single space
 */
public static String sentStemming(String sent) {
    WordStemmer ls = new WordStemmer();

    // The tokenizer reads from a Reader, so wrap the sentence in one.
    PTBTokenizer tkzr = PTBTokenizer.newPTBTokenizer(new StringReader(sent));
    List toks = tkzr.tokenize();

    Tree parse = (Tree) lp.apply(toks);

    // Only the post-stemming words are returned, so the pre-stemming word and tag lists
    // that used to be collected here (and never read) are gone.
    ls.visitTree(parse);
    StringBuilder outputStr = new StringBuilder();
    for (TaggedWord tw : parse.taggedYield()) {
        outputStr.append(tw.word()).append(' ');
    }
    return outputStr.toString();
}

From source file:nlpOperations.RunStanfordParser.java

/**
 * Tokenizes and parses {@code sent} and returns one {@code ExpandedTerm} per token, carrying
 * the original token text, the leaf word after the stemmer visit, the POS tag and the
 * stop-word flag computed from the stemmed word.
 *
 * <p>Tokens, words, tags, stems and collapsed typed dependencies are echoed to standard
 * output, followed by a "wordsAndTags,latexTree" tree print.
 *
 * @param sent the sentence to analyse
 * @return a vector of {@code ExpandedTerm}, in token order
 */
public static Vector taggingStemming(String sent) {
    Vector resVector = new Vector();
    WordStemmer ls = new WordStemmer();

    // The tokenizer reads from a Reader, so wrap the sentence in one.
    PTBTokenizer tkzr = PTBTokenizer.newPTBTokenizer(new StringReader(sent));
    List toks = tkzr.tokenize();
    System.out.println("tokens: " + toks);

    Tree parse = (Tree) lp.apply(toks);

    ArrayList<String> words = new ArrayList<String>();
    ArrayList<String> stems = new ArrayList<String>();
    ArrayList<String> tags = new ArrayList<String>();

    // Surface words and tags are captured before the stemmer touches the tree.
    for (TaggedWord tw : parse.taggedYield()) {
        words.add(tw.word());
        tags.add(tw.tag());
    }

    // After the stemmer visit, the leaf words are read again as the stems.
    ls.visitTree(parse);
    for (TaggedWord tw : parse.taggedYield()) {
        stems.add(tw.word());
    }

    // NOTE(review): assumes the parse yields exactly one leaf per token; if it yields fewer,
    // stems.get(i) / tags.get(i) throw IndexOutOfBoundsException. Confirm with the parser.
    for (int i = 0; i < toks.size(); i++) {
        ExpandedTerm expandedTerm = new ExpandedTerm();
        expandedTerm.setTermOriginWord(toks.get(i).toString());
        expandedTerm.setTermStemmedWord(stems.get(i));
        expandedTerm.setTermTag(tags.get(i));
        expandedTerm.setIsStopWord(StopWordList.isStopWord(stems.get(i)));

        resVector.add(expandedTerm);
    }

    // Collapsed typed dependencies are computed for the console report only.
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    Collection tdl = gs.typedDependenciesCollapsed();

    // The textual report string that used to be assembled alongside these prints was never
    // read or returned, so only the console output remains.
    System.out.println("words: " + words);
    System.out.println("POStags: " + tags);
    System.out.println("stemmedWords: " + stems);
    System.out.println("typedDependencies: " + tdl);

    TreePrint tp1 = new TreePrint("wordsAndTags,latexTree");
    tp1.printTree(parse);
    System.out.println(); // separate output lines
    return resVector;
}

From source file:nlpOperations.RunStanfordParser.java

/**
 * Tokenizes and parses {@code sent}, applies the stemmer to the parse, and maps every leaf
 * word whose tag starts with "N" or "JJ" to the index of its first occurrence in
 * {@code sent}.
 *
 * <p>NOTE(review): the lookup uses the word as it stands after the stemmer visit, so a word
 * the stemmer changed may map to -1 or to the position of a different substring; confirm
 * whether callers expect that.
 *
 * @param sent the sentence to analyse
 * @return map from word ({@code String}) to its {@code sent.indexOf} position
 *         ({@code Integer}); a word seen more than once keeps a single entry
 */
public static Map getNouns(String sent) {
    Map nouns = new HashMap();
    WordStemmer ls = new WordStemmer();

    // The tokenizer reads from a Reader, so wrap the sentence in one.
    PTBTokenizer tkzr = PTBTokenizer.newPTBTokenizer(new StringReader(sent));
    List toks = tkzr.tokenize();
    Tree parse = (Tree) lp.apply(toks);

    // The stemmer visits the tree before the words are read back.
    ls.visitTree(parse);
    for (TaggedWord tw : parse.taggedYield()) {
        String tag = tw.tag();
        // A tag cannot start with both "N" and "JJ", so one combined test covers both cases.
        if (tag.startsWith("N") || tag.startsWith("JJ")) {
            nouns.put(tw.word(), sent.indexOf(tw.word()));
        }
    }

    return nouns;
}

From source file:nlpOperations.RunStanfordParser.java

/**
 * Command-line driver: loads the English PCFG model from a fixed path, reads a fixed input
 * file line by line and, for each line, prints the tokens, words, POS tags, the leaf words
 * after the stemmer visit, the collapsed typed dependencies and a "wordsAndTags,latexTree"
 * tree print.
 *
 * @param args ignored
 * @throws Exception if the model or input file cannot be read, or parsing fails
 */
public static void main(String[] args) throws Exception {

    String fileToParse = "E:\\OWL\\test.txt";
    String englishDataUrl = "E:\\phd-project-tools\\q-system\\stanford-parser-full-2014-06-16\\stanford-parser-full-2014-06-16\\englishPCFG.ser.gz";
    LexicalizedParser lp = LexicalizedParser.loadModel(englishDataUrl, "-maxLength", "80",
            "-retainTmpSubcategories");
    WordStemmer ls = new WordStemmer(); // stemmer/lemmatizer object

    BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(fileToParse)));
    try {
        // Read the file line by line.
        String strLine;
        while ((strLine = br.readLine()) != null) {
            System.out.println("Tokenizing and Parsing: " + strLine); // print current line to console

            // The tokenizer consumes a Reader, so every line gets its own StringReader.
            PTBTokenizer tkzr = PTBTokenizer.newPTBTokenizer(new StringReader(strLine));
            List toks = tkzr.tokenize();
            System.out.println("tokens: " + toks);

            Tree parse = (Tree) lp.apply(toks);

            // Output option 1: access the data programmatically.
            ArrayList<String> words = new ArrayList<String>();
            ArrayList<String> stems = new ArrayList<String>();
            ArrayList<String> tags = new ArrayList<String>();

            // Surface words and tags are captured before the stemmer touches the tree.
            for (TaggedWord tw : parse.taggedYield()) {
                words.add(tw.word());
                tags.add(tw.tag());
            }

            // After the stemmer visit, the leaf words are read again as the stems.
            ls.visitTree(parse);
            for (TaggedWord tw : parse.taggedYield()) {
                stems.add(tw.word());
            }

            // Collapsed typed dependencies of the parse.
            TreebankLanguagePack tlp = new PennTreebankLanguagePack();
            GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
            GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
            Collection tdl = gs.typedDependenciesCollapsed();

            System.out.println("words: " + words);
            System.out.println("POStags: " + tags);
            System.out.println("stemmedWordsAndTags: " + stems);
            System.out.println("typedDependencies: " + tdl);

            // Output option 2: TreePrint. Format names listed by the original author:
            //   "penn" (constituency parse), "oneline", rootLabelOnlyFormat, "words",
            //   "wordsAndTags" (unstemmed words and pos tags), "dependencies" (unlabeled
            //   dependency parse), "typedDependencies" (dependency parse),
            //   "typedDependenciesCollapsed", "latexTree", "collocations", "semanticGraph"
            TreePrint tp1 = new TreePrint("wordsAndTags,latexTree");
            tp1.printTree(parse);
            System.out.println(); // separate output lines
        }
    } finally {
        // Release the file handle even when tokenizing or parsing a line fails.
        br.close();
    }

}

From source file:nlpOperations.RunStanfordParser.java

/**
 * Tokenizes and parses {@code sent}, then turns every "nn" and "amod" collapsed dependency
 * into a two-word phrase "dependent governor" paired with the phrase's first position in
 * {@code sent}.
 *
 * <p>The result is a concatenation of {@code phrase:index#} entries, all "nn" phrases first,
 * then all "amod" phrases. The index is {@code sent.indexOf(phrase)}, so it is -1 when the
 * phrase does not occur verbatim (the dependencies are read after the stemmer visit).
 * Tokens, words, tags, stems, dependencies and the phrase string are echoed to standard
 * output, followed by a "wordsAndTags,latexTree" tree print.
 *
 * @param sent the sentence to analyse
 * @return the {@code phrase:index#} entries, or an empty string when there are none
 */
public static String getPhrases(String sent) {
    WordStemmer ls = new WordStemmer();

    // The tokenizer reads from a Reader, so wrap the sentence in one.
    PTBTokenizer tkzr = PTBTokenizer.newPTBTokenizer(new StringReader(sent));
    List toks = tkzr.tokenize();
    System.out.println("tokens: " + toks);

    Tree parse = (Tree) lp.apply(toks);

    ArrayList<String> words = new ArrayList<String>();
    ArrayList<String> stems = new ArrayList<String>();
    ArrayList<String> tags = new ArrayList<String>();

    // Surface words and tags are captured before the stemmer touches the tree.
    for (TaggedWord tw : parse.taggedYield()) {
        words.add(tw.word());
        tags.add(tw.tag());
    }
    // After the stemmer visit, the leaf words are read again as the stems.
    ls.visitTree(parse);
    for (TaggedWord tw : parse.taggedYield()) {
        stems.add(tw.word());
    }

    // Collapsed typed dependencies of the parse.
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    Collection tdl = gs.typedDependenciesCollapsed();

    System.out.println("words: " + words);
    System.out.println("POStags: " + tags);
    System.out.println("stemmedWordsAndTags: " + stems);
    System.out.println("typedDependencies: " + tdl);

    // Two passes keep the output order: every nn phrase, then every amod phrase.
    Object[] wordRelationsArr = tdl.toArray();
    StringBuilder phrases = new StringBuilder();
    appendRelationPhrases(wordRelationsArr, "nn", sent, phrases);
    appendRelationPhrases(wordRelationsArr, "amod", sent, phrases);

    System.out.println("phrases are  " + phrases);

    TreePrint tp1 = new TreePrint("wordsAndTags,latexTree");
    tp1.printTree(parse);
    System.out.println(); // separate output lines
    return phrases.toString();
}

/**
 * Appends a {@code phrase:index#} entry to {@code out} for every relation whose text starts
 * with {@code relationName}; relations that do not look like {@code name(gov-i, dep-j)} are skipped.
 */
private static void appendRelationPhrases(Object[] relations, String relationName, String sent,
        StringBuilder out) {
    for (Object relation : relations) {
        String text = relation.toString().trim();
        if (!text.startsWith(relationName)) {
            continue;
        }
        // Cut the argument list out by position. Deleting every "nn"/"amod" substring instead
        // would also eat those letters inside the words themselves (e.g. "scanner").
        int open = text.indexOf('(');
        int close = text.lastIndexOf(')');
        if (open < 0 || close <= open) {
            continue;
        }
        String arguments = text.substring(open + 1, close);
        // Prefer ", " as the separator so a comma inside a token such as "1,000" is not split.
        int separator = arguments.indexOf(", ");
        if (separator < 0) {
            separator = arguments.indexOf(',');
        }
        if (separator < 0) {
            continue;
        }
        String governor = stripTokenIndex(arguments.substring(0, separator));
        String dependent = stripTokenIndex(arguments.substring(separator + 1));
        String phrase = dependent + " " + governor;
        out.append(phrase).append(':').append(sent.indexOf(phrase)).append('#');
    }
}

/**
 * Returns {@code node} trimmed and without its trailing {@code -index} suffix; cutting at the
 * last hyphen keeps hyphenated words such as "well-known" intact.
 */
private static String stripTokenIndex(String node) {
    String trimmed = node.trim();
    int dash = trimmed.lastIndexOf('-');
    return dash >= 0 ? trimmed.substring(0, dash) : trimmed;
}