Example usage for edu.stanford.nlp.process PTBTokenizer newPTBTokenizer

List of usage examples for edu.stanford.nlp.process PTBTokenizer newPTBTokenizer

Introduction

On this page you can find example usage for edu.stanford.nlp.process PTBTokenizer newPTBTokenizer.

Prototype

public static PTBTokenizer<Word> newPTBTokenizer(Reader r) 

Document

Constructs a new PTBTokenizer that returns Word tokens and which treats carriage returns as normal whitespace.
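
For orientation, here is a minimal, self-contained sketch of calling the factory directly (the class name and sample text are invented for illustration; it assumes the Stanford Parser jar is on the classpath):

import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.process.PTBTokenizer;

import java.io.StringReader;
import java.util.List;

public class PTBTokenizerDemo {
    public static void main(String[] args) {
        // The embedded newline is treated as ordinary whitespace, so both
        // lines end up in a single token stream.
        String text = "Dr. Smith isn't here.\nHe left (early).";
        PTBTokenizer<Word> tokenizer = PTBTokenizer.newPTBTokenizer(new StringReader(text));
        List<Word> tokens = tokenizer.tokenize();
        // PTB conventions apply: "isn't" splits into "is" + "n't", and
        // parentheses are normalized to -LRB- / -RRB-.
        System.out.println(tokens);
    }
}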

Usage

From source file:DependencyParser.ParseDependency.java

public List<String> getAspect_OpinionPair(String str) {
    sr = new StringReader(str);
    tkzr = PTBTokenizer.newPTBTokenizer(sr);
    List toks = tkzr.tokenize();
    Tree parse = (Tree) lp.apply(toks);
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    Collection<TypedDependency> td = gs.typedDependenciesCollapsed();
    System.out.println(td);
    List<String> ls = new ArrayList<String>();
    Object[] list = td.toArray();
    List<String> pair_1 = getPair(list, "nsubj", "nn", 1);
    for (String pair1 : pair_1) {
        if (!pair1.isEmpty()) {
            System.out.println("find pair1: " + pair1);
            ls.add(pair1);
        }
    }

    List<String> pair_2 = getPair(list, "nsubj", "xcomp", 2);
    for (String pair2 : pair_2) {
        if (!pair2.isEmpty()) {
            List<String> pair_22 = getPair(list, "dobj", "nn", 22);
            for (String pair22 : pair_22) {
                if (!pair22.isEmpty()) {
                    System.out.println("find pair2: (" + pair22 + "," + pair2 + ")");
                    ls.add(pair22 + ", " + pair2);
                }
            }
        }
    }
    List<String> pair_3 = getPair(list, "nsubj", "dobj", 3);
    for (String pair3 : pair_3) {
        if (!pair3.isEmpty()) {
            System.out.println("find pair3: " + pair3);
            ls.add(pair3);
        }
    }
    List<String> pair_4 = getPair(list, "nsubj", "acomp", 4);
    for (String pair4 : pair_4) {
        if (!pair4.isEmpty()) {
            System.out.println("find pair4: " + pair4);
            ls.add(pair4);
        }
    }
    List<String> pair_5 = getPair(list, "nsubj", "acomp", 5);
    for (String pair5 : pair_5) {
        if (!pair5.isEmpty()) {
            List<String> pair_55 = getPair(list, "rcmod", "nn", 55);
            for (String pair55 : pair_55) {
                if (!pair55.isEmpty()) {
                    System.out.println("find pair5: " + pair55 + "," + pair5);
                    ls.add(pair55 + "," + pair5);
                }
            }
        }
    }

    List<String> pair_6 = getPair(list, "amod", "", 6);
    for (String pair6 : pair_6) {
        if (!pair6.isEmpty()) {

            System.out.println("find pair6: " + pair6);
            ls.add(pair6);
        }
    }
    List<String> pair_7 = getPair(list, "amod", "amod", 7);
    for (String pair7 : pair_7) {
        if (!pair7.isEmpty()) {

            System.out.println("find pair7: " + pair7);
            ls.add(pair7);
        }
    }
    List<String> pair_7a = getPair(list, "amod", "conj_and", 7);
    for (String pair7a : pair_7a) {
        if (!pair7a.isEmpty()) {

            System.out.println("find pair7a: " + pair7a);
            ls.add(pair7a);
        }
    }
    List<String> pair_8 = getPair(list, "amod", "conj_and", 8);
    for (String pair8 : pair_8) {
        if (!pair8.isEmpty()) {

            System.out.println("find pair9: " + pair8);
            ls.add(pair8);
        }
    }
    List<String> pair_10 = getPair(list, "nsubj", "nn", 10);
    for (String pair10 : pair_10) {
        if (!pair10.isEmpty()) {

            System.out.println("find pair10: " + pair10);
            ls.add(pair10);
        }
    }
    List<String> pair_11 = getPair(list, "nsubj", "prep_with", 11);
    for (String pair11a : pair_11) {
        if (!pair11a.isEmpty()) {
            List<String> pair_12 = getPair(list, "prep_with", "nn", 12);
            for (String pair12 : pair_12) {
                if (!pair12.isEmpty()) {
                    System.out.println("find paart12: " + pair12);
                    ls.add(pair12);
                }
            }
        }
    }
    //System.out.println(list.length);
    //TypedDependency typedDependency,typedDependency2,typedDependency3;
    //for (Object object : list) {

    // getPair(list,TypedDependency typedDependency,TypedDependency typedDependency2,String relation);
    //typedDependency = (TypedDependency) object;
    //System.out.println("Depdency Name: "+typedDependency.dep().nodeString()+ " :: "+ "Node: "+typedDependency.reln()+":: Gov: "+typedDependency.gov().nodeString());
    //  if (typedDependency.reln().getShortName().equals("nsubj")) {

    //if(!pair1.isEmpty())
    //{
    //  System.out.println("find dependency: "+pair1);
    //}

    //}

    //}
    return ls;
}

From source file:DependencyParser.RunStanfordParser.java

public RunStanfordParser(String filename) throws FileNotFoundException, IOException {
    // input: the path of the file to parse

    String fileToParse = filename;

    LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
    //lp.setOptionFlags(new String[]{"-maxLength", "80", "-retainTmpSubcategories"}); // set max sentence length if you want

    // Call parser on files, and tokenize the contents
    BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(fileToParse)));
    StringReader sr; // we need to re-read each line into its own reader because the tokenizer is over-complicated garbage
    PTBTokenizer tkzr; // tokenizer object
    WordStemmer ls = new WordStemmer(); // stemmer/lemmatizer object

    // Read File Line By Line
    String strLine;
    while ((strLine = br.readLine()) != null) {
        System.out.println("Tokenizing and Parsing: " + strLine); // print current line to console

        // do all the standard java over-complication to use the stanford parser tokenizer
        sr = new StringReader(strLine);
        tkzr = PTBTokenizer.newPTBTokenizer(sr);
        List toks = tkzr.tokenize();
        System.out.println("tokens: " + toks);

        Tree parse = (Tree) lp.apply(toks); // finally, we actually get to parse something

        // Output Option 1: Printing out various data by accessing it programmatically

        // Get words, stemmed words and POS tags
        ArrayList<String> words = new ArrayList<>();
        ArrayList<String> stems = new ArrayList<>();
        ArrayList<String> tags = new ArrayList<>();

        // Get words and Tags
        ls.visitTree(parse); // apply the stemmer to the tree
        for (TaggedWord tw : parse.taggedYield()) {
            if (tw.tag().startsWith("N") || tw.tag().startsWith("J")) {
                words.add(tw.word());
                tags.add(tw.tag());
            }
        }
        System.out.println("Noun and Ajective words: " + words);
        System.out.println("POStags: " + tags);

        // Get stems
        ls.visitTree(parse); // apply the stemmer to the tree
        for (TaggedWord tw : parse.taggedYield()) {
            stems.add(tw.word());
        }

        // Get dependency tree
        TreebankLanguagePack tlp = new PennTreebankLanguagePack();
        GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
        GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
        Collection tdl = gs.typedDependenciesCollapsed();

        // And print!
        System.out.println("words: " + words);
        System.out.println("POStags: " + tags);
        System.out.println("stemmedWordsAndTags: " + stems);
        System.out.println("typedDependencies: " + tdl);
        //  getAspect_OpinionWord(tdl.toString(),words,tags);

        TreePrint tp = new TreePrint("words,penn");
        //TreePrint tn = new TreePrint("words,typedDependenciesCollapsed");
        //TreePrint to = new TreePrint("rootLabelOnlyFormat,penn");

        //System.out.println("Tree print"+tp.); 
        tp.printTree(parse);
        //tn.printTree(parse);
        System.out.println("Noun Phrases are: -------"); //(NP (DT a) (JJ temporary) (NN searcher))
        String reg = "(\\(DT \\w*\\) \\((NN||NNS||NNP) \\w*\\)) | (\\(DT \\w*\\) \\((JJ||JJR||JJS) \\w*\\) \\((NN||NNS||NNP) \\w*\\)) | (\\((NN||NNS||NNP) \\w*\\) \\((NN||NNS||NNP) \\w*\\)) | (\\(DT \\w*\\) \\((NN||NNS||NNP) \\w*\\) \\((NN||NNS||NNP) \\w*\\))";
        Pattern patt = Pattern.compile(reg);
        System.out.println(" Noun Phrase List:..");
        dfs(parse, parse, patt);

        //for (Tree subtree: parse)
        //{

        /* if(subtree.label().value().equals("NP"))
         {
                     
           String a=subtree.toString();  
          //System.out.println(a);
           Matcher match = patt.matcher(a.trim());
           while(match.find()) {
                System.out.println("NP: "+match.group());
           }
         }*/
        /*for(Tree np:subtree)
        {
            if(np.label().value().equals("NP"))
            {
                for(Tree n:np)
                {
                    if(np.label().value().equals("\\(DT \\w*\\) \\((NN||NNS||NNP) \\w*\\)"))
                    {
                         System.out.println("NP tag Tags: "+np);
                         System.out.println(Sentence.listToString(np.yield()));
                    }
                    else if(np.label().value().equals("\\((NN||NNS||NNP) \\w*\\) \\((NN||NNS||NNP) \\w*\\)"))
                    {
                        System.out.println("NP tag Tags: "+np);
                         System.out.println(Sentence.listToString(np.yield()));
                    }
                    else if(np.label().value().equals("\\(DT \\w*\\) \\((NN||NNS||NNP) \\w*\\) \\((NN||NNS||NNP) \\w*\\)"))
                    {
                        System.out.println("NP tag Tags: "+np);
                        System.out.println(Sentence.listToString(np.yield()));
                    }
                    else{
                        if(n.label().value().equals("NP"))
                        {
                      
                            System.out.println("N tag Tags: "+n);
                            System.out.println(Sentence.listToString(n.yield()));
                        }
                    }
                                
                            
                }
                       
            }
        }*/

        //}
        //}
        System.out.println(); // separate output lines
    }

}

From source file:edu.isi.mavuno.nlp.NLProcTools.java

License: Apache License

@SuppressWarnings("unchecked")
public List<List<Word>> getTagStrippedSentences(String text) {
    // tokenize the document
    PTBTokenizer<Word> tokenizer = PTBTokenizer.newPTBTokenizer(new StringReader(text));
    List<Word> documentWords = tokenizer.tokenize();

    // strip tags and detect sentences
    return mSentenceDetector.process(mTagStripper.process(documentWords));
}

From source file:nlpOperations.RunStanfordParser.java

public static String tagOperations(String sent) {
    String resultStr = "";
    StringReader sr;
    PTBTokenizer tkzr;
    WordStemmer ls = new WordStemmer();

    // do all the standard java over-complication to use the stanford parser tokenizer
    sr = new StringReader(sent);
    tkzr = PTBTokenizer.newPTBTokenizer(sr);
    List toks = tkzr.tokenize();
    System.out.println("tokens: " + toks);
    resultStr += "tokens: " + toks + "\n\n";

    Tree parse = (Tree) lp.apply(toks); // finally, we actually get to parse something

    // Get words, stemmed words and POS tags
    ArrayList<String> words = new ArrayList<>();
    ArrayList<String> stems = new ArrayList<>();
    ArrayList<String> tags = new ArrayList<>();

    // Get words and Tags
    for (TaggedWord tw : parse.taggedYield()) {
        words.add(tw.word());
        tags.add(tw.tag());
    }

    // Get stems
    ls.visitTree(parse); // apply the stemmer to the tree
    for (TaggedWord tw : parse.taggedYield()) {
        stems.add(tw.word());
    }

    // Get dependency tree
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    Collection tdl = gs.typedDependenciesCollapsed();

    // And print!
    System.out.println("words: " + words);
    System.out.println("POStags: " + tags);
    System.out.println("stemmedWords: " + stems);
    System.out.println("typedDependencies: " + tdl);
    resultStr += "words: " + words + "\n\n";
    resultStr += "POStags: " + tags + "\n\n";
    resultStr += "stemmedWords: " + stems + "\n\n";
    resultStr += "typedDependencies: " + tdl + "\n\n";

    TreePrint tp1 = new TreePrint("wordsAndTags,latexTree");
    tp1.printTree(parse);
    System.out.println(); // separate output lines
    return resultStr;

}

From source file:nlpOperations.RunStanfordParser.java

public static String sentStemming(Map sent) {
    String nounsStr = "";
    Iterator iter = sent.keySet().iterator();
    while (iter.hasNext()) {
        nounsStr += " " + (String) iter.next();
    }

    String outputStr = "";
    StringReader sr;
    PTBTokenizer tkzr;
    WordStemmer ls = new WordStemmer();

    // do all the standard java over-complication to use the stanford parser tokenizer
    sr = new StringReader(nounsStr);
    tkzr = PTBTokenizer.newPTBTokenizer(sr);
    List toks = tkzr.tokenize();

    Tree parse = (Tree) lp.apply(toks); // finally, we actually get to parse something

    // Get words, stemmed words and POS tags
    ArrayList<String> words = new ArrayList<>();
    ArrayList<String> stems = new ArrayList<>();
    ArrayList<String> tags = new ArrayList<>();

    // Get words and Tags
    for (TaggedWord tw : parse.taggedYield()) {
        words.add(tw.word());
        tags.add(tw.tag());
    }

    // Get stems
    ls.visitTree(parse); // apply the stemmer to the tree
    for (TaggedWord tw : parse.taggedYield()) {
        stems.add(tw.word());
    }
    for (int i = 0; i < stems.size(); i++) {
        outputStr += stems.get(i) + " ";
    }
    return outputStr;
}

From source file:nlpOperations.RunStanfordParser.java

public static String sentStemming(String sent) {

    String outputStr = "";
    StringReader sr;
    PTBTokenizer tkzr;
    WordStemmer ls = new WordStemmer();

    // do all the standard java over-complication to use the stanford parser tokenizer
    sr = new StringReader(sent);
    tkzr = PTBTokenizer.newPTBTokenizer(sr);
    List toks = tkzr.tokenize();

    Tree parse = (Tree) lp.apply(toks); // finally, we actually get to parse something

    // Get words, stemmed words and POS tags
    ArrayList<String> words = new ArrayList<>();
    ArrayList<String> stems = new ArrayList<>();
    ArrayList<String> tags = new ArrayList<>();

    // Get words and Tags
    for (TaggedWord tw : parse.taggedYield()) {
        words.add(tw.word());
        tags.add(tw.tag());
    }

    // Get stems
    ls.visitTree(parse); // apply the stemmer to the tree
    for (TaggedWord tw : parse.taggedYield()) {
        stems.add(tw.word());
    }
    for (int i = 0; i < stems.size(); i++) {
        outputStr += stems.get(i) + " ";
    }
    return outputStr;
}

From source file:nlpOperations.RunStanfordParser.java

public static Vector taggingStemming(String sent) {
    Vector resVector = new Vector();
    String resultStr = "";
    StringReader sr;
    PTBTokenizer tkzr;
    WordStemmer ls = new WordStemmer();

    // do all the standard java over-complication to use the stanford parser tokenizer
    sr = new StringReader(sent);
    tkzr = PTBTokenizer.newPTBTokenizer(sr);
    List toks = tkzr.tokenize();
    System.out.println("tokens: " + toks);
    resultStr += "tokens: " + toks + "\n\n";

    Tree parse = (Tree) lp.apply(toks); // finally, we actually get to parse something

    // Get words, stemmed words and POS tags
    ArrayList<String> words = new ArrayList<>();
    ArrayList<String> stems = new ArrayList<>();
    ArrayList<String> tags = new ArrayList<>();

    // Get words and Tags
    for (TaggedWord tw : parse.taggedYield()) {
        words.add(tw.word());
        tags.add(tw.tag());
    }

    // Get stems
    ls.visitTree(parse); // apply the stemmer to the tree
    for (TaggedWord tw : parse.taggedYield()) {
        stems.add(tw.word());
    }
    for (int i = 0; i < toks.size(); i++) {
        ExpandedTerm expandedTerm = new ExpandedTerm();
        expandedTerm.setTermOriginWord(toks.get(i).toString());
        expandedTerm.setTermStemmedWord(stems.get(i));
        expandedTerm.setTermTag(tags.get(i));
        expandedTerm.setIsStopWord(StopWordList.isStopWord(stems.get(i)));

        resVector.add(expandedTerm);
    }

    // Get dependency tree
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    Collection tdl = gs.typedDependenciesCollapsed();

    // And print!
    System.out.println("words: " + words);
    System.out.println("POStags: " + tags);
    System.out.println("stemmedWords: " + stems);
    System.out.println("typedDependencies: " + tdl);
    resultStr += "words: " + words + "\n\n";
    resultStr += "POStags: " + tags + "\n\n";
    resultStr += "stemmedWordsAndTags: " + stems + "\n\n";
    resultStr += "typedDependencies: " + tdl + "\n\n";

    TreePrint tp1 = new TreePrint("wordsAndTags,latexTree");
    tp1.printTree(parse);
    System.out.println(); // separate output lines
    return resVector;

}

From source file:nlpOperations.RunStanfordParser.java

public static Map getNouns(String sent) {
    String resultStr = "";
    StringReader sr;
    PTBTokenizer tkzr;
    Map<String, Integer> nouns = new HashMap<>();

    WordStemmer ls = new WordStemmer();
    ArrayList<String> stems = new ArrayList<>();

    // do all the standard java over-complication to use the stanford parser tokenizer
    sr = new StringReader(sent);
    tkzr = PTBTokenizer.newPTBTokenizer(sr);
    List toks = tkzr.tokenize();
    Tree parse = (Tree) lp.apply(toks); // finally, we actually get to parse something

    // Get stems
    ls.visitTree(parse);
    // Get words and Tags
    for (TaggedWord tw : parse.taggedYield()) {

        if (tw.tag().startsWith("N")) {
            int nounIndex = sent.indexOf(tw.word());
            resultStr += tw.word() + ":" + nounIndex + "#";
            nouns.put(tw.word(), nounIndex);

        }
        if (tw.tag().startsWith("JJ")) {
            int adjIndex = sent.indexOf(tw.word());
            resultStr += tw.word() + ":" + adjIndex + "#";
            nouns.put(tw.word(), adjIndex);

        }
    }

    return nouns;

}

From source file:nlpOperations.RunStanfordParser.java

public static void main(String[] args) throws Exception {

    String fileToParse = "E:\\OWL\\test.txt";
    String englishDataUrl = "E:\\phd-project-tools\\q-system\\stanford-parser-full-2014-06-16\\stanford-parser-full-2014-06-16\\englishPCFG.ser.gz";
    LexicalizedParser lp = LexicalizedParser.loadModel(englishDataUrl, "-maxLength", "80",
            "-retainTmpSubcategories");

    // Call parser on files, and tokenize the contents
    BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(fileToParse)));
    StringReader sr; // we need to re-read each line into its own reader because the tokenizer is over-complicated garbage
    PTBTokenizer tkzr; // tokenizer object
    WordStemmer ls = new WordStemmer(); // stemmer/lemmatizer object

    // Read File Line By Line
    String strLine;
    while ((strLine = br.readLine()) != null) {
        System.out.println("Tokenizing and Parsing: " + strLine); // print current line to console

        // do all the standard java over-complication to use the stanford parser tokenizer
        sr = new StringReader(strLine);
        tkzr = PTBTokenizer.newPTBTokenizer(sr);
        List toks = tkzr.tokenize();
        System.out.println("tokens: " + toks);

        Tree parse = (Tree) lp.apply(toks); // finally, we actually get to parse something

        // Output Option 1: Printing out various data by accessing it programmatically
        // Get words, stemmed words and POS tags
        ArrayList<String> words = new ArrayList<>();
        ArrayList<String> stems = new ArrayList<>();
        ArrayList<String> tags = new ArrayList<>();

        // Get words and Tags
        for (TaggedWord tw : parse.taggedYield()) {
            words.add(tw.word());
            tags.add(tw.tag());
        }

        // Get stems
        ls.visitTree(parse); // apply the stemmer to the tree
        for (TaggedWord tw : parse.taggedYield()) {
            stems.add(tw.word());
        }

        // Get dependency tree
        TreebankLanguagePack tlp = new PennTreebankLanguagePack();
        GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
        GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
        Collection tdl = gs.typedDependenciesCollapsed();

        // And print!
        System.out.println("words: " + words);
        System.out.println("POStags: " + tags);
        System.out.println("stemmedWordsAndTags: " + stems);
        System.out.println("typedDependencies: " + tdl);

        // Output Option 2: Printing out various data using TreePrint
        // Various TreePrint options
        //       "penn", // constituency parse
        //       "oneline",
        //       rootLabelOnlyFormat,
        //       "words",
        //       "wordsAndTags", // unstemmed words and pos tags
        //       "dependencies", // unlabeled dependency parse
        //       "typedDependencies", // dependency parse
        //       "typedDependenciesCollapsed",
        //       "latexTree",
        //       "collocations",
        //       "semanticGraph"
        // Print using TreePrint with various options
        //       TreePrint tp = new TreePrint("wordsAndTags,semanticGraph");
        //       tp.printTree(parse);
        //      System.out.println(); // separate output lines
        TreePrint tp1 = new TreePrint("wordsAndTags,latexTree");
        tp1.printTree(parse);
        System.out.println(); // separate output lines

        //                TreePrint tp2 = new TreePrint("wordsAndTags,collocations");
        //       tp2.printTree(parse);
        //      System.out.println(); // separate output lines
        //                
        //                TreePrint tp3 = new TreePrint("wordsAndTags,dependencies");
        //       tp3.printTree(parse);
        //      System.out.println(); // separate output lines
    }

}

From source file:nlpOperations.RunStanfordParser.java

public static String getPhrases(String sent) {

    StringReader sr;
    PTBTokenizer tkzr;
    WordStemmer ls = new WordStemmer();
    // do all the standard java over-complication to use the stanford parser tokenizer
    sr = new StringReader(sent);
    tkzr = PTBTokenizer.newPTBTokenizer(sr);
    List toks = tkzr.tokenize();
    System.out.println("tokens: " + toks);

    Tree parse = (Tree) lp.apply(toks); // finally, we actually get to parse something

    // Get words, stemmed words and POS tags
    ArrayList<String> words = new ArrayList<>();
    ArrayList<String> stems = new ArrayList<>();
    ArrayList<String> tags = new ArrayList<>();

    // Get words and Tags
    for (TaggedWord tw : parse.taggedYield()) {
        words.add(tw.word());
        tags.add(tw.tag());
    }
    // Get stems
    ls.visitTree(parse); // apply the stemmer to the tree
    for (TaggedWord tw : parse.taggedYield()) {
        stems.add(tw.word());
    }
    // Get dependency tree
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    Collection tdl = gs.typedDependenciesCollapsed();
    // And print!
    System.out.println("words: " + words);
    System.out.println("POStags: " + tags);
    System.out.println("stemmedWordsAndTags: " + stems);
    System.out.println("typedDependencies: " + tdl);
    /*
     * dependency manipulation
     */
    // remove [ ]

    //tokenization 
    Object[] wordRelationsArr = tdl.toArray();
    //get nn,amod relations
    String requiredRelations = "";
    for (int i = 0; i < wordRelationsArr.length; i++) {
        String oneRelation = wordRelationsArr[i].toString();
        if (oneRelation.trim().startsWith("nn") || (oneRelation.trim().startsWith("amod"))) {
            requiredRelations += oneRelation + "#";
        }
    }

    String phrases = "";
    //get nn words
    String[] requiredRelationsArr = requiredRelations.split("#");
    for (int i = 0; i < requiredRelationsArr.length; i++) {
        String oneRelation = requiredRelationsArr[i];
        if (oneRelation.trim().startsWith("nn")) {
            oneRelation = oneRelation.replace("(", "");
            oneRelation = oneRelation.replace(")", "");
            oneRelation = oneRelation.replace("nn", "");
            String[] oneRelationArr = oneRelation.split(",");
            String w1 = oneRelationArr[0].split("-")[0];
            String w2 = oneRelationArr[1].split("-")[0];
            int phraseIndex = sent.indexOf(w2.trim() + " " + w1.trim());
            phrases += w2.trim() + " " + w1.trim() + ":" + phraseIndex + "#";
        }
    }
    //get amod words

    String[] requiredRelationsArr2 = requiredRelations.split("#");
    for (int i = 0; i < requiredRelationsArr2.length; i++) {
        String oneRelation = requiredRelationsArr2[i];
        if (oneRelation.trim().startsWith("amod")) {
            oneRelation = oneRelation.replace("(", "");
            oneRelation = oneRelation.replace(")", "");
            oneRelation = oneRelation.replace("amod", "");
            String[] oneRelationArr = oneRelation.split(",");
            String w1 = oneRelationArr[0].split("-")[0];
            String w2 = oneRelationArr[1].split("-")[0];
            int phraseIndex = sent.indexOf(w2.trim() + " " + w1.trim());
            phrases += w2.trim() + " " + w1.trim() + ":" + phraseIndex + "#";
        }
    }

    System.out.println("phrases are  " + phrases);

    TreePrint tp1 = new TreePrint("wordsAndTags,latexTree");
    tp1.printTree(parse);
    System.out.println(); // separate output lines
    return phrases;
}