Example usage for edu.stanford.nlp.tagger.maxent MaxentTagger tagTokenizedString

List of usage examples for edu.stanford.nlp.tagger.maxent MaxentTagger tagTokenizedString

Introduction

On this page you can find example usages of edu.stanford.nlp.tagger.maxent MaxentTagger tagTokenizedString.

Prototype

public String tagTokenizedString(String toTag) 

Source Link

Document

Tags the tokenized input string and returns the tagged version.

Usage

From source file:stanford.java

/**
 * Tags the given text, collects its verbs followed by its nouns, writes them
 * to {@code bc.txt} (one word per {@code "\r\n "} terminator), echoes them to
 * standard output and finally runs {@code Stemmer.main1()}.
 *
 * <p>Changes compared with the previous version:
 * <ul>
 *   <li>the output stream is managed by try-with-resources, so it is closed
 *       even when loading the tagger or tagging throws;</li>
 *   <li>the word part of a token is everything before the <em>last</em>
 *       underscore, matching how the tag part is located, so a word that
 *       itself contains {@code _} is no longer truncated;</li>
 *   <li>tokens without any underscore (e.g. the empty strings produced by
 *       consecutive blanks) carry no tag and are skipped;</li>
 *   <li>the output text is assembled with a {@code StringBuilder}.</li>
 * </ul>
 *
 * @param abc the raw text to tag; must not be {@code null}
 * @throws IOException if the output file cannot be opened or written
 * @throws ClassNotFoundException declared for the tagger/stemmer calls
 * @throws Exception declared for the tagger/stemmer calls
 */
public static void main4(String abc) throws IOException, ClassNotFoundException, Exception {

    // The file is opened (and therefore truncated) before anything else, as before.
    try (FileOutputStream fout = new FileOutputStream("C:\\Users\\AvinashKumarPrajapati\\Desktop\\bc.txt")) {

        MaxentTagger tagger = new MaxentTagger("taggers/wsj-0-18-bidirectional-nodistsim.tagger");

        // Every non-word character becomes a blank, which leaves plain
        // whitespace-separated tokens for tagTokenizedString.
        String sample = abc.replaceAll("\\W", " ");
        String[] tokens = tagger.tagTokenizedString(sample).split(" ");

        // Verbs first, then nouns: one pass over the tokens per tag prefix.
        ArrayList<String> list = new ArrayList<String>();
        String[] tagPrefixes = { "V", "N" };
        for (String prefix : tagPrefixes) {
            for (String token : tokens) {
                int sep = token.lastIndexOf('_');
                if (sep < 0) {
                    continue; // no tag attached to this token
                }
                if (token.startsWith(prefix, sep + 1)) {
                    list.add(token.substring(0, sep));
                }
            }
        }

        StringBuilder bit = new StringBuilder();
        for (String word : list) {
            bit.append(word).append("\r\n ");
            System.out.println(word);
        }
        // The words survived the \W filter above, so they hold no characters
        // that the regex treats as non-word characters.
        fout.write(bit.toString().getBytes());
    }

    // The stemmer runs only after the output file has been closed.
    stanford stan = new stanford();
    stanford.Stemmer stem = stan.new Stemmer();
    stem.main1();
}

From source file:tfidf.java

/**
 * Reads {@code pol.txt}, tags it, writes every noun to {@code bc.txt} (one
 * word per {@code "\r\n "} terminator), echoes the nouns to standard output
 * and then runs {@code Stemmer.main1()}.
 *
 * <p>Changes compared with the previous version:
 * <ul>
 *   <li>both streams are managed by try-with-resources and are closed on
 *       every path;</li>
 *   <li>the input file is read in a loop, because a single
 *       {@code read(byte[])} call is allowed to return fewer bytes than
 *       requested;</li>
 *   <li>the word part of a token is everything before the <em>last</em>
 *       underscore, matching how the tag part is located; tokens without an
 *       underscore carry no tag and are skipped;</li>
 *   <li>{@code db.finalize()} now runs in a {@code finally} block, and a
 *       failure while releasing it is reported on standard error instead of
 *       being silently dropped (it still never aborts the run).</li>
 * </ul>
 *
 * @param args unused
 * @throws IOException if either file cannot be opened, read or written
 * @throws ClassNotFoundException declared for the tagger/stemmer calls
 * @throws Exception declared for the tagger/stemmer calls
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, Exception {

    db = new OracleJDBC();
    try {
        File file = new File("C:\\Users\\AvinashKumarPrajapati\\Desktop\\pol.txt");

        // Output is opened first (and truncated) exactly as before.
        try (FileOutputStream fout = new FileOutputStream("C:\\Users\\AvinashKumarPrajapati\\Desktop\\bc.txt");
                FileInputStream fis = new FileInputStream(file)) {

            // Fill the buffer completely; stop early only at end of file.
            byte[] data = new byte[(int) file.length()];
            int filled = 0;
            while (filled < data.length) {
                int n = fis.read(data, filled, data.length - filled);
                if (n < 0) {
                    break;
                }
                filled += n;
            }

            MaxentTagger tagger = new MaxentTagger("taggers/wsj-0-18-bidirectional-nodistsim.tagger");
            String s = new String(data, 0, filled, "UTF-8");

            // Every non-word character becomes a blank before tagging.
            String sample = s.replaceAll("\\W", " ");
            String[] tokens = tagger.tagTokenizedString(sample).split(" ");

            // Keep the word part of every token whose tag starts with "N".
            ArrayList<String> list = new ArrayList<String>();
            for (String token : tokens) {
                int sep = token.lastIndexOf('_');
                if (sep < 0) {
                    continue; // no tag attached to this token
                }
                if (token.startsWith("N", sep + 1)) {
                    list.add(token.substring(0, sep));
                }
            }

            StringBuilder bit = new StringBuilder();
            for (String word : list) {
                bit.append(word).append("\r\n ");
                System.out.println(word);
            }
            fout.write(bit.toString().getBytes());
        }

        // The stemmer runs only after both files have been closed.
        stanford stan = new stanford();
        stanford.Stemmer stem = stan.new Stemmer();
        stem.main1();
    } finally {
        try {
            db.finalize();
        } catch (Throwable ex) {
            // Best effort: releasing the database handle must not mask the real outcome.
            System.err.println("Could not release database handle: " + ex);
        }
    }
}

From source file:taggers.Bigram.java

/**
 * Builds word, bigram and POS-tag statistics from {@code corpus.txt}, stores
 * them in the class-level maps, prints some of them, and then passes a
 * tagged sentence obtained from {@code PosTagger.check()} to
 * {@code checkGrammer}.
 *
 * <p>Maps assigned here: {@code wordCount}, {@code bigramCount},
 * {@code tagCount}, {@code tagTransitionCount}, {@code wordLikelihoodCount},
 * {@code wordProbability} (created empty, not filled by this method),
 * {@code tagTransProb}, {@code wordLikelihoodProb} and {@code bigramProb}.
 *
 * <p>Changes compared with the previous version:
 * <ul>
 *   <li>the corpus is tagged once, after it has been read completely. The
 *       old code re-tagged the whole accumulated text after every line, so
 *       the tokens of earlier lines were added to the tag counts again on
 *       every later line;</li>
 *   <li>the unused {@code tagTokenizedString} result, the unused locals and
 *       the empty loops are gone;</li>
 *   <li>the tag of a {@code word/TAG} token is whatever follows the
 *       <em>last</em> slash, and tokens without a slash are skipped instead
 *       of raising {@code ArrayIndexOutOfBoundsException};</li>
 *   <li>bigram probabilities divide by the count of the actual first word of
 *       the pair rather than re-parsing the key; a pair whose first word was
 *       never counted is left out instead of aborting the run with a
 *       {@code NullPointerException};</li>
 *   <li>the corpus text is accumulated in a {@code StringBuilder}.</li>
 * </ul>
 *
 * <p>Any exception raised while working is printed to standard output and
 * otherwise ignored, as before.
 *
 * @param args unused
 * @throws FileNotFoundException declared for compatibility
 * @throws IOException declared for compatibility
 */
public static void main(String args[]) throws FileNotFoundException, IOException {

    String splitBy = " ";
    String line;
    int total = 0;
    StringBuilder corpusText = new StringBuilder();
    wordCount = new HashMap<String, Integer>();
    bigramCount = new HashMap<String, Integer>();
    tagCount = new HashMap<String, Integer>();

    tagTransitionCount = new HashMap<String, Integer>();
    wordLikelihoodCount = new HashMap<String, Integer>();
    wordProbability = new HashMap<String, Double>();

    try {

        MaxentTagger tagger = new MaxentTagger("taggers/left3words-wsj-0-18.tagger");

        // UNIGRAM: lower-case every line, append it to the corpus and count its words.
        try (InputStream fis = new FileInputStream(
                "C:\\Users\\Paul G Mathew\\workspace2\\POSTagger\\src\\taggers\\corpus.txt");
                InputStreamReader isr = new InputStreamReader(fis);
                BufferedReader br = new BufferedReader(isr);) {
            while ((line = br.readLine()) != null) {
                String lowered = line.toLowerCase();
                corpusText.append(" ").append(lowered);
                for (String word : lowered.split(splitBy)) {
                    total++;
                    Integer seen = wordCount.get(word);
                    wordCount.put(word, seen == null ? 1 : seen + 1);
                }
            }
        }
        String corpus = corpusText.toString();

        // TAG COUNTS: tag the complete corpus once and count tokens, tags
        // and tag-to-tag transitions.
        String previousTag = null;
        for (String token : tagger.tagString(corpus).split(" ")) {
            int slash = token.lastIndexOf('/');
            if (slash < 0) {
                continue; // not a word/TAG token
            }
            Integer tokenSeen = wordLikelihoodCount.get(token);
            wordLikelihoodCount.put(token, tokenSeen == null ? 1 : tokenSeen + 1);

            String tag = token.substring(slash + 1);
            Integer tagSeen = tagCount.get(tag);
            tagCount.put(tag, tagSeen == null ? 1 : tagSeen + 1);

            if (previousTag != null) {
                String transition = previousTag + "/" + tag;
                Integer transitionSeen = tagTransitionCount.get(transition);
                tagTransitionCount.put(transition, transitionSeen == null ? 1 : transitionSeen + 1);
            }
            previousTag = tag;
        }

        System.out.println("Total number" + total + " CORPUS -->" + corpus);

        // BIGRAM WORD COUNT: index 0 is the empty string in front of the
        // leading blank of the corpus, so counting starts at 1.
        String[] corpusarray = corpus.toLowerCase().split(splitBy);
        for (int i = 1; i < corpusarray.length - 1; i++) {
            String pair = corpusarray[i] + "/" + corpusarray[i + 1];
            Integer pairSeen = bigramCount.get(pair);
            bigramCount.put(pair, pairSeen == null ? 1 : pairSeen + 1);
        }

        // Tag transition probability: count(prev/next) / count(prev).
        tagTransProb = new HashMap<String, Double>();
        for (Map.Entry<String, Integer> entry : tagTransitionCount.entrySet()) {
            String key = entry.getKey();
            String previous = key.substring(0, key.indexOf('/'));
            int value = entry.getValue();
            double prob = (double) value / (double) tagCount.get(previous);
            tagTransProb.put(key, prob);
        }

        System.out.println("Tag transition prob");
        for (Map.Entry<String, Double> entry : tagTransProb.entrySet()) {
            System.out.println("Key : " + entry.getKey() + " Value : " + entry.getValue());
        }

        // Word likelihood probability: count(word/TAG) / count(TAG).
        wordLikelihoodProb = new HashMap<String, Double>();
        for (Map.Entry<String, Integer> entry : wordLikelihoodCount.entrySet()) {
            String key = entry.getKey();
            String tag = key.substring(key.lastIndexOf('/') + 1);
            int value = entry.getValue();
            double prob = (double) value / (double) tagCount.get(tag);
            wordLikelihoodProb.put(key, prob);
        }

        System.out.println("Bigram count");
        for (Map.Entry<String, Integer> entry : bigramCount.entrySet()) {
            System.out.println("Key : " + entry.getKey() + " Value : " + entry.getValue());
        }

        // The previous version listed the bigram counts a second time while
        // computing the probabilities; that output is kept.
        for (Map.Entry<String, Integer> entry : bigramCount.entrySet()) {
            System.out.println("Key : " + entry.getKey() + " Value : " + entry.getValue());
        }

        // Bigram probability: count(first/second) / count(first).
        bigramProb = new HashMap<String, Double>();
        for (int i = 1; i < corpusarray.length - 1; i++) {
            String first = corpusarray[i];
            String pair = first + "/" + corpusarray[i + 1];
            Integer firstSeen = wordCount.get(first);
            // firstSeen is null only for an empty token created where a line
            // ending in a blank was joined to the next one.
            if (firstSeen != null && !bigramProb.containsKey(pair)) {
                int value = bigramCount.get(pair);
                bigramProb.put(pair, (double) value / (double) firstSeen);
            }
        }

        // getting declarative statement input from user
        PosTagger posTagger = new PosTagger();
        String check = posTagger.check();

        String[] sentence = check.split(" ");
        String[] posTag = new String[sentence.length]; // tag of every token of the sentence
        for (int i = 0; i < sentence.length; i++) {
            posTag[i] = sentence[i].split("/")[1];
            System.out.println(" " + posTag[i]);
        }

        HashMap<String, String> output = new HashMap<String, String>();
        // Checking the grammar structure
        checkGrammer(sentence, posTag, output);

    } catch (Exception e) {
        System.out.println(e);
    }

}

From source file:taggers.CopyOfBigram.java

/**
 * Builds word, bigram and POS-tag statistics from {@code corpus.txt}, prints
 * all of them, and then passes a tagged sentence obtained from
 * {@code PosTagger.check()} together with the tag transition probabilities
 * to {@code checkGrammer}.
 *
 * <p>Changes compared with the previous version:
 * <ul>
 *   <li>the corpus is tagged (and the two tagged strings are printed) once,
 *       after it has been read completely. The old code re-tagged the whole
 *       accumulated text after every line, so the tokens of earlier lines
 *       were added to the tag counts again on every later line;</li>
 *   <li>unused locals and empty loops are gone;</li>
 *   <li>the tag of a {@code word/TAG} token is whatever follows the
 *       <em>last</em> slash, and tokens without a slash are skipped instead
 *       of raising {@code ArrayIndexOutOfBoundsException};</li>
 *   <li>bigram probabilities divide by the count of the actual first word of
 *       the pair rather than re-parsing the key; a pair whose first word was
 *       never counted is left out instead of aborting the run with a
 *       {@code NullPointerException};</li>
 *   <li>the corpus text is accumulated in a {@code StringBuilder}.</li>
 * </ul>
 *
 * <p>Any exception raised while working is printed to standard output and
 * otherwise ignored, as before.
 *
 * @param args unused
 * @throws FileNotFoundException declared for compatibility
 * @throws IOException declared for compatibility
 */
public static void main(String args[]) throws FileNotFoundException, IOException {

    String splitBy = " ";
    String line;
    int total = 0;
    StringBuilder corpusText = new StringBuilder();
    HashMap<String, Integer> wordCount = new HashMap<String, Integer>();
    HashMap<String, Integer> bigramCount = new HashMap<String, Integer>();
    HashMap<String, Integer> tagCount = new HashMap<String, Integer>();

    HashMap<String, Integer> tagTransitionCount = new HashMap<String, Integer>();
    HashMap<String, Integer> wordLikelihoodCount = new HashMap<String, Integer>();

    try {

        MaxentTagger tagger = new MaxentTagger("taggers/left3words-wsj-0-18.tagger");

        // UNIGRAM: lower-case every line, append it to the corpus and count its words.
        try (InputStream fis = new FileInputStream(
                "C:\\Users\\Paul G Mathew\\workspace2\\POSTagger\\src\\taggers\\corpus.txt");
                InputStreamReader isr = new InputStreamReader(fis);
                BufferedReader br = new BufferedReader(isr);) {
            while ((line = br.readLine()) != null) {
                String lowered = line.toLowerCase();
                corpusText.append(" ").append(lowered);
                for (String word : lowered.split(splitBy)) {
                    total++;
                    Integer seen = wordCount.get(word);
                    wordCount.put(word, seen == null ? 1 : seen + 1);
                }
            }
        }
        String corpus = corpusText.toString();

        // Tag the complete corpus once and show both tagger outputs.
        String tagged = tagger.tagString(corpus);
        String tt = tagger.tagTokenizedString(corpus);
        System.out.println(tagged);
        System.out.println(tt);

        // TAG COUNTS: tokens, tags and tag-to-tag transitions.
        String previousTag = null;
        for (String token : tagged.split(" ")) {
            int slash = token.lastIndexOf('/');
            if (slash < 0) {
                continue; // not a word/TAG token
            }
            Integer tokenSeen = wordLikelihoodCount.get(token);
            wordLikelihoodCount.put(token, tokenSeen == null ? 1 : tokenSeen + 1);

            String tag = token.substring(slash + 1);
            Integer tagSeen = tagCount.get(tag);
            tagCount.put(tag, tagSeen == null ? 1 : tagSeen + 1);

            if (previousTag != null) {
                String transition = previousTag + "/" + tag;
                Integer transitionSeen = tagTransitionCount.get(transition);
                tagTransitionCount.put(transition, transitionSeen == null ? 1 : transitionSeen + 1);
            }
            previousTag = tag;
        }

        System.out.println("Total number" + total + " CORPUS -->" + corpus);

        // BIGRAM WORD COUNT: index 0 is the empty string in front of the
        // leading blank of the corpus, so counting starts at 1.
        String[] corpusarray = corpus.toLowerCase().split(splitBy);
        for (int i = 1; i < corpusarray.length - 1; i++) {
            String pair = corpusarray[i] + "/" + corpusarray[i + 1];
            Integer pairSeen = bigramCount.get(pair);
            bigramCount.put(pair, pairSeen == null ? 1 : pairSeen + 1);
        }

        // Tag transition probability: count(prev/next) / count(prev).
        HashMap<String, Double> tagTransProb = new HashMap<String, Double>();
        for (Map.Entry<String, Integer> entry : tagTransitionCount.entrySet()) {
            System.out.println("Key : " + entry.getKey() + " Value : " + entry.getValue());

            String key = entry.getKey();
            String previous = key.substring(0, key.indexOf('/'));
            int value = entry.getValue();
            double prob = (double) value / (double) tagCount.get(previous);
            tagTransProb.put(key, prob);
        }

        System.out.println("Tag transition prob");
        for (Map.Entry<String, Double> entry : tagTransProb.entrySet()) {
            System.out.println("Key : " + entry.getKey() + " Value : " + entry.getValue());
        }

        // Word likelihood probability: count(word/TAG) / count(TAG).
        HashMap<String, Double> wordLikelihoodProb = new HashMap<String, Double>();
        for (Map.Entry<String, Integer> entry : wordLikelihoodCount.entrySet()) {
            String key = entry.getKey();
            String tag = key.substring(key.lastIndexOf('/') + 1);
            int value = entry.getValue();
            double prob = (double) value / (double) tagCount.get(tag);
            wordLikelihoodProb.put(key, prob);
        }
        System.out.println("word Likelihood prob");
        for (Map.Entry<String, Double> entry : wordLikelihoodProb.entrySet()) {
            System.out.println("Key : " + entry.getKey() + " Value : " + entry.getValue());
        }

        System.out.println("Bigram count");
        for (Map.Entry<String, Integer> entry : bigramCount.entrySet()) {
            System.out.println("Key : " + entry.getKey() + " Value : " + entry.getValue());
        }
        System.out.println("HIIIII");

        // The previous version listed the bigram counts a second time while
        // computing the probabilities; that output is kept.
        for (Map.Entry<String, Integer> entry : bigramCount.entrySet()) {
            System.out.println("Key : " + entry.getKey() + " Value : " + entry.getValue());
        }

        // Bigram probability: count(first/second) / count(first).
        HashMap<String, Double> bigramProb = new HashMap<String, Double>();
        for (int i = 1; i < corpusarray.length - 1; i++) {
            String first = corpusarray[i];
            String pair = first + "/" + corpusarray[i + 1];
            Integer firstSeen = wordCount.get(first);
            // firstSeen is null only for an empty token created where a line
            // ending in a blank was joined to the next one.
            if (firstSeen != null && !bigramProb.containsKey(pair)) {
                int value = bigramCount.get(pair);
                bigramProb.put(pair, (double) value / (double) firstSeen);
            }
        }

        System.out.println("bigramProbability");
        for (Map.Entry<String, Double> entry : bigramProb.entrySet()) {
            System.out.println("Key : " + entry.getKey() + " Value : " + entry.getValue());
        }

        // getting declarative statement input from user
        PosTagger posTagger = new PosTagger();
        String check = posTagger.check();

        String[] checkgrammer = check.split(" ");

        // Checking the grammar structure
        checkGrammer(checkgrammer, tagTransProb);

    } catch (Exception e) {
        System.out.println(e);
    }

}

From source file:taggers.Copy_2_of_Bigram.java

/**
 * Builds POS-tag counts and tag transition probabilities from
 * {@code corpus.txt}, prints the corpus with its word total, and then passes
 * a tagged sentence obtained from {@code PosTagger.check()} to
 * {@code checkGrammer} together with those statistics.
 *
 * <p>Changes compared with the previous version:
 * <ul>
 *   <li>the corpus is tagged once, after it has been read completely. The
 *       old code re-tagged the whole accumulated text after every line, so
 *       the tokens of earlier lines were added to the tag counts again on
 *       every later line;</li>
 *   <li>the word, bigram and word-likelihood tables were computed into
 *       locals that nothing read afterwards (and could abort the run with a
 *       {@code NullPointerException} on the way); that dead work, the unused
 *       {@code tagTokenizedString} result and the empty loops are gone;</li>
 *   <li>the tag of a {@code word/TAG} token is whatever follows the
 *       <em>last</em> slash, and tokens without a slash are skipped instead
 *       of raising {@code ArrayIndexOutOfBoundsException};</li>
 *   <li>the corpus text is accumulated in a {@code StringBuilder}.</li>
 * </ul>
 *
 * <p>Any exception raised while working is printed to standard output and
 * otherwise ignored, as before.
 *
 * @param args unused
 * @throws FileNotFoundException declared for compatibility
 * @throws IOException declared for compatibility
 */
public static void main(String args[]) throws FileNotFoundException, IOException {

    String splitBy = " ";
    String line;
    int total = 0;
    StringBuilder corpusText = new StringBuilder();
    HashMap<String, Integer> tagCount = new HashMap<String, Integer>();
    HashMap<String, Integer> tagTransitionCount = new HashMap<String, Integer>();

    try {

        MaxentTagger tagger = new MaxentTagger("taggers/left3words-wsj-0-18.tagger");

        // Read the corpus: lower-case every line, append it and count its words.
        try (InputStream fis = new FileInputStream(
                "C:\\Users\\Paul G Mathew\\workspace2\\POSTagger\\src\\taggers\\corpus.txt");
                InputStreamReader isr = new InputStreamReader(fis);
                BufferedReader br = new BufferedReader(isr);) {
            while ((line = br.readLine()) != null) {
                String lowered = line.toLowerCase();
                corpusText.append(" ").append(lowered);
                total += lowered.split(splitBy).length;
            }
        }
        String corpus = corpusText.toString();

        // TAG COUNTS: tag the complete corpus once and count tags and
        // tag-to-tag transitions.
        String previousTag = null;
        for (String token : tagger.tagString(corpus).split(" ")) {
            int slash = token.lastIndexOf('/');
            if (slash < 0) {
                continue; // not a word/TAG token
            }
            String tag = token.substring(slash + 1);
            Integer tagSeen = tagCount.get(tag);
            tagCount.put(tag, tagSeen == null ? 1 : tagSeen + 1);

            if (previousTag != null) {
                String transition = previousTag + "/" + tag;
                Integer transitionSeen = tagTransitionCount.get(transition);
                tagTransitionCount.put(transition, transitionSeen == null ? 1 : transitionSeen + 1);
            }
            previousTag = tag;
        }

        System.out.println("Total number" + total + " CORPUS -->" + corpus);

        // Tag transition probability: count(prev/next) / count(prev).
        HashMap<String, Double> tagTransProb = new HashMap<String, Double>();
        for (Map.Entry<String, Integer> entry : tagTransitionCount.entrySet()) {
            String key = entry.getKey();
            String previous = key.substring(0, key.indexOf('/'));
            int value = entry.getValue();
            double prob = (double) value / (double) tagCount.get(previous);
            tagTransProb.put(key, prob);
        }

        // getting declarative statement input from user
        PosTagger posTagger = new PosTagger();
        String check = posTagger.check();

        String[] sentence = check.split(" ");
        String[] posTag = new String[sentence.length]; // tag of every token of the sentence
        for (int i = 0; i < sentence.length; i++) {
            posTag[i] = sentence[i].split("/")[1];
            System.out.println(" " + posTag[i]);
        }

        // Checking the grammar structure
        checkGrammer(sentence, tagTransProb, tagCount, posTag);

    } catch (Exception e) {
        System.out.println(e);
    }

}

From source file:taggers.Copy_3_of_Bigram.java

/**
 * Builds unigram, word-bigram, tag-transition and word-likelihood tables from
 * the corpus file, then validates a tagged sentence against them.
 *
 * <p>Steps:
 * <ol>
 * <li>Read the corpus line by line, lower-case it, and count every
 * space-separated word into {@code wordCount}.</li>
 * <li>Tag the complete corpus once and count {@code word/TAG} tokens, tags and
 * adjacent tag pairs.</li>
 * <li>Count adjacent word pairs and derive the three probability tables
 * ({@code tagTransProb}, {@code wordLikelihoodProb}, {@code bigramProb}).</li>
 * <li>Obtain a tagged sentence from {@code PosTagger.check()}, hand it to
 * {@code checkGrammer}, and print the verdict and any suggestions held in
 * {@code out}.</li>
 * </ol>
 *
 * <p>All tables are stored in class-level fields. Any exception raised while
 * doing the work is caught and printed to standard output.
 *
 * @param args command-line arguments; not used
 * @throws FileNotFoundException kept in the signature for compatibility
 * @throws IOException kept in the signature for compatibility
 */
public static void main(String args[]) throws FileNotFoundException, IOException {

    wordCount = new HashMap<String, Integer>();
    bigramCount = new HashMap<String, Integer>();
    tagCount = new HashMap<String, Integer>();

    tagTransitionCount = new HashMap<String, Integer>();
    wordLikelihoodCount = new HashMap<String, Integer>();
    wordProbability = new HashMap<String, Double>();
    answer = new HashMap<String, String>();
    out = new ArrayList<String>();

    try {

        MaxentTagger tagger = new MaxentTagger("taggers/left3words-wsj-0-18.tagger");

        // Unigram counts; the lower-cased corpus is accumulated with a
        // leading space before every line.
        StringBuilder corpusText = new StringBuilder();
        try (InputStream fis = new FileInputStream(
                "C:\\Users\\Paul G Mathew\\workspace2\\POSTagger\\src\\taggers\\corpus.txt");
                InputStreamReader isr = new InputStreamReader(fis);
                BufferedReader br = new BufferedReader(isr)) {
            String line;
            while ((line = br.readLine()) != null) {
                String lowered = line.toLowerCase();
                corpusText.append(' ').append(lowered);
                for (String word : lowered.split(" ")) {
                    incrementCount(wordCount, word);
                }
            }
        }
        String corpus = corpusText.toString();

        // Tag the whole corpus exactly once. Tagging the growing corpus after
        // every line would count the tokens of early lines again and again.
        if (corpus.length() > 0) {
            String previousTag = null;
            for (String token : tagger.tagString(corpus).split(" ")) {
                String tag = tagOf(token);
                if (tag == null) {
                    continue; // not a word/TAG pair (e.g. an empty token)
                }
                incrementCount(wordLikelihoodCount, token);
                incrementCount(tagCount, tag);
                if (previousTag != null) {
                    incrementCount(tagTransitionCount, previousTag + "/" + tag);
                }
                previousTag = tag;
            }
        }

        // Word bigram counts. Index 0 is the empty string in front of the
        // leading space, so counting starts at index 1.
        String[] corpusWords = corpus.split(" ");
        for (int i = 1; i < corpusWords.length - 1; i++) {
            incrementCount(bigramCount, corpusWords[i] + "/" + corpusWords[i + 1]);
        }

        // Tag transition probability: count(t1 t2) / count(t1).
        tagTransProb = new HashMap<String, Double>();
        for (Map.Entry<String, Integer> entry : tagTransitionCount.entrySet()) {
            String transition = entry.getKey();
            // Tags never contain '/', so the first '/' is the separator.
            String fromTag = transition.substring(0, transition.indexOf('/'));
            int pairCount = entry.getValue();
            double prob = (double) pairCount / (double) tagCount.get(fromTag);
            tagTransProb.put(transition, prob);
        }

        // Word likelihood probability: count(word/TAG) / count(TAG).
        wordLikelihoodProb = new HashMap<String, Double>();
        for (Map.Entry<String, Integer> entry : wordLikelihoodCount.entrySet()) {
            String token = entry.getKey();
            int tokenCount = entry.getValue();
            double prob = (double) tokenCount / (double) tagCount.get(tagOf(token));
            wordLikelihoodProb.put(token, prob);
        }

        // Bigram probability: count(w1 w2) / count(w1). The first word is
        // taken from the token array instead of being parsed back out of the
        // key, because a word may itself contain '/'.
        bigramProb = new HashMap<String, Double>();
        for (int i = 1; i < corpusWords.length - 1; i++) {
            String bigram = corpusWords[i] + "/" + corpusWords[i + 1];
            Integer firstWordCount = wordCount.get(corpusWords[i]);
            if (firstWordCount == null) {
                // Can happen for the empty token created where a line ends
                // in a space; there is no unigram count to divide by.
                continue;
            }
            int pairCount = bigramCount.get(bigram);
            bigramProb.put(bigram, (double) pairCount / (double) firstWordCount);
        }

        // getting declarative statement input from user
        PosTagger pp = new PosTagger();
        String check = pp.check();

        String[] sentence = check.split(" ");
        // tag of every token of the sentence, in order
        String[] posTag = new String[sentence.length];
        for (int i = 0; i < sentence.length; i++) {
            posTag[i] = sentence[i].split("/")[1];
        }

        HashMap<String, String> output = new HashMap<String, String>();
        // Checking the grammar structure.
        // NOTE(review): `wrong` and `out` are presumably updated by
        // checkGrammer - confirm against its definition.
        checkGrammer(sentence, posTag, output);
        if (wrong) {
            System.out.println("incorrect grammer based on corpus");
            System.out.println("--Suggested Correct grammer based on Corpus--");
            for (int ii = 0; ii < out.size(); ii++) {
                String[] suggestion = out.get(ii).split(" ");
                System.out.println("Correct sentence->");
                // element 0 is skipped, as in the stored suggestion format
                for (int j = 1; j < suggestion.length; j++) {
                    System.out.print(suggestion[j].split("/")[0] + " ");
                }
                System.out.println(" ");
                System.out.println("Correct syntactic Structure->");
                for (int j = 1; j < suggestion.length; j++) {
                    System.out.print(suggestion[j].split("/")[1] + " ");
                }

                System.out.println("");
            }
        } else {
            System.out.println("Correct grammer based on corpus");
        }

    } catch (Exception e) {
        System.out.println(e);
    }

}

/**
 * Adds one to the count stored under {@code key}, starting at 1 when the key
 * is not present yet.
 *
 * @param counts table to update
 * @param key entry whose count is increased
 */
private static void incrementCount(Map<String, Integer> counts, String key) {
    Integer current = counts.get(key);
    counts.put(key, current == null ? 1 : current + 1);
}

/**
 * Extracts the tag of a {@code word/TAG} token. The tag is everything after
 * the last '/', so a word that itself contains a slash is handled.
 *
 * @param token tagged token
 * @return the tag, or {@code null} when the token contains no '/'
 */
private static String tagOf(String token) {
    int slash = token.lastIndexOf('/');
    return slash < 0 ? null : token.substring(slash + 1);
}