List of usage examples for edu.stanford.nlp.tagger.maxent.MaxentTagger#tagTokenizedString
public String tagTokenizedString(String toTag)
From source file:stanford.java
public static void main4(String abc) throws IOException, ClassNotFoundException, Exception { // for(int a=8;a<=10;a++) {/*from www. ja v a2 s . c om*/ FileOutputStream fout = new FileOutputStream("C:\\Users\\AvinashKumarPrajapati\\Desktop\\bc.txt"); /*Error in this line*/ //File file = new File("C:\\Users\\AvinashKumarPrajapati\\Desktop\\p.txt"); //FileInputStream fis = new FileInputStream(file); //byte[] data = new byte[(int) file.length()]; //fis.read(data); //fis.close(); //strcpy(data,abc); MaxentTagger tagger = new MaxentTagger("taggers/wsj-0-18-bidirectional-nodistsim.tagger"); //String s=new String(data, "UTF-8"); String s = null; s = abc; String sample = s.replaceAll("\\W", " "); String tagged = tagger.tagTokenizedString(sample); String[] x = tagged.split(" "); ArrayList<String> list = new ArrayList<String>(); //verb for (int i = 0; i < x.length; i++) { if (x[i].substring(x[i].lastIndexOf("_") + 1).startsWith("V")) { list.add(x[i].split("_")[0]); } } //noun for (int i = 0; i < x.length; i++) { if (x[i].substring(x[i].lastIndexOf("_") + 1).startsWith("N")) { list.add(x[i].split("_")[0]); } } /* for(int i=0;i<x.length;i++) { if (x[i].substring(x[i].lastIndexOf("_")+1).startsWith("J")) { list.add(x[i].split("_")[0]); } } */ String bit = ""; for (int i = 0; i < list.size(); i++) { bit += list.get(i) + "\r\n "; System.out.println(list.get(i)); } byte b[] = bit.getBytes();//converting string into byte array fout.write(b); fout.close(); stanford stan = new stanford(); stanford.Stemmer stem = stan.new Stemmer(); stem.main1(); } }
From source file:tfidf.java
public static void main(String[] args) throws IOException, ClassNotFoundException, Exception { //for(int a=770;a<=799;a++) {//from w w w . j ava2 s. c o m //till 550,(800-1982) in 1 2 ------1887 //no file 1-3,20,264,1977,1973,1961,1957,1904,1872,1860,1854,1858,1844,1755,1766,1782,1725,1733,1738,1760,1578 //no file 1536,1542,1456,1466,1482,1494,1112,1177,1184,1299,1318,1323,1347,1358,1372,1383,1393,1433,1434,664 //no file 735,745 //heap//350,1838,1702,1644,383,514,820,857,925,618,985,1051,769 //Thread.sleep(2000); db = new OracleJDBC(); // System.out.println(" "+a+" "); FileOutputStream fout = new FileOutputStream("C:\\Users\\AvinashKumarPrajapati\\Desktop\\bc.txt"); File file = new File("C:\\Users\\AvinashKumarPrajapati\\Desktop\\pol.txt"); FileInputStream fis = new FileInputStream(file); byte[] data = new byte[(int) file.length()]; fis.read(data); fis.close(); MaxentTagger tagger = new MaxentTagger("taggers/wsj-0-18-bidirectional-nodistsim.tagger"); String s = new String(data, "UTF-8"); String sample = s.replaceAll("\\W", " "); String tagged = tagger.tagTokenizedString(sample); String[] x = tagged.split(" "); ArrayList<String> list = new ArrayList<String>(); for (int i = 0; i < x.length; i++) { if (x[i].substring(x[i].lastIndexOf("_") + 1).startsWith("N")) { list.add(x[i].split("_")[0]); } } String bit = ""; for (int i = 0; i < list.size(); i++) { bit += list.get(i) + "\r\n "; System.out.println(list.get(i)); } byte b[] = bit.getBytes();//converting string into byte array fout.write(b); fout.close(); stanford stan = new stanford(); stanford.Stemmer stem = stan.new Stemmer(); stem.main1(); try { db.finalize(); } catch (Throwable ex) { } } }
From source file:taggers.Bigram.java
public static void main(String args[]) throws FileNotFoundException, IOException { String corporaLocation = " "; String splitBy = " "; String line = ""; String pattern = "^[a-zA-Z0-9]*$"; String[] words = null;// for getting words in each line int total = 0; String corpus = ""; wordCount = new HashMap<String, Integer>(); bigramCount = new HashMap<String, Integer>(); tagCount = new HashMap<String, Integer>(); tagTransitionCount = new HashMap<String, Integer>(); wordLikelihoodCount = new HashMap<String, Integer>(); wordProbability = new HashMap<String, Double>(); try {/* w w w .j a v a2 s . c o m*/ MaxentTagger tagger = new MaxentTagger("taggers/left3words-wsj-0-18.tagger"); // UNIGRAM try (InputStream fis = new FileInputStream( "C:\\Users\\Paul G Mathew\\workspace2\\POSTagger\\src\\taggers\\corpus.txt"); InputStreamReader isr = new InputStreamReader(fis); BufferedReader br = new BufferedReader(isr);) { while ((line = br.readLine()) != null) { // System.out.println(line); corpus = corpus + " " + line.toLowerCase(); words = line.toLowerCase().split(splitBy); // System.out.println(line.toLowerCase()); for (int i = 0; i < words.length; i++) { // if (words[i].matches(pattern)) { total++; if (wordCount.containsKey(words[i])) { int count = wordCount.get(words[i]); wordCount.put(words[i], count + 1); } else { wordCount.put(words[i], 1); } // } } // The tagged string String tagged = tagger.tagString(corpus); String tt = tagger.tagTokenizedString(corpus); // String tt = tagger. 
// Output the result // System.out.println(tagged); // System.out.println(tt); String[] pp = tagged.split(" "); String[] posTag = new String[pp.length]; for (int i = 0; i < pp.length; i++) { // System.out.println(pp[i]); if (!wordLikelihoodCount.containsKey(pp[i])) { wordLikelihoodCount.put(pp[i], 1); } else { int count = wordLikelihoodCount.get(pp[i]); wordLikelihoodCount.put(pp[i], count + 1); } if (!tagCount.containsKey(pp[i].split("/")[1])) { tagCount.put(pp[i].split("/")[1], 1); } else { int count = tagCount.get(pp[i].split("/")[1]); tagCount.put(pp[i].split("/")[1], count + 1); } } for (int i = 0; i < pp.length - 1; i++) { String temp = pp[i].split("/")[1] + "/" + pp[i + 1].split("/")[1]; if (tagTransitionCount.containsKey(temp)) { int count = tagTransitionCount.get(temp); tagTransitionCount.put(temp, count + 1); } else { tagTransitionCount.put(temp, 1); } } } } String[] sss = corpus.split(" " + "\\." + " ");// to calculate // beginning of // sentence which // will be same as // end of sentences; int noSentences = sss.length; // System.out.println("----------------------------------------------------------->"+sss.length); // bigram count for (Map.Entry<String, Integer> entry : tagTransitionCount.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); } System.out.println("Total number" + total + " CORPUS -->" + corpus); // BIGRAM WORD COUNT String[] corpusarray = corpus.toLowerCase().split(splitBy); for (int i = 1; i < corpusarray.length - 1; i++) { String temp = corpusarray[i] + "/" + corpusarray[i + 1]; // System.out.println("temp - > " + temp + "i" + i); if (bigramCount.containsKey(temp)) { int count = bigramCount.get(temp); bigramCount.put(temp, count + 1); } else { bigramCount.put(temp, 1); } } for (Map.Entry<String, Integer> entry : wordLikelihoodCount.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); } // calculating tag transition Probability tagTransProb = 
new HashMap<String, Double>(); for (Map.Entry<String, Integer> entry : tagTransitionCount.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); String key = entry.getKey(); String[] a = key.split("/"); int value = entry.getValue(); // System.out.println("Prvious -- >>"a[0]); double prob = (double) value / (double) tagCount.get(a[0]); tagTransProb.put(key, prob); } System.out.println("Tag transition prob"); for (Map.Entry<String, Double> entry : tagTransProb.entrySet()) { System.out.println("Key : " + entry.getKey() + " Value : " + entry.getValue()); } // calculating word Likelihood Probability wordLikelihoodProb = new HashMap<String, Double>(); for (Map.Entry<String, Integer> entry : wordLikelihoodCount.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); String key = entry.getKey(); String[] a = key.split("/"); int value = entry.getValue(); double prob = (double) value / (double) tagCount.get(a[1]); wordLikelihoodProb.put(key, prob); } // System.out.println("word Likelihood prob"); for (Map.Entry<String, Double> entry : wordLikelihoodProb.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); } System.out.println("Bigram count"); for (Map.Entry<String, Integer> entry : bigramCount.entrySet()) { System.out.println("Key : " + entry.getKey() + " Value : " + entry.getValue()); } // System.out.println("HIIIII"); bigramProb = new HashMap<String, Double>(); // / don't forget to calculate bigram probability. 
for (Map.Entry<String, Integer> entry : bigramCount.entrySet()) { System.out.println("Key : " + entry.getKey() + " Value : " + entry.getValue()); String key = entry.getKey(); String[] a = key.split("/"); int value = entry.getValue(); double prob = (double) value / (double) wordCount.get(a[0]); bigramProb.put(key, prob); } // System.out.println("bigramProbability"); for (Map.Entry<String, Double> entry : bigramProb.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); } // getting declarative statement input from user PosTagger pp = new PosTagger(); String check = pp.check(); String[] sentence = check.split(" "); String[] temp = sentence; String[] posTag = new String[temp.length]; // for storing the tag // transition in // sentence for (int i = 0; i < temp.length; i++) { System.out.println(" " + temp[i].split("/")[1]); posTag[i] = temp[i].split("/")[1]; } HashMap<String, String> output = new HashMap<String, String>(); // Checking the grammer structure checkGrammer(sentence, posTag, output); // printting word count /* * Set set = bigramCount.entrySet(); Iterator i = set.iterator(); * while(i.hasNext()) { Map.Entry me = (Map.Entry)i.next(); * * // System.out.println(me.getKey()+"= "+me.getValue()); * * } */ // String s1 = // "The president has relinquished his control of the company's board"; // String s2 = // "The cheif executive officer said the last year revenue was good"; // for calculating bigram table and bigram probability // ---->>>Bigram b = new Bigram(); // --->>>>>> // b.calculateBigramTable(sent.toUpperCase().toLowerCase(), // bigramCount, wordCount, noSentences, total); } catch (Exception e) { System.out.println(e); } }
From source file:taggers.CopyOfBigram.java
public static void main(String args[]) throws FileNotFoundException, IOException { String corporaLocation = " "; String splitBy = " "; String line = ""; String pattern = "^[a-zA-Z0-9]*$"; String[] words = null;// for getting words in each line int total = 0; String corpus = ""; HashMap<String, Integer> wordCount = new HashMap<String, Integer>(); HashMap<String, Integer> bigramCount = new HashMap<String, Integer>(); HashMap<String, Integer> tagCount = new HashMap<String, Integer>(); HashMap<String, Integer> tagTransitionCount = new HashMap<String, Integer>(); HashMap<String, Integer> wordLikelihoodCount = new HashMap<String, Integer>(); HashMap<String, Double> wordProbability = new HashMap<String, Double>(); try {//from w w w . j a va 2 s. c om MaxentTagger tagger = new MaxentTagger("taggers/left3words-wsj-0-18.tagger"); // UNIGRAM try (InputStream fis = new FileInputStream( "C:\\Users\\Paul G Mathew\\workspace2\\POSTagger\\src\\taggers\\corpus.txt"); InputStreamReader isr = new InputStreamReader(fis); BufferedReader br = new BufferedReader(isr);) { while ((line = br.readLine()) != null) { // System.out.println(line); corpus = corpus + " " + line.toLowerCase(); words = line.toLowerCase().split(splitBy); // System.out.println(line.toLowerCase()); for (int i = 0; i < words.length; i++) { // if (words[i].matches(pattern)) { total++; if (wordCount.containsKey(words[i])) { int count = wordCount.get(words[i]); wordCount.put(words[i], count + 1); } else { wordCount.put(words[i], 1); } // } } // The tagged string String tagged = tagger.tagString(corpus); String tt = tagger.tagTokenizedString(corpus); // String tt = tagger. 
// Output the result System.out.println(tagged); System.out.println(tt); String[] pp = tagged.split(" "); String[] posTag = new String[pp.length]; for (int i = 0; i < pp.length; i++) { // System.out.println(pp[i]); if (!wordLikelihoodCount.containsKey(pp[i])) { wordLikelihoodCount.put(pp[i], 1); } else { int count = wordLikelihoodCount.get(pp[i]); wordLikelihoodCount.put(pp[i], count + 1); } if (!tagCount.containsKey(pp[i].split("/")[1])) { tagCount.put(pp[i].split("/")[1], 1); } else { int count = tagCount.get(pp[i].split("/")[1]); tagCount.put(pp[i].split("/")[1], count + 1); } } for (int i = 0; i < pp.length - 1; i++) { String temp = pp[i].split("/")[1] + "/" + pp[i + 1].split("/")[1]; if (tagTransitionCount.containsKey(temp)) { int count = tagTransitionCount.get(temp); tagTransitionCount.put(temp, count + 1); } else { tagTransitionCount.put(temp, 1); } } } } String[] sss = corpus.split(" " + "\\." + " ");// to calculate // beginning of // sentence which // will be same as // end of sentences; int noSentences = sss.length; // System.out.println("----------------------------------------------------------->"+sss.length); // bigram count for (Map.Entry<String, Integer> entry : tagTransitionCount.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); } System.out.println("Total number" + total + " CORPUS -->" + corpus); // BIGRAM WORD COUNT String[] corpusarray = corpus.toLowerCase().split(splitBy); for (int i = 1; i < corpusarray.length - 1; i++) { String temp = corpusarray[i] + "/" + corpusarray[i + 1]; // System.out.println("temp - > " + temp + "i" + i); if (bigramCount.containsKey(temp)) { int count = bigramCount.get(temp); bigramCount.put(temp, count + 1); } else { bigramCount.put(temp, 1); } } for (Map.Entry<String, Integer> entry : wordLikelihoodCount.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); } // calculating tag transition Probability HashMap<String, 
Double> tagTransProb = new HashMap<String, Double>(); for (Map.Entry<String, Integer> entry : tagTransitionCount.entrySet()) { System.out.println("Key : " + entry.getKey() + " Value : " + entry.getValue()); String key = entry.getKey(); String[] a = key.split("/"); int value = entry.getValue(); // System.out.println("Prvious -- >>"a[0]); double prob = (double) value / (double) tagCount.get(a[0]); tagTransProb.put(key, prob); } System.out.println("Tag transition prob"); for (Map.Entry<String, Double> entry : tagTransProb.entrySet()) { System.out.println("Key : " + entry.getKey() + " Value : " + entry.getValue()); } // calculating word Likelihood Probability HashMap<String, Double> wordLikelihoodProb = new HashMap<String, Double>(); for (Map.Entry<String, Integer> entry : wordLikelihoodCount.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); String key = entry.getKey(); String[] a = key.split("/"); int value = entry.getValue(); double prob = (double) value / (double) tagCount.get(a[1]); wordLikelihoodProb.put(key, prob); } System.out.println("word Likelihood prob"); for (Map.Entry<String, Double> entry : wordLikelihoodProb.entrySet()) { System.out.println("Key : " + entry.getKey() + " Value : " + entry.getValue()); } System.out.println("Bigram count"); for (Map.Entry<String, Integer> entry : bigramCount.entrySet()) { System.out.println("Key : " + entry.getKey() + " Value : " + entry.getValue()); } System.out.println("HIIIII"); HashMap<String, Double> bigramProb = new HashMap<String, Double>(); // / don't forget to calculate bigram probability. 
for (Map.Entry<String, Integer> entry : bigramCount.entrySet()) { System.out.println("Key : " + entry.getKey() + " Value : " + entry.getValue()); String key = entry.getKey(); String[] a = key.split("/"); int value = entry.getValue(); double prob = (double) value / (double) wordCount.get(a[0]); bigramProb.put(key, prob); } System.out.println("bigramProbability"); for (Map.Entry<String, Double> entry : bigramProb.entrySet()) { System.out.println("Key : " + entry.getKey() + " Value : " + entry.getValue()); } // getting declarative statement input from user PosTagger pp = new PosTagger(); String check = pp.check(); String[] checkgrammer = check.split(" "); String[] temp = checkgrammer; // Checking the grammer structure checkGrammer(checkgrammer, tagTransProb); // printting word count /* * Set set = bigramCount.entrySet(); Iterator i = set.iterator(); * while(i.hasNext()) { Map.Entry me = (Map.Entry)i.next(); * * // System.out.println(me.getKey()+"= "+me.getValue()); * * } */ // String s1 = // "The president has relinquished his control of the company's board"; // String s2 = // "The cheif executive officer said the last year revenue was good"; // for calculating bigram table and bigram probability // ---->>>Bigram b = new Bigram(); // --->>>>>> // b.calculateBigramTable(sent.toUpperCase().toLowerCase(), // bigramCount, wordCount, noSentences, total); } catch (Exception e) { System.out.println(e); } }
From source file:taggers.Copy_2_of_Bigram.java
public static void main(String args[]) throws FileNotFoundException, IOException { String corporaLocation = " "; String splitBy = " "; String line = ""; String pattern = "^[a-zA-Z0-9]*$"; String[] words = null;// for getting words in each line int total = 0; String corpus = ""; HashMap<String, Integer> wordCount = new HashMap<String, Integer>(); HashMap<String, Integer> bigramCount = new HashMap<String, Integer>(); HashMap<String, Integer> tagCount = new HashMap<String, Integer>(); HashMap<String, Integer> tagTransitionCount = new HashMap<String, Integer>(); HashMap<String, Integer> wordLikelihoodCount = new HashMap<String, Integer>(); HashMap<String, Double> wordProbability = new HashMap<String, Double>(); try {//from ww w . jav a 2 s .c om MaxentTagger tagger = new MaxentTagger("taggers/left3words-wsj-0-18.tagger"); // UNIGRAM try (InputStream fis = new FileInputStream( "C:\\Users\\Paul G Mathew\\workspace2\\POSTagger\\src\\taggers\\corpus.txt"); InputStreamReader isr = new InputStreamReader(fis); BufferedReader br = new BufferedReader(isr);) { while ((line = br.readLine()) != null) { // System.out.println(line); corpus = corpus + " " + line.toLowerCase(); words = line.toLowerCase().split(splitBy); // System.out.println(line.toLowerCase()); for (int i = 0; i < words.length; i++) { // if (words[i].matches(pattern)) { total++; if (wordCount.containsKey(words[i])) { int count = wordCount.get(words[i]); wordCount.put(words[i], count + 1); } else { wordCount.put(words[i], 1); } // } } // The tagged string String tagged = tagger.tagString(corpus); String tt = tagger.tagTokenizedString(corpus); // String tt = tagger. 
// Output the result // System.out.println(tagged); // System.out.println(tt); String[] pp = tagged.split(" "); String[] posTag = new String[pp.length]; for (int i = 0; i < pp.length; i++) { // System.out.println(pp[i]); if (!wordLikelihoodCount.containsKey(pp[i])) { wordLikelihoodCount.put(pp[i], 1); } else { int count = wordLikelihoodCount.get(pp[i]); wordLikelihoodCount.put(pp[i], count + 1); } if (!tagCount.containsKey(pp[i].split("/")[1])) { tagCount.put(pp[i].split("/")[1], 1); } else { int count = tagCount.get(pp[i].split("/")[1]); tagCount.put(pp[i].split("/")[1], count + 1); } } for (int i = 0; i < pp.length - 1; i++) { String temp = pp[i].split("/")[1] + "/" + pp[i + 1].split("/")[1]; if (tagTransitionCount.containsKey(temp)) { int count = tagTransitionCount.get(temp); tagTransitionCount.put(temp, count + 1); } else { tagTransitionCount.put(temp, 1); } } } } String[] sss = corpus.split(" " + "\\." + " ");// to calculate // beginning of // sentence which // will be same as // end of sentences; int noSentences = sss.length; // System.out.println("----------------------------------------------------------->"+sss.length); // bigram count for (Map.Entry<String, Integer> entry : tagTransitionCount.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); } System.out.println("Total number" + total + " CORPUS -->" + corpus); // BIGRAM WORD COUNT String[] corpusarray = corpus.toLowerCase().split(splitBy); for (int i = 1; i < corpusarray.length - 1; i++) { String temp = corpusarray[i] + "/" + corpusarray[i + 1]; // System.out.println("temp - > " + temp + "i" + i); if (bigramCount.containsKey(temp)) { int count = bigramCount.get(temp); bigramCount.put(temp, count + 1); } else { bigramCount.put(temp, 1); } } for (Map.Entry<String, Integer> entry : wordLikelihoodCount.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); } // calculating tag transition Probability HashMap<String, 
Double> tagTransProb = new HashMap<String, Double>(); for (Map.Entry<String, Integer> entry : tagTransitionCount.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); String key = entry.getKey(); String[] a = key.split("/"); int value = entry.getValue(); // System.out.println("Prvious -- >>"a[0]); double prob = (double) value / (double) tagCount.get(a[0]); tagTransProb.put(key, prob); } // System.out.println("Tag transition prob"); for (Map.Entry<String, Double> entry : tagTransProb.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); } // calculating word Likelihood Probability HashMap<String, Double> wordLikelihoodProb = new HashMap<String, Double>(); for (Map.Entry<String, Integer> entry : wordLikelihoodCount.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); String key = entry.getKey(); String[] a = key.split("/"); int value = entry.getValue(); double prob = (double) value / (double) tagCount.get(a[1]); wordLikelihoodProb.put(key, prob); } // System.out.println("word Likelihood prob"); for (Map.Entry<String, Double> entry : wordLikelihoodProb.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); } // System.out.println("Bigram count"); for (Map.Entry<String, Integer> entry : bigramCount.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); } // System.out.println("HIIIII"); HashMap<String, Double> bigramProb = new HashMap<String, Double>(); // / don't forget to calculate bigram probability. 
for (Map.Entry<String, Integer> entry : bigramCount.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); String key = entry.getKey(); String[] a = key.split("/"); int value = entry.getValue(); double prob = (double) value / (double) wordCount.get(a[0]); bigramProb.put(key, prob); } // System.out.println("bigramProbability"); for (Map.Entry<String, Double> entry : bigramProb.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); } // getting declarative statement input from user PosTagger pp = new PosTagger(); String check = pp.check(); String[] sentence = check.split(" "); String[] temp = sentence; String[] posTag = new String[temp.length]; // for storing the tag // transition in // sentence for (int i = 0; i < temp.length; i++) { System.out.println(" " + temp[i].split("/")[1]); posTag[i] = temp[i].split("/")[1]; } // Checking the grammer structure checkGrammer(sentence, tagTransProb, tagCount, posTag); // printting word count /* * Set set = bigramCount.entrySet(); Iterator i = set.iterator(); * while(i.hasNext()) { Map.Entry me = (Map.Entry)i.next(); * * // System.out.println(me.getKey()+"= "+me.getValue()); * * } */ // String s1 = // "The president has relinquished his control of the company's board"; // String s2 = // "The cheif executive officer said the last year revenue was good"; // for calculating bigram table and bigram probability // ---->>>Bigram b = new Bigram(); // --->>>>>> // b.calculateBigramTable(sent.toUpperCase().toLowerCase(), // bigramCount, wordCount, noSentences, total); } catch (Exception e) { System.out.println(e); } }
From source file:taggers.Copy_3_of_Bigram.java
public static void main(String args[]) throws FileNotFoundException, IOException { String corporaLocation = " "; String splitBy = " "; String line = ""; String pattern = "^[a-zA-Z0-9]*$"; String[] words = null;// for getting words in each line int total = 0; String corpus = ""; wordCount = new HashMap<String, Integer>(); bigramCount = new HashMap<String, Integer>(); tagCount = new HashMap<String, Integer>(); tagTransitionCount = new HashMap<String, Integer>(); wordLikelihoodCount = new HashMap<String, Integer>(); wordProbability = new HashMap<String, Double>(); answer = new HashMap<String, String>(); out = new ArrayList<String>(); try {/*from w w w . j a va 2 s. c o m*/ MaxentTagger tagger = new MaxentTagger("taggers/left3words-wsj-0-18.tagger"); // UNIGRAM try (InputStream fis = new FileInputStream( "C:\\Users\\Paul G Mathew\\workspace2\\POSTagger\\src\\taggers\\corpus.txt"); InputStreamReader isr = new InputStreamReader(fis); BufferedReader br = new BufferedReader(isr);) { while ((line = br.readLine()) != null) { // System.out.println(line); corpus = corpus + " " + line.toLowerCase(); words = line.toLowerCase().split(splitBy); // System.out.println(line.toLowerCase()); for (int i = 0; i < words.length; i++) { // if (words[i].matches(pattern)) { total++; if (wordCount.containsKey(words[i])) { int count = wordCount.get(words[i]); wordCount.put(words[i], count + 1); } else { wordCount.put(words[i], 1); } // } } // The tagged string String tagged = tagger.tagString(corpus); String tt = tagger.tagTokenizedString(corpus); // String tt = tagger. 
// Output the result // System.out.println(tagged); // System.out.println(tt); String[] pp = tagged.split(" "); String[] posTag = new String[pp.length]; for (int i = 0; i < pp.length; i++) { // System.out.println(pp[i]); if (!wordLikelihoodCount.containsKey(pp[i])) { wordLikelihoodCount.put(pp[i], 1); } else { int count = wordLikelihoodCount.get(pp[i]); wordLikelihoodCount.put(pp[i], count + 1); } if (!tagCount.containsKey(pp[i].split("/")[1])) { tagCount.put(pp[i].split("/")[1], 1); } else { int count = tagCount.get(pp[i].split("/")[1]); tagCount.put(pp[i].split("/")[1], count + 1); } } for (int i = 0; i < pp.length - 1; i++) { String temp = pp[i].split("/")[1] + "/" + pp[i + 1].split("/")[1]; if (tagTransitionCount.containsKey(temp)) { int count = tagTransitionCount.get(temp); tagTransitionCount.put(temp, count + 1); } else { tagTransitionCount.put(temp, 1); } } } } String[] sss = corpus.split(" " + "\\." + " ");// to calculate // beginning of // sentence which // will be same as // end of sentences; int noSentences = sss.length; // System.out.println("----------------------------------------------------------->"+sss.length); // bigram count for (Map.Entry<String, Integer> entry : tagTransitionCount.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); } // System.out.println("Total number" + total + " CORPUS -->" + // corpus); // BIGRAM WORD COUNT String[] corpusarray = corpus.toLowerCase().split(splitBy); for (int i = 1; i < corpusarray.length - 1; i++) { String temp = corpusarray[i] + "/" + corpusarray[i + 1]; // System.out.println("temp - > " + temp + "i" + i); if (bigramCount.containsKey(temp)) { int count = bigramCount.get(temp); bigramCount.put(temp, count + 1); } else { bigramCount.put(temp, 1); } } for (Map.Entry<String, Integer> entry : wordLikelihoodCount.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); } // calculating tag transition Probability 
tagTransProb = new HashMap<String, Double>(); for (Map.Entry<String, Integer> entry : tagTransitionCount.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); String key = entry.getKey(); String[] a = key.split("/"); int value = entry.getValue(); // System.out.println("Prvious -- >>"a[0]); double prob = (double) value / (double) tagCount.get(a[0]); tagTransProb.put(key, prob); } // System.out.println("Tag transition prob"); for (Map.Entry<String, Double> entry : tagTransProb.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); } // calculating word Likelihood Probability wordLikelihoodProb = new HashMap<String, Double>(); for (Map.Entry<String, Integer> entry : wordLikelihoodCount.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); String key = entry.getKey(); String[] a = key.split("/"); int value = entry.getValue(); double prob = (double) value / (double) tagCount.get(a[1]); wordLikelihoodProb.put(key, prob); } // System.out.println("word Likelihood prob"); for (Map.Entry<String, Double> entry : wordLikelihoodProb.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); } // System.out.println("Bigram count"); // for (Map.Entry<String, Integer> entry : bigramCount.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); // } // System.out.println("HIIIII"); bigramProb = new HashMap<String, Double>(); // / don't forget to calculate bigram probability. 
for (Map.Entry<String, Integer> entry : bigramCount.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); String key = entry.getKey(); String[] a = key.split("/"); int value = entry.getValue(); double prob = (double) value / (double) wordCount.get(a[0]); bigramProb.put(key, prob); } // System.out.println("bigramProbability"); for (Map.Entry<String, Double> entry : bigramProb.entrySet()) { // System.out.println("Key : " + entry.getKey() + " Value : " // + entry.getValue()); } // getting declarative statement input from user PosTagger pp = new PosTagger(); String check = pp.check(); String[] sentence = check.split(" "); String[] temp = sentence; String[] posTag = new String[temp.length]; String[] posTag2 = new String[temp.length];// for storing the tag // transition in // sentence for (int i = 0; i < temp.length; i++) { // System.out.println(" " + temp[i].split("/")[1]); posTag[i] = temp[i].split("/")[1]; posTag2[i] = temp[i].split("/")[1]; } HashMap<String, String> output = new HashMap<String, String>(); // Checking the grammer structure checkGrammer(sentence, posTag, output); // checkGrammer2(sentence, posTag2, output); // System.out.println(out); if (wrong) { System.out.println("incorrect grammer based on corpus"); System.out.println("--Suggested Correct grammer based on Corpus--"); for (int ii = 0; ii < out.size(); ii++) { String ss = out.get(ii); String[] ss2 = ss.split(" "); System.out.println("Correct sentence->"); for (int j = 1; j < ss2.length; j++) { System.out.print(ss2[j].split("/")[0] + " "); } System.out.println(" "); System.out.println("Correct syntactic Structure->"); for (int j = 1; j < ss2.length; j++) { System.out.print(ss2[j].split("/")[1] + " "); } System.out.println(""); } } else { System.out.println("Correct grammer based on corpus"); } /* * System.out.println("--output2--"); for (Map.Entry<String, String> * entry : answer.entrySet()) { System.out.println("Key : " + * entry.getKey() + " Value : " + 
entry.getValue()); } */ // printting word count /* * Set set = bigramCount.entrySet(); Iterator i = set.iterator(); * while(i.hasNext()) { Map.Entry me = (Map.Entry)i.next(); * * // System.out.println(me.getKey()+"= "+me.getValue()); * * } */ // String s1 = // "The president has relinquished his control of the company's board"; // String s2 = // "The cheif executive officer said the last year revenue was good"; // for calculating bigram table and bigram probability // ---->>>Bigram b = new Bigram(); // --->>>>>> // b.calculateBigramTable(sent.toUpperCase().toLowerCase(), // bigramCount, wordCount, noSentences, total); } catch (Exception e) { System.out.println(e); } }