List of usage examples for the edu.stanford.nlp.tagger.maxent.MaxentTagger constructor
public MaxentTagger(InputStream modelStream)
From source file:Dependency.java
public static void main(String[] args) { String modelPath = DependencyParser.DEFAULT_MODEL; String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger"; Scanner sc = new Scanner(System.in); String text = ""; text = sc.nextLine();/* ww w.ja v a 2s . c o m*/ // while(text!="exit"){ MaxentTagger tagger = new MaxentTagger(taggerPath); DependencyParser parser = DependencyParser.loadFromModelFile(modelPath); DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text)); for (List<HasWord> sentence : tokenizer) { List<TaggedWord> tagged = tagger.tagSentence(sentence); Object[] x = tagged.toArray(); GrammaticalStructure gs = parser.predict(tagged); //System.out.println(); Collection<TypedDependency> s = gs.typedDependenciesCollapsedTree(); Object[] z = s.toArray(); System.out.println(tagged.toString()); String token[] = new String[z.length]; String pos[] = new String[z.length]; int k = 0; for (Object i : x) { String str = i.toString(); /*String temp0="(.*?)(?=\\/)"; String temp1="\\/(.*)"; System.out.println(str); Pattern t0 = Pattern.compile("(.*?)(?=\\/)"); Pattern t1 = Pattern.compile("\\/(.*)"); Matcher m0 = t0.matcher(str); Matcher m1 = t1.matcher(str);*/ int index = str.lastIndexOf('/'); token[k] = str.substring(0, index); pos[k] = str.substring(index + 1); //System.out.println(pos[k]); k++; } String rels[] = new String[z.length]; String word1[] = new String[z.length]; String word2[] = new String[z.length]; int j = 0; for (Object i : z) { System.out.println(i); String temp = i.toString(); String pattern0 = "(.*)(?=\\()"; String pattern1 = "(?<=\\()(.*?)(?=-)"; String pattern2 = "(?<=, )(.*)(?=-)"; Pattern r0 = Pattern.compile(pattern0); Pattern r1 = Pattern.compile(pattern1); Pattern r2 = Pattern.compile(pattern2); Matcher m0 = r0.matcher(temp); Matcher m1 = r1.matcher(temp); Matcher m2 = r2.matcher(temp); if (m0.find()) { rels[j] = m0.group(0); //System.out.println(rels[j]); } if (m1.find()) { 
word1[j] = m1.group(0); } if (m2.find()) { word2[j] = m2.group(0); } j++; } //System.out.println(s); //Rules for feature extraction. //rule1::::::::::::::::: //System.out.println("1"); int[] q = toIntArray(grRecog(rels, "nsubj")); //System.out.println("2"); if (q.length != 0) { //System.out.println("3"); if (posrecog(token, pos, word2[q[0]]).equals("NN")) { //System.out.println("4"); int[] w = toIntArray(grRecog(rels, "compound")); //System.out.println("5"); if (w.length != 0) { System.out.println("6"); System.out.println(word1[q[0]] + "," + word2[q[0]] + "," + word2[w[0]]); } else { int conj_and_index = compgrRecog(rels, word1, word2, "conj:and", word2[q[0]]); if (conj_and_index != -1) { System.out.println( word1[conj_and_index] + "," + word2[conj_and_index] + "," + word2[q[0]]); } else System.out.println(word1[q[0]] + "," + word2[q[0]]); } } //RULE 2::::::::::::: else if (posrecog(token, pos, word1[q[0]]).equals("JJ")) { //System.out.println("aaaaa_JJ"); int a = compgrRecog(rels, word1, word2, "xcomp", word1[q[0]]); if (a != -1) { int b = compgrRecog(rels, word1, word2, "dobj", word2[a]); if (b != -1) { int c = compgrRecog(rels, word1, word2, "compound", word2[b]); if (c != -1) { System.out.println(word1[q[0]] + "," + word1[c] + "," + word2[c]); } } } //RULE 3:::::::::: else { int b[] = toIntArray(grRecog(rels, "ccomp")); if (b.length != 0) { System.out.println(word1[q[1]] + "," + word2[q[1]] + "," + word1[b[0]]); } } } //RULE 4:::::::::: else if (posrecog(token, pos, word1[q[0]]).equals("VBZ")) { //System.out.println("aaaaa"); int vbp_dobj_index = compgrRecog(rels, word1, word2, "dobj", word2[q[0]]); if (vbp_dobj_index != -1) { System.out.println(word1[vbp_dobj_index] + "," + word2[vbp_dobj_index]); } else { int vbp_xcomp_index = compgrRecog(rels, word1, word2, "xcomp", word1[q[0]]); if (vbp_xcomp_index != -1) { System.out.println(word1[vbp_xcomp_index] + "," + word2[vbp_xcomp_index]); } else { int vbp_acomp_index = compgrRecog(rels, word1, word2, "acomp", 
word1[q[0]]); if (vbp_acomp_index != -1) { System.out.println( word1[q[0]] + "," + word1[vbp_acomp_index] + "," + word2[vbp_acomp_index]); } else System.out.println(word1[q[0]]); } } } int[] f = toIntArray(grRecog(rels, "amod")); if (f.length != 0) { for (int i : f) { System.out.println(word1[i] + "," + word2[i]); } int cj[] = toIntArray(grRecog(rels, "conj:and")); if (cj.length != 0) { for (int i : cj) { System.out.println(word1[i] + "," + word2[i]); } } } int[] neg = toIntArray(grRecog(rels, "neg")); if (neg.length != 0) { for (int i : neg) { System.out.println(word1[i] + "," + word2[i]); } } } else { int[] f = toIntArray(grRecog(rels, "amod")); if (f.length != 0) { for (int i : f) { System.out.print(word1[i] + "," + word2[i]); String qwe = word1[i] + "," + word2[i]; } int cj[] = toIntArray(grRecog(rels, "conj:and")); if (cj.length != 0) { for (int i : cj) { System.out.println(word2[i]); } } } int[] neg = toIntArray(grRecog(rels, "neg")); if (neg.length != 0) { for (int i : neg) { System.out.println(word1[i] + "," + word2[i]); } } } //RULE 2::::::::::::: } // text=sc.nextLine(); //} }
From source file:Meteor.java
License:Open Source License
public static void main(String[] args) { // Usage//from ww w. j a va2 s . c o m if (args.length < 2) { printUsage(); System.exit(2); } // Files String testFile = args[0]; String refFile = args[1]; // Use command line options to create props, configuration Properties props = createPropertiesFromArgs(args, 2); MeteorConfiguration config = new MeteorConfiguration(props); // Print settings Boolean ssOut = Boolean.parseBoolean(props.getProperty("ssOut")); Boolean sgml = Boolean.parseBoolean(props.getProperty("sgml")); Boolean stdio = Boolean.parseBoolean(props.getProperty("stdio")); Boolean quiet = Boolean.parseBoolean(props.getProperty("quiet")); String format = sgml ? "SGML" : "plaintext"; if (!ssOut && !stdio && !quiet) { System.out.println("Meteor version: " + Constants.VERSION); System.out.println(); System.out.println("Eval ID: " + config.getConfigID()); System.out.println(); System.out.println("Language: " + config.getLanguage().substring(0, 1).toUpperCase() + config.getLanguage().substring(1)); System.out.println("Format: " + format); System.out.println("Task: " + config.getTaskDesc()); System.out.println("Modules: " + config.getModulesString()); System.out.println("Weights: " + config.getModuleWeightsString()); System.out.println("Parameters: " + config.getParametersString()); System.out.println("New d. weights: " + config.getNewDeltaWeightsString()); System.out.println(); } MaxentTagger tagger = new MaxentTagger("taggers/english-caseless-left3words-distsim.tagger"); // Module / Weight check if (config.getModuleWeights().size() < config.getModules().size()) { System.err.println("Warning: More modules than weights specified " + "- modules with no weights will not be counted."); } // New delta weights number check if (config.getNewDeltaWeights().size() != 4) { System.err.println("Warning: You have not specified exactly 4 new delta weights. 
" + "If you are attempting to use new delta weights, " + "there must be exactly 4 specified."); } // Stdio check if (stdio && sgml) { System.err.println("Warning: Stdio incompatible with other modes - using Stdio only"); } MeteorScorer scorer = new MeteorScorer(config); if (stdio) { try { scoreStdio(scorer, tagger); } catch (IOException ex) { System.err.println("Error: Could not score Stdio inputs"); ex.printStackTrace(); System.exit(1); } } else if (sgml) { try { scoreSGML(scorer, props, config, testFile, refFile, tagger); } catch (IOException ex) { System.err.println("Error: Could not score SGML files:"); ex.printStackTrace(); System.exit(1); } } else try { scorePlaintext(scorer, props, config, testFile, refFile, tagger); } catch (IOException ex) { System.err.println("Error: Could not score text files:"); ex.printStackTrace(); System.exit(1); } }
From source file:DependencyParse.java
License:Apache License
public static void main(String[] args) throws Exception { Properties props = StringUtils.argsToProperties(args); if (!props.containsKey("tokpath") || !props.containsKey("parentpath") || !props.containsKey("relpath")) { System.err.println(/*from ww w. j av a 2 s. com*/ "usage: java DependencyParse -tokenize - -tokpath <tokpath> -parentpath <parentpath> -relpath <relpath>"); System.exit(1); } boolean tokenize = false; if (props.containsKey("tokenize")) { tokenize = true; } String tokPath = props.getProperty("tokpath"); String parentPath = props.getProperty("parentpath"); String relPath = props.getProperty("relpath"); BufferedWriter tokWriter = new BufferedWriter(new FileWriter(tokPath)); BufferedWriter parentWriter = new BufferedWriter(new FileWriter(parentPath)); BufferedWriter relWriter = new BufferedWriter(new FileWriter(relPath)); MaxentTagger tagger = new MaxentTagger(TAGGER_MODEL); DependencyParser parser = DependencyParser.loadFromModelFile(PARSER_MODEL); Scanner stdin = new Scanner(System.in); int count = 0; long start = System.currentTimeMillis(); while (stdin.hasNextLine()) { String line = stdin.nextLine(); List<HasWord> tokens = new ArrayList<>(); if (tokenize) { PTBTokenizer<Word> tokenizer = new PTBTokenizer(new StringReader(line), new WordTokenFactory(), ""); for (Word label; tokenizer.hasNext();) { tokens.add(tokenizer.next()); } } else { for (String word : line.split(" ")) { tokens.add(new Word(word)); } } List<TaggedWord> tagged = tagger.tagSentence(tokens); int len = tagged.size(); Collection<TypedDependency> tdl = parser.predict(tagged).typedDependencies(); int[] parents = new int[len]; for (int i = 0; i < len; i++) { // if a node has a parent of -1 at the end of parsing, then the node // has no parent. 
parents[i] = -1; } String[] relns = new String[len]; for (TypedDependency td : tdl) { // let root have index 0 int child = td.dep().index(); int parent = td.gov().index(); relns[child - 1] = td.reln().toString(); parents[child - 1] = parent; } // print tokens StringBuilder sb = new StringBuilder(); for (int i = 0; i < len - 1; i++) { if (tokenize) { sb.append(PTBTokenizer.ptbToken2Text(tokens.get(i).word())); } else { sb.append(tokens.get(i).word()); } sb.append(' '); } if (tokenize) { sb.append(PTBTokenizer.ptbToken2Text(tokens.get(len - 1).word())); } else { sb.append(tokens.get(len - 1).word()); } sb.append('\n'); tokWriter.write(sb.toString()); // print parent pointers sb = new StringBuilder(); for (int i = 0; i < len - 1; i++) { sb.append(parents[i]); sb.append(' '); } sb.append(parents[len - 1]); sb.append('\n'); parentWriter.write(sb.toString()); // print relations sb = new StringBuilder(); for (int i = 0; i < len - 1; i++) { sb.append(relns[i]); sb.append(' '); } sb.append(relns[len - 1]); sb.append('\n'); relWriter.write(sb.toString()); count++; if (count % 1000 == 0) { double elapsed = (System.currentTimeMillis() - start) / 1000.0; System.err.printf("Parsed %d lines (%.2fs)\n", count, elapsed); } } long totalTimeMillis = System.currentTimeMillis() - start; System.err.printf("Done: %d lines in %.2fs (%.1fms per line)\n", count, totalTimeMillis / 1000.0, totalTimeMillis / (double) count); tokWriter.close(); parentWriter.close(); relWriter.close(); }
From source file:DependencyParserDemo.java
public static void main(String[] args) { String modelPath = DependencyParser.DEFAULT_MODEL; String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger"; for (int argIndex = 0; argIndex < args.length;) { switch (args[argIndex]) { case "-tagger": taggerPath = args[argIndex + 1]; argIndex += 2;/*from w ww . j a v a 2s. c o m*/ break; case "-model": modelPath = args[argIndex + 1]; argIndex += 2; break; default: throw new RuntimeException("Unknown argument " + args[argIndex]); } } String text = "I can almost always tell when movies use fake dinosaurs."; MaxentTagger tagger = new MaxentTagger(taggerPath); DependencyParser parser = DependencyParser.loadFromModelFile(modelPath); DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text)); for (List<HasWord> sentence : tokenizer) { List<TaggedWord> tagged = tagger.tagSentence(sentence); GrammaticalStructure gs = parser.predict(tagged); // Print typed dependencies System.err.println(gs); } }
From source file:QuestTagger.java
/**
 * Demo: POS-tags a fixed list of example questions (plus one declarative
 * sentence) with the Stanford bidirectional tagger and prints each tagged
 * result on its own line, in order.
 *
 * Improvement over the original: the fourteen copy-pasted
 * tagString-then-println pairs are replaced by a single data-driven loop over
 * the same sentences in the same order; the printed output is identical.
 *
 * @param args unused
 * @throws IOException if the tagger model cannot be loaded
 */
public static void main(String args[]) throws IOException {
    MaxentTagger tagger = new MaxentTagger("taggers/english-bidirectional-distsim.tagger");
    // Sentences are tagged and printed in exactly this order.
    String[] sentences = {
            "What is the name of the author?",
            "Whose dog did Ryan kidnap?",
            "Who was Stella talking to?",
            "Where is the dog?",
            "When did the dog return?",
            "Which country was no participating in the game?",
            "When was he born?",
            "How did ack manage to escape?",
            "Whom are you going to invite?",
            "Why was Ryan mad?",
            "Which president modified the legislation?",
            "Are you crazy?",
            "Dempsey was born in Nacogdoches, Texas, and, for much of his childhood, his family lived in a trailer park, where he and his siblings grew up playing soccer with Hispanic immigrants.",
            "Who sells the most greeting cards?" };
    for (String sentence : sentences) {
        System.out.println(tagger.tagString(sentence));
    }
}
From source file:Dependency2.java
/**
 * Reads one line of text from stdin and, if multifeatures() says it mentions
 * several features, dependency-parses it and prints the feature (looked up via
 * getFeature) attached to each "amod" relation.
 *
 * Relation/word triples are recovered by regex-parsing the toString() of each
 * TypedDependency. Helpers readCsv/multifeatures/getFeature are defined
 * elsewhere in this class.
 */
public static void main(String[] args) {
    String modelPath = DependencyParser.DEFAULT_MODEL;
    String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
    Scanner sc = new Scanner(System.in);
    // Load the feature lexicon before reading input.
    readCsv();
    String text = "";
    text = sc.nextLine();
    if (multifeatures(text)) {
        System.out.println("Multiple features present");
        MaxentTagger tagger = new MaxentTagger(taggerPath);
        DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);
        DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
        for (List<HasWord> sentence : tokenizer) {
            List<TaggedWord> tagged = tagger.tagSentence(sentence);
            GrammaticalStructure gs = parser.predict(tagged);
            Collection<TypedDependency> s = gs.typedDependenciesCollapsedTree();
            // NOTE(review): map is never read or written — dead local.
            Map<Character, Pair<Character, Character>> map = new HashMap<Character, Pair<Character, Character>>();
            Object[] z = s.toArray();
            String rels[] = new String[z.length];
            String word1[] = new String[z.length];
            String word2[] = new String[z.length];
            int j = 0;
            String f, f1, f2;
            for (Object i : z) {
                String temp = i.toString();
                System.out.println(temp);
                // pattern0: relation name before '(' ; pattern1: governor word
                // up to its index suffix ; pattern2: dependent word after ','.
                String pattern0 = "(.*)(?=\\()";
                String pattern1 = "(?<=\\()(.*?)(?=-)";
                String pattern2 = "(?<=,)(.*)(?=-)";
                Pattern r0 = Pattern.compile(pattern0);
                Pattern r1 = Pattern.compile(pattern1);
                Pattern r2 = Pattern.compile(pattern2);
                Matcher m0 = r0.matcher(temp);
                Matcher m1 = r1.matcher(temp);
                Matcher m2 = r2.matcher(temp);
                if (m0.find())
                    rels[j] = m0.group(0);
                if (m1.find())
                    word1[j] = m1.group(0);
                if (m2.find())
                    word2[j] = m2.group(0);
                // NOTE(review): if m0.find() failed, rels[j] is null and this
                // call throws NullPointerException — confirm all dependency
                // strings match pattern0.
                if (rels[j].equals("amod")) {
                    f1 = getFeature(word1[j]);
                    f2 = getFeature(word2[j]);
                    // Prefer the governor's feature; fall back to the dependent's.
                    f = f1 != null ? (f1) : (f2 != null ? f2 : null);
                    if (f != null) {
                        System.out.println("Feature: " + f);
                    }
                }
                j++;
            }
        }
    } else {
        // Single-feature case: sentence score is the feature score (not implemented here).
    }
}
From source file:stanford.java
public static void main4(String abc) throws IOException, ClassNotFoundException, Exception { // for(int a=8;a<=10;a++) {//from w ww . j a v a 2s . c o m FileOutputStream fout = new FileOutputStream("C:\\Users\\AvinashKumarPrajapati\\Desktop\\bc.txt"); /*Error in this line*/ //File file = new File("C:\\Users\\AvinashKumarPrajapati\\Desktop\\p.txt"); //FileInputStream fis = new FileInputStream(file); //byte[] data = new byte[(int) file.length()]; //fis.read(data); //fis.close(); //strcpy(data,abc); MaxentTagger tagger = new MaxentTagger("taggers/wsj-0-18-bidirectional-nodistsim.tagger"); //String s=new String(data, "UTF-8"); String s = null; s = abc; String sample = s.replaceAll("\\W", " "); String tagged = tagger.tagTokenizedString(sample); String[] x = tagged.split(" "); ArrayList<String> list = new ArrayList<String>(); //verb for (int i = 0; i < x.length; i++) { if (x[i].substring(x[i].lastIndexOf("_") + 1).startsWith("V")) { list.add(x[i].split("_")[0]); } } //noun for (int i = 0; i < x.length; i++) { if (x[i].substring(x[i].lastIndexOf("_") + 1).startsWith("N")) { list.add(x[i].split("_")[0]); } } /* for(int i=0;i<x.length;i++) { if (x[i].substring(x[i].lastIndexOf("_")+1).startsWith("J")) { list.add(x[i].split("_")[0]); } } */ String bit = ""; for (int i = 0; i < list.size(); i++) { bit += list.get(i) + "\r\n "; System.out.println(list.get(i)); } byte b[] = bit.getBytes();//converting string into byte array fout.write(b); fout.close(); stanford stan = new stanford(); stanford.Stemmer stem = stan.new Stemmer(); stem.main1(); } }
From source file:tfidf.java
public static void main(String[] args) throws IOException, ClassNotFoundException, Exception { //for(int a=770;a<=799;a++) {/*w w w . j a v a2 s . co m*/ //till 550,(800-1982) in 1 2 ------1887 //no file 1-3,20,264,1977,1973,1961,1957,1904,1872,1860,1854,1858,1844,1755,1766,1782,1725,1733,1738,1760,1578 //no file 1536,1542,1456,1466,1482,1494,1112,1177,1184,1299,1318,1323,1347,1358,1372,1383,1393,1433,1434,664 //no file 735,745 //heap//350,1838,1702,1644,383,514,820,857,925,618,985,1051,769 //Thread.sleep(2000); db = new OracleJDBC(); // System.out.println(" "+a+" "); FileOutputStream fout = new FileOutputStream("C:\\Users\\AvinashKumarPrajapati\\Desktop\\bc.txt"); File file = new File("C:\\Users\\AvinashKumarPrajapati\\Desktop\\pol.txt"); FileInputStream fis = new FileInputStream(file); byte[] data = new byte[(int) file.length()]; fis.read(data); fis.close(); MaxentTagger tagger = new MaxentTagger("taggers/wsj-0-18-bidirectional-nodistsim.tagger"); String s = new String(data, "UTF-8"); String sample = s.replaceAll("\\W", " "); String tagged = tagger.tagTokenizedString(sample); String[] x = tagged.split(" "); ArrayList<String> list = new ArrayList<String>(); for (int i = 0; i < x.length; i++) { if (x[i].substring(x[i].lastIndexOf("_") + 1).startsWith("N")) { list.add(x[i].split("_")[0]); } } String bit = ""; for (int i = 0; i < list.size(); i++) { bit += list.get(i) + "\r\n "; System.out.println(list.get(i)); } byte b[] = bit.getBytes();//converting string into byte array fout.write(b); fout.close(); stanford stan = new stanford(); stanford.Stemmer stem = stan.new Stemmer(); stem.main1(); try { db.finalize(); } catch (Throwable ex) { } } }
From source file:AbstractionSummarizer.AbstractionSummarizer.java
private void annotateSentences(String execPath) { MaxentTagger tagger = new MaxentTagger(execPath + "/Taggers/english-bidirectional-distsim.tagger"); annotedSentences = new ArrayList<String>(); for (String sentence : nonAnnotedSentences) { String annotedString = tagger.tagString(sentence); annotedString = annotedString.replace("_", "/") + " ./."; annotedSentences.add(annotedString); }//w w w .j a v a 2s . com }
From source file:artinex.Postagger.java
/**
 * Demo constructor: loads the left3words POS model from a fixed local path,
 * tags one hard-coded sample sentence, and prints the input and the tagged
 * output to stdout.
 */
public Postagger() {
    String modelPath = "D:\\study\\stanford-postagger\\models\\english-left3words-distsim.tagger";
    MaxentTagger posTagger = new MaxentTagger(modelPath);
    String sample = "James Bond teams up with the lone survivor of a destroyed Russian research center to stop the hijacking of a nuclear space weapon by a fellow agent believed to be dead.";
    String result = posTagger.tagString(sample);
    // Echo both the raw sentence and its tagged form.
    System.out.println("Input: " + sample);
    System.out.println("Output: " + result);
}