List of usage examples for edu.stanford.nlp.tagger.maxent MaxentTagger tagString
public String tagString(String toTag)
From source file:QuestTagger.java
public static void main(String args[]) throws IOException { MaxentTagger tagger = new MaxentTagger("taggers/english-bidirectional-distsim.tagger"); String quest = "What is the name of the author?"; String tagged = tagger.tagString(quest); System.out.println(tagged);//www. j ava 2 s . com quest = "Whose dog did Ryan kidnap?"; tagged = tagger.tagString(quest); System.out.println(tagged); quest = "Who was Stella talking to?"; tagged = tagger.tagString(quest); System.out.println(tagged); quest = "Where is the dog?"; tagged = tagger.tagString(quest); System.out.println(tagged); quest = "When did the dog return?"; tagged = tagger.tagString(quest); System.out.println(tagged); quest = "Which country was no participating in the game?"; tagged = tagger.tagString(quest); System.out.println(tagged); quest = "When was he born?"; tagged = tagger.tagString(quest); System.out.println(tagged); quest = "How did ack manage to escape?"; tagged = tagger.tagString(quest); System.out.println(tagged); quest = "Whom are you going to invite?"; tagged = tagger.tagString(quest); System.out.println(tagged); quest = "Why was Ryan mad?"; tagged = tagger.tagString(quest); System.out.println(tagged); quest = "Which president modified the legislation?"; tagged = tagger.tagString(quest); System.out.println(tagged); quest = "Are you crazy?"; tagged = tagger.tagString(quest); System.out.println(tagged); String sent = "Dempsey was born in Nacogdoches, Texas, and, for much of his childhood, his family lived in a trailer park, where he and his siblings grew up playing soccer with Hispanic immigrants."; tagged = tagger.tagString(sent); System.out.println(tagged); quest = "Who sells the most greeting cards?"; tagged = tagger.tagString(quest); System.out.println(tagged); }
From source file:AbstractionSummarizer.AbstractionSummarizer.java
/**
 * Tags every entry of {@code nonAnnotedSentences} and stores the results in
 * a freshly created {@code annotedSentences} list.
 *
 * Each tagged string has every "_" replaced by "/" and the literal suffix
 * " ./." appended before it is stored.
 *
 * @param execPath directory prefix; the model is read from
 *                 {@code execPath + "/Taggers/english-bidirectional-distsim.tagger"}
 */
private void annotateSentences(String execPath) {
    final String modelPath = execPath + "/Taggers/english-bidirectional-distsim.tagger";
    MaxentTagger tagger = new MaxentTagger(modelPath);

    annotedSentences = new ArrayList<String>();
    for (String sentence : nonAnnotedSentences) {
        // Switch the word/tag separator to a slash, then add the trailing marker.
        String slashTagged = tagger.tagString(sentence).replace("_", "/");
        annotedSentences.add(slashTagged + " ./.");
    }
}
From source file:artinex.Postagger.java
/**
 * Loads the left3words English model from a fixed absolute path, tags one
 * hard-coded sample sentence and prints the input and the tagged output to
 * standard output.
 */
public Postagger() {
    // NOTE(review): absolute, machine-specific model location.
    final String modelPath = "D:\\study\\stanford-postagger\\models\\english-left3words-distsim.tagger";
    final String sample = "James Bond teams up with the lone survivor of a destroyed Russian research center to stop the hijacking of a nuclear space weapon by a fellow agent believed to be dead.";

    final String tagged = new MaxentTagger(modelPath).tagString(sample);

    System.out.println("Input: " + sample);
    System.out.println("Output: " + tagged);
}
From source file:baseline_system.Stemmer.java
/** Tagger shared by all {@code tagSentence} calls; created on first use by {@link #loadedTagger()}. */
private static MaxentTagger cachedTagger;

/**
 * Returns the shared tagger, loading the model the first time it is needed.
 *
 * Synchronized so the model is loaded at most once. If construction throws,
 * nothing is cached and the next call retries, so callers see the same
 * exception they saw when the tagger was built on every call.
 */
private static synchronized MaxentTagger loadedTagger() {
    if (cachedTagger == null) {
        // NOTE(review): absolute, machine-specific model location.
        cachedTagger = new MaxentTagger(
                "C:\\Users\\Praveen\\OneDrive\\NLP\\stanford-postagger-2015-04-20\\stanford-postagger-2015-04-20\\models\\english-left3words-distsim.tagger");
    }
    return cachedTagger;
}

/**
 * Tags {@code input} with the left3words English model.
 *
 * The model is loaded once and reused; previously it was re-read from disk
 * on every call, which dominates the cost of tagging a single sentence.
 *
 * @param input text to tag
 * @return the string produced by {@code MaxentTagger.tagString}
 */
static String tagSentence(String input) {
    // NOTE(review): tagging runs outside the lock; this relies on MaxentTagger
    // being re-entrant (documented for the 3.x line) - confirm for the jar in use.
    return loadedTagger().tagString(input);
}
From source file:com.mhowlett.Main.java
License:Open Source License
private static void tagSubtitles(HashMap<String, Person> ps) { MaxentTagger tagger = new MaxentTagger( "/data/taggers/stanford-postagger-2015-12-09/models/english-left3words-distsim.tagger"); ps.forEach((k, p) -> {//from w ww . j a v a 2 s .co m if (p.subtitle != null) { String tagged = tagger.tagString(p.subtitle); String[] words = tagged.split(" "); p.words = new WordMap(); for (int i = 0; i < words.length; ++i) { String[] tokens = words[i].split("_"); if (tokens.length == 2) { p.words.add(tokens[0], tokens[1]); } } } }); }
From source file:context.core.task.pos.POSTagger.java
License:Open Source License
/** * * @param args/*from ww w .j a v a 2 s . c o m*/ * @throws ClassNotFoundException * @throws IOException */ public static void main(String[] args) throws ClassNotFoundException, IOException { // Initialize the tagger MaxentTagger tagger = getTagger("en"); // The sample string String sample = " "; // The tagged string String tagged = tagger.tagString(sample); // Output the result System.out.println(tagged); // List<CoreLabel> sent = Sentence.toWordList("This", "is", "a", "sample", "text"); // tag(sent, "en"); }
From source file:de.citec.io.ImportNW.java
/**
 * Tags {@code body} and condenses it to a space-separated list of terms.
 *
 * A tagged token is kept when it contains "NN", "ADJA", "ADJD" or "NE"
 * (these look like STTS German tags - confirm against the model in use).
 * Runs of consecutive kept tokens are joined with "_" into one term and
 * lower-cased; all other tokens only end the current run. A fixed list of
 * leftover fragments is then stripped and the result trimmed.
 *
 * Fixes relative to the previous version:
 * - the result no longer loses its first character (it was trimmed and then
 *   passed through {@code substring(1)});
 * - a term still open at the end of the input is underscore-joined like
 *   every other term instead of being emitted with spaces.
 *
 * @param body   text to tag
 * @param tagger tagger used to annotate {@code body}
 * @return the cleaned term list, or an empty string when nothing was kept
 */
public static String tagText(String body, MaxentTagger tagger) {
    String line = tagger.tagString(body);

    StringBuilder collected = new StringBuilder();
    StringBuilder term = new StringBuilder();
    for (String word : line.split(" ")) {
        // NOTE(review): the check runs on the whole "word_TAG" token, so a word
        // whose own text contains e.g. "NE" also matches - confirm this is intended.
        if (word.contains("NN") || word.contains("ADJA") || word.contains("ADJD") || word.contains("NE")) {
            // Each part gets a leading "_"; flushTerm drops the first one.
            term.append('_').append(word.split("_")[0]);
        } else {
            flushTerm(term, collected);
        }
    }
    // Input may end inside a run of kept tokens.
    flushTerm(term, collected);

    String cleaned = collected.toString();
    // Strip fragments in this exact order; later patterns are substrings of
    // earlier ones, so reordering would change the result.
    cleaned = cleaned.replace("'", "");
    cleaned = cleaned.replace(" ._$.", "");
    cleaned = cleaned.replace("_$.", "");
    cleaned = cleaned.replace("_card", "");
    cleaned = cleaned.replace("_xy ", "");
    cleaned = cleaned.replace("/_$[", "");
    cleaned = cleaned.replace("-_$[", "");
    cleaned = cleaned.replace("_$[", "");
    cleaned = cleaned.replace(("_pper"), "");
    cleaned = cleaned.replace(("_appr"), "");
    cleaned = cleaned.replace(" 's", "s");
    cleaned = cleaned.replace("_piat", "");
    cleaned = cleaned.replace("_adv", "");
    cleaned = cleaned.replace("' ", "");
    cleaned = cleaned.replace("'", " ");
    // NOTE(review): as written this replaces a space with a space (no-op);
    // possibly meant to collapse double spaces - confirm before changing.
    cleaned = cleaned.replace(" ", " ");

    // trim() already removes the leading separator space; nothing more to cut.
    return cleaned.trim();
}

/** Appends the pending term (lower-cased, leading "_" dropped) to {@code out} after a space, then clears it. */
private static void flushTerm(StringBuilder term, StringBuilder out) {
    if (term.length() > 0) {
        out.append(' ').append(term.substring(1).toLowerCase());
        term.setLength(0);
    }
}
From source file:eu.edisonproject.training.execute.Main.java
License:Apache License
/**
 * Runs {@code DataPrepare} on {@code in}/{@code out}, then filters
 * {@code out/itemset.csv} in place with the POS tagger.
 *
 * For each line, the text before the first "/" is tagged; the line is kept
 * only when the tagged text contains "NN" and does not contain "RB". Kept
 * lines are written to {@code out/tmp.csv}, which then replaces
 * {@code itemset.csv}.
 *
 * The stop-word file and model directory come from the system properties
 * "stop.words.file" and "model.path", falling back to the same keys in
 * {@code prop} with defaults under "../etc".
 *
 * @param in  input location handed to {@code DataPrepare}
 * @param out output directory containing {@code itemset.csv}
 * @throws IOException if reading, writing or moving the files fails
 */
private static void apriori(String in, String out) throws IOException {
    final String sep = File.separator;

    String stopWordsPath = System.getProperty("stop.words.file");
    if (stopWordsPath == null) {
        stopWordsPath = prop.getProperty("stop.words.file", ".." + sep + "etc" + sep + "stopwords.csv");
    }
    new DataPrepare(in, out, stopWordsPath).execute();

    String modelDir = System.getProperty("model.path");
    if (modelDir == null) {
        modelDir = prop.getProperty("model.path", ".." + sep + "etc" + sep + "model");
    }
    final String taggerPath = modelDir + sep + "stanford" + sep + "english-left3words-distsim.tagger";

    File itemsets = new File(out + sep + "itemset.csv");
    File filtered = new File(out + sep + "tmp.csv");

    // Load the model before opening the writer so a load failure creates no temp file.
    MaxentTagger tagger = new MaxentTagger(taggerPath);

    try (PrintWriter writer = new PrintWriter(filtered);
            BufferedReader reader = new BufferedReader(new FileReader(itemsets))) {
        String text;
        while ((text = reader.readLine()) != null) {
            String tagged = tagger.tagString(text.split("/")[0]);
            if (tagged.contains("NN") && !tagged.contains("RB")) {
                writer.print(text + "\n");
            }
        }
    }

    // Replace the original item set with the filtered copy.
    Files.move(filtered, itemsets);
}
From source file:eu.edisonproject.training.term.extraction.AprioriExtraction.java
License:Apache License
@Override public Map<String, Double> termXtraction(String inDir) throws IOException { try {/* w w w.java 2 s. c om*/ int count = 0; HashMap<String, Double> keywordsDictionaray = new HashMap(); File dir = new File(inDir); Set<String> terms = new HashSet<>(); if (dir.isDirectory()) { for (File f : dir.listFiles()) { if (FilenameUtils.getExtension(f.getName()).endsWith("txt")) { count++; Logger.getLogger(AprioriExtraction.class.getName()).log(Level.INFO, "{0}: {1} of {2}", new Object[] { f.getName(), count, dir.list().length }); terms.addAll(extractFromFile(f)); } } } else if (dir.isFile()) { if (FilenameUtils.getExtension(dir.getName()).endsWith("txt")) { terms.addAll(extractFromFile(dir)); } } MaxentTagger tagger = new MaxentTagger(taggerPath); for (String t : terms) { Double tf = 0.0; String term = t.toLowerCase().trim().replaceAll(" ", "_").split("/")[0]; while (term.endsWith("_")) { term = term.substring(0, term.lastIndexOf("_")); } while (term.startsWith("_")) { term = term.substring(term.indexOf("_") + 1, term.length()); } String tagged = null; // if (!term.contains("_")) { tagged = tagger.tagString(term); // } boolean add = true; if (tagged != null) { if (!tagged.contains("NN") || tagged.contains("RB")) { add = false; } // } } else { add = true; } if (add) { if (keywordsDictionaray.containsKey(term)) { tf = keywordsDictionaray.get(term); tf++; } else { tf = 1.0; } keywordsDictionaray.put(term, tf); } } return keywordsDictionaray; } catch (Exception ex) { Logger.getLogger(AprioriExtraction.class.getName()).log(Level.SEVERE, null, ex); } return null; }
From source file:flight_ranker.tweet_tagger.java
/**
 * Tags {@code tweet} with the left3words English model and stores the
 * result in {@code tagged}.
 *
 * The model path is relative and uses "/" as separator. The previous "\\"
 * only resolved on Windows; elsewhere it was read as part of the file name.
 * Windows accepts "/" as well, so the same file is found there.
 *
 * @param tweet text to tag
 * @throws FileNotFoundException declared by the signature; nothing in this body throws it directly
 */
public tweet_tagger(String tweet) throws FileNotFoundException {
    MaxentTagger tagger = new MaxentTagger("taggers/english-left3words-distsim.tagger");
    tagged = tagger.tagString(tweet);
}