List of usage examples for edu.stanford.nlp.process Morphology stem
public void stem(CoreLabel label)
From source file:com.music.service.text.TimelineToMusicService.java
License:Open Source License
private Variation getVariation(List<Tweet> tweets, TimelineMusic meta) { Morphology morphology = new Morphology(new StringReader("")); Multiset<String> words = HashMultiset.create(); for (Tweet tweet : tweets) { String tweetText = tweet.getText().toLowerCase(); List<String> urls = TimelineToMusicService.extractUrls(tweetText); for (String url : urls) { tweetText = tweetText.replace(url, ""); }// ww w .j ava 2 s . co m List<String> usernames = TimelineToMusicService.extractMentionedUsernames(tweetText); for (String username : usernames) { tweetText = tweetText.replace(username, "").replace("rt", ""); } String[] wordsInTweet = tweetText.split("[^\\p{L}&&[^']]+"); for (String word : wordsInTweet) { try { words.add(morphology.stem(word)); } catch (Exception ex) { words.add(word); } } } words.removeAll(stopwords); // if a word is mentioned more times than is 4% of the tweets, it's considered a topic double topicThreshold = tweets.size() * 4 / 100; for (Iterator<String> it = words.iterator(); it.hasNext();) { String word = it.next(); // remove stopwords not in the list (e.g. in a different language). // We consider all words less than 4 characters to be stop words if (word == null || word.length() < 4) { it.remove(); } else if (words.count(word) < topicThreshold) { it.remove(); } } meta.setTopKeywords(new HashSet<>(words.elementSet())); // the more topics you have, the more variative music if (meta.getTopKeywords().size() > 40) { return Variation.EXTREMELY_VARIATIVE; } else if (meta.getTopKeywords().size() > 30) { return Variation.VERY_VARIATIVE; } else if (meta.getTopKeywords().size() > 20) { return Variation.MOVING; } else if (meta.getTopKeywords().size() > 10) { return Variation.AVERAGE; } else { return Variation.MONOTONOUS; } }
From source file:edu.iastate.airl.semtus.parser.Parser.java
License:Open Source License
/**
 * Returns the base form of a word as produced by {@link Morphology#stem}.
 *
 * @param thisWord
 *            the word to reduce to its base form
 * @return the text of the stemmed word, or {@code null} if anything is thrown while stemming
 */
static public String morphology(Word thisWord) {
    String base;
    try {
        base = new Morphology().stem(thisWord).word();
    } catch (Throwable failure) {
        // Deliberate best effort: any failure is reported to the caller as null.
        base = null;
    }
    return base;
}
From source file:final_dissertation.Feature_Parsing.java
public static void getTag(String tg, Element elem, Sentences sent, HashMap<String, Noun_Word> noun, HashMap<String, String> negation, int ttt) { int global_offset = 0; NodeList nl = elem.getElementsByTagName(tg); int nn = -1, n, aj = 0, ng = 0, cc = 0, vb = 0, tp, flag = 1, temp = -1; int len = nl.getLength(); Node wrd;/*from w ww. java 2s . c om*/ String temp_noun = "", prev = ""; Morphology mr = new Morphology(); for (int i = 0; i < len; i++) { wrd = nl.item(i); Element el = (Element) wrd; String vl = el.getAttribute("pos"); String s = el.getFirstChild().getNodeValue(); s = s.trim().toLowerCase(); String s1 = ""; if ((i + 1) < len) { Node wrd1 = nl.item(i + 1); Element el1 = (Element) wrd1; s1 = el1.getFirstChild().getNodeValue().toLowerCase(); s1 = mr.stem(s1).trim().toLowerCase(); } if (vl.equals("NN") || vl.equals("NNS") || vl.equals("NNP") || vl.equals("NNPS")) { if (s != null) { if (s.equals("t")) { num++; } else if (!(s.equals("##") || s.equals("p"))) { int ln = s.length(); if (ln == 1 || ln == 2) { continue; } nn++; tp = Integer.parseInt(el.getAttribute("wid")); if (vl.equals("NNS") || vl.equals("NNPS")) { s = mr.stem(s); } if (s.equals("pic")) { s = "picture"; } temp_noun = s; /* if (temp != -1) { if (temp + 1 == tp) { nn--; if (flag == 1) { flag = 0; Noun_Word w1 = noun.get(sent.nouns[nn]); if (w1.cnt == 1) { noun.remove(sent.nouns[nn]); } else { w1.cnt--; noun.put(sent.nouns[nn], w1); } } global_offset++; //It is for Noun Phrases and to correct their position. 
sent.nouns[nn] = sent.nouns[nn] + " " + temp_noun; temp = tp; continue; } } */ sent.nouns[nn] = temp_noun; sent.posNoun[nn] = tp - global_offset; sent.reviewID = num; temp = tp; Noun_Word word = noun.get(sent.nouns[nn]); if (word == null) { word = new Noun_Word(sent.nouns[nn], 1); } else { word.increment(); } noun.put(sent.nouns[nn], word); } } } else if (vl.equals("JJ") || vl.equals("JJS") || vl.equals("JJR")) { flag = 1; if (s != null) { if (!(s.equals("##") || s.equals("<") || s.equals("="))) { //s = mr.stem(s); int tp1 = Integer.parseInt(el.getAttribute("wid")); sent.adjective[aj] = s; sent.posAdjective[aj] = Integer.toString(tp1 - global_offset); //System.out.println("Adjective: "+sent.adjective[aj]+" Pos"+sent.posAdjective[nn]); aj++; } } } else if (vl.equals("CC")) { flag = 1; if (s != null) { if (!(s.equals("x") || s.equals("+") || s.equals("&") || (s.equals("but") && s1.equals("also")))) { int tp1 = Integer.parseInt(el.getAttribute("wid")); sent.conjunction[cc] = s; sent.posConjunction[cc] = Integer.toString(tp1 - global_offset); //System.out.println("Conjunction: "+sent.conjunction[cc]+" Pos "+sent.posConjunction[cc]); cc++; } } } else if (vl.equals("VB") || vl.equals("VBD") || vl.equals("VBG") || vl.equals("VBN") || vl.equals("VBP") || vl.equals("VBZ")) { flag = 1; if (s != null) { s = mr.stem(s); int tp1 = Integer.parseInt(el.getAttribute("wid")); sent.verbs[vb] = s; sent.posVerb[vb] = Integer.toString(tp1 - global_offset); //System.out.println("verbs: "+sent.verbs[vb]+" Pos "+sent.posVerb[vb]); vb++; } } else if (s != null) { flag = 1; s = mr.stem(s); if (negation.containsKey(s)) { if ((prev.equals("do") || prev.equals("does")) && s.equals("n't")) { int tp1 = Integer.parseInt(el.getAttribute("wid")); sent.negation[ng] = s; sent.posNegation[ng] = tp1 - global_offset; //System.out.println("Negation Words :"+prev+s); ng++; } else if (!((s.equals("not") && (s1.equals("only") || s1.equals("just"))) || (s.equals("no") && (s1.equals("wonder") || 
s1.equals("problem"))))) //Excludind not only?, "no problem" ,not just? and no wonder?. { int tp1 = Integer.parseInt(el.getAttribute("wid")); sent.negation[ng] = s; sent.posNegation[ng] = tp1 - global_offset; //System.out.println("Negation Words ignore : "+prev+s+s1); ng++; } } } prev = s; } //End for loop }