Example usage for edu.stanford.nlp.process Morphology stem

List of usage examples for edu.stanford.nlp.process Morphology stem

Introduction

This page collects usage examples for the stem method of edu.stanford.nlp.process.Morphology.

Prototype

public void stem(CoreLabel label) 

Source Link

Document

Adds the LemmaAnnotation to the given CoreLabel.

Usage

From source file:com.music.service.text.TimelineToMusicService.java

License:Open Source License

/**
 * Picks how varied the generated music should be, based on how many distinct
 * topics appear in the given tweets.
 *
 * <p>Each tweet is lower-cased, stripped of URLs and mentioned usernames, split
 * into words and stemmed. Stop words (the {@code stopwords} collection defined
 * outside this method), words shorter than 4 characters and words occurring
 * fewer times than 4% of the number of tweets are discarded. The surviving
 * distinct words are stored on {@code meta} as its top keywords, and their
 * number selects the returned {@link Variation}.
 *
 * @param tweets the tweets to analyse
 * @param meta   receives the detected top keywords via {@code setTopKeywords}
 * @return the variation level matching the number of detected topics
 */
private Variation getVariation(List<Tweet> tweets, TimelineMusic meta) {
    Morphology morphology = new Morphology(new StringReader(""));
    Multiset<String> words = HashMultiset.create();
    for (Tweet tweet : tweets) {
        String tweetText = tweet.getText().toLowerCase();
        for (String url : TimelineToMusicService.extractUrls(tweetText)) {
            tweetText = tweetText.replace(url, "");
        }
        // Only the usernames are stripped here. The retweet marker "rt" is NOT
        // removed by substring replacement any more: that mangled ordinary words
        // ("party" -> "pay") and could stop later usernames from matching. A
        // standalone "rt" token is discarded below by the minimum-length filter.
        for (String username : TimelineToMusicService.extractMentionedUsernames(tweetText)) {
            tweetText = tweetText.replace(username, "");
        }

        // split on runs of characters that are neither letters nor apostrophes
        String[] wordsInTweet = tweetText.split("[^\\p{L}&&[^']]+");
        for (String word : wordsInTweet) {
            if (word.isEmpty()) {
                // leading separators produce an empty first token; nothing to stem
                continue;
            }
            try {
                words.add(morphology.stem(word));
            } catch (Exception ex) {
                // best effort: fall back to the unstemmed word if stemming fails
                words.add(word);
            }
        }
    }
    words.removeAll(stopwords);

    // if a word is mentioned more times than is 4% of the tweets, it's considered a topic.
    // Divide by 100.0 so the threshold is not truncated by integer division.
    double topicThreshold = tweets.size() * 4 / 100.0;
    for (Iterator<String> it = words.iterator(); it.hasNext();) {
        String word = it.next();
        // remove stopwords not in the list (e.g. in a different language).
        // We consider all words less than 4 characters to be stop words
        boolean tooShort = word == null || word.length() < 4;
        if (tooShort || words.count(word) < topicThreshold) {
            it.remove();
        }
    }

    meta.setTopKeywords(new HashSet<>(words.elementSet()));

    // the more topics you have, the more variative music
    int topicCount = meta.getTopKeywords().size();
    if (topicCount > 40) {
        return Variation.EXTREMELY_VARIATIVE;
    } else if (topicCount > 30) {
        return Variation.VERY_VARIATIVE;
    } else if (topicCount > 20) {
        return Variation.MOVING;
    } else if (topicCount > 10) {
        return Variation.AVERAGE;
    } else {
        return Variation.MONOTONOUS;
    }
}

From source file:edu.iastate.airl.semtus.parser.Parser.java

License:Open Source License

/**
 * Returns the morphological base form of a word.
 *
 * <p>A fresh {@code Morphology} instance is created for every call. Any failure
 * while creating it or stemming, of any {@code Throwable} type, is swallowed and
 * reported to the caller as {@code null}.
 *
 * @param thisWord
 *            the word to reduce to its base form
 * @return the text of the stemmed word, or {@code null} if stemming failed
 */
static public String morphology(Word thisWord) {
    String baseText;
    try {
        baseText = new Morphology().stem(thisWord).word();
    } catch (Throwable failure) {
        // best effort: every failure is reported to the caller as null
        baseText = null;
    }
    return baseText;
}

From source file:final_dissertation.Feature_Parsing.java

/**
 * Scans every descendant element of {@code elem} named {@code tg} and files each
 * token into {@code sent} according to the element's {@code "pos"} attribute:
 * nouns, adjectives, conjunctions, verbs and negation words each go into their
 * own pair of parallel arrays (text + position).
 *
 * <p>The token text is the node value of the element's first child, trimmed and
 * lower-cased. The position is the element's {@code "wid"} attribute parsed as
 * an int, minus {@code global_offset} (which stays 0 in the active code, since
 * the only statement that changes it is commented out).
 *
 * <p>Nouns are additionally counted in {@code noun}. The name {@code num} used
 * below is declared outside this method; it is incremented when a noun token is
 * {@code "t"} and copied into {@code sent.reviewID} (presumably a running review
 * counter; confirm against the enclosing class).
 *
 * @param tg       element tag name handed to {@code getElementsByTagName}
 * @param elem     element whose descendants are scanned
 * @param sent     receives the categorised words and their positions
 * @param noun     map from noun text to its {@code Noun_Word}; an entry is
 *                 created with {@code new Noun_Word(text, 1)} or incremented
 * @param negation map whose keys are treated as negation words (only
 *                 {@code containsKey} is used)
 * @param ttt      not used by this method
 */
public static void getTag(String tg, Element elem, Sentences sent, HashMap<String, Noun_Word> noun,
        HashMap<String, String> negation, int ttt) {
    int global_offset = 0;
    NodeList nl = elem.getElementsByTagName(tg);
    // nn/aj/ng/cc/vb are the write indices into the noun, adjective, negation,
    // conjunction and verb arrays of sent. nn starts at -1 because it is
    // incremented before use; the others are incremented after use.
    // NOTE(review): n is never used; flag and temp are only written, their
    // readers live in the commented-out noun-phrase code below.
    int nn = -1, n, aj = 0, ng = 0, cc = 0, vb = 0, tp, flag = 1, temp = -1;
    int len = nl.getLength();
    Node wrd;
    String temp_noun = "", prev = "";
    Morphology mr = new Morphology();

    for (int i = 0; i < len; i++) {
        wrd = nl.item(i);
        Element el = (Element) wrd;
        String vl = el.getAttribute("pos");
        // NOTE(review): throws NullPointerException if the element has no child node.
        String s = el.getFirstChild().getNodeValue();
        s = s.trim().toLowerCase();
        // s1 is the stemmed, lower-cased text of the NEXT token (look-ahead),
        // or "" when the current token is the last one.
        String s1 = "";
        if ((i + 1) < len) {
            Node wrd1 = nl.item(i + 1);
            Element el1 = (Element) wrd1;
            s1 = el1.getFirstChild().getNodeValue().toLowerCase();
            s1 = mr.stem(s1).trim().toLowerCase();
        }

        // ---- Nouns ----
        if (vl.equals("NN") || vl.equals("NNS") || vl.equals("NNP") || vl.equals("NNPS")) {
            // NOTE(review): s cannot be null here (it was dereferenced by trim() above);
            // the same holds for the other "s != null" checks in this loop.
            if (s != null) {
                if (s.equals("t")) {
                    // the token "t" only bumps the externally declared counter num
                    num++;
                } else if (!(s.equals("##") || s.equals("p"))) {
                    int ln = s.length();
                    // Nouns of one or two characters are ignored.
                    // NOTE(review): this continue also skips the "prev = s" update
                    // at the bottom of the loop.
                    if (ln == 1 || ln == 2) {
                        continue;
                    }
                    nn++;
                    tp = Integer.parseInt(el.getAttribute("wid"));
                    // only plural nouns are stemmed
                    if (vl.equals("NNS") || vl.equals("NNPS")) {
                        s = mr.stem(s);
                    }
                    // normalise the abbreviation "pic"
                    if (s.equals("pic")) {
                        s = "picture";
                    }
                    temp_noun = s;
                    // Disabled code: merged a noun directly following the previous
                    // noun into a single noun phrase and adjusted global_offset.
                    /*
                       if (temp != -1) 
                       {
                    if (temp + 1 == tp) 
                    {
                        nn--;
                        if (flag == 1) 
                        {
                            flag = 0;
                            Noun_Word w1 = noun.get(sent.nouns[nn]);
                            if (w1.cnt == 1) 
                            {
                                noun.remove(sent.nouns[nn]);
                            } 
                            else 
                            {
                                w1.cnt--;
                                noun.put(sent.nouns[nn], w1);
                            }
                        }
                        global_offset++;        //It is for Noun Phrases and to correct their position.
                        sent.nouns[nn] = sent.nouns[nn] + " " + temp_noun;
                        temp = tp;
                        continue;
                    }
                       }
                    */
                    sent.nouns[nn] = temp_noun;
                    sent.posNoun[nn] = tp - global_offset;
                    sent.reviewID = num;
                    temp = tp;
                    // count the noun: create the entry with 1 or increment the existing one
                    Noun_Word word = noun.get(sent.nouns[nn]);
                    if (word == null) {
                        word = new Noun_Word(sent.nouns[nn], 1);
                    } else {
                        word.increment();
                    }
                    noun.put(sent.nouns[nn], word);
                }
            }
        // ---- Adjectives (stored unstemmed; position stored as a String) ----
        } else if (vl.equals("JJ") || vl.equals("JJS") || vl.equals("JJR")) {
            flag = 1;
            if (s != null) {
                if (!(s.equals("##") || s.equals("<") || s.equals("="))) {
                    //s = mr.stem(s);
                    int tp1 = Integer.parseInt(el.getAttribute("wid"));
                    sent.adjective[aj] = s;
                    sent.posAdjective[aj] = Integer.toString(tp1 - global_offset);
                    //System.out.println("Adjective: "+sent.adjective[aj]+" Pos"+sent.posAdjective[nn]);
                    aj++;
                }
            }
        // ---- Conjunctions: symbols and "but" followed by "also" are skipped ----
        } else if (vl.equals("CC")) {
            flag = 1;
            if (s != null) {
                if (!(s.equals("x") || s.equals("+") || s.equals("&")
                        || (s.equals("but") && s1.equals("also")))) {
                    int tp1 = Integer.parseInt(el.getAttribute("wid"));
                    sent.conjunction[cc] = s;
                    sent.posConjunction[cc] = Integer.toString(tp1 - global_offset);
                    //System.out.println("Conjunction: "+sent.conjunction[cc]+" Pos "+sent.posConjunction[cc]);
                    cc++;
                }
            }
        // ---- Verbs (always stemmed) ----
        } else if (vl.equals("VB") || vl.equals("VBD") || vl.equals("VBG") || vl.equals("VBN")
                || vl.equals("VBP") || vl.equals("VBZ")) {
            flag = 1;
            if (s != null) {
                s = mr.stem(s);
                int tp1 = Integer.parseInt(el.getAttribute("wid"));
                sent.verbs[vb] = s;
                sent.posVerb[vb] = Integer.toString(tp1 - global_offset);
                //System.out.println("verbs: "+sent.verbs[vb]+" Pos "+sent.posVerb[vb]);
                vb++;
            }
        // ---- Everything else: stemmed and checked against the negation map ----
        } else if (s != null) {
            flag = 1;
            s = mr.stem(s);
            if (negation.containsKey(s)) {

                // "do n't" / "does n't": recorded as a negation.
                // NOTE(review): this branch stores exactly what the else-branch
                // below stores, and "n't" is not excluded there either.
                if ((prev.equals("do") || prev.equals("does")) && s.equals("n't")) {
                    int tp1 = Integer.parseInt(el.getAttribute("wid"));
                    sent.negation[ng] = s;
                    sent.posNegation[ng] = tp1 - global_offset;
                    //System.out.println("Negation Words :"+prev+s);
                    ng++;

                } else if (!((s.equals("not") && (s1.equals("only") || s1.equals("just")))
                        || (s.equals("no") && (s1.equals("wonder") || s1.equals("problem"))))) // Excluding "not only", "not just", "no wonder" and "no problem".
                {
                    int tp1 = Integer.parseInt(el.getAttribute("wid"));
                    sent.negation[ng] = s;
                    sent.posNegation[ng] = tp1 - global_offset;
                    //System.out.println("Negation Words ignore : "+prev+s+s1);
                    ng++;
                }
            }
        }
        // remember the (possibly stemmed) current token for the next iteration
        prev = s;
    } //End for loop

}