Example usage for edu.stanford.nlp.process Morphology Morphology

List of usage examples for edu.stanford.nlp.process Morphology Morphology

Introduction

In this page you can find the example usage for edu.stanford.nlp.process Morphology Morphology.

Prototype

public Morphology(Reader in) 

Source Link

Document

Process morphologically words from a Reader.

Usage

From source file:com.music.service.text.TimelineToMusicService.java

License:Open Source License

private Variation getVariation(List<Tweet> tweets, TimelineMusic meta) {
    Morphology morphology = new Morphology(new StringReader(""));
    Multiset<String> words = HashMultiset.create();
    for (Tweet tweet : tweets) {
        String tweetText = tweet.getText().toLowerCase();
        List<String> urls = TimelineToMusicService.extractUrls(tweetText);
        for (String url : urls) {
            tweetText = tweetText.replace(url, "");
        }/*from  w w w  . jav a 2s.  c  om*/
        List<String> usernames = TimelineToMusicService.extractMentionedUsernames(tweetText);
        for (String username : usernames) {
            tweetText = tweetText.replace(username, "").replace("rt", "");
        }

        String[] wordsInTweet = tweetText.split("[^\\p{L}&&[^']]+");
        for (String word : wordsInTweet) {
            try {
                words.add(morphology.stem(word));
            } catch (Exception ex) {
                words.add(word);
            }
        }
    }
    words.removeAll(stopwords);

    // if a word is mentioned more times than is 4% of the tweets, it's considered a topic
    double topicThreshold = tweets.size() * 4 / 100;
    for (Iterator<String> it = words.iterator(); it.hasNext();) {
        String word = it.next();
        // remove stopwords not in the list (e.g. in a different language).
        // We consider all words less than 4 characters to be stop words
        if (word == null || word.length() < 4) {
            it.remove();
        } else if (words.count(word) < topicThreshold) {
            it.remove();
        }
    }

    meta.setTopKeywords(new HashSet<>(words.elementSet()));

    // the more topics you have, the more variative music
    if (meta.getTopKeywords().size() > 40) {
        return Variation.EXTREMELY_VARIATIVE;
    } else if (meta.getTopKeywords().size() > 30) {
        return Variation.VERY_VARIATIVE;
    } else if (meta.getTopKeywords().size() > 20) {
        return Variation.MOVING;
    } else if (meta.getTopKeywords().size() > 10) {
        return Variation.AVERAGE;
    } else {
        return Variation.MONOTONOUS;
    }
}