get Syllable Count From Line - Java java.lang

Java examples for java.lang:char

Description

Counts the syllables in a line of text, given either as a list of words or as a single space-separated string.

Demo Code


import java.util.ArrayList;
import java.util.List;

/**
 * Heuristic English syllable splitter and counter.
 *
 * <p>A word is first grouped into alternating consonant/vowel clusters and the
 * clusters are then folded into syllables with a handful of simple rules. The
 * result is an approximation; it is not dictionary-accurate.
 */
public class Main {

    /**
     * Letters treated as vowels.
     *
     * <p>NOTE(review): the original vowel/consonant helpers were not present in
     * this file. The set below ("y" is NOT a vowel) is an assumption; confirm
     * against the intended behaviour.
     */
    private static final String VOWELS = "aeiou";

    public static void main(String[] argv) throws Exception {
        List<String> line = java.util.Arrays.asList("asdf", "java2s.com");
        System.out.println(getSyllableCountFromLine(line));
    }

    /**
     * Sums the syllable counts of every word in the list.
     *
     * @param line list of words; {@code null} is treated as an empty line, and
     *             {@code null} or empty entries contribute zero syllables
     * @return total number of syllables, never {@code null}
     */
    public static Integer getSyllableCountFromLine(List<String> line) {
        if (line == null) {
            return 0;
        }
        int totalSyllables = 0;
        for (String word : line) {
            totalSyllables += getSyllableCountFromWord(word);
        }
        return totalSyllables;
    }

    /**
     * Splits the line on whitespace and sums the syllable counts of the words.
     *
     * @param line text to analyse; {@code null}, empty and blank input yield 0
     * @return total number of syllables, never {@code null}
     */
    public static Integer getSyllableCountFromLine(String line) {
        if (line == null) {
            return 0;
        }
        String trimmed = line.trim();
        if (trimmed.length() == 0) {
            return 0;
        }
        // Split on runs of whitespace so that repeated spaces or tabs do not
        // produce empty "words" that would otherwise be counted.
        return getSyllableCountFromLine(java.util.Arrays.asList(trimmed.split("\\s+")));
    }

    /**
     * The syllable count of the word is just the number of items returned from
     * {@link #getSyllablesFromWord(String)}. It's important to use that
     * function, because (per the original author) it is also used by the abc
     * renderer and both must always agree.
     *
     * @param word the word to analyse; {@code null} or empty yields 0
     * @return number of syllables, never {@code null}
     */
    public static Integer getSyllableCountFromWord(String word) {
        return getSyllablesFromWord(word).size();
    }

    /**
     * Splits a word into syllables. The word is lower-cased first, and the
     * returned syllables concatenate back to that lower-cased word.
     *
     * @param word the word to split; {@code null} or empty yields an empty list
     * @return the syllables in order, never {@code null}
     */
    public static List<String> getSyllablesFromWord(String word) {
        List<String> syllables = new ArrayList<String>();
        if (word == null || word.length() == 0) {
            // An empty word has no syllables (and must not be reported as one).
            return syllables;
        }
        // Locale-independent lower-casing so the vowel test behaves the same everywhere.
        word = word.toLowerCase(java.util.Locale.ROOT);
        int marker = 0;
        while (true) {
            List<String> clusters = getClustersFromWord(word.substring(marker));
            // Cluster 0 is null when the fragment starts with a vowel.
            String leadingConsonants = clusters.get(0) == null ? "" : clusters.get(0);
            boolean endsWithLoneE = clusters.size() == 2 && clusters.get(1).equals("e");

            // Corner cases with the ends of words: only consonants are left,
            // optionally followed by a single (presumably silent) "e".
            if (clusters.size() == 1 || endsWithLoneE) {
                String tail = endsWithLoneE ? leadingConsonants + "e" : leadingConsonants;
                if (syllables.isEmpty()) {
                    // Whole word is consonants only, or something like "the".
                    syllables.add(tail);
                } else {
                    // Slap the leftovers onto the preceding syllable.
                    int last = syllables.size() - 1;
                    syllables.set(last, syllables.get(last) + tail);
                }
                break;
            }

            // Enough with the corner cases. From here down we build a legit
            // syllable; the opening consonant cluster always makes it in.
            StringBuilder currentSyllable = new StringBuilder(leadingConsonants);
            String vowelCluster = clusters.get(1);
            if (isMultisyllableVowelCluster(vowelCluster)) {
                // Over-simplified rule: break after the first vowel.
                currentSyllable.append(vowelCluster.charAt(0));
            } else {
                currentSyllable.append(vowelCluster);
                if (clusters.size() > 2 && clusters.get(2).length() > 1) {
                    // Several consonants follow: the first one closes this
                    // syllable, the rest open the next one.
                    currentSyllable.append(clusters.get(2).charAt(0));
                }
            }
            syllables.add(currentSyllable.toString());
            // Always advances: the vowel cluster contributes at least one character.
            marker += currentSyllable.length();
        }
        return syllables;
    }

    /**
     * Groups the word into alternating vowel and consonant clusters.
     *
     * <p>The even-indexed clusters are always consonants, and the odd-indexed
     * clusters are always vowels. This means that if the word starts with a
     * vowel, the first cluster is {@code null}. An empty word yields a single
     * empty cluster.
     *
     * <p>Every character that is not a vowel (including digits and
     * punctuation) is kept in the consonant clusters so that no character is
     * lost. NOTE(review): this is an assumption, see {@link #isVowel(char)}.
     *
     * @param word in practice this is often a fragment of a word, not the
     *             complete word; must not be {@code null}
     * @return the clusters in order, never {@code null}
     */
    static List<String> getClustersFromWord(String word) {
        List<String> clusters = new ArrayList<String>();
        StringBuilder currentCluster = new StringBuilder();
        boolean buildingConsonantCluster = true;
        for (int i = 0; i < word.length(); i++) {
            char c = word.charAt(i);
            boolean vowel = isVowel(c);
            if (i == 0 && vowel) {
                // Corner case: we start with a vowel, so the (absent) leading
                // consonant cluster is recorded as null and we switch to
                // vowel mode right away to keep the even/odd invariant.
                clusters.add(null);
                buildingConsonantCluster = false;
                currentCluster.append(c);
            } else if (vowel == buildingConsonantCluster) {
                // The character type changed: close the finished cluster and
                // start the next one with this character.
                clusters.add(currentCluster.toString());
                currentCluster.setLength(0);
                currentCluster.append(c);
                buildingConsonantCluster = !vowel;
            } else {
                // Same type as the cluster being built.
                currentCluster.append(c);
            }
        }
        clusters.add(currentCluster.toString());
        return clusters;
    }

    /**
     * Tells whether the character is one of the vowels a, e, i, o, u
     * (case-insensitive).
     *
     * <p>NOTE(review): reconstructed helper; the original definition was not
     * part of this file.
     *
     * @param c character to test
     * @return {@code true} if {@code c} is a vowel
     */
    static boolean isVowel(char c) {
        return VOWELS.indexOf(Character.toLowerCase(c)) >= 0;
    }

    /**
     * Tells whether the character is handled as a consonant, which here means
     * "anything that is not a vowel".
     *
     * <p>NOTE(review): reconstructed helper; the original definition was not
     * part of this file.
     *
     * @param c character to test
     * @return {@code true} if {@code c} is not a vowel
     */
    static boolean isConsonant(char c) {
        return !isVowel(c);
    }

    /**
     * This function assumes it's being passed a vowel cluster and returns true
     * iff the cluster typically has more than one syllable. This is impossible
     * to get perfect with such simple code, e.g. the "io" in caution vs. cation.
     *
     * @param cluster a lower-case vowel cluster, must not be {@code null}
     * @return {@code true} for clusters longer than two letters and for
     *         "eo", "ia", "io", "ua" and "uo"
     */
    static boolean isMultisyllableVowelCluster(String cluster) {
        if (cluster.length() > 2) {
            return true;
        }
        return cluster.equals("eo") || cluster.equals("ia")
                || cluster.equals("io") || cluster.equals("ua")
                || cluster.equals("uo");
    }
}

Related Tutorials