Android Open Source - droidling Personal Activity






From Project

Back to project page droidling.

License

The source code is released under:

Copyright (c) 2012 Keith Trnka Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Softwa...

If you think the Android project droidling listed in this page is inappropriate, such as containing malicious code/tools or violating the copyright, please email info at java2s dot com, thanks.

Java Source Code

package com.github.ktrnka.droidling;
// Source retrieved from www.java2s.com
import static com.github.ktrnka.droidling.Tokenizer.isNonword;
import static com.github.ktrnka.droidling.Tokenizer.tokenize;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;

import org.achartengine.GraphicalView;
import org.achartengine.chart.BarChart;
import org.achartengine.model.CategorySeries;
import org.achartengine.model.XYMultipleSeriesDataset;
import org.achartengine.renderer.SimpleSeriesRenderer;
import org.achartengine.renderer.XYMultipleSeriesRenderer;
import org.achartengine.renderer.XYMultipleSeriesRenderer.Orientation;
import org.json.JSONException;

import android.content.Context;
import android.content.Intent;
import android.content.SharedPreferences;
import android.database.Cursor;
import android.graphics.Bitmap;
import android.graphics.Bitmap.CompressFormat;
import android.graphics.Color;
import android.graphics.Paint.Align;
import android.net.Uri;
import android.os.Bundle;
import android.os.Environment;
import android.preference.PreferenceManager;
import android.util.DisplayMetrics;
import android.util.Log;
import android.view.LayoutInflater;
import android.view.View;
import android.view.ViewGroup;
import android.widget.AdapterView;
import android.widget.AdapterView.OnItemSelectedListener;
import android.widget.ImageView;
import android.widget.Spinner;
import android.widget.TextView;

import com.fima.cardsui.views.CardUI;
import com.github.ktrnka.droidling.helpers.Util;

public class PersonalActivity extends RefreshableActivity implements OnItemSelectedListener {
    /** Upper bound on how many phrases are appended to each key-phrase list. */
    public static final int maxPhrases = 50;

    /** Set once a refresh has been started so that onStart() does not start another. */
    private boolean scanned = false;

    /**
     * Unigrams from a background corpus for the locale language. May be null if
     * none available.
     */
    private WordDistribution corpusUnigrams;

    /**
     * Lower-cased stopwords read from the localized ".stopwords.small.utf8.txt"
     * asset. Used to delimit simple phrases and to reject bigrams/trigrams that
     * start or end with a stopword. Empty when no such asset exists.
     */
    private HashSet<String> smallStopwords;

    /**
     * Lower-cased stopwords read from the localized ".stopwords.medium.utf8.txt"
     * asset. Used to reject unigram candidates. Empty when no such asset exists.
     */
    private HashSet<String> largeStopwords;

    /** Timestamps of the scanned sent messages; source of the per-month, day and hour stats. */
    private DateDistribution dates;

    // constants to tweak the scoring of phrases. This is probably
    // language-specific and should be extracted to a config.
    // The three *Scale values multiply (observed - expected) frequency for the
    // matching n-gram order; the two *Factor values are applied (together with
    // the log of the count) when a candidate also occurs as a whole short
    // message or as a simple phrase.
    public static final double unigramScale = 0.25;
    public static final double bigramScale = 0.9;
    public static final double trigramScale = 1.2;
    public static final double shortMessageFactor = 1.3;
    public static final double simplePhraseFactor = 1.6;

    /** Tag used for android.util.Log output from this class. */
    private static final String TAG = "PersonalActivity";

    /**
     * the string or near-string stats to display
     */
    private PersonalStats displayStats;

    /**
     * which of the key phrase sortings in displayStats to display
     */
    private int displayPhraseIndex;

    // Bar colors for the charts. NOTE(review): the chart-building code is not
    // part of this excerpt, so the exact usage should be confirmed there.
    private static final int graphBarBottomColor = Color.rgb(25, 89, 115);
    private static final int graphBarTopColor = Color.rgb(17, 60, 77);

    // Preference keys under which the elapsed time of each processing stage is stored.
    public static final String MESSAGE_LOOP_KEY = "PersonalActivity: scanning messages";
    public static final String LOAD_UNIGRAMS_KEY = "PersonalActivity: loading unigrams";
    public static final String LOAD_STOPWORDS_KEY = "PersonalActivity: loading stopwords";
    public static final String LOAD_CONTACTS_KEY = "PersonalActivity: loading contacts";
    public static final String SELECT_CANDIDATES_KEY = "PersonalActivity: finding the best candidates";
    public static final String GENERATE_DESCRIPTIONS_KEY = "PersonalActivity: generating descriptions";
    public static final String SAVE_DISPLAY_KEY = "PersonalActivity: caching results";

    /** The timing keys in the order handed to MainActivity.summarizeRuntime. */
    public static final String[] PROFILING_KEY_ORDER = {
            LOAD_UNIGRAMS_KEY, LOAD_STOPWORDS_KEY, LOAD_CONTACTS_KEY, MESSAGE_LOOP_KEY,
            SELECT_CANDIDATES_KEY, GENERATE_DESCRIPTIONS_KEY, SAVE_DISPLAY_KEY
    };

    /**
     * When true, scanSMS() dumps per-phrase features to "sms_phrase_log.txt" on
     * external storage and showStats() offers to share that file.
     */
    public static final boolean LOG_PHRASES = false;

    /** Name of the private file in which the computed display stats are cached. */
    private static final String DISPLAY_FILENAME = "PersonalActivity.cache";

    /** Preference key holding the number of sent messages seen by the last full scan. */
    private static final String PROCESSED_SENT_MESSAGES = "PersonalActivity.processedMessages";

    /** Phrase log written by scanSMS(); only assigned when LOG_PHRASES is true. */
    private File logFile;

    /** Card container that all result cards are added to. */
    private CardUI mCardView;

    /**
     * Installs the help target and the card layout, then renders the card list
     * while it is still empty.
     *
     * @param savedInstanceState framework-provided state; only forwarded to the superclass
     */
    @Override
    public void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);

        // Begin with the first of the key-phrase sortings.
        displayPhraseIndex = 0;

        setHelpActivity(AboutPersonalActivity.class);

        // Card-based layout (the older R.layout.simple_scroll is no longer used).
        setContentView(R.layout.cardsui_main);

        final CardUI cards = (CardUI) findViewById(R.id.cardsview);
        mCardView = cards;
        cards.setSwipeable(false);

        // No cards have been added yet, so this draws an empty list.
        cards.refresh();
    }

    /**
     * Starts the initial, cache-friendly refresh the first time the activity is started.
     */
    @Override
    public void onStart() {
        super.onStart();

        // A refresh was already kicked off earlier; tabbing back into the app
        // must not trigger another scan.
        if (scanned) {
            return;
        }

        refresh(false);
    }

    /**
     * Rebuilds the stats on a background thread, toggling the refresh action
     * button state before and after the work.
     *
     * @param forceRefresh true to rescan the messages, false to try the cached stats first
     */
    @Override
    protected void refresh(final boolean forceRefresh) {
        final Runnable rebuild = new Runnable() {
            public void run() {
                setRefreshActionButtonState(true);
                buildPersonalStats(forceRefresh);
                setRefreshActionButtonState(false);
            }
        };

        final Thread worker = new Thread(rebuild);
        worker.start();

        // Remember that a scan is under way so onStart() will not launch another.
        scanned = true;
    }

    /**
     * Makes sure {@code displayStats} is populated and then shows it.
     *
     * The stats are read from the cache file unless a fresh computation is
     * requested; if the cache cannot be read or parsed, the messages are
     * scanned instead.
     *
     * @param computeFresh true to skip the cache and scan the messages
     */
    protected void buildPersonalStats(boolean computeFresh) {
        boolean scanNeeded = computeFresh;

        if (!scanNeeded) {
            try {
                displayStats = new PersonalStats(openFileInput(DISPLAY_FILENAME));
            } catch (IOException e) {
                // cache missing or unreadable
                scanNeeded = true;
            } catch (JSONException e) {
                // cache present but not parseable
                scanNeeded = true;
            }
        }

        if (scanNeeded) {
            scanSMS();
        }

        showStats();
    }

    /**
     * Reports whether the number of sent messages differs from the number
     * recorded by the last full scan.
     *
     * @return true if the current sent-message count is different from the
     *         stored count; false if it is equal or if the sent messages
     *         cannot be queried at all
     */
    @Override
    public boolean hasNewData() {
        String[] sentColumns = new String[] {
                Sms.BODY, Sms.DATE, Sms.ADDRESS
        };
        Cursor messages = getContentResolver().query(Sms.SENT_URI, sentColumns, null, null, null);
        if (messages == null) {
            // query() may hand back null; without a count there is nothing to compare.
            Log.w(TAG, "hasNewData could not query sent messages");
            return false;
        }

        final int numMessages;
        try {
            numMessages = messages.getCount();
        } finally {
            // release the cursor even if counting fails
            messages.close();
        }

        SharedPreferences prefs = PreferenceManager
                .getDefaultSharedPreferences(getApplicationContext());
        return prefs.getInt(PROCESSED_SENT_MESSAGES, 0) != numMessages;
    }

    /**
     * Looks up the scaling factor to apply to fonts.
     *
     * @return the {@code scaledDensity} of the default display's metrics
     */
    private float getFontScale() {
        final DisplayMetrics displayInfo = new DisplayMetrics();
        getWindowManager()
                .getDefaultDisplay()
                .getMetrics(displayInfo);

        final float fontScale = displayInfo.scaledDensity;
        return fontScale;
    }

    /**
     * Resolves an asset named after the default locale's language code, such as
     * en.unigrams.utf8.txt, by looking for it in the root asset listing.
     * TODO: Basically this is mimicking Resources. I need to double-check why I
     * chose to use Assets instead.
     *
     * @param suffix The suffix to append to the language code.
     * @return The asset's filename if it is present. Null if it is absent or
     *         the assets could not be listed.
     */
    private String getLocalizedAsset(String suffix) {
        final String[] rootAssets;
        final String wanted;

        try {
            wanted = Locale.getDefault().getLanguage() + suffix;
            rootAssets = getAssets().list("");
        } catch (IOException e) {
            Log.e(TAG, "getLocalizedAsset failed to list assets");
            return null;
        }

        for (int i = 0; i < rootAssets.length; i++) {
            if (rootAssets[i].equals(wanted)) {
                return wanted;
            }
        }

        return null;
    }

    /**
     * Loads the background unigram distribution for the current language into
     * {@code corpusUnigrams} and records how long that took.
     *
     * {@code corpusUnigrams} ends up null when there is no unigram asset for
     * the language or when the asset cannot be opened.
     */
    private void loadUnigrams() {
        final long startedAt = System.currentTimeMillis();

        // Stays null when there isn't a unigram file for this language.
        // TODO: Build a baseline unigram model from the set of received
        // messages.
        WordDistribution loaded = null;
        try {
            final String assetName = getLocalizedAsset(".unigrams.utf8.txt");
            if (assetName != null) {
                loaded = new WordDistribution(getAssets().open(assetName), false);
            }
        } catch (IOException e) {
            Log.e(TAG, "loadUnigrams failed");
        }
        corpusUnigrams = loaded;

        setPreference(LOAD_UNIGRAMS_KEY, System.currentTimeMillis() - startedAt);
    }

    /**
     * Rebuilds {@code smallStopwords} and {@code largeStopwords} from the
     * localized stopword assets and records how long that took.
     *
     * A set is left empty when its asset does not exist, and keeps whatever was
     * read so far when reading fails part-way through.
     */
    private void loadStopwords() {
        long time = System.currentTimeMillis();

        smallStopwords = new HashSet<String>();
        readStopwordAsset(".stopwords.small.utf8.txt", Locale.getDefault(), smallStopwords,
                "loadStopwords failed for small file");

        // TODO: This should use the locale closest to the stopword language.
        // We only have stopwords for English right now though.
        largeStopwords = new HashSet<String>();
        readStopwordAsset(".stopwords.medium.utf8.txt", Locale.ENGLISH, largeStopwords,
                "loadStopwords failed for large file");

        setPreference(LOAD_STOPWORDS_KEY, System.currentTimeMillis() - time);
    }

    /**
     * Reads one localized stopword asset (one word per line) into a set.
     *
     * @param suffix asset suffix appended to the language code
     * @param caseLocale locale used to lower-case each word
     * @param target set that receives the trimmed, lower-cased, non-empty lines
     * @param failureMessage message logged if the asset cannot be read
     */
    private void readStopwordAsset(String suffix, Locale caseLocale, HashSet<String> target,
            String failureMessage) {
        BufferedReader in = null;
        try {
            String assetName = getLocalizedAsset(suffix);
            if (assetName == null)
                return;

            // The assets are UTF-8 by name, so decode them as such instead of
            // relying on the platform default charset.
            in = new BufferedReader(new InputStreamReader(getAssets().open(assetName), "UTF-8"),
                    8192);
            String line;
            while ((line = in.readLine()) != null) {
                line = line.trim();
                if (line.length() > 0)
                    target.add(line.toLowerCase(caseLocale));
            }
        } catch (IOException e) {
            Log.e(TAG, failureMessage, e);
        } finally {
            // Close on every path, including a failed read.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    Log.w(TAG, "loadStopwords failed to close " + suffix, e);
                }
            }
        }
    }

    /**
     * Scans all sent SMS messages and rebuilds {@code displayStats} from them.
     *
     * The work proceeds in timed stages, each recorded via setPreference:
     * loading unigrams and stopwords, loading contacts, looping over the sent
     * messages (n-gram statistics, short whole messages, stopword-delimited
     * "simple phrases", per-contact counts, dates), scoring and merging phrase
     * candidates, building the display strings and histograms, and finally
     * writing the result to the cache file. The number of processed messages is
     * stored under PROCESSED_SENT_MESSAGES and the stats are shown.
     *
     * If there are no sent messages (or they cannot be queried) an error is
     * reported and the method returns early with an empty {@code displayStats}.
     */
    public void scanSMS() {
        loadUnigrams();
        loadStopwords();

        displayStats = new PersonalStats();

        // step 1: scan contacts, build a mapping of contact number to name
        long time = System.currentTimeMillis();
        ExtendedApplication app = (ExtendedApplication) getApplication();
        if (!app.blockingLoadContacts()) {
            warning("No contacts found");
        }
        setPreference(LOAD_CONTACTS_KEY, System.currentTimeMillis() - time);

        // step 2: scan sent messages
        time = System.currentTimeMillis();
        String[] sentColumns = new String[] {
                Sms.BODY, Sms.DATE, Sms.ADDRESS
        };
        Cursor messages = getContentResolver().query(Sms.SENT_URI, sentColumns, null, null, null);
        if (messages == null) {
            // query() may return null; treat that like an empty outbox
            // instead of crashing on the first cursor access.
            error(getString(R.string.error_no_sent_sms));
            return;
        }

        final HashMap<String, int[]> personCounts = new HashMap<String, int[]>();

        CorpusStats sentStats = new CorpusStats();

        // full-message distribution (short messages only)
        final HashMap<String, int[]> shortMessages = new HashMap<String, int[]>();
        final int maxShortMessageLength = 20;

        // segmented phrases distribution (think hacky chunking)
        final HashMap<String, int[]> simplePhrases = new HashMap<String, int[]>();

        ArrayList<String> simplePhrase = new ArrayList<String>();

        dates = new DateDistribution();

        // reusable phrase builder
        StringBuilder reusableBuilder = new StringBuilder();
        final int numMessages;

        try {
            numMessages = messages.getCount();

            if (!messages.moveToFirst()) {
                error(getString(R.string.error_no_sent_sms));
                return;
            }

            final int bodyIndex = messages.getColumnIndexOrThrow(Sms.BODY);
            final int dateIndex = messages.getColumnIndexOrThrow(Sms.DATE);
            final int addressIndex = messages.getColumnIndexOrThrow(Sms.ADDRESS);

            do {
                // TODO: Replace this with truecasing
                String body = messages.getString(bodyIndex).toLowerCase(Locale.getDefault());

                long millis = messages.getLong(dateIndex);
                Date date = new Date(millis);
                dates.add(date);

                // handle the simple message thing: short messages are also
                // counted as whole-message phrases
                if (body.length() <= maxShortMessageLength)
                    incrementCount(shortMessages, body);

                ArrayList<String> tokens = tokenize(body);

                // clear out the simplePhrase sequence
                // NOTE(review): a phrase still open when the tokens of the
                // previous message ran out is discarded here without being
                // counted - confirm that this is intended.
                simplePhrase.clear();

                sentStats.train(tokens, body.length());

                // update the simple phrases
                for (String token : tokens) {
                    if (!isNonword(token) && !smallStopwords.contains(token)) {
                        // add to the phrase
                        simplePhrase.add(token);
                    }
                    else {
                        // a nonword or stopword ends the current phrase
                        if (simplePhrase.size() > 0) {
                            reusableBuilder.setLength(0);
                            reusableBuilder.append(simplePhrase.get(0));
                            for (int i = 1; i < simplePhrase.size(); i++) {
                                reusableBuilder.append(' ');
                                reusableBuilder.append(simplePhrase.get(i));
                            }

                            incrementCount(simplePhrases, reusableBuilder.toString());
                        }
                        // flush the phrase
                        simplePhrase.clear();
                    }
                }

                // figure out the name of the destination, store it in person
                String address = messages.getString(addressIndex);

                String displayName = app.lookupContactName(address);
                if (displayName != null)
                    incrementCount(personCounts, displayName);
            } while (messages.moveToNext());
        } finally {
            // release the cursor on every path, including failures in the loop
            messages.close();
        }

        setPreference(MESSAGE_LOOP_KEY, System.currentTimeMillis() - time);

        time = System.currentTimeMillis();
        // generate candidates: "candidates" holds the corpus-adjusted scores,
        // "frequencyCandidates" the raw counts
        final HashMap<String, double[]> candidates = new HashMap<String, double[]>();

        final HashMap<String, int[]> frequencyCandidates = new HashMap<String, int[]>();

        // unigram candidates
        for (String word : sentStats.unigrams.keySet()) {
            if (isNonword(word))
                continue;

            frequencyCandidates.put(word, new int[] {
                sentStats.unigrams.get(word)[0]
            });

            if (largeStopwords.contains(word))
                continue;

            double expected = 0;
            if (corpusUnigrams != null)
                expected = corpusUnigrams.expectedFrequency(word, sentStats.unigramTotal);

            candidates.put(word, new double[] {
                unigramScale * (sentStats.unigrams.get(word)[0] - expected)
            });
        }

        // analyse bigrams
        StringBuilder ngramBuilder = new StringBuilder();
        for (String word1 : sentStats.bigrams.keySet()) {
            if (isNonword(word1))
                continue;

            for (String word2 : sentStats.bigrams.get(word1).keySet()) {
                if (isNonword(word2))
                    continue;

                // concatenation with StringBuilder for performance
                ngramBuilder.setLength(0);
                ngramBuilder.append(word1);
                ngramBuilder.append(' ');
                ngramBuilder.append(word2);
                String ngram = ngramBuilder.toString();

                int freq = sentStats.bigrams.get(word1).get(word2)[0];

                frequencyCandidates.put(ngram, new int[] {
                    freq
                });

                if (smallStopwords.contains(word1) || smallStopwords.contains(word2))
                    continue;

                double expected = 0;
                if (corpusUnigrams != null)
                    expected = corpusUnigrams
                            .expectedFrequency(word1, word2, sentStats.bigramTotal);

                candidates.put(ngram, new double[] {
                    bigramScale * (freq - expected)
                });
            }
        }

        // analyse trigrams
        for (String word1 : sentStats.trigrams.keySet()) {
            if (isNonword(word1))
                continue;

            for (String word2 : sentStats.trigrams.get(word1).keySet()) {
                if (isNonword(word2))
                    continue;

                for (String word3 : sentStats.trigrams.get(word1).get(word2).keySet()) {
                    if (isNonword(word3))
                        continue;

                    // concatenation with StringBuilder for performance
                    ngramBuilder.setLength(0);
                    ngramBuilder.append(word1);
                    ngramBuilder.append(' ');
                    ngramBuilder.append(word2);
                    ngramBuilder.append(' ');
                    ngramBuilder.append(word3);
                    String ngram = ngramBuilder.toString();

                    int freq = sentStats.trigrams.get(word1).get(word2).get(word3)[0];

                    frequencyCandidates.put(ngram, new int[] {
                        freq
                    });

                    // only the outer words are checked against the stopwords
                    if (smallStopwords.contains(word1) || smallStopwords.contains(word3))
                        continue;

                    // The expected frequency must actually be stored; it used
                    // to be computed and thrown away, leaving trigrams
                    // unadjusted against the background corpus.
                    double expected = 0;
                    if (corpusUnigrams != null)
                        expected = corpusUnigrams.expectedFrequency(word1, word2, word3,
                                sentStats.trigramTotal);

                    candidates.put(ngram, new double[] {
                        trigramScale * (freq - expected)
                    });
                }
            }
        }

        // phrases ordered by raw count, most frequent first
        ArrayList<String> basicPhrases = new ArrayList<String>(frequencyCandidates.keySet());
        Collections.sort(basicPhrases, new Comparator<String>() {
            public int compare(String a, String b) {
                return Double.compare(frequencyCandidates.get(b)[0], frequencyCandidates.get(a)[0]);
            }
        });

        int basicCurrent = 0;
        for (String wordPair : basicPhrases) {
            displayStats.keyPhraseTexts[PersonalStats.COUNT_SORTED].append(wordPair);
            displayStats.keyPhraseTexts[PersonalStats.COUNT_SORTED].append('\n');

            if (++basicCurrent >= maxPhrases)
                break;
        }

        // adjust candidates based on phrases, etc
        // NOTE(review): a count of 1 gives Math.log(1) == 0, which zeroes the
        // candidate's score - confirm that this is intended.
        for (String candidate : candidates.keySet()) {
            if (simplePhrases.containsKey(candidate))
                candidates.get(candidate)[0] *= simplePhraseFactor
                        * Math.log(simplePhrases.get(candidate)[0]);

            if (shortMessages.containsKey(candidate))
                candidates.get(candidate)[0] *= shortMessageFactor
                        * Math.log(shortMessages.get(candidate)[0]);
        }

        // sort candidate pairs, highest score first
        final Comparator<String> byScoreDescending = new Comparator<String>() {
            public int compare(String a, String b) {
                return Double.compare(candidates.get(b)[0], candidates.get(a)[0]);
            }
        };
        ArrayList<String> pairs = new ArrayList<String>(candidates.keySet());
        Collections.sort(pairs, byScoreDescending);

        if (LOG_PHRASES) {
            logFile = new File(Environment.getExternalStorageDirectory(), "sms_phrase_log.txt");
            try {
                PrintWriter scoresOut = new PrintWriter(new FileWriter(logFile));
                try {
                    logCandidateFeatures(sentStats, shortMessages, simplePhrases, candidates,
                            scoresOut);
                } finally {
                    scoresOut.close();
                }
            } catch (IOException e) {
                Log.e(TAG, "Failed to write to log: " + e);
            }
        }

        mergeSimilarPhrases(sentStats, candidates, pairs);

        // resort candidate pairs, the merge changed the scores
        Collections.sort(pairs, byScoreDescending);

        setPreference(SELECT_CANDIDATES_KEY, System.currentTimeMillis() - time);

        time = System.currentTimeMillis();

        /*********************** BUILD THE STRINGS ************************/

        // KEY PHRASE DISPLAY
        final StringBuilder phraseBuilder = displayStats.keyPhraseTexts[PersonalStats.PHRASE_SORTED];
        int current = 0;
        for (String wordPair : pairs) {
            phraseBuilder.append(wordPair);
            phraseBuilder.append('\n');

            if (++current >= maxPhrases)
                break;
        }

        if (phraseBuilder.length() == 0)
            phraseBuilder.append(getString(R.string.no_phrases));

        // CONTACT DISPLAY, most messaged contact first
        ArrayList<String> people = new ArrayList<String>(personCounts.keySet());
        Collections.sort(people, new Comparator<String>() {
            public int compare(String a, String b) {
                return personCounts.get(b)[0] - personCounts.get(a)[0];
            }
        });

        for (String person : people) {
            // the list is sorted, so everything from here on has one message at most
            if (personCounts.get(person)[0] <= 1)
                break;

            displayStats.contactsDisplay.append(getString(R.string.num_messages_format, person,
                    personCounts.get(person)[0]));
        }

        if (displayStats.contactsDisplay.length() == 0)
            displayStats.contactsDisplay.append(getString(R.string.no_frequent_contacts));

        // build out the general stats
        displayStats.generalDisplay.append(getString(R.string.num_sent_format, sentStats.messages));
        displayStats.generalDisplay.append(getString(R.string.num_sent_per_month_format,
                dates.computeTextsPerMonth()));

        displayStats.generalDisplay.append(getString(R.string.words_per_text_format,
                sentStats.filteredWords / sentStats.messages));
        displayStats.generalDisplay.append(getString(R.string.chars_per_text_format,
                sentStats.chars / sentStats.messages));
        displayStats.generalDisplay.append(getString(R.string.chars_per_word_format,
                sentStats.filteredWordLength / (double) sentStats.filteredWords));

        // day of the week histogram
        displayStats.dayHistogram = dates.computeDayOfWeekHistogram();

        // time of day histogram
        displayStats.hourHistogram = dates.computeHourHistogram();

        // clean up some of the strings for newlines
        Util.strip(displayStats.contactsDisplay, '\n');
        Util.strip(displayStats.generalDisplay, '\n');
        for (StringBuilder b : displayStats.keyPhraseTexts)
            Util.strip(b, '\n');

        setPreference(GENERATE_DESCRIPTIONS_KEY, System.currentTimeMillis() - time);

        time = System.currentTimeMillis();
        try {
            displayStats.writeTo(openFileOutput(DISPLAY_FILENAME, Context.MODE_PRIVATE));
        } catch (IOException e) {
            Log.e(TAG, "Failed to save displayStats");
            Log.e(TAG, Log.getStackTraceString(e));
        } catch (JSONException e) {
            Log.e(TAG, "Failed to save displayStats");
            Log.e(TAG, Log.getStackTraceString(e));
        }
        setPreference(SAVE_DISPLAY_KEY, System.currentTimeMillis() - time);
        setPreference(PROCESSED_SENT_MESSAGES, numMessages);

        showStats();
    }

    /**
     * Adds one to the single-element counter stored under {@code key},
     * creating the counter with a value of 1 when the key is new.
     */
    private static void incrementCount(HashMap<String, int[]> counts, String key) {
        int[] counter = counts.get(key);
        if (counter == null)
            counts.put(key, new int[] {
                1
            });
        else
            counter[0]++;
    }

    /**
     * Shows the current {@code displayStats} in the UI.
     *
     * The runtime summary (developer mode only) is computed on the calling
     * thread; everything else runs on the UI thread: the card list is cleared
     * and refilled with the key phrases, contacts, general stats, the two
     * histogram charts and, if available, the runtime summary. When phrase
     * logging is enabled and a log file was written, a chooser for sending that
     * file is started as well.
     */
    private void showStats() {
        // RUNTIME DISPLAY
        final String runtimeString;
        if (MainActivity.DEVELOPER_MODE)
            runtimeString = MainActivity.summarizeRuntime(getApplicationContext(),
                    PROFILING_KEY_ORDER);
        else
            runtimeString = null;

        runOnUiThread(new Runnable() {
            public void run() {
                mCardView.clearCards();

                String appName = getString(R.string.app_name);
                Context shareContext = PersonalActivity.this;

                mCardView.addCard(new ShareableCard(getString(R.string.key_phrases),
                        displayStats.keyPhraseTexts[displayPhraseIndex].toString(), appName,
                        shareContext));
                mCardView.addCard(new ShareableCard(getString(R.string.contacts),
                        displayStats.contactsDisplay.toString(), appName, shareContext));
                mCardView.addCard(new ShareableCard(getString(R.string.stats),
                        displayStats.generalDisplay.toString(), appName, shareContext));

                GraphicalView dayChart = buildDayChart(PersonalActivity.this,
                        displayStats.dayHistogram);
                mCardView.addCard(new GraphCard(getString(R.string.day_of_week), dayChart, appName,
                        shareContext));

                GraphicalView hourChart = buildHourChart(PersonalActivity.this,
                        displayStats.hourHistogram);
                mCardView.addCard(new GraphCard(getString(R.string.time_of_day), hourChart,
                        appName, shareContext));

                if (runtimeString != null)
                    mCardView.addCard(new ShareableCard(getString(R.string.runtime), runtimeString,
                            appName, shareContext));

                mCardView.refresh();

                // logFile is only assigned by a full scan; when the stats came
                // from the cache there is no log to send.
                if (LOG_PHRASES && logFile != null) {
                    Intent intent = new Intent(android.content.Intent.ACTION_SEND);
                    intent.setFlags(Intent.FLAG_ACTIVITY_NEW_TASK);
                    // Uri.fromFile escapes the path, unlike "file://" + path
                    intent.putExtra(Intent.EXTRA_STREAM, Uri.fromFile(logFile));
                    intent.setType("text/plain");

                    intent.putExtra(Intent.EXTRA_SUBJECT, "Logged phrases");

                    startActivity(Intent.createChooser(intent, "Send email..."));
                }
            }
        });
    }

    /**
     * Writes a tab-separated feature table for every unigram, bigram and
     * trigram in {@code sentStats}: one header line followed by one row per
     * n-gram.
     *
     * @param sentStats n-gram statistics of the sent messages
     * @param shortMessages counts of short whole messages, keyed by text
     * @param simplePhrases counts of simple phrases, keyed by text
     * @param combinedScores candidate scores, keyed by phrase
     * @param scoresOut destination of the table; not closed here
     */
    private void logCandidateFeatures(CorpusStats sentStats, HashMap<String, int[]> shortMessages,
            HashMap<String, int[]> simplePhrases, HashMap<String, double[]> combinedScores,
            PrintWriter scoresOut) {
        final char tab = '\t';
        final StringBuilder joined = new StringBuilder();

        // column titles
        scoresOut.println("Phrase\tFrequency\tExpected Frequency\tHas Nonwords\tStopword Start\tStopword End\tSimple Phrase Count\tShort Message Count\tCombined Score");

        // one row per unigram
        for (String word : sentStats.unigrams.keySet()) {
            scoresOut.print(word);
            scoresOut.print(tab);
            scoresOut.print(sentStats.unigrams.get(word)[0]);
            scoresOut.print(tab);
            scoresOut.print(corpusUnigrams == null ? 0 : corpusUnigrams.expectedFrequency(word,
                    sentStats.unigramTotal));
            scoresOut.print(tab);
            scoresOut.print(isNonword(word) ? 1 : 0);
            scoresOut.print(tab);

            // a unigram starts and ends with the same word, so both stopword
            // columns carry the same flag
            final int stopwordFlag = largeStopwords.contains(word) ? 1 : 0;
            scoresOut.print(stopwordFlag);
            scoresOut.print(tab);
            scoresOut.print(stopwordFlag);

            printLookupColumns(scoresOut, word, simplePhrases, shortMessages, combinedScores);
        }

        // one row per bigram
        for (String first : sentStats.bigrams.keySet()) {
            for (String second : sentStats.bigrams.get(first).keySet()) {
                joined.setLength(0);
                joined.append(first).append(' ').append(second);
                final String phrase = joined.toString();

                scoresOut.print(phrase);
                scoresOut.print(tab);
                scoresOut.print(sentStats.bigrams.get(first).get(second)[0]);
                scoresOut.print(tab);
                scoresOut.print(corpusUnigrams == null ? 0 : corpusUnigrams.expectedFrequency(
                        first, second, sentStats.bigramTotal));
                scoresOut.print(tab);
                scoresOut.print((isNonword(first) || isNonword(second)) ? 1 : 0);
                scoresOut.print(tab);
                scoresOut.print(smallStopwords.contains(first) ? 1 : 0);
                scoresOut.print(tab);
                scoresOut.print(smallStopwords.contains(second) ? 1 : 0);

                printLookupColumns(scoresOut, phrase, simplePhrases, shortMessages,
                        combinedScores);
            }
        }

        // one row per trigram
        for (String first : sentStats.trigrams.keySet()) {
            for (String second : sentStats.trigrams.get(first).keySet()) {
                for (String third : sentStats.trigrams.get(first).get(second).keySet()) {
                    joined.setLength(0);
                    joined.append(first).append(' ').append(second).append(' ').append(third);
                    final String phrase = joined.toString();

                    scoresOut.print(phrase);
                    scoresOut.print(tab);
                    scoresOut.print(sentStats.trigrams.get(first).get(second).get(third)[0]);
                    scoresOut.print(tab);
                    scoresOut.print(corpusUnigrams == null ? 0 : corpusUnigrams.expectedFrequency(
                            first, second, third, sentStats.trigramTotal));
                    scoresOut.print(tab);
                    scoresOut.print((isNonword(first) || isNonword(second) || isNonword(third))
                            ? 1 : 0);
                    scoresOut.print(tab);
                    // only the outer words are checked against the stopwords
                    scoresOut.print(smallStopwords.contains(first) ? 1 : 0);
                    scoresOut.print(tab);
                    scoresOut.print(smallStopwords.contains(third) ? 1 : 0);

                    printLookupColumns(scoresOut, phrase, simplePhrases, shortMessages,
                            combinedScores);
                }
            }
        }
    }

    /**
     * Finishes a row: prints the simple phrase count, the short message count
     * and the combined score of {@code phrase} (0 where the phrase is unknown),
     * each preceded by a tab, and then ends the line.
     */
    private static void printLookupColumns(PrintWriter out, String phrase,
            HashMap<String, int[]> simplePhrases, HashMap<String, int[]> shortMessages,
            HashMap<String, double[]> combinedScores) {
        final char tab = '\t';

        out.print(tab);
        out.print(simplePhrases.containsKey(phrase) ? simplePhrases.get(phrase)[0] : 0);
        out.print(tab);
        out.print(shortMessages.containsKey(phrase) ? shortMessages.get(phrase)[0] : 0);
        out.print(tab);
        out.print(combinedScores.containsKey(phrase) ? combinedScores.get(phrase)[0] : 0);
        out.println();
    }

    /**
     * Moves score from shorter candidate phrases onto the longer candidates
     * that contain them, updating slot 0 of the score arrays in place.
     * <p>
     * Two passes are made over the entries of {@code sortedCandidates} at
     * indexes 0 through {@code maxPhrases * 2}: the first folds unigram scores
     * into two-word candidates, the second folds bigram scores into three-word
     * candidates. The second pass therefore sees bigram scores already
     * adjusted by the first.
     * 
     * @param sentStats n-gram counts used to compute the transfer ratios
     * @param candidates maps a candidate phrase to its score array
     * @param sortedCandidates candidate phrases (presumably ranked best-first;
     *            verify against the caller)
     */
    private void mergeSimilarPhrases(CorpusStats sentStats, HashMap<String, double[]> candidates,
            ArrayList<String> sortedCandidates) {
        // pass 1: unigrams -> bigrams
        for (int rank = 0; rank < sortedCandidates.size() && rank <= maxPhrases * 2; rank++) {
            final String phrase = sortedCandidates.get(rank);
            final String[] tokens = phrase.split(" ");
            if (tokens.length != 2)
                continue;

            final String left = tokens[0];
            final String right = tokens[1];

            // share of the left word's occurrences that are part of this bigram
            if (candidates.containsKey(left)) {
                double share = sentStats.bigrams.get(left).get(right)[0]
                        / (double) sentStats.unigrams.get(left)[0];
                shiftCandidateScore(candidates, left, phrase, share);
            }

            // share of the right word's occurrences that are part of this bigram
            if (candidates.containsKey(right)) {
                double share = sentStats.bigrams.get(left).get(right)[0]
                        / (double) sentStats.unigrams.get(right)[0];
                shiftCandidateScore(candidates, right, phrase, share);
            }
        }

        // pass 2: bigrams -> trigrams
        // A single reused builder avoids allocating one per concatenation.
        final StringBuilder pairBuilder = new StringBuilder();
        for (int rank = 0; rank < sortedCandidates.size() && rank <= maxPhrases * 2; rank++) {
            final String phrase = sortedCandidates.get(rank);
            final String[] tokens = phrase.split(" ");
            if (tokens.length != 3)
                continue;

            pairBuilder.setLength(0);
            final String leadingPair = pairBuilder.append(tokens[0]).append(' ')
                    .append(tokens[1]).toString();

            pairBuilder.setLength(0);
            final String trailingPair = pairBuilder.append(tokens[1]).append(' ')
                    .append(tokens[2]).toString();

            // share of the leading pair's occurrences that are part of this trigram
            if (candidates.containsKey(leadingPair)) {
                double share = sentStats.trigrams.get(tokens[0]).get(tokens[1]).get(tokens[2])[0]
                        / (double) sentStats.bigrams.get(tokens[0]).get(tokens[1])[0];
                shiftCandidateScore(candidates, leadingPair, phrase, share);
            }

            // share of the trailing pair's occurrences that are part of this trigram
            if (candidates.containsKey(trailingPair)) {
                double share = sentStats.trigrams.get(tokens[0]).get(tokens[1]).get(tokens[2])[0]
                        / (double) sentStats.bigrams.get(tokens[1]).get(tokens[2])[0];
                shiftCandidateScore(candidates, trailingPair, phrase, share);
            }
        }
    }

    /**
     * Takes {@code ratio} times the donor's current score away from the donor
     * and adds it to the recipient, then logs an error if either the ratio or
     * the recipient's new score is NaN.
     */
    private void shiftCandidateScore(HashMap<String, double[]> candidates, String donor,
            String recipient, double ratio) {
        double discount = ratio * candidates.get(donor)[0];

        candidates.get(donor)[0] -= discount;
        candidates.get(recipient)[0] += discount;

        if (Double.isNaN(ratio) || Double.isNaN(candidates.get(recipient)[0]))
            Log.e(TAG, "NaN in " + recipient + " / " + donor);
    }

    /**
     * Inflates R.layout.results_phrases, shows the given details in its
     * android.R.id.text2 view, registers this activity as the item-selected
     * listener of the spinner, and wires the share icon to send the details
     * as plain text.
     * 
     * @param inflater the inflater used to create the view
     * @param details the text to display and to share
     * @return the inflated view
     */
    public View inflatePhraseResults(LayoutInflater inflater, final CharSequence details) {
        final View card = inflater.inflate(R.layout.results_phrases, null);

        TextView detailsView = (TextView) card.findViewById(android.R.id.text2);
        detailsView.setText(details);

        Spinner modeSpinner = (Spinner) card.findViewById(R.id.spinner1);
        modeSpinner.setOnItemSelectedListener(this);

        ImageView shareIcon = (ImageView) card.findViewById(R.id.share);
        final View.OnClickListener sendDetails = new View.OnClickListener() {
            public void onClick(View v) {
                final String subject = "Shared stats from " + getString(R.string.app_name);
                final String body = "Stats: " + getString(R.string.key_phrases) + ":\n" + details;

                final Intent send = new Intent(Intent.ACTION_SEND);
                send.setType("text/plain");
                send.putExtra(Intent.EXTRA_TEXT, body);
                send.putExtra(Intent.EXTRA_SUBJECT, subject);

                startActivity(Intent.createChooser(send, "Share with..."));
            }
        };
        shareIcon.setOnClickListener(sendDetails);

        return card;
    }

    /**
     * Inflates R.layout.results_graphed, sets its title, adds the graph to the
     * R.id.graphGroup container at one third of the display height, and wires
     * the share icon to share a bitmap of the graph.
     * 
     * @param inflater the inflater used to create the view
     * @param title the text shown in android.R.id.text1 and used in the share
     *            subject
     * @param graph the chart view to embed
     * @return the inflated view
     */
    public View inflateChart(LayoutInflater inflater, final CharSequence title,
            final GraphicalView graph) {
        final View card = inflater.inflate(R.layout.results_graphed, null);

        // title
        TextView titleView = (TextView) card.findViewById(android.R.id.text1);
        titleView.setText(title);

        // graph: full width, a third of the screen tall
        ViewGroup graphHolder = (ViewGroup) card.findViewById(R.id.graphGroup);

        // TODO: This method for getting height is deprecated
        final int graphHeight = getWindowManager().getDefaultDisplay().getHeight() / 3;
        final ViewGroup.LayoutParams graphParams = new ViewGroup.LayoutParams(
                ViewGroup.LayoutParams.FILL_PARENT, graphHeight);
        graphHolder.addView(graph, graphParams);

        // sharing
        ImageView shareIcon = (ImageView) card.findViewById(R.id.share);
        shareIcon.setOnClickListener(new View.OnClickListener() {
            public void onClick(View v) {
                final Bitmap snapshot = graph.toBitmap();
                final String subject = "Shared: histogram of "
                        + title.toString().toLowerCase(Locale.getDefault());
                share(snapshot, title, subject);
            }
        });

        return card;
    }

    /**
     * Writes the bitmap as a PNG named "sms_ling.png" in the external storage
     * directory and opens a chooser for an ACTION_SEND intent that carries the
     * file as its stream extra.
     * <p>
     * If the file cannot be written or the bitmap cannot be encoded, an error
     * is reported through {@code error(String)} and nothing is shared.
     * 
     * @param bitmap the image to share
     * @param title currently unused; kept so existing callers keep compiling
     * @param subject the value of the intent's EXTRA_SUBJECT
     */
    public void share(Bitmap bitmap, CharSequence title, CharSequence subject) {
        // In the future, I should switch this to getExternalFilesDir
        File file = new File(Environment.getExternalStorageDirectory(), "sms_ling.png");
        try {
            boolean encoded;
            OutputStream out = new BufferedOutputStream(new FileOutputStream(file));
            try {
                encoded = bitmap.compress(CompressFormat.PNG, 100, out);
            } finally {
                // Release the file handle even when compress() throws; close()
                // also flushes the buffer, so a failed flush lands in the
                // IOException handler below.
                out.close();
            }

            // compress() signals failure through its return value, not an
            // exception; don't hand other apps a truncated or empty file.
            if (!encoded) {
                error("Unable to share image");
                return;
            }

            Intent intent = new Intent(android.content.Intent.ACTION_SEND);
            intent.setFlags(Intent.FLAG_ACTIVITY_NEW_TASK);
            // Uri.fromFile builds the file:// URI and encodes the path, unlike
            // plain string concatenation.
            intent.putExtra(Intent.EXTRA_STREAM, Uri.fromFile(file));
            intent.setType("image/png");

            intent.putExtra(Intent.EXTRA_SUBJECT, subject);

            startActivity(Intent.createChooser(intent, "Send email..."));
        } catch (IOException e) {
            error("Unable to share image");
        }
    }

    /**
     * Builds the bar chart for the day-of-the-week histogram.
     * <p>
     * Every entry of {@code dayHistogram} becomes one bar. The y axis tops out
     * at 105% of the largest entry (truncated to an int), the x axis runs from
     * 0 to 8, and x positions 1 through 7 are labelled with the first letter
     * of Sunday through Saturday.
     * 
     * @param c the context the chart view is created with
     * @param dayHistogram message counts, one per bar
     * @return The drawable View. Be sure to set the height of it or it won't
     *         show!
     */
    public GraphicalView buildDayChart(Context c, int[] dayHistogram) {
        // data set: fill the series and track the tallest bar in one sweep
        CategorySeries counts = new CategorySeries("Day");
        int peak = 0;
        for (int count : dayHistogram) {
            counts.add(count);
            peak = Math.max(peak, count);
        }

        XYMultipleSeriesDataset dataset = new XYMultipleSeriesDataset();
        dataset.addSeries(counts.toXYSeries());

        // leave a little headroom above the tallest bar
        final int ymax = (int) (peak * 1.05);

        // renderer
        XYMultipleSeriesRenderer renderer = createBaseChartTheme(0, 8, 0, ymax, getFontScale());
        renderer.setXTitle("Day");
        renderer.setYTitle("Messages");

        final String[] dayNames = {
                "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"
        };
        int position = 1;
        for (String dayName : dayNames) {
            renderer.addXTextLabel(position, dayName.substring(0, 1));
            position++;
        }

        // view
        return new GraphicalView(c, new BarChart(dataset, renderer, BarChart.Type.DEFAULT));
    }

    /**
     * Creates a renderer with everything that does not depend on the actual
     * data points already configured, assuming a single data series is drawn.
     * Callers are still expected to call addXTextLabel, setXTitle and
     * setYTitle on the result.
     * 
     * @param xmin lower bound of the x axis
     * @param xmax upper bound of the x axis
     * @param ymin lower bound of the y axis
     * @param ymax upper bound of the y axis; also the value at which the bar
     *            gradient reaches its top color
     * @param scale multiplier applied to all text sizes
     * @return the configured renderer
     */
    private static XYMultipleSeriesRenderer createBaseChartTheme(int xmin, int xmax, int ymin,
            int ymax, float scale) {
        final XYMultipleSeriesRenderer theme = new XYMultipleSeriesRenderer();

        // fonts
        theme.setAxisTitleTextSize(18 * scale);
        theme.setChartTitleTextSize(20 * scale);
        theme.setLabelsTextSize(14 * scale);
        theme.setLegendTextSize(14 * scale);

        // Margins. According to http://stackoverflow.com/a/12527041/1492373
        // the X/Y axis labels stick to the margins, so margins that are too
        // small make labels render on top of other things.
        // Margin order is { top, left, bottom, right }
        final int[] chartMargins = theme.getMargins();
        // Top: a hack. A y-axis label near the top of the axis had a sliver of
        // its digits cut off, and two extra pixels fix that.
        chartMargins[0] += 2;
        // Left: also a hack. It probably has to fit the y-axis labels plus the
        // y-axis title, but the pixel width of the labels isn't available here
        // (this code is graph-independent), so three label heights is used.
        // Cleaning this up needs some refactoring.
        chartMargins[1] = (int) (3 * theme.getLabelsTextSize());
        theme.setMargins(chartMargins);

        // the single data series
        final SimpleSeriesRenderer bars = new SimpleSeriesRenderer();
        bars.setColor(Color.DKGRAY);
        bars.setDisplayChartValues(false);
        bars.setGradientEnabled(true);
        // gradient start = bottom of the bar
        bars.setGradientStart(0, graphBarBottomColor);
        // gradient stop = color at the top of the max height bar
        bars.setGradientStop(ymax, graphBarTopColor);
        theme.addSeriesRenderer(bars);

        theme.setOrientation(Orientation.HORIZONTAL);
        theme.setBarSpacing(0.2f);

        // axis and label colors
        theme.setAxesColor(Color.DKGRAY);
        theme.setLabelsColor(Color.DKGRAY);
        theme.setXLabelsColor(Color.DKGRAY);
        theme.setYLabelsColor(0, Color.DKGRAY);

        // achartengine has a bug that requires the color portion to be set
        // even when the margin color is fully transparent
        theme.setMarginsColor(Color.argb(0, 1, 1, 1));
        theme.setBackgroundColor(Color.WHITE);
        theme.setApplyBackgroundColor(false);

        // axis ranges
        theme.setXAxisMin(xmin);
        theme.setXAxisMax(xmax);
        theme.setYAxisMin(ymin);
        theme.setYAxisMax(ymax);

        theme.setYLabelsAlign(Align.RIGHT);

        theme.setShowAxes(true);
        theme.setShowLabels(true);
        theme.setShowLegend(false);

        // Tick marks and labels: none are generated for X because the caller
        // supplies text labels there.
        theme.setYLabels(4);
        theme.setXLabels(0);

        // no panning or zooming
        theme.setPanEnabled(false, false);
        theme.setZoomEnabled(false);

        theme.setInScroll(true);

        return theme;
    }

    /**
     * Builds the bar chart for the hour-of-day histogram.
     * <p>
     * Entries of {@code timeData} from index 1 onward become bars; index 0 is
     * not plotted. The x axis spans from one below the first non-zero index to
     * one above the last non-zero index (again looking at index 1 onward, with
     * 0 and 24 used when nothing is non-zero). The y axis tops out at 105% of
     * the largest entry of the whole array, index 0 included, truncated to an
     * int.
     * 
     * @param c the context the chart view is created with
     * @param timeData message counts indexed by hour slot
     * @return The drawable View. Be sure to set the height of it or it won't
     *         show!
     */
    public GraphicalView buildHourChart(Context c, int[] timeData) {
        // data set: fill the series and find the active x range in one sweep
        CategorySeries counts = new CategorySeries("Day");
        int firstActive = 0;
        int lastActive = 24;
        for (int slot = 1; slot < timeData.length; slot++) {
            counts.add(timeData[slot]);

            if (timeData[slot] <= 0)
                continue;

            if (firstActive == 0)
                firstActive = slot;
            lastActive = slot;
        }

        XYMultipleSeriesDataset dataset = new XYMultipleSeriesDataset();
        dataset.addSeries(counts.toXYSeries());

        // tallest entry across the whole array, plus a little headroom
        int peak = 0;
        for (int count : timeData)
            peak = Math.max(peak, count);
        final int ymax = (int) (peak * 1.05);

        // renderer
        XYMultipleSeriesRenderer renderer = createBaseChartTheme(firstActive - 1,
                lastActive + 1, 0, ymax, getFontScale());
        renderer.setXTitle("Hour");
        renderer.setYTitle("Messages");

        // TODO: These constant labels won't work for everyone
        renderer.addXTextLabel(7.5, "8 AM");
        renderer.addXTextLabel(11.5, "noon");
        renderer.addXTextLabel(17.5, "5 PM");
        renderer.addXTextLabel(21.5, "10 PM");

        // view
        return new GraphicalView(c, new BarChart(dataset, renderer, BarChart.Type.DEFAULT));
    }

    /**
     * Handles the spinner that selects which key-phrase listing to show (by
     * phraseness or by count).
     * <p>
     * Does nothing when the position is already the displayed one. Otherwise
     * the text at {@code displayStats.keyPhraseTexts[pos]} is put into the
     * android.R.id.text2 view inside R.id.phrase_layout, provided the layout,
     * the index and the text view all exist, and the position is remembered
     * as the displayed one.
     */
    public void onItemSelected(AdapterView<?> parent, View view, int pos, long id) {
        if (pos == displayPhraseIndex)
            return;

        final View phraseLayout = findViewById(R.id.phrase_layout);
        if (phraseLayout == null) {
            Log.d(TAG, "Can't find phrase_layout");
        }
        else if (pos < displayStats.keyPhraseTexts.length) {
            final TextView phraseText = (TextView) phraseLayout.findViewById(android.R.id.text2);
            if (phraseText != null)
                phraseText.setText(displayStats.keyPhraseTexts[pos]);
        }

        // remembered even when nothing could be updated
        displayPhraseIndex = pos;
    }

    /**
     * Counterpart of onItemSelected for the case where no item is selected;
     * intentionally does nothing.
     */
    public void onNothingSelected(AdapterView<?> arg0) {
    }
}




Java Source Code List

com.github.ktrnka.droidling.AboutActivity.java
com.github.ktrnka.droidling.AboutInterpersonalActivity.java
com.github.ktrnka.droidling.AboutLangIDActivity.java
com.github.ktrnka.droidling.AboutPersonalActivity.java
com.github.ktrnka.droidling.CorpusStats.java
com.github.ktrnka.droidling.DateDistribution.java
com.github.ktrnka.droidling.DiagnosticActivity.java
com.github.ktrnka.droidling.ExtendedApplication.java
com.github.ktrnka.droidling.GraphCard.java
com.github.ktrnka.droidling.ImageAdapter.java
com.github.ktrnka.droidling.InterpersonalActivity.java
com.github.ktrnka.droidling.InterpersonalCard.java
com.github.ktrnka.droidling.InterpersonalSingleStats.java
com.github.ktrnka.droidling.InterpersonalStats.java
com.github.ktrnka.droidling.LIDStats.java
com.github.ktrnka.droidling.LanguageIdentificationActivity.java
com.github.ktrnka.droidling.LanguageIdentifier.java
com.github.ktrnka.droidling.MainActivity.java
com.github.ktrnka.droidling.PersonalActivity.java
com.github.ktrnka.droidling.PersonalStats.java
com.github.ktrnka.droidling.RefreshableActivity.java
com.github.ktrnka.droidling.ShareableCard.java
com.github.ktrnka.droidling.Sms.java
com.github.ktrnka.droidling.Tokenizer.java
com.github.ktrnka.droidling.WordDistribution.java
com.github.ktrnka.droidling.helpers.AsyncDrawable.java
com.github.ktrnka.droidling.helpers.BitmapLoaderTask.java
com.github.ktrnka.droidling.helpers.Util.java