edu.stanford.muse.email.MboxEmailStore.java Source code

Java tutorial

Introduction

Here is the source code for edu.stanford.muse.email.MboxEmailStore.java

Source

/*
 Copyright (C) 2012 The Stanford MobiSocial Laboratory
    
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
    
   http://www.apache.org/licenses/LICENSE-2.0
    
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
*/
package edu.stanford.muse.email;

import edu.stanford.muse.util.Pair;
import edu.stanford.muse.util.Util;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import javax.mail.*;
import java.io.*;
import java.util.*;

/**
 * Email store backed by mbox files located under a root directory.
 *
 * Every non-directory file below the root is probed as a potential mbox folder. The message
 * count found for each file is kept in a {@link FolderCache}, which is persisted to disk so that
 * files that have not been modified since the last scan do not have to be parsed again.
 */
public class MboxEmailStore extends EmailStore implements Serializable {
    private final static long serialVersionUID = 1L;

    private static final Log log = LogFactory.getLog(MboxEmailStore.class);

    /** Prefix of the on-disk folder cache file; the (flattened) root path is appended to it. */
    private static final String CACHE_FILENAME = ".muse.dir";

    /**
     * Suffixes of files that tend to be mis-detected as an mbox holding exactly one message.
     * Such files are skipped only when they do report a count of 1.
     */
    private static final String[] NOISY_SUFFIXES = { ".plist", ".lock", ".DS_Store" };

    /** JavaMail session properties used for every mstor session created by this class. */
    private static final Properties mstorProps;
    private FolderCache folderCache;
    private String rootPath;
    private String accountKey;

    static {
        //http://sourceforge.net/projects/mstor/files/mstor/0.9.13/
        mstorProps = new Properties();
        mstorProps.put("mstor.mbox.metadataStrategy", "NONE");
        // the following two properties are actually different:
        // the cacheBuffers option enables/disables caching of message content to optimise message content retrieval,
        // the cache.disabled option controls which CacheAdapter mstor uses: CacheAdapter (dummy) or EhCacheAdapter
        mstorProps.put("mstor.mbox.cacheBuffers", "disabled");
        mstorProps.put("mstor.cache.disabled", "true");
        // http://code.google.com/p/coucou/source/browse/src/main/resources/mstor.properties?spec=svn9e5ed7be0c8e39027c72a220f745d87b944be826&r=9e5ed7be0c8e39027c72a220f745d87b944be826
        // relaxed parsing uses a relaxed pattern that is adapted to FoxMail export
        //mstorProps.put("mstor.mbox.parsing.relaxed", "true");
        mstorProps.put("mstor.mbox.encoding", "UTF-8"); // note: we are assuming utf-8 because that's what gmail+thunderbird will save as
        // background on mstor.cache.disabled (set above):
        // see: https://sourceforge.net/tracker/?func=detail&aid=2791167&group_id=114229&atid=667640
    }

    // constructor for de-serialization
    private MboxEmailStore() {
    }

    /**
     * Creates a store rooted at the given path.
     *
     * @param accountKey key reported by {@link #getAccountID()}; may be null or empty
     * @param name       display name handed to the superclass
     * @param path       root directory (or single file) to scan; it is canonicalized here
     * @throws IOException if the canonical form of {@code path} cannot be resolved
     */
    public MboxEmailStore(String accountKey, String name, String path) throws IOException {
        super(name, "" /* no automatic email address for mbox files */);
        this.accountKey = accountKey;
        this.rootPath = new File(path).getCanonicalPath(); // get canonical because the incoming path may be convoluted, like <tbird profiledir>/../../... etc
    }

    /** Returns the canonical root path this store scans. */
    public String getRootPath() {
        return rootPath;
    }

    /**
     * Connects a placeholder mstor store.
     *
     * Although the "store" is irrelevant with mbox, connect/close may still be attempted on it.
     * It therefore points at the dev-null path rather than at / or an unspecified path (which may
     * trigger a file io error).
     *
     * @return a connected store
     * @throws MessagingException if the store cannot be obtained or connected
     */
    public Store connect() throws MessagingException {
        // can customize javamail properties here if needed, see e.g. http://java.sun.com/products/javamail/javadocs/com/sun/mail/imap/package-summary.html
        Store store = newSession().getStore(new URLName("mstor:" + Util.devNullPath()));
        store.connect();
        return store;
    }

    /** Creates a fresh JavaMail session configured with the shared mstor properties. */
    private Session newSession() {
        Session session = Session.getInstance(mstorProps, null);
        session.setDebug(DEBUG);
        return session;
    }

    /** Closes a folder (without expunging), logging instead of propagating any failure. */
    private static void closeQuietly(Folder folder) {
        if (folder == null)
            return;
        try {
            folder.close(false);
        } catch (Exception e) {
            log.warn("Error closing folder: " + e);
        }
    }

    /** Closes a store, logging instead of propagating any failure. */
    private static void closeQuietly(Store store) {
        if (store == null)
            return;
        try {
            store.close();
        } catch (Exception e) {
            log.warn("Error closing store: " + e);
        }
    }

    /** Returns true if the path ends with one of the {@link #NOISY_SUFFIXES}. */
    private static boolean hasNoisySuffix(String path) {
        for (String suffix : NOISY_SUFFIXES)
            if (path.endsWith(suffix))
                return true;
        return false;
    }

    /**
     * Recursively walks {@code f}, adding a {@link FolderInfo} to {@code list} for every file that
     * holds at least one message.
     *
     * Results come from the folder cache when the cached entry is still current; otherwise the
     * file is opened and counted, and the outcome (including a count of 0) is cached so the file
     * is not parsed again on the next scan. Failures on an individual file are logged and do not
     * abort the walk.
     *
     * @param list receives the folders found
     * @param f    file or directory to examine; ignored if it does not exist
     */
    private void collectMboxFolders(List<FolderInfo> list, File f) throws MessagingException {
        if (!f.exists())
            return;

        if (f.isDirectory()) {
            File[] children = f.listFiles();
            if (children != null) // somehow this can be null when run on /tmp (maybe due to soft links etc).
                for (File child : children)
                    collectMboxFolders(list, child);
            return;
        }

        String path = f.getPath();
        FolderInfo cached = folderCache.lookup(path);
        if (cached != null) {
            if (cached.messageCount > 0)
                list.add(cached);
            log.info(Util.blurPath(path) + " is in the folder cache, message count: " + cached.messageCount);
            return;
        }

        if (path.endsWith(".msf"))
            return; // explicitly ignore msf files, sometimes it seems to read an actual number of messages even from msf files

        Folder folder = null;
        try {
            // publish what is currently being scanned
            int idx = path.lastIndexOf(File.separatorChar);
            String shortName = (idx >= 0 && (idx + 1) < path.length()) ? path.substring(idx + 1) : path;
            folderBeingScanned = path;
            folderBeingScannedShortName = shortName;

            Pair<Folder, Integer> pair = openFolder(null, path);
            folder = pair.getFirst();
            int count = pair.getSecond();

            // many files are wrongly considered mbox with count 1. Ignore them if they also have a suffix
            // that is known to cause noise. we're being cautious and ignoring these files only if they are noisy.
            // (this covers .DS_Store and ._.DS_Store, Mac OS binary files that get read as mbox by the parser)
            if (count == 1 && hasNoisySuffix(path)) {
                log.info("Ignoring file " + path
                        + " because it has only 1 message and its name matches a suffix that indicates it's likely not an mbox file.");
                return; // deliberately not cached; the folder is closed in the finally block
            }

            // we cache the folder info even if it's not a valid folder.
            // this ensures we don't have to scan invalid folders again
            FolderInfo fi = new FolderInfo(null, path, shortName, count);
            folderCache.put(path, fi);
            if (count > 0 && folder != null)
                list.add(fi);
        } catch (Exception e) {
            log.error("Exception trying to read file " + f + ": " + e, e);
        } finally {
            // always release the folder and clear the status, including on the error and ignore paths
            // NOTE(review): the store created for this folder is not closed here, as in the original code
            closeQuietly(folder);
            folderBeingScanned = null;
        }
    }

    /**
     * Opens the given mbox file read-only, without counting its messages.
     *
     * A new session and store is created for each folder; the {@code s} argument is ignored.
     * If the folder cannot be opened, the store created for it is closed again before returning.
     *
     * @param s     ignored
     * @param fname path of the mbox file; {@code null} is treated as "INBOX"
     * @return the open folder, or {@code null} if opening it failed
     * @throws MessagingException if the store cannot be obtained or connected, or its default folder cannot be fetched
     * @throws RuntimeException   if the store has no default folder
     */
    public Folder openFolderWithoutCount(Store s, String fname) throws MessagingException {
        if (fname == null)
            fname = "INBOX";

        Store store = newSession().getStore(new URLName("mstor:" + fname));
        store.connect();

        Folder folder = null;
        try {
            // This is the root folder in the namespace provided
            // see http://docs.oracle.com/javaee/5/api/javax/mail/Store.html#getDefaultFolder%28%29
            Folder root = store.getDefaultFolder();
            if (root == null)
                throw new RuntimeException("Invalid folder: " + fname);

            log.info("Opening folder " + Util.blurPath(fname) + " in r/o mode...");
            try {
                root.open(Folder.READ_ONLY);
                folder = root;
            } catch (MessagingException me) {
                // callers treat a null folder as "not a usable mbox"; record why instead of dropping it
                log.info("Could not open folder " + Util.blurPath(fname) + ": " + me);
            }
        } finally {
            // nobody else holds a reference to this store, so release it unless an open folder is returned
            if (folder == null)
                closeQuietly(store);
        }

        return folder;
    }

    /**
     * Opens the given mbox file and counts its messages.
     * We need the Store s parameter from the superclass, but it is not used in the folder store.
     *
     * @return the folder (left open for the caller, or null if it could not be opened) paired with
     *         its message count (0 when the folder is null)
     * @throws MessagingException if the folder cannot be reached or its messages cannot be counted
     */
    @Override
    public Pair<Folder, Integer> openFolder(Store s, String fname) throws MessagingException {
        Folder folder = openFolderWithoutCount(s, fname);
        int count = 0;
        if (folder != null) {
            try {
                count = folder.getMessageCount(); // warning, do not close, we need to return an open folder
            } catch (MessagingException | RuntimeException e) {
                closeQuietly(folder); // the caller never receives the folder, so it must be released here
                throw e;
            }
        }

        return new Pair<>(folder, count);
    }

    /**
     * Scans the root path for mbox folders and records them, with their message counts, in
     * {@code folderInfos}. The folder cache is loaded from, and saved back to, a file inside
     * {@code foldersAndCountsDir} whose name is derived from the root path.
     *
     * @param foldersAndCountsDir directory holding the cache file; created if missing
     */
    @Override
    public void computeFoldersAndCounts(String foldersAndCountsDir) throws MessagingException {
        doneReadingFolderCounts = false;

        new File(foldersAndCountsDir).mkdirs(); // ensure cacheDir exists

        // convert the root path to a filename by flattening both kinds of path separator
        String flattenedRoot = rootPath.replace("/", "--").replace("\\", "--");
        String cacheFilePath = foldersAndCountsDir + File.separatorChar + CACHE_FILENAME + "." + flattenedRoot;
        this.folderCache = new FolderCache(cacheFilePath);
        this.folderInfos = new ArrayList<FolderInfo>();
        collectMboxFolders(folderInfos, new File(rootPath));
        doneReadingFolderCounts = true;
        folderBeingScanned = "";

        folderCache.save();
    }

    /**
     * Returns the account key, or "mbox" when no key was supplied. (The "folder" part of the cache
     * file name reflects the full path, so a generic id is sufficient in that case.)
     */
    @Override
    public String getAccountID() {
        return (Util.nullOrEmpty(accountKey) ? "mbox" : accountKey);
    }

    /**
     * Debugging aid: counts the lines starting with "From " in an mbox file and prints the total.
     *
     * @param args optional; {@code args[0]} is the file to read, otherwise a built-in default path is used
     */
    public static void main(String args[]) throws MessagingException, IOException {
        String mboxPath = (args != null && args.length > 0) ? args[0] : "/Users/hangal/Local Folders/gmail-sent";
        int count = 0;
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(mboxPath)))) {
            String line;
            while ((line = reader.readLine()) != null)
                if (line.startsWith("From "))
                    count++;
        }
        System.out.println(count + " messages found");
    }

    @Override
    public String toString() {
        return "Folder store with rootpath = \"" + rootPath + "\"";
    }

    /**
     * Little class to stash away folder info, so we don't have to parse them again.
     *
     * NOTE(review): this is a non-static inner class, so serializing it also serializes the
     * enclosing store; left as is to keep the serialized form unchanged.
     */
    class FolderCache implements Serializable {
        private final static long serialVersionUID = 1L;

        //----------------------- folder cache ops -----------------------
        // Notes: we are never deleting cache entries. so they will live on for mbox files that may have been deleted.
        // doesn't really matter.
        // in case memory becomes a problem, we may consider not caching folders with 0 messages (right now we cache those too)
        public Map<String, FolderInfo> cacheMap = new LinkedHashMap<String, FolderInfo>();
        private String cacheFile;

        /** Creates the cache and immediately loads any entries previously saved at {@code path}. */
        FolderCache(String path) {
            cacheFile = path;
            load();
        }

        /**
         * Reads the cache map from the cache file, if it exists. On any failure the current
         * (initially empty) map is kept and the problem is logged.
         */
        private void load() {
            File f = new File(cacheFile);
            if (!f.exists()) {
                log.info("folder cache does not exist: " + cacheFile);
                return;
            }

            log.info("Reading folder cache from " + cacheFile);
            // NOTE(review): native Java deserialization; only safe as long as the cache file is trusted
            try (InputStream in = new FileInputStream(f);
                    ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(in))) {
                Object stored = ois.readObject();
                if (stored instanceof Map) {
                    @SuppressWarnings("unchecked") // the file is only ever written by save()
                    Map<String, FolderInfo> loaded = (Map<String, FolderInfo>) stored;
                    cacheMap = loaded;
                } else {
                    log.warn("Ignoring folder cache with unexpected contents: " + cacheFile);
                }
            } catch (Exception e) {
                log.warn("Error trying to read cache file: " + e, e);
            }
        }

        /**
         * Returns the folder info if it is present in the cache and still current;
         * null if the info is not available (not present, file missing, or file modified since).
         */
        FolderInfo lookup(String file) {
            FolderInfo fi = cacheMap.get(file);
            if (fi == null)
                return null;

            // if file has been modified since we read this, return null.
            // no need to invalidate, because this entry will soon get overwritten anyway
            File f = new File(file);
            if (!f.exists() || f.lastModified() > fi.timestamp)
                return null;
            return fi;
        }

        /** Stores (or replaces) the info for the given file path. */
        void put(String file, FolderInfo fi) {
            cacheMap.put(file, fi);
        }

        /** Writes the cache map to the cache file; failures are logged, not propagated. */
        void save() {
            log.info("Saving folder cache to " + cacheFile);

            try (OutputStream out = new FileOutputStream(cacheFile);
                    ObjectOutputStream oos = new ObjectOutputStream(new BufferedOutputStream(out))) {
                oos.writeObject(cacheMap);
            } catch (IOException ioe) {
                log.warn("Error trying to write cache file: " + ioe, ioe);
            }
        }
    }
}