org.columba.mail.folder.search.LuceneQueryEngine.java Source code

Java tutorial

Introduction

Here is the source code for org.columba.mail.folder.search.LuceneQueryEngine.java

Source

//The contents of this file are subject to the Mozilla Public License Version 1.1
//(the "License"); you may not use this file except in compliance with the
//License. You may obtain a copy of the License at http://www.mozilla.org/MPL/
//
//Software distributed under the License is distributed on an "AS IS" basis,
//WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
//for the specific language governing rights and
//limitations under the License.
//
//The Original Code is "The Columba Project"
//
//The Initial Developers of the Original Code are Frederik Dietz and Timo Stich.
//Portions created by Frederik Dietz and Timo Stich are Copyright (C) 2003.
//
//All Rights Reserved.
package org.columba.mail.folder.search;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.logging.Logger;

import javax.swing.JOptionPane;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.columba.api.command.IStatusObservable;
import org.columba.core.base.ListTools;
import org.columba.core.filter.FilterCriteria;
import org.columba.core.filter.FilterRule;
import org.columba.core.filter.IFilterCriteria;
import org.columba.core.filter.IFilterRule;
import org.columba.core.io.DiskIO;
import org.columba.core.io.StreamUtils;
import org.columba.mail.folder.IMailbox;
import org.columba.mail.folder.event.IFolderEvent;
import org.columba.mail.message.ICloseableIterator;
import org.columba.mail.message.IHeaderList;
import org.columba.mail.util.MailResourceLoader;
import org.columba.ristretto.message.MimePart;
import org.columba.ristretto.message.MimeTree;

/**
 * @author timo
 */
public class LuceneQueryEngine implements QueryEngine {

    /** JDK 1.4+ logging framework logger, used for logging. */
    private static final Logger LOG = Logger.getLogger("org.columba.mail.folder.search");

    /**
     * Once more than this many messages have been added (see
     * incOperationCounter()), the RAM index is merged into the file index.
     */
    private static final int OPTIMIZE_AFTER_N_OPERATIONS = 30;

    /** Capabilities reported by getCaps(); "Body" is the only field that is queried. */
    private static final String[] CAPS = { "Body" };

    /** The ".index" directory below the folder's directory; holds the file index. */
    private File indexDir;

    /** Cached reader on the file index; (re)opened by getFileReader(). */
    private IndexReader fileIndexReader;

    /** Cached reader on the RAM index; (re)opened by getRAMReader(). */
    private IndexReader ramIndexReader;

    /** Lucene directory of the file index located at {@link #indexDir}. */
    private Directory luceneIndexDir;

    /** In-memory directory that receives newly added messages until the next merge. */
    private Directory ramIndexDir;

    /** RAM index version seen when {@link #ramIndexReader} was opened; -1 forces a reopen. */
    private long ramLastModified;

    /** File index version seen when {@link #fileIndexReader} was opened; -1 forces a reopen. */
    private long luceneLastModified;

    /**
     * UIDs handed to messageRemoved() that are still present in the indexes.
     * They are subtracted from every query result and physically deleted in
     * mergeRAMtoIndex().
     */
    private LinkedList deleted;

    /** Number of messages added since the counter was last reset by a merge. */
    private int operationCounter;

    /** Analyzer used both for indexing message bodies and for tokenizing search patterns. */
    private Analyzer analyzer;

    /** The mailbox whose messages are indexed and searched. */
    private IMailbox folder;

    /**
     * Constructor for LuceneQueryEngine.
     * <p>
     * Sets up an empty RAM index, opens (creating it first if it does not
     * exist) the file index in the ".index" directory below the folder's
     * directory, removes a lock that may be left on that index, and finally
     * triggers {@link #sync()} when the number of indexed documents differs
     * from the number of headers in the folder.
     *
     * @param folder
     *            the mailbox whose messages are indexed and searched
     */
    public LuceneQueryEngine(IMailbox folder) {
        this.folder = folder;

        analyzer = new StandardAnalyzer();

        // initRAMDir() hands the analyzer to an IndexWriter, so it has to run
        // after the analyzer was assigned.
        try {
            initRAMDir();
        } catch (IOException e) {
            e.printStackTrace();
        }

        // -1 makes the first getFileReader()/getRAMReader() call open a reader.
        luceneLastModified = -1;
        ramLastModified = -1;

        deleted = new LinkedList();
        operationCounter = 0;

        File folderInDir = folder.getDirectoryFile();
        indexDir = new File(folderInDir, ".index");

        try {
            if (!indexDir.exists()) {
                createIndex();
            }

            luceneIndexDir = FSDirectory.getDirectory(indexDir, false);
        } catch (IOException e) {
            // NOTE(review): luceneIndexDir stays null on this path, yet it is
            // used unconditionally below - confirm this is acceptable.
            JOptionPane.showMessageDialog(null, e.getLocalizedMessage(), "Error while creating Lucene Index",
                    JOptionPane.ERROR_MESSAGE);
        }

        try {
            // If there is an existing lock then it must be from a
            // previous crash -> remove it!
            if (IndexReader.isLocked(luceneIndexDir)) {
                IndexReader.unlock(luceneIndexDir);
            }
        } catch (IOException e) {
            // Remove of lock didn't work -> delete by hand
            File commitLock = new File(indexDir, "commit.lock");

            if (commitLock.exists()) {
                commitLock.delete();
            }

            File writeLock = new File(indexDir, "write.lock");

            if (writeLock.exists()) {
                writeLock.delete();
            }
        }

        // Check if index is consistent with mailbox
        try {
            if (getFileReader().numDocs() != folder.getHeaderList().count()) {
                LOG.warning("Lucene Index includes " + getFileReader().numDocs() + " messages, but mailbox has "
                        + folder.getHeaderList().count());
                sync();
            }
        } catch (Exception e) {
            // Any failure here is only reported; construction still completes.
            LOG.severe(e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Writes a new, empty Lucene index into {@link #indexDir}.
     * <p>
     * {@code DiskIO.ensureDirectory} is invoked on the index directory first
     * (presumably creating it when missing). An {@link IndexWriter} opened in
     * create mode and closed straight away then lays down the empty index.
     *
     * @throws IOException
     *             if the writer cannot be opened or closed
     */
    protected void createIndex() throws IOException {
        DiskIO.ensureDirectory(indexDir);

        // No analyzer is passed because no document is added through this writer.
        new IndexWriter(indexDir, null, true).close();
    }

    /**
     * Returns a reader on the file index, reopening it when the index version
     * differs from the one seen when the cached reader was opened.
     * <p>
     * If the version check or the reopen fails, the error is logged and
     * {@link #reset()} is attempted; the previously cached reader is returned
     * in that case.
     *
     * @return the cached file index reader; {@code null} if no reader has been
     *         opened successfully so far
     */
    protected IndexReader getFileReader() {
        try {
            // @TODO dont use deprecated method
            if (IndexReader.getCurrentVersion(luceneIndexDir) != luceneLastModified) {
                fileIndexReader = IndexReader.open(luceneIndexDir);
                // @TODO dont use deprecated method
                luceneLastModified = IndexReader.getCurrentVersion(luceneIndexDir);
            }
        } catch (IOException e) {
            LOG.severe(e.getLocalizedMessage());
            try {
                reset();
            } catch (Exception e1) {
                // Report the failure of the reset itself, not the original
                // I/O error a second time.
                LOG.severe(e1.getLocalizedMessage());
            }
        }

        return fileIndexReader;
    }

    /**
     * Returns a reader on the RAM index, reopening it when the index version
     * no longer matches the one recorded for the cached reader.
     * <p>
     * An {@link IOException} is printed to standard error and the previously
     * cached reader is returned.
     *
     * @return the cached RAM index reader; {@code null} if none has been opened
     *         successfully so far
     */
    protected IndexReader getRAMReader() {
        try {
            long currentVersion = IndexReader.getCurrentVersion(ramIndexDir);
            boolean stale = currentVersion != ramLastModified;

            if (stale) {
                ramIndexReader = IndexReader.open(ramIndexDir);
                // Read the version again after opening, as it is the value
                // the next staleness check is compared against.
                ramLastModified = IndexReader.getCurrentVersion(ramIndexDir);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

        return ramIndexReader;
    }

    /**
     * Translates a filter rule into a Lucene query on the "Body" field.
     * <p>
     * Each criterion's pattern is tokenized with the given analyzer and every
     * token becomes a required {@code *token*} wildcard term. A CONTAINS
     * criterion matches documents containing all of its tokens; a
     * CONTAINS_NOT criterion matches every document (any "uid") that does not
     * match its tokens. With {@code FilterRule.MATCH_ALL} all criteria are
     * required, otherwise they are optional alternatives.
     * <p>
     * Criteria with any other mode are logged and skipped, so that they can
     * neither add a {@code null} clause nor repeat the clause built for the
     * preceding criterion.
     *
     * @param filterRule
     *            the rule whose criteria are translated
     * @param analyzer
     *            the analyzer used to tokenize the criteria patterns
     * @return the combined query; never {@code null}
     */
    private Query getLuceneQuery(IFilterRule filterRule, Analyzer analyzer) {
        final String field = "Body";

        // MATCH_ALL => every criterion must match (AND); otherwise any may match (OR).
        boolean required = filterRule.getConditionInt() == FilterRule.MATCH_ALL;
        boolean prohibited = false;

        BooleanQuery result = new BooleanQuery();

        for (int i = 0; i < filterRule.count(); i++) {
            IFilterCriteria criteria = filterRule.get(i);
            int mode = criteria.getCriteria();

            TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(criteria.getPatternString()));

            BooleanQuery termQuery = new BooleanQuery();

            try {
                Token token = tokenStream.next();

                while (token != null) {
                    String pattern = "*" + token.termText() + "*";
                    LOG.info("Field = \"" + field + "\" Text = \"" + pattern + "\"");
                    termQuery.add(new WildcardQuery(new Term(field, pattern)), true, false);

                    token = tokenStream.next();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }

            // Declared per iteration so a clause can never leak from one
            // criterion into the next.
            BooleanQuery subresult;

            switch (mode) {
            case FilterCriteria.CONTAINS: {
                subresult = new BooleanQuery();
                subresult.add(termQuery, true, false);

                break;
            }

            case FilterCriteria.CONTAINS_NOT: {
                // A purely prohibited clause matches nothing on its own, so it
                // is combined with a clause matching every indexed uid.
                subresult = new BooleanQuery();
                subresult.add(new WildcardQuery(new Term("uid", "*")), true, false);
                subresult.add(termQuery, false, true);

                break;
            }

            default: {
                LOG.warning("Unsupported filter criteria mode " + mode + " - criterion ignored");

                continue;
            }
            }

            result.add(subresult, required, prohibited);
        }

        return result;
    }

    /**
     * Runs the given filter rule against both indexes and returns the UIDs of
     * the matching messages, without those recorded as deleted.
     * <p>
     * If {@code checkResult} rejects the first result (it re-synchronizes the
     * index in that case), the search is executed one more time.
     *
     * @param filter
     *            the rule to evaluate
     * @return list of matching UIDs
     * @throws Exception
     *             if searching the indexes fails
     */
    public List queryEngine(IFilterRule filter) throws Exception {
        Query luceneQuery = getLuceneQuery(filter, analyzer);

        List uids = search(luceneQuery);
        ListTools.substract(uids, deleted);

        if (checkResult(uids)) {
            return uids;
        }

        // The first result referenced a message unknown to the folder:
        // search again.
        List retried = search(luceneQuery);
        ListTools.substract(retried, deleted);

        return retried;
    }

    /**
     * Executes the query on the file index and then on the RAM index.
     *
     * @param query
     *            the Lucene query to run
     * @return the UIDs (as {@link Integer}) of all hits, file index hits first
     * @throws IOException
     *             if one of the indexes cannot be searched
     */
    protected List search(Query query) throws IOException {
        List result = new LinkedList();

        collectUids(getFileReader(), query, result);
        collectUids(getRAMReader(), query, result);

        return result;
    }

    /**
     * Appends the "uid" of every document matching the query in the given
     * reader to the target list. Empty indexes are not searched at all.
     */
    private void collectUids(IndexReader reader, Query query, List target) throws IOException {
        if (reader.numDocs() <= 0) {
            return;
        }

        // The searcher is deliberately not closed: it wraps the cached reader,
        // which other callers keep using.
        Hits hits = new IndexSearcher(reader).search(query);

        for (int i = 0; i < hits.length(); i++) {
            target.add(Integer.valueOf(hits.doc(i).getField("uid").stringValue()));
        }
    }

    /**
     * Runs the given filter rule and restricts the result to the given UIDs.
     *
     * @param filter
     *            the rule to evaluate
     * @param uids
     *            the UIDs the result is intersected with
     * @return list of matching UIDs after intersecting with {@code uids}
     * @throws Exception
     *             if searching the indexes fails
     */
    public List queryEngine(IFilterRule filter, Object[] uids) throws Exception {
        List matches = queryEngine(filter);

        List allowed = Arrays.asList(uids);
        ListTools.intersect(matches, allowed);

        return matches;
    }

    /**
     * Adds the message with the given UID to the RAM index and counts the
     * operation; once more than {@code OPTIMIZE_AFTER_N_OPERATIONS} additions
     * have accumulated, the RAM index is merged into the file index.
     *
     * @param uid
     *            UID of the message that was added to the folder
     * @throws Exception
     *             if the RAM index cannot be written or the merge fails
     * @see org.columba.mail.folder.SearchEngineInterface#messageAdded(IFolderEvent)
     */
    public void messageAdded(Object uid) throws Exception {
        Document messageDoc = getDocument(uid);

        IndexWriter writer = new IndexWriter(ramIndexDir, analyzer, false);

        try {
            writer.addDocument(messageDoc);
        } finally {
            // Close even when adding fails; an unclosed writer would keep the
            // RAM index locked against every later writer.
            writer.close();
        }

        incOperationCounter();
    }

    /**
     * Builds the Lucene document for one message: a "uid" keyword field plus,
     * when the message has a first text/plain part, its content as the
     * unstored "Body" field.
     * <p>
     * Any failure while reading the message is printed and logged; the
     * document is then returned without a body.
     *
     * @param uid
     *            UID of the message to index
     * @return the document, always carrying the "uid" field
     */
    private Document getDocument(Object uid) {
        Document doc = new Document();
        doc.add(Field.Keyword("uid", uid.toString()));

        try {
            // Locate the plain text body part, if any.
            MimePart textPart = folder.getMimePartTree(uid).getFirstTextPart("plain");

            if (textPart != null) {
                String body = StreamUtils
                        .readCharacterStream(folder.getMimePartBodyStream(uid, textPart.getAddress())).toString();
                doc.add(Field.UnStored("Body", body));
            }
        } catch (Exception e) {
            e.printStackTrace();
            LOG.severe(e.getMessage());
        }

        return doc;
    }

    /**
     * Records the UID as deleted. The indexes are not touched here: the UID
     * is subtracted from query results and physically removed from the
     * indexes by the next mergeRAMtoIndex().
     *
     * @param uid
     *            UID of the message that was removed from the folder
     * @see org.columba.mail.folder.SearchEngineInterface#messageRemoved(IFolderEvent)
     */
    public void messageRemoved(Object uid) throws Exception {
        deleted.add(uid);

        // Former immediate-delete implementation, kept for reference:
        /*
         * try { indexLock.tryToGetLock(null); getReader().delete(new
         * Term("uid", uid.toString())); indexLock.release(); } catch
         * (IOException e) { JOptionPane.showMessageDialog( null,
         * e.getMessage(), "Error while removing Message from Lucene Index",
         * JOptionPane.ERROR_MESSAGE); }
         */
    }

    /**
     * Applies the pending deletions and merges the RAM index into the file
     * index.
     * <p>
     * Every UID recorded as deleted is removed from the RAM index or, when it
     * is not found there, from the file index. Both readers are then closed,
     * the RAM index is added to the file index, the file index is optimized,
     * and a fresh RAM index and an empty deletion list are set up.
     * <p>
     * Readers and writer are closed even when a step fails, and the cached
     * version stamps are invalidated so that the closed readers are never
     * handed out again.
     *
     * @throws IOException
     *             if deleting, merging or optimizing fails
     */
    protected void mergeRAMtoIndex() throws IOException {
        IndexReader ramReader = getRAMReader();
        IndexReader fileReader = getFileReader();

        LOG.fine("Lucene: Merging RAMIndex to FileIndex");

        try {
            ListIterator it = deleted.listIterator();

            while (it.hasNext()) {
                String uid = it.next().toString();

                // A uid lives in exactly one of the two indexes; only look in
                // the file index when the RAM index did not contain it.
                if (ramReader.delete(new Term("uid", uid)) == 0) {
                    fileReader.delete(new Term("uid", uid));
                }
            }
        } finally {
            // Force getFileReader()/getRAMReader() to reopen: the cached
            // readers are closed right below.
            luceneLastModified = -1;
            ramLastModified = -1;

            try {
                fileReader.close();
            } finally {
                ramReader.close();
            }
        }

        IndexWriter fileIndex = new IndexWriter(luceneIndexDir, analyzer, false);

        try {
            fileIndex.addIndexes(new Directory[] { ramIndexDir });

            fileIndex.optimize();
        } finally {
            // Release the writer (and its lock) even if the merge failed.
            fileIndex.close();
        }

        initRAMDir();

        deleted.clear();
    }

    /**
     * Replaces the RAM index with a new, empty one and invalidates the cached
     * RAM reader version so that the next getRAMReader() call reopens it.
     *
     * @throws IOException
     *             if the empty index cannot be written
     */
    private void initRAMDir() throws IOException {
        ramIndexDir = new RAMDirectory();

        // Opening a writer in create mode and closing it writes the empty index.
        new IndexWriter(ramIndexDir, analyzer, true).close();

        ramLastModified = -1;
    }

    /**
     * Counts one more index operation and, as soon as the count exceeds
     * {@code OPTIMIZE_AFTER_N_OPERATIONS}, merges the RAM index into the file
     * index and starts counting from zero again.
     *
     * @throws IOException
     *             if the merge fails; the counter is not reset in that case
     */
    private void incOperationCounter() throws IOException {
        operationCounter += 1;

        if (operationCounter <= OPTIMIZE_AFTER_N_OPERATIONS) {
            return;
        }

        mergeRAMtoIndex();
        operationCounter = 0;
    }

    /**
     * Returns the caps, i.e. the names of the fields this engine can search.
     * <p>
     * A copy is returned so that callers cannot modify the shared static
     * array.
     *
     * @return String[] a new array holding the capability names
     */
    public String[] getCaps() {
        return (String[]) CAPS.clone();
    }

    /**
     * Checks that every UID in the result still exists in the folder.
     * <p>
     * On the first UID the folder does not know, the result list is cleared,
     * {@link #sync()} is run and {@code false} is returned. An exception
     * raised during the check is printed and the result is treated as valid.
     *
     * @param result
     *            UIDs returned by a search; emptied when the check fails
     * @return {@code false} if a stale UID was found, {@code true} otherwise
     */
    private boolean checkResult(List result) {
        ListIterator uids = result.listIterator();

        try {
            while (uids.hasNext()) {
                Object uid = uids.next();

                if (folder.exists(uid)) {
                    continue;
                }

                // Stale hit: discard everything and rebuild the index.
                result.clear();
                sync();

                return false;
            }
        } catch (Exception e) {
            e.printStackTrace();
        }

        return true;
    }

    /**
     * Replaces the file index with a new, empty one by calling
     * createIndex(). No messages are re-indexed here; that is done by
     * {@link #sync()}.
     *
     * @throws Exception
     *             if the empty index cannot be created
     * @see org.columba.mail.folder.DefaultSearchEngine#reset()
     */
    public void reset() throws Exception {
        createIndex();
    }

    /**
     * {@inheritDoc}
     * <p>
     * Rebuilds the file index from scratch: an empty index is created and one
     * document per entry of the folder's header list is added, followed by an
     * optimize. Progress is reported to the folder's status observable when
     * one is available. Failures during the rebuild are logged, not thrown.
     */
    public void sync() throws Exception {
        LOG.severe("Lucene Index inconsistent - recreation forced");
        IHeaderList hl = folder.getHeaderList();

        // The observable may be absent; guard every use, not just the first.
        IStatusObservable observable = getObservable();

        if (observable != null) {
            observable.setMessage(MailResourceLoader.getString("statusbar", "message", "lucene_sync"));
            observable.setCurrent(0);
        }

        try {
            createIndex();

            IndexWriter writer = new IndexWriter(luceneIndexDir, analyzer, false);

            try {
                int count = hl.count();

                // NOTE(review): this looks like it was meant to announce the
                // maximum rather than the current value - confirm against
                // IStatusObservable before changing.
                if (observable != null) {
                    observable.setCurrent(count);
                }

                int i = 0;
                ICloseableIterator it = hl.keyIterator();

                try {
                    while (it.hasNext()) {
                        Object uid = it.next();

                        writer.addDocument(getDocument(uid));

                        // Advance the progress value for every indexed message.
                        i++;

                        if (observable != null) {
                            observable.setCurrent(i);
                        }
                    }
                } finally {
                    it.close();
                }

                if (observable != null) {
                    observable.setCurrent(count);
                }

                writer.optimize();
            } finally {
                // Release the writer (and its lock) even if indexing failed.
                writer.close();
            }
        } catch (Exception e) {
            LOG.severe("Creation of Lucene Index failed :" + e.getLocalizedMessage());

            // show neat error dialog here
        }
    }

    /**
     * Returns the status observable used for progress reporting.
     *
     * @return whatever the folder's getObservable() returns
     */
    public IStatusObservable getObservable() {
        return folder.getObservable();
    }

    /**
     * Persists pending index changes by merging the RAM index (and the
     * recorded deletions) into the file index. An I/O failure is logged and
     * not propagated.
     */
    public void save() {
        try {
            mergeRAMtoIndex();
        } catch (IOException mergeFailure) {
            LOG.severe(mergeFailure.getMessage());
        }
    }

}