PostIndexer.java :  » Forum » mvnforum-1.1 » com » mvnforum » search » post » Java Open Source

Java Open Source » Forum » mvnforum 1.1 
mvnforum 1.1 » com » mvnforum » search » post » PostIndexer.java
/*
 * $Header: /cvsroot/mvnforum/mvnforum/src/com/mvnforum/search/post/PostIndexer.java,v 1.23 2008/01/15 11:17:57 minhnn Exp $
 * $Author: minhnn $
 * $Revision: 1.23 $
 * $Date: 2008/01/15 11:17:57 $
 *
 * ====================================================================
 *
 * Copyright (C) 2002-2007 by MyVietnam.net
 *
 * All copyright notices regarding mvnForum MUST remain
 * intact in the scripts and in the outputted HTML.
 * The "powered by" text/logo with a link back to
 * http://www.mvnForum.com and http://www.MyVietnam.net in
 * the footer of the pages MUST remain visible when the pages
 * are viewed on the internet or intranet.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * Support can be obtained from support forums at:
 * http://www.mvnForum.com/mvnforum/index
 *
 * Correspondence and Marketing Questions can be sent to:
 * info at MyVietnam net
 *
 * @author: Minh Nguyen
 * @author: Dejan Krsmanovic dejan_krsmanovic@yahoo.com
 */
package com.mvnforum.search.post;

import java.io.IOException;

import net.myvietnam.mvncore.exception.SearchException;
import net.myvietnam.mvncore.util.DateUtil;
import net.myvietnam.mvncore.util.TimerUtil;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.index.*;
import org.apache.lucene.store.Directory;

import com.mvnforum.MVNForumFactoryConfig;
import com.mvnforum.db.PostBean;
import com.mvnforum.search.IntegerFilter;
import com.mvnforum.service.MvnForumServiceFactory;
import com.mvnforum.service.SearchService;

public class PostIndexer {
    
    private static Log log = LogFactory.getLog(PostIndexer.class);

    //Field names (used for indexing)
    public static final String FIELD_POST_ID    = "postID";
    public static final String FIELD_THREAD_ID  = "threadID";
    public static final String FIELD_FORUM_ID   = "forumID";
    public static final String FIELD_MEMBER_ID  = "memberID";
    public static final String FIELD_POST_TOPIC = "postTopic";
    public static final String FIELD_POST_BODY  = "postBody";
    public static final String FIELD_POST_DATE  = "postDate";

    public static final String FIELD_WITH_ATTACHMENT = "withAttachment";

    public static final String FIELD_ATTACHMENT_COUNT = "attachmentCount";

    //public static final String PROPERTY_SEARCH_PATH      = "search.path";
    //public static final String PROPERTY_SEARCH_AUTOINDEX = "search.autoindex";

    //Timer is used for scheduling jobs
    private static Analyzer analyzer;

    private static long lastOptimizeTime = 0;

    static {
        initializeAnalyzer();
    }

    public static void scheduleAddPostTask(PostBean postBean) {
        AddUpdatePostIndexTask task = new AddUpdatePostIndexTask(postBean, AddUpdatePostIndexTask.OPERATION_ADD);
        TimerUtil.getInstance().schedule(task, 0);
    }

    public static void scheduleUpdatePostTask(PostBean postBean) {
        AddUpdatePostIndexTask task = new AddUpdatePostIndexTask(postBean, AddUpdatePostIndexTask.OPERATION_UPDATE);
        TimerUtil.getInstance().schedule(task, 0);
    }

    public static void scheduleDeletePostTask(int objectID, int objectType) {
        DeletePostIndexTask task = new DeletePostIndexTask(objectID, objectType);
        TimerUtil.getInstance().schedule(task, 0);
    }

    public static void scheduleUpdateThreadTask(int threadID) {
        UpdateThreadIndexTask task = new UpdateThreadIndexTask(threadID);
        TimerUtil.getInstance().schedule(task, 0);
    }

    public static void scheduleRebuildIndexTask() {
        int maxPostID = 0;
        RebuildPostIndexTask task = new RebuildPostIndexTask(maxPostID);
        TimerUtil.getInstance().schedule(task, 0);
    }

    static Analyzer getAnalyzer() {
        return analyzer;
    }

    /**
     * This class will load analyzer when starting. If specified analyzer class
     * cannot be loaded then default analyzer will be used.
     */
    private static void initializeAnalyzer() {
        String analyzerClassName = MVNForumFactoryConfig.getLuceneAnalyzerClassName();
        if ( (analyzerClassName == null) || (analyzerClassName.equals("")) ) {
            //create standard analyzer
            //String[] stopWords = this.loadStopWords();
            analyzer = new StandardAnalyzer();
            log.debug("Using StandardAnalyzer for indexing");
        } else {
            //try to create specified analyzer
            try {
                log.debug("About to load Analyzer [" + analyzerClassName + "] for indexing");
                analyzer = (Analyzer) Class.forName(analyzerClassName).newInstance();
            } catch (Exception e) {
                log.warn("Cannot load " + analyzerClassName + ". Loading StandardAnalyzer");
                analyzer = new StandardAnalyzer();
            }
        }
    }

    /**
     * This method is used for getting new IndexWriter. It can create new index
     * or add post to existing index. Creating new index will delete previous so it
     * should be used for rebuilding index.
     * @param create - true if new index should be created.
     *               - false for adding posts to existing index
     * @return IndexWriter object that is used for adding posts to index
     */
    static IndexWriter getIndexWriter(Directory directory, boolean create) throws SearchException {

        IndexWriter writer = null;

        SearchService service = MvnForumServiceFactory.getMvnForumService().getSearchService();
        //If create = false, we will create IndexWriter with false argument
        if (create == false) {
            try {
                writer = new IndexWriter(directory, analyzer, false);
                //writer.setWriteLockTimeout(100000);
                if (service.savePostOnDisk()) {
                    writer.setUseCompoundFile(true);
                }
                return writer;
            } catch (IOException e) {
                log.warn("Cannot open existed index. New index will be created.", e);
                //Ignore Exception. We will try to create index with true parameter
            }
        }
        // We are here in two cases: We wanted to create new index or because
        // index doesn't existed
        try {
            //This will create new index and delete existing
            service.deleteContent(directory);
            writer = new IndexWriter(directory, analyzer, true);// actually the directory should be 'create' = true
            //writer.setWriteLockTimeout(100000);
            if (service.savePostOnDisk()) {
                writer.setUseCompoundFile(true);
            }
            return writer;
        } catch (IOException e) {
            //@todo : localize me
            log.error("IOException during get index writer", e);
            throw new SearchException("Error while creating index writer");
        }
    }

    /**
     * This method is used for adding single post to index
     * Note: this method does not close the writer
     * @param post A post that should be indexed
     * @param writer IndexWriter that is used for storing
     * @throws SearchException
     */
    static void doIndexPost(PostBean post, IndexWriter writer) throws SearchException {

        if (post == null) return;
        //Post must include topic and body. If not then we have nothing to index.
        if ( (post.getPostTopic() == null || post.getPostTopic().equals("")) ||
             (post.getPostBody() == null || post.getPostBody().equals(""))) {
            return;
        }

        //Each post will be represented as a document
        Document postDocument = new Document();
        //Document has following fields that could be queried on
        postDocument.add(new Field(FIELD_POST_ID, Integer.toString(post.getPostID()), Field.Store.YES, Field.Index.UN_TOKENIZED));
        postDocument.add(new Field(FIELD_THREAD_ID, Integer.toString(post.getThreadID()), Field.Store.YES, Field.Index.UN_TOKENIZED));
        postDocument.add(new Field(FIELD_FORUM_ID, Integer.toString(post.getForumID()), Field.Store.YES, Field.Index.UN_TOKENIZED));
        postDocument.add(new Field(FIELD_MEMBER_ID, Integer.toString(post.getMemberID()), Field.Store.YES, Field.Index.UN_TOKENIZED));
        postDocument.add(new Field(FIELD_WITH_ATTACHMENT, new Boolean(post.getPostAttachCount()>0).toString(), Field.Store.YES, Field.Index.UN_TOKENIZED));// make it compilable on JDK 1.3
        postDocument.add(new Field(FIELD_ATTACHMENT_COUNT, IntegerFilter.intToString(post.getPostAttachCount()), Field.Store.YES, Field.Index.UN_TOKENIZED));
        //postDocument.add(new Field(FIELD_ATTACHMENT_COUNT, Integer.toString(post.getPostAttachCount()), Field.Store.YES, Field.Index.UN_TOKENIZED));

        //document body and title is not stored since we can retrieve them from database
        postDocument.add(new Field(FIELD_POST_TOPIC, post.getPostTopic(), Field.Store.NO, Field.Index.TOKENIZED));
        postDocument.add(new Field(FIELD_POST_BODY, post.getPostBody(), Field.Store.NO, Field.Index.TOKENIZED));
        //add date field
        postDocument.add(new Field(FIELD_POST_DATE, DateTools.dateToString(post.getPostCreationDate(), Resolution.MILLISECOND), Field.Store.YES, Field.Index.UN_TOKENIZED));

        //now we have created document with fields so we can store it
        try {
            writer.addDocument(postDocument);
        } catch (IOException e) {
            log.error("PostIndexer.doIndexPost failed", e);
            //@todo : localize me
            throw new SearchException("Error writing new post to index");
        } catch (Throwable e) {
            log.error("PostIndexer.doIndexPost failed", e);
            //@todo : localize me
            throw new SearchException("Error writing new post to index");
        }
    }

    /**
     * Add single post to index
     * @param post
     * @throws SearchException
     */
    static void addPostToIndex(PostBean post) throws SearchException, IOException {

        Directory directory = null;
        IndexWriter writer = null;
        SearchService service = MvnForumServiceFactory.getMvnForumService().getSearchService();
        try {
            directory = service.getSearchPostIndexDir();
            writer = getIndexWriter(directory, false);
            if (writer == null) {
                log.warn("Cannot get the IndexWriter");
                return;
            }
            doIndexPost(post, writer);

            // now check if we should optimize index (each hour)
            long now = System.currentTimeMillis();
            long timeFromLastOptimize = now - lastOptimizeTime;
            if (service.savePostOnDisk() && (timeFromLastOptimize > DateUtil.HOUR)) {
                log.debug("writer.optimize() called in addPostToIndex");
                writer.optimize();
                lastOptimizeTime = now;
            }
        } catch (SearchException ex) {
            throw ex;
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    log.debug("Error closing Lucene IndexWriter", e);
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    log.debug("Cannot close directory.", e);
                }
            }
        }
    }

    /**
     * This method is used for deleting post from index.
     * @param postID id of the post that should be deleted
     * @throws SearchException
     */
    static void deletePostFromIndex(int postID) throws SearchException {

        Directory directory = null;
        IndexReader reader = null;
        try {
            SearchService service = MvnForumServiceFactory.getMvnForumService().getSearchService();
            directory = service.getSearchPostIndexDir();
            reader = IndexReader.open(directory);
            if (reader == null) {
                log.warn("Cannot get the IndexReader");
                return;
            }

            Term term = new Term(FIELD_POST_ID, String.valueOf(postID));
            int deletedCount = reader.deleteDocuments(term);
            log.debug("deletePostFromIndex: deleted posts = " + deletedCount);
        } catch (IOException e) {
            //@todo : localize me
            throw new SearchException("Error trying to delete post with postID = " + postID);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    log.debug("Error closing Lucene IndexReader", e);
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    log.debug("Cannot close directory.", e);
                }
            }
        }
    }

    /**
     * This method is used for deleting all posts in a thread from index.
     * @param threadID id of the thread that should be deleted
     * @throws SearchException
     */
    static void deleteThreadFromIndex(int threadID) throws SearchException {

        Directory directory = null;
        IndexReader reader = null;
        try {
            SearchService service = MvnForumServiceFactory.getMvnForumService().getSearchService();
            directory = service.getSearchPostIndexDir();
            reader = IndexReader.open(directory);
            if (reader == null) {
                log.warn("Cannot get the IndexReader");
                return;
            }

            Term term = new Term(FIELD_THREAD_ID, String.valueOf(threadID));
            int deletedCount = reader.deleteDocuments(term);
            log.debug("deleteThreadFromIndex: deleted posts = " + deletedCount);
        } catch (IOException e) {
            //@todo : localize me
            throw new SearchException("Error trying to delete posts in index with threadID = " + threadID);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    log.debug("Error closing Lucene IndexReader", e);
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    log.debug("Cannot close directory.", e);
                }
            }
        }
    }

    /**
     * This method is used for deleting all posts in a forum from index.
     * @param forumID id of the forum that should be deleted
     * @throws SearchException
     */
    static void deleteForumFromIndex(int forumID) throws SearchException {

        Directory directory = null;
        IndexReader reader = null;
        try {
            SearchService service = MvnForumServiceFactory.getMvnForumService().getSearchService();
            directory = service.getSearchPostIndexDir();
            reader = IndexReader.open(directory);
            if (reader == null) {
                log.warn("Cannot get the IndexReader");
                return;
            }

            Term term = new Term(FIELD_FORUM_ID, String.valueOf(forumID));
            int deletedCount = reader.deleteDocuments(term);
            log.debug("deleteForumFromIndex: deleted posts = " + deletedCount);
        } catch (IOException e) {
            //@todo : localize me
            throw new SearchException("Error trying to delete posts in index with forumID = " + forumID);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    log.debug("Error closing Lucene IndexReader", e);
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    log.debug("Cannot close directory.", e);
                }
            }
        }
    }

    public static int getNumDocs() {

        int numDocs = -1;
        Directory directory = null;
        IndexReader reader = null;
        try {
            SearchService service = MvnForumServiceFactory.getMvnForumService().getSearchService();
            directory = service.getSearchPostIndexDir();
            reader = IndexReader.open(directory);
            if (reader == null) {
                log.warn("Cannot get the IndexReader");
                return -1;
            }
            numDocs = reader.numDocs();
        } catch (IOException ioe) {
            //ignore
            ioe.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    log.debug("Error closing Lucene IndexReader", e);
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    log.debug("Cannot close directory.", e);
                }
            }
        }
        return numDocs;
    }

}
java2s.com  | Contact Us | Privacy Policy
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.