lius.lucene.LuceneActions.java Source code

Java tutorial

Introduction

Here is the source code for lius.lucene.LuceneActions.java

Source

package lius.lucene;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import lius.config.LiusConfig;
import lius.config.LiusConfigBuilder;
import lius.config.LiusDocumentProperty;
import lius.config.LiusField;
import lius.exception.LiusException;
import lius.index.Indexer;
import lius.index.IndexerFactory;
import lius.search.LiusHit;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
// import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 *
 * Classe permettant d'effectuer des actions relatives à Lucene.
 *
 * <br/><br/>
 *
 * Class that executes actions related to Lucene.
 *
 * @author Rida Benjelloun (ridabenjelloun@gmail.com)
 *
 */

public class LuceneActions {

    /** Lazily created shared instance; only assigned inside {@link #getSingletonInstance()}. */
    private static LuceneActions luceneActIns;

    /** Logger used for every message emitted by this class (the log4j root logger). */
    static Logger logger = Logger.getRootLogger();

    /** Configuration most recently loaded by {@code index(String, String, String)}. */
    private LiusConfig lc = null;

    /** Private so that instances are only obtained through {@link #getSingletonInstance()}. */
    private LuceneActions() {
    }

    /**
     * Returns the shared instance, creating it on first use.
     *
     * The method is synchronized because the instance monitor is what the
     * {@code synchronized} methods of this class lock on: two threads racing
     * through an unsynchronized null check could each receive a different
     * instance and therefore a different lock.
     *
     * @return the single {@code LuceneActions} instance
     */
    public static synchronized LuceneActions getSingletonInstance() {
        if (luceneActIns == null) {
            luceneActIns = new LuceneActions();
        }
        return luceneActIns;
    }

    /**
     * Builds one Lucene document from several collections of fields. This is
     * the entry point used for mixed indexing: every collection found in the
     * list is flattened into a single collection, which is then handed to
     * {@link #populateLuceneDoc(Collection)}.
     *
     * @param listCollectionsFieldsContentAndType list whose elements are
     *            {@code Collection}s of {@code LiusField}
     * @return the document produced by {@code populateLuceneDoc}
     */
    public Document populateLuceneDocumentFromListOfLiusFields(List listCollectionsFieldsContentAndType) {
        Collection mergedFields = new ArrayList();
        Iterator groups = listCollectionsFieldsContentAndType.iterator();
        while (groups.hasNext()) {
            Collection group = (Collection) groups.next();
            for (Iterator members = group.iterator(); members.hasNext();) {
                // The cast is deliberate: only LiusField elements are accepted here.
                mergedFields.add((LiusField) members.next());
            }
        }
        return populateLuceneDoc(mergedFields);
    }

    /**
     * Builds a Lucene document from a collection of {@code LiusField} and
     * {@code LiusDocumentProperty} objects.
     *
     * Each {@code LiusField} is converted according to its type
     * (case-insensitive): {@code Text}, {@code TextReader}, {@code Keyword},
     * {@code concatDate}, {@code UnIndexed} or {@code UnStored}. Fields of any
     * other type, and {@code TextReader} fields without a reader, are skipped.
     * If a {@code LiusDocumentProperty} with a non-null boost is present, the
     * last one seen sets the document boost.
     *
     * @param fieldsContentAndType the fields and properties to convert; may be null
     * @return the populated document, or {@code null} when the argument is null
     */
    public Document populateLuceneDoc(Collection fieldsContentAndType) {
        if (fieldsContentAndType == null) {
            return null;
        }
        Document doc = new Document();
        LiusDocumentProperty ldp = null;
        logger.debug("==== Nouveau lucene document dans l'index ====");
        for (Iterator it = fieldsContentAndType.iterator(); it.hasNext();) {
            Object entry = it.next();
            if (entry instanceof LiusField) {
                LiusField lf = (LiusField) entry;
                Field field = buildField(lf);
                if (lf.getIsBoosted() && field != null) {
                    field.setBoost(lf.getBoost());
                    logger.debug("--------> Field " + lf.getName() + " setBoost = " + lf.getBoost());
                }
                if (field != null) {
                    doc.add(field);
                }
            } else if (entry instanceof LiusDocumentProperty) {
                ldp = (LiusDocumentProperty) entry;
            }
        }
        if (ldp != null && ldp.getBoost() != null) {
            doc.setBoost(Float.parseFloat(ldp.getBoost()));
            logger.debug("@@@@@@@@@@@@@@@ Document boost = " + ldp.getBoost() + " @@@@@@@@@@@@@@@");
        }
        return doc;
    }

    /**
     * Converts one {@code LiusField} into the matching Lucene {@code Field}.
     *
     * @return the field, or {@code null} when the type is not recognised or a
     *         {@code TextReader} field carries no reader
     */
    private Field buildField(LiusField lf) {
        String type = lf.getType();
        Field field = null;
        if (type.equalsIgnoreCase("Text")) {
            field = new Field(lf.getName(), lf.getValue(), Field.Store.YES, Field.Index.TOKENIZED);
            logger.debug(lf.getName() + " (type = " + lf.getType() + ") " + " : " + lf.getValue());
        } else if (type.equalsIgnoreCase("TextReader")) {
            if (lf.getValueInputStreamReader() != null) {
                field = new Field(lf.getName(), lf.getValueInputStreamReader());
            } else if (lf.getValueReader() != null) {
                field = new Field(lf.getName(), lf.getValueReader());
            }
            logger.debug(lf.getName() + " (type = " + lf.getType() + ") " + " : Texte ajout");
        } else if (type.equalsIgnoreCase("Keyword")) {
            field = new Field(lf.getName(), lf.getValue(), Field.Store.YES, Field.Index.UN_TOKENIZED);
            logger.debug(lf.getName() + " (type = " + lf.getType() + ") " + " : " + lf.getValue());
        } else if (type.equalsIgnoreCase("concatDate")) {
            String dateValue = stripDateSeparators(lf.getValue());
            field = new Field(lf.getName(), dateValue, Field.Store.YES, Field.Index.UN_TOKENIZED);
            logger.debug(lf.getName() + " (type = " + lf.getType() + ") " + " : " + dateValue);
        } else if (type.equalsIgnoreCase("UnIndexed")) {
            field = new Field(lf.getName(), lf.getValue(), Field.Store.YES, Field.Index.NO);
            logger.debug(lf.getName() + " (type = " + lf.getType() + ") " + " : " + lf.getValue());
        } else if (type.equalsIgnoreCase("UnStored")) {
            field = new Field(lf.getName(), lf.getValue(), Field.Store.NO, Field.Index.TOKENIZED);
            logger.debug(lf.getName() + " (type = " + lf.getType() + ") " + " : " + lf.getValue());
        }
        return field;
    }

    /**
     * Removes every '-', '/', ' ', '\' and '.' from a date string.
     *
     * A single character class is used so that '.' and '\' are matched
     * literally: as bare regular expressions, "." matches every character
     * (wiping the value) and "\\" is not a valid pattern.
     */
    private static String stripDateSeparators(String dateValue) {
        return dateValue.replaceAll("[-/ .\\\\]", "");
    }

    /**
     * Adds every Lucene document of the list to the index by delegating to
     * {@code save(Document, IndexWriter, LiusConfig)} once per element, in
     * list order.
     *
     * @param luceneDocs list of {@code Document} objects
     * @param writer     the writer the documents are added to
     * @param lc         configuration forwarded to the single-document save
     */
    public synchronized void save(List luceneDocs, IndexWriter writer, LiusConfig lc) throws LiusException {
        Iterator docs = luceneDocs.iterator();
        while (docs.hasNext()) {
            save((Document) docs.next(), writer, lc);
        }
    }

    /**
     * Adds one Lucene document to the index and, when the configuration asks
     * for it, optimizes the index afterwards.
     *
     * I/O failures from either step are logged and not rethrown.
     *
     * @param luceneDoc the document to add
     * @param writer    the writer the document is added to
     * @param lc        configuration consulted for the optimize setting
     */
    public synchronized void save(Document luceneDoc, IndexWriter writer, LiusConfig lc) throws LiusException {
        try {
            writer.addDocument(luceneDoc);
            logger.debug("**************Document Ajout  l'index*********");
        } catch (IOException addFailure) {
            logger.error(addFailure.getMessage());
            logger.error("*************Document non Ajout  l'index*********");
        }

        // Optimization is attempted even when the add above failed.
        boolean optimizeRequested = lc.getOptimizeValue() != null && lc.getOptimize();
        if (optimizeRequested) {
            try {
                writer.optimize();
            } catch (IOException optimizeFailure) {
                logger.error(optimizeFailure.getMessage());
            }
        }
    }

    /**
     * Indexes a single document into the index stored under {@code indexDir}.
     *
     * A writer is opened according to the configuration (analyzer, create
     * flag, merge settings), the document is saved through the singleton, and
     * the writer is closed. Any exception raised while opening or saving is
     * logged and not rethrown; an {@code IOException} from closing the writer
     * propagates to the caller.
     *
     * @param doc      the document to add
     * @param indexDir path of the index directory
     * @param lc       configuration driving analyzer and writer settings
     */
    public synchronized void index(Document doc, String indexDir, LiusConfig lc) throws LiusException, IOException {
        IndexWriter indexWriter = null;
        Analyzer configuredAnalyzer = AnalyzerFactory.getAnalyzer(lc);
        try {
            boolean mustCreate = createIndexValue(lc.getCreateIndex(), indexDir);
            Directory target = FSDirectory.getDirectory(indexDir, mustCreate);
            indexWriter = new IndexWriter(target, configuredAnalyzer, mustCreate);
            setIndexWriterProps(indexWriter, lc);
            LuceneActions.getSingletonInstance().save(doc, indexWriter, lc);
        } catch (Exception failure) {
            logger.error(failure.getMessage());
        } finally {
            // The lock is cleared first, then the writer is closed.
            unLock(indexDir);
            if (indexWriter != null) {
                indexWriter.close();
            }
        }
    }

    /**
     * Indexes a single document into the given Lucene directory. The writer is
     * always opened in create mode.
     *
     * Any exception raised while opening or saving is logged and not
     * rethrown; an {@code IOException} from closing the writer propagates.
     *
     * @param doc      the document to add
     * @param indexDir the directory receiving the index
     * @param lc       configuration driving analyzer and writer settings
     * @return the same {@code indexDir} that was passed in
     */
    public synchronized Directory index(Document doc, Directory indexDir, LiusConfig lc)
            throws LiusException, IOException {
        IndexWriter indexWriter = null;
        Analyzer configuredAnalyzer = AnalyzerFactory.getAnalyzer(lc);
        try {
            indexWriter = new IndexWriter(indexDir, configuredAnalyzer, true);
            setIndexWriterProps(indexWriter, lc);
            LuceneActions.getSingletonInstance().save(doc, indexWriter, lc);
        } catch (Exception failure) {
            logger.error(failure.getMessage());
        } finally {
            if (indexWriter != null) {
                indexWriter.close();
            }
        }
        return indexDir;
    }

    /**
     * Indexes a file, or every file below a directory, into the index stored
     * under {@code indexDir}, using the configuration file {@code liusConfig}.
     *
     * The loaded configuration is kept in the {@code lc} field. A writer is
     * opened and configured, then the actual work is delegated to
     * {@code fileDirectoryIndexing}. Any exception raised in that phase is
     * logged and not rethrown; an {@code IOException} from closing the writer
     * propagates.
     *
     * @param toIndex    path of the file or directory to index
     * @param indexDir   path of the index directory
     * @param liusConfig path of the configuration file
     */
    public synchronized void index(String toIndex, String indexDir, String liusConfig)
            throws LiusException, IOException {
        lc = LiusConfigBuilder.getSingletonInstance().getLiusConfig(liusConfig);

        IndexWriter indexWriter = null;
        Analyzer configuredAnalyzer = AnalyzerFactory.getAnalyzer(lc);
        try {
            boolean mustCreate = createIndexValue(lc.getCreateIndex(), indexDir);
            Directory target = FSDirectory.getDirectory(indexDir, mustCreate);
            indexWriter = new IndexWriter(target, configuredAnalyzer, mustCreate);
            setIndexWriterProps(indexWriter, lc);
            fileDirectoryIndexing(toIndex, indexDir, lc);
        } catch (Exception failure) {
            logger.error(failure.getMessage());
        } finally {
            // The lock is cleared first, then the writer is closed.
            unLock(indexDir);
            if (indexWriter != null) {
                indexWriter.close();
            }
        }
    }

    /**
     * Indexes a file, or recursively every file below a directory, through a
     * single index writer, then optimizes the index.
     *
     * Failures are logged and not rethrown. The writer is closed exactly once,
     * whether or not indexing succeeded, and {@code unLock} is called
     * afterwards.
     *
     * @param f        file or directory to index
     * @param indexDir path of the index directory
     * @param lc       configuration used to open the writer and pick indexers
     */
    public synchronized void recursifIndexing(File f, String indexDir, LiusConfig lc) {
        IndexWriter iw = null;
        try {
            iw = LuceneActions.getSingletonInstance().openIndex(indexDir, lc);
            if (iw == null) {
                // openIndex logs its own failure and may hand back no writer.
                logger.error("No index writer available for " + indexDir);
                return;
            }
            fileDirectoryProcessing(f, indexDir, lc, iw);
            iw.optimize();
        } catch (LiusException e) {
            logger.error(e.getMessage());
        } catch (IOException e) {
            logger.error(e.getMessage());
        } finally {
            if (iw != null) {
                try {
                    // Single close: closing here covers success and failure alike.
                    iw.close();
                } catch (IOException e1) {
                    logger.error(e1.getMessage());
                }
                // Runs even if close() failed, so no lock is left behind.
                LuceneActions.getSingletonInstance().unLock(indexDir);
            }
        }
    }

    /**
     * Alias of {@code recursifIndexing}: forwards its arguments unchanged.
     *
     * @param f        file or directory to index
     * @param indexDir path of the index directory
     * @param lc       configuration forwarded to {@code recursifIndexing}
     */
    public synchronized void indexSubDirectories(File f, String indexDir, LiusConfig lc) {
        this.recursifIndexing(f, indexDir, lc);
    }

    /**
     * Adds {@code f} to the index if it is a file, otherwise recurses into
     * its children.
     *
     * For a file, an indexer is looked up through {@code IndexerFactory}; when
     * one is found its fields are converted to a Lucene document, a stored
     * untokenized {@code filePath} field holding the absolute path is added,
     * and the document is written. Files without an indexer, or whose indexer
     * yields no fields, are skipped.
     *
     * @param f        file or directory to process
     * @param indexDir path of the index directory (passed through on recursion)
     * @param lc       configuration used to select the indexer
     * @param iw       the open writer receiving the documents
     * @throws IOException if adding a document fails
     */
    private void fileDirectoryProcessing(File f, String indexDir, LiusConfig lc, IndexWriter iw)
            throws IOException {
        if (f.isFile()) {
            Indexer indexer = IndexerFactory.getIndexer(f, lc);
            if (indexer == null) {
                return;
            }
            Document doc = LuceneActions.getSingletonInstance()
                    .populateLuceneDoc(indexer.getPopulatedLiusFields());
            if (doc == null) {
                // populateLuceneDoc returns null when the indexer produced no field collection.
                logger.error("No fields extracted from " + f.getAbsolutePath());
                return;
            }
            doc.add(new Field("filePath", f.getAbsolutePath(), Field.Store.YES, Field.Index.UN_TOKENIZED));
            iw.addDocument(doc);
            return;
        }

        File[] files = f.listFiles();
        if (files == null) {
            // listFiles() yields null for a path that is not a directory or cannot be read.
            logger.error("Unable to list " + f.getAbsolutePath());
            return;
        }
        for (int i = 0; i < files.length; i++) {
            fileDirectoryProcessing(files[i], indexDir, lc, iw);
        }
    }

    /**
     * Merges the given Lucene directories into the index stored under
     * {@code indexDir}.
     *
     * All failures, including one raised while closing the writer, are logged
     * and not rethrown.
     *
     * @param directoriesToIndex the indexes to merge in
     * @param indexDir           path of the destination index directory
     * @param lc                 configuration driving analyzer and writer settings
     */
    public void addIndexes(Directory[] directoriesToIndex, String indexDir, LiusConfig lc) {
        IndexWriter mergeWriter = null;
        Analyzer configuredAnalyzer = AnalyzerFactory.getAnalyzer(lc);
        try {
            boolean mustCreate = createIndexValue(lc.getCreateIndex(), indexDir);
            Directory target = FSDirectory.getDirectory(indexDir, mustCreate);
            mergeWriter = new IndexWriter(target, configuredAnalyzer, mustCreate);
            setIndexWriterProps(mergeWriter, lc);
            mergeWriter.addIndexes(directoriesToIndex);
        } catch (Exception failure) {
            logger.error(failure.getMessage());
        } finally {
            if (mergeWriter != null) {
                try {
                    mergeWriter.close();
                } catch (IOException closeFailure) {
                    logger.error(closeFailure.getMessage());
                }
            }
        }
    }

    /**
     * Opens and configures an index writer on {@code indexDir}.
     *
     * If anything fails, the error is logged, {@code unLock} is called, any
     * writer that was already created is closed, and {@code null} is
     * returned. A writer that has been closed is never handed to the caller.
     *
     * @param indexDir path of the index directory
     * @param lc       configuration driving analyzer, create flag and merge settings
     * @return an open, configured writer, or {@code null} on failure
     * @throws IOException if closing a partially set-up writer fails
     */
    public synchronized IndexWriter openIndex(String indexDir, LiusConfig lc) throws LiusException, IOException {
        Analyzer analyzer = AnalyzerFactory.getAnalyzer(lc);
        IndexWriter writer = null;
        try {
            boolean createIndex = createIndexValue(lc.getCreateIndex(), indexDir);
            Directory fsDir = FSDirectory.getDirectory(indexDir, createIndex);
            writer = new IndexWriter(fsDir, analyzer, createIndex);
            setIndexWriterProps(writer, lc);
        } catch (Exception e) {
            logger.error(e.getMessage());
            unLock(indexDir);
            if (writer != null) {
                // Drop the reference before closing so the closed writer cannot be returned.
                IndexWriter failedWriter = writer;
                writer = null;
                failedWriter.close();
            }
        }
        return writer;
    }

    /**
     * Called by {@code index(String, String, String)} to run the indexing
     * process: a plain file is handed to {@code fileProcessing}, a directory
     * is walked recursively and every non-directory entry below it is handed
     * to {@code fileProcessing}. A path that is neither is ignored.
     *
     * @param toIndex  path of the file or directory to index
     * @param indexDir path of the index directory
     * @param lc       configuration used to select the indexer
     */
    private void fileDirectoryIndexing(String toIndex, String indexDir, LiusConfig lc) throws IOException {
        File typFD = new File(toIndex);
        if (typFD.isFile()) {
            fileProcessing(typFD, indexDir, lc);
            return;
        }
        if (!typFD.isDirectory()) {
            return;
        }

        File[] liste = typFD.listFiles();
        if (liste == null) {
            // listFiles() yields null when the directory cannot be read.
            logger.error("Unable to list " + toIndex);
            return;
        }
        String sep = System.getProperty("file.separator");
        for (int i = 0; i < liste.length; i++) {
            String fileToIndexB = toIndex + sep + liste[i].getName();
            File fileToIndexBF = new File(fileToIndexB);
            if (fileToIndexBF.isDirectory()) {
                fileDirectoryIndexing(fileToIndexB, indexDir, lc);
            } else {
                fileProcessing(fileToIndexBF, indexDir, lc);
            }
        }
    }

    /**
     * Called by {@code fileDirectoryIndexing} to index one file. The indexer
     * is chosen by {@code IndexerFactory} from the file and the configuration;
     * when the factory returns none, the file is silently skipped.
     *
     * @param fileToIndex the file to index
     * @param indexDir    path of the index directory passed to the indexer
     * @param lc          configuration used to select the indexer
     */
    private void fileProcessing(File fileToIndex, String indexDir, LiusConfig lc) {
        Indexer selected = IndexerFactory.getIndexer(fileToIndex, lc);
        if (selected == null) {
            return;
        }
        selected.index(indexDir);
    }

    /**
     * Forcibly removes the Lucene lock on the index stored under
     * {@code indexDir}, if one is present.
     *
     * The index is opened with a reader first, so an {@code IOException} from
     * opening it is logged and no unlock is attempted. The reader is always
     * closed before returning. Failures are logged and not rethrown.
     *
     * @param indexDir path of the index directory
     */
    public void unLock(String indexDir) {
        IndexReader reader = null;
        try {
            Directory directory = FSDirectory.getDirectory(indexDir, false);
            reader = IndexReader.open(directory);
            if (IndexReader.isLocked(directory)) {
                IndexReader.unlock(directory);
            }
        } catch (IOException e) {
            logger.error(e.getMessage());
        } finally {
            if (reader != null) {
                try {
                    // Release the reader opened above so it does not leak.
                    reader.close();
                } catch (IOException closeFailure) {
                    logger.error(closeFailure.getMessage());
                }
            }
        }
    }

    /**
     * Marks every live document of the index as deleted.
     *
     * Document numbers run from 0 to {@code maxDoc() - 1}; {@code numDocs()}
     * only counts live documents and is smaller once the index contains
     * deletions, so it must not be used as the loop bound. The reader is
     * closed in all cases, so it is not left open (nor its lock held) when a
     * deletion fails. Failures are logged and not rethrown.
     *
     * @param indexDir path of the index directory
     */
    public synchronized void deleteAllDocuments(String indexDir) {
        IndexReader ir = null;
        try {
            Directory directory = FSDirectory.getDirectory(indexDir, false);
            ir = IndexReader.open(directory);
            int maxDoc = ir.maxDoc();
            for (int i = 0; i < maxDoc; i++) {
                if (!ir.isDeleted(i)) {
                    ir.deleteDocument(i);
                }
            }
        } catch (IOException e) {
            logger.error(e.getMessage());
        } finally {
            if (ir != null) {
                try {
                    ir.close();
                } catch (IOException closeFailure) {
                    logger.error(closeFailure.getMessage());
                }
            }
        }
    }

    /**
     * Lists every live document of the index as a {@code LiusHit}.
     *
     * For each document the hit carries the document number and a map from
     * field name to a {@code LiusField} (name, label, value) for each browse
     * field of the configuration that the document actually contains.
     *
     * Document numbers run from 0 to {@code maxDoc() - 1} and deleted slots
     * are skipped; looping to {@code numDocs()} would miss documents and try
     * to read deleted ones once the index contains deletions. The reader is
     * closed in all cases. I/O failures are logged and the hits collected so
     * far are returned.
     *
     * @param indexDir path of the index directory
     * @param lc       configuration providing the browse fields to display
     * @return list of {@code LiusHit}, possibly empty
     */
    public synchronized List ListAllDocuments(String indexDir, LiusConfig lc) {
        List documentsList = new ArrayList();
        List fieldList = lc.getBrowseFieldsToDisplay();
        IndexReader ir = null;
        try {
            Directory directory = FSDirectory.getDirectory(indexDir, false);
            ir = IndexReader.open(directory);
            int maxDoc = ir.maxDoc();
            for (int i = 0; i < maxDoc; i++) {
                if (ir.isDeleted(i)) {
                    continue;
                }
                Document luceneDoc = ir.document(i);
                LiusHit lh = new LiusHit();
                lh.setDocId(i);
                Map values = new HashMap();
                for (int j = 0; j < fieldList.size(); j++) {
                    LiusField lf = (LiusField) fieldList.get(j);
                    Field f = luceneDoc.getField(lf.getName());
                    if (f != null) {
                        LiusField nlf = new LiusField();
                        nlf.setName(lf.getName());
                        nlf.setLabel(lf.getLabel());
                        nlf.setValue(f.stringValue());
                        values.put(lf.getName(), nlf);
                    }
                }
                lh.setLiusFieldsMap(values);
                documentsList.add(lh);
            }
        } catch (IOException e) {
            logger.error(e.getMessage());
        } finally {
            if (ir != null) {
                try {
                    ir.close();
                } catch (IOException closeFailure) {
                    logger.error(closeFailure.getMessage());
                }
            }
        }
        return documentsList;
    }

    /**
     * Undeletes every document of the index that is currently marked as
     * deleted.
     *
     * The reader is closed in all cases, so it is not leaked when
     * {@code undeleteAll} fails. Failures are logged and not rethrown.
     *
     * @param indexDir path of the index directory
     */
    public synchronized void unDeleteAllDocuments(String indexDir) {
        IndexReader ir = null;
        try {
            Directory directory = FSDirectory.getDirectory(indexDir, false);
            ir = IndexReader.open(directory);
            ir.undeleteAll();
        } catch (IOException e) {
            logger.error(e.getMessage());
        } finally {
            if (ir != null) {
                try {
                    ir.close();
                } catch (IOException closeFailure) {
                    logger.error(closeFailure.getMessage());
                }
            }
        }
    }

    /**
     * Requests the file-system directory for {@code indexDir} in create mode.
     * The returned directory object is not used further. An
     * {@code IOException} is logged and not rethrown.
     *
     * @param indexDir path of the index directory
     */
    public synchronized void newIndex(String indexDir) {
        try {
            FSDirectory.getDirectory(indexDir, true);
        } catch (IOException ioFailure) {
            logger.error(ioFailure.getMessage());
        }
    }

    /**
     * Applies the merge factor and the maximum number of merged documents to
     * the writer, each only when the configuration defines a value for it.
     * Values are parsed as decimal integers.
     *
     * @param writer the writer to configure
     * @param lc     configuration holding the optional settings
     */
    private void setIndexWriterProps(IndexWriter writer, LiusConfig lc) {
        if (lc.getMergeFactor() != null) {
            int mergeFactor = Integer.parseInt(lc.getMergeFactor());
            writer.setMergeFactor(mergeFactor);
        }
        if (lc.getMaxMergeDocs() != null) {
            int maxMergeDocs = Integer.parseInt(lc.getMaxMergeDocs());
            writer.setMaxMergeDocs(maxMergeDocs);
        }
    }

    /**
     * Deletes from the index every document whose {@code field} contains the
     * term {@code content}.
     *
     * The reader is closed on the failure path as well, so it is not left
     * open (nor its lock held) when the deletion throws. I/O failures are
     * logged and not rethrown.
     *
     * @param indexDir path of the index directory
     * @param field    name of the field to match
     * @param content  term text to match in that field
     * @return the number of documents deleted (0 if the deletion failed)
     */
    public synchronized int deleteDoc(String indexDir, String field, String content) throws LiusException {
        int nbDelete = 0;
        IndexReader indexReader = null;
        boolean closeAttempted = false;
        try {
            Directory fsDir = FSDirectory.getDirectory(indexDir, false);
            indexReader = IndexReader.open(fsDir);
            Term t = new Term(field, content);
            nbDelete = indexReader.deleteDocuments(t);
            // Flag first so a failing close is not retried in finally.
            closeAttempted = true;
            indexReader.close();
            logger.debug("Document supprim");
        } catch (IOException e) {
            logger.error(e.getMessage());
        } finally {
            if (indexReader != null && !closeAttempted) {
                try {
                    indexReader.close();
                } catch (IOException closeFailure) {
                    logger.error(closeFailure.getMessage());
                }
            }
        }
        return nbDelete;
    }

    /**
     * Deletes from the index every document matching the given Lucene term.
     *
     * The reader is closed on the failure path as well, so it is not left
     * open (nor its lock held) when the deletion throws. I/O failures are
     * logged and not rethrown.
     *
     * @param indexDir path of the index directory
     * @param t        the term identifying the documents to delete
     * @return the number of documents deleted (0 if the deletion failed)
     */
    public synchronized int deleteDoc(String indexDir, Term t) throws LiusException {
        int nbDelete = 0;
        IndexReader indexReader = null;
        boolean closeAttempted = false;
        try {
            Directory fsDir = FSDirectory.getDirectory(indexDir, false);
            indexReader = IndexReader.open(fsDir);
            nbDelete = indexReader.deleteDocuments(t);
            // Flag first so a failing close is not retried in finally.
            closeAttempted = true;
            indexReader.close();
            logger.info("Document supprim");
        } catch (IOException e) {
            logger.error(e.getMessage());
        } finally {
            if (indexReader != null && !closeAttempted) {
                try {
                    indexReader.close();
                } catch (IOException closeFailure) {
                    logger.error(closeFailure.getMessage());
                }
            }
        }
        return nbDelete;
    }

    /**
     * Deletes the document with the given document number from the index.
     *
     * The reader is closed on the failure path as well, so it is not left
     * open (nor its lock held) when the deletion throws. I/O failures are
     * logged and not rethrown.
     *
     * @param indexDir path of the index directory
     * @param docNum   Lucene document number to delete
     */
    public synchronized void deleteDoc(String indexDir, int docNum) throws LiusException {
        IndexReader indexReader = null;
        boolean closeAttempted = false;
        try {
            Directory fsDir = FSDirectory.getDirectory(indexDir, false);
            indexReader = IndexReader.open(fsDir);
            indexReader.deleteDocument(docNum);
            // Flag first so a failing close is not retried in finally.
            closeAttempted = true;
            indexReader.close();
            logger.info("Document supprim");
        } catch (IOException e) {
            logger.error(e.getMessage());
        } finally {
            if (indexReader != null && !closeAttempted) {
                try {
                    indexReader.close();
                } catch (IOException closeFailure) {
                    logger.error(closeFailure.getMessage());
                }
            }
        }
    }

    /**
     * Updates a document in the index: the documents matching the given term
     * are deleted, then {@code fileToReindex} is indexed in their place using
     * the XML configuration file {@code configFile}.
     *
     * @param rep           path of the index directory
     * @param t             Lucene term identifying the documents to replace
     * @param fileToReindex path of the file to index instead
     * @param configFile    path of the XML configuration file used for indexing
     */
    public synchronized void updateDoc(String rep, Term t, String fileToReindex, String configFile)
            throws LiusException, IOException {
        // Step 1: drop the old version(s).
        this.deleteDoc(rep, t);

        // Step 2: index the replacement through the singleton.
        LuceneActions actions = getSingletonInstance();
        actions.index(fileToReindex, rep, configFile);

        logger.info("Document mis  jour");
    }

    /**
     * Updates a document in the index: the documents whose {@code field}
     * contains {@code content} are deleted, then {@code fileToReindex} is
     * indexed in their place using the XML configuration file
     * {@code configFile}.
     *
     * @param rep           path of the index directory
     * @param field         name of the field holding the searched value
     * @param content       the searched value
     * @param fileToReindex path of the file to index instead
     * @param configFile    path of the XML configuration file used for indexing
     */
    public synchronized void updateDoc(String rep, String field, String content, String fileToReindex,
            String configFile) throws LiusException, IOException {
        // Step 1: drop the old version(s).
        this.deleteDoc(rep, field, content);

        // Step 2: index the replacement through the singleton.
        LuceneActions actions = getSingletonInstance();
        actions.index(fileToReindex, rep, configFile);

        logger.info("Document mis  jour");
    }

    /**
     * Translates the configured "create index" setting into the boolean
     * expected by Lucene.
     *
     * {@code "true"} yields true; {@code "auto"} yields true only when no
     * index exists yet under {@code indexDir}; {@code "false"}, any other
     * value, and {@code null} yield false. The comparisons are written
     * constant-first so that a missing setting does not raise a
     * {@code NullPointerException}.
     *
     * @param valueCreateIndex the configured value; may be null
     * @param indexDir         path of the index directory, consulted for "auto"
     * @return whether the index must be created
     */
    public boolean createIndexValue(String valueCreateIndex, String indexDir) {
        if ("true".equals(valueCreateIndex)) {
            return true;
        }
        if ("auto".equals(valueCreateIndex)) {
            return !indexExists(indexDir);
        }
        return false;
    }

    /**
     * Tells whether a Lucene index exists under {@code indexDir}, as reported
     * by {@code IndexReader.indexExists}. Nothing is created by this call.
     *
     * @param indexDir path of the index directory
     * @return the result of {@code IndexReader.indexExists(indexDir)}
     */
    public boolean indexExists(String indexDir) {
        boolean present = IndexReader.indexExists(indexDir);
        return present;
    }

    /**
     * Returns the file-system Lucene directory for {@code directoryPath},
     * requested in non-create mode.
     *
     * @param directoryPath path of the index directory
     * @return the Lucene directory
     * @throws RuntimeException wrapping the {@code IOException} if the
     *             directory cannot be obtained
     */
    public Directory getDirectory(String directoryPath) {
        Directory opened;
        try {
            opened = FSDirectory.getDirectory(directoryPath, false);
        } catch (IOException ioFailure) {
            throw new RuntimeException(ioFailure);
        }
        return opened;
    }

    /**
     * Returns the file-system Lucene directories for the given paths, in the
     * same order, each requested in non-create mode.
     *
     * @param directoryPaths paths of the index directories
     * @return one Lucene directory per path
     * @throws RuntimeException wrapping the {@code IOException} of the first
     *             path whose directory cannot be obtained
     */
    public Directory[] getDirectories(String[] directoryPaths) {
        int count = directoryPaths.length;
        Directory[] opened = new Directory[count];
        int position = 0;
        while (position < count) {
            try {
                opened[position] = FSDirectory.getDirectory(directoryPaths[position], false);
            } catch (IOException ioFailure) {
                throw new RuntimeException(ioFailure);
            }
            position++;
        }
        return opened;
    }

}