Source code for org.lahab.clucene.server.indexer.Indexer.java

Java tutorial

Introduction

Below is the full source code for org.lahab.clucene.server.indexer.Indexer.java.

Source

package org.lahab.clucene.server.indexer;

/*
 * #%L
 * server
 * %%
 * Copyright (C) 2012 NTNU
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.lahab.clucene.core.BlobDirectoryFS;
import org.lahab.clucene.utils.CloudStorage;
import org.lahab.clucene.utils.Configuration;
import org.lahab.clucene.utils.Parametizer;
import org.lahab.clucene.utils.ParametizerException;
import org.lahab.clucene.utils.Statable;

import com.microsoft.windowsazure.services.blob.client.CloudBlockBlob;
import com.microsoft.windowsazure.services.blob.client.ListBlobItem;
import com.microsoft.windowsazure.services.core.storage.StorageException;

/**
 * A wrapper to encapsulate all communication with the index writer
 * It also keeps some stats and schedule jobs to the pool manager
 * @author charlymolter
 *
 */
public class Indexer implements Statable {
    public final static Logger LOGGER = Logger.getLogger(Indexer.class.getName());

    /** The index writer; null whenever the indexer is closed. */
    protected IndexWriter _index;
    /** The Lucene directory the writer writes to (local FS or Azure-blob-backed). */
    private Directory _directory;
    /** Pool manager used to schedule indexing jobs. */
    protected PoolManager _pool;
    /** Number of documents present in the index at the last commit. */
    private int _nbLastCommit = 0;

    /** Configuration parameters; see DEFAULTS for the recognized keys. */
    public Parametizer _params;

    /** Cloud storage wrapper holding the "directory" blob container. */
    private CloudStorage _cloudStorage;

    /** True while no index writer is open; guards stat collection in record(). */
    private boolean _close = true;

    /** Default configuration values, overridden by the Configuration passed to the ctor. */
    private static final Map<String, Object> DEFAULTS = new HashMap<String, Object>();
    static {
        DEFAULTS.put("regular", false);       // true => plain FSDirectory, no cloud backing
        DEFAULTS.put("container", "clucene"); // Azure blob container name
        DEFAULTS.put("bufferSize", 512.0);    // IndexWriter RAM buffer size, in MB
        DEFAULTS.put("folder", "indexCache"); // local folder caching the index
    }

    /**
     * Creates a new indexer writing either to a plain local directory
     * ("regular" = true) or to a local directory mirrored to Azure blob storage.
     * @param storage the cloud storage account wrapper
     * @param pool the pool manager that will run indexing jobs
     * @param config configuration overriding DEFAULTS
     * @throws Exception if the container or the directory cannot be opened
     */
    public Indexer(CloudStorage storage, PoolManager pool, Configuration config) throws Exception {
        _params = new Parametizer(DEFAULTS, config);
        _pool = pool;
        _cloudStorage = storage;
        _cloudStorage.addContainer("directory", _params.getString("container"));
        if (_params.getBoolean("regular")) {
            _directory = FSDirectory.open(new File(_params.getString("folder")));
        } else {
            _directory = new BlobDirectoryFS(_cloudStorage.getAccount(), _params.getString("container"),
                    FSDirectory.open(new File(_params.getString("folder"))));
        }
    }

    /**
     * Downloads the entire index from the blob container to the local disc,
     * wiping any files already present in the target directory first.
     * @param downloadDir the directory the index is copied to
     * @throws Exception if listing, creating or downloading a blob fails
     */
    public void download(String downloadDir) throws Exception {
        // listFiles() returns null when the path does not exist or is not a
        // directory; the original code would NPE in that case.
        final File[] files = new File(downloadDir).listFiles();
        if (files != null) {
            for (File f : files) {
                f.delete();
            }
        }

        for (ListBlobItem blobItem : _cloudStorage.getContainer("directory").listBlobs()) {
            CloudBlockBlob b = _cloudStorage.getContainer("directory")
                    .getBlockBlobReference(blobItem.getUri().toString());
            File f = new File(downloadDir + "/" + b.getName());
            if (!f.exists()) {
                f.createNewFile();
            }
            OutputStream outStream = new FileOutputStream(f);
            try {
                b.download(outStream);
            } finally {
                outStream.close(); // fix: the stream was previously leaked
            }
        }
    }

    /**
     * Adds the document to the index. The write is buffered; call
     * {@link #commit()} to make it durable.
     * @param _doc the document to add
     * @throws CorruptIndexException if the index is corrupted
     * @throws IOException on a low-level I/O error
     * @throws ParametizerException declared for interface compatibility
     */
    public void addDoc(Document _doc) throws CorruptIndexException, IOException, ParametizerException {
        _index.addDocument(_doc);
    }

    /**
     * Commits pending documents, skipping the (expensive) commit when no
     * document has been added since the last one.
     * @throws CorruptIndexException if the index is corrupted
     * @throws IOException on a low-level I/O error
     */
    public void commit() throws CorruptIndexException, IOException {
        LOGGER.info("Commit Start");
        int nbDocs = _index.maxDoc();
        if (_nbLastCommit < nbDocs) {
            _nbLastCommit = nbDocs;
            _index.commit();
            LOGGER.info("Commit done");
        } else {
            LOGGER.info("Commit unnecessary"); // typo fixed: was "unecessary"
        }
    }

    /**
     * Opens an index writer on the current directory. If a stale write lock is
     * held, the lock is cleared and the open is retried once (the original
     * code cleared the lock but silently left {@code _index} unset).
     * @throws CorruptIndexException if the index is corrupted
     * @throws IOException on a low-level I/O error (including a second lock failure)
     * @throws ParametizerException if the "bufferSize" parameter is invalid
     */
    public void open() throws CorruptIndexException, IOException, ParametizerException {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        IndexWriterConfig configWriter = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        configWriter.setRAMBufferSizeMB(_params.getDouble("bufferSize"));
        configWriter.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        configWriter.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        try {
            _index = new IndexWriter(_directory, configWriter);
        } catch (LockObtainFailedException e) {
            // Assume the lock is stale (e.g. a previous crash), clear it and retry once.
            LOGGER.warning("Write lock is taken; clearing it and retrying: " + e);
            _directory.clearLock(IndexWriter.WRITE_LOCK_NAME);
            _index = new IndexWriter(_directory, configWriter);
        }
        _nbLastCommit = _index.maxDoc();
        _close = false;
    }

    /**
     * Closes the current index writer and its directory. Safe to call when
     * already closed. Errors are logged rather than propagated, but the
     * writer reference is always released (the original could leave a
     * half-closed state if {@code _index.close()} threw).
     */
    public void close() {
        if (_index != null) {
            _close = true;
            try {
                _index.close();
            } catch (CorruptIndexException e) {
                LOGGER.severe("Index corrupted while closing: " + e);
            } catch (IOException e) {
                LOGGER.severe("I/O error while closing the index: " + e);
            } finally {
                _index = null;
                try {
                    _directory.close();
                } catch (IOException e) {
                    LOGGER.severe("I/O error while closing the directory: " + e);
                }
            }
        }
    }

    /**
     * Tells the pool to add a job to index this document.
     * @param doc Document to be indexed
     */
    public void queueDoc(Document doc) {
        _pool.addIndexJob(doc);
    }

    /**
     * Records one line of stats: number of docs, last commit generation and
     * RAM buffer size. Returns an empty collection on I/O failure instead of
     * the original {@code null} so callers never NPE.
     */
    @Override
    public Collection<? extends String> record() {
        List<String> stats = new LinkedList<String>();
        try {
            int nbDoc = _close ? 0 : _index.numDocs();
            stats.add(String.valueOf(nbDoc));
            stats.add(String.valueOf(SegmentInfos.getLastCommitGeneration(_directory)));
            stats.add(String.valueOf(_close ? 0 : _index.ramSizeInBytes()));
            LOGGER.info("Doc added:" + nbDoc);
        } catch (IOException e) {
            LOGGER.severe("Could not record index stats: " + e);
            stats.clear();
        }
        return stats;
    }

    /**
     * Returns the column headers matching the values produced by {@link #record()}.
     */
    @Override
    public Collection<? extends String> header() {
        List<String> header = new LinkedList<String>();
        header.add("nbIndex");
        header.add("nbCommits");
        header.add("sizeBuffer");
        return header;
    }

    /**
     * Deletes the whole blob container backing the index.
     * @throws StorageException if the container deletion fails
     */
    public void delete() throws StorageException {
        _cloudStorage.getContainer("directory").delete();
    }
}