cn.hbu.cs.esearch.index.RAMSearchIndex.java Source code

Java tutorial

Introduction

Here is the source code for cn.hbu.cs.esearch.index.RAMSearchIndex.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cn.hbu.cs.esearch.index;

import java.io.File;
import java.io.IOException;
import java.util.concurrent.TimeoutException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import cn.hbu.cs.esearch.core.EsearchMergePolicy;
import cn.hbu.cs.esearch.core.EsearchMultiReader;
import cn.hbu.cs.esearch.document.DocIDMapper;
import cn.hbu.cs.esearch.util.FileUtil;
import cn.hbu.cs.esearch.util.IndexUtil;
import it.unimi.dsi.fastutil.longs.LongSet;

/**
 * Search index backed by the {@link Directory} handed to the constructor
 * (named {@code ramIdxDir} by the caller, so presumably an in-memory
 * directory - confirm against callers).
 *
 * <p>The index keeps a single "current" reader that is swapped by
 * {@link #refresh()} and a version string that is published through
 * {@link #setVersion(String)}. Callers that need a minimum version can block
 * in {@link #openIndexReader(String, long)} until that version is published.
 *
 * @param <R> type of the decorated index reader
 */
public class RAMSearchIndex<R extends IndexReader> extends BaseSearchIndex<R> {
    public static final Logger log = LoggerFactory.getLogger(RAMSearchIndex.class);

    /** Longest single wait, in milliseconds, before the version is re-checked. */
    private static final long VERSION_POLL_INTERVAL_MS = 100L;

    private volatile String _version;
    private final Directory _directory;
    private final File _backingdir;
    private final IndexReaderDecorator<R> _decorator;
    // a consistent pair of reader and deleted set
    private volatile EsearchMultiReader<R> _currentReader;
    private final EsearchMergePolicy.MergePolicyParams _mergePolicyParams;

    /** Monitor used to wake threads waiting for a newer version. */
    private final Object readerOpenLock = new Object();

    /**
     * Creates an index over {@code ramIdxDir}. No reader is opened until
     * {@link #refresh()} is called.
     *
     * @param version    initial version string of this index
     * @param decorator  decorator passed to every reader opened by this index
     * @param idxMgr     owning index manager (also supplies the doc-id mapper factory)
     * @param ramIdxDir  directory holding the index; closed by {@link #close()}
     * @param backingdir optional on-disk directory removed by {@link #close()}; may be {@code null}
     */
    public RAMSearchIndex(String version, IndexReaderDecorator<R> decorator, SearchIndexManager<R> idxMgr,
            Directory ramIdxDir, File backingdir) {
        super(idxMgr, true);
        _directory = ramIdxDir;
        _backingdir = backingdir;
        _version = version;
        _decorator = decorator;
        _currentReader = null;
        _mergeScheduler = new SerialMergeScheduler();
        _mergePolicyParams = new EsearchMergePolicy.MergePolicyParams();
        _mergePolicyParams.setNumLargeSegments(3);
        _mergePolicyParams.setMergeFactor(3);
        _mergePolicyParams.setMaxSmallSegments(4);
    }

    /**
     * Closes the base index, releases this index's reference on the current
     * reader, closes the directory and, when a backing directory was given,
     * removes it. An {@link IOException} during the directory cleanup is
     * logged and not propagated.
     */
    @Override
    public void close() {
        super.close();
        // read the volatile field once so the null check and the release see the same reader
        EsearchMultiReader<R> reader = _currentReader;
        if (reader != null) {
            reader.decEsearchRef();
        }
        if (_directory != null) {
            try {
                _directory.close();
                if (_backingdir != null) {
                    FileUtil.rmDir(_backingdir);
                }
            } catch (IOException e) {
                // pass the exception as the throwable argument so the stack trace is logged
                log.error("failed to release RAM index resources", e);
            }
        }
    }

    /**
     * @return the most recently published version string
     */
    @Override
    public String getVersion() {
        return _version;
    }

    /**
     * Publishes a new version and wakes every thread blocked in
     * {@link #openIndexReader(String, long)}.
     *
     * @param version the new version string
     * @throws IOException declared by the overridden method; not thrown by this implementation
     */
    @Override
    public void setVersion(String version) throws IOException {
        _version = version;
        synchronized (readerOpenLock) {
            readerOpenLock.notifyAll();
        }
    }

    /**
     * @return the document count of the current reader, or {@code 0} when no reader is open
     */
    @Override
    public int getNumdocs() {
        EsearchMultiReader<R> reader;
        synchronized (this) {
            reader = openIndexReader();
            if (reader == null) {
                return 0;
            }
            // take the reference under the same lock refresh() uses to swap readers
            reader.incEsearchRef();
        }
        try {
            return reader.numDocs();
        } finally {
            // always give the reference back, even if numDocs() fails
            reader.decEsearchRef();
        }
    }

    /**
     * @return the current reader, or {@code null} if none has been opened yet
     */
    @Override
    public EsearchMultiReader<R> openIndexReader() {
        return _currentReader;
    }

    /**
     * Opens a fresh decorated reader over the directory and attaches a doc-id mapper.
     *
     * @return the new reader, or {@code null} when the directory contains no index
     * @throws IOException if the reader cannot be opened or decorated
     */
    private EsearchMultiReader<R> openIndexReaderInternal() throws IOException {
        if (!DirectoryReader.indexExists(_directory)) {
            return null; // null indicates no index exist, following the contract
        }

        DirectoryReader srcReader = null;
        boolean success = false;
        try {
            // for RAM indexes, just get a new index reader
            srcReader = DirectoryReader.open(_directory);
            EsearchMultiReader<R> finalReader = new EsearchMultiReader<R>(srcReader, _decorator);
            DocIDMapper mapper = _idxMgr._docIDMapperFactory.getDocIDMapper(finalReader);
            finalReader.setDocIDMapper(mapper);
            success = true;
            return finalReader;
        } finally {
            // if decoration fails for any reason (checked or unchecked), still close the source reader
            if (!success && srcReader != null) {
                try {
                    srcReader.close();
                } catch (IOException closeFailure) {
                    // log instead of rethrowing so the original failure is the one that propagates
                    log.warn("failed to close source reader after a failed open", closeFailure);
                }
            }
        }
    }

    /**
     * Returns the index writer, creating and caching it on first use.
     *
     * @param analyzer   analyzer for the writer configuration
     * @param similarity similarity to install; the configuration default is kept when {@code null}
     * @return the cached or newly created writer
     * @throws IOException if the writer cannot be created
     */
    @Override
    public IndexWriter openIndexWriter(Analyzer analyzer, Similarity similarity) throws IOException {

        if (_indexWriter != null) {
            return _indexWriter;
        }

        EsearchMergePolicy mergePolicy = new EsearchMergePolicy();
        mergePolicy.setMergePolicyParams(_mergePolicyParams);
        mergePolicy.setUseCompoundFile(false);

        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, analyzer);
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);
        config.setMergeScheduler(_mergeScheduler);
        config.setMergePolicy(mergePolicy);
        config.setReaderPooling(false);
        if (similarity != null) {
            config.setSimilarity(similarity);
        }
        config.setRAMBufferSizeMB(3);

        IndexWriter idxWriter = new IndexWriter(_directory, config);
        _indexWriter = idxWriter;
        return idxWriter;
    }

    /**
     * Returns the current reader once the published version is at least
     * {@code minVersion}, blocking for up to {@code timeout} milliseconds.
     *
     * <p>An interrupt received while waiting does not abort the wait; the
     * thread's interrupt status is restored before this method returns or throws.
     *
     * @param minVersion lowest acceptable version, compared with the index's version comparator
     * @param timeout    maximum wait in milliseconds; a negative value waits without limit
     * @return the current reader (may be {@code null} if no reader has been opened)
     * @throws IOException      declared for callers; not thrown by this implementation
     * @throws TimeoutException if the version is not reached within {@code timeout}
     */
    public EsearchMultiReader<R> openIndexReader(String minVersion, long timeout)
            throws IOException, TimeoutException {

        final long maxWait = timeout < 0 ? Long.MAX_VALUE : timeout;

        if (_versionComparator.compare(minVersion, _version) <= 0) {
            return _currentReader;
        }

        final long startTimer = System.currentTimeMillis();
        boolean interrupted = false;
        try {
            synchronized (readerOpenLock) {
                // test the condition while holding the lock so a notifyAll() from
                // setVersion() cannot slip in between the test and the wait
                while (_versionComparator.compare(minVersion, _version) > 0) {
                    // clamp at zero so a backwards clock step cannot produce a negative wait
                    long elapsed = Math.max(0L, System.currentTimeMillis() - startTimer);
                    if (elapsed >= maxWait) {
                        throw new TimeoutException("timed-out, took: " + elapsed + " ms");
                    }
                    try {
                        // never sleep past the caller's deadline; the argument is always >= 1
                        readerOpenLock.wait(Math.min(VERSION_POLL_INTERVAL_MS, maxWait - elapsed));
                    } catch (InterruptedException e) {
                        // keep waiting, but remember the interrupt for the caller
                        interrupted = true;
                    }
                }
            }
            return _currentReader;
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * Brings the current reader up to date: opens one if none exists, otherwise
     * reopens it. When a different reader results, it becomes the current reader
     * and this index's reference on the previous one is released. Afterwards the
     * delete set is cleared, re-marked from its previous contents and committed.
     *
     * @throws IOException if opening or reopening the reader fails
     */
    @Override
    public void refresh() throws IOException {
        synchronized (this) {
            final EsearchMultiReader<R> previous = _currentReader;
            EsearchMultiReader<R> reader;
            if (previous == null) {
                reader = openIndexReaderInternal();
            } else {
                reader = previous.reopen();
                if (reader != previous) {
                    // a genuinely new reader needs its own doc-id mapper
                    DocIDMapper mapper = _idxMgr._docIDMapperFactory.getDocIDMapper(reader);
                    reader.setDocIDMapper(mapper);
                }
            }

            if (previous != reader) {
                _currentReader = reader;
                if (previous != null) {
                    previous.decEsearchRef();
                }
            }
            LongSet delDocs = _delDocs;
            clearDeletes();
            markDeletes(delDocs); // re-mark deletes
            commitDeletes();
        }
    }

    /**
     * @return the number of segments in the directory, or {@code -1} when there is no directory
     * @throws IOException if the segment information cannot be read
     */
    public int getSegmentCount() throws IOException {
        return _directory == null ? -1 : IndexUtil.getNumSegments(_directory);
    }
}