cn.hbu.cs.esearch.store.LuceneStore.java Source code

Java tutorial

Introduction

Here is the source code for cn.hbu.cs.esearch.store.LuceneStore.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cn.hbu.cs.esearch.store;

import java.io.IOException;
import java.util.HashMap;
import java.util.List;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import cn.hbu.cs.esearch.core.EsearchMergePolicy;
import cn.hbu.cs.esearch.core.EsearchSegmentReader;
import cn.hbu.cs.esearch.document.AbstractEsearchIndexable;
import it.unimi.dsi.fastutil.longs.Long2IntRBTreeMap;

public class LuceneStore extends AbstractEsearchStore {

    private static final String VERSION_NAME = "version";
    public static final Logger LOGGER = LoggerFactory.getLogger(LuceneStore.class);

    private static class ReaderData {
        final IndexReader reader;
        final Long2IntRBTreeMap uidMap;
        final long minimalUID;
        final long maxiuamUID;

        ReaderData(IndexReader reader) throws IOException {
            this.reader = reader;
            long minUID = Long.MAX_VALUE;
            long maxUID = Long.MIN_VALUE;

            uidMap = new Long2IntRBTreeMap();
            uidMap.defaultReturnValue(-1);
            int maxDoc = reader.maxDoc();
            if (maxDoc == 0) {
                minimalUID = Long.MIN_VALUE;
                maxiuamUID = Long.MIN_VALUE;
                return;
            }

            List<AtomicReaderContext> leaves = reader.getContext().leaves();
            for (AtomicReaderContext context : leaves) {
                AtomicReader atomicReader = context.reader();
                NumericDocValues uidValues = atomicReader
                        .getNumericDocValues(AbstractEsearchIndexable.DOCUMENT_ID_PAYLOAD_FIELD);
                Bits liveDocs = atomicReader.getLiveDocs();
                for (int i = 0; i < atomicReader.maxDoc(); ++i) {
                    if (liveDocs == null || liveDocs.get(i)) {
                        long uid = uidValues.get(i);
                        if (uid < minUID) {
                            minUID = uid;
                        }
                        if (uid > maxUID) {
                            maxUID = uid;
                        }
                        uidMap.put(uid, i);
                    }
                }
            }
            minimalUID = minUID;
            maxiuamUID = maxUID;
        }

        void close() {
            if (this.reader != null) {
                try {
                    this.reader.close();
                } catch (IOException e) {
                    LOGGER.error(e.getMessage(), e);
                }
            }

        }
    }

    private final String field;
    private final Directory directory;
    private IndexWriter indexWriter;
    private volatile ReaderData currentReaderData;
    private volatile ReaderData oldReaderData;
    private volatile boolean closed = true;

    private LuceneStore(Directory dir, String field) throws IOException {
        this.field = field;
        indexWriter = null;
        directory = dir;
    }

    @Override
    public void open() throws IOException {
        if (closed) {
            IndexWriterConfig idxWriterConfig = new IndexWriterConfig(Version.LUCENE_43,
                    new StandardAnalyzer(Version.LUCENE_43));
            idxWriterConfig.setMergePolicy(new EsearchMergePolicy());
            idxWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
            indexWriter = new IndexWriter(directory, idxWriterConfig);
            updateReader();
            closed = false;
        }
    }

    private void updateReader() throws IOException {

        IndexReader oldReader = null;

        if (currentReaderData != null) {
            oldReader = currentReaderData.reader;
        }

        IndexReader idxReader = DirectoryReader.open(indexWriter, true);

        // if reader did not change, no updates were applied, not need to refresh
        if (idxReader == oldReader) {
            return;
        }

        ReaderData readerData = new ReaderData(idxReader);
        currentReaderData = readerData;
        if (oldReaderData != null) {
            ReaderData tmpOld = oldReaderData;
            oldReaderData = currentReaderData;
            tmpOld.close();
        }
        currentReaderData = readerData;
    }

    public static EsearchStore openStore(Directory idxDir, String field, boolean compressionOff)
            throws IOException {
        LuceneStore store = new LuceneStore(idxDir, field);
        store.setDataCompressed(!compressionOff);
        store.open();
        return store;
    }

    private int mapDocId(long uid) {
        if (currentReaderData != null) {
            if (currentReaderData.maxiuamUID >= uid && currentReaderData.minimalUID <= uid) {
                return currentReaderData.uidMap.get(uid);
            }
        }
        return -1;
    }

    @Override
    protected void persist(long uid, byte[] data) throws IOException {
        Document doc = new Document();
        doc.add(new StoredField(field, data));
        EsearchSegmentReader.fillDocumentID(doc, uid);
        indexWriter.addDocument(doc);
    }

    @Override
    protected void persistDelete(long uid) throws IOException {
        final int docid = mapDocId(uid);
        if (docid < 0) {
            return;
        }

        Query deleteQ = new ConstantScoreQuery(new Filter() {

            @Override
            public DocIdSet getDocIdSet(AtomicReaderContext readerCtx, Bits acceptedDocs) throws IOException {
                return new DocIdSet() {

                    @Override
                    public DocIdSetIterator iterator() throws IOException {
                        return new DocIdSetIterator() {
                            int currId = -1;

                            @Override
                            public int nextDoc() throws IOException {
                                if (currId == -1) {
                                    currId = docid;
                                } else {
                                    currId = DocIdSetIterator.NO_MORE_DOCS;
                                }
                                return currId;
                            }

                            @Override
                            public int docID() {
                                return currId;
                            }

                            @Override
                            public int advance(int target) throws IOException {
                                if (currId != DocIdSetIterator.NO_MORE_DOCS) {
                                    if (target < docid) {
                                        currId = docid;
                                    } else {
                                        currId = DocIdSetIterator.NO_MORE_DOCS;
                                    }
                                }
                                return currId;
                            }

                            @Override
                            public long cost() {
                                // TODO Auto-generated method stub
                                return 0;
                            }
                        };
                    }

                };
            }

        });
        indexWriter.deleteDocuments(deleteQ);
        if (currentReaderData != null) {
            currentReaderData.uidMap.remove(uid);
        }

    }

    @Override
    protected BytesRef getFromStore(long uid) throws IOException {
        int docid = mapDocId(uid);
        if (docid < 0) {
            return null;
        }
        IndexReader reader = null;
        if (currentReaderData != null) {
            reader = currentReaderData.reader;
        }
        if (docid >= 0 && reader != null) {
            Document doc = reader.document(docid);
            if (doc != null) {
                return doc.getBinaryValue(field);
            }
        }
        return null;
    }

    @Override
    protected void commitVersion(String version) throws IOException {
        HashMap<String, String> versionMap = new HashMap<String, String>();
        versionMap.put(VERSION_NAME, version);
        indexWriter.setCommitData(versionMap);
        indexWriter.prepareCommit();
        indexWriter.commit();
        updateReader();
    }

    @Override
    public void close() throws IOException {
        if (!closed) {
            indexWriter.close();
            closed = true;
        }
    }
}