// Java tutorial
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package cn.hbu.cs.esearch.store;

import java.io.IOException;
import java.util.HashMap;
import java.util.List;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import cn.hbu.cs.esearch.core.EsearchMergePolicy;
import cn.hbu.cs.esearch.core.EsearchSegmentReader;
import cn.hbu.cs.esearch.document.AbstractEsearchIndexable;
import it.unimi.dsi.fastutil.longs.Long2IntRBTreeMap;

/**
 * Store that keeps each record as one Lucene document: the payload goes into a
 * stored binary field and the uid is attached through
 * {@link EsearchSegmentReader#fillDocumentID}.
 *
 * <p>Lookups go through a near-real-time reader opened from the index writer.
 * That reader is refreshed only by {@link #open()} and
 * {@link #commitVersion(String)}, so documents persisted since the last refresh
 * are not visible to {@link #getFromStore(long)} or {@link #persistDelete(long)}.
 */
public class LuceneStore extends AbstractEsearchStore {
    private static final String VERSION_NAME = "version";
    public static final Logger LOGGER = LoggerFactory.getLogger(LuceneStore.class);

    /**
     * Snapshot of one reader together with a uid -> doc id index built from the
     * numeric doc values of
     * {@link AbstractEsearchIndexable#DOCUMENT_ID_PAYLOAD_FIELD}.
     * NOTE(review): presumably that is the field written by
     * {@code EsearchSegmentReader.fillDocumentID} -- confirm.
     */
    private static class ReaderData {
        final DirectoryReader reader;
        /** Maps uid to the doc id in {@link #reader} (top-level, not segment-local); -1 when absent. */
        final Long2IntRBTreeMap uidMap;
        final long minimalUID;
        final long maximalUID;

        /**
         * Scans every live document of {@code reader} and records its uid.
         *
         * @param reader reader to index; owned by this instance afterwards
         * @throws IOException if the doc values cannot be read
         */
        ReaderData(DirectoryReader reader) throws IOException {
            this.reader = reader;
            uidMap = new Long2IntRBTreeMap();
            uidMap.defaultReturnValue(-1);

            if (reader.maxDoc() == 0) {
                minimalUID = Long.MIN_VALUE;
                maximalUID = Long.MIN_VALUE;
                return;
            }

            long minUID = Long.MAX_VALUE;
            long maxUID = Long.MIN_VALUE;
            List<AtomicReaderContext> leaves = reader.getContext().leaves();
            for (AtomicReaderContext context : leaves) {
                AtomicReader atomicReader = context.reader();
                NumericDocValues uidValues =
                        atomicReader.getNumericDocValues(AbstractEsearchIndexable.DOCUMENT_ID_PAYLOAD_FIELD);
                if (uidValues == null) {
                    // Segment carries no uid doc values: nothing to index here.
                    continue;
                }
                Bits liveDocs = atomicReader.getLiveDocs();
                int segmentMaxDoc = atomicReader.maxDoc();
                for (int i = 0; i < segmentMaxDoc; ++i) {
                    if (liveDocs != null && !liveDocs.get(i)) {
                        continue;
                    }
                    long uid = uidValues.get(i);
                    if (uid < minUID) {
                        minUID = uid;
                    }
                    if (uid > maxUID) {
                        maxUID = uid;
                    }
                    // i is relative to this segment; reader.document(int) expects
                    // an id relative to the whole reader, hence the docBase offset.
                    uidMap.put(uid, context.docBase + i);
                }
            }
            minimalUID = minUID;
            maximalUID = maxUID;
        }

        /**
         * Looks up the doc id recorded for {@code uid}.
         *
         * @return the doc id within {@link #reader}, or -1 if the uid is unknown
         */
        int docIdOf(long uid) {
            if (uid < minimalUID || uid > maximalUID) {
                return -1;
            }
            return uidMap.get(uid);
        }

        /**
         * Converts a doc id of {@link #reader} into the id relative to the
         * segment that contains it.
         *
         * @return the segment-local id, or -1 if no segment covers {@code docid}
         */
        int segmentLocalDocId(int docid) {
            for (AtomicReaderContext context : reader.getContext().leaves()) {
                int local = docid - context.docBase;
                if (local >= 0 && local < context.reader().maxDoc()) {
                    return local;
                }
            }
            return -1;
        }

        /** Closes the wrapped reader; failures are logged, not propagated. */
        void close() {
            if (this.reader != null) {
                try {
                    this.reader.close();
                } catch (IOException e) {
                    LOGGER.error(e.getMessage(), e);
                }
            }
        }
    }

    /** Iterator over exactly one document id. */
    private static final class SingleDocIterator extends DocIdSetIterator {
        private final int doc;
        private int current = -1;

        SingleDocIterator(int doc) {
            this.doc = doc;
        }

        @Override
        public int nextDoc() throws IOException {
            current = (current == -1) ? doc : NO_MORE_DOCS;
            return current;
        }

        @Override
        public int docID() {
            return current;
        }

        @Override
        public int advance(int target) throws IOException {
            // advance() must land on the first doc >= target, so target == doc
            // still matches as long as the iterator has not moved yet.
            current = (current == -1 && target <= doc) ? doc : NO_MORE_DOCS;
            return current;
        }

        @Override
        public long cost() {
            return 1;
        }
    }

    /**
     * Filter matching at most one document per segment: the one at
     * {@code localDocId}, and only if its uid doc value equals {@code uid}.
     * The uid check matters because the filter is evaluated against each
     * segment separately with segment-local ids; without it the same position
     * would match in every segment.
     */
    private static final class UidDeleteFilter extends Filter {
        private final long uid;
        private final int localDocId;

        UidDeleteFilter(long uid, int localDocId) {
            this.uid = uid;
            this.localDocId = localDocId;
        }

        @Override
        public DocIdSet getDocIdSet(AtomicReaderContext readerCtx, Bits acceptedDocs) throws IOException {
            AtomicReader segment = readerCtx.reader();
            if (localDocId >= segment.maxDoc()) {
                return null;
            }
            if (acceptedDocs != null && !acceptedDocs.get(localDocId)) {
                return null;
            }
            NumericDocValues uidValues =
                    segment.getNumericDocValues(AbstractEsearchIndexable.DOCUMENT_ID_PAYLOAD_FIELD);
            if (uidValues == null || uidValues.get(localDocId) != uid) {
                return null;
            }
            return new DocIdSet() {
                @Override
                public DocIdSetIterator iterator() throws IOException {
                    return new SingleDocIterator(localDocId);
                }
            };
        }
    }

    private final String field;
    private final Directory directory;
    private IndexWriter indexWriter;
    /** Reader generation used for lookups. */
    private volatile ReaderData currentReaderData;
    /** Previous generation, kept open for one more refresh before being closed. */
    private volatile ReaderData oldReaderData;
    private volatile boolean closed = true;

    private LuceneStore(Directory dir, String field) throws IOException {
        this.field = field;
        indexWriter = null;
        directory = dir;
    }

    /**
     * Opens the index writer on the configured directory and loads the first
     * reader. Does nothing if the store is already open.
     *
     * @throws IOException if the writer or reader cannot be opened
     */
    @Override
    public void open() throws IOException {
        if (!closed) {
            return;
        }
        IndexWriterConfig idxWriterConfig =
                new IndexWriterConfig(Version.LUCENE_43, new StandardAnalyzer(Version.LUCENE_43));
        idxWriterConfig.setMergePolicy(new EsearchMergePolicy());
        idxWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        indexWriter = new IndexWriter(directory, idxWriterConfig);
        updateReader();
        closed = false;
    }

    /**
     * Refreshes the lookup reader from the writer. When the index has not
     * changed since the current reader was opened nothing happens. Otherwise
     * the current generation becomes the old one and the generation before
     * that is closed.
     */
    private void updateReader() throws IOException {
        ReaderData previous = currentReaderData;

        DirectoryReader refreshed;
        if (previous == null) {
            refreshed = DirectoryReader.open(indexWriter, true);
        } else {
            refreshed = DirectoryReader.openIfChanged(previous.reader, indexWriter, true);
            if (refreshed == null) {
                // no updates were applied, no need to refresh
                return;
            }
        }

        ReaderData readerData;
        boolean built = false;
        try {
            readerData = new ReaderData(refreshed);
            built = true;
        } finally {
            if (!built) {
                // Do not leak the new reader when indexing its uids failed.
                try {
                    refreshed.close();
                } catch (IOException e) {
                    LOGGER.error(e.getMessage(), e);
                }
            }
        }

        ReaderData expired = oldReaderData;
        oldReaderData = previous;
        currentReaderData = readerData;
        if (expired != null) {
            expired.close();
        }
    }

    /**
     * Creates a store on {@code idxDir} and opens it.
     *
     * @param idxDir         directory holding the Lucene index
     * @param field          name of the stored field that carries the payload
     * @param compressionOff {@code true} to disable data compression
     * @return the opened store
     * @throws IOException if the index cannot be opened
     */
    public static EsearchStore openStore(Directory idxDir, String field, boolean compressionOff)
            throws IOException {
        LuceneStore store = new LuceneStore(idxDir, field);
        store.setDataCompressed(!compressionOff);
        store.open();
        return store;
    }

    /** Resolves {@code uid} against {@code data}; -1 when {@code data} is null or the uid is unknown. */
    private static int mapDocId(ReaderData data, long uid) {
        return data == null ? -1 : data.docIdOf(uid);
    }

    /**
     * Adds a document holding {@code data} under {@code uid}.
     *
     * @throws IOException if the writer rejects the document
     */
    @Override
    protected void persist(long uid, byte[] data) throws IOException {
        Document doc = new Document();
        doc.add(new StoredField(field, data));
        EsearchSegmentReader.fillDocumentID(doc, uid);
        indexWriter.addDocument(doc);
    }

    /**
     * Deletes the document recorded for {@code uid} in the current reader
     * generation and drops the uid from that generation's lookup map. Uids the
     * current reader does not know are ignored.
     *
     * @throws IOException if the writer fails to register the delete
     */
    @Override
    protected void persistDelete(long uid) throws IOException {
        // One snapshot, so doc id and map belong to the same reader.
        ReaderData data = currentReaderData;
        int docid = mapDocId(data, uid);
        if (docid < 0) {
            return;
        }
        int localDocId = data.segmentLocalDocId(docid);
        if (localDocId < 0) {
            return;
        }

        Query deleteQ = new ConstantScoreQuery(new UidDeleteFilter(uid, localDocId));
        indexWriter.deleteDocuments(deleteQ);
        data.uidMap.remove(uid);
    }

    /**
     * Reads the payload stored for {@code uid}.
     *
     * @return the stored bytes, or {@code null} if the uid is unknown to the
     *         current reader or the document has no value for the field
     * @throws IOException if the document cannot be loaded
     */
    @Override
    protected BytesRef getFromStore(long uid) throws IOException {
        // One snapshot, so the doc id is used on the reader it was computed for.
        ReaderData data = currentReaderData;
        int docid = mapDocId(data, uid);
        if (docid < 0) {
            return null;
        }
        IndexReader reader = data.reader;
        if (reader == null) {
            return null;
        }
        Document doc = reader.document(docid);
        if (doc == null) {
            return null;
        }
        return doc.getBinaryValue(field);
    }

    /**
     * Commits pending changes with {@code version} recorded in the commit data
     * under the key {@code "version"}, then refreshes the lookup reader.
     *
     * @throws IOException if the commit or the refresh fails
     */
    @Override
    protected void commitVersion(String version) throws IOException {
        HashMap<String, String> versionMap = new HashMap<String, String>();
        versionMap.put(VERSION_NAME, version);
        indexWriter.setCommitData(versionMap);
        indexWriter.prepareCommit();
        indexWriter.commit();
        updateReader();
    }

    /**
     * Closes the writer and both reader generations. Does nothing if the store
     * is already closed. If closing the writer fails the store stays open and
     * the readers are left untouched.
     *
     * @throws IOException if the writer cannot be closed
     */
    @Override
    public void close() throws IOException {
        if (closed) {
            return;
        }
        indexWriter.close();
        closed = true;

        ReaderData current = currentReaderData;
        ReaderData old = oldReaderData;
        currentReaderData = null;
        oldReaderData = null;
        if (old != null) {
            old.close();
        }
        if (current != null) {
            current.close();
        }
    }
}