// Java tutorial
/* * Licensed to STRATIO (C) under one or more contributor license agreements. * See the NOTICE file distributed with this work for additional information * regarding copyright ownership. The STRATIO (C) licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package io.puntanegra.fhir.index.lucene; import java.nio.file.Path; import java.util.Set; import org.apache.cassandra.io.util.FileUtils; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.index.TrackingIndexWriter; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ControlledRealTimeReopenThread; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.SearcherFactory; import org.apache.lucene.search.SearcherManager; import org.apache.lucene.search.Sort; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.NRTCachingDirectory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import io.puntanegra.fhir.index.FhirIndexException; /** * @author Jorge L. 
Middleton {@literal <jorge.middleton@gmail.com>} * */ public class LuceneService { private static final Logger logger = LoggerFactory.getLogger(LuceneService.class); private Path path; private String name; private Directory directory; private IndexWriter indexWriter; private SearcherManager searcherManager; private ControlledRealTimeReopenThread<IndexSearcher> searcherReopener; // Disable max boolean query clauses limit static { BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE); } public LuceneService() { } /** * Builds a new {@link FSIndex}. * * @param name * the index name * @param mbeanName * the JMX MBean object name * @param path * the directory path * @param analyzer * the index writer analyzer * @param refresh * the index reader refresh frequency in seconds * @param ramBufferMB * the index writer RAM buffer size in MB * @param maxMergeMB * the directory max merge size in MB * @param maxCachedMB * the directory max cache size in MB * @param refreshTask * action to be done during refresh */ public void init(String name, String mbeanName, Path path, Analyzer analyzer, double refresh, int ramBufferMB, int maxMergeMB, int maxCachedMB, Runnable refreshTask) { try { this.path = path; this.name = name; // Open or create directory FSDirectory fsDirectory = FSDirectory.open(path); this.directory = new NRTCachingDirectory(fsDirectory, maxMergeMB, maxCachedMB); // Setup index writer IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); indexWriterConfig.setRAMBufferSizeMB(ramBufferMB); indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); indexWriterConfig.setUseCompoundFile(true); indexWriterConfig.setMergePolicy(new TieredMergePolicy()); this.indexWriter = new IndexWriter(this.directory, indexWriterConfig); // Setup NRT search SearcherFactory searcherFactory = new SearcherFactory() { @Override public IndexSearcher newSearcher(IndexReader reader, IndexReader previousReader) { if (refreshTask != null) { refreshTask.run(); } 
IndexSearcher searcher = new IndexSearcher(reader); searcher.setSimilarity(new NoIDFSimilarity()); return searcher; } }; TrackingIndexWriter trackingWriter = new TrackingIndexWriter(this.indexWriter); this.searcherManager = new SearcherManager(this.indexWriter, true, searcherFactory); this.searcherReopener = new ControlledRealTimeReopenThread<>(trackingWriter, this.searcherManager, refresh, refresh); this.searcherReopener.start(); // Register JMX MBean // mbean = new ObjectName(mbeanName); // ManagementFactory.getPlatformMBeanServer().registerMBean(service, // this.mbean); } catch (Exception e) { throw new FhirIndexException(e, "Error while creating index %s", name); } } /** * Upserts the specified {@link Document} by first deleting the documents * containing {@code Term} and then adding the new document. The delete and * then add are atomic as seen by a reader on the same index (flush may * happen only after the add). * * @param term * the {@link Term} to identify the document(s) to be deleted * @param document * the {@link Document} to be added */ public void upsert(Term term, Document document) { logger.debug("Indexing {} with term {} in {}", document, term, name); try { indexWriter.updateDocument(term, document); } catch (Exception e) { throw new FhirIndexException(e, "Error indexing %s with term %s in %s", document, term, name); } } /** * Deletes all the {@link Document}s containing the specified {@link Term}. * * @param term * the {@link Term} identifying the documents to be deleted */ public void delete(Term term) { logger.debug("Deleting {} from {}", term, name); try { indexWriter.deleteDocuments(term); } catch (Exception e) { throw new FhirIndexException(e, "Error deleting %s from %s", term, name); } } /** * Deletes all the {@link Document}s satisfying the specified {@link Query}. 
* * @param query * the {@link Query} identifying the documents to be deleted */ public void delete(Query query) { logger.debug("Deleting {} from {}", query, name); try { indexWriter.deleteDocuments(query); } catch (Exception e) { throw new FhirIndexException(e, "Error deleting %s from %s", query, name); } } /** * Deletes all the {@link Document}s. */ public void truncate() { try { indexWriter.deleteAll(); } catch (Exception e) { throw new FhirIndexException(e, "Error truncating %s", name); } logger.info("Truncated {}", name); } /** * Commits the pending changes. */ public void commit() { try { indexWriter.commit(); } catch (Exception e) { throw new FhirIndexException(e, "Error committing %s", name); } logger.debug("Committed {}", name); } /** * Commits all changes to the index, waits for pending merges to complete, * and closes all associated resources. */ public void close() { try { searcherReopener.interrupt(); searcherManager.close(); indexWriter.close(); directory.close(); // ManagementFactory.getPlatformMBeanServer().unregisterMBean(mbean); } catch (Exception e) { throw new FhirIndexException(e, "Error closing %s", name); } logger.info("Closed {}", name); } /** * Closes the index and removes all its files. */ public void delete() { try { close(); } catch (Exception e) { throw new FhirIndexException(e, "Error deleting %s", name); } finally { FileUtils.deleteRecursive(path.toFile()); } logger.info("Deleted {}", name); } /** * Finds the top {@code count} hits for {@code query} and sorting the hits * by {@code sort}. 
* * @param query * the {@link Query} to search for * @param sort * the {@link Sort} to be applied * @param after * the starting {@link ScoreDoc} * @param count * the max number of results to be collected * @param fields * the names of the fields to be loaded * @return the found documents, sorted according to the supplied * {@link Sort} instance */ public LuceneDocumentIterator search(Query query, Sort sort, ScoreDoc after, Integer count, Set<String> fields) { logger.debug("Searching in {}\n" + "count: {}\n" + "after: {}\n" + "query: {}\n" + " sort: {}", name, count, after, query, sort); return new LuceneDocumentIterator(searcherManager, query, sort, after, count, fields); } /** * Returns the total number of {@link Document}s in this index. * * @return the number of {@link Document}s */ public long getNumDocs() { logger.debug("Getting {} num docs", name); try { IndexSearcher searcher = searcherManager.acquire(); try { return searcher.getIndexReader().numDocs(); } finally { searcherManager.release(searcher); } } catch (Exception e) { throw new FhirIndexException(e, "Error getting %s num docs", name); } } /** * Returns the total number of deleted {@link Document}s in this index. * * @return the number of deleted {@link Document}s */ public long getNumDeletedDocs() { logger.debug("Getting %s num deleted docs", name); try { IndexSearcher searcher = searcherManager.acquire(); try { return searcher.getIndexReader().numDeletedDocs(); } finally { searcherManager.release(searcher); } } catch (Exception e) { throw new FhirIndexException(e, "Error getting %s num docs", name); } } /** * Optimizes the index forcing merge segments leaving the specified number * of segments. This operation may block until all merging completes. 
* * @param maxNumSegments * the maximum number of segments left in the index after merging * finishes * @param doWait * {@code true} if the call should block until the operation * completes */ public void forceMerge(int maxNumSegments, boolean doWait) { logger.info("Merging {} segments to {}", name, maxNumSegments); try { indexWriter.forceMerge(maxNumSegments, doWait); indexWriter.commit(); } catch (Exception e) { throw new FhirIndexException(e, "Error merging %s segments to %s", name, maxNumSegments); } logger.info("Merged {} segments to {}", name, maxNumSegments); } /** * Optimizes the index forcing merge of all segments that have deleted * documents. This operation may block until all merging completes. * * @param doWait * {@code true} if the call should block until the operation * completes */ public void forceMergeDeletes(boolean doWait) { logger.info("Merging {} segments with deletions", name); try { indexWriter.forceMergeDeletes(doWait); indexWriter.commit(); } catch (Exception e) { throw new FhirIndexException(e, "Error merging %s segments with deletion", name); } logger.info("Merged {} segments with deletions", name); } /** * Refreshes the index readers. */ public void refresh() { logger.debug("Refreshing {} readers...", name); try { commit(); searcherManager.maybeRefreshBlocking(); } catch (Exception e) { throw new FhirIndexException(e, "Error refreshing %s readers", name); } logger.debug("Refreshed {} readers", name); } }