List of usage examples for org.apache.lucene.index.IndexWriterConfig#setMergePolicy
@Override
public IndexWriterConfig setMergePolicy(MergePolicy mergePolicy)
From source file:com.lucid.solr.sidecar.SidecarIndexReaderFactory.java
License:Apache License
DirectoryReader buildParallelReader(DirectoryReader main, SolrIndexSearcher source, boolean rebuild) { try {/* ww w. jav a 2s. c o m*/ if (source == null) { throw new Exception("Source collection is missing."); } // create as a sibling path of the main index Directory d = main.directory(); File primaryDir = null; if (d instanceof FSDirectory) { String path = ((FSDirectory) d).getDirectory().getPath(); primaryDir = new File(path); sidecarIndex = new File(primaryDir.getParentFile(), sidecarIndexLocation); } else { String secondaryPath = System.getProperty("java.io.tmpdir") + File.separator + sidecarIndexLocation + "-" + System.currentTimeMillis(); sidecarIndex = new File(secondaryPath); } // create a new tmp dir for the secondary indexes File secondaryIndex = new File(sidecarIndex, System.currentTimeMillis() + "-index"); if (rebuild) { safeDelete(sidecarIndex); } parallelFields.addAll(source.getFieldNames()); parallelFields.remove("id"); LOG.debug("building a new index"); Directory dir = FSDirectory.open(secondaryIndex); if (IndexWriter.isLocked(dir)) { // try forcing unlock try { IndexWriter.unlock(dir); } catch (Exception e) { LOG.warn("Failed to unlock " + secondaryIndex); } } int[] mergeTargets; AtomicReader[] subReaders = SidecarIndexReader.getSequentialSubReaders(main); if (subReaders == null || subReaders.length == 0) { mergeTargets = new int[] { main.maxDoc() }; } else { mergeTargets = new int[subReaders.length]; for (int i = 0; i < subReaders.length; i++) { mergeTargets[i] = subReaders[i].maxDoc(); } } Version ver = currentCore.getLatestSchema().getDefaultLuceneMatchVersion(); IndexWriterConfig cfg = new IndexWriterConfig(ver, currentCore.getLatestSchema().getAnalyzer()); //cfg.setInfoStream(System.err); cfg.setMergeScheduler(new SerialMergeScheduler()); cfg.setMergePolicy(new SidecarMergePolicy(mergeTargets, false)); IndexWriter iw = new IndexWriter(dir, cfg); LOG.info("processing " + main.maxDoc() + " docs / " + main.numDeletedDocs() + " dels in main 
index"); int boostedDocs = 0; Bits live = MultiFields.getLiveDocs(main); int targetPos = 0; int nextTarget = mergeTargets[targetPos]; BytesRef idRef = new BytesRef(); for (int i = 0; i < main.maxDoc(); i++) { if (i == nextTarget) { iw.commit(); nextTarget = nextTarget + mergeTargets[++targetPos]; } if (live != null && !live.get(i)) { addDummy(iw); // this is required to preserve doc numbers. continue; } else { DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(docIdField); main.document(i, visitor); Document doc = visitor.getDocument(); // get docId String id = doc.get(docIdField); if (id == null) { LOG.debug("missing id, docNo=" + i); addDummy(iw); continue; } else { // find the data, if any doc = lookup(source, id, idRef, parallelFields); if (doc == null) { LOG.debug("missing boost data, docId=" + id); addDummy(iw); continue; } else { LOG.debug("adding boost data, docId=" + id + ", b=" + doc); iw.addDocument(doc); boostedDocs++; } } } } iw.close(); DirectoryReader other = DirectoryReader.open(dir); LOG.info("SidecarIndexReader with " + boostedDocs + " boosted documents."); SidecarIndexReader pr = createSidecarIndexReader(main, other, sourceCollection, secondaryIndex); return pr; } catch (Exception e) { LOG.warn("Unable to build parallel index: " + e.toString(), e); LOG.warn("Proceeding with single main index."); try { return new SidecarIndexReader(this, main, null, SidecarIndexReader.getSequentialSubReaders(main), sourceCollection, null); } catch (Exception e1) { LOG.warn("Unexpected exception, returning single main index", e1); return main; } } }
From source file:com.qwazr.search.bench.LuceneCommonIndex.java
License:Apache License
LuceneCommonIndex(final Path rootDirectory, final String schemaName, final String indexName, final double ramBufferSize, final boolean useCompoundFile) throws IOException { final Path schemaDirectory = Files.createDirectory(rootDirectory.resolve(schemaName)); this.indexDirectory = Files.createDirectory(schemaDirectory.resolve(indexName)); this.luceneDirectory = indexDirectory.resolve("data"); this.dataDirectory = FSDirectory.open(luceneDirectory); final IndexWriterConfig indexWriterConfig = new IndexWriterConfig( new PerFieldAnalyzerWrapper(new StandardAnalyzer())); indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); indexWriterConfig.setRAMBufferSizeMB(ramBufferSize); final ConcurrentMergeScheduler mergeScheduler = new ConcurrentMergeScheduler(); mergeScheduler.setMaxMergesAndThreads(MAX_SSD_MERGE_THREADS, MAX_SSD_MERGE_THREADS); indexWriterConfig.setMergeScheduler(mergeScheduler); indexWriterConfig.setUseCompoundFile(useCompoundFile); final TieredMergePolicy mergePolicy = new TieredMergePolicy(); indexWriterConfig.setMergePolicy(mergePolicy); // We use snapshots deletion policy final SnapshotDeletionPolicy snapshotDeletionPolicy = new SnapshotDeletionPolicy( indexWriterConfig.getIndexDeletionPolicy()); indexWriterConfig.setIndexDeletionPolicy(snapshotDeletionPolicy); this.indexWriter = new IndexWriter(this.dataDirectory, indexWriterConfig); this.localReplicator = new LocalReplicator(); }
From source file:com.rocana.lucene.codec.v1.TestRocanaPerFieldPostingsFormat2.java
License:Apache License
private IndexWriter newWriter(Directory dir, IndexWriterConfig conf) throws IOException { LogDocMergePolicy logByteSizeMergePolicy = new LogDocMergePolicy(); logByteSizeMergePolicy.setNoCFSRatio(0.0); // make sure we use plain // files//from ww w .j a v a 2 s . c o m conf.setMergePolicy(logByteSizeMergePolicy); final IndexWriter writer = new IndexWriter(dir, conf); return writer; }
From source file:com.stratio.cassandra.index.LuceneIndex.java
License:Apache License
/** * Initializes this using the specified {@link Sort} for trying to keep the {@link Document}s sorted. * * @param sort The {@link Sort} to be used. *//*from w ww . j a va 2 s . c o m*/ public void init(Sort sort) { Log.debug("Initializing index"); try { this.sort = sort; // Get directory file file = new File(path); // Open or create directory FSDirectory fsDirectory = FSDirectory.open(file); directory = new NRTCachingDirectory(fsDirectory, maxMergeMB, maxCachedMB); // Setup index writer IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_48, analyzer); config.setRAMBufferSizeMB(ramBufferMB); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); config.setUseCompoundFile(true); config.setMergePolicy(new SortingMergePolicy(config.getMergePolicy(), sort)); indexWriter = new IndexWriter(directory, config); // Setup NRT search SearcherFactory searcherFactory = new SearcherFactory() { public IndexSearcher newSearcher(IndexReader reader) throws IOException { IndexSearcher searcher = new IndexSearcher(reader); searcher.setSimilarity(new NoIDFSimilarity()); return searcher; } }; TrackingIndexWriter trackingIndexWriter = new TrackingIndexWriter(indexWriter); searcherManager = new SearcherManager(indexWriter, true, searcherFactory); searcherReopener = new ControlledRealTimeReopenThread<>(trackingIndexWriter, searcherManager, refreshSeconds, refreshSeconds); searcherReopener.start(); // Start the refresher thread } catch (IOException e) { Log.error(e, "Error while initializing index"); throw new RuntimeException(e); } }
From source file:com.stratio.cassandra.lucene.index.FSIndex.java
License:Apache License
/** * Builds a new {@link FSIndex}./*from www .j av a 2s.c om*/ * * @param name the index name * @param mbeanName the JMX MBean object name * @param path the directory path * @param analyzer the index writer analyzer * @param refresh the index reader refresh frequency in seconds * @param ramBufferMB the index writer RAM buffer size in MB * @param maxMergeMB the directory max merge size in MB * @param maxCachedMB the directory max cache size in MB * @param refreshTask action to be done during refresh */ public FSIndex(String name, String mbeanName, Path path, Analyzer analyzer, double refresh, int ramBufferMB, int maxMergeMB, int maxCachedMB, Runnable refreshTask) { try { this.path = path; this.name = name; // Open or create directory FSDirectory fsDirectory = FSDirectory.open(path); directory = new NRTCachingDirectory(fsDirectory, maxMergeMB, maxCachedMB); // Setup index writer IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); indexWriterConfig.setRAMBufferSizeMB(ramBufferMB); indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); indexWriterConfig.setUseCompoundFile(true); indexWriterConfig.setMergePolicy(new TieredMergePolicy()); indexWriter = new IndexWriter(directory, indexWriterConfig); // Setup NRT search SearcherFactory searcherFactory = new SearcherFactory() { @Override public IndexSearcher newSearcher(IndexReader reader, IndexReader previousReader) { if (refreshTask != null) { refreshTask.run(); } IndexSearcher searcher = new IndexSearcher(reader); searcher.setSimilarity(new NoIDFSimilarity()); return searcher; } }; TrackingIndexWriter trackingWriter = new TrackingIndexWriter(indexWriter); searcherManager = new SearcherManager(indexWriter, true, searcherFactory); searcherReopener = new ControlledRealTimeReopenThread<>(trackingWriter, searcherManager, refresh, refresh); searcherReopener.start(); // Register JMX MBean mbean = new ObjectName(mbeanName); ManagementFactory.getPlatformMBeanServer().registerMBean(this, 
this.mbean); } catch (Exception e) { throw new IndexException(logger, e, "Error while creating index %s", name); } }
From source file:com.stratio.cassandra.lucene.service.LuceneIndex.java
License:Apache License
/** * Builds a new {@code RowDirectory} using the specified directory path and analyzer. * * @param keyspace The keyspace name. * @param table The table name. * @param name The index name. * @param path The path of the directory in where the Lucene files will be stored. * @param ramBufferMB The index writer buffer size in MB. * @param maxMergeMB NRTCachingDirectory max merge size in MB. * @param maxCachedMB NRTCachingDirectory max cached MB. * @param analyzer The default {@link Analyzer}. * @param refreshSeconds The index readers refresh time in seconds. Writings are not visible until this time. * @param refreshCallback A runnable to be run on index refresh. * @throws IOException If Lucene throws IO errors. *//* ww w. java 2 s. co m*/ public LuceneIndex(String keyspace, String table, String name, Path path, Integer ramBufferMB, Integer maxMergeMB, Integer maxCachedMB, Analyzer analyzer, Double refreshSeconds, Runnable refreshCallback) throws IOException { this.path = path; this.refreshCallback = refreshCallback; this.logName = String.format("Lucene index %s.%s.%s", keyspace, table, name); // Open or create directory FSDirectory fsDirectory = FSDirectory.open(path); directory = new NRTCachingDirectory(fsDirectory, maxMergeMB, maxCachedMB); // Setup index writer IndexWriterConfig config = new IndexWriterConfig(analyzer); config.setRAMBufferSizeMB(ramBufferMB); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); config.setUseCompoundFile(true); config.setMergePolicy(new TieredMergePolicy()); indexWriter = new IndexWriter(directory, config); // Setup NRT search SearcherFactory searcherFactory = new SearcherFactory() { public IndexSearcher newSearcher(IndexReader reader) throws IOException { LuceneIndex.this.refreshCallBack(); IndexSearcher searcher = new IndexSearcher(reader); searcher.setSimilarity(new NoIDFSimilarity()); return searcher; } }; TrackingIndexWriter trackingIndexWriter = new TrackingIndexWriter(indexWriter); searcherManager = new 
SearcherManager(indexWriter, true, searcherFactory); searcherReopener = new ControlledRealTimeReopenThread<>(trackingIndexWriter, searcherManager, refreshSeconds, refreshSeconds); searcherReopener.start(); // Start the refresher thread // Register JMX MBean try { objectName = new ObjectName( String.format("com.stratio.cassandra.lucene:type=LuceneIndexes,keyspace=%s,table=%s,index=%s", keyspace, table, name)); ManagementFactory.getPlatformMBeanServer().registerMBean(this, objectName); } catch (MBeanException | OperationsException e) { Log.error(e, "Error while registering MBean"); } }
From source file:com.zimbra.cs.index.LuceneIndex.java
License:Open Source License
/**
 * Builds the {@link IndexWriterConfig} for this mailbox index from the {@code LC} settings.
 *
 * <p>Depending on {@code zimbra_index_lucene_merge_policy} either a {@link LogDocMergePolicy}
 * or a {@link LogByteSizeMergePolicy} is installed. For the byte-size policy the min/max merge
 * settings are divided by 1024.0 before being passed as MB values. The max merge limit is only
 * applied when the setting differs from {@link Integer#MAX_VALUE}.
 *
 * @return the populated writer configuration
 */
private IndexWriterConfig getWriterConfig() {
    final IndexWriterConfig writerConfig = new IndexWriterConfig(VERSION, mailbox.index.getAnalyzer());
    writerConfig.setMergeScheduler(new MergeScheduler());
    writerConfig.setMaxBufferedDocs(LC.zimbra_index_lucene_max_buffered_docs.intValue());
    writerConfig.setRAMBufferSizeMB(LC.zimbra_index_lucene_ram_buffer_size_kb.intValue() / 1024.0);

    // Settings shared by both merge policy flavours
    final boolean mergeByDocCount = LC.zimbra_index_lucene_merge_policy.booleanValue();
    final boolean useCompoundFile = LC.zimbra_index_lucene_use_compound_file.booleanValue();
    final int mergeFactor = LC.zimbra_index_lucene_merge_factor.intValue();
    final int minMerge = LC.zimbra_index_lucene_min_merge.intValue();
    final int maxMerge = LC.zimbra_index_lucene_max_merge.intValue();
    final boolean maxMergeLimited = maxMerge != Integer.MAX_VALUE;

    if (!mergeByDocCount) {
        final LogByteSizeMergePolicy byteSizePolicy = new LogByteSizeMergePolicy();
        writerConfig.setMergePolicy(byteSizePolicy);
        byteSizePolicy.setUseCompoundFile(useCompoundFile);
        byteSizePolicy.setMergeFactor(mergeFactor);
        byteSizePolicy.setMinMergeMB(minMerge / 1024.0);
        if (maxMergeLimited) {
            byteSizePolicy.setMaxMergeMB(maxMerge / 1024.0);
        }
        return writerConfig;
    }

    final LogDocMergePolicy docCountPolicy = new LogDocMergePolicy();
    writerConfig.setMergePolicy(docCountPolicy);
    docCountPolicy.setUseCompoundFile(useCompoundFile);
    docCountPolicy.setMergeFactor(mergeFactor);
    docCountPolicy.setMinMergeDocs(minMerge);
    if (maxMergeLimited) {
        docCountPolicy.setMaxMergeDocs(maxMerge);
    }
    return writerConfig;
}
From source file:de.csw.linkgenerator.plugin.lucene.IndexUpdater.java
License:Open Source License
private void openWriter(OpenMode openMode) { if (writer != null) { LOG.error("Writer already open and createWriter called"); return;/*from www. ja v a 2 s .com*/ } try { // fix for windows by Daniel Cortes: // FSDirectory f = FSDirectory.getDirectory(indexDir); IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer); conf.setOpenMode(openMode); // Ralph: This is kind of guesswork LogDocMergePolicy mergePolicy = new LogDocMergePolicy(); mergePolicy.setUseCompoundFile(true); conf.setMergePolicy(mergePolicy); // writer = new IndexWriter (indexDir, analyzer, create); writer = new IndexWriter(indexDir, conf); // writer.setUseCompoundFile(true); if (LOG.isDebugEnabled()) { LOG.debug("successfully opened index writer : " + indexDir); } } catch (IOException e) { LOG.error("IOException when opening Lucene Index for writing at " + indexDir, e); } }
From source file:de.jetsli.lumeo.LucPerfTest.java
License:Apache License
public void testPerf() { new PerfRunner(1000000, 26f) { @Override/*from w w w.j a v a2 s. c o m*/ public void reinit() throws Exception { super.reinit(); if (nrtManager != null) { nrtManager.close(); reopenThread.close(); writer.waitForMerges(); writer.close(); dir.close(); } Helper.deleteDir(file); docs = 0; IndexWriterConfig cfg = new IndexWriterConfig(version, keyAna); cfg.setRAMBufferSizeMB(128); // cfg.setCodec(new Lucene40Codec() { // // @Override public PostingsFormat getPostingsFormatForField(String field) { // if ("_id".equals(field)) // return new Pulsing40PostingsFormat(); // else // return new Lucene40PostingsFormat(); // } // }); LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy(); mp.setUseCompoundFile(false); cfg.setMergePolicy(mp); dir = FSDirectory.open(file); cfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE); writer = new IndexWriter(dir, cfg); nrtManager = new NRTManager(writer, new SearcherWarmer() { @Override public void warm(IndexSearcher s) throws IOException { // TODO get some random vertices via getVertices? 
} }); int priority = Math.min(Thread.currentThread().getPriority() + 2, Thread.MAX_PRIORITY); reopenThread = new NRTManagerReopenThread(nrtManager, 5.0, 0.03); reopenThread.setName("NRT Reopen Thread"); reopenThread.setPriority(priority); reopenThread.setDaemon(true); reopenThread.start(); } final BytesRef bytes = new BytesRef(); @Override public void innerRun(int trial, int i) { long id = i; Document newDoc = new Document(); NumericField idField = new NumericField("_id", 6, NumericField.TYPE_STORED).setLongValue(id); Field uIdField = new Field("_uid", "" + id, StringField.TYPE_STORED); Field typeField = new Field("_type", "test", StringField.TYPE_STORED); newDoc.add(idField); newDoc.add(uIdField); newDoc.add(typeField); // Analyzer ana = anas.get(newDoc.get("_type")); try { NumericUtils.longToPrefixCoded(id, 0, bytes); latestGen = nrtManager.updateDocument(new Term("_id", bytes), newDoc, keyAna); docs++; } catch (IOException ex) { logger.error("Cannot update " + i, ex); } } @Override protected void finalAssert() throws Exception { // logger.info("wait for " + latestGen + ", current:" + nrtManager.getCurrentSearchingGen(true)); nrtManager.waitForGeneration(latestGen, true); // writer.commit(); // writer.waitForMerges(); SearcherManager mng = nrtManager.getSearcherManager(true); // mng.maybeReopen(); IndexSearcher searcher = mng.acquire(); try { TotalHitCountCollector coll = new TotalHitCountCollector(); searcher.search(new MatchAllDocsQuery(), coll); long total = coll.getTotalHits(); if (docs != total) throw new IllegalStateException(total + " vs. " + docs); } finally { nrtManager.getSearcherManager(true).release(searcher); } } }.run(); }
From source file:de.jetsli.lumeo.RawLucene.java
License:Apache License
public RawLucene init() { indexLock();// w w w.j a v a 2 s . c o m try { if (closed) throw new IllegalStateException("Already closed"); if (writer != null) throw new IllegalStateException("Already initialized"); // release locks when started if (IndexWriter.isLocked(dir)) { logger.warn("index is locked + " + name + " -> releasing lock"); IndexWriter.unlock(dir); } IndexWriterConfig cfg = new IndexWriterConfig(VERSION, defaultMapping.getCombinedAnalyzer()); LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy(); mp.setMaxMergeMB(getMaxMergeMB()); cfg.setRAMBufferSizeMB(ramBufferSizeMB); cfg.setTermIndexInterval(termIndexIntervalSize); cfg.setMergePolicy(mp); // TODO specify different formats for id fields etc // -> this breaks 16 of our tests!? Lucene Bug? // cfg.setCodec(new Lucene40Codec() { // // @Override public PostingsFormat getPostingsFormatForField(String field) { // return new Pulsing40PostingsFormat(); // } // }); // cfg.setMaxThreadStates(8); boolean create = !DirectoryReader.indexExists(dir); cfg.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND); //wrap the writer with a tracking index writer writer = new TrackingIndexWriter(new IndexWriter(dir, cfg)); nrtManager = new NRTManager(writer, new SearcherFactory() { // @Override // public IndexSearcher newSearcher(IndexReader reader) throws IOException { // //TODO do some kind of warming here? 
// return new IndexSearcher(reader); // } }); getCurrentRTCache(latestGen); int priority = Math.min(Thread.currentThread().getPriority() + 2, Thread.MAX_PRIORITY); flushThread = new FlushThread("flush-thread"); flushThread.setPriority(priority); flushThread.setDaemon(true); flushThread.start(); reopenThread = new NRTManagerReopenThread(nrtManager, ordinaryWaiting, incomingSearchesMaximumWaiting); reopenThread.setName("NRT Reopen Thread"); reopenThread.setPriority(priority); reopenThread.setDaemon(true); reopenThread.start(); return this; } catch (Exception e) { throw new RuntimeException(e); } finally { indexUnlock(); } }