List of usage examples for org.apache.lucene.index.IndexWriterConfig#setMergePolicy
@Override
public IndexWriterConfig setMergePolicy(MergePolicy mergePolicy)
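Before the project-specific examples below, here is a minimal, self-contained sketch of a typical call (assuming Lucene 6+, where IndexWriterConfig takes only an Analyzer; the index path and the tuning values are illustrative, not taken from any of the projects below):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import java.nio.file.Paths;

public class SetMergePolicyExample {
    public static void main(String[] args) throws Exception {
        // Configure the merge policy before the writer is opened; the values are illustrative.
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        TieredMergePolicy mergePolicy = new TieredMergePolicy();
        mergePolicy.setSegmentsPerTier(10.0);          // allow up to 10 segments per tier before merging
        mergePolicy.setMaxMergedSegmentMB(5 * 1024.0); // cap merged segments at roughly 5 GB
        config.setMergePolicy(mergePolicy);

        Directory dir = FSDirectory.open(Paths.get("/tmp/example-index")); // hypothetical path
        try (IndexWriter writer = new IndexWriter(dir, config)) {
            writer.commit(); // the policy is consulted as segments accumulate
        }
        dir.close();
    }
}

Note that several of the examples below instead pass NoMergePolicy.INSTANCE, which disables background merges entirely; this is useful in tests that need a predictable segment layout.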
From source file:org.elasticsearch.test.unit.common.lucene.uid.VersionsTests.java
License:Apache License
@Test
public void testMergingOldIndices() throws Exception {
    final IndexWriterConfig iwConf = new IndexWriterConfig(Lucene.VERSION, new KeywordAnalyzer());
    iwConf.setMergePolicy(new IndexUpgraderMergePolicy(iwConf.getMergePolicy()));
    final Directory dir = newDirectory();
    final IndexWriter iw = new IndexWriter(dir, iwConf);

    // 1st segment, no _version
    Document document = new Document();
    // Add a dummy field (enough to trigger #3237)
    document.add(new StringField("a", "b", Store.NO));
    StringField uid = new StringField(UidFieldMapper.NAME, "1", Store.YES);
    document.add(uid);
    iw.addDocument(document);
    uid.setStringValue("2");
    iw.addDocument(document);
    iw.commit();

    // 2nd segment, old layout
    document = new Document();
    UidField uidAndVersion = new UidField("3", 3L);
    document.add(uidAndVersion);
    iw.addDocument(document);
    uidAndVersion.uid = "4";
    uidAndVersion.version = 4L;
    iw.addDocument(document);
    iw.commit();

    // 3rd segment, new layout
    document = new Document();
    uid.setStringValue("5");
    Field version = new NumericDocValuesField(UidFieldMapper.VERSION, 5L);
    document.add(uid);
    document.add(version);
    iw.addDocument(document);
    uid.setStringValue("6");
    version.setLongValue(6L);
    iw.addDocument(document);
    iw.commit();

    final Map<String, Long> expectedVersions = ImmutableMap.<String, Long>builder()
            .put("1", 0L).put("2", 0L).put("3", 0L)
            .put("4", 4L).put("5", 5L).put("6", 6L).build();

    // Force merge and check versions
    iw.forceMerge(1);
    final AtomicReader ir = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(iw.getDirectory()));
    final NumericDocValues versions = ir.getNumericDocValues(UidFieldMapper.VERSION);
    assertThat(versions, notNullValue());
    for (int i = 0; i < ir.maxDoc(); ++i) {
        final String uidValue = ir.document(i).get(UidFieldMapper.NAME);
        final long expectedVersion = expectedVersions.get(uidValue);
        assertThat(versions.get(i), equalTo(expectedVersion));
    }

    iw.close();
    assertThat(IndexWriter.isLocked(iw.getDirectory()), is(false));
    ir.close();
    dir.close();
}
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.DocumentSubsetReaderTests.java
License:Open Source License
/** Same test as in FieldSubsetReaderTests, test that the core cache key (needed for NRT) is working */
public void testCoreCacheKey() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    iwc.setMaxBufferedDocs(100);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    IndexWriter iw = new IndexWriter(dir, iwc);

    // add two docs, id:0 and id:1
    Document doc = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    doc.add(idField);
    idField.setStringValue("0");
    iw.addDocument(doc);
    idField.setStringValue("1");
    iw.addDocument(doc);

    // open reader
    DirectoryReader ir = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(iw),
            new ShardId("_index", "_na_", 0));
    ir = DocumentSubsetReader.wrap(ir, bitsetFilterCache, new MatchAllDocsQuery());
    assertEquals(2, ir.numDocs());
    assertEquals(1, ir.leaves().size());

    // delete id:0 and reopen
    iw.deleteDocuments(new Term("id", "0"));
    DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);

    // we should have the same cache key as before
    assertEquals(1, ir2.numDocs());
    assertEquals(1, ir2.leaves().size());
    assertSame(ir.leaves().get(0).reader().getCoreCacheHelper().getKey(),
            ir2.leaves().get(0).reader().getCoreCacheHelper().getKey());

    // However, we don't support caching on the reader cache key since we override deletes
    assertNull(ir.leaves().get(0).reader().getReaderCacheHelper());
    assertNull(ir2.leaves().get(0).reader().getReaderCacheHelper());

    TestUtil.checkReader(ir);
    IOUtils.close(ir, ir2, iw, dir);
}
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java
License:Open Source License
/** test that the core cache key (needed for NRT) is working */
public void testCoreCacheKey() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    iwc.setMaxBufferedDocs(100);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    IndexWriter iw = new IndexWriter(dir, iwc);

    // add two docs, id:0 and id:1
    Document doc = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    doc.add(idField);
    idField.setStringValue("0");
    iw.addDocument(doc);
    idField.setStringValue("1");
    iw.addDocument(doc);

    // open reader
    DirectoryReader ir = FieldSubsetReader.wrap(DirectoryReader.open(iw),
            new CharacterRunAutomaton(Automata.makeString("id")));
    assertEquals(2, ir.numDocs());
    assertEquals(1, ir.leaves().size());

    // delete id:0 and reopen
    iw.deleteDocuments(new Term("id", "0"));
    DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);

    // we should have the same cache key as before
    assertEquals(1, ir2.numDocs());
    assertEquals(1, ir2.leaves().size());
    assertSame(ir.leaves().get(0).reader().getCoreCacheHelper().getKey(),
            ir2.leaves().get(0).reader().getCoreCacheHelper().getKey());

    TestUtil.checkReader(ir);
    IOUtils.close(ir, ir2, iw, dir);
}
From source file:org.elasticsearch.xpack.security.authz.accesscontrol.FieldDataCacheWithFieldSubsetReaderTests.java
License:Open Source License
@Before
public void setup() throws Exception {
    IndexSettings indexSettings = createIndexSettings();
    CircuitBreakerService circuitBreakerService = new NoneCircuitBreakerService();
    String name = "_field";
    indexFieldDataCache = new DummyAccountingFieldDataCache();
    sortedSetDVOrdinalsIndexFieldData = new SortedSetDVOrdinalsIndexFieldData(indexSettings,
            indexFieldDataCache, name, circuitBreakerService,
            AbstractAtomicOrdinalsFieldData.DEFAULT_SCRIPT_FUNCTION);
    pagedBytesIndexFieldData = new PagedBytesIndexFieldData(indexSettings, name, indexFieldDataCache,
            circuitBreakerService, TextFieldMapper.Defaults.FIELDDATA_MIN_FREQUENCY,
            TextFieldMapper.Defaults.FIELDDATA_MAX_FREQUENCY,
            TextFieldMapper.Defaults.FIELDDATA_MIN_SEGMENT_SIZE);

    dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    IndexWriter iw = new IndexWriter(dir, iwc);
    numDocs = scaledRandomIntBetween(32, 128);
    for (int i = 1; i <= numDocs; i++) {
        Document doc = new Document();
        doc.add(new StringField("_field", String.valueOf(i), Field.Store.NO));
        doc.add(new SortedSetDocValuesField("_field", new BytesRef(String.valueOf(i))));
        iw.addDocument(doc);
        if (i % 24 == 0) {
            iw.commit();
        }
    }
    iw.close();
    ir = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(dir),
            new ShardId(indexSettings.getIndex(), 0));
}
From source file:org.ihtsdo.otf.query.lucene.LuceneIndexer.java
License:Apache License
protected LuceneIndexer(String indexName) throws IOException {
    try {
        indexName_ = indexName;
        luceneWriterService = LookupService.getService(WorkExecutors.class).getExecutor();
        luceneWriterFutureCheckerService = Executors.newFixedThreadPool(1,
                new NamedThreadFactory(indexName + " Lucene future checker", false));

        Path searchFolder = LookupService.getService(ConfigurationService.class).getSearchFolderPath();
        if (luceneRootFolder_.compareAndSet(null, new File(searchFolder.toFile(), DEFAULT_LUCENE_FOLDER))) {
            luceneRootFolder_.get().mkdirs();
        }
        indexFolder_ = new File(luceneRootFolder_.get(), indexName);
        indexFolder_.mkdirs();
        log.info("Index: " + indexFolder_.getAbsolutePath());

        // Switch over to MMapDirectory - in theory this gives us back some room on the JDK stack,
        // letting the OS directly manage the caching of the index files - and, more importantly,
        // gives us a huge performance boost during any operation that does multi-threaded reads
        // of the index (like the SOLOR rules processing), because the default of SimpleFSDirectory
        // is a huge bottleneck.
        Directory indexDirectory = new MMapDirectory(indexFolder_);
        indexDirectory.clearLock("write.lock");

        IndexWriterConfig config = new IndexWriterConfig(luceneVersion, new PerFieldAnalyzer());
        config.setRAMBufferSizeMB(256);
        MergePolicy mergePolicy = new LogByteSizeMergePolicy();
        config.setMergePolicy(mergePolicy);
        config.setSimilarity(new ShortTextSimilarity());

        IndexWriter indexWriter = new IndexWriter(indexDirectory, config);
        trackingIndexWriter = new TrackingIndexWriter(indexWriter);

        boolean applyAllDeletes = false;
        searcherManager = new SearcherManager(indexWriter, applyAllDeletes, null);

        // [3]: Create the ControlledRealTimeReopenThread that reopens the index periodically, taking
        // into account the changes made to the index and tracked by the TrackingIndexWriter instance.
        // The index is refreshed every 60 s when nobody is waiting, and every 100 ms whenever someone
        // is waiting (see the search method)
        // (see http://lucene.apache.org/core/4_3_0/core/org/apache/lucene/search/NRTManagerReopenThread.html)
        reopenThread = new ControlledRealTimeReopenThread<>(trackingIndexWriter, searcherManager, 60.00, 0.1);
        this.startThread();

        // Register for commits:
        log.info("Registering indexer " + getIndexerName() + " for commits");
        Get.commitService().addChangeListener(new ChronologyChangeListener() {
            @Override
            public void handleCommit(CommitRecord commitRecord) {
                commitRecord.getSememesInCommit().stream().forEach(sememeId -> {
                    handleChange(Get.sememeService().getSememe(sememeId));
                });
            }

            @Override
            public void handleChange(SememeChronology<? extends SememeVersion<?>> sc) {
                log.info("submitting sememe " + sc.toUserString() + " to indexer " + getIndexerName()
                        + " due to commit");
                index(sc);
            }

            @Override
            public void handleChange(ConceptChronology<? extends StampedVersion> cc) {
                // noop
            }

            @Override
            public UUID getListenerUuid() {
                return UuidT5Generator.get(getIndexerName());
            }
        });
    } catch (Exception e) {
        LookupService.getService(SystemStatusService.class).notifyServiceConfigurationFailure(indexName, e);
        throw e;
    }
}
From source file:org.meresco.lucene.numerate.UriEnumerate.java
License:Open Source License
/**
 * @param path
 * @param max_cache_size
 * @param withTransactionLog allows for crash recovery, but slows down UriNumerate considerably
 *        because of file system flushes.
 * @throws IOException
 */
public UriEnumerate(String path, int max_cache_size, boolean withTransactionLog) throws IOException {
    IndexWriterConfig config = new IndexWriterConfig(null);

    ConcurrentMergeScheduler ms = (ConcurrentMergeScheduler) config.getMergeScheduler();
    ms.setDefaultMaxMergesAndThreads(/* spins= */ false);

    LogDocMergePolicy mp = new LogDocMergePolicy();
    mp.setMergeFactor(2);
    mp.setMinMergeDocs(max_cache_size);
    config.setMergePolicy(mp);

    config.setCodec(new Lucene60Codec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            return new BloomFilteringPostingsFormat(super.getPostingsFormatForField(field));
        }
    });
    config.setUseCompoundFile(false);

    this.writer = new IndexWriter(FSDirectory.open(FileSystems.getDefault().getPath(path)), config);
    this.next_ord = writer.numDocs() + 1;
    this.searcher = new SimpleSearcher(this.writer);
    this.cache = new Cache(max_cache_size, () -> this.commit());
    this.transactionLog = new TransactionLog(withTransactionLog ? path + "/transactionLog" : null);
    this.transactionLog.maybeRecover();
}
From source file:org.neo4j.kernel.api.impl.index.IndexWriterConfigs.java
License:Open Source License
public static IndexWriterConfig standard() {
    IndexWriterConfig writerConfig = new IndexWriterConfig(LuceneDataSource.KEYWORD_ANALYZER);
    writerConfig.setMaxBufferedDocs(MAX_BUFFERED_DOCS);
    writerConfig.setIndexDeletionPolicy(new MultipleBackupDeletionPolicy());
    writerConfig.setUseCompoundFile(true);
    writerConfig.setCodec(new Lucene54Codec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            PostingsFormat postingFormat = super.getPostingsFormatForField(field);
            return CODEC_BLOCK_TREE_ORDS_POSTING_FORMAT ? blockTreeOrdsPostingsFormat : postingFormat;
        }
    });

    LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
    mergePolicy.setNoCFSRatio(MERGE_POLICY_NO_CFS_RATIO);
    mergePolicy.setMinMergeMB(MERGE_POLICY_MIN_MERGE_MB);
    mergePolicy.setMergeFactor(MERGE_POLICY_MERGE_FACTOR);
    writerConfig.setMergePolicy(mergePolicy);

    return writerConfig;
}
From source file:org.neo4j.kernel.api.impl.index.IndexWriterFactories.java
License:Open Source License
public static LuceneIndexWriterFactory standard() {
    return new LuceneIndexWriterFactory() {
        @Override
        public IndexWriter create(Directory directory) throws IOException {
            IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_36,
                    LuceneDataSource.KEYWORD_ANALYZER);
            writerConfig.setMaxBufferedDocs(100000); // TODO figure out depending on environment?
            writerConfig.setIndexDeletionPolicy(new MultipleBackupDeletionPolicy());
            writerConfig.setTermIndexInterval(14);

            LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
            mergePolicy.setUseCompoundFile(true);
            mergePolicy.setNoCFSRatio(1.0);
            mergePolicy.setMinMergeMB(0.1);
            mergePolicy.setMergeFactor(2);
            writerConfig.setMergePolicy(mergePolicy);

            return new IndexWriter(directory, writerConfig);
        }
    };
}
From source file:org.olat.search.service.indexer.JmsIndexer.java
License:Apache License
public IndexWriterConfig newIndexWriterConfig() {
    Analyzer analyzer = new StandardAnalyzer(SearchService.OO_LUCENE_VERSION);
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(SearchService.OO_LUCENE_VERSION, analyzer);
    indexWriterConfig.setMergePolicy(newLogMergePolicy());
    // for better performance, set to 48MB (see the Lucene docs, "How to make indexing faster")
    indexWriterConfig.setRAMBufferSizeMB(ramBufferSizeMB);
    indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    return indexWriterConfig;
}
From source file:org.punksearch.crawler.IndexOperator.java
License:Open Source License
/**
 * @param dir the directory to open the writer for
 * @param segments fewer segments means slower indexing but faster search, and vice versa; 0 means the default
 * @return a new index writer
 * @throws IOException
 */
private static IndexWriter createIndexWriter(String dir, int segments) throws IOException {
    final IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LuceneVersion.VERSION, analyzer);
    if (segments > 0) {
        indexWriterConfig.setMergePolicy(
                new TieredMergePolicy().setSegmentsPerTier(segments).setMaxMergeAtOnce(segments));
    }
    return new IndexWriter(FSDirectory.open(new File(dir)), indexWriterConfig);
}