List of usage examples for org.apache.lucene.index IndexWriterConfig setMergePolicy
@Override
public IndexWriterConfig setMergePolicy(MergePolicy mergePolicy)
From source file:org.elasticsearch.common.lucene.LuceneTests.java
License:Apache License
public void testPruneUnreferencedFiles() throws IOException { MockDirectoryWrapper dir = newMockDirectory(); dir.setEnableVirusScanner(false);/*from w w w .j a v a2 s. c o m*/ IndexWriterConfig iwc = newIndexWriterConfig(); iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE); iwc.setMergePolicy(NoMergePolicy.INSTANCE); iwc.setMaxBufferedDocs(2); IndexWriter writer = new IndexWriter(dir, iwc); Document doc = new Document(); doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); writer.commit(); doc = new Document(); doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); doc = new Document(); doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); writer.commit(); SegmentInfos segmentCommitInfos = Lucene.readSegmentInfos(dir); doc = new Document(); doc.add(new TextField("id", "4", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); writer.deleteDocuments(new Term("id", "2")); writer.commit(); DirectoryReader open = DirectoryReader.open(writer, true); assertEquals(3, open.numDocs()); assertEquals(1, open.numDeletedDocs()); assertEquals(4, open.maxDoc()); open.close(); writer.close(); SegmentInfos si = Lucene.pruneUnreferencedFiles(segmentCommitInfos.getSegmentsFileName(), dir); assertEquals(si.getSegmentsFileName(), segmentCommitInfos.getSegmentsFileName()); open = DirectoryReader.open(dir); assertEquals(3, open.numDocs()); assertEquals(0, open.numDeletedDocs()); assertEquals(3, open.maxDoc()); IndexSearcher s = new IndexSearcher(open); assertEquals(s.search(new TermQuery(new Term("id", "1")), 1).totalHits, 1); assertEquals(s.search(new TermQuery(new Term("id", "2")), 1).totalHits, 1); assertEquals(s.search(new TermQuery(new Term("id", "3")), 1).totalHits, 1); assertEquals(s.search(new TermQuery(new Term("id", "4")), 1).totalHits, 0); for (String file : dir.listAll()) { assertFalse("unexpected file: " + file, file.equals("segments_3") || file.startsWith("_2")); } open.close(); dir.close(); }
From source file:org.elasticsearch.common.lucene.LuceneTests.java
License:Apache License
public void testFiles() throws IOException { MockDirectoryWrapper dir = newMockDirectory(); dir.setEnableVirusScanner(false);// ww w .j a v a 2s . c om IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())); iwc.setMergePolicy(NoMergePolicy.INSTANCE); iwc.setMaxBufferedDocs(2); iwc.setUseCompoundFile(true); IndexWriter writer = new IndexWriter(dir, iwc); Document doc = new Document(); doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); writer.commit(); Set<String> files = new HashSet<>(); for (String f : Lucene.files(Lucene.readSegmentInfos(dir))) { files.add(f); } final boolean simpleTextCFS = files.contains("_0.scf"); assertTrue(files.toString(), files.contains("segments_1")); if (simpleTextCFS) { assertFalse(files.toString(), files.contains("_0.cfs")); assertFalse(files.toString(), files.contains("_0.cfe")); } else { assertTrue(files.toString(), files.contains("_0.cfs")); assertTrue(files.toString(), files.contains("_0.cfe")); } assertTrue(files.toString(), files.contains("_0.si")); doc = new Document(); doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); doc = new Document(); doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); writer.commit(); files.clear(); for (String f : Lucene.files(Lucene.readSegmentInfos(dir))) { files.add(f); } assertFalse(files.toString(), files.contains("segments_1")); assertTrue(files.toString(), files.contains("segments_2")); if (simpleTextCFS) { assertFalse(files.toString(), files.contains("_0.cfs")); assertFalse(files.toString(), files.contains("_0.cfe")); } else { assertTrue(files.toString(), files.contains("_0.cfs")); assertTrue(files.toString(), files.contains("_0.cfe")); } assertTrue(files.toString(), files.contains("_0.si")); if (simpleTextCFS) { assertFalse(files.toString(), files.contains("_1.cfs")); assertFalse(files.toString(), files.contains("_1.cfe")); } else { assertTrue(files.toString(), files.contains("_1.cfs")); assertTrue(files.toString(), files.contains("_1.cfe")); } assertTrue(files.toString(), files.contains("_1.si")); writer.close(); dir.close(); }
From source file:org.elasticsearch.common.lucene.uid.VersionsTests.java
License:Apache License
@Test public void testMergingOldIndices() throws Exception { final IndexWriterConfig iwConf = new IndexWriterConfig(Lucene.VERSION, new KeywordAnalyzer()); iwConf.setMergePolicy(new IndexUpgraderMergePolicy(iwConf.getMergePolicy())); final Directory dir = newDirectory(); final IndexWriter iw = new IndexWriter(dir, iwConf); // 1st segment, no _version Document document = new Document(); // Add a dummy field (enough to trigger #3237) document.add(new StringField("a", "b", Store.NO)); StringField uid = new StringField(UidFieldMapper.NAME, "1", Store.YES); document.add(uid);/*from w w w.ja va 2 s . c o m*/ iw.addDocument(document); uid.setStringValue("2"); iw.addDocument(document); iw.commit(); // 2nd segment, old layout document = new Document(); UidField uidAndVersion = new UidField("3", 3L); document.add(uidAndVersion); iw.addDocument(document); uidAndVersion.uid = "4"; uidAndVersion.version = 4L; iw.addDocument(document); iw.commit(); // 3rd segment new layout document = new Document(); uid.setStringValue("5"); Field version = new NumericDocValuesField(VersionFieldMapper.NAME, 5L); document.add(uid); document.add(version); iw.addDocument(document); uid.setStringValue("6"); version.setLongValue(6L); iw.addDocument(document); iw.commit(); final Map<String, Long> expectedVersions = ImmutableMap.<String, Long>builder().put("1", 0L).put("2", 0L) .put("3", 0L).put("4", 4L).put("5", 5L).put("6", 6L).build(); // Force merge and check versions iw.forceMerge(1); final AtomicReader ir = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(iw.getDirectory())); final NumericDocValues versions = ir.getNumericDocValues(VersionFieldMapper.NAME); assertThat(versions, notNullValue()); for (int i = 0; i < ir.maxDoc(); ++i) { final String uidValue = ir.document(i).get(UidFieldMapper.NAME); final long expectedVersion = expectedVersions.get(uidValue); assertThat(versions.get(i), equalTo(expectedVersion)); } iw.close(); assertThat(IndexWriter.isLocked(iw.getDirectory()), is(false)); ir.close(); dir.close(); }
From source file:org.elasticsearch.index.engine.internal.AsynchronousEngine.java
License:Apache License
private IndexWriter createWriter() throws IOException { try {/*from ww w . ja v a 2s.co m*/ boolean create = !Lucene.indexExists(store.directory()); IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION, analysisService.defaultIndexAnalyzer()); config.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND); config.setIndexDeletionPolicy(deletionPolicy); config.setInfoStream(new LoggerInfoStream(indexSettings, shardId)); config.setMergeScheduler(mergeScheduler.newMergeScheduler()); MergePolicy mergePolicy = mergePolicyProvider.getMergePolicy(); // Give us the opportunity to upgrade old segments while performing // background merges mergePolicy = new ElasticsearchMergePolicy(mergePolicy); config.setMergePolicy(mergePolicy); config.setSimilarity(similarityService.similarity()); config.setRAMBufferSizeMB(indexingBufferSize.mbFrac()); config.setMaxThreadStates(indexConcurrency); config.setCodec(codecService.codec(codecName)); /* We set this timeout to a highish value to work around * the default poll interval in the Lucene lock that is * 1000ms by default. We might need to poll multiple times * here but with 1s poll this is only executed twice at most * in combination with the default writelock timeout*/ config.setWriteLockTimeout(5000); config.setUseCompoundFile(this.compoundOnFlush); config.setCheckIntegrityAtMerge(checksumOnMerge); // Warm-up hook for newly-merged segments. Warming up segments here is better since it will be performed at the end // of the merge operation and won't slow down _refresh config.setMergedSegmentWarmer(new IndexReaderWarmer() { @Override public void warm(AtomicReader reader) throws IOException { try { assert isMergedSegment(reader); if (warmer != null) { final Engine.Searcher searcher = new SimpleSearcher("warmer", new IndexSearcher(reader)); final IndicesWarmer.WarmerContext context = new IndicesWarmer.WarmerContext(shardId, searcher); warmer.warmNewReaders(context); } } catch (Throwable t) { // Don't fail a merge if the warm-up failed if (!closed) { logger.warn("Warm-up failed", t); } if (t instanceof Error) { // assertion/out-of-memory error, don't ignore those throw (Error) t; } } } }); return new IndexWriter(store.directory(), config); } catch (LockObtainFailedException ex) { boolean isLocked = IndexWriter.isLocked(store.directory()); logger.warn("Could not lock IndexWriter isLocked [{}]", ex, isLocked); throw ex; } }
From source file:org.elasticsearch.index.engine.internal.InternalEngine.java
License:Apache License
private IndexWriter createWriter() throws IOException { try {/* w w w . j a v a 2 s. co m*/ // release locks when started if (IndexWriter.isLocked(store.directory())) { logger.warn("shard is locked, releasing lock"); IndexWriter.unlock(store.directory()); } boolean create = !Lucene.indexExists(store.directory()); IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION, analysisService.defaultIndexAnalyzer()); config.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND); config.setIndexDeletionPolicy(deletionPolicy); config.setMergeScheduler(mergeScheduler.newMergeScheduler()); MergePolicy mergePolicy = mergePolicyProvider.newMergePolicy(); // Give us the opportunity to upgrade old segments while performing // background merges mergePolicy = new IndexUpgraderMergePolicy(mergePolicy); config.setMergePolicy(mergePolicy); config.setSimilarity(similarityService.similarity()); config.setRAMBufferSizeMB(indexingBufferSize.mbFrac()); config.setMaxThreadStates(indexConcurrency); config.setCodec(codecService.codec(codecName)); /* We set this timeout to a highish value to work around * the default poll interval in the Lucene lock that is * 1000ms by default. We might need to poll multiple times * here but with 1s poll this is only executed twice at most * in combination with the default writelock timeout*/ config.setWriteLockTimeout(5000); config.setUseCompoundFile(this.compoundOnFlush); // Warm-up hook for newly-merged segments. Warming up segments here is better since it will be performed at the end // of the merge operation and won't slow down _refresh config.setMergedSegmentWarmer(new IndexReaderWarmer() { @Override public void warm(AtomicReader reader) throws IOException { try { assert isMergedSegment(reader); final Engine.Searcher searcher = new SimpleSearcher("warmer", new IndexSearcher(reader)); final IndicesWarmer.WarmerContext context = new IndicesWarmer.WarmerContext(shardId, searcher); if (warmer != null) warmer.warm(context); } catch (Throwable t) { // Don't fail a merge if the warm-up failed if (!closed) { logger.warn("Warm-up failed", t); } if (t instanceof Error) { // assertion/out-of-memory error, don't ignore those throw (Error) t; } } } }); return new IndexWriter(store.directory(), config); } catch (LockObtainFailedException ex) { boolean isLocked = IndexWriter.isLocked(store.directory()); logger.warn("Could not lock IndexWriter isLocked [{}]", ex, isLocked); throw ex; } }
From source file:org.elasticsearch.index.engine.InternalEngine.java
License:Apache License
private IndexWriter createWriter(boolean create) throws IOException { try {//from w w w. j a v a 2 s.c o m final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer()); iwc.setCommitOnClose(false); // we by default don't commit on close iwc.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND); iwc.setIndexDeletionPolicy(deletionPolicy); // with tests.verbose, lucene sets this up: plumb to align with filesystem stream boolean verbose = false; try { verbose = Boolean.parseBoolean(System.getProperty("tests.verbose")); } catch (Throwable ignore) { } iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger)); iwc.setMergeScheduler(mergeScheduler); MergePolicy mergePolicy = config().getMergePolicy(); // Give us the opportunity to upgrade old segments while performing // background merges mergePolicy = new ElasticsearchMergePolicy(mergePolicy); iwc.setMergePolicy(mergePolicy); iwc.setSimilarity(engineConfig.getSimilarity()); iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().mbFrac()); iwc.setCodec(engineConfig.getCodec()); /* We set this timeout to a highish value to work around * the default poll interval in the Lucene lock that is * 1000ms by default. We might need to poll multiple times * here but with 1s poll this is only executed twice at most * in combination with the default writelock timeout*/ iwc.setWriteLockTimeout(5000); iwc.setUseCompoundFile(this.engineConfig.isCompoundOnFlush()); // Warm-up hook for newly-merged segments. Warming up segments here is better since it will be performed at the end // of the merge operation and won't slow down _refresh iwc.setMergedSegmentWarmer(new IndexReaderWarmer() { @Override public void warm(LeafReader reader) throws IOException { try { LeafReader esLeafReader = new ElasticsearchLeafReader(reader, shardId); assert isMergedSegment(esLeafReader); if (warmer != null) { final Engine.Searcher searcher = new Searcher("warmer", searcherFactory.newSearcher(esLeafReader, null)); final IndicesWarmer.WarmerContext context = new IndicesWarmer.WarmerContext(shardId, searcher); warmer.warmNewReaders(context); } } catch (Throwable t) { // Don't fail a merge if the warm-up failed if (isClosed.get() == false) { logger.warn("Warm-up failed", t); } if (t instanceof Error) { // assertion/out-of-memory error, don't ignore those throw (Error) t; } } } }); return new IndexWriter(store.directory(), iwc); } catch (LockObtainFailedException ex) { boolean isLocked = IndexWriter.isLocked(store.directory()); logger.warn("Could not lock IndexWriter isLocked [{}]", ex, isLocked); throw ex; } }
From source file:org.elasticsearch.index.engine.robin.RobinEngine.java
License:Apache License
private IndexWriter createWriter() throws IOException { IndexWriter indexWriter = null;/*from w ww . j a v a 2 s . c o m*/ try { // release locks when started if (IndexWriter.isLocked(store.directory())) { logger.warn("shard is locked, releasing lock"); IndexWriter.unlock(store.directory()); } boolean create = !IndexReader.indexExists(store.directory()); IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION, analysisService.defaultIndexAnalyzer()); config.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND); config.setIndexDeletionPolicy(deletionPolicy); config.setMergeScheduler(mergeScheduler.newMergeScheduler()); config.setMergePolicy(mergePolicyProvider.newMergePolicy()); config.setSimilarity(similarityService.defaultIndexSimilarity()); config.setRAMBufferSizeMB(indexingBufferSize.mbFrac()); config.setTermIndexInterval(termIndexInterval); config.setReaderTermsIndexDivisor(termIndexDivisor); config.setMaxThreadStates(indexConcurrency); indexWriter = new IndexWriter(store.directory(), config); } catch (IOException e) { safeClose(indexWriter); throw e; } return indexWriter; }
From source file:org.elasticsearch.index.fielddata.fieldcomparator.ReplaceMissingTests.java
License:Apache License
public void test() throws Exception { Directory dir = newDirectory();/* ww w .j a v a2 s .co m*/ IndexWriterConfig iwc = newIndexWriterConfig(null); iwc.setMergePolicy(newLogMergePolicy()); IndexWriter iw = new IndexWriter(dir, iwc); Document doc = new Document(); doc.add(new SortedDocValuesField("field", new BytesRef("cat"))); iw.addDocument(doc); doc = new Document(); iw.addDocument(doc); doc = new Document(); doc.add(new SortedDocValuesField("field", new BytesRef("dog"))); iw.addDocument(doc); iw.forceMerge(1); iw.close(); DirectoryReader reader = DirectoryReader.open(dir); LeafReader ar = getOnlySegmentReader(reader); SortedDocValues raw = ar.getSortedDocValues("field"); assertEquals(2, raw.getValueCount()); // existing values SortedDocValues dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("cat")); assertEquals(2, dv.getValueCount()); assertEquals("cat", dv.lookupOrd(0).utf8ToString()); assertEquals("dog", dv.lookupOrd(1).utf8ToString()); assertEquals(0, dv.getOrd(0)); assertEquals(0, dv.getOrd(1)); assertEquals(1, dv.getOrd(2)); dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("dog")); assertEquals(2, dv.getValueCount()); assertEquals("cat", dv.lookupOrd(0).utf8ToString()); assertEquals("dog", dv.lookupOrd(1).utf8ToString()); assertEquals(0, dv.getOrd(0)); assertEquals(1, dv.getOrd(1)); assertEquals(1, dv.getOrd(2)); // non-existing values dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("apple")); assertEquals(3, dv.getValueCount()); assertEquals("apple", dv.lookupOrd(0).utf8ToString()); assertEquals("cat", dv.lookupOrd(1).utf8ToString()); assertEquals("dog", dv.lookupOrd(2).utf8ToString()); assertEquals(1, dv.getOrd(0)); assertEquals(0, dv.getOrd(1)); assertEquals(2, dv.getOrd(2)); dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("company")); assertEquals(3, dv.getValueCount()); assertEquals("cat", dv.lookupOrd(0).utf8ToString()); assertEquals("company", dv.lookupOrd(1).utf8ToString()); assertEquals("dog", dv.lookupOrd(2).utf8ToString()); assertEquals(0, dv.getOrd(0)); assertEquals(1, dv.getOrd(1)); assertEquals(2, dv.getOrd(2)); dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("ebay")); assertEquals(3, dv.getValueCount()); assertEquals("cat", dv.lookupOrd(0).utf8ToString()); assertEquals("dog", dv.lookupOrd(1).utf8ToString()); assertEquals("ebay", dv.lookupOrd(2).utf8ToString()); assertEquals(0, dv.getOrd(0)); assertEquals(2, dv.getOrd(1)); assertEquals(1, dv.getOrd(2)); reader.close(); dir.close(); }
From source file:org.elasticsearch.index.fielddata.FieldDataCacheTests.java
License:Apache License
public void testLoadGlobal_neverCacheIfFieldIsMissing() throws Exception { Directory dir = newDirectory();/* ww w . j av a 2 s . co m*/ IndexWriterConfig iwc = new IndexWriterConfig(null); iwc.setMergePolicy(NoMergePolicy.INSTANCE); IndexWriter iw = new IndexWriter(dir, iwc); long numDocs = scaledRandomIntBetween(32, 128); for (int i = 1; i <= numDocs; i++) { Document doc = new Document(); doc.add(new SortedSetDocValuesField("field1", new BytesRef(String.valueOf(i)))); doc.add(new StringField("field2", String.valueOf(i), Field.Store.NO)); iw.addDocument(doc); if (i % 24 == 0) { iw.commit(); } } iw.close(); DirectoryReader ir = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(dir), new ShardId("_index", 0)); DummyAccountingFieldDataCache fieldDataCache = new DummyAccountingFieldDataCache(); // Testing SortedSetDVOrdinalsIndexFieldData: SortedSetDVOrdinalsIndexFieldData sortedSetDVOrdinalsIndexFieldData = createSortedDV("field1", fieldDataCache); sortedSetDVOrdinalsIndexFieldData.loadGlobal(ir); assertThat(fieldDataCache.cachedGlobally, equalTo(1)); sortedSetDVOrdinalsIndexFieldData.loadGlobal(new FieldMaskingReader("field1", ir)); assertThat(fieldDataCache.cachedGlobally, equalTo(1)); // Testing PagedBytesIndexFieldData PagedBytesIndexFieldData pagedBytesIndexFieldData = createPagedBytes("field2", fieldDataCache); pagedBytesIndexFieldData.loadGlobal(ir); assertThat(fieldDataCache.cachedGlobally, equalTo(2)); pagedBytesIndexFieldData.loadGlobal(new FieldMaskingReader("field2", ir)); assertThat(fieldDataCache.cachedGlobally, equalTo(2)); ir.close(); dir.close(); }
From source file:org.elasticsearch.index.query.PercolateQueryTests.java
License:Apache License
@Before public void init() throws Exception { directory = newDirectory();/*ww w.ja v a 2 s. co m*/ queries = new HashMap<>(); queryRegistry = ctx -> docId -> { try { String val = ctx.reader().document(docId).get(UidFieldMapper.NAME); return queries.get(Uid.createUid(val).id()); } catch (IOException e) { throw new RuntimeException(e); } }; IndexWriterConfig config = new IndexWriterConfig(new WhitespaceAnalyzer()); config.setMergePolicy(NoMergePolicy.INSTANCE); indexWriter = new IndexWriter(directory, config); }