List of usage examples for org.apache.lucene.index.IndexWriter.forceMerge
public void forceMerge(int maxNumSegments) throws IOException
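forceMerge(maxNumSegments) asks the merge policy to merge segments until at most maxNumSegments remain. It is an expensive, blocking call, typically reserved for indices that will no longer be written to. A minimal self-contained sketch of the call (assuming Lucene 5.x or later, where IndexWriterConfig no longer takes a Version argument; the class name and index path are illustrative):

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class ForceMergeExample {
    public static void main(String[] args) throws Exception {
        try (Directory dir = FSDirectory.open(Paths.get("force-merge-example"));
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            for (int i = 0; i < 1000; i++) {
                Document doc = new Document();
                doc.add(new StringField("id", Integer.toString(i), Store.NO));
                writer.addDocument(doc);
            }
            writer.commit();
            // Merge the whole index down to a single segment.
            // This call blocks until the requested merges complete.
            writer.forceMerge(1);
        }
    }
}

A recurring pattern in the examples below is calling forceMerge(1) just before closing the writer, so that readers opened afterwards see a single segment.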
From source file: org.elasticsearch.benchmark.fielddata.LongFieldDataBenchmark.java
License: Apache License
public static void main(String[] args) throws Exception {
    final IndexWriterConfig iwc = new IndexWriterConfig(Lucene.VERSION, new KeywordAnalyzer());
    final String fieldName = "f";
    final int numDocs = 1000000;
    System.out.println("Data\tLoading time\tImplementation\tActual size\tExpected size");
    for (Data data : Data.values()) {
        final RAMDirectory dir = new RAMDirectory();
        final IndexWriter indexWriter = new IndexWriter(dir, iwc);
        for (int i = 0; i < numDocs; ++i) {
            final Document doc = new Document();
            final int numFields = data.numValues();
            for (int j = 0; j < numFields; ++j) {
                doc.add(new LongField(fieldName, data.nextValue(), Store.NO));
            }
            indexWriter.addDocument(doc);
        }
        indexWriter.forceMerge(1);
        indexWriter.close();
        final DirectoryReader dr = DirectoryReader.open(dir);
        final IndexFieldDataService fds = new IndexFieldDataService(new Index("dummy"),
                new DummyCircuitBreakerService());
        final LongFieldMapper mapper = new LongFieldMapper.Builder(fieldName)
                .build(new BuilderContext(null, new ContentPath(1)));
        final IndexNumericFieldData<AtomicNumericFieldData> fd = fds.getForField(mapper);
        final long start = System.nanoTime();
        final AtomicNumericFieldData afd = fd.loadDirect(SlowCompositeReaderWrapper.wrap(dr).getContext());
        final long loadingTimeMs = (System.nanoTime() - start) / 1000 / 1000;
        System.out.println(data + "\t" + loadingTimeMs + "\t" + afd.getClass().getSimpleName() + "\t"
                + RamUsageEstimator.humanSizeOf(afd.getLongValues()) + "\t"
                + RamUsageEstimator.humanReadableUnits(afd.getMemorySizeInBytes()));
        dr.close();
    }
}
From source file: org.elasticsearch.common.lucene.uid.VersionsTests.java
License: Apache License
@Test
public void testMergingOldIndices() throws Exception {
    final IndexWriterConfig iwConf = new IndexWriterConfig(Lucene.VERSION, new KeywordAnalyzer());
    iwConf.setMergePolicy(new IndexUpgraderMergePolicy(iwConf.getMergePolicy()));
    final Directory dir = newDirectory();
    final IndexWriter iw = new IndexWriter(dir, iwConf);
    // 1st segment, no _version
    Document document = new Document();
    // Add a dummy field (enough to trigger #3237)
    document.add(new StringField("a", "b", Store.NO));
    StringField uid = new StringField(UidFieldMapper.NAME, "1", Store.YES);
    document.add(uid);
    iw.addDocument(document);
    uid.setStringValue("2");
    iw.addDocument(document);
    iw.commit();
    // 2nd segment, old layout
    document = new Document();
    UidField uidAndVersion = new UidField("3", 3L);
    document.add(uidAndVersion);
    iw.addDocument(document);
    uidAndVersion.uid = "4";
    uidAndVersion.version = 4L;
    iw.addDocument(document);
    iw.commit();
    // 3rd segment, new layout
    document = new Document();
    uid.setStringValue("5");
    Field version = new NumericDocValuesField(VersionFieldMapper.NAME, 5L);
    document.add(uid);
    document.add(version);
    iw.addDocument(document);
    uid.setStringValue("6");
    version.setLongValue(6L);
    iw.addDocument(document);
    iw.commit();
    final Map<String, Long> expectedVersions = ImmutableMap.<String, Long>builder()
            .put("1", 0L).put("2", 0L).put("3", 0L).put("4", 4L).put("5", 5L).put("6", 6L).build();
    // Force merge and check versions
    iw.forceMerge(1);
    final AtomicReader ir = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(iw.getDirectory()));
    final NumericDocValues versions = ir.getNumericDocValues(VersionFieldMapper.NAME);
    assertThat(versions, notNullValue());
    for (int i = 0; i < ir.maxDoc(); ++i) {
        final String uidValue = ir.document(i).get(UidFieldMapper.NAME);
        final long expectedVersion = expectedVersions.get(uidValue);
        assertThat(versions.get(i), equalTo(expectedVersion));
    }
    iw.close();
    assertThat(IndexWriter.isLocked(iw.getDirectory()), is(false));
    ir.close();
    dir.close();
}
From source file: org.elasticsearch.common.UUIDTests.java
License: Apache License
private static double testCompression(int numDocs, int numDocsPerSecond, int numNodes, Logger logger)
        throws Exception {
    final double intervalBetweenDocs = 1000. / numDocsPerSecond; // milliseconds
    final byte[][] macAddresses = new byte[numNodes][];
    Random r = random();
    for (int i = 0; i < macAddresses.length; ++i) {
        macAddresses[i] = new byte[6];
        random().nextBytes(macAddresses[i]);
    }
    UUIDGenerator generator = new TimeBasedUUIDGenerator() {
        double currentTimeMillis = System.currentTimeMillis();

        @Override
        protected long currentTimeMillis() {
            currentTimeMillis += intervalBetweenDocs * 2 * r.nextDouble();
            return (long) currentTimeMillis;
        }

        @Override
        protected byte[] macAddress() {
            return RandomPicks.randomFrom(r, macAddresses);
        }
    };
    // Avoid randomization which will slow down things without improving
    // the quality of this test
    Directory dir = newFSDirectory(createTempDir());
    IndexWriterConfig config = new IndexWriterConfig().setMergeScheduler(new SerialMergeScheduler()); // for reproducibility
    IndexWriter w = new IndexWriter(dir, config);
    Document doc = new Document();
    StringField id = new StringField("_id", "", Store.NO);
    doc.add(id);
    long start = System.nanoTime();
    for (int i = 0; i < numDocs; ++i) {
        id.setStringValue(generator.getBase64UUID());
        w.addDocument(doc);
    }
    w.forceMerge(1);
    long time = (System.nanoTime() - start) / 1000 / 1000;
    w.close();
    long size = 0;
    for (String file : dir.listAll()) {
        size += dir.fileLength(file);
    }
    dir.close();
    double bytesPerDoc = (double) size / numDocs;
    logger.info(numDocs + " docs indexed at " + numDocsPerSecond + " docs/s required " + new ByteSizeValue(size)
            + " bytes of disk space, or " + bytesPerDoc + " bytes per document. Took: " + new TimeValue(time) + ".");
    return bytesPerDoc;
}
From source file: org.elasticsearch.index.cache.bitset.BitSetFilterCacheTest.java
License: Apache License
@Test
public void testInvalidateEntries() throws Exception {
    IndexWriter writer = new IndexWriter(new RAMDirectory(),
            new IndexWriterConfig(new StandardAnalyzer()).setMergePolicy(new LogByteSizeMergePolicy()));
    Document document = new Document();
    document.add(new StringField("field", "value", Field.Store.NO));
    writer.addDocument(document);
    writer.commit();

    document = new Document();
    document.add(new StringField("field", "value", Field.Store.NO));
    writer.addDocument(document);
    writer.commit();

    document = new Document();
    document.add(new StringField("field", "value", Field.Store.NO));
    writer.addDocument(document);
    writer.commit();

    IndexReader reader = DirectoryReader.open(writer, false);
    IndexSearcher searcher = new IndexSearcher(reader);

    BitsetFilterCache cache = new BitsetFilterCache(new Index("test"), ImmutableSettings.EMPTY);
    BitDocIdSetFilter filter = cache.getBitDocIdSetFilter(Queries.wrap(new TermQuery(new Term("field", "value"))));
    TopDocs docs = searcher.search(new ConstantScoreQuery(filter), 1);
    assertThat(docs.totalHits, equalTo(3));

    // now cached
    docs = searcher.search(new ConstantScoreQuery(filter), 1);
    assertThat(docs.totalHits, equalTo(3));
    // There are 3 segments
    assertThat(cache.getLoadedFilters().size(), equalTo(3l));

    writer.forceMerge(1);
    reader.close();
    reader = DirectoryReader.open(writer, false);
    searcher = new IndexSearcher(reader);

    docs = searcher.search(new ConstantScoreQuery(filter), 1);
    assertThat(docs.totalHits, equalTo(3));

    // now cached
    docs = searcher.search(new ConstantScoreQuery(filter), 1);
    assertThat(docs.totalHits, equalTo(3));
    // Only one segment now, so the size must be 1
    assertThat(cache.getLoadedFilters().size(), equalTo(1l));

    reader.close();
    writer.close();
    // There is no reference from readers and writer to any segment in the test index, so the size in the fbs cache must be 0
    assertThat(cache.getLoadedFilters().size(), equalTo(0l));
}
From source file: org.elasticsearch.index.cache.bitset.BitSetFilterCacheTests.java
License: Apache License
@Test
public void testInvalidateEntries() throws Exception {
    IndexWriter writer = new IndexWriter(new RAMDirectory(),
            new IndexWriterConfig(new StandardAnalyzer()).setMergePolicy(new LogByteSizeMergePolicy()));
    Document document = new Document();
    document.add(new StringField("field", "value", Field.Store.NO));
    writer.addDocument(document);
    writer.commit();

    document = new Document();
    document.add(new StringField("field", "value", Field.Store.NO));
    writer.addDocument(document);
    writer.commit();

    document = new Document();
    document.add(new StringField("field", "value", Field.Store.NO));
    writer.addDocument(document);
    writer.commit();

    DirectoryReader reader = DirectoryReader.open(writer, false);
    reader = ElasticsearchDirectoryReader.wrap(reader, new ShardId(new Index("test"), 0));
    IndexSearcher searcher = new IndexSearcher(reader);

    BitsetFilterCache cache = new BitsetFilterCache(new Index("test"), Settings.EMPTY);
    BitSetProducer filter = cache.getBitSetProducer(new TermQuery(new Term("field", "value")));
    assertThat(matchCount(filter, reader), equalTo(3));

    // now cached
    assertThat(matchCount(filter, reader), equalTo(3));
    // There are 3 segments
    assertThat(cache.getLoadedFilters().size(), equalTo(3l));

    writer.forceMerge(1);
    reader.close();
    reader = DirectoryReader.open(writer, false);
    reader = ElasticsearchDirectoryReader.wrap(reader, new ShardId(new Index("test"), 0));
    searcher = new IndexSearcher(reader);

    assertThat(matchCount(filter, reader), equalTo(3));

    // now cached
    assertThat(matchCount(filter, reader), equalTo(3));
    // Only one segment now, so the size must be 1
    assertThat(cache.getLoadedFilters().size(), equalTo(1l));

    reader.close();
    writer.close();
    // There is no reference from readers and writer to any segment in the test index, so the size in the fbs cache must be 0
    assertThat(cache.getLoadedFilters().size(), equalTo(0l));
}
From source file: org.elasticsearch.index.cache.fixedbitset.FixedBitSetFilterCacheTest.java
License: Apache License
@Test
public void testInvalidateEntries() throws Exception {
    IndexWriter writer = new IndexWriter(new RAMDirectory(),
            new IndexWriterConfig(Lucene.VERSION, new StandardAnalyzer(Lucene.VERSION))
                    .setMergePolicy(new LogByteSizeMergePolicy()));
    Document document = new Document();
    document.add(new StringField("field", "value", Field.Store.NO));
    writer.addDocument(document);
    writer.commit();

    document = new Document();
    document.add(new StringField("field", "value", Field.Store.NO));
    writer.addDocument(document);
    writer.commit();

    document = new Document();
    document.add(new StringField("field", "value", Field.Store.NO));
    writer.addDocument(document);
    writer.commit();

    IndexReader reader = DirectoryReader.open(writer, false);
    IndexSearcher searcher = new IndexSearcher(reader);

    FixedBitSetFilterCache cache = new FixedBitSetFilterCache(new Index("test"), ImmutableSettings.EMPTY);
    FixedBitSetFilter filter = cache.getFixedBitSetFilter(new TermFilter(new Term("field", "value")));
    TopDocs docs = searcher.search(new XConstantScoreQuery(filter), 1);
    assertThat(docs.totalHits, equalTo(3));

    // now cached
    docs = searcher.search(new XConstantScoreQuery(filter), 1);
    assertThat(docs.totalHits, equalTo(3));
    // There are 3 segments
    assertThat(cache.getLoadedFilters().size(), equalTo(3l));

    writer.forceMerge(1);
    reader.close();
    reader = DirectoryReader.open(writer, false);
    searcher = new IndexSearcher(reader);

    docs = searcher.search(new XConstantScoreQuery(filter), 1);
    assertThat(docs.totalHits, equalTo(3));

    // now cached
    docs = searcher.search(new XConstantScoreQuery(filter), 1);
    assertThat(docs.totalHits, equalTo(3));
    // Only one segment now, so the size must be 1
    assertThat(cache.getLoadedFilters().size(), equalTo(1l));

    reader.close();
    writer.close();
    // There is no reference from readers and writer to any segment in the test index, so the size in the fbs cache must be 0
    assertThat(cache.getLoadedFilters().size(), equalTo(0l));
}
From source file: org.elasticsearch.index.codec.postingformat.DefaultPostingsFormatTests.java
License: Apache License
@Test
public void testNoUIDField() throws IOException {
    Codec codec = new TestCodec();
    Directory d = new RAMDirectory();
    IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION, new WhitespaceAnalyzer(Lucene.VERSION));
    config.setCodec(codec);
    IndexWriter writer = new IndexWriter(d, config);
    for (int i = 0; i < 100; i++) {
        writer.addDocument(Arrays.asList(new TextField("foo", "foo bar foo bar", Store.YES),
                new TextField("some_other_field", "1234", Store.YES)));
    }
    writer.forceMerge(1);
    writer.commit();

    DirectoryReader reader = DirectoryReader.open(writer, false);
    List<AtomicReaderContext> leaves = reader.leaves();
    assertThat(leaves.size(), equalTo(1));
    AtomicReader ar = leaves.get(0).reader();
    Terms terms = ar.terms("foo");
    Terms some_other_field = ar.terms("some_other_field");
    assertThat(terms.size(), equalTo(2l));
    assertThat(terms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));
    assertThat(some_other_field, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));
    TermsEnum iterator = terms.iterator(null);
    Set<String> expected = new HashSet<String>();
    expected.add("foo");
    expected.add("bar");
    while (iterator.next() != null) {
        expected.remove(iterator.term().utf8ToString());
    }
    assertThat(expected.size(), equalTo(0));
    reader.close();
    writer.close();
    d.close();
}
From source file: org.elasticsearch.index.fielddata.fieldcomparator.ReplaceMissingTests.java
License: Apache License
public void test() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(null);
    iwc.setMergePolicy(newLogMergePolicy());
    IndexWriter iw = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new SortedDocValuesField("field", new BytesRef("cat")));
    iw.addDocument(doc);
    doc = new Document();
    iw.addDocument(doc);
    doc = new Document();
    doc.add(new SortedDocValuesField("field", new BytesRef("dog")));
    iw.addDocument(doc);
    iw.forceMerge(1);
    iw.close();

    DirectoryReader reader = DirectoryReader.open(dir);
    LeafReader ar = getOnlySegmentReader(reader);
    SortedDocValues raw = ar.getSortedDocValues("field");
    assertEquals(2, raw.getValueCount());

    // existing values
    SortedDocValues dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("cat"));
    assertEquals(2, dv.getValueCount());
    assertEquals("cat", dv.lookupOrd(0).utf8ToString());
    assertEquals("dog", dv.lookupOrd(1).utf8ToString());
    assertEquals(0, dv.getOrd(0));
    assertEquals(0, dv.getOrd(1));
    assertEquals(1, dv.getOrd(2));

    dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("dog"));
    assertEquals(2, dv.getValueCount());
    assertEquals("cat", dv.lookupOrd(0).utf8ToString());
    assertEquals("dog", dv.lookupOrd(1).utf8ToString());
    assertEquals(0, dv.getOrd(0));
    assertEquals(1, dv.getOrd(1));
    assertEquals(1, dv.getOrd(2));

    // non-existing values
    dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("apple"));
    assertEquals(3, dv.getValueCount());
    assertEquals("apple", dv.lookupOrd(0).utf8ToString());
    assertEquals("cat", dv.lookupOrd(1).utf8ToString());
    assertEquals("dog", dv.lookupOrd(2).utf8ToString());
    assertEquals(1, dv.getOrd(0));
    assertEquals(0, dv.getOrd(1));
    assertEquals(2, dv.getOrd(2));

    dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("company"));
    assertEquals(3, dv.getValueCount());
    assertEquals("cat", dv.lookupOrd(0).utf8ToString());
    assertEquals("company", dv.lookupOrd(1).utf8ToString());
    assertEquals("dog", dv.lookupOrd(2).utf8ToString());
    assertEquals(0, dv.getOrd(0));
    assertEquals(1, dv.getOrd(1));
    assertEquals(2, dv.getOrd(2));

    dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("ebay"));
    assertEquals(3, dv.getValueCount());
    assertEquals("cat", dv.lookupOrd(0).utf8ToString());
    assertEquals("dog", dv.lookupOrd(1).utf8ToString());
    assertEquals("ebay", dv.lookupOrd(2).utf8ToString());
    assertEquals(0, dv.getOrd(0));
    assertEquals(2, dv.getOrd(1));
    assertEquals(1, dv.getOrd(2));

    reader.close();
    dir.close();
}
From source file: org.elasticsearch.index.percolator.PercolatorQueryCacheTests.java
License: Apache License
public void testLoadQueries() throws Exception {
    Directory directory = newDirectory();
    IndexWriter indexWriter = new IndexWriter(directory,
            new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));

    boolean legacyFormat = randomBoolean();
    Version version = legacyFormat ? Version.V_2_0_0 : Version.CURRENT;
    IndexShard indexShard = mockIndexShard(version, legacyFormat);

    storeQuery("0", indexWriter, termQuery("field1", "value1"), true, legacyFormat);
    storeQuery("1", indexWriter, wildcardQuery("field1", "v*"), true, legacyFormat);
    storeQuery("2", indexWriter,
            boolQuery().must(termQuery("field1", "value1")).must(termQuery("field2", "value2")),
            true, legacyFormat);
    // dummy docs should be skipped during loading:
    Document doc = new Document();
    doc.add(new StringField("dummy", "value", Field.Store.YES));
    indexWriter.addDocument(doc);
    storeQuery("4", indexWriter, termQuery("field2", "value2"), true, legacyFormat);
    // only documents of the .percolator type should be loaded:
    storeQuery("5", indexWriter, termQuery("field2", "value2"), false, legacyFormat);
    storeQuery("6", indexWriter, termQuery("field3", "value3"), true, legacyFormat);
    indexWriter.forceMerge(1);

    // also include queries for percolator docs marked as deleted:
    indexWriter.deleteDocuments(new Term("id", "6"));
    indexWriter.close();

    ShardId shardId = new ShardId("_index", ClusterState.UNKNOWN_UUID, 0);
    IndexReader indexReader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(directory), shardId);
    assertThat(indexReader.leaves().size(), equalTo(1));
    assertThat(indexReader.numDeletedDocs(), equalTo(1));
    assertThat(indexReader.maxDoc(), equalTo(7));

    initialize("field1", "type=keyword", "field2", "type=keyword", "field3", "type=keyword");

    PercolatorQueryCache.QueriesLeaf leaf = cache.loadQueries(indexReader.leaves().get(0), indexShard);
    assertThat(leaf.queries.size(), equalTo(5));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("field1", "value1"))));
    assertThat(leaf.getQuery(1), equalTo(new WildcardQuery(new Term("field1", "v*"))));
    assertThat(leaf.getQuery(2), equalTo(new BooleanQuery.Builder()
            .add(new TermQuery(new Term("field1", "value1")), BooleanClause.Occur.MUST)
            .add(new TermQuery(new Term("field2", "value2")), BooleanClause.Occur.MUST)
            .build()));
    assertThat(leaf.getQuery(4), equalTo(new TermQuery(new Term("field2", "value2"))));
    assertThat(leaf.getQuery(6), equalTo(new TermQuery(new Term("field3", "value3"))));

    indexReader.close();
    directory.close();
}
From source file: org.elasticsearch.index.percolator.PercolatorQueryCacheTests.java
License: Apache License
public void testInvalidateEntries() throws Exception {
    Directory directory = newDirectory();
    IndexWriter indexWriter = new IndexWriter(directory,
            new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));

    storeQuery("0", indexWriter, termQuery("a", "0"), true, false);
    indexWriter.flush();
    storeQuery("1", indexWriter, termQuery("a", "1"), true, false);
    indexWriter.flush();
    storeQuery("2", indexWriter, termQuery("a", "2"), true, false);
    indexWriter.flush();

    ShardId shardId = new ShardId("_index", ClusterState.UNKNOWN_UUID, 0);
    IndexReader indexReader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(indexWriter), shardId);
    assertThat(indexReader.leaves().size(), equalTo(3));
    assertThat(indexReader.maxDoc(), equalTo(3));

    initialize("a", "type=keyword");

    IndexShard indexShard = mockIndexShard(Version.CURRENT, false);
    ThreadPool threadPool = mockThreadPool();
    IndexWarmer.Listener listener = cache.createListener(threadPool);
    listener.warmReader(indexShard, new Engine.Searcher("test", new IndexSearcher(indexReader)));
    assertThat(cache.getStats(shardId).getNumQueries(), equalTo(3L));

    PercolateQuery.QueryRegistry.Leaf leaf = cache.getQueries(indexReader.leaves().get(0));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("a", "0"))));

    leaf = cache.getQueries(indexReader.leaves().get(1));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("a", "1"))));

    leaf = cache.getQueries(indexReader.leaves().get(2));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("a", "2"))));

    // change merge policy, so that merges will actually happen:
    indexWriter.getConfig().setMergePolicy(new TieredMergePolicy());
    indexWriter.deleteDocuments(new Term("id", "1"));
    indexWriter.forceMergeDeletes();
    indexReader.close();
    indexReader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(indexWriter), shardId);
    assertThat(indexReader.leaves().size(), equalTo(2));
    assertThat(indexReader.maxDoc(), equalTo(2));
    listener.warmReader(indexShard, new Engine.Searcher("test", new IndexSearcher(indexReader)));
    assertThat(cache.getStats(shardId).getNumQueries(), equalTo(2L));

    leaf = cache.getQueries(indexReader.leaves().get(0));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("a", "0"))));

    leaf = cache.getQueries(indexReader.leaves().get(1));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("a", "2"))));

    indexWriter.forceMerge(1);
    indexReader.close();
    indexReader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(indexWriter), shardId);
    assertThat(indexReader.leaves().size(), equalTo(1));
    assertThat(indexReader.maxDoc(), equalTo(2));
    listener.warmReader(indexShard, new Engine.Searcher("test", new IndexSearcher(indexReader)));
    assertThat(cache.getStats(shardId).getNumQueries(), equalTo(2L));

    leaf = cache.getQueries(indexReader.leaves().get(0));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("a", "0"))));
    assertThat(leaf.getQuery(1), equalTo(new TermQuery(new Term("a", "2"))));

    indexWriter.close();
    indexReader.close();
    directory.close();
}