List of usage examples for org.apache.lucene.index IndexWriterConfig setMaxBufferedDocs
@Override public IndexWriterConfig setMaxBufferedDocs(int maxBufferedDocs)
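setMaxBufferedDocs controls how many documents IndexWriter buffers in memory before flushing them as a new segment. It works alongside setRAMBufferSizeMB: when both are enabled, whichever threshold is crossed first triggers the flush. Before the examples from real projects below, here is a minimal, self-contained sketch of the typical call pattern (assuming Lucene 5.x or later, where IndexWriterConfig takes only an Analyzer and FSDirectory.open takes a Path; the index path and the 1000-document threshold are illustrative, not taken from the examples):

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class MaxBufferedDocsExample {
    public static void main(String[] args) throws Exception {
        // Path is illustrative; any Directory implementation works here.
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"))) {
            IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
            // Flush buffered documents to a new segment once 1000 are held
            // in memory. The RAM threshold below is checked as well, and
            // whichever limit is hit first triggers the flush.
            config.setMaxBufferedDocs(1000);
            config.setRAMBufferSizeMB(64.0);
            try (IndexWriter writer = new IndexWriter(dir, config)) {
                Document doc = new Document();
                doc.add(new TextField("body", "hello world", Field.Store.NO));
                writer.addDocument(doc);
                writer.commit();
            }
        }
    }
}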
From source file:org.drftpd.vfs.index.lucene.LuceneEngine.java
License:Open Source License
/**
 * Opens all the needed streams that the engine needs to work properly.
 *
 * @throws IndexException
 */
private void openStreams() throws IndexException {
    try {
        if (_nativeLocking) {
            _storage = FSDirectory.open(new File(INDEX_DIR), new NativeFSLockFactory(INDEX_DIR));
        } else {
            _storage = FSDirectory.open(new File(INDEX_DIR));
        }

        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, ANALYZER);
        conf.setMaxBufferedDocs(_maxDocsBuffer);
        conf.setRAMBufferSizeMB(_maxRAMBufferSize);

        _iWriter = new IndexWriter(_storage, conf);
    } catch (IOException e) {
        closeAll();
        throw new IndexException("Unable to initialize the index", e);
    }
}
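In this openStreams() method both flush triggers are active: the writer flushes once _maxDocsBuffer documents are buffered or once the RAM buffer reaches _maxRAMBufferSize MB, whichever comes first. To flush by RAM usage alone, doc-count flushing can be disabled via the IndexWriterConfig.DISABLE_AUTO_FLUSH constant, as in this two-line sketch against the same Lucene 3.x API:

    conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // flush only when the RAM buffer fills
    conf.setRAMBufferSizeMB(_maxRAMBufferSize);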
From source file:org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReaderTests.java
License:Apache License
/** Test that core cache key (needed for NRT) is working */
public void testCoreCacheKey() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    iwc.setMaxBufferedDocs(100);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    IndexWriter iw = new IndexWriter(dir, iwc);

    // add two docs, id:0 and id:1
    Document doc = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    doc.add(idField);
    idField.setStringValue("0");
    iw.addDocument(doc);
    idField.setStringValue("1");
    iw.addDocument(doc);

    // open reader
    ShardId shardId = new ShardId(new Index("fake"), 1);
    DirectoryReader ir = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(iw, true), shardId);
    assertEquals(2, ir.numDocs());
    assertEquals(1, ir.leaves().size());

    // delete id:0 and reopen
    iw.deleteDocuments(new Term("id", "0"));
    DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);

    // we should have the same cache key as before
    assertEquals(1, ir2.numDocs());
    assertEquals(1, ir2.leaves().size());
    assertSame(ir.leaves().get(0).reader().getCoreCacheKey(), ir2.leaves().get(0).reader().getCoreCacheKey());
    // this is kind of stupid, but for now its here
    assertNotSame(ir.leaves().get(0).reader().getCombinedCoreAndDeletesKey(),
            ir2.leaves().get(0).reader().getCombinedCoreAndDeletesKey());

    IOUtils.close(ir, ir2, iw, dir);
}
From source file:org.elasticsearch.common.lucene.index.ESDirectoryReaderTests.java
License:Apache License
/** Test that core cache key (needed for NRT) is working */
public void testCoreCacheKey() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    iwc.setMaxBufferedDocs(100);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    IndexWriter iw = new IndexWriter(dir, iwc);

    // add two docs, id:0 and id:1
    Document doc = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    doc.add(idField);
    idField.setStringValue("0");
    iw.addDocument(doc);
    idField.setStringValue("1");
    iw.addDocument(doc);

    // open reader
    ShardId shardId = new ShardId(new Index("fake"), 1);
    DirectoryReader ir = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(iw, true), shardId);
    assertEquals(2, ir.numDocs());
    assertEquals(1, ir.leaves().size());

    // delete id:0 and reopen
    iw.deleteDocuments(new Term("id", "0"));
    DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);

    // we should have the same cache key as before
    assertEquals(1, ir2.numDocs());
    assertEquals(1, ir2.leaves().size());
    assertSame(ir.leaves().get(0).reader().getCoreCacheKey(), ir2.leaves().get(0).reader().getCoreCacheKey());

    IOUtils.close(ir, ir2, iw, dir);
}
From source file:org.elasticsearch.common.lucene.LuceneTests.java
License:Apache License
public void testWaitForIndex() throws Exception {
    final MockDirectoryWrapper dir = newMockDirectory();
    final AtomicBoolean succeeded = new AtomicBoolean(false);
    final CountDownLatch latch = new CountDownLatch(1);

    // Create a shadow Engine, which will freak out because there is no index yet
    Thread t = new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                latch.await();
                if (Lucene.waitForIndex(dir, 5000)) {
                    succeeded.set(true);
                } else {
                    fail("index should have eventually existed!");
                }
            } catch (InterruptedException e) {
                // ignore interruptions
            } catch (Exception e) {
                fail("should have been able to create the engine! " + e.getMessage());
            }
        }
    });
    t.start();

    // count down latch
    // now shadow engine should try to be created
    latch.countDown();

    dir.setEnableVirusScanner(false);
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setMaxBufferedDocs(2);
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();
    t.join();

    writer.close();
    dir.close();
    assertTrue("index should have eventually existed", succeeded.get());
}
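This test, and the LuceneTests cases that follow, pair setMaxBufferedDocs(2) with NoMergePolicy.INSTANCE: every second buffered document forces a flush to a new segment, and disabling merges keeps each flushed segment (and its files) on disk, which is what the file-level assertions in these tests rely on.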
From source file:org.elasticsearch.common.lucene.LuceneTests.java
License:Apache License
public void testCleanIndex() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    dir.setEnableVirusScanner(false);
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setMaxBufferedDocs(2);
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();
    doc = new Document();
    doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();
    doc = new Document();
    doc.add(new TextField("id", "4", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.deleteDocuments(new Term("id", "2"));
    writer.commit();

    try (DirectoryReader open = DirectoryReader.open(writer, true)) {
        assertEquals(3, open.numDocs());
        assertEquals(1, open.numDeletedDocs());
        assertEquals(4, open.maxDoc());
    }
    writer.close();

    if (random().nextBoolean()) {
        for (String file : dir.listAll()) {
            if (file.startsWith("_1")) {
                // delete a random file
                dir.deleteFile(file);
                break;
            }
        }
    }
    Lucene.cleanLuceneIndex(dir);
    if (dir.listAll().length > 0) {
        for (String file : dir.listAll()) {
            if (file.startsWith("extra") == false) {
                assertEquals(file, "write.lock");
            }
        }
    }
    dir.close();
}
From source file:org.elasticsearch.common.lucene.LuceneTests.java
License:Apache License
public void testPruneUnreferencedFiles() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    dir.setEnableVirusScanner(false);
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setMaxBufferedDocs(2);
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();
    doc = new Document();
    doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();
    SegmentInfos segmentCommitInfos = Lucene.readSegmentInfos(dir);
    doc = new Document();
    doc.add(new TextField("id", "4", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.deleteDocuments(new Term("id", "2"));
    writer.commit();
    DirectoryReader open = DirectoryReader.open(writer, true);
    assertEquals(3, open.numDocs());
    assertEquals(1, open.numDeletedDocs());
    assertEquals(4, open.maxDoc());
    open.close();
    writer.close();

    SegmentInfos si = Lucene.pruneUnreferencedFiles(segmentCommitInfos.getSegmentsFileName(), dir);
    assertEquals(si.getSegmentsFileName(), segmentCommitInfos.getSegmentsFileName());
    open = DirectoryReader.open(dir);
    assertEquals(3, open.numDocs());
    assertEquals(0, open.numDeletedDocs());
    assertEquals(3, open.maxDoc());

    IndexSearcher s = new IndexSearcher(open);
    assertEquals(s.search(new TermQuery(new Term("id", "1")), 1).totalHits, 1);
    assertEquals(s.search(new TermQuery(new Term("id", "2")), 1).totalHits, 1);
    assertEquals(s.search(new TermQuery(new Term("id", "3")), 1).totalHits, 1);
    assertEquals(s.search(new TermQuery(new Term("id", "4")), 1).totalHits, 0);
    for (String file : dir.listAll()) {
        assertFalse("unexpected file: " + file, file.equals("segments_3") || file.startsWith("_2"));
    }
    open.close();
    dir.close();
}
From source file:org.elasticsearch.common.lucene.LuceneTests.java
License:Apache License
public void testFiles() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    dir.setEnableVirusScanner(false);
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setMaxBufferedDocs(2);
    iwc.setUseCompoundFile(true);
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();
    Set<String> files = new HashSet<>();
    for (String f : Lucene.files(Lucene.readSegmentInfos(dir))) {
        files.add(f);
    }
    final boolean simpleTextCFS = files.contains("_0.scf");
    assertTrue(files.toString(), files.contains("segments_1"));
    if (simpleTextCFS) {
        assertFalse(files.toString(), files.contains("_0.cfs"));
        assertFalse(files.toString(), files.contains("_0.cfe"));
    } else {
        assertTrue(files.toString(), files.contains("_0.cfs"));
        assertTrue(files.toString(), files.contains("_0.cfe"));
    }
    assertTrue(files.toString(), files.contains("_0.si"));

    doc = new Document();
    doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();

    files.clear();
    for (String f : Lucene.files(Lucene.readSegmentInfos(dir))) {
        files.add(f);
    }
    assertFalse(files.toString(), files.contains("segments_1"));
    assertTrue(files.toString(), files.contains("segments_2"));
    if (simpleTextCFS) {
        assertFalse(files.toString(), files.contains("_0.cfs"));
        assertFalse(files.toString(), files.contains("_0.cfe"));
    } else {
        assertTrue(files.toString(), files.contains("_0.cfs"));
        assertTrue(files.toString(), files.contains("_0.cfe"));
    }
    assertTrue(files.toString(), files.contains("_0.si"));
    if (simpleTextCFS) {
        assertFalse(files.toString(), files.contains("_1.cfs"));
        assertFalse(files.toString(), files.contains("_1.cfe"));
    } else {
        assertTrue(files.toString(), files.contains("_1.cfs"));
        assertTrue(files.toString(), files.contains("_1.cfe"));
    }
    assertTrue(files.toString(), files.contains("_1.si"));
    writer.close();
    dir.close();
}
From source file:org.elasticsearch.search.aggregations.bucket.sampler.SamplerAggregatorTests.java
License:Apache License
/**
 * Uses the sampler aggregation to find the minimum value of a field out of the top 3 scoring documents in a search.
 */
public void testSampler() throws IOException {
    TextFieldType textFieldType = new TextFieldType();
    textFieldType.setIndexAnalyzer(new NamedAnalyzer("foo", AnalyzerScope.GLOBAL, new StandardAnalyzer()));
    MappedFieldType numericFieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
    numericFieldType.setName("int");

    IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
    indexWriterConfig.setMaxBufferedDocs(100);
    indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment with predictable docIds
    try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
        for (long value : new long[] { 7, 3, -10, -6, 5, 50 }) {
            Document doc = new Document();
            StringBuilder text = new StringBuilder();
            for (int i = 0; i < value; i++) {
                text.append("good ");
            }
            doc.add(new Field("text", text.toString(), textFieldType));
            doc.add(new SortedNumericDocValuesField("int", value));
            w.addDocument(doc);
        }
        SamplerAggregationBuilder aggBuilder = new SamplerAggregationBuilder("sampler").shardSize(3)
                .subAggregation(new MinAggregationBuilder("min").field("int"));
        try (IndexReader reader = DirectoryReader.open(w)) {
            assertEquals("test expects a single segment", 1, reader.leaves().size());
            IndexSearcher searcher = new IndexSearcher(reader);
            Sampler sampler = searchAndReduce(searcher, new TermQuery(new Term("text", "good")), aggBuilder,
                    textFieldType, numericFieldType);
            Min min = sampler.getAggregations().get("min");
            assertEquals(5.0, min.getValue(), 0);
        }
    }
}
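Note that this test uses setMaxBufferedDocs in the opposite way to the LuceneTests cases above: both thresholds are set high enough (100 docs / 100 MB) that no flush happens while the six documents are added, so the entire batch lands in a single segment when DirectoryReader.open(w) flushes and opens the index, giving the predictable docIds the assertion on reader.leaves() checks for. The significant-terms tests below rely on the same trick.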
From source file:org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorTests.java
License:Apache License
/**
 * Uses the significant terms aggregation to find the keywords in text fields
 */
public void testSignificance() throws IOException {
    TextFieldType textFieldType = new TextFieldType();
    textFieldType.setName("text");
    textFieldType.setFielddata(true);
    textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));

    IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
    indexWriterConfig.setMaxBufferedDocs(100);
    indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment
    try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
        addMixedTextDocs(textFieldType, w);

        SignificantTermsAggregationBuilder sigAgg = new SignificantTermsAggregationBuilder("sig_text", null)
                .field("text");
        sigAgg.executionHint(randomExecutionHint());
        if (randomBoolean()) {
            // Use a background filter which just happens to be same scope as whole-index.
            sigAgg.backgroundFilter(QueryBuilders.termsQuery("text", "common"));
        }
        SignificantTermsAggregationBuilder sigNumAgg = new SignificantTermsAggregationBuilder("sig_number", null)
                .field("long_field");
        sigNumAgg.executionHint(randomExecutionHint());

        try (IndexReader reader = DirectoryReader.open(w)) {
            assertEquals("test expects a single segment", 1, reader.leaves().size());
            IndexSearcher searcher = new IndexSearcher(reader);

            // Search "odd"
            SignificantTerms terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg,
                    textFieldType);
            assertEquals(1, terms.getBuckets().size());
            assertNull(terms.getBucketByKey("even"));
            assertNull(terms.getBucketByKey("common"));
            assertNotNull(terms.getBucketByKey("odd"));

            // Search even
            terms = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), sigAgg, textFieldType);
            assertEquals(1, terms.getBuckets().size());
            assertNull(terms.getBucketByKey("odd"));
            assertNull(terms.getBucketByKey("common"));
            assertNotNull(terms.getBucketByKey("even"));

            // Search odd with regex includeexcludes
            sigAgg.includeExclude(new IncludeExclude("o.d", null));
            terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg, textFieldType);
            assertEquals(1, terms.getBuckets().size());
            assertNotNull(terms.getBucketByKey("odd"));
            assertNull(terms.getBucketByKey("common"));
            assertNull(terms.getBucketByKey("even"));

            // Search with string-based includeexcludes
            String[] oddStrings = new String[] { "odd", "weird" };
            String[] evenStrings = new String[] { "even", "regular" };
            sigAgg.includeExclude(new IncludeExclude(oddStrings, evenStrings));
            sigAgg.significanceHeuristic(SignificanceHeuristicTests.getRandomSignificanceheuristic());
            terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg, textFieldType);
            assertEquals(1, terms.getBuckets().size());
            assertNotNull(terms.getBucketByKey("odd"));
            assertNull(terms.getBucketByKey("weird"));
            assertNull(terms.getBucketByKey("common"));
            assertNull(terms.getBucketByKey("even"));
            assertNull(terms.getBucketByKey("regular"));

            sigAgg.includeExclude(new IncludeExclude(evenStrings, oddStrings));
            terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg, textFieldType);
            assertEquals(0, terms.getBuckets().size());
            assertNull(terms.getBucketByKey("odd"));
            assertNull(terms.getBucketByKey("weird"));
            assertNull(terms.getBucketByKey("common"));
            assertNull(terms.getBucketByKey("even"));
            assertNull(terms.getBucketByKey("regular"));
        }
    }
}
From source file:org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorTests.java
License:Apache License
/**
 * Uses the significant terms aggregation to find the keywords in numeric fields
 */
public void testNumericSignificance() throws IOException {
    NumberFieldType longFieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
    longFieldType.setName("long_field");
    TextFieldType textFieldType = new TextFieldType();
    textFieldType.setName("text");
    textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));

    IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
    indexWriterConfig.setMaxBufferedDocs(100);
    indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment

    final long ODD_VALUE = 3;
    final long EVEN_VALUE = 6;
    final long COMMON_VALUE = 2;

    try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
        for (int i = 0; i < 10; i++) {
            Document doc = new Document();
            if (i % 2 == 0) {
                addFields(doc, NumberType.LONG.createFields("long_field", ODD_VALUE, true, true, false));
                doc.add(new Field("text", "odd", textFieldType));
            } else {
                addFields(doc, NumberType.LONG.createFields("long_field", EVEN_VALUE, true, true, false));
                doc.add(new Field("text", "even", textFieldType));
            }
            addFields(doc, NumberType.LONG.createFields("long_field", COMMON_VALUE, true, true, false));
            w.addDocument(doc);
        }
        SignificantTermsAggregationBuilder sigNumAgg = new SignificantTermsAggregationBuilder("sig_number", null)
                .field("long_field");
        sigNumAgg.executionHint(randomExecutionHint());
        try (IndexReader reader = DirectoryReader.open(w)) {
            assertEquals("test expects a single segment", 1, reader.leaves().size());
            IndexSearcher searcher = new IndexSearcher(reader);

            // Search "odd"
            SignificantLongTerms terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")),
                    sigNumAgg, longFieldType);
            assertEquals(1, terms.getBuckets().size());
            assertNull(terms.getBucketByKey(Long.toString(EVEN_VALUE)));
            assertNull(terms.getBucketByKey(Long.toString(COMMON_VALUE)));
            assertNotNull(terms.getBucketByKey(Long.toString(ODD_VALUE)));

            terms = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), sigNumAgg, longFieldType);
            assertEquals(1, terms.getBuckets().size());
            assertNull(terms.getBucketByKey(Long.toString(ODD_VALUE)));
            assertNull(terms.getBucketByKey(Long.toString(COMMON_VALUE)));
            assertNotNull(terms.getBucketByKey(Long.toString(EVEN_VALUE)));
        }
    }
}