Example usage for org.apache.lucene.index IndexWriterConfig setMaxBufferedDocs

Introduction

This page lists example usages of org.apache.lucene.index.IndexWriterConfig.setMaxBufferedDocs, which sets how many documents IndexWriter buffers in memory before flushing them to a new segment.

Prototype

@Override
public IndexWriterConfig setMaxBufferedDocs(int maxBufferedDocs)
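
A minimal, self-contained sketch of typical usage (illustrative only, not taken from the examples below; it assumes a Lucene version in which IndexWriterConfig takes the Analyzer directly, and the index path and buffer size are placeholder values):

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SetMaxBufferedDocsExample {
    public static void main(String[] args) throws Exception {
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"))) {
            IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
            // Flush to a new segment after this many buffered documents
            // (the RAM buffer size may still trigger a flush earlier).
            conf.setMaxBufferedDocs(1000);

            try (IndexWriter writer = new IndexWriter(dir, conf)) {
                Document doc = new Document();
                doc.add(new StringField("id", "1", Field.Store.YES));
                writer.addDocument(doc);
                writer.commit();
            }
        }
    }
}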


Usage

From source file:org.drftpd.vfs.index.lucene.LuceneEngine.java

License:Open Source License

/**
 * Opens all the needed streams that the engine needs to work properly.
 *
 * @throws IndexException
 */
private void openStreams() throws IndexException {
    try {
        if (_nativeLocking) {
            _storage = FSDirectory.open(new File(INDEX_DIR), new NativeFSLockFactory(INDEX_DIR));
        } else {
            _storage = FSDirectory.open(new File(INDEX_DIR));
        }

        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, ANALYZER);
        conf.setMaxBufferedDocs(_maxDocsBuffer);
        conf.setRAMBufferSizeMB(_maxRAMBufferSize);

        _iWriter = new IndexWriter(_storage, conf);
    } catch (IOException e) {
        closeAll();

        throw new IndexException("Unable to initialize the index", e);
    }
}

From source file:org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReaderTests.java

License:Apache License

/** Test that core cache key (needed for NRT) is working */
public void testCoreCacheKey() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    iwc.setMaxBufferedDocs(100);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    IndexWriter iw = new IndexWriter(dir, iwc);

    // add two docs, id:0 and id:1
    Document doc = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    doc.add(idField);
    idField.setStringValue("0");
    iw.addDocument(doc);
    idField.setStringValue("1");
    iw.addDocument(doc);

    // open reader
    ShardId shardId = new ShardId(new Index("fake"), 1);
    DirectoryReader ir = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(iw, true), shardId);
    assertEquals(2, ir.numDocs());
    assertEquals(1, ir.leaves().size());

    // delete id:0 and reopen
    iw.deleteDocuments(new Term("id", "0"));
    DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);

    // we should have the same cache key as before
    assertEquals(1, ir2.numDocs());
    assertEquals(1, ir2.leaves().size());
    assertSame(ir.leaves().get(0).reader().getCoreCacheKey(), ir2.leaves().get(0).reader().getCoreCacheKey());

    // this is kind of stupid, but for now it's here
    assertNotSame(ir.leaves().get(0).reader().getCombinedCoreAndDeletesKey(),
            ir2.leaves().get(0).reader().getCombinedCoreAndDeletesKey());

    IOUtils.close(ir, ir2, iw, dir);
}

From source file:org.elasticsearch.common.lucene.index.ESDirectoryReaderTests.java

License:Apache License

/** Test that core cache key (needed for NRT) is working */
public void testCoreCacheKey() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    iwc.setMaxBufferedDocs(100);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    IndexWriter iw = new IndexWriter(dir, iwc);

    // add two docs, id:0 and id:1
    Document doc = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    doc.add(idField);
    idField.setStringValue("0");
    iw.addDocument(doc);
    idField.setStringValue("1");
    iw.addDocument(doc);

    // open reader
    ShardId shardId = new ShardId(new Index("fake"), 1);
    DirectoryReader ir = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(iw, true), shardId);
    assertEquals(2, ir.numDocs());
    assertEquals(1, ir.leaves().size());

    // delete id:0 and reopen
    iw.deleteDocuments(new Term("id", "0"));
    DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);

    // we should have the same cache key as before
    assertEquals(1, ir2.numDocs());
    assertEquals(1, ir2.leaves().size());
    assertSame(ir.leaves().get(0).reader().getCoreCacheKey(), ir2.leaves().get(0).reader().getCoreCacheKey());
    IOUtils.close(ir, ir2, iw, dir);
}

From source file:org.elasticsearch.common.lucene.LuceneTests.java

License:Apache License

public void testWaitForIndex() throws Exception {
    final MockDirectoryWrapper dir = newMockDirectory();

    final AtomicBoolean succeeded = new AtomicBoolean(false);
    final CountDownLatch latch = new CountDownLatch(1);

    // Create a shadow Engine, which will freak out because there is no
    // index yet
    Thread t = new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                latch.await();
                if (Lucene.waitForIndex(dir, 5000)) {
                    succeeded.set(true);
                } else {
                    fail("index should have eventually existed!");
                }
            } catch (InterruptedException e) {
                // ignore interruptions
            } catch (Exception e) {
                fail("should have been able to create the engine! " + e.getMessage());
            }
        }
    });
    t.start();

    // count down latch
    // now shadow engine should try to be created
    latch.countDown();

    dir.setEnableVirusScanner(false);
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setMaxBufferedDocs(2);
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();

    t.join();

    writer.close();
    dir.close();
    assertTrue("index should have eventually existed", succeeded.get());
}

From source file:org.elasticsearch.common.lucene.LuceneTests.java

License:Apache License

public void testCleanIndex() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    dir.setEnableVirusScanner(false);
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setMaxBufferedDocs(2);
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();

    doc = new Document();
    doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    writer.commit();
    doc = new Document();
    doc.add(new TextField("id", "4", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    writer.deleteDocuments(new Term("id", "2"));
    writer.commit();
    try (DirectoryReader open = DirectoryReader.open(writer, true)) {
        assertEquals(3, open.numDocs());
        assertEquals(1, open.numDeletedDocs());
        assertEquals(4, open.maxDoc());
    }
    writer.close();
    if (random().nextBoolean()) {
        for (String file : dir.listAll()) {
            if (file.startsWith("_1")) {
                // delete a random file
                dir.deleteFile(file);
                break;
            }
        }
    }
    Lucene.cleanLuceneIndex(dir);
    if (dir.listAll().length > 0) {
        for (String file : dir.listAll()) {
            if (file.startsWith("extra") == false) {
                assertEquals(file, "write.lock");
            }
        }
    }
    dir.close();
}

From source file:org.elasticsearch.common.lucene.LuceneTests.java

License:Apache License

public void testPruneUnreferencedFiles() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    dir.setEnableVirusScanner(false);
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setMaxBufferedDocs(2);
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();

    doc = new Document();
    doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    writer.commit();
    SegmentInfos segmentCommitInfos = Lucene.readSegmentInfos(dir);

    doc = new Document();
    doc.add(new TextField("id", "4", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    writer.deleteDocuments(new Term("id", "2"));
    writer.commit();
    DirectoryReader open = DirectoryReader.open(writer, true);
    assertEquals(3, open.numDocs());
    assertEquals(1, open.numDeletedDocs());
    assertEquals(4, open.maxDoc());
    open.close();
    writer.close();
    SegmentInfos si = Lucene.pruneUnreferencedFiles(segmentCommitInfos.getSegmentsFileName(), dir);
    assertEquals(si.getSegmentsFileName(), segmentCommitInfos.getSegmentsFileName());
    open = DirectoryReader.open(dir);
    assertEquals(3, open.numDocs());
    assertEquals(0, open.numDeletedDocs());
    assertEquals(3, open.maxDoc());

    IndexSearcher s = new IndexSearcher(open);
    assertEquals(s.search(new TermQuery(new Term("id", "1")), 1).totalHits, 1);
    assertEquals(s.search(new TermQuery(new Term("id", "2")), 1).totalHits, 1);
    assertEquals(s.search(new TermQuery(new Term("id", "3")), 1).totalHits, 1);
    assertEquals(s.search(new TermQuery(new Term("id", "4")), 1).totalHits, 0);

    for (String file : dir.listAll()) {
        assertFalse("unexpected file: " + file, file.equals("segments_3") || file.startsWith("_2"));
    }
    open.close();
    dir.close();

}

From source file:org.elasticsearch.common.lucene.LuceneTests.java

License:Apache License

public void testFiles() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    dir.setEnableVirusScanner(false);
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setMaxBufferedDocs(2);
    iwc.setUseCompoundFile(true);
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();
    Set<String> files = new HashSet<>();
    for (String f : Lucene.files(Lucene.readSegmentInfos(dir))) {
        files.add(f);
    }
    final boolean simpleTextCFS = files.contains("_0.scf");
    assertTrue(files.toString(), files.contains("segments_1"));
    if (simpleTextCFS) {
        assertFalse(files.toString(), files.contains("_0.cfs"));
        assertFalse(files.toString(), files.contains("_0.cfe"));
    } else {
        assertTrue(files.toString(), files.contains("_0.cfs"));
        assertTrue(files.toString(), files.contains("_0.cfe"));
    }
    assertTrue(files.toString(), files.contains("_0.si"));

    doc = new Document();
    doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();

    files.clear();
    for (String f : Lucene.files(Lucene.readSegmentInfos(dir))) {
        files.add(f);
    }
    assertFalse(files.toString(), files.contains("segments_1"));
    assertTrue(files.toString(), files.contains("segments_2"));
    if (simpleTextCFS) {
        assertFalse(files.toString(), files.contains("_0.cfs"));
        assertFalse(files.toString(), files.contains("_0.cfe"));
    } else {
        assertTrue(files.toString(), files.contains("_0.cfs"));
        assertTrue(files.toString(), files.contains("_0.cfe"));
    }
    assertTrue(files.toString(), files.contains("_0.si"));

    if (simpleTextCFS) {
        assertFalse(files.toString(), files.contains("_1.cfs"));
        assertFalse(files.toString(), files.contains("_1.cfe"));
    } else {
        assertTrue(files.toString(), files.contains("_1.cfs"));
        assertTrue(files.toString(), files.contains("_1.cfe"));
    }
    assertTrue(files.toString(), files.contains("_1.si"));
    writer.close();
    dir.close();
}

From source file:org.elasticsearch.search.aggregations.bucket.sampler.SamplerAggregatorTests.java

License:Apache License

/**
 * Uses the sampler aggregation to find the minimum value of a field out of the top 3 scoring documents in a search.
 */
public void testSampler() throws IOException {
    TextFieldType textFieldType = new TextFieldType();
    textFieldType.setIndexAnalyzer(new NamedAnalyzer("foo", AnalyzerScope.GLOBAL, new StandardAnalyzer()));
    MappedFieldType numericFieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
    numericFieldType.setName("int");

    IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
    indexWriterConfig.setMaxBufferedDocs(100);
    indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment with predictable docIds
    try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
        for (long value : new long[] { 7, 3, -10, -6, 5, 50 }) {
            Document doc = new Document();
            StringBuilder text = new StringBuilder();
            for (int i = 0; i < value; i++) {
                text.append("good ");
            }
            doc.add(new Field("text", text.toString(), textFieldType));
            doc.add(new SortedNumericDocValuesField("int", value));
            w.addDocument(doc);
        }

        SamplerAggregationBuilder aggBuilder = new SamplerAggregationBuilder("sampler").shardSize(3)
                .subAggregation(new MinAggregationBuilder("min").field("int"));
        try (IndexReader reader = DirectoryReader.open(w)) {
            assertEquals("test expects a single segment", 1, reader.leaves().size());
            IndexSearcher searcher = new IndexSearcher(reader);
            Sampler sampler = searchAndReduce(searcher, new TermQuery(new Term("text", "good")), aggBuilder,
                    textFieldType, numericFieldType);
            Min min = sampler.getAggregations().get("min");
            assertEquals(5.0, min.getValue(), 0);
        }
    }
}

From source file:org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorTests.java

License:Apache License

/**
 * Uses the significant terms aggregation to find the keywords in text fields
 */
public void testSignificance() throws IOException {
    TextFieldType textFieldType = new TextFieldType();
    textFieldType.setName("text");
    textFieldType.setFielddata(true);
    textFieldType
            .setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));

    IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
    indexWriterConfig.setMaxBufferedDocs(100);
    indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment

    try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
        addMixedTextDocs(textFieldType, w);

        SignificantTermsAggregationBuilder sigAgg = new SignificantTermsAggregationBuilder("sig_text", null)
                .field("text");
        sigAgg.executionHint(randomExecutionHint());
        if (randomBoolean()) {
            // Use a background filter which just happens to be same scope as whole-index.
            sigAgg.backgroundFilter(QueryBuilders.termsQuery("text", "common"));
        }

        SignificantTermsAggregationBuilder sigNumAgg = new SignificantTermsAggregationBuilder("sig_number",
                null).field("long_field");
        sigNumAgg.executionHint(randomExecutionHint());

        try (IndexReader reader = DirectoryReader.open(w)) {
            assertEquals("test expects a single segment", 1, reader.leaves().size());
            IndexSearcher searcher = new IndexSearcher(reader);

            // Search "odd"
            SignificantTerms terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg,
                    textFieldType);

            assertEquals(1, terms.getBuckets().size());
            assertNull(terms.getBucketByKey("even"));
            assertNull(terms.getBucketByKey("common"));
            assertNotNull(terms.getBucketByKey("odd"));

            // Search even
            terms = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), sigAgg, textFieldType);

            assertEquals(1, terms.getBuckets().size());
            assertNull(terms.getBucketByKey("odd"));
            assertNull(terms.getBucketByKey("common"));
            assertNotNull(terms.getBucketByKey("even"));

            // Search odd with a regex-based include/exclude filter
            sigAgg.includeExclude(new IncludeExclude("o.d", null));
            terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg, textFieldType);
            assertEquals(1, terms.getBuckets().size());
            assertNotNull(terms.getBucketByKey("odd"));
            assertNull(terms.getBucketByKey("common"));
            assertNull(terms.getBucketByKey("even"));

            // Search with a string-based include/exclude filter
            String oddStrings[] = new String[] { "odd", "weird" };
            String evenStrings[] = new String[] { "even", "regular" };

            sigAgg.includeExclude(new IncludeExclude(oddStrings, evenStrings));
            sigAgg.significanceHeuristic(SignificanceHeuristicTests.getRandomSignificanceheuristic());
            terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg, textFieldType);
            assertEquals(1, terms.getBuckets().size());
            assertNotNull(terms.getBucketByKey("odd"));
            assertNull(terms.getBucketByKey("weird"));
            assertNull(terms.getBucketByKey("common"));
            assertNull(terms.getBucketByKey("even"));
            assertNull(terms.getBucketByKey("regular"));

            sigAgg.includeExclude(new IncludeExclude(evenStrings, oddStrings));
            terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg, textFieldType);
            assertEquals(0, terms.getBuckets().size());
            assertNull(terms.getBucketByKey("odd"));
            assertNull(terms.getBucketByKey("weird"));
            assertNull(terms.getBucketByKey("common"));
            assertNull(terms.getBucketByKey("even"));
            assertNull(terms.getBucketByKey("regular"));

        }
    }
}

From source file:org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorTests.java

License:Apache License

/**
 * Uses the significant terms aggregation to find the keywords in numeric
 * fields
 */
public void testNumericSignificance() throws IOException {
    NumberFieldType longFieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
    longFieldType.setName("long_field");

    TextFieldType textFieldType = new TextFieldType();
    textFieldType.setName("text");
    textFieldType
            .setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));

    IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
    indexWriterConfig.setMaxBufferedDocs(100);
    indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment
    final long ODD_VALUE = 3;
    final long EVEN_VALUE = 6;
    final long COMMON_VALUE = 2;

    try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {

        for (int i = 0; i < 10; i++) {
            Document doc = new Document();
            if (i % 2 == 0) {
                addFields(doc, NumberType.LONG.createFields("long_field", ODD_VALUE, true, true, false));
                doc.add(new Field("text", "odd", textFieldType));
            } else {
                addFields(doc, NumberType.LONG.createFields("long_field", EVEN_VALUE, true, true, false));
                doc.add(new Field("text", "even", textFieldType));
            }
            addFields(doc, NumberType.LONG.createFields("long_field", COMMON_VALUE, true, true, false));
            w.addDocument(doc);
        }

        SignificantTermsAggregationBuilder sigNumAgg = new SignificantTermsAggregationBuilder("sig_number",
                null).field("long_field");
        sigNumAgg.executionHint(randomExecutionHint());

        try (IndexReader reader = DirectoryReader.open(w)) {
            assertEquals("test expects a single segment", 1, reader.leaves().size());
            IndexSearcher searcher = new IndexSearcher(reader);

            // Search "odd"
            SignificantLongTerms terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")),
                    sigNumAgg, longFieldType);
            assertEquals(1, terms.getBuckets().size());

            assertNull(terms.getBucketByKey(Long.toString(EVEN_VALUE)));
            assertNull(terms.getBucketByKey(Long.toString(COMMON_VALUE)));
            assertNotNull(terms.getBucketByKey(Long.toString(ODD_VALUE)));

            terms = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), sigNumAgg,
                    longFieldType);
            assertEquals(1, terms.getBuckets().size());

            assertNull(terms.getBucketByKey(Long.toString(ODD_VALUE)));
            assertNull(terms.getBucketByKey(Long.toString(COMMON_VALUE)));
            assertNotNull(terms.getBucketByKey(Long.toString(EVEN_VALUE)));

        }
    }
}