Example usage for org.apache.lucene.index IndexWriterConfig setRAMBufferSizeMB


Introduction

On this page you can find example usages of org.apache.lucene.index.IndexWriterConfig.setRAMBufferSizeMB, which sets the amount of RAM (in MB) that may be used to buffer added documents and deletions before they are flushed to the Directory.

Prototype

@Override
public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB)
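
Before the usage listings, here is a minimal configuration sketch (the analyzer and the 256 MB figure are illustrative choices, not taken from any example below). Lucene flushes its in-memory buffer once it reaches the configured size; disabling the document-count trigger makes RAM usage the only flush criterion:

IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
config.setRAMBufferSizeMB(256.0);                                // flush after roughly 256 MB of buffered documents
config.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // do not also flush by document count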

Usage

From source file: org.elasticsearch.search.aggregations.bucket.sampler.SamplerAggregatorTests.java

License: Apache License

/**
 * Uses the sampler aggregation to find the minimum value of a field out of the top 3 scoring documents in a search.
 */
public void testSampler() throws IOException {
    TextFieldType textFieldType = new TextFieldType();
    textFieldType.setIndexAnalyzer(new NamedAnalyzer("foo", AnalyzerScope.GLOBAL, new StandardAnalyzer()));
    MappedFieldType numericFieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
    numericFieldType.setName("int");

    IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
    indexWriterConfig.setMaxBufferedDocs(100);
    indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment with predictable docIds
    try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
        for (long value : new long[] { 7, 3, -10, -6, 5, 50 }) {
            Document doc = new Document();
            StringBuilder text = new StringBuilder();
            for (int i = 0; i < value; i++) {
                text.append("good ");
            }
            doc.add(new Field("text", text.toString(), textFieldType));
            doc.add(new SortedNumericDocValuesField("int", value));
            w.addDocument(doc);
        }

        SamplerAggregationBuilder aggBuilder = new SamplerAggregationBuilder("sampler").shardSize(3)
                .subAggregation(new MinAggregationBuilder("min").field("int"));
        try (IndexReader reader = DirectoryReader.open(w)) {
            assertEquals("test expects a single segment", 1, reader.leaves().size());
            IndexSearcher searcher = new IndexSearcher(reader);
            Sampler sampler = searchAndReduce(searcher, new TermQuery(new Term("text", "good")), aggBuilder,
                    textFieldType, numericFieldType);
            Min min = sampler.getAggregations().get("min");
            assertEquals(5.0, min.getValue(), 0);
        }
    }
}

From source file: org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorTests.java

License: Apache License

/**
 * Uses the significant terms aggregation to find the keywords in text fields
 */
public void testSignificance() throws IOException {
    TextFieldType textFieldType = new TextFieldType();
    textFieldType.setName("text");
    textFieldType.setFielddata(true);
    textFieldType
            .setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));

    IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
    indexWriterConfig.setMaxBufferedDocs(100);
    indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment

    try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
        addMixedTextDocs(textFieldType, w);

        SignificantTermsAggregationBuilder sigAgg = new SignificantTermsAggregationBuilder("sig_text", null)
                .field("text");
        sigAgg.executionHint(randomExecutionHint());
        if (randomBoolean()) {
            // Use a background filter which just happens to be same scope as whole-index.
            sigAgg.backgroundFilter(QueryBuilders.termsQuery("text", "common"));
        }

        SignificantTermsAggregationBuilder sigNumAgg = new SignificantTermsAggregationBuilder("sig_number",
                null).field("long_field");
        sigNumAgg.executionHint(randomExecutionHint());

        try (IndexReader reader = DirectoryReader.open(w)) {
            assertEquals("test expects a single segment", 1, reader.leaves().size());
            IndexSearcher searcher = new IndexSearcher(reader);

            // Search "odd"
            SignificantTerms terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg,
                    textFieldType);

            assertEquals(1, terms.getBuckets().size());
            assertNull(terms.getBucketByKey("even"));
            assertNull(terms.getBucketByKey("common"));
            assertNotNull(terms.getBucketByKey("odd"));

            // Search "even"
            terms = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), sigAgg, textFieldType);

            assertEquals(1, terms.getBuckets().size());
            assertNull(terms.getBucketByKey("odd"));
            assertNull(terms.getBucketByKey("common"));
            assertNotNull(terms.getBucketByKey("even"));

            // Search "odd" with a regex-based IncludeExclude
            sigAgg.includeExclude(new IncludeExclude("o.d", null));
            terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg, textFieldType);
            assertEquals(1, terms.getBuckets().size());
            assertNotNull(terms.getBucketByKey("odd"));
            assertNull(terms.getBucketByKey("common"));
            assertNull(terms.getBucketByKey("even"));

            // Search with string-based IncludeExclude lists
            String[] oddStrings = { "odd", "weird" };
            String[] evenStrings = { "even", "regular" };

            sigAgg.includeExclude(new IncludeExclude(oddStrings, evenStrings));
            sigAgg.significanceHeuristic(SignificanceHeuristicTests.getRandomSignificanceheuristic());
            terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg, textFieldType);
            assertEquals(1, terms.getBuckets().size());
            assertNotNull(terms.getBucketByKey("odd"));
            assertNull(terms.getBucketByKey("weird"));
            assertNull(terms.getBucketByKey("common"));
            assertNull(terms.getBucketByKey("even"));
            assertNull(terms.getBucketByKey("regular"));

            sigAgg.includeExclude(new IncludeExclude(evenStrings, oddStrings));
            terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg, textFieldType);
            assertEquals(0, terms.getBuckets().size());
            assertNull(terms.getBucketByKey("odd"));
            assertNull(terms.getBucketByKey("weird"));
            assertNull(terms.getBucketByKey("common"));
            assertNull(terms.getBucketByKey("even"));
            assertNull(terms.getBucketByKey("regular"));

        }
    }
}

From source file: org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorTests.java

License: Apache License

/**
 * Uses the significant terms aggregation to find the keywords in numeric
 * fields
 */
public void testNumericSignificance() throws IOException {
    NumberFieldType longFieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
    longFieldType.setName("long_field");

    TextFieldType textFieldType = new TextFieldType();
    textFieldType.setName("text");
    textFieldType
            .setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));

    IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
    indexWriterConfig.setMaxBufferedDocs(100);
    indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment
    final long ODD_VALUE = 3;
    final long EVEN_VALUE = 6;
    final long COMMON_VALUE = 2;

    try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {

        for (int i = 0; i < 10; i++) {
            Document doc = new Document();
            if (i % 2 == 0) {
                addFields(doc, NumberType.LONG.createFields("long_field", ODD_VALUE, true, true, false));
                doc.add(new Field("text", "odd", textFieldType));
            } else {
                addFields(doc, NumberType.LONG.createFields("long_field", EVEN_VALUE, true, true, false));
                doc.add(new Field("text", "even", textFieldType));
            }
            addFields(doc, NumberType.LONG.createFields("long_field", COMMON_VALUE, true, true, false));
            w.addDocument(doc);
        }

        SignificantTermsAggregationBuilder sigNumAgg = new SignificantTermsAggregationBuilder("sig_number",
                null).field("long_field");
        sigNumAgg.executionHint(randomExecutionHint());

        try (IndexReader reader = DirectoryReader.open(w)) {
            assertEquals("test expects a single segment", 1, reader.leaves().size());
            IndexSearcher searcher = new IndexSearcher(reader);

            // Search "odd"
            SignificantLongTerms terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")),
                    sigNumAgg, longFieldType);
            assertEquals(1, terms.getBuckets().size());

            assertNull(terms.getBucketByKey(Long.toString(EVEN_VALUE)));
            assertNull(terms.getBucketByKey(Long.toString(COMMON_VALUE)));
            assertNotNull(terms.getBucketByKey(Long.toString(ODD_VALUE)));

            terms = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), sigNumAgg,
                    longFieldType);
            assertEquals(1, terms.getBuckets().size());

            assertNull(terms.getBucketByKey(Long.toString(ODD_VALUE)));
            assertNull(terms.getBucketByKey(Long.toString(COMMON_VALUE)));
            assertNotNull(terms.getBucketByKey(Long.toString(EVEN_VALUE)));

        }
    }
}

From source file: org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorTests.java

License: Apache License

/**
 * Uses the significant terms aggregation on an index with unmapped field
 */
public void testUnmapped() throws IOException {
    TextFieldType textFieldType = new TextFieldType();
    textFieldType.setName("text");
    textFieldType.setFielddata(true);
    textFieldType
            .setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));

    IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
    indexWriterConfig.setMaxBufferedDocs(100);
    indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment
    try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
        addMixedTextDocs(textFieldType, w);

        // Attempt aggregation on unmapped field
        SignificantTermsAggregationBuilder sigAgg = new SignificantTermsAggregationBuilder("sig_text", null)
                .field("unmapped_field");
        sigAgg.executionHint(randomExecutionHint());

        try (IndexReader reader = DirectoryReader.open(w)) {
            assertEquals("test expects a single segment", 1, reader.leaves().size());
            IndexSearcher searcher = new IndexSearcher(reader);

            // Search "odd"
            SignificantTerms terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg,
                    textFieldType);
            assertEquals(0, terms.getBuckets().size());

            assertNull(terms.getBucketByKey("even"));
            assertNull(terms.getBucketByKey("common"));
            assertNull(terms.getBucketByKey("odd"));

        }
    }
}

From source file: org.elasticsearch.search.aggregations.bucket.significant.SignificantTextAggregatorTests.java

License: Apache License

/**
 * Uses the significant text aggregation to find the keywords in text fields
 */
public void testSignificance() throws IOException {
    TextFieldType textFieldType = new TextFieldType();
    textFieldType.setName("text");
    textFieldType
            .setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));

    IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
    indexWriterConfig.setMaxBufferedDocs(100);
    indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment
    try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
        for (int i = 0; i < 10; i++) {
            Document doc = new Document();
            StringBuilder text = new StringBuilder("common ");
            if (i % 2 == 0) {
                text.append("odd ");
            } else {
                text.append(
                        "even separator" + i + " duplicate duplicate duplicate duplicate duplicate duplicate ");
            }

            doc.add(new Field("text", text.toString(), textFieldType));
            String json = "{ \"text\" : \"" + text.toString() + "\"," + " \"json_only_field\" : \""
                    + text.toString() + "\"" + " }";
            doc.add(new StoredField("_source", new BytesRef(json)));
            w.addDocument(doc);
        }

        SignificantTextAggregationBuilder sigAgg = new SignificantTextAggregationBuilder("sig_text", "text")
                .filterDuplicateText(true);
        if (randomBoolean()) {
            sigAgg.sourceFieldNames(Arrays.asList(new String[] { "json_only_field" }));
        }
        SamplerAggregationBuilder aggBuilder = new SamplerAggregationBuilder("sampler").subAggregation(sigAgg);

        try (IndexReader reader = DirectoryReader.open(w)) {
            assertEquals("test expects a single segment", 1, reader.leaves().size());
            IndexSearcher searcher = new IndexSearcher(reader);

            // Search "odd" which should have no duplication
            Sampler sampler = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), aggBuilder,
                    textFieldType);
            SignificantTerms terms = sampler.getAggregations().get("sig_text");

            assertNull(terms.getBucketByKey("even"));
            assertNull(terms.getBucketByKey("duplicate"));
            assertNull(terms.getBucketByKey("common"));
            assertNotNull(terms.getBucketByKey("odd"));

            // Search "even" which will have duplication
            sampler = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), aggBuilder,
                    textFieldType);
            terms = sampler.getAggregations().get("sig_text");

            assertNull(terms.getBucketByKey("odd"));
            assertNull(terms.getBucketByKey("duplicate"));
            assertNull(terms.getBucketByKey("common"));
            assertNull(terms.getBucketByKey("separator2"));
            assertNull(terms.getBucketByKey("separator4"));
            assertNull(terms.getBucketByKey("separator6"));

            assertNotNull(terms.getBucketByKey("even"));

        }
    }
}

From source file: org.elasticsearch.search.aggregations.bucket.significant.SignificantTextAggregatorTests.java

License: Apache License

/**
 * Test documents with arrays of text
 */
public void testSignificanceOnTextArrays() throws IOException {
    TextFieldType textFieldType = new TextFieldType();
    textFieldType.setName("text");
    textFieldType
            .setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));

    IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
    indexWriterConfig.setMaxBufferedDocs(100);
    indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment
    try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
        for (int i = 0; i < 10; i++) {
            Document doc = new Document();
            doc.add(new Field("text", "foo", textFieldType));
            String json = "{ \"text\" : [\"foo\",\"foo\"], \"title\" : [\"foo\", \"foo\"]}";
            doc.add(new StoredField("_source", new BytesRef(json)));
            w.addDocument(doc);
        }

        SignificantTextAggregationBuilder sigAgg = new SignificantTextAggregationBuilder("sig_text", "text");
        sigAgg.sourceFieldNames(Arrays.asList(new String[] { "title", "text" }));
        try (IndexReader reader = DirectoryReader.open(w)) {
            assertEquals("test expects a single segment", 1, reader.leaves().size());
            IndexSearcher searcher = new IndexSearcher(reader);
            searchAndReduce(searcher, new TermQuery(new Term("text", "foo")), sigAgg, textFieldType);
            // No significant results to be found in this test - only checking we don't end up
            // with the internal exception discovered in issue https://github.com/elastic/elasticsearch/issues/25029
        }
    }
}

From source file: org.esa.beam.occci.LuceneCreateIndexMain.java

License: Open Source License

public static void main(String[] args) throws IOException, ParseException {
    if (args.length != 2) {
        printUsage();
    }
    File productListFile = new File(args[0]);
    File indexfile = new File(args[1]);
    if (!productListFile.exists()) {
        System.err.printf("productList file '%s' does not exist%n", args[0]);
        printUsage();
    }
    List<EoProduct> eoProductList = ProductDB.readProducts("s2", productListFile);

    Directory indexDirectory = FSDirectory.open(indexfile.toPath());
    IndexWriterConfig config = new IndexWriterConfig(new SimpleAnalyzer());
    config.setRAMBufferSizeMB(100);

    DateRangePrefixTree dateRangePrefixTree = DateRangePrefixTree.INSTANCE;
    PrefixTreeStrategy strategy = new NumberRangePrefixTreeStrategy(dateRangePrefixTree, "productDateRange");

    final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS", ENGLISH);
    dateFormat.setCalendar(GregorianCalendar.getInstance(UTC, Locale.ENGLISH));

    int indexCount = 0;
    try (IndexWriter indexWriter = new IndexWriter(indexDirectory, config)) {
        for (EoProduct eoProduct : eoProductList) {
            Document doc = new Document();
            doc.add(new StringField("name", eoProduct.getName(), Field.Store.YES));
            String start = dateFormat.format(new Date(eoProduct.getStartTime()));
            String end = dateFormat.format(new Date(eoProduct.getEndTime()));
            String range = "[" + start + " TO " + end + "]";

            NumberRangePrefixTree.NRShape nrShape = dateRangePrefixTree.parseShape(range);
            for (IndexableField f : strategy.createIndexableFields(nrShape)) {
                doc.add(f);
            }
            indexWriter.addDocument(doc);

            indexCount++;
            if (indexCount % 10_000 == 0) {
                System.out.println("indexCount = " + indexCount);
            }
        }

    }

}
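
The index built above can be queried with the same strategy. A hypothetical query-side sketch (the search window is illustrative; SpatialArgs and SpatialOperation come from lucene-spatial-extras):

// Hypothetical query against the "productDateRange" field indexed above.
DateRangePrefixTree tree = DateRangePrefixTree.INSTANCE;
PrefixTreeStrategy strategy = new NumberRangePrefixTreeStrategy(tree, "productDateRange");
// Find products whose date range intersects the search window.
NumberRangePrefixTree.NRShape searchRange = tree.parseShape("[2016-01-01 TO 2016-12-31]");
Query query = strategy.makeQuery(new SpatialArgs(SpatialOperation.Intersects, searchRange));
try (IndexReader reader = DirectoryReader.open(FSDirectory.open(indexfile.toPath()))) {
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs hits = searcher.search(query, 10); // top 10 products overlapping the window
}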

From source file: org.exist.xquery.modules.mpeg7.net.semanticmetadata.lire.utils.LuceneUtils.java

License: Open Source License

public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer,
        double RAMBufferSize) throws IOException {
    // set the analyzer according to the method params
    Analyzer tmpAnalyzer = null;
    if (analyzer == AnalyzerType.SimpleAnalyzer)
        tmpAnalyzer = new SimpleAnalyzer(LUCENE_VERSION);
    else if (analyzer == AnalyzerType.WhitespaceAnalyzer)
        tmpAnalyzer = new WhitespaceAnalyzer(LUCENE_VERSION);

    // The config
    IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, tmpAnalyzer);
    if (create)
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists.
    else
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise.
    config.setRAMBufferSizeMB(RAMBufferSize);
    config.setCodec(new LireCustomCodec());
    return new IndexWriter(directory, config);
}
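
A hypothetical call site for the helper above (the directory path, create flag, and buffer size are illustrative):

// Hypothetical usage: create a fresh index with a 256 MB RAM buffer.
Directory dir = FSDirectory.open(new File("index-dir").toPath());
try (IndexWriter writer = createIndexWriter(dir, true, AnalyzerType.WhitespaceAnalyzer, 256d)) {
    // ... add documents ...
}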

From source file: org.frontcache.cache.impl.LuceneIndexManager.java

License: Apache License

/**
 * Returns the IndexWriter instance, creating it lazily if needed.
 * @return the shared IndexWriter
 * @throws IOException
 */
private IndexWriter getIndexWriter() throws IOException {
    if (indexWriter == null || !indexWriter.isOpen()) {
        synchronized (this) {
            if (indexWriter == null || !indexWriter.isOpen()) {
                indexWriter = null;
                logger.info("Trying to get indexWriter...");
                Directory dir = FSDirectory.open(Paths.get(INDEX_PATH));
                Analyzer analyzer = new StandardAnalyzer();
                IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

                iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
                iwc.setRAMBufferSizeMB(250.0);
                indexWriter = new IndexWriter(dir, iwc);
                logger.info("IndexWriter initialized");
            }
        }
    }

    return indexWriter;
}
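
Note on the pattern above: double-checked locking is only safe under the Java memory model if the checked field is volatile; otherwise a reader may observe a partially constructed IndexWriter. A minimal sketch of the assumed field declaration (not shown in the excerpt):

// For double-checked locking to be safe, the lazily
// initialized field should be declared volatile.
private volatile IndexWriter indexWriter;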

From source file: org.ihtsdo.otf.query.lucene.LuceneIndexer.java

License: Apache License

protected LuceneIndexer(String indexName) throws IOException {
    try {
        indexName_ = indexName;
        luceneWriterService = LookupService.getService(WorkExecutors.class).getExecutor();
        luceneWriterFutureCheckerService = Executors.newFixedThreadPool(1,
                new NamedThreadFactory(indexName + " Lucene future checker", false));

        Path searchFolder = LookupService.getService(ConfigurationService.class).getSearchFolderPath();

        if (luceneRootFolder_.compareAndSet(null, new File(searchFolder.toFile(), DEFAULT_LUCENE_FOLDER))) {
            luceneRootFolder_.get().mkdirs();
        }

        indexFolder_ = new File(luceneRootFolder_.get(), indexName);
        indexFolder_.mkdirs();

        log.info("Index: " + indexFolder_.getAbsolutePath());
        Directory indexDirectory = new MMapDirectory(indexFolder_); //switch over to MMapDirectory - in theory - this gives us back some 
        //room on the JDK stack, letting the OS directly manage the caching of the index files - and more importantly, gives us a huge 
        //performance boost during any operation that tries to do multi-threaded reads of the index (like the SOLOR rules processing) because
        //the default value of SimpleFSDirectory is a huge bottleneck.

        indexDirectory.clearLock("write.lock");

        IndexWriterConfig config = new IndexWriterConfig(luceneVersion, new PerFieldAnalyzer());
        config.setRAMBufferSizeMB(256);
        MergePolicy mergePolicy = new LogByteSizeMergePolicy();

        config.setMergePolicy(mergePolicy);
        config.setSimilarity(new ShortTextSimilarity());

        IndexWriter indexWriter = new IndexWriter(indexDirectory, config);

        trackingIndexWriter = new TrackingIndexWriter(indexWriter);

        boolean applyAllDeletes = false;

        searcherManager = new SearcherManager(indexWriter, applyAllDeletes, null);
        // [3]: Create the ControlledRealTimeReopenThread that reopens the index periodically, taking into
        //      account the changes made to the index and tracked by the TrackingIndexWriter instance.
        //      The index is refreshed every 60 seconds when nobody is waiting,
        //      and every 100 ms whenever someone is waiting (see the search method).
        //      (see http://lucene.apache.org/core/4_3_0/core/org/apache/lucene/search/NRTManagerReopenThread.html)
        reopenThread = new ControlledRealTimeReopenThread<>(trackingIndexWriter, searcherManager, 60.00, 0.1);

        this.startThread();

        //Register for commits:

        log.info("Registering indexer " + getIndexerName() + " for commits");
        Get.commitService().addChangeListener(new ChronologyChangeListener() {

            @Override
            public void handleCommit(CommitRecord commitRecord) {
                commitRecord.getSememesInCommit().stream().forEach(sememeId -> {
                    handleChange(Get.sememeService().getSememe(sememeId));
                });

            }

            @Override
            public void handleChange(SememeChronology<? extends SememeVersion<?>> sc) {
                log.info("submitting sememe " + sc.toUserString() + " to indexer " + getIndexerName()
                        + " due to commit");
                index(sc);

            }

            @Override
            public void handleChange(ConceptChronology<? extends StampedVersion> cc) {
                // noop
            }

            @Override
            public UUID getListenerUuid() {
                return UuidT5Generator.get(getIndexerName());
            }
        });

    } catch (Exception e) {
        LookupService.getService(SystemStatusService.class).notifyServiceConfigurationFailure(indexName, e);
        throw e;
    }
}