Usage examples for org.apache.lucene.index.IndexWriterConfig.setRAMBufferSizeMB
@Override public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB)
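Before the full examples below, a minimal sketch of the call itself. setRAMBufferSizeMB controls how many megabytes of indexed documents IndexWriter may buffer in RAM before flushing a new segment; when setMaxBufferedDocs is also set, a flush triggers on whichever limit is reached first. This sketch assumes Lucene 5.x or later (where IndexWriterConfig takes just an Analyzer); the index path and the 64 MB figure are illustrative, not from any example below.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.io.IOException;
import java.nio.file.Paths;

public class RamBufferExample {
    public static void main(String[] args) throws IOException {
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        // Buffer up to ~64 MB of indexed documents in RAM before flushing a segment.
        config.setRAMBufferSizeMB(64.0);
        // The path is a placeholder; any Directory implementation works.
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
                IndexWriter writer = new IndexWriter(dir, config)) {
            // Add documents here; flushes happen automatically once the buffer fills.
            writer.commit();
        }
    }
}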
From source file:org.elasticsearch.search.aggregations.bucket.sampler.SamplerAggregatorTests.java
License:Apache License
/**
 * Uses the sampler aggregation to find the minimum value of a field out of the top 3 scoring
 * documents in a search.
 */
public void testSampler() throws IOException {
    TextFieldType textFieldType = new TextFieldType();
    textFieldType.setIndexAnalyzer(new NamedAnalyzer("foo", AnalyzerScope.GLOBAL, new StandardAnalyzer()));
    MappedFieldType numericFieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
    numericFieldType.setName("int");

    IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
    indexWriterConfig.setMaxBufferedDocs(100);
    indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment with predictable docIds
    try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
        for (long value : new long[] { 7, 3, -10, -6, 5, 50 }) {
            Document doc = new Document();
            StringBuilder text = new StringBuilder();
            for (int i = 0; i < value; i++) {
                text.append("good ");
            }
            doc.add(new Field("text", text.toString(), textFieldType));
            doc.add(new SortedNumericDocValuesField("int", value));
            w.addDocument(doc);
        }

        SamplerAggregationBuilder aggBuilder = new SamplerAggregationBuilder("sampler").shardSize(3)
                .subAggregation(new MinAggregationBuilder("min").field("int"));

        try (IndexReader reader = DirectoryReader.open(w)) {
            assertEquals("test expects a single segment", 1, reader.leaves().size());
            IndexSearcher searcher = new IndexSearcher(reader);
            Sampler sampler = searchAndReduce(searcher, new TermQuery(new Term("text", "good")), aggBuilder,
                    textFieldType, numericFieldType);
            Min min = sampler.getAggregations().get("min");
            assertEquals(5.0, min.getValue(), 0);
        }
    }
}
From source file:org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorTests.java
License:Apache License
/**
 * Uses the significant terms aggregation to find the keywords in text fields.
 */
public void testSignificance() throws IOException {
    TextFieldType textFieldType = new TextFieldType();
    textFieldType.setName("text");
    textFieldType.setFielddata(true);
    textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));

    IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
    indexWriterConfig.setMaxBufferedDocs(100);
    indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment
    try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
        addMixedTextDocs(textFieldType, w);

        SignificantTermsAggregationBuilder sigAgg = new SignificantTermsAggregationBuilder("sig_text", null)
                .field("text");
        sigAgg.executionHint(randomExecutionHint());
        if (randomBoolean()) {
            // Use a background filter which just happens to be same scope as whole-index.
            sigAgg.backgroundFilter(QueryBuilders.termsQuery("text", "common"));
        }

        SignificantTermsAggregationBuilder sigNumAgg = new SignificantTermsAggregationBuilder("sig_number", null)
                .field("long_field");
        sigNumAgg.executionHint(randomExecutionHint());

        try (IndexReader reader = DirectoryReader.open(w)) {
            assertEquals("test expects a single segment", 1, reader.leaves().size());
            IndexSearcher searcher = new IndexSearcher(reader);

            // Search "odd"
            SignificantTerms terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg,
                    textFieldType);
            assertEquals(1, terms.getBuckets().size());
            assertNull(terms.getBucketByKey("even"));
            assertNull(terms.getBucketByKey("common"));
            assertNotNull(terms.getBucketByKey("odd"));

            // Search "even"
            terms = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), sigAgg, textFieldType);
            assertEquals(1, terms.getBuckets().size());
            assertNull(terms.getBucketByKey("odd"));
            assertNull(terms.getBucketByKey("common"));
            assertNotNull(terms.getBucketByKey("even"));

            // Search "odd" with a regex include/exclude
            sigAgg.includeExclude(new IncludeExclude("o.d", null));
            terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg, textFieldType);
            assertEquals(1, terms.getBuckets().size());
            assertNotNull(terms.getBucketByKey("odd"));
            assertNull(terms.getBucketByKey("common"));
            assertNull(terms.getBucketByKey("even"));

            // Search with string-based include/excludes
            String[] oddStrings = new String[] { "odd", "weird" };
            String[] evenStrings = new String[] { "even", "regular" };

            sigAgg.includeExclude(new IncludeExclude(oddStrings, evenStrings));
            sigAgg.significanceHeuristic(SignificanceHeuristicTests.getRandomSignificanceheuristic());
            terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg, textFieldType);
            assertEquals(1, terms.getBuckets().size());
            assertNotNull(terms.getBucketByKey("odd"));
            assertNull(terms.getBucketByKey("weird"));
            assertNull(terms.getBucketByKey("common"));
            assertNull(terms.getBucketByKey("even"));
            assertNull(terms.getBucketByKey("regular"));

            sigAgg.includeExclude(new IncludeExclude(evenStrings, oddStrings));
            terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg, textFieldType);
            assertEquals(0, terms.getBuckets().size());
            assertNull(terms.getBucketByKey("odd"));
            assertNull(terms.getBucketByKey("weird"));
            assertNull(terms.getBucketByKey("common"));
            assertNull(terms.getBucketByKey("even"));
            assertNull(terms.getBucketByKey("regular"));
        }
    }
}
From source file:org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorTests.java
License:Apache License
/**
 * Uses the significant terms aggregation to find the keywords in numeric fields.
 */
public void testNumericSignificance() throws IOException {
    NumberFieldType longFieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
    longFieldType.setName("long_field");

    TextFieldType textFieldType = new TextFieldType();
    textFieldType.setName("text");
    textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));

    IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
    indexWriterConfig.setMaxBufferedDocs(100);
    indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment
    final long ODD_VALUE = 3;
    final long EVEN_VALUE = 6;
    final long COMMON_VALUE = 2;

    try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
        for (int i = 0; i < 10; i++) {
            Document doc = new Document();
            if (i % 2 == 0) {
                addFields(doc, NumberType.LONG.createFields("long_field", ODD_VALUE, true, true, false));
                doc.add(new Field("text", "odd", textFieldType));
            } else {
                addFields(doc, NumberType.LONG.createFields("long_field", EVEN_VALUE, true, true, false));
                doc.add(new Field("text", "even", textFieldType));
            }
            addFields(doc, NumberType.LONG.createFields("long_field", COMMON_VALUE, true, true, false));
            w.addDocument(doc);
        }

        SignificantTermsAggregationBuilder sigNumAgg = new SignificantTermsAggregationBuilder("sig_number", null)
                .field("long_field");
        sigNumAgg.executionHint(randomExecutionHint());

        try (IndexReader reader = DirectoryReader.open(w)) {
            assertEquals("test expects a single segment", 1, reader.leaves().size());
            IndexSearcher searcher = new IndexSearcher(reader);

            // Search "odd"
            SignificantLongTerms terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")),
                    sigNumAgg, longFieldType);
            assertEquals(1, terms.getBuckets().size());
            assertNull(terms.getBucketByKey(Long.toString(EVEN_VALUE)));
            assertNull(terms.getBucketByKey(Long.toString(COMMON_VALUE)));
            assertNotNull(terms.getBucketByKey(Long.toString(ODD_VALUE)));

            // Search "even"
            terms = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), sigNumAgg, longFieldType);
            assertEquals(1, terms.getBuckets().size());
            assertNull(terms.getBucketByKey(Long.toString(ODD_VALUE)));
            assertNull(terms.getBucketByKey(Long.toString(COMMON_VALUE)));
            assertNotNull(terms.getBucketByKey(Long.toString(EVEN_VALUE)));
        }
    }
}
From source file:org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorTests.java
License:Apache License
/**
 * Uses the significant terms aggregation on an index with an unmapped field.
 */
public void testUnmapped() throws IOException {
    TextFieldType textFieldType = new TextFieldType();
    textFieldType.setName("text");
    textFieldType.setFielddata(true);
    textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));

    IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
    indexWriterConfig.setMaxBufferedDocs(100);
    indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment
    try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
        addMixedTextDocs(textFieldType, w);

        // Attempt aggregation on an unmapped field
        SignificantTermsAggregationBuilder sigAgg = new SignificantTermsAggregationBuilder("sig_text", null)
                .field("unmapped_field");
        sigAgg.executionHint(randomExecutionHint());

        try (IndexReader reader = DirectoryReader.open(w)) {
            assertEquals("test expects a single segment", 1, reader.leaves().size());
            IndexSearcher searcher = new IndexSearcher(reader);

            // Search "odd"
            SignificantTerms terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg,
                    textFieldType);
            assertEquals(0, terms.getBuckets().size());
            assertNull(terms.getBucketByKey("even"));
            assertNull(terms.getBucketByKey("common"));
            assertNull(terms.getBucketByKey("odd"));
        }
    }
}
From source file:org.elasticsearch.search.aggregations.bucket.significant.SignificantTextAggregatorTests.java
License:Apache License
/**
 * Uses the significant text aggregation to find the keywords in text fields.
 */
public void testSignificance() throws IOException {
    TextFieldType textFieldType = new TextFieldType();
    textFieldType.setName("text");
    textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));

    IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
    indexWriterConfig.setMaxBufferedDocs(100);
    indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment
    try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
        for (int i = 0; i < 10; i++) {
            Document doc = new Document();
            StringBuilder text = new StringBuilder("common ");
            if (i % 2 == 0) {
                text.append("odd ");
            } else {
                text.append("even separator" + i + " duplicate duplicate duplicate duplicate duplicate duplicate ");
            }
            doc.add(new Field("text", text.toString(), textFieldType));
            String json = "{ \"text\" : \"" + text.toString() + "\"," + " \"json_only_field\" : \""
                    + text.toString() + "\"" + " }";
            doc.add(new StoredField("_source", new BytesRef(json)));
            w.addDocument(doc);
        }

        SignificantTextAggregationBuilder sigAgg = new SignificantTextAggregationBuilder("sig_text", "text")
                .filterDuplicateText(true);
        if (randomBoolean()) {
            sigAgg.sourceFieldNames(Arrays.asList(new String[] { "json_only_field" }));
        }
        SamplerAggregationBuilder aggBuilder = new SamplerAggregationBuilder("sampler").subAggregation(sigAgg);

        try (IndexReader reader = DirectoryReader.open(w)) {
            assertEquals("test expects a single segment", 1, reader.leaves().size());
            IndexSearcher searcher = new IndexSearcher(reader);

            // Search "odd", which should have no duplication
            Sampler sampler = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), aggBuilder,
                    textFieldType);
            SignificantTerms terms = sampler.getAggregations().get("sig_text");

            assertNull(terms.getBucketByKey("even"));
            assertNull(terms.getBucketByKey("duplicate"));
            assertNull(terms.getBucketByKey("common"));
            assertNotNull(terms.getBucketByKey("odd"));

            // Search "even", which will have duplication
            sampler = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), aggBuilder,
                    textFieldType);
            terms = sampler.getAggregations().get("sig_text");

            assertNull(terms.getBucketByKey("odd"));
            assertNull(terms.getBucketByKey("duplicate"));
            assertNull(terms.getBucketByKey("common"));
            assertNull(terms.getBucketByKey("separator2"));
            assertNull(terms.getBucketByKey("separator4"));
            assertNull(terms.getBucketByKey("separator6"));
            assertNotNull(terms.getBucketByKey("even"));
        }
    }
}
From source file:org.elasticsearch.search.aggregations.bucket.significant.SignificantTextAggregatorTests.java
License:Apache License
/**
 * Tests documents with arrays of text.
 */
public void testSignificanceOnTextArrays() throws IOException {
    TextFieldType textFieldType = new TextFieldType();
    textFieldType.setName("text");
    textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));

    IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
    indexWriterConfig.setMaxBufferedDocs(100);
    indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment
    try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
        for (int i = 0; i < 10; i++) {
            Document doc = new Document();
            doc.add(new Field("text", "foo", textFieldType));
            String json = "{ \"text\" : [\"foo\",\"foo\"], \"title\" : [\"foo\", \"foo\"]}";
            doc.add(new StoredField("_source", new BytesRef(json)));
            w.addDocument(doc);
        }

        SignificantTextAggregationBuilder sigAgg = new SignificantTextAggregationBuilder("sig_text", "text");
        sigAgg.sourceFieldNames(Arrays.asList(new String[] { "title", "text" }));

        try (IndexReader reader = DirectoryReader.open(w)) {
            assertEquals("test expects a single segment", 1, reader.leaves().size());
            IndexSearcher searcher = new IndexSearcher(reader);
            searchAndReduce(searcher, new TermQuery(new Term("text", "foo")), sigAgg, textFieldType);
            // No significant results to be found in this test - only checking that we don't end up
            // with the internal exception discovered in https://github.com/elastic/elasticsearch/issues/25029
        }
    }
}
From source file:org.esa.beam.occci.LuceneCreateIndexMain.java
License:Open Source License
public static void main(String[] args) throws IOException, ParseException {
    if (args.length != 2) {
        printUsage();
    }
    File productListFile = new File(args[0]);
    File indexfile = new File(args[1]);
    if (!productListFile.exists()) {
        System.err.printf("productList file '%s' does not exist%n", args[0]);
        printUsage();
    }
    List<EoProduct> eoProductList = ProductDB.readProducts("s2", productListFile);
    Directory indexDirectory = FSDirectory.open(indexfile.toPath());

    IndexWriterConfig config = new IndexWriterConfig(new SimpleAnalyzer());
    config.setRAMBufferSizeMB(100);

    DateRangePrefixTree dateRangePrefixTree = DateRangePrefixTree.INSTANCE;
    PrefixTreeStrategy strategy = new NumberRangePrefixTreeStrategy(dateRangePrefixTree, "productDateRange");

    final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS", ENGLISH);
    dateFormat.setCalendar(GregorianCalendar.getInstance(UTC, Locale.ENGLISH));

    int indexCount = 0;
    try (IndexWriter indexWriter = new IndexWriter(indexDirectory, config)) {
        for (EoProduct eoProduct : eoProductList) {
            Document doc = new Document();
            doc.add(new StringField("name", eoProduct.getName(), Field.Store.YES));

            String start = dateFormat.format(new Date(eoProduct.getStartTime()));
            String end = dateFormat.format(new Date(eoProduct.getEndTime()));
            String range = "[" + start + " TO " + end + "]";
            NumberRangePrefixTree.NRShape nrShape = dateRangePrefixTree.parseShape(range);
            for (IndexableField f : strategy.createIndexableFields(nrShape)) {
                doc.add(f);
            }
            indexWriter.addDocument(doc);
            indexCount++;
            if (indexCount % 10_000 == 0) {
                System.out.println("indexCount = " + indexCount);
            }
        }
    }
}
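The example above only builds the date-range index. A possible follow-up sketch of how it could be queried for products overlapping a time window, reusing dateRangePrefixTree, strategy, and indexDirectory from the method above; the range literal and result handling are assumptions, not part of the original tool:

// Sketch: query the "productDateRange" field for products intersecting a window.
// Assumes org.apache.lucene.spatial.query.{SpatialArgs, SpatialOperation} and the
// usual search imports; the range string is illustrative.
NumberRangePrefixTree.NRShape window = dateRangePrefixTree.parseShape("[2016-01-01 TO 2016-12-31]");
Query query = strategy.makeQuery(new SpatialArgs(SpatialOperation.Intersects, window));
try (IndexReader reader = DirectoryReader.open(indexDirectory)) {
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs hits = searcher.search(query, 10);
    System.out.println("matching products: " + hits.totalHits);
}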
From source file:org.exist.xquery.modules.mpeg7.net.semanticmetadata.lire.utils.LuceneUtils.java
License:Open Source License
public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer,
        double RAMBufferSize) throws IOException {
    // set the analyzer according to the method params
    Analyzer tmpAnalyzer = null;
    if (analyzer == AnalyzerType.SimpleAnalyzer)
        tmpAnalyzer = new SimpleAnalyzer(LUCENE_VERSION);
    else if (analyzer == AnalyzerType.WhitespaceAnalyzer)
        tmpAnalyzer = new WhitespaceAnalyzer(LUCENE_VERSION);

    // The config
    IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, tmpAnalyzer);
    if (create)
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists.
    else
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise.
    config.setRAMBufferSizeMB(RAMBufferSize);
    config.setCodec(new LireCustomCodec());
    return new IndexWriter(directory, config);
}
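A possible call site for this helper. The index path is an illustrative assumption, and since the helper targets a Lucene 4-era API (analyzers take LUCENE_VERSION), FSDirectory.open is given a java.io.File here:

// Sketch of a call site for LuceneUtils.createIndexWriter (path is hypothetical).
Directory dir = FSDirectory.open(new File("/tmp/lire-index"));
try (IndexWriter writer = LuceneUtils.createIndexWriter(dir, true, AnalyzerType.WhitespaceAnalyzer, 256d)) {
    // index documents...
}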
From source file:org.frontcache.cache.impl.LuceneIndexManager.java
License:Apache License
/**
 * Lazily creates and returns the shared IndexWriter instance, using double-checked locking.
 * @return the open IndexWriter
 * @throws IOException
 */
private IndexWriter getIndexWriter() throws IOException {
    if (indexWriter == null || !indexWriter.isOpen()) {
        synchronized (this) {
            if (indexWriter == null || !indexWriter.isOpen()) {
                indexWriter = null;
                logger.info("Trying to get indexWriter...");
                Directory dir = FSDirectory.open(Paths.get(INDEX_PATH));
                Analyzer analyzer = new StandardAnalyzer();
                IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
                iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
                iwc.setRAMBufferSizeMB(250.0);
                indexWriter = new IndexWriter(dir, iwc);
                logger.info("IndexWriter initialized");
            }
        }
    }
    return indexWriter;
}
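When sizing flushes purely by RAM as above, Lucene also lets you state explicitly that the buffered-document-count trigger is off (this is the default, so the last line below is a clarifying assumption rather than a behavior change):

// Sketch: rely on the 250 MB RAM threshold alone for flushing.
IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
iwc.setRAMBufferSizeMB(250.0);
// Disable flushing by document count; only the RAM buffer size triggers a flush.
iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);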
From source file:org.ihtsdo.otf.query.lucene.LuceneIndexer.java
License:Apache License
protected LuceneIndexer(String indexName) throws IOException {
    try {
        indexName_ = indexName;
        luceneWriterService = LookupService.getService(WorkExecutors.class).getExecutor();
        luceneWriterFutureCheckerService = Executors.newFixedThreadPool(1,
                new NamedThreadFactory(indexName + " Lucene future checker", false));

        Path searchFolder = LookupService.getService(ConfigurationService.class).getSearchFolderPath();
        if (luceneRootFolder_.compareAndSet(null, new File(searchFolder.toFile(), DEFAULT_LUCENE_FOLDER))) {
            luceneRootFolder_.get().mkdirs();
        }
        indexFolder_ = new File(luceneRootFolder_.get(), indexName);
        indexFolder_.mkdirs();
        log.info("Index: " + indexFolder_.getAbsolutePath());

        // Switch over to MMapDirectory - in theory this gives us back some room on the JDK stack,
        // letting the OS directly manage the caching of the index files - and, more importantly,
        // gives us a huge performance boost during any operation that does multi-threaded reads of
        // the index (like the SOLOR rules processing), because the default SimpleFSDirectory is a
        // huge bottleneck.
        Directory indexDirectory = new MMapDirectory(indexFolder_);
        indexDirectory.clearLock("write.lock");

        IndexWriterConfig config = new IndexWriterConfig(luceneVersion, new PerFieldAnalyzer());
        config.setRAMBufferSizeMB(256);
        MergePolicy mergePolicy = new LogByteSizeMergePolicy();
        config.setMergePolicy(mergePolicy);
        config.setSimilarity(new ShortTextSimilarity());

        IndexWriter indexWriter = new IndexWriter(indexDirectory, config);
        trackingIndexWriter = new TrackingIndexWriter(indexWriter);

        boolean applyAllDeletes = false;
        searcherManager = new SearcherManager(indexWriter, applyAllDeletes, null);

        // [3]: Create the ControlledRealTimeReopenThread that reopens the index periodically,
        // taking into account the changes made to the index and tracked by the TrackingIndexWriter
        // instance. The index is refreshed every 60 s when nobody is waiting, and every 100 ms
        // whenever someone is waiting (see the search method).
        // (see http://lucene.apache.org/core/4_3_0/core/org/apache/lucene/search/NRTManagerReopenThread.html)
        reopenThread = new ControlledRealTimeReopenThread<>(trackingIndexWriter, searcherManager, 60.00, 0.1);
        this.startThread();

        // Register for commits:
        log.info("Registering indexer " + getIndexerName() + " for commits");
        Get.commitService().addChangeListener(new ChronologyChangeListener() {
            @Override
            public void handleCommit(CommitRecord commitRecord) {
                commitRecord.getSememesInCommit().stream().forEach(sememeId -> {
                    handleChange(Get.sememeService().getSememe(sememeId));
                });
            }

            @Override
            public void handleChange(SememeChronology<? extends SememeVersion<?>> sc) {
                log.info("submitting sememe " + sc.toUserString() + " to indexer " + getIndexerName()
                        + " due to commit");
                index(sc);
            }

            @Override
            public void handleChange(ConceptChronology<? extends StampedVersion> cc) {
                // noop
            }

            @Override
            public UUID getListenerUuid() {
                return UuidT5Generator.get(getIndexerName());
            }
        });
    } catch (Exception e) {
        LookupService.getService(SystemStatusService.class).notifyServiceConfigurationFailure(indexName, e);
        throw e;
    }
}
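The SearcherManager set up above is normally consumed with Lucene's standard acquire/release pattern. A minimal sketch of such a search method (the method name and query argument are illustrative assumptions; only acquire/release/search are SearcherManager and IndexSearcher API):

// Sketch of the acquire/release pattern against the SearcherManager created above.
private TopDocs searchIndex(Query query, int limit) throws IOException {
    IndexSearcher searcher = searcherManager.acquire();
    try {
        return searcher.search(query, limit);
    } finally {
        // Always release the searcher so the reopen thread can recycle readers.
        searcherManager.release(searcher);
    }
}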