Usage examples for `org.apache.lucene.index.IndexWriterConfig#setRAMBufferSizeMB`.
Method signature: `@Override public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB)`
From source file:nl.inl.util.Utilities.java
License:Apache License
public static IndexWriterConfig getIndexWriterConfig(Analyzer analyzer, boolean create) { IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_42, analyzer); config.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND); config.setRAMBufferSizeMB(150); // faster indexing // Set merge factor (if using LogMergePolicy, which is the default up to version LUCENE_32, // so yes)/*from w ww . j av a 2 s . c o m*/ MergePolicy mp = config.getMergePolicy(); if (mp instanceof LogMergePolicy) { ((LogMergePolicy) mp).setMergeFactor(40); // faster indexing } return config; }
From source file:org.apache.gora.lucene.store.LuceneStore.java
License:Apache License
@Override public void initialize(Class<K> keyClass, Class<T> persistentClass, Properties properties) throws GoraException { try {/*from w w w. ja v a2s. c om*/ super.initialize(keyClass, persistentClass, properties); } catch (GoraException ge) { LOG.error(ge.getMessage(), ge); throw new GoraException(ge); } String mappingFile = null; try { mappingFile = DataStoreFactory.getMappingFile(properties, (DataStore<?, ?>) this, DEFAULT_MAPPING_FILE); } catch (IOException ioe) { LOG.error(ioe.getMessage(), ioe); throw new GoraException(ioe); } String luceneVersion = properties.getProperty(LUCENE_VERSION_KEY, DEFAULT_LUCENE_VERSION); String ramBuffer = properties.getProperty(LUCENE_RAM_BUFFER_KEY, DEFAULT_LUCENE_RAMBUFFER); LOG.debug("Lucene index version: {}", luceneVersion); LOG.debug("Lucene index writer RAM buffer size: {}", ramBuffer); try { mapping = readMapping(mappingFile); } catch (IOException ioe) { LOG.error(ioe.getMessage(), ioe); throw new GoraException(ioe); } String persistentClassObject = persistentClass.getCanonicalName(); String dataStoreOutputPath = outputPath + "_" + persistentClassObject .substring(persistentClassObject.lastIndexOf('.') + 1).toLowerCase(Locale.getDefault()); try { dir = FSDirectory.open(FileSystems.getDefault().getPath(dataStoreOutputPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); iwc.setRAMBufferSizeMB(Double.parseDouble(ramBuffer)); writer = new IndexWriter(dir, iwc); //TODO do we definately want all past deletions to be applied. searcherManager = new SearcherManager(writer, true, true, new SearcherFactory()); } catch (IOException e) { LOG.error("Error opening {} with Lucene FSDirectory.", outputPath, e); } }
From source file:org.apache.maven.index.context.NexusIndexWriter.java
License:Apache License
public static IndexWriterConfig defaultConfig() { final IndexWriterConfig config = new IndexWriterConfig(new NexusAnalyzer()); // default open mode is CreateOrAppend which suits us config.setRAMBufferSizeMB(2.0); // old default config.setMergeScheduler(new SerialMergeScheduler()); // merging serially config.setWriteLockTimeout(IndexWriterConfig.WRITE_LOCK_TIMEOUT); return config; }
From source file:org.apache.solr.update.SolrIndexConfig.java
License:Apache License
public IndexWriterConfig toIndexWriterConfig(IndexSchema schema) { // so that we can update the analyzer on core reload, we pass null // for the default analyzer, and explicitly pass an analyzer on // appropriate calls to IndexWriter IndexWriterConfig iwc = new IndexWriterConfig(luceneVersion, null); if (maxBufferedDocs != -1) iwc.setMaxBufferedDocs(maxBufferedDocs); if (ramBufferSizeMB != -1) iwc.setRAMBufferSizeMB(ramBufferSizeMB); if (termIndexInterval != -1) iwc.setTermIndexInterval(termIndexInterval); if (writeLockTimeout != -1) iwc.setWriteLockTimeout(writeLockTimeout); iwc.setSimilarity(schema.getSimilarity()); iwc.setMergePolicy(buildMergePolicy(schema)); iwc.setMergeScheduler(buildMergeScheduler(schema)); iwc.setInfoStream(infoStream);//from ww w . j ava 2 s . c o m // do this after buildMergePolicy since the backcompat logic // there may modify the effective useCompoundFile iwc.setUseCompoundFile(getUseCompoundFile()); if (maxIndexingThreads != -1) { iwc.setMaxThreadStates(maxIndexingThreads); } if (mergedSegmentWarmerInfo != null) { // TODO: add infostream -> normal logging system (there is an issue somewhere) IndexReaderWarmer warmer = schema.getResourceLoader().newInstance(mergedSegmentWarmerInfo.className, IndexReaderWarmer.class, null, new Class[] { InfoStream.class }, new Object[] { iwc.getInfoStream() }); iwc.setMergedSegmentWarmer(warmer); } return iwc; }
From source file:org.dbpedia.spotlight.lucene.index.BaseIndexer.java
License:Apache License
/** * Base class with the indexing functionality used by the subclasses {@link SeparateOccurrencesIndexer} and {@link MergedOccurrencesContextIndexer}}. * @param lucene/*from w w w . jav a 2s. com*/ * @param create - what to do if lucene.mContextIndexDir already exists (so we do not unvoluntary add to an existing index). true to create the index or overwrite the existing one; false to append to the existing index * @throws IOException */ public BaseIndexer(LuceneManager lucene, boolean create) throws IOException { this.mLucene = lucene; //TODO this config is new in 3.6, does basically what LuceneManager does for us. IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_36, lucene.defaultAnalyzer()); iwConfig.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND); /* Determines ... buffering added documents and deletions before they are flushed to the Directory. NOTE: because IndexWriter uses ints when managing its internal storage, (...) it's best to set this value comfortably under 2048. http://lucene.apache.org/java/3_0_2/api/all/org/apache/lucene/index/IndexWriter.html#setRAMBufferSizeMB%28double%29 */ iwConfig.setRAMBufferSizeMB(lucene.RAMBufferSizeMB()); /* Generally for faster indexing performance it's best to flush by RAM usage instead of document count and use as large a RAM buffer as you can. http://lucene.apache.org/java/3_0_2/api/all/org/apache/lucene/index/IndexWriter.html#setRAMBufferSizeMB%28double%29 But if setting by doc count, the sweet spot suggested is 48 http://issues.apache.org/jira/browse/LUCENE-843 */ //this.mWriter.setMaxBufferedDocs(lucene.RAMBufferSizeMB()); this.mWriter = new IndexWriter(lucene.directory(), iwConfig); }
From source file:org.drftpd.vfs.index.lucene.LuceneEngine.java
License:Open Source License
/**
 * Opens all the streams the engine needs to work properly: the FSDirectory
 * backing the index (optionally with native file locking) and the IndexWriter.
 *
 * @throws IndexException if the storage or writer cannot be initialized
 */
private void openStreams() throws IndexException {
    try {
        _storage = _nativeLocking
                ? FSDirectory.open(new File(INDEX_DIR), new NativeFSLockFactory(INDEX_DIR))
                : FSDirectory.open(new File(INDEX_DIR));
        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_32, ANALYZER);
        writerConfig.setMaxBufferedDocs(_maxDocsBuffer);
        writerConfig.setRAMBufferSizeMB(_maxRAMBufferSize);
        _iWriter = new IndexWriter(_storage, writerConfig);
    } catch (IOException e) {
        // Partial initialization is unusable: tear everything down before failing.
        closeAll();
        throw new IndexException("Unable to initialize the index", e);
    }
}
From source file:org.elasticsearch.index.engine.internal.AsynchronousEngine.java
License:Apache License
// Creates and configures the Lucene IndexWriter for this engine's store:
// chooses CREATE vs APPEND based on whether an index already exists, then wires
// in the deletion policy, merge scheduler/policy, similarity, RAM buffer size,
// thread states, codec, lock timeout and a merged-segment warmer.
private IndexWriter createWriter() throws IOException {
    try {
        // APPEND only when a Lucene index already exists in the store directory.
        boolean create = !Lucene.indexExists(store.directory());
        IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION, analysisService.defaultIndexAnalyzer());
        config.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND);
        config.setIndexDeletionPolicy(deletionPolicy);
        config.setInfoStream(new LoggerInfoStream(indexSettings, shardId));
        config.setMergeScheduler(mergeScheduler.newMergeScheduler());
        MergePolicy mergePolicy = mergePolicyProvider.getMergePolicy();
        // Give us the opportunity to upgrade old segments while performing
        // background merges
        mergePolicy = new ElasticsearchMergePolicy(mergePolicy);
        config.setMergePolicy(mergePolicy);
        config.setSimilarity(similarityService.similarity());
        config.setRAMBufferSizeMB(indexingBufferSize.mbFrac());
        config.setMaxThreadStates(indexConcurrency);
        config.setCodec(codecService.codec(codecName));
        /* We set this timeout to a highish value to work around
         * the default poll interval in the Lucene lock that is
         * 1000ms by default. We might need to poll multiple times
         * here but with 1s poll this is only executed twice at most
         * in combination with the default writelock timeout */
        config.setWriteLockTimeout(5000);
        config.setUseCompoundFile(this.compoundOnFlush);
        config.setCheckIntegrityAtMerge(checksumOnMerge);
        // Warm-up hook for newly-merged segments. Warming up segments here is
        // better since it will be performed at the end of the merge operation
        // and won't slow down _refresh.
        config.setMergedSegmentWarmer(new IndexReaderWarmer() {
            @Override
            public void warm(AtomicReader reader) throws IOException {
                try {
                    assert isMergedSegment(reader);
                    if (warmer != null) {
                        final Engine.Searcher searcher = new SimpleSearcher("warmer", new IndexSearcher(reader));
                        final IndicesWarmer.WarmerContext context = new IndicesWarmer.WarmerContext(shardId,
                                searcher);
                        warmer.warmNewReaders(context);
                    }
                } catch (Throwable t) {
                    // Don't fail a merge if the warm-up failed
                    if (!closed) {
                        logger.warn("Warm-up failed", t);
                    }
                    if (t instanceof Error) {
                        // assertion/out-of-memory error, don't ignore those
                        throw (Error) t;
                    }
                }
            }
        });
        return new IndexWriter(store.directory(), config);
    } catch (LockObtainFailedException ex) {
        // Report the current lock state to aid diagnosing concurrent-writer issues,
        // then propagate the failure.
        boolean isLocked = IndexWriter.isLocked(store.directory());
        logger.warn("Could not lock IndexWriter isLocked [{}]", ex, isLocked);
        throw ex;
    }
}
From source file:org.elasticsearch.index.engine.internal.InternalEngine.java
License:Apache License
// Creates and configures the Lucene IndexWriter for this engine's store.
// Forcibly releases a stale write lock (left by a previous writer) before
// opening, chooses CREATE vs APPEND based on index existence, and wires in the
// deletion policy, merge scheduler/policy, similarity, RAM buffer, codec and a
// merged-segment warmer.
private IndexWriter createWriter() throws IOException {
    try {
        // release locks when started
        if (IndexWriter.isLocked(store.directory())) {
            logger.warn("shard is locked, releasing lock");
            IndexWriter.unlock(store.directory());
        }
        boolean create = !Lucene.indexExists(store.directory());
        IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION, analysisService.defaultIndexAnalyzer());
        config.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND);
        config.setIndexDeletionPolicy(deletionPolicy);
        config.setMergeScheduler(mergeScheduler.newMergeScheduler());
        MergePolicy mergePolicy = mergePolicyProvider.newMergePolicy();
        // Give us the opportunity to upgrade old segments while performing
        // background merges
        mergePolicy = new IndexUpgraderMergePolicy(mergePolicy);
        config.setMergePolicy(mergePolicy);
        config.setSimilarity(similarityService.similarity());
        config.setRAMBufferSizeMB(indexingBufferSize.mbFrac());
        config.setMaxThreadStates(indexConcurrency);
        config.setCodec(codecService.codec(codecName));
        /* We set this timeout to a highish value to work around
         * the default poll interval in the Lucene lock that is
         * 1000ms by default. We might need to poll multiple times
         * here but with 1s poll this is only executed twice at most
         * in combination with the default writelock timeout */
        config.setWriteLockTimeout(5000);
        config.setUseCompoundFile(this.compoundOnFlush);
        // Warm-up hook for newly-merged segments. Warming up segments here is
        // better since it will be performed at the end of the merge operation
        // and won't slow down _refresh.
        config.setMergedSegmentWarmer(new IndexReaderWarmer() {
            @Override
            public void warm(AtomicReader reader) throws IOException {
                try {
                    assert isMergedSegment(reader);
                    final Engine.Searcher searcher = new SimpleSearcher("warmer", new IndexSearcher(reader));
                    final IndicesWarmer.WarmerContext context = new IndicesWarmer.WarmerContext(shardId, searcher);
                    if (warmer != null)
                        warmer.warm(context);
                } catch (Throwable t) {
                    // Don't fail a merge if the warm-up failed
                    if (!closed) {
                        logger.warn("Warm-up failed", t);
                    }
                    if (t instanceof Error) {
                        // assertion/out-of-memory error, don't ignore those
                        throw (Error) t;
                    }
                }
            }
        });
        return new IndexWriter(store.directory(), config);
    } catch (LockObtainFailedException ex) {
        // Report the current lock state to aid diagnosing concurrent-writer issues,
        // then propagate the failure.
        boolean isLocked = IndexWriter.isLocked(store.directory());
        logger.warn("Could not lock IndexWriter isLocked [{}]", ex, isLocked);
        throw ex;
    }
}
From source file:org.elasticsearch.index.engine.InternalEngine.java
License:Apache License
// Creates and configures the Lucene IndexWriter from this engine's EngineConfig.
// The caller decides CREATE vs APPEND via the `create` flag. Commit-on-close is
// disabled (commits are managed explicitly elsewhere), and an InfoStream is
// plumbed in when the "tests.verbose" system property is set.
private IndexWriter createWriter(boolean create) throws IOException {
    try {
        final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
        iwc.setCommitOnClose(false); // we by default don't commit on close
        iwc.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND);
        iwc.setIndexDeletionPolicy(deletionPolicy);
        // with tests.verbose, lucene sets this up: plumb to align with filesystem stream
        boolean verbose = false;
        try {
            verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
        } catch (Throwable ignore) {
            // best-effort: fall back to non-verbose logging on any failure
        }
        iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
        iwc.setMergeScheduler(mergeScheduler);
        MergePolicy mergePolicy = config().getMergePolicy();
        // Give us the opportunity to upgrade old segments while performing
        // background merges
        mergePolicy = new ElasticsearchMergePolicy(mergePolicy);
        iwc.setMergePolicy(mergePolicy);
        iwc.setSimilarity(engineConfig.getSimilarity());
        iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().mbFrac());
        iwc.setCodec(engineConfig.getCodec());
        /* We set this timeout to a highish value to work around
         * the default poll interval in the Lucene lock that is
         * 1000ms by default. We might need to poll multiple times
         * here but with 1s poll this is only executed twice at most
         * in combination with the default writelock timeout */
        iwc.setWriteLockTimeout(5000);
        iwc.setUseCompoundFile(this.engineConfig.isCompoundOnFlush());
        // Warm-up hook for newly-merged segments. Warming up segments here is
        // better since it will be performed at the end of the merge operation
        // and won't slow down _refresh.
        iwc.setMergedSegmentWarmer(new IndexReaderWarmer() {
            @Override
            public void warm(LeafReader reader) throws IOException {
                try {
                    LeafReader esLeafReader = new ElasticsearchLeafReader(reader, shardId);
                    assert isMergedSegment(esLeafReader);
                    if (warmer != null) {
                        final Engine.Searcher searcher = new Searcher("warmer",
                                searcherFactory.newSearcher(esLeafReader, null));
                        final IndicesWarmer.WarmerContext context = new IndicesWarmer.WarmerContext(shardId,
                                searcher);
                        warmer.warmNewReaders(context);
                    }
                } catch (Throwable t) {
                    // Don't fail a merge if the warm-up failed
                    if (isClosed.get() == false) {
                        logger.warn("Warm-up failed", t);
                    }
                    if (t instanceof Error) {
                        // assertion/out-of-memory error, don't ignore those
                        throw (Error) t;
                    }
                }
            }
        });
        return new IndexWriter(store.directory(), iwc);
    } catch (LockObtainFailedException ex) {
        // Report the current lock state to aid diagnosing concurrent-writer issues,
        // then propagate the failure.
        boolean isLocked = IndexWriter.isLocked(store.directory());
        logger.warn("Could not lock IndexWriter isLocked [{}]", ex, isLocked);
        throw ex;
    }
}
From source file:org.elasticsearch.index.engine.robin.RobinEngine.java
License:Apache License
private IndexWriter createWriter() throws IOException { IndexWriter indexWriter = null;// www . j a v a2s . c om try { // release locks when started if (IndexWriter.isLocked(store.directory())) { logger.warn("shard is locked, releasing lock"); IndexWriter.unlock(store.directory()); } boolean create = !IndexReader.indexExists(store.directory()); IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION, analysisService.defaultIndexAnalyzer()); config.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND); config.setIndexDeletionPolicy(deletionPolicy); config.setMergeScheduler(mergeScheduler.newMergeScheduler()); config.setMergePolicy(mergePolicyProvider.newMergePolicy()); config.setSimilarity(similarityService.defaultIndexSimilarity()); config.setRAMBufferSizeMB(indexingBufferSize.mbFrac()); config.setTermIndexInterval(termIndexInterval); config.setReaderTermsIndexDivisor(termIndexDivisor); config.setMaxThreadStates(indexConcurrency); indexWriter = new IndexWriter(store.directory(), config); } catch (IOException e) { safeClose(indexWriter); throw e; } return indexWriter; }