Example usage for org.apache.lucene.index IndexWriterConfig setRAMBufferSizeMB

List of usage examples for org.apache.lucene.index IndexWriterConfig setRAMBufferSizeMB

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriterConfig setRAMBufferSizeMB.

Prototype

@Override
    public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) 

Source Link

Usage

From source file:nl.inl.util.Utilities.java

License:Apache License

public static IndexWriterConfig getIndexWriterConfig(Analyzer analyzer, boolean create) {
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_42, analyzer);
    config.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
    config.setRAMBufferSizeMB(150); // faster indexing

    // Set merge factor (if using LogMergePolicy, which is the default up to version LUCENE_32,
    // so yes)/*from  w ww .  j av a 2  s  .  c o  m*/
    MergePolicy mp = config.getMergePolicy();
    if (mp instanceof LogMergePolicy) {
        ((LogMergePolicy) mp).setMergeFactor(40); // faster indexing
    }
    return config;
}

From source file:org.apache.gora.lucene.store.LuceneStore.java

License:Apache License

@Override
public void initialize(Class<K> keyClass, Class<T> persistentClass, Properties properties)
        throws GoraException {
    try {/*from  w w  w. ja  v a2s.  c om*/
        super.initialize(keyClass, persistentClass, properties);
    } catch (GoraException ge) {
        LOG.error(ge.getMessage(), ge);
        throw new GoraException(ge);
    }

    String mappingFile = null;
    try {
        mappingFile = DataStoreFactory.getMappingFile(properties, (DataStore<?, ?>) this, DEFAULT_MAPPING_FILE);
    } catch (IOException ioe) {
        LOG.error(ioe.getMessage(), ioe);
        throw new GoraException(ioe);
    }
    String luceneVersion = properties.getProperty(LUCENE_VERSION_KEY, DEFAULT_LUCENE_VERSION);
    String ramBuffer = properties.getProperty(LUCENE_RAM_BUFFER_KEY, DEFAULT_LUCENE_RAMBUFFER);

    LOG.debug("Lucene index version: {}", luceneVersion);
    LOG.debug("Lucene index writer RAM buffer size: {}", ramBuffer);

    try {
        mapping = readMapping(mappingFile);
    } catch (IOException ioe) {
        LOG.error(ioe.getMessage(), ioe);
        throw new GoraException(ioe);
    }
    String persistentClassObject = persistentClass.getCanonicalName();
    String dataStoreOutputPath = outputPath + "_" + persistentClassObject
            .substring(persistentClassObject.lastIndexOf('.') + 1).toLowerCase(Locale.getDefault());
    try {
        dir = FSDirectory.open(FileSystems.getDefault().getPath(dataStoreOutputPath));

        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(Double.parseDouble(ramBuffer));

        writer = new IndexWriter(dir, iwc);
        //TODO do we definately want all past deletions to be applied.
        searcherManager = new SearcherManager(writer, true, true, new SearcherFactory());
    } catch (IOException e) {
        LOG.error("Error opening {} with Lucene FSDirectory.", outputPath, e);
    }
}

From source file:org.apache.maven.index.context.NexusIndexWriter.java

License:Apache License

public static IndexWriterConfig defaultConfig() {
    final IndexWriterConfig config = new IndexWriterConfig(new NexusAnalyzer());
    // default open mode is CreateOrAppend which suits us
    config.setRAMBufferSizeMB(2.0); // old default
    config.setMergeScheduler(new SerialMergeScheduler()); // merging serially
    config.setWriteLockTimeout(IndexWriterConfig.WRITE_LOCK_TIMEOUT);
    return config;
}

From source file:org.apache.solr.update.SolrIndexConfig.java

License:Apache License

public IndexWriterConfig toIndexWriterConfig(IndexSchema schema) {
    // so that we can update the analyzer on core reload, we pass null
    // for the default analyzer, and explicitly pass an analyzer on 
    // appropriate calls to IndexWriter

    IndexWriterConfig iwc = new IndexWriterConfig(luceneVersion, null);
    if (maxBufferedDocs != -1)
        iwc.setMaxBufferedDocs(maxBufferedDocs);

    if (ramBufferSizeMB != -1)
        iwc.setRAMBufferSizeMB(ramBufferSizeMB);

    if (termIndexInterval != -1)
        iwc.setTermIndexInterval(termIndexInterval);

    if (writeLockTimeout != -1)
        iwc.setWriteLockTimeout(writeLockTimeout);

    iwc.setSimilarity(schema.getSimilarity());
    iwc.setMergePolicy(buildMergePolicy(schema));
    iwc.setMergeScheduler(buildMergeScheduler(schema));
    iwc.setInfoStream(infoStream);//from  ww  w  .  j  ava  2  s  . c  o  m

    // do this after buildMergePolicy since the backcompat logic 
    // there may modify the effective useCompoundFile
    iwc.setUseCompoundFile(getUseCompoundFile());

    if (maxIndexingThreads != -1) {
        iwc.setMaxThreadStates(maxIndexingThreads);
    }

    if (mergedSegmentWarmerInfo != null) {
        // TODO: add infostream -> normal logging system (there is an issue somewhere)
        IndexReaderWarmer warmer = schema.getResourceLoader().newInstance(mergedSegmentWarmerInfo.className,
                IndexReaderWarmer.class, null, new Class[] { InfoStream.class },
                new Object[] { iwc.getInfoStream() });
        iwc.setMergedSegmentWarmer(warmer);
    }

    return iwc;
}

From source file:org.dbpedia.spotlight.lucene.index.BaseIndexer.java

License:Apache License

/**
 * Base class with the indexing functionality used by the subclasses {@link SeparateOccurrencesIndexer} and {@link MergedOccurrencesContextIndexer}}.
 * @param lucene/*from w w w . jav a  2s. com*/
 * @param create - what to do if lucene.mContextIndexDir already exists (so we do not unvoluntary add to an existing index). true to create the index or overwrite the existing one; false to append to the existing index
 * @throws IOException
 */
public BaseIndexer(LuceneManager lucene, boolean create) throws IOException {
    this.mLucene = lucene;

    //TODO this config is new in 3.6, does basically what LuceneManager does for us.
    IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_36, lucene.defaultAnalyzer());
    iwConfig.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND);

    /* Determines ... buffering added documents and deletions before they are flushed to the Directory.
    NOTE: because IndexWriter uses ints when managing its internal storage,
    (...) it's best to set this value comfortably under 2048.
    http://lucene.apache.org/java/3_0_2/api/all/org/apache/lucene/index/IndexWriter.html#setRAMBufferSizeMB%28double%29
     */
    iwConfig.setRAMBufferSizeMB(lucene.RAMBufferSizeMB());

    /*
      Generally for faster indexing performance it's best to flush by RAM usage instead of document count and use as large a RAM buffer as you can.
      http://lucene.apache.org/java/3_0_2/api/all/org/apache/lucene/index/IndexWriter.html#setRAMBufferSizeMB%28double%29
            
      But if setting by doc count, the sweet spot suggested is 48 http://issues.apache.org/jira/browse/LUCENE-843
     */
    //this.mWriter.setMaxBufferedDocs(lucene.RAMBufferSizeMB());

    this.mWriter = new IndexWriter(lucene.directory(), iwConfig);

}

From source file:org.drftpd.vfs.index.lucene.LuceneEngine.java

License:Open Source License

/**
 * Opens all the needed streams that the engine needs to work properly.
 * //ww  w  . j  a v  a 2 s  .c om
 * @throws IndexException
 */
private void openStreams() throws IndexException {
    try {
        if (_nativeLocking) {
            _storage = FSDirectory.open(new File(INDEX_DIR), new NativeFSLockFactory(INDEX_DIR));
        } else {
            _storage = FSDirectory.open(new File(INDEX_DIR));
        }

        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, ANALYZER);
        conf.setMaxBufferedDocs(_maxDocsBuffer);
        conf.setRAMBufferSizeMB(_maxRAMBufferSize);

        _iWriter = new IndexWriter(_storage, conf);
    } catch (IOException e) {
        closeAll();

        throw new IndexException("Unable to initialize the index", e);
    }
}

From source file:org.elasticsearch.index.engine.internal.AsynchronousEngine.java

License:Apache License

private IndexWriter createWriter() throws IOException {
    try {//w w w.  ja va 2  s .  co m
        boolean create = !Lucene.indexExists(store.directory());
        IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION,
                analysisService.defaultIndexAnalyzer());
        config.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND);
        config.setIndexDeletionPolicy(deletionPolicy);
        config.setInfoStream(new LoggerInfoStream(indexSettings, shardId));
        config.setMergeScheduler(mergeScheduler.newMergeScheduler());
        MergePolicy mergePolicy = mergePolicyProvider.getMergePolicy();
        // Give us the opportunity to upgrade old segments while performing
        // background merges
        mergePolicy = new ElasticsearchMergePolicy(mergePolicy);
        config.setMergePolicy(mergePolicy);
        config.setSimilarity(similarityService.similarity());
        config.setRAMBufferSizeMB(indexingBufferSize.mbFrac());
        config.setMaxThreadStates(indexConcurrency);
        config.setCodec(codecService.codec(codecName));
        /* We set this timeout to a highish value to work around
         * the default poll interval in the Lucene lock that is
         * 1000ms by default. We might need to poll multiple times
         * here but with 1s poll this is only executed twice at most
         * in combination with the default writelock timeout*/
        config.setWriteLockTimeout(5000);
        config.setUseCompoundFile(this.compoundOnFlush);
        config.setCheckIntegrityAtMerge(checksumOnMerge);
        // Warm-up hook for newly-merged segments. Warming up segments here is better since it will be performed at the end
        // of the merge operation and won't slow down _refresh
        config.setMergedSegmentWarmer(new IndexReaderWarmer() {
            @Override
            public void warm(AtomicReader reader) throws IOException {
                try {
                    assert isMergedSegment(reader);
                    if (warmer != null) {
                        final Engine.Searcher searcher = new SimpleSearcher("warmer",
                                new IndexSearcher(reader));
                        final IndicesWarmer.WarmerContext context = new IndicesWarmer.WarmerContext(shardId,
                                searcher);
                        warmer.warmNewReaders(context);
                    }
                } catch (Throwable t) {
                    // Don't fail a merge if the warm-up failed
                    if (!closed) {
                        logger.warn("Warm-up failed", t);
                    }
                    if (t instanceof Error) {
                        // assertion/out-of-memory error, don't ignore those
                        throw (Error) t;
                    }
                }
            }
        });
        return new IndexWriter(store.directory(), config);
    } catch (LockObtainFailedException ex) {
        boolean isLocked = IndexWriter.isLocked(store.directory());
        logger.warn("Could not lock IndexWriter isLocked [{}]", ex, isLocked);
        throw ex;
    }
}

From source file:org.elasticsearch.index.engine.internal.InternalEngine.java

License:Apache License

private IndexWriter createWriter() throws IOException {
    try {/*from  ww  w  .jav  a2  s  .  c o m*/
        // release locks when started
        if (IndexWriter.isLocked(store.directory())) {
            logger.warn("shard is locked, releasing lock");
            IndexWriter.unlock(store.directory());
        }
        boolean create = !Lucene.indexExists(store.directory());
        IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION,
                analysisService.defaultIndexAnalyzer());
        config.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND);
        config.setIndexDeletionPolicy(deletionPolicy);
        config.setMergeScheduler(mergeScheduler.newMergeScheduler());
        MergePolicy mergePolicy = mergePolicyProvider.newMergePolicy();
        // Give us the opportunity to upgrade old segments while performing
        // background merges
        mergePolicy = new IndexUpgraderMergePolicy(mergePolicy);
        config.setMergePolicy(mergePolicy);
        config.setSimilarity(similarityService.similarity());
        config.setRAMBufferSizeMB(indexingBufferSize.mbFrac());
        config.setMaxThreadStates(indexConcurrency);
        config.setCodec(codecService.codec(codecName));
        /* We set this timeout to a highish value to work around
         * the default poll interval in the Lucene lock that is
         * 1000ms by default. We might need to poll multiple times
         * here but with 1s poll this is only executed twice at most
         * in combination with the default writelock timeout*/
        config.setWriteLockTimeout(5000);
        config.setUseCompoundFile(this.compoundOnFlush);
        // Warm-up hook for newly-merged segments. Warming up segments here is better since it will be performed at the end
        // of the merge operation and won't slow down _refresh
        config.setMergedSegmentWarmer(new IndexReaderWarmer() {
            @Override
            public void warm(AtomicReader reader) throws IOException {
                try {
                    assert isMergedSegment(reader);
                    final Engine.Searcher searcher = new SimpleSearcher("warmer", new IndexSearcher(reader));
                    final IndicesWarmer.WarmerContext context = new IndicesWarmer.WarmerContext(shardId,
                            searcher);
                    if (warmer != null)
                        warmer.warm(context);
                } catch (Throwable t) {
                    // Don't fail a merge if the warm-up failed
                    if (!closed) {
                        logger.warn("Warm-up failed", t);
                    }
                    if (t instanceof Error) {
                        // assertion/out-of-memory error, don't ignore those
                        throw (Error) t;
                    }
                }
            }
        });
        return new IndexWriter(store.directory(), config);
    } catch (LockObtainFailedException ex) {
        boolean isLocked = IndexWriter.isLocked(store.directory());
        logger.warn("Could not lock IndexWriter isLocked [{}]", ex, isLocked);
        throw ex;
    }
}

From source file:org.elasticsearch.index.engine.InternalEngine.java

License:Apache License

private IndexWriter createWriter(boolean create) throws IOException {
    try {/*  w w w.  ja  v  a  2 s  .  c o  m*/
        final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
        iwc.setCommitOnClose(false); // we by default don't commit on close
        iwc.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND);
        iwc.setIndexDeletionPolicy(deletionPolicy);
        // with tests.verbose, lucene sets this up: plumb to align with filesystem stream
        boolean verbose = false;
        try {
            verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
        } catch (Throwable ignore) {
        }
        iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
        iwc.setMergeScheduler(mergeScheduler);
        MergePolicy mergePolicy = config().getMergePolicy();
        // Give us the opportunity to upgrade old segments while performing
        // background merges
        mergePolicy = new ElasticsearchMergePolicy(mergePolicy);
        iwc.setMergePolicy(mergePolicy);
        iwc.setSimilarity(engineConfig.getSimilarity());
        iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().mbFrac());
        iwc.setCodec(engineConfig.getCodec());
        /* We set this timeout to a highish value to work around
         * the default poll interval in the Lucene lock that is
         * 1000ms by default. We might need to poll multiple times
         * here but with 1s poll this is only executed twice at most
         * in combination with the default writelock timeout*/
        iwc.setWriteLockTimeout(5000);
        iwc.setUseCompoundFile(this.engineConfig.isCompoundOnFlush());
        // Warm-up hook for newly-merged segments. Warming up segments here is better since it will be performed at the end
        // of the merge operation and won't slow down _refresh
        iwc.setMergedSegmentWarmer(new IndexReaderWarmer() {
            @Override
            public void warm(LeafReader reader) throws IOException {
                try {
                    LeafReader esLeafReader = new ElasticsearchLeafReader(reader, shardId);
                    assert isMergedSegment(esLeafReader);
                    if (warmer != null) {
                        final Engine.Searcher searcher = new Searcher("warmer",
                                searcherFactory.newSearcher(esLeafReader, null));
                        final IndicesWarmer.WarmerContext context = new IndicesWarmer.WarmerContext(shardId,
                                searcher);
                        warmer.warmNewReaders(context);
                    }
                } catch (Throwable t) {
                    // Don't fail a merge if the warm-up failed
                    if (isClosed.get() == false) {
                        logger.warn("Warm-up failed", t);
                    }
                    if (t instanceof Error) {
                        // assertion/out-of-memory error, don't ignore those
                        throw (Error) t;
                    }
                }
            }
        });
        return new IndexWriter(store.directory(), iwc);
    } catch (LockObtainFailedException ex) {
        boolean isLocked = IndexWriter.isLocked(store.directory());
        logger.warn("Could not lock IndexWriter isLocked [{}]", ex, isLocked);
        throw ex;
    }
}

From source file:org.elasticsearch.index.engine.robin.RobinEngine.java

License:Apache License

private IndexWriter createWriter() throws IOException {
    IndexWriter indexWriter = null;//  www  . j a v a2s .  c om
    try {
        // release locks when started
        if (IndexWriter.isLocked(store.directory())) {
            logger.warn("shard is locked, releasing lock");
            IndexWriter.unlock(store.directory());
        }
        boolean create = !IndexReader.indexExists(store.directory());
        IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION,
                analysisService.defaultIndexAnalyzer());
        config.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND);
        config.setIndexDeletionPolicy(deletionPolicy);
        config.setMergeScheduler(mergeScheduler.newMergeScheduler());
        config.setMergePolicy(mergePolicyProvider.newMergePolicy());
        config.setSimilarity(similarityService.defaultIndexSimilarity());
        config.setRAMBufferSizeMB(indexingBufferSize.mbFrac());
        config.setTermIndexInterval(termIndexInterval);
        config.setReaderTermsIndexDivisor(termIndexDivisor);
        config.setMaxThreadStates(indexConcurrency);

        indexWriter = new IndexWriter(store.directory(), config);
    } catch (IOException e) {
        safeClose(indexWriter);
        throw e;
    }
    return indexWriter;
}