Example usage for org.apache.lucene.index IndexWriterConfig setMergePolicy

List of usage examples for org.apache.lucene.index IndexWriterConfig setMergePolicy

Introduction

On this page you can find example usages of org.apache.lucene.index IndexWriterConfig setMergePolicy.

Prototype

@Override
    public IndexWriterConfig setMergePolicy(MergePolicy mergePolicy) 

Source Link

Usage

From source file:proj.zoie.impl.indexing.internal.RAMSearchIndex.java

License:Apache License

/**
 * Returns the writer for this index, opening and caching it on first use.
 *
 * <p>When {@code _indexWriter} is already set it is returned immediately and
 * both arguments are ignored. Otherwise a {@link ZoieMergePolicy} is built from
 * {@code _mergePolicyParams}, the {@link IndexWriterConfig} held in
 * {@code indexWriterConfigStorage} is fetched (or created and stored if absent),
 * and a new writer over {@code _directory} is opened with it.
 *
 * @param analyzer   analyzer used only when a new {@link IndexWriterConfig} has to be created
 * @param similarity similarity to install on the config; {@code null} leaves the config's
 *                   current similarity untouched
 * @return the cached writer, or the newly opened one
 * @throws IOException propagated from the {@link IndexWriter} constructor
 */
public IndexWriter openIndexWriter(Analyzer analyzer, Similarity similarity) throws IOException {
    if (_indexWriter != null) {
        return _indexWriter;
    }

    // Merge policy is rebuilt for every newly opened writer; compound files are disabled.
    ZoieMergePolicy zoiePolicy = new ZoieMergePolicy();
    zoiePolicy.setMergePolicyParams(_mergePolicyParams);
    zoiePolicy.setUseCompoundFile(false);

    // Reuse the stored config when there is one; otherwise create it and remember it.
    IndexWriterConfig writerConfig = indexWriterConfigStorage.get();
    if (writerConfig == null) {
        writerConfig = new IndexWriterConfig(Version.LUCENE_34, analyzer);
        indexWriterConfigStorage.set(writerConfig);
    }

    writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    writerConfig.setMergeScheduler(_mergeScheduler);
    writerConfig.setMergePolicy(zoiePolicy);
    writerConfig.setReaderPooling(false);
    if (similarity != null) {
        writerConfig.setSimilarity(similarity);
    }
    writerConfig.setRAMBufferSizeMB(3);

    _indexWriter = new IndexWriter(_directory, writerConfig);
    return _indexWriter;
}

From source file:psidev.psi.mi.search.index.PsimiIndexWriter.java

License:Apache License

/**
 * Indexes the content of the given stream into the given directory.
 *
 * <p>A new {@link IndexWriter} is opened with a {@link LogDocMergePolicy} configured from
 * {@code MERGE_FACTOR}. When {@code createIndex} is set, any existing documents are deleted
 * (and that deletion committed) before indexing starts. The actual indexing is delegated to
 * {@code index(IndexWriter, InputStream, boolean)}.
 *
 * <p>The writer is always released: it is closed after successful indexing and rolled back
 * if anything fails, so the directory's write lock is not left held on error.
 *
 * @param directory     the Lucene directory to write to
 * @param is            the stream to read the data from
 * @param createIndex   if {@code true}, existing index content is wiped first
 * @param hasHeaderLine passed through to the delegate indexing method
 * @throws IOException        propagated from the writer or the delegate
 * @throws ConverterException propagated from the delegate
 * @throws MitabLineException propagated from the delegate
 */
public void index(Directory directory, InputStream is, boolean createIndex, boolean hasHeaderLine)
        throws IOException, ConverterException, MitabLineException {
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_30,
            new StandardAnalyzer(Version.LUCENE_30));
    LogMergePolicy policy = new LogDocMergePolicy();
    policy.setMergeFactor(MERGE_FACTOR);
    policy.setMaxMergeDocs(Integer.MAX_VALUE);
    writerConfig.setMergePolicy(policy);

    IndexWriter indexWriter = new IndexWriter(directory, writerConfig);
    boolean succeeded = false;
    try {
        if (createIndex) {
            indexWriter.commit();
            indexWriter.deleteAll();
            indexWriter.commit();
        }

        index(indexWriter, is, hasHeaderLine);
        succeeded = true;
    } finally {
        if (succeeded) {
            indexWriter.close();
        } else {
            // Something above threw: discard uncommitted work and release the writer
            // (and its lock) instead of leaking it.
            try {
                indexWriter.rollback();
            } catch (IOException ignored) {
                // Deliberately ignored so the original failure is the one that propagates.
            }
        }
    }
}

From source file:sh.isaac.provider.query.lucene.LuceneIndexer.java

License:Apache License

/**
 * Instantiates a new lucene indexer.
 *
 * <p>Resolves (and creates if necessary) the index folder under the configured search
 * folder, opens an {@link IndexWriter} over it, starts the reopen thread and registers a
 * change listener that indexes the semantics contained in each commit.
 *
 * @param indexName the index name; also used as the index folder name
 * @throws IOException Signals that an I/O exception has occurred. The failure is reported to
 *                     the {@code SystemStatusService} before being rethrown.
 */
protected LuceneIndexer(String indexName) throws IOException {
    try {
        this.indexName = indexName;
        this.luceneWriterService = LookupService.getService(WorkExecutors.class).getIOExecutor();
        this.luceneWriterFutureCheckerService = Executors.newFixedThreadPool(1,
                new NamedThreadFactory(indexName + " Lucene future checker", false));

        final Path searchFolder = LookupService.getService(ConfigurationService.class).getSearchFolderPath();
        final File luceneRootFolder = new File(searchFolder.toFile(), DEFAULT_LUCENE_FOLDER);

        luceneRootFolder.mkdirs();
        this.indexFolder = new File(luceneRootFolder, indexName);

        if (!this.indexFolder.exists()) {
            this.databaseValidity = DatabaseValidity.MISSING_DIRECTORY;
            LOG.info("Index folder missing: " + this.indexFolder.getAbsolutePath());
        } else {
            // File.list() returns null when the path is not a directory or cannot be read;
            // guard against that so we do not fail with a NullPointerException that would
            // bypass the IOException handling below.
            final String[] existingEntries = this.indexFolder.list();

            if ((existingEntries != null) && (existingEntries.length > 0)) {
                this.databaseValidity = DatabaseValidity.POPULATED_DIRECTORY;
            }
        }

        this.indexFolder.mkdirs();
        LOG.info("Index: " + this.indexFolder.getAbsolutePath());

        // switch over to MMapDirectory - in theory - this gives us back some
        // room on the JDK stack, letting the OS directly manage the caching of the index files - and more importantly, gives us a huge
        // performance boost during any operation that tries to do multi-threaded reads of the index (like the SOLOR rules processing) because
        // the default value of SimpleFSDirectory is a huge bottleneck.
        final MMapDirectory indexDirectory = new MMapDirectory(this.indexFolder.toPath());

        final IndexWriterConfig config = new IndexWriterConfig(new PerFieldAnalyzer());

        config.setRAMBufferSizeMB(256);

        final MergePolicy mergePolicy = new LogByteSizeMergePolicy();

        config.setMergePolicy(mergePolicy);
        config.setSimilarity(new ShortTextSimilarity());

        this.indexWriter = new IndexWriter(indexDirectory, config);

        final boolean applyAllDeletes = false;
        final boolean writeAllDeletes = false;

        this.searcherManager = new SearcherManager(indexWriter, applyAllDeletes, writeAllDeletes, null);

        // Create the ControlledRealTimeReopenThread that reopens the index periodically, taking into
        // account the changes made through the index writer.
        // The index is refreshed every 60 seconds when nobody is waiting
        // and every 100 millis whenever someone is waiting (see search method)
        // (see http://lucene.apache.org/core/4_3_0/core/org/apache/lucene/search/NRTManagerReopenThread.html)
        this.reopenThread = new ControlledRealTimeReopenThread<>(this.indexWriter, this.searcherManager, 60.00,
                0.1);
        this.startThread();

        // Register for commits:
        LOG.info("Registering indexer " + indexName + " for commits");
        this.changeListenerRef = new ChronologyChangeListener() {
            @Override
            public void handleCommit(CommitRecord commitRecord) {
                // Resolve the build-mode flag lazily, on the first commit seen.
                if (LuceneIndexer.this.dbBuildMode == null) {
                    LuceneIndexer.this.dbBuildMode = Get.configurationService().inDBBuildMode();
                }

                if (LuceneIndexer.this.dbBuildMode) {
                    LOG.debug("Ignore commit due to db build mode");
                    return;
                }

                final int size = commitRecord.getSemanticNidsInCommit().size();

                // Only list the individual nids for small commits to keep the log readable.
                if (size < 100) {
                    LOG.info("submitting semantic elements " + commitRecord.getSemanticNidsInCommit().toString()
                            + " to indexer " + getIndexerName() + " due to commit");
                } else {
                    LOG.info("submitting " + size + " semantic elements to indexer " + getIndexerName()
                            + " due to commit");
                }

                commitRecord.getSemanticNidsInCommit().stream().forEach(sememeId -> {
                    final SemanticChronology sc = Get.assemblageService().getSemanticChronology(sememeId);

                    index(sc);
                });
                LOG.info("Completed index of " + size + " semantics for " + getIndexerName());
            }

            @Override
            public void handleChange(SemanticChronology sc) {
                // noop
            }

            @Override
            public void handleChange(ConceptChronology cc) {
                // noop
            }

            @Override
            public UUID getListenerUuid() {
                return UuidT5Generator.get(getIndexerName());
            }
        };
        Get.commitService().addChangeListener(this.changeListenerRef);
    } catch (final IOException e) {
        LookupService.getService(SystemStatusService.class).notifyServiceConfigurationFailure(indexName, e);
        throw e;
    }
}

From source file:uk.co.flax.luwak.Monitor.java

License:Apache License

/**
 * Builds the default {@link IndexWriter} over the given directory.
 *
 * <p>The writer uses a {@link KeywordAnalyzer}, opens the index in
 * {@code CREATE_OR_APPEND} mode and merges with a {@link TieredMergePolicy}
 * limited to 4 segments per tier.
 *
 * @param directory the directory the writer operates on
 * @return a newly opened writer
 * @throws IOException propagated from the {@link IndexWriter} constructor
 */
static IndexWriter defaultIndexWriter(Directory directory) throws IOException {
    final TieredMergePolicy tieredPolicy = new TieredMergePolicy();
    tieredPolicy.setSegmentsPerTier(4);

    final IndexWriterConfig writerConfig = new IndexWriterConfig(new KeywordAnalyzer());
    writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    writerConfig.setMergePolicy(tieredPolicy);

    return new IndexWriter(directory, writerConfig);
}