Example usage for org.apache.lucene.index IndexWriterConfig setMergePolicy

Introduction

This page collects example usages of org.apache.lucene.index.IndexWriterConfig.setMergePolicy.

Prototype

@Override
public IndexWriterConfig setMergePolicy(MergePolicy mergePolicy)
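
As the prototype shows, setMergePolicy returns the config itself, so calls can be chained. Below is a minimal, hypothetical sketch of a typical call site, assuming a recent Lucene (6.x+) where IndexWriterConfig takes only an Analyzer; several of the older examples below pass a Version as well. The class name, index path, and policy settings are illustrative only:

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.store.FSDirectory;

public class SetMergePolicyExample {
    public static void main(String[] args) throws Exception {
        // Configure an explicit merge policy instead of relying on the default instance.
        TieredMergePolicy mergePolicy = new TieredMergePolicy();
        mergePolicy.setSegmentsPerTier(10.0); // 10 is the default; lower it for fewer, larger segments

        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        config.setMergePolicy(mergePolicy); // returns the IndexWriterConfig, so it can be chained

        try (IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get("/tmp/example-index")), config)) {
            // add or update documents here; merges run according to the configured policy
        }
    }
}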

Usage

From source file:org.elasticsearch.test.unit.common.lucene.uid.VersionsTests.java

License:Apache License

@Test
public void testMergingOldIndices() throws Exception {
    final IndexWriterConfig iwConf = new IndexWriterConfig(Lucene.VERSION, new KeywordAnalyzer());
    iwConf.setMergePolicy(new IndexUpgraderMergePolicy(iwConf.getMergePolicy()));
    final Directory dir = newDirectory();
    final IndexWriter iw = new IndexWriter(dir, iwConf);

    // 1st segment, no _version
    Document document = new Document();
    // Add a dummy field (enough to trigger #3237)
    document.add(new StringField("a", "b", Store.NO));
    StringField uid = new StringField(UidFieldMapper.NAME, "1", Store.YES);
    document.add(uid);
    iw.addDocument(document);
    uid.setStringValue("2");
    iw.addDocument(document);
    iw.commit();

    // 2nd segment, old layout
    document = new Document();
    UidField uidAndVersion = new UidField("3", 3L);
    document.add(uidAndVersion);
    iw.addDocument(document);
    uidAndVersion.uid = "4";
    uidAndVersion.version = 4L;
    iw.addDocument(document);
    iw.commit();

    // 3rd segment, new layout
    document = new Document();
    uid.setStringValue("5");
    Field version = new NumericDocValuesField(UidFieldMapper.VERSION, 5L);
    document.add(uid);
    document.add(version);
    iw.addDocument(document);
    uid.setStringValue("6");
    version.setLongValue(6L);
    iw.addDocument(document);
    iw.commit();

    final Map<String, Long> expectedVersions = ImmutableMap.<String, Long>builder().put("1", 0L).put("2", 0L)
            .put("3", 0L).put("4", 4L).put("5", 5L).put("6", 6L).build();

    // Force merge and check versions
    iw.forceMerge(1);
    final AtomicReader ir = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(iw.getDirectory()));
    final NumericDocValues versions = ir.getNumericDocValues(UidFieldMapper.VERSION);
    assertThat(versions, notNullValue());
    for (int i = 0; i < ir.maxDoc(); ++i) {
        final String uidValue = ir.document(i).get(UidFieldMapper.NAME);
        final long expectedVersion = expectedVersions.get(uidValue);
        assertThat(versions.get(i), equalTo(expectedVersion));
    }

    iw.close();
    assertThat(IndexWriter.isLocked(iw.getDirectory()), is(false));
    ir.close();
    dir.close();
}

From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.DocumentSubsetReaderTests.java

License:Open Source License

/** Same test as in FieldSubsetReaderTests: test that the core cache key (needed for NRT) is working */
public void testCoreCacheKey() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    iwc.setMaxBufferedDocs(100);
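    // NoMergePolicy disables segment merging entirely, so the original segment (and its core cache key) survives the delete and reopen below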
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    IndexWriter iw = new IndexWriter(dir, iwc);

    // add two docs, id:0 and id:1
    Document doc = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    doc.add(idField);
    idField.setStringValue("0");
    iw.addDocument(doc);
    idField.setStringValue("1");
    iw.addDocument(doc);

    // open reader
    DirectoryReader ir = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(iw),
            new ShardId("_index", "_na_", 0));
    ir = DocumentSubsetReader.wrap(ir, bitsetFilterCache, new MatchAllDocsQuery());
    assertEquals(2, ir.numDocs());
    assertEquals(1, ir.leaves().size());

    // delete id:0 and reopen
    iw.deleteDocuments(new Term("id", "0"));
    DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);

    // we should have the same cache key as before
    assertEquals(1, ir2.numDocs());
    assertEquals(1, ir2.leaves().size());
    assertSame(ir.leaves().get(0).reader().getCoreCacheHelper().getKey(),
            ir2.leaves().get(0).reader().getCoreCacheHelper().getKey());
    // However we don't support caching on the reader cache key since we override deletes
    assertNull(ir.leaves().get(0).reader().getReaderCacheHelper());
    assertNull(ir2.leaves().get(0).reader().getReaderCacheHelper());

    TestUtil.checkReader(ir);
    IOUtils.close(ir, ir2, iw, dir);
}

From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java

License:Open Source License

/** Test that the core cache key (needed for NRT) is working */
public void testCoreCacheKey() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    iwc.setMaxBufferedDocs(100);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    IndexWriter iw = new IndexWriter(dir, iwc);

    // add two docs, id:0 and id:1
    Document doc = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    doc.add(idField);
    idField.setStringValue("0");
    iw.addDocument(doc);
    idField.setStringValue("1");
    iw.addDocument(doc);

    // open reader
    DirectoryReader ir = FieldSubsetReader.wrap(DirectoryReader.open(iw),
            new CharacterRunAutomaton(Automata.makeString("id")));
    assertEquals(2, ir.numDocs());
    assertEquals(1, ir.leaves().size());

    // delete id:0 and reopen
    iw.deleteDocuments(new Term("id", "0"));
    DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);

    // we should have the same cache key as before
    assertEquals(1, ir2.numDocs());
    assertEquals(1, ir2.leaves().size());
    assertSame(ir.leaves().get(0).reader().getCoreCacheHelper().getKey(),
            ir2.leaves().get(0).reader().getCoreCacheHelper().getKey());

    TestUtil.checkReader(ir);
    IOUtils.close(ir, ir2, iw, dir);
}

From source file:org.elasticsearch.xpack.security.authz.accesscontrol.FieldDataCacheWithFieldSubsetReaderTests.java

License:Open Source License

@Before
public void setup() throws Exception {
    IndexSettings indexSettings = createIndexSettings();
    CircuitBreakerService circuitBreakerService = new NoneCircuitBreakerService();
    String name = "_field";
    indexFieldDataCache = new DummyAccountingFieldDataCache();
    sortedSetDVOrdinalsIndexFieldData = new SortedSetDVOrdinalsIndexFieldData(indexSettings,
            indexFieldDataCache, name, circuitBreakerService,
            AbstractAtomicOrdinalsFieldData.DEFAULT_SCRIPT_FUNCTION);
    pagedBytesIndexFieldData = new PagedBytesIndexFieldData(indexSettings, name, indexFieldDataCache,
            circuitBreakerService, TextFieldMapper.Defaults.FIELDDATA_MIN_FREQUENCY,
            TextFieldMapper.Defaults.FIELDDATA_MAX_FREQUENCY,
            TextFieldMapper.Defaults.FIELDDATA_MIN_SEGMENT_SIZE);

    dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    IndexWriter iw = new IndexWriter(dir, iwc);
    numDocs = scaledRandomIntBetween(32, 128);

    for (int i = 1; i <= numDocs; i++) {
        Document doc = new Document();
        doc.add(new StringField("_field", String.valueOf(i), Field.Store.NO));
        doc.add(new SortedSetDocValuesField("_field", new BytesRef(String.valueOf(i))));
        iw.addDocument(doc);
        if (i % 24 == 0) {
            iw.commit();
        }
    }
    iw.close();
    ir = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(dir), new ShardId(indexSettings.getIndex(), 0));
}

From source file:org.ihtsdo.otf.query.lucene.LuceneIndexer.java

License:Apache License

protected LuceneIndexer(String indexName) throws IOException {
    try {
        indexName_ = indexName;
        luceneWriterService = LookupService.getService(WorkExecutors.class).getExecutor();
        luceneWriterFutureCheckerService = Executors.newFixedThreadPool(1,
                new NamedThreadFactory(indexName + " Lucene future checker", false));

        Path searchFolder = LookupService.getService(ConfigurationService.class).getSearchFolderPath();

        if (luceneRootFolder_.compareAndSet(null, new File(searchFolder.toFile(), DEFAULT_LUCENE_FOLDER))) {
            luceneRootFolder_.get().mkdirs();
        }

        indexFolder_ = new File(luceneRootFolder_.get(), indexName);
        indexFolder_.mkdirs();

        log.info("Index: " + indexFolder_.getAbsolutePath());
        Directory indexDirectory = new MMapDirectory(indexFolder_); // Switch over to MMapDirectory - in theory this gives us back some
        // room on the JDK heap, letting the OS directly manage the caching of the index files - and, more importantly, gives us a huge
        // performance boost during any operation that does multi-threaded reads of the index (like the SOLOR rules processing), because
        // the default SimpleFSDirectory is a huge bottleneck.

        indexDirectory.clearLock("write.lock");

        IndexWriterConfig config = new IndexWriterConfig(luceneVersion, new PerFieldAnalyzer());
        config.setRAMBufferSizeMB(256);
        MergePolicy mergePolicy = new LogByteSizeMergePolicy();

        config.setMergePolicy(mergePolicy);
        config.setSimilarity(new ShortTextSimilarity());

        IndexWriter indexWriter = new IndexWriter(indexDirectory, config);

        trackingIndexWriter = new TrackingIndexWriter(indexWriter);

        // Don't resolve deletes on refresh: searcher reopens are cheaper, at the cost of possibly seeing deleted docs
        boolean applyAllDeletes = false;

        searcherManager = new SearcherManager(indexWriter, applyAllDeletes, null);
        // [3]: Create the ControlledRealTimeReopenThread that reopens the index periodically, taking into
        //      account the changes made to the index and tracked by the TrackingIndexWriter instance.
        //      The index is refreshed every 60 seconds when nobody is waiting,
        //      and every 100 millis whenever someone is waiting (see the search method)
        //      (see http://lucene.apache.org/core/4_3_0/core/org/apache/lucene/search/NRTManagerReopenThread.html)
        reopenThread = new ControlledRealTimeReopenThread<>(trackingIndexWriter, searcherManager, 60.00, 0.1);

        this.startThread();

        //Register for commits:

        log.info("Registering indexer " + getIndexerName() + " for commits");
        Get.commitService().addChangeListener(new ChronologyChangeListener() {

            @Override
            public void handleCommit(CommitRecord commitRecord) {
                commitRecord.getSememesInCommit().stream().forEach(sememeId -> {
                    handleChange(Get.sememeService().getSememe(sememeId));
                });

            }

            @Override
            public void handleChange(SememeChronology<? extends SememeVersion<?>> sc) {
                log.info("submitting sememe " + sc.toUserString() + " to indexer " + getIndexerName()
                        + " due to commit");
                index(sc);

            }

            @Override
            public void handleChange(ConceptChronology<? extends StampedVersion> cc) {
                // noop
            }

            @Override
            public UUID getListenerUuid() {
                return UuidT5Generator.get(getIndexerName());
            }
        });

    } catch (Exception e) {
        LookupService.getService(SystemStatusService.class).notifyServiceConfigurationFailure(indexName, e);
        throw e;
    }
}

From source file:org.meresco.lucene.numerate.UriEnumerate.java

License:Open Source License

/**
 *
 * @param path path of the index directory
 * @param max_cache_size
 * @param withTransactionLog allows for crash recovery, but slows down UriEnumerate considerably because of file system flushes.
 * @throws IOException
 */
public UriEnumerate(String path, int max_cache_size, boolean withTransactionLog) throws IOException {
    IndexWriterConfig config = new IndexWriterConfig(null);
    ConcurrentMergeScheduler ms = (ConcurrentMergeScheduler) config.getMergeScheduler();
    ms.setDefaultMaxMergesAndThreads(/* spins= */false);
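    // Aggressive merging: factor 2 keeps the segment count low, and segments under max_cache_size docs count as one level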
    LogDocMergePolicy mp = new LogDocMergePolicy();
    mp.setMergeFactor(2);
    mp.setMinMergeDocs(max_cache_size);
    config.setMergePolicy(mp);
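    // Bloom-filtered postings make single-term existence checks (does this uri exist?) much cheaper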
    config.setCodec(new Lucene60Codec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            return new BloomFilteringPostingsFormat(super.getPostingsFormatForField(field));
        }
    });
    config.setUseCompoundFile(false);
    this.writer = new IndexWriter(FSDirectory.open(FileSystems.getDefault().getPath(path)), config);
    this.next_ord = writer.numDocs() + 1;
    this.searcher = new SimpleSearcher(this.writer);
    this.cache = new Cache(max_cache_size, () -> this.commit());
    this.transactionLog = new TransactionLog(withTransactionLog ? path + "/transactionLog" : null);
    this.transactionLog.maybeRecover();
}

From source file:org.neo4j.kernel.api.impl.index.IndexWriterConfigs.java

License:Open Source License

public static IndexWriterConfig standard() {
    IndexWriterConfig writerConfig = new IndexWriterConfig(LuceneDataSource.KEYWORD_ANALYZER);

    writerConfig.setMaxBufferedDocs(MAX_BUFFERED_DOCS);
    writerConfig.setIndexDeletionPolicy(new MultipleBackupDeletionPolicy());
    writerConfig.setUseCompoundFile(true);
    writerConfig.setCodec(new Lucene54Codec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            PostingsFormat postingFormat = super.getPostingsFormatForField(field);
            return CODEC_BLOCK_TREE_ORDS_POSTING_FORMAT ? blockTreeOrdsPostingsFormat : postingFormat;
        }
    });

    LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
    mergePolicy.setNoCFSRatio(MERGE_POLICY_NO_CFS_RATIO);
    mergePolicy.setMinMergeMB(MERGE_POLICY_MIN_MERGE_MB);
    mergePolicy.setMergeFactor(MERGE_POLICY_MERGE_FACTOR);
    writerConfig.setMergePolicy(mergePolicy);

    return writerConfig;
}

From source file:org.neo4j.kernel.api.impl.index.IndexWriterFactories.java

License:Open Source License

public static LuceneIndexWriterFactory standard() {
    return new LuceneIndexWriterFactory() {
        @Override
        public IndexWriter create(Directory directory) throws IOException {
            IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_36,
                    LuceneDataSource.KEYWORD_ANALYZER);
            writerConfig.setMaxBufferedDocs(100000); // TODO figure out depending on environment?
            writerConfig.setIndexDeletionPolicy(new MultipleBackupDeletionPolicy());
            writerConfig.setTermIndexInterval(14);

            LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
            mergePolicy.setUseCompoundFile(true);
            mergePolicy.setNoCFSRatio(1.0);
            mergePolicy.setMinMergeMB(0.1);
            mergePolicy.setMergeFactor(2);
            writerConfig.setMergePolicy(mergePolicy);

            return new IndexWriter(directory, writerConfig);
        }
    };
}

From source file:org.olat.search.service.indexer.JmsIndexer.java

License:Apache License

public IndexWriterConfig newIndexWriterConfig() {
    Analyzer analyzer = new StandardAnalyzer(SearchService.OO_LUCENE_VERSION);
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(SearchService.OO_LUCENE_VERSION, analyzer);
    indexWriterConfig.setMergePolicy(newLogMergePolicy());
    indexWriterConfig.setRAMBufferSizeMB(ramBufferSizeMB); // for better performance set to 48MB (see the Lucene doc "How to make indexing faster")
    indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    return indexWriterConfig;
}

From source file:org.punksearch.crawler.IndexOperator.java

License:Open Source License

/**
 * @param dir      directory to open the writer for
 * @param segments fewer segments means slower indexing but faster search, and vice versa; 0 means default
 * @return new index writer
 * @throws IOException
 */
private static IndexWriter createIndexWriter(String dir, int segments) throws IOException {
    final IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LuceneVersion.VERSION, analyzer);

    if (segments > 0) {
        indexWriterConfig.setMergePolicy(
                new TieredMergePolicy().setSegmentsPerTier(segments).setMaxMergeAtOnce(segments));
    }

    return new IndexWriter(FSDirectory.open(new File(dir)), indexWriterConfig);
}