Example usage of the org.apache.lucene.index.LogDocMergePolicy constructor LogDocMergePolicy()

List of usage examples for the org.apache.lucene.index.LogDocMergePolicy constructor LogDocMergePolicy()

Introduction

On this page you can find example usages of the org.apache.lucene.index.LogDocMergePolicy constructor LogDocMergePolicy().

Prototype

public LogDocMergePolicy() 

Source Link

Document

Sole constructor, setting all settings to their defaults.

Usage

From source file:DVBench.java

License:Apache License

/**
 * Builds a 400,000-document index under {@code /data/indices/dvbench} and runs the
 * {@code search} helper over four representations of the same per-document value:
 * numeric doc values ("dv"), an indexed long read through an uninverting reader ("inv"),
 * and two binary doc-values fields ("boxed" and the fixed-length "boxed2").
 *
 * <p>The writer, reader and directory are managed with try-with-resources so they are
 * released even when indexing or searching throws.
 *
 * @param bpv number of bits the generated values occupy; the largest forced value is
 *            {@code (1L << bpv) - 1}, or the full {@code long} range when {@code bpv == 64}
 * @throws Exception if indexing or searching fails
 */
static void doBench(int bpv) throws Exception {
    File file = new File("/data/indices/dvbench");
    file.mkdirs();
    try (Directory dir = FSDirectory.open(file)) {
        IndexWriterConfig config = new IndexWriterConfig(null);
        config.setOpenMode(OpenMode.CREATE);
        config.setMergeScheduler(new SerialMergeScheduler());
        config.setMergePolicy(new LogDocMergePolicy());
        config.setMaxBufferedDocs(25000);

        try (IndexWriter writer = new IndexWriter(dir, config)) {
            MyRandom r = new MyRandom();
            int numdocs = 400000;

            // A single Document and its Field instances are reused for every add;
            // only the field values change between iterations.
            Document doc = new Document();
            Field dv = new NumericDocValuesField("dv", 0);
            Field inv = new LongField("inv", 0, Field.Store.NO);
            Field boxed = new BinaryDocValuesField("boxed", new BytesRef(8));
            Field boxed2 = new BinaryDocValuesField("boxed2", new BytesRef(8));

            doc.add(dv);
            doc.add(inv);
            doc.add(boxed);
            doc.add(boxed2);
            for (int i = 0; i < numdocs; i++) {
                // defeat blockpackedwriter: every 8192 docs force the minimum and then
                // the maximum value for this bpv
                final long value;
                if (i % 8192 == 0) {
                    value = bpv == 64 ? Long.MIN_VALUE : 0;
                } else if (i % 8192 == 1) {
                    value = bpv == 64 ? Long.MAX_VALUE : (1L << bpv) - 1;
                } else {
                    value = r.nextLong(bpv);
                }
                dv.setLongValue(value);
                inv.setLongValue(value);
                box(value, boxed.binaryValue());
                box(value, boxed2.binaryValue());
                boxed2.binaryValue().length = (bpv + 7) / 8; // fixed length
                writer.addDocument(doc);
            }
        }

        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            // run dv search tests
            String description = "dv (bpv=" + bpv + ")";
            IndexSearcher searcher = new IndexSearcher(reader);
            searcher.setQueryCache(null); // don't bench the cache

            // NOTE(review): the original labels both calls of each pair "warmup"; the boolean
            // flag presumably separates the warmup pass from the measured pass - confirm
            // against search().
            int hash = 0;
            hash += search(description, searcher, "dv", 300, true);
            hash += search(description, searcher, "dv", 300, false);

            // Uninverting
            Map<String, UninvertingReader.Type> mapping = Collections.singletonMap("inv",
                    UninvertingReader.Type.LONG);
            DirectoryReader uninv = UninvertingReader.wrap(reader, mapping);
            IndexSearcher searcher2 = new IndexSearcher(uninv);
            searcher2.setQueryCache(null); // don't bench the cache

            description = "fc (bpv=" + bpv + ")";
            hash += search(description, searcher2, "inv", 300, true);
            hash += search(description, searcher2, "inv", 300, false);

            // Boxed inside binary
            DirectoryReader boxedReader = new BinaryAsVLongReader(reader);
            IndexSearcher searcher3 = new IndexSearcher(boxedReader);
            searcher3.setQueryCache(null); // don't bench the cache
            description = "boxed (bpv=" + bpv + ")";
            hash += search(description, searcher3, "boxed", 300, true);
            hash += search(description, searcher3, "boxed", 300, false);

            description = "boxed fixed-length (bpv=" + bpv + ")";
            hash += search(description, searcher3, "boxed2", 300, true);
            hash += search(description, searcher3, "boxed2", 300, false);

            // Consume the accumulated result so the search calls have an observable use.
            if (hash == 3) {
                // wont happen
                System.out.println("hash=" + hash);
            }
        }
    }
}

From source file:IndexAndSearchOpenStreetMaps1D.java

License:Apache License

/**
 * Reads comma-separated lines from the OpenStreetMap subset file and indexes the second
 * column of each line (scaled by 1,000,000 and truncated to {@code int}) as a
 * one-dimensional value: a {@code LegacyIntField} named "latnum" when {@code USE_NF} is
 * set, otherwise an {@code IntPoint} named "lat".
 *
 * <p>Progress is printed every million documents, and the total build time is printed
 * after the writer has been closed. The input stream, reader, directory and writer are
 * all released via try-with-resources, including on failure.
 *
 * @throws IOException if the input cannot be read or decoded, or the index cannot be written
 */
private static void createIndex() throws IOException {

    long t0 = System.nanoTime();

    // REPORT makes malformed or unmappable input fail instead of being silently replaced.
    CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);

    int bufferSize = 1 << 16; // 64K

    try (InputStream is = Files
            .newInputStream(Paths.get("/lucenedata/open-street-maps/latlon.subsetPlusAllLondon.txt"));
            BufferedReader reader = new BufferedReader(new InputStreamReader(is, decoder), bufferSize);
            Directory dir = FSDirectory.open(Paths.get("/c/tmp/bkdtest1d" + (USE_NF ? "_nf" : "")))) {

        IndexWriterConfig iwc = new IndexWriterConfig(null);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        //iwc.setMaxBufferedDocs(109630);
        //iwc.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        iwc.setRAMBufferSizeMB(256.0);
        iwc.setMergePolicy(new LogDocMergePolicy());
        iwc.setMergeScheduler(new SerialMergeScheduler());
        iwc.setInfoStream(new PrintStreamInfoStream(System.out));

        try (IndexWriter w = new IndexWriter(dir, iwc)) {
            int count = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                String[] parts = line.split(",");
                //long id = Long.parseLong(parts[0]);
                int lat = (int) (1000000. * Double.parseDouble(parts[1]));
                //int lon = (int) (1000000. * Double.parseDouble(parts[2]));
                Document doc = new Document();
                if (USE_NF) {
                    doc.add(new LegacyIntField("latnum", lat, Field.Store.NO));
                    //doc.add(new LongField("lonnum", lon, Field.Store.NO));
                } else {
                    doc.add(new IntPoint("lat", lat));
                    //doc.add(new SortedNumericDocValuesField("lon", lon));
                }
                w.addDocument(doc);
                count++;
                if (count % 1000000 == 0) {
                    System.out.println(count + "...");
                }
            }
            //w.forceMerge(1);
            w.commit();
            System.out.println(w.maxDoc() + " total docs");
        }

        // Taken after the writer is closed so the reported time includes the close.
        long t1 = System.nanoTime();
        System.out.println(((t1 - t0) / 1000000000.0) + " sec to build index");
    }
}

From source file:com.aliasi.lingmed.medline.IndexMedline.java

License:Lingpipe license

/**
 * Run the command.  See class documentation above for details on
 * arguments and behavior./*from  www  . j av a 2s . c om*/
 */
public void run() {
    System.out.println("start run");
    try {
        File[] files = getLaterFiles(mDistDir);
        System.out.println("Total files to process: " + files.length);
        System.out.println("File names: " + java.util.Arrays.asList(files));
        //            if (mLogger.isDebugEnabled())
        //                mLogger.debug("File names: " + java.util.Arrays.asList(files));
        if (files.length > 0) {
            MedlineParser parser = new MedlineParser(true); // true = save raw XML

            Directory fsDir = FSDirectory.open(mIndex);
            IndexWriterConfig iwConf = new IndexWriterConfig(Version.LUCENE_36, mCodec.getAnalyzer());
            iwConf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            iwConf.setRAMBufferSizeMB(RAM_BUF_SIZE);
            if (sIsBaseline) {
                LogDocMergePolicy ldmp = new LogDocMergePolicy();
                ldmp.setMergeFactor(MERGE_FACTOR_HI);
                iwConf.setMergePolicy(ldmp);
            }
            IndexWriter indexWriter = new IndexWriter(fsDir, iwConf);

            for (File file : files) {
                System.out.println("processing file: " + file);
                MedlineIndexer indexer = new MedlineIndexer(indexWriter, mCodec);
                parser.setHandler(indexer);
                parseFile(parser, file);
                indexer.close();
                recordFile(indexWriter, file.getName());
                System.out.println("completed processing file: " + file);
            }
            System.out.println("All files parsed, now optimize index");
            indexWriter.forceMerge(1);
            indexWriter.commit();
            indexWriter.close();
        }
        System.out.println("Processing complete.");
    } catch (Exception e) {
        //            mLogger.warn("Unexpected Exception: "+e.getMessage());
        //            mLogger.warn("stack trace: "+Logging.logStackTrace(e));
        //            mLogger.warn("Aborting this run");
        IllegalStateException e2 = new IllegalStateException(e.getMessage());
        e2.setStackTrace(e.getStackTrace());
        throw e2;
    }
}

From source file:com.edgenius.wiki.search.lucene.SimpleIndexFactory.java

License:Open Source License

/**
 * Creates the writer configuration from this factory's settings: buffering, term index
 * interval and write-lock timeout on the config itself, plus a {@link LogDocMergePolicy}
 * carrying the compound-file, max-merge-docs and merge-factor settings.
 *
 * @return a newly built {@link IndexWriterConfig}
 */
private IndexWriterConfig getIndexWriterConfig() {
    final IndexWriterConfig writerConfig = new IndexWriterConfig(LuceneConfig.VERSION,
            analyzerProvider.getIndexAnalyzer())
                    .setMaxBufferedDocs(maxBufferedDocs)
                    .setTermIndexInterval(termIndexInterval)
                    .setWriteLockTimeout(writeLockTimeout);

    final LogMergePolicy docCountPolicy = new LogDocMergePolicy();
    docCountPolicy.setUseCompoundFile(useCompoundFile);
    docCountPolicy.setMaxMergeDocs(maxMergeDocs);
    docCountPolicy.setMergeFactor(mergeFactor);

    // setMergePolicy returns the config itself, so it can be returned directly.
    return writerConfig.setMergePolicy(docCountPolicy);
}

From source file:com.impetus.kundera.index.LuceneIndexer.java

License:Apache License

/**
 * Instantiates a new lucene indexer.
 *
 * <p>When {@code lucDirPath} exists on disk, the on-disk index is loaded into a
 * {@link RAMDirectory}; otherwise an empty {@link RAMDirectory} is created. An
 * {@link IndexWriter} using a {@link LogDocMergePolicy} with merge factor 1000 and a
 * 32 MB RAM buffer is then opened on it.
 *
 * @param lucDirPath
 *            the luc dir path
 * @throws LuceneIndexingException
 *             if the directory or the writer cannot be created
 */
private LuceneIndexer(String lucDirPath) {
    try {
        luceneDirPath = lucDirPath;
        File file = new File(luceneDirPath);
        if (file.exists()) {
            // NOTE(review): existence is checked on luceneDirPath, but the directory opened
            // is getIndexDirectory() - confirm both refer to the same location.
            // The RAMDirectory is built from the on-disk directory (per Lucene's docs it is
            // an independent copy), so the source is closed as soon as the copy is done
            // rather than leaked.
            try (FSDirectory sourceDir = FSDirectory.open(getIndexDirectory().toPath())) {
                // TODO initialize context.
                index = new RAMDirectory(sourceDir, IOContext.DEFAULT);
            }
        } else {
            index = new RAMDirectory();
        }
        /* writer */
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
        LogDocMergePolicy logDocMergePolicy = new LogDocMergePolicy();
        logDocMergePolicy.setMergeFactor(1000);
        indexWriterConfig.setMergePolicy(logDocMergePolicy);
        w = new IndexWriter(index, indexWriterConfig);
        // Applied through the writer's config after the writer has been created.
        w.getConfig().setRAMBufferSizeMB(32);
    } catch (Exception e) {
        log.error("Error while instantiating LuceneIndexer, Caused by :.", e);
        throw new LuceneIndexingException(e);
    }
}

From source file:com.impetus.kundera.index.LuceneIndexer.java

License:Apache License

/**
 * Deletes from the index all documents matching the query built by
 * {@code getLuceneQuery} for the given entity id, commits the deletion, then closes the
 * current writer and opens a new one on the same directory with the same merge-policy
 * and RAM-buffer settings.
 *
 * @param metadata
 *            metadata of the entity being unindexed
 * @param id
 *            key of the entity to remove
 * @param kunderaMetadata
 *            used to look up the metamodel for the entity's persistence unit; may be
 *            {@code null}, in which case no metamodel is resolved
 * @param parentClazz
 *            passed through to {@code getLuceneQuery}
 * @throws LuceneIndexingException
 *             if the query cannot be parsed or the index cannot be updated
 */
@Override
public final void unindex(EntityMetadata metadata, Object id, KunderaMetadata kunderaMetadata,
        Class<?> parentClazz) throws LuceneIndexingException {
    if (log.isDebugEnabled()) {
        log.debug("Unindexing @Entity[{}] for key:{}", metadata.getEntityClazz().getName(), id);
    }
    String luceneQuery = null;
    boolean isEmbeddedId = false;

    MetamodelImpl metaModel = null;
    if (kunderaMetadata != null && metadata != null) {
        metaModel = (MetamodelImpl) kunderaMetadata.getApplicationMetadata()
                .getMetamodel(metadata.getPersistenceUnit());
        isEmbeddedId = metaModel.isEmbeddable(metadata.getIdAttribute().getBindableJavaType());
    }

    try {
        QueryParser qp = new QueryParser(DEFAULT_SEARCHABLE_FIELD, new StandardAnalyzer());

        qp.setLowercaseExpandedTerms(false);
        qp.setAllowLeadingWildcard(true);
        luceneQuery = getLuceneQuery(metadata, id, isEmbeddedId, metaModel, parentClazz);
        Query q = qp.parse(luceneQuery);

        w.deleteDocuments(q);
        w.commit();
        w.close();

        // Re-open a writer on the same directory after the old one has been closed.
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
        LogDocMergePolicy logDocMergePolicy = new LogDocMergePolicy();
        logDocMergePolicy.setMergeFactor(1000);
        indexWriterConfig.setMergePolicy(logDocMergePolicy);
        w = new IndexWriter(index, indexWriterConfig);

        w.getConfig().setRAMBufferSizeMB(32);
        // flushInternal();
    } catch (Exception e) {
        // The previous message was copied from the constructor and misreported the
        // failing operation.
        log.error("Error while unindexing, Caused by :.", e);
        throw new LuceneIndexingException(e);
    }
}

From source file:com.rocana.lucene.codec.v1.TestRocanaPerFieldPostingsFormat2.java

License:Apache License

/**
 * Opens an {@link IndexWriter} on {@code dir} after installing a {@link LogDocMergePolicy}
 * whose no-CFS ratio is 0.0 on the supplied configuration.
 *
 * @param dir  directory the writer operates on
 * @param conf configuration to use; its merge policy is replaced by this method
 * @return the newly created writer
 * @throws IOException if the writer cannot be opened
 */
private IndexWriter newWriter(Directory dir, IndexWriterConfig conf) throws IOException {
    final LogDocMergePolicy docMergePolicy = new LogDocMergePolicy();
    // make sure we use plain files
    docMergePolicy.setNoCFSRatio(0.0);
    conf.setMergePolicy(docMergePolicy);
    return new IndexWriter(dir, conf);
}

From source file:com.zimbra.cs.index.LuceneIndex.java

License:Open Source License

/**
 * Builds the writer configuration from the {@code LC} settings.
 *
 * <p>The {@code zimbra_index_lucene_merge_policy} flag selects a {@link LogDocMergePolicy}
 * when true and a {@link LogByteSizeMergePolicy} when false. For the byte-size policy the
 * min/max merge settings are divided by 1024.0 before being passed as megabytes. In both
 * cases the max-merge limit is applied only when it differs from {@link Integer#MAX_VALUE}.
 *
 * @return the populated {@link IndexWriterConfig}
 */
private IndexWriterConfig getWriterConfig() {
    final IndexWriterConfig writerConfig = new IndexWriterConfig(VERSION, mailbox.index.getAnalyzer());
    writerConfig.setMergeScheduler(new MergeScheduler());
    writerConfig.setMaxBufferedDocs(LC.zimbra_index_lucene_max_buffered_docs.intValue());
    // The RAM buffer setting is read in KB and converted to MB.
    writerConfig.setRAMBufferSizeMB(LC.zimbra_index_lucene_ram_buffer_size_kb.intValue() / 1024.0);

    final boolean mergeByDocCount = LC.zimbra_index_lucene_merge_policy.booleanValue();
    if (!mergeByDocCount) {
        final LogByteSizeMergePolicy bytePolicy = new LogByteSizeMergePolicy();
        writerConfig.setMergePolicy(bytePolicy);
        bytePolicy.setUseCompoundFile(LC.zimbra_index_lucene_use_compound_file.booleanValue());
        bytePolicy.setMergeFactor(LC.zimbra_index_lucene_merge_factor.intValue());
        bytePolicy.setMinMergeMB(LC.zimbra_index_lucene_min_merge.intValue() / 1024.0);
        if (Integer.MAX_VALUE != LC.zimbra_index_lucene_max_merge.intValue()) {
            bytePolicy.setMaxMergeMB(LC.zimbra_index_lucene_max_merge.intValue() / 1024.0);
        }
    } else {
        final LogDocMergePolicy docPolicy = new LogDocMergePolicy();
        writerConfig.setMergePolicy(docPolicy);
        docPolicy.setUseCompoundFile(LC.zimbra_index_lucene_use_compound_file.booleanValue());
        docPolicy.setMergeFactor(LC.zimbra_index_lucene_merge_factor.intValue());
        docPolicy.setMinMergeDocs(LC.zimbra_index_lucene_min_merge.intValue());
        if (Integer.MAX_VALUE != LC.zimbra_index_lucene_max_merge.intValue()) {
            docPolicy.setMaxMergeDocs(LC.zimbra_index_lucene_max_merge.intValue());
        }
    }
    return writerConfig;
}

From source file:de.csw.linkgenerator.plugin.lucene.IndexUpdater.java

License:Open Source License

/**
 * Opens the index writer on {@code indexDir} in the requested mode, using a
 * {@link LogDocMergePolicy} with compound files enabled.
 *
 * <p>If a writer is already open, an error is logged and nothing else happens. An
 * {@link IOException} while opening is logged and not rethrown, so {@code writer} is
 * left unassigned in that case.
 *
 * @param openMode the mode to open the index in
 */
private void openWriter(OpenMode openMode) {
    if (writer != null) {
        LOG.error("Writer already open and createWriter called");
        return;
    }

    try {
        final IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer);
        writerConfig.setOpenMode(openMode);

        // The original author flagged this merge-policy choice as guesswork.
        final LogDocMergePolicy compoundFilePolicy = new LogDocMergePolicy();
        compoundFilePolicy.setUseCompoundFile(true);
        writerConfig.setMergePolicy(compoundFilePolicy);

        writer = new IndexWriter(indexDir, writerConfig);

        if (LOG.isDebugEnabled()) {
            LOG.debug("successfully opened index writer : " + indexDir);
        }
    } catch (IOException ioFailure) {
        LOG.error("IOException when opening Lucene Index for writing at " + indexDir, ioFailure);
    }
}

From source file:dk.defxws.fgslucene.OperationsImpl.java

License:Open Source License

/**
 * Ensures the shared {@code iw} writer exists for the named index and refreshes
 * {@code docCount} from it.
 *
 * <p>When no writer is open, one is created on a {@link SimpleFSDirectory} at the
 * configured index directory. The max-buffered-docs, merge-factor and write-lock-timeout
 * settings are applied only when their configured value is positive; note that the
 * write-lock timeout is set as the {@link IndexWriterConfig} default, not on this
 * writer's config alone.
 *
 * @param indexName name of the index whose configuration is used
 * @throws GenericSearchException if the directory or writer cannot be created, or the
 *         document count cannot be read (the writer is closed first in that case)
 */
private void getIndexWriter(String indexName) throws GenericSearchException {
    if (iw == null) {
        final Directory indexDirectory;
        try {
            indexDirectory = new SimpleFSDirectory(new File(config.getIndexDir(indexName)));
        } catch (Exception e) {
            throw new GenericSearchException("IndexWriter new error indexName=" + indexName + " :\n", e);
        }

        final IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_36,
                getQueryAnalyzer(indexName));

        final int bufferedDocsLimit = config.getMaxBufferedDocs(indexName);
        if (bufferedDocsLimit > 0) {
            writerConfig.setMaxBufferedDocs(bufferedDocsLimit);
        }

        final int configuredMergeFactor = config.getMergeFactor(indexName);
        if (configuredMergeFactor > 0) {
            final LogDocMergePolicy docMergePolicy = new LogDocMergePolicy();
            docMergePolicy.setMergeFactor(configuredMergeFactor);
            writerConfig.setMergePolicy(docMergePolicy);
        }

        final long lockTimeout = config.getDefaultWriteLockTimeout(indexName);
        if (lockTimeout > 0) {
            IndexWriterConfig.setDefaultWriteLockTimeout(lockTimeout);
        }

        try {
            iw = new IndexWriter(indexDirectory, writerConfig);
        } catch (Exception e) {
            throw new GenericSearchException("IndexWriter new error indexName=" + indexName + " :\n", e);
        }
    }

    try {
        docCount = iw.numDocs();
    } catch (Exception e) {
        closeIndexWriter(indexName);
        throw new GenericSearchException("IndexWriter numDocs error indexName=" + indexName + " :\n", e);
    }

    if (logger.isDebugEnabled()) {
        logger.debug("getIndexWriter indexName=" + indexName + " docCount=" + docCount);
    }
}