Example usage for org.apache.lucene.index IndexWriterConfig setRAMBufferSizeMB

List of usage examples for org.apache.lucene.index IndexWriterConfig setRAMBufferSizeMB

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexWriterConfig.setRAMBufferSizeMB.

Prototype

@Override
    public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) 

Source Link

Usage

From source file:com.stratio.cassandra.lucene.service.LuceneIndex.java

License:Apache License

/**
 * Builds a new {@code LuceneIndex} stored under the specified directory path. The constructor opens the
 * directory, creates the index writer, sets up near-real-time searching with a background refresher thread
 * (which is started here) and registers this object as a JMX MBean.
 *
 * @param keyspace        The keyspace name.
 * @param table           The table name.
 * @param name            The index name.
 * @param path            The path of the directory in where the Lucene files will be stored.
 * @param ramBufferMB     The index writer buffer size in MB.
 * @param maxMergeMB      NRTCachingDirectory max merge size in MB.
 * @param maxCachedMB     NRTCachingDirectory max cached MB.
 * @param analyzer        The default {@link Analyzer}.
 * @param refreshSeconds  The index readers refresh time in seconds. Writings are not visible until this time.
 *                        The same value is passed as both staleness arguments of the reopen thread.
 * @param refreshCallback A runnable to be run on index refresh.
 * @throws IOException If Lucene throws IO errors.
 */
public LuceneIndex(String keyspace, String table, String name, Path path, Integer ramBufferMB,
        Integer maxMergeMB, Integer maxCachedMB, Analyzer analyzer, Double refreshSeconds,
        Runnable refreshCallback) throws IOException {
    this.path = path;
    this.refreshCallback = refreshCallback;
    this.logName = String.format("Lucene index %s.%s.%s", keyspace, table, name);

    // Open or create directory, wrapped in an NRT caching layer
    FSDirectory fsDirectory = FSDirectory.open(path);
    directory = new NRTCachingDirectory(fsDirectory, maxMergeMB, maxCachedMB);

    // Setup index writer: existing indexes are appended to, new ones are created
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setRAMBufferSizeMB(ramBufferMB);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    config.setUseCompoundFile(true);
    config.setMergePolicy(new TieredMergePolicy());
    indexWriter = new IndexWriter(directory, config);

    // Setup NRT search
    // Every searcher built by this factory first invokes refreshCallBack() on the enclosing index and
    // uses NoIDFSimilarity.
    // NOTE(review): there is no @Override on newSearcher — confirm this one-argument signature really
    // overrides SearcherFactory.newSearcher in the Lucene version in use.
    SearcherFactory searcherFactory = new SearcherFactory() {
        public IndexSearcher newSearcher(IndexReader reader) throws IOException {
            LuceneIndex.this.refreshCallBack();
            IndexSearcher searcher = new IndexSearcher(reader);
            searcher.setSimilarity(new NoIDFSimilarity());
            return searcher;
        }
    };
    TrackingIndexWriter trackingIndexWriter = new TrackingIndexWriter(indexWriter);
    searcherManager = new SearcherManager(indexWriter, true, searcherFactory);
    searcherReopener = new ControlledRealTimeReopenThread<>(trackingIndexWriter, searcherManager,
            refreshSeconds, refreshSeconds);
    searcherReopener.start(); // Start the refresher thread

    // Register JMX MBean; the listed registration failures are logged and do not abort construction
    try {
        objectName = new ObjectName(
                String.format("com.stratio.cassandra.lucene:type=LuceneIndexes,keyspace=%s,table=%s,index=%s",
                        keyspace, table, name));
        ManagementFactory.getPlatformMBeanServer().registerMBean(this, objectName);
    } catch (MBeanException | OperationsException e) {
        Log.error(e, "Error while registering MBean");
    }
}

From source file:com.tuplejump.stargate.lucene.BasicIndexer.java

License:Apache License

/**
 * Opens the on-disk directory of this index and returns a new {@link IndexWriter} on it.
 * The writer is configured with a 256 MB RAM buffer and the class-wide {@code OPEN_MODE}.
 * As a side effect the {@code file} and {@code directory} fields are (re)assigned.
 *
 * @param luceneV the Lucene compatibility version handed to the writer configuration
 * @return a freshly created writer over {@code directory}
 * @throws IOException if the directory or the writer cannot be opened
 */
private IndexWriter getIndexWriter(Version luceneV) throws IOException {
    file = LuceneUtils.getDirectory(keyspaceName, cfName, indexName, vNodeName);

    // Both setters return the configuration itself, so they can be chained.
    IndexWriterConfig writerConfig = new IndexWriterConfig(luceneV, analyzer)
            .setRAMBufferSizeMB(256)
            .setOpenMode(OPEN_MODE);

    directory = FSDirectory.open(file);

    String openedMessage = indexName + " SG Index - Opened dir[" + file.getAbsolutePath() + "] - Openmode["
            + OPEN_MODE + "]";
    logger.warn(openedMessage);

    return new IndexWriter(directory, writerConfig);
}

From source file:com.tuplejump.stargate.lucene.NearRealTimeIndexer.java

License:Apache License

/**
 * Opens the on-disk directory of this index behind an {@link NRTCachingDirectory} (both cache limits
 * set to 100) and returns a new {@link IndexWriter} on it. The writer is configured with a 256 MB RAM
 * buffer and the class-wide {@code OPEN_MODE}. As a side effect the {@code file} and {@code directory}
 * fields are (re)assigned.
 *
 * @param luceneV the Lucene compatibility version handed to the writer configuration
 * @return a freshly created writer over {@code directory}
 * @throws IOException if the directory or the writer cannot be opened
 */
private IndexWriter getIndexWriter(Version luceneV) throws IOException {
    file = Utils.getDirectory(keyspaceName, cfName, indexName, vNodeName);

    // Both setters return the configuration itself, so they can be chained.
    IndexWriterConfig writerConfig = new IndexWriterConfig(luceneV, analyzer)
            .setRAMBufferSizeMB(256)
            .setOpenMode(OPEN_MODE);

    FSDirectory onDisk = FSDirectory.open(file);
    directory = new NRTCachingDirectory(onDisk, 100, 100);

    String openedMessage = indexName + " SG Index - Opened dir[" + file.getAbsolutePath() + "] - Openmode["
            + OPEN_MODE + "]";
    logger.warn(openedMessage);

    return new IndexWriter(directory, writerConfig);
}

From source file:com.twentyn.patentSearch.DocumentIndexer.java

License:Open Source License

/**
 * Command-line entry point: parses the options, optionally raises the root log level to DEBUG, validates
 * the input location, opens (or creates) the Lucene index and feeds the patent corpus into it.
 *
 * Options: {@code -i/--input} (required) input file or directory, {@code -x/--index} (required) index
 * path, {@code -h/--help}, {@code -v/--verbose}.
 *
 * The input path is checked before the index is opened, so a bad input path no longer creates or locks
 * an index and then exits without closing the writer.
 *
 * @param args the raw command-line arguments
 * @throws Exception if opening the index or reading the corpus fails
 */
public static void main(String[] args) throws Exception {
    System.out.println("Starting up...");
    System.out.flush();
    Options opts = new Options();
    opts.addOption(Option.builder("i").longOpt("input").hasArg().required()
            .desc("Input file or directory to index").build());
    opts.addOption(Option.builder("x").longOpt("index").hasArg().required()
            .desc("Path to index file to generate").build());
    opts.addOption(Option.builder("h").longOpt("help").desc("Print this help message and exit").build());
    opts.addOption(Option.builder("v").longOpt("verbose").desc("Print verbose log output").build());

    HelpFormatter helpFormatter = new HelpFormatter();
    CommandLineParser cmdLineParser = new DefaultParser();
    CommandLine cmdLine = null;
    try {
        cmdLine = cmdLineParser.parse(opts, args);
    } catch (ParseException e) {
        System.out.println("Caught exception when parsing command line: " + e.getMessage());
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(1);
    }

    // NOTE(review): input and index are declared required, so "--help" on its own presumably fails
    // parsing above and exits with status 1 rather than reaching this branch — confirm.
    if (cmdLine.hasOption("help")) {
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(0);
    }

    if (cmdLine.hasOption("verbose")) {
        // With help from http://stackoverflow.com/questions/23434252/programmatically-change-log-level-in-log4j2
        LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
        Configuration ctxConfig = ctx.getConfiguration();
        LoggerConfig logConfig = ctxConfig.getLoggerConfig(LogManager.ROOT_LOGGER_NAME);
        logConfig.setLevel(Level.DEBUG);

        ctx.updateLoggers();
        LOGGER.debug("Verbose logging enabled");
    }

    // Fail fast on a missing input before any index resource is opened.
    String inputFileOrDir = cmdLine.getOptionValue("input");
    File splitFileOrDir = new File(inputFileOrDir);
    if (!splitFileOrDir.exists()) {
        LOGGER.error("Unable to find directory at " + inputFileOrDir);
        System.exit(1);
    }

    LOGGER.info("Opening index at " + cmdLine.getOptionValue("index"));
    Directory indexDir = FSDirectory.open(new File(cmdLine.getOptionValue("index")).toPath());

    /* The standard analyzer is too aggressive with chemical entities (it strips structural annotations, for one
     * thing), and the whitespace analyzer doesn't do any case normalization or stop word elimination.  This custom
     * analyzer appears to treat chemical entities better than the standard analyzer without admitting too much
     * cruft to the index. */
    Analyzer analyzer = CustomAnalyzer.builder().withTokenizer("whitespace").addTokenFilter("lowercase")
            .addTokenFilter("stop").build();

    IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
    writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    writerConfig.setRAMBufferSizeMB(1 << 10); // 1024 MB
    IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig);

    DocumentIndexer indexer = new DocumentIndexer(indexWriter);
    PatentCorpusReader corpusReader = new PatentCorpusReader(indexer, splitFileOrDir);
    corpusReader.readPatentCorpus();
    indexer.commitAndClose();
}

From source file:com.vmware.dcp.services.common.LuceneBlobIndexService.java

License:Open Source License

/**
 * Creates an {@link IndexWriter} over the given directory and performs an initial commit.
 * The index is opened in CREATE mode when the service's index options contain
 * {@code BlobIndexOption.CREATE}, otherwise in CREATE_OR_APPEND mode. When the host reports an exact
 * memory limit for this service, that limit (at least 1 MB) is used as the writer's RAM buffer size.
 *
 * @param directory file-system location of the index
 * @return the newly created, already committed writer
 * @throws IOException if the directory or the writer cannot be opened, or the commit fails
 */
public IndexWriter createWriter(File directory) throws IOException {
    Directory indexDir = MMapDirectory.open(directory.toPath());
    IndexWriterConfig writerConfig = new IndexWriterConfig(new SimpleAnalyzer());

    OpenMode mode = this.indexOptions.contains(BlobIndexOption.CREATE)
            ? OpenMode.CREATE
            : OpenMode.CREATE_OR_APPEND;
    writerConfig.setOpenMode(mode);

    Long memoryLimitMB = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT);
    if (memoryLimitMB != null) {
        // Never hand Lucene a buffer smaller than 1 MB.
        long bufferMB = Math.max(1, memoryLimitMB);
        writerConfig.setRAMBufferSizeMB(bufferMB);
    }

    IndexWriter created = new IndexWriter(indexDir, writerConfig);
    created.commit();
    return created;
}

From source file:com.vmware.dcp.services.common.LuceneDocumentIndexService.java

License:Open Source License

/**
 * Creates the service's {@link IndexWriter} over the given directory, commits once and records the
 * creation time. The index is opened in CREATE_OR_APPEND mode with a snapshot-capable deletion policy
 * that keeps only the last commit. When the host reports an exact memory limit for this service, half
 * of it (at least 1 MB) is used as the writer's RAM buffer size.
 *
 * @param directory file-system location of the index
 * @param doUpgrade when {@code true} and an index already exists there, it is upgraded in place first
 * @return the writer now stored in {@code this.writer}
 * @throws Exception if upgrading, opening or committing the index fails
 */
public IndexWriter createWriter(File directory, boolean doUpgrade) throws Exception {
    Directory indexDir = MMapDirectory.open(directory.toPath());

    // Upgrade the index in place if necessary.
    boolean upgradeNeeded = doUpgrade && DirectoryReader.indexExists(indexDir);
    if (upgradeNeeded) {
        upgradeIndex(indexDir);
    }

    IndexWriterConfig writerConfig = new IndexWriterConfig(new SimpleAnalyzer());
    writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    writerConfig.setIndexDeletionPolicy(new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()));

    Long memoryLimitMB = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT);
    if (memoryLimitMB != null) {
        // give half to the index, the other half we keep for service caching context
        long indexBufferMB = Math.max(1, memoryLimitMB / 2);
        writerConfig.setRAMBufferSizeMB(indexBufferMB);
    }

    this.writer = new IndexWriter(indexDir, writerConfig);
    this.writer.commit();
    this.indexUpdateTimeMicros = Utils.getNowMicrosUtc();
    this.indexWriterCreationTimeMicros = this.indexUpdateTimeMicros;
    return this.writer;
}

From source file:com.vmware.demo.sgf.lucene.impl.LuceneGemFireRepositoryImpl.java

License:Apache License

/**
 * Sets up the stores for the indexes: opens the index directory, creates the analyzer and the index
 * writer, and opens a searcher manager on top of the writer.
 *
 * Does nothing when the repository has already been initialized. An {@link IOException} during set-up
 * is printed to standard error and leaves the repository uninitialized, so a later call tries again.
 */
public void initialize() {
    if (hasInitialized) {
        return;
    }

    try {
        directory = new NIOFSDirectory(new File(filepath), new SimpleFSLockFactory());
        analyzer = new StandardAnalyzer(Version.LUCENE_40);

        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer);
        writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        writerConfig.setWriteLockTimeout(2000);
        writerConfig.setRAMBufferSizeMB(ramsize);
        indexWriter = new IndexWriter(directory, writerConfig);

        // Open a reader/searcher
        SearcherFactory defaultFactory = new SearcherFactory();
        searchManager = new SearcherManager(indexWriter, applyAllDeletes, defaultFactory);

        hasInitialized = true;
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java

License:Open Source License

/**
 * Creates an {@link IndexWriter} over the given directory, commits once and installs it as the
 * service's current writer.
 *
 * When the host reports an exact memory limit for this service, three quarters of it (at least 1 MB)
 * become the writer's RAM buffer size and one quarter is stored in {@code linkAccessMemoryLimitMB}.
 * The index is opened in CREATE_OR_APPEND mode with a snapshot-capable deletion policy that keeps only
 * the last commit. Installing the writer, clearing {@code linkAccessTimes} and updating the timestamps
 * happen together under {@code searchSync}.
 *
 * @param directory file-system location of the index
 * @param doUpgrade when {@code true} and an index already exists there, it is upgraded in place first
 * @return the writer created by this call
 * @throws Exception if upgrading, opening or committing the index fails
 */
public IndexWriter createWriter(File directory, boolean doUpgrade) throws Exception {
    Analyzer analyzer = new SimpleAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    Long totalMBs = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT);
    if (totalMBs != null) {
        // 3/4 of the limit for the index writer buffer, never less than 1 MB
        long cacheSizeMB = (totalMBs * 3) / 4;
        cacheSizeMB = Math.max(1, cacheSizeMB);
        iwc.setRAMBufferSizeMB(cacheSizeMB);
        // remaining 1/4 for link access tracking
        this.linkAccessMemoryLimitMB = totalMBs / 4;
    }

    Directory dir = MMapDirectory.open(directory.toPath());

    // Upgrade the index in place if necessary.
    if (doUpgrade && DirectoryReader.indexExists(dir)) {
        upgradeIndex(dir);
    }

    iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    iwc.setIndexDeletionPolicy(new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()));

    IndexWriter w = new IndexWriter(dir, iwc);
    w.commit();

    synchronized (this.searchSync) {
        this.writer = w;
        this.linkAccessTimes.clear();
        this.indexUpdateTimeMicros = Utils.getNowMicrosUtc();
        this.indexWriterCreationTimeMicros = this.indexUpdateTimeMicros;
    }
    // Return the local reference: re-reading this.writer outside the lock could observe a writer
    // installed by a concurrent call instead of the one created here.
    return w;
}

From source file:com.zimbra.cs.index.LuceneIndex.java

License:Open Source License

/**
 * Builds the {@link IndexWriterConfig} for this mailbox index from the local configuration ({@code LC}).
 *
 * The RAM buffer setting is read in KB and divided by 1024 to get the MB value Lucene expects. The
 * merge policy is a {@link LogDocMergePolicy} when {@code zimbra_index_lucene_merge_policy} is true
 * and a {@link LogByteSizeMergePolicy} otherwise; for the latter the min/max merge settings are
 * likewise divided by 1024. A max-merge setting of {@code Integer.MAX_VALUE} leaves the policy's own
 * maximum untouched.
 *
 * @return the configured writer configuration
 */
private IndexWriterConfig getWriterConfig() {
    IndexWriterConfig writerConfig = new IndexWriterConfig(VERSION, mailbox.index.getAnalyzer());
    writerConfig.setMergeScheduler(new MergeScheduler());
    writerConfig.setMaxBufferedDocs(LC.zimbra_index_lucene_max_buffered_docs.intValue());

    double ramBufferMB = LC.zimbra_index_lucene_ram_buffer_size_kb.intValue() / 1024.0;
    writerConfig.setRAMBufferSizeMB(ramBufferMB);

    boolean mergeByDocCount = LC.zimbra_index_lucene_merge_policy.booleanValue();
    if (!mergeByDocCount) {
        // Size-based merging: thresholds are converted from KB to MB.
        LogByteSizeMergePolicy bySize = new LogByteSizeMergePolicy();
        writerConfig.setMergePolicy(bySize);
        bySize.setUseCompoundFile(LC.zimbra_index_lucene_use_compound_file.booleanValue());
        bySize.setMergeFactor(LC.zimbra_index_lucene_merge_factor.intValue());
        bySize.setMinMergeMB(LC.zimbra_index_lucene_min_merge.intValue() / 1024.0);
        if (LC.zimbra_index_lucene_max_merge.intValue() != Integer.MAX_VALUE) {
            bySize.setMaxMergeMB(LC.zimbra_index_lucene_max_merge.intValue() / 1024.0);
        }
        return writerConfig;
    }

    // Document-count-based merging: thresholds are used as-is.
    LogDocMergePolicy byDocs = new LogDocMergePolicy();
    writerConfig.setMergePolicy(byDocs);
    byDocs.setUseCompoundFile(LC.zimbra_index_lucene_use_compound_file.booleanValue());
    byDocs.setMergeFactor(LC.zimbra_index_lucene_merge_factor.intValue());
    byDocs.setMinMergeDocs(LC.zimbra_index_lucene_min_merge.intValue());
    if (LC.zimbra_index_lucene_max_merge.intValue() != Integer.MAX_VALUE) {
        byDocs.setMaxMergeDocs(LC.zimbra_index_lucene_max_merge.intValue());
    }
    return writerConfig;
}

From source file:de.citec.lucene.CreateIndex.java

/**
 * Builds a fresh Lucene index (OpenMode.CREATE) from the hard-coded corpus file(s) using the analyzer
 * that matches the hard-coded language, then prints {@code counter}.
 *
 * Both the index directory and the writer are managed by try-with-resources so they are closed even
 * when indexing fails. An {@link IOException} while indexing an individual file is logged at SEVERE
 * level and does not stop the remaining files.
 *
 * @param args unused
 * @throws IOException if the index directory or the writer cannot be opened or closed
 */
public static void main(String[] args) throws IOException {
    Analyzer analyzer = null;

    List<String> files = new ArrayList<>();
    files.add("/Users/swalter/Documents/EsaDeutsch/new_copus_german.txt");
    String indexPath = "/Users/swalter/Documents/EsaDeutsch/Index/";
    Language language = Language.DE;

    if (language.equals(Language.DE)) {
        analyzer = new GermanAnalyzer();
    } else if (language.equals(Language.ES)) {
        analyzer = new SpanishAnalyzer();
    } else if (language.equals(Language.EN)) {
        analyzer = new EnglishAnalyzer();
    }

    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    // NOTE(review): 12000 MB is a very large buffer; Lucene also applies a separate per-thread hard
    // limit, so the effective flush threshold may be much lower — confirm this value is intended.
    iwc.setRAMBufferSizeMB(12000);

    // Resources are closed in reverse order: the writer first, then the directory.
    try (Directory dir = FSDirectory.open(Paths.get(indexPath));
            IndexWriter writer = new IndexWriter(dir, iwc)) {
        files.forEach(f -> {
            try {
                indexDocs(writer, Paths.get(f));
            } catch (IOException ex) {
                Logger.getLogger(CreateIndex.class.getName()).log(Level.SEVERE, null, ex);
            }
        });
    }
    System.out.println(counter);
}