List of usage examples for org.apache.lucene.index IndexWriterConfig setRAMBufferSizeMB
@Override public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB)
From source file:com.stratio.cassandra.lucene.service.LuceneIndex.java
License:Apache License
/** * Builds a new {@code RowDirectory} using the specified directory path and analyzer. * * @param keyspace The keyspace name. * @param table The table name. * @param name The index name. * @param path The path of the directory in where the Lucene files will be stored. * @param ramBufferMB The index writer buffer size in MB. * @param maxMergeMB NRTCachingDirectory max merge size in MB. * @param maxCachedMB NRTCachingDirectory max cached MB. * @param analyzer The default {@link Analyzer}. * @param refreshSeconds The index readers refresh time in seconds. Writings are not visible until this time. * @param refreshCallback A runnable to be run on index refresh. * @throws IOException If Lucene throws IO errors. *///from w w w . j av a 2 s.c om public LuceneIndex(String keyspace, String table, String name, Path path, Integer ramBufferMB, Integer maxMergeMB, Integer maxCachedMB, Analyzer analyzer, Double refreshSeconds, Runnable refreshCallback) throws IOException { this.path = path; this.refreshCallback = refreshCallback; this.logName = String.format("Lucene index %s.%s.%s", keyspace, table, name); // Open or create directory FSDirectory fsDirectory = FSDirectory.open(path); directory = new NRTCachingDirectory(fsDirectory, maxMergeMB, maxCachedMB); // Setup index writer IndexWriterConfig config = new IndexWriterConfig(analyzer); config.setRAMBufferSizeMB(ramBufferMB); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); config.setUseCompoundFile(true); config.setMergePolicy(new TieredMergePolicy()); indexWriter = new IndexWriter(directory, config); // Setup NRT search SearcherFactory searcherFactory = new SearcherFactory() { public IndexSearcher newSearcher(IndexReader reader) throws IOException { LuceneIndex.this.refreshCallBack(); IndexSearcher searcher = new IndexSearcher(reader); searcher.setSimilarity(new NoIDFSimilarity()); return searcher; } }; TrackingIndexWriter trackingIndexWriter = new TrackingIndexWriter(indexWriter); searcherManager = new 
SearcherManager(indexWriter, true, searcherFactory); searcherReopener = new ControlledRealTimeReopenThread<>(trackingIndexWriter, searcherManager, refreshSeconds, refreshSeconds); searcherReopener.start(); // Start the refresher thread // Register JMX MBean try { objectName = new ObjectName( String.format("com.stratio.cassandra.lucene:type=LuceneIndexes,keyspace=%s,table=%s,index=%s", keyspace, table, name)); ManagementFactory.getPlatformMBeanServer().registerMBean(this, objectName); } catch (MBeanException | OperationsException e) { Log.error(e, "Error while registering MBean"); } }
From source file:com.tuplejump.stargate.lucene.BasicIndexer.java
License:Apache License
private IndexWriter getIndexWriter(Version luceneV) throws IOException { file = LuceneUtils.getDirectory(keyspaceName, cfName, indexName, vNodeName); IndexWriterConfig config = new IndexWriterConfig(luceneV, analyzer); config.setRAMBufferSizeMB(256); config.setOpenMode(OPEN_MODE);//from ww w. j a va 2s. c o m directory = FSDirectory.open(file); logger.warn( indexName + " SG Index - Opened dir[" + file.getAbsolutePath() + "] - Openmode[" + OPEN_MODE + "]"); return new IndexWriter(directory, config); }
From source file:com.tuplejump.stargate.lucene.NearRealTimeIndexer.java
License:Apache License
private IndexWriter getIndexWriter(Version luceneV) throws IOException { file = Utils.getDirectory(keyspaceName, cfName, indexName, vNodeName); IndexWriterConfig config = new IndexWriterConfig(luceneV, analyzer); config.setRAMBufferSizeMB(256); config.setOpenMode(OPEN_MODE);//from ww w . j a v a 2 s. co m directory = new NRTCachingDirectory(FSDirectory.open(file), 100, 100); logger.warn( indexName + " SG Index - Opened dir[" + file.getAbsolutePath() + "] - Openmode[" + OPEN_MODE + "]"); return new IndexWriter(directory, config); }
From source file:com.twentyn.patentSearch.DocumentIndexer.java
License:Open Source License
public static void main(String[] args) throws Exception { System.out.println("Starting up..."); System.out.flush();// w ww. j av a 2s .c o m Options opts = new Options(); opts.addOption(Option.builder("i").longOpt("input").hasArg().required() .desc("Input file or directory to index").build()); opts.addOption(Option.builder("x").longOpt("index").hasArg().required() .desc("Path to index file to generate").build()); opts.addOption(Option.builder("h").longOpt("help").desc("Print this help message and exit").build()); opts.addOption(Option.builder("v").longOpt("verbose").desc("Print verbose log output").build()); HelpFormatter helpFormatter = new HelpFormatter(); CommandLineParser cmdLineParser = new DefaultParser(); CommandLine cmdLine = null; try { cmdLine = cmdLineParser.parse(opts, args); } catch (ParseException e) { System.out.println("Caught exception when parsing command line: " + e.getMessage()); helpFormatter.printHelp("DocumentIndexer", opts); System.exit(1); } if (cmdLine.hasOption("help")) { helpFormatter.printHelp("DocumentIndexer", opts); System.exit(0); } if (cmdLine.hasOption("verbose")) { // With help from http://stackoverflow.com/questions/23434252/programmatically-change-log-level-in-log4j2 LoggerContext ctx = (LoggerContext) LogManager.getContext(false); Configuration ctxConfig = ctx.getConfiguration(); LoggerConfig logConfig = ctxConfig.getLoggerConfig(LogManager.ROOT_LOGGER_NAME); logConfig.setLevel(Level.DEBUG); ctx.updateLoggers(); LOGGER.debug("Verbose logging enabled"); } LOGGER.info("Opening index at " + cmdLine.getOptionValue("index")); Directory indexDir = FSDirectory.open(new File(cmdLine.getOptionValue("index")).toPath()); /* The standard analyzer is too aggressive with chemical entities (it strips structural annotations, for one * thing), and the whitespace analyzer doesn't do any case normalization or stop word elimination. 
This custom * analyzer appears to treat chemical entities better than the standard analyzer without admitting too much * cruft to the index. */ Analyzer analyzer = CustomAnalyzer.builder().withTokenizer("whitespace").addTokenFilter("lowercase") .addTokenFilter("stop").build(); IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer); writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); writerConfig.setRAMBufferSizeMB(1 << 10); IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig); String inputFileOrDir = cmdLine.getOptionValue("input"); File splitFileOrDir = new File(inputFileOrDir); if (!(splitFileOrDir.exists())) { LOGGER.error("Unable to find directory at " + inputFileOrDir); System.exit(1); } DocumentIndexer indexer = new DocumentIndexer(indexWriter); PatentCorpusReader corpusReader = new PatentCorpusReader(indexer, splitFileOrDir); corpusReader.readPatentCorpus(); indexer.commitAndClose(); }
From source file:com.vmware.dcp.services.common.LuceneBlobIndexService.java
License:Open Source License
public IndexWriter createWriter(File directory) throws IOException { Directory dir = MMapDirectory.open(directory.toPath()); Analyzer analyzer = new SimpleAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (this.indexOptions.contains(BlobIndexOption.CREATE)) { iwc.setOpenMode(OpenMode.CREATE); } else {//from www . ja va 2s .c o m iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } Long totalMBs = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT); if (totalMBs != null) { totalMBs = Math.max(1, totalMBs); iwc.setRAMBufferSizeMB(totalMBs); } IndexWriter w = new IndexWriter(dir, iwc); w.commit(); return w; }
From source file:com.vmware.dcp.services.common.LuceneDocumentIndexService.java
License:Open Source License
public IndexWriter createWriter(File directory, boolean doUpgrade) throws Exception { Directory dir = MMapDirectory.open(directory.toPath()); Analyzer analyzer = new SimpleAnalyzer(); // Upgrade the index in place if necessary. if (doUpgrade && DirectoryReader.indexExists(dir)) { upgradeIndex(dir);/*from w w w . j a v a2 s .c o m*/ } IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); iwc.setIndexDeletionPolicy(new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy())); Long totalMBs = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT); if (totalMBs != null) { // give half to the index, the other half we keep for service caching context totalMBs = Math.max(1, totalMBs / 2); iwc.setRAMBufferSizeMB(totalMBs); } this.writer = new IndexWriter(dir, iwc); this.writer.commit(); this.indexUpdateTimeMicros = Utils.getNowMicrosUtc(); this.indexWriterCreationTimeMicros = this.indexUpdateTimeMicros; return this.writer; }
From source file:com.vmware.demo.sgf.lucene.impl.LuceneGemFireRepositoryImpl.java
License:Apache License
/** * Set up stores for indexes//ww w.j ava2 s . c o m */ public void initialize() { if (!hasInitialized) { try { directory = new NIOFSDirectory(new File(filepath), new SimpleFSLockFactory()); analyzer = new StandardAnalyzer(Version.LUCENE_40); IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer); conf.setRAMBufferSizeMB(ramsize); conf.setOpenMode(OpenMode.CREATE_OR_APPEND); conf.setWriteLockTimeout(2000); indexWriter = new IndexWriter(directory, conf); // Open a reader/searcher searchManager = new SearcherManager(indexWriter, applyAllDeletes, new SearcherFactory()); hasInitialized = true; } catch (IOException e) { e.printStackTrace(); } } }
From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java
License:Open Source License
public IndexWriter createWriter(File directory, boolean doUpgrade) throws Exception { Analyzer analyzer = new SimpleAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); Long totalMBs = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT); if (totalMBs != null) { long cacheSizeMB = (totalMBs * 3) / 4; cacheSizeMB = Math.max(1, cacheSizeMB); iwc.setRAMBufferSizeMB(cacheSizeMB); this.linkAccessMemoryLimitMB = totalMBs / 4; }//from w ww .j a v a 2s. c om Directory dir = MMapDirectory.open(directory.toPath()); // Upgrade the index in place if necessary. if (doUpgrade && DirectoryReader.indexExists(dir)) { upgradeIndex(dir); } iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); iwc.setIndexDeletionPolicy(new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy())); IndexWriter w = new IndexWriter(dir, iwc); w.commit(); synchronized (this.searchSync) { this.writer = w; this.linkAccessTimes.clear(); this.indexUpdateTimeMicros = Utils.getNowMicrosUtc(); this.indexWriterCreationTimeMicros = this.indexUpdateTimeMicros; } return this.writer; }
From source file:com.zimbra.cs.index.LuceneIndex.java
License:Open Source License
/**
 * Builds the {@link IndexWriterConfig} for this mailbox index from the local configuration.
 *
 * <p>The RAM buffer size and the byte-size merge bounds are configured in KB and converted to
 * MB here. Depending on {@code zimbra_index_lucene_merge_policy}, either a
 * {@link LogDocMergePolicy} (bounds applied as document counts) or a
 * {@link LogByteSizeMergePolicy} (bounds applied as sizes) is installed. The maximum merge bound
 * is left untouched when it is configured as {@code Integer.MAX_VALUE}.
 *
 * @return the populated writer configuration
 */
private IndexWriterConfig getWriterConfig() {
    final IndexWriterConfig writerConfig = new IndexWriterConfig(VERSION, mailbox.index.getAnalyzer());
    writerConfig.setMergeScheduler(new MergeScheduler());
    writerConfig.setMaxBufferedDocs(LC.zimbra_index_lucene_max_buffered_docs.intValue());
    writerConfig.setRAMBufferSizeMB(LC.zimbra_index_lucene_ram_buffer_size_kb.intValue() / 1024.0);

    final boolean mergeByDocCount = LC.zimbra_index_lucene_merge_policy.booleanValue();
    if (!mergeByDocCount) {
        // Size-based merging: the configured bounds are KB values
        final LogByteSizeMergePolicy byteSizePolicy = new LogByteSizeMergePolicy();
        writerConfig.setMergePolicy(byteSizePolicy);
        byteSizePolicy.setUseCompoundFile(LC.zimbra_index_lucene_use_compound_file.booleanValue());
        byteSizePolicy.setMergeFactor(LC.zimbra_index_lucene_merge_factor.intValue());
        byteSizePolicy.setMinMergeMB(LC.zimbra_index_lucene_min_merge.intValue() / 1024.0);
        if (LC.zimbra_index_lucene_max_merge.intValue() < Integer.MAX_VALUE) {
            byteSizePolicy.setMaxMergeMB(LC.zimbra_index_lucene_max_merge.intValue() / 1024.0);
        }
        return writerConfig;
    }

    // Document-count-based merging: the configured bounds are used as document counts
    final LogDocMergePolicy docCountPolicy = new LogDocMergePolicy();
    writerConfig.setMergePolicy(docCountPolicy);
    docCountPolicy.setUseCompoundFile(LC.zimbra_index_lucene_use_compound_file.booleanValue());
    docCountPolicy.setMergeFactor(LC.zimbra_index_lucene_merge_factor.intValue());
    docCountPolicy.setMinMergeDocs(LC.zimbra_index_lucene_min_merge.intValue());
    if (LC.zimbra_index_lucene_max_merge.intValue() < Integer.MAX_VALUE) {
        docCountPolicy.setMaxMergeDocs(LC.zimbra_index_lucene_max_merge.intValue());
    }
    return writerConfig;
}
From source file:de.citec.lucene.CreateIndex.java
/**
 * Builds a fresh Lucene index (open mode {@code CREATE}) from a hard-coded list of corpus files,
 * using the analyzer that matches the configured language, and prints the final counter.
 *
 * <p>Both the index directory and the writer are closed when indexing ends, whether it
 * completes or fails. A file that cannot be indexed is logged and skipped.
 *
 * @param args unused
 * @throws IOException if the index directory or the writer cannot be opened or closed
 * @throws IllegalStateException if no analyzer is available for the configured language
 */
public static void main(String[] args) throws IOException {
    List<String> files = new ArrayList<>();
    files.add("/Users/swalter/Documents/EsaDeutsch/new_copus_german.txt");

    String indexPath = "/Users/swalter/Documents/EsaDeutsch/Index/";
    Language language = Language.DE;

    Analyzer analyzer = null;
    if (language.equals(Language.DE)) {
        analyzer = new GermanAnalyzer();
    }
    if (language.equals(Language.ES)) {
        analyzer = new SpanishAnalyzer();
    }
    if (language.equals(Language.EN)) {
        analyzer = new EnglishAnalyzer();
    }
    if (analyzer == null) {
        // Fail fast instead of handing a null analyzer to the writer configuration.
        throw new IllegalStateException("No analyzer available for language " + language);
    }

    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    // NOTE(review): a 12000 MB RAM buffer presumably needs a correspondingly large heap - confirm.
    iwc.setRAMBufferSizeMB(12000);

    // Resources are closed in reverse order: the writer first, then the directory.
    try (Directory dir = FSDirectory.open(Paths.get(indexPath));
         IndexWriter writer = new IndexWriter(dir, iwc)) {
        for (String f : files) {
            try {
                indexDocs(writer, Paths.get(f));
            } catch (IOException ex) {
                Logger.getLogger(CreateIndex.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }

    System.out.println(counter);
}