Usage examples for the org.apache.lucene.index.LogDocMergePolicy constructor LogDocMergePolicy()
public LogDocMergePolicy()
From source file:DVBench.java
License:Apache License
static void doBench(int bpv) throws Exception { File file = new File("/data/indices/dvbench"); file.mkdirs();/*w w w . j a va2s .c o m*/ Directory dir = FSDirectory.open(file); IndexWriterConfig config = new IndexWriterConfig(null); config.setOpenMode(OpenMode.CREATE); config.setMergeScheduler(new SerialMergeScheduler()); config.setMergePolicy(new LogDocMergePolicy()); config.setMaxBufferedDocs(25000); IndexWriter writer = new IndexWriter(dir, config); MyRandom r = new MyRandom(); int numdocs = 400000; Document doc = new Document(); Field dv = new NumericDocValuesField("dv", 0); Field inv = new LongField("inv", 0, Field.Store.NO); Field boxed = new BinaryDocValuesField("boxed", new BytesRef(8)); Field boxed2 = new BinaryDocValuesField("boxed2", new BytesRef(8)); doc.add(dv); doc.add(inv); doc.add(boxed); doc.add(boxed2); for (int i = 0; i < numdocs; i++) { // defeat blockpackedwriter final long value; if (i % 8192 == 0) { value = bpv == 64 ? Long.MIN_VALUE : 0; } else if (i % 8192 == 1) { value = bpv == 64 ? 
Long.MAX_VALUE : (1L << bpv) - 1; } else { value = r.nextLong(bpv); } dv.setLongValue(value); inv.setLongValue(value); box(value, boxed.binaryValue()); box(value, boxed2.binaryValue()); boxed2.binaryValue().length = (bpv + 7) / 8; // fixed length writer.addDocument(doc); } writer.close(); // run dv search tests String description = "dv (bpv=" + bpv + ")"; DirectoryReader reader = DirectoryReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); searcher.setQueryCache(null); // don't bench the cache int hash = 0; // warmup hash += search(description, searcher, "dv", 300, true); hash += search(description, searcher, "dv", 300, false); // Uninverting Map<String, UninvertingReader.Type> mapping = Collections.singletonMap("inv", UninvertingReader.Type.LONG); DirectoryReader uninv = UninvertingReader.wrap(reader, mapping); IndexSearcher searcher2 = new IndexSearcher(uninv); searcher2.setQueryCache(null); // don't bench the cache description = "fc (bpv=" + bpv + ")"; // warmup hash += search(description, searcher2, "inv", 300, true); hash += search(description, searcher2, "inv", 300, false); // Boxed inside binary DirectoryReader boxedReader = new BinaryAsVLongReader(reader); IndexSearcher searcher3 = new IndexSearcher(boxedReader); searcher3.setQueryCache(null); // don't bench the cache description = "boxed (bpv=" + bpv + ")"; // warmup hash += search(description, searcher3, "boxed", 300, true); hash += search(description, searcher3, "boxed", 300, false); description = "boxed fixed-length (bpv=" + bpv + ")"; // warmup hash += search(description, searcher3, "boxed2", 300, true); hash += search(description, searcher3, "boxed2", 300, false); if (hash == 3) { // wont happen System.out.println("hash=" + hash); } reader.close(); dir.close(); }
From source file:IndexAndSearchOpenStreetMaps1D.java
License:Apache License
private static void createIndex() throws IOException { long t0 = System.nanoTime(); CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); int BUFFER_SIZE = 1 << 16; // 64K InputStream is = Files .newInputStream(Paths.get("/lucenedata/open-street-maps/latlon.subsetPlusAllLondon.txt")); BufferedReader reader = new BufferedReader(new InputStreamReader(is, decoder), BUFFER_SIZE); Directory dir = FSDirectory.open(Paths.get("/c/tmp/bkdtest1d" + (USE_NF ? "_nf" : ""))); IndexWriterConfig iwc = new IndexWriterConfig(null); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); //iwc.setMaxBufferedDocs(109630); //iwc.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); iwc.setRAMBufferSizeMB(256.0);//from ww w . j a va 2 s . co m iwc.setMergePolicy(new LogDocMergePolicy()); iwc.setMergeScheduler(new SerialMergeScheduler()); iwc.setInfoStream(new PrintStreamInfoStream(System.out)); IndexWriter w = new IndexWriter(dir, iwc); int count = 0; byte[] scratch = new byte[4]; while (true) { String line = reader.readLine(); if (line == null) { break; } String[] parts = line.split(","); //long id = Long.parseLong(parts[0]); int lat = (int) (1000000. * Double.parseDouble(parts[1])); //int lon = (int) (1000000. * Double.parseDouble(parts[2])); Document doc = new Document(); if (USE_NF) { doc.add(new LegacyIntField("latnum", lat, Field.Store.NO)); //doc.add(new LongField("lonnum", lon, Field.Store.NO)); } else { doc.add(new IntPoint("lat", lat)); //doc.add(new SortedNumericDocValuesField("lon", lon)); } w.addDocument(doc); count++; if (count % 1000000 == 0) { System.out.println(count + "..."); } } //w.forceMerge(1); w.commit(); System.out.println(w.maxDoc() + " total docs"); w.close(); long t1 = System.nanoTime(); System.out.println(((t1 - t0) / 1000000000.0) + " sec to build index"); }
From source file:com.aliasi.lingmed.medline.IndexMedline.java
License:Lingpipe license
/** * Run the command. See class documentation above for details on * arguments and behavior./*from www . j av a 2s . c om*/ */ public void run() { System.out.println("start run"); try { File[] files = getLaterFiles(mDistDir); System.out.println("Total files to process: " + files.length); System.out.println("File names: " + java.util.Arrays.asList(files)); // if (mLogger.isDebugEnabled()) // mLogger.debug("File names: " + java.util.Arrays.asList(files)); if (files.length > 0) { MedlineParser parser = new MedlineParser(true); // true = save raw XML Directory fsDir = FSDirectory.open(mIndex); IndexWriterConfig iwConf = new IndexWriterConfig(Version.LUCENE_36, mCodec.getAnalyzer()); iwConf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); iwConf.setRAMBufferSizeMB(RAM_BUF_SIZE); if (sIsBaseline) { LogDocMergePolicy ldmp = new LogDocMergePolicy(); ldmp.setMergeFactor(MERGE_FACTOR_HI); iwConf.setMergePolicy(ldmp); } IndexWriter indexWriter = new IndexWriter(fsDir, iwConf); for (File file : files) { System.out.println("processing file: " + file); MedlineIndexer indexer = new MedlineIndexer(indexWriter, mCodec); parser.setHandler(indexer); parseFile(parser, file); indexer.close(); recordFile(indexWriter, file.getName()); System.out.println("completed processing file: " + file); } System.out.println("All files parsed, now optimize index"); indexWriter.forceMerge(1); indexWriter.commit(); indexWriter.close(); } System.out.println("Processing complete."); } catch (Exception e) { // mLogger.warn("Unexpected Exception: "+e.getMessage()); // mLogger.warn("stack trace: "+Logging.logStackTrace(e)); // mLogger.warn("Aborting this run"); IllegalStateException e2 = new IllegalStateException(e.getMessage()); e2.setStackTrace(e.getStackTrace()); throw e2; } }
From source file:com.edgenius.wiki.search.lucene.SimpleIndexFactory.java
License:Open Source License
/**
 * Creates the writer configuration from this factory's settings.
 *
 * <p>The configuration uses the index analyzer from {@code analyzerProvider} and a
 * {@link LogDocMergePolicy} set up with the configured compound-file flag, maximum merge docs
 * and merge factor.
 *
 * @return a newly built {@link IndexWriterConfig}
 */
private IndexWriterConfig getIndexWriterConfig() {
    final IndexWriterConfig config = new IndexWriterConfig(LuceneConfig.VERSION,
            analyzerProvider.getIndexAnalyzer());

    // Buffering / term index / locking settings.
    config.setMaxBufferedDocs(maxBufferedDocs);
    config.setTermIndexInterval(termIndexInterval);
    config.setWriteLockTimeout(writeLockTimeout);

    // Merge behaviour: segments are levelled by document count.
    final LogMergePolicy docCountPolicy = new LogDocMergePolicy();
    docCountPolicy.setUseCompoundFile(useCompoundFile);
    docCountPolicy.setMaxMergeDocs(maxMergeDocs);
    docCountPolicy.setMergeFactor(mergeFactor);
    config.setMergePolicy(docCountPolicy);

    return config;
}
From source file:com.impetus.kundera.index.LuceneIndexer.java
License:Apache License
/**
 * Instantiates a new lucene indexer.
 *
 * <p>If {@code lucDirPath} exists on disk, the on-disk index obtained from
 * {@code getIndexDirectory()} is loaded into a {@link RAMDirectory}; otherwise an empty
 * {@link RAMDirectory} is used. An {@link IndexWriter} with a {@link LogDocMergePolicy}
 * (merge factor 1000) and a 32 MB RAM buffer is then opened on it.
 *
 * @param lucDirPath
 *            the luc dir path
 * @throws LuceneIndexingException
 *             if the directory or the writer cannot be initialised
 */
private LuceneIndexer(String lucDirPath) {
    try {
        luceneDirPath = lucDirPath;
        File file = new File(luceneDirPath);
        if (file.exists()) {
            // The source directory is only needed while its files are read into the
            // RAMDirectory, so close it right after instead of leaking it.
            try (FSDirectory sourceDir = FSDirectory.open(getIndexDirectory().toPath())) {
                // TODO initialize context.
                index = new RAMDirectory(sourceDir, IOContext.DEFAULT);
            }
        } else {
            index = new RAMDirectory();
        }

        /* writer */
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
        LogDocMergePolicy logDocMergePolicy = new LogDocMergePolicy();
        logDocMergePolicy.setMergeFactor(1000);
        indexWriterConfig.setMergePolicy(logDocMergePolicy);
        w = new IndexWriter(index, indexWriterConfig);
        w.getConfig().setRAMBufferSizeMB(32);
    } catch (Exception e) {
        log.error("Error while instantiating LuceneIndexer, Caused by :.", e);
        throw new LuceneIndexingException(e);
    }
}
From source file:com.impetus.kundera.index.LuceneIndexer.java
License:Apache License
@Override public final void unindex(EntityMetadata metadata, Object id, KunderaMetadata kunderaMetadata, Class<?> parentClazz) throws LuceneIndexingException { if (log.isDebugEnabled()) log.debug("Unindexing @Entity[{}] for key:{}", metadata.getEntityClazz().getName(), id); String luceneQuery = null;/*w w w . ja va 2 s.c o m*/ boolean isEmbeddedId = false; MetamodelImpl metaModel = null; if (kunderaMetadata != null && metadata != null) { metaModel = (MetamodelImpl) kunderaMetadata.getApplicationMetadata() .getMetamodel(metadata.getPersistenceUnit()); isEmbeddedId = metaModel.isEmbeddable(metadata.getIdAttribute().getBindableJavaType()); } try { QueryParser qp = new QueryParser(DEFAULT_SEARCHABLE_FIELD, new StandardAnalyzer()); qp.setLowercaseExpandedTerms(false); qp.setAllowLeadingWildcard(true); luceneQuery = getLuceneQuery(metadata, id, isEmbeddedId, metaModel, parentClazz); Query q = qp.parse(luceneQuery); w.deleteDocuments(q); w.commit(); w.close(); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); LogDocMergePolicy logDocMergePolicy = new LogDocMergePolicy(); logDocMergePolicy.setMergeFactor(1000); indexWriterConfig.setMergePolicy(logDocMergePolicy); w = new IndexWriter(index, indexWriterConfig); w.getConfig().setRAMBufferSizeMB(32); // flushInternal(); } catch (Exception e) { log.error("Error while instantiating LuceneIndexer, Caused by :.", e); throw new LuceneIndexingException(e); } }
From source file:com.rocana.lucene.codec.v1.TestRocanaPerFieldPostingsFormat2.java
License:Apache License
private IndexWriter newWriter(Directory dir, IndexWriterConfig conf) throws IOException { LogDocMergePolicy logByteSizeMergePolicy = new LogDocMergePolicy(); logByteSizeMergePolicy.setNoCFSRatio(0.0); // make sure we use plain // files//from w w w . j a va2 s. c om conf.setMergePolicy(logByteSizeMergePolicy); final IndexWriter writer = new IndexWriter(dir, conf); return writer; }
From source file:com.zimbra.cs.index.LuceneIndex.java
License:Open Source License
/**
 * Builds the {@link IndexWriterConfig} for this mailbox index from the {@code LC} settings.
 *
 * <p>Depending on {@code LC.zimbra_index_lucene_merge_policy}, either a
 * {@link LogDocMergePolicy} (min/max merge expressed in documents) or a
 * {@link LogByteSizeMergePolicy} (min/max merge values divided by 1024.0 before being passed
 * to the MB setters) is installed. A maximum of {@code Integer.MAX_VALUE} means the policy's
 * own maximum is left untouched.
 *
 * @return the configured {@link IndexWriterConfig}
 */
private IndexWriterConfig getWriterConfig() {
    final IndexWriterConfig writerConfig = new IndexWriterConfig(VERSION, mailbox.index.getAnalyzer());
    writerConfig.setMergeScheduler(new MergeScheduler());
    writerConfig.setMaxBufferedDocs(LC.zimbra_index_lucene_max_buffered_docs.intValue());
    writerConfig.setRAMBufferSizeMB(LC.zimbra_index_lucene_ram_buffer_size_kb.intValue() / 1024.0);

    final boolean mergeByDocCount = LC.zimbra_index_lucene_merge_policy.booleanValue();
    final boolean compoundFile = LC.zimbra_index_lucene_use_compound_file.booleanValue();
    final int mergeFactor = LC.zimbra_index_lucene_merge_factor.intValue();
    final int minMerge = LC.zimbra_index_lucene_min_merge.intValue();
    final int maxMerge = LC.zimbra_index_lucene_max_merge.intValue();
    final boolean maxMergeLimited = maxMerge != Integer.MAX_VALUE;

    if (mergeByDocCount) {
        final LogDocMergePolicy byDocs = new LogDocMergePolicy();
        byDocs.setUseCompoundFile(compoundFile);
        byDocs.setMergeFactor(mergeFactor);
        byDocs.setMinMergeDocs(minMerge);
        if (maxMergeLimited) {
            byDocs.setMaxMergeDocs(maxMerge);
        }
        writerConfig.setMergePolicy(byDocs);
    } else {
        final LogByteSizeMergePolicy byBytes = new LogByteSizeMergePolicy();
        byBytes.setUseCompoundFile(compoundFile);
        byBytes.setMergeFactor(mergeFactor);
        byBytes.setMinMergeMB(minMerge / 1024.0);
        if (maxMergeLimited) {
            byBytes.setMaxMergeMB(maxMerge / 1024.0);
        }
        writerConfig.setMergePolicy(byBytes);
    }
    return writerConfig;
}
From source file:de.csw.linkgenerator.plugin.lucene.IndexUpdater.java
License:Open Source License
private void openWriter(OpenMode openMode) { if (writer != null) { LOG.error("Writer already open and createWriter called"); return;/*ww w . j av a2 s .c om*/ } try { // fix for windows by Daniel Cortes: // FSDirectory f = FSDirectory.getDirectory(indexDir); IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer); conf.setOpenMode(openMode); // Ralph: This is kind of guesswork LogDocMergePolicy mergePolicy = new LogDocMergePolicy(); mergePolicy.setUseCompoundFile(true); conf.setMergePolicy(mergePolicy); // writer = new IndexWriter (indexDir, analyzer, create); writer = new IndexWriter(indexDir, conf); // writer.setUseCompoundFile(true); if (LOG.isDebugEnabled()) { LOG.debug("successfully opened index writer : " + indexDir); } } catch (IOException e) { LOG.error("IOException when opening Lucene Index for writing at " + indexDir, e); } }
From source file:dk.defxws.fgslucene.OperationsImpl.java
License:Open Source License
/**
 * Lazily opens the shared {@code IndexWriter} for {@code indexName} and refreshes
 * {@code docCount} from it.
 *
 * <p>When no writer is open yet, one is created on the index directory configured for
 * {@code indexName}. Max buffered docs, merge factor and write-lock timeout are applied only
 * when the configured value is positive.
 *
 * @param indexName name of the index whose configuration is used
 * @throws GenericSearchException if the directory or writer cannot be opened, or the document
 *         count cannot be read (in which case the writer is closed first)
 */
private void getIndexWriter(String indexName) throws GenericSearchException {
    if (iw == null) {
        Directory dir;
        try {
            dir = new SimpleFSDirectory(new File(config.getIndexDir(indexName)));
        } catch (Exception e) {
            throw new GenericSearchException("IndexWriter new error indexName=" + indexName + " :\n", e);
        }
        IndexWriterConfig iwconfig = new IndexWriterConfig(Version.LUCENE_36, getQueryAnalyzer(indexName));
        int maxBufferedDocs = config.getMaxBufferedDocs(indexName);
        if (maxBufferedDocs > 0) {
            iwconfig.setMaxBufferedDocs(maxBufferedDocs);
        }
        int mergeFactor = config.getMergeFactor(indexName);
        if (mergeFactor > 0) {
            LogDocMergePolicy ldmp = new LogDocMergePolicy();
            ldmp.setMergeFactor(mergeFactor);
            iwconfig.setMergePolicy(ldmp);
        }
        long defaultWriteLockTimeout = config.getDefaultWriteLockTimeout(indexName);
        if (defaultWriteLockTimeout > 0) {
            // Kept for compatibility: changes the default for configs created afterwards.
            IndexWriterConfig.setDefaultWriteLockTimeout(defaultWriteLockTimeout);
            // iwconfig was constructed before the default changed, so the timeout must
            // also be set on this instance to take effect for the writer opened below.
            iwconfig.setWriteLockTimeout(defaultWriteLockTimeout);
        }
        try {
            iw = new IndexWriter(dir, iwconfig);
        } catch (Exception e) {
            throw new GenericSearchException("IndexWriter new error indexName=" + indexName + " :\n", e);
        }
    }
    try {
        docCount = iw.numDocs();
    } catch (Exception e) {
        closeIndexWriter(indexName);
        throw new GenericSearchException("IndexWriter numDocs error indexName=" + indexName + " :\n", e);
    }
    if (logger.isDebugEnabled()) {
        logger.debug("getIndexWriter indexName=" + indexName + " docCount=" + docCount);
    }
}