List of usage examples for `org.apache.lucene.index.IndexWriterConfig#setMergePolicy`
@Override
public IndexWriterConfig setMergePolicy(MergePolicy mergePolicy)
From source file:DVBench.java
License:Apache License
/**
 * Benchmarks numeric lookups at a given bit width: builds an index of 400k docs whose
 * long values need {@code bpv} bits, then times searches over four encodings of the same
 * value — numeric doc values ("dv"), an uninverted long field ("inv"), a variable-length
 * binary doc-values field ("boxed"), and a fixed-length one ("boxed2").
 *
 * @param bpv bits per value for the generated longs (64 gets Long.MIN/MAX sentinels)
 * @throws Exception on any index or search failure
 */
static void doBench(int bpv) throws Exception {
    // NOTE(review): hard-coded local path — this is throwaway benchmark code.
    File file = new File("/data/indices/dvbench");
    file.mkdirs();
    Directory dir = FSDirectory.open(file);
    // null analyzer: only numeric/binary fields are indexed, no text analysis needed.
    IndexWriterConfig config = new IndexWriterConfig(null);
    config.setOpenMode(OpenMode.CREATE);
    // Serial merging + LogDocMergePolicy keep segment geometry deterministic across runs.
    config.setMergeScheduler(new SerialMergeScheduler());
    config.setMergePolicy(new LogDocMergePolicy());
    config.setMaxBufferedDocs(25000);
    IndexWriter writer = new IndexWriter(dir, config);
    // MyRandom.nextLong(bpv) presumably yields a random long of at most bpv bits — TODO confirm.
    MyRandom r = new MyRandom();
    int numdocs = 400000;
    // One reusable Document/Field set: values are overwritten per iteration below.
    Document doc = new Document();
    Field dv = new NumericDocValuesField("dv", 0);
    Field inv = new LongField("inv", 0, Field.Store.NO);
    Field boxed = new BinaryDocValuesField("boxed", new BytesRef(8));
    Field boxed2 = new BinaryDocValuesField("boxed2", new BytesRef(8));
    doc.add(dv);
    doc.add(inv);
    doc.add(boxed);
    doc.add(boxed2);
    for (int i = 0; i < numdocs; i++) {
        // defeat blockpackedwriter: force full-range extremes once per 8192-doc block
        final long value;
        if (i % 8192 == 0) {
            value = bpv == 64 ? Long.MIN_VALUE : 0;
        } else if (i % 8192 == 1) {
            value = bpv == 64 ? Long.MAX_VALUE : (1L << bpv) - 1;
        } else {
            value = r.nextLong(bpv);
        }
        dv.setLongValue(value);
        inv.setLongValue(value);
        // box() presumably writes value into the BytesRef (vlong encoding?) — TODO confirm.
        box(value, boxed.binaryValue());
        box(value, boxed2.binaryValue());
        boxed2.binaryValue().length = (bpv + 7) / 8; // fixed length
        writer.addDocument(doc);
    }
    writer.close();
    // run dv search tests
    String description = "dv (bpv=" + bpv + ")";
    DirectoryReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setQueryCache(null); // don't bench the cache
    // hash accumulates hit counts so the JIT cannot dead-code-eliminate the searches.
    int hash = 0;
    // warmup
    hash += search(description, searcher, "dv", 300, true);
    hash += search(description, searcher, "dv", 300, false);
    // Uninverting
    Map<String, UninvertingReader.Type> mapping = Collections.singletonMap("inv",
            UninvertingReader.Type.LONG);
    DirectoryReader uninv = UninvertingReader.wrap(reader, mapping);
    IndexSearcher searcher2 = new IndexSearcher(uninv);
    searcher2.setQueryCache(null); // don't bench the cache
    description = "fc (bpv=" + bpv + ")";
    // warmup
    hash += search(description, searcher2, "inv", 300, true);
    hash += search(description, searcher2, "inv", 300, false);
    // Boxed inside binary
    DirectoryReader boxedReader = new BinaryAsVLongReader(reader);
    IndexSearcher searcher3 = new IndexSearcher(boxedReader);
    searcher3.setQueryCache(null); // don't bench the cache
    description = "boxed (bpv=" + bpv + ")";
    // warmup
    hash += search(description, searcher3, "boxed", 300, true);
    hash += search(description, searcher3, "boxed", 300, false);
    description = "boxed fixed-length (bpv=" + bpv + ")";
    // warmup
    hash += search(description, searcher3, "boxed2", 300, true);
    hash += search(description, searcher3, "boxed2", 300, false);
    if (hash == 3) { // wont happen
        System.out.println("hash=" + hash);
    }
    reader.close();
    dir.close();
}
From source file:IndexAndSearchOpenStreetMaps1D.java
License:Apache License
private static void createIndex() throws IOException { long t0 = System.nanoTime(); CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); int BUFFER_SIZE = 1 << 16; // 64K InputStream is = Files .newInputStream(Paths.get("/lucenedata/open-street-maps/latlon.subsetPlusAllLondon.txt")); BufferedReader reader = new BufferedReader(new InputStreamReader(is, decoder), BUFFER_SIZE); Directory dir = FSDirectory.open(Paths.get("/c/tmp/bkdtest1d" + (USE_NF ? "_nf" : ""))); IndexWriterConfig iwc = new IndexWriterConfig(null); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); //iwc.setMaxBufferedDocs(109630); //iwc.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); iwc.setRAMBufferSizeMB(256.0);/*from w ww . j a v a 2 s. c om*/ iwc.setMergePolicy(new LogDocMergePolicy()); iwc.setMergeScheduler(new SerialMergeScheduler()); iwc.setInfoStream(new PrintStreamInfoStream(System.out)); IndexWriter w = new IndexWriter(dir, iwc); int count = 0; byte[] scratch = new byte[4]; while (true) { String line = reader.readLine(); if (line == null) { break; } String[] parts = line.split(","); //long id = Long.parseLong(parts[0]); int lat = (int) (1000000. * Double.parseDouble(parts[1])); //int lon = (int) (1000000. * Double.parseDouble(parts[2])); Document doc = new Document(); if (USE_NF) { doc.add(new LegacyIntField("latnum", lat, Field.Store.NO)); //doc.add(new LongField("lonnum", lon, Field.Store.NO)); } else { doc.add(new IntPoint("lat", lat)); //doc.add(new SortedNumericDocValuesField("lon", lon)); } w.addDocument(doc); count++; if (count % 1000000 == 0) { System.out.println(count + "..."); } } //w.forceMerge(1); w.commit(); System.out.println(w.maxDoc() + " total docs"); w.close(); long t1 = System.nanoTime(); System.out.println(((t1 - t0) / 1000000000.0) + " sec to build index"); }
From source file:cn.hbu.cs.esearch.index.DiskSearchIndex.java
License:Apache License
/** * Opens an index modifier.// w ww . ja v a2 s.c om * @param analyzer Analyzer * @return IndexModifer instance */ @Override public IndexWriter openIndexWriter(Analyzer analyzer, Similarity similarity) throws IOException { if (_indexWriter != null) { return _indexWriter; } Directory directory = _dirMgr.getDirectory(true); log.info("opening index writer at: " + _dirMgr.getPath()); EsearchMergePolicy mergePolicy = new EsearchMergePolicy(); mergePolicy.setMergePolicyParams(_mergePolicyParams); // hao: autocommit is set to false with this constructor IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, analyzer); config.setOpenMode(OpenMode.CREATE_OR_APPEND); _deletionPolicy = new ZoieIndexDeletionPolicy(); config.setIndexDeletionPolicy(_deletionPolicy); config.setMergeScheduler(_mergeScheduler); config.setMergePolicy(mergePolicy); config.setReaderPooling(false); if (similarity != null) { config.setSimilarity(similarity); } config.setRAMBufferSizeMB(5); IndexWriter idxWriter = new IndexWriter(directory, config); // we need retrieve deletionPolicy from IndexWriter since deletionPolicy is deep cloned _deletionPolicy = (ZoieIndexDeletionPolicy) (idxWriter.getConfig().getIndexDeletionPolicy()); _indexWriter = idxWriter; return idxWriter; }
From source file:cn.hbu.cs.esearch.index.RAMSearchIndex.java
License:Apache License
@Override public IndexWriter openIndexWriter(Analyzer analyzer, Similarity similarity) throws IOException { if (_indexWriter != null) { return _indexWriter; }//w w w . j a v a2 s .c o m EsearchMergePolicy mergePolicy = new EsearchMergePolicy(); mergePolicy.setMergePolicyParams(_mergePolicyParams); mergePolicy.setUseCompoundFile(false); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, analyzer); config.setOpenMode(OpenMode.CREATE_OR_APPEND); config.setMergeScheduler(_mergeScheduler); config.setMergePolicy(mergePolicy); config.setReaderPooling(false); if (similarity != null) { config.setSimilarity(similarity); } config.setRAMBufferSizeMB(3); IndexWriter idxWriter = new IndexWriter(_directory, config); _indexWriter = idxWriter; return idxWriter; }
From source file:cn.hbu.cs.esearch.store.LuceneStore.java
License:Apache License
@Override public void open() throws IOException { if (closed) { IndexWriterConfig idxWriterConfig = new IndexWriterConfig(Version.LUCENE_43, new StandardAnalyzer(Version.LUCENE_43)); idxWriterConfig.setMergePolicy(new EsearchMergePolicy()); idxWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); indexWriter = new IndexWriter(directory, idxWriterConfig); updateReader();//from w w w. ja va 2s . c o m closed = false; } }
From source file:com.aliasi.lingmed.medline.IndexMedline.java
License: LingPipe license
/** * Run the command. See class documentation above for details on * arguments and behavior.//w w w . j ava2 s . c om */ public void run() { System.out.println("start run"); try { File[] files = getLaterFiles(mDistDir); System.out.println("Total files to process: " + files.length); System.out.println("File names: " + java.util.Arrays.asList(files)); // if (mLogger.isDebugEnabled()) // mLogger.debug("File names: " + java.util.Arrays.asList(files)); if (files.length > 0) { MedlineParser parser = new MedlineParser(true); // true = save raw XML Directory fsDir = FSDirectory.open(mIndex); IndexWriterConfig iwConf = new IndexWriterConfig(Version.LUCENE_36, mCodec.getAnalyzer()); iwConf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); iwConf.setRAMBufferSizeMB(RAM_BUF_SIZE); if (sIsBaseline) { LogDocMergePolicy ldmp = new LogDocMergePolicy(); ldmp.setMergeFactor(MERGE_FACTOR_HI); iwConf.setMergePolicy(ldmp); } IndexWriter indexWriter = new IndexWriter(fsDir, iwConf); for (File file : files) { System.out.println("processing file: " + file); MedlineIndexer indexer = new MedlineIndexer(indexWriter, mCodec); parser.setHandler(indexer); parseFile(parser, file); indexer.close(); recordFile(indexWriter, file.getName()); System.out.println("completed processing file: " + file); } System.out.println("All files parsed, now optimize index"); indexWriter.forceMerge(1); indexWriter.commit(); indexWriter.close(); } System.out.println("Processing complete."); } catch (Exception e) { // mLogger.warn("Unexpected Exception: "+e.getMessage()); // mLogger.warn("stack trace: "+Logging.logStackTrace(e)); // mLogger.warn("Aborting this run"); IllegalStateException e2 = new IllegalStateException(e.getMessage()); e2.setStackTrace(e.getStackTrace()); throw e2; } }
From source file:com.edgenius.wiki.search.lucene.SimpleIndexFactory.java
License:Open Source License
private IndexWriterConfig getIndexWriterConfig() { IndexWriterConfig conf = new IndexWriterConfig(LuceneConfig.VERSION, analyzerProvider.getIndexAnalyzer()); conf.setMaxBufferedDocs(maxBufferedDocs); conf.setTermIndexInterval(termIndexInterval); conf.setWriteLockTimeout(writeLockTimeout); LogMergePolicy mergePolicy = new LogDocMergePolicy(); mergePolicy.setUseCompoundFile(useCompoundFile); mergePolicy.setMaxMergeDocs(maxMergeDocs); mergePolicy.setMergeFactor(mergeFactor); conf.setMergePolicy(mergePolicy); return conf;// www . j ava 2 s .com }
From source file:com.graphhopper.compare.misc.LuceneStorage.java
License:Apache License
public boolean init(boolean forceCreate) { try {/* w w w .j av a 2s . c o m*/ File file = new File("osm.lucene.test"); if (forceCreate) Helper.deleteDir(file); // germany.osm => 3.6 GB on disc for nodes only, 1.5 GB memory usage at the end of the nodes Directory dir = FSDirectory.open(file); IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_35, new KeywordAnalyzer()); LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy(); mp.setMaxMergeMB(3000); cfg.setRAMBufferSizeMB(128); cfg.setTermIndexInterval(512); cfg.setMergePolicy(mp); // specify different formats for id fields etc // -> this breaks 16 of our tests!? Lucene Bug? // cfg.setCodec(new Lucene40Codec() { // // @Override public PostingsFormat getPostingsFormatForField(String field) { // return new Pulsing40PostingsFormat(); // } // }); // cfg.setMaxThreadStates(8); boolean create = !IndexReader.indexExists(dir); cfg.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND); writer = new IndexWriter(dir, cfg); return true; } catch (Exception ex) { logger.error("cannot init lucene storage", ex); return false; } }
From source file:com.impetus.kundera.index.LuceneIndexer.java
License:Apache License
/**
 * Instantiates a new lucene indexer, seeding the in-memory index from the on-disk
 * directory at {@code lucDirPath} when one already exists.
 *
 * @param lucDirPath the luc dir path
 * @throws LuceneIndexingException wrapping any failure while opening the index
 */
private LuceneIndexer(String lucDirPath) {
    try {
        luceneDirPath = lucDirPath;
        File indexDir = new File(luceneDirPath);
        if (indexDir.exists()) {
            // Warm start: copy the existing on-disk index into memory.
            FSDirectory sourceDir = FSDirectory.open(getIndexDirectory().toPath());
            // TODO initialize context.
            index = new RAMDirectory(sourceDir, IOContext.DEFAULT);
        } else {
            // No prior index — start empty.
            index = new RAMDirectory();
        }

        IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
        LogDocMergePolicy policy = new LogDocMergePolicy();
        policy.setMergeFactor(1000);
        writerConfig.setMergePolicy(policy);
        w = new IndexWriter(index, writerConfig);
        w.getConfig().setRAMBufferSizeMB(32);
    } catch (Exception e) {
        log.error("Error while instantiating LuceneIndexer, Caused by :.", e);
        throw new LuceneIndexingException(e);
    }
}
From source file:com.impetus.kundera.index.LuceneIndexer.java
License:Apache License
@Override public final void unindex(EntityMetadata metadata, Object id, KunderaMetadata kunderaMetadata, Class<?> parentClazz) throws LuceneIndexingException { if (log.isDebugEnabled()) log.debug("Unindexing @Entity[{}] for key:{}", metadata.getEntityClazz().getName(), id); String luceneQuery = null;//from w w w .ja v a2s . c om boolean isEmbeddedId = false; MetamodelImpl metaModel = null; if (kunderaMetadata != null && metadata != null) { metaModel = (MetamodelImpl) kunderaMetadata.getApplicationMetadata() .getMetamodel(metadata.getPersistenceUnit()); isEmbeddedId = metaModel.isEmbeddable(metadata.getIdAttribute().getBindableJavaType()); } try { QueryParser qp = new QueryParser(DEFAULT_SEARCHABLE_FIELD, new StandardAnalyzer()); qp.setLowercaseExpandedTerms(false); qp.setAllowLeadingWildcard(true); luceneQuery = getLuceneQuery(metadata, id, isEmbeddedId, metaModel, parentClazz); Query q = qp.parse(luceneQuery); w.deleteDocuments(q); w.commit(); w.close(); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); LogDocMergePolicy logDocMergePolicy = new LogDocMergePolicy(); logDocMergePolicy.setMergeFactor(1000); indexWriterConfig.setMergePolicy(logDocMergePolicy); w = new IndexWriter(index, indexWriterConfig); w.getConfig().setRAMBufferSizeMB(32); // flushInternal(); } catch (Exception e) { log.error("Error while instantiating LuceneIndexer, Caused by :.", e); throw new LuceneIndexingException(e); } }