List of usage examples for the org.apache.lucene.index.SerialMergeScheduler constructor
public SerialMergeScheduler()
From source file:DVBench.java
License:Apache License
static void doBench(int bpv) throws Exception { File file = new File("/data/indices/dvbench"); file.mkdirs();//from ww w. j av a 2s . c o m Directory dir = FSDirectory.open(file); IndexWriterConfig config = new IndexWriterConfig(null); config.setOpenMode(OpenMode.CREATE); config.setMergeScheduler(new SerialMergeScheduler()); config.setMergePolicy(new LogDocMergePolicy()); config.setMaxBufferedDocs(25000); IndexWriter writer = new IndexWriter(dir, config); MyRandom r = new MyRandom(); int numdocs = 400000; Document doc = new Document(); Field dv = new NumericDocValuesField("dv", 0); Field inv = new LongField("inv", 0, Field.Store.NO); Field boxed = new BinaryDocValuesField("boxed", new BytesRef(8)); Field boxed2 = new BinaryDocValuesField("boxed2", new BytesRef(8)); doc.add(dv); doc.add(inv); doc.add(boxed); doc.add(boxed2); for (int i = 0; i < numdocs; i++) { // defeat blockpackedwriter final long value; if (i % 8192 == 0) { value = bpv == 64 ? Long.MIN_VALUE : 0; } else if (i % 8192 == 1) { value = bpv == 64 ? 
Long.MAX_VALUE : (1L << bpv) - 1; } else { value = r.nextLong(bpv); } dv.setLongValue(value); inv.setLongValue(value); box(value, boxed.binaryValue()); box(value, boxed2.binaryValue()); boxed2.binaryValue().length = (bpv + 7) / 8; // fixed length writer.addDocument(doc); } writer.close(); // run dv search tests String description = "dv (bpv=" + bpv + ")"; DirectoryReader reader = DirectoryReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); searcher.setQueryCache(null); // don't bench the cache int hash = 0; // warmup hash += search(description, searcher, "dv", 300, true); hash += search(description, searcher, "dv", 300, false); // Uninverting Map<String, UninvertingReader.Type> mapping = Collections.singletonMap("inv", UninvertingReader.Type.LONG); DirectoryReader uninv = UninvertingReader.wrap(reader, mapping); IndexSearcher searcher2 = new IndexSearcher(uninv); searcher2.setQueryCache(null); // don't bench the cache description = "fc (bpv=" + bpv + ")"; // warmup hash += search(description, searcher2, "inv", 300, true); hash += search(description, searcher2, "inv", 300, false); // Boxed inside binary DirectoryReader boxedReader = new BinaryAsVLongReader(reader); IndexSearcher searcher3 = new IndexSearcher(boxedReader); searcher3.setQueryCache(null); // don't bench the cache description = "boxed (bpv=" + bpv + ")"; // warmup hash += search(description, searcher3, "boxed", 300, true); hash += search(description, searcher3, "boxed", 300, false); description = "boxed fixed-length (bpv=" + bpv + ")"; // warmup hash += search(description, searcher3, "boxed2", 300, true); hash += search(description, searcher3, "boxed2", 300, false); if (hash == 3) { // wont happen System.out.println("hash=" + hash); } reader.close(); dir.close(); }
From source file:IndexAndSearchOpenStreetMaps1D.java
License:Apache License
private static void createIndex() throws IOException { long t0 = System.nanoTime(); CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); int BUFFER_SIZE = 1 << 16; // 64K InputStream is = Files .newInputStream(Paths.get("/lucenedata/open-street-maps/latlon.subsetPlusAllLondon.txt")); BufferedReader reader = new BufferedReader(new InputStreamReader(is, decoder), BUFFER_SIZE); Directory dir = FSDirectory.open(Paths.get("/c/tmp/bkdtest1d" + (USE_NF ? "_nf" : ""))); IndexWriterConfig iwc = new IndexWriterConfig(null); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); //iwc.setMaxBufferedDocs(109630); //iwc.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); iwc.setRAMBufferSizeMB(256.0);/*w w w .j av a2s . co m*/ iwc.setMergePolicy(new LogDocMergePolicy()); iwc.setMergeScheduler(new SerialMergeScheduler()); iwc.setInfoStream(new PrintStreamInfoStream(System.out)); IndexWriter w = new IndexWriter(dir, iwc); int count = 0; byte[] scratch = new byte[4]; while (true) { String line = reader.readLine(); if (line == null) { break; } String[] parts = line.split(","); //long id = Long.parseLong(parts[0]); int lat = (int) (1000000. * Double.parseDouble(parts[1])); //int lon = (int) (1000000. * Double.parseDouble(parts[2])); Document doc = new Document(); if (USE_NF) { doc.add(new LegacyIntField("latnum", lat, Field.Store.NO)); //doc.add(new LongField("lonnum", lon, Field.Store.NO)); } else { doc.add(new IntPoint("lat", lat)); //doc.add(new SortedNumericDocValuesField("lon", lon)); } w.addDocument(doc); count++; if (count % 1000000 == 0) { System.out.println(count + "..."); } } //w.forceMerge(1); w.commit(); System.out.println(w.maxDoc() + " total docs"); w.close(); long t1 = System.nanoTime(); System.out.println(((t1 - t0) / 1000000000.0) + " sec to build index"); }
From source file:cn.hbu.cs.esearch.index.DiskSearchIndex.java
License:Apache License
/**
 * Creates a disk-backed search index.
 *
 * <p>The directory is requested from {@code dirMgr} up front; the original comment says this
 * creates the index signature file. Merge parameters, the reader dispenser and a
 * {@link SerialMergeScheduler} are then initialised, and no deletion policy is set.
 *
 * @param dirMgr    manager providing the on-disk directory
 * @param decorator reader decorator handed to the reader dispenser
 * @param idxMgr    owning index manager, passed to the superclass
 * @throws RuntimeException wrapping the {@link IOException} if the directory cannot be obtained
 */
public DiskSearchIndex(DirectoryManager dirMgr, IndexReaderDecorator<R> decorator,
        SearchIndexManager<R> idxMgr) {
    super(idxMgr, true);
    _dirMgr = dirMgr;

    // create index signature file
    try {
        _dirMgr.getDirectory(true);
    } catch (IOException e) {
        // The cause travels with the rethrown exception, so the failure is reported once by
        // whoever handles it instead of also being dumped to stderr here.
        throw new RuntimeException("Failed to obtain index directory", e);
    }

    _mergePolicyParams = new MergePolicyParams();
    _dispenser = new IndexReaderDispenser<R>(_dirMgr, decorator, this);
    _mergeScheduler = new SerialMergeScheduler();
    _deletionPolicy = null;
}
From source file:cn.hbu.cs.esearch.index.RAMSearchIndex.java
License:Apache License
public RAMSearchIndex(String version, IndexReaderDecorator<R> decorator, SearchIndexManager<R> idxMgr, Directory ramIdxDir, File backingdir) { super(idxMgr, true); _directory = ramIdxDir;/*from w w w.j a v a2 s . c o m*/ _backingdir = backingdir; _version = version; _decorator = decorator; _currentReader = null; _mergeScheduler = new SerialMergeScheduler(); _mergePolicyParams = new EsearchMergePolicy.MergePolicyParams(); _mergePolicyParams.setNumLargeSegments(3); _mergePolicyParams.setMergeFactor(3); _mergePolicyParams.setMaxSmallSegments(4); }
From source file:com.jaeksoft.searchlib.index.WriterLocal.java
License:Open Source License
/**
 * Opens an {@link IndexWriter} on this index's directory.
 *
 * <p>The writer uses a {@link SerialMergeScheduler}, the write-lock timeout from the index
 * configuration and, when the configuration supplies one, a custom similarity.
 *
 * @param create {@code true} to open in {@code CREATE_OR_APPEND} mode, {@code false} for
 *               {@code APPEND}
 * @return the newly opened writer
 */
private final IndexWriter open(boolean create)
        throws CorruptIndexException, LockObtainFailedException, IOException, SearchLibException {
    final OpenMode mode;
    if (create) {
        mode = OpenMode.CREATE_OR_APPEND;
    } else {
        mode = OpenMode.APPEND;
    }

    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_36, null);
    writerConfig.setOpenMode(mode);
    writerConfig.setMergeScheduler(new SerialMergeScheduler());
    writerConfig.setWriteLockTimeout(indexConfig.getWriteLockTimeout());

    // Only override the default similarity when the configuration provides one.
    final Similarity customSimilarity = indexConfig.getNewSimilarityInstance();
    if (customSimilarity != null) {
        writerConfig.setSimilarity(customSimilarity);
    }

    Logging.debug("WriteLocal open " + indexDirectory.getDirectory());
    return new IndexWriter(indexDirectory.getDirectory(), writerConfig);
}
From source file:com.jaeksoft.searchlib.index.WriterLucene.java
License:Open Source License
/**
 * Opens an {@link IndexWriter} on this index's directory.
 *
 * <p>The writer uses a {@link SerialMergeScheduler} and, when the index configuration supplies
 * one, a custom similarity.
 *
 * @param create {@code true} to open in {@code CREATE_OR_APPEND} mode, {@code false} for
 *               {@code APPEND}
 * @return the newly opened writer
 */
private final IndexWriter open(boolean create)
        throws CorruptIndexException, LockObtainFailedException, IOException, SearchLibException {
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, null);
    config.setOpenMode(create ? OpenMode.CREATE_OR_APPEND : OpenMode.APPEND);
    config.setMergeScheduler(new SerialMergeScheduler());

    Similarity similarity = indexConfig.getNewSimilarityInstance();
    if (similarity != null) {
        config.setSimilarity(similarity);
    }

    // Name this class in the message: the previous text was copied from WriterLocal and made
    // the debug log attribute the open to the wrong writer.
    Logging.debug("WriterLucene open " + indexDirectory.getDirectory());
    return new IndexWriter(indexDirectory.getDirectory(), config);
}
From source file:com.lucid.solr.sidecar.SidecarIndexReaderFactory.java
License:Apache License
DirectoryReader buildParallelReader(DirectoryReader main, SolrIndexSearcher source, boolean rebuild) { try {/* w w w. j a v a 2 s.com*/ if (source == null) { throw new Exception("Source collection is missing."); } // create as a sibling path of the main index Directory d = main.directory(); File primaryDir = null; if (d instanceof FSDirectory) { String path = ((FSDirectory) d).getDirectory().getPath(); primaryDir = new File(path); sidecarIndex = new File(primaryDir.getParentFile(), sidecarIndexLocation); } else { String secondaryPath = System.getProperty("java.io.tmpdir") + File.separator + sidecarIndexLocation + "-" + System.currentTimeMillis(); sidecarIndex = new File(secondaryPath); } // create a new tmp dir for the secondary indexes File secondaryIndex = new File(sidecarIndex, System.currentTimeMillis() + "-index"); if (rebuild) { safeDelete(sidecarIndex); } parallelFields.addAll(source.getFieldNames()); parallelFields.remove("id"); LOG.debug("building a new index"); Directory dir = FSDirectory.open(secondaryIndex); if (IndexWriter.isLocked(dir)) { // try forcing unlock try { IndexWriter.unlock(dir); } catch (Exception e) { LOG.warn("Failed to unlock " + secondaryIndex); } } int[] mergeTargets; AtomicReader[] subReaders = SidecarIndexReader.getSequentialSubReaders(main); if (subReaders == null || subReaders.length == 0) { mergeTargets = new int[] { main.maxDoc() }; } else { mergeTargets = new int[subReaders.length]; for (int i = 0; i < subReaders.length; i++) { mergeTargets[i] = subReaders[i].maxDoc(); } } Version ver = currentCore.getLatestSchema().getDefaultLuceneMatchVersion(); IndexWriterConfig cfg = new IndexWriterConfig(ver, currentCore.getLatestSchema().getAnalyzer()); //cfg.setInfoStream(System.err); cfg.setMergeScheduler(new SerialMergeScheduler()); cfg.setMergePolicy(new SidecarMergePolicy(mergeTargets, false)); IndexWriter iw = new IndexWriter(dir, cfg); LOG.info("processing " + main.maxDoc() + " docs / " + main.numDeletedDocs() + " dels in main 
index"); int boostedDocs = 0; Bits live = MultiFields.getLiveDocs(main); int targetPos = 0; int nextTarget = mergeTargets[targetPos]; BytesRef idRef = new BytesRef(); for (int i = 0; i < main.maxDoc(); i++) { if (i == nextTarget) { iw.commit(); nextTarget = nextTarget + mergeTargets[++targetPos]; } if (live != null && !live.get(i)) { addDummy(iw); // this is required to preserve doc numbers. continue; } else { DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(docIdField); main.document(i, visitor); Document doc = visitor.getDocument(); // get docId String id = doc.get(docIdField); if (id == null) { LOG.debug("missing id, docNo=" + i); addDummy(iw); continue; } else { // find the data, if any doc = lookup(source, id, idRef, parallelFields); if (doc == null) { LOG.debug("missing boost data, docId=" + id); addDummy(iw); continue; } else { LOG.debug("adding boost data, docId=" + id + ", b=" + doc); iw.addDocument(doc); boostedDocs++; } } } } iw.close(); DirectoryReader other = DirectoryReader.open(dir); LOG.info("SidecarIndexReader with " + boostedDocs + " boosted documents."); SidecarIndexReader pr = createSidecarIndexReader(main, other, sourceCollection, secondaryIndex); return pr; } catch (Exception e) { LOG.warn("Unable to build parallel index: " + e.toString(), e); LOG.warn("Proceeding with single main index."); try { return new SidecarIndexReader(this, main, null, SidecarIndexReader.getSequentialSubReaders(main), sourceCollection, null); } catch (Exception e1) { LOG.warn("Unexpected exception, returning single main index", e1); return main; } } }
From source file:com.svenjacobs.lugaene.GaeIndexWriterConfigHelper.java
License:Apache License
/**
 * Builds an {@link IndexWriterConfig} suitable for the GAE runtime environment.
 *
 * <p>The only adjustment made here is installing a {@link SerialMergeScheduler}.
 *
 * @param version  Lucene version
 * @param analyzer Lucene analyzer
 * @return IndexWriterConfig instance
 */
public static IndexWriterConfig create(final Version version, final Analyzer analyzer) {
    final SerialMergeScheduler serialScheduler = new SerialMergeScheduler();

    final IndexWriterConfig gaeConfig = new IndexWriterConfig(version, analyzer);
    gaeConfig.setMergeScheduler(serialScheduler);

    return gaeConfig;
}
From source file:jetbrains.exodus.lucene.ExodusLuceneTestsBase.java
License:Apache License
/**
 * Replaces {@code indexConfig} with a new writer configuration built from
 * {@code LUCENE_VERSION} and {@code analyzer}, using a {@link SerialMergeScheduler} and a
 * single thread state.
 */
protected void createIndexWriterConfig() {
    final IndexWriterConfig freshConfig = new IndexWriterConfig(LUCENE_VERSION, analyzer);
    // Publish to the field first, then tune it, matching the original order of effects.
    indexConfig = freshConfig;
    freshConfig.setMergeScheduler(new SerialMergeScheduler());
    freshConfig.setMaxThreadStates(1);
}
From source file:org.alfresco.repo.search.impl.lucene.index.IndexInfo.java
License:Open Source License
/** * Construct an index in the given directory. * //from w w w . j av a 2 s . com * @param indexDirectory File * @param config LuceneConfig */ private IndexInfo(File indexDirectory, LuceneConfig config) { super(); initialiseTransitions(); this.config = config; if (config != null) { this.readWriteLock = new ReentrantReadWriteLock(config.getFairLocking()); this.maxFieldLength = config.getIndexerMaxFieldLength(); this.threadPoolExecutor = config.getThreadPoolExecutor(); IndexInfo.useNIOMemoryMapping = config.getUseNioMemoryMapping(); this.maxDocsForInMemoryMerge = config.getMaxDocsForInMemoryMerge(); this.maxRamInMbForInMemoryMerge = config.getMaxRamInMbForInMemoryMerge(); this.maxDocsForInMemoryIndex = config.getMaxDocsForInMemoryIndex(); this.maxRamInMbForInMemoryIndex = config.getMaxRamInMbForInMemoryIndex(); this.writerMaxBufferedDocs = config.getWriterMaxBufferedDocs(); this.writerRamBufferSizeMb = config.getWriterRamBufferSizeMb(); this.writerMergeFactor = config.getWriterMergeFactor(); this.writerMaxMergeDocs = config.getWriterMaxMergeDocs(); this.mergerMaxBufferedDocs = config.getMergerMaxBufferedDocs(); this.mergerRamBufferSizeMb = config.getMergerRamBufferSizeMb(); this.mergerMergeFactor = config.getMergerMergeFactor(); this.mergerMaxMergeDocs = config.getMergerMaxMergeDocs(); this.termIndexInterval = config.getTermIndexInterval(); this.mergerTargetOverlays = config.getMergerTargetOverlayCount(); this.mergerTargetIndexes = config.getMergerTargetIndexCount(); this.mergerTargetOverlaysBlockingFactor = config.getMergerTargetOverlaysBlockingFactor(); // Work out the relative path of the index try { String indexRoot = new File(config.getIndexRootLocation()).getCanonicalPath(); this.relativePath = indexDirectory.getCanonicalPath().substring(indexRoot.length() + 1); } catch (IOException e) { throw new AlfrescoRuntimeException("Failed to determine index relative path", e); } } else { this.readWriteLock = new ReentrantReadWriteLock(false); // need a default thread 
pool .... TraceableThreadFactory threadFactory = new TraceableThreadFactory(); threadFactory.setThreadDaemon(true); threadFactory.setThreadPriority(5); threadPoolExecutor = new ThreadPoolExecutor(10, 10, 90, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(), threadFactory, new ThreadPoolExecutor.CallerRunsPolicy()); // Create a 'fake' relative path try { this.relativePath = indexDirectory.getCanonicalPath(); int sepIndex = this.relativePath.indexOf(File.separator); if (sepIndex != -1) { if (this.relativePath.length() > sepIndex + 1) { this.relativePath = this.relativePath.substring(sepIndex + 1); } else { this.relativePath = ""; } } } catch (IOException e) { throw new AlfrescoRuntimeException("Failed to determine index relative path", e); } } // Create an empty in memory index IndexWriter writer; try { writer = new IndexWriter(emptyIndex, new AlfrescoStandardAnalyser(), true, MaxFieldLength.LIMITED); writer.setUseCompoundFile(writerUseCompoundFile); writer.setMaxBufferedDocs(writerMaxBufferedDocs); writer.setRAMBufferSizeMB(writerRamBufferSizeMb); writer.setMergeFactor(writerMergeFactor); writer.setMaxMergeDocs(writerMaxMergeDocs); writer.setWriteLockTimeout(writeLockTimeout); writer.setMaxFieldLength(maxFieldLength); writer.setTermIndexInterval(termIndexInterval); writer.setMergeScheduler(new SerialMergeScheduler()); writer.setMergePolicy(new LogDocMergePolicy()); writer.close(); } catch (IOException e) { throw new IndexerException("Failed to create an empty in memory index!"); } this.indexDirectory = indexDirectory; // Make sure the directory exists if (!this.indexDirectory.exists()) { if (!this.indexDirectory.mkdirs()) { throw new AlfrescoRuntimeException("Failed to create index directory"); } } if (!this.indexDirectory.isDirectory()) { throw new AlfrescoRuntimeException("The index must be held in a directory"); } // Create the info files. 
File indexInfoFile = new File(this.indexDirectory, INDEX_INFO); File indexInfoBackupFile = new File(this.indexDirectory, INDEX_INFO_BACKUP); if (createFile(indexInfoFile) && createFile(indexInfoBackupFile)) { // If both files required creation this is a new index version = 0; } // Open the files and channels for the index info file and the backup this.indexInfoRAF = openFile(indexInfoFile); this.indexInfoChannel = this.indexInfoRAF.getChannel(); this.indexInfoBackupRAF = openFile(indexInfoBackupFile); this.indexInfoBackupChannel = this.indexInfoBackupRAF.getChannel(); // If the index found no info files (i.e. it is new), check if there is // an old style index and covert it. if (version == 0) { // Check if an old style index exists final File oldIndex = new File(this.indexDirectory, OLD_INDEX); if (IndexReader.indexExists(oldIndex)) { getWriteLock(); try { doWithFileLock(new LockWork<Object>() { public Object doWork() throws Exception { IndexWriter writer; try { writer = new IndexWriter(oldIndex, new AlfrescoStandardAnalyser(), false, MaxFieldLength.LIMITED); writer.setUseCompoundFile(writerUseCompoundFile); writer.setMaxBufferedDocs(writerMaxBufferedDocs); writer.setRAMBufferSizeMB(writerRamBufferSizeMb); writer.setMergeFactor(writerMergeFactor); writer.setMaxMergeDocs(writerMaxMergeDocs); writer.setWriteLockTimeout(writeLockTimeout); writer.setMaxFieldLength(maxFieldLength); writer.setTermIndexInterval(termIndexInterval); writer.setMergeScheduler(new SerialMergeScheduler()); writer.setMergePolicy(new LogDocMergePolicy()); writer.optimize(); long docs = writer.numDocs(); writer.close(); IndexEntry entry = new IndexEntry(IndexType.INDEX, OLD_INDEX, "", TransactionStatus.COMMITTED, "", docs, 0, false); indexEntries.put(OLD_INDEX, entry); writeStatus(); // The index exists and we should initialise the single reader registerReferenceCountingIndexReader(entry.getName(), buildReferenceCountingIndexReader(entry.getName(), entry.getDocumentCount())); } catch (IOException 
e) { throw new IndexerException("Failed to optimise old index"); } return null; } public boolean canRetry() { return false; } }); } finally { releaseWriteLock(); } } } // The index exists else if (version == -1) { getWriteLock(); try { doWithFileLock(new LockWork<Object>() { public Object doWork() throws Exception { setStatusFromFile(); // If the index is not shared we can do some easy clean // up if (!indexIsShared) { HashSet<String> deletable = new HashSet<String>(); // clean up for (IndexEntry entry : indexEntries.values()) { switch (entry.getStatus()) { // states which can be deleted // We could check prepared states can be // committed. case ACTIVE: case MARKED_ROLLBACK: case NO_TRANSACTION: case PREPARING: case ROLLEDBACK: case ROLLINGBACK: case MERGE_TARGET: case UNKNOWN: case PREPARED: case DELETABLE: if (s_logger.isInfoEnabled()) { s_logger.info("Deleting index entry " + entry); } entry.setStatus(TransactionStatus.DELETABLE); deletable.add(entry.getName()); break; // States which are in mid-transition which we // can roll back to the committed state case COMMITTED_DELETING: case MERGE: if (s_logger.isInfoEnabled()) { s_logger.info("Resetting merge to committed " + entry); } entry.setStatus(TransactionStatus.COMMITTED); registerReferenceCountingIndexReader(entry.getName(), buildReferenceCountingIndexReader(entry.getName(), entry.getDocumentCount())); break; // Complete committing (which is post database // commit) case COMMITTING: // do the commit if (s_logger.isInfoEnabled()) { s_logger.info("Committing " + entry); } entry.setStatus(TransactionStatus.COMMITTED); registerReferenceCountingIndexReader(entry.getName(), buildReferenceCountingIndexReader(entry.getName(), entry.getDocumentCount())); break; // States that require no action case COMMITTED: registerReferenceCountingIndexReader(entry.getName(), buildReferenceCountingIndexReader(entry.getName(), entry.getDocumentCount())); break; default: // nothing to do break; } } // Delete entries that are not 
required invalidateMainReadersFromFirst(deletable); for (String id : deletable) { indexEntries.remove(id); } clearOldReaders(); cleaner.schedule(); merger.schedule(); // persist the new state writeStatus(); } return null; } public boolean canRetry() { return false; } }); } finally { releaseWriteLock(); } } // Need to do with file lock - must share info about other readers to support this with shared indexer // implementation getWriteLock(); try { LockWork<Object> work = new DeleteUnknownGuidDirectories(); doWithFileLock(work); } finally { releaseWriteLock(); } // Run the cleaner around every 20 secods - this just makes the request to the thread pool timer.schedule(new TimerTask() { @Override public void run() { cleaner.schedule(); } }, 0, 20000); publishDiscoveryEvent(); }