List of usage examples for org.apache.lucene.index IndexWriterConfig getMergePolicy
@Override
public MergePolicy getMergePolicy()
From source file:perf.IDPerfTest.java
License:Apache License
private static Result testOne(String indexPath, String desc, IDIterator ids, final int minTermsInBlock, final int maxTermsInBlock) throws IOException { System.out.println("\ntest: " + desc + " termBlocks=" + minTermsInBlock + "/" + maxTermsInBlock); Directory dir = FSDirectory.open(new File(indexPath)); //IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, new StandardAnalyzer(Version.LUCENE_48)); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_8, new StandardAnalyzer(Version.LUCENE_4_8)); iwc.setMergeScheduler(new SerialMergeScheduler()); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // So I can walk the files and get the *.tip sizes: iwc.setUseCompoundFile(false);//from ww w .ja v a 2 s. c o m iwc.setCodec(new Lucene53Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return new Lucene50PostingsFormat(minTermsInBlock, maxTermsInBlock); } }); /// 7/7/7 segment structure: iwc.setMaxBufferedDocs(ID_COUNT / 777); iwc.setRAMBufferSizeMB(-1); //iwc.setInfoStream(new PrintStreamInfoStream(System.out)); //iwc.setMergePolicy(new LogDocMergePolicy()); ((TieredMergePolicy) iwc.getMergePolicy()).setFloorSegmentMB(.001); ((TieredMergePolicy) iwc.getMergePolicy()).setNoCFSRatio(0.0); //((LogDocMergePolicy) iwc.getMergePolicy()).setMinMergeDocs(1000); iwc.getMergePolicy().setNoCFSRatio(0.0); IndexWriter w = new IndexWriter(dir, iwc); Document doc = new Document(); FieldType ft = new FieldType(StringField.TYPE_NOT_STORED); ft.setTokenized(true); ft.freeze(); BytesRef idValue = new BytesRef(64); Field idField = new Field("id", new BinaryTokenStream(idValue), ft); doc.add(idField); long t0 = System.nanoTime(); BytesRef[] lookupIDs = new BytesRef[ID_SEARCH_COUNT]; Random random = new Random(17); int lookupCount = 0; double rate = 1.01 * ((double) ID_SEARCH_COUNT) / ID_COUNT; for (int i = 0; i < ID_COUNT; i++) { ids.next(idValue); if (lookupCount < lookupIDs.length && random.nextDouble() <= rate) { lookupIDs[lookupCount++] = BytesRef.deepCopyOf(idValue); } // Trickery: the idsIter changed the idValue which the BinaryTokenStream reuses for each added doc w.addDocument(doc); } if (lookupCount < lookupIDs.length) { throw new RuntimeException("didn't get enough lookup ids: " + lookupCount + " vs " + lookupIDs.length); } long indexTime = System.nanoTime() - t0; System.out.println(" indexing done; waitForMerges..."); w.waitForMerges(); IndexReader r = DirectoryReader.open(w, true); System.out.println(" reader=" + r); shuffle(random, lookupIDs); shuffle(random, lookupIDs); long bestTime = Long.MAX_VALUE; long checksum = 0; List<AtomicReaderContext> leaves = new ArrayList<>(r.leaves()); // Sort largest to smallest: Collections.sort(leaves, new Comparator<AtomicReaderContext>() { @Override public int compare(AtomicReaderContext c1, AtomicReaderContext c2) { return c2.reader().maxDoc() - c1.reader().maxDoc(); } }); TermsEnum[] termsEnums = new TermsEnum[leaves.size()]; DocsEnum[] docsEnums = new DocsEnum[leaves.size()]; int[] docBases = new int[leaves.size()]; for (int i = 0; i < leaves.size(); i++) { //System.out.println("i=" + i + " count=" + leaves.get(i).reader().maxDoc()); termsEnums[i] = leaves.get(i).reader().fields().terms("id").iterator(null); docBases[i] = leaves.get(i).docBase; } long rawLookupCount = 0; int countx = 0; for (int iter = 0; iter < 5; iter++) { t0 = System.nanoTime(); BlockTreeTermsReader.seekExactFastNotFound = 0; BlockTreeTermsReader.seekExactFastRootNotFound = 0; rawLookupCount = 0; for (BytesRef id : lookupIDs) { if (countx++ < 50) { System.out.println(" id=" + id); } boolean found = false; for (int seg = 0; seg < termsEnums.length; seg++) { rawLookupCount++; if (termsEnums[seg].seekExact(id)) { docsEnums[seg] = termsEnums[seg].docs(null, docsEnums[seg], 0); int docID = docsEnums[seg].nextDoc(); if (docID == DocsEnum.NO_MORE_DOCS) { // uh-oh! throw new RuntimeException("id not found: " + id); } // paranoia: checksum += docID + docBases[seg]; found = true; // Optimization vs MultiFields: we don't need to check any more segments since id is PK break; } } if (found == false) { // uh-oh! throw new RuntimeException("id not found: " + id); } } long lookupTime = System.nanoTime() - t0; System.out.println(String.format(Locale.ROOT, " iter=" + iter + " lookupTime=%.3f sec", lookupTime / 1000000000.0)); if (lookupTime < bestTime) { bestTime = lookupTime; System.out.println(" **"); } } long totalBytes = 0; long termsIndexTotalBytes = 0; for (String fileName : dir.listAll()) { long bytes = dir.fileLength(fileName); totalBytes += bytes; if (fileName.endsWith(".tip")) { termsIndexTotalBytes += bytes; } } r.close(); w.rollback(); dir.close(); return new Result(desc, ID_COUNT / (indexTime / 1000000.0), lookupIDs.length / (bestTime / 1000000.0), totalBytes, termsIndexTotalBytes, checksum, BlockTreeTermsReader.seekExactFastNotFound, BlockTreeTermsReader.seekExactFastRootNotFound, rawLookupCount, minTermsInBlock, maxTermsInBlock); }
From source file:perf.IndexAndSearchOpenStreetMaps.java
License:Apache License
private static void createIndex(boolean fast, boolean doForceMerge, boolean doDistanceSort) throws IOException, InterruptedException { CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); int BUFFER_SIZE = 1 << 16; // 64K InputStream is;/* w w w .j av a2 s. c o m*/ if (SMALL) { is = Files.newInputStream(Paths.get(DATA_LOCATION, "latlon.subsetPlusAllLondon.txt")); } else { is = Files.newInputStream(Paths.get(DATA_LOCATION, "latlon.txt")); } BufferedReader reader = new BufferedReader(new InputStreamReader(is, decoder), BUFFER_SIZE); int NUM_THREADS; if (fast) { NUM_THREADS = 4; } else { NUM_THREADS = 1; } int CHUNK = 10000; long t0 = System.nanoTime(); AtomicLong totalCount = new AtomicLong(); for (int part = 0; part < NUM_PARTS; part++) { Directory dir = FSDirectory.open(Paths.get(getName(part, doDistanceSort))); IndexWriterConfig iwc = new IndexWriterConfig(null); iwc.setCodec(getCodec(fast)); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); if (fast) { ((TieredMergePolicy) iwc.getMergePolicy()).setMaxMergedSegmentMB(Double.POSITIVE_INFINITY); iwc.setRAMBufferSizeMB(1024); } else { iwc.setMaxBufferedDocs(109630); iwc.setMergePolicy(new LogDocMergePolicy()); iwc.setMergeScheduler(new SerialMergeScheduler()); } iwc.setInfoStream(new PrintStreamInfoStream(System.out)); IndexWriter w = new IndexWriter(dir, iwc); Thread[] threads = new Thread[NUM_THREADS]; AtomicBoolean finished = new AtomicBoolean(); Object lock = new Object(); final int finalPart = part; for (int t = 0; t < NUM_THREADS; t++) { threads[t] = new Thread() { @Override public void run() { String[] lines = new String[CHUNK]; int chunkCount = 0; while (finished.get() == false) { try { int count = CHUNK; synchronized (lock) { for (int i = 0; i < CHUNK; i++) { String line = reader.readLine(); if (line == null) { count = i; finished.set(true); break; } lines[i] = line; } if (finalPart == 0 && totalCount.get() + count >= 2000000000) { finished.set(true); } } for (int i = 0; i < count; i++) { String[] parts = lines[i].split(","); //long id = Long.parseLong(parts[0]); double lat = Double.parseDouble(parts[1]); double lon = Double.parseDouble(parts[2]); Document doc = new Document(); if (useGeoPoint) { doc.add(new GeoPointField("point", lat, lon, Field.Store.NO)); } else if (useGeo3D || useGeo3DLarge) { doc.add(new Geo3DPoint("point", lat, lon)); } else { doc.add(new LatLonPoint("point", lat, lon)); if (doDistanceSort) { doc.add(new LatLonDocValuesField("point", lat, lon)); } } w.addDocument(doc); long x = totalCount.incrementAndGet(); if (x % 1000000 == 0) { System.out.println(x + "..."); } } chunkCount++; if (false && SMALL == false && chunkCount == 20000) { System.out.println("NOW BREAK EARLY"); break; } } catch (IOException ioe) { throw new RuntimeException(ioe); } } } }; threads[t].start(); } for (Thread thread : threads) { thread.join(); } System.out.println("Part " + part + " is done: w.maxDoc()=" + w.maxDoc()); w.commit(); System.out.println("done commit"); long t1 = System.nanoTime(); System.out.println(((t1 - t0) / 1000000000.0) + " sec to index part " + part); if (doForceMerge) { w.forceMerge(1); long t2 = System.nanoTime(); System.out.println(((t2 - t1) / 1000000000.0) + " sec to force merge part " + part); } w.close(); } //System.out.println(totalCount.get() + " total docs"); //System.out.println("Force merge..."); //w.forceMerge(1); //long t2 = System.nanoTime(); //System.out.println(((t2-t1)/1000000000.0) + " sec to force merge"); //w.close(); //long t3 = System.nanoTime(); //System.out.println(((t3-t2)/1000000000.0) + " sec to close"); //System.out.println(((t3-t2)/1000000000.0) + " sec to close"); }
From source file:perf.PKLookupPerfTest3X.java
License:Apache License
private static void createIndex(final Directory dir, final int docCount) throws IOException { System.out.println("Create index... " + docCount + " docs"); final IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, new WhitespaceAnalyzer(Version.LUCENE_35)); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // 5 segs per level in 3 levels: int mbd = docCount / (5 * 111); iwc.setMaxBufferedDocs(mbd);/* www . j ava2 s .c om*/ iwc.setRAMBufferSizeMB(-1.0); ((TieredMergePolicy) iwc.getMergePolicy()).setUseCompoundFile(false); final IndexWriter w = new IndexWriter(dir, iwc); //w.setInfoStream(System.out); final Document doc = new Document(); final Field field = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); field.setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY); doc.add(field); for (int i = 0; i < docCount; i++) { field.setValue(String.format("%09d", i)); w.addDocument(doc); if ((i + 1) % 1000000 == 0) { System.out.println((i + 1) + "..."); } } w.waitForMerges(); w.close(); }
From source file:perf.SearchPerfTest.java
License:Apache License
private static void _main(String[] clArgs) throws Exception { // args: dirImpl indexPath numThread numIterPerThread // eg java SearchPerfTest /path/to/index 4 100 final Args args = new Args(clArgs); Directory dir0;/*from ww w .j av a 2 s .co m*/ final String dirPath = args.getString("-indexPath") + "/index"; final String dirImpl = args.getString("-dirImpl"); OpenDirectory od = OpenDirectory.get(dirImpl); /* } else if (dirImpl.equals("NativePosixMMapDirectory")) { dir0 = new NativePosixMMapDirectory(new File(dirPath)); ramDir = null; if (doFacets) { facetsDir = new NativePosixMMapDirectory(new File(facetsDirPath)); } } else if (dirImpl.equals("CachingDirWrapper")) { dir0 = new CachingRAMDirectory(new MMapDirectory(new File(dirPath))); ramDir = null; } else if (dirImpl.equals("RAMExceptDirectPostingsDirectory")) { // Load only non-postings files into RAMDir (assumes // Lucene40PF is the wrapped PF): Set<String> postingsExtensions = new HashSet<String>(); postingsExtensions.add("frq"); postingsExtensions.add("prx"); postingsExtensions.add("tip"); postingsExtensions.add("tim"); ramDir = new RAMDirectory(); Directory fsDir = new MMapDirectory(new File(dirPath)); for (String file : fsDir.listAll()) { int idx = file.indexOf('.'); if (idx != -1 && postingsExtensions.contains(file.substring(idx+1, file.length()))) { continue; } fsDir.copy(ramDir, file, file, IOContext.READ); } dir0 = new FileSwitchDirectory(postingsExtensions, fsDir, ramDir, true); if (doFacets) { facetsDir = new RAMDirectory(new SimpleFSDirectory(new File(facetsDirPath)), IOContext.READ); } */ final RAMDirectory ramDir; dir0 = od.open(Paths.get(dirPath)); if (dir0 instanceof RAMDirectory) { ramDir = (RAMDirectory) dir0; } else { ramDir = null; } // TODO: NativeUnixDir? final String analyzer = args.getString("-analyzer"); final String tasksFile = args.getString("-taskSource"); final int searchThreadCount = args.getInt("-searchThreadCount"); final String fieldName = args.getString("-field"); final boolean printHeap = args.getFlag("-printHeap"); final boolean doPKLookup = args.getFlag("-pk"); final int topN = args.getInt("-topN"); final boolean doStoredLoads = args.getFlag("-loadStoredFields"); // Used to choose which random subset of tasks we will // run, to generate the PKLookup tasks, and to generate // any random pct filters: final long staticRandomSeed = args.getLong("-staticSeed"); // Used to shuffle the random subset of tasks: final long randomSeed = args.getLong("-seed"); // TODO: this could be way better. final String similarity = args.getString("-similarity"); // now reflect final Class<? extends Similarity> simClazz = Class .forName("org.apache.lucene.search.similarities." + similarity).asSubclass(Similarity.class); final Similarity sim = simClazz.newInstance(); System.out.println("Using dir impl " + dir0.getClass().getName()); System.out.println("Analyzer " + analyzer); System.out.println("Similarity " + similarity); System.out.println("Search thread count " + searchThreadCount); System.out.println("topN " + topN); System.out.println("JVM " + (Constants.JRE_IS_64BIT ? "is" : "is not") + " 64bit"); System.out.println("Pointer is " + RamUsageEstimator.NUM_BYTES_OBJECT_REF + " bytes"); final Analyzer a; if (analyzer.equals("EnglishAnalyzer")) { a = new EnglishAnalyzer(); } else if (analyzer.equals("ClassicAnalyzer")) { a = new ClassicAnalyzer(); } else if (analyzer.equals("StandardAnalyzer")) { a = new StandardAnalyzer(); } else if (analyzer.equals("StandardAnalyzerNoStopWords")) { a = new StandardAnalyzer(CharArraySet.EMPTY_SET); } else if (analyzer.equals("ShingleStandardAnalyzer")) { a = new ShingleAnalyzerWrapper(new StandardAnalyzer(CharArraySet.EMPTY_SET), 2, 2, ShingleFilter.DEFAULT_TOKEN_SEPARATOR, true, true, ShingleFilter.DEFAULT_FILLER_TOKEN); } else { throw new RuntimeException("unknown analyzer " + analyzer); } final ReferenceManager<IndexSearcher> mgr; final IndexWriter writer; final Directory dir; final String commit = args.getString("-commit"); final String hiliteImpl = args.getString("-hiliteImpl"); final String logFile = args.getString("-log"); final long tSearcherStart = System.currentTimeMillis(); final boolean verifyCheckSum = !args.getFlag("-skipVerifyChecksum"); final boolean recacheFilterDeletes = args.getFlag("-recacheFilterDeletes"); if (recacheFilterDeletes) { throw new UnsupportedOperationException("recacheFilterDeletes was deprecated"); } if (args.getFlag("-nrt")) { // TODO: get taxoReader working here too // TODO: factor out & share this CL processing w/ Indexer final int indexThreadCount = args.getInt("-indexThreadCount"); final String lineDocsFile = args.getString("-lineDocsFile"); final float docsPerSecPerThread = args.getFloat("-docsPerSecPerThread"); final float reopenEverySec = args.getFloat("-reopenEverySec"); final boolean storeBody = args.getFlag("-store"); final boolean tvsBody = args.getFlag("-tvs"); final boolean useCFS = args.getFlag("-cfs"); final String defaultPostingsFormat = args.getString("-postingsFormat"); final String idFieldPostingsFormat = args.getString("-idFieldPostingsFormat"); final boolean verbose = args.getFlag("-verbose"); final boolean cloneDocs = args.getFlag("-cloneDocs"); final Mode mode = Mode.valueOf(args.getString("-mode", "update").toUpperCase(Locale.ROOT)); final long reopenEveryMS = (long) (1000 * reopenEverySec); if (verbose) { InfoStream.setDefault(new PrintStreamInfoStream(System.out)); } if (!dirImpl.equals("RAMDirectory") && !dirImpl.equals("RAMExceptDirectPostingsDirectory")) { System.out.println("Wrap NRTCachingDirectory"); dir0 = new NRTCachingDirectory(dir0, 20, 400.0); } dir = dir0; final IndexWriterConfig iwc = new IndexWriterConfig(a); iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND); iwc.setRAMBufferSizeMB(256.0); iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE); // TODO: also RAMDirExceptDirect...? need to // ... block deletes against wrapped FSDir? if (dirImpl.equals("RAMDirectory")) { // Let IW remove files only referenced by starting commit: iwc.setIndexDeletionPolicy(new KeepNoCommitsDeletionPolicy()); } if (commit != null && commit.length() > 0) { System.out.println("Opening writer on commit=" + commit); iwc.setIndexCommit(PerfUtils.findCommitPoint(commit, dir)); } ((TieredMergePolicy) iwc.getMergePolicy()).setNoCFSRatio(useCFS ? 1.0 : 0.0); //((TieredMergePolicy) iwc.getMergePolicy()).setMaxMergedSegmentMB(1024); //((TieredMergePolicy) iwc.getMergePolicy()).setReclaimDeletesWeight(3.0); //((TieredMergePolicy) iwc.getMergePolicy()).setMaxMergeAtOnce(4); final Codec codec = new Lucene62Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return PostingsFormat .forName(field.equals("id") ? idFieldPostingsFormat : defaultPostingsFormat); } }; iwc.setCodec(codec); final ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) iwc.getMergeScheduler(); // Only let one merge run at a time... // ... but queue up up to 4, before index thread is stalled: cms.setMaxMergesAndThreads(4, 1); iwc.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() { @Override public void warm(LeafReader reader) throws IOException { final long t0 = System.currentTimeMillis(); //System.out.println("DO WARM: " + reader); IndexSearcher s = new IndexSearcher(reader); s.setQueryCache(null); // don't bench the cache s.search(new TermQuery(new Term(fieldName, "united")), 10); final long t1 = System.currentTimeMillis(); System.out.println("warm segment=" + reader + " numDocs=" + reader.numDocs() + ": took " + (t1 - t0) + " msec"); } }); writer = new IndexWriter(dir, iwc); System.out.println("Initial writer.maxDoc()=" + writer.maxDoc()); // TODO: add -nrtBodyPostingsOffsets instead of // hardwired false: boolean addDVFields = mode == Mode.BDV_UPDATE || mode == Mode.NDV_UPDATE; LineFileDocs lineFileDocs = new LineFileDocs(lineDocsFile, false, storeBody, tvsBody, false, cloneDocs, null, null, null, addDVFields); IndexThreads threads = new IndexThreads(new Random(17), writer, new AtomicBoolean(false), lineFileDocs, indexThreadCount, -1, false, false, mode, docsPerSecPerThread, null, -1.0, -1); threads.start(); mgr = new SearcherManager(writer, new SearcherFactory() { @Override public IndexSearcher newSearcher(IndexReader reader, IndexReader previous) { IndexSearcher s = new IndexSearcher(reader); s.setQueryCache(null); // don't bench the cache s.setSimilarity(sim); return s; } }); System.out.println("reopen every " + reopenEverySec); Thread reopenThread = new Thread() { @Override public void run() { try { final long startMS = System.currentTimeMillis(); int reopenCount = 1; while (true) { final long sleepMS = startMS + (reopenCount * reopenEveryMS) - System.currentTimeMillis(); if (sleepMS < 0) { System.out.println("WARNING: reopen fell behind by " + Math.abs(sleepMS) + " ms"); } else { Thread.sleep(sleepMS); } Thread.sleep(sleepMS); mgr.maybeRefresh(); reopenCount++; IndexSearcher s = mgr.acquire(); try { if (ramDir != null) { System.out.println(String.format(Locale.ENGLISH, "%.1fs: index: %d bytes in RAMDir; writer.maxDoc()=%d; searcher.maxDoc()=%d; searcher.numDocs()=%d", (System.currentTimeMillis() - startMS) / 1000.0, ramDir.ramBytesUsed(), writer.maxDoc(), s.getIndexReader().maxDoc(), s.getIndexReader().numDocs())); //String[] l = ramDir.listAll(); //Arrays.sort(l); //for(String f : l) { //System.out.println(" " + f + ": " + ramDir.fileLength(f)); //} } else { System.out.println(String.format(Locale.ENGLISH, "%.1fs: done reopen; writer.maxDoc()=%d; searcher.maxDoc()=%d; searcher.numDocs()=%d", (System.currentTimeMillis() - startMS) / 1000.0, writer.maxDoc(), s.getIndexReader().maxDoc(), s.getIndexReader().numDocs())); } } finally { mgr.release(s); } } } catch (Exception e) { throw new RuntimeException(e); } } }; reopenThread.setName("ReopenThread"); reopenThread.setPriority(4 + Thread.currentThread().getPriority()); reopenThread.start(); } else { dir = dir0; writer = null; final DirectoryReader reader; if (commit != null && commit.length() > 0) { System.out.println("Opening searcher on commit=" + commit); reader = DirectoryReader.open(PerfUtils.findCommitPoint(commit, dir)); } else { // open last commit reader = DirectoryReader.open(dir); } IndexSearcher s = new IndexSearcher(reader); s.setQueryCache(null); // don't bench the cache s.setSimilarity(sim); System.out.println("maxDoc=" + reader.maxDoc() + " numDocs=" + reader.numDocs() + " %tg deletes=" + (100. * reader.maxDoc() / reader.numDocs())); mgr = new SingleIndexSearcher(s); } System.out.println((System.currentTimeMillis() - tSearcherStart) + " msec to init searcher/NRT"); { IndexSearcher s = mgr.acquire(); try { System.out.println("Searcher: numDocs=" + s.getIndexReader().numDocs() + " maxDoc=" + s.getIndexReader().maxDoc() + ": " + s); } finally { mgr.release(s); } } //System.out.println("searcher=" + searcher); FacetsConfig facetsConfig = new FacetsConfig(); facetsConfig.setHierarchical("Date", true); TaxonomyReader taxoReader; Path taxoPath = Paths.get(args.getString("-indexPath"), "facets"); Directory taxoDir = od.open(taxoPath); if (DirectoryReader.indexExists(taxoDir)) { taxoReader = new DirectoryTaxonomyReader(taxoDir); System.out.println("Taxonomy has " + taxoReader.getSize() + " ords"); } else { taxoReader = null; } final Random staticRandom = new Random(staticRandomSeed); final Random random = new Random(randomSeed); final DirectSpellChecker spellChecker = new DirectSpellChecker(); final IndexState indexState = new IndexState(mgr, taxoReader, fieldName, spellChecker, hiliteImpl, facetsConfig); final QueryParser queryParser = new QueryParser("body", a); TaskParser taskParser = new TaskParser(indexState, queryParser, fieldName, topN, staticRandom, doStoredLoads); final TaskSource tasks; if (tasksFile.startsWith("server:")) { int idx = tasksFile.indexOf(':', 8); if (idx == -1) { throw new RuntimeException( "server is missing the port; should be server:interface:port (got: " + tasksFile + ")"); } String iface = tasksFile.substring(7, idx); int port = Integer.valueOf(tasksFile.substring(1 + idx)); RemoteTaskSource remoteTasks = new RemoteTaskSource(iface, port, searchThreadCount, taskParser); // nocommit must stop thread? tasks = remoteTasks; } else { // Load the tasks from a file: final int taskRepeatCount = args.getInt("-taskRepeatCount"); final int numTaskPerCat = args.getInt("-tasksPerCat"); tasks = new LocalTaskSource(indexState, taskParser, tasksFile, staticRandom, random, numTaskPerCat, taskRepeatCount, doPKLookup); System.out.println("Task repeat count " + taskRepeatCount); System.out.println("Tasks file " + tasksFile); System.out.println("Num task per cat " + numTaskPerCat); } args.check(); // Evil respeller: //spellChecker.setMinPrefix(0); //spellChecker.setMaxInspections(1024); final TaskThreads taskThreads = new TaskThreads(tasks, indexState, searchThreadCount); Thread.sleep(10); final long startNanos = System.nanoTime(); taskThreads.start(); taskThreads.finish(); final long endNanos = System.nanoTime(); System.out.println("\n" + ((endNanos - startNanos) / 1000000.0) + " msec total"); final List<Task> allTasks = tasks.getAllTasks(); PrintStream out = new PrintStream(logFile); if (allTasks != null) { // Tasks were local: verify checksums: // indexState.setDocIDToID(); final Map<Task, Task> tasksSeen = new HashMap<Task, Task>(); out.println("\nResults for " + allTasks.size() + " tasks:"); boolean fail = false; for (final Task task : allTasks) { if (verifyCheckSum) { final Task other = tasksSeen.get(task); if (other != null) { if (task.checksum() != other.checksum()) { System.out.println("\nTASK:"); task.printResults(System.out, indexState); System.out.println("\nOTHER TASK:"); other.printResults(System.out, indexState); fail = true; //throw new RuntimeException("task " + task + " hit different checksums: " + task.checksum() + " vs " + other.checksum() + " other=" + other); } } else { tasksSeen.put(task, task); } } out.println("\nTASK: " + task); out.println(" " + (task.runTimeNanos / 1000000.0) + " msec"); out.println(" thread " + task.threadID); task.printResults(out, indexState); } if (fail) { throw new RuntimeException("some tasks got different results across different threads"); } allTasks.clear(); } mgr.close(); if (taxoReader != null) { taxoReader.close(); } if (writer != null) { // Don't actually commit any index changes: writer.rollback(); } dir.close(); if (printHeap) { // Try to get RAM usage -- some ideas poached from http://www.javaworld.com/javaworld/javatips/jw-javatip130.html final Runtime runtime = Runtime.getRuntime(); long usedMem1 = PerfUtils.usedMemory(runtime); long usedMem2 = Long.MAX_VALUE; for (int iter = 0; iter < 10; iter++) { runtime.runFinalization(); runtime.gc(); Thread.yield(); Thread.sleep(100); usedMem2 = usedMem1; usedMem1 = PerfUtils.usedMemory(runtime); } out.println("\nHEAP: " + PerfUtils.usedMemory(runtime)); } out.close(); }
From source file:perf.TermsQueryPerf.java
License:Apache License
public static void main(String[] args) throws Exception { List<BytesRef> lookupIDs = new ArrayList<>(); Random random = new Random(17); double rate = 1.01 * ((double) NUM_QUERIES * ID_SEARCH_COUNT) / ID_INDEX_COUNT; Path indexPath = Paths.get(args[0]); boolean doIndex = Files.exists(indexPath) == false; Directory dir = FSDirectory.open(indexPath); if (doIndex) { IndexWriterConfig iwc = new IndexWriterConfig(new WhitespaceAnalyzer()); iwc.setMergeScheduler(new SerialMergeScheduler()); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // So I can walk the files and get the *.tip sizes: iwc.setUseCompoundFile(false);/*from w ww .j ava 2 s . c o m*/ /// 7/7/7 segment structure: iwc.setMaxBufferedDocs(ID_INDEX_COUNT / 777); iwc.setRAMBufferSizeMB(-1); ((TieredMergePolicy) iwc.getMergePolicy()).setFloorSegmentMB(.001); ((TieredMergePolicy) iwc.getMergePolicy()).setNoCFSRatio(0.0); IndexWriter w = new IndexWriter(dir, iwc); // IDIterator ids = zeroPadSequentialIDs(10); IDIterator ids = randomIDs(10, random); BytesRef idValue = new BytesRef(64); for (int i = 0; i < ID_INDEX_COUNT; i++) { ids.next(idValue); Document doc = new Document(); doc.add(new StringField("id", idValue, Field.Store.NO)); w.addDocument(doc); if (random.nextDouble() <= rate && lookupIDs.size() < NUM_QUERIES * ID_SEARCH_COUNT) { lookupIDs.add(BytesRef.deepCopyOf(idValue)); } if (i % 100000 == 0) { System.out.println(i + " docs..."); } } w.close(); } IndexReader r = DirectoryReader.open(dir); if (doIndex == false) { System.out.println("Build lookup ids"); TermsEnum termsEnum = MultiFields.getTerms(r, "id").iterator(); BytesRef idValue; while ((idValue = termsEnum.next()) != null) { if (random.nextDouble() <= rate && lookupIDs.size() < NUM_QUERIES * ID_SEARCH_COUNT) { lookupIDs.add(BytesRef.deepCopyOf(idValue)); //System.out.println("add: " + idValue); } } shuffle(random, lookupIDs); System.out.println("Done build lookup ids"); } IndexSearcher s = new IndexSearcher(r); if (lookupIDs.size() < NUM_QUERIES * ID_SEARCH_COUNT) { throw new RuntimeException( "didn't get enough lookup ids: " + (NUM_QUERIES * ID_SEARCH_COUNT) + " vs " + lookupIDs.size()); } List<Query> queries = new ArrayList<Query>(); for (int i = 0; i < NUM_QUERIES; i++) { List<BytesRef> sortedTermBytes = new ArrayList<>(); for (BytesRef term : lookupIDs.subList(i * ID_SEARCH_COUNT, (i + 1) * ID_SEARCH_COUNT)) { sortedTermBytes.add(term); } Collections.sort(sortedTermBytes); // nocommit only do this if term count is high enough? // nocommit: we can be more efficient here, go straight to binary: Query query = new AutomatonQuery(new Term("id", "manyterms"), Automata.makeStringUnion(sortedTermBytes)); //((MultiTermQuery) query).setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE); //Query query = new TermsQuery("id", lookupIDs.subList(i*ID_SEARCH_COUNT, (i+1)*ID_SEARCH_COUNT)); queries.add(query); } // TODO: also include construction time of queries long best = Long.MAX_VALUE; for (int iter = 0; iter < 100; iter++) { long t0 = System.nanoTime(); long totCount = 0; for (int i = 0; i < NUM_QUERIES; i++) { //Query query = new TermsQuery("id", lookupIDs.subList(i*ID_SEARCH_COUNT, (i+1)*ID_SEARCH_COUNT)); Query query = queries.get(i); totCount += s.search(query, 10).totalHits; } if (totCount != NUM_QUERIES * ID_SEARCH_COUNT) { throw new RuntimeException( "totCount=" + totCount + " but expected " + (NUM_QUERIES * ID_SEARCH_COUNT)); } long t = System.nanoTime() - t0; System.out.println("ITER: " + iter + ": " + (t / 1000000.) + " msec"); if (t < best) { System.out.println(" **"); best = t; } } IOUtils.close(r, dir); }