List of usage examples for org.apache.lucene.analysis CharArraySet EMPTY_SET
CharArraySet EMPTY_SET
To view the source code for org.apache.lucene.analysis.CharArraySet.EMPTY_SET,
click the Source Link.
From source file:perf.NRTPerfTest.java
License:Apache License
public static void main(String[] args) throws Exception { final String dirImpl = args[0]; final String dirPath = args[1]; final String commit = args[2]; final String lineDocFile = args[3]; final long seed = Long.parseLong(args[4]); final double docsPerSec = Double.parseDouble(args[5]); final double runTimeSec = Double.parseDouble(args[6]); final int numSearchThreads = Integer.parseInt(args[7]); int numIndexThreads = Integer.parseInt(args[8]); if (numIndexThreads > docsPerSec) { System.out.println("INFO: numIndexThreads higher than docsPerSec, adjusting numIndexThreads"); numIndexThreads = (int) Math.max(1, docsPerSec); }/*w ww. j a va2s . c o m*/ final double reopenPerSec = Double.parseDouble(args[9]); final Mode mode = Mode.valueOf(args[10].toUpperCase(Locale.ROOT)); statsEverySec = Integer.parseInt(args[11]); final boolean doCommit = args[12].equals("yes"); final double mergeMaxWriteMBPerSec = Double.parseDouble(args[13]); if (mergeMaxWriteMBPerSec != 0.0) { throw new IllegalArgumentException("mergeMaxWriteMBPerSec must be 0.0 until LUCENE-3202 is done"); } final String tasksFile = args[14]; if (Files.notExists(Paths.get(tasksFile))) { throw new FileNotFoundException("tasks file not found " + tasksFile); } final boolean hasProcMemInfo = Files.exists(Paths.get("/proc/meminfo")); System.out.println("DIR=" + dirImpl); System.out.println("Index=" + dirPath); System.out.println("Commit=" + commit); System.out.println("LineDocs=" + lineDocFile); System.out.println("Docs/sec=" + docsPerSec); System.out.println("Run time sec=" + runTimeSec); System.out.println("NumSearchThreads=" + numSearchThreads); System.out.println("NumIndexThreads=" + numIndexThreads); System.out.println("Reopen/sec=" + reopenPerSec); System.out.println("Mode=" + mode); System.out.println("tasksFile=" + tasksFile); System.out.println("Record stats every " + statsEverySec + " seconds"); final int count = (int) ((runTimeSec / statsEverySec) + 2); docsIndexedByTime = new AtomicInteger[count]; 
searchesByTime = new AtomicInteger[count]; totalUpdateTimeByTime = new AtomicLong[count]; final AtomicInteger reopensByTime[] = new AtomicInteger[count]; for (int i = 0; i < count; i++) { docsIndexedByTime[i] = new AtomicInteger(); searchesByTime[i] = new AtomicInteger(); totalUpdateTimeByTime[i] = new AtomicLong(); reopensByTime[i] = new AtomicInteger(); } System.out.println( "Max merge MB/sec = " + (mergeMaxWriteMBPerSec <= 0.0 ? "unlimited" : mergeMaxWriteMBPerSec)); final Random random = new Random(seed); final LineFileDocs docs = new LineFileDocs(lineDocFile, true, false, false, false, false, null, new HashSet<String>(), null, true); final Directory dir0; if (dirImpl.equals("MMapDirectory")) { dir0 = new MMapDirectory(Paths.get(dirPath)); } else if (dirImpl.equals("NIOFSDirectory")) { dir0 = new NIOFSDirectory(Paths.get(dirPath)); } else if (dirImpl.equals("SimpleFSDirectory")) { dir0 = new SimpleFSDirectory(Paths.get(dirPath)); } else { docs.close(); throw new RuntimeException("unknown directory impl \"" + dirImpl + "\""); } //final NRTCachingDirectory dir = new NRTCachingDirectory(dir0, 10, 200.0, mergeMaxWriteMBPerSec); final NRTCachingDirectory dir = new NRTCachingDirectory(dir0, 20, 400.0); //final MergeScheduler ms = dir.getMergeScheduler(); //final Directory dir = dir0; //final MergeScheduler ms = new ConcurrentMergeScheduler(); final String field = "body"; // Open an IW on the requested commit point, but, don't // delete other (past or future) commit points: // TODO take Analyzer as parameter StandardAnalyzer analyzer = new StandardAnalyzer(CharArraySet.EMPTY_SET); final IndexWriterConfig conf = new IndexWriterConfig(analyzer); conf.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE); conf.setRAMBufferSizeMB(256.0); //iwc.setMergeScheduler(ms); final Codec codec = new Lucene62Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { if (field.equals("id")) { return PostingsFormat.forName("Memory"); } else { return 
PostingsFormat.forName("Lucene50"); } } private final DocValuesFormat direct = DocValuesFormat.forName("Direct"); @Override public DocValuesFormat getDocValuesFormatForField(String field) { return direct; } }; conf.setCodec(codec); /* iwc.setMergePolicy(new LogByteSizeMergePolicy()); ((LogMergePolicy) iwc.getMergePolicy()).setUseCompoundFile(false); ((LogMergePolicy) iwc.getMergePolicy()).setMergeFactor(30); ((LogByteSizeMergePolicy) iwc.getMergePolicy()).setMaxMergeMB(10000.0); System.out.println("USING LOG BS MP"); */ TieredMergePolicy tmp = new TieredMergePolicy(); tmp.setNoCFSRatio(0.0); tmp.setMaxMergedSegmentMB(1000000.0); //tmp.setReclaimDeletesWeight(3.0); //tmp.setMaxMergedSegmentMB(7000.0); conf.setMergePolicy(tmp); if (!commit.equals("none")) { conf.setIndexCommit(PerfUtils.findCommitPoint(commit, dir)); } // Make sure merges run @ higher prio than indexing: final ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) conf.getMergeScheduler(); cms.setMaxMergesAndThreads(4, 1); conf.setMergedSegmentWarmer(new MergedReaderWarmer(field)); final IndexWriter w = new IndexWriter(dir, conf); // w.setInfoStream(System.out); IndexThreads.UpdatesListener updatesListener = new IndexThreads.UpdatesListener() { long startTimeNS; @Override public void beforeUpdate() { startTimeNS = System.nanoTime(); } @Override public void afterUpdate() { int idx = currentQT.get(); totalUpdateTimeByTime[idx].addAndGet(System.nanoTime() - startTimeNS); docsIndexedByTime[idx].incrementAndGet(); } }; IndexThreads indexThreads = new IndexThreads(random, w, new AtomicBoolean(false), docs, numIndexThreads, -1, false, false, mode, (float) (docsPerSec / numIndexThreads), updatesListener, -1.0, w.maxDoc()); // NativePosixUtil.mlockTermsDict(startR, "id"); final SearcherManager manager = new SearcherManager(w, null); IndexSearcher s = manager.acquire(); try { System.out.println("Reader=" + s.getIndexReader()); } finally { manager.release(s); } final DirectSpellChecker spellChecker = new 
DirectSpellChecker(); final IndexState indexState = new IndexState(manager, null, field, spellChecker, "PostingsHighlighter", null); final QueryParser qp = new QueryParser(field, analyzer); TaskParser taskParser = new TaskParser(indexState, qp, field, 10, random, true); final TaskSource tasks = new RandomTaskSource(taskParser, tasksFile, random) { @Override public void taskDone(Task task, long queueTimeNS, int toalHitCount) { searchesByTime[currentQT.get()].incrementAndGet(); } }; System.out.println("Task repeat count 1"); System.out.println("Tasks file " + tasksFile); System.out.println("Num task per cat 20"); final TaskThreads taskThreads = new TaskThreads(tasks, indexState, numSearchThreads); final ReopenThread reopenThread = new ReopenThread(reopenPerSec, manager, reopensByTime, runTimeSec); reopenThread.setName("ReopenThread"); reopenThread.setPriority(4 + Thread.currentThread().getPriority()); System.out.println("REOPEN PRI " + reopenThread.getPriority()); indexThreads.start(); reopenThread.start(); taskThreads.start(); Thread.currentThread().setPriority(5 + Thread.currentThread().getPriority()); System.out.println("TIMER PRI " + Thread.currentThread().getPriority()); //System.out.println("Start: " + new Date()); final long startMS = System.currentTimeMillis(); final long stopMS = startMS + (long) (runTimeSec * 1000); int lastQT = -1; while (true) { final long t = System.currentTimeMillis(); if (t >= stopMS) { break; } final int qt = (int) ((t - startMS) / statsEverySec / 1000); currentQT.set(qt); if (qt != lastQT) { final int prevQT = lastQT; lastQT = qt; if (prevQT > 0) { final String other; if (hasProcMemInfo) { other = " D=" + getLinuxDirtyBytes(); } else { other = ""; } int prev = prevQT - 1; System.out.println(String.format("QT %d searches=%d docs=%d reopens=%s totUpdateTime=%d", prev, searchesByTime[prev].get(), docsIndexedByTime[prev].get(), reopensByTime[prev].get() + other, TimeUnit.NANOSECONDS.toMillis(totalUpdateTimeByTime[prev].get()))); } } 
Thread.sleep(25); } taskThreads.stop(); reopenThread.join(); indexThreads.stop(); System.out.println("By time:"); for (int i = 0; i < searchesByTime.length - 2; i++) { System.out.println(String.format(" %d searches=%d docs=%d reopens=%d totUpdateTime=%d", i * statsEverySec, searchesByTime[i].get(), docsIndexedByTime[i].get(), reopensByTime[i].get(), TimeUnit.NANOSECONDS.toMillis(totalUpdateTimeByTime[i].get()))); } manager.close(); if (doCommit) { w.close(); } else { w.rollback(); } }
From source file:perf.SearchPerfTest.java
License:Apache License
private static void _main(String[] clArgs) throws Exception { // args: dirImpl indexPath numThread numIterPerThread // eg java SearchPerfTest /path/to/index 4 100 final Args args = new Args(clArgs); Directory dir0;//ww w.j a va 2s . c o m final String dirPath = args.getString("-indexPath") + "/index"; final String dirImpl = args.getString("-dirImpl"); OpenDirectory od = OpenDirectory.get(dirImpl); /* } else if (dirImpl.equals("NativePosixMMapDirectory")) { dir0 = new NativePosixMMapDirectory(new File(dirPath)); ramDir = null; if (doFacets) { facetsDir = new NativePosixMMapDirectory(new File(facetsDirPath)); } } else if (dirImpl.equals("CachingDirWrapper")) { dir0 = new CachingRAMDirectory(new MMapDirectory(new File(dirPath))); ramDir = null; } else if (dirImpl.equals("RAMExceptDirectPostingsDirectory")) { // Load only non-postings files into RAMDir (assumes // Lucene40PF is the wrapped PF): Set<String> postingsExtensions = new HashSet<String>(); postingsExtensions.add("frq"); postingsExtensions.add("prx"); postingsExtensions.add("tip"); postingsExtensions.add("tim"); ramDir = new RAMDirectory(); Directory fsDir = new MMapDirectory(new File(dirPath)); for (String file : fsDir.listAll()) { int idx = file.indexOf('.'); if (idx != -1 && postingsExtensions.contains(file.substring(idx+1, file.length()))) { continue; } fsDir.copy(ramDir, file, file, IOContext.READ); } dir0 = new FileSwitchDirectory(postingsExtensions, fsDir, ramDir, true); if (doFacets) { facetsDir = new RAMDirectory(new SimpleFSDirectory(new File(facetsDirPath)), IOContext.READ); } */ final RAMDirectory ramDir; dir0 = od.open(Paths.get(dirPath)); if (dir0 instanceof RAMDirectory) { ramDir = (RAMDirectory) dir0; } else { ramDir = null; } // TODO: NativeUnixDir? 
final String analyzer = args.getString("-analyzer"); final String tasksFile = args.getString("-taskSource"); final int searchThreadCount = args.getInt("-searchThreadCount"); final String fieldName = args.getString("-field"); final boolean printHeap = args.getFlag("-printHeap"); final boolean doPKLookup = args.getFlag("-pk"); final int topN = args.getInt("-topN"); final boolean doStoredLoads = args.getFlag("-loadStoredFields"); // Used to choose which random subset of tasks we will // run, to generate the PKLookup tasks, and to generate // any random pct filters: final long staticRandomSeed = args.getLong("-staticSeed"); // Used to shuffle the random subset of tasks: final long randomSeed = args.getLong("-seed"); // TODO: this could be way better. final String similarity = args.getString("-similarity"); // now reflect final Class<? extends Similarity> simClazz = Class .forName("org.apache.lucene.search.similarities." + similarity).asSubclass(Similarity.class); final Similarity sim = simClazz.newInstance(); System.out.println("Using dir impl " + dir0.getClass().getName()); System.out.println("Analyzer " + analyzer); System.out.println("Similarity " + similarity); System.out.println("Search thread count " + searchThreadCount); System.out.println("topN " + topN); System.out.println("JVM " + (Constants.JRE_IS_64BIT ? 
"is" : "is not") + " 64bit"); System.out.println("Pointer is " + RamUsageEstimator.NUM_BYTES_OBJECT_REF + " bytes"); final Analyzer a; if (analyzer.equals("EnglishAnalyzer")) { a = new EnglishAnalyzer(); } else if (analyzer.equals("ClassicAnalyzer")) { a = new ClassicAnalyzer(); } else if (analyzer.equals("StandardAnalyzer")) { a = new StandardAnalyzer(); } else if (analyzer.equals("StandardAnalyzerNoStopWords")) { a = new StandardAnalyzer(CharArraySet.EMPTY_SET); } else if (analyzer.equals("ShingleStandardAnalyzer")) { a = new ShingleAnalyzerWrapper(new StandardAnalyzer(CharArraySet.EMPTY_SET), 2, 2, ShingleFilter.DEFAULT_TOKEN_SEPARATOR, true, true, ShingleFilter.DEFAULT_FILLER_TOKEN); } else { throw new RuntimeException("unknown analyzer " + analyzer); } final ReferenceManager<IndexSearcher> mgr; final IndexWriter writer; final Directory dir; final String commit = args.getString("-commit"); final String hiliteImpl = args.getString("-hiliteImpl"); final String logFile = args.getString("-log"); final long tSearcherStart = System.currentTimeMillis(); final boolean verifyCheckSum = !args.getFlag("-skipVerifyChecksum"); final boolean recacheFilterDeletes = args.getFlag("-recacheFilterDeletes"); if (recacheFilterDeletes) { throw new UnsupportedOperationException("recacheFilterDeletes was deprecated"); } if (args.getFlag("-nrt")) { // TODO: get taxoReader working here too // TODO: factor out & share this CL processing w/ Indexer final int indexThreadCount = args.getInt("-indexThreadCount"); final String lineDocsFile = args.getString("-lineDocsFile"); final float docsPerSecPerThread = args.getFloat("-docsPerSecPerThread"); final float reopenEverySec = args.getFloat("-reopenEverySec"); final boolean storeBody = args.getFlag("-store"); final boolean tvsBody = args.getFlag("-tvs"); final boolean useCFS = args.getFlag("-cfs"); final String defaultPostingsFormat = args.getString("-postingsFormat"); final String idFieldPostingsFormat = 
args.getString("-idFieldPostingsFormat"); final boolean verbose = args.getFlag("-verbose"); final boolean cloneDocs = args.getFlag("-cloneDocs"); final Mode mode = Mode.valueOf(args.getString("-mode", "update").toUpperCase(Locale.ROOT)); final long reopenEveryMS = (long) (1000 * reopenEverySec); if (verbose) { InfoStream.setDefault(new PrintStreamInfoStream(System.out)); } if (!dirImpl.equals("RAMDirectory") && !dirImpl.equals("RAMExceptDirectPostingsDirectory")) { System.out.println("Wrap NRTCachingDirectory"); dir0 = new NRTCachingDirectory(dir0, 20, 400.0); } dir = dir0; final IndexWriterConfig iwc = new IndexWriterConfig(a); iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND); iwc.setRAMBufferSizeMB(256.0); iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE); // TODO: also RAMDirExceptDirect...? need to // ... block deletes against wrapped FSDir? if (dirImpl.equals("RAMDirectory")) { // Let IW remove files only referenced by starting commit: iwc.setIndexDeletionPolicy(new KeepNoCommitsDeletionPolicy()); } if (commit != null && commit.length() > 0) { System.out.println("Opening writer on commit=" + commit); iwc.setIndexCommit(PerfUtils.findCommitPoint(commit, dir)); } ((TieredMergePolicy) iwc.getMergePolicy()).setNoCFSRatio(useCFS ? 1.0 : 0.0); //((TieredMergePolicy) iwc.getMergePolicy()).setMaxMergedSegmentMB(1024); //((TieredMergePolicy) iwc.getMergePolicy()).setReclaimDeletesWeight(3.0); //((TieredMergePolicy) iwc.getMergePolicy()).setMaxMergeAtOnce(4); final Codec codec = new Lucene62Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return PostingsFormat .forName(field.equals("id") ? idFieldPostingsFormat : defaultPostingsFormat); } }; iwc.setCodec(codec); final ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) iwc.getMergeScheduler(); // Only let one merge run at a time... // ... 
but queue up up to 4, before index thread is stalled: cms.setMaxMergesAndThreads(4, 1); iwc.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() { @Override public void warm(LeafReader reader) throws IOException { final long t0 = System.currentTimeMillis(); //System.out.println("DO WARM: " + reader); IndexSearcher s = new IndexSearcher(reader); s.setQueryCache(null); // don't bench the cache s.search(new TermQuery(new Term(fieldName, "united")), 10); final long t1 = System.currentTimeMillis(); System.out.println("warm segment=" + reader + " numDocs=" + reader.numDocs() + ": took " + (t1 - t0) + " msec"); } }); writer = new IndexWriter(dir, iwc); System.out.println("Initial writer.maxDoc()=" + writer.maxDoc()); // TODO: add -nrtBodyPostingsOffsets instead of // hardwired false: boolean addDVFields = mode == Mode.BDV_UPDATE || mode == Mode.NDV_UPDATE; LineFileDocs lineFileDocs = new LineFileDocs(lineDocsFile, false, storeBody, tvsBody, false, cloneDocs, null, null, null, addDVFields); IndexThreads threads = new IndexThreads(new Random(17), writer, new AtomicBoolean(false), lineFileDocs, indexThreadCount, -1, false, false, mode, docsPerSecPerThread, null, -1.0, -1); threads.start(); mgr = new SearcherManager(writer, new SearcherFactory() { @Override public IndexSearcher newSearcher(IndexReader reader, IndexReader previous) { IndexSearcher s = new IndexSearcher(reader); s.setQueryCache(null); // don't bench the cache s.setSimilarity(sim); return s; } }); System.out.println("reopen every " + reopenEverySec); Thread reopenThread = new Thread() { @Override public void run() { try { final long startMS = System.currentTimeMillis(); int reopenCount = 1; while (true) { final long sleepMS = startMS + (reopenCount * reopenEveryMS) - System.currentTimeMillis(); if (sleepMS < 0) { System.out.println("WARNING: reopen fell behind by " + Math.abs(sleepMS) + " ms"); } else { Thread.sleep(sleepMS); } Thread.sleep(sleepMS); mgr.maybeRefresh(); reopenCount++; IndexSearcher s = 
mgr.acquire(); try { if (ramDir != null) { System.out.println(String.format(Locale.ENGLISH, "%.1fs: index: %d bytes in RAMDir; writer.maxDoc()=%d; searcher.maxDoc()=%d; searcher.numDocs()=%d", (System.currentTimeMillis() - startMS) / 1000.0, ramDir.ramBytesUsed(), writer.maxDoc(), s.getIndexReader().maxDoc(), s.getIndexReader().numDocs())); //String[] l = ramDir.listAll(); //Arrays.sort(l); //for(String f : l) { //System.out.println(" " + f + ": " + ramDir.fileLength(f)); //} } else { System.out.println(String.format(Locale.ENGLISH, "%.1fs: done reopen; writer.maxDoc()=%d; searcher.maxDoc()=%d; searcher.numDocs()=%d", (System.currentTimeMillis() - startMS) / 1000.0, writer.maxDoc(), s.getIndexReader().maxDoc(), s.getIndexReader().numDocs())); } } finally { mgr.release(s); } } } catch (Exception e) { throw new RuntimeException(e); } } }; reopenThread.setName("ReopenThread"); reopenThread.setPriority(4 + Thread.currentThread().getPriority()); reopenThread.start(); } else { dir = dir0; writer = null; final DirectoryReader reader; if (commit != null && commit.length() > 0) { System.out.println("Opening searcher on commit=" + commit); reader = DirectoryReader.open(PerfUtils.findCommitPoint(commit, dir)); } else { // open last commit reader = DirectoryReader.open(dir); } IndexSearcher s = new IndexSearcher(reader); s.setQueryCache(null); // don't bench the cache s.setSimilarity(sim); System.out.println("maxDoc=" + reader.maxDoc() + " numDocs=" + reader.numDocs() + " %tg deletes=" + (100. 
* reader.maxDoc() / reader.numDocs())); mgr = new SingleIndexSearcher(s); } System.out.println((System.currentTimeMillis() - tSearcherStart) + " msec to init searcher/NRT"); { IndexSearcher s = mgr.acquire(); try { System.out.println("Searcher: numDocs=" + s.getIndexReader().numDocs() + " maxDoc=" + s.getIndexReader().maxDoc() + ": " + s); } finally { mgr.release(s); } } //System.out.println("searcher=" + searcher); FacetsConfig facetsConfig = new FacetsConfig(); facetsConfig.setHierarchical("Date", true); TaxonomyReader taxoReader; Path taxoPath = Paths.get(args.getString("-indexPath"), "facets"); Directory taxoDir = od.open(taxoPath); if (DirectoryReader.indexExists(taxoDir)) { taxoReader = new DirectoryTaxonomyReader(taxoDir); System.out.println("Taxonomy has " + taxoReader.getSize() + " ords"); } else { taxoReader = null; } final Random staticRandom = new Random(staticRandomSeed); final Random random = new Random(randomSeed); final DirectSpellChecker spellChecker = new DirectSpellChecker(); final IndexState indexState = new IndexState(mgr, taxoReader, fieldName, spellChecker, hiliteImpl, facetsConfig); final QueryParser queryParser = new QueryParser("body", a); TaskParser taskParser = new TaskParser(indexState, queryParser, fieldName, topN, staticRandom, doStoredLoads); final TaskSource tasks; if (tasksFile.startsWith("server:")) { int idx = tasksFile.indexOf(':', 8); if (idx == -1) { throw new RuntimeException( "server is missing the port; should be server:interface:port (got: " + tasksFile + ")"); } String iface = tasksFile.substring(7, idx); int port = Integer.valueOf(tasksFile.substring(1 + idx)); RemoteTaskSource remoteTasks = new RemoteTaskSource(iface, port, searchThreadCount, taskParser); // nocommit must stop thread? 
tasks = remoteTasks; } else { // Load the tasks from a file: final int taskRepeatCount = args.getInt("-taskRepeatCount"); final int numTaskPerCat = args.getInt("-tasksPerCat"); tasks = new LocalTaskSource(indexState, taskParser, tasksFile, staticRandom, random, numTaskPerCat, taskRepeatCount, doPKLookup); System.out.println("Task repeat count " + taskRepeatCount); System.out.println("Tasks file " + tasksFile); System.out.println("Num task per cat " + numTaskPerCat); } args.check(); // Evil respeller: //spellChecker.setMinPrefix(0); //spellChecker.setMaxInspections(1024); final TaskThreads taskThreads = new TaskThreads(tasks, indexState, searchThreadCount); Thread.sleep(10); final long startNanos = System.nanoTime(); taskThreads.start(); taskThreads.finish(); final long endNanos = System.nanoTime(); System.out.println("\n" + ((endNanos - startNanos) / 1000000.0) + " msec total"); final List<Task> allTasks = tasks.getAllTasks(); PrintStream out = new PrintStream(logFile); if (allTasks != null) { // Tasks were local: verify checksums: // indexState.setDocIDToID(); final Map<Task, Task> tasksSeen = new HashMap<Task, Task>(); out.println("\nResults for " + allTasks.size() + " tasks:"); boolean fail = false; for (final Task task : allTasks) { if (verifyCheckSum) { final Task other = tasksSeen.get(task); if (other != null) { if (task.checksum() != other.checksum()) { System.out.println("\nTASK:"); task.printResults(System.out, indexState); System.out.println("\nOTHER TASK:"); other.printResults(System.out, indexState); fail = true; //throw new RuntimeException("task " + task + " hit different checksums: " + task.checksum() + " vs " + other.checksum() + " other=" + other); } } else { tasksSeen.put(task, task); } } out.println("\nTASK: " + task); out.println(" " + (task.runTimeNanos / 1000000.0) + " msec"); out.println(" thread " + task.threadID); task.printResults(out, indexState); } if (fail) { throw new RuntimeException("some tasks got different results across different 
threads"); } allTasks.clear(); } mgr.close(); if (taxoReader != null) { taxoReader.close(); } if (writer != null) { // Don't actually commit any index changes: writer.rollback(); } dir.close(); if (printHeap) { // Try to get RAM usage -- some ideas poached from http://www.javaworld.com/javaworld/javatips/jw-javatip130.html final Runtime runtime = Runtime.getRuntime(); long usedMem1 = PerfUtils.usedMemory(runtime); long usedMem2 = Long.MAX_VALUE; for (int iter = 0; iter < 10; iter++) { runtime.runFinalization(); runtime.gc(); Thread.yield(); Thread.sleep(100); usedMem2 = usedMem1; usedMem1 = PerfUtils.usedMemory(runtime); } out.println("\nHEAP: " + PerfUtils.usedMemory(runtime)); } out.close(); }
From source file:perf.TestAnalyzerPerf.java
License:Apache License
/**
 * Entry point: passes the same input file to {@code testAnalyzer} once per analyzer
 * configuration, in a fixed order (Standard without stop words, LowerCase, EdgeNGrams,
 * Shingles, WordDelimiterFilter).
 *
 * @param args {@code args[0]} is the path of the input file
 *     (presumably a Wikipedia line-docs file, going by the variable name -- confirm)
 * @throws Exception if any {@code testAnalyzer} run fails
 */
public static void main(String[] args) throws Exception {
  final String linesPath = args[0];
  final File linesFile = new File(linesPath);

  // Each analyzer is created immediately before its own run.
  testAnalyzer("Standard", linesFile, new StandardAnalyzer(CharArraySet.EMPTY_SET));
  testAnalyzer("LowerCase", linesFile, new LowerCaseAnalyzer());
  testAnalyzer("EdgeNGrams", linesFile, new EdgeNGramsAnalyzer());
  testAnalyzer("Shingles", linesFile, new ShinglesAnalyzer());
  testAnalyzer("WordDelimiterFilter", linesFile, new WDFAnalyzer());
}
From source file:reaction.news.index.MyPortugueseAnalyzer.java
License:Apache License
/**
 * Creates an analyzer that uses the supplied stop words.
 *
 * <p>Delegates to the three-argument constructor, passing {@link CharArraySet#EMPTY_SET}
 * as the third argument (presumably an empty stem-exclusion set -- confirm against that
 * constructor).
 *
 * @param matchVersion lucene compatibility version
 * @param stopwords a stopword set
 */
public MyPortugueseAnalyzer(Version matchVersion, Set<?> stopwords) {
  this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
From source file:text_analyzer.SpanishAnalyzer.java
License:Apache License
/**
 * Creates an analyzer that uses the supplied stop words.
 *
 * <p>Delegates to the three-argument constructor, passing {@link CharArraySet#EMPTY_SET}
 * as the third argument (presumably an empty stem-exclusion set -- confirm against that
 * constructor).
 *
 * @param matchVersion lucene compatibility version
 * @param stopwords a stopword set
 */
public SpanishAnalyzer(Version matchVersion, Set<?> stopwords) {
  this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}