List of usage examples for org.apache.lucene.index CheckIndex setInfoStream
public void setInfoStream(PrintStream out, boolean verbose)
From source file: perf.AutoPrefixPerf.java
License: Apache License
public static void main(String[] args) throws Exception { String numbersFile = args[0]; String queriesFile = args[1]; Path indexPath = Paths.get(args[2]); int precStep = Integer.parseInt(args[3]); boolean useNumericField = (precStep != 0); int maxTermsInPrefix; int minTermsInPrefix; if (useNumericField == false) { minTermsInPrefix = Integer.parseInt(args[4]); maxTermsInPrefix = Integer.parseInt(args[5]); } else {/*from w w w .j a v a 2 s. c o m*/ minTermsInPrefix = 0; maxTermsInPrefix = 0; } BytesRefBuilder binaryToken = new BytesRefBuilder(); binaryToken.grow(8); binaryToken.setLength(8); Directory dir = FSDirectory.open(indexPath); if (Files.notExists(indexPath) == false) { IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer()); iwc.setMaxBufferedDocs(30000); iwc.setRAMBufferSizeMB(-1); iwc.setMergePolicy(new LogDocMergePolicy()); final PostingsFormat pf; if (useNumericField) { // Disable auto-prefix when testing NumericField! if (minTermsInPrefix != 0) { throw new IllegalArgumentException("only precStep or minTermsInPrefix should be non-zero"); } pf = new Lucene50PostingsFormat(25, 48, 0, 0); } else { /* if (minTermsInPrefix == 0) { throw new IllegalArgumentException("one of precStep or minTermsInPrefix must be non-zero"); } */ pf = new Lucene50PostingsFormat(25, 48, minTermsInPrefix, maxTermsInPrefix); //pf = new Lucene50PostingsFormat(25, 48, minTermsInPrefix, Integer.MAX_VALUE); } iwc.setCodec(new Lucene53Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return pf; } }); iwc.setInfoStream(new PrintStreamInfoStream(System.out)); iwc.setMergeScheduler(new SerialMergeScheduler()); //TieredMergePolicy tmp = (TieredMergePolicy) iwc.getMergePolicy(); //tmp.setFloorSegmentMB(.1); //ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) iwc.getMergeScheduler(); // More concurrency (for SSD) //cms.setMaxMergesAndThreads(5, 3); final IndexWriter w = new IndexWriter(dir, iwc); Document doc = new Document(); Field 
field; if (useNumericField) { FieldType longFieldType = new FieldType(LongField.TYPE_NOT_STORED); longFieldType.setNumericPrecisionStep(precStep); longFieldType.freeze(); field = new LongField("number", 0L, longFieldType); doc.add(field); } else { FieldType longFieldType = new FieldType(TextField.TYPE_NOT_STORED); longFieldType.setIndexOptions(IndexOptions.DOCS_ONLY); longFieldType.setOmitNorms(true); longFieldType.setIndexRanges(true); longFieldType.freeze(); field = new Field("number", new BinaryTokenStream(binaryToken.get()), longFieldType); doc.add(field); } long startMS = System.currentTimeMillis(); // 64K buffer: InputStream is = new FileInputStream(numbersFile); BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), 1 << 16); int count = 0; while (true) { String line = reader.readLine(); if (line == null) { break; } Long v = Long.parseLong(line.trim()); if (useNumericField) { field.setLongValue(v); } else { //NumericUtils.longToPrefixCoded(v, 0, binaryToken); longToBytes(v, binaryToken); //if (bytesToLong(binaryToken.get()) != v) { // throw new RuntimeException("wrong long: v=" + v + " vs " + bytesToLong(binaryToken.get())); //} } w.addDocument(doc); count++; if (count % 200000 == 0) { long ms = System.currentTimeMillis(); System.out.println("Indexed " + count + ": " + ((ms - startMS) / 1000.0) + " sec"); } } reader.close(); System.out.println( "Final Indexed " + count + ": " + ((System.currentTimeMillis() - startMS) / 1000.0) + " sec"); // nocommit just to make debugging easier: //System.out.println("Optimize..."); //w.forceMerge(1); System.out.println("Close..."); w.close(); System.out.println("After close: " + ((System.currentTimeMillis() - startMS) / 1000.0) + " sec"); // Print CheckIndex: ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); CheckIndex checker = new CheckIndex(dir); checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8), true); CheckIndex.Status status = checker.checkIndex(); 
System.out.println("Done CheckIndex:"); System.out.println(bos.toString(IOUtils.UTF_8)); if (status.clean == false) { throw new IllegalStateException("CheckIndex failed"); } SegmentInfos infos = new SegmentInfos(); infos.read(dir); long totBytes = 0; for (SegmentCommitInfo info : infos) { totBytes += info.sizeInBytes(); } System.out.println("\nTotal index size: " + totBytes + " bytes"); } else { System.out.println("Skip indexing: index already exists"); } List<Query> queries = new ArrayList<>(); InputStream is = new FileInputStream(queriesFile); BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), 1 << 16); while (true) { String line = reader.readLine(); if (line == null) { break; } String[] numbers = line.trim().split(" "); if (numbers.length != 2) { throw new IllegalArgumentException("could not parse query line: " + line); } long minValue = Long.parseLong(numbers[0]); long maxValue = Long.parseLong(numbers[1]); if (useNumericField) { queries.add(NumericRangeQuery.newLongRange("number", precStep, minValue, maxValue, true, true)); } else { longToBytes(minValue, binaryToken); BytesRef minTerm = binaryToken.toBytesRef(); longToBytes(maxValue, binaryToken); BytesRef maxTerm = binaryToken.toBytesRef(); queries.add(new TermRangeQuery("number", minTerm, maxTerm, true, true)); } if (queries.size() == 200) { break; } } DirectoryReader r = DirectoryReader.open(dir); IndexSearcher s = new IndexSearcher(r); s.setQueryCache(null); // don't bench the cache printQueryTerms((MultiTermQuery) queries.get(0), s); long bestMS = Long.MAX_VALUE; for (int iter = 0; iter < 10; iter++) { long startMS = System.currentTimeMillis(); long totalHits = 0; long hash = 0; for (Query query : queries) { TopDocs hits = s.search(query, 10); totalHits += hits.totalHits; hash = hash * 31 + hits.totalHits; } long ms = System.currentTimeMillis() - startMS; System.out.println("iter " + iter + ": " + ms + " msec; totalHits=" + totalHits + " hash=" + hash); if (ms < bestMS) { 
System.out.println(" **"); bestMS = ms; } } /* long t0 = System.currentTimeMillis(); long bytesUsed = 0; for(int i=0;i<1000;i++) { for(AtomicReaderContext ctx : r.leaves()) { bytesUsed += ((SegmentReader) ctx.reader()).ramBytesUsed(); } } System.out.println((System.currentTimeMillis() - t0) + " msec for 1000 ramBytesUsed: " + (bytesUsed / 1000)); */ r.close(); dir.close(); }