Example usage for org.apache.lucene.index CheckIndex setInfoStream

List of usage examples for org.apache.lucene.index CheckIndex setInfoStream

Introduction

On this page you can find example usage for the setInfoStream method of org.apache.lucene.index.CheckIndex.

Prototype

public void setInfoStream(PrintStream out, boolean verbose) 

Source Link

Document

Set infoStream where messages should go.

Usage

From source file: perf.AutoPrefixPerf.java

License:Apache License

/**
 * Builds (if necessary) an index of long values and benchmarks range queries against it.
 *
 * <p>Command-line arguments:
 * <ol>
 *   <li>{@code args[0]} - path of the numbers file; one long value per line</li>
 *   <li>{@code args[1]} - path of the queries file; each line is {@code "<min> <max>"} separated by a
 *       single space; at most the first 200 lines are used</li>
 *   <li>{@code args[2]} - path of the index directory</li>
 *   <li>{@code args[3]} - numeric precision step; non-zero indexes with {@code LongField} and queries with
 *       {@code NumericRangeQuery}, zero indexes 8-byte binary terms and queries with
 *       {@code TermRangeQuery}</li>
 *   <li>{@code args[4]}, {@code args[5]} - min/max terms in prefix for the postings format; only read
 *       when {@code args[3]} is zero</li>
 * </ol>
 *
 * <p>Indexing is skipped when the directory already holds a committed index.
 *
 * @param args command-line arguments as described above
 * @throws IllegalArgumentException if a query line is malformed or the queries file yields no queries
 * @throws IllegalStateException if CheckIndex reports the freshly built index as not clean
 * @throws Exception on any I/O or parse failure
 */
public static void main(String[] args) throws Exception {
    String numbersFile = args[0];
    String queriesFile = args[1];
    Path indexPath = Paths.get(args[2]);

    int precStep = Integer.parseInt(args[3]);
    boolean useNumericField = (precStep != 0);
    int maxTermsInPrefix;
    int minTermsInPrefix;
    if (useNumericField == false) {
        minTermsInPrefix = Integer.parseInt(args[4]);
        maxTermsInPrefix = Integer.parseInt(args[5]);
    } else {
        minTermsInPrefix = 0;
        maxTermsInPrefix = 0;
    }

    // Reusable 8-byte scratch buffer that longToBytes fills for both indexing and querying.
    BytesRefBuilder binaryToken = new BytesRefBuilder();
    binaryToken.grow(8);
    binaryToken.setLength(8);

    try (Directory dir = FSDirectory.open(indexPath)) {
        // Ask Lucene whether a committed index is present rather than testing the path on disk:
        // the directory has already been opened above, so a path-existence check here cannot
        // distinguish "no index yet" from "index already built".
        if (DirectoryReader.indexExists(dir) == false) {
            buildIndex(dir, numbersFile, precStep, minTermsInPrefix, maxTermsInPrefix, binaryToken);
        } else {
            System.out.println("Skip indexing: index already exists");
        }

        List<Query> queries = loadQueries(queriesFile, precStep, binaryToken);
        if (queries.isEmpty()) {
            throw new IllegalArgumentException("no queries found in " + queriesFile);
        }

        try (DirectoryReader r = DirectoryReader.open(dir)) {
            IndexSearcher s = new IndexSearcher(r);
            s.setQueryCache(null); // don't bench the cache

            printQueryTerms((MultiTermQuery) queries.get(0), s);

            long bestMS = Long.MAX_VALUE;
            for (int iter = 0; iter < 10; iter++) {
                long startMS = System.currentTimeMillis();
                long totalHits = 0;
                // Rolling hash of per-query hit counts, printed so runs can be compared for equal results.
                long hash = 0;
                for (Query query : queries) {
                    TopDocs hits = s.search(query, 10);
                    totalHits += hits.totalHits;
                    hash = hash * 31 + hits.totalHits;
                }
                long ms = System.currentTimeMillis() - startMS;
                System.out.println("iter " + iter + ": " + ms + " msec; totalHits=" + totalHits + " hash=" + hash);
                if (ms < bestMS) {
                    // Mark a new best iteration time.
                    System.out.println("  **");
                    bestMS = ms;
                }
            }

            /*
            long t0 = System.currentTimeMillis();
            long bytesUsed = 0;
            for(int i=0;i<1000;i++) {
              for(AtomicReaderContext ctx : r.leaves()) {
                bytesUsed += ((SegmentReader) ctx.reader()).ramBytesUsed();
              }
            }
            System.out.println((System.currentTimeMillis() - t0) + " msec for 1000 ramBytesUsed: " + (bytesUsed / 1000));
            */
        }
    }
}

/**
 * Indexes every long in {@code numbersFile} into {@code dir}, runs CheckIndex on the result and prints
 * the total index size.
 *
 * @param dir directory to write the index into
 * @param numbersFile path of a UTF-8 text file with one long value per line
 * @param precStep numeric precision step; non-zero selects the {@code LongField} path
 * @param minTermsInPrefix min terms in prefix passed to the postings format (binary-term path only)
 * @param maxTermsInPrefix max terms in prefix passed to the postings format (binary-term path only)
 * @param binaryToken 8-byte scratch buffer whose bytes back the indexed binary token
 * @throws IllegalArgumentException if both {@code precStep} and {@code minTermsInPrefix} are non-zero
 * @throws IllegalStateException if CheckIndex reports the index as not clean
 * @throws Exception on any I/O or parse failure
 */
private static void buildIndex(Directory dir, String numbersFile, int precStep, int minTermsInPrefix,
        int maxTermsInPrefix, BytesRefBuilder binaryToken) throws Exception {
    boolean useNumericField = (precStep != 0);

    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    iwc.setMaxBufferedDocs(30000);
    // -1 turns off RAM-triggered flushing, leaving the doc-count trigger above in charge.
    iwc.setRAMBufferSizeMB(-1);
    iwc.setMergePolicy(new LogDocMergePolicy());

    final PostingsFormat pf;

    if (useNumericField) {
        // Disable auto-prefix when testing NumericField!
        if (minTermsInPrefix != 0) {
            throw new IllegalArgumentException("only precStep or minTermsInPrefix should be non-zero");
        }
        pf = new Lucene50PostingsFormat(25, 48, 0, 0);
    } else {
        /*
        if (minTermsInPrefix == 0) {
          throw new IllegalArgumentException("one of precStep or minTermsInPrefix must be non-zero");
        }
        */
        pf = new Lucene50PostingsFormat(25, 48, minTermsInPrefix, maxTermsInPrefix);
        //pf = new Lucene50PostingsFormat(25, 48, minTermsInPrefix, Integer.MAX_VALUE);
    }

    // Use the postings format chosen above for every field.
    iwc.setCodec(new Lucene53Codec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            return pf;
        }
    });

    iwc.setInfoStream(new PrintStreamInfoStream(System.out));
    iwc.setMergeScheduler(new SerialMergeScheduler());

    //TieredMergePolicy tmp = (TieredMergePolicy) iwc.getMergePolicy();
    //tmp.setFloorSegmentMB(.1);
    //ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) iwc.getMergeScheduler();
    // More concurrency (for SSD)
    //cms.setMaxMergesAndThreads(5, 3);
    final IndexWriter w = new IndexWriter(dir, iwc);

    // A single Document/Field pair is reused for every value; only the field's value changes.
    Document doc = new Document();
    Field field;
    if (useNumericField) {
        FieldType longFieldType = new FieldType(LongField.TYPE_NOT_STORED);
        longFieldType.setNumericPrecisionStep(precStep);
        longFieldType.freeze();
        field = new LongField("number", 0L, longFieldType);
        doc.add(field);
    } else {
        FieldType longFieldType = new FieldType(TextField.TYPE_NOT_STORED);
        longFieldType.setIndexOptions(IndexOptions.DOCS_ONLY);
        longFieldType.setOmitNorms(true);
        longFieldType.setIndexRanges(true);
        longFieldType.freeze();
        // The token stream is built over binaryToken's bytes, which longToBytes rewrites per value below.
        field = new Field("number", new BinaryTokenStream(binaryToken.get()), longFieldType);
        doc.add(field);
    }

    long startMS = System.currentTimeMillis();

    int count = 0;
    // 64K buffer:
    try (InputStream is = new FileInputStream(numbersFile);
            BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), 1 << 16)) {
        String line;
        while ((line = reader.readLine()) != null) {
            long v = Long.parseLong(line.trim());
            if (useNumericField) {
                field.setLongValue(v);
            } else {
                //NumericUtils.longToPrefixCoded(v, 0, binaryToken);
                longToBytes(v, binaryToken);
                //if (bytesToLong(binaryToken.get()) != v) {
                //  throw new RuntimeException("wrong long: v=" + v + " vs " + bytesToLong(binaryToken.get()));
                //}
            }
            w.addDocument(doc);
            count++;
            if (count % 200000 == 0) {
                long ms = System.currentTimeMillis();
                System.out.println("Indexed " + count + ": " + ((ms - startMS) / 1000.0) + " sec");
            }
        }
    }

    System.out.println(
            "Final Indexed " + count + ": " + ((System.currentTimeMillis() - startMS) / 1000.0) + " sec");

    // nocommit just to make debugging easier:
    //System.out.println("Optimize...");
    //w.forceMerge(1);

    System.out.println("Close...");
    w.close();
    System.out.println("After close: " + ((System.currentTimeMillis() - startMS) / 1000.0) + " sec");

    // Print CheckIndex: capture its verbose output in memory, then dump it to stdout.
    // NOTE(review): depending on the Lucene version CheckIndex may be Closeable - confirm and close it if so.
    ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
    CheckIndex checker = new CheckIndex(dir);
    checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8), true);
    CheckIndex.Status status = checker.checkIndex();
    System.out.println("Done CheckIndex:");
    System.out.println(bos.toString(IOUtils.UTF_8));
    if (status.clean == false) {
        throw new IllegalStateException("CheckIndex failed");
    }

    SegmentInfos infos = new SegmentInfos();
    infos.read(dir);

    long totBytes = 0;
    for (SegmentCommitInfo info : infos) {
        totBytes += info.sizeInBytes();
    }
    System.out.println("\nTotal index size: " + totBytes + " bytes");
}

/**
 * Reads up to 200 inclusive range queries on the "number" field from {@code queriesFile}.
 *
 * @param queriesFile path of a UTF-8 text file; each line holds {@code "<min> <max>"} separated by a
 *     single space
 * @param precStep numeric precision step; non-zero builds {@code NumericRangeQuery}s, zero builds
 *     {@code TermRangeQuery}s over 8-byte binary terms
 * @param binaryToken 8-byte scratch buffer used to encode the range endpoints
 * @return the parsed queries, in file order; empty if the file has no lines
 * @throws IllegalArgumentException if a line does not split into exactly two values
 * @throws Exception on any I/O or parse failure
 */
private static List<Query> loadQueries(String queriesFile, int precStep, BytesRefBuilder binaryToken)
        throws Exception {
    boolean useNumericField = (precStep != 0);
    List<Query> queries = new ArrayList<>();
    try (InputStream is = new FileInputStream(queriesFile);
            BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), 1 << 16)) {
        String line;
        // Stop after 200 queries even if the file has more lines.
        while (queries.size() < 200 && (line = reader.readLine()) != null) {
            String[] numbers = line.trim().split(" ");
            if (numbers.length != 2) {
                throw new IllegalArgumentException("could not parse query line: " + line);
            }
            long minValue = Long.parseLong(numbers[0]);
            long maxValue = Long.parseLong(numbers[1]);
            if (useNumericField) {
                queries.add(NumericRangeQuery.newLongRange("number", precStep, minValue, maxValue, true, true));
            } else {
                // toBytesRef() is called after each encode so the second encode does not overwrite minTerm.
                longToBytes(minValue, binaryToken);
                BytesRef minTerm = binaryToken.toBytesRef();
                longToBytes(maxValue, binaryToken);
                BytesRef maxTerm = binaryToken.toBytesRef();
                queries.add(new TermRangeQuery("number", minTerm, maxTerm, true, true));
            }
        }
    }
    return queries;
}