Example usage for org.apache.lucene.index SegmentReader numDocs

List of usage examples for org.apache.lucene.index SegmentReader numDocs

Introduction

In this page you can find the example usage for org.apache.lucene.index SegmentReader numDocs.

Prototype

int numDocs()

To view the source code for org.apache.lucene.index SegmentReader numDocs, click Source Link.

Usage

From source file:org.apache.mahout.text.LuceneSegmentInputSplitTest.java

License:Apache License

/**
 * Asserts that the named segment can be resolved through a {@link LuceneSegmentInputSplit}
 * built on {@code indexPath1} and that a reader opened on it reports exactly one document.
 *
 * @param segmentName name of the segment to look up and verify
 * @throws IOException if the segment cannot be resolved, opened, or closed
 */
private void assertSegmentContainsOneDoc(String segmentName) throws IOException {
    LuceneSegmentInputSplit inputSplit = new LuceneSegmentInputSplit(indexPath1, segmentName, 1000);
    SegmentCommitInfo segment = inputSplit.getSegment(configuration);
    SegmentReader segmentReader = new SegmentReader(segment, 1, IOContext.READ);
    try {
        assertEquals(segmentName, segment.info.name);
        assertEquals(1, segmentReader.numDocs());
    } finally {
        // Always release the reader, even when an assertion fails, so the test
        // does not leave the segment's files open.
        segmentReader.close();
    }
}

From source file:perf.DiskUsage.java

License:Apache License

/**
 * Prints a disk-usage report to standard output.
 *
 * <p>The first section sizes every file listed by {@code reader.directory()}, buckets the sizes
 * by file extension (stored fields, term vectors, norms, doc values, postings, proximity,
 * points, terms dictionary) and prints the totals together with {@code reader.numDocs()}.
 * The second section prints one table row per entry of {@code stats}.
 *
 * <p>NOTE(review): every file the directory lists is counted; this presumably assumes the
 * directory holds only this reader's segment - confirm against the caller.
 *
 * @param reader reader whose directory, document counts and field infos are reported
 * @param stats  per-field statistics to print; may be empty, in which case only the table
 *               header is printed
 * @throws Exception if listing or sizing the directory's files fails
 */
static void report(SegmentReader reader, Set<FieldStats> stats) throws Exception {
    long totalSize = 0;
    long storeSize = 0;
    long vectorSize = 0;
    long normsSize = 0;
    long dvsSize = 0;
    long postingsSize = 0;
    long pointsSize = 0;
    long termsSize = 0;
    long proxSize = 0;
    for (String file : reader.directory().listAll()) {
        long size = reader.directory().fileLength(file);
        totalSize += size;
        String extension = IndexFileNames.getExtension(file);
        if (extension != null) {
            switch (extension) {
            case "fdt":
            case "fdx":
                storeSize += size;
                break;
            case "tvx":
            case "tvd":
                vectorSize += size;
                break;
            case "nvd":
            case "nvm":
                normsSize += size;
                break;
            case "dvd":
            case "dvm":
                dvsSize += size;
                break;
            case "tim":
            case "tip":
                termsSize += size;
                break;
            case "pos":
            case "pay":
                proxSize += size;
                break;
            case "doc":
                postingsSize += size;
                break;
            case "dii":
            case "dim":
                pointsSize += size;
                break;
            default:
                // Unrecognized extensions contribute to the total only.
                break;
            }
        }
    }

    DecimalFormat df = new DecimalFormat("#,##0");
    System.out.printf("total disk:    %15s\n", df.format(totalSize));
    System.out.printf("num docs:      %15s\n", df.format(reader.numDocs()));
    System.out.printf("stored fields: %15s\n", df.format(storeSize));
    System.out.printf("term vectors:  %15s\n", df.format(vectorSize));
    System.out.printf("norms:         %15s\n", df.format(normsSize));
    System.out.printf("docvalues:     %15s\n", df.format(dvsSize));
    System.out.printf("postings:      %15s\n", df.format(postingsSize));
    System.out.printf("prox:          %15s\n", df.format(proxSize));
    System.out.printf("points:        %15s\n", df.format(pointsSize));
    System.out.printf("terms:         %15s\n", df.format(termsSize));
    System.out.println();

    // Start from the width of the "field" column header rather than 0: a width of 0 would
    // build the specifier "%0s", which java.util.Formatter rejects (the '0' is read as the
    // zero-padding flag, invalid for %s), so an empty stats set used to crash here. It also
    // keeps the header aligned when every field name is shorter than the header.
    int maxFieldNameLength = "field".length();
    for (FieldStats field : stats) {
        maxFieldNameLength = Math.max(maxFieldNameLength, field.name.length());
    }

    // Make sure we format to enough room for the max field length:
    String fieldNameFormat = "%" + maxFieldNameLength + "s";

    // The header and its underline share one layout; data rows differ only in the
    // "% with dv" column, which is numeric.
    String headerFormat = fieldNameFormat + " %15s %15s %15s %15s %15s %15s %15s %20s\n";
    String rowFormat = fieldNameFormat + " %15s %15s %15s %15s %15s %15s %14.1f%% %20s\n";

    System.out.printf(headerFormat, "field", "total",
            "terms dict", "postings", "proximity", "points", "docvalues", "% with dv", "features");
    System.out.printf(headerFormat, "=====", "=====",
            "==========", "========", "=========", "=========", "=========", "========", "========");

    for (FieldStats field : stats) {
        System.out.printf(rowFormat, field.name,
                df.format(field.totalBytes()), df.format(field.termsBytes), df.format(field.postingsBytes),
                df.format(field.proxBytes), df.format(field.pointsBytes), df.format(field.dvBytes),
                (100.0 * field.docCountWithField) / reader.maxDoc(),
                features(reader.getFieldInfos().fieldInfo(field.name)));
    }
}

From source file:proj.zoie.api.ZoieMultiReader.java

License:Apache License

/**
 * Reopens the wrapped reader and returns a reader reflecting its current state.
 *
 * <p>If the wrapped reader's {@code reopen} returns the same instance and its version is
 * unchanged, this reader is returned as-is. Otherwise each sub-reader of the reopened reader
 * is wrapped in a {@link ZoieSegmentReader}: when {@code _readerMap} already holds a wrapper
 * for the segment name, the new wrapper is built from it (flagging deletes when the document
 * counts differ); otherwise a fresh wrapper is created with {@code _decorator}.
 *
 * <p>The elapsed time is logged at info level when it exceeds one second, else at debug level.
 *
 * @param openReadOnly passed through to the wrapped reader's {@code reopen}
 * @return this reader when nothing changed, otherwise a new instance over the reopened reader
 * @throws CorruptIndexException propagated from the wrapped reader
 * @throws IOException propagated from the wrapped reader
 * @throws IllegalStateException if a sub-reader is not a {@link SegmentReader}
 */
@Override
public ZoieIndexReader<R> reopen(boolean openReadOnly) throws CorruptIndexException, IOException {
    long start = System.currentTimeMillis();
    long version = in.getVersion();
    IndexReader inner = in.reopen(openReadOnly);
    if (inner == in && inner.getVersion() == version) {
        logReopenElapsed(System.currentTimeMillis() - start, "without change");
        return this;
    }

    IndexReader[] subReaders = inner.getSequentialSubReaders();
    ArrayList<IndexReader> subReaderList = new ArrayList<IndexReader>(subReaders.length);
    for (IndexReader subReader : subReaders) {
        if (!(subReader instanceof SegmentReader)) {
            throw new IllegalStateException("reader not instance of " + SegmentReader.class);
        }
        SegmentReader sr = (SegmentReader) subReader;
        String segmentName = sr.getSegmentName();
        ZoieSegmentReader<R> zoieSegmentReader = _readerMap.get(segmentName);
        if (zoieSegmentReader != null) {
            int numDocs = sr.numDocs();
            int maxDocs = sr.maxDoc();
            // A mismatch in either count against the previously wrapped reader is
            // treated as "this segment has deletes" for the new wrapper.
            boolean hasDeletes = zoieSegmentReader.numDocs() != numDocs
                    || zoieSegmentReader.maxDoc() != maxDocs;
            zoieSegmentReader = new ZoieSegmentReader<R>(zoieSegmentReader, sr, hasDeletes);
        } else {
            zoieSegmentReader = new ZoieSegmentReader<R>(sr, _decorator);
        }
        subReaderList.add(zoieSegmentReader);
    }
    ZoieIndexReader<R> ret = newInstance(inner, subReaderList.toArray(new IndexReader[subReaderList.size()]));
    logReopenElapsed(System.currentTimeMillis() - start, "with change");
    return ret;
}

/**
 * Logs how long a reopen took: at info level when slower than one second, otherwise at
 * debug level (only when debug logging is enabled).
 */
private void logReopenElapsed(long elapsedMs, String outcome) {
    if (elapsedMs > 1000) {
        log.info("reopen returns in " + elapsedMs + "ms " + outcome);
    } else if (log.isDebugEnabled()) {
        log.debug("reopen returns in " + elapsedMs + "ms " + outcome);
    }
}