Example usage for org.apache.lucene.index SegmentReader maxDoc

List of usage examples for org.apache.lucene.index SegmentReader maxDoc

Introduction

On this page you can find an example usage for org.apache.lucene.index SegmentReader maxDoc.

Prototype

@Override
    public int maxDoc() 

Source Link

Usage

From source file:com.browseengine.bobo.geosearch.index.impl.GeoIndexReader.java

License:Apache License

/**
 * Builds the list of geo segment readers backing this index reader.
 *
 * <p>With no sub-readers, the inner reader (if it is a single
 * {@link SegmentReader}) is wrapped in one geo segment reader; otherwise the
 * geo segment readers of every child are collected into a single flat list.
 *
 * @param geoSearchConfig source of the geo file naming convention
 * @return the (newly assigned) list of geo segment readers
 * @throws IOException if a geo segment file cannot be opened
 */
private List<GeoSegmentReader<CartesianGeoRecord>> buildGeoSegmentReaders(GeoSearchConfig geoSearchConfig)
        throws IOException {
    geoSegmentReaders = new ArrayList<GeoSegmentReader<CartesianGeoRecord>>();

    boolean hasSubReaders = subGeoReaders != null && subGeoReaders.size() > 0;
    if (hasSubReaders) {
        // Composite case: flatten every child's geo segment readers.
        for (GeoIndexReader child : subGeoReaders) {
            for (GeoSegmentReader<CartesianGeoRecord> childSegment : child.getGeoSegmentReaders()) {
                geoSegmentReaders.add(childSegment);
            }
        }
    } else if (in instanceof SegmentReader) {
        // Leaf case: wrap the single underlying Lucene segment.
        SegmentReader leaf = (SegmentReader) in;
        String geoFileName = geoSearchConfig.getGeoFileName(leaf.getSegmentName());
        geoSegmentReaders.add(new GeoSegmentReader<CartesianGeoRecord>(directory(), geoFileName,
                leaf.maxDoc(), DEFAULT_BUFFER_SIZE_PER_SEGMENT, geoRecordSerializer, geoRecordComparator));
    }

    return geoSegmentReaders;
}

From source file:com.browseengine.bobo.geosearch.merge.impl.BufferedGeoMerger.java

License:Apache License

/**
 * Mirrors a segment's deletions into a fresh {@link BitVector}.
 *
 * @param reader the segment whose deleted docs are copied
 * @return a bit vector sized to {@code reader.maxDoc()} with a bit set for
 *         every deleted doc id
 */
private BitVector buildDeletedDocsForSegment(SegmentReader reader) {
    final BitVector deletedDocs = new BitVector(reader.maxDoc());
    final int size = deletedDocs.size();
    for (int docId = 0; docId < size; docId++) {
        if (reader.isDeleted(docId)) {
            deletedDocs.set(docId);
        }
    }
    return deletedDocs;
}

From source file:perf.DiskUsage.java

License:Apache License

// Attributes per-field on-disk usage within a single segment. Walks the
// FieldInfos to learn each field's per-field codec suffixes and count docs
// carrying the field, then charges each suffixed index file (and, hackishly,
// the points metadata) back to the owning field's FieldStats.
static Set<FieldStats> analyzeFields(SegmentReader reader) throws Exception {
    Map<String, FieldStats> stats = new HashMap<>();
    // codec suffix -> field name, for per-field doc-values files
    Map<String, String> dvSuffixes = new HashMap<>();
    // codec suffix -> field name, for per-field postings files
    Map<String, String> postingsSuffixes = new HashMap<>();
    for (FieldInfo field : reader.getFieldInfos()) {
        FieldStats fieldStats = new FieldStats(field.name);
        stats.put(field.name, fieldStats);
        Map<String, String> attributes = field.attributes();
        if (attributes != null) {
            // The per-field formats record the suffix they used for this field
            // in its attributes; remember it so suffixed files can be mapped
            // back to the field in the directory scan below.
            String postingsSuffix = attributes.get(PerFieldPostingsFormat.PER_FIELD_SUFFIX_KEY);
            if (postingsSuffix != null) {
                postingsSuffixes.put(postingsSuffix, field.name);
            }
            String dvSuffix = attributes.get(PerFieldDocValuesFormat.PER_FIELD_SUFFIX_KEY);
            if (dvSuffix != null) {
                dvSuffixes.put(dvSuffix, field.name);
            }
        }

        // Count the documents that actually have a value for this field.
        Bits docsWithField = reader.getDocsWithField(field.name);
        if (docsWithField != null) {
            int count = 0;
            for (int docID = 0; docID < reader.maxDoc(); docID++) {
                if (docsWithField.get(docID)) {
                    count++;
                }
            }
            fieldStats.docCountWithField = count;
        }
    }

    Directory directory = reader.directory();
    for (String file : directory.listAll()) {
        String suffix = parseSuffix(file);
        long bytes = directory.fileLength(file);
        if (suffix != null) {
            // Suffixed file: charge its whole size to the field owning the suffix.
            switch (IndexFileNames.getExtension(file)) {
            case "dvd":
            case "dvm":
                stats.get(dvSuffixes.get(suffix)).dvBytes += bytes;
                break;
            case "tim":
            case "tip":
                stats.get(postingsSuffixes.get(suffix)).termsBytes += bytes;
                break;
            case "doc":
                stats.get(postingsSuffixes.get(suffix)).postingsBytes += bytes;
                break;
            case "pos":
            case "pay":
                stats.get(postingsSuffixes.get(suffix)).proxBytes += bytes;
                break;
            default:
                throw new AssertionError("unexpected suffixed file: " + file);
            }
        } else {
            // not a per-field file, but we can hackishly do this for the points case.
            if ("dii".equals(IndexFileNames.getExtension(file))) {
                System.err.println(
                        "retrieving per-field point usage, if you see a scary corruption error, its probably just this tool!!!!");
                try (ChecksumIndexInput in = directory.openChecksumInput(file, IOContext.READONCE)) {
                    // fail hard if its not exactly the version we do this hack for.
                    CodecUtil.checkIndexHeader(in, "Lucene60PointsFormatMeta", 0, 0,
                            reader.getSegmentInfo().info.getId(), "");
                    int fieldCount = in.readVInt();
                    // strangely, bkd offsets are not in any guaranteed order
                    TreeMap<Long, String> offsetToField = new TreeMap<>();
                    for (int i = 0; i < fieldCount; i++) {
                        int field = in.readVInt();
                        long offset = in.readVLong();
                        offsetToField.put(offset, reader.getFieldInfos().fieldInfo(field).name);
                    }
                    // now we can traverse in order
                    // NOTE(review): each field is charged the bytes between the
                    // previous field's offset and its own, so the tail after the
                    // last offset is not attributed to any field — confirm this
                    // approximation is intended.
                    long previousOffset = 0;
                    for (Map.Entry<Long, String> entry : offsetToField.entrySet()) {
                        long offset = entry.getKey();
                        String field = entry.getValue();
                        stats.get(field).pointsBytes += (offset - previousOffset);
                        previousOffset = offset;
                    }
                    CodecUtil.checkFooter(in);
                }
            }
        }
    }

    return new TreeSet<FieldStats>(stats.values());
}

From source file:perf.DiskUsage.java

License:Apache License

/**
 * Prints a disk-usage report for a segment: overall totals per index-file
 * category, followed by a per-field breakdown table from {@code stats}.
 *
 * @param reader the segment being reported on
 * @param stats  per-field statistics previously gathered for this segment
 */
static void report(SegmentReader reader, Set<FieldStats> stats) throws Exception {
    // Byte totals per index-file category, keyed by file extension below.
    long totalBytes = 0;
    long storedFieldsBytes = 0;
    long vectorsBytes = 0;
    long normsBytes = 0;
    long docValuesBytes = 0;
    long postingsBytes = 0;
    long pointsBytes = 0;
    long termsBytes = 0;
    long proxBytes = 0;

    for (String file : reader.directory().listAll()) {
        long size = reader.directory().fileLength(file);
        totalBytes += size;
        String ext = IndexFileNames.getExtension(file);
        if (ext == null) {
            continue;
        }
        if ("fdt".equals(ext) || "fdx".equals(ext)) {
            storedFieldsBytes += size;
        } else if ("tvx".equals(ext) || "tvd".equals(ext)) {
            vectorsBytes += size;
        } else if ("nvd".equals(ext) || "nvm".equals(ext)) {
            normsBytes += size;
        } else if ("dvd".equals(ext) || "dvm".equals(ext)) {
            docValuesBytes += size;
        } else if ("tim".equals(ext) || "tip".equals(ext)) {
            termsBytes += size;
        } else if ("pos".equals(ext) || "pay".equals(ext)) {
            proxBytes += size;
        } else if ("doc".equals(ext)) {
            postingsBytes += size;
        } else if ("dii".equals(ext) || "dim".equals(ext)) {
            pointsBytes += size;
        }
    }

    DecimalFormat df = new DecimalFormat("#,##0");
    System.out.printf("total disk:    %15s\n", df.format(totalBytes));
    System.out.printf("num docs:      %15s\n", df.format(reader.numDocs()));
    System.out.printf("stored fields: %15s\n", df.format(storedFieldsBytes));
    System.out.printf("term vectors:  %15s\n", df.format(vectorsBytes));
    System.out.printf("norms:         %15s\n", df.format(normsBytes));
    System.out.printf("docvalues:     %15s\n", df.format(docValuesBytes));
    System.out.printf("postings:      %15s\n", df.format(postingsBytes));
    System.out.printf("prox:          %15s\n", df.format(proxBytes));
    System.out.printf("points:        %15s\n", df.format(pointsBytes));
    System.out.printf("terms:         %15s\n", df.format(termsBytes));
    System.out.println();

    // Size the first column to the longest field name.
    int widestName = 0;
    for (FieldStats field : stats) {
        widestName = Math.max(widestName, field.name.length());
    }
    String fieldNameFormat = "%" + widestName + "s";

    System.out.printf(fieldNameFormat + " %15s %15s %15s %15s %15s %15s %15s %20s\n", "field", "total",
            "terms dict", "postings", "proximity", "points", "docvalues", "% with dv", "features");
    System.out.printf(fieldNameFormat + " %15s %15s %15s %15s %15s %15s %15s %20s\n", "=====", "=====",
            "==========", "========", "=========", "=========", "=========", "========", "========");

    for (FieldStats field : stats) {
        System.out.printf(fieldNameFormat + " %15s %15s %15s %15s %15s %15s %14.1f%% %20s\n", field.name,
                df.format(field.totalBytes()), df.format(field.termsBytes), df.format(field.postingsBytes),
                df.format(field.proxBytes), df.format(field.pointsBytes), df.format(field.dvBytes),
                (100.0 * field.docCountWithField) / reader.maxDoc(),
                features(reader.getFieldInfos().fieldInfo(field.name)));
    }
}

From source file:proj.zoie.api.ZoieMultiReader.java

License:Apache License

/**
 * Reopens the underlying Lucene reader and rebuilds the Zoie segment wrappers.
 *
 * <p>If the inner reader and its version are unchanged, this reader is
 * returned as-is. Otherwise each sequential sub-reader (which must be a
 * {@link SegmentReader}) is wrapped: segments already known in
 * {@code _readerMap} are rewrapped (detecting new deletions by comparing
 * doc counts), new segments are decorated fresh. Reopen latency is logged
 * at INFO above one second, otherwise at DEBUG.
 *
 * @param openReadOnly whether to reopen the inner reader read-only
 * @return this reader when nothing changed, otherwise a new instance over
 *         the reopened inner reader
 * @throws IllegalStateException if a sub-reader is not a SegmentReader
 */
@Override
public ZoieIndexReader<R> reopen(boolean openReadOnly) throws CorruptIndexException, IOException {
    long t0 = System.currentTimeMillis();
    long version = in.getVersion();
    IndexReader inner = in.reopen(openReadOnly);
    if (inner == in && inner.getVersion() == version) {
        // Nothing changed: keep serving from this reader.
        t0 = System.currentTimeMillis() - t0;
        if (t0 > 1000) {
            log.info("reopen returns in " + t0 + "ms without change");
        } else if (log.isDebugEnabled()) {
            log.debug("reopen returns in " + t0 + "ms without change");
        }
        return this;
    }

    IndexReader[] subReaders = inner.getSequentialSubReaders();
    ArrayList<IndexReader> subReaderList = new ArrayList<IndexReader>(subReaders.length);
    for (IndexReader subReader : subReaders) {
        if (!(subReader instanceof SegmentReader)) {
            // BUGFIX: message previously read "not insance of".
            throw new IllegalStateException("reader not instance of " + SegmentReader.class);
        }
        SegmentReader sr = (SegmentReader) subReader;
        String segmentName = sr.getSegmentName();
        ZoieSegmentReader<R> zoieSegmentReader = _readerMap.get(segmentName);
        if (zoieSegmentReader != null) {
            // Known segment: doc-count mismatch implies deletions happened.
            boolean hasDeletes = zoieSegmentReader.numDocs() != sr.numDocs()
                    || zoieSegmentReader.maxDoc() != sr.maxDoc();
            zoieSegmentReader = new ZoieSegmentReader<R>(zoieSegmentReader, sr, hasDeletes);
        } else {
            // New segment: decorate it from scratch.
            zoieSegmentReader = new ZoieSegmentReader<R>(sr, _decorator);
        }
        subReaderList.add(zoieSegmentReader);
    }
    ZoieIndexReader<R> ret = newInstance(inner, subReaderList.toArray(new IndexReader[subReaderList.size()]));
    t0 = System.currentTimeMillis() - t0;
    if (t0 > 1000) {
        log.info("reopen returns in " + t0 + "ms with change");
    } else if (log.isDebugEnabled()) {
        log.debug("reopen returns in " + t0 + "ms with change");
    }
    return ret;
}