Example usage for org.apache.lucene.index IndexFileNames getExtension

List of usage examples for org.apache.lucene.index IndexFileNames getExtension

Introduction

On this page you can find example usage for org.apache.lucene.index IndexFileNames getExtension.

Prototype

public static String getExtension(String filename) 

Document

Return the extension (anything after the first '.'), or null if there is no '.' in the file name.
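
For a quick, self-contained sketch of the call itself (the file names below are hypothetical but follow typical Lucene segment-file naming):

String docExt = IndexFileNames.getExtension("_0_Lucene50_0.doc"); // "doc"
String cfsExt = IndexFileNames.getExtension("_0.cfs");            // "cfs"
String noDot  = IndexFileNames.getExtension("segments_2");        // null: no '.' in the name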

Usage

From source file: perf.DiskUsage.java

License: Apache License

static Set<FieldStats> analyzeFields(SegmentReader reader) throws Exception {
    Map<String, FieldStats> stats = new HashMap<>();
    Map<String, String> dvSuffixes = new HashMap<>();
    Map<String, String> postingsSuffixes = new HashMap<>();
    for (FieldInfo field : reader.getFieldInfos()) {
        FieldStats fieldStats = new FieldStats(field.name);
        stats.put(field.name, fieldStats);
        Map<String, String> attributes = field.attributes();
        if (attributes != null) {
            String postingsSuffix = attributes.get(PerFieldPostingsFormat.PER_FIELD_SUFFIX_KEY);
            if (postingsSuffix != null) {
                postingsSuffixes.put(postingsSuffix, field.name);
            }
            String dvSuffix = attributes.get(PerFieldDocValuesFormat.PER_FIELD_SUFFIX_KEY);
            if (dvSuffix != null) {
                dvSuffixes.put(dvSuffix, field.name);
            }
        }

        Bits docsWithField = reader.getDocsWithField(field.name);
        if (docsWithField != null) {
            int count = 0;
            for (int docID = 0; docID < reader.maxDoc(); docID++) {
                if (docsWithField.get(docID)) {
                    count++;
                }
            }
            fieldStats.docCountWithField = count;
        }
    }

    Directory directory = reader.directory();
    for (String file : directory.listAll()) {
        String suffix = parseSuffix(file);
        long bytes = directory.fileLength(file);
        if (suffix != null) {
            switch (IndexFileNames.getExtension(file)) {
            case "dvd":
            case "dvm":
                stats.get(dvSuffixes.get(suffix)).dvBytes += bytes;
                break;
            case "tim":
            case "tip":
                stats.get(postingsSuffixes.get(suffix)).termsBytes += bytes;
                break;
            case "doc":
                stats.get(postingsSuffixes.get(suffix)).postingsBytes += bytes;
                break;
            case "pos":
            case "pay":
                stats.get(postingsSuffixes.get(suffix)).proxBytes += bytes;
                break;
            default:
                throw new AssertionError("unexpected suffixed file: " + file);
            }
        } else {
            // not a per-field file, but we can hackishly do this for the points case.
            if ("dii".equals(IndexFileNames.getExtension(file))) {
                System.err.println(
                        "retrieving per-field point usage, if you see a scary corruption error, its probably just this tool!!!!");
                try (ChecksumIndexInput in = directory.openChecksumInput(file, IOContext.READONCE)) {
                    // fail hard if its not exactly the version we do this hack for.
                    CodecUtil.checkIndexHeader(in, "Lucene60PointsFormatMeta", 0, 0,
                            reader.getSegmentInfo().info.getId(), "");
                    int fieldCount = in.readVInt();
                    // strangely, bkd offsets are not in any guaranteed order
                    TreeMap<Long, String> offsetToField = new TreeMap<>();
                    for (int i = 0; i < fieldCount; i++) {
                        int field = in.readVInt();
                        long offset = in.readVLong();
                        offsetToField.put(offset, reader.getFieldInfos().fieldInfo(field).name);
                    }
                    // now we can traverse in order
                    long previousOffset = 0;
                    for (Map.Entry<Long, String> entry : offsetToField.entrySet()) {
                        long offset = entry.getKey();
                        String field = entry.getValue();
                        stats.get(field).pointsBytes += (offset - previousOffset);
                        previousOffset = offset;
                    }
                    CodecUtil.checkFooter(in);
                }
            }
        }
    }

    return new TreeSet<FieldStats>(stats.values());
}
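
The switch above keys on IndexFileNames.getExtension(file), while the per-field suffix comes from a parseSuffix helper defined elsewhere in perf.DiskUsage. As a rough, hypothetical reconstruction of such a helper (not the tool's actual code), assuming per-field codec files are named "_<segment>_<format>_<suffix>.<ext>":

static String parseSuffixSketch(String filename) {
    if (filename.startsWith("_") == false) {
        return null; // e.g. "segments_2" is not a per-field file
    }
    // drop the extension ("dvd", "tim", ...), keeping "_<segment>_<format>_<suffix>"
    String name = IndexFileNames.stripExtension(filename);
    String[] parts = name.substring(1).split("_");
    // three parts (segment name, format name, per-field suffix) mark a per-field codec file
    return parts.length == 3 ? parts[2] : null;
}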

From source file: perf.DiskUsage.java

License: Apache License

static void report(SegmentReader reader, Set<FieldStats> stats) throws Exception {
    long totalSize = 0;
    long storeSize = 0;
    long vectorSize = 0;
    long normsSize = 0;
    long dvsSize = 0;
    long postingsSize = 0;
    long pointsSize = 0;
    long termsSize = 0;
    long proxSize = 0;
    for (String file : reader.directory().listAll()) {
        long size = reader.directory().fileLength(file);
        totalSize += size;
        String extension = IndexFileNames.getExtension(file);
        if (extension != null) {
            switch (extension) {
            case "fdt":
            case "fdx":
                storeSize += size;
                break;
            case "tvx":
            case "tvd":
                vectorSize += size;
                break;
            case "nvd":
            case "nvm":
                normsSize += size;
                break;
            case "dvd":
            case "dvm":
                dvsSize += size;
                break;
            case "tim":
            case "tip":
                termsSize += size;
                break;
            case "pos":
            case "pay":
                proxSize += size;
                break;
            case "doc":
                postingsSize += size;
                break;
            case "dii":
            case "dim":
                pointsSize += size;
                break;
            }
        }
    }

    DecimalFormat df = new DecimalFormat("#,##0");
    System.out.printf("total disk:    %15s\n", df.format(totalSize));
    System.out.printf("num docs:      %15s\n", df.format(reader.numDocs()));
    System.out.printf("stored fields: %15s\n", df.format(storeSize));
    System.out.printf("term vectors:  %15s\n", df.format(vectorSize));
    System.out.printf("norms:         %15s\n", df.format(normsSize));
    System.out.printf("docvalues:     %15s\n", df.format(dvsSize));
    System.out.printf("postings:      %15s\n", df.format(postingsSize));
    System.out.printf("prox:          %15s\n", df.format(proxSize));
    System.out.printf("points:        %15s\n", df.format(pointsSize));
    System.out.printf("terms:         %15s\n", df.format(termsSize));
    System.out.println();

    int maxFieldNameLength = 0;
    for (FieldStats field : stats) {
        maxFieldNameLength = Math.max(maxFieldNameLength, field.name.length());
    }

    // Make sure we format to enough room for the max field length:
    String fieldNameFormat = "%" + maxFieldNameLength + "s";

    System.out.printf(fieldNameFormat + " %15s %15s %15s %15s %15s %15s %15s %20s\n", "field", "total",
            "terms dict", "postings", "proximity", "points", "docvalues", "% with dv", "features");
    System.out.printf(fieldNameFormat + " %15s %15s %15s %15s %15s %15s %15s %20s\n", "=====", "=====",
            "==========", "========", "=========", "=========", "=========", "========", "========");

    for (FieldStats field : stats) {
        System.out.printf(fieldNameFormat + " %15s %15s %15s %15s %15s %15s %14.1f%% %20s\n", field.name,
                df.format(field.totalBytes()), df.format(field.termsBytes), df.format(field.postingsBytes),
                df.format(field.proxBytes), df.format(field.pointsBytes), df.format(field.dvBytes),
                (100.0 * field.docCountWithField) / reader.maxDoc(),
                features(reader.getFieldInfos().fieldInfo(field.name)));
    }
}
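
A hypothetical driver for the two methods above (the real perf.DiskUsage ships its own main): open the index, walk the leaves of a freshly opened DirectoryReader, each of which is a SegmentReader, and hand it to analyzeFields and report. This sketch assumes it lives in the same class so the static methods are in scope; Directory and FSDirectory come from org.apache.lucene.store, the reader classes from org.apache.lucene.index.

public static void main(String[] args) throws Exception {
    // args[0]: path to the index directory to analyze
    try (Directory dir = FSDirectory.open(Paths.get(args[0]));
            DirectoryReader reader = DirectoryReader.open(dir)) {
        for (LeafReaderContext context : reader.leaves()) {
            // each leaf of a freshly opened DirectoryReader is a SegmentReader
            SegmentReader segmentReader = (SegmentReader) context.reader();
            report(segmentReader, analyzeFields(segmentReader));
        }
    }
}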

From source file: perf.DiskUsage.java

License: Apache License

static Set<FieldStats> analyzeFields(SegmentReader reader) throws Exception {
    Map<String, FieldStats> stats = new HashMap<>();
    Map<String, String> dvSuffixes = new HashMap<>();
    Map<String, String> postingsSuffixes = new HashMap<>();
    for (FieldInfo field : reader.getFieldInfos()) {
        FieldStats fieldStats = new FieldStats(field.name);
        stats.put(field.name, fieldStats);
        Map<String, String> attributes = field.attributes();
        if (attributes != null) {
            String postingsSuffix = attributes.get(PerFieldPostingsFormat.PER_FIELD_SUFFIX_KEY);
            if (postingsSuffix != null) {
                postingsSuffixes.put(postingsSuffix, field.name);
            }
            String dvSuffix = attributes.get(PerFieldDocValuesFormat.PER_FIELD_SUFFIX_KEY);
            if (dvSuffix != null) {
                dvSuffixes.put(dvSuffix, field.name);
            }
        }

        DocIdSetIterator docsWithField;
        switch (field.getDocValuesType()) {
        case NUMERIC:
            docsWithField = reader.getNumericDocValues(field.name);
            break;
        case BINARY:
            docsWithField = reader.getBinaryDocValues(field.name);
            break;
        case SORTED:
            docsWithField = reader.getSortedDocValues(field.name);
            break;
        case SORTED_NUMERIC:
            docsWithField = reader.getSortedNumericDocValues(field.name);
            break;
        case SORTED_SET:
            docsWithField = reader.getSortedSetDocValues(field.name);
            break;
        case NONE:
            docsWithField = null;
            break;
        default:
            docsWithField = null;
            break;
        }

        if (docsWithField != null) {
            int count = 0;
            while (docsWithField.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                count++;
            }
            fieldStats.docCountWithField = count;
        }
    }

    Directory directory = reader.directory();
    for (String file : directory.listAll()) {
        String suffix = parseSuffix(file);
        long bytes = directory.fileLength(file);
        if (suffix != null) {
            switch (IndexFileNames.getExtension(file)) {
            case "dvd":
            case "dvm":
                stats.get(dvSuffixes.get(suffix)).dvBytes += bytes;
                break;
            case "tim":
            case "tip":
                stats.get(postingsSuffixes.get(suffix)).termsBytes += bytes;
                break;
            case "doc":
                stats.get(postingsSuffixes.get(suffix)).postingsBytes += bytes;
                break;
            case "pos":
            case "pay":
                stats.get(postingsSuffixes.get(suffix)).proxBytes += bytes;
                break;
            default:
                throw new AssertionError("unexpected suffixed file: " + file);
            }
        } else {
            // not a per-field file, but we can hackishly do this for the points case.
            if ("dii".equals(IndexFileNames.getExtension(file))) {
                System.err.println(
                        "retrieving per-field point usage, if you see a scary corruption error, its probably just this tool!!!!");
                try (ChecksumIndexInput in = directory.openChecksumInput(file, IOContext.READONCE)) {
                    // fail hard if its not exactly the version we do this hack for.
                    CodecUtil.checkIndexHeader(in, "Lucene60PointsFormatMeta", 0, 0,
                            reader.getSegmentInfo().info.getId(), "");
                    int fieldCount = in.readVInt();
                    // strangely, bkd offsets are not in any guaranteed order
                    TreeMap<Long, String> offsetToField = new TreeMap<>();
                    for (int i = 0; i < fieldCount; i++) {
                        int field = in.readVInt();
                        long offset = in.readVLong();
                        offsetToField.put(offset, reader.getFieldInfos().fieldInfo(field).name);
                    }
                    // now we can traverse in order
                    long previousOffset = 0;
                    for (Map.Entry<Long, String> entry : offsetToField.entrySet()) {
                        long offset = entry.getKey();
                        String field = entry.getValue();
                        stats.get(field).pointsBytes += (offset - previousOffset);
                        previousOffset = offset;
                    }
                    CodecUtil.checkFooter(in);
                }
            }
        }
    }

    return new TreeSet<FieldStats>(stats.values());
}
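
Note that this second analyzeFields variant uses the newer iterator-based doc-values API (getNumericDocValues and friends returning DocIdSetIterator subclasses, Lucene 7.0+) to count documents with a value, whereas the first variant above relies on the older Bits-returning getDocsWithField; the per-file size accounting via IndexFileNames.getExtension is identical in both.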