Example usage for org.apache.lucene.store Directory openChecksumInput

List of usage examples for org.apache.lucene.store Directory openChecksumInput

Introduction

In this page you can find the example usage for org.apache.lucene.store Directory openChecksumInput.

Prototype

public ChecksumIndexInput openChecksumInput(String name, IOContext context) throws IOException 

Source Link

Document

Opens a checksum-computing stream for reading an existing file.

Usage

From source file:com.lucure.core.codec.CompressingStoredFieldsReader.java

License:Apache License

/** Sole constructor. */
public CompressingStoredFieldsReader(Directory d, SegmentInfo si, String segmentSuffix, FieldInfos fn,
        IOContext context, String formatName, CompressionMode compressionMode) throws IOException {
    this.compressionMode = compressionMode;
    final String segment = si.name;
    boolean success = false;
    fieldInfos = fn;// ww w .j  av a  2  s  .com
    numDocs = si.getDocCount();
    ChecksumIndexInput indexStream = null;
    try {
        final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix,
                FIELDS_INDEX_EXTENSION);
        final String fieldsStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION);
        // Load the index into memory
        indexStream = d.openChecksumInput(indexStreamFN, context);
        final String codecNameIdx = formatName + CODEC_SFX_IDX;
        version = CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
        assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
        indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);

        long maxPointer = -1;

        if (version >= VERSION_CHECKSUM) {
            maxPointer = indexStream.readVLong();
            CodecUtil.checkFooter(indexStream);
        } else {
            CodecUtil.checkEOF(indexStream);
        }
        indexStream.close();
        indexStream = null;

        // Open the data file and read metadata
        fieldsStream = d.openInput(fieldsStreamFN, context);
        if (version >= VERSION_CHECKSUM) {
            if (maxPointer + CodecUtil.footerLength() != fieldsStream.length()) {
                throw new CorruptIndexException("Invalid fieldsStream maxPointer (file truncated?): maxPointer="
                        + maxPointer + ", length=" + fieldsStream.length());
            }
        } else {
            maxPointer = fieldsStream.length();
        }
        this.maxPointer = maxPointer;
        final String codecNameDat = formatName + CODEC_SFX_DAT;
        final int fieldsVersion = CodecUtil.checkHeader(fieldsStream, codecNameDat, VERSION_START,
                VERSION_CURRENT);
        if (version != fieldsVersion) {
            throw new CorruptIndexException("Version mismatch between stored fields index and data: " + version
                    + " != " + fieldsVersion);
        }
        assert CodecUtil.headerLength(codecNameDat) == fieldsStream.getFilePointer();

        if (version >= VERSION_BIG_CHUNKS) {
            chunkSize = fieldsStream.readVInt();
        } else {
            chunkSize = -1;
        }
        packedIntsVersion = fieldsStream.readVInt();
        decompressor = compressionMode.newDecompressor();
        this.bytes = new BytesRef();

        if (version >= VERSION_CHECKSUM) {
            // NOTE: data file is too costly to verify checksum against all the bytes on open,
            // but for now we at least verify proper structure of the checksum footer: which looks
            // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
            // such as file truncation.
            CodecUtil.retrieveChecksum(fieldsStream);
        }

        success = true;
    } finally {
        if (!success) {
            IOUtils.closeWhileHandlingException(this, indexStream);
        }
    }
}

From source file:com.vmware.xenon.services.common.Lucene60FieldInfosFormatWithCache.java

License:Open Source License

@Override
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context)
        throws IOException {
    //////////////////////
    boolean checkInfosCache = true;
    //////////////////////
    final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, EXTENSION);
    try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
        Throwable priorE = null;//  w  w  w  . jav  a  2 s . c  om
        FieldInfo[] infos = null;
        try {
            CodecUtil.checkIndexHeader(input, Lucene60FieldInfosFormatWithCache.CODEC_NAME,
                    Lucene60FieldInfosFormatWithCache.FORMAT_START,
                    Lucene60FieldInfosFormatWithCache.FORMAT_CURRENT, segmentInfo.getId(), segmentSuffix);

            final int size = input.readVInt(); //read in the size
            infos = new FieldInfo[size];

            // previous field's attribute map, we share when possible:
            Map<String, String> lastAttributes = Collections.emptyMap();

            for (int i = 0; i < size; i++) {
                String name = input.readString();
                final int fieldNumber = input.readVInt();
                if (fieldNumber < 0) {
                    throw new CorruptIndexException(
                            "invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input);
                }
                byte bits = input.readByte();
                boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
                boolean omitNorms = (bits & OMIT_NORMS) != 0;
                boolean storePayloads = (bits & STORE_PAYLOADS) != 0;

                final IndexOptions indexOptions = getIndexOptions(input, input.readByte());

                // DV Types are packed in one byte
                final DocValuesType docValuesType = getDocValuesType(input, input.readByte());
                final long dvGen = input.readLong();
                Map<String, String> attributes = input.readMapOfStrings();
                // just use the last field's map if its the same
                if (attributes.equals(lastAttributes)) {
                    attributes = lastAttributes;
                }
                lastAttributes = attributes;
                int pointDimensionCount = input.readVInt();
                int pointNumBytes;
                if (pointDimensionCount != 0) {
                    pointNumBytes = input.readVInt();
                } else {
                    pointNumBytes = 0;
                }

                try {
                    //////////////////////
                    if (dvGen >= 0) {
                        // skip fields with docValues, they don't cache well
                        checkInfosCache = false;
                        infos[i] = new FieldInfo(name, fieldNumber, storeTermVector, omitNorms, storePayloads,
                                indexOptions, docValuesType, dvGen, attributes, pointDimensionCount,
                                pointNumBytes);
                    } else {
                        infos[i] = this.cache.dedupFieldInfo(name, fieldNumber, storeTermVector, omitNorms,
                                storePayloads, indexOptions, docValuesType, dvGen, attributes,
                                pointDimensionCount, pointNumBytes);
                    }
                    //////////////////////
                } catch (IllegalStateException e) {
                    throw new CorruptIndexException(
                            "invalid fieldinfo for field: " + name + ", fieldNumber=" + fieldNumber, input, e);
                }
            }
        } catch (Throwable exception) {
            priorE = exception;
        } finally {
            CodecUtil.checkFooter(input, priorE);
        }

        //////////////////////
        if (checkInfosCache) {
            return this.cache.dedupFieldInfos(infos);
        } else {
            FieldInfos result = new FieldInfos(infos);
            this.cache.trimFieldInfos(result);
            return result;
        }
        //////////////////////
    }
}

From source file:perf.DiskUsage.java

License:Apache License

static Set<FieldStats> analyzeFields(SegmentReader reader) throws Exception {
    Map<String, FieldStats> stats = new HashMap<>();
    Map<String, String> dvSuffixes = new HashMap<>();
    Map<String, String> postingsSuffixes = new HashMap<>();
    for (FieldInfo field : reader.getFieldInfos()) {
        FieldStats fieldStats = new FieldStats(field.name);
        stats.put(field.name, fieldStats);
        Map<String, String> attributes = field.attributes();
        if (attributes != null) {
            String postingsSuffix = attributes.get(PerFieldPostingsFormat.PER_FIELD_SUFFIX_KEY);
            if (postingsSuffix != null) {
                postingsSuffixes.put(postingsSuffix, field.name);
            }//from w  w  w  .j  ava 2  s  . c  o m
            String dvSuffix = attributes.get(PerFieldDocValuesFormat.PER_FIELD_SUFFIX_KEY);
            if (dvSuffix != null) {
                dvSuffixes.put(dvSuffix, field.name);
            }
        }

        Bits docsWithField = reader.getDocsWithField(field.name);
        if (docsWithField != null) {
            int count = 0;
            for (int docID = 0; docID < reader.maxDoc(); docID++) {
                if (docsWithField.get(docID)) {
                    count++;
                }
            }
            fieldStats.docCountWithField = count;
        }
    }

    Directory directory = reader.directory();
    for (String file : directory.listAll()) {
        String suffix = parseSuffix(file);
        long bytes = directory.fileLength(file);
        if (suffix != null) {
            switch (IndexFileNames.getExtension(file)) {
            case "dvd":
            case "dvm":
                stats.get(dvSuffixes.get(suffix)).dvBytes += bytes;
                break;
            case "tim":
            case "tip":
                stats.get(postingsSuffixes.get(suffix)).termsBytes += bytes;
                break;
            case "doc":
                stats.get(postingsSuffixes.get(suffix)).postingsBytes += bytes;
                break;
            case "pos":
            case "pay":
                stats.get(postingsSuffixes.get(suffix)).proxBytes += bytes;
                break;
            default:
                throw new AssertionError("unexpected suffixed file: " + file);
            }
        } else {
            // not a per-field file, but we can hackishly do this for the points case.
            if ("dii".equals(IndexFileNames.getExtension(file))) {
                System.err.println(
                        "retrieving per-field point usage, if you see a scary corruption error, its probably just this tool!!!!");
                try (ChecksumIndexInput in = directory.openChecksumInput(file, IOContext.READONCE)) {
                    // fail hard if its not exactly the version we do this hack for.
                    CodecUtil.checkIndexHeader(in, "Lucene60PointsFormatMeta", 0, 0,
                            reader.getSegmentInfo().info.getId(), "");
                    int fieldCount = in.readVInt();
                    // strangely, bkd offsets are not in any guaranteed order
                    TreeMap<Long, String> offsetToField = new TreeMap<>();
                    for (int i = 0; i < fieldCount; i++) {
                        int field = in.readVInt();
                        long offset = in.readVLong();
                        offsetToField.put(offset, reader.getFieldInfos().fieldInfo(field).name);
                    }
                    // now we can traverse in order
                    long previousOffset = 0;
                    for (Map.Entry<Long, String> entry : offsetToField.entrySet()) {
                        long offset = entry.getKey();
                        String field = entry.getValue();
                        stats.get(field).pointsBytes += (offset - previousOffset);
                        previousOffset = offset;
                    }
                    CodecUtil.checkFooter(in);
                }
            }
        }
    }

    return new TreeSet<FieldStats>(stats.values());
}

From source file:perf.DiskUsage.java

License:Apache License

static Set<FieldStats> analyzeFields(SegmentReader reader) throws Exception {
    Map<String, FieldStats> stats = new HashMap<>();
    Map<String, String> dvSuffixes = new HashMap<>();
    Map<String, String> postingsSuffixes = new HashMap<>();
    for (FieldInfo field : reader.getFieldInfos()) {
        FieldStats fieldStats = new FieldStats(field.name);
        stats.put(field.name, fieldStats);
        Map<String, String> attributes = field.attributes();
        if (attributes != null) {
            String postingsSuffix = attributes.get(PerFieldPostingsFormat.PER_FIELD_SUFFIX_KEY);
            if (postingsSuffix != null) {
                postingsSuffixes.put(postingsSuffix, field.name);
            }/*from w w w .  ja  v  a  2s.c  om*/
            String dvSuffix = attributes.get(PerFieldDocValuesFormat.PER_FIELD_SUFFIX_KEY);
            if (dvSuffix != null) {
                dvSuffixes.put(dvSuffix, field.name);
            }
        }

        DocIdSetIterator docsWithField;
        switch (field.getDocValuesType()) {
        case NUMERIC:
            docsWithField = reader.getNumericDocValues(field.name);
            break;
        case BINARY:
            docsWithField = reader.getBinaryDocValues(field.name);
            break;
        case SORTED:
            docsWithField = reader.getSortedDocValues(field.name);
            break;
        case SORTED_NUMERIC:
            docsWithField = reader.getSortedNumericDocValues(field.name);
            break;
        case SORTED_SET:
            docsWithField = reader.getSortedSetDocValues(field.name);
            break;
        case NONE:
            docsWithField = null;
            break;
        default:
            docsWithField = null;
            break;
        }

        if (docsWithField != null) {
            int count = 0;
            while (docsWithField.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                count++;
            }
            fieldStats.docCountWithField = count;
        }
    }

    Directory directory = reader.directory();
    for (String file : directory.listAll()) {
        String suffix = parseSuffix(file);
        long bytes = directory.fileLength(file);
        if (suffix != null) {
            switch (IndexFileNames.getExtension(file)) {
            case "dvd":
            case "dvm":
                stats.get(dvSuffixes.get(suffix)).dvBytes += bytes;
                break;
            case "tim":
            case "tip":
                stats.get(postingsSuffixes.get(suffix)).termsBytes += bytes;
                break;
            case "doc":
                stats.get(postingsSuffixes.get(suffix)).postingsBytes += bytes;
                break;
            case "pos":
            case "pay":
                stats.get(postingsSuffixes.get(suffix)).proxBytes += bytes;
                break;
            default:
                throw new AssertionError("unexpected suffixed file: " + file);
            }
        } else {
            // not a per-field file, but we can hackishly do this for the points case.
            if ("dii".equals(IndexFileNames.getExtension(file))) {
                System.err.println(
                        "retrieving per-field point usage, if you see a scary corruption error, its probably just this tool!!!!");
                try (ChecksumIndexInput in = directory.openChecksumInput(file, IOContext.READONCE)) {
                    // fail hard if its not exactly the version we do this hack for.
                    CodecUtil.checkIndexHeader(in, "Lucene60PointsFormatMeta", 0, 0,
                            reader.getSegmentInfo().info.getId(), "");
                    int fieldCount = in.readVInt();
                    // strangely, bkd offsets are not in any guaranteed order
                    TreeMap<Long, String> offsetToField = new TreeMap<>();
                    for (int i = 0; i < fieldCount; i++) {
                        int field = in.readVInt();
                        long offset = in.readVLong();
                        offsetToField.put(offset, reader.getFieldInfos().fieldInfo(field).name);
                    }
                    // now we can traverse in order
                    long previousOffset = 0;
                    for (Map.Entry<Long, String> entry : offsetToField.entrySet()) {
                        long offset = entry.getKey();
                        String field = entry.getValue();
                        stats.get(field).pointsBytes += (offset - previousOffset);
                        previousOffset = offset;
                    }
                    CodecUtil.checkFooter(in);
                }
            }
        }
    }

    return new TreeSet<FieldStats>(stats.values());
}