Example usage for org.apache.lucene.codecs CodecUtil checkHeaderNoMagic

List of usage examples for org.apache.lucene.codecs CodecUtil checkHeaderNoMagic

Introduction

On this page you can find example usages of org.apache.lucene.codecs.CodecUtil.checkHeaderNoMagic.

Prototype

public static int checkHeaderNoMagic(DataInput in, String codec, int minVersion, int maxVersion)
        throws IOException 

Source Link

Document

Like checkHeader(DataInput, String, int, int), except that this version assumes the first int of the header (the codec magic) has already been read from the input and validated by the caller.

Usage

From source file:org.apache.blur.mapreduce.lib.GenericRecordReader.java

License:Apache License

/**
 * Scans the segments file {@code segmentFileName} in {@code directory} and returns the
 * per-commit info of the segment whose name equals {@code segmentInfoName}.
 * <p>
 * Only files that start with {@link CodecUtil#CODEC_MAGIC} are supported; the rest of the
 * header is validated with {@code CodecUtil.checkHeaderNoMagic} because the magic int has
 * already been consumed here.
 *
 * @param directory       directory holding the segments file and the per-segment info files
 * @param segmentFileName name of the segments file to open
 * @param segmentInfoName name of the segment to look up
 * @return the matching segment's info together with its deletion count and deletion generation
 * @throws CorruptIndexException if the segment count or a deletion count is out of range
 * @throws IOException           if the file does not start with the codec magic, if the segment
 *                               is not listed in the file, or if reading fails
 */
private SegmentInfoPerCommit segmentInfosRead(Directory directory, String segmentFileName,
        String segmentInfoName) throws IOException {
    boolean success = false;

    ChecksumIndexInput input = new ChecksumIndexInput(directory.openInput(segmentFileName, IOContext.READ));
    try {
        final int format = input.readInt();
        if (format != CodecUtil.CODEC_MAGIC) {
            throw new IOException("Legacy Infos not supported for dir [" + directory + "].");
        }
        // 4.0+
        CodecUtil.checkHeaderNoMagic(input, "segments", SegmentInfos.VERSION_40, SegmentInfos.VERSION_40);
        input.readLong(); // read version (value not needed, but must be skipped)
        input.readInt(); // read counter (value not needed, but must be skipped)
        int numSegments = input.readInt();
        if (numSegments < 0) {
            throw new CorruptIndexException(
                    "invalid segment count: " + numSegments + " (resource: " + input + ")");
        }
        for (int seg = 0; seg < numSegments; seg++) {
            String segName = input.readString();
            Codec codec = Codec.forName(input.readString());
            SegmentInfo info = codec.segmentInfoFormat().getSegmentInfoReader().read(directory, segName,
                    IOContext.READ);
            info.setCodec(codec);
            long delGen = input.readLong();
            int delCount = input.readInt();
            if (delCount < 0 || delCount > info.getDocCount()) {
                throw new CorruptIndexException(
                        "invalid deletion count: " + delCount + " (resource: " + input + ")");
            }
            if (segName.equals(segmentInfoName)) {
                success = true;
                return new SegmentInfoPerCommit(info, delCount, delGen);
            }
        }
        throw new IOException("Segment [" + segmentInfoName + "] not found in dir [" + directory + "]");
    } finally {
        if (!success) {
            // An exception is already propagating; use the close variant meant for that case.
            IOUtils.closeWhileHandlingException(input);
        } else {
            input.close();
        }
    }
}

From source file:org.elasticsearch.index.translog.TranslogReader.java

License:Apache License

/**
 * Given a file, return a VersionedTranslogStream based on an
 * optionally-existing header in the file. If the file does not exist, or
 * has zero length, returns the latest version. If the header does not
 * exist, assumes Version 0 of the translog file format.
 * <p/>/*from  w w  w  . j av a2 s  .  c  o m*/
 *
 * @throws IOException
 */
public static ImmutableTranslogReader open(ChannelReference channelReference, Checkpoint checkpoint,
        String translogUUID) throws IOException {
    final FileChannel channel = channelReference.getChannel();
    final Path path = channelReference.getPath();
    assert channelReference.getGeneration() == checkpoint.generation : "expected generation: "
            + channelReference.getGeneration() + " but got: " + checkpoint.generation;

    try {
        if (checkpoint.offset == 0 && checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT) { // only old files can be empty
            return new LegacyTranslogReader(channelReference.getGeneration(), channelReference, 0);
        }

        InputStreamStreamInput headerStream = new InputStreamStreamInput(Channels.newInputStream(channel)); // don't close
        // Lucene's CodecUtil writes a magic number of 0x3FD76C17 with the
        // header, in binary this looks like:
        //
        // binary: 0011 1111 1101 0111 0110 1100 0001 0111
        // hex   :    3    f    d    7    6    c    1    7
        //
        // With version 0 of the translog, the first byte is the
        // Operation.Type, which will always be between 0-4, so we know if
        // we grab the first byte, it can be:
        // 0x3f => Lucene's magic number, so we can assume it's version 1 or later
        // 0x00 => version 0 of the translog
        //
        // otherwise the first byte of the translog is corrupted and we
        // should bail
        byte b1 = headerStream.readByte();
        if (b1 == LUCENE_CODEC_HEADER_BYTE) {
            // Read 3 more bytes, meaning a whole integer has been read
            byte b2 = headerStream.readByte();
            byte b3 = headerStream.readByte();
            byte b4 = headerStream.readByte();
            // Convert the 4 bytes that were read into an integer
            int header = ((b1 & 0xFF) << 24) + ((b2 & 0xFF) << 16) + ((b3 & 0xFF) << 8) + ((b4 & 0xFF) << 0);
            // We confirm CodecUtil's CODEC_MAGIC number (0x3FD76C17)
            // ourselves here, because it allows us to read the first
            // byte separately
            if (header != CodecUtil.CODEC_MAGIC) {
                throw new TranslogCorruptedException(
                        "translog looks like version 1 or later, but has corrupted header");
            }
            // Confirm the rest of the header using CodecUtil, extracting
            // the translog version
            int version = CodecUtil.checkHeaderNoMagic(new InputStreamDataInput(headerStream),
                    TranslogWriter.TRANSLOG_CODEC, 1, Integer.MAX_VALUE);
            switch (version) {
            case TranslogWriter.VERSION_CHECKSUMS:
                assert checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT : "expected unknown op count but got: "
                        + checkpoint.numOps;
                assert checkpoint.offset == Files.size(path) : "offset(" + checkpoint.offset + ") != file_size("
                        + Files.size(path) + ") for: " + path;
                // legacy - we still have to support it somehow
                return new LegacyTranslogReaderBase(channelReference.getGeneration(), channelReference,
                        CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC), checkpoint.offset);
            case TranslogWriter.VERSION_CHECKPOINTS:
                assert path.getFileName().toString()
                        .endsWith(Translog.TRANSLOG_FILE_SUFFIX) : "new file ends with old suffix: " + path;
                assert checkpoint.numOps > TranslogReader.UNKNOWN_OP_COUNT : "expected at least 0 operatin but got: "
                        + checkpoint.numOps;
                assert checkpoint.offset <= channel.size() : "checkpoint is inconsistent with channel length: "
                        + channel.size() + " " + checkpoint;
                int len = headerStream.readInt();
                if (len > channel.size()) {
                    throw new TranslogCorruptedException("uuid length can't be larger than the translog");
                }
                BytesRef ref = new BytesRef(len);
                ref.length = len;
                headerStream.read(ref.bytes, ref.offset, ref.length);
                BytesRef uuidBytes = new BytesRef(translogUUID);
                if (uuidBytes.bytesEquals(ref) == false) {
                    throw new TranslogCorruptedException("expected shard UUID [" + uuidBytes + "] but got: ["
                            + ref + "] this translog file belongs to a different translog");
                }
                return new ImmutableTranslogReader(channelReference.getGeneration(), channelReference,
                        ref.length + CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC)
                                + RamUsageEstimator.NUM_BYTES_INT,
                        checkpoint.offset, checkpoint.numOps);
            default:
                throw new TranslogCorruptedException(
                        "No known translog stream version: " + version + " path:" + path);
            }
        } else if (b1 == UNVERSIONED_TRANSLOG_HEADER_BYTE) {
            assert checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT : "expected unknown op count but got: "
                    + checkpoint.numOps;
            assert checkpoint.offset == Files.size(path) : "offset(" + checkpoint.offset + ") != file_size("
                    + Files.size(path) + ") for: " + path;
            return new LegacyTranslogReader(channelReference.getGeneration(), channelReference,
                    checkpoint.offset);
        } else {
            throw new TranslogCorruptedException("Invalid first byte in translog file, got: "
                    + Long.toHexString(b1) + ", expected 0x00 or 0x3f");
        }
    } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) {
        throw new TranslogCorruptedException("Translog header corrupted", e);
    }
}

From source file:org.elasticsearch.index.translog.TranslogStreams.java

License:Apache License

/**
 * Given a file, return a VersionedTranslogStream based on an
 * optionally-existing header in the file. If the file does not exist, or
 * has zero length, returns the latest version. If the header does not
 * exist, assumes Version 0 of the translog file format.
 * <p/>/*from w  w  w. j  ava  2 s  . c  om*/
 * The caller is responsible for closing the TranslogStream.
 *
 * @throws IOException
 */
public static TranslogStream translogStreamFor(File translogFile) throws IOException {

    try (InputStreamStreamInput headerStream = new InputStreamStreamInput(new FileInputStream(translogFile));) {
        if (translogFile.exists() == false || translogFile.length() == 0) {
            // if it doesn't exist or has no data, use the latest version,
            // there aren't any backwards compatibility issues
            return CHECKSUMMED_TRANSLOG_STREAM;
        }
        // Lucene's CodecUtil writes a magic number of 0x3FD76C17 with the
        // header, in binary this looks like:
        //
        // binary: 0011 1111 1101 0111 0110 1100 0001 0111
        // hex   :    3    f    d    7    6    c    1    7
        //
        // With version 0 of the translog, the first byte is the
        // Operation.Type, which will always be between 0-4, so we know if
        // we grab the first byte, it can be:
        // 0x3f => Lucene's magic number, so we can assume it's version 1 or later
        // 0x00 => version 0 of the translog
        //
        // otherwise the first byte of the translog is corrupted and we
        // should bail
        byte b1 = headerStream.readByte();
        if (b1 == LUCENE_CODEC_HEADER_BYTE) {
            // Read 3 more bytes, meaning a whole integer has been read
            byte b2 = headerStream.readByte();
            byte b3 = headerStream.readByte();
            byte b4 = headerStream.readByte();
            // Convert the 4 bytes that were read into an integer
            int header = ((b1 & 0xFF) << 24) + ((b2 & 0xFF) << 16) + ((b3 & 0xFF) << 8) + ((b4 & 0xFF) << 0);
            // We confirm CodecUtil's CODEC_MAGIC number (0x3FD76C17)
            // ourselves here, because it allows us to read the first
            // byte separately
            if (header != CodecUtil.CODEC_MAGIC) {
                throw new TranslogCorruptedException(
                        "translog looks like version 1 or later, but has corrupted header");
            }
            // Confirm the rest of the header using CodecUtil, extracting
            // the translog version
            int version = CodecUtil.checkHeaderNoMagic(new InputStreamDataInput(headerStream), TRANSLOG_CODEC,
                    1, Integer.MAX_VALUE);
            switch (version) {
            case ChecksummedTranslogStream.VERSION:
                return CHECKSUMMED_TRANSLOG_STREAM;
            default:
                throw new TranslogCorruptedException("No known translog stream version: " + version);
            }
        } else if (b1 == UNVERSIONED_TRANSLOG_HEADER_BYTE) {
            return LEGACY_TRANSLOG_STREAM;
        } else {
            throw new TranslogCorruptedException("Invalid first byte in translog file, got: "
                    + Long.toHexString(b1) + ", expected 0x00 or 0x3f");
        }
    } catch (CorruptIndexException e) {
        throw new TranslogCorruptedException("Translog header corrupted", e);
    }
}