Example usage for org.apache.lucene.codecs CodecUtil CODEC_MAGIC

List of usage examples for org.apache.lucene.codecs CodecUtil CODEC_MAGIC

Introduction

On this page you can find example usages of org.apache.lucene.codecs CodecUtil CODEC_MAGIC.

Prototype

int CODEC_MAGIC

To view the source code for org.apache.lucene.codecs CodecUtil CODEC_MAGIC, click the Source Link.

Document

Constant to identify the start of a codec header.

Usage

From source file:org.apache.blur.mapreduce.lib.GenericRecordReader.java

License:Apache License

/**
 * Reads the segments file {@code segmentFileName} from {@code directory} and returns the
 * per-commit information of the segment named {@code segmentInfoName}.
 *
 * <p>Only files that start with {@link CodecUtil#CODEC_MAGIC} are accepted. After the
 * header is validated, the version and counter fields are read and discarded, and the
 * segment entries are scanned in order until one with the requested name is found.
 *
 * @param directory       directory holding the segments file and the per-segment info
 * @param segmentFileName name of the segments file to open
 * @param segmentInfoName name of the segment to look up
 * @return the matching segment together with its deletion count and deletion generation
 * @throws CorruptIndexException if the segment count or a deletion count is out of range
 * @throws IOException           if the file does not start with the codec magic, the
 *                               segment is not listed, or reading fails
 */
private SegmentInfoPerCommit segmentInfosRead(Directory directory, String segmentFileName,
        String segmentInfoName) throws IOException {
    boolean success = false;

    ChecksumIndexInput input = new ChecksumIndexInput(directory.openInput(segmentFileName, IOContext.READ));
    try {
        final int format = input.readInt();
        if (format != CodecUtil.CODEC_MAGIC) {
            throw new IOException("Legacy Infos not supported for dir [" + directory + "].");
        }

        // 4.0+
        CodecUtil.checkHeaderNoMagic(input, "segments", SegmentInfos.VERSION_40, SegmentInfos.VERSION_40);
        input.readLong(); // read version
        input.readInt(); // read counter
        final int numSegments = input.readInt();
        if (numSegments < 0) {
            throw new CorruptIndexException(
                    "invalid segment count: " + numSegments + " (resource: " + input + ")");
        }

        for (int seg = 0; seg < numSegments; seg++) {
            final String segName = input.readString();
            final Codec codec = Codec.forName(input.readString());
            final SegmentInfo info = codec.segmentInfoFormat().getSegmentInfoReader().read(directory, segName,
                    IOContext.READ);
            info.setCodec(codec);
            final long delGen = input.readLong();
            final int delCount = input.readInt();
            if (delCount < 0 || delCount > info.getDocCount()) {
                throw new CorruptIndexException(
                        "invalid deletion count: " + delCount + " (resource: " + input + ")");
            }
            if (segName.equals(segmentInfoName)) {
                success = true;
                return new SegmentInfoPerCommit(info, delCount, delGen);
            }
        }
        throw new IOException("Segment [" + segmentInfoName + "] not found in dir [" + directory + "]");
    } finally {
        if (success) {
            input.close();
        } else {
            // An exception is already propagating; do not let a close failure mask it.
            IOUtils.closeWhileHandlingException(input);
        }
    }
}

From source file:org.elasticsearch.index.translog.TranslogReader.java

License:Apache License

/**
 * Given a file, return a VersionedTranslogStream based on an
 * optionally-existing header in the file. If the file does not exist, or
 * has zero length, returns the latest version. If the header does not
 * exist, assumes Version 0 of the translog file format.
 * <p/>/*from  w w w.ja  va  2 s . c om*/
 *
 * @throws IOException
 */
public static ImmutableTranslogReader open(ChannelReference channelReference, Checkpoint checkpoint,
        String translogUUID) throws IOException {
    final FileChannel channel = channelReference.getChannel();
    final Path path = channelReference.getPath();
    assert channelReference.getGeneration() == checkpoint.generation : "expected generation: "
            + channelReference.getGeneration() + " but got: " + checkpoint.generation;

    try {
        if (checkpoint.offset == 0 && checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT) { // only old files can be empty
            return new LegacyTranslogReader(channelReference.getGeneration(), channelReference, 0);
        }

        InputStreamStreamInput headerStream = new InputStreamStreamInput(Channels.newInputStream(channel)); // don't close
        // Lucene's CodecUtil writes a magic number of 0x3FD76C17 with the
        // header, in binary this looks like:
        //
        // binary: 0011 1111 1101 0111 0110 1100 0001 0111
        // hex   :    3    f    d    7    6    c    1    7
        //
        // With version 0 of the translog, the first byte is the
        // Operation.Type, which will always be between 0-4, so we know if
        // we grab the first byte, it can be:
        // 0x3f => Lucene's magic number, so we can assume it's version 1 or later
        // 0x00 => version 0 of the translog
        //
        // otherwise the first byte of the translog is corrupted and we
        // should bail
        byte b1 = headerStream.readByte();
        if (b1 == LUCENE_CODEC_HEADER_BYTE) {
            // Read 3 more bytes, meaning a whole integer has been read
            byte b2 = headerStream.readByte();
            byte b3 = headerStream.readByte();
            byte b4 = headerStream.readByte();
            // Convert the 4 bytes that were read into an integer
            int header = ((b1 & 0xFF) << 24) + ((b2 & 0xFF) << 16) + ((b3 & 0xFF) << 8) + ((b4 & 0xFF) << 0);
            // We confirm CodecUtil's CODEC_MAGIC number (0x3FD76C17)
            // ourselves here, because it allows us to read the first
            // byte separately
            if (header != CodecUtil.CODEC_MAGIC) {
                throw new TranslogCorruptedException(
                        "translog looks like version 1 or later, but has corrupted header");
            }
            // Confirm the rest of the header using CodecUtil, extracting
            // the translog version
            int version = CodecUtil.checkHeaderNoMagic(new InputStreamDataInput(headerStream),
                    TranslogWriter.TRANSLOG_CODEC, 1, Integer.MAX_VALUE);
            switch (version) {
            case TranslogWriter.VERSION_CHECKSUMS:
                assert checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT : "expected unknown op count but got: "
                        + checkpoint.numOps;
                assert checkpoint.offset == Files.size(path) : "offset(" + checkpoint.offset + ") != file_size("
                        + Files.size(path) + ") for: " + path;
                // legacy - we still have to support it somehow
                return new LegacyTranslogReaderBase(channelReference.getGeneration(), channelReference,
                        CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC), checkpoint.offset);
            case TranslogWriter.VERSION_CHECKPOINTS:
                assert path.getFileName().toString()
                        .endsWith(Translog.TRANSLOG_FILE_SUFFIX) : "new file ends with old suffix: " + path;
                assert checkpoint.numOps > TranslogReader.UNKNOWN_OP_COUNT : "expected at least 0 operatin but got: "
                        + checkpoint.numOps;
                assert checkpoint.offset <= channel.size() : "checkpoint is inconsistent with channel length: "
                        + channel.size() + " " + checkpoint;
                int len = headerStream.readInt();
                if (len > channel.size()) {
                    throw new TranslogCorruptedException("uuid length can't be larger than the translog");
                }
                BytesRef ref = new BytesRef(len);
                ref.length = len;
                headerStream.read(ref.bytes, ref.offset, ref.length);
                BytesRef uuidBytes = new BytesRef(translogUUID);
                if (uuidBytes.bytesEquals(ref) == false) {
                    throw new TranslogCorruptedException("expected shard UUID [" + uuidBytes + "] but got: ["
                            + ref + "] this translog file belongs to a different translog");
                }
                return new ImmutableTranslogReader(channelReference.getGeneration(), channelReference,
                        ref.length + CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC)
                                + RamUsageEstimator.NUM_BYTES_INT,
                        checkpoint.offset, checkpoint.numOps);
            default:
                throw new TranslogCorruptedException(
                        "No known translog stream version: " + version + " path:" + path);
            }
        } else if (b1 == UNVERSIONED_TRANSLOG_HEADER_BYTE) {
            assert checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT : "expected unknown op count but got: "
                    + checkpoint.numOps;
            assert checkpoint.offset == Files.size(path) : "offset(" + checkpoint.offset + ") != file_size("
                    + Files.size(path) + ") for: " + path;
            return new LegacyTranslogReader(channelReference.getGeneration(), channelReference,
                    checkpoint.offset);
        } else {
            throw new TranslogCorruptedException("Invalid first byte in translog file, got: "
                    + Long.toHexString(b1) + ", expected 0x00 or 0x3f");
        }
    } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) {
        throw new TranslogCorruptedException("Translog header corrupted", e);
    }
}

From source file:org.elasticsearch.index.translog.TranslogStreams.java

License:Apache License

/**
 * Given a file, return a VersionedTranslogStream based on an
 * optionally-existing header in the file. If the file does not exist, or
 * has zero length, returns the latest version. If the header does not
 * exist, assumes Version 0 of the translog file format.
 * <p/>/*from ww w  .  j  a  va2 s. co  m*/
 * The caller is responsible for closing the TranslogStream.
 *
 * @throws IOException
 */
public static TranslogStream translogStreamFor(File translogFile) throws IOException {

    try (InputStreamStreamInput headerStream = new InputStreamStreamInput(new FileInputStream(translogFile));) {
        if (translogFile.exists() == false || translogFile.length() == 0) {
            // if it doesn't exist or has no data, use the latest version,
            // there aren't any backwards compatibility issues
            return CHECKSUMMED_TRANSLOG_STREAM;
        }
        // Lucene's CodecUtil writes a magic number of 0x3FD76C17 with the
        // header, in binary this looks like:
        //
        // binary: 0011 1111 1101 0111 0110 1100 0001 0111
        // hex   :    3    f    d    7    6    c    1    7
        //
        // With version 0 of the translog, the first byte is the
        // Operation.Type, which will always be between 0-4, so we know if
        // we grab the first byte, it can be:
        // 0x3f => Lucene's magic number, so we can assume it's version 1 or later
        // 0x00 => version 0 of the translog
        //
        // otherwise the first byte of the translog is corrupted and we
        // should bail
        byte b1 = headerStream.readByte();
        if (b1 == LUCENE_CODEC_HEADER_BYTE) {
            // Read 3 more bytes, meaning a whole integer has been read
            byte b2 = headerStream.readByte();
            byte b3 = headerStream.readByte();
            byte b4 = headerStream.readByte();
            // Convert the 4 bytes that were read into an integer
            int header = ((b1 & 0xFF) << 24) + ((b2 & 0xFF) << 16) + ((b3 & 0xFF) << 8) + ((b4 & 0xFF) << 0);
            // We confirm CodecUtil's CODEC_MAGIC number (0x3FD76C17)
            // ourselves here, because it allows us to read the first
            // byte separately
            if (header != CodecUtil.CODEC_MAGIC) {
                throw new TranslogCorruptedException(
                        "translog looks like version 1 or later, but has corrupted header");
            }
            // Confirm the rest of the header using CodecUtil, extracting
            // the translog version
            int version = CodecUtil.checkHeaderNoMagic(new InputStreamDataInput(headerStream), TRANSLOG_CODEC,
                    1, Integer.MAX_VALUE);
            switch (version) {
            case ChecksummedTranslogStream.VERSION:
                return CHECKSUMMED_TRANSLOG_STREAM;
            default:
                throw new TranslogCorruptedException("No known translog stream version: " + version);
            }
        } else if (b1 == UNVERSIONED_TRANSLOG_HEADER_BYTE) {
            return LEGACY_TRANSLOG_STREAM;
        } else {
            throw new TranslogCorruptedException("Invalid first byte in translog file, got: "
                    + Long.toHexString(b1) + ", expected 0x00 or 0x3f");
        }
    } catch (CorruptIndexException e) {
        throw new TranslogCorruptedException("Translog header corrupted", e);
    }
}