List of usage examples for org.apache.lucene.codecs CodecUtil headerLength
public static int headerLength(String codec)
From source file:com.lucure.core.codec.CompressingStoredFieldsReader.java
License:Apache License
/** Sole constructor. */ public CompressingStoredFieldsReader(Directory d, SegmentInfo si, String segmentSuffix, FieldInfos fn, IOContext context, String formatName, CompressionMode compressionMode) throws IOException { this.compressionMode = compressionMode; final String segment = si.name; boolean success = false; fieldInfos = fn;//from w ww . ja v a2 s .c om numDocs = si.getDocCount(); ChecksumIndexInput indexStream = null; try { final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION); final String fieldsStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION); // Load the index into memory indexStream = d.openChecksumInput(indexStreamFN, context); final String codecNameIdx = formatName + CODEC_SFX_IDX; version = CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT); assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer(); indexReader = new CompressingStoredFieldsIndexReader(indexStream, si); long maxPointer = -1; if (version >= VERSION_CHECKSUM) { maxPointer = indexStream.readVLong(); CodecUtil.checkFooter(indexStream); } else { CodecUtil.checkEOF(indexStream); } indexStream.close(); indexStream = null; // Open the data file and read metadata fieldsStream = d.openInput(fieldsStreamFN, context); if (version >= VERSION_CHECKSUM) { if (maxPointer + CodecUtil.footerLength() != fieldsStream.length()) { throw new CorruptIndexException("Invalid fieldsStream maxPointer (file truncated?): maxPointer=" + maxPointer + ", length=" + fieldsStream.length()); } } else { maxPointer = fieldsStream.length(); } this.maxPointer = maxPointer; final String codecNameDat = formatName + CODEC_SFX_DAT; final int fieldsVersion = CodecUtil.checkHeader(fieldsStream, codecNameDat, VERSION_START, VERSION_CURRENT); if (version != fieldsVersion) { throw new CorruptIndexException("Version mismatch between stored fields index and data: " + version + " != " + fieldsVersion); } assert CodecUtil.headerLength(codecNameDat) == fieldsStream.getFilePointer(); if (version >= VERSION_BIG_CHUNKS) { chunkSize = fieldsStream.readVInt(); } else { chunkSize = -1; } packedIntsVersion = fieldsStream.readVInt(); decompressor = compressionMode.newDecompressor(); this.bytes = new BytesRef(); if (version >= VERSION_CHECKSUM) { // NOTE: data file is too costly to verify checksum against all the bytes on open, // but for now we at least verify proper structure of the checksum footer: which looks // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption // such as file truncation. CodecUtil.retrieveChecksum(fieldsStream); } success = true; } finally { if (!success) { IOUtils.closeWhileHandlingException(this, indexStream); } } }
From source file:com.lucure.core.codec.CompressingStoredFieldsWriter.java
License:Apache License
/** Sole constructor. */ public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si, String segmentSuffix, IOContext context, String formatName, CompressionMode compressionMode, int chunkSize) throws IOException { assert directory != null; this.directory = directory; this.segment = si.name; this.segmentSuffix = segmentSuffix; this.compressionMode = compressionMode; this.compressor = compressionMode.newCompressor(); this.chunkSize = chunkSize; this.docBase = 0; this.bufferedDocs = new GrowableByteArrayDataOutput(chunkSize); this.numStoredFields = new int[16]; this.endOffsets = new int[16]; this.numBufferedDocs = 0; boolean success = false; IndexOutput indexStream = directory.createOutput( IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION), context); try {/*from ww w . java 2 s. co m*/ fieldsStream = directory.createOutput( IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION), context); final String codecNameIdx = formatName + CODEC_SFX_IDX; final String codecNameDat = formatName + CODEC_SFX_DAT; CodecUtil.writeHeader(indexStream, codecNameIdx, VERSION_CURRENT); CodecUtil.writeHeader(fieldsStream, codecNameDat, VERSION_CURRENT); assert CodecUtil.headerLength(codecNameDat) == fieldsStream.getFilePointer(); assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer(); indexWriter = new CompressingStoredFieldsIndexWriter(indexStream); indexStream = null; fieldsStream.writeVInt(chunkSize); fieldsStream.writeVInt(PackedInts.VERSION_CURRENT); success = true; } finally { if (!success) { IOUtils.closeWhileHandlingException(indexStream); abort(); } } }
From source file:org.elasticsearch.index.store.CorruptedTranslogTests.java
License:Apache License
private void corruptRandomTranslogFiles() throws IOException { ClusterState state = client().admin().cluster().prepareState().get().getState(); GroupShardsIterator shardIterators = state.getRoutingNodes().getRoutingTable() .activePrimaryShardsGrouped(new String[] { "test" }, false); List<ShardIterator> iterators = Lists.newArrayList(shardIterators); ShardIterator shardIterator = RandomPicks.randomFrom(getRandom(), iterators); ShardRouting shardRouting = shardIterator.nextOrNull(); assertNotNull(shardRouting);//from w ww .j a v a 2 s . c o m assertTrue(shardRouting.primary()); assertTrue(shardRouting.assignedToNode()); String nodeId = shardRouting.currentNodeId(); NodesStatsResponse nodeStatses = client().admin().cluster().prepareNodesStats(nodeId).setFs(true).get(); Set<File> files = new TreeSet<>(); // treeset makes sure iteration order is deterministic for (FsStats.Info info : nodeStatses.getNodes()[0].getFs()) { String path = info.getPath(); final String relativeDataLocationPath = "indices/test/" + Integer.toString(shardRouting.getId()) + "/translog"; File file = new File(path, relativeDataLocationPath); logger.info("--> path: {}", file); files.addAll(Arrays.asList(file.listFiles(new FileFilter() { @Override public boolean accept(File pathname) { logger.info("--> File: {}", pathname); return pathname.isFile() && pathname.getName().startsWith("translog-"); } }))); } File fileToCorrupt = null; if (!files.isEmpty()) { int corruptions = randomIntBetween(5, 20); for (int i = 0; i < corruptions; i++) { fileToCorrupt = RandomPicks.randomFrom(getRandom(), files); try (FileChannel raf = FileChannel.open(fileToCorrupt.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE)) { // read raf.position(randomIntBetween(0, (int) Math .min(CodecUtil.headerLength(TranslogStreams.TRANSLOG_CODEC), raf.size() - 1))); // only corrupt the header to ensure we actually fail long filePointer = raf.position(); ByteBuffer bb = ByteBuffer.wrap(new byte[1]); raf.read(bb); bb.flip(); // corrupt byte oldValue = bb.get(0); byte newValue = (byte) (oldValue + 1); bb.put(0, newValue); // rewrite raf.position(filePointer); raf.write(bb); logger.info("--> corrupting file {} -- flipping at position {} from {} to {} file: {}", fileToCorrupt, filePointer, Integer.toHexString(oldValue), Integer.toHexString(newValue), fileToCorrupt); } } } assertThat("no file corrupted", fileToCorrupt, notNullValue()); }
From source file:org.elasticsearch.index.translog.ChecksummedTranslogStream.java
License:Apache License
@Override public int writeHeader(FileChannel channel) throws IOException { // This OutputStreamDataOutput is intentionally not closed because // closing it will close the FileChannel OutputStreamDataOutput out = new OutputStreamDataOutput(Channels.newOutputStream(channel)); CodecUtil.writeHeader(out, TranslogStreams.TRANSLOG_CODEC, VERSION); return CodecUtil.headerLength(TranslogStreams.TRANSLOG_CODEC); }
From source file:org.elasticsearch.index.translog.TranslogHeader.java
License:Apache License
private static int headerSizeInBytes(int version, int uuidLength) { int size = CodecUtil.headerLength(TRANSLOG_CODEC); size += Integer.BYTES + uuidLength; // uuid if (version >= VERSION_PRIMARY_TERM) { size += Long.BYTES; // primary term size += Integer.BYTES; // checksum }//www . jav a 2 s .c o m return size; }
From source file:org.elasticsearch.index.translog.TranslogReader.java
License:Apache License
/** * Given a file, return a VersionedTranslogStream based on an * optionally-existing header in the file. If the file does not exist, or * has zero length, returns the latest version. If the header does not * exist, assumes Version 0 of the translog file format. * <p/>//from w ww. j av a 2s . c o m * * @throws IOException */ public static ImmutableTranslogReader open(ChannelReference channelReference, Checkpoint checkpoint, String translogUUID) throws IOException { final FileChannel channel = channelReference.getChannel(); final Path path = channelReference.getPath(); assert channelReference.getGeneration() == checkpoint.generation : "expected generation: " + channelReference.getGeneration() + " but got: " + checkpoint.generation; try { if (checkpoint.offset == 0 && checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT) { // only old files can be empty return new LegacyTranslogReader(channelReference.getGeneration(), channelReference, 0); } InputStreamStreamInput headerStream = new InputStreamStreamInput(Channels.newInputStream(channel)); // don't close // Lucene's CodecUtil writes a magic number of 0x3FD76C17 with the // header, in binary this looks like: // // binary: 0011 1111 1101 0111 0110 1100 0001 0111 // hex : 3 f d 7 6 c 1 7 // // With version 0 of the translog, the first byte is the // Operation.Type, which will always be between 0-4, so we know if // we grab the first byte, it can be: // 0x3f => Lucene's magic number, so we can assume it's version 1 or later // 0x00 => version 0 of the translog // // otherwise the first byte of the translog is corrupted and we // should bail byte b1 = headerStream.readByte(); if (b1 == LUCENE_CODEC_HEADER_BYTE) { // Read 3 more bytes, meaning a whole integer has been read byte b2 = headerStream.readByte(); byte b3 = headerStream.readByte(); byte b4 = headerStream.readByte(); // Convert the 4 bytes that were read into an integer int header = ((b1 & 0xFF) << 24) + ((b2 & 0xFF) << 16) + ((b3 & 0xFF) << 8) + ((b4 & 0xFF) << 0); // We confirm CodecUtil's CODEC_MAGIC number (0x3FD76C17) // ourselves here, because it allows us to read the first // byte separately if (header != CodecUtil.CODEC_MAGIC) { throw new TranslogCorruptedException( "translog looks like version 1 or later, but has corrupted header"); } // Confirm the rest of the header using CodecUtil, extracting // the translog version int version = CodecUtil.checkHeaderNoMagic(new InputStreamDataInput(headerStream), TranslogWriter.TRANSLOG_CODEC, 1, Integer.MAX_VALUE); switch (version) { case TranslogWriter.VERSION_CHECKSUMS: assert checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT : "expected unknown op count but got: " + checkpoint.numOps; assert checkpoint.offset == Files.size(path) : "offset(" + checkpoint.offset + ") != file_size(" + Files.size(path) + ") for: " + path; // legacy - we still have to support it somehow return new LegacyTranslogReaderBase(channelReference.getGeneration(), channelReference, CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC), checkpoint.offset); case TranslogWriter.VERSION_CHECKPOINTS: assert path.getFileName().toString() .endsWith(Translog.TRANSLOG_FILE_SUFFIX) : "new file ends with old suffix: " + path; assert checkpoint.numOps > TranslogReader.UNKNOWN_OP_COUNT : "expected at least 0 operatin but got: " + checkpoint.numOps; assert checkpoint.offset <= channel.size() : "checkpoint is inconsistent with channel length: " + channel.size() + " " + checkpoint; int len = headerStream.readInt(); if (len > channel.size()) { throw new TranslogCorruptedException("uuid length can't be larger than the translog"); } BytesRef ref = new BytesRef(len); ref.length = len; headerStream.read(ref.bytes, ref.offset, ref.length); BytesRef uuidBytes = new BytesRef(translogUUID); if (uuidBytes.bytesEquals(ref) == false) { throw new TranslogCorruptedException("expected shard UUID [" + uuidBytes + "] but got: [" + ref + "] this translog file belongs to a different translog"); } return new ImmutableTranslogReader(channelReference.getGeneration(), channelReference, ref.length + CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC) + RamUsageEstimator.NUM_BYTES_INT, checkpoint.offset, checkpoint.numOps); default: throw new TranslogCorruptedException( "No known translog stream version: " + version + " path:" + path); } } else if (b1 == UNVERSIONED_TRANSLOG_HEADER_BYTE) { assert checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT : "expected unknown op count but got: " + checkpoint.numOps; assert checkpoint.offset == Files.size(path) : "offset(" + checkpoint.offset + ") != file_size(" + Files.size(path) + ") for: " + path; return new LegacyTranslogReader(channelReference.getGeneration(), channelReference, checkpoint.offset); } else { throw new TranslogCorruptedException("Invalid first byte in translog file, got: " + Long.toHexString(b1) + ", expected 0x00 or 0x3f"); } } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) { throw new TranslogCorruptedException("Translog header corrupted", e); } }
From source file:org.elasticsearch.index.translog.TranslogTests.java
License:Apache License
public void testStats() throws IOException { final long firstOperationPosition = translog.getFirstOperationPosition(); TranslogStats stats = stats();//from w ww . j av a 2 s . c o m assertThat(stats.estimatedNumberOfOperations(), equalTo(0L)); long lastSize = stats.getTranslogSizeInBytes(); assertThat((int) firstOperationPosition, greaterThan(CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC))); assertThat(lastSize, equalTo(firstOperationPosition)); TranslogStats total = new TranslogStats(); translog.add(new Translog.Index("test", "1", new byte[] { 1 })); stats = stats(); total.add(stats); assertThat(stats.estimatedNumberOfOperations(), equalTo(1L)); assertThat(stats.getTranslogSizeInBytes(), greaterThan(lastSize)); lastSize = stats.getTranslogSizeInBytes(); translog.add(new Translog.Delete(newUid("2"))); stats = stats(); total.add(stats); assertThat(stats.estimatedNumberOfOperations(), equalTo(2L)); assertThat(stats.getTranslogSizeInBytes(), greaterThan(lastSize)); lastSize = stats.getTranslogSizeInBytes(); translog.add(new Translog.Delete(newUid("3"))); translog.prepareCommit(); stats = stats(); total.add(stats); assertThat(stats.estimatedNumberOfOperations(), equalTo(3L)); assertThat(stats.getTranslogSizeInBytes(), greaterThan(lastSize)); translog.commit(); stats = stats(); total.add(stats); assertThat(stats.estimatedNumberOfOperations(), equalTo(0L)); assertThat(stats.getTranslogSizeInBytes(), equalTo(firstOperationPosition)); assertEquals(6, total.estimatedNumberOfOperations()); assertEquals(455, total.getTranslogSizeInBytes()); BytesStreamOutput out = new BytesStreamOutput(); total.writeTo(out); TranslogStats copy = new TranslogStats(); copy.readFrom(out.bytes().streamInput()); assertEquals(6, copy.estimatedNumberOfOperations()); assertEquals(455, copy.getTranslogSizeInBytes()); try (XContentBuilder builder = XContentFactory.jsonBuilder()) { builder.startObject(); copy.toXContent(builder, ToXContent.EMPTY_PARAMS); builder.endObject(); assertEquals("{\"translog\":{\"operations\":6,\"size_in_bytes\":455}}", builder.string()); } try { new TranslogStats(1, -1); fail("must be positive"); } catch (IllegalArgumentException ex) { //all well } try { new TranslogStats(-1, 1); fail("must be positive"); } catch (IllegalArgumentException ex) { //all well } }
From source file:org.elasticsearch.index.translog.TranslogWriter.java
License:Apache License
public static TranslogWriter create(Type type, ShardId shardId, String translogUUID, long fileGeneration, Path file, Callback<ChannelReference> onClose, int bufferSize, ChannelFactory channelFactory) throws IOException { final BytesRef ref = new BytesRef(translogUUID); final int headerLength = CodecUtil.headerLength(TRANSLOG_CODEC) + ref.length + RamUsageEstimator.NUM_BYTES_INT; final FileChannel channel = channelFactory.open(file); try {/*ww w .ja v a2 s .c om*/ // This OutputStreamDataOutput is intentionally not closed because // closing it will close the FileChannel final OutputStreamDataOutput out = new OutputStreamDataOutput( java.nio.channels.Channels.newOutputStream(channel)); CodecUtil.writeHeader(out, TRANSLOG_CODEC, VERSION); out.writeInt(ref.length); out.writeBytes(ref.bytes, ref.offset, ref.length); channel.force(false); writeCheckpoint(headerLength, 0, file.getParent(), fileGeneration, StandardOpenOption.WRITE); final TranslogWriter writer = type.create(shardId, fileGeneration, new ChannelReference(file, fileGeneration, channel, onClose), bufferSize); return writer; } catch (Throwable throwable) { IOUtils.closeWhileHandlingException(channel); try { Files.delete(file); // remove the file as well } catch (IOException ex) { throwable.addSuppressed(ex); } throw throwable; } }