List of usage examples for org.apache.lucene.store InputStreamDataInput InputStreamDataInput
public InputStreamDataInput(InputStream is)
From source file:com.github.cstoku.neologd.unidic.lucene.analysis.ja.dict.BinaryDictionary.java
License:Apache License
protected BinaryDictionary() throws IOException { InputStream mapIS = null, dictIS = null, posIS = null; int[] targetMapOffsets = null, targetMap = null; String[] posDict = null;//from w w w.j a v a 2 s. com String[] inflFormDict = null; String[] inflTypeDict = null; ByteBuffer buffer = null; boolean success = false; try { mapIS = getResource(TARGETMAP_FILENAME_SUFFIX); mapIS = new BufferedInputStream(mapIS); DataInput in = new InputStreamDataInput(mapIS); CodecUtil.checkHeader(in, TARGETMAP_HEADER, VERSION, VERSION); targetMap = new int[in.readVInt()]; targetMapOffsets = new int[in.readVInt()]; int accum = 0, sourceId = 0; for (int ofs = 0; ofs < targetMap.length; ofs++) { final int val = in.readVInt(); if ((val & 0x01) != 0) { targetMapOffsets[sourceId] = ofs; sourceId++; } accum += val >>> 1; targetMap[ofs] = accum; } if (sourceId + 1 != targetMapOffsets.length) throw new IOException("targetMap file format broken"); targetMapOffsets[sourceId] = targetMap.length; mapIS.close(); mapIS = null; posIS = getResource(POSDICT_FILENAME_SUFFIX); posIS = new BufferedInputStream(posIS); in = new InputStreamDataInput(posIS); CodecUtil.checkHeader(in, POSDICT_HEADER, VERSION, VERSION); int posSize = in.readVInt(); posDict = new String[posSize]; inflTypeDict = new String[posSize]; inflFormDict = new String[posSize]; for (int j = 0; j < posSize; j++) { posDict[j] = in.readString(); inflTypeDict[j] = in.readString(); inflFormDict[j] = in.readString(); // this is how we encode null inflections if (inflTypeDict[j].length() == 0) { inflTypeDict[j] = null; } if (inflFormDict[j].length() == 0) { inflFormDict[j] = null; } } posIS.close(); posIS = null; dictIS = getResource(DICT_FILENAME_SUFFIX); // no buffering here, as we load in one large buffer in = new InputStreamDataInput(dictIS); CodecUtil.checkHeader(in, DICT_HEADER, VERSION, VERSION); final int size = in.readVInt(); final ByteBuffer tmpBuffer = ByteBuffer.allocateDirect(size); final ReadableByteChannel channel = Channels.newChannel(dictIS); final int read = channel.read(tmpBuffer); if (read != size) { throw new EOFException("Cannot read whole dictionary"); } dictIS.close(); dictIS = null; buffer = tmpBuffer.asReadOnlyBuffer(); success = true; } finally { if (success) { IOUtils.close(mapIS, posIS, dictIS); } else { IOUtils.closeWhileHandlingException(mapIS, posIS, dictIS); } } this.targetMap = targetMap; this.targetMapOffsets = targetMapOffsets; this.posDict = posDict; this.inflTypeDict = inflTypeDict; this.inflFormDict = inflFormDict; this.buffer = buffer; }
From source file:com.github.cstoku.neologd.unidic.lucene.analysis.ja.dict.CharacterDefinition.java
License:Apache License
private CharacterDefinition() throws IOException { InputStream is = null;/*from w w w . java2 s . co m*/ boolean success = false; try { is = BinaryDictionary.getClassResource(getClass(), FILENAME_SUFFIX); is = new BufferedInputStream(is); final DataInput in = new InputStreamDataInput(is); CodecUtil.checkHeader(in, HEADER, VERSION, VERSION); in.readBytes(characterCategoryMap, 0, characterCategoryMap.length); for (int i = 0; i < CLASS_COUNT; i++) { final byte b = in.readByte(); invokeMap[i] = (b & 0x01) != 0; groupMap[i] = (b & 0x02) != 0; } success = true; } finally { if (success) { IOUtils.close(is); } else { IOUtils.closeWhileHandlingException(is); } } }
From source file:com.github.cstoku.neologd.unidic.lucene.analysis.ja.dict.ConnectionCosts.java
License:Apache License
private ConnectionCosts() throws IOException { InputStream is = null;//from w ww. jav a 2s . c om short[][] costs = null; boolean success = false; try { is = BinaryDictionary.getClassResource(getClass(), FILENAME_SUFFIX); is = new BufferedInputStream(is); final DataInput in = new InputStreamDataInput(is); CodecUtil.checkHeader(in, HEADER, VERSION, VERSION); int forwardSize = in.readVInt(); int backwardSize = in.readVInt(); costs = new short[backwardSize][forwardSize]; int accum = 0; for (int j = 0; j < costs.length; j++) { final short[] a = costs[j]; for (int i = 0; i < a.length; i++) { accum += in.readZInt(); a[i] = (short) accum; } } success = true; } finally { if (success) { IOUtils.close(is); } else { IOUtils.closeWhileHandlingException(is); } } this.costs = costs; }
From source file:com.github.cstoku.neologd.unidic.lucene.analysis.ja.dict.TokenInfoDictionary.java
License:Apache License
private TokenInfoDictionary() throws IOException { super();//from w ww .j a va 2 s .c o m InputStream is = null; FST<Long> fst = null; boolean success = false; try { is = getResource(FST_FILENAME_SUFFIX); is = new BufferedInputStream(is); fst = new FST<>(new InputStreamDataInput(is), PositiveIntOutputs.getSingleton()); success = true; } finally { if (success) { IOUtils.close(is); } else { IOUtils.closeWhileHandlingException(is); } } // TODO: some way to configure? this.fst = new TokenInfoFST(fst, true); }
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.OakDirectoryTest.java
License:Apache License
private static void writeFile(Directory directory, String fileName, long size) throws Exception { IndexOutput o = directory.createOutput(fileName, IOContext.DEFAULT); o.copyBytes(new InputStreamDataInput(new NullInputStream(size)), size); o.close();/*from www . j a v a2 s .com*/ }
From source file:org.elasticsearch.index.translog.Checkpoint.java
License:Apache License
public static Checkpoint read(Path path) throws IOException { try (InputStream in = Files.newInputStream(path)) { return new Checkpoint(new InputStreamDataInput(in)); }//from w ww . j a v a2 s. c o m }
From source file:org.elasticsearch.index.translog.ChecksummedTranslogStream.java
License:Apache License
@Override public StreamInput openInput(File translogFile) throws IOException { final FileInputStream fileInputStream = new FileInputStream(translogFile); boolean success = false; try {//from w w w . j av a2s. c o m final InputStreamStreamInput in = new InputStreamStreamInput(fileInputStream); CodecUtil.checkHeader(new InputStreamDataInput(in), TranslogStreams.TRANSLOG_CODEC, VERSION, VERSION); success = true; return in; } catch (EOFException e) { throw new TruncatedTranslogException("translog header truncated", e); } catch (IOException e) { throw new TranslogCorruptedException("translog header corrupted", e); } finally { if (success == false) { IOUtils.closeWhileHandlingException(fileInputStream); } } }
From source file:org.elasticsearch.index.translog.TranslogHeader.java
License:Apache License
/** * Read a translog header from the given path and file channel *//*w w w.j a v a 2 s . co m*/ static TranslogHeader read(final String translogUUID, final Path path, final FileChannel channel) throws IOException { // This input is intentionally not closed because closing it will close the FileChannel. final BufferedChecksumStreamInput in = new BufferedChecksumStreamInput( new InputStreamStreamInput(java.nio.channels.Channels.newInputStream(channel), channel.size()), path.toString()); final int version; try { version = CodecUtil.checkHeader(new InputStreamDataInput(in), TRANSLOG_CODEC, VERSION_CHECKSUMS, VERSION_PRIMARY_TERM); } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) { tryReportOldVersionError(path, channel); throw new TranslogCorruptedException(path.toString(), "translog header corrupted", e); } if (version == VERSION_CHECKSUMS) { throw new IllegalStateException("pre-2.0 translog found [" + path + "]"); } // Read the translogUUID final int uuidLen = in.readInt(); if (uuidLen > channel.size()) { throw new TranslogCorruptedException(path.toString(), "UUID length can't be larger than the translog"); } final BytesRef uuid = new BytesRef(uuidLen); uuid.length = uuidLen; in.read(uuid.bytes, uuid.offset, uuid.length); final BytesRef expectedUUID = new BytesRef(translogUUID); if (uuid.bytesEquals(expectedUUID) == false) { throw new TranslogCorruptedException(path.toString(), "expected shard UUID " + expectedUUID + " but got: " + uuid + " this translog file belongs to a different translog"); } // Read the primary term final long primaryTerm; if (version == VERSION_PRIMARY_TERM) { primaryTerm = in.readLong(); assert primaryTerm >= 0 : "Primary term must be non-negative [" + primaryTerm + "]; translog path [" + path + "]"; } else { assert version == VERSION_CHECKPOINTS : "Unknown header version [" + version + "]"; primaryTerm = UNKNOWN_PRIMARY_TERM; } // Verify the checksum if (version >= VERSION_PRIMARY_TERM) { Translog.verifyChecksum(in); } final int headerSizeInBytes = headerSizeInBytes(version, uuid.length); assert channel.position() == headerSizeInBytes : "Header is not fully read; header size [" + headerSizeInBytes + "], position [" + channel.position() + "]"; return new TranslogHeader(translogUUID, primaryTerm, headerSizeInBytes); }
From source file:org.elasticsearch.index.translog.TranslogReader.java
License:Apache License
/** * Given a file, return a VersionedTranslogStream based on an * optionally-existing header in the file. If the file does not exist, or * has zero length, returns the latest version. If the header does not * exist, assumes Version 0 of the translog file format. * <p/>//from w ww. j a v a 2 s . c om * * @throws IOException */ public static ImmutableTranslogReader open(ChannelReference channelReference, Checkpoint checkpoint, String translogUUID) throws IOException { final FileChannel channel = channelReference.getChannel(); final Path path = channelReference.getPath(); assert channelReference.getGeneration() == checkpoint.generation : "expected generation: " + channelReference.getGeneration() + " but got: " + checkpoint.generation; try { if (checkpoint.offset == 0 && checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT) { // only old files can be empty return new LegacyTranslogReader(channelReference.getGeneration(), channelReference, 0); } InputStreamStreamInput headerStream = new InputStreamStreamInput(Channels.newInputStream(channel)); // don't close // Lucene's CodecUtil writes a magic number of 0x3FD76C17 with the // header, in binary this looks like: // // binary: 0011 1111 1101 0111 0110 1100 0001 0111 // hex : 3 f d 7 6 c 1 7 // // With version 0 of the translog, the first byte is the // Operation.Type, which will always be between 0-4, so we know if // we grab the first byte, it can be: // 0x3f => Lucene's magic number, so we can assume it's version 1 or later // 0x00 => version 0 of the translog // // otherwise the first byte of the translog is corrupted and we // should bail byte b1 = headerStream.readByte(); if (b1 == LUCENE_CODEC_HEADER_BYTE) { // Read 3 more bytes, meaning a whole integer has been read byte b2 = headerStream.readByte(); byte b3 = headerStream.readByte(); byte b4 = headerStream.readByte(); // Convert the 4 bytes that were read into an integer int header = ((b1 & 0xFF) << 24) + ((b2 & 0xFF) << 16) + ((b3 & 0xFF) << 8) + ((b4 & 0xFF) << 0); // We confirm CodecUtil's CODEC_MAGIC number (0x3FD76C17) // ourselves here, because it allows us to read the first // byte separately if (header != CodecUtil.CODEC_MAGIC) { throw new TranslogCorruptedException( "translog looks like version 1 or later, but has corrupted header"); } // Confirm the rest of the header using CodecUtil, extracting // the translog version int version = CodecUtil.checkHeaderNoMagic(new InputStreamDataInput(headerStream), TranslogWriter.TRANSLOG_CODEC, 1, Integer.MAX_VALUE); switch (version) { case TranslogWriter.VERSION_CHECKSUMS: assert checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT : "expected unknown op count but got: " + checkpoint.numOps; assert checkpoint.offset == Files.size(path) : "offset(" + checkpoint.offset + ") != file_size(" + Files.size(path) + ") for: " + path; // legacy - we still have to support it somehow return new LegacyTranslogReaderBase(channelReference.getGeneration(), channelReference, CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC), checkpoint.offset); case TranslogWriter.VERSION_CHECKPOINTS: assert path.getFileName().toString() .endsWith(Translog.TRANSLOG_FILE_SUFFIX) : "new file ends with old suffix: " + path; assert checkpoint.numOps > TranslogReader.UNKNOWN_OP_COUNT : "expected at least 0 operatin but got: " + checkpoint.numOps; assert checkpoint.offset <= channel.size() : "checkpoint is inconsistent with channel length: " + channel.size() + " " + checkpoint; int len = headerStream.readInt(); if (len > channel.size()) { throw new TranslogCorruptedException("uuid length can't be larger than the translog"); } BytesRef ref = new BytesRef(len); ref.length = len; headerStream.read(ref.bytes, ref.offset, ref.length); BytesRef uuidBytes = new BytesRef(translogUUID); if (uuidBytes.bytesEquals(ref) == false) { throw new TranslogCorruptedException("expected shard UUID [" + uuidBytes + "] but got: [" + ref + "] this translog file belongs to a different translog"); } return new ImmutableTranslogReader(channelReference.getGeneration(), channelReference, ref.length + CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC) + RamUsageEstimator.NUM_BYTES_INT, checkpoint.offset, checkpoint.numOps); default: throw new TranslogCorruptedException( "No known translog stream version: " + version + " path:" + path); } } else if (b1 == UNVERSIONED_TRANSLOG_HEADER_BYTE) { assert checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT : "expected unknown op count but got: " + checkpoint.numOps; assert checkpoint.offset == Files.size(path) : "offset(" + checkpoint.offset + ") != file_size(" + Files.size(path) + ") for: " + path; return new LegacyTranslogReader(channelReference.getGeneration(), channelReference, checkpoint.offset); } else { throw new TranslogCorruptedException("Invalid first byte in translog file, got: " + Long.toHexString(b1) + ", expected 0x00 or 0x3f"); } } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) { throw new TranslogCorruptedException("Translog header corrupted", e); } }
From source file:org.elasticsearch.index.translog.TranslogStreams.java
License:Apache License
/** * Given a file, return a VersionedTranslogStream based on an * optionally-existing header in the file. If the file does not exist, or * has zero length, returns the latest version. If the header does not * exist, assumes Version 0 of the translog file format. * <p/>//w ww.j a va 2s . c om * The caller is responsible for closing the TranslogStream. * * @throws IOException */ public static TranslogStream translogStreamFor(File translogFile) throws IOException { try (InputStreamStreamInput headerStream = new InputStreamStreamInput(new FileInputStream(translogFile));) { if (translogFile.exists() == false || translogFile.length() == 0) { // if it doesn't exist or has no data, use the latest version, // there aren't any backwards compatibility issues return CHECKSUMMED_TRANSLOG_STREAM; } // Lucene's CodecUtil writes a magic number of 0x3FD76C17 with the // header, in binary this looks like: // // binary: 0011 1111 1101 0111 0110 1100 0001 0111 // hex : 3 f d 7 6 c 1 7 // // With version 0 of the translog, the first byte is the // Operation.Type, which will always be between 0-4, so we know if // we grab the first byte, it can be: // 0x3f => Lucene's magic number, so we can assume it's version 1 or later // 0x00 => version 0 of the translog // // otherwise the first byte of the translog is corrupted and we // should bail byte b1 = headerStream.readByte(); if (b1 == LUCENE_CODEC_HEADER_BYTE) { // Read 3 more bytes, meaning a whole integer has been read byte b2 = headerStream.readByte(); byte b3 = headerStream.readByte(); byte b4 = headerStream.readByte(); // Convert the 4 bytes that were read into an integer int header = ((b1 & 0xFF) << 24) + ((b2 & 0xFF) << 16) + ((b3 & 0xFF) << 8) + ((b4 & 0xFF) << 0); // We confirm CodecUtil's CODEC_MAGIC number (0x3FD76C17) // ourselves here, because it allows us to read the first // byte separately if (header != CodecUtil.CODEC_MAGIC) { throw new TranslogCorruptedException( "translog looks like version 1 or later, but has corrupted header"); } // Confirm the rest of the header using CodecUtil, extracting // the translog version int version = CodecUtil.checkHeaderNoMagic(new InputStreamDataInput(headerStream), TRANSLOG_CODEC, 1, Integer.MAX_VALUE); switch (version) { case ChecksummedTranslogStream.VERSION: return CHECKSUMMED_TRANSLOG_STREAM; default: throw new TranslogCorruptedException("No known translog stream version: " + version); } } else if (b1 == UNVERSIONED_TRANSLOG_HEADER_BYTE) { return LEGACY_TRANSLOG_STREAM; } else { throw new TranslogCorruptedException("Invalid first byte in translog file, got: " + Long.toHexString(b1) + ", expected 0x00 or 0x3f"); } } catch (CorruptIndexException e) { throw new TranslogCorruptedException("Translog header corrupted", e); } }