Example usage for org.apache.lucene.store InputStreamDataInput InputStreamDataInput

List of usage examples for org.apache.lucene.store InputStreamDataInput InputStreamDataInput

Introduction

On this page you can find example usages of the org.apache.lucene.store InputStreamDataInput constructor, InputStreamDataInput(InputStream).

Prototype

public InputStreamDataInput(InputStream is) 

Source Link

Usage

From source file:com.github.cstoku.neologd.unidic.lucene.analysis.ja.dict.BinaryDictionary.java

License:Apache License

/**
 * Loads the three binary resources backing this dictionary: the target map,
 * the part-of-speech dictionary, and the main dictionary buffer.
 *
 * @throws IOException if a resource is missing, truncated, or has a broken header
 */
protected BinaryDictionary() throws IOException {
    InputStream mapIS = null, dictIS = null, posIS = null;
    int[] targetMapOffsets = null, targetMap = null;
    String[] posDict = null;
    String[] inflFormDict = null;
    String[] inflTypeDict = null;
    ByteBuffer buffer = null;
    boolean success = false;
    try {
        mapIS = getResource(TARGETMAP_FILENAME_SUFFIX);
        mapIS = new BufferedInputStream(mapIS);
        DataInput in = new InputStreamDataInput(mapIS);
        CodecUtil.checkHeader(in, TARGETMAP_HEADER, VERSION, VERSION);
        targetMap = new int[in.readVInt()];
        targetMapOffsets = new int[in.readVInt()];
        int accum = 0, sourceId = 0;
        for (int ofs = 0; ofs < targetMap.length; ofs++) {
            final int val = in.readVInt();
            // low bit marks the start of a new source id; the remaining bits are a delta
            if ((val & 0x01) != 0) {
                targetMapOffsets[sourceId] = ofs;
                sourceId++;
            }
            accum += val >>> 1;
            targetMap[ofs] = accum;
        }
        if (sourceId + 1 != targetMapOffsets.length)
            throw new IOException("targetMap file format broken");
        targetMapOffsets[sourceId] = targetMap.length;
        mapIS.close();
        mapIS = null;

        posIS = getResource(POSDICT_FILENAME_SUFFIX);
        posIS = new BufferedInputStream(posIS);
        in = new InputStreamDataInput(posIS);
        CodecUtil.checkHeader(in, POSDICT_HEADER, VERSION, VERSION);
        int posSize = in.readVInt();
        posDict = new String[posSize];
        inflTypeDict = new String[posSize];
        inflFormDict = new String[posSize];
        for (int j = 0; j < posSize; j++) {
            posDict[j] = in.readString();
            inflTypeDict[j] = in.readString();
            inflFormDict[j] = in.readString();
            // this is how we encode null inflections
            if (inflTypeDict[j].length() == 0) {
                inflTypeDict[j] = null;
            }
            if (inflFormDict[j].length() == 0) {
                inflFormDict[j] = null;
            }
        }
        posIS.close();
        posIS = null;

        dictIS = getResource(DICT_FILENAME_SUFFIX);
        // no buffering here, as we load in one large buffer
        in = new InputStreamDataInput(dictIS);
        CodecUtil.checkHeader(in, DICT_HEADER, VERSION, VERSION);
        final int size = in.readVInt();
        final ByteBuffer tmpBuffer = ByteBuffer.allocateDirect(size);
        final ReadableByteChannel channel = Channels.newChannel(dictIS);
        // ReadableByteChannel.read may legally return before filling the buffer,
        // so loop until the buffer is full; -1 means the stream ended early.
        while (tmpBuffer.hasRemaining()) {
            if (channel.read(tmpBuffer) < 0) {
                throw new EOFException("Cannot read whole dictionary");
            }
        }
        dictIS.close();
        dictIS = null;
        buffer = tmpBuffer.asReadOnlyBuffer();
        success = true;
    } finally {
        if (success) {
            IOUtils.close(mapIS, posIS, dictIS);
        } else {
            IOUtils.closeWhileHandlingException(mapIS, posIS, dictIS);
        }
    }

    this.targetMap = targetMap;
    this.targetMapOffsets = targetMapOffsets;
    this.posDict = posDict;
    this.inflTypeDict = inflTypeDict;
    this.inflFormDict = inflFormDict;
    this.buffer = buffer;
}

From source file:com.github.cstoku.neologd.unidic.lucene.analysis.ja.dict.CharacterDefinition.java

License:Apache License

/**
 * Loads the character category and class-flag tables from the bundled resource.
 *
 * @throws IOException if the resource is missing or its header is invalid
 */
private CharacterDefinition() throws IOException {
    InputStream stream = null;
    boolean loaded = false;
    try {
        stream = BinaryDictionary.getClassResource(getClass(), FILENAME_SUFFIX);
        stream = new BufferedInputStream(stream);
        final DataInput input = new InputStreamDataInput(stream);
        CodecUtil.checkHeader(input, HEADER, VERSION, VERSION);
        input.readBytes(characterCategoryMap, 0, characterCategoryMap.length);
        // each class packs its "invoke" (bit 0) and "group" (bit 1) flags into one byte
        for (int classId = 0; classId < CLASS_COUNT; classId++) {
            final byte flags = input.readByte();
            invokeMap[classId] = (flags & 0x01) != 0;
            groupMap[classId] = (flags & 0x02) != 0;
        }
        loaded = true;
    } finally {
        if (loaded) {
            IOUtils.close(stream);
        } else {
            IOUtils.closeWhileHandlingException(stream);
        }
    }
}

From source file:com.github.cstoku.neologd.unidic.lucene.analysis.ja.dict.ConnectionCosts.java

License:Apache License

/**
 * Loads the connection-cost matrix from the bundled resource.
 * Costs are stored as zig-zag-encoded running deltas over the whole matrix.
 *
 * @throws IOException if the resource is missing or its header is invalid
 */
private ConnectionCosts() throws IOException {
    InputStream stream = null;
    short[][] matrix = null;
    boolean loaded = false;
    try {
        stream = BinaryDictionary.getClassResource(getClass(), FILENAME_SUFFIX);
        stream = new BufferedInputStream(stream);
        final DataInput input = new InputStreamDataInput(stream);
        CodecUtil.checkHeader(input, HEADER, VERSION, VERSION);
        final int forwardSize = input.readVInt();
        final int backwardSize = input.readVInt();
        matrix = new short[backwardSize][forwardSize];
        // running delta continues across row boundaries
        int running = 0;
        for (final short[] row : matrix) {
            for (int i = 0; i < row.length; i++) {
                running += input.readZInt();
                row[i] = (short) running;
            }
        }
        loaded = true;
    } finally {
        if (loaded) {
            IOUtils.close(stream);
        } else {
            IOUtils.closeWhileHandlingException(stream);
        }
    }

    this.costs = matrix;
}

From source file:com.github.cstoku.neologd.unidic.lucene.analysis.ja.dict.TokenInfoDictionary.java

License:Apache License

/**
 * Loads the token-info FST from the bundled resource and wraps it
 * in a {@code TokenInfoFST}.
 *
 * @throws IOException if the FST resource is missing or unreadable
 */
private TokenInfoDictionary() throws IOException {
    super();
    InputStream stream = null;
    FST<Long> dictionaryFst = null;
    boolean loaded = false;
    try {
        stream = getResource(FST_FILENAME_SUFFIX);
        stream = new BufferedInputStream(stream);
        dictionaryFst = new FST<>(new InputStreamDataInput(stream), PositiveIntOutputs.getSingleton());
        loaded = true;
    } finally {
        if (loaded) {
            IOUtils.close(stream);
        } else {
            IOUtils.closeWhileHandlingException(stream);
        }
    }
    // TODO: some way to configure?
    this.fst = new TokenInfoFST(dictionaryFst, true);
}

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.OakDirectoryTest.java

License:Apache License

/**
 * Writes {@code size} bytes of null-stream data to {@code fileName} in the
 * given directory.
 *
 * @param directory the Lucene directory to create the file in
 * @param fileName  name of the output file
 * @param size      number of bytes to write
 * @throws Exception if the output cannot be created or written
 */
private static void writeFile(Directory directory, String fileName, long size) throws Exception {
    // try-with-resources: the original leaked the IndexOutput if copyBytes threw
    try (IndexOutput o = directory.createOutput(fileName, IOContext.DEFAULT)) {
        o.copyBytes(new InputStreamDataInput(new NullInputStream(size)), size);
    }
}

From source file:org.elasticsearch.index.translog.Checkpoint.java

License:Apache License

/**
 * Reads a {@link Checkpoint} from the file at the given path.
 *
 * @param path location of the checkpoint file
 * @return the parsed checkpoint
 * @throws IOException if the file cannot be opened or read
 */
public static Checkpoint read(Path path) throws IOException {
    try (InputStream stream = Files.newInputStream(path)) {
        final InputStreamDataInput dataInput = new InputStreamDataInput(stream);
        return new Checkpoint(dataInput);
    }
}

From source file:org.elasticsearch.index.translog.ChecksummedTranslogStream.java

License:Apache License

/**
 * Opens the given translog file, validates its codec header, and returns a
 * stream positioned just past the header. The stream is closed on any failure.
 */
@Override
public StreamInput openInput(File translogFile) throws IOException {
    final FileInputStream fileStream = new FileInputStream(translogFile);
    boolean headerOk = false;
    try {
        final InputStreamStreamInput wrapped = new InputStreamStreamInput(fileStream);
        CodecUtil.checkHeader(new InputStreamDataInput(wrapped), TranslogStreams.TRANSLOG_CODEC, VERSION, VERSION);
        headerOk = true;
        return wrapped;
    } catch (EOFException e) {
        // EOF during the header read means the file was cut short
        throw new TruncatedTranslogException("translog header truncated", e);
    } catch (IOException e) {
        throw new TranslogCorruptedException("translog header corrupted", e);
    } finally {
        if (!headerOk) {
            IOUtils.closeWhileHandlingException(fileStream);
        }
    }
}

From source file:org.elasticsearch.index.translog.TranslogHeader.java

License:Apache License

/**
 * Reads and validates a translog header from the given file channel.
 * Checks the codec header, verifies the stored translog UUID matches
 * {@code translogUUID}, reads the primary term (when the version carries one),
 * and verifies the header checksum.
 *
 * @param translogUUID the UUID this translog file is expected to belong to
 * @param path         path of the translog file (used for error messages)
 * @param channel      open channel positioned at the start of the file
 * @return the parsed header
 * @throws IOException                 on read failures
 * @throws TranslogCorruptedException  if the header is corrupt or the UUID mismatches
 */
static TranslogHeader read(final String translogUUID, final Path path, final FileChannel channel)
        throws IOException {
    // This input is intentionally not closed because closing it will close the FileChannel.
    final BufferedChecksumStreamInput in = new BufferedChecksumStreamInput(
            new InputStreamStreamInput(java.nio.channels.Channels.newInputStream(channel), channel.size()),
            path.toString());
    final int version;
    try {
        version = CodecUtil.checkHeader(new InputStreamDataInput(in), TRANSLOG_CODEC, VERSION_CHECKSUMS,
                VERSION_PRIMARY_TERM);
    } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) {
        tryReportOldVersionError(path, channel);
        throw new TranslogCorruptedException(path.toString(), "translog header corrupted", e);
    }
    if (version == VERSION_CHECKSUMS) {
        throw new IllegalStateException("pre-2.0 translog found [" + path + "]");
    }
    // Read the translogUUID
    final int uuidLen = in.readInt();
    if (uuidLen > channel.size()) {
        throw new TranslogCorruptedException(path.toString(), "UUID length can't be larger than the translog");
    }
    final BytesRef uuid = new BytesRef(uuidLen);
    uuid.length = uuidLen;
    // NOTE(review): the return value of read() is ignored here — a short read
    // would leave the UUID partially filled; presumably the buffered input
    // always fills fully, but confirm against BufferedChecksumStreamInput.
    in.read(uuid.bytes, uuid.offset, uuid.length);
    final BytesRef expectedUUID = new BytesRef(translogUUID);
    if (uuid.bytesEquals(expectedUUID) == false) {
        throw new TranslogCorruptedException(path.toString(), "expected shard UUID " + expectedUUID
                + " but got: " + uuid + " this translog file belongs to a different translog");
    }
    // Read the primary term
    final long primaryTerm;
    if (version == VERSION_PRIMARY_TERM) {
        primaryTerm = in.readLong();
        assert primaryTerm >= 0 : "Primary term must be non-negative [" + primaryTerm + "]; translog path ["
                + path + "]";
    } else {
        assert version == VERSION_CHECKPOINTS : "Unknown header version [" + version + "]";
        // older versions carry no primary term in the header
        primaryTerm = UNKNOWN_PRIMARY_TERM;
    }
    // Verify the checksum
    if (version >= VERSION_PRIMARY_TERM) {
        Translog.verifyChecksum(in);
    }
    final int headerSizeInBytes = headerSizeInBytes(version, uuid.length);
    assert channel.position() == headerSizeInBytes : "Header is not fully read; header size ["
            + headerSizeInBytes + "], position [" + channel.position() + "]";
    return new TranslogHeader(translogUUID, primaryTerm, headerSizeInBytes);
}

From source file:org.elasticsearch.index.translog.TranslogReader.java

License:Apache License

/**
 * Opens a translog reader over the given channel, dispatching on the on-disk
 * format version found in the file header. Empty legacy files get a
 * {@link LegacyTranslogReader}; versioned files have their codec header and
 * shard UUID validated first.
 *
 * <p>Note: despite the stream wrappers used internally, this returns an
 * {@code ImmutableTranslogReader} (or a legacy subclass), not a stream.
 *
 * @param channelReference the channel (and path/generation) to read from
 * @param checkpoint       checkpoint describing the expected offset/op count
 * @param translogUUID     UUID this translog file must belong to
 * @throws IOException                if reading fails
 * @throws TranslogCorruptedException if the header is corrupt or mismatched
 */
public static ImmutableTranslogReader open(ChannelReference channelReference, Checkpoint checkpoint,
        String translogUUID) throws IOException {
    final FileChannel channel = channelReference.getChannel();
    final Path path = channelReference.getPath();
    assert channelReference.getGeneration() == checkpoint.generation : "expected generation: "
            + channelReference.getGeneration() + " but got: " + checkpoint.generation;

    try {
        if (checkpoint.offset == 0 && checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT) { // only old files can be empty
            return new LegacyTranslogReader(channelReference.getGeneration(), channelReference, 0);
        }

        InputStreamStreamInput headerStream = new InputStreamStreamInput(Channels.newInputStream(channel)); // don't close
        // Lucene's CodecUtil writes a magic number of 0x3FD76C17 with the
        // header, in binary this looks like:
        //
        // binary: 0011 1111 1101 0111 0110 1100 0001 0111
        // hex   :    3    f    d    7    6    c    1    7
        //
        // With version 0 of the translog, the first byte is the
        // Operation.Type, which will always be between 0-4, so we know if
        // we grab the first byte, it can be:
        // 0x3f => Lucene's magic number, so we can assume it's version 1 or later
        // 0x00 => version 0 of the translog
        //
        // otherwise the first byte of the translog is corrupted and we
        // should bail
        byte b1 = headerStream.readByte();
        if (b1 == LUCENE_CODEC_HEADER_BYTE) {
            // Read 3 more bytes, meaning a whole integer has been read
            byte b2 = headerStream.readByte();
            byte b3 = headerStream.readByte();
            byte b4 = headerStream.readByte();
            // Convert the 4 bytes that were read into an integer
            int header = ((b1 & 0xFF) << 24) + ((b2 & 0xFF) << 16) + ((b3 & 0xFF) << 8) + ((b4 & 0xFF) << 0);
            // We confirm CodecUtil's CODEC_MAGIC number (0x3FD76C17)
            // ourselves here, because it allows us to read the first
            // byte separately
            if (header != CodecUtil.CODEC_MAGIC) {
                throw new TranslogCorruptedException(
                        "translog looks like version 1 or later, but has corrupted header");
            }
            // Confirm the rest of the header using CodecUtil, extracting
            // the translog version
            int version = CodecUtil.checkHeaderNoMagic(new InputStreamDataInput(headerStream),
                    TranslogWriter.TRANSLOG_CODEC, 1, Integer.MAX_VALUE);
            switch (version) {
            case TranslogWriter.VERSION_CHECKSUMS:
                assert checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT : "expected unknown op count but got: "
                        + checkpoint.numOps;
                assert checkpoint.offset == Files.size(path) : "offset(" + checkpoint.offset + ") != file_size("
                        + Files.size(path) + ") for: " + path;
                // legacy - we still have to support it somehow
                return new LegacyTranslogReaderBase(channelReference.getGeneration(), channelReference,
                        CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC), checkpoint.offset);
            case TranslogWriter.VERSION_CHECKPOINTS:
                assert path.getFileName().toString()
                        .endsWith(Translog.TRANSLOG_FILE_SUFFIX) : "new file ends with old suffix: " + path;
                assert checkpoint.numOps > TranslogReader.UNKNOWN_OP_COUNT : "expected at least 0 operatin but got: "
                        + checkpoint.numOps;
                assert checkpoint.offset <= channel.size() : "checkpoint is inconsistent with channel length: "
                        + channel.size() + " " + checkpoint;
                int len = headerStream.readInt();
                if (len > channel.size()) {
                    throw new TranslogCorruptedException("uuid length can't be larger than the translog");
                }
                BytesRef ref = new BytesRef(len);
                ref.length = len;
                // NOTE(review): read() return value is ignored — a short read would
                // leave the UUID buffer partially filled; confirm the stream fills fully.
                headerStream.read(ref.bytes, ref.offset, ref.length);
                BytesRef uuidBytes = new BytesRef(translogUUID);
                if (uuidBytes.bytesEquals(ref) == false) {
                    throw new TranslogCorruptedException("expected shard UUID [" + uuidBytes + "] but got: ["
                            + ref + "] this translog file belongs to a different translog");
                }
                // first operation starts after codec header + uuid length prefix + uuid bytes
                return new ImmutableTranslogReader(channelReference.getGeneration(), channelReference,
                        ref.length + CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC)
                                + RamUsageEstimator.NUM_BYTES_INT,
                        checkpoint.offset, checkpoint.numOps);
            default:
                throw new TranslogCorruptedException(
                        "No known translog stream version: " + version + " path:" + path);
            }
        } else if (b1 == UNVERSIONED_TRANSLOG_HEADER_BYTE) {
            assert checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT : "expected unknown op count but got: "
                    + checkpoint.numOps;
            assert checkpoint.offset == Files.size(path) : "offset(" + checkpoint.offset + ") != file_size("
                    + Files.size(path) + ") for: " + path;
            return new LegacyTranslogReader(channelReference.getGeneration(), channelReference,
                    checkpoint.offset);
        } else {
            throw new TranslogCorruptedException("Invalid first byte in translog file, got: "
                    + Long.toHexString(b1) + ", expected 0x00 or 0x3f");
        }
    } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) {
        throw new TranslogCorruptedException("Translog header corrupted", e);
    }
}

From source file:org.elasticsearch.index.translog.TranslogStreams.java

License:Apache License

/**
 * Given a file, return a TranslogStream based on an optionally-existing
 * header in the file. If the file does not exist, or has zero length,
 * returns the latest version. If the header does not exist, assumes
 * Version 0 of the translog file format.
 * <p/>
 * The caller is responsible for closing the TranslogStream.
 *
 * @throws IOException if the file cannot be read
 * @throws TranslogCorruptedException if the header bytes are invalid
 */
public static TranslogStream translogStreamFor(File translogFile) throws IOException {

    // Must be checked BEFORE opening the stream: the original opened the
    // FileInputStream first, so a missing file threw FileNotFoundException
    // and the "latest version" fallback below was unreachable.
    if (translogFile.exists() == false || translogFile.length() == 0) {
        // if it doesn't exist or has no data, use the latest version,
        // there aren't any backwards compatibility issues
        return CHECKSUMMED_TRANSLOG_STREAM;
    }

    try (InputStreamStreamInput headerStream = new InputStreamStreamInput(new FileInputStream(translogFile))) {
        // Lucene's CodecUtil writes a magic number of 0x3FD76C17 with the
        // header, in binary this looks like:
        //
        // binary: 0011 1111 1101 0111 0110 1100 0001 0111
        // hex   :    3    f    d    7    6    c    1    7
        //
        // With version 0 of the translog, the first byte is the
        // Operation.Type, which will always be between 0-4, so we know if
        // we grab the first byte, it can be:
        // 0x3f => Lucene's magic number, so we can assume it's version 1 or later
        // 0x00 => version 0 of the translog
        //
        // otherwise the first byte of the translog is corrupted and we
        // should bail
        byte b1 = headerStream.readByte();
        if (b1 == LUCENE_CODEC_HEADER_BYTE) {
            // Read 3 more bytes, meaning a whole integer has been read
            byte b2 = headerStream.readByte();
            byte b3 = headerStream.readByte();
            byte b4 = headerStream.readByte();
            // Convert the 4 bytes that were read into an integer
            int header = ((b1 & 0xFF) << 24) + ((b2 & 0xFF) << 16) + ((b3 & 0xFF) << 8) + ((b4 & 0xFF) << 0);
            // We confirm CodecUtil's CODEC_MAGIC number (0x3FD76C17)
            // ourselves here, because it allows us to read the first
            // byte separately
            if (header != CodecUtil.CODEC_MAGIC) {
                throw new TranslogCorruptedException(
                        "translog looks like version 1 or later, but has corrupted header");
            }
            // Confirm the rest of the header using CodecUtil, extracting
            // the translog version
            int version = CodecUtil.checkHeaderNoMagic(new InputStreamDataInput(headerStream), TRANSLOG_CODEC,
                    1, Integer.MAX_VALUE);
            switch (version) {
            case ChecksummedTranslogStream.VERSION:
                return CHECKSUMMED_TRANSLOG_STREAM;
            default:
                throw new TranslogCorruptedException("No known translog stream version: " + version);
            }
        } else if (b1 == UNVERSIONED_TRANSLOG_HEADER_BYTE) {
            return LEGACY_TRANSLOG_STREAM;
        } else {
            throw new TranslogCorruptedException("Invalid first byte in translog file, got: "
                    + Long.toHexString(b1) + ", expected 0x00 or 0x3f");
        }
    } catch (CorruptIndexException e) {
        throw new TranslogCorruptedException("Translog header corrupted", e);
    }
}