Example usage for org.apache.lucene.util.packed PackedInts VERSION_CURRENT

List of usage examples for org.apache.lucene.util.packed PackedInts VERSION_CURRENT

Introduction

On this page you can find an example of how to use org.apache.lucene.util.packed PackedInts VERSION_CURRENT.

Prototype

int VERSION_CURRENT

To view the source code for org.apache.lucene.util.packed PackedInts VERSION_CURRENT, use the source link below.

Click Source Link

Usage

From source file:com.lucure.core.codec.CompressingStoredFieldsIndexWriter.java

License:Apache License

/**
 * Sole constructor. Takes ownership of the given index output, resets the
 * per-block state, and writes the packed-ints version header so readers can
 * validate format compatibility.
 *
 * @param indexOutput destination for the stored-fields index
 * @throws IOException if writing the version header fails
 */
CompressingStoredFieldsIndexWriter(IndexOutput indexOutput) throws IOException {
    this.fieldsIndexOut = indexOutput;
    reset(); // initialize per-block state before any documents are added
    totalDocs = 0;
    docBaseDeltas = new int[BLOCK_SIZE];
    startPointerDeltas = new long[BLOCK_SIZE];
    // Version header comes first in the index stream so readers can check it.
    fieldsIndexOut.writeVInt(PackedInts.VERSION_CURRENT);
}

From source file:com.lucure.core.codec.CompressingStoredFieldsWriter.java

License:Apache License

/**
 * Sole constructor. Creates both the fields-data and fields-index outputs,
 * writes their codec headers, and hands the index stream off to a
 * {@link CompressingStoredFieldsIndexWriter}. On any failure the partially
 * created outputs are closed and {@code abort()} cleans up.
 */
public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si, String segmentSuffix,
        IOContext context, String formatName, CompressionMode compressionMode, int chunkSize)
        throws IOException {
    assert directory != null;
    this.directory = directory;
    this.segment = si.name;
    this.segmentSuffix = segmentSuffix;
    this.compressionMode = compressionMode;
    this.compressor = compressionMode.newCompressor();
    this.chunkSize = chunkSize;
    this.docBase = 0;
    this.bufferedDocs = new GrowableByteArrayDataOutput(chunkSize);
    // Per-doc bookkeeping arrays; grown on demand as docs are buffered.
    this.numStoredFields = new int[16];
    this.endOffsets = new int[16];
    this.numBufferedDocs = 0;

    boolean success = false;
    IndexOutput indexStream = directory.createOutput(
            IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION), context);
    try {
        fieldsStream = directory.createOutput(
                IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION), context);

        // Distinct codec names for the index (.fdx) and data (.fdt) streams.
        final String codecNameIdx = formatName + CODEC_SFX_IDX;
        final String codecNameDat = formatName + CODEC_SFX_DAT;
        CodecUtil.writeHeader(indexStream, codecNameIdx, VERSION_CURRENT);
        CodecUtil.writeHeader(fieldsStream, codecNameDat, VERSION_CURRENT);
        assert CodecUtil.headerLength(codecNameDat) == fieldsStream.getFilePointer();
        assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();

        // The index writer takes ownership of indexStream; null it out so the
        // finally block below does not double-close it on failure.
        indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
        indexStream = null;

        fieldsStream.writeVInt(chunkSize);
        fieldsStream.writeVInt(PackedInts.VERSION_CURRENT);

        success = true;
    } finally {
        if (!success) {
            // Close whatever we still own, then let abort() remove partial files.
            IOUtils.closeWhileHandlingException(indexStream);
            abort();
        }
    }
}

From source file:com.lucure.core.codec.ForUtil.java

License:Apache License

/**
 * Create a new {@link ForUtil} instance and save state into <code>out</code>.
 *
 * <p>For every bits-per-value from 1 to 32, picks the fastest packed-ints
 * format within the given overhead budget, caches the matching
 * encoder/decoder, and records the chosen (format, bitsPerValue) pair in
 * <code>out</code> so a reader can reconstruct the same configuration.
 */
ForUtil(float acceptableOverheadRatio, DataOutput out) throws IOException {
    // Version header first so readers can validate compatibility.
    out.writeVInt(PackedInts.VERSION_CURRENT);
    // Index 0 is unused; entries are indexed directly by bits-per-value (1..32).
    encodedSizes = new int[33];
    encoders = new PackedInts.Encoder[33];
    decoders = new PackedInts.Decoder[33];
    iterations = new int[33];

    for (int bpv = 1; bpv <= 32; ++bpv) {
        final FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(BLOCK_SIZE, bpv,
                acceptableOverheadRatio);
        assert formatAndBits.format.isSupported(formatAndBits.bitsPerValue);
        assert formatAndBits.bitsPerValue <= 32;
        encodedSizes[bpv] = encodedSize(formatAndBits.format, PackedInts.VERSION_CURRENT,
                formatAndBits.bitsPerValue);
        encoders[bpv] = PackedInts.getEncoder(formatAndBits.format, PackedInts.VERSION_CURRENT,
                formatAndBits.bitsPerValue);
        decoders[bpv] = PackedInts.getDecoder(formatAndBits.format, PackedInts.VERSION_CURRENT,
                formatAndBits.bitsPerValue);
        iterations[bpv] = computeIterations(decoders[bpv]);

        // Pack format id (upper bits) and bitsPerValue-1 (lower 5 bits) into one vint.
        out.writeVInt(formatAndBits.format.getId() << 5 | (formatAndBits.bitsPerValue - 1));
    }
}

From source file:org.apache.blur.lucene.codec.DiskDocValuesConsumer.java

License:Apache License

/**
 * Writes a numeric doc-values field: a metadata entry (field number, type
 * tag, packed-ints version, data start pointer, value count, block size)
 * followed by the block-packed values in the data stream.
 */
@Override
public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
    // First pass: count the values, because the count must be written to the
    // metadata before any value is encoded.
    long valueCount = 0;
    for (Number ignored : values) {
        valueCount++;
    }

    meta.writeVInt(field.number);
    meta.writeByte(DiskDocValuesFormat.NUMERIC);
    meta.writeVInt(PackedInts.VERSION_CURRENT);
    meta.writeLong(data.getFilePointer()); // where this field's data begins
    meta.writeVLong(valueCount);
    meta.writeVInt(BLOCK_SIZE);

    // Second pass: block-pack every value into the data stream.
    final BlockPackedWriter packedWriter = new BlockPackedWriter(data, BLOCK_SIZE);
    for (Number value : values) {
        packedWriter.add(value.longValue());
    }
    packedWriter.finish();
}

From source file:org.apache.blur.lucene.codec.DiskDocValuesConsumer.java

License:Apache License

/**
 * Writes a binary doc-values field. The raw bytes are concatenated into the
 * data stream; per-value addresses are only written (as a monotonic packed
 * block) when value lengths vary, since fixed-length values make addresses
 * implicit.
 */
@Override
public void addBinaryField(FieldInfo field, final Iterable<BytesRef> values) throws IOException {
    // write the byte[] data
    meta.writeVInt(field.number);
    meta.writeByte(DiskDocValuesFormat.BINARY);
    int minLength = Integer.MAX_VALUE;
    int maxLength = Integer.MIN_VALUE;
    final long startFP = data.getFilePointer();
    long count = 0;
    // First pass: stream the bytes out while tracking count and length bounds.
    for (BytesRef v : values) {
        minLength = Math.min(minLength, v.length);
        maxLength = Math.max(maxLength, v.length);
        data.writeBytes(v.bytes, v.offset, v.length);
        count++;
    }
    meta.writeVInt(minLength);
    meta.writeVInt(maxLength);
    meta.writeVLong(count);
    meta.writeLong(startFP);

    // if minLength == maxLength, its a fixed-length byte[], we are done (the addresses are implicit)
    // otherwise, we need to record the length fields...
    if (minLength != maxLength) {
        meta.writeLong(data.getFilePointer());
        meta.writeVInt(PackedInts.VERSION_CURRENT);
        meta.writeVInt(BLOCK_SIZE);

        // Second pass: write cumulative end offsets (monotonic, so they
        // compress well with the monotonic block-packed encoding).
        final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
        long addr = 0;
        for (BytesRef v : values) {
            addr += v.length;
            writer.add(addr);
        }
        writer.finish();
    }
}

From source file:org.apache.blur.lucene.codec.DiskDocValuesConsumer.java

License:Apache License

/**
 * Writes a sorted-set doc-values field as three parts: the ord-to-byte[]
 * dictionary (a binary field), the stream of ords (a numeric field), and a
 * per-document cumulative ord count used to slice the ord stream.
 */
@Override
public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount,
        Iterable<Number> ords) throws IOException {
    meta.writeVInt(field.number);
    meta.writeByte(DiskDocValuesFormat.SORTED_SET);
    // write the ord -> byte[] as a binary field
    addBinaryField(field, values);
    // write the stream of ords as a numeric field
    // NOTE: we could return an iterator that delta-encodes these within a doc
    addNumericField(field, ords);

    // write the doc -> ord count as a absolute index to the stream
    meta.writeVInt(field.number);
    meta.writeByte(DiskDocValuesFormat.NUMERIC);
    meta.writeVInt(PackedInts.VERSION_CURRENT);
    meta.writeLong(data.getFilePointer());
    meta.writeVLong(maxDoc); // one entry per document
    meta.writeVInt(BLOCK_SIZE);

    // Cumulative ord counts are monotonically increasing, so use the
    // monotonic block-packed encoding.
    final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
    long addr = 0;
    for (Number v : docToOrdCount) {
        addr += v.longValue();
        writer.add(addr);
    }
    writer.finish();
}

From source file:org.elasticsearch.index.fielddata.plain.PackedArrayIndexFieldData.java

License:Apache License

/**
 * Loads the numeric field data for one segment reader, choosing the most
 * memory-efficient in-memory representation: ordinals for multi-valued
 * fields, or a single packed array (dense or sparse) for single-valued
 * fields when that is smaller. Memory use is accounted against the field
 * data circuit breaker via the estimator.
 */
@Override
public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
    AtomicReader reader = context.reader();
    Terms terms = reader.terms(getFieldNames().indexName());
    PackedArrayAtomicFieldData data = null;
    // The estimator charges/credits the circuit breaker for this load.
    PackedArrayEstimator estimator = new PackedArrayEstimator(breakerService.getBreaker(), getNumericType());
    if (terms == null) {
        // Field has no terms in this segment: return an empty instance and
        // record its footprint with the breaker.
        data = PackedArrayAtomicFieldData.empty(reader.maxDoc());
        estimator.adjustForNoTerms(data.getMemorySizeInBytes());
        return data;
    }
    // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
    // Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer order so we know the sequence of
    // longs is going to be monotonically increasing
    final MonotonicAppendingLongBuffer values = new MonotonicAppendingLongBuffer();

    final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat(
            "acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
    OrdinalsBuilder builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio);
    // beforeLoad() registers an up-front estimate with the breaker; it is
    // reconciled with the actual size in the finally block below.
    TermsEnum termsEnum = estimator.beforeLoad(terms);
    boolean success = false;
    try {
        // Collect every distinct term value (in increasing order) while the
        // builder constructs per-document ordinals from the same enumeration.
        BytesRefIterator iter = builder.buildFromTerms(termsEnum);
        BytesRef term;
        assert !getNumericType().isFloatingPoint();
        final boolean indexedAsLong = getNumericType().requiredBits() > 32;
        while ((term = iter.next()) != null) {
            final long value = indexedAsLong ? NumericUtils.prefixCodedToLong(term)
                    : NumericUtils.prefixCodedToInt(term);
            assert values.size() == 0 || value > values.get(values.size() - 1);
            values.add(value);
        }
        Ordinals build = builder.build(fieldDataType.getSettings());

        if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
            // Single-valued field: consider dropping the ordinals and storing
            // one packed value per document instead.
            Docs ordinals = build.ordinals();
            final FixedBitSet set = builder.buildDocsWithValuesSet();

            long minValue, maxValue;
            minValue = maxValue = 0;
            if (values.size() > 0) {
                minValue = values.get(0);
                maxValue = values.get(values.size() - 1);
            }

            // Encode document without a value with a special value
            long missingValue = 0;
            if (set != null) {
                if ((maxValue - minValue + 1) == values.size()) {
                    // values are dense
                    if (minValue > Long.MIN_VALUE) {
                        // Extend the range downward by one to make room for the sentinel.
                        missingValue = --minValue;
                    } else {
                        assert maxValue != Long.MAX_VALUE;
                        missingValue = ++maxValue;
                    }
                } else {
                    // Values are sparse: reuse the first gap in the sequence
                    // as the "missing" sentinel (no range extension needed).
                    for (long i = 1; i < values.size(); ++i) {
                        if (values.get(i) > values.get(i - 1) + 1) {
                            missingValue = values.get(i - 1) + 1;
                            break;
                        }
                    }
                }
                missingValue -= minValue; // delta
            }

            final long delta = maxValue - minValue;
            // A negative delta means the range overflowed a signed long; fall back to 64 bits.
            final int bitsRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);
            final float acceptableOverheadRatio = fieldDataType.getSettings()
                    .getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
            final PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(reader.maxDoc(),
                    bitsRequired, acceptableOverheadRatio);

            // there's sweet spot where due to low unique value count, using ordinals will consume less memory
            final long singleValuesSize = formatAndBits.format.longCount(PackedInts.VERSION_CURRENT,
                    reader.maxDoc(), formatAndBits.bitsPerValue) * 8L;
            final long uniqueValuesSize = values.ramBytesUsed();
            final long ordinalsSize = build.getMemorySizeInBytes();

            if (uniqueValuesSize + ordinalsSize < singleValuesSize) {
                // Ordinals + unique values are smaller: keep the ordinal form.
                data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
            } else {
                // Materialize one delta-from-minValue per document.
                final PackedInts.Mutable sValues = PackedInts.getMutable(reader.maxDoc(), bitsRequired,
                        acceptableOverheadRatio);
                if (missingValue != 0) {
                    // Pre-fill with the missing sentinel; docs with values overwrite it below.
                    sValues.fill(0, sValues.size(), missingValue);
                }
                for (int i = 0; i < reader.maxDoc(); i++) {
                    final long ord = ordinals.getOrd(i);
                    if (ord != Ordinals.MISSING_ORDINAL) {
                        sValues.set(i, values.get(ord - 1) - minValue);
                    }
                }
                if (set == null) {
                    // Every doc has a value.
                    data = new PackedArrayAtomicFieldData.Single(sValues, minValue, reader.maxDoc(),
                            ordinals.getNumOrds());
                } else {
                    data = new PackedArrayAtomicFieldData.SingleSparse(sValues, minValue, reader.maxDoc(),
                            missingValue, ordinals.getNumOrds());
                }
            }
        } else {
            // Multi-valued (or ordinal removal disabled): keep the ordinal form.
            data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
        }

        success = true;
        return data;
    } finally {
        if (!success) {
            // If something went wrong, unwind any current estimations we've made
            estimator.afterLoad(termsEnum, 0);
        } else {
            // Adjust as usual, based on the actual size of the field data
            estimator.afterLoad(termsEnum, data.getMemorySizeInBytes());
        }
        builder.close();
    }

}

From source file:org.yipeng.test.util.packed.TestPackedInts.java

License:Apache License

/**
 * Checks {@code Format.byteCount}: the returned byte count must hold at
 * least valueCount * bitsPerValue bits, and for the PACKED format it must
 * not waste a whole byte.
 */
@Test
public void testByteCount() {
    // Several rounds with random value counts to cover a range of sizes.
    for (int round = 0; round < 5; ++round) {
        final int valueCount = random.nextInt(Integer.MAX_VALUE);
        for (PackedInts.Format format : PackedInts.Format.values()) {
            for (int bitsPerValue = 1; bitsPerValue <= 64; ++bitsPerValue) {
                final long byteCount = format.byteCount(PackedInts.VERSION_CURRENT, valueCount, bitsPerValue);
                final String msg = "format=" + format + ", byteCount=" + byteCount + ", valueCount=" + valueCount
                        + ", bpv=" + bitsPerValue;
                // Enough bytes to hold all the bits...
                assertTrue(msg, byteCount * 8 >= (long) valueCount * bitsPerValue);
                // ...and for PACKED, one byte fewer would not suffice.
                if (format == PackedInts.Format.PACKED) {
                    assertTrue(msg, (byteCount - 1) * 8 < (long) valueCount * bitsPerValue);
                }
            }
        }
    }
}