List of usage examples for org.apache.lucene.util.packed.PackedInts.VERSION_CURRENT

Declaration: static final int VERSION_CURRENT, the most recent version of the packed-ints on-disk format.
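Every example below follows the same pattern: the writer stamps its output with PackedInts.VERSION_CURRENT so that a reader can later verify that it understands the on-disk packed-ints layout before decoding anything. The following minimal sketch shows both halves of that handshake. The helper class and method names are hypothetical; PackedInts.checkVersion, however, is the real Lucene utility for validating a stored version:

import java.io.IOException;

import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.packed.PackedInts;

final class VersionHandshake { // hypothetical helper, for illustration only

    // Write side: stamp the stream with the current packed-ints format version.
    static void writeVersion(DataOutput out) throws IOException {
        out.writeVInt(PackedInts.VERSION_CURRENT);
    }

    // Read side: read the stamp back and fail fast on an unsupported version.
    static int readVersion(DataInput in) throws IOException {
        final int version = in.readVInt();
        PackedInts.checkVersion(version); // throws if version is outside the supported range
        return version;
    }
}

The writers below all perform the write half of this handshake; the matching Lucene readers perform the check when the segment is opened.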
From source file: com.lucure.core.codec.CompressingStoredFieldsIndexWriter.java
License: Apache License
CompressingStoredFieldsIndexWriter(IndexOutput indexOutput) throws IOException {
    this.fieldsIndexOut = indexOutput;
    reset();
    totalDocs = 0;
    docBaseDeltas = new int[BLOCK_SIZE];
    startPointerDeltas = new long[BLOCK_SIZE];
    fieldsIndexOut.writeVInt(PackedInts.VERSION_CURRENT);
}
From source file: com.lucure.core.codec.CompressingStoredFieldsWriter.java
License: Apache License
/** Sole constructor. */
public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si, String segmentSuffix,
        IOContext context, String formatName, CompressionMode compressionMode, int chunkSize)
        throws IOException {
    assert directory != null;
    this.directory = directory;
    this.segment = si.name;
    this.segmentSuffix = segmentSuffix;
    this.compressionMode = compressionMode;
    this.compressor = compressionMode.newCompressor();
    this.chunkSize = chunkSize;
    this.docBase = 0;
    this.bufferedDocs = new GrowableByteArrayDataOutput(chunkSize);
    this.numStoredFields = new int[16];
    this.endOffsets = new int[16];
    this.numBufferedDocs = 0;

    boolean success = false;
    IndexOutput indexStream = directory.createOutput(
            IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION), context);
    try {
        fieldsStream = directory.createOutput(
                IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION), context);

        final String codecNameIdx = formatName + CODEC_SFX_IDX;
        final String codecNameDat = formatName + CODEC_SFX_DAT;
        CodecUtil.writeHeader(indexStream, codecNameIdx, VERSION_CURRENT);
        CodecUtil.writeHeader(fieldsStream, codecNameDat, VERSION_CURRENT);
        assert CodecUtil.headerLength(codecNameDat) == fieldsStream.getFilePointer();
        assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();

        indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
        indexStream = null;

        fieldsStream.writeVInt(chunkSize);
        fieldsStream.writeVInt(PackedInts.VERSION_CURRENT);

        success = true;
    } finally {
        if (!success) {
            IOUtils.closeWhileHandlingException(indexStream);
            abort();
        }
    }
}
From source file: com.lucure.core.codec.ForUtil.java
License: Apache License
/**
 * Create a new {@link ForUtil} instance and save state into <code>out</code>.
 */
ForUtil(float acceptableOverheadRatio, DataOutput out) throws IOException {
    out.writeVInt(PackedInts.VERSION_CURRENT);
    encodedSizes = new int[33];
    encoders = new PackedInts.Encoder[33];
    decoders = new PackedInts.Decoder[33];
    iterations = new int[33];
    for (int bpv = 1; bpv <= 32; ++bpv) {
        final FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(BLOCK_SIZE, bpv,
                acceptableOverheadRatio);
        assert formatAndBits.format.isSupported(formatAndBits.bitsPerValue);
        assert formatAndBits.bitsPerValue <= 32;
        encodedSizes[bpv] = encodedSize(formatAndBits.format, PackedInts.VERSION_CURRENT,
                formatAndBits.bitsPerValue);
        encoders[bpv] = PackedInts.getEncoder(formatAndBits.format, PackedInts.VERSION_CURRENT,
                formatAndBits.bitsPerValue);
        decoders[bpv] = PackedInts.getDecoder(formatAndBits.format, PackedInts.VERSION_CURRENT,
                formatAndBits.bitsPerValue);
        iterations[bpv] = computeIterations(decoders[bpv]);
        out.writeVInt(formatAndBits.format.getId() << 5 | (formatAndBits.bitsPerValue - 1));
    }
}
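The final writeVInt in the loop packs two values into one integer: the format id goes into the high bits and bitsPerValue - 1 into the low five bits. For orientation, here is a sketch of the matching read-side constructor, closely modeled on the stock Lucene 4.x ForUtil(DataInput); treat it as illustrative rather than a verbatim copy:

/** Restore a {@link ForUtil} from a {@link DataInput} written by the constructor above (sketch). */
ForUtil(DataInput in) throws IOException {
    // Validate the packed-ints version stamped by the write-side constructor.
    final int packedIntsVersion = in.readVInt();
    PackedInts.checkVersion(packedIntsVersion);
    encodedSizes = new int[33];
    encoders = new PackedInts.Encoder[33];
    decoders = new PackedInts.Decoder[33];
    iterations = new int[33];
    for (int bpv = 1; bpv <= 32; ++bpv) {
        final int code = in.readVInt();
        final int formatId = code >>> 5;           // high bits: format id
        final int bitsPerValue = (code & 31) + 1;  // low 5 bits: bitsPerValue - 1
        final PackedInts.Format format = PackedInts.Format.byId(formatId);
        assert format.isSupported(bitsPerValue);
        encodedSizes[bpv] = encodedSize(format, packedIntsVersion, bitsPerValue);
        encoders[bpv] = PackedInts.getEncoder(format, packedIntsVersion, bitsPerValue);
        decoders[bpv] = PackedInts.getDecoder(format, packedIntsVersion, bitsPerValue);
        iterations[bpv] = computeIterations(decoders[bpv]);
    }
}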
From source file: org.apache.blur.lucene.codec.DiskDocValuesConsumer.java
License: Apache License
@Override
public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
    long count = 0;
    for (@SuppressWarnings("unused") Number nv : values) {
        ++count;
    }
    meta.writeVInt(field.number);
    meta.writeByte(DiskDocValuesFormat.NUMERIC);
    meta.writeVInt(PackedInts.VERSION_CURRENT);
    meta.writeLong(data.getFilePointer());
    meta.writeVLong(count);
    meta.writeVInt(BLOCK_SIZE);

    final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
    for (Number nv : values) {
        writer.add(nv.longValue());
    }
    writer.finish();
}
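The metadata written here (the packed-ints version, the data file pointer, the value count, and the block size) is exactly what a consumer needs to reconstruct the stream. A hedged sketch of that read side follows, assuming Lucene 4.x's BlockPackedReader as the decoding counterpart of BlockPackedWriter; the meta/data variables and surrounding bookkeeping are illustrative:

// Illustrative read-back of the metadata written by addNumericField above.
final int fieldNumber = meta.readVInt();
final byte type = meta.readByte();             // expected: DiskDocValuesFormat.NUMERIC
final int packedIntsVersion = meta.readVInt(); // was PackedInts.VERSION_CURRENT at write time
final long offset = meta.readLong();
final long count = meta.readVLong();
final int blockSize = meta.readVInt();

data.seek(offset);
// The final flag requests "direct" (on-demand) decoding instead of loading all blocks up front.
final BlockPackedReader values = new BlockPackedReader(data, packedIntsVersion, blockSize, count, true);
final long first = values.get(0); // random access into the block-packed stream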
From source file: org.apache.blur.lucene.codec.DiskDocValuesConsumer.java
License: Apache License
@Override
public void addBinaryField(FieldInfo field, final Iterable<BytesRef> values) throws IOException {
    // write the byte[] data
    meta.writeVInt(field.number);
    meta.writeByte(DiskDocValuesFormat.BINARY);
    int minLength = Integer.MAX_VALUE;
    int maxLength = Integer.MIN_VALUE;
    final long startFP = data.getFilePointer();
    long count = 0;
    for (BytesRef v : values) {
        minLength = Math.min(minLength, v.length);
        maxLength = Math.max(maxLength, v.length);
        data.writeBytes(v.bytes, v.offset, v.length);
        count++;
    }
    meta.writeVInt(minLength);
    meta.writeVInt(maxLength);
    meta.writeVLong(count);
    meta.writeLong(startFP);

    // if minLength == maxLength, it's a fixed-length byte[] and we are done (the addresses are implicit);
    // otherwise, we need to record the length fields...
    if (minLength != maxLength) {
        meta.writeLong(data.getFilePointer());
        meta.writeVInt(PackedInts.VERSION_CURRENT);
        meta.writeVInt(BLOCK_SIZE);

        final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
        long addr = 0;
        for (BytesRef v : values) {
            addr += v.length;
            writer.add(addr);
        }
        writer.finish();
    }
}
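In the variable-length branch, the monotonic stream stores cumulative end offsets relative to startFP, so the i-th value spans the byte range between the previous address and its own. A brief sketch of resolving that slice at read time, assuming Lucene 4.x's MonotonicBlockPackedReader as the counterpart of MonotonicBlockPackedWriter (all variable names illustrative):

// Addresses are cumulative end offsets: value i occupies [get(i - 1), get(i)) relative to startFP.
final MonotonicBlockPackedReader addresses =
        new MonotonicBlockPackedReader(data, packedIntsVersion, blockSize, count, true);
final long start = (i == 0) ? 0L : addresses.get(i - 1);
final long end = addresses.get(i);
final int length = (int) (end - start); // length in bytes of the i-th value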
From source file: org.apache.blur.lucene.codec.DiskDocValuesConsumer.java
License: Apache License
@Override
public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount,
        Iterable<Number> ords) throws IOException {
    meta.writeVInt(field.number);
    meta.writeByte(DiskDocValuesFormat.SORTED_SET);

    // write the ord -> byte[] as a binary field
    addBinaryField(field, values);

    // write the stream of ords as a numeric field
    // NOTE: we could return an iterator that delta-encodes these within a doc
    addNumericField(field, ords);

    // write the doc -> ord count as an absolute index into the stream
    meta.writeVInt(field.number);
    meta.writeByte(DiskDocValuesFormat.NUMERIC);
    meta.writeVInt(PackedInts.VERSION_CURRENT);
    meta.writeLong(data.getFilePointer());
    meta.writeVLong(maxDoc);
    meta.writeVInt(BLOCK_SIZE);

    final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
    long addr = 0;
    for (Number v : docToOrdCount) {
        addr += v.longValue();
        writer.add(addr);
    }
    writer.finish();
}
From source file: org.elasticsearch.index.fielddata.plain.PackedArrayIndexFieldData.java
License: Apache License
@Override
public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
    AtomicReader reader = context.reader();
    Terms terms = reader.terms(getFieldNames().indexName());
    PackedArrayAtomicFieldData data = null;
    PackedArrayEstimator estimator = new PackedArrayEstimator(breakerService.getBreaker(), getNumericType());
    if (terms == null) {
        data = PackedArrayAtomicFieldData.empty(reader.maxDoc());
        estimator.adjustForNoTerms(data.getMemorySizeInBytes());
        return data;
    }
    // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
    // Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer order,
    // so we know the sequence of longs is going to be monotonically increasing
    final MonotonicAppendingLongBuffer values = new MonotonicAppendingLongBuffer();
    final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat(
            "acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
    OrdinalsBuilder builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio);
    TermsEnum termsEnum = estimator.beforeLoad(terms);
    boolean success = false;
    try {
        BytesRefIterator iter = builder.buildFromTerms(termsEnum);
        BytesRef term;
        assert !getNumericType().isFloatingPoint();
        final boolean indexedAsLong = getNumericType().requiredBits() > 32;
        while ((term = iter.next()) != null) {
            final long value = indexedAsLong ? NumericUtils.prefixCodedToLong(term)
                    : NumericUtils.prefixCodedToInt(term);
            assert values.size() == 0 || value > values.get(values.size() - 1);
            values.add(value);
        }
        Ordinals build = builder.build(fieldDataType.getSettings());
        if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
            Docs ordinals = build.ordinals();
            final FixedBitSet set = builder.buildDocsWithValuesSet();
            long minValue, maxValue;
            minValue = maxValue = 0;
            if (values.size() > 0) {
                minValue = values.get(0);
                maxValue = values.get(values.size() - 1);
            }
            // Encode documents without a value with a special value
            long missingValue = 0;
            if (set != null) {
                if ((maxValue - minValue + 1) == values.size()) {
                    // values are dense
                    if (minValue > Long.MIN_VALUE) {
                        missingValue = --minValue;
                    } else {
                        assert maxValue != Long.MAX_VALUE;
                        missingValue = ++maxValue;
                    }
                } else {
                    for (long i = 1; i < values.size(); ++i) {
                        if (values.get(i) > values.get(i - 1) + 1) {
                            missingValue = values.get(i - 1) + 1;
                            break;
                        }
                    }
                }
                missingValue -= minValue; // delta
            }
            final long delta = maxValue - minValue;
            final int bitsRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);
            final float acceptableOverheadRatio = fieldDataType.getSettings()
                    .getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
            final PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(reader.maxDoc(),
                    bitsRequired, acceptableOverheadRatio);

            // there's a sweet spot where, thanks to a low unique value count, using ordinals consumes less memory
            final long singleValuesSize = formatAndBits.format.longCount(PackedInts.VERSION_CURRENT,
                    reader.maxDoc(), formatAndBits.bitsPerValue) * 8L;
            final long uniqueValuesSize = values.ramBytesUsed();
            final long ordinalsSize = build.getMemorySizeInBytes();
            if (uniqueValuesSize + ordinalsSize < singleValuesSize) {
                data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
            } else {
                final PackedInts.Mutable sValues = PackedInts.getMutable(reader.maxDoc(), bitsRequired,
                        acceptableOverheadRatio);
                if (missingValue != 0) {
                    sValues.fill(0, sValues.size(), missingValue);
                }
                for (int i = 0; i < reader.maxDoc(); i++) {
                    final long ord = ordinals.getOrd(i);
                    if (ord != Ordinals.MISSING_ORDINAL) {
                        sValues.set(i, values.get(ord - 1) - minValue);
                    }
                }
                if (set == null) {
                    data = new PackedArrayAtomicFieldData.Single(sValues, minValue, reader.maxDoc(),
                            ordinals.getNumOrds());
                } else {
                    data = new PackedArrayAtomicFieldData.SingleSparse(sValues, minValue, reader.maxDoc(),
                            missingValue, ordinals.getNumOrds());
                }
            }
        } else {
            data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
        }
        success = true;
        return data;
    } finally {
        if (!success) {
            // If something went wrong, unwind any current estimations we've made
            estimator.afterLoad(termsEnum, 0);
        } else {
            // Adjust as usual, based on the actual size of the field data
            estimator.afterLoad(termsEnum, data.getMemorySizeInBytes());
        }
        builder.close();
    }
}
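To make the sizing comparison concrete with hypothetical numbers: for reader.maxDoc() = 1,000,000 and bitsRequired = 20, the PACKED format needs ceil(1,000,000 × 20 / 64) = 312,500 longs, so singleValuesSize = 312,500 × 8 = 2.5 MB. If the field holds only, say, 10,000 unique values, uniqueValuesSize stays in the tens of kilobytes, and as long as uniqueValuesSize + ordinalsSize comes in under 2.5 MB, the ordinals-backed WithOrdinals representation is the cheaper choice.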
From source file: org.yipeng.test.util.packed.TestPackedInts.java
License: Apache License
@Test
public void testByteCount() {
    for (int i = 0; i < 5; ++i) {
        final int valueCount = random.nextInt(Integer.MAX_VALUE);
        for (PackedInts.Format format : PackedInts.Format.values()) {
            for (int bpv = 1; bpv <= 64; ++bpv) {
                final long byteCount = format.byteCount(PackedInts.VERSION_CURRENT, valueCount, bpv);
                String msg = "format=" + format + ", byteCount=" + byteCount + ", valueCount=" + valueCount
                        + ", bpv=" + bpv;
                assertTrue(msg, byteCount * 8 >= (long) valueCount * bpv);
                if (format == PackedInts.Format.PACKED) {
                    assertTrue(msg, (byteCount - 1) * 8 < (long) valueCount * bpv);
                }
            }
        }
    }
}