List of usage examples for org.apache.lucene.util.packed PackedInts DEFAULT
float DEFAULT
To view the source code for org.apache.lucene.util.packed PackedInts DEFAULT, click the Source Link.
From source file:org.codelibs.elasticsearch.search.aggregations.bucket.BestBucketsDeferringCollector.java
License:Apache License
@Override public LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException { finishLeaf();//from www. j av a2 s.com context = ctx; docDeltas = PackedLongValues.packedBuilder(PackedInts.DEFAULT); buckets = PackedLongValues.packedBuilder(PackedInts.DEFAULT); return new LeafBucketCollector() { int lastDoc = 0; @Override public void collect(int doc, long bucket) throws IOException { docDeltas.add(doc - lastDoc); buckets.add(bucket); lastDoc = doc; maxBucket = Math.max(maxBucket, bucket); } }; }
From source file:org.elasticsearch.index.codec.postingsformat.MemoryPostingsFormatProvider.java
License:Apache License
/**
 * Creates a provider backed by a {@code MemoryPostingsFormat}.
 *
 * <p>Two settings are read from {@code postingsFormatSettings}:
 * {@code pack_fst} (defaults to {@code false}) and
 * {@code acceptable_overhead_ratio} (defaults to {@link PackedInts#DEFAULT}).
 *
 * @param name                   name handed to the super constructor
 * @param postingsFormatSettings settings the two options above are read from
 */
@Inject
public MemoryPostingsFormatProvider(@Assisted String name, @Assisted Settings postingsFormatSettings) {
    super(name);
    // TODO this should really be an ENUM?
    this.acceptableOverheadRatio = postingsFormatSettings.getAsFloat(
            "acceptable_overhead_ratio", PackedInts.DEFAULT);
    this.packFst = postingsFormatSettings.getAsBoolean("pack_fst", false);
    this.postingsFormat = new MemoryPostingsFormat(this.packFst, this.acceptableOverheadRatio);
}
From source file:org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsBuilder.java
License:Apache License
/**
 * Build global ordinals for the provided {@link IndexReader}.
 *
 * <p>Loads the ordinals field data of every leaf, builds an {@code OrdinalMap} over them,
 * adds the map's {@code ramBytesUsed()} to the {@code FIELDDATA} circuit breaker without
 * tripping it, and wraps the result in an {@code InternalGlobalOrdinalsIndexFieldData}.
 *
 * @param indexReader    reader to build for; asserted to have more than one leaf
 * @param indexFieldData per-segment ordinals field data source
 * @param settings       passed through to the returned field data
 * @param breakerService provides the field data circuit breaker
 * @param logger         receives a debug message with the elapsed time
 * @return the global-ordinals view over all leaves
 * @throws IOException if building the ordinal map fails
 */
public static IndexOrdinalsFieldData build(final IndexReader indexReader, IndexOrdinalsFieldData indexFieldData,
        Settings settings, CircuitBreakerService breakerService, ESLogger logger) throws IOException {
    assert indexReader.leaves().size() > 1;
    final long startedAtNanos = System.nanoTime();

    final int leafCount = indexReader.leaves().size();
    final AtomicOrdinalsFieldData[] perLeafData = new AtomicOrdinalsFieldData[leafCount];
    final RandomAccessOrds[] perLeafOrds = new RandomAccessOrds[leafCount];
    for (int leaf = 0; leaf < leafCount; leaf++) {
        final AtomicOrdinalsFieldData loaded = indexFieldData.load(indexReader.leaves().get(leaf));
        perLeafData[leaf] = loaded;
        perLeafOrds[leaf] = loaded.getOrdinalsValues();
    }

    final OrdinalMap globalMap = OrdinalMap.build(null, perLeafOrds, PackedInts.DEFAULT);
    final long mapBytes = globalMap.ramBytesUsed();
    // Account for the map's memory, but never trip the breaker for it.
    breakerService.getBreaker(CircuitBreaker.FIELDDATA).addWithoutBreaking(mapBytes);

    if (logger.isDebugEnabled()) {
        logger.debug("Global-ordinals[{}][{}] took {} ms",
                indexFieldData.getFieldNames().fullName(),
                globalMap.getValueCount(),
                TimeValue.nsecToMSec(System.nanoTime() - startedAtNanos));
    }

    return new InternalGlobalOrdinalsIndexFieldData(
            indexFieldData.index(),
            settings,
            indexFieldData.getFieldNames(),
            indexFieldData.getFieldDataType(),
            perLeafData,
            globalMap,
            mapBytes);
}
From source file:org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsBuilder.java
License:Apache License
/**
 * Builds global ordinals in which every leaf exposes an empty sorted-set of ordinals.
 *
 * <p>Each leaf gets its own placeholder field data that reports zero memory usage, has no
 * child resources and does nothing on {@code close()}. The returned field data is created
 * with a memory size of 0.
 *
 * @param index          index passed through to the returned field data
 * @param settings       passed through to the returned field data
 * @param indexReader    reader to build for; asserted to have more than one leaf
 * @param indexFieldData supplies the field names and field data type
 * @return a global-ordinals view backed only by empty per-leaf ordinals
 * @throws IOException if building the ordinal map fails
 */
public static IndexOrdinalsFieldData buildEmpty(Index index, Settings settings, final IndexReader indexReader,
        IndexOrdinalsFieldData indexFieldData) throws IOException {
    assert indexReader.leaves().size() > 1;

    final int leafCount = indexReader.leaves().size();
    final AtomicOrdinalsFieldData[] perLeafData = new AtomicOrdinalsFieldData[leafCount];
    final RandomAccessOrds[] perLeafOrds = new RandomAccessOrds[leafCount];
    for (int leaf = 0; leaf < leafCount; leaf++) {
        // One placeholder instance per leaf.
        final AtomicOrdinalsFieldData placeholder = new AbstractAtomicOrdinalsFieldData() {
            @Override
            public RandomAccessOrds getOrdinalsValues() {
                return DocValues.emptySortedSet();
            }

            @Override
            public long ramBytesUsed() {
                return 0;
            }

            @Override
            public Collection<Accountable> getChildResources() {
                return Collections.emptyList();
            }

            @Override
            public void close() {
            }
        };
        perLeafData[leaf] = placeholder;
        perLeafOrds[leaf] = placeholder.getOrdinalsValues();
    }

    final OrdinalMap globalMap = OrdinalMap.build(null, perLeafOrds, PackedInts.DEFAULT);
    return new InternalGlobalOrdinalsIndexFieldData(
            index,
            settings,
            indexFieldData.getFieldNames(),
            indexFieldData.getFieldDataType(),
            perLeafData,
            globalMap,
            0);
}
From source file:org.elasticsearch.index.fielddata.plain.PackedArrayIndexFieldData.java
License:Apache License
@Override public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception { AtomicReader reader = context.reader(); Terms terms = reader.terms(getFieldNames().indexName()); PackedArrayAtomicFieldData data = null; PackedArrayEstimator estimator = new PackedArrayEstimator(breakerService.getBreaker(), getNumericType()); if (terms == null) { data = PackedArrayAtomicFieldData.empty(reader.maxDoc()); estimator.adjustForNoTerms(data.getMemorySizeInBytes()); return data; }/*from w w w . ja v a2s . c o m*/ // TODO: how can we guess the number of terms? numerics end up creating more terms per value... // Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer order so we know the sequence of // longs is going to be monotonically increasing final MonotonicAppendingLongBuffer values = new MonotonicAppendingLongBuffer(); final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat( "acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO); OrdinalsBuilder builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio); TermsEnum termsEnum = estimator.beforeLoad(terms); boolean success = false; try { BytesRefIterator iter = builder.buildFromTerms(termsEnum); BytesRef term; assert !getNumericType().isFloatingPoint(); final boolean indexedAsLong = getNumericType().requiredBits() > 32; while ((term = iter.next()) != null) { final long value = indexedAsLong ? 
NumericUtils.prefixCodedToLong(term) : NumericUtils.prefixCodedToInt(term); assert values.size() == 0 || value > values.get(values.size() - 1); values.add(value); } Ordinals build = builder.build(fieldDataType.getSettings()); if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) { Docs ordinals = build.ordinals(); final FixedBitSet set = builder.buildDocsWithValuesSet(); long minValue, maxValue; minValue = maxValue = 0; if (values.size() > 0) { minValue = values.get(0); maxValue = values.get(values.size() - 1); } // Encode document without a value with a special value long missingValue = 0; if (set != null) { if ((maxValue - minValue + 1) == values.size()) { // values are dense if (minValue > Long.MIN_VALUE) { missingValue = --minValue; } else { assert maxValue != Long.MAX_VALUE; missingValue = ++maxValue; } } else { for (long i = 1; i < values.size(); ++i) { if (values.get(i) > values.get(i - 1) + 1) { missingValue = values.get(i - 1) + 1; break; } } } missingValue -= minValue; // delta } final long delta = maxValue - minValue; final int bitsRequired = delta < 0 ? 
64 : PackedInts.bitsRequired(delta); final float acceptableOverheadRatio = fieldDataType.getSettings() .getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT); final PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(reader.maxDoc(), bitsRequired, acceptableOverheadRatio); // there's sweet spot where due to low unique value count, using ordinals will consume less memory final long singleValuesSize = formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, reader.maxDoc(), formatAndBits.bitsPerValue) * 8L; final long uniqueValuesSize = values.ramBytesUsed(); final long ordinalsSize = build.getMemorySizeInBytes(); if (uniqueValuesSize + ordinalsSize < singleValuesSize) { data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build); } else { final PackedInts.Mutable sValues = PackedInts.getMutable(reader.maxDoc(), bitsRequired, acceptableOverheadRatio); if (missingValue != 0) { sValues.fill(0, sValues.size(), missingValue); } for (int i = 0; i < reader.maxDoc(); i++) { final long ord = ordinals.getOrd(i); if (ord != Ordinals.MISSING_ORDINAL) { sValues.set(i, values.get(ord - 1) - minValue); } } if (set == null) { data = new PackedArrayAtomicFieldData.Single(sValues, minValue, reader.maxDoc(), ordinals.getNumOrds()); } else { data = new PackedArrayAtomicFieldData.SingleSparse(sValues, minValue, reader.maxDoc(), missingValue, ordinals.getNumOrds()); } } } else { data = new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build); } success = true; return data; } finally { if (!success) { // If something went wrong, unwind any current estimations we've made estimator.afterLoad(termsEnum, 0); } else { // Adjust as usual, based on the actual size of the field data estimator.afterLoad(termsEnum, data.getMemorySizeInBytes()); } builder.close(); } }
From source file:org.elasticsearch.index.merge.policy.ElasticsearchMergePolicy.java
License:Apache License
/** Return an "upgraded" view of the reader. */ static AtomicReader filter(AtomicReader reader) throws IOException { final FieldInfos fieldInfos = reader.getFieldInfos(); final FieldInfo versionInfo = fieldInfos.fieldInfo(VersionFieldMapper.NAME); if (versionInfo != null && versionInfo.hasDocValues()) { // the reader is a recent one, it has versions and they are stored // in a numeric doc values field return reader; }// ww w .ja v a2 s.c om // The segment is an old one, load all versions in memory and hide // them behind a numeric doc values field final Terms terms = reader.terms(UidFieldMapper.NAME); if (terms == null || !terms.hasPayloads()) { // The segment doesn't have an _uid field or doesn't have paylods // don't try to do anything clever. If any other segment has versions // all versions of this segment will be initialized to 0 return reader; } final TermsEnum uids = terms.iterator(null); final GrowableWriter versions = new GrowableWriter(2, reader.maxDoc(), PackedInts.DEFAULT); DocsAndPositionsEnum dpe = null; for (BytesRef uid = uids.next(); uid != null; uid = uids.next()) { dpe = uids.docsAndPositions(reader.getLiveDocs(), dpe, DocsAndPositionsEnum.FLAG_PAYLOADS); assert dpe != null : "field has payloads"; for (int doc = dpe.nextDoc(); doc != DocsEnum.NO_MORE_DOCS; doc = dpe.nextDoc()) { dpe.nextPosition(); final BytesRef payload = dpe.getPayload(); if (payload != null && payload.length == 8) { final long version = Numbers.bytesToLong(payload); versions.set(doc, version); break; } } } // Build new field infos, doc values, and return a filter reader final FieldInfo newVersionInfo; if (versionInfo == null) { // Find a free field number int fieldNumber = 0; for (FieldInfo fi : fieldInfos) { fieldNumber = Math.max(fieldNumber, fi.number + 1); } newVersionInfo = new FieldInfo(VersionFieldMapper.NAME, false, fieldNumber, false, true, false, IndexOptions.DOCS_ONLY, DocValuesType.NUMERIC, DocValuesType.NUMERIC, -1, Collections.<String, String>emptyMap()); } else 
{ newVersionInfo = new FieldInfo(VersionFieldMapper.NAME, versionInfo.isIndexed(), versionInfo.number, versionInfo.hasVectors(), versionInfo.omitsNorms(), versionInfo.hasPayloads(), versionInfo.getIndexOptions(), versionInfo.getDocValuesType(), versionInfo.getNormType(), versionInfo.getDocValuesGen(), versionInfo.attributes()); } final ArrayList<FieldInfo> fieldInfoList = new ArrayList<>(); for (FieldInfo info : fieldInfos) { if (info != versionInfo) { fieldInfoList.add(info); } } fieldInfoList.add(newVersionInfo); final FieldInfos newFieldInfos = new FieldInfos(fieldInfoList.toArray(new FieldInfo[fieldInfoList.size()])); final NumericDocValues versionValues = new NumericDocValues() { @Override public long get(int index) { return versions.get(index); } }; return new FilterAtomicReader(reader) { @Override public FieldInfos getFieldInfos() { return newFieldInfos; } @Override public NumericDocValues getNumericDocValues(String field) throws IOException { if (VersionFieldMapper.NAME.equals(field)) { return versionValues; } return super.getNumericDocValues(field); } @Override public Bits getDocsWithField(String field) throws IOException { return new Bits.MatchAllBits(in.maxDoc()); } }; }
From source file:org.elasticsearch.index.merge.policy.IndexUpgraderMergePolicy.java
License:Apache License
/** Return an "upgraded" view of the reader. */ static AtomicReader filter(AtomicReader reader) throws IOException { final FieldInfos fieldInfos = reader.getFieldInfos(); final FieldInfo versionInfo = fieldInfos.fieldInfo(VersionFieldMapper.NAME); if (versionInfo != null && versionInfo.hasDocValues()) { // the reader is a recent one, it has versions and they are stored // in a numeric doc values field return reader; }//from w w w .j a v a2 s . c o m // The segment is an old one, load all versions in memory and hide // them behind a numeric doc values field final Terms terms = reader.terms(UidFieldMapper.NAME); if (terms == null || !terms.hasPayloads()) { // The segment doesn't have an _uid field or doesn't have paylods // don't try to do anything clever. If any other segment has versions // all versions of this segment will be initialized to 0 return reader; } final TermsEnum uids = terms.iterator(null); final GrowableWriter versions = new GrowableWriter(2, reader.maxDoc(), PackedInts.DEFAULT); DocsAndPositionsEnum dpe = null; for (BytesRef uid = uids.next(); uid != null; uid = uids.next()) { dpe = uids.docsAndPositions(reader.getLiveDocs(), dpe, DocsAndPositionsEnum.FLAG_PAYLOADS); assert dpe != null : "field has payloads"; for (int doc = dpe.nextDoc(); doc != DocsEnum.NO_MORE_DOCS; doc = dpe.nextDoc()) { dpe.nextPosition(); final BytesRef payload = dpe.getPayload(); if (payload != null && payload.length == 8) { final long version = Numbers.bytesToLong(payload); versions.set(doc, version); break; } } } // Build new field infos, doc values, and return a filter reader final FieldInfo newVersionInfo; if (versionInfo == null) { // Find a free field number int fieldNumber = 0; for (FieldInfo fi : fieldInfos) { fieldNumber = Math.max(fieldNumber, fi.number + 1); } newVersionInfo = new FieldInfo(VersionFieldMapper.NAME, false, fieldNumber, false, true, false, IndexOptions.DOCS_ONLY, DocValuesType.NUMERIC, DocValuesType.NUMERIC, Collections.<String, String>emptyMap()); } 
else { newVersionInfo = new FieldInfo(VersionFieldMapper.NAME, versionInfo.isIndexed(), versionInfo.number, versionInfo.hasVectors(), versionInfo.omitsNorms(), versionInfo.hasPayloads(), versionInfo.getIndexOptions(), versionInfo.getDocValuesType(), versionInfo.getNormType(), versionInfo.attributes()); } final ArrayList<FieldInfo> fieldInfoList = new ArrayList<FieldInfo>(); for (FieldInfo info : fieldInfos) { if (info != versionInfo) { fieldInfoList.add(info); } } fieldInfoList.add(newVersionInfo); final FieldInfos newFieldInfos = new FieldInfos(fieldInfoList.toArray(new FieldInfo[fieldInfoList.size()])); final NumericDocValues versionValues = new NumericDocValues() { @Override public long get(int index) { return versions.get(index); } }; return new FilterAtomicReader(reader) { @Override public FieldInfos getFieldInfos() { return newFieldInfos; } @Override public NumericDocValues getNumericDocValues(String field) throws IOException { if (VersionFieldMapper.NAME.equals(field)) { return versionValues; } return super.getNumericDocValues(field); } @Override public Bits getDocsWithField(String field) throws IOException { return new Bits.MatchAllBits(in.maxDoc()); } }; }
From source file:org.elasticsearch.search.aggregations.bucket.MergingBucketsDeferringCollector.java
License:Apache License
@Override public LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException { finishLeaf();//from w w w.ja v a2s . com context = ctx; docDeltas = PackedLongValues.packedBuilder(PackedInts.DEFAULT); buckets = PackedLongValues.packedBuilder(PackedInts.DEFAULT); return new LeafBucketCollector() { int lastDoc = 0; @Override public void collect(int doc, long bucket) { docDeltas.add(doc - lastDoc); buckets.add(bucket); lastDoc = doc; maxBucket = Math.max(maxBucket, bucket); } }; }
From source file:org.elasticsearch.search.aggregations.bucket.MergingBucketsDeferringCollector.java
License:Apache License
public void mergeBuckets(long[] mergeMap) { List<Entry> newEntries = new ArrayList<>(entries.size()); for (Entry sourceEntry : entries) { PackedLongValues.Builder newBuckets = PackedLongValues.packedBuilder(PackedInts.DEFAULT); for (PackedLongValues.Iterator itr = sourceEntry.buckets.iterator(); itr.hasNext();) { long bucket = itr.next(); newBuckets.add(mergeMap[Math.toIntExact(bucket)]); }//from w ww . j av a2 s . c om newEntries.add(new Entry(sourceEntry.context, sourceEntry.docDeltas, newBuckets.build())); } entries = newEntries; // if there are buckets that have been collected in the current segment // we need to update the bucket ordinals there too if (buckets.size() > 0) { PackedLongValues currentBuckets = buckets.build(); PackedLongValues.Builder newBuckets = PackedLongValues.packedBuilder(PackedInts.DEFAULT); for (PackedLongValues.Iterator itr = currentBuckets.iterator(); itr.hasNext();) { long bucket = itr.next(); newBuckets.add(mergeMap[Math.toIntExact(bucket)]); } buckets = newBuckets; } }