List of usage examples for org.apache.lucene.util.BytesRef.deepCopyOf
public static BytesRef deepCopyOf(BytesRef other)
Parameter: other - the BytesRef to copy.
The returned BytesRef will have a length of other.length and an offset of zero.
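Before the examples, a minimal self-contained sketch of the pattern that recurs below: a producer reuses one mutable scratch BytesRef, so deepCopyOf must be called before the value is retained beyond the current iteration (for instance as a map key). The class and variable names in this sketch are illustrative only and are not taken from the examples that follow.

    import java.nio.charset.StandardCharsets;
    import java.util.HashMap;
    import java.util.Map;

    import org.apache.lucene.util.BytesRef;

    // Illustrative sketch: a reused scratch BytesRef must be deep-copied
    // before it is stored, otherwise later writes to the scratch buffer
    // silently change what was stored.
    public class DeepCopyExample {
        public static void main(String[] args) {
            BytesRef scratch = new BytesRef();            // shared, mutable buffer
            Map<BytesRef, Integer> counts = new HashMap<>();

            for (String term : new String[] { "foo", "bar", "foo" }) {
                // Point the scratch at the current term's bytes (fields are public).
                scratch.bytes = term.getBytes(StandardCharsets.UTF_8);
                scratch.offset = 0;
                scratch.length = scratch.bytes.length;

                // deepCopyOf allocates a fresh byte[] of exactly scratch.length
                // (offset zero), so the map key stays stable when scratch is reused.
                BytesRef stable = BytesRef.deepCopyOf(scratch);
                counts.merge(stable, 1, Integer::sum);
            }

            System.out.println(counts.size()); // 2 distinct keys: "foo" and "bar"
        }
    }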
From source file:org.elasticsearch.index.fielddata.BytesValues.java
License:Apache License
/**
 * Converts the current shared {@link BytesRef} to a stable instance. Note,
 * this call makes the bytes safe for *reads*, not writes (into the same BytesRef).
 * For example, it makes it safe to be placed in a map.
 */
public BytesRef copyShared() {
    return BytesRef.deepCopyOf(scratch);
}
From source file:org.elasticsearch.index.fielddata.DuelFieldDataTests.java
License:Apache License
private static void duelFieldDataBytes(Random random, AtomicReaderContext context, IndexFieldData<?> left,
        IndexFieldData<?> right, Preprocessor pre) throws Exception {
    AtomicFieldData<?> leftData = random.nextBoolean() ? left.load(context) : left.loadDirect(context);
    AtomicFieldData<?> rightData = random.nextBoolean() ? right.load(context) : right.loadDirect(context);
    assertThat(leftData.getNumDocs(), equalTo(rightData.getNumDocs()));
    int numDocs = leftData.getNumDocs();
    BytesValues leftBytesValues = leftData.getBytesValues(random.nextBoolean());
    BytesValues rightBytesValues = rightData.getBytesValues(random.nextBoolean());
    BytesRef leftSpare = new BytesRef();
    BytesRef rightSpare = new BytesRef();
    for (int i = 0; i < numDocs; i++) {
        int numValues = 0;
        assertThat((numValues = leftBytesValues.setDocument(i)), equalTo(rightBytesValues.setDocument(i)));
        BytesRef previous = null;
        for (int j = 0; j < numValues; j++) {
            rightSpare.copyBytes(rightBytesValues.nextValue());
            leftSpare.copyBytes(leftBytesValues.nextValue());
            assertThat(rightSpare.hashCode(), equalTo(rightBytesValues.currentValueHash()));
            assertThat(leftSpare.hashCode(), equalTo(leftBytesValues.currentValueHash()));
            if (previous != null && leftBytesValues.getOrder() == rightBytesValues.getOrder()) { // we can only compare the
                assertThat(pre.compare(previous, rightSpare), lessThan(0));
            }
            previous = BytesRef.deepCopyOf(rightSpare);
            pre.toString(rightSpare);
            pre.toString(leftSpare);
            assertThat(pre.toString(leftSpare), equalTo(pre.toString(rightSpare)));
            if (leftSpare.equals(rightSpare)) {
                assertThat(leftBytesValues.currentValueHash(), equalTo(rightBytesValues.currentValueHash()));
            }
        }
    }
}
From source file:org.elasticsearch.index.fielddata.fieldcomparator.StringScriptDataComparator.java
License:Apache License
@Override
public void copy(int slot, int doc) {
    setSpare(doc);
    if (values[slot] == null) {
        values[slot] = BytesRef.deepCopyOf(spare);
    } else {
        values[slot].copyBytes(spare);
    }
}
From source file:org.elasticsearch.index.fielddata.plain.ConcreteBytesRefIndexFieldData.java
License:Apache License
@Override
public ConcreteBytesRefAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception {
    AtomicReader reader = context.reader();
    Terms terms = reader.terms(getFieldNames().indexName());
    if (terms == null) {
        return ConcreteBytesRefAtomicFieldData.empty(reader.maxDoc());
    }
    long size = terms.size();
    if (size == -1) {
        size = 1024;
    }
    final ArrayList<BytesRef> values = new ArrayList<BytesRef>((int) size);
    values.add(null); // first "t" indicates null value
    OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
    try {
        BytesRefIterator iter = builder.buildFromTerms(terms.iterator(null), reader.getLiveDocs());
        BytesRef term;
        while ((term = iter.next()) != null) {
            values.add(BytesRef.deepCopyOf(term));
        }
        return new ConcreteBytesRefAtomicFieldData(values.toArray(new BytesRef[values.size()]),
                builder.build(fieldDataType.getSettings()));
    } finally {
        builder.close();
    }
}
From source file:org.elasticsearch.search.aggregations.bucket.composite.BinaryValuesSource.java
License:Apache License
@Override
public void copyCurrent(int slot) {
    values[slot] = BytesRef.deepCopyOf(currentValue);
}
From source file:org.elasticsearch.search.aggregations.bucket.composite.GlobalOrdinalValuesSource.java
License:Apache License
@Override
BytesRef toComparable(int slot) throws IOException {
    long globalOrd = values.get(slot);
    if (globalOrd == lastLookupOrd) {
        return lastLookupValue;
    } else {
        lastLookupOrd = globalOrd;
        lastLookupValue = BytesRef.deepCopyOf(lookup.lookupOrd(values.get(slot)));
        return lastLookupValue;
    }
}
From source file:org.elasticsearch.search.aggregations.bucket.significant.GlobalOrdinalsSignificantTermsAggregator.java
License:Apache License
@Override
public SignificantStringTerms buildAggregation(long owningBucketOrdinal) throws IOException {
    assert owningBucketOrdinal == 0;
    if (globalOrds == null) { // no context in this reader
        return buildEmptyAggregation();
    }
    final int size;
    if (bucketCountThresholds.getMinDocCount() == 0) {
        // if minDocCount == 0 then we can end up with more buckets then maxBucketOrd() returns
        size = (int) Math.min(globalOrds.getValueCount(), bucketCountThresholds.getShardSize());
    } else {
        size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize());
    }
    long supersetSize = termsAggFactory.prepareBackground(context);
    long subsetSize = numCollectedDocs;
    BucketSignificancePriorityQueue ordered = new BucketSignificancePriorityQueue(size);
    SignificantStringTerms.Bucket spare = null;
    for (long globalTermOrd = 0; globalTermOrd < globalOrds.getValueCount(); ++globalTermOrd) {
        if (includeExclude != null && !acceptedGlobalOrdinals.get(globalTermOrd)) {
            continue;
        }
        final long bucketOrd = getBucketOrd(globalTermOrd);
        final int bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
        if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) {
            continue;
        }
        if (bucketDocCount < bucketCountThresholds.getShardMinDocCount()) {
            continue;
        }
        if (spare == null) {
            spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null);
        }
        spare.bucketOrd = bucketOrd;
        copy(globalOrds.lookupOrd(globalTermOrd), spare.termBytes);
        spare.subsetDf = bucketDocCount;
        spare.subsetSize = subsetSize;
        spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.termBytes);
        spare.supersetSize = supersetSize;
        // During shard-local down-selection we use subset/superset stats
        // that are for this shard only
        // Back at the central reducer these properties will be updated with
        // global stats
        spare.updateScore(termsAggFactory.getSignificanceHeuristic());
        spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare);
    }
    final InternalSignificantTerms.Bucket[] list = new InternalSignificantTerms.Bucket[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; i--) {
        final SignificantStringTerms.Bucket bucket = (SignificantStringTerms.Bucket) ordered.pop();
        // the terms are owned by the BytesRefHash, we need to pull a copy since the BytesRef hash data may be recycled at some point
        bucket.termBytes = BytesRef.deepCopyOf(bucket.termBytes);
        bucket.aggregations = bucketAggregations(bucket.bucketOrd);
        list[i] = bucket;
    }
    return new SignificantStringTerms(subsetSize, supersetSize, name, bucketCountThresholds.getRequiredSize(),
            bucketCountThresholds.getMinDocCount(), termsAggFactory.getSignificanceHeuristic(),
            Arrays.asList(list), pipelineAggregators(), metaData());
}
From source file:org.elasticsearch.search.aggregations.bucket.significant.SignificantStringTermsAggregator.java
License:Apache License
@Override
public SignificantStringTerms buildAggregation(long owningBucketOrdinal) throws IOException {
    assert owningBucketOrdinal == 0;
    final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());
    long supersetSize = termsAggFactory.prepareBackground(context);
    long subsetSize = numCollectedDocs;
    BucketSignificancePriorityQueue ordered = new BucketSignificancePriorityQueue(size);
    SignificantStringTerms.Bucket spare = null;
    for (int i = 0; i < bucketOrds.size(); i++) {
        final int docCount = bucketDocCount(i);
        if (docCount < bucketCountThresholds.getShardMinDocCount()) {
            continue;
        }
        if (spare == null) {
            spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null);
        }
        bucketOrds.get(i, spare.termBytes);
        spare.subsetDf = docCount;
        spare.subsetSize = subsetSize;
        spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.termBytes);
        spare.supersetSize = supersetSize;
        // During shard-local down-selection we use subset/superset stats
        // that are for this shard only
        // Back at the central reducer these properties will be updated with
        // global stats
        spare.updateScore(termsAggFactory.getSignificanceHeuristic());
        spare.bucketOrd = i;
        spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare);
    }
    final InternalSignificantTerms.Bucket[] list = new InternalSignificantTerms.Bucket[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; i--) {
        final SignificantStringTerms.Bucket bucket = (SignificantStringTerms.Bucket) ordered.pop();
        // the terms are owned by the BytesRefHash, we need to pull a copy since the BytesRef hash data may be recycled at some point
        bucket.termBytes = BytesRef.deepCopyOf(bucket.termBytes);
        bucket.aggregations = bucketAggregations(bucket.bucketOrd);
        list[i] = bucket;
    }
    return new SignificantStringTerms(subsetSize, supersetSize, name, bucketCountThresholds.getRequiredSize(),
            bucketCountThresholds.getMinDocCount(), termsAggFactory.getSignificanceHeuristic(),
            Arrays.asList(list), pipelineAggregators(), metaData());
}
From source file:org.elasticsearch.search.aggregations.bucket.terms.StringTermsAggregator.java
License:Apache License
@Override
public StringTerms buildAggregation(long owningBucketOrdinal) {
    assert owningBucketOrdinal == 0;
    if (minDocCount == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < requiredSize)) {
        // we need to fill-in the blanks
        List<BytesValues.WithOrdinals> valuesWithOrdinals = Lists.newArrayList();
        for (AtomicReaderContext ctx : context.searchContext().searcher().getTopReaderContext().leaves()) {
            context.setNextReader(ctx);
            final BytesValues values = valuesSource.bytesValues();
            if (values instanceof BytesValues.WithOrdinals) {
                valuesWithOrdinals.add((BytesValues.WithOrdinals) values);
            } else {
                // brute force
                for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) {
                    final int valueCount = values.setDocument(docId);
                    for (int i = 0; i < valueCount; ++i) {
                        final BytesRef term = values.nextValue();
                        if (includeExclude == null || includeExclude.accept(term)) {
                            bucketOrds.add(term, values.currentValueHash());
                        }
                    }
                }
            }
        }
        // With ordinals we can be smarter and add just as many terms as necessary to the hash table
        // For instance, if sorting by term asc, we only need to get the first `requiredSize` terms as other terms would
        // either be excluded by the priority queue or at reduce time.
        if (valuesWithOrdinals.size() > 0) {
            final boolean reverse = order == InternalOrder.TERM_DESC;
            Comparator<BytesRef> comparator = BytesRef.getUTF8SortedAsUnicodeComparator();
            if (reverse) {
                comparator = Collections.reverseOrder(comparator);
            }
            Iterator<? extends BytesRef>[] iterators = new Iterator[valuesWithOrdinals.size()];
            for (int i = 0; i < valuesWithOrdinals.size(); ++i) {
                iterators[i] = terms(valuesWithOrdinals.get(i), reverse);
            }
            Iterator<BytesRef> terms = Iterators2.mergeSorted(Arrays.asList(iterators), comparator, true);
            if (includeExclude != null) {
                terms = Iterators.filter(terms, new Predicate<BytesRef>() {
                    @Override
                    public boolean apply(BytesRef input) {
                        return includeExclude.accept(input);
                    }
                });
            }
            if (order == InternalOrder.COUNT_ASC) {
                // let's try to find `shardSize` terms that matched no hit
                // this one needs shardSize and not requiredSize because even though terms have a count of 0 here,
                // they might have higher counts on other shards
                for (int added = 0; added < shardSize && terms.hasNext();) {
                    if (bucketOrds.add(terms.next()) >= 0) {
                        ++added;
                    }
                }
            } else if (order == InternalOrder.COUNT_DESC) {
                // add terms until there are enough buckets
                while (bucketOrds.size() < requiredSize && terms.hasNext()) {
                    bucketOrds.add(terms.next());
                }
            } else if (order == InternalOrder.TERM_ASC || order == InternalOrder.TERM_DESC) {
                // add the `requiredSize` least terms
                for (int i = 0; i < requiredSize && terms.hasNext(); ++i) {
                    bucketOrds.add(terms.next());
                }
            } else {
                // other orders (aggregations) are not optimizable
                while (terms.hasNext()) {
                    bucketOrds.add(terms.next());
                }
            }
        }
    }
    final int size = (int) Math.min(bucketOrds.size(), shardSize);
    BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
    StringTerms.Bucket spare = null;
    for (int i = 0; i < bucketOrds.size(); i++) {
        if (spare == null) {
            spare = new StringTerms.Bucket(new BytesRef(), 0, null);
        }
        bucketOrds.get(i, spare.termBytes);
        spare.docCount = bucketDocCount(i);
        spare.bucketOrd = i;
        spare = (StringTerms.Bucket) ordered.insertWithOverflow(spare);
    }
    final InternalTerms.Bucket[] list = new InternalTerms.Bucket[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; --i) {
        final StringTerms.Bucket bucket = (StringTerms.Bucket) ordered.pop();
        // the terms are owned by the BytesRefHash, we need to pull a copy since the BytesRef hash data may be recycled at some point
        bucket.termBytes = BytesRef.deepCopyOf(bucket.termBytes);
        bucket.aggregations = bucketAggregations(bucket.bucketOrd);
        list[i] = bucket;
    }
    return new StringTerms(name, order, requiredSize, minDocCount, Arrays.asList(list));
}
From source file:org.elasticsearch.search.facet.terms.strings.HashedAggregator.java
License:Apache License
protected void onValue(int docId, BytesRef value, int hashCode, BytesValues values) {
    final boolean added = hash.add(value, hashCode, values);
    // note: we must do a deep copy here; the incoming value could have been
    // modified by a script or so
    assert assertHash.add(BytesRef.deepCopyOf(value), hashCode, values) == added
            : "asserting counter diverged from current counter - value: " + value + " hash: " + hashCode;
}