Example usage for org.apache.lucene.util BytesRef deepCopyOf

Introduction

On this page you can find example usages of org.apache.lucene.util BytesRef deepCopyOf.

Prototype

public static BytesRef deepCopyOf(BytesRef other) 

Document

Creates a new BytesRef that points to a copy of the bytes from other.

The returned BytesRef will have a length of other.length and an offset of zero.
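
As a small illustration of that contract, here is a minimal sketch (assuming the Lucene 4.x BytesRef API documented above): the deep copy gets its own byte array with offset 0, so later changes to the original buffer are not visible through it.

import java.nio.charset.StandardCharsets;

import org.apache.lucene.util.BytesRef;

public class DeepCopyOfContract {
    public static void main(String[] args) {
        byte[] buffer = "xxfooxx".getBytes(StandardCharsets.UTF_8);
        BytesRef view = new BytesRef(buffer, 2, 3);   // points into the middle of a shared buffer
        BytesRef copy = BytesRef.deepCopyOf(view);    // independent array, offset 0, length == view.length

        System.out.println(copy.offset + "/" + copy.length);  // 0/3
        buffer[2] = 'b';                                       // mutate the shared buffer
        System.out.println(view.utf8ToString());               // boo (the view sees the change)
        System.out.println(copy.utf8ToString());               // foo (the deep copy does not)
    }
}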

Usage

From source file: org.elasticsearch.index.fielddata.BytesValues.java

License: Apache License

/**
 * Converts the current shared {@link BytesRef} to a stable instance. Note,
 * this call makes the bytes safe for *reads*, not writes (into the same BytesRef). For example,
 * it makes it safe to be placed in a map.
 */
public BytesRef copyShared() {
    return BytesRef.deepCopyOf(scratch);
}
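
The pattern above reuses one shared scratch instance and deep-copies it only when a caller needs to keep the value. A sketch of a typical caller follows; the CountingConsumer-style count method is hypothetical, and the mutable copyBytes call assumes the same Lucene 4.x API used in these snippets.

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.util.BytesRef;

public class ScratchCopyExample {
    // one scratch instance reused for every value, like the shared BytesRef in copyShared() above
    private final BytesRef scratch = new BytesRef();
    private final Map<BytesRef, Integer> counts = new HashMap<>();

    // hypothetical consumer: scratch is overwritten on every call, so only a deep copy
    // is stable enough to be used as a map key
    void count(BytesRef sharedValue) {
        scratch.copyBytes(sharedValue);
        counts.merge(BytesRef.deepCopyOf(scratch), 1, Integer::sum);
    }
}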

From source file: org.elasticsearch.index.fielddata.DuelFieldDataTests.java

License: Apache License

private static void duelFieldDataBytes(Random random, AtomicReaderContext context, IndexFieldData<?> left,
        IndexFieldData<?> right, Preprocessor pre) throws Exception {
    AtomicFieldData<?> leftData = random.nextBoolean() ? left.load(context) : left.loadDirect(context);
    AtomicFieldData<?> rightData = random.nextBoolean() ? right.load(context) : right.loadDirect(context);
    assertThat(leftData.getNumDocs(), equalTo(rightData.getNumDocs()));

    int numDocs = leftData.getNumDocs();
    BytesValues leftBytesValues = leftData.getBytesValues(random.nextBoolean());
    BytesValues rightBytesValues = rightData.getBytesValues(random.nextBoolean());
    BytesRef leftSpare = new BytesRef();
    BytesRef rightSpare = new BytesRef();

    for (int i = 0; i < numDocs; i++) {
        int numValues = 0;
        assertThat((numValues = leftBytesValues.setDocument(i)), equalTo(rightBytesValues.setDocument(i)));
        BytesRef previous = null;
        for (int j = 0; j < numValues; j++) {

            rightSpare.copyBytes(rightBytesValues.nextValue());
            leftSpare.copyBytes(leftBytesValues.nextValue());
            assertThat(rightSpare.hashCode(), equalTo(rightBytesValues.currentValueHash()));
            assertThat(leftSpare.hashCode(), equalTo(leftBytesValues.currentValueHash()));
            if (previous != null && leftBytesValues.getOrder() == rightBytesValues.getOrder()) { // we can only compare consecutive values if both sides return them in the same order
                assertThat(pre.compare(previous, rightSpare), lessThan(0));
            }
            previous = BytesRef.deepCopyOf(rightSpare);
            pre.toString(rightSpare);
            pre.toString(leftSpare);
            assertThat(pre.toString(leftSpare), equalTo(pre.toString(rightSpare)));
            if (leftSpare.equals(rightSpare)) {
                assertThat(leftBytesValues.currentValueHash(), equalTo(rightBytesValues.currentValueHash()));
            }
        }
    }
}
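
The deepCopyOf call for previous is what keeps the previous value alive: rightSpare is overwritten by copyBytes on every iteration, so a plain reference would silently start pointing at the current value. A minimal sketch of the difference (the spare reuse and the term list are made up for illustration):

import org.apache.lucene.util.BytesRef;

public class SpareReuseExample {
    public static void main(String[] args) {
        BytesRef spare = new BytesRef();
        BytesRef previousAlias = null;   // plain reference to the reused spare: wrong
        BytesRef previousCopy = null;    // deep copy of the spare: safe

        for (String term : new String[] { "a", "b", "c" }) {
            spare.copyBytes(new BytesRef(term));   // the spare is overwritten every iteration
            if (previousCopy != null) {
                // the alias already reflects the current term; the copy still holds the previous one
                System.out.println(previousAlias.utf8ToString() + " vs " + previousCopy.utf8ToString());
            }
            previousAlias = spare;
            previousCopy = BytesRef.deepCopyOf(spare);
        }
        // prints "b vs a" and then "c vs b"
    }
}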

From source file: org.elasticsearch.index.fielddata.fieldcomparator.StringScriptDataComparator.java

License: Apache License

@Override
public void copy(int slot, int doc) {
    setSpare(doc);
    if (values[slot] == null) {
        values[slot] = BytesRef.deepCopyOf(spare);
    } else {
        values[slot].copyBytes(spare);
    }
}
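
This comparator-slot pattern (allocate an independent copy on first use, then reuse the slot's buffer with copyBytes) avoids allocating a new BytesRef per document. A small sketch of the same idea, with a made-up slot array:

import org.apache.lucene.util.BytesRef;

public class SlotCopyExample {
    private final BytesRef[] slots = new BytesRef[8];

    // copy a shared value into a slot: deep copy the first time, overwrite in place afterwards
    void copyIntoSlot(int slot, BytesRef spare) {
        if (slots[slot] == null) {
            slots[slot] = BytesRef.deepCopyOf(spare);   // first use: allocate an independent copy
        } else {
            slots[slot].copyBytes(spare);               // later uses: reuse the existing byte array
        }
    }
}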

From source file: org.elasticsearch.index.fielddata.plain.ConcreteBytesRefIndexFieldData.java

License: Apache License

@Override
public ConcreteBytesRefAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception {
    AtomicReader reader = context.reader();

    Terms terms = reader.terms(getFieldNames().indexName());
    if (terms == null) {
        return ConcreteBytesRefAtomicFieldData.empty(reader.maxDoc());
    }

    long size = terms.size();
    if (size == -1) {
        size = 1024;
    }
    final ArrayList<BytesRef> values = new ArrayList<BytesRef>((int) size);
    values.add(null); // slot 0 is reserved for the null value
    OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
    try {
        BytesRefIterator iter = builder.buildFromTerms(terms.iterator(null), reader.getLiveDocs());
        BytesRef term;
        while ((term = iter.next()) != null) {
            values.add(BytesRef.deepCopyOf(term));
        }
        return new ConcreteBytesRefAtomicFieldData(values.toArray(new BytesRef[values.size()]),
                builder.build(fieldDataType.getSettings()));
    } finally {
        builder.close();
    }
}
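
The loadDirect method above relies on the fact that a BytesRefIterator may reuse the instance it returns between next() calls, so every term has to be deep-copied before it is stored. A reduced sketch of that loop:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;

public class CollectTermsExample {
    // collect all terms from an iterator that is free to reuse its returned BytesRef
    static List<BytesRef> collect(BytesRefIterator iter) throws IOException {
        List<BytesRef> values = new ArrayList<BytesRef>();
        BytesRef term;
        while ((term = iter.next()) != null) {
            values.add(BytesRef.deepCopyOf(term));   // without the copy, entries could all alias one buffer
        }
        return values;
    }
}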

From source file: org.elasticsearch.search.aggregations.bucket.composite.BinaryValuesSource.java

License: Apache License

@Override
public void copyCurrent(int slot) {
    values[slot] = BytesRef.deepCopyOf(currentValue);
}

From source file: org.elasticsearch.search.aggregations.bucket.composite.GlobalOrdinalValuesSource.java

License: Apache License

@Override
BytesRef toComparable(int slot) throws IOException {
    long globalOrd = values.get(slot);
    if (globalOrd == lastLookupOrd) {
        return lastLookupValue;
    } else {
        lastLookupOrd = globalOrd;
        lastLookupValue = BytesRef.deepCopyOf(lookup.lookupOrd(values.get(slot)));
        return lastLookupValue;
    }
}
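
The method above memoizes the last ordinal lookup; the cached value has to be a deep copy because the next lookupOrd call may overwrite the instance that was returned. A sketch of that caching pattern, with a hypothetical OrdToTerm interface standing in for the global-ordinals lookup:

import org.apache.lucene.util.BytesRef;

public class OrdLookupCache {
    // hypothetical lookup that may return a shared, reused BytesRef
    interface OrdToTerm {
        BytesRef lookupOrd(long ord);
    }

    private long lastOrd = -1;
    private BytesRef lastValue;

    BytesRef term(OrdToTerm lookup, long ord) {
        if (ord != lastOrd) {
            lastOrd = ord;
            lastValue = BytesRef.deepCopyOf(lookup.lookupOrd(ord));   // detach from the lookup's buffer
        }
        return lastValue;
    }
}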

From source file: org.elasticsearch.search.aggregations.bucket.significant.GlobalOrdinalsSignificantTermsAggregator.java

License: Apache License

@Override
public SignificantStringTerms buildAggregation(long owningBucketOrdinal) throws IOException {
    assert owningBucketOrdinal == 0;
    if (globalOrds == null) { // no context in this reader
        return buildEmptyAggregation();
    }

    final int size;
    if (bucketCountThresholds.getMinDocCount() == 0) {
        // if minDocCount == 0 then we can end up with more buckets than maxBucketOrd() returns
        size = (int) Math.min(globalOrds.getValueCount(), bucketCountThresholds.getShardSize());
    } else {
        size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize());
    }
    long supersetSize = termsAggFactory.prepareBackground(context);
    long subsetSize = numCollectedDocs;

    BucketSignificancePriorityQueue ordered = new BucketSignificancePriorityQueue(size);
    SignificantStringTerms.Bucket spare = null;
    for (long globalTermOrd = 0; globalTermOrd < globalOrds.getValueCount(); ++globalTermOrd) {
        if (includeExclude != null && !acceptedGlobalOrdinals.get(globalTermOrd)) {
            continue;
        }
        final long bucketOrd = getBucketOrd(globalTermOrd);
        final int bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
        if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) {
            continue;
        }
        if (bucketDocCount < bucketCountThresholds.getShardMinDocCount()) {
            continue;
        }

        if (spare == null) {
            spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null);
        }
        spare.bucketOrd = bucketOrd;
        copy(globalOrds.lookupOrd(globalTermOrd), spare.termBytes);
        spare.subsetDf = bucketDocCount;
        spare.subsetSize = subsetSize;
        spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.termBytes);
        spare.supersetSize = supersetSize;
        // During shard-local down-selection we use subset/superset stats
        // that are for this shard only
        // Back at the central reducer these properties will be updated with
        // global stats
        spare.updateScore(termsAggFactory.getSignificanceHeuristic());
        spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare);
    }

    final InternalSignificantTerms.Bucket[] list = new InternalSignificantTerms.Bucket[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; i--) {
        final SignificantStringTerms.Bucket bucket = (SignificantStringTerms.Bucket) ordered.pop();
        // the terms are owned by the BytesRefHash; we need to pull a copy since the hash's data may be recycled at some point
        bucket.termBytes = BytesRef.deepCopyOf(bucket.termBytes);
        bucket.aggregations = bucketAggregations(bucket.bucketOrd);
        list[i] = bucket;
    }

    return new SignificantStringTerms(subsetSize, supersetSize, name, bucketCountThresholds.getRequiredSize(),
            bucketCountThresholds.getMinDocCount(), termsAggFactory.getSignificanceHeuristic(),
            Arrays.asList(list), pipelineAggregators(), metaData());
}
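
The final loop detaches bucket.termBytes from the hash that owns them, because the hash's backing pages may be recycled once the aggregation has been built. The same ownership issue exists with Lucene's own org.apache.lucene.util.BytesRefHash (the Elasticsearch aggregators use a long-keyed variant); a sketch of draining such a hash safely:

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;

public class DetachFromHashExample {
    // pull every term out of a BytesRefHash; get() only points the spare into the
    // hash's internal pool, so a deep copy is needed if the bytes must outlive the hash
    static List<BytesRef> drain(BytesRefHash hash) {
        List<BytesRef> terms = new ArrayList<BytesRef>();
        BytesRef spare = new BytesRef();
        for (int id = 0; id < hash.size(); id++) {
            hash.get(id, spare);                      // spare now aliases pooled bytes
            terms.add(BytesRef.deepCopyOf(spare));    // detach before the pool is reused
        }
        return terms;
    }
}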

From source file: org.elasticsearch.search.aggregations.bucket.significant.SignificantStringTermsAggregator.java

License: Apache License

@Override
public SignificantStringTerms buildAggregation(long owningBucketOrdinal) throws IOException {
    assert owningBucketOrdinal == 0;

    final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());
    long supersetSize = termsAggFactory.prepareBackground(context);
    long subsetSize = numCollectedDocs;

    BucketSignificancePriorityQueue ordered = new BucketSignificancePriorityQueue(size);
    SignificantStringTerms.Bucket spare = null;
    for (int i = 0; i < bucketOrds.size(); i++) {
        final int docCount = bucketDocCount(i);
        if (docCount < bucketCountThresholds.getShardMinDocCount()) {
            continue;
        }

        if (spare == null) {
            spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null);
        }

        bucketOrds.get(i, spare.termBytes);
        spare.subsetDf = docCount;
        spare.subsetSize = subsetSize;
        spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.termBytes);
        spare.supersetSize = supersetSize;
        // During shard-local down-selection we use subset/superset stats
        // that are for this shard only
        // Back at the central reducer these properties will be updated with
        // global stats
        spare.updateScore(termsAggFactory.getSignificanceHeuristic());

        spare.bucketOrd = i;
        spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare);
    }

    final InternalSignificantTerms.Bucket[] list = new InternalSignificantTerms.Bucket[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; i--) {
        final SignificantStringTerms.Bucket bucket = (SignificantStringTerms.Bucket) ordered.pop();
        // the terms are owned by the BytesRefHash; we need to pull a copy since the hash's data may be recycled at some point
        bucket.termBytes = BytesRef.deepCopyOf(bucket.termBytes);
        bucket.aggregations = bucketAggregations(bucket.bucketOrd);
        list[i] = bucket;
    }

    return new SignificantStringTerms(subsetSize, supersetSize, name, bucketCountThresholds.getRequiredSize(),
            bucketCountThresholds.getMinDocCount(), termsAggFactory.getSignificanceHeuristic(),
            Arrays.asList(list), pipelineAggregators(), metaData());
}

From source file: org.elasticsearch.search.aggregations.bucket.terms.StringTermsAggregator.java

License: Apache License

@Override
public StringTerms buildAggregation(long owningBucketOrdinal) {
    assert owningBucketOrdinal == 0;

    if (minDocCount == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < requiredSize)) {
        // we need to fill in the blanks
        List<BytesValues.WithOrdinals> valuesWithOrdinals = Lists.newArrayList();
        for (AtomicReaderContext ctx : context.searchContext().searcher().getTopReaderContext().leaves()) {
            context.setNextReader(ctx);
            final BytesValues values = valuesSource.bytesValues();
            if (values instanceof BytesValues.WithOrdinals) {
                valuesWithOrdinals.add((BytesValues.WithOrdinals) values);
            } else {
                // brute force
                for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) {
                    final int valueCount = values.setDocument(docId);
                    for (int i = 0; i < valueCount; ++i) {
                        final BytesRef term = values.nextValue();
                        if (includeExclude == null || includeExclude.accept(term)) {
                            bucketOrds.add(term, values.currentValueHash());
                        }
                    }
                }
            }
        }

        // With ordinals we can be smarter and add just as many terms as necessary to the hash table
        // For instance, if sorting by term asc, we only need to get the first `requiredSize` terms as other terms would
        // either be excluded by the priority queue or at reduce time.
        if (valuesWithOrdinals.size() > 0) {
            final boolean reverse = order == InternalOrder.TERM_DESC;
            Comparator<BytesRef> comparator = BytesRef.getUTF8SortedAsUnicodeComparator();
            if (reverse) {
                comparator = Collections.reverseOrder(comparator);
            }
            Iterator<? extends BytesRef>[] iterators = new Iterator[valuesWithOrdinals.size()];
            for (int i = 0; i < valuesWithOrdinals.size(); ++i) {
                iterators[i] = terms(valuesWithOrdinals.get(i), reverse);
            }
            Iterator<BytesRef> terms = Iterators2.mergeSorted(Arrays.asList(iterators), comparator, true);
            if (includeExclude != null) {
                terms = Iterators.filter(terms, new Predicate<BytesRef>() {
                    @Override
                    public boolean apply(BytesRef input) {
                        return includeExclude.accept(input);
                    }
                });
            }
            if (order == InternalOrder.COUNT_ASC) {
                // let's try to find `shardSize` terms that matched no hit
                // this one needs shardSize and not requiredSize because even though terms have a count of 0 here,
                // they might have higher counts on other shards
                for (int added = 0; added < shardSize && terms.hasNext();) {
                    if (bucketOrds.add(terms.next()) >= 0) {
                        ++added;
                    }
                }
            } else if (order == InternalOrder.COUNT_DESC) {
                // add terms until there are enough buckets
                while (bucketOrds.size() < requiredSize && terms.hasNext()) {
                    bucketOrds.add(terms.next());
                }
            } else if (order == InternalOrder.TERM_ASC || order == InternalOrder.TERM_DESC) {
                // add the `requiredSize` least terms
                for (int i = 0; i < requiredSize && terms.hasNext(); ++i) {
                    bucketOrds.add(terms.next());
                }
            } else {
                // other orders (aggregations) are not optimizable
                while (terms.hasNext()) {
                    bucketOrds.add(terms.next());
                }
            }
        }
    }

    final int size = (int) Math.min(bucketOrds.size(), shardSize);

    BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
    StringTerms.Bucket spare = null;
    for (int i = 0; i < bucketOrds.size(); i++) {
        if (spare == null) {
            spare = new StringTerms.Bucket(new BytesRef(), 0, null);
        }
        bucketOrds.get(i, spare.termBytes);
        spare.docCount = bucketDocCount(i);
        spare.bucketOrd = i;
        spare = (StringTerms.Bucket) ordered.insertWithOverflow(spare);
    }

    final InternalTerms.Bucket[] list = new InternalTerms.Bucket[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; --i) {
        final StringTerms.Bucket bucket = (StringTerms.Bucket) ordered.pop();
        // the terms are owned by the BytesRefHash; we need to pull a copy since the hash's data may be recycled at some point
        bucket.termBytes = BytesRef.deepCopyOf(bucket.termBytes);
        bucket.aggregations = bucketAggregations(bucket.bucketOrd);
        list[i] = bucket;
    }

    return new StringTerms(name, order, requiredSize, minDocCount, Arrays.asList(list));
}
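
The ordinals branch above merges one sorted term iterator per segment and only adds as many terms as the request needs. Iterators2.mergeSorted is an Elasticsearch-internal helper; a rough stand-in can be sketched with Guava's Iterators.mergeSorted, which does not deduplicate, so duplicates across segments are skipped by hand (the segment contents here are made up):

import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;

import com.google.common.collect.Iterators;
import org.apache.lucene.util.BytesRef;

public class MergeSortedTermsExample {
    public static void main(String[] args) {
        Iterator<BytesRef> seg1 = Arrays.asList(new BytesRef("a"), new BytesRef("c")).iterator();
        Iterator<BytesRef> seg2 = Arrays.asList(new BytesRef("b"), new BytesRef("c")).iterator();

        Comparator<BytesRef> cmp = Comparator.naturalOrder();   // reverse it for a TERM_DESC order
        Iterator<BytesRef> merged = Iterators.mergeSorted(Arrays.asList(seg1, seg2), cmp);

        int needed = 2;                 // only take as many distinct terms as required
        BytesRef previous = null;
        while (needed > 0 && merged.hasNext()) {
            BytesRef term = merged.next();
            if (previous != null && previous.equals(term)) {
                continue;               // skip duplicates coming from different segments
            }
            previous = BytesRef.deepCopyOf(term);   // the source iterators may reuse their instances
            System.out.println(previous.utf8ToString());
            needed--;
        }
    }
}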

From source file: org.elasticsearch.search.facet.terms.strings.HashedAggregator.java

License: Apache License

protected void onValue(int docId, BytesRef value, int hashCode, BytesValues values) {
    final boolean added = hash.add(value, hashCode, values);
    // note: we must do a deep copy here; the incoming value could have been
    // modified by a script or similar
    assert assertHash.add(BytesRef.deepCopyOf(value), hashCode,
            values) == added : "asserting counter diverged from current counter - value: " + value + " hash: "
                    + hashCode;
}
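
Here the deep copy protects the assertion's shadow counter: the incoming value lives in a buffer that a script (or the next document) may overwrite after onValue returns. A tiny sketch of the same defensive-copy idea, with a hypothetical verification set:

import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.util.BytesRef;

public class AssertingCollector {
    private final Set<BytesRef> seen = new HashSet<BytesRef>();

    // record a value for later verification; the caller may reuse or mutate 'value'
    // after this returns, so only a deep copy is safe to keep in the set
    boolean record(BytesRef value) {
        return seen.add(BytesRef.deepCopyOf(value));
    }
}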