Example usage for org.apache.lucene.util BytesRef deepCopyOf

Introduction

On this page you can find example usages of org.apache.lucene.util BytesRef deepCopyOf.

Prototype

public static BytesRef deepCopyOf(BytesRef other) 

Document

Creates a new BytesRef that points to a copy of the bytes from other.

The returned BytesRef will have a length of other.length and an offset of zero.
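
As a small illustration of that contract, here is a minimal sketch (assuming the Lucene 4.x BytesRef API documented above): the deep copy gets its own byte array with offset 0, so later changes to the original buffer are not visible through it.

import java.nio.charset.StandardCharsets;

import org.apache.lucene.util.BytesRef;

public class DeepCopyOfContract {
    public static void main(String[] args) {
        byte[] buffer = "xxfooxx".getBytes(StandardCharsets.UTF_8);
        BytesRef view = new BytesRef(buffer, 2, 3);   // points into the middle of a shared buffer
        BytesRef copy = BytesRef.deepCopyOf(view);    // independent array, offset 0, length == view.length

        System.out.println(copy.offset + "/" + copy.length);  // 0/3
        buffer[2] = 'b';                                       // mutate the shared buffer
        System.out.println(view.utf8ToString());               // boo (the view sees the change)
        System.out.println(copy.utf8ToString());               // foo (the deep copy does not)
    }
}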

Usage

From source file: org.elasticsearch.index.fielddata.BytesValues.java

License: Apache License

/**
 * Converts the current shared {@link BytesRef} to a stable instance. Note,
 * this call makes the bytes safe for *reads*, not writes (into the same BytesRef). For example,
 * it makes it safe to be placed in a map.
 */
public BytesRef copyShared() {
    return BytesRef.deepCopyOf(scratch);
}
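
The pattern above reuses one shared scratch instance and deep-copies it only when a caller needs to keep the value. A sketch of a typical caller follows; the CountingConsumer-style count method is hypothetical, and the mutable copyBytes call assumes the same Lucene 4.x API used in these snippets.

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.util.BytesRef;

public class ScratchCopyExample {
    // one scratch instance reused for every value, like the shared BytesRef in copyShared() above
    private final BytesRef scratch = new BytesRef();
    private final Map<BytesRef, Integer> counts = new HashMap<>();

    // hypothetical consumer: scratch is overwritten on every call, so only a deep copy
    // is stable enough to be used as a map key
    void count(BytesRef sharedValue) {
        scratch.copyBytes(sharedValue);
        counts.merge(BytesRef.deepCopyOf(scratch), 1, Integer::sum);
    }
}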

From source file: org.elasticsearch.index.fielddata.DuelFieldDataTests.java

License: Apache License

private static void duelFieldDataBytes(Random random, AtomicReaderContext context, IndexFieldData<?> left,
        IndexFieldData<?> right, Preprocessor pre) throws Exception {
    AtomicFieldData<?> leftData = random.nextBoolean() ? left.load(context) : left.loadDirect(context);
    AtomicFieldData<?> rightData = random.nextBoolean() ? right.load(context) : right.loadDirect(context);
    assertThat(leftData.getNumDocs(), equalTo(rightData.getNumDocs()));

    int numDocs = leftData.getNumDocs();
    BytesValues leftBytesValues = leftData.getBytesValues(random.nextBoolean());
    BytesValues rightBytesValues = rightData.getBytesValues(random.nextBoolean());
    BytesRef leftSpare = new BytesRef();
    BytesRef rightSpare = new BytesRef();

    for (int i = 0; i < numDocs; i++) {
        int numValues = 0;
        assertThat((numValues = leftBytesValues.setDocument(i)), equalTo(rightBytesValues.setDocument(i)));
        BytesRef previous = null;
        for (int j = 0; j < numValues; j++) {

            rightSpare.copyBytes(rightBytesValues.nextValue());
            leftSpare.copyBytes(leftBytesValues.nextValue());
            assertThat(rightSpare.hashCode(), equalTo(rightBytesValues.currentValueHash()));
            assertThat(leftSpare.hashCode(), equalTo(leftBytesValues.currentValueHash()));
            if (previous != null && leftBytesValues.getOrder() == rightBytesValues.getOrder()) { // we can only compare consecutive values if both sides return them in the same order
                assertThat(pre.compare(previous, rightSpare), lessThan(0));
            }
            previous = BytesRef.deepCopyOf(rightSpare);
            pre.toString(rightSpare);
            pre.toString(leftSpare);
            assertThat(pre.toString(leftSpare), equalTo(pre.toString(rightSpare)));
            if (leftSpare.equals(rightSpare)) {
                assertThat(leftBytesValues.currentValueHash(), equalTo(rightBytesValues.currentValueHash()));
            }
        }
    }
}
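
The deepCopyOf call for previous is what keeps the previous value alive: rightSpare is overwritten by copyBytes on every iteration, so a plain reference would silently start pointing at the current value. A minimal sketch of the difference (the spare reuse and the term list are made up for illustration):

import org.apache.lucene.util.BytesRef;

public class SpareReuseExample {
    public static void main(String[] args) {
        BytesRef spare = new BytesRef();
        BytesRef previousAlias = null;   // plain reference to the reused spare: wrong
        BytesRef previousCopy = null;    // deep copy of the spare: safe

        for (String term : new String[] { "a", "b", "c" }) {
            spare.copyBytes(new BytesRef(term));   // the spare is overwritten every iteration
            if (previousCopy != null) {
                // the alias already reflects the current term; the copy still holds the previous one
                System.out.println(previousAlias.utf8ToString() + " vs " + previousCopy.utf8ToString());
            }
            previousAlias = spare;
            previousCopy = BytesRef.deepCopyOf(spare);
        }
        // prints "b vs a" and then "c vs b"
    }
}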

From source file: org.elasticsearch.index.fielddata.fieldcomparator.StringScriptDataComparator.java

License: Apache License

@Override
public void copy(int slot, int doc) {
    setSpare(doc);
    if (values[slot] == null) {
        values[slot] = BytesRef.deepCopyOf(spare);
    } else {
        values[slot].copyBytes(spare);
    }
}
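
This comparator-slot pattern (allocate an independent copy on first use, then reuse the slot's buffer with copyBytes) avoids allocating a new BytesRef per document. A small sketch of the same idea, with a made-up slot array:

import org.apache.lucene.util.BytesRef;

public class SlotCopyExample {
    private final BytesRef[] slots = new BytesRef[8];

    // copy a shared value into a slot: deep copy the first time, overwrite in place afterwards
    void copyIntoSlot(int slot, BytesRef spare) {
        if (slots[slot] == null) {
            slots[slot] = BytesRef.deepCopyOf(spare);   // first use: allocate an independent copy
        } else {
            slots[slot].copyBytes(spare);               // later uses: reuse the existing byte array
        }
    }
}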

From source file: org.elasticsearch.index.fielddata.plain.ConcreteBytesRefIndexFieldData.java

License: Apache License

@Override
public ConcreteBytesRefAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception {
    AtomicReader reader = context.reader();

    Terms terms = reader.terms(getFieldNames().indexName());
    if (terms == null) {
        return ConcreteBytesRefAtomicFieldData.empty(reader.maxDoc());
    }

    long size = terms.size();
    if (size == -1) {
        size = 1024;
    }
    final ArrayList<BytesRef> values = new ArrayList<BytesRef>((int) size);
    values.add(null); // slot 0 is reserved for the null value
    OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
    try {
        BytesRefIterator iter = builder.buildFromTerms(terms.iterator(null), reader.getLiveDocs());
        BytesRef term;
        while ((term = iter.next()) != null) {
            values.add(BytesRef.deepCopyOf(term));
        }
        return new ConcreteBytesRefAtomicFieldData(values.toArray(new BytesRef[values.size()]),
                builder.build(fieldDataType.getSettings()));
    } finally {
        builder.close();
    }
}
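
The loadDirect method above relies on the fact that a BytesRefIterator may reuse the instance it returns between next() calls, so every term has to be deep-copied before it is stored. A reduced sketch of that loop:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;

public class CollectTermsExample {
    // collect all terms from an iterator that is free to reuse its returned BytesRef
    static List<BytesRef> collect(BytesRefIterator iter) throws IOException {
        List<BytesRef> values = new ArrayList<BytesRef>();
        BytesRef term;
        while ((term = iter.next()) != null) {
            values.add(BytesRef.deepCopyOf(term));   // without the copy, entries could all alias one buffer
        }
        return values;
    }
}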

From source file: org.elasticsearch.search.aggregations.bucket.composite.BinaryValuesSource.java

License: Apache License

@Override
public void copyCurrent(int slot) {
    values[slot] = BytesRef.deepCopyOf(currentValue);
}

From source file: org.elasticsearch.search.aggregations.bucket.composite.GlobalOrdinalValuesSource.java

License: Apache License

@Override
BytesRef toComparable(int slot) throws IOException {
    long globalOrd = values.get(slot);
    if (globalOrd == lastLookupOrd) {
        return lastLookupValue;
    } else {
        lastLookupOrd = globalOrd;
        lastLookupValue = BytesRef.deepCopyOf(lookup.lookupOrd(values.get(slot)));
        return lastLookupValue;
    }
}
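
The method above memoizes the last ordinal lookup; the cached value has to be a deep copy because the next lookupOrd call may overwrite the instance that was returned. A sketch of that caching pattern, with a hypothetical OrdToTerm interface standing in for the global-ordinals lookup:

import org.apache.lucene.util.BytesRef;

public class OrdLookupCache {
    // hypothetical lookup that may return a shared, reused BytesRef
    interface OrdToTerm {
        BytesRef lookupOrd(long ord);
    }

    private long lastOrd = -1;
    private BytesRef lastValue;

    BytesRef term(OrdToTerm lookup, long ord) {
        if (ord != lastOrd) {
            lastOrd = ord;
            lastValue = BytesRef.deepCopyOf(lookup.lookupOrd(ord));   // detach from the lookup's buffer
        }
        return lastValue;
    }
}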

From source file: org.elasticsearch.search.aggregations.bucket.significant.GlobalOrdinalsSignificantTermsAggregator.java

License: Apache License

@Override
public SignificantStringTerms buildAggregation(long owningBucketOrdinal) throws IOException {
    assert owningBucketOrdinal == 0;
    if (globalOrds == null) { // no context in this reader
        return buildEmptyAggregation();
    }

    final int size;
    if (bucketCountThresholds.getMinDocCount() == 0) {
        // if minDocCount == 0 then we can end up with more buckets than maxBucketOrd() returns
        size = (int) Math.min(globalOrds.getValueCount(), bucketCountThresholds.getShardSize());
    } else {
        size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize());
    }
    long supersetSize = termsAggFactory.prepareBackground(context);
    long subsetSize = numCollectedDocs;

    BucketSignificancePriorityQueue ordered = new BucketSignificancePriorityQueue(size);
    SignificantStringTerms.Bucket spare = null;
    for (long globalTermOrd = 0; globalTermOrd < globalOrds.getValueCount(); ++globalTermOrd) {
        if (includeExclude != null && !acceptedGlobalOrdinals.get(globalTermOrd)) {
            continue;
        }
        final long bucketOrd = getBucketOrd(globalTermOrd);
        final int bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
        if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) {
            continue;
        }
        if (bucketDocCount < bucketCountThresholds.getShardMinDocCount()) {
            continue;
        }

        if (spare == null) {
            spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null);
        }
        spare.bucketOrd = bucketOrd;
        copy(globalOrds.lookupOrd(globalTermOrd), spare.termBytes);
        spare.subsetDf = bucketDocCount;
        spare.subsetSize = subsetSize;
        spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.termBytes);
        spare.supersetSize = supersetSize;
        // During shard-local down-selection we use subset/superset stats
        // that are for this shard only
        // Back at the central reducer these properties will be updated with
        // global stats
        spare.updateScore(termsAggFactory.getSignificanceHeuristic());
        spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare);
    }

    final InternalSignificantTerms.Bucket[] list = new InternalSignificantTerms.Bucket[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; i--) {
        final SignificantStringTerms.Bucket bucket = (SignificantStringTerms.Bucket) ordered.pop();
        // the terms are owned by the BytesRefHash; we need to pull a copy since the hash's data may be recycled at some point
        bucket.termBytes = BytesRef.deepCopyOf(bucket.termBytes);
        bucket.aggregations = bucketAggregations(bucket.bucketOrd);
        list[i] = bucket;
    }

    return new SignificantStringTerms(subsetSize, supersetSize, name, bucketCountThresholds.getRequiredSize(),
            bucketCountThresholds.getMinDocCount(), termsAggFactory.getSignificanceHeuristic(),
            Arrays.asList(list), pipelineAggregators(), metaData());
}
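
The final loop detaches bucket.termBytes from the hash that owns them, because the hash's backing pages may be recycled once the aggregation has been built. The same ownership issue exists with Lucene's own org.apache.lucene.util.BytesRefHash (the Elasticsearch aggregators use a long-keyed variant); a sketch of draining such a hash safely:

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;

public class DetachFromHashExample {
    // pull every term out of a BytesRefHash; get() only points the spare into the
    // hash's internal pool, so a deep copy is needed if the bytes must outlive the hash
    static List<BytesRef> drain(BytesRefHash hash) {
        List<BytesRef> terms = new ArrayList<BytesRef>();
        BytesRef spare = new BytesRef();
        for (int id = 0; id < hash.size(); id++) {
            hash.get(id, spare);                      // spare now aliases pooled bytes
            terms.add(BytesRef.deepCopyOf(spare));    // detach before the pool is reused
        }
        return terms;
    }
}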

From source file: org.elasticsearch.search.aggregations.bucket.significant.SignificantStringTermsAggregator.java

License: Apache License

@Override
public SignificantStringTerms buildAggregation(long owningBucketOrdinal) throws IOException {
    assert owningBucketOrdinal == 0;

    final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());
    long supersetSize = termsAggFactory.prepareBackground(context);
    long subsetSize = numCollectedDocs;

    BucketSignificancePriorityQueue ordered = new BucketSignificancePriorityQueue(size);
    SignificantStringTerms.Bucket spare = null;
    for (int i = 0; i < bucketOrds.size(); i++) {
        final int docCount = bucketDocCount(i);
        if (docCount < bucketCountThresholds.getShardMinDocCount()) {
            continue;
        }

        if (spare == null) {
            spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null);
        }

        bucketOrds.get(i, spare.termBytes);
        spare.subsetDf = docCount;
        spare.subsetSize = subsetSize;
        spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.termBytes);
        spare.supersetSize = supersetSize;
        // During shard-local down-selection we use subset/superset stats
        // that are for this shard only
        // Back at the central reducer these properties will be updated with
        // global stats
        spare.updateScore(termsAggFactory.getSignificanceHeuristic());

        spare.bucketOrd = i;
        spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare);
    }

    final InternalSignificantTerms.Bucket[] list = new InternalSignificantTerms.Bucket[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; i--) {
        final SignificantStringTerms.Bucket bucket = (SignificantStringTerms.Bucket) ordered.pop();
        // the terms are owned by the BytesRefHash; we need to pull a copy since the hash's data may be recycled at some point
        bucket.termBytes = BytesRef.deepCopyOf(bucket.termBytes);
        bucket.aggregations = bucketAggregations(bucket.bucketOrd);
        list[i] = bucket;
    }

    return new SignificantStringTerms(subsetSize, supersetSize, name, bucketCountThresholds.getRequiredSize(),
            bucketCountThresholds.getMinDocCount(), termsAggFactory.getSignificanceHeuristic(),
            Arrays.asList(list), pipelineAggregators(), metaData());
}

From source file: org.elasticsearch.search.aggregations.bucket.terms.StringTermsAggregator.java

License: Apache License

@Override
public StringTerms buildAggregation(long owningBucketOrdinal) {
    assert owningBucketOrdinal == 0;

    if (minDocCount == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < requiredSize)) {
        // we need to fill in the blanks
        List<BytesValues.WithOrdinals> valuesWithOrdinals = Lists.newArrayList();
        for (AtomicReaderContext ctx : context.searchContext().searcher().getTopReaderContext().leaves()) {
            context.setNextReader(ctx);
            final BytesValues values = valuesSource.bytesValues();
            if (values instanceof BytesValues.WithOrdinals) {
                valuesWithOrdinals.add((BytesValues.WithOrdinals) values);
            } else {
                // brute force
                for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) {
                    final int valueCount = values.setDocument(docId);
                    for (int i = 0; i < valueCount; ++i) {
                        final BytesRef term = values.nextValue();
                        if (includeExclude == null || includeExclude.accept(term)) {
                            bucketOrds.add(term, values.currentValueHash());
                        }
                    }
                }
            }
        }

        // With ordinals we can be smarter and add just as many terms as necessary to the hash table
        // For instance, if sorting by term asc, we only need to get the first `requiredSize` terms as other terms would
        // either be excluded by the priority queue or at reduce time.
        if (valuesWithOrdinals.size() > 0) {
            final boolean reverse = order == InternalOrder.TERM_DESC;
            Comparator<BytesRef> comparator = BytesRef.getUTF8SortedAsUnicodeComparator();
            if (reverse) {
                comparator = Collections.reverseOrder(comparator);
            }
            Iterator<? extends BytesRef>[] iterators = new Iterator[valuesWithOrdinals.size()];
            for (int i = 0; i < valuesWithOrdinals.size(); ++i) {
                iterators[i] = terms(valuesWithOrdinals.get(i), reverse);
            }
            Iterator<BytesRef> terms = Iterators2.mergeSorted(Arrays.asList(iterators), comparator, true);
            if (includeExclude != null) {
                terms = Iterators.filter(terms, new Predicate<BytesRef>() {
                    @Override
                    public boolean apply(BytesRef input) {
                        return includeExclude.accept(input);
                    }
                });
            }
            if (order == InternalOrder.COUNT_ASC) {
                // let's try to find `shardSize` terms that matched no hit
                // this one needs shardSize and not requiredSize because even though terms have a count of 0 here,
                // they might have higher counts on other shards
                for (int added = 0; added < shardSize && terms.hasNext();) {
                    if (bucketOrds.add(terms.next()) >= 0) {
                        ++added;
                    }
                }
            } else if (order == InternalOrder.COUNT_DESC) {
                // add terms until there are enough buckets
                while (bucketOrds.size() < requiredSize && terms.hasNext()) {
                    bucketOrds.add(terms.next());
                }
            } else if (order == InternalOrder.TERM_ASC || order == InternalOrder.TERM_DESC) {
                // add the `requiredSize` least terms
                for (int i = 0; i < requiredSize && terms.hasNext(); ++i) {
                    bucketOrds.add(terms.next());
                }
            } else {
                // other orders (aggregations) are not optimizable
                while (terms.hasNext()) {
                    bucketOrds.add(terms.next());
                }
            }
        }
    }

    final int size = (int) Math.min(bucketOrds.size(), shardSize);

    BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
    StringTerms.Bucket spare = null;
    for (int i = 0; i < bucketOrds.size(); i++) {
        if (spare == null) {
            spare = new StringTerms.Bucket(new BytesRef(), 0, null);
        }
        bucketOrds.get(i, spare.termBytes);
        spare.docCount = bucketDocCount(i);
        spare.bucketOrd = i;
        spare = (StringTerms.Bucket) ordered.insertWithOverflow(spare);
    }

    final InternalTerms.Bucket[] list = new InternalTerms.Bucket[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; --i) {
        final StringTerms.Bucket bucket = (StringTerms.Bucket) ordered.pop();
        // the terms are owned by the BytesRefHash; we need to pull a copy since the hash's data may be recycled at some point
        bucket.termBytes = BytesRef.deepCopyOf(bucket.termBytes);
        bucket.aggregations = bucketAggregations(bucket.bucketOrd);
        list[i] = bucket;
    }

    return new StringTerms(name, order, requiredSize, minDocCount, Arrays.asList(list));
}
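
The ordinals branch above merges one sorted term iterator per segment and only adds as many terms as the request needs. Iterators2.mergeSorted is an Elasticsearch-internal helper; a rough stand-in can be sketched with Guava's Iterators.mergeSorted, which does not deduplicate, so duplicates across segments are skipped by hand (the segment contents here are made up):

import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;

import com.google.common.collect.Iterators;
import org.apache.lucene.util.BytesRef;

public class MergeSortedTermsExample {
    public static void main(String[] args) {
        Iterator<BytesRef> seg1 = Arrays.asList(new BytesRef("a"), new BytesRef("c")).iterator();
        Iterator<BytesRef> seg2 = Arrays.asList(new BytesRef("b"), new BytesRef("c")).iterator();

        Comparator<BytesRef> cmp = Comparator.naturalOrder();   // reverse it for a TERM_DESC order
        Iterator<BytesRef> merged = Iterators.mergeSorted(Arrays.asList(seg1, seg2), cmp);

        int needed = 2;                 // only take as many distinct terms as required
        BytesRef previous = null;
        while (needed > 0 && merged.hasNext()) {
            BytesRef term = merged.next();
            if (previous != null && previous.equals(term)) {
                continue;               // skip duplicates coming from different segments
            }
            previous = BytesRef.deepCopyOf(term);   // the source iterators may reuse their instances
            System.out.println(previous.utf8ToString());
            needed--;
        }
    }
}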

From source file: org.elasticsearch.search.facet.terms.strings.HashedAggregator.java

License: Apache License

protected void onValue(int docId, BytesRef value, int hashCode, BytesValues values) {
    final boolean added = hash.add(value, hashCode, values);
    // note: we must do a deep copy here; the incoming value could have been
    // modified by a script or similar
    assert assertHash.add(BytesRef.deepCopyOf(value), hashCode,
            values) == added : "asserting counter diverged from current counter - value: " + value + " hash: "
                    + hashCode;
}
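
Here the deep copy protects the assertion's shadow counter: the incoming value lives in a buffer that a script (or the next document) may overwrite after onValue returns. A tiny sketch of the same defensive-copy idea, with a hypothetical verification set:

import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.util.BytesRef;

public class AssertingCollector {
    private final Set<BytesRef> seen = new HashSet<BytesRef>();

    // record a value for later verification; the caller may reuse or mutate 'value'
    // after this returns, so only a deep copy is safe to keep in the set
    boolean record(BytesRef value) {
        return seen.add(BytesRef.deepCopyOf(value));
    }
}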