List of usage examples for org.apache.lucene.util.BytesRef.deepCopyOf

public static BytesRef deepCopyOf(BytesRef other)

Parameter: other - the BytesRef to copy.
Returns a new BytesRef containing a copy of the bytes from other. The returned BytesRef will have a length of other.length and an offset of zero.
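Before the per-project examples, here is a minimal, self-contained sketch of the typical reason for calling deepCopyOf: enumeration APIs such as TermsEnum.next() may reuse a shared BytesRef across calls, so a deep copy is required before a value is kept beyond the current iteration. The class and method names in this sketch are illustrative and not taken from the examples below.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

// Illustrative helper: collect all terms from a TermsEnum into a list.
public class DeepCopyExample {
    static List<BytesRef> collectTerms(TermsEnum termsEnum) throws IOException {
        List<BytesRef> collected = new ArrayList<>();
        BytesRef term;
        while ((term = termsEnum.next()) != null) {
            // deepCopyOf allocates a fresh byte[] with offset 0 and length term.length,
            // so the stored value is not clobbered by the next call to next().
            collected.add(BytesRef.deepCopyOf(term));
        }
        return collected;
    }
}

Without the deep copy, every element of the list would end up pointing at the enum's reused buffer; the examples below apply the same pattern to lookupOrd(), BytesRefHash-owned terms, and analysis token streams.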
From source file:org.apache.solr.uninverting.TestFieldCache.java
License:Apache License
public void test() throws IOException {
    FieldCache cache = FieldCache.DEFAULT;
    NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, doubles.nextDoc());
        assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.longValue());
    }

    NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, longs.nextDoc());
        assertEquals(Long.MAX_VALUE - i, longs.longValue());
    }

    NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, ints.nextDoc());
        assertEquals(Integer.MAX_VALUE - i, ints.longValue());
    }

    NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, floats.nextDoc());
        assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.longValue());
    }

    Bits docsWithField = cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER);
    assertSame("Second request to cache return same array", docsWithField,
            cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER));
    assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits",
            docsWithField instanceof Bits.MatchAllBits);
    assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS,
            docsWithField.length() == NUM_DOCS);
    for (int i = 0; i < docsWithField.length(); i++) {
        assertTrue(docsWithField.get(i));
    }

    docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
    assertSame("Second request to cache return same array", docsWithField,
            cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER));
    assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits",
            docsWithField instanceof Bits.MatchAllBits);
    assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS,
            docsWithField.length() == NUM_DOCS);
    for (int i = 0; i < docsWithField.length(); i++) {
        assertEquals(i % 2 == 0, docsWithField.get(i));
    }

    // getTermsIndex
    SortedDocValues termsIndex = cache.getTermsIndex(reader, "theRandomUnicodeString");
    for (int i = 0; i < NUM_DOCS; i++) {
        final String s;
        if (i > termsIndex.docID()) {
            termsIndex.advance(i);
        }
        if (i == termsIndex.docID()) {
            s = termsIndex.binaryValue().utf8ToString();
        } else {
            s = null;
        }
        assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i],
                unicodeStrings[i] == null || unicodeStrings[i].equals(s));
    }

    int nTerms = termsIndex.getValueCount();

    TermsEnum tenum = termsIndex.termsEnum();
    for (int i = 0; i < nTerms; i++) {
        BytesRef val1 = BytesRef.deepCopyOf(tenum.next());
        final BytesRef val = termsIndex.lookupOrd(i);
        // System.out.println("i="+i);
        assertEquals(val, val1);
    }

    // seek the enum around (note this isn't a great test here)
    int num = atLeast(100);
    for (int i = 0; i < num; i++) {
        int k = random().nextInt(nTerms);
        final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(k));
        assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
        assertEquals(val, tenum.term());
    }

    for (int i = 0; i < nTerms; i++) {
        final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(i));
        assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
        assertEquals(val, tenum.term());
    }

    // test bad field
    termsIndex = cache.getTermsIndex(reader, "bogusfield");

    // getTerms
    BinaryDocValues terms = cache.getTerms(reader, "theRandomUnicodeString");
    for (int i = 0; i < NUM_DOCS; i++) {
        if (terms.docID() < i) {
            terms.nextDoc();
        }
        if (terms.docID() == i) {
            assertEquals(unicodeStrings[i], terms.binaryValue().utf8ToString());
        } else {
            assertNull(unicodeStrings[i]);
        }
    }

    // test bad field
    terms = cache.getTerms(reader, "bogusfield");

    // getDocTermOrds
    SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
    int numEntries = cache.getCacheEntries().length;
    // ask for it again, and check that we didnt create any additional entries:
    termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
    assertEquals(numEntries, cache.getCacheEntries().length);

    for (int i = 0; i < NUM_DOCS; i++) {
        // This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
        List<BytesRef> values = new ArrayList<>(new LinkedHashSet<>(Arrays.asList(multiValued[i])));
        for (BytesRef v : values) {
            if (v == null) {
                // why does this test use null values... instead of an empty list: confusing
                break;
            }
            if (i > termOrds.docID()) {
                assertEquals(i, termOrds.nextDoc());
            }
            long ord = termOrds.nextOrd();
            assert ord != SortedSetDocValues.NO_MORE_ORDS;
            BytesRef scratch = termOrds.lookupOrd(ord);
            assertEquals(v, scratch);
        }
        if (i == termOrds.docID()) {
            assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
        }
    }

    // test bad field
    termOrds = cache.getDocTermOrds(reader, "bogusfield", null);
    assertTrue(termOrds.getValueCount() == 0);

    FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
}
From source file:org.apache.solr.uninverting.TestFieldCacheVsDocValues.java
License:Apache License
private void assertEquals(int maxDoc, SortedDocValues expected, SortedDocValues actual) throws Exception {
    // can be null for the segment if no docs actually had any SortedDocValues
    // in this case FC.getDocTermsOrds returns EMPTY
    if (actual == null) {
        assertEquals(expected.getValueCount(), 0);
        return;
    }
    assertEquals(expected.getValueCount(), actual.getValueCount());

    // compare ord lists
    while (true) {
        int docID = expected.nextDoc();
        if (docID == NO_MORE_DOCS) {
            assertEquals(NO_MORE_DOCS, actual.nextDoc());
            break;
        }
        assertEquals(docID, actual.nextDoc());
        assertEquals(expected.ordValue(), actual.ordValue());
        assertEquals(expected.binaryValue(), actual.binaryValue());
    }

    // compare ord dictionary
    for (long i = 0; i < expected.getValueCount(); i++) {
        final BytesRef expectedBytes = BytesRef.deepCopyOf(expected.lookupOrd((int) i));
        final BytesRef actualBytes = actual.lookupOrd((int) i);
        assertEquals(expectedBytes, actualBytes);
    }

    // compare termsenum
    assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum());
}
From source file:org.apache.solr.uninverting.TestFieldCacheVsDocValues.java
License:Apache License
private void assertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
    // can be null for the segment if no docs actually had any SortedDocValues
    // in this case FC.getDocTermsOrds returns EMPTY
    if (actual == null) {
        assertEquals(expected.getValueCount(), 0);
        return;
    }
    assertEquals(expected.getValueCount(), actual.getValueCount());

    while (true) {
        int docID = expected.nextDoc();
        assertEquals(docID, actual.nextDoc());
        if (docID == NO_MORE_DOCS) {
            break;
        }
        long expectedOrd;
        while ((expectedOrd = expected.nextOrd()) != NO_MORE_ORDS) {
            assertEquals(expectedOrd, actual.nextOrd());
        }
        assertEquals(NO_MORE_ORDS, actual.nextOrd());
    }

    // compare ord dictionary
    for (long i = 0; i < expected.getValueCount(); i++) {
        final BytesRef expectedBytes = BytesRef.deepCopyOf(expected.lookupOrd(i));
        final BytesRef actualBytes = actual.lookupOrd(i);
        assertEquals(expectedBytes, actualBytes);
    }

    // compare termsenum
    assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum());
}
From source file:org.codelibs.elasticsearch.common.bytes.BytesArray.java
License:Apache License
public BytesArray(BytesRef bytesRef, boolean deepCopy) {
    if (deepCopy) {
        bytesRef = BytesRef.deepCopyOf(bytesRef);
    }
    bytes = bytesRef.bytes;
    offset = bytesRef.offset;
    length = bytesRef.length;
}
From source file:org.codelibs.elasticsearch.common.bytes.BytesReference.java
License:Apache License
/**
 * Returns a compact array from the given BytesReference. The returned array won't be copied unless necessary. If you need
 * to modify the returned array use <tt>BytesRef.deepCopyOf(reference.toBytesRef())</tt> instead
 */
public static byte[] toBytes(BytesReference reference) {
    final BytesRef bytesRef = reference.toBytesRef();
    if (bytesRef.offset == 0 && bytesRef.length == bytesRef.bytes.length) {
        return bytesRef.bytes;
    }
    return BytesRef.deepCopyOf(bytesRef).bytes;
}
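As a follow-up to the toBytes() example above, here is a short, hypothetical caller-side sketch of the idiom its Javadoc recommends: because toBytes() may return the backing array without copying, deep-copy through BytesRef.deepCopyOf before mutating the bytes. The class name and sample bytes are made up for illustration.

import org.apache.lucene.util.BytesRef;
import org.codelibs.elasticsearch.common.bytes.BytesArray;
import org.codelibs.elasticsearch.common.bytes.BytesReference;

// Hypothetical caller: read-only view vs. writable copy of a BytesReference's bytes.
public class ToBytesCallerSketch {
    public static void main(String[] args) {
        BytesReference reference = new BytesArray(new byte[] { 1, 2, 3 });
        byte[] view = BytesReference.toBytes(reference);                  // may alias the internal buffer
        byte[] copy = BytesRef.deepCopyOf(reference.toBytesRef()).bytes;  // always a fresh array
        copy[0] = 42;                                                     // safe: does not affect 'view'
    }
}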
From source file:org.codelibs.elasticsearch.index.query.WrapperQueryBuilder.java
License:Apache License
/**
 * Creates a query builder given a query provided as a {BytesReference}
 */
public WrapperQueryBuilder(BytesReference source) {
    if (source == null || source.length() == 0) {
        throw new IllegalArgumentException("query source text cannot be null or empty");
    }
    this.source = BytesRef.deepCopyOf(source.toBytesRef()).bytes;
}
From source file:org.codelibs.elasticsearch.search.aggregations.bucket.significant.GlobalOrdinalsSignificantTermsAggregator.java
License:Apache License
@Override
public SignificantStringTerms buildAggregation(long owningBucketOrdinal) throws IOException {
    assert owningBucketOrdinal == 0;
    if (globalOrds == null) { // no context in this reader
        return buildEmptyAggregation();
    }

    final int size;
    if (bucketCountThresholds.getMinDocCount() == 0) {
        // if minDocCount == 0 then we can end up with more buckets then maxBucketOrd() returns
        size = (int) Math.min(globalOrds.getValueCount(), bucketCountThresholds.getShardSize());
    } else {
        size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize());
    }
    long supersetSize = termsAggFactory.getSupersetNumDocs();
    long subsetSize = numCollectedDocs;

    BucketSignificancePriorityQueue<SignificantStringTerms.Bucket> ordered = new BucketSignificancePriorityQueue<>(size);
    SignificantStringTerms.Bucket spare = null;
    for (long globalTermOrd = 0; globalTermOrd < globalOrds.getValueCount(); ++globalTermOrd) {
        if (includeExclude != null && !acceptedGlobalOrdinals.get(globalTermOrd)) {
            continue;
        }
        final long bucketOrd = getBucketOrd(globalTermOrd);
        final int bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
        if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) {
            continue;
        }
        if (bucketDocCount < bucketCountThresholds.getShardMinDocCount()) {
            continue;
        }
        if (spare == null) {
            spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null, format);
        }
        spare.bucketOrd = bucketOrd;
        copy(globalOrds.lookupOrd(globalTermOrd), spare.termBytes);
        spare.subsetDf = bucketDocCount;
        spare.subsetSize = subsetSize;
        spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.termBytes);
        spare.supersetSize = supersetSize;
        // During shard-local down-selection we use subset/superset stats
        // that are for this shard only
        // Back at the central reducer these properties will be updated with
        // global stats
        spare.updateScore(significanceHeuristic);
        spare = ordered.insertWithOverflow(spare);
    }

    final SignificantStringTerms.Bucket[] list = new SignificantStringTerms.Bucket[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; i--) {
        final SignificantStringTerms.Bucket bucket = ordered.pop();
        // the terms are owned by the BytesRefHash, we need to pull a copy since the BytesRef hash data may be recycled at some point
        bucket.termBytes = BytesRef.deepCopyOf(bucket.termBytes);
        bucket.aggregations = bucketAggregations(bucket.bucketOrd);
        list[i] = bucket;
    }

    return new SignificantStringTerms(name, bucketCountThresholds.getRequiredSize(),
            bucketCountThresholds.getMinDocCount(), pipelineAggregators(), metaData(), format, subsetSize,
            supersetSize, significanceHeuristic, Arrays.asList(list));
}
From source file:org.codelibs.elasticsearch.search.aggregations.bucket.significant.SignificantStringTermsAggregator.java
License:Apache License
@Override
public SignificantStringTerms buildAggregation(long owningBucketOrdinal) throws IOException {
    assert owningBucketOrdinal == 0;

    final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());
    long supersetSize = termsAggFactory.getSupersetNumDocs();
    long subsetSize = numCollectedDocs;

    BucketSignificancePriorityQueue<SignificantStringTerms.Bucket> ordered = new BucketSignificancePriorityQueue<>(size);
    SignificantStringTerms.Bucket spare = null;
    for (int i = 0; i < bucketOrds.size(); i++) {
        final int docCount = bucketDocCount(i);
        if (docCount < bucketCountThresholds.getShardMinDocCount()) {
            continue;
        }
        if (spare == null) {
            spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null, format);
        }

        bucketOrds.get(i, spare.termBytes);
        spare.subsetDf = docCount;
        spare.subsetSize = subsetSize;
        spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.termBytes);
        spare.supersetSize = supersetSize;
        // During shard-local down-selection we use subset/superset stats
        // that are for this shard only
        // Back at the central reducer these properties will be updated with
        // global stats
        spare.updateScore(significanceHeuristic);
        spare.bucketOrd = i;
        spare = ordered.insertWithOverflow(spare);
    }

    final SignificantStringTerms.Bucket[] list = new SignificantStringTerms.Bucket[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; i--) {
        final SignificantStringTerms.Bucket bucket = ordered.pop();
        // the terms are owned by the BytesRefHash, we need to pull a copy since the BytesRef hash data may be recycled at some point
        bucket.termBytes = BytesRef.deepCopyOf(bucket.termBytes);
        bucket.aggregations = bucketAggregations(bucket.bucketOrd);
        list[i] = bucket;
    }

    return new SignificantStringTerms(name, bucketCountThresholds.getRequiredSize(),
            bucketCountThresholds.getMinDocCount(), pipelineAggregators(), metaData(), format, subsetSize,
            supersetSize, significanceHeuristic, Arrays.asList(list));
}
From source file:org.codelibs.elasticsearch.search.suggest.phrase.NoisyChannelSpellChecker.java
License:Apache License
public Result getCorrections(TokenStream stream, final CandidateGenerator generator, float maxErrors,
        int numCorrections, WordScorer wordScorer, float confidence, int gramSize) throws IOException {

    final List<CandidateSet> candidateSetsList = new ArrayList<>();
    DirectCandidateGenerator.analyze(stream, new DirectCandidateGenerator.TokenConsumer() {
        CandidateSet currentSet = null;
        private TypeAttribute typeAttribute;
        private final BytesRefBuilder termsRef = new BytesRefBuilder();
        private boolean anyUnigram = false;
        private boolean anyTokens = false;

        @Override
        public void reset(TokenStream stream) {
            super.reset(stream);
            typeAttribute = stream.addAttribute(TypeAttribute.class);
        }

        @Override
        public void nextToken() throws IOException {
            anyTokens = true;
            BytesRef term = fillBytesRef(termsRef);
            if (requireUnigram && typeAttribute.type() == ShingleFilter.DEFAULT_TOKEN_TYPE) {
                return;
            }
            anyUnigram = true;
            if (posIncAttr.getPositionIncrement() == 0 && typeAttribute.type() == SynonymFilter.TYPE_SYNONYM) {
                assert currentSet != null;
                long freq = 0;
                if ((freq = generator.frequency(term)) > 0) {
                    currentSet.addOneCandidate(
                            generator.createCandidate(BytesRef.deepCopyOf(term), freq, realWordLikelihood));
                }
            } else {
                if (currentSet != null) {
                    candidateSetsList.add(currentSet);
                }
                currentSet = new CandidateSet(Candidate.EMPTY,
                        generator.createCandidate(BytesRef.deepCopyOf(term), true));
            }
        }

        @Override
        public void end() {
            if (currentSet != null) {
                candidateSetsList.add(currentSet);
            }
            if (requireUnigram && !anyUnigram && anyTokens) {
                throw new IllegalStateException("At least one unigram is required but all tokens were ngrams");
            }
        }
    });

    if (candidateSetsList.isEmpty() || candidateSetsList.size() >= tokenLimit) {
        return Result.EMPTY;
    }

    for (CandidateSet candidateSet : candidateSetsList) {
        generator.drawCandidates(candidateSet);
    }
    double cutoffScore = Double.MIN_VALUE;
    CandidateScorer scorer = new CandidateScorer(wordScorer, numCorrections, gramSize);
    CandidateSet[] candidateSets = candidateSetsList.toArray(new CandidateSet[candidateSetsList.size()]);
    if (confidence > 0.0) {
        Candidate[] candidates = new Candidate[candidateSets.length];
        for (int i = 0; i < candidates.length; i++) {
            candidates[i] = candidateSets[i].originalTerm;
        }
        double inputPhraseScore = scorer.score(candidates, candidateSets);
        cutoffScore = inputPhraseScore * confidence;
    }
    Correction[] bestCandidates = scorer.findBestCandiates(candidateSets, maxErrors, cutoffScore);
    return new Result(bestCandidates, cutoffScore);
}
From source file:org.codelibs.elasticsearch.search.suggest.term.TermSuggester.java
License:Apache License
private static List<Token> queryTerms(SuggestionContext suggestion, CharsRefBuilder spare) throws IOException {
    final List<Token> result = new ArrayList<>();
    final String field = suggestion.getField();
    DirectCandidateGenerator.analyze(suggestion.getAnalyzer(), suggestion.getText(), field,
            new DirectCandidateGenerator.TokenConsumer() {
                @Override
                public void nextToken() {
                    Term term = new Term(field, BytesRef.deepCopyOf(fillBytesRef(new BytesRefBuilder())));
                    result.add(new Token(term, offsetAttr.startOffset(), offsetAttr.endOffset()));
                }
            }, spare);
    return result;
}