Example usage for org.apache.lucene.util BytesRef deepCopyOf

Introduction

This page lists example usages of org.apache.lucene.util.BytesRef.deepCopyOf, drawn from the Apache Solr and org.codelibs Elasticsearch sources shown below.

Prototype

public static BytesRef deepCopyOf(BytesRef other) 

Document

Creates a new BytesRef that points to a copy of the bytes from other.

The returned BytesRef will have a length of other.length and an offset of zero.
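For reference, a minimal standalone sketch (not taken from the sources below) showing the copy semantics: the copy owns its own byte array, so later changes to the original buffer do not affect it.

import java.nio.charset.StandardCharsets;
import org.apache.lucene.util.BytesRef;

public class DeepCopyDemo {
    public static void main(String[] args) {
        byte[] buffer = "hello".getBytes(StandardCharsets.UTF_8);
        BytesRef view = new BytesRef(buffer);      // shares the caller's buffer
        BytesRef copy = BytesRef.deepCopyOf(view); // fresh array, offset 0, same length

        buffer[0] = 'y';                           // mutate the shared buffer
        System.out.println(view.utf8ToString());   // prints "yello": the view sees the change
        System.out.println(copy.utf8ToString());   // prints "hello": the copy is unaffected
    }
}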

Usage

From source file:org.apache.solr.uninverting.TestFieldCache.java

License:Apache License

public void test() throws IOException {
    FieldCache cache = FieldCache.DEFAULT;
    NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, doubles.nextDoc());
        assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.longValue());
    }

    NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, longs.nextDoc());
        assertEquals(Long.MAX_VALUE - i, longs.longValue());
    }

    NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, ints.nextDoc());
        assertEquals(Integer.MAX_VALUE - i, ints.longValue());
    }

    NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER);
    for (int i = 0; i < NUM_DOCS; i++) {
        assertEquals(i, floats.nextDoc());
        assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.longValue());
    }

    Bits docsWithField = cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER);
    assertSame("Second request to cache return same array", docsWithField,
            cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER));
    assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits",
            docsWithField instanceof Bits.MatchAllBits);
    assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS,
            docsWithField.length() == NUM_DOCS);
    for (int i = 0; i < docsWithField.length(); i++) {
        assertTrue(docsWithField.get(i));
    }

    docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
    assertSame("Second request to cache return same array", docsWithField,
            cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER));
    assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits",
            docsWithField instanceof Bits.MatchAllBits);
    assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS,
            docsWithField.length() == NUM_DOCS);
    for (int i = 0; i < docsWithField.length(); i++) {
        assertEquals(i % 2 == 0, docsWithField.get(i));
    }

    // getTermsIndex
    SortedDocValues termsIndex = cache.getTermsIndex(reader, "theRandomUnicodeString");
    for (int i = 0; i < NUM_DOCS; i++) {
        final String s;
        if (i > termsIndex.docID()) {
            termsIndex.advance(i);
        }
        if (i == termsIndex.docID()) {
            s = termsIndex.binaryValue().utf8ToString();
        } else {
            s = null;
        }
        assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i],
                unicodeStrings[i] == null || unicodeStrings[i].equals(s));
    }

    int nTerms = termsIndex.getValueCount();

    TermsEnum tenum = termsIndex.termsEnum();
    for (int i = 0; i < nTerms; i++) {
        BytesRef val1 = BytesRef.deepCopyOf(tenum.next());
        final BytesRef val = termsIndex.lookupOrd(i);
        // System.out.println("i="+i);
        assertEquals(val, val1);
    }

    // seek the enum around (note this isn't a great test here)
    int num = atLeast(100);
    for (int i = 0; i < num; i++) {
        int k = random().nextInt(nTerms);
        final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(k));
        assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
        assertEquals(val, tenum.term());
    }

    for (int i = 0; i < nTerms; i++) {
        final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(i));
        assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
        assertEquals(val, tenum.term());
    }

    // test bad field
    termsIndex = cache.getTermsIndex(reader, "bogusfield");

    // getTerms
    BinaryDocValues terms = cache.getTerms(reader, "theRandomUnicodeString");
    for (int i = 0; i < NUM_DOCS; i++) {
        if (terms.docID() < i) {
            terms.nextDoc();
        }
        if (terms.docID() == i) {
            assertEquals(unicodeStrings[i], terms.binaryValue().utf8ToString());
        } else {
            assertNull(unicodeStrings[i]);
        }
    }

    // test bad field
    terms = cache.getTerms(reader, "bogusfield");

    // getDocTermOrds
    SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
    int numEntries = cache.getCacheEntries().length;
    // ask for it again, and check that we didn't create any additional entries:
    termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
    assertEquals(numEntries, cache.getCacheEntries().length);

    for (int i = 0; i < NUM_DOCS; i++) {
        // This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
        List<BytesRef> values = new ArrayList<>(new LinkedHashSet<>(Arrays.asList(multiValued[i])));
        for (BytesRef v : values) {
            if (v == null) {
                // why does this test use null values... instead of an empty list: confusing
                break;
            }
            if (i > termOrds.docID()) {
                assertEquals(i, termOrds.nextDoc());
            }
            long ord = termOrds.nextOrd();
            assert ord != SortedSetDocValues.NO_MORE_ORDS;
            BytesRef scratch = termOrds.lookupOrd(ord);
            assertEquals(v, scratch);
        }
        if (i == termOrds.docID()) {
            assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
        }
    }

    // test bad field
    termOrds = cache.getDocTermOrds(reader, "bogusfield", null);
    assertTrue(termOrds.getValueCount() == 0);

    FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
}

From source file:org.apache.solr.uninverting.TestFieldCacheVsDocValues.java

License:Apache License

private void assertEquals(int maxDoc, SortedDocValues expected, SortedDocValues actual) throws Exception {
    // can be null for the segment if no docs actually had any SortedDocValues
    // in this case FC.getDocTermsOrds returns EMPTY
    if (actual == null) {
        assertEquals(expected.getValueCount(), 0);
        return;
    }
    assertEquals(expected.getValueCount(), actual.getValueCount());

    // compare ord lists
    while (true) {
        int docID = expected.nextDoc();
        if (docID == NO_MORE_DOCS) {
            assertEquals(NO_MORE_DOCS, actual.nextDoc());
            break;
        }
        assertEquals(docID, actual.nextDoc());
        assertEquals(expected.ordValue(), actual.ordValue());
        assertEquals(expected.binaryValue(), actual.binaryValue());
    }

    // compare ord dictionary
    for (long i = 0; i < expected.getValueCount(); i++) {
        final BytesRef expectedBytes = BytesRef.deepCopyOf(expected.lookupOrd((int) i));
        final BytesRef actualBytes = actual.lookupOrd((int) i);
        assertEquals(expectedBytes, actualBytes);
    }

    // compare termsenum
    assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum());
}

From source file:org.apache.solr.uninverting.TestFieldCacheVsDocValues.java

License:Apache License

private void assertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
    // can be null for the segment if no docs actually had any SortedDocValues
    // in this case FC.getDocTermsOrds returns EMPTY
    if (actual == null) {
        assertEquals(expected.getValueCount(), 0);
        return;
    }
    assertEquals(expected.getValueCount(), actual.getValueCount());
    while (true) {
        int docID = expected.nextDoc();
        assertEquals(docID, actual.nextDoc());
        if (docID == NO_MORE_DOCS) {
            break;
        }
        long expectedOrd;
        while ((expectedOrd = expected.nextOrd()) != NO_MORE_ORDS) {
            assertEquals(expectedOrd, actual.nextOrd());
        }
        assertEquals(NO_MORE_ORDS, actual.nextOrd());
    }

    // compare ord dictionary
    for (long i = 0; i < expected.getValueCount(); i++) {
        final BytesRef expectedBytes = BytesRef.deepCopyOf(expected.lookupOrd(i));
        final BytesRef actualBytes = actual.lookupOrd(i);
        assertEquals(expectedBytes, actualBytes);
    }

    // compare termsenum
    assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum());
}

From source file:org.codelibs.elasticsearch.common.bytes.BytesArray.java

License:Apache License

public BytesArray(BytesRef bytesRef, boolean deepCopy) {
    if (deepCopy) {
        bytesRef = BytesRef.deepCopyOf(bytesRef);
    }
    bytes = bytesRef.bytes;
    offset = bytesRef.offset;
    length = bytesRef.length;
}

From source file:org.codelibs.elasticsearch.common.bytes.BytesReference.java

License:Apache License

/**
 * Returns a compact array from the given BytesReference. The returned array won't be copied unless necessary. If you need
 * to modify the returned array use <tt>BytesRef.deepCopyOf(reference.toBytesRef())</tt> instead
 */
public static byte[] toBytes(BytesReference reference) {
    final BytesRef bytesRef = reference.toBytesRef();
    if (bytesRef.offset == 0 && bytesRef.length == bytesRef.bytes.length) {
        return bytesRef.bytes;
    }
    return BytesRef.deepCopyOf(bytesRef).bytes;
}
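The javadoc above describes when toBytes avoids a copy: the backing array is returned directly only when the BytesRef covers it exactly, otherwise deepCopyOf produces a compact copy. A minimal hedged sketch of calling it, reusing the BytesArray constructor shown earlier (values are illustrative):

    // Sketch only: assumes the org.codelibs.elasticsearch.common.bytes classes shown above are available.
    BytesReference ref = new BytesArray(new BytesRef("hello"), true);
    byte[] maybeShared = BytesReference.toBytes(ref);                 // compact, but possibly the backing array itself
    byte[] privateCopy = BytesRef.deepCopyOf(ref.toBytesRef()).bytes; // always an independent copy, safe to modify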

From source file:org.codelibs.elasticsearch.index.query.WrapperQueryBuilder.java

License:Apache License

/**
 * Creates a query builder given a query provided as a {@link BytesReference}
 */
public WrapperQueryBuilder(BytesReference source) {
    if (source == null || source.length() == 0) {
        throw new IllegalArgumentException("query source text cannot be null or empty");
    }
    this.source = BytesRef.deepCopyOf(source.toBytesRef()).bytes;
}

From source file:org.codelibs.elasticsearch.search.aggregations.bucket.significant.GlobalOrdinalsSignificantTermsAggregator.java

License:Apache License

@Override
public SignificantStringTerms buildAggregation(long owningBucketOrdinal) throws IOException {
    assert owningBucketOrdinal == 0;
    if (globalOrds == null) { // no context in this reader
        return buildEmptyAggregation();
    }

    final int size;
    if (bucketCountThresholds.getMinDocCount() == 0) {
        // if minDocCount == 0 then we can end up with more buckets than maxBucketOrd() returns
        size = (int) Math.min(globalOrds.getValueCount(), bucketCountThresholds.getShardSize());
    } else {
        size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize());
    }
    long supersetSize = termsAggFactory.getSupersetNumDocs();
    long subsetSize = numCollectedDocs;

    BucketSignificancePriorityQueue<SignificantStringTerms.Bucket> ordered = new BucketSignificancePriorityQueue<>(
            size);
    SignificantStringTerms.Bucket spare = null;
    for (long globalTermOrd = 0; globalTermOrd < globalOrds.getValueCount(); ++globalTermOrd) {
        if (includeExclude != null && !acceptedGlobalOrdinals.get(globalTermOrd)) {
            continue;
        }
        final long bucketOrd = getBucketOrd(globalTermOrd);
        final int bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
        if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) {
            continue;
        }
        if (bucketDocCount < bucketCountThresholds.getShardMinDocCount()) {
            continue;
        }

        if (spare == null) {
            spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null, format);
        }
        spare.bucketOrd = bucketOrd;
        copy(globalOrds.lookupOrd(globalTermOrd), spare.termBytes);
        spare.subsetDf = bucketDocCount;
        spare.subsetSize = subsetSize;
        spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.termBytes);
        spare.supersetSize = supersetSize;
        // During shard-local down-selection we use subset/superset stats
        // that are for this shard only
        // Back at the central reducer these properties will be updated with
        // global stats
        spare.updateScore(significanceHeuristic);
        spare = ordered.insertWithOverflow(spare);
    }

    final SignificantStringTerms.Bucket[] list = new SignificantStringTerms.Bucket[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; i--) {
        final SignificantStringTerms.Bucket bucket = ordered.pop();
        // the terms are owned by the BytesRefHash, we need to pull a copy since the BytesRef hash data may be recycled at some point
        bucket.termBytes = BytesRef.deepCopyOf(bucket.termBytes);
        bucket.aggregations = bucketAggregations(bucket.bucketOrd);
        list[i] = bucket;
    }

    return new SignificantStringTerms(name, bucketCountThresholds.getRequiredSize(),
            bucketCountThresholds.getMinDocCount(), pipelineAggregators(), metaData(), format, subsetSize,
            supersetSize, significanceHeuristic, Arrays.asList(list));
}

From source file:org.codelibs.elasticsearch.search.aggregations.bucket.significant.SignificantStringTermsAggregator.java

License:Apache License

@Override
public SignificantStringTerms buildAggregation(long owningBucketOrdinal) throws IOException {
    assert owningBucketOrdinal == 0;

    final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());
    long supersetSize = termsAggFactory.getSupersetNumDocs();
    long subsetSize = numCollectedDocs;

    BucketSignificancePriorityQueue<SignificantStringTerms.Bucket> ordered = new BucketSignificancePriorityQueue<>(
            size);
    SignificantStringTerms.Bucket spare = null;
    for (int i = 0; i < bucketOrds.size(); i++) {
        final int docCount = bucketDocCount(i);
        if (docCount < bucketCountThresholds.getShardMinDocCount()) {
            continue;
        }

        if (spare == null) {
            spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null, format);
        }

        bucketOrds.get(i, spare.termBytes);
        spare.subsetDf = docCount;
        spare.subsetSize = subsetSize;
        spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.termBytes);
        spare.supersetSize = supersetSize;
        // During shard-local down-selection we use subset/superset stats
        // that are for this shard only
        // Back at the central reducer these properties will be updated with
        // global stats
        spare.updateScore(significanceHeuristic);

        spare.bucketOrd = i;
        spare = ordered.insertWithOverflow(spare);
    }

    final SignificantStringTerms.Bucket[] list = new SignificantStringTerms.Bucket[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; i--) {
        final SignificantStringTerms.Bucket bucket = ordered.pop();
        // the terms are owned by the BytesRefHash, we need to pull a copy since the BytesRef hash data may be recycled at some point
        bucket.termBytes = BytesRef.deepCopyOf(bucket.termBytes);
        bucket.aggregations = bucketAggregations(bucket.bucketOrd);
        list[i] = bucket;
    }

    return new SignificantStringTerms(name, bucketCountThresholds.getRequiredSize(),
            bucketCountThresholds.getMinDocCount(), pipelineAggregators(), metaData(), format, subsetSize,
            supersetSize, significanceHeuristic, Arrays.asList(list));
}

From source file:org.codelibs.elasticsearch.search.suggest.phrase.NoisyChannelSpellChecker.java

License:Apache License

public Result getCorrections(TokenStream stream, final CandidateGenerator generator, float maxErrors,
        int numCorrections, WordScorer wordScorer, float confidence, int gramSize) throws IOException {

    final List<CandidateSet> candidateSetsList = new ArrayList<>();
    DirectCandidateGenerator.analyze(stream, new DirectCandidateGenerator.TokenConsumer() {
        CandidateSet currentSet = null;
        private TypeAttribute typeAttribute;
        private final BytesRefBuilder termsRef = new BytesRefBuilder();
        private boolean anyUnigram = false;
        private boolean anyTokens = false;

        @Override
        public void reset(TokenStream stream) {
            super.reset(stream);
            typeAttribute = stream.addAttribute(TypeAttribute.class);
        }

        @Override
        public void nextToken() throws IOException {
            anyTokens = true;
            BytesRef term = fillBytesRef(termsRef);
            if (requireUnigram && typeAttribute.type() == ShingleFilter.DEFAULT_TOKEN_TYPE) {
                return;
            }
            anyUnigram = true;
            if (posIncAttr.getPositionIncrement() == 0 && typeAttribute.type() == SynonymFilter.TYPE_SYNONYM) {
                assert currentSet != null;
                long freq = 0;
                if ((freq = generator.frequency(term)) > 0) {
                    currentSet.addOneCandidate(
                            generator.createCandidate(BytesRef.deepCopyOf(term), freq, realWordLikelihood));
                }
            } else {
                if (currentSet != null) {
                    candidateSetsList.add(currentSet);
                }
                currentSet = new CandidateSet(Candidate.EMPTY,
                        generator.createCandidate(BytesRef.deepCopyOf(term), true));
            }
        }

        @Override
        public void end() {
            if (currentSet != null) {
                candidateSetsList.add(currentSet);
            }
            if (requireUnigram && !anyUnigram && anyTokens) {
                throw new IllegalStateException("At least one unigram is required but all tokens were ngrams");
            }
        }
    });

    if (candidateSetsList.isEmpty() || candidateSetsList.size() >= tokenLimit) {
        return Result.EMPTY;
    }

    for (CandidateSet candidateSet : candidateSetsList) {
        generator.drawCandidates(candidateSet);
    }
    double cutoffScore = Double.MIN_VALUE;
    CandidateScorer scorer = new CandidateScorer(wordScorer, numCorrections, gramSize);
    CandidateSet[] candidateSets = candidateSetsList.toArray(new CandidateSet[candidateSetsList.size()]);
    if (confidence > 0.0) {
        Candidate[] candidates = new Candidate[candidateSets.length];
        for (int i = 0; i < candidates.length; i++) {
            candidates[i] = candidateSets[i].originalTerm;
        }
        double inputPhraseScore = scorer.score(candidates, candidateSets);
        cutoffScore = inputPhraseScore * confidence;
    }
    Correction[] bestCandidates = scorer.findBestCandiates(candidateSets, maxErrors, cutoffScore);

    return new Result(bestCandidates, cutoffScore);
}

From source file:org.codelibs.elasticsearch.search.suggest.term.TermSuggester.java

License:Apache License

private static List<Token> queryTerms(SuggestionContext suggestion, CharsRefBuilder spare) throws IOException {
    final List<Token> result = new ArrayList<>();
    final String field = suggestion.getField();
    DirectCandidateGenerator.analyze(suggestion.getAnalyzer(), suggestion.getText(), field,
            new DirectCandidateGenerator.TokenConsumer() {
                @Override
                public void nextToken() {
                    Term term = new Term(field, BytesRef.deepCopyOf(fillBytesRef(new BytesRefBuilder())));
                    result.add(new Token(term, offsetAttr.startOffset(), offsetAttr.endOffset()));
                }
            }, spare);
    return result;
}