Example usage for org.apache.lucene.util BytesRef BytesRef

Introduction

In this page you can find the example usage for org.apache.lucene.util BytesRef BytesRef.

Prototype

public BytesRef(byte[] bytes, int offset, int length)

Source Link

Document

This instance will directly reference bytes w/o making a copy.

Usage

From source file:com.rocana.lucene.codec.v1.RocanaSegmentTermsEnum.java

License:Apache License

@SuppressWarnings("unused")
private void printSeekState(PrintStream out) throws IOException {
    if (currentFrame == staticFrame) {
        out.println("  no prior seek");
    } else {//from w ww .  jav  a2s. c  om
        out.println("  prior seek state:");
        int ord = 0;
        boolean isSeekFrame = true;
        while (true) {
            RocanaSegmentTermsEnumFrame f = getFrame(ord);
            assert f != null;
            final BytesRef prefix = new BytesRef(term.get().bytes, 0, f.prefix);
            if (f.nextEnt == -1) {
                out.println("    frame " + (isSeekFrame ? "(seek)" : "(next)") + " ord=" + ord + " fp=" + f.fp
                        + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix
                        + " prefix=" + prefix + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
                        + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code="
                        + ((f.fp << RocanaBlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS)
                                + (f.hasTerms ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS : 0)
                                + (f.isFloor ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0))
                        + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd="
                        + f.getTermBlockOrd());
            } else {
                out.println("    frame " + (isSeekFrame ? "(seek, loaded)" : "(next, loaded)") + " ord=" + ord
                        + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen="
                        + f.prefix + " prefix=" + prefix + " nextEnt=" + f.nextEnt
                        + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms
                        + " isFloor=" + f.isFloor + " code="
                        + ((f.fp << RocanaBlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS)
                                + (f.hasTerms ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS : 0)
                                + (f.isFloor ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0))
                        + " lastSubFP=" + f.lastSubFP + " isLastInFloor=" + f.isLastInFloor + " mdUpto="
                        + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd());
            }
            if (fr.index != null) {
                assert !isSeekFrame || f.arc != null : "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
                if (f.prefix > 0 && isSeekFrame && f.arc.label != (term.byteAt(f.prefix - 1) & 0xFF)) {
                    out.println("      broken seek state: arc.label=" + (char) f.arc.label + " vs term byte="
                            + (char) (term.byteAt(f.prefix - 1) & 0xFF));
                    throw new RuntimeException("seek state is broken");
                }
                BytesRef output = Util.get(fr.index, prefix);
                if (output == null) {
                    out.println("      broken seek state: prefix is not final in index");
                    throw new RuntimeException("seek state is broken");
                } else if (isSeekFrame && !f.isFloor) {
                    final ByteArrayDataInput reader = new ByteArrayDataInput(output.bytes, output.offset,
                            output.length);
                    final long codeOrig = reader.readVLong();
                    final long code = (f.fp << RocanaBlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS)
                            | (f.hasTerms ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS : 0)
                            | (f.isFloor ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0);
                    if (codeOrig != code) {
                        out.println("      broken seek state: output code=" + codeOrig
                                + " doesn't match frame code=" + code);
                        throw new RuntimeException("seek state is broken");
                    }
                }
            }
            if (f == currentFrame) {
                break;
            }
            if (f.prefix == validIndexPrefix) {
                isSeekFrame = false;
            }
            ord++;
        }
    }
}

From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java

License:Apache License

private void assertTermsSeeking(Terms leftTerms, Terms rightTerms) throws Exception {
    TermsEnum leftEnum = null;/*from   ww w  .ja va 2 s . c  o  m*/
    TermsEnum rightEnum = null;

    // just an upper bound
    int numTests = atLeast(20);
    Random random = random();

    // collect this number of terms from the left side
    HashSet<BytesRef> tests = new HashSet<>();
    int numPasses = 0;
    while (numPasses < 10 && tests.size() < numTests) {
        leftEnum = leftTerms.iterator();
        BytesRef term = null;
        while ((term = leftEnum.next()) != null) {
            int code = random.nextInt(10);
            if (code == 0) {
                // the term
                tests.add(BytesRef.deepCopyOf(term));
            } else if (code == 1) {
                // truncated subsequence of term
                term = BytesRef.deepCopyOf(term);
                if (term.length > 0) {
                    // truncate it
                    term.length = random.nextInt(term.length);
                }
            } else if (code == 2) {
                // term, but ensure a non-zero offset
                byte newbytes[] = new byte[term.length + 5];
                System.arraycopy(term.bytes, term.offset, newbytes, 5, term.length);
                tests.add(new BytesRef(newbytes, 5, term.length));
            }
        }
        numPasses++;
    }

    ArrayList<BytesRef> shuffledTests = new ArrayList<>(tests);
    Collections.shuffle(shuffledTests, random);

    for (BytesRef b : shuffledTests) {
        leftEnum = leftTerms.iterator();
        rightEnum = rightTerms.iterator();

        assertEquals(leftEnum.seekExact(b), rightEnum.seekExact(b));
        assertEquals(leftEnum.seekExact(b), rightEnum.seekExact(b));

        SeekStatus leftStatus;
        SeekStatus rightStatus;

        leftStatus = leftEnum.seekCeil(b);
        rightStatus = rightEnum.seekCeil(b);
        assertEquals(leftStatus, rightStatus);
        if (leftStatus != SeekStatus.END) {
            assertEquals(leftEnum.term(), rightEnum.term());
        }

        leftStatus = leftEnum.seekCeil(b);
        rightStatus = rightEnum.seekCeil(b);
        assertEquals(leftStatus, rightStatus);
        if (leftStatus != SeekStatus.END) {
            assertEquals(leftEnum.term(), rightEnum.term());
        }
    }
}

From source file:com.tcdi.zombodb.query.VisibilityQueryHelper.java

License:Apache License

static Map<Integer, FixedBitSet> determineVisibility(final Query query, final String field, final long myXid,
        final long xmin, final long xmax, final Set<Long> activeXids, IndexSearcher searcher,
        List<BytesRef> updatedCtids) throws IOException {
    final Map<Integer, FixedBitSet> visibilityBitSets = new HashMap<>();

    if (updatedCtids.size() == 0)
        return visibilityBitSets;

    ///*from  w  w  w  .  j a  v a  2s  . co m*/
    // build a map of {@link VisibilityInfo} objects by each _prev_ctid
    //
    // We use XConstantScoreQuery here so that we exclude deleted docs
    //

    final Map<BytesRef, List<VisibilityInfo>> map = new HashMap<>();
    searcher.search(
            new XConstantScoreQuery(
                    SearchContext.current().filterCache().cache(new TermsFilter(field, updatedCtids))),
            new ZomboDBTermsCollector(field) {
                private SortedDocValues prevCtids;
                private SortedNumericDocValues xids;
                private SortedNumericDocValues sequence;
                private int ord;
                private int maxdoc;

                @Override
                public void collect(int doc) throws IOException {
                    xids.setDocument(doc);
                    sequence.setDocument(doc);

                    long xid = xids.valueAt(0);
                    long seq = sequence.valueAt(0);
                    BytesRef prevCtid = prevCtids.get(doc);

                    List<VisibilityInfo> matchingDocs = map.get(prevCtid);

                    if (matchingDocs == null)
                        map.put(BytesRef.deepCopyOf(prevCtid), matchingDocs = new ArrayList<>());
                    matchingDocs.add(new VisibilityInfo(ord, maxdoc, doc, xid, seq));
                }

                @Override
                public void setNextReader(AtomicReaderContext context) throws IOException {
                    prevCtids = FieldCache.DEFAULT.getTermsIndex(context.reader(), field);
                    xids = context.reader().getSortedNumericDocValues("_xid");
                    sequence = context.reader().getSortedNumericDocValues("_zdb_seq");
                    ord = context.ord;
                    maxdoc = context.reader().maxDoc();
                }
            });

    if (map.isEmpty())
        return visibilityBitSets;

    //
    // pick out the first VisibilityInfo for each document that is visible & committed
    // and build a FixedBitSet for each reader 'ord' that contains visible
    // documents.  A map of these (key'd on reader ord) is what we return.
    //

    BytesRefBuilder bytesRefBuilder = new BytesRefBuilder() {
        /* overloaded to avoid making a copy of the byte array */
        @Override
        public BytesRef toBytesRef() {
            return new BytesRef(this.bytes(), 0, this.length());
        }
    };

    Terms committedXidsTerms = MultiFields.getFields(searcher.getIndexReader()).terms("_zdb_committed_xid");
    TermsEnum committedXidsEnum = committedXidsTerms == null ? null : committedXidsTerms.iterator(null);
    for (List<VisibilityInfo> visibility : map.values()) {
        CollectionUtil.introSort(visibility, new Comparator<VisibilityInfo>() {
            @Override
            public int compare(VisibilityInfo o1, VisibilityInfo o2) {
                int cmp = Long.compare(o2.xid, o1.xid);
                return cmp == 0 ? Long.compare(o2.sequence, o1.sequence) : cmp;
            }
        });

        boolean foundVisible = false;
        for (VisibilityInfo mapping : visibility) {

            if (foundVisible || mapping.xid > xmax || activeXids.contains(mapping.xid) || (mapping.xid != myXid
                    && !isCommitted(committedXidsEnum, mapping.xid, bytesRefBuilder))) {
                // document is not visible to us
                FixedBitSet visibilityBitset = visibilityBitSets.get(mapping.readerOrd);
                if (visibilityBitset == null)
                    visibilityBitSets.put(mapping.readerOrd,
                            visibilityBitset = new FixedBitSet(mapping.maxdoc));
                visibilityBitset.set(mapping.docid);
            } else {
                foundVisible = true;
            }
        }
    }

    return visibilityBitSets;
}

From source file:com.tuplejump.stargate.lucene.LuceneUtils.java

License:Apache License

public static Field idDocValue(final ByteBuffer byteBufferValue) {
    BytesRef bytesRef = new BytesRef(byteBufferValue.array(), byteBufferValue.arrayOffset(),
            byteBufferValue.limit());/*from  w  w w. j  a  va 2 s. c  o  m*/
    return new SortedDocValuesField(PK_NAME_DOC_VAL, bytesRef);
}

From source file:edu.upenn.library.solrplugins.CaseInsensitiveSortingTextField.java

License:Apache License

@Override
public CharsRef indexedToNormalized(BytesRef input, CharsRefBuilder output) {
    int endIndex = delimOffset(input);
    if (endIndex < 0) {
        return super.indexedToReadable(input, output);
    } else {/*from  w  ww  .j  a  v  a 2 s. c o  m*/
        return super.indexedToReadable(new BytesRef(input.bytes, input.offset, endIndex - input.offset),
                output);
    }
}

From source file:io.crate.expression.scalar.SubstrFunction.java

License:Apache License

@VisibleForTesting
static BytesRef substring(BytesRef utf8, int begin, int end) {
    int pos = utf8.offset;
    final int limit = pos + utf8.length;
    final byte[] bytes = utf8.bytes;
    int posBegin = pos;

    int codePointCount = 0;
    for (; pos < limit; codePointCount++) {
        if (codePointCount == begin) {
            posBegin = pos;//from  w w  w .  ja v  a 2s.  c o  m
        }
        if (codePointCount == end) {
            break;
        }

        int v = bytes[pos] & 0xFF;
        if (v < /* 0xxx xxxx */ 0x80) {
            pos += 1;
            continue;
        }
        if (v >= /* 110x xxxx */ 0xc0) {
            if (v < /* 111x xxxx */ 0xe0) {
                pos += 2;
                continue;
            }
            if (v < /* 1111 xxxx */ 0xf0) {
                pos += 3;
                continue;
            }
            if (v < /* 1111 1xxx */ 0xf8) {
                pos += 4;
                continue;
            }
            // fallthrough, consider 5 and 6 byte sequences invalid.
        }

        // Anything not covered above is invalid UTF8.
        throw new IllegalArgumentException("substr: invalid UTF8 string found.");
    }

    // Check if we didn't go over the limit on the last character.
    if (pos > limit)
        throw new IllegalArgumentException("begin index must not be > end index");
    return new BytesRef(bytes, posBegin, pos - posBegin);
}