Example usage for org.apache.lucene.util BytesRef BytesRef

List of usage examples for org.apache.lucene.util BytesRef BytesRef

Introduction

In this page you can find the example usage for org.apache.lucene.util BytesRef BytesRef.

Prototype

public BytesRef(byte[] bytes, int offset, int length) 

Source Link

Document

This instance will directly reference bytes w/o making a copy.

Usage

From source file:com.rocana.lucene.codec.v1.RocanaSegmentTermsEnum.java

License:Apache License

@SuppressWarnings("unused")
private void printSeekState(PrintStream out) throws IOException {
    if (currentFrame == staticFrame) {
        out.println("  no prior seek");
    } else {//from w ww .  jav  a2s. c  om
        out.println("  prior seek state:");
        int ord = 0;
        boolean isSeekFrame = true;
        while (true) {
            RocanaSegmentTermsEnumFrame f = getFrame(ord);
            assert f != null;
            final BytesRef prefix = new BytesRef(term.get().bytes, 0, f.prefix);
            if (f.nextEnt == -1) {
                out.println("    frame " + (isSeekFrame ? "(seek)" : "(next)") + " ord=" + ord + " fp=" + f.fp
                        + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix
                        + " prefix=" + prefix + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
                        + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code="
                        + ((f.fp << RocanaBlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS)
                                + (f.hasTerms ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS : 0)
                                + (f.isFloor ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0))
                        + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd="
                        + f.getTermBlockOrd());
            } else {
                out.println("    frame " + (isSeekFrame ? "(seek, loaded)" : "(next, loaded)") + " ord=" + ord
                        + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen="
                        + f.prefix + " prefix=" + prefix + " nextEnt=" + f.nextEnt
                        + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms
                        + " isFloor=" + f.isFloor + " code="
                        + ((f.fp << RocanaBlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS)
                                + (f.hasTerms ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS : 0)
                                + (f.isFloor ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0))
                        + " lastSubFP=" + f.lastSubFP + " isLastInFloor=" + f.isLastInFloor + " mdUpto="
                        + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd());
            }
            if (fr.index != null) {
                assert !isSeekFrame || f.arc != null : "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
                if (f.prefix > 0 && isSeekFrame && f.arc.label != (term.byteAt(f.prefix - 1) & 0xFF)) {
                    out.println("      broken seek state: arc.label=" + (char) f.arc.label + " vs term byte="
                            + (char) (term.byteAt(f.prefix - 1) & 0xFF));
                    throw new RuntimeException("seek state is broken");
                }
                BytesRef output = Util.get(fr.index, prefix);
                if (output == null) {
                    out.println("      broken seek state: prefix is not final in index");
                    throw new RuntimeException("seek state is broken");
                } else if (isSeekFrame && !f.isFloor) {
                    final ByteArrayDataInput reader = new ByteArrayDataInput(output.bytes, output.offset,
                            output.length);
                    final long codeOrig = reader.readVLong();
                    final long code = (f.fp << RocanaBlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS)
                            | (f.hasTerms ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS : 0)
                            | (f.isFloor ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0);
                    if (codeOrig != code) {
                        out.println("      broken seek state: output code=" + codeOrig
                                + " doesn't match frame code=" + code);
                        throw new RuntimeException("seek state is broken");
                    }
                }
            }
            if (f == currentFrame) {
                break;
            }
            if (f.prefix == validIndexPrefix) {
                isSeekFrame = false;
            }
            ord++;
        }
    }
}

From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java

License:Apache License

private void assertTermsSeeking(Terms leftTerms, Terms rightTerms) throws Exception {
    TermsEnum leftEnum = null;/*from   ww w  .ja va 2 s . c  o  m*/
    TermsEnum rightEnum = null;

    // just an upper bound
    int numTests = atLeast(20);
    Random random = random();

    // collect this number of terms from the left side
    HashSet<BytesRef> tests = new HashSet<>();
    int numPasses = 0;
    while (numPasses < 10 && tests.size() < numTests) {
        leftEnum = leftTerms.iterator();
        BytesRef term = null;
        while ((term = leftEnum.next()) != null) {
            int code = random.nextInt(10);
            if (code == 0) {
                // the term
                tests.add(BytesRef.deepCopyOf(term));
            } else if (code == 1) {
                // truncated subsequence of term
                term = BytesRef.deepCopyOf(term);
                if (term.length > 0) {
                    // truncate it
                    term.length = random.nextInt(term.length);
                }
            } else if (code == 2) {
                // term, but ensure a non-zero offset
                byte newbytes[] = new byte[term.length + 5];
                System.arraycopy(term.bytes, term.offset, newbytes, 5, term.length);
                tests.add(new BytesRef(newbytes, 5, term.length));
            }
        }
        numPasses++;
    }

    ArrayList<BytesRef> shuffledTests = new ArrayList<>(tests);
    Collections.shuffle(shuffledTests, random);

    for (BytesRef b : shuffledTests) {
        leftEnum = leftTerms.iterator();
        rightEnum = rightTerms.iterator();

        assertEquals(leftEnum.seekExact(b), rightEnum.seekExact(b));
        assertEquals(leftEnum.seekExact(b), rightEnum.seekExact(b));

        SeekStatus leftStatus;
        SeekStatus rightStatus;

        leftStatus = leftEnum.seekCeil(b);
        rightStatus = rightEnum.seekCeil(b);
        assertEquals(leftStatus, rightStatus);
        if (leftStatus != SeekStatus.END) {
            assertEquals(leftEnum.term(), rightEnum.term());
        }

        leftStatus = leftEnum.seekCeil(b);
        rightStatus = rightEnum.seekCeil(b);
        assertEquals(leftStatus, rightStatus);
        if (leftStatus != SeekStatus.END) {
            assertEquals(leftEnum.term(), rightEnum.term());
        }
    }
}

From source file:com.tcdi.zombodb.query.VisibilityQueryHelper.java

License:Apache License

static Map<Integer, FixedBitSet> determineVisibility(final Query query, final String field, final long myXid,
        final long xmin, final long xmax, final Set<Long> activeXids, IndexSearcher searcher,
        List<BytesRef> updatedCtids) throws IOException {
    final Map<Integer, FixedBitSet> visibilityBitSets = new HashMap<>();

    if (updatedCtids.size() == 0)
        return visibilityBitSets;

    ///*from  w  w  w  .  j a  v a  2s  . co m*/
    // build a map of {@link VisibilityInfo} objects by each _prev_ctid
    //
    // We use XConstantScoreQuery here so that we exclude deleted docs
    //

    final Map<BytesRef, List<VisibilityInfo>> map = new HashMap<>();
    searcher.search(
            new XConstantScoreQuery(
                    SearchContext.current().filterCache().cache(new TermsFilter(field, updatedCtids))),
            new ZomboDBTermsCollector(field) {
                private SortedDocValues prevCtids;
                private SortedNumericDocValues xids;
                private SortedNumericDocValues sequence;
                private int ord;
                private int maxdoc;

                @Override
                public void collect(int doc) throws IOException {
                    xids.setDocument(doc);
                    sequence.setDocument(doc);

                    long xid = xids.valueAt(0);
                    long seq = sequence.valueAt(0);
                    BytesRef prevCtid = prevCtids.get(doc);

                    List<VisibilityInfo> matchingDocs = map.get(prevCtid);

                    if (matchingDocs == null)
                        map.put(BytesRef.deepCopyOf(prevCtid), matchingDocs = new ArrayList<>());
                    matchingDocs.add(new VisibilityInfo(ord, maxdoc, doc, xid, seq));
                }

                @Override
                public void setNextReader(AtomicReaderContext context) throws IOException {
                    prevCtids = FieldCache.DEFAULT.getTermsIndex(context.reader(), field);
                    xids = context.reader().getSortedNumericDocValues("_xid");
                    sequence = context.reader().getSortedNumericDocValues("_zdb_seq");
                    ord = context.ord;
                    maxdoc = context.reader().maxDoc();
                }
            });

    if (map.isEmpty())
        return visibilityBitSets;

    //
    // pick out the first VisibilityInfo for each document that is visible & committed
    // and build a FixedBitSet for each reader 'ord' that contains visible
    // documents.  A map of these (key'd on reader ord) is what we return.
    //

    BytesRefBuilder bytesRefBuilder = new BytesRefBuilder() {
        /* overloaded to avoid making a copy of the byte array */
        @Override
        public BytesRef toBytesRef() {
            return new BytesRef(this.bytes(), 0, this.length());
        }
    };

    Terms committedXidsTerms = MultiFields.getFields(searcher.getIndexReader()).terms("_zdb_committed_xid");
    TermsEnum committedXidsEnum = committedXidsTerms == null ? null : committedXidsTerms.iterator(null);
    for (List<VisibilityInfo> visibility : map.values()) {
        CollectionUtil.introSort(visibility, new Comparator<VisibilityInfo>() {
            @Override
            public int compare(VisibilityInfo o1, VisibilityInfo o2) {
                int cmp = Long.compare(o2.xid, o1.xid);
                return cmp == 0 ? Long.compare(o2.sequence, o1.sequence) : cmp;
            }
        });

        boolean foundVisible = false;
        for (VisibilityInfo mapping : visibility) {

            if (foundVisible || mapping.xid > xmax || activeXids.contains(mapping.xid) || (mapping.xid != myXid
                    && !isCommitted(committedXidsEnum, mapping.xid, bytesRefBuilder))) {
                // document is not visible to us
                FixedBitSet visibilityBitset = visibilityBitSets.get(mapping.readerOrd);
                if (visibilityBitset == null)
                    visibilityBitSets.put(mapping.readerOrd,
                            visibilityBitset = new FixedBitSet(mapping.maxdoc));
                visibilityBitset.set(mapping.docid);
            } else {
                foundVisible = true;
            }
        }
    }

    return visibilityBitSets;
}

From source file:com.tuplejump.stargate.lucene.LuceneUtils.java

License:Apache License

public static Field idDocValue(final ByteBuffer byteBufferValue) {
    BytesRef bytesRef = new BytesRef(byteBufferValue.array(), byteBufferValue.arrayOffset(),
            byteBufferValue.limit());/*from  w  w w. j  a  va 2 s. c  o  m*/
    return new SortedDocValuesField(PK_NAME_DOC_VAL, bytesRef);
}

From source file:edu.upenn.library.solrplugins.CaseInsensitiveSortingTextField.java

License:Apache License

@Override
public CharsRef indexedToNormalized(BytesRef input, CharsRefBuilder output) {
    int endIndex = delimOffset(input);
    if (endIndex < 0) {
        return super.indexedToReadable(input, output);
    } else {/*from  w  ww  .j  a  v  a 2 s. c o  m*/
        return super.indexedToReadable(new BytesRef(input.bytes, input.offset, endIndex - input.offset),
                output);
    }
}

From source file:io.crate.expression.scalar.SubstrFunction.java

License:Apache License

@VisibleForTesting
static BytesRef substring(BytesRef utf8, int begin, int end) {
    int pos = utf8.offset;
    final int limit = pos + utf8.length;
    final byte[] bytes = utf8.bytes;
    int posBegin = pos;

    int codePointCount = 0;
    for (; pos < limit; codePointCount++) {
        if (codePointCount == begin) {
            posBegin = pos;//from  w w  w .  ja v  a 2s.  c o  m
        }
        if (codePointCount == end) {
            break;
        }

        int v = bytes[pos] & 0xFF;
        if (v < /* 0xxx xxxx */ 0x80) {
            pos += 1;
            continue;
        }
        if (v >= /* 110x xxxx */ 0xc0) {
            if (v < /* 111x xxxx */ 0xe0) {
                pos += 2;
                continue;
            }
            if (v < /* 1111 xxxx */ 0xf0) {
                pos += 3;
                continue;
            }
            if (v < /* 1111 1xxx */ 0xf8) {
                pos += 4;
                continue;
            }
            // fallthrough, consider 5 and 6 byte sequences invalid.
        }

        // Anything not covered above is invalid UTF8.
        throw new IllegalArgumentException("substr: invalid UTF8 string found.");
    }

    // Check if we didn't go over the limit on the last character.
    if (pos > limit)
        throw new IllegalArgumentException("begin index must not be > end index");
    return new BytesRef(bytes, posBegin, pos - posBegin);
}