List of usage examples for the org.apache.lucene.util.BytesRef constructor
public BytesRef(byte[] bytes, int offset, int length)
From source file:com.rocana.lucene.codec.v1.RocanaSegmentTermsEnum.java
License:Apache License
@SuppressWarnings("unused") private void printSeekState(PrintStream out) throws IOException { if (currentFrame == staticFrame) { out.println(" no prior seek"); } else {//from w ww . jav a2s. c om out.println(" prior seek state:"); int ord = 0; boolean isSeekFrame = true; while (true) { RocanaSegmentTermsEnumFrame f = getFrame(ord); assert f != null; final BytesRef prefix = new BytesRef(term.get().bytes, 0, f.prefix); if (f.nextEnt == -1) { out.println(" frame " + (isSeekFrame ? "(seek)" : "(next)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + prefix + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp << RocanaBlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS : 0) + (f.isFloor ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0)) + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd()); } else { out.println(" frame " + (isSeekFrame ? "(seek, loaded)" : "(next, loaded)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + prefix + " nextEnt=" + f.nextEnt + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp << RocanaBlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS : 0) + (f.isFloor ? 
RocanaBlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0)) + " lastSubFP=" + f.lastSubFP + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd()); } if (fr.index != null) { assert !isSeekFrame || f.arc != null : "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc; if (f.prefix > 0 && isSeekFrame && f.arc.label != (term.byteAt(f.prefix - 1) & 0xFF)) { out.println(" broken seek state: arc.label=" + (char) f.arc.label + " vs term byte=" + (char) (term.byteAt(f.prefix - 1) & 0xFF)); throw new RuntimeException("seek state is broken"); } BytesRef output = Util.get(fr.index, prefix); if (output == null) { out.println(" broken seek state: prefix is not final in index"); throw new RuntimeException("seek state is broken"); } else if (isSeekFrame && !f.isFloor) { final ByteArrayDataInput reader = new ByteArrayDataInput(output.bytes, output.offset, output.length); final long codeOrig = reader.readVLong(); final long code = (f.fp << RocanaBlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS) | (f.hasTerms ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS : 0) | (f.isFloor ? RocanaBlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0); if (codeOrig != code) { out.println(" broken seek state: output code=" + codeOrig + " doesn't match frame code=" + code); throw new RuntimeException("seek state is broken"); } } } if (f == currentFrame) { break; } if (f.prefix == validIndexPrefix) { isSeekFrame = false; } ord++; } } }
From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java
License:Apache License
private void assertTermsSeeking(Terms leftTerms, Terms rightTerms) throws Exception { TermsEnum leftEnum = null;/*from ww w .ja va 2 s . c o m*/ TermsEnum rightEnum = null; // just an upper bound int numTests = atLeast(20); Random random = random(); // collect this number of terms from the left side HashSet<BytesRef> tests = new HashSet<>(); int numPasses = 0; while (numPasses < 10 && tests.size() < numTests) { leftEnum = leftTerms.iterator(); BytesRef term = null; while ((term = leftEnum.next()) != null) { int code = random.nextInt(10); if (code == 0) { // the term tests.add(BytesRef.deepCopyOf(term)); } else if (code == 1) { // truncated subsequence of term term = BytesRef.deepCopyOf(term); if (term.length > 0) { // truncate it term.length = random.nextInt(term.length); } } else if (code == 2) { // term, but ensure a non-zero offset byte newbytes[] = new byte[term.length + 5]; System.arraycopy(term.bytes, term.offset, newbytes, 5, term.length); tests.add(new BytesRef(newbytes, 5, term.length)); } } numPasses++; } ArrayList<BytesRef> shuffledTests = new ArrayList<>(tests); Collections.shuffle(shuffledTests, random); for (BytesRef b : shuffledTests) { leftEnum = leftTerms.iterator(); rightEnum = rightTerms.iterator(); assertEquals(leftEnum.seekExact(b), rightEnum.seekExact(b)); assertEquals(leftEnum.seekExact(b), rightEnum.seekExact(b)); SeekStatus leftStatus; SeekStatus rightStatus; leftStatus = leftEnum.seekCeil(b); rightStatus = rightEnum.seekCeil(b); assertEquals(leftStatus, rightStatus); if (leftStatus != SeekStatus.END) { assertEquals(leftEnum.term(), rightEnum.term()); } leftStatus = leftEnum.seekCeil(b); rightStatus = rightEnum.seekCeil(b); assertEquals(leftStatus, rightStatus); if (leftStatus != SeekStatus.END) { assertEquals(leftEnum.term(), rightEnum.term()); } } }
From source file:com.tcdi.zombodb.query.VisibilityQueryHelper.java
License:Apache License
static Map<Integer, FixedBitSet> determineVisibility(final Query query, final String field, final long myXid, final long xmin, final long xmax, final Set<Long> activeXids, IndexSearcher searcher, List<BytesRef> updatedCtids) throws IOException { final Map<Integer, FixedBitSet> visibilityBitSets = new HashMap<>(); if (updatedCtids.size() == 0) return visibilityBitSets; ///*from w w w . j a v a 2s . co m*/ // build a map of {@link VisibilityInfo} objects by each _prev_ctid // // We use XConstantScoreQuery here so that we exclude deleted docs // final Map<BytesRef, List<VisibilityInfo>> map = new HashMap<>(); searcher.search( new XConstantScoreQuery( SearchContext.current().filterCache().cache(new TermsFilter(field, updatedCtids))), new ZomboDBTermsCollector(field) { private SortedDocValues prevCtids; private SortedNumericDocValues xids; private SortedNumericDocValues sequence; private int ord; private int maxdoc; @Override public void collect(int doc) throws IOException { xids.setDocument(doc); sequence.setDocument(doc); long xid = xids.valueAt(0); long seq = sequence.valueAt(0); BytesRef prevCtid = prevCtids.get(doc); List<VisibilityInfo> matchingDocs = map.get(prevCtid); if (matchingDocs == null) map.put(BytesRef.deepCopyOf(prevCtid), matchingDocs = new ArrayList<>()); matchingDocs.add(new VisibilityInfo(ord, maxdoc, doc, xid, seq)); } @Override public void setNextReader(AtomicReaderContext context) throws IOException { prevCtids = FieldCache.DEFAULT.getTermsIndex(context.reader(), field); xids = context.reader().getSortedNumericDocValues("_xid"); sequence = context.reader().getSortedNumericDocValues("_zdb_seq"); ord = context.ord; maxdoc = context.reader().maxDoc(); } }); if (map.isEmpty()) return visibilityBitSets; // // pick out the first VisibilityInfo for each document that is visible & committed // and build a FixedBitSet for each reader 'ord' that contains visible // documents. A map of these (key'd on reader ord) is what we return. 
// BytesRefBuilder bytesRefBuilder = new BytesRefBuilder() { /* overloaded to avoid making a copy of the byte array */ @Override public BytesRef toBytesRef() { return new BytesRef(this.bytes(), 0, this.length()); } }; Terms committedXidsTerms = MultiFields.getFields(searcher.getIndexReader()).terms("_zdb_committed_xid"); TermsEnum committedXidsEnum = committedXidsTerms == null ? null : committedXidsTerms.iterator(null); for (List<VisibilityInfo> visibility : map.values()) { CollectionUtil.introSort(visibility, new Comparator<VisibilityInfo>() { @Override public int compare(VisibilityInfo o1, VisibilityInfo o2) { int cmp = Long.compare(o2.xid, o1.xid); return cmp == 0 ? Long.compare(o2.sequence, o1.sequence) : cmp; } }); boolean foundVisible = false; for (VisibilityInfo mapping : visibility) { if (foundVisible || mapping.xid > xmax || activeXids.contains(mapping.xid) || (mapping.xid != myXid && !isCommitted(committedXidsEnum, mapping.xid, bytesRefBuilder))) { // document is not visible to us FixedBitSet visibilityBitset = visibilityBitSets.get(mapping.readerOrd); if (visibilityBitset == null) visibilityBitSets.put(mapping.readerOrd, visibilityBitset = new FixedBitSet(mapping.maxdoc)); visibilityBitset.set(mapping.docid); } else { foundVisible = true; } } } return visibilityBitSets; }
From source file:com.tuplejump.stargate.lucene.LuceneUtils.java
License:Apache License
/**
 * Creates the {@code PK_NAME_DOC_VAL} sorted doc-values field from the readable content of a
 * byte buffer, i.e. the bytes between its current position and its limit.
 *
 * <p>The buffer's position and limit are left untouched. When the buffer exposes its backing
 * array, the returned field references that array without copying; otherwise (for example a
 * direct or read-only buffer) the bytes are copied out first.
 *
 * @param byteBufferValue buffer holding the value; must not be {@code null}
 * @return a {@link SortedDocValuesField} wrapping the buffer's remaining bytes
 */
public static Field idDocValue(final ByteBuffer byteBufferValue) {
    final int length = byteBufferValue.remaining();
    final BytesRef bytesRef;
    if (byteBufferValue.hasArray()) {
        // The readable bytes start at arrayOffset() + position(), not at arrayOffset():
        // ignoring the position selects the wrong range whenever position != 0.
        final int start = byteBufferValue.arrayOffset() + byteBufferValue.position();
        bytesRef = new BytesRef(byteBufferValue.array(), start, length);
    } else {
        // No accessible backing array: copy through a duplicate so the caller's
        // position is not advanced.
        final byte[] copy = new byte[length];
        byteBufferValue.duplicate().get(copy);
        bytesRef = new BytesRef(copy, 0, length);
    }
    return new SortedDocValuesField(PK_NAME_DOC_VAL, bytesRef);
}
From source file:edu.upenn.library.solrplugins.CaseInsensitiveSortingTextField.java
License:Apache License
/**
 * Converts an indexed value to its readable form, using only the part of {@code input} that
 * precedes the index reported by {@code delimOffset(input)}.
 *
 * <p>When {@code delimOffset} reports a negative value, the whole of {@code input} is
 * converted. Otherwise the reported value is used as an end index into {@code input.bytes}
 * (presumably the position of a delimiter; confirm against {@code delimOffset}).
 *
 * @param input  indexed bytes to convert
 * @param output builder handed through to {@code super.indexedToReadable}
 * @return the result of {@code super.indexedToReadable} for the selected bytes
 */
@Override
public CharsRef indexedToNormalized(BytesRef input, CharsRefBuilder output) {
    final int endIndex = delimOffset(input);
    if (endIndex < 0) {
        return super.indexedToReadable(input, output);
    }

    // view over the same array, cut off at endIndex
    final int headLength = endIndex - input.offset;
    final BytesRef head = new BytesRef(input.bytes, input.offset, headLength);
    return super.indexedToReadable(head, output);
}
From source file:io.crate.expression.scalar.SubstrFunction.java
License:Apache License
@VisibleForTesting static BytesRef substring(BytesRef utf8, int begin, int end) { int pos = utf8.offset; final int limit = pos + utf8.length; final byte[] bytes = utf8.bytes; int posBegin = pos; int codePointCount = 0; for (; pos < limit; codePointCount++) { if (codePointCount == begin) { posBegin = pos;//from w w w . ja v a 2s. c o m } if (codePointCount == end) { break; } int v = bytes[pos] & 0xFF; if (v < /* 0xxx xxxx */ 0x80) { pos += 1; continue; } if (v >= /* 110x xxxx */ 0xc0) { if (v < /* 111x xxxx */ 0xe0) { pos += 2; continue; } if (v < /* 1111 xxxx */ 0xf0) { pos += 3; continue; } if (v < /* 1111 1xxx */ 0xf8) { pos += 4; continue; } // fallthrough, consider 5 and 6 byte sequences invalid. } // Anything not covered above is invalid UTF8. throw new IllegalArgumentException("substr: invalid UTF8 string found."); } // Check if we didn't go over the limit on the last character. if (pos > limit) throw new IllegalArgumentException("begin index must not be > end index"); return new BytesRef(bytes, posBegin, pos - posBegin); }