List of usage examples for org.apache.lucene.util BytesRef deepCopyOf
public static BytesRef deepCopyOf(BytesRef other)
Parameter: other – the BytesRef to make a deep copy of; its bytes are copied into a freshly allocated array.
The returned BytesRef will have a length of other.length and an offset of zero.
From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java
License:Apache License
private void assertTermsSeeking(Terms leftTerms, Terms rightTerms) throws Exception { TermsEnum leftEnum = null;// w ww.ja va 2 s .co m TermsEnum rightEnum = null; // just an upper bound int numTests = atLeast(20); Random random = random(); // collect this number of terms from the left side HashSet<BytesRef> tests = new HashSet<>(); int numPasses = 0; while (numPasses < 10 && tests.size() < numTests) { leftEnum = leftTerms.iterator(); BytesRef term = null; while ((term = leftEnum.next()) != null) { int code = random.nextInt(10); if (code == 0) { // the term tests.add(BytesRef.deepCopyOf(term)); } else if (code == 1) { // truncated subsequence of term term = BytesRef.deepCopyOf(term); if (term.length > 0) { // truncate it term.length = random.nextInt(term.length); } } else if (code == 2) { // term, but ensure a non-zero offset byte newbytes[] = new byte[term.length + 5]; System.arraycopy(term.bytes, term.offset, newbytes, 5, term.length); tests.add(new BytesRef(newbytes, 5, term.length)); } } numPasses++; } ArrayList<BytesRef> shuffledTests = new ArrayList<>(tests); Collections.shuffle(shuffledTests, random); for (BytesRef b : shuffledTests) { leftEnum = leftTerms.iterator(); rightEnum = rightTerms.iterator(); assertEquals(leftEnum.seekExact(b), rightEnum.seekExact(b)); assertEquals(leftEnum.seekExact(b), rightEnum.seekExact(b)); SeekStatus leftStatus; SeekStatus rightStatus; leftStatus = leftEnum.seekCeil(b); rightStatus = rightEnum.seekCeil(b); assertEquals(leftStatus, rightStatus); if (leftStatus != SeekStatus.END) { assertEquals(leftEnum.term(), rightEnum.term()); } leftStatus = leftEnum.seekCeil(b); rightStatus = rightEnum.seekCeil(b); assertEquals(leftStatus, rightStatus); if (leftStatus != SeekStatus.END) { assertEquals(leftEnum.term(), rightEnum.term()); } } }
From source file:com.shaie.utils.IndexUtils.java
License:Apache License
/** Prints the terms indexed under the given fields with full postings information. */ public static void printFieldTermsWithInfo(LeafReader reader, String... fields) throws IOException { for (final String field : fields) { System.out.println(format("Terms for field [%s], with positional info:", field)); final TermsEnum te = reader.terms(field).iterator(); BytesRef scratch;//from w w w.j av a 2 s. co m PostingsEnum postings = null; while ((scratch = te.next()) != null) { System.out.println(format(" %s", scratch.utf8ToString())); postings = te.postings(postings, PostingsEnum.ALL); for (postings.nextDoc(); postings.docID() != DocIdSetIterator.NO_MORE_DOCS; postings.nextDoc()) { final Map<Integer, BytesRef> positions = Maps.newTreeMap(); boolean addedPayload = false; for (int i = 0; i < postings.freq(); i++) { final int pos = postings.nextPosition(); final BytesRef payload = postings.getPayload(); if (payload != null) { positions.put(pos, BytesRef.deepCopyOf(payload)); addedPayload = true; } else { positions.put(pos, null); } } if (addedPayload) { System.out.println( format(" doc=%d, freq=%d", postings.docID(), postings.freq(), positions)); for (final Entry<Integer, BytesRef> e : positions.entrySet()) { System.out.println(format(" pos=%d, payload=%s", e.getKey(), e.getValue())); } } else { System.out.println(format(" doc=%d, freq=%d, pos=%s", postings.docID(), postings.freq(), positions.keySet())); } } } } }
From source file:com.sindicetech.siren.search.node.TestNodeNumericRangeQuery32.java
License:Open Source License
private int countTerms(final MultiNodeTermQuery q) throws Exception { final Terms terms = MultiFields.getTerms(index.reader, q.getField()); if (terms == null) return 0; final TermsEnum termEnum = q.getTermsEnum(terms); assertNotNull(termEnum);//from w ww . j a v a2 s . c om int count = 0; BytesRef cur, last = null; while ((cur = termEnum.next()) != null) { count++; if (last != null) { assertTrue(last.compareTo(cur) < 0); } last = BytesRef.deepCopyOf(cur); } // LUCENE-3314: the results after next() already returned null are undefined, // assertNull(termEnum.next()); return count; }
From source file:com.sindicetech.siren.search.node.TopNodeTermsRewrite.java
License:Open Source License
/**
 * Rewrites the MultiNodeTermQuery by collecting at most {@code maxSize} of the
 * highest-boosted terms into a priority queue, then adding each surviving term
 * as a clause of the top-level query (sorted by term for deterministic output).
 *
 * NOTE(review): the ScoreTerm priority queue appears to be ordered so that the
 * weakest entry is at the head (peek/poll drop uncompetitive terms) — confirm
 * against ScoreTerm's compareTo.
 */
@Override
public Q rewrite(final IndexReader reader, final MultiNodeTermQuery query) throws IOException {
    final int maxSize = Math.min(size, this.getMaxSize());
    final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
    this.collectTerms(reader, query, new TermCollector() {

        private final MaxNonCompetitiveBoostAttribute maxBoostAtt = attributes
                .addAttribute(MaxNonCompetitiveBoostAttribute.class);

        // terms already seen in this rewrite, keyed by their (cloned) bytes
        private final Map<BytesRef, ScoreTerm> visitedTerms = new HashMap<BytesRef, ScoreTerm>();

        private TermsEnum termsEnum;
        private Comparator<BytesRef> termComp;
        private BoostAttribute boostAtt;
        // spare ScoreTerm reused for the next insertion into the queue
        private ScoreTerm st;

        @Override
        public void setNextEnum(final TermsEnum termsEnum) throws IOException {
            this.termsEnum = termsEnum;
            this.termComp = termsEnum.getComparator();

            // new segment: reset the in-order assertion state
            assert this.compareToLastTerm(null);

            // lazy init the initial ScoreTerm because comparator is not known on ctor:
            if (st == null)
                st = new ScoreTerm(this.termComp, new TermContext(topReaderContext));
            boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);
        }

        // for assert only: last term seen within the current segment
        private BytesRef lastTerm;

        // assert helper: verifies terms arrive in strictly increasing order
        // within a segment; always returns true so it can sit inside an assert
        private boolean compareToLastTerm(final BytesRef t) throws IOException {
            if (lastTerm == null && t != null) {
                lastTerm = BytesRef.deepCopyOf(t);
            } else if (t == null) {
                lastTerm = null;
            } else {
                assert termsEnum.getComparator().compare(lastTerm, t) < 0 : "lastTerm=" + lastTerm + " t=" + t;
                lastTerm.copyBytes(t);
            }
            return true;
        }

        @Override
        public boolean collect(final BytesRef bytes) throws IOException {
            final float boost = boostAtt.getBoost();

            // make sure within a single seg we always collect terms in order
            assert this.compareToLastTerm(bytes);

            // ignore uncompetitive hits: queue is full and this term cannot beat
            // the weakest queued entry (ties broken by term order)
            if (stQueue.size() == maxSize) {
                final ScoreTerm t = stQueue.peek();
                if (boost < t.boost)
                    return true;
                if (boost == t.boost && termComp.compare(bytes, t.bytes) > 0)
                    return true;
            }
            ScoreTerm t = visitedTerms.get(bytes);
            final TermState state = termsEnum.termState();
            assert state != null;
            if (t != null) {
                // if the term is already in the PQ, only update docFreq of term in PQ
                assert t.boost == boost : "boost should be equal in all segment TermsEnums";
                t.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
            } else {
                // add new entry in PQ, we must clone the term, else it may get overwritten!
                st.bytes.copyBytes(bytes);
                st.boost = boost;
                visitedTerms.put(st.bytes, st);
                assert st.termState.docFreq() == 0;
                st.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
                stQueue.offer(st);

                // possibly drop entries from queue; the evicted ScoreTerm is
                // recycled as the next spare (its termState is cleared)
                if (stQueue.size() > maxSize) {
                    st = stQueue.poll();
                    visitedTerms.remove(st.bytes);
                    st.termState.clear(); // reset the termstate!
                } else {
                    st = new ScoreTerm(termComp, new TermContext(topReaderContext));
                }
                assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize";

                // set maxBoostAtt with values to help FuzzyTermsEnum to optimize
                if (stQueue.size() == maxSize) {
                    t = stQueue.peek();
                    maxBoostAtt.setMaxNonCompetitiveBoost(t.boost);
                    maxBoostAtt.setCompetitiveTerm(t.bytes);
                }
            }
            return true;
        }
    });

    final Q q = this.getTopLevelQuery(query);
    final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
    // sort by term (not by score) so the rewritten query is deterministic
    ArrayUtil.timSort(scoreTerms, scoreTermSortByTermComp);
    for (final ScoreTerm st : scoreTerms) {
        final Term term = new Term(query.field, st.bytes);
        assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs "
                + st.termState.docFreq() + " term=" + term;
        this.addClause(q, term, st.termState.docFreq(), query.getBoost() * st.boost, st.termState); // add to query
    }
    return q;
}
From source file:com.stratio.cassandra.index.query.Condition.java
License:Apache License
/**
 * Runs {@code value} through the column mapper's analyzer and returns the single
 * resulting term as a UTF-8 string, or {@code null} if analysis yields no token.
 *
 * @param field        field name passed to the analyzer
 * @param value        raw text to analyze
 * @param columnMapper mapper supplying the analyzer to use
 * @return the analyzed term, or {@code null} when nothing was produced
 * @throws IllegalArgumentException if the analyzer emits more than one token
 * @throws RuntimeException wrapping any I/O failure of the token stream
 */
protected String analyze(String field, String value, ColumnMapper<?> columnMapper) {
    TokenStream stream = null;
    try {
        Analyzer analyzer = columnMapper.analyzer();
        stream = analyzer.tokenStream(field, value);
        stream.reset();
        TermToBytesRefAttribute termAttribute = stream.getAttribute(TermToBytesRefAttribute.class);
        BytesRef termBytes = termAttribute.getBytesRef();
        if (!stream.incrementToken()) {
            // analyzer produced no tokens at all
            return null;
        }
        termAttribute.fillBytesRef();
        if (stream.incrementToken()) {
            throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + value);
        }
        stream.end();
        // deep copy before the stream is closed: the attribute may reuse its buffer
        return BytesRef.deepCopyOf(termBytes).utf8ToString();
    } catch (IOException e) {
        throw new RuntimeException("Error analyzing multiTerm term: " + value, e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}
From source file:com.tcdi.zombodb.query.VisibilityQueryHelper.java
License:Apache License
static List<BytesRef> findUpdatedCtids(IndexSearcher searcher) throws IOException { final List<BytesRef> updatedCtids = new ArrayList<>(); ////from w w w . j a v a2s . co m // search the "state" type and collect a distinct set of all the _ctids // these represent the records in the index that have been updated // used below to determine visibility // // We use XConstantScoreQuery here so that we exclude deleted docs // searcher.search( new XConstantScoreQuery( SearchContext.current().filterCache().cache(new TermFilter(new Term("_type", "state")))), new ZomboDBTermsCollector("_ctid") { SortedDocValues ctids; @Override public void collect(int doc) throws IOException { updatedCtids.add(BytesRef.deepCopyOf(ctids.get(doc))); } @Override public void setNextReader(AtomicReaderContext context) throws IOException { ctids = FieldCache.DEFAULT.getTermsIndex(context.reader(), "_ctid"); } }); Collections.sort(updatedCtids); return updatedCtids; }
From source file:com.tcdi.zombodb.query.VisibilityQueryHelper.java
License:Apache License
/**
 * Determines, for each leaf reader (keyed by reader ord), which documents are
 * NOT visible to the current transaction and returns them as FixedBitSets.
 *
 * For every updated ctid the row versions are sorted newest-first; the first
 * version that is committed and within the transaction's snapshot is treated
 * as visible, all other versions of that row are marked invisible.
 *
 * NOTE(review): MVCC-style semantics inferred from the xid/xmin/xmax naming —
 * confirm against the PostgreSQL-side contract.  {@code xmin} is accepted but
 * not referenced in this method.
 */
static Map<Integer, FixedBitSet> determineVisibility(final Query query, final String field, final long myXid,
        final long xmin, final long xmax, final Set<Long> activeXids, IndexSearcher searcher,
        List<BytesRef> updatedCtids) throws IOException {
    final Map<Integer, FixedBitSet> visibilityBitSets = new HashMap<>();
    if (updatedCtids.size() == 0)
        return visibilityBitSets;

    //
    // build a map of {@link VisibilityInfo} objects by each _prev_ctid
    //
    // We use XConstantScoreQuery here so that we exclude deleted docs
    //
    final Map<BytesRef, List<VisibilityInfo>> map = new HashMap<>();
    searcher.search(
            new XConstantScoreQuery(
                    SearchContext.current().filterCache().cache(new TermsFilter(field, updatedCtids))),
            new ZomboDBTermsCollector(field) {
                private SortedDocValues prevCtids;
                private SortedNumericDocValues xids;
                private SortedNumericDocValues sequence;
                private int ord;
                private int maxdoc;

                @Override
                public void collect(int doc) throws IOException {
                    xids.setDocument(doc);
                    sequence.setDocument(doc);
                    long xid = xids.valueAt(0);
                    long seq = sequence.valueAt(0);
                    BytesRef prevCtid = prevCtids.get(doc);

                    List<VisibilityInfo> matchingDocs = map.get(prevCtid);
                    if (matchingDocs == null)
                        // deep copy only when used as a map key; doc values may
                        // reuse the BytesRef they return
                        map.put(BytesRef.deepCopyOf(prevCtid), matchingDocs = new ArrayList<>());
                    matchingDocs.add(new VisibilityInfo(ord, maxdoc, doc, xid, seq));
                }

                @Override
                public void setNextReader(AtomicReaderContext context) throws IOException {
                    prevCtids = FieldCache.DEFAULT.getTermsIndex(context.reader(), field);
                    xids = context.reader().getSortedNumericDocValues("_xid");
                    sequence = context.reader().getSortedNumericDocValues("_zdb_seq");
                    ord = context.ord;
                    maxdoc = context.reader().maxDoc();
                }
            });

    if (map.isEmpty())
        return visibilityBitSets;

    //
    // pick out the first VisibilityInfo for each document that is visible & committed
    // and build a FixedBitSet for each reader 'ord' that contains visible
    // documents. A map of these (key'd on reader ord) is what we return.
    //
    BytesRefBuilder bytesRefBuilder = new BytesRefBuilder() {
        /* overloaded to avoid making a copy of the byte array */
        @Override
        public BytesRef toBytesRef() {
            return new BytesRef(this.bytes(), 0, this.length());
        }
    };

    Terms committedXidsTerms = MultiFields.getFields(searcher.getIndexReader()).terms("_zdb_committed_xid");
    TermsEnum committedXidsEnum = committedXidsTerms == null ? null : committedXidsTerms.iterator(null);

    for (List<VisibilityInfo> visibility : map.values()) {
        // newest version first: by xid descending, then sequence descending
        CollectionUtil.introSort(visibility, new Comparator<VisibilityInfo>() {
            @Override
            public int compare(VisibilityInfo o1, VisibilityInfo o2) {
                int cmp = Long.compare(o2.xid, o1.xid);
                return cmp == 0 ? Long.compare(o2.sequence, o1.sequence) : cmp;
            }
        });

        boolean foundVisible = false;
        for (VisibilityInfo mapping : visibility) {
            if (foundVisible || mapping.xid > xmax || activeXids.contains(mapping.xid)
                    || (mapping.xid != myXid && !isCommitted(committedXidsEnum, mapping.xid, bytesRefBuilder))) {
                // document is not visible to us
                FixedBitSet visibilityBitset = visibilityBitSets.get(mapping.readerOrd);
                if (visibilityBitset == null)
                    visibilityBitSets.put(mapping.readerOrd,
                            visibilityBitset = new FixedBitSet(mapping.maxdoc));
                visibilityBitset.set(mapping.docid);
            } else {
                foundVisible = true;
            }
        }
    }

    return visibilityBitSets;
}
From source file:com.tuplejump.stargate.lucene.query.Condition.java
License:Apache License
protected String analyze(String field, String value, Analyzer analyzer) { TokenStream source = null;// w w w. j a v a 2 s . c o m try { source = analyzer.tokenStream(field, value); source.reset(); TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); BytesRef bytes = termAtt.getBytesRef(); if (!source.incrementToken()) { return null; } termAtt.fillBytesRef(); if (source.incrementToken()) { throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + value); } source.end(); return BytesRef.deepCopyOf(bytes).utf8ToString(); } catch (IOException e) { throw new RuntimeException("Error analyzing multiTerm term: " + value, e); } finally { IOUtils.closeWhileHandlingException(source); } }
From source file:de.unihildesheim.iw.lucene.index.FDRIndexDataProvider.java
License:Open Source License
@SuppressFBWarnings("EXS_EXCEPTION_SOFTENING_NO_CONSTRAINTS") @Override/*from w w w . j a v a2 s . c o m*/ public long getTermFrequency(@NotNull final BytesRef term) { // try get a cached value first @Nullable Long tf = this.cache_tf.get(term); if (tf == null) { tf = 0L; for (final LeafReaderContext lrc : this.index.reader.leaves()) { final LeafReader r = lrc.reader(); long fieldTf = 0L; if (r.numDocs() > 0) { try { for (final String s : r.fields()) { @Nullable final Terms terms = r.terms(s); if (terms != null) { final TermsEnum termsEnum = terms.iterator(null); if (termsEnum.seekExact(term)) { fieldTf += termsEnum.totalTermFreq(); } } } } catch (final IOException e) { throw new UncheckedIOException(e); } } tf += fieldTf; } this.cache_tf.put(BytesRef.deepCopyOf(term), tf); } return tf; }
From source file:de.unihildesheim.iw.lucene.index.FDRIndexDataProvider.java
License:Open Source License
/**
 * Returns the document frequency of {@code term}.  For each leaf reader the
 * highest per-field docFreq is taken, and those per-leaf maxima are summed
 * (matching the original stream pipeline: per-leaf max().orElse(0), then
 * sum()).  Results are memoized in {@code cache_df} with a deep-copied key.
 *
 * @param term term to look up (not null)
 * @return document frequency as described above
 */
@SuppressFBWarnings("EXS_EXCEPTION_SOFTENING_NO_CONSTRAINTS")
@Override
public int getDocumentFrequency(@NotNull final BytesRef term) {
    final Integer cached = this.cache_df.get(term);
    if (cached != null) {
        return cached;
    }

    int df = 0;
    for (final LeafReaderContext leafContext : this.index.reader.leaves()) {
        final LeafReader leafReader = leafContext.reader();
        if (leafReader.numDocs() <= 0) {
            continue; // empty leaf contributes nothing
        }
        int maxFieldDf = 0;
        try {
            for (final String fieldName : leafReader.fields()) {
                @Nullable
                final Terms terms = leafReader.terms(fieldName);
                if (terms == null) {
                    continue;
                }
                final TermsEnum termsEnum = terms.iterator(null);
                final int fieldDf = termsEnum.seekExact(term) ? termsEnum.docFreq() : 0;
                // max across fields within this leaf, not a sum
                if (fieldDf > maxFieldDf) {
                    maxFieldDf = fieldDf;
                }
            }
        } catch (final IOException e) {
            throw new UncheckedIOException(e);
        }
        df += maxFieldDf;
    }

    this.cache_df.put(BytesRef.deepCopyOf(term), df);
    return df;
}