Example usage for org.apache.lucene.util BytesRef deepCopyOf

Introduction

This page collects usage examples for org.apache.lucene.util.BytesRef.deepCopyOf, drawn from open-source projects.

Prototype

public static BytesRef deepCopyOf(BytesRef other) 

Document

Creates a new BytesRef that points to a copy of the bytes from other. The returned BytesRef will have a length of other.length and an offset of zero.
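
The common motif in all of the examples below: Lucene's enumeration APIs (TermsEnum, doc values, token-stream attributes) reuse a single BytesRef instance across calls, so any value retained beyond the current iteration step must be copied first. A minimal sketch of that pattern, assuming the Lucene 5.x+ no-argument Terms.iterator() signature (some examples below use the older iterator(null) form); the class and method names are illustrative:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public class DeepCopyOfSketch {

    /** Collects every term of a field into a list that remains valid
     *  after iteration. Without deepCopyOf, each list entry would alias
     *  the enum's internal buffer and end up holding the last term. */
    public static List<BytesRef> collectTerms(LeafReader reader, String field) throws IOException {
        List<BytesRef> collected = new ArrayList<>();
        Terms terms = reader.terms(field); // may be null if the field is absent
        if (terms == null) {
            return collected;
        }
        TermsEnum te = terms.iterator();
        BytesRef term;
        while ((term = te.next()) != null) {
            // term is only valid until the next call to next();
            // deepCopyOf allocates fresh bytes with offset 0
            collected.add(BytesRef.deepCopyOf(term));
        }
        return collected;
    }
}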

Usage

From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java

License:Apache License
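
This test helper samples random terms from one TermsEnum and then checks that two Terms instances agree on seekExact and seekCeil for every sampled term. Each retained term is deep-copied because the enum reuses its BytesRef across calls to next().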

private void assertTermsSeeking(Terms leftTerms, Terms rightTerms) throws Exception {
    TermsEnum leftEnum = null;
    TermsEnum rightEnum = null;

    // just an upper bound
    int numTests = atLeast(20);
    Random random = random();

    // collect this number of terms from the left side
    HashSet<BytesRef> tests = new HashSet<>();
    int numPasses = 0;
    while (numPasses < 10 && tests.size() < numTests) {
        leftEnum = leftTerms.iterator();
        BytesRef term = null;
        while ((term = leftEnum.next()) != null) {
            int code = random.nextInt(10);
            if (code == 0) {
                // the term
                tests.add(BytesRef.deepCopyOf(term));
            } else if (code == 1) {
                // truncated subsequence of term
                term = BytesRef.deepCopyOf(term);
                if (term.length > 0) {
                    // truncate it
                    term.length = random.nextInt(term.length);
                }
                tests.add(term);
            } else if (code == 2) {
                // term, but ensure a non-zero offset
                byte[] newbytes = new byte[term.length + 5];
                System.arraycopy(term.bytes, term.offset, newbytes, 5, term.length);
                tests.add(new BytesRef(newbytes, 5, term.length));
            }
        }
        numPasses++;
    }

    ArrayList<BytesRef> shuffledTests = new ArrayList<>(tests);
    Collections.shuffle(shuffledTests, random);

    for (BytesRef b : shuffledTests) {
        leftEnum = leftTerms.iterator();
        rightEnum = rightTerms.iterator();

        assertEquals(leftEnum.seekExact(b), rightEnum.seekExact(b));
        assertEquals(leftEnum.seekExact(b), rightEnum.seekExact(b));

        SeekStatus leftStatus;
        SeekStatus rightStatus;

        leftStatus = leftEnum.seekCeil(b);
        rightStatus = rightEnum.seekCeil(b);
        assertEquals(leftStatus, rightStatus);
        if (leftStatus != SeekStatus.END) {
            assertEquals(leftEnum.term(), rightEnum.term());
        }

        leftStatus = leftEnum.seekCeil(b);
        rightStatus = rightEnum.seekCeil(b);
        assertEquals(leftStatus, rightStatus);
        if (leftStatus != SeekStatus.END) {
            assertEquals(leftEnum.term(), rightEnum.term());
        }
    }
}

From source file:com.shaie.utils.IndexUtils.java

License:Apache License
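
A utility that dumps per-field postings. Payloads are deep-copied before being stored in the positions map, since the PostingsEnum may reuse the BytesRef it returns from getPayload().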

/** Prints the terms indexed under the given fields with full postings information. */
public static void printFieldTermsWithInfo(LeafReader reader, String... fields) throws IOException {
    for (final String field : fields) {
        System.out.println(format("Terms for field [%s], with positional info:", field));
        final TermsEnum te = reader.terms(field).iterator();
        BytesRef scratch;
        PostingsEnum postings = null;
        while ((scratch = te.next()) != null) {
            System.out.println(format("  %s", scratch.utf8ToString()));
            postings = te.postings(postings, PostingsEnum.ALL);
            for (postings.nextDoc(); postings.docID() != DocIdSetIterator.NO_MORE_DOCS; postings.nextDoc()) {
                final Map<Integer, BytesRef> positions = Maps.newTreeMap();
                boolean addedPayload = false;
                for (int i = 0; i < postings.freq(); i++) {
                    final int pos = postings.nextPosition();
                    final BytesRef payload = postings.getPayload();
                    if (payload != null) {
                        positions.put(pos, BytesRef.deepCopyOf(payload));
                        addedPayload = true;
                    } else {
                        positions.put(pos, null);
                    }
                }
                if (addedPayload) {
                    System.out.println(format("    doc=%d, freq=%d", postings.docID(), postings.freq()));
                    for (final Entry<Integer, BytesRef> e : positions.entrySet()) {
                        System.out.println(format("      pos=%d, payload=%s", e.getKey(), e.getValue()));
                    }
                } else {
                    System.out.println(format("    doc=%d, freq=%d, pos=%s", postings.docID(), postings.freq(),
                            positions.keySet()));
                }
            }
        }
    }
}

From source file:com.sindicetech.siren.search.node.TestNodeNumericRangeQuery32.java

License:Open Source License
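
Counts the terms produced by a MultiNodeTermQuery's TermsEnum, keeping a deep copy of the previous term so it can assert that terms arrive in strictly increasing order.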

private int countTerms(final MultiNodeTermQuery q) throws Exception {
    final Terms terms = MultiFields.getTerms(index.reader, q.getField());
    if (terms == null)
        return 0;
    final TermsEnum termEnum = q.getTermsEnum(terms);
    assertNotNull(termEnum);
    int count = 0;
    BytesRef cur, last = null;
    while ((cur = termEnum.next()) != null) {
        count++;
        if (last != null) {
            assertTrue(last.compareTo(cur) < 0);
        }
        last = BytesRef.deepCopyOf(cur);
    }
    // LUCENE-3314: the results after next() already returned null are undefined,
    // assertNull(termEnum.next());
    return count;
}

From source file:com.sindicetech.siren.search.node.TopNodeTermsRewrite.java

License:Open Source License
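
A top-N terms rewrite. deepCopyOf appears in the assertion helper compareToLastTerm, which remembers the previously collected term to verify that each segment's terms are collected in order.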

@Override
public Q rewrite(final IndexReader reader, final MultiNodeTermQuery query) throws IOException {
    final int maxSize = Math.min(size, this.getMaxSize());
    final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
    this.collectTerms(reader, query, new TermCollector() {
        private final MaxNonCompetitiveBoostAttribute maxBoostAtt = attributes
                .addAttribute(MaxNonCompetitiveBoostAttribute.class);

        private final Map<BytesRef, ScoreTerm> visitedTerms = new HashMap<BytesRef, ScoreTerm>();

        private TermsEnum termsEnum;
        private Comparator<BytesRef> termComp;
        private BoostAttribute boostAtt;
        private ScoreTerm st;

        @Override
        public void setNextEnum(final TermsEnum termsEnum) throws IOException {
            this.termsEnum = termsEnum;
            this.termComp = termsEnum.getComparator();

            assert this.compareToLastTerm(null);

            // lazy init the initial ScoreTerm because comparator is not known on ctor:
            if (st == null)
                st = new ScoreTerm(this.termComp, new TermContext(topReaderContext));
            boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);
        }

        // for assert:
        private BytesRef lastTerm;

        private boolean compareToLastTerm(final BytesRef t) throws IOException {
            if (lastTerm == null && t != null) {
                lastTerm = BytesRef.deepCopyOf(t);
            } else if (t == null) {
                lastTerm = null;
            } else {
                assert termsEnum.getComparator().compare(lastTerm, t) < 0 : "lastTerm=" + lastTerm + " t=" + t;
                lastTerm.copyBytes(t);
            }
            return true;
        }

        @Override
        public boolean collect(final BytesRef bytes) throws IOException {
            final float boost = boostAtt.getBoost();

            // make sure within a single seg we always collect
            // terms in order
            assert this.compareToLastTerm(bytes);

            //System.out.println("TTR.collect term=" + bytes.utf8ToString() + " boost=" + boost + " ord=" + readerContext.ord);
            // ignore uncompetitive hits
            if (stQueue.size() == maxSize) {
                final ScoreTerm t = stQueue.peek();
                if (boost < t.boost)
                    return true;
                if (boost == t.boost && termComp.compare(bytes, t.bytes) > 0)
                    return true;
            }
            ScoreTerm t = visitedTerms.get(bytes);
            final TermState state = termsEnum.termState();
            assert state != null;
            if (t != null) {
                // if the term is already in the PQ, only update docFreq of term in PQ
                assert t.boost == boost : "boost should be equal in all segment TermsEnums";
                t.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
            } else {
                // add new entry in PQ, we must clone the term, else it may get overwritten!
                st.bytes.copyBytes(bytes);
                st.boost = boost;
                visitedTerms.put(st.bytes, st);
                assert st.termState.docFreq() == 0;
                st.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
                stQueue.offer(st);
                // possibly drop entries from queue
                if (stQueue.size() > maxSize) {
                    st = stQueue.poll();
                    visitedTerms.remove(st.bytes);
                    st.termState.clear(); // reset the termstate!
                } else {
                    st = new ScoreTerm(termComp, new TermContext(topReaderContext));
                }
                assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize";
                // set maxBoostAtt with values to help FuzzyTermsEnum to optimize
                if (stQueue.size() == maxSize) {
                    t = stQueue.peek();
                    maxBoostAtt.setMaxNonCompetitiveBoost(t.boost);
                    maxBoostAtt.setCompetitiveTerm(t.bytes);
                }
            }

            return true;
        }
    });

    final Q q = this.getTopLevelQuery(query);
    final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
    ArrayUtil.timSort(scoreTerms, scoreTermSortByTermComp);

    for (final ScoreTerm st : scoreTerms) {
        final Term term = new Term(query.field, st.bytes);
        assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs "
                + st.termState.docFreq() + " term=" + term;
        this.addClause(q, term, st.termState.docFreq(), query.getBoost() * st.boost, st.termState); // add to query
    }
    return q;
}

From source file:com.stratio.cassandra.index.query.Condition.java

License:Apache License
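
Analyzes a single value and returns its sole token as a string. The token's BytesRef is deep-copied before conversion because the attribute's bytes are owned and reused by the token stream.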

protected String analyze(String field, String value, ColumnMapper<?> columnMapper) {
    TokenStream source = null;
    try {
        Analyzer analyzer = columnMapper.analyzer();
        source = analyzer.tokenStream(field, value);
        source.reset();

        TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
        BytesRef bytes = termAtt.getBytesRef();

        if (!source.incrementToken()) {
            return null;
        }
        termAtt.fillBytesRef();
        if (source.incrementToken()) {
            throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + value);
        }
        source.end();
        return BytesRef.deepCopyOf(bytes).utf8ToString();
    } catch (IOException e) {
        throw new RuntimeException("Error analyzing multiTerm term: " + value, e);
    } finally {
        IOUtils.closeWhileHandlingException(source);
    }
}

From source file:com.tcdi.zombodb.query.VisibilityQueryHelper.java

License:Apache License
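
Collects the distinct _ctid values of all "state" documents. Each value returned by SortedDocValues.get(doc) is deep-copied before being added to the list, as the doc-values API may reuse the returned BytesRef.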

static List<BytesRef> findUpdatedCtids(IndexSearcher searcher) throws IOException {
    final List<BytesRef> updatedCtids = new ArrayList<>();

    //
    // search the "state" type and collect a distinct set of all the _ctids
    // these represent the records in the index that have been updated
    // used below to determine visibility
    //
    // We use XConstantScoreQuery here so that we exclude deleted docs
    //

    searcher.search(
            new XConstantScoreQuery(
                    SearchContext.current().filterCache().cache(new TermFilter(new Term("_type", "state")))),
            new ZomboDBTermsCollector("_ctid") {
                SortedDocValues ctids;

                @Override
                public void collect(int doc) throws IOException {
                    updatedCtids.add(BytesRef.deepCopyOf(ctids.get(doc)));
                }

                @Override
                public void setNextReader(AtomicReaderContext context) throws IOException {
                    ctids = FieldCache.DEFAULT.getTermsIndex(context.reader(), "_ctid");
                }
            });

    Collections.sort(updatedCtids);
    return updatedCtids;
}

From source file:com.tcdi.zombodb.query.VisibilityQueryHelper.java

License:Apache License
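
Builds per-reader bitsets of non-visible documents. The _prev_ctid used as a HashMap key is deep-copied first, since the doc-values instance reuses the BytesRef it hands out.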

static Map<Integer, FixedBitSet> determineVisibility(final Query query, final String field, final long myXid,
        final long xmin, final long xmax, final Set<Long> activeXids, IndexSearcher searcher,
        List<BytesRef> updatedCtids) throws IOException {
    final Map<Integer, FixedBitSet> visibilityBitSets = new HashMap<>();

    if (updatedCtids.size() == 0)
        return visibilityBitSets;

    //
    // build a map of {@link VisibilityInfo} objects by each _prev_ctid
    //
    // We use XConstantScoreQuery here so that we exclude deleted docs
    //

    final Map<BytesRef, List<VisibilityInfo>> map = new HashMap<>();
    searcher.search(
            new XConstantScoreQuery(
                    SearchContext.current().filterCache().cache(new TermsFilter(field, updatedCtids))),
            new ZomboDBTermsCollector(field) {
                private SortedDocValues prevCtids;
                private SortedNumericDocValues xids;
                private SortedNumericDocValues sequence;
                private int ord;
                private int maxdoc;

                @Override
                public void collect(int doc) throws IOException {
                    xids.setDocument(doc);
                    sequence.setDocument(doc);

                    long xid = xids.valueAt(0);
                    long seq = sequence.valueAt(0);
                    BytesRef prevCtid = prevCtids.get(doc);

                    List<VisibilityInfo> matchingDocs = map.get(prevCtid);

                    if (matchingDocs == null)
                        map.put(BytesRef.deepCopyOf(prevCtid), matchingDocs = new ArrayList<>());
                    matchingDocs.add(new VisibilityInfo(ord, maxdoc, doc, xid, seq));
                }

                @Override
                public void setNextReader(AtomicReaderContext context) throws IOException {
                    prevCtids = FieldCache.DEFAULT.getTermsIndex(context.reader(), field);
                    xids = context.reader().getSortedNumericDocValues("_xid");
                    sequence = context.reader().getSortedNumericDocValues("_zdb_seq");
                    ord = context.ord;
                    maxdoc = context.reader().maxDoc();
                }
            });

    if (map.isEmpty())
        return visibilityBitSets;

    //
    // pick out the first VisibilityInfo for each document that is visible & committed
    // and build a FixedBitSet for each reader 'ord' that contains visible
    // documents.  A map of these (key'd on reader ord) is what we return.
    //

    BytesRefBuilder bytesRefBuilder = new BytesRefBuilder() {
        /* overridden to avoid making a copy of the byte array */
        @Override
        public BytesRef toBytesRef() {
            return new BytesRef(this.bytes(), 0, this.length());
        }
    };

    Terms committedXidsTerms = MultiFields.getFields(searcher.getIndexReader()).terms("_zdb_committed_xid");
    TermsEnum committedXidsEnum = committedXidsTerms == null ? null : committedXidsTerms.iterator(null);
    for (List<VisibilityInfo> visibility : map.values()) {
        CollectionUtil.introSort(visibility, new Comparator<VisibilityInfo>() {
            @Override
            public int compare(VisibilityInfo o1, VisibilityInfo o2) {
                int cmp = Long.compare(o2.xid, o1.xid);
                return cmp == 0 ? Long.compare(o2.sequence, o1.sequence) : cmp;
            }
        });

        boolean foundVisible = false;
        for (VisibilityInfo mapping : visibility) {

            if (foundVisible || mapping.xid > xmax || activeXids.contains(mapping.xid) || (mapping.xid != myXid
                    && !isCommitted(committedXidsEnum, mapping.xid, bytesRefBuilder))) {
                // document is not visible to us
                FixedBitSet visibilityBitset = visibilityBitSets.get(mapping.readerOrd);
                if (visibilityBitset == null)
                    visibilityBitSets.put(mapping.readerOrd,
                            visibilityBitset = new FixedBitSet(mapping.maxdoc));
                visibilityBitset.set(mapping.docid);
            } else {
                foundVisible = true;
            }
        }
    }

    return visibilityBitSets;
}

From source file:com.tuplejump.stargate.lucene.query.Condition.java

License:Apache License
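
A near-identical variant of the analyze method shown above: the single analyzed token is deep-copied before utf8ToString().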

protected String analyze(String field, String value, Analyzer analyzer) {
    TokenStream source = null;
    try {
        source = analyzer.tokenStream(field, value);
        source.reset();

        TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
        BytesRef bytes = termAtt.getBytesRef();

        if (!source.incrementToken()) {
            return null;
        }
        termAtt.fillBytesRef();
        if (source.incrementToken()) {
            throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + value);
        }
        source.end();
        return BytesRef.deepCopyOf(bytes).utf8ToString();
    } catch (IOException e) {
        throw new RuntimeException("Error analyzing multiTerm term: " + value, e);
    } finally {
        IOUtils.closeWhileHandlingException(source);
    }
}

From source file:de.unihildesheim.iw.lucene.index.FDRIndexDataProvider.java

License:Open Source License
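
Computes and caches the total term frequency across all leaves and fields. The term is deep-copied before being used as a cache key, so the cached entry does not alias a caller-owned, possibly mutable BytesRef.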

@SuppressFBWarnings("EXS_EXCEPTION_SOFTENING_NO_CONSTRAINTS")
@Override
public long getTermFrequency(@NotNull final BytesRef term) {
    // try get a cached value first
    @Nullable
    Long tf = this.cache_tf.get(term);
    if (tf == null) {
        tf = 0L;
        for (final LeafReaderContext lrc : this.index.reader.leaves()) {
            final LeafReader r = lrc.reader();
            long fieldTf = 0L;
            if (r.numDocs() > 0) {
                try {
                    for (final String s : r.fields()) {
                        @Nullable
                        final Terms terms = r.terms(s);
                        if (terms != null) {
                            final TermsEnum termsEnum = terms.iterator(null);
                            if (termsEnum.seekExact(term)) {
                                fieldTf += termsEnum.totalTermFreq();
                            }
                        }
                    }
                } catch (final IOException e) {
                    throw new UncheckedIOException(e);
                }
            }
            tf += fieldTf;
        }
        this.cache_tf.put(BytesRef.deepCopyOf(term), tf);
    }

    return tf;
}

From source file:de.unihildesheim.iw.lucene.index.FDRIndexDataProvider.java

License:Open Source License
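
The document-frequency counterpart of the previous method; again the term is deep-copied when inserted into the cache.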

@SuppressFBWarnings("EXS_EXCEPTION_SOFTENING_NO_CONSTRAINTS")
@Override
public int getDocumentFrequency(@NotNull final BytesRef term) {
    Integer df = this.cache_df.get(term);
    if (df == null) {
        df = this.index.reader.leaves().stream().map(LeafReaderContext::reader).filter(r -> r.numDocs() > 0)
                .mapToInt(r -> {
                    try {
                        return StreamSupport.stream(r.fields().spliterator(), false).mapToInt(f -> {
                            try {
                                @Nullable
                                final Terms terms = r.terms(f);
                                if (terms == null) {
                                    return 0;
                                }
                                final TermsEnum termsEnum = terms.iterator(null);
                                return termsEnum.seekExact(term) ? termsEnum.docFreq() : 0;
                            } catch (final IOException e) {
                                throw new UncheckedIOException(e);
                            }
                        }).max().orElse(0);
                    } catch (final IOException e) {
                        throw new UncheckedIOException(e);
                    }
                }).sum();
        this.cache_df.put(BytesRef.deepCopyOf(term), df);
    }
    return df;
}