Example usage for org.apache.lucene.util BytesRef deepCopyOf

Introduction

This page lists example usages of org.apache.lucene.util.BytesRef.deepCopyOf.

Prototype

public static BytesRef deepCopyOf(BytesRef other) 

Document

Creates a new BytesRef that points to a copy of the bytes from other. The returned BytesRef will have a length of other.length and an offset of zero.
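
Why the copy matters: Lucene iterators such as TermsEnum and token attributes typically reuse one internal BytesRef, overwriting its bytes on every step, so holding on to the reference without copying silently corrupts saved terms. Every usage below follows the same pattern: copy before you store. The following minimal sketch (our own illustration, not taken from any of the source files below; the class name DeepCopyOfDemo is invented) contrasts sharing the backing array with deep-copying it:

import org.apache.lucene.util.BytesRef;

public class DeepCopyOfDemo {
    public static void main(String[] args) {
        BytesRef term = new BytesRef("abc");

        // A shallow BytesRef shares the underlying byte[] with the original:
        BytesRef shallow = new BytesRef(term.bytes, term.offset, term.length);
        // deepCopyOf allocates a fresh array; length == term.length, offset == 0:
        BytesRef deep = BytesRef.deepCopyOf(term);

        // Overwrite the original bytes in place, as a reused enum buffer would:
        term.bytes[term.offset] = 'x';

        System.out.println(shallow.utf8ToString()); // "xbc" -- sees the mutation
        System.out.println(deep.utf8ToString());    // "abc" -- unaffected
    }
}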

Usage

From source file: perf.TermsQueryPerf.java

License: Apache License

public static void main(String[] args) throws Exception {

    List<BytesRef> lookupIDs = new ArrayList<>();
    Random random = new Random(17);
    double rate = 1.01 * ((double) NUM_QUERIES * ID_SEARCH_COUNT) / ID_INDEX_COUNT;

    Path indexPath = Paths.get(args[0]);

    boolean doIndex = Files.exists(indexPath) == false;

    Directory dir = FSDirectory.open(indexPath);

    if (doIndex) {
        IndexWriterConfig iwc = new IndexWriterConfig(new WhitespaceAnalyzer());
        iwc.setMergeScheduler(new SerialMergeScheduler());
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        // So I can walk the files and get the *.tip sizes:
        iwc.setUseCompoundFile(false);

        /// 7/7/7 segment structure:
        iwc.setMaxBufferedDocs(ID_INDEX_COUNT / 777);
        iwc.setRAMBufferSizeMB(-1);

        ((TieredMergePolicy) iwc.getMergePolicy()).setFloorSegmentMB(.001);
        ((TieredMergePolicy) iwc.getMergePolicy()).setNoCFSRatio(0.0);

        IndexWriter w = new IndexWriter(dir, iwc);
        // IDIterator ids = zeroPadSequentialIDs(10);
        IDIterator ids = randomIDs(10, random);

        BytesRef idValue = new BytesRef(64);
        for (int i = 0; i < ID_INDEX_COUNT; i++) {
            ids.next(idValue);
            Document doc = new Document();
            doc.add(new StringField("id", idValue, Field.Store.NO));
            w.addDocument(doc);
            if (random.nextDouble() <= rate && lookupIDs.size() < NUM_QUERIES * ID_SEARCH_COUNT) {
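                // idValue is a reused buffer that ids.next() refills in place, so store an independent copy: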
                lookupIDs.add(BytesRef.deepCopyOf(idValue));
            }
            if (i % 100000 == 0) {
                System.out.println(i + " docs...");
            }
        }
        w.close();
    }

    IndexReader r = DirectoryReader.open(dir);

    if (doIndex == false) {
        System.out.println("Build lookup ids");
        TermsEnum termsEnum = MultiFields.getTerms(r, "id").iterator();
        BytesRef idValue;
        while ((idValue = termsEnum.next()) != null) {
            if (random.nextDouble() <= rate && lookupIDs.size() < NUM_QUERIES * ID_SEARCH_COUNT) {
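                // termsEnum.next() returns a reused BytesRef, so copy it before keeping a reference: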
                lookupIDs.add(BytesRef.deepCopyOf(idValue));
                //System.out.println("add: " + idValue);
            }
        }
        shuffle(random, lookupIDs);
        System.out.println("Done build lookup ids");
    }

    IndexSearcher s = new IndexSearcher(r);

    if (lookupIDs.size() < NUM_QUERIES * ID_SEARCH_COUNT) {
        throw new RuntimeException(
                "didn't get enough lookup ids: " + (NUM_QUERIES * ID_SEARCH_COUNT) + " vs " + lookupIDs.size());
    }

    List<Query> queries = new ArrayList<Query>();
    for (int i = 0; i < NUM_QUERIES; i++) {

        List<BytesRef> sortedTermBytes = new ArrayList<>();
        for (BytesRef term : lookupIDs.subList(i * ID_SEARCH_COUNT, (i + 1) * ID_SEARCH_COUNT)) {
            sortedTermBytes.add(term);
        }
        Collections.sort(sortedTermBytes);

        // nocommit only do this if term count is high enough?
        // nocommit: we can be more efficient here, go straight to binary:
        Query query = new AutomatonQuery(new Term("id", "manyterms"),
                Automata.makeStringUnion(sortedTermBytes));
        //((MultiTermQuery) query).setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
        //Query query = new TermsQuery("id", lookupIDs.subList(i*ID_SEARCH_COUNT, (i+1)*ID_SEARCH_COUNT));
        queries.add(query);
    }

    // TODO: also include construction time of queries
    long best = Long.MAX_VALUE;
    for (int iter = 0; iter < 100; iter++) {
        long t0 = System.nanoTime();
        long totCount = 0;
        for (int i = 0; i < NUM_QUERIES; i++) {
            //Query query = new TermsQuery("id", lookupIDs.subList(i*ID_SEARCH_COUNT, (i+1)*ID_SEARCH_COUNT));
            Query query = queries.get(i);
            totCount += s.search(query, 10).totalHits;
        }
        if (totCount != NUM_QUERIES * ID_SEARCH_COUNT) {
            throw new RuntimeException(
                    "totCount=" + totCount + " but expected " + (NUM_QUERIES * ID_SEARCH_COUNT));
        }
        long t = System.nanoTime() - t0;
        System.out.println("ITER: " + iter + ": " + (t / 1000000.) + " msec");
        if (t < best) {
            System.out.println("  **");
            best = t;
        }
    }

    IOUtils.close(r, dir);
}

From source file: uk.co.flax.luwak.Monitor.java

License: Apache License

/**
 * Remove unused queries from the query cache.
 *
 * This is normally called from a background thread at a rate set by configurePurgeFrequency().
 *
 * @throws IOException on IO errors
 */
public void purgeCache() throws IOException {
    queryIndex.purgeCache(new QueryIndex.CachePopulator() {
        @Override
        public void populateCacheWithIndex(final Map<BytesRef, QueryCacheEntry> newCache) throws IOException {
            queryIndex.scan(new QueryIndex.QueryCollector() {
                @Override
                public void matchQuery(String id, QueryCacheEntry query, QueryIndex.DataValues dataValues)
                        throws IOException {
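                    // query.hash may point into a reused buffer, so give the map key its own copy: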
                    if (query != null)
                        newCache.put(BytesRef.deepCopyOf(query.hash), query);
                }
            });
        }
    });

    lastPurged = System.nanoTime();
    afterPurge();
}

From source file: uk.co.flax.luwak.presearcher.FieldFilterPresearcherComponent.java

License: Apache License

private Query buildFilterClause(LeafReader reader) throws IOException {

    Terms terms = reader.fields().terms(field);
    if (terms == null)
        return null;

    BooleanQuery.Builder bq = new BooleanQuery.Builder();

    int docsInBatch = reader.maxDoc();

    BytesRef term;
    TermsEnum te = terms.iterator();
    while ((term = te.next()) != null) {
        // we need to check that every document in the batch has the same field values, otherwise
        // this filtering will not work
        if (te.docFreq() != docsInBatch)
            throw new IllegalArgumentException("Some documents in this batch do not have a term value of "
                    + field + ":" + Term.toString(term));
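        // te.next() reuses its BytesRef, so the Term gets its own copy: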
        bq.add(new TermQuery(new Term(field, BytesRef.deepCopyOf(term))), BooleanClause.Occur.SHOULD);
    }

    BooleanQuery built = bq.build();

    if (built.clauses().size() == 0)
        return null;

    return built;
}

From source file: uk.co.flax.luwak.presearcher.TermFilteredPresearcher.java

License: Apache License

@Override
public final Query buildQuery(LeafReader reader, QueryTermFilter queryTermFilter) {
    try {
        DocumentQueryBuilder queryBuilder = getQueryBuilder();
        for (String field : reader.fields()) {

            TokenStream ts = new TermsEnumTokenStream(reader.terms(field).iterator());
            for (PresearcherComponent component : components) {
                ts = component.filterDocumentTokens(field, ts);
            }

            ts = new BytesRefFilteredTokenFilter(ts, queryTermFilter.getTerms(field));

            TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
            while (ts.incrementToken()) {
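                // the attribute's BytesRef is overwritten on each incrementToken(), so copy every term: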
                queryBuilder.addTerm(field, BytesRef.deepCopyOf(termAtt.getBytesRef()));
            }

        }
        Query presearcherQuery = queryBuilder.build();

        BooleanQuery.Builder bq = new BooleanQuery.Builder();
        bq.add(presearcherQuery, BooleanClause.Occur.SHOULD);
        bq.add(new TermQuery(new Term(ANYTOKEN_FIELD, ANYTOKEN)), BooleanClause.Occur.SHOULD);
        presearcherQuery = bq.build();

        for (PresearcherComponent component : components) {
            presearcherQuery = component.adjustPresearcherQuery(reader, presearcherQuery);
        }

        return presearcherQuery;
    } catch (IOException e) {
        // We're a MemoryIndex, so this shouldn't happen...
        throw new RuntimeException(e);
    }
}

From source file: uk.co.flax.luwak.util.SpanRewriter.java

License: Apache License

protected Query rewriteTermsQuery(TermsQuery query) {

    Map<String, List<SpanTermQuery>> spanQueries = new HashMap<>();

    try {
        Field termsField = TermsQuery.class.getDeclaredField("termData");
        termsField.setAccessible(true);
        PrefixCodedTerms terms = (PrefixCodedTerms) termsField.get(query);
        PrefixCodedTerms.TermIterator it = terms.iterator();
        for (int i = 0; i < terms.size(); i++) {
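            // TermIterator reuses its BytesRef across next() calls, so take a copy: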
            BytesRef term = BytesRef.deepCopyOf(it.next());
            if (spanQueries.containsKey(it.field()) == false) {
                spanQueries.put(it.field(), new ArrayList<SpanTermQuery>());
            }
            spanQueries.get(it.field()).add(new SpanTermQuery(new Term(it.field(), term)));
        }
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        for (Map.Entry<String, List<SpanTermQuery>> entry : spanQueries.entrySet()) {
            List<SpanTermQuery> termQueries = entry.getValue();
            builder.add(new SpanOrQuery(termQueries.toArray(new SpanTermQuery[termQueries.size()])),
                    BooleanClause.Occur.SHOULD);
        }
        return builder.build();
    } catch (Exception e) {
        throw new IllegalStateException(e);
    }

}

From source file: utils.HighFreqTerms.java

License: Apache License

public static void fillQueue(TermsEnum termsEnum, TermStatsQueue tiq, String field) throws Exception {
    BytesRef term;
    while ((term = termsEnum.next()) != null) {
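        // termsEnum.next() reuses its BytesRef; the commented-out lines show the equivalent manual copy: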
        // BytesRef r = new BytesRef();
        // r.copyBytes(term);
        BytesRef r = BytesRef.deepCopyOf(term);
        tiq.insertWithOverflow(new TermStats(field, r, termsEnum.docFreq()));
    }
}