List of usage examples for org.apache.lucene.util.BytesRef.deepCopyOf
public static BytesRef deepCopyOf(BytesRef other)
Parameter: other - the BytesRef to copy.

Creates a new BytesRef holding a copy of the bytes from other. The returned BytesRef will have a length of other.length and an offset of zero.
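A minimal standalone sketch of what the deep copy buys you (the class name, the literal "hello", and the variable names are illustrative, not taken from any of the examples below): deepCopyOf allocates a fresh byte[], so later mutation of the source buffer does not leak into the copy.

    import org.apache.lucene.util.BytesRef;

    public class DeepCopyOfDemo {
        public static void main(String[] args) {
            BytesRef scratch = new BytesRef("hello");      // backed by its own byte[]
            BytesRef copy = BytesRef.deepCopyOf(scratch);  // fresh byte[], offset 0, length == scratch.length
            scratch.bytes[scratch.offset] = (byte) 'j';    // mutate the original buffer
            // A shallow reference would now read "jello"; the deep copy is unaffected.
            System.out.println(scratch.utf8ToString());    // jello
            System.out.println(copy.utf8ToString());       // hello
        }
    }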
From source file: perf.TermsQueryPerf.java
License: Apache License
public static void main(String[] args) throws Exception {
    List<BytesRef> lookupIDs = new ArrayList<>();
    Random random = new Random(17);
    double rate = 1.01 * ((double) NUM_QUERIES * ID_SEARCH_COUNT) / ID_INDEX_COUNT;
    Path indexPath = Paths.get(args[0]);
    boolean doIndex = Files.exists(indexPath) == false;
    Directory dir = FSDirectory.open(indexPath);
    if (doIndex) {
        IndexWriterConfig iwc = new IndexWriterConfig(new WhitespaceAnalyzer());
        iwc.setMergeScheduler(new SerialMergeScheduler());
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        // So I can walk the files and get the *.tip sizes:
        iwc.setUseCompoundFile(false);
        // 7/7/7 segment structure:
        iwc.setMaxBufferedDocs(ID_INDEX_COUNT / 777);
        iwc.setRAMBufferSizeMB(-1);
        ((TieredMergePolicy) iwc.getMergePolicy()).setFloorSegmentMB(.001);
        ((TieredMergePolicy) iwc.getMergePolicy()).setNoCFSRatio(0.0);
        IndexWriter w = new IndexWriter(dir, iwc);

        // IDIterator ids = zeroPadSequentialIDs(10);
        IDIterator ids = randomIDs(10, random);
        BytesRef idValue = new BytesRef(64);
        for (int i = 0; i < ID_INDEX_COUNT; i++) {
            ids.next(idValue);
            Document doc = new Document();
            doc.add(new StringField("id", idValue, Field.Store.NO));
            w.addDocument(doc);
            if (random.nextDouble() <= rate && lookupIDs.size() < NUM_QUERIES * ID_SEARCH_COUNT) {
                lookupIDs.add(BytesRef.deepCopyOf(idValue));
            }
            if (i % 100000 == 0) {
                System.out.println(i + " docs...");
            }
        }
        w.close();
    }

    IndexReader r = DirectoryReader.open(dir);

    if (doIndex == false) {
        System.out.println("Build lookup ids");
        TermsEnum termsEnum = MultiFields.getTerms(r, "id").iterator();
        BytesRef idValue;
        while ((idValue = termsEnum.next()) != null) {
            if (random.nextDouble() <= rate && lookupIDs.size() < NUM_QUERIES * ID_SEARCH_COUNT) {
                lookupIDs.add(BytesRef.deepCopyOf(idValue));
                //System.out.println("add: " + idValue);
            }
        }
        shuffle(random, lookupIDs);
        System.out.println("Done build lookup ids");
    }

    IndexSearcher s = new IndexSearcher(r);

    if (lookupIDs.size() < NUM_QUERIES * ID_SEARCH_COUNT) {
        throw new RuntimeException("didn't get enough lookup ids: " + (NUM_QUERIES * ID_SEARCH_COUNT)
                + " vs " + lookupIDs.size());
    }

    List<Query> queries = new ArrayList<Query>();
    for (int i = 0; i < NUM_QUERIES; i++) {
        List<BytesRef> sortedTermBytes = new ArrayList<>();
        for (BytesRef term : lookupIDs.subList(i * ID_SEARCH_COUNT, (i + 1) * ID_SEARCH_COUNT)) {
            sortedTermBytes.add(term);
        }
        Collections.sort(sortedTermBytes);
        // nocommit only do this if term count is high enough?
        // nocommit: we can be more efficient here, go straight to binary:
        Query query = new AutomatonQuery(new Term("id", "manyterms"), Automata.makeStringUnion(sortedTermBytes));
        //((MultiTermQuery) query).setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
        //Query query = new TermsQuery("id", lookupIDs.subList(i*ID_SEARCH_COUNT, (i+1)*ID_SEARCH_COUNT));
        queries.add(query);
    }

    // TODO: also include construction time of queries
    long best = Long.MAX_VALUE;
    for (int iter = 0; iter < 100; iter++) {
        long t0 = System.nanoTime();
        long totCount = 0;
        for (int i = 0; i < NUM_QUERIES; i++) {
            //Query query = new TermsQuery("id", lookupIDs.subList(i*ID_SEARCH_COUNT, (i+1)*ID_SEARCH_COUNT));
            Query query = queries.get(i);
            totCount += s.search(query, 10).totalHits;
        }
        if (totCount != NUM_QUERIES * ID_SEARCH_COUNT) {
            throw new RuntimeException("totCount=" + totCount + " but expected " + (NUM_QUERIES * ID_SEARCH_COUNT));
        }
        long t = System.nanoTime() - t0;
        System.out.println("ITER: " + iter + ": " + (t / 1000000.) + " msec");
        if (t < best) {
            System.out.println(" **");
            best = t;
        }
    }

    IOUtils.close(r, dir);
}
From source file: uk.co.flax.luwak.Monitor.java
License: Apache License
/**
 * Remove unused queries from the query cache.
 *
 * This is normally called from a background thread at a rate set by configurePurgeFrequency().
 *
 * @throws IOException on IO errors
 */
public void purgeCache() throws IOException {
    queryIndex.purgeCache(new QueryIndex.CachePopulator() {
        @Override
        public void populateCacheWithIndex(final Map<BytesRef, QueryCacheEntry> newCache) throws IOException {
            queryIndex.scan(new QueryIndex.QueryCollector() {
                @Override
                public void matchQuery(String id, QueryCacheEntry query, QueryIndex.DataValues dataValues)
                        throws IOException {
                    if (query != null)
                        newCache.put(BytesRef.deepCopyOf(query.hash), query);
                }
            });
        }
    });
    lastPurged = System.nanoTime();
    afterPurge();
}
From source file: uk.co.flax.luwak.presearcher.FieldFilterPresearcherComponent.java
License: Apache License
private Query buildFilterClause(LeafReader reader) throws IOException {
    Terms terms = reader.fields().terms(field);
    if (terms == null)
        return null;

    BooleanQuery.Builder bq = new BooleanQuery.Builder();
    int docsInBatch = reader.maxDoc();

    BytesRef term;
    TermsEnum te = terms.iterator();
    while ((term = te.next()) != null) {
        // we need to check that every document in the batch has the same field values, otherwise
        // this filtering will not work
        if (te.docFreq() != docsInBatch)
            throw new IllegalArgumentException("Some documents in this batch do not have a term value of "
                    + field + ":" + Term.toString(term));
        bq.add(new TermQuery(new Term(field, BytesRef.deepCopyOf(term))), BooleanClause.Occur.SHOULD);
    }

    BooleanQuery built = bq.build();
    if (built.clauses().size() == 0)
        return null;
    return built;
}
From source file: uk.co.flax.luwak.presearcher.TermFilteredPresearcher.java
License: Apache License
@Override
public final Query buildQuery(LeafReader reader, QueryTermFilter queryTermFilter) {
    try {
        DocumentQueryBuilder queryBuilder = getQueryBuilder();
        for (String field : reader.fields()) {

            TokenStream ts = new TermsEnumTokenStream(reader.terms(field).iterator());
            for (PresearcherComponent component : components) {
                ts = component.filterDocumentTokens(field, ts);
            }

            ts = new BytesRefFilteredTokenFilter(ts, queryTermFilter.getTerms(field));

            TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
            while (ts.incrementToken()) {
                queryBuilder.addTerm(field, BytesRef.deepCopyOf(termAtt.getBytesRef()));
            }
        }

        Query presearcherQuery = queryBuilder.build();

        BooleanQuery.Builder bq = new BooleanQuery.Builder();
        bq.add(presearcherQuery, BooleanClause.Occur.SHOULD);
        bq.add(new TermQuery(new Term(ANYTOKEN_FIELD, ANYTOKEN)), BooleanClause.Occur.SHOULD);
        presearcherQuery = bq.build();

        for (PresearcherComponent component : components) {
            presearcherQuery = component.adjustPresearcherQuery(reader, presearcherQuery);
        }

        return presearcherQuery;
    } catch (IOException e) {
        // We're a MemoryIndex, so this shouldn't happen...
        throw new RuntimeException(e);
    }
}
From source file: uk.co.flax.luwak.util.SpanRewriter.java
License: Apache License
protected Query rewriteTermsQuery(TermsQuery query) {
    Map<String, List<SpanTermQuery>> spanQueries = new HashMap<>();
    try {
        Field termsField = TermsQuery.class.getDeclaredField("termData");
        termsField.setAccessible(true);
        PrefixCodedTerms terms = (PrefixCodedTerms) termsField.get(query);
        PrefixCodedTerms.TermIterator it = terms.iterator();
        for (int i = 0; i < terms.size(); i++) {
            BytesRef term = BytesRef.deepCopyOf(it.next());
            if (spanQueries.containsKey(it.field()) == false) {
                spanQueries.put(it.field(), new ArrayList<SpanTermQuery>());
            }
            spanQueries.get(it.field()).add(new SpanTermQuery(new Term(it.field(), term)));
        }

        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        for (Map.Entry<String, List<SpanTermQuery>> entry : spanQueries.entrySet()) {
            List<SpanTermQuery> termQueries = entry.getValue();
            builder.add(new SpanOrQuery(termQueries.toArray(new SpanTermQuery[termQueries.size()])),
                    BooleanClause.Occur.SHOULD);
        }
        return builder.build();
    } catch (Exception e) {
        throw new IllegalStateException(e);
    }
}
From source file: utils.HighFreqTerms.java
License: Apache License
public static void fillQueue(TermsEnum termsEnum, TermStatsQueue tiq, String field) throws Exception {
    BytesRef term;
    while ((term = termsEnum.next()) != null) {
        // BytesRef r = new BytesRef();
        // r.copyBytes(term);
        BytesRef r = BytesRef.deepCopyOf(term);
        tiq.insertWithOverflow(new TermStats(field, r, termsEnum.docFreq()));
    }
}
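Most of the examples above call deepCopyOf on a term obtained from a TermsEnum. That pattern exists because TermsEnum.next() may reuse the same BytesRef instance across calls, so storing the returned reference directly can leave a collection full of aliases to the last term. A minimal sketch of that pitfall follows; the in-memory RAMDirectory, the "id" field, the three sample values, and the demo class name are assumptions chosen only for illustration.

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.MultiFields;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.BytesRef;

    public class TermsEnumCopyDemo {
        public static void main(String[] args) throws Exception {
            Directory dir = new RAMDirectory();
            try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new WhitespaceAnalyzer()))) {
                for (String id : new String[] { "a", "b", "c" }) {
                    Document doc = new Document();
                    doc.add(new StringField("id", id, Field.Store.NO));
                    w.addDocument(doc);
                }
            }
            try (IndexReader r = DirectoryReader.open(dir)) {
                TermsEnum te = MultiFields.getTerms(r, "id").iterator();
                List<BytesRef> aliased = new ArrayList<>();
                List<BytesRef> copied = new ArrayList<>();
                BytesRef term;
                while ((term = te.next()) != null) {
                    aliased.add(term);                     // stores a reference the enum may overwrite
                    copied.add(BytesRef.deepCopyOf(term)); // stores bytes the caller owns
                }
                for (BytesRef b : aliased) {
                    System.out.print(b.utf8ToString() + " "); // may print "c c c"
                }
                System.out.println();
                for (BytesRef b : copied) {
                    System.out.print(b.utf8ToString() + " "); // prints "a b c"
                }
                System.out.println();
            }
        }
    }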