List of usage examples for org.apache.lucene.index.IndexReader#getContext()
public abstract IndexReaderContext getContext();
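getContext() returns the reader's top-level IndexReaderContext; its leaves() method lists one AtomicReaderContext per index segment, which is the usual entry point for per-segment work. Every example below is a variation on the pattern in this minimal sketch (a sketch only, assuming Lucene 4.x and an already-open IndexReader in a variable named reader):

IndexReaderContext topContext = reader.getContext();
for (AtomicReaderContext leaf : topContext.leaves()) {
    // each leaf wraps one segment's AtomicReader; docBase is the offset of the
    // segment's local doc ids within the composite reader's global id space
    AtomicReader segmentReader = leaf.reader();
    int docBase = leaf.docBase;
    // per-segment work (terms, doc values, spans, ...) goes here
}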
From source file:com.joliciel.jochre.search.highlight.LuceneQueryHighlighter.java
License:Open Source License
public LuceneQueryHighlighter(JochreQuery jochreQuery, IndexSearcher indexSearcher) {
    try {
        this.indexSearcher = indexSearcher;
        this.jochreQuery = jochreQuery;
        query = rewrite(jochreQuery.getLuceneQuery());
        queryTerms = new TreeSet<Term>();
        query.extractTerms(queryTerms);
        if (LOG.isTraceEnabled())
            queryTermList = new ArrayList<Term>(queryTerms);

        final IndexReader reader = indexSearcher.getIndexReader();
        // add 1 to doc count to ensure even terms in all docs get a very small weight
        docCountLog = Math.log(reader.numDocs() + 1);
        IndexReaderContext readerContext = reader.getContext();
        leaves = readerContext.leaves();

        // since the same terms might be contained in the query multiple times (e.g. once per field),
        // we only consider them once each by using a HashSet
        terms = new HashSet<BytesRef>();
        Map<BytesRef, Integer> termFreqs = new HashMap<BytesRef, Integer>();
        for (Term term : queryTerms) {
            terms.add(term.bytes());
            termFreqs.put(term.bytes(), 0);
        }

        termLogs = new HashMap<BytesRef, Double>();
        for (Term term : queryTerms) {
            int freq = termFreqs.get(term.bytes());
            freq += reader.docFreq(term);
            termFreqs.put(term.bytes(), freq);
        }
        for (BytesRef term : terms) {
            int freq = termFreqs.get(term);
            termLogs.put(term, Math.log(freq));
        }
    } catch (IOException e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}
From source file:com.lucene.MyIndexSearcher.java
License:Apache License
public MyIndexSearcher(IndexReader r) {
    this(r.getContext());
}
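This delegation works because, in Lucene 4.x, IndexSearcher exposes constructors taking either an IndexReader or an IndexReaderContext; the subclass simply forwards the context obtained from getContext() to the context-based constructor.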
From source file:com.o19s.solr.swan.highlight.SpanAwareFieldQuery.java
License:Apache License
void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries) throws IOException {
    if (sourceQuery instanceof BooleanQuery) {
        BooleanQuery bq = (BooleanQuery) sourceQuery;
        for (BooleanClause clause : bq.getClauses()) {
            if (!clause.isProhibited())
                flatten(clause.getQuery(), reader, flatQueries);
        }
    } else if (sourceQuery instanceof DisjunctionMaxQuery) {
        DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) sourceQuery;
        for (Query query : dmq) {
            flatten(query, reader, flatQueries);
        }
    } else if (sourceQuery instanceof TermQuery) {
        if (!flatQueries.contains(sourceQuery))
            flatQueries.add(sourceQuery);
    } else if (sourceQuery instanceof MultiTermQuery && reader != null) {
        MultiTermQuery copy = (MultiTermQuery) sourceQuery.clone();
        copy.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS));
        BooleanQuery mtqTerms = (BooleanQuery) copy.rewrite(reader);
        flatten(mtqTerms, reader, flatQueries);
    } else if (sourceQuery instanceof PhraseQuery) {
        if (!flatQueries.contains(sourceQuery)) {
            PhraseQuery pq = (PhraseQuery) sourceQuery;
            if (pq.getTerms().length > 1)
                flatQueries.add(pq);
            else if (pq.getTerms().length == 1) {
                flatQueries.add(new TermQuery(pq.getTerms()[0]));
            }
        }
    } else if (sourceQuery instanceof SpanQuery) {
        // TODO Note that the way we are doing phrases, they become SpanQueries - thus we lose
        // all of the corner-case fixes for the phrases already in highlighting - the result will be
        // phrases that have different color highlights for each term
        Set<Term> terms = new LinkedHashSet<Term>();
        List<AtomicReaderContext> readerContexts = reader.getContext().leaves();
        int offset = 0;
        if (readerContexts.size() < 1) {
            return;
        }
        for (AtomicReaderContext arc : readerContexts) {
            if (sourceQuery instanceof SpanNotQuery) {
                SpanNotQuery query = (SpanNotQuery) sourceQuery;
                addSpansPositions(offset, query.getField(),
                        query.getInclude().getSpans(arc, null, new HashMap<Term, TermContext>()));
            } else {
                SpanQuery query = (SpanQuery) sourceQuery;
                addSpansPositions(offset, query.getField(),
                        query.getSpans(arc, null, new HashMap<Term, TermContext>()));
            }
            offset += arc.reader().maxDoc();
        }
        // TODO it is necessary to call getSpans first so that if there is a MultiTerm query it gets
        // rewritten by com.o19s.solr.swan.nodes.SwanTermNode.SwanSpanMultiTermQueryWrapper
        // no easy way around this
        sourceQuery.extractTerms(terms);
        for (Term t : terms) {
            // TODO need to check that this isn't already in the flatQueries (see example above)
            flatQueries.add(new SpanTermQuery(t));
        }
    }
    // else discard queries
}
From source file:com.sindicetech.siren.search.node.NodeTermCollectingRewrite.java
License:Open Source License
final void collectTerms(final IndexReader reader, final MultiNodeTermQuery query, final TermCollector collector)
        throws IOException {
    final IndexReaderContext topReaderContext = reader.getContext();
    Comparator<BytesRef> lastTermComp = null;
    for (final AtomicReaderContext context : topReaderContext.leaves()) {
        final Fields fields = context.reader().fields();
        if (fields == null) {
            // reader has no fields
            continue;
        }
        final Terms terms = fields.terms(query.field);
        if (terms == null) {
            // field does not exist
            continue;
        }
        final TermsEnum termsEnum = this.getTermsEnum(query, terms, collector.attributes);
        assert termsEnum != null;
        if (termsEnum == TermsEnum.EMPTY)
            continue;
        // Check comparator compatibility:
        final Comparator<BytesRef> newTermComp = termsEnum.getComparator();
        if (lastTermComp != null && newTermComp != null && newTermComp != lastTermComp)
            throw new RuntimeException(
                    "term comparator should not change between segments: " + lastTermComp + " != " + newTermComp);
        lastTermComp = newTermComp;
        collector.setReaderContext(topReaderContext, context);
        collector.setNextEnum(termsEnum);
        BytesRef bytes;
        while ((bytes = termsEnum.next()) != null) {
            if (!collector.collect(bytes))
                return; // interrupt whole term collection, so also don't iterate other subReaders
        }
    }
}
From source file:com.sindicetech.siren.util.SirenTestCase.java
License:Open Source License
/**
 * Create a new searcher over the reader. This searcher might randomly use threads.
 * <p>
 * Override the original {@link LuceneTestCase#newSearcher(IndexReader)} implementation
 * in order to avoid getting {@link org.apache.lucene.search.AssertingIndexSearcher},
 * which is incompatible with SIREn.
 * <p>
 * TODO: Implement our own {@link AssertingIndexSearcher} and {@link org.apache.lucene.search.AssertingScorer}
 */
public static IndexSearcher newSearcher(final IndexReader r) {
    final Random random = random();
    if (usually()) {
        // compared to the original implementation, we do not wrap to avoid
        // wrapping into an AssertingAtomicReader
        return random.nextBoolean() ? new IndexSearcher(r) : new IndexSearcher(r.getContext());
    } else {
        int threads = 0;
        final ThreadPoolExecutor ex;
        if (random.nextBoolean()) {
            ex = null;
        } else {
            threads = TestUtil.nextInt(random, 1, 8);
            ex = new ThreadPoolExecutor(threads, threads, 0L, TimeUnit.MILLISECONDS,
                    new LinkedBlockingQueue<Runnable>(), new NamedThreadFactory("LuceneTestCase"));
        }
        if (ex != null) {
            if (VERBOSE) {
                System.out.println("NOTE: newSearcher using ExecutorService with " + threads + " threads");
            }
            r.addReaderClosedListener(new ReaderClosedListener() {
                @Override
                public void onClose(final IndexReader reader) {
                    TestUtil.shutdownExecutorService(ex);
                }
            });
        }
        return random.nextBoolean() ? new IndexSearcher(r, ex) : new IndexSearcher(r.getContext(), ex);
    }
}
From source file:dk.dbc.opensearch.fedora.search.LuceneFieldIndex.java
License:Open Source License
/**
 * For queries that are beforehand known to retrieve all (active) documents
 * from the index, this method can bypass the performance penalty of an
 * actual search, and simply return all documents from an IndexReader.
 * @return all PIDs in index as IPidList object
 * @throws IOException if IndexWriter or IndexReader throws an exception
 */
IPidList getAll() throws IOException {
    IPidList results = null;
    searchManager.maybeRefreshBlocking();
    IndexSearcher localSearcher = searchManager.acquire();
    IndexReader localReader = localSearcher.getIndexReader();
    try {
        PidCollector pidCollector = new PidCollector(pidCollectorMaxInMemory, pidCollectorTmpDir);
        for (AtomicReaderContext context : localReader.getContext().leaves()) {
            AtomicReader subReader = context.reader();
            pidCollector.setNextReader(context);
            Bits liveDocs = subReader.getLiveDocs();
            int numDocs = subReader.numDocs();
            int numDelDocs = subReader.numDeletedDocs();
            log.debug("getAll, reader has {} documents, {} deleted documents", numDocs, numDelDocs);
            // numDocs + numDelDocs equals the segment's maxDoc, i.e. the full local doc-id range
            for (int i = 0; i < numDocs + numDelDocs; i++) {
                if (liveDocs != null && !liveDocs.get(i)) {
                    // Skip deleted documents
                    log.trace("Skipping deleted document {}", i);
                    continue;
                }
                log.trace("Getting doc id {}", i);
                pidCollector.collect(i);
            }
        }
        results = pidCollector.getResults();
    } finally {
        searchManager.release(localSearcher);
    }
    return results;
}
From source file:dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java
License:Apache License
private static List<String> getSortedSetDocValues(IndexReader reader, int docID, String field) throws IOException {
    if (!reader.getContext().isTopLevel) {
        throw new IllegalStateException("Expected the reader to be topLevel");
    }
    for (AtomicReaderContext atom : reader.getContext().leaves()) {
        // the leaf containing docID is the one whose [docBase, docBase + maxDoc) range covers it
        if (atom.docBase <= docID && atom.docBase + atom.reader().maxDoc() > docID) {
            return getSortedSetDocValues(atom, docID, field);
        }
    }
    throw new IllegalArgumentException("The docID " + docID + " exceeded the index size");
}
From source file:dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java
License:Apache License
private static String getSortedDocValue(IndexReader reader, int docID, String field) throws IOException {
    if (!reader.getContext().isTopLevel) {
        throw new IllegalStateException("Expected the reader to be topLevel");
    }
    for (AtomicReaderContext atom : reader.getContext().leaves()) {
        if (atom.docBase <= docID && atom.docBase + atom.reader().maxDoc() > docID) {
            return getSortedDocValue(atom, docID, field);
        }
    }
    throw new IllegalArgumentException("The docID " + docID + " exceeded the index size");
}
From source file:dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java
License:Apache License
private double getDoubleDocValue(IndexReader reader, int docID, String field) throws IOException {
    if (!reader.getContext().isTopLevel) {
        throw new IllegalStateException("Expected the reader to be topLevel");
    }
    for (AtomicReaderContext atom : reader.getContext().leaves()) {
        if (atom.docBase <= docID && atom.docBase + atom.reader().maxDoc() > docID) {
            return getDoubleDocValue(atom, docID, field);
        }
    }
    throw new IllegalArgumentException("The docID " + docID + " exceeded the index size");
}
From source file:dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java
License:Apache License
private static long getLongDocValue(IndexReader reader, int docID, String field) throws IOException {
    if (!reader.getContext().isTopLevel) {
        throw new IllegalStateException("Expected the reader to be topLevel");
    }
    for (AtomicReaderContext atom : reader.getContext().leaves()) {
        if (atom.docBase <= docID && atom.docBase + atom.reader().maxDoc() > docID) {
            return getLongDocValue(atom, docID, field);
        }
    }
    throw new IllegalArgumentException("The docID " + docID + " exceeded the index size");
}
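The four DVReaderTest methods above locate the segment containing a global docID by scanning the leaves for a matching docBase range. A hypothetical shorter equivalent is sketched below, assuming the Lucene 4.x ReaderUtil helper, which performs the same lookup by binary search over docBase; the explicit upper-bound check for out-of-range docIDs is still needed:

import org.apache.lucene.index.ReaderUtil;

List<AtomicReaderContext> leaves = reader.getContext().leaves();
int idx = ReaderUtil.subIndex(docID, leaves); // index of the leaf whose docBase range covers docID
AtomicReaderContext atom = leaves.get(idx);
// ReaderUtil.subIndex does not validate the upper bound, so keep the original check:
if (docID >= atom.docBase + atom.reader().maxDoc()) {
    throw new IllegalArgumentException("The docID " + docID + " exceeded the index size");
}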