List of usage examples for org.apache.lucene.index.IndexReader#getContext()
public abstract IndexReaderContext getContext();
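getContext() returns the reader's top-level IndexReaderContext; its leaves() method lists one AtomicReaderContext per index segment, which is the usual entry point for per-segment work. Every example below is a variation on the pattern in this minimal sketch (a sketch only, assuming Lucene 4.x and an already-open IndexReader in a variable named reader):

IndexReaderContext topContext = reader.getContext();
for (AtomicReaderContext leaf : topContext.leaves()) {
    // each leaf wraps one segment's AtomicReader; docBase is the offset of the
    // segment's local doc ids within the composite reader's global id space
    AtomicReader segmentReader = leaf.reader();
    int docBase = leaf.docBase;
    // per-segment work (terms, doc values, spans, ...) goes here
}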
From source file:com.joliciel.jochre.search.highlight.LuceneQueryHighlighter.java
License:Open Source License
public LuceneQueryHighlighter(JochreQuery jochreQuery, IndexSearcher indexSearcher) {
    try {
        this.indexSearcher = indexSearcher;
        this.jochreQuery = jochreQuery;
        query = rewrite(jochreQuery.getLuceneQuery());
        queryTerms = new TreeSet<Term>();
        query.extractTerms(queryTerms);
        if (LOG.isTraceEnabled())
            queryTermList = new ArrayList<Term>(queryTerms);

        final IndexReader reader = indexSearcher.getIndexReader();
        // add 1 to doc count to ensure even terms in all docs get a very small weight
        docCountLog = Math.log(reader.numDocs() + 1);
        IndexReaderContext readerContext = reader.getContext();
        leaves = readerContext.leaves();

        // since the same terms might be contained in the query multiple times (e.g. once per field),
        // we only consider them once each by using a HashSet
        terms = new HashSet<BytesRef>();
        Map<BytesRef, Integer> termFreqs = new HashMap<BytesRef, Integer>();
        for (Term term : queryTerms) {
            terms.add(term.bytes());
            termFreqs.put(term.bytes(), 0);
        }

        termLogs = new HashMap<BytesRef, Double>();
        for (Term term : queryTerms) {
            int freq = termFreqs.get(term.bytes());
            freq += reader.docFreq(term);
            termFreqs.put(term.bytes(), freq);
        }
        for (BytesRef term : terms) {
            int freq = termFreqs.get(term);
            termLogs.put(term, Math.log(freq));
        }
    } catch (IOException e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}
From source file:com.lucene.MyIndexSearcher.java
License:Apache License
public MyIndexSearcher(IndexReader r) {
    this(r.getContext());
}
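This delegation works because, in Lucene 4.x, IndexSearcher exposes constructors taking either an IndexReader or an IndexReaderContext; the subclass simply forwards the context obtained from getContext() to the context-based constructor.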
From source file:com.o19s.solr.swan.highlight.SpanAwareFieldQuery.java
License:Apache License
void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries) throws IOException {
    if (sourceQuery instanceof BooleanQuery) {
        BooleanQuery bq = (BooleanQuery) sourceQuery;
        for (BooleanClause clause : bq.getClauses()) {
            if (!clause.isProhibited())
                flatten(clause.getQuery(), reader, flatQueries);
        }
    } else if (sourceQuery instanceof DisjunctionMaxQuery) {
        DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) sourceQuery;
        for (Query query : dmq) {
            flatten(query, reader, flatQueries);
        }
    } else if (sourceQuery instanceof TermQuery) {
        if (!flatQueries.contains(sourceQuery))
            flatQueries.add(sourceQuery);
    } else if (sourceQuery instanceof MultiTermQuery && reader != null) {
        MultiTermQuery copy = (MultiTermQuery) sourceQuery.clone();
        copy.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS));
        BooleanQuery mtqTerms = (BooleanQuery) copy.rewrite(reader);
        flatten(mtqTerms, reader, flatQueries);
    } else if (sourceQuery instanceof PhraseQuery) {
        if (!flatQueries.contains(sourceQuery)) {
            PhraseQuery pq = (PhraseQuery) sourceQuery;
            if (pq.getTerms().length > 1)
                flatQueries.add(pq);
            else if (pq.getTerms().length == 1) {
                flatQueries.add(new TermQuery(pq.getTerms()[0]));
            }
        }
    } else if (sourceQuery instanceof SpanQuery) {
        // TODO Note that the way we are doing phrases, they become SpanQueries - thus we lose
        // all of the corner-case fixes for the phrases already in highlighting - the result will be
        // phrases that have different color highlights for each term
        Set<Term> terms = new LinkedHashSet<Term>();
        List<AtomicReaderContext> readerContexts = reader.getContext().leaves();
        int offset = 0;
        if (readerContexts.size() < 1) {
            return;
        }
        for (AtomicReaderContext arc : readerContexts) {
            if (sourceQuery instanceof SpanNotQuery) {
                SpanNotQuery query = (SpanNotQuery) sourceQuery;
                addSpansPositions(offset, query.getField(),
                        query.getInclude().getSpans(arc, null, new HashMap<Term, TermContext>()));
            } else {
                SpanQuery query = (SpanQuery) sourceQuery;
                addSpansPositions(offset, query.getField(),
                        query.getSpans(arc, null, new HashMap<Term, TermContext>()));
            }
            offset += arc.reader().maxDoc();
        }
        // TODO it is necessary to call getSpans first so that if there is a MultiTerm query it gets
        // rewritten by com.o19s.solr.swan.nodes.SwanTermNode.SwanSpanMultiTermQueryWrapper
        // no easy way around this
        sourceQuery.extractTerms(terms);
        for (Term t : terms) {
            // TODO need to check that this isn't already in the flatQueries (see example above)
            flatQueries.add(new SpanTermQuery(t));
        }
    }
    // else discard queries
}
From source file:com.sindicetech.siren.search.node.NodeTermCollectingRewrite.java
License:Open Source License
final void collectTerms(final IndexReader reader, final MultiNodeTermQuery query, final TermCollector collector)
        throws IOException {
    final IndexReaderContext topReaderContext = reader.getContext();
    Comparator<BytesRef> lastTermComp = null;
    for (final AtomicReaderContext context : topReaderContext.leaves()) {
        final Fields fields = context.reader().fields();
        if (fields == null) {
            // reader has no fields
            continue;
        }
        final Terms terms = fields.terms(query.field);
        if (terms == null) {
            // field does not exist
            continue;
        }
        final TermsEnum termsEnum = this.getTermsEnum(query, terms, collector.attributes);
        assert termsEnum != null;
        if (termsEnum == TermsEnum.EMPTY)
            continue;
        // Check comparator compatibility:
        final Comparator<BytesRef> newTermComp = termsEnum.getComparator();
        if (lastTermComp != null && newTermComp != null && newTermComp != lastTermComp)
            throw new RuntimeException(
                    "term comparator should not change between segments: " + lastTermComp + " != " + newTermComp);
        lastTermComp = newTermComp;
        collector.setReaderContext(topReaderContext, context);
        collector.setNextEnum(termsEnum);
        BytesRef bytes;
        while ((bytes = termsEnum.next()) != null) {
            if (!collector.collect(bytes))
                return; // interrupt whole term collection, so also don't iterate other subReaders
        }
    }
}
From source file:com.sindicetech.siren.util.SirenTestCase.java
License:Open Source License
/**
 * Create a new searcher over the reader. This searcher might randomly use threads.
 * <p>
 * Override the original {@link LuceneTestCase#newSearcher(IndexReader)} implementation
 * in order to avoid getting {@link org.apache.lucene.search.AssertingIndexSearcher},
 * which is incompatible with SIREn.
 * <p>
 * TODO: Implement our own {@link AssertingIndexSearcher} and {@link org.apache.lucene.search.AssertingScorer}
 */
public static IndexSearcher newSearcher(final IndexReader r) {
    final Random random = random();
    if (usually()) {
        // compared to the original implementation, we do not wrap to avoid
        // wrapping into an AssertingAtomicReader
        return random.nextBoolean() ? new IndexSearcher(r) : new IndexSearcher(r.getContext());
    } else {
        int threads = 0;
        final ThreadPoolExecutor ex;
        if (random.nextBoolean()) {
            ex = null;
        } else {
            threads = TestUtil.nextInt(random, 1, 8);
            ex = new ThreadPoolExecutor(threads, threads, 0L, TimeUnit.MILLISECONDS,
                    new LinkedBlockingQueue<Runnable>(), new NamedThreadFactory("LuceneTestCase"));
        }
        if (ex != null) {
            if (VERBOSE) {
                System.out.println("NOTE: newSearcher using ExecutorService with " + threads + " threads");
            }
            r.addReaderClosedListener(new ReaderClosedListener() {
                @Override
                public void onClose(final IndexReader reader) {
                    TestUtil.shutdownExecutorService(ex);
                }
            });
        }
        return random.nextBoolean() ? new IndexSearcher(r, ex) : new IndexSearcher(r.getContext(), ex);
    }
}
From source file:dk.dbc.opensearch.fedora.search.LuceneFieldIndex.java
License:Open Source License
/**
 * For queries that are beforehand known to retrieve all (active) documents
 * from the index, this method can bypass the performance penalty of an
 * actual search, and simply return all documents from an IndexReader.
 * @return all PIDs in index as IPidList object
 * @throws IOException if IndexWriter or IndexReader throws an exception
 */
IPidList getAll() throws IOException {
    IPidList results = null;
    searchManager.maybeRefreshBlocking();
    IndexSearcher localSearcher = searchManager.acquire();
    IndexReader localReader = localSearcher.getIndexReader();
    try {
        PidCollector pidCollector = new PidCollector(pidCollectorMaxInMemory, pidCollectorTmpDir);
        for (AtomicReaderContext context : localReader.getContext().leaves()) {
            AtomicReader subReader = context.reader();
            pidCollector.setNextReader(context);
            Bits liveDocs = subReader.getLiveDocs();
            int numDocs = subReader.numDocs();
            int numDelDocs = subReader.numDeletedDocs();
            log.debug("getAll, reader has {} documents, {} deleted documents", numDocs, numDelDocs);
            // numDocs + numDelDocs equals the segment's maxDoc, i.e. the full local doc-id range
            for (int i = 0; i < numDocs + numDelDocs; i++) {
                if (liveDocs != null && !liveDocs.get(i)) {
                    // Skip deleted documents
                    log.trace("Skipping deleted document {}", i);
                    continue;
                }
                log.trace("Getting doc id {}", i);
                pidCollector.collect(i);
            }
        }
        results = pidCollector.getResults();
    } finally {
        searchManager.release(localSearcher);
    }
    return results;
}
From source file:dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java
License:Apache License
private static List<String> getSortedSetDocValues(IndexReader reader, int docID, String field) throws IOException {
    if (!reader.getContext().isTopLevel) {
        throw new IllegalStateException("Expected the reader to be topLevel");
    }
    for (AtomicReaderContext atom : reader.getContext().leaves()) {
        // the leaf containing docID is the one whose [docBase, docBase + maxDoc) range covers it
        if (atom.docBase <= docID && atom.docBase + atom.reader().maxDoc() > docID) {
            return getSortedSetDocValues(atom, docID, field);
        }
    }
    throw new IllegalArgumentException("The docID " + docID + " exceeded the index size");
}
From source file:dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java
License:Apache License
private static String getSortedDocValue(IndexReader reader, int docID, String field) throws IOException {
    if (!reader.getContext().isTopLevel) {
        throw new IllegalStateException("Expected the reader to be topLevel");
    }
    for (AtomicReaderContext atom : reader.getContext().leaves()) {
        if (atom.docBase <= docID && atom.docBase + atom.reader().maxDoc() > docID) {
            return getSortedDocValue(atom, docID, field);
        }
    }
    throw new IllegalArgumentException("The docID " + docID + " exceeded the index size");
}
From source file:dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java
License:Apache License
private double getDoubleDocValue(IndexReader reader, int docID, String field) throws IOException {
    if (!reader.getContext().isTopLevel) {
        throw new IllegalStateException("Expected the reader to be topLevel");
    }
    for (AtomicReaderContext atom : reader.getContext().leaves()) {
        if (atom.docBase <= docID && atom.docBase + atom.reader().maxDoc() > docID) {
            return getDoubleDocValue(atom, docID, field);
        }
    }
    throw new IllegalArgumentException("The docID " + docID + " exceeded the index size");
}
From source file:dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java
License:Apache License
private static long getLongDocValue(IndexReader reader, int docID, String field) throws IOException {
    if (!reader.getContext().isTopLevel) {
        throw new IllegalStateException("Expected the reader to be topLevel");
    }
    for (AtomicReaderContext atom : reader.getContext().leaves()) {
        if (atom.docBase <= docID && atom.docBase + atom.reader().maxDoc() > docID) {
            return getLongDocValue(atom, docID, field);
        }
    }
    throw new IllegalArgumentException("The docID " + docID + " exceeded the index size");
}
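The four DVReaderTest methods above locate the segment containing a global docID by scanning the leaves for a matching docBase range. A hypothetical shorter equivalent is sketched below, assuming the Lucene 4.x ReaderUtil helper, which performs the same lookup by binary search over docBase; the explicit upper-bound check for out-of-range docIDs is still needed:

import org.apache.lucene.index.ReaderUtil;

List<AtomicReaderContext> leaves = reader.getContext().leaves();
int idx = ReaderUtil.subIndex(docID, leaves); // index of the leaf whose docBase range covers docID
AtomicReaderContext atom = leaves.get(idx);
// ReaderUtil.subIndex does not validate the upper bound, so keep the original check:
if (docID >= atom.docBase + atom.reader().maxDoc()) {
    throw new IllegalArgumentException("The docID " + docID + " exceeded the index size");
}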