Example usage for org.apache.lucene.index IndexReader getContext

Introduction

On this page you can find usage examples for org.apache.lucene.index.IndexReader.getContext().

Prototype

public abstract IndexReaderContext getContext();

Document

Expert: Returns the root IndexReaderContext for this IndexReader's sub-reader tree.
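
Before the examples, here is a minimal, self-contained sketch of the common pattern: call getContext() on the root reader and walk the per-segment leaves. It is written against the Lucene 4.x API that the examples below use (DirectoryReader, AtomicReaderContext); the index path is a placeholder, not part of the original examples.

import java.io.File;
import java.io.IOException;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class GetContextExample {
    public static void main(String[] args) throws IOException {
        // "/path/to/index" is a placeholder for an existing index directory
        try (Directory dir = FSDirectory.open(new File("/path/to/index"));
                IndexReader reader = DirectoryReader.open(dir)) {
            // getContext() returns the root of the reader's sub-reader tree
            IndexReaderContext root = reader.getContext();
            // leaves() flattens that tree into one context per segment;
            // docBase is the segment's offset into the composite doc-id space
            for (AtomicReaderContext leaf : root.leaves()) {
                System.out.println("segment: docBase=" + leaf.docBase
                        + ", maxDoc=" + leaf.reader().maxDoc());
            }
        }
    }
}

The docBase offsets printed here are the same per-segment offsets that several of the examples below rely on to map a composite docID onto the segment that actually holds it.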

Usage

From source file:com.joliciel.jochre.search.highlight.LuceneQueryHighlighter.java

License:Open Source License

public LuceneQueryHighlighter(JochreQuery jochreQuery, IndexSearcher indexSearcher) {
    try {
        this.indexSearcher = indexSearcher;
        this.jochreQuery = jochreQuery;
        query = rewrite(jochreQuery.getLuceneQuery());
        queryTerms = new TreeSet<Term>();
        query.extractTerms(queryTerms);
        if (LOG.isTraceEnabled())
            queryTermList = new ArrayList<Term>(queryTerms);

        final IndexReader reader = indexSearcher.getIndexReader();
        // add 1 to the doc count so that even terms occurring in all docs get a small non-zero weight
        docCountLog = Math.log(reader.numDocs() + 1);

        IndexReaderContext readerContext = reader.getContext();
        leaves = readerContext.leaves();

        // since the same terms might be contained in the query multiple times (e.g. once per field)
        // we only consider them once each by using a HashSet
        terms = new HashSet<BytesRef>();
        Map<BytesRef, Integer> termFreqs = new HashMap<BytesRef, Integer>();
        for (Term term : queryTerms) {
            terms.add(term.bytes());
            termFreqs.put(term.bytes(), 0);
        }

        termLogs = new HashMap<BytesRef, Double>();
        for (Term term : queryTerms) {
            int freq = termFreqs.get(term.bytes());
            freq += reader.docFreq(term);
            termFreqs.put(term.bytes(), freq);
        }
        for (BytesRef term : terms) {
            int freq = termFreqs.get(term);
            termLogs.put(term, Math.log(freq));
        }
    } catch (IOException e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}

From source file:com.lucene.MyIndexSearcher.java

License:Apache License

public MyIndexSearcher(IndexReader r) {
    this(r.getContext());
}

From source file:com.o19s.solr.swan.highlight.SpanAwareFieldQuery.java

License:Apache License

void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries) throws IOException {
    if (sourceQuery instanceof BooleanQuery) {
        BooleanQuery bq = (BooleanQuery) sourceQuery;
        for (BooleanClause clause : bq.getClauses()) {
            if (!clause.isProhibited())
                flatten(clause.getQuery(), reader, flatQueries);
        }
    } else if (sourceQuery instanceof DisjunctionMaxQuery) {
        DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) sourceQuery;
        for (Query query : dmq) {
            flatten(query, reader, flatQueries);
        }
    } else if (sourceQuery instanceof TermQuery) {
        if (!flatQueries.contains(sourceQuery))
            flatQueries.add(sourceQuery);
    } else if (sourceQuery instanceof MultiTermQuery && reader != null) {
        MultiTermQuery copy = (MultiTermQuery) sourceQuery.clone();
        copy.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS));
        BooleanQuery mtqTerms = (BooleanQuery) copy.rewrite(reader);
        flatten(mtqTerms, reader, flatQueries);
    } else if (sourceQuery instanceof PhraseQuery) {
        if (!flatQueries.contains(sourceQuery)) {
            PhraseQuery pq = (PhraseQuery) sourceQuery;
            if (pq.getTerms().length > 1)
                flatQueries.add(pq);
            else if (pq.getTerms().length == 1) {
                flatQueries.add(new TermQuery(pq.getTerms()[0]));
            }
        }
    } else if (sourceQuery instanceof SpanQuery) {
        //TODO Note that the way we are doing phrases, they become SpanQueries - thus we lose
        //all of the corner-case fixes for phrases already in highlighting - the result will be
        //phrases that have different color highlights for each term
        Set<Term> terms = new LinkedHashSet<Term>();
        List<AtomicReaderContext> readerContexts = reader.getContext().leaves();

        int offset = 0;

        if (readerContexts.size() < 1) {
            return;
        }
        for (AtomicReaderContext arc : readerContexts) {
            if (sourceQuery instanceof SpanNotQuery) {
                SpanNotQuery query = (SpanNotQuery) sourceQuery;
                addSpansPositions(offset, query.getField(),
                        query.getInclude().getSpans(arc, null, new HashMap<Term, TermContext>()));
            } else {
                SpanQuery query = (SpanQuery) sourceQuery;
                addSpansPositions(offset, query.getField(),
                        query.getSpans(arc, null, new HashMap<Term, TermContext>()));
            }
            offset += arc.reader().maxDoc();
        }

        //TODO it is necessary to call getSpans first so that, if there is a MultiTerm query, it gets
        //rewritten by com.o19s.solr.swan.nodes.SwanTermNode.SwanSpanMultiTermQueryWrapper; no easy way around this
        sourceQuery.extractTerms(terms);
        for (Term t : terms) {
            flatQueries.add(new SpanTermQuery(t));//TODO need to check that this isn't already in the flatQueries (see example above)
        }

    }
    // else discard queries
}

From source file:com.sindicetech.siren.search.node.NodeTermCollectingRewrite.java

License:Open Source License

final void collectTerms(final IndexReader reader, final MultiNodeTermQuery query, final TermCollector collector)
        throws IOException {
    final IndexReaderContext topReaderContext = reader.getContext();
    Comparator<BytesRef> lastTermComp = null;
    for (final AtomicReaderContext context : topReaderContext.leaves()) {
        final Fields fields = context.reader().fields();
        if (fields == null) {
            // reader has no fields
            continue;
        }

        final Terms terms = fields.terms(query.field);
        if (terms == null) {
            // field does not exist
            continue;
        }

        final TermsEnum termsEnum = this.getTermsEnum(query, terms, collector.attributes);
        assert termsEnum != null;

        if (termsEnum == TermsEnum.EMPTY)
            continue;

        // Check comparator compatibility:
        final Comparator<BytesRef> newTermComp = termsEnum.getComparator();
        if (lastTermComp != null && newTermComp != null && newTermComp != lastTermComp)
            throw new RuntimeException("term comparator should not change between segments: " + lastTermComp
                    + " != " + newTermComp);
        lastTermComp = newTermComp;
        collector.setReaderContext(topReaderContext, context);
        collector.setNextEnum(termsEnum);
        BytesRef bytes;
        while ((bytes = termsEnum.next()) != null) {
            if (!collector.collect(bytes))
                return; // interrupt whole term collection, so also don't iterate other subReaders
        }
    }
}

From source file:com.sindicetech.siren.util.SirenTestCase.java

License:Open Source License

/**
 * Create a new searcher over the reader. This searcher might randomly use
 * threads.
 * <p>
 * Override the original {@link LuceneTestCase#newSearcher(IndexReader)}
 * implementation in order to avoid getting {@link org.apache.lucene.search.AssertingIndexSearcher}
 * which is incompatible with SIREn.
 * <p>
 * TODO: Implement our own {@link AssertingIndexSearcher} and {@link org.apache.lucene.search.AssertingScorer}
 */
public static IndexSearcher newSearcher(final IndexReader r) {
    final Random random = random();
    if (usually()) {
        // compared to the original implementation, we do not wrap to avoid
        // wrapping into an AssertingAtomicReader
        return random.nextBoolean() ? new IndexSearcher(r) : new IndexSearcher(r.getContext());
    } else {
        int threads = 0;
        final ThreadPoolExecutor ex;
        if (random.nextBoolean()) {
            ex = null;
        } else {
            threads = TestUtil.nextInt(random, 1, 8);
            ex = new ThreadPoolExecutor(threads, threads, 0L, TimeUnit.MILLISECONDS,
                    new LinkedBlockingQueue<Runnable>(), new NamedThreadFactory("LuceneTestCase"));
        }
        if (ex != null) {
            if (VERBOSE) {
                System.out.println("NOTE: newSearcher using ExecutorService with " + threads + " threads");
            }
            r.addReaderClosedListener(new ReaderClosedListener() {
                @Override
                public void onClose(final IndexReader reader) {
                    TestUtil.shutdownExecutorService(ex);
                }
            });
        }
        final IndexSearcher ret = random.nextBoolean() ? new IndexSearcher(r, ex)
                : new IndexSearcher(r.getContext(), ex);
        return ret;
    }
}

From source file:dk.dbc.opensearch.fedora.search.LuceneFieldIndex.java

License:Open Source License

/**
 * For queries that are beforehand known to retrieve all (active) documents
 * from the index, this method can bypass the performance penalty of an
 * actual search, and simply return all documents from an IndexReader.
 * @return all PIDs in index as IPidList object
 * @throws IOException if IndexWriter or IndexReader throws an exception
 */
IPidList getAll() throws IOException {
    IPidList results = null;

    searchManager.maybeRefreshBlocking();
    IndexSearcher localSearcher = searchManager.acquire();
    IndexReader localReader = localSearcher.getIndexReader();

    try {
        PidCollector pidCollector = new PidCollector(pidCollectorMaxInMemory, pidCollectorTmpDir);

        for (AtomicReaderContext context : localReader.getContext().leaves()) {
            AtomicReader subReader = context.reader();
            pidCollector.setNextReader(context);
            Bits liveDocs = subReader.getLiveDocs();

            int numDocs = subReader.numDocs();
            int numDelDocs = subReader.numDeletedDocs();
            log.debug("getAll, reader has {} documents, {} deleted documents", numDocs, numDelDocs);
            for (int i = 0; i < numDocs + numDelDocs; i++) {
                if (liveDocs != null && !liveDocs.get(i)) {
                    // Skip deleted documents
                    log.trace("Skipping deleted document {}", i);
                    continue;
                }
                log.trace("Getting doc id {}", i);
                pidCollector.collect(i);
            }
        }
        results = pidCollector.getResults();
    } finally {
        searchManager.release(localSearcher);
    }
    return results;
}

From source file:dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java

License:Apache License

private static List<String> getSortedSetDocValues(IndexReader reader, int docID, String field)
        throws IOException {
    if (!reader.getContext().isTopLevel) {
        throw new IllegalStateException("Expected the reader to be topLevel");
    }
    for (AtomicReaderContext atom : reader.getContext().leaves()) {
        if (atom.docBase <= docID && atom.docBase + atom.reader().maxDoc() > docID) {
            return getSortedSetDocValues(atom, docID, field);
        }
    }
    throw new IllegalArgumentException("The docID " + docID + " exceeded the index size");
}

From source file:dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java

License:Apache License

private static String getSortedDocValue(IndexReader reader, int docID, String field) throws IOException {
    if (!reader.getContext().isTopLevel) {
        throw new IllegalStateException("Expected the reader to be topLevel");
    }
    for (AtomicReaderContext atom : reader.getContext().leaves()) {
        if (atom.docBase <= docID && atom.docBase + atom.reader().maxDoc() > docID) {
            return getSortedDocValue(atom, docID, field);
        }
    }
    throw new IllegalArgumentException("The docID " + docID + " exceeded the index size");
}

From source file:dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java

License:Apache License

private double getDoubleDocValue(IndexReader reader, int docID, String field) throws IOException {
    if (!reader.getContext().isTopLevel) {
        throw new IllegalStateException("Expected the reader to be topLevel");
    }
    for (AtomicReaderContext atom : reader.getContext().leaves()) {
        if (atom.docBase <= docID && atom.docBase + atom.reader().maxDoc() > docID) {
            return getDoubleDocValue(atom, docID, field);
        }
    }
    throw new IllegalArgumentException("The docID " + docID + " exceeded the index size");
}

From source file:dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java

License:Apache License

private static long getLongDocValue(IndexReader reader, int docID, String field) throws IOException {
    if (!reader.getContext().isTopLevel) {
        throw new IllegalStateException("Expected the reader to be topLevel");
    }
    for (AtomicReaderContext atom : reader.getContext().leaves()) {
        if (atom.docBase <= docID && atom.docBase + atom.reader().maxDoc() > docID) {
            return getLongDocValue(atom, docID, field);
        }
    }
    throw new IllegalArgumentException("The docID " + docID + " exceeded the index size");
}