Usage examples for org.apache.lucene.search.IndexSearcher.getTopReaderContext()
public IndexReaderContext getTopReaderContext()
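All of the examples below follow the same basic pattern: obtain the searcher's top-level IndexReaderContext and use it to build (or validate a cached) TermContext before constructing a Weight. A minimal sketch of that pattern, assuming roughly the Lucene 5.x-7.x TermContext API the examples use (the class and field names here are illustrative, not taken from any of the sources below):

import java.io.IOException;

import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.IndexSearcher;

public class CachedTermStateExample {
    private TermContext cachedTermState; // illustrative field, analogous to perReaderTermState below

    // Returns a TermContext valid for this searcher, rebuilding it only when
    // the searcher's top-level reader context differs from the cached one.
    TermContext termState(IndexSearcher searcher, Term term) throws IOException {
        IndexReaderContext context = searcher.getTopReaderContext();
        if (cachedTermState == null || cachedTermState.topReaderContext != context) {
            // TermContext.build walks every leaf of the given top-level context,
            // so the result is only reusable while that context is unchanged.
            cachedTermState = TermContext.build(context, term);
        }
        return cachedTermState;
    }
}

This is why several of the queries below compare perReaderTermState.topReaderContext against searcher.getTopReaderContext() before reusing a pre-built term state.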
From source file: com.elasticsearch.custom.query.keyword.KeywordTermQuery.java
License: Apache License

@Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
    final IndexReaderContext context = searcher.getTopReaderContext();
    final TermContext termState;
    if (perReaderTermState == null || perReaderTermState.topReaderContext != context) {
        // make TermQuery single-pass if we don't have a PRTS or if the context differs!
        termState = TermContext.build(context, term, true); // cache term lookups!
    } else {
        // PRTS was pre-built for this IndexSearcher
        termState = this.perReaderTermState;
    }
    // we must not ignore the given docFreq - if set, use the given value (lie)
    if (docFreq != -1)
        termState.setDocFreq(docFreq);
    return new TermWeight(searcher, termState);
}
From source file: com.o19s.es.explore.ExplorerQuery.java
License: Apache License

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    if (!needsScores) {
        return searcher.createWeight(query, false, boost);
    }
    final Weight subWeight = searcher.createWeight(query, true, boost);
    Set<Term> terms = new HashSet<>();
    subWeight.extractTerms(terms);
    if (isCollectionScoped()) {
        ClassicSimilarity sim = new ClassicSimilarity();
        StatisticsHelper df_stats = new StatisticsHelper();
        StatisticsHelper idf_stats = new StatisticsHelper();
        StatisticsHelper ttf_stats = new StatisticsHelper();

        for (Term term : terms) {
            TermContext ctx = TermContext.build(searcher.getTopReaderContext(), term);
            TermStatistics tStats = searcher.termStatistics(term, ctx);
            df_stats.add(tStats.docFreq());
            idf_stats.add(sim.idf(tStats.docFreq(), searcher.getIndexReader().numDocs()));
            ttf_stats.add(tStats.totalTermFreq());
        }

        /*
           If no terms are parsed in the query, we opt for returning 0
           instead of throwing an exception that could break various pipelines.
         */
        float constantScore;
        if (terms.size() > 0) {
            switch (type) {
            case "sum_classic_idf":
                constantScore = idf_stats.getSum();
                break;
            case "mean_classic_idf":
                constantScore = idf_stats.getMean();
                break;
            case "max_classic_idf":
                constantScore = idf_stats.getMax();
                break;
            case "min_classic_idf":
                constantScore = idf_stats.getMin();
                break;
            case "stddev_classic_idf":
                constantScore = idf_stats.getStdDev();
                break;
            case "sum_raw_df":
                constantScore = df_stats.getSum();
                break;
            case "min_raw_df":
                constantScore = df_stats.getMin();
                break;
            case "max_raw_df":
                constantScore = df_stats.getMax();
                break;
            case "mean_raw_df":
                constantScore = df_stats.getMean();
                break;
            case "stddev_raw_df":
                constantScore = df_stats.getStdDev();
                break;
            case "sum_raw_ttf":
                constantScore = ttf_stats.getSum();
                break;
            case "min_raw_ttf":
                constantScore = ttf_stats.getMin();
                break;
            case "max_raw_ttf":
                constantScore = ttf_stats.getMax();
                break;
            case "mean_raw_ttf":
                constantScore = ttf_stats.getMean();
                break;
            case "stddev_raw_ttf":
                constantScore = ttf_stats.getStdDev();
                break;
            case "unique_terms_count":
                constantScore = terms.size();
                break;
            default:
                throw new RuntimeException("Invalid stat type specified.");
            }
        } else {
            constantScore = 0.0f;
        }

        return new ConstantScoreWeight(ExplorerQuery.this, constantScore) {
            @Override
            public Explanation explain(LeafReaderContext context, int doc) throws IOException {
                Scorer scorer = scorer(context);
                int newDoc = scorer.iterator().advance(doc);
                assert newDoc == doc; // this is a DocIdSetIterator.all
                return Explanation.match(scorer.score(), "Stat Score: " + type);
            }

            @Override
            public Scorer scorer(LeafReaderContext context) throws IOException {
                return new ConstantScoreScorer(this, constantScore, DocIdSetIterator.all(context.reader().maxDoc()));
            }

            @Override
            public boolean isCacheable(LeafReaderContext ctx) {
                return true;
            }
        };
    } else if (type.endsWith("_raw_tf")) {
        // Rewrite this into a boolean query where we can inject our PostingsExplorerQuery
        BooleanQuery.Builder qb = new BooleanQuery.Builder();
        for (Term t : terms) {
            qb.add(new BooleanClause(new PostingsExplorerQuery(t, PostingsExplorerQuery.Type.TF),
                    BooleanClause.Occur.SHOULD));
        }
        // FIXME: completely refactor this class to accept a list of terms directly rather than an arbitrary query.
        // Rewriting at this point is wrong; additionally we almost certainly build the TermContext twice for every term.
        // The problem is that we rely on extractTerms, which happens too late in the process.
        Query q = qb.build().rewrite(searcher.getIndexReader());
        return new ExplorerQuery.ExplorerWeight(this, searcher.createWeight(q, true, boost), type);
    }
    throw new IllegalArgumentException("Unknown ExplorerQuery type [" + type + "]");
}
From source file: com.o19s.es.explore.PostingsExplorerQuery.java
License: Apache License

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    IndexReaderContext context = searcher.getTopReaderContext();
    assert needsScores : "Should not be used in filtering mode";
    return new PostingsExplorerWeight(this, this.term, TermContext.build(context, this.term), this.type);
}
From source file: com.o19s.es.ltr.logging.LoggingFetchSubPhase.java
License: Apache License

void doLog(Query query, List<HitLogConsumer> loggers, IndexSearcher searcher, SearchHit[] hits) throws IOException {
    // Reorder hits by id so we can scan all the docs belonging to the same
    // segment by reusing the same scorer.
    SearchHit[] reordered = new SearchHit[hits.length];
    System.arraycopy(hits, 0, reordered, 0, hits.length);
    Arrays.sort(reordered, Comparator.comparingInt(SearchHit::docId));

    int hitUpto = 0;
    int readerUpto = -1;
    int endDoc = 0;
    int docBase = 0;
    Scorer scorer = null;
    Weight weight = searcher.createNormalizedWeight(query, true);
    // Loop logic borrowed from Lucene's QueryRescorer
    while (hitUpto < reordered.length) {
        SearchHit hit = reordered[hitUpto];
        int docID = hit.docId();
        loggers.forEach((l) -> l.nextDoc(hit));
        LeafReaderContext readerContext = null;
        while (docID >= endDoc) {
            readerUpto++;
            readerContext = searcher.getTopReaderContext().leaves().get(readerUpto);
            endDoc = readerContext.docBase + readerContext.reader().maxDoc();
        }
        if (readerContext != null) {
            // We advanced to another segment:
            docBase = readerContext.docBase;
            scorer = weight.scorer(readerContext);
        }
        if (scorer != null) {
            int targetDoc = docID - docBase;
            int actualDoc = scorer.docID();
            if (actualDoc < targetDoc) {
                actualDoc = scorer.iterator().advance(targetDoc);
            }
            if (actualDoc == targetDoc) {
                // Scoring will trigger log collection
                scorer.score();
            }
        }
        hitUpto++;
    }
}
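The loop above resolves each hit's global doc id to a segment by walking searcher.getTopReaderContext().leaves() in order, which works because the hits were sorted by doc id first. For a single unsorted doc id, the same mapping can be done with a binary search over the leaves; a small sketch, assuming Lucene's org.apache.lucene.index.ReaderUtil.subIndex helper (the class and resolveLeaf names are illustrative):

import java.util.List;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.search.IndexSearcher;

public class LeafLookupExample {
    // Illustrative helper: find the segment that contains a top-level doc id.
    static LeafReaderContext resolveLeaf(IndexSearcher searcher, int globalDocId) {
        List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
        int idx = ReaderUtil.subIndex(globalDocId, leaves); // binary search on docBase
        LeafReaderContext leaf = leaves.get(idx);
        // The leaf-local doc id to pass to a per-segment Scorer is globalDocId - leaf.docBase.
        return leaf;
    }
}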
From source file: com.shaie.annots.MultiPositionSpanTermQuery.java
License: Apache License

@Override
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    final IndexReaderContext topContext = searcher.getTopReaderContext();
    final TermContext context;
    if (termContext == null || termContext.wasBuiltFor(topContext) == false) {
        context = TermContext.build(topContext, term);
    } else {
        context = termContext;
    }
    final Map<Term, TermContext> terms = needsScores ? Collections.singletonMap(term, context) : null;
    return new SpanTermWeight(context, searcher, terms, boost) {
        @Override
        public Spans getSpans(LeafReaderContext context, Postings requiredPostings) throws IOException {
            final TermSpans spans = (TermSpans) super.getSpans(context, requiredPostings.atLeast(Postings.PAYLOADS));
            if (spans == null) { // term is not present in that reader
                assert context.reader().docFreq(term) == 0 : "no term exists in reader term=" + term;
                return null;
            }
            return new Spans() {
                private final PositionSpansCollector payloadCollector = new PositionSpansCollector();
                private int end = -1;

                @Override
                public int advance(int target) throws IOException {
                    end = -1;
                    return spans.advance(target);
                }

                @Override
                public void collect(SpanCollector collector) throws IOException {
                    spans.collect(collector);
                }

                @Override
                public long cost() {
                    return spans.cost();
                }

                @Override
                public int docID() {
                    return spans.docID();
                }

                @Override
                public int endPosition() {
                    return end;
                }

                @Override
                public int nextDoc() throws IOException {
                    end = -1;
                    return spans.nextDoc();
                }

                @Override
                public int nextStartPosition() throws IOException {
                    final int pos = spans.nextStartPosition();
                    if (pos == NO_MORE_POSITIONS) {
                        end = NO_MORE_POSITIONS;
                        return NO_MORE_POSITIONS;
                    }
                    spans.collect(payloadCollector);
                    end = payloadCollector.payloadValue + pos;
                    return pos;
                }

                @Override
                public float positionsCost() {
                    return spans.positionsCost();
                }

                @Override
                public int startPosition() {
                    return spans.startPosition();
                }

                @Override
                public int width() {
                    return spans.width();
                }
            };
        }
    };
}
From source file: com.sindicetech.siren.search.node.NodeTermQuery.java
License: Open Source License

@Override
public Weight createWeight(final IndexSearcher searcher) throws IOException {
    final IndexReaderContext context = searcher.getTopReaderContext();
    final TermContext termState;
    if (perReaderTermState == null || perReaderTermState.topReaderContext != context) {
        // make TermQuery single-pass if we don't have a PRTS or if the context differs!
        termState = TermContext.build(context, term); // cache term lookups!
    } else {
        // PRTS was pre-built for this IndexSearcher
        termState = this.perReaderTermState;
    }
    // we must not ignore the given docFreq - if set, use the given value (lie)
    if (docFreq != -1)
        termState.setDocFreq(docFreq);
    return new NodeTermWeight(searcher, termState);
}
From source file: com.sindicetech.siren.search.spans.TermSpanQuery.java
License: Open Source License

@Override
public Weight createWeight(final IndexSearcher searcher) throws IOException {
    final IndexReaderContext context = searcher.getTopReaderContext();
    final TermContext termState = TermContext.build(context, term);
    return new TermSpanWeight(searcher, termState);
}
From source file: com.xiaomi.linden.lucene.query.flexiblequery.FlexibleWeight.java
License: Apache License

public FlexibleWeight(FlexibleQuery query, IndexSearcher searcher) throws IOException {
    this.query = query;
    this.similarity = searcher.getSimilarity();
    final IndexReaderContext context = searcher.getTopReaderContext();

    int[] maxDocFreqs = null;
    long[] maxTotalTermFreqs = null;
    Map<Term, TermContext> builtTermMap = new HashMap<>();
    if (query.enableGlobalIDF()) {
        FlexibleQuery.FlexibleTerm[][] globalTerms = query.getGlobalTerms();
        TermContext[][] globalStates = new TermContext[globalTerms.length][];
        for (int i = 0; i < globalTerms.length; ++i) {
            globalStates[i] = new TermContext[globalTerms[i].length];
            for (int j = 0; j < globalTerms[i].length; ++j) {
                Term term = globalTerms[i][j].term;
                TermContext termContext = builtTermMap.get(term);
                if (termContext != null) {
                    globalStates[i][j] = termContext;
                } else {
                    globalStates[i][j] = TermContext.build(context, globalTerms[i][j].term);
                    builtTermMap.put(term, globalStates[i][j]);
                }
            }
        }
        maxDocFreqs = new int[globalTerms[0].length];
        maxTotalTermFreqs = new long[globalTerms[0].length];
        int fieldLength = globalTerms.length;
        int termLength = globalTerms[0].length;
        for (int i = 0; i < termLength; ++i) {
            int maxDocFreq = 0;
            long maxTotalTermFreq = 0;
            for (int j = 0; j < fieldLength; ++j) {
                maxDocFreq = Math.max(globalStates[j][i].docFreq(), maxDocFreq);
                maxTotalTermFreq = Math.max(globalStates[j][i].totalTermFreq(), maxTotalTermFreq);
            }
            maxDocFreqs[i] = maxDocFreq;
            maxTotalTermFreqs[i] = maxTotalTermFreq;
        }
    }

    FlexibleQuery.FlexibleTerm[][] terms = query.getTerms();
    TermContext[][] states = new TermContext[terms.length][];
    for (int i = 0; i < terms.length; ++i) {
        states[i] = new TermContext[terms[i].length];
        for (int j = 0; j < terms[i].length; ++j) {
            Term term = terms[i][j].term;
            TermContext termContext = builtTermMap.get(term);
            if (termContext != null) {
                states[i][j] = termContext;
            } else {
                states[i][j] = TermContext.build(context, terms[i][j].term);
                builtTermMap.put(term, states[i][j]);
            }
        }
    }

    termStatsMatrix = new TermStats[terms.length][];
    for (int i = 0; i < terms.length; ++i) {
        termStatsMatrix[i] = new TermStats[terms[i].length];
        for (int j = 0; j < terms[i].length; ++j) {
            FlexibleQuery.FlexibleTerm term = terms[i][j];
            TermContext state = states[i][j];
            TermStatistics termStats;
            if (query.enableGlobalIDF()) {
                termStats = new TermStatistics(term.term.bytes(), maxDocFreqs[j], maxTotalTermFreqs[j]);
            } else {
                termStats = searcher.termStatistics(term.term, state);
            }
            Similarity.SimWeight stats = similarity.computeWeight(term.boost,
                    searcher.collectionStatistics(term.term.field()), termStats);
            TermStats termStatsInfo = new TermStats();
            termStatsInfo.stats = stats;
            termStatsInfo.term = term.term;
            termStatsInfo.termContext = state;
            termStatsMatrix[i][j] = termStatsInfo;
        }
    }
}
From source file: de.jetsli.lumeo.RawLucene.java
License: Apache License

public Document findById(final long id) {
    // Check cache
    IndexOp result = getCurrentRTCache(latestGen).get(id);
    if (result != null) {
        if (result.type == IndexOp.Type.DELETE)
            return null;
        return result.document;
    }
    return searchSomething(new SearchExecutor<Document>() {

        @Override
        public Document execute(IndexSearcher searcher) throws Exception {
            // TODO optimize via indexReader.termDocsEnum !?
            IndexReaderContext trc = searcher.getTopReaderContext();
            AtomicReaderContext[] arc = trc.leaves();
            for (int i = 0; i < arc.length; i++) {
                AtomicReader subreader = arc[i].reader();
                DocsEnum docs = subreader.terms(UID).iterator(null).docs(subreader.getLiveDocs(), null, false);
                if (docs != null) {
                    int docID = docs.nextDoc();
                    if (docID != DocsEnum.NO_MORE_DOCS) {
                        return subreader.document(docID);
                    }
                }
            }
            return null;
        }
    });
}
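The example above targets the early Lucene 4.x API (AtomicReaderContext, DocsEnum, array-valued leaves()). Against Lucene 5+, the same per-leaf term lookup would look roughly like the sketch below; this is an assumption-labeled adaptation, not code from the RawLucene source (the FindByTermExample class, findByTerm name, and uidTerm parameter are illustrative):

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.util.Bits;

public class FindByTermExample {
    // Illustrative Lucene 5+ version of the per-leaf lookup above: walk the
    // leaves of the top reader context and probe each segment's postings.
    static Document findByTerm(IndexSearcher searcher, Term uidTerm) throws IOException {
        for (LeafReaderContext leaf : searcher.getTopReaderContext().leaves()) {
            LeafReader reader = leaf.reader();
            Terms terms = reader.terms(uidTerm.field());
            if (terms == null) {
                continue; // field does not exist in this segment
            }
            TermsEnum termsEnum = terms.iterator();
            if (!termsEnum.seekExact(uidTerm.bytes())) {
                continue; // term does not exist in this segment
            }
            PostingsEnum postings = termsEnum.postings(null, PostingsEnum.NONE);
            Bits liveDocs = reader.getLiveDocs();
            for (int doc = postings.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = postings.nextDoc()) {
                if (liveDocs == null || liveDocs.get(doc)) { // skip deleted docs
                    return reader.document(doc);
                }
            }
        }
        return null;
    }
}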
From source file: de.uop.code.disambiguation.ltr.lucene.query.TermQuery.java
License: Apache License

@Override
public Weight createWeight(final IndexSearcher searcher) throws IOException {
    final IndexReaderContext context = searcher.getTopReaderContext();
    final TermContext termState;
    if ((this.perReaderTermS == null) || (this.perReaderTermS.topReaderContext != context)) {
        // make TermQuery single-pass if we don't have a PRTS or if the context differs!
        termState = TermContext.build(context, this.term);
    } else {
        // PRTS was pre-built for this IndexSearcher
        termState = this.perReaderTermS;
    }
    // we must not ignore the given docFreq - if set, use the given value (lie)
    if (this.docFreq != -1) {
        termState.setDocFreq(this.docFreq);
    }
    return new TermWeight(searcher, termState);
}