List of usage examples for org.apache.lucene.search Scorer score
public abstract float score() throws IOException;
From source file:aplicacion.sistema.indexer.test.SearchFiles.java
License:Apache License
/** * This method uses a custom HitCollector implementation which simply prints out * the docId and score of every matching document. * // w w w. j av a2s . c o m * This simulates the streaming search use case, where all hits are supposed to * be processed, regardless of their relevance. */ public static void doStreamingSearch(final Searcher searcher, Query query) throws IOException { Collector streamingHitCollector = new Collector() { private Scorer scorer; private int docBase; // simply print docId and score of every matching document @Override public void collect(int doc) throws IOException { System.out.println("doc=" + doc + docBase + " score=" + scorer.score()); } @Override public boolean acceptsDocsOutOfOrder() { return true; } @Override public void setNextReader(IndexReader reader, int docBase) throws IOException { this.docBase = docBase; } @Override public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; } }; searcher.search(query, streamingHitCollector); }
From source file:com.basistech.lucene.tools.LuceneQueryTool.java
License:Apache License
/**
 * Parses and executes {@code queryString} against the index, printing each
 * matching document (up to {@code outputLimit}) to {@code out}.
 *
 * <p>A {@code null} query string matches all documents. Parsed queries are
 * validated so that every referenced field actually exists in the index.
 * Collection uses a custom {@link SimpleCollector} rather than TopDocs for
 * memory efficiency on very large result sets (e.g. the %all query).
 *
 * @param queryString the query to parse, or {@code null} for match-all
 * @param out         destination stream for matching documents
 * @throws IOException      if index access fails
 * @throws org.apache.lucene.queryparser.classic.ParseException if the query
 *                          string cannot be parsed
 */
private void runQuery(String queryString, final PrintStream out)
        throws IOException, org.apache.lucene.queryparser.classic.ParseException {
    final IndexSearcher searcher = new IndexSearcher(indexReader);
    docsPrinted = 0;
    Query query;
    if (queryString == null) {
        query = new MatchAllDocsQuery();
    } else {
        // Without a ':' the parser would need a default field to search.
        if (!queryString.contains(":") && defaultField == null) {
            throw new RuntimeException("query has no ':' and no query-field defined");
        }
        QueryParser queryParser = new QueryParser(defaultField, analyzer);
        queryParser.setLowercaseExpandedTerms(false);
        query = queryParser.parse(queryString).rewrite(indexReader);
        // Extract the rewritten query's terms to validate field names up front.
        Set<Term> terms = Sets.newHashSet();
        query.createWeight(searcher, false).extractTerms(terms);
        List<String> invalidFieldNames = Lists.newArrayList();
        for (Term term : terms) {
            if (!allFieldNames.contains(term.field())) {
                invalidFieldNames.add(term.field());
            }
        }
        if (!invalidFieldNames.isEmpty()) {
            throw new RuntimeException("Invalid field names: " + invalidFieldNames);
        }
    }
    final Set<String> fieldSet = Sets.newHashSet(fieldNames);
    // use a Collector instead of TopDocs for memory efficiency, especially
    // for the %all query
    class MyCollector extends SimpleCollector {
        private Scorer scorer;
        private long totalHits;
        private int docBase;

        @Override
        protected void doSetNextReader(LeafReaderContext context) throws IOException {
            // Track the segment offset to convert per-segment ids to global ids.
            docBase = context.docBase;
        }

        @Override
        public void collect(int id) throws IOException {
            // Count every hit, even past the print limit, so totalHits is exact.
            totalHits++;
            if (docsPrinted >= outputLimit) {
                return;
            }
            id += docBase;
            // Load only the requested fields when a field set was given.
            Document doc = fieldSet.isEmpty() ? searcher.doc(id) : searcher.doc(id, fieldSet);
            // No regex filter configured means every document passes.
            boolean passedFilter = regexField == null;
            if (regexField != null) {
                String value = doc.get(regexField);
                if (value != null && regex.matcher(value).matches()) {
                    passedFilter = true;
                }
            }
            if (passedFilter) {
                float score = scorer.score();
                printDocument(doc, id, score, out);
            }
        }

        @Override
        public boolean needsScores() {
            // Scores are printed with each document, so always request them.
            return true;
        }

        @Override
        public void setScorer(Scorer scorer) throws IOException {
            this.scorer = scorer;
        }
    }
    MyCollector collector = new MyCollector();
    searcher.search(query, collector);
    if (showHits) {
        out.println("totalHits: " + collector.totalHits);
        out.println();
    }
}
From source file:com.browseengine.bobo.query.RecencyBoostScorerBuilder.java
License:Apache License
public Scorer createScorer(final Scorer innerScorer, IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException { if (reader instanceof BoboIndexReader) { BoboIndexReader boboReader = (BoboIndexReader) reader; Object dataObj = boboReader.getFacetData(_timeFacetName); if (dataObj instanceof FacetDataCache<?>) { FacetDataCache<Long> facetDataCache = (FacetDataCache<Long>) (boboReader .getFacetData(_timeFacetName)); final BigSegmentedArray orderArray = facetDataCache.orderArray; final TermLongList termList = (TermLongList) facetDataCache.valArray; return new Scorer(innerScorer.getSimilarity()) { @Override// w w w . ja v a2s . c o m public float score() throws IOException { float rawScore = innerScorer.score(); long timeVal = termList.getRawValue(orderArray.get(innerScorer.docID())); float timeScore = computeTimeFactor(timeVal); return combineScores(timeScore, rawScore); } @Override public int advance(int target) throws IOException { return innerScorer.advance(target); } @Override public int docID() { return innerScorer.docID(); } @Override public int nextDoc() throws IOException { return innerScorer.nextDoc(); } }; } else { throw new IllegalStateException("underlying facet data must be of type FacetDataCache<Long>"); } } else { throw new IllegalStateException("reader not instance of " + BoboIndexReader.class); } }
From source file:com.o19s.es.explore.ExplorerQuery.java
License:Apache License
@Override public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException { if (!needsScores) { return searcher.createWeight(query, false, boost); }/*from w w w . j a v a 2 s . c om*/ final Weight subWeight = searcher.createWeight(query, true, boost); Set<Term> terms = new HashSet<>(); subWeight.extractTerms(terms); if (isCollectionScoped()) { ClassicSimilarity sim = new ClassicSimilarity(); StatisticsHelper df_stats = new StatisticsHelper(); StatisticsHelper idf_stats = new StatisticsHelper(); StatisticsHelper ttf_stats = new StatisticsHelper(); for (Term term : terms) { TermContext ctx = TermContext.build(searcher.getTopReaderContext(), term); TermStatistics tStats = searcher.termStatistics(term, ctx); df_stats.add(tStats.docFreq()); idf_stats.add(sim.idf(tStats.docFreq(), searcher.getIndexReader().numDocs())); ttf_stats.add(tStats.totalTermFreq()); } /* If no terms are parsed in the query we opt for returning 0 instead of throwing an exception that could break various pipelines. 
*/ float constantScore; if (terms.size() > 0) { switch (type) { case ("sum_classic_idf"): constantScore = idf_stats.getSum(); break; case ("mean_classic_idf"): constantScore = idf_stats.getMean(); break; case ("max_classic_idf"): constantScore = idf_stats.getMax(); break; case ("min_classic_idf"): constantScore = idf_stats.getMin(); break; case ("stddev_classic_idf"): constantScore = idf_stats.getStdDev(); break; case "sum_raw_df": constantScore = df_stats.getSum(); break; case "min_raw_df": constantScore = df_stats.getMin(); break; case "max_raw_df": constantScore = df_stats.getMax(); break; case "mean_raw_df": constantScore = df_stats.getMean(); break; case "stddev_raw_df": constantScore = df_stats.getStdDev(); break; case "sum_raw_ttf": constantScore = ttf_stats.getSum(); break; case "min_raw_ttf": constantScore = ttf_stats.getMin(); break; case "max_raw_ttf": constantScore = ttf_stats.getMax(); break; case "mean_raw_ttf": constantScore = ttf_stats.getMean(); break; case "stddev_raw_ttf": constantScore = ttf_stats.getStdDev(); break; case "unique_terms_count": constantScore = terms.size(); break; default: throw new RuntimeException("Invalid stat type specified."); } } else { constantScore = 0.0f; } return new ConstantScoreWeight(ExplorerQuery.this, constantScore) { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { Scorer scorer = scorer(context); int newDoc = scorer.iterator().advance(doc); assert newDoc == doc; // this is a DocIdSetIterator.all return Explanation.match(scorer.score(), "Stat Score: " + type); } @Override public Scorer scorer(LeafReaderContext context) throws IOException { return new ConstantScoreScorer(this, constantScore, DocIdSetIterator.all(context.reader().maxDoc())); } @Override public boolean isCacheable(LeafReaderContext ctx) { return true; } }; } else if (type.endsWith("_raw_tf")) { // Rewrite this into a boolean query where we can inject our PostingsExplorerQuery BooleanQuery.Builder qb = new 
BooleanQuery.Builder(); for (Term t : terms) { qb.add(new BooleanClause(new PostingsExplorerQuery(t, PostingsExplorerQuery.Type.TF), BooleanClause.Occur.SHOULD)); } // FIXME: completely refactor this class and stop accepting a random query but a list of terms directly // rewriting at this point is wrong, additionally we certainly build the TermContext twice for every terms // problem is that we rely on extractTerms which happen too late in the process Query q = qb.build().rewrite(searcher.getIndexReader()); return new ExplorerQuery.ExplorerWeight(this, searcher.createWeight(q, true, boost), type); } throw new IllegalArgumentException("Unknown ExplorerQuery type [" + type + "]"); }
From source file:com.o19s.es.ltr.logging.LoggingFetchSubPhase.java
License:Apache License
/**
 * Scores every hit with {@code query} so that the attached loggers capture
 * per-hit feature values as a side effect of scoring.
 *
 * @param query    the (logging-instrumented) query to score hits with
 * @param loggers  consumers notified before each hit is scored
 * @param searcher searcher over the index the hits came from
 * @param hits     the search hits to log; not modified
 * @throws IOException if index access fails
 */
void doLog(Query query, List<HitLogConsumer> loggers, IndexSearcher searcher, SearchHit[] hits)
        throws IOException {
    // Reorder hits by id so we can scan all the docs belonging to the same
    // segment by reusing the same scorer.
    SearchHit[] reordered = new SearchHit[hits.length];
    System.arraycopy(hits, 0, reordered, 0, hits.length);
    Arrays.sort(reordered, Comparator.comparingInt(SearchHit::docId));
    int hitUpto = 0;
    int readerUpto = -1;
    int endDoc = 0;
    int docBase = 0;
    Scorer scorer = null;
    Weight weight = searcher.createNormalizedWeight(query, true);
    // Loop logic borrowed from lucene QueryRescorer
    while (hitUpto < reordered.length) {
        SearchHit hit = reordered[hitUpto];
        int docID = hit.docId();
        // Tell the loggers which hit the next score() call belongs to.
        loggers.forEach((l) -> l.nextDoc(hit));
        LeafReaderContext readerContext = null;
        // Advance to the leaf (segment) containing this global docID.
        while (docID >= endDoc) {
            readerUpto++;
            readerContext = searcher.getTopReaderContext().leaves().get(readerUpto);
            endDoc = readerContext.docBase + readerContext.reader().maxDoc();
        }
        if (readerContext != null) {
            // We advanced to another segment:
            docBase = readerContext.docBase;
            scorer = weight.scorer(readerContext);
        }
        if (scorer != null) {
            // Position the scorer on the per-segment doc, if it matches.
            int targetDoc = docID - docBase;
            int actualDoc = scorer.docID();
            if (actualDoc < targetDoc) {
                actualDoc = scorer.iterator().advance(targetDoc);
            }
            if (actualDoc == targetDoc) {
                // Scoring will trigger log collection
                scorer.score();
            }
        }
        hitUpto++;
    }
}
From source file:com.o19s.es.ltr.query.LtrQueryTests.java
License:Apache License
/**
 * Runs a logging RankerQuery over {@code searcherUnderTest} and collects, for
 * every matching document, the per-feature scores produced by {@code features}.
 *
 * @param features the prebuilt features to score and log
 * @return a map from the document's "id" field value to a map of
 *         feature ordinal -> feature score for that document
 * @throws IOException if the search fails
 */
public Map<String, Map<Integer, Float>> getFeatureScores(List<PrebuiltFeature> features) throws IOException {
    Map<String, Map<Integer, Float>> featuresPerDoc = new HashMap<>();
    PrebuiltFeatureSet set = new PrebuiltFeatureSet("test", features);
    // Scratch map the log consumer fills while one document is being scored.
    Map<Integer, Float> collectedScores = new HashMap<>();
    LogLtrRanker.LogConsumer logger = new LogLtrRanker.LogConsumer() {
        @Override
        public void accept(int featureOrdinal, float score) {
            collectedScores.put(featureOrdinal, score);
        }

        @Override
        public void reset() {
            collectedScores.clear();
        }
    };
    RankerQuery query = RankerQuery.buildLogQuery(logger, set, null, Collections.emptyMap());
    searcherUnderTest.search(query, new SimpleCollector() {
        private LeafReaderContext context;
        private Scorer scorer;

        @Override
        public boolean needsScores() {
            // Scoring is what drives the feature logging, so always score.
            return true;
        }

        @Override
        public void setScorer(Scorer scorer) throws IOException {
            this.scorer = scorer;
        }

        @Override
        protected void doSetNextReader(LeafReaderContext context) throws IOException {
            this.context = context;
        }

        @Override
        public void collect(int doc) throws IOException {
            // score() is called for its side effect: it populates
            // collectedScores via the log consumer above.
            scorer.score();
            Document d = context.reader().document(doc);
            // Snapshot the scratch map per document; it is reused/cleared.
            featuresPerDoc.put(d.get("id"), new HashMap<>(collectedScores));
        }
    });
    return featuresPerDoc;
}
From source file:com.o19s.es.ltr.query.LtrScorer.java
License:Apache License
@Override public float score() throws IOException { DataPoint allScores = new DenseProgramaticDataPoint(_subScorers.size()); int featureIdx = 1; // RankLib is 1-based for (Scorer scorer : _subScorers) { if (scorer.docID() < docID()) { scorer.iterator().advance(docID()); }//from w w w .j av a 2 s . c o m float featureVal = 0.0f; if (scorer.docID() == docID()) { featureVal = scorer.score(); } //System.out.printf("Doc %d, feature %d, val %f\n", docID(), featureIdx, featureVal); allScores.setFeatureValue(featureIdx, featureVal); featureIdx++; } float score = (float) _rankModel.eval(allScores); //System.out.printf("Doc %d, score %f\n", docID(), score); return score; }
From source file:com.scsb.crpro.lucene.SearchFiles.java
License:Apache License
/** * This method uses a custom HitCollector implementation which simply prints out * the docId and score of every matching document. * /* w ww . j a v a2 s . c o m*/ * This simulates the streaming search use case, where all hits are supposed to * be processed, regardless of their relevance. */ public static void doStreamingSearch(final Searcher searcher, Query query) throws IOException { Collector streamingHitCollector = new Collector() { private Scorer scorer; private int docBase; // simply print docId and score of every matching document public void collect(int doc) throws IOException { System.out.println("doc=" + doc + docBase + " score=" + scorer.score()); } public boolean acceptsDocsOutOfOrder() { return true; } public void setNextReader(IndexReader reader, int docBase) throws IOException { this.docBase = docBase; } public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; } }; searcher.search(query, streamingHitCollector); }
From source file:com.sindicetech.siren.search.node.TestLuceneProxyNodeScorer.java
License:Open Source License
/**
 * Regression test: advancing and then iterating a LuceneProxyNode scorer must
 * terminate (no infinite loop) and produce the expected doc ids, frequencies,
 * and monotonically decreasing scores across the two matching documents.
 */
@Test
public void testAdvanceInfiniteLoop() throws Exception {
    // Three JSON docs; only the last two contain both "baba" and "bibi".
    this.addDocuments("{ \"baba\" : \"bibi ccc\" , \"ccc\" : \"bbb ccc\" }",
            "{ \"baba bibi baba bibi\" : \"aaa bbb ddd\" }", "{ \"baba bibi\" : \"aaa bbb ddd\" }");
    final Scorer scorer1 = this.getScorer(nbq(must("baba", "bibi")).getLuceneProxyQuery());
    // advance(0) must land on the first match (doc 1) rather than looping.
    assertTrue(scorer1.advance(0) != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals(1, scorer1.docID());
    assertEquals(2, scorer1.freq(), 0);
    final float score1 = scorer1.score();
    // nextDoc() must reach the second match (doc 2).
    assertTrue(scorer1.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals(2, scorer1.docID());
    assertEquals(2, scorer1.freq(), 0);
    final float score2 = scorer1.score();
    // Doc 1's denser match is expected to outscore doc 2's.
    assertTrue(score1 > score2);
    // Iteration must be exhausted after the two matches.
    assertTrue(scorer1.nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
}
From source file:de.uop.code.disambiguation.ltr.lucene.query.DisjunctionSumScorer.java
License:Apache License
/**
 * Updates this disjunction's state after the sub-scorer heap has advanced:
 * records the new current doc, seeds the weighted score from the heap's top
 * sub-scorer, logs its feature value, and counts the remaining matchers.
 *
 * @throws IOException if scoring the top sub-scorer fails
 */
public void afterNext() throws IOException {
    final Scorer sub = this.subScorers[0];
    this.doc = sub.docID();
    if (this.doc == NO_MORE_DOCS) {
        this.nrMatchers = Integer.MAX_VALUE; // stop looping
    } else {
        // Seed the score with the top sub-scorer's weighted contribution.
        this.scoreVal = sub.score() * this.clauses[0].getWeight();
        this.clauses[0].addFeatureValue(this.docBase, this.doc, (float) this.scoreVal);
        this.nrMatchers = 1;
        // Recurse into both children of the heap root to accumulate the
        // scores of any other sub-scorers positioned on the same doc.
        this.countMatches(1);
        this.countMatches(2);
    }
}