Example usage for org.apache.lucene.search Scorer score

List of usage examples for org.apache.lucene.search Scorer score

Introduction

On this page you can find the example usage for org.apache.lucene.search Scorer score.

Prototype

public abstract float score() throws IOException;

Source Link

Document

Returns the score of the current document matching the query.

Usage

From source file:aplicacion.sistema.indexer.test.SearchFiles.java

License:Apache License

/**
 * Runs {@code query} with a custom streaming {@link Collector} implementation which
 * simply prints out the global doc id and score of every matching document.
 *
 * <p>This simulates the streaming search use case, where all hits are supposed to
 * be processed, regardless of their relevance.</p>
 *
 * @param searcher searcher to execute the query against
 * @param query    the query to run
 * @throws IOException if reading the index fails
 */
public static void doStreamingSearch(final Searcher searcher, Query query) throws IOException {
    Collector streamingHitCollector = new Collector() {
        private Scorer scorer;
        private int docBase; // base of the current segment, set in setNextReader

        // simply print docId and score of every matching document
        @Override
        public void collect(int doc) throws IOException {
            // BUG FIX: the original wrote "doc=" + doc + docBase, which concatenates
            // the two numbers as strings (e.g. doc=5, docBase=100 printed "5100").
            // The intended value is the global doc id: segment-relative doc + docBase.
            System.out.println("doc=" + (doc + docBase) + " score=" + scorer.score());
        }

        @Override
        public boolean acceptsDocsOutOfOrder() {
            // Order does not matter; every hit is printed independently.
            return true;
        }

        @Override
        public void setNextReader(IndexReader reader, int docBase) throws IOException {
            this.docBase = docBase;
        }

        @Override
        public void setScorer(Scorer scorer) throws IOException {
            this.scorer = scorer;
        }

    };

    searcher.search(query, streamingHitCollector);
}

From source file:com.basistech.lucene.tools.LuceneQueryTool.java

License:Apache License

/**
 * Parses {@code queryString} (or matches all documents when it is {@code null}),
 * validates that every query term refers to a known field, then searches the index
 * and prints matching documents to {@code out} via {@code printDocument}.
 *
 * <p>Uses a streaming {@code Collector} rather than {@code TopDocs} so that memory
 * stays bounded even when the query matches every document.</p>
 *
 * @param queryString the user query, or {@code null} to match all documents
 * @param out         destination stream for printed documents and hit counts
 * @throws IOException if index access fails
 * @throws org.apache.lucene.queryparser.classic.ParseException if the query cannot be parsed
 */
private void runQuery(String queryString, final PrintStream out)
        throws IOException, org.apache.lucene.queryparser.classic.ParseException {
    final IndexSearcher searcher = new IndexSearcher(indexReader);
    docsPrinted = 0; // reset the shared output counter for this run
    Query query;
    if (queryString == null) {
        // No query supplied: iterate the whole index (the "%all" case).
        query = new MatchAllDocsQuery();
    } else {
        if (!queryString.contains(":") && defaultField == null) {
            throw new RuntimeException("query has no ':' and no query-field defined");
        }
        QueryParser queryParser = new QueryParser(defaultField, analyzer);
        queryParser.setLowercaseExpandedTerms(false);
        query = queryParser.parse(queryString).rewrite(indexReader);
        // Reject queries that reference fields not present in the index.
        Set<Term> terms = Sets.newHashSet();
        query.createWeight(searcher, false).extractTerms(terms);
        List<String> invalidFieldNames = Lists.newArrayList();
        for (Term term : terms) {
            if (!allFieldNames.contains(term.field())) {
                invalidFieldNames.add(term.field());
            }
        }
        if (!invalidFieldNames.isEmpty()) {
            throw new RuntimeException("Invalid field names: " + invalidFieldNames);
        }
    }

    final Set<String> fieldSet = Sets.newHashSet(fieldNames);

    // use a Collector instead of TopDocs for memory efficiency, especially
    // for the %all query
    class MyCollector extends SimpleCollector {
        private Scorer scorer;
        private long totalHits; // counted even after the output limit is reached
        private int docBase;    // base of the current leaf, for global doc ids

        @Override
        protected void doSetNextReader(LeafReaderContext context) throws IOException {
            docBase = context.docBase;
        }

        @Override
        public void collect(int id) throws IOException {
            totalHits++;
            // Keep counting hits but stop printing once the limit is hit.
            if (docsPrinted >= outputLimit) {
                return;
            }

            id += docBase;
            // Load only the requested fields when a field set was given.
            Document doc = fieldSet.isEmpty() ? searcher.doc(id) : searcher.doc(id, fieldSet);
            boolean passedFilter = regexField == null;
            if (regexField != null) {
                // Optional post-filter: only print docs whose regexField matches.
                String value = doc.get(regexField);
                if (value != null && regex.matcher(value).matches()) {
                    passedFilter = true;
                }
            }
            if (passedFilter) {
                float score = scorer.score();
                printDocument(doc, id, score, out);
            }
        }

        @Override
        public boolean needsScores() {
            // Scores are printed with each document, so request them.
            return true;
        }

        @Override
        public void setScorer(Scorer scorer) throws IOException {
            this.scorer = scorer;
        }
    }

    MyCollector collector = new MyCollector();
    searcher.search(query, collector);
    if (showHits) {
        out.println("totalHits: " + collector.totalHits);
        out.println();
    }
}

From source file:com.browseengine.bobo.query.RecencyBoostScorerBuilder.java

License:Apache License

/**
 * Wraps {@code innerScorer} so each document's raw score is combined with a
 * recency factor derived from the document's time facet value.
 *
 * @param innerScorer      the scorer producing the base relevance score
 * @param reader           must be a {@code BoboIndexReader} holding the time facet data
 * @param scoreDocsInOrder unused by this implementation
 * @param topScorer        unused by this implementation
 * @return a scorer whose {@code score()} blends time factor and raw score
 * @throws IOException if the inner scorer fails
 * @throws IllegalStateException if the reader or facet data has the wrong type
 */
public Scorer createScorer(final Scorer innerScorer, IndexReader reader, boolean scoreDocsInOrder,
        boolean topScorer) throws IOException {
    if (!(reader instanceof BoboIndexReader)) {
        throw new IllegalStateException("reader not instance of " + BoboIndexReader.class);
    }
    BoboIndexReader boboReader = (BoboIndexReader) reader;
    Object dataObj = boboReader.getFacetData(_timeFacetName);
    if (!(dataObj instanceof FacetDataCache<?>)) {
        throw new IllegalStateException("underlying facet data must be of type FacetDataCache<Long>");
    }
    // FIX: cast the value we already fetched instead of calling getFacetData a
    // second time, as the original did.
    @SuppressWarnings("unchecked")
    FacetDataCache<Long> facetDataCache = (FacetDataCache<Long>) dataObj;
    final BigSegmentedArray orderArray = facetDataCache.orderArray;
    final TermLongList termList = (TermLongList) facetDataCache.valArray;
    return new Scorer(innerScorer.getSimilarity()) {

        @Override
        public float score() throws IOException {
            float rawScore = innerScorer.score();
            // Look up the doc's time value through the facet order array.
            long timeVal = termList.getRawValue(orderArray.get(innerScorer.docID()));
            float timeScore = computeTimeFactor(timeVal);
            return combineScores(timeScore, rawScore);
        }

        @Override
        public int advance(int target) throws IOException {
            return innerScorer.advance(target);
        }

        @Override
        public int docID() {
            return innerScorer.docID();
        }

        @Override
        public int nextDoc() throws IOException {
            return innerScorer.nextDoc();
        }

    };
}

From source file:com.o19s.es.explore.ExplorerQuery.java

License:Apache License

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    // Purpose: build a Weight that scores docs by a corpus-level term statistic
    // (collection-scoped types) or per-doc term frequency stats (_raw_tf types).
    if (!needsScores) {
        // No scores needed: delegate straight to the wrapped query.
        return searcher.createWeight(query, false, boost);
    }
    final Weight subWeight = searcher.createWeight(query, true, boost);
    Set<Term> terms = new HashSet<>();
    subWeight.extractTerms(terms);
    if (isCollectionScoped()) {
        ClassicSimilarity sim = new ClassicSimilarity();
        StatisticsHelper dfStats = new StatisticsHelper();
        StatisticsHelper idfStats = new StatisticsHelper();
        StatisticsHelper ttfStats = new StatisticsHelper();

        // Accumulate doc-frequency, classic idf, and total-term-frequency per term.
        for (Term term : terms) {
            TermContext ctx = TermContext.build(searcher.getTopReaderContext(), term);
            TermStatistics tStats = searcher.termStatistics(term, ctx);
            dfStats.add(tStats.docFreq());
            idfStats.add(sim.idf(tStats.docFreq(), searcher.getIndexReader().numDocs()));
            ttfStats.add(tStats.totalTermFreq());
        }

        final float constantScore = collectionConstantScore(terms, dfStats, idfStats, ttfStats);

        return new ConstantScoreWeight(ExplorerQuery.this, constantScore) {

            @Override
            public Explanation explain(LeafReaderContext context, int doc) throws IOException {
                Scorer scorer = scorer(context);
                int newDoc = scorer.iterator().advance(doc);
                assert newDoc == doc; // this is a DocIdSetIterator.all
                return Explanation.match(scorer.score(), "Stat Score: " + type);
            }

            @Override
            public Scorer scorer(LeafReaderContext context) throws IOException {
                // Every doc matches and gets the same precomputed statistic.
                return new ConstantScoreScorer(this, constantScore,
                        DocIdSetIterator.all(context.reader().maxDoc()));
            }

            @Override
            public boolean isCacheable(LeafReaderContext ctx) {
                return true;
            }

        };
    } else if (type.endsWith("_raw_tf")) {
        // Rewrite this into a boolean query where we can inject our PostingsExplorerQuery
        BooleanQuery.Builder qb = new BooleanQuery.Builder();
        for (Term t : terms) {
            qb.add(new BooleanClause(new PostingsExplorerQuery(t, PostingsExplorerQuery.Type.TF),
                    BooleanClause.Occur.SHOULD));
        }
        // FIXME: completely refactor this class and stop accepting a random query but a list of terms directly
        // rewriting at this point is wrong, additionally we certainly build the TermContext twice for every terms
        // problem is that we rely on extractTerms which happen too late in the process
        Query q = qb.build().rewrite(searcher.getIndexReader());
        return new ExplorerQuery.ExplorerWeight(this, searcher.createWeight(q, true, boost), type);
    }
    throw new IllegalArgumentException("Unknown ExplorerQuery type [" + type + "]");
}

/**
 * Maps {@code type} to the corresponding collection-level statistic.
 *
 * <p>If no terms were parsed in the query we opt for returning 0 instead of
 * throwing an exception that could break various pipelines.</p>
 *
 * @param terms    the extracted query terms (size used for the count type)
 * @param dfStats  per-term raw document frequencies
 * @param idfStats per-term classic idf values
 * @param ttfStats per-term total term frequencies
 * @return the constant score for the selected statistic
 * @throws RuntimeException if {@code type} names no known statistic
 */
private float collectionConstantScore(Set<Term> terms, StatisticsHelper dfStats,
        StatisticsHelper idfStats, StatisticsHelper ttfStats) {
    if (terms.isEmpty()) {
        return 0.0f;
    }
    // NOTE: case labels normalized — the original mixed "case (\"x\"):" and "case \"x\":".
    switch (type) {
    case "sum_classic_idf":
        return idfStats.getSum();
    case "mean_classic_idf":
        return idfStats.getMean();
    case "max_classic_idf":
        return idfStats.getMax();
    case "min_classic_idf":
        return idfStats.getMin();
    case "stddev_classic_idf":
        return idfStats.getStdDev();
    case "sum_raw_df":
        return dfStats.getSum();
    case "min_raw_df":
        return dfStats.getMin();
    case "max_raw_df":
        return dfStats.getMax();
    case "mean_raw_df":
        return dfStats.getMean();
    case "stddev_raw_df":
        return dfStats.getStdDev();
    case "sum_raw_ttf":
        return ttfStats.getSum();
    case "min_raw_ttf":
        return ttfStats.getMin();
    case "max_raw_ttf":
        return ttfStats.getMax();
    case "mean_raw_ttf":
        return ttfStats.getMean();
    case "stddev_raw_ttf":
        return ttfStats.getStdDev();
    case "unique_terms_count":
        return terms.size();
    default:
        throw new RuntimeException("Invalid stat type specified.");
    }
}

From source file:com.o19s.es.ltr.logging.LoggingFetchSubPhase.java

License:Apache License

/**
 * Scores each hit once so that the logging scorers attached to {@code query}
 * emit their feature logs; the score values themselves are discarded.
 *
 * <p>Hits are first sorted by doc id so that all documents belonging to the same
 * segment are scanned with a single reused scorer.</p>
 *
 * @param query    query whose weight produces the logging scorers
 * @param loggers  consumers notified before each hit is scored
 * @param searcher searcher providing segment contexts and weights
 * @param hits     the hits to log; not mutated (a sorted copy is used)
 * @throws IOException if scoring or index access fails
 */
void doLog(Query query, List<HitLogConsumer> loggers, IndexSearcher searcher, SearchHit[] hits)
        throws IOException {
    // Reorder hits by id so we can scan all the docs belonging to the same
    // segment by reusing the same scorer.
    SearchHit[] reordered = new SearchHit[hits.length];
    System.arraycopy(hits, 0, reordered, 0, hits.length);
    Arrays.sort(reordered, Comparator.comparingInt(SearchHit::docId));

    int hitUpto = 0;
    int readerUpto = -1; // index of the current leaf; -1 forces the first advance
    int endDoc = 0;      // exclusive global doc-id bound of the current leaf
    int docBase = 0;
    Scorer scorer = null;
    Weight weight = searcher.createNormalizedWeight(query, true);
    // Loop logic borrowed from lucene QueryRescorer
    while (hitUpto < reordered.length) {
        SearchHit hit = reordered[hitUpto];
        int docID = hit.docId();
        loggers.forEach((l) -> l.nextDoc(hit));
        LeafReaderContext readerContext = null;
        // Advance to the leaf containing this (sorted) hit; may skip several leaves.
        while (docID >= endDoc) {
            readerUpto++;
            readerContext = searcher.getTopReaderContext().leaves().get(readerUpto);
            endDoc = readerContext.docBase + readerContext.reader().maxDoc();
        }

        if (readerContext != null) {
            // We advanced to another segment:
            docBase = readerContext.docBase;
            scorer = weight.scorer(readerContext);
        }

        if (scorer != null) {
            int targetDoc = docID - docBase;
            int actualDoc = scorer.docID();
            if (actualDoc < targetDoc) {
                actualDoc = scorer.iterator().advance(targetDoc);
            }
            if (actualDoc == targetDoc) {
                // Scoring will trigger log collection
                scorer.score();
            }
        }

        hitUpto++;
    }
}

From source file:com.o19s.es.ltr.query.LtrQueryTests.java

License:Apache License

/**
 * Executes a logging ranker query over the test index and captures, per document,
 * the score each feature produced.
 *
 * @param features the prebuilt features to evaluate
 * @return map from each document's "id" field to its feature-ordinal → score map
 * @throws IOException if searching the test index fails
 */
public Map<String, Map<Integer, Float>> getFeatureScores(List<PrebuiltFeature> features) throws IOException {
    final Map<String, Map<Integer, Float>> scoresByDocId = new HashMap<>();
    PrebuiltFeatureSet featureSet = new PrebuiltFeatureSet("test", features);

    // Scratch buffer: refilled by the logger for each document as it is scored.
    final Map<Integer, Float> currentDocScores = new HashMap<>();
    LogLtrRanker.LogConsumer recorder = new LogLtrRanker.LogConsumer() {
        @Override
        public void accept(int featureOrdinal, float score) {
            currentDocScores.put(featureOrdinal, score);
        }

        @Override
        public void reset() {
            currentDocScores.clear();
        }
    };
    RankerQuery logQuery = RankerQuery.buildLogQuery(recorder, featureSet, null, Collections.emptyMap());

    searcherUnderTest.search(logQuery, new SimpleCollector() {
        private LeafReaderContext leaf;
        private Scorer scorer;

        @Override
        public boolean needsScores() {
            return true;
        }

        @Override
        public void setScorer(Scorer scorer) throws IOException {
            this.scorer = scorer;
        }

        @Override
        protected void doSetNextReader(LeafReaderContext context) throws IOException {
            this.leaf = context;
        }

        @Override
        public void collect(int doc) throws IOException {
            // Scoring drives the log consumer, which fills currentDocScores.
            scorer.score();
            Document d = leaf.reader().document(doc);
            // Snapshot the scratch buffer before the next doc overwrites it.
            scoresByDocId.put(d.get("id"), new HashMap<>(currentDocScores));
        }
    });

    return scoresByDocId;
}

From source file:com.o19s.es.ltr.query.LtrScorer.java

License:Apache License

@Override
public float score() throws IOException {
    // Collect each sub-scorer's value for the current document into a feature
    // vector, then let the ranking model compute the final score.
    DataPoint features = new DenseProgramaticDataPoint(_subScorers.size());
    int ordinal = 1; // RankLib feature ordinals are 1-based
    for (Scorer sub : _subScorers) {
        if (sub.docID() < docID()) {
            sub.iterator().advance(docID());
        }
        // A sub-scorer not positioned on this doc contributes a 0 feature value.
        float value = (sub.docID() == docID()) ? sub.score() : 0.0f;
        features.setFeatureValue(ordinal, value);
        ordinal++;
    }
    return (float) _rankModel.eval(features);
}

From source file:com.scsb.crpro.lucene.SearchFiles.java

License:Apache License

/**
 * This method uses a custom HitCollector implementation which simply prints out
 * the docId and score of every matching document. 
 * /* w  ww .  j  a v  a2 s .  c  o  m*/
 *  This simulates the streaming search use case, where all hits are supposed to
 *  be processed, regardless of their relevance.
 */
public static void doStreamingSearch(final Searcher searcher, Query query) throws IOException {
    Collector streamingHitCollector = new Collector() {
        private Scorer scorer;
        private int docBase;

        // simply print docId and score of every matching document
        public void collect(int doc) throws IOException {
            System.out.println("doc=" + doc + docBase + " score=" + scorer.score());
        }

        public boolean acceptsDocsOutOfOrder() {
            return true;
        }

        public void setNextReader(IndexReader reader, int docBase) throws IOException {
            this.docBase = docBase;
        }

        public void setScorer(Scorer scorer) throws IOException {
            this.scorer = scorer;
        }

    };

    searcher.search(query, streamingHitCollector);
}

From source file:com.sindicetech.siren.search.node.TestLuceneProxyNodeScorer.java

License:Open Source License

@Test
public void testAdvanceInfiniteLoop() throws Exception {
    // Regression test: advancing the proxy scorer must terminate and step
    // through matching docs in order, with decreasing scores here.
    this.addDocuments("{ \"baba\" : \"bibi ccc\" , \"ccc\" : \"bbb ccc\" }",
            "{ \"baba bibi baba bibi\" : \"aaa bbb ddd\" }", "{ \"baba bibi\" : \"aaa bbb ddd\" }");

    // Query requiring both "baba" and "bibi"; matches docs 1 and 2 only.
    final Scorer scorer1 = this.getScorer(nbq(must("baba", "bibi")).getLuceneProxyQuery());

    assertTrue(scorer1.advance(0) != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals(1, scorer1.docID());
    assertEquals(2, scorer1.freq(), 0);
    final float score1 = scorer1.score();
    assertTrue(scorer1.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals(2, scorer1.docID());
    assertEquals(2, scorer1.freq(), 0);
    final float score2 = scorer1.score();
    // Doc 1 repeats the terms more densely, so it should outscore doc 2.
    assertTrue(score1 > score2);
    // No further matches: the iterator must report exhaustion, not loop.
    assertTrue(scorer1.nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
}

From source file:de.uop.code.disambiguation.ltr.lucene.query.DisjunctionSumScorer.java

License:Apache License

/**
 * Positions this disjunction on the head sub-scorer's current document and,
 * unless the iterator is exhausted, records the weighted score for clause 0
 * and counts how many sub-scorers match the same document.
 *
 * @throws IOException if a sub-scorer fails while scoring
 */
public void afterNext() throws IOException {
    final Scorer head = this.subScorers[0];
    this.doc = head.docID();
    if (this.doc == NO_MORE_DOCS) {
        // Exhausted: MAX_VALUE signals the caller to stop looping.
        this.nrMatchers = Integer.MAX_VALUE;
        return;
    }
    this.scoreVal = head.score() * this.clauses[0].getWeight();
    this.clauses[0].addFeatureValue(this.docBase, this.doc, (float) this.scoreVal);
    this.nrMatchers = 1;
    this.countMatches(1);
    this.countMatches(2);
}