Example usage for org.apache.lucene.search ConstantScoreWeight ConstantScoreWeight

List of usage examples for org.apache.lucene.search ConstantScoreWeight ConstantScoreWeight

Introduction

On this page you can find example usage for org.apache.lucene.search ConstantScoreWeight ConstantScoreWeight.

Prototype

protected ConstantScoreWeight(Query query, float score) 

Source Link

Usage

From source file:com.o19s.es.explore.ExplorerQuery.java

License:Apache License

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    // When scores are not needed, delegate straight to the wrapped query's weight.
    if (!needsScores) {
        return searcher.createWeight(query, false, boost);
    }
    final Weight subWeight = searcher.createWeight(query, true, boost);
    Set<Term> terms = new HashSet<>();
    subWeight.extractTerms(terms);
    if (isCollectionScoped()) {
        // Collection-scoped stats: aggregate per-term statistics across the whole index
        // and emit a single constant score for every document.
        ClassicSimilarity sim = new ClassicSimilarity();
        StatisticsHelper df_stats = new StatisticsHelper();
        StatisticsHelper idf_stats = new StatisticsHelper();
        StatisticsHelper ttf_stats = new StatisticsHelper();

        for (Term term : terms) {
            TermContext ctx = TermContext.build(searcher.getTopReaderContext(), term);
            TermStatistics tStats = searcher.termStatistics(term, ctx);
            df_stats.add(tStats.docFreq());
            idf_stats.add(sim.idf(tStats.docFreq(), searcher.getIndexReader().numDocs()));
            ttf_stats.add(tStats.totalTermFreq());
        }

        /*
        If no terms are parsed in the query we opt for returning 0
        instead of throwing an exception that could break various
        pipelines.
         */
        float constantScore;

        if (terms.size() > 0) {
            switch (type) {
            case "sum_classic_idf":
                constantScore = idf_stats.getSum();
                break;
            case "mean_classic_idf":
                constantScore = idf_stats.getMean();
                break;
            case "max_classic_idf":
                constantScore = idf_stats.getMax();
                break;
            case "min_classic_idf":
                constantScore = idf_stats.getMin();
                break;
            case "stddev_classic_idf":
                constantScore = idf_stats.getStdDev();
                break;
            case "sum_raw_df":
                constantScore = df_stats.getSum();
                break;
            case "min_raw_df":
                constantScore = df_stats.getMin();
                break;
            case "max_raw_df":
                constantScore = df_stats.getMax();
                break;
            case "mean_raw_df":
                constantScore = df_stats.getMean();
                break;
            case "stddev_raw_df":
                constantScore = df_stats.getStdDev();
                break;
            case "sum_raw_ttf":
                constantScore = ttf_stats.getSum();
                break;
            case "min_raw_ttf":
                constantScore = ttf_stats.getMin();
                break;
            case "max_raw_ttf":
                constantScore = ttf_stats.getMax();
                break;
            case "mean_raw_ttf":
                constantScore = ttf_stats.getMean();
                break;
            case "stddev_raw_ttf":
                constantScore = ttf_stats.getStdDev();
                break;
            case "unique_terms_count":
                constantScore = terms.size();
                break;

            default:
                // Consistent with the unknown-type path at the bottom of this method.
                throw new IllegalArgumentException("Invalid stat type specified [" + type + "]");
            }
        } else {
            constantScore = 0.0f;
        }

        // Every document matches and receives the precomputed statistic as its score.
        return new ConstantScoreWeight(ExplorerQuery.this, constantScore) {

            @Override
            public Explanation explain(LeafReaderContext context, int doc) throws IOException {
                Scorer scorer = scorer(context);
                int newDoc = scorer.iterator().advance(doc);
                assert newDoc == doc; // this is a DocIdSetIterator.all
                return Explanation.match(scorer.score(), "Stat Score: " + type);
            }

            @Override
            public Scorer scorer(LeafReaderContext context) throws IOException {
                return new ConstantScoreScorer(this, constantScore,
                        DocIdSetIterator.all(context.reader().maxDoc()));
            }

            @Override
            public boolean isCacheable(LeafReaderContext ctx) {
                // The score depends only on index-level statistics, so caching is safe.
                return true;
            }

        };
    } else if (type.endsWith("_raw_tf")) {
        // Rewrite this into a boolean query where we can inject our PostingsExplorerQuery
        BooleanQuery.Builder qb = new BooleanQuery.Builder();
        for (Term t : terms) {
            qb.add(new BooleanClause(new PostingsExplorerQuery(t, PostingsExplorerQuery.Type.TF),
                    BooleanClause.Occur.SHOULD));
        }
        // FIXME: completely refactor this class and stop accepting a random query but a list of terms directly
        // rewriting at this point is wrong, additionally we certainly build the TermContext twice for every term
        // problem is that we rely on extractTerms which happens too late in the process
        Query q = qb.build().rewrite(searcher.getIndexReader());
        return new ExplorerQuery.ExplorerWeight(this, searcher.createWeight(q, true, boost), type);
    }
    throw new IllegalArgumentException("Unknown ExplorerQuery type [" + type + "]");
}

From source file:com.o19s.es.ltr.query.DerivedExpressionQuery.java

License:Apache License

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    // Scoring requested: hand off to the feature-vector weight implementation.
    if (needsScores) {
        return new FVWeight(this);
    }
    // No scoring needed — match every document with the boost as a constant score.
    return new ConstantScoreWeight(this, boost) {
        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            DocIdSetIterator allDocs = DocIdSetIterator.all(context.reader().maxDoc());
            return new ConstantScoreScorer(this, score(), allDocs);
        }

        @Override
        public boolean isCacheable(LeafReaderContext leafCtx) {
            return true;
        }
    };
}

From source file:com.o19s.es.ltr.query.RankerQuery.java

License:Apache License

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    // Without scoring there is nothing to rank: every doc matches at a constant score.
    if (!needsScores) {
        return new ConstantScoreWeight(this, boost) {
            @Override
            public boolean isCacheable(LeafReaderContext leafCtx) {
                return true;
            }

            @Override
            public Scorer scorer(LeafReaderContext context) throws IOException {
                DocIdSetIterator allDocs = DocIdSetIterator.all(context.reader().maxDoc());
                return new ConstantScoreScorer(this, score(), allDocs);
            }
        };
    }
    // Build one sub-weight per feature query; RankerWeight combines their scores.
    List<Weight> subWeights = new ArrayList<>(queries.size());
    for (Query subQuery : queries) {
        Weight w = searcher.createWeight(subQuery, needsScores, boost);
        subWeights.add(w);
    }
    return new RankerWeight(subWeights);
}

From source file:org.elasticsearch.index.shard.ShardSplittingQuery.java

License:Apache License

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) {
    // Builds a weight that matches every document which should NOT live on this shard
    // after a split, so those docs can be deleted. Matching is driven by recomputing
    // each doc's target shard from its _id / _routing value.
    return new ConstantScoreWeight(this, boost) {
        @Override
        public String toString() {
            return "weight(delete docs query)";
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            LeafReader leafReader = context.reader();
            // Bit set of doc IDs selected for deletion within this leaf.
            FixedBitSet bitSet = new FixedBitSet(leafReader.maxDoc());
            Terms terms = leafReader.terms(RoutingFieldMapper.NAME);
            // A doc belongs to this shard iff routing its _id resolves to our shardId.
            Predicate<BytesRef> includeInShard = ref -> {
                int targetShardId = OperationRouting.generateShardId(indexMetaData,
                        Uid.decodeId(ref.bytes, ref.offset, ref.length), null);
                return shardId == targetShardId;
            };
            if (terms == null) {
                // this is the common case - no partitioning and no _routing values
                // in this case we also don't do anything special with regards to nested docs since we basically delete
                // by ID and parent and nested all have the same id.
                assert indexMetaData.isRoutingPartitionedIndex() == false;
                findSplitDocs(IdFieldMapper.NAME, includeInShard, leafReader, bitSet::set);
            } else {
                // Some docs carry _routing values; nested docs may need special handling.
                final BitSet parentBitSet;
                if (nestedParentBitSetProducer == null) {
                    parentBitSet = null;
                } else {
                    parentBitSet = nestedParentBitSetProducer.getBitSet(context);
                    if (parentBitSet == null) {
                        return null; // no matches
                    }
                }
                if (indexMetaData.isRoutingPartitionedIndex()) {
                    // this is the heaviest invariant. Here we have to visit all docs' stored fields to extract _id and _routing
                    // because this index is routing partitioned.
                    Visitor visitor = new Visitor(leafReader);
                    TwoPhaseIterator twoPhaseIterator = parentBitSet == null
                            ? new RoutingPartitionedDocIdSetIterator(visitor)
                            : new NestedRoutingPartitionedDocIdSetIterator(visitor, parentBitSet);
                    return new ConstantScoreScorer(this, score(), twoPhaseIterator);
                } else {
                    // here we potentially guard the docID consumers with our parent bitset if we have one.
                    // this ensures that we are only marking root documents in the nested case and if necessary
                    // we do a second pass to mark the corresponding children in markChildDocs
                    Function<IntConsumer, IntConsumer> maybeWrapConsumer = consumer -> {
                        if (parentBitSet != null) {
                            return docId -> {
                                if (parentBitSet.get(docId)) {
                                    consumer.accept(docId);
                                }
                            };
                        }
                        return consumer;
                    };
                    // in the _routing case we first go and find all docs that have a routing value and mark the ones we have to delete
                    findSplitDocs(RoutingFieldMapper.NAME, ref -> {
                        int targetShardId = OperationRouting.generateShardId(indexMetaData, null,
                                ref.utf8ToString());
                        return shardId == targetShardId;
                    }, leafReader, maybeWrapConsumer.apply(bitSet::set));

                    // now if we have a mixed index where some docs have a _routing value and some don't we have to exclude the ones
                    // with a routing value from the next iteration and delete / select based on the ID.
                    if (terms.getDocCount() != leafReader.maxDoc()) {
                        // this is a special case where some of the docs have no routing values; this sucks but it's possible today
                        FixedBitSet hasRoutingValue = new FixedBitSet(leafReader.maxDoc());
                        // Predicate is always false: we only record which docs HAVE a routing value.
                        findSplitDocs(RoutingFieldMapper.NAME, ref -> false, leafReader,
                                maybeWrapConsumer.apply(hasRoutingValue::set));
                        IntConsumer bitSetConsumer = maybeWrapConsumer.apply(bitSet::set);
                        findSplitDocs(IdFieldMapper.NAME, includeInShard, leafReader, docId -> {
                            if (hasRoutingValue.get(docId) == false) {
                                bitSetConsumer.accept(docId);
                            }
                        });
                    }
                }
                if (parentBitSet != null) {
                    // if nested docs are involved we also need to mark all child docs that belong to a matching parent doc.
                    markChildDocs(parentBitSet, bitSet);
                }
            }

            return new ConstantScoreScorer(this, score(), new BitSetIterator(bitSet, bitSet.length()));
        }

        @Override
        public boolean isCacheable(LeafReaderContext ctx) {
            // This is not a regular query, let's not cache it. It wouldn't help
            // anyway.
            return false;
        }
    };
}

From source file:perf.RandomQuery.java

License:Apache License

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    // Deterministically keeps roughly `fractionKeep` of all docs by selecting one
    // pseudo-random doc per fixed-size interval of the doc ID space.
    return new ConstantScoreWeight(this, boost) {
        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final int maxDoc = context.reader().maxDoc();
            // Guard against fractionKeep > 1: the plain cast would yield interval == 0
            // and a divide-by-zero in advance(). Clamp to 1 (i.e. keep every doc).
            final int interval = Math.max(1, (int) (1 / fractionKeep));
            final DocIdSetIterator iterator = new DocIdSetIterator() {

                int doc = -1;

                @Override
                public int docID() {
                    return doc;
                }

                @Override
                public int nextDoc() throws IOException {
                    return advance(doc + 1);
                }

                @Override
                public int advance(int target) throws IOException {
                    if (target >= maxDoc) {
                        return doc = NO_MORE_DOCS;
                    }
                    // Each interval contributes exactly one doc; its offset within the
                    // interval is a cheap deterministic hash of the interval index.
                    int intervalId = target / interval;
                    int addend = (31 * intervalId) % interval;
                    doc = intervalId * interval + addend;
                    if (doc < target) {
                        // Selected doc of this interval is behind the target; move on
                        // to the next interval's selected doc.
                        intervalId++;
                        addend = (31 * intervalId) % interval;
                        doc = intervalId * interval + addend;
                    }
                    assert doc >= target;
                    if (doc >= maxDoc) {
                        return doc = NO_MORE_DOCS;
                    }
                    return doc;
                }

                @Override
                public long cost() {
                    // One hit per interval (integer division is a fine estimate).
                    return maxDoc / interval;
                }

            };
            return new ConstantScoreScorer(this, score(), iterator);
        }
    };
}