List of usage examples for the org.apache.lucene.search.ConstantScoreWeight constructor ConstantScoreWeight(Query, float)
protected ConstantScoreWeight(Query query, float score)
From source file:com.o19s.es.explore.ExplorerQuery.java
License:Apache License
@Override public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException { if (!needsScores) { return searcher.createWeight(query, false, boost); }// www. j a v a 2 s.co m final Weight subWeight = searcher.createWeight(query, true, boost); Set<Term> terms = new HashSet<>(); subWeight.extractTerms(terms); if (isCollectionScoped()) { ClassicSimilarity sim = new ClassicSimilarity(); StatisticsHelper df_stats = new StatisticsHelper(); StatisticsHelper idf_stats = new StatisticsHelper(); StatisticsHelper ttf_stats = new StatisticsHelper(); for (Term term : terms) { TermContext ctx = TermContext.build(searcher.getTopReaderContext(), term); TermStatistics tStats = searcher.termStatistics(term, ctx); df_stats.add(tStats.docFreq()); idf_stats.add(sim.idf(tStats.docFreq(), searcher.getIndexReader().numDocs())); ttf_stats.add(tStats.totalTermFreq()); } /* If no terms are parsed in the query we opt for returning 0 instead of throwing an exception that could break various pipelines. 
*/ float constantScore; if (terms.size() > 0) { switch (type) { case ("sum_classic_idf"): constantScore = idf_stats.getSum(); break; case ("mean_classic_idf"): constantScore = idf_stats.getMean(); break; case ("max_classic_idf"): constantScore = idf_stats.getMax(); break; case ("min_classic_idf"): constantScore = idf_stats.getMin(); break; case ("stddev_classic_idf"): constantScore = idf_stats.getStdDev(); break; case "sum_raw_df": constantScore = df_stats.getSum(); break; case "min_raw_df": constantScore = df_stats.getMin(); break; case "max_raw_df": constantScore = df_stats.getMax(); break; case "mean_raw_df": constantScore = df_stats.getMean(); break; case "stddev_raw_df": constantScore = df_stats.getStdDev(); break; case "sum_raw_ttf": constantScore = ttf_stats.getSum(); break; case "min_raw_ttf": constantScore = ttf_stats.getMin(); break; case "max_raw_ttf": constantScore = ttf_stats.getMax(); break; case "mean_raw_ttf": constantScore = ttf_stats.getMean(); break; case "stddev_raw_ttf": constantScore = ttf_stats.getStdDev(); break; case "unique_terms_count": constantScore = terms.size(); break; default: throw new RuntimeException("Invalid stat type specified."); } } else { constantScore = 0.0f; } return new ConstantScoreWeight(ExplorerQuery.this, constantScore) { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { Scorer scorer = scorer(context); int newDoc = scorer.iterator().advance(doc); assert newDoc == doc; // this is a DocIdSetIterator.all return Explanation.match(scorer.score(), "Stat Score: " + type); } @Override public Scorer scorer(LeafReaderContext context) throws IOException { return new ConstantScoreScorer(this, constantScore, DocIdSetIterator.all(context.reader().maxDoc())); } @Override public boolean isCacheable(LeafReaderContext ctx) { return true; } }; } else if (type.endsWith("_raw_tf")) { // Rewrite this into a boolean query where we can inject our PostingsExplorerQuery BooleanQuery.Builder qb = new 
BooleanQuery.Builder(); for (Term t : terms) { qb.add(new BooleanClause(new PostingsExplorerQuery(t, PostingsExplorerQuery.Type.TF), BooleanClause.Occur.SHOULD)); } // FIXME: completely refactor this class and stop accepting a random query but a list of terms directly // rewriting at this point is wrong, additionally we certainly build the TermContext twice for every terms // problem is that we rely on extractTerms which happen too late in the process Query q = qb.build().rewrite(searcher.getIndexReader()); return new ExplorerQuery.ExplorerWeight(this, searcher.createWeight(q, true, boost), type); } throw new IllegalArgumentException("Unknown ExplorerQuery type [" + type + "]"); }
From source file:com.o19s.es.ltr.query.DerivedExpressionQuery.java
License:Apache License
@Override public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException { if (!needsScores) { // If scores are not needed simply return a constant score on all docs return new ConstantScoreWeight(this, boost) { @Override/*from w w w .j ava2 s . co m*/ public boolean isCacheable(LeafReaderContext ctx) { return true; } @Override public Scorer scorer(LeafReaderContext context) throws IOException { return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(context.reader().maxDoc())); } }; } return new FVWeight(this); }
From source file:com.o19s.es.ltr.query.RankerQuery.java
License:Apache License
@Override public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException { if (!needsScores) { // If scores are not needed simply return a constant score on all docs return new ConstantScoreWeight(this, boost) { @Override/* w w w . j ava2 s.c o m*/ public Scorer scorer(LeafReaderContext context) throws IOException { return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(context.reader().maxDoc())); } @Override public boolean isCacheable(LeafReaderContext ctx) { return true; } }; } List<Weight> weights = new ArrayList<>(queries.size()); for (Query q : queries) { weights.add(searcher.createWeight(q, needsScores, boost)); } return new RankerWeight(weights); }
From source file:org.elasticsearch.index.shard.ShardSplittingQuery.java
License:Apache License
/**
 * Returns a constant-score weight whose per-segment scorer selects documents based on the shard id
 * computed from each document's {@code _id} or {@code _routing} value, compared against
 * {@code shardId}.
 *
 * <p>The {@code searcher} and {@code needsScores} arguments are not used by this method.
 *
 * <p>NOTE(review): which side of the predicate (matching or non-matching terms) ends up in the bit
 * set is decided inside {@code findSplitDocs}, which is not shown here - confirm against its
 * definition.
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) {
    return new ConstantScoreWeight(this, boost) {
        @Override
        public String toString() {
            return "weight(delete docs query)";
        }

        /**
         * Builds the scorer for one segment. May return {@code null} when a nested parent bit set
         * producer is configured but yields no bit set for this segment.
         */
        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            LeafReader leafReader = context.reader();
            // Collects the doc ids selected by the passes below.
            FixedBitSet bitSet = new FixedBitSet(leafReader.maxDoc());
            Terms terms = leafReader.terms(RoutingFieldMapper.NAME);
            // True when the shard id generated from the decoded _id equals this query's shardId.
            Predicate<BytesRef> includeInShard = ref -> {
                int targetShardId = OperationRouting.generateShardId(indexMetaData,
                    Uid.decodeId(ref.bytes, ref.offset, ref.length), null);
                return shardId == targetShardId;
            };
            if (terms == null) {
                // this is the common case - no partitioning and no _routing values
                // in this case we also don't do anything special with regards to nested docs since we basically delete
                // by ID and parent and nested all have the same id.
                assert indexMetaData.isRoutingPartitionedIndex() == false;
                findSplitDocs(IdFieldMapper.NAME, includeInShard, leafReader, bitSet::set);
            } else {
                final BitSet parentBitSet;
                if (nestedParentBitSetProducer == null) {
                    parentBitSet = null;
                } else {
                    parentBitSet = nestedParentBitSetProducer.getBitSet(context);
                    if (parentBitSet == null) {
                        return null; // no matches
                    }
                }
                if (indexMetaData.isRoutingPartitionedIndex()) {
                    // this is the heaviest variant. Here we have to visit all docs' stored fields to extract _id and _routing
                    // since this index is routing partitioned.
                    Visitor visitor = new Visitor(leafReader);
                    TwoPhaseIterator twoPhaseIterator =
                        parentBitSet == null ? new RoutingPartitionedDocIdSetIterator(visitor) :
                            new NestedRoutingPartitionedDocIdSetIterator(visitor, parentBitSet);
                    return new ConstantScoreScorer(this, score(), twoPhaseIterator);
                } else {
                    // here we potentially guard the docID consumers with our parent bitset if we have one.
                    // this ensures that we are only marking root documents in the nested case and if necessary
                    // we do a second pass to mark the corresponding children in markChildDocs
                    Function<IntConsumer, IntConsumer> maybeWrapConsumer = consumer -> {
                        if (parentBitSet != null) {
                            return docId -> {
                                if (parentBitSet.get(docId)) {
                                    consumer.accept(docId);
                                }
                            };
                        }
                        return consumer;
                    };
                    // in the _routing case we first go and find all docs that have a routing value and mark the ones we have to delete
                    findSplitDocs(RoutingFieldMapper.NAME, ref -> {
                        int targetShardId = OperationRouting.generateShardId(indexMetaData, null, ref.utf8ToString());
                        return shardId == targetShardId;
                    }, leafReader, maybeWrapConsumer.apply(bitSet::set));

                    // now if we have a mixed index where some docs have a _routing value and some don't, we have to exclude the ones
                    // with a routing value from the next iteration and delete / select based on the ID.
                    if (terms.getDocCount() != leafReader.maxDoc()) {
                        // this is a special case where some of the docs have no routing values; unfortunate, but possible today
                        FixedBitSet hasRoutingValue = new FixedBitSet(leafReader.maxDoc());
                        // NOTE(review): the always-false predicate presumably makes findSplitDocs report every doc
                        // that has a _routing term - confirm against findSplitDocs.
                        findSplitDocs(RoutingFieldMapper.NAME, ref -> false, leafReader,
                            maybeWrapConsumer.apply(hasRoutingValue::set));
                        IntConsumer bitSetConsumer = maybeWrapConsumer.apply(bitSet::set);
                        // Second pass by _id, skipping docs already recorded as having a routing value.
                        findSplitDocs(IdFieldMapper.NAME, includeInShard, leafReader, docId -> {
                            if (hasRoutingValue.get(docId) == false) {
                                bitSetConsumer.accept(docId);
                            }
                        });
                    }
                }
                if (parentBitSet != null) {
                    // if nested docs are involved we also need to mark all child docs that belong to a matching parent doc.
                    markChildDocs(parentBitSet, bitSet);
                }
            }

            return new ConstantScoreScorer(this, score(), new BitSetIterator(bitSet, bitSet.length()));
        }

        @Override
        public boolean isCacheable(LeafReaderContext ctx) {
            // This is not a regular query, let's not cache it. It wouldn't help
            // anyway.
            return false;
        }
    };
}
From source file:perf.RandomQuery.java
License:Apache License
@Override public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override//from w w w . j a v a 2 s .c om public Scorer scorer(LeafReaderContext context) throws IOException { final int maxDoc = context.reader().maxDoc(); final int interval = (int) (1 / fractionKeep); final DocIdSetIterator iterator = new DocIdSetIterator() { int doc = -1; @Override public int docID() { return doc; } @Override public int nextDoc() throws IOException { return advance(doc + 1); } @Override public int advance(int target) throws IOException { if (target >= maxDoc) { return doc = NO_MORE_DOCS; } int intervalId = target / interval; int addend = (31 * intervalId) % interval; doc = intervalId * interval + addend; if (doc < target) { intervalId++; addend = (31 * intervalId) % interval; doc = intervalId * interval + addend; } assert doc >= target; if (doc >= maxDoc) { return doc = NO_MORE_DOCS; } return doc; } @Override public long cost() { return maxDoc / interval; } }; return new ConstantScoreScorer(this, score(), iterator); } }; }