List of usage examples for org.apache.lucene.search Weight extractTerms
@Deprecated public abstract void extractTerms(Set<Term> terms);
From source file:com.o19s.es.explore.ExplorerQuery.java
License:Apache License
@Override public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException { if (!needsScores) { return searcher.createWeight(query, false, boost); }/* w w w . ja va 2 s . c o m*/ final Weight subWeight = searcher.createWeight(query, true, boost); Set<Term> terms = new HashSet<>(); subWeight.extractTerms(terms); if (isCollectionScoped()) { ClassicSimilarity sim = new ClassicSimilarity(); StatisticsHelper df_stats = new StatisticsHelper(); StatisticsHelper idf_stats = new StatisticsHelper(); StatisticsHelper ttf_stats = new StatisticsHelper(); for (Term term : terms) { TermContext ctx = TermContext.build(searcher.getTopReaderContext(), term); TermStatistics tStats = searcher.termStatistics(term, ctx); df_stats.add(tStats.docFreq()); idf_stats.add(sim.idf(tStats.docFreq(), searcher.getIndexReader().numDocs())); ttf_stats.add(tStats.totalTermFreq()); } /* If no terms are parsed in the query we opt for returning 0 instead of throwing an exception that could break various pipelines. 
*/ float constantScore; if (terms.size() > 0) { switch (type) { case ("sum_classic_idf"): constantScore = idf_stats.getSum(); break; case ("mean_classic_idf"): constantScore = idf_stats.getMean(); break; case ("max_classic_idf"): constantScore = idf_stats.getMax(); break; case ("min_classic_idf"): constantScore = idf_stats.getMin(); break; case ("stddev_classic_idf"): constantScore = idf_stats.getStdDev(); break; case "sum_raw_df": constantScore = df_stats.getSum(); break; case "min_raw_df": constantScore = df_stats.getMin(); break; case "max_raw_df": constantScore = df_stats.getMax(); break; case "mean_raw_df": constantScore = df_stats.getMean(); break; case "stddev_raw_df": constantScore = df_stats.getStdDev(); break; case "sum_raw_ttf": constantScore = ttf_stats.getSum(); break; case "min_raw_ttf": constantScore = ttf_stats.getMin(); break; case "max_raw_ttf": constantScore = ttf_stats.getMax(); break; case "mean_raw_ttf": constantScore = ttf_stats.getMean(); break; case "stddev_raw_ttf": constantScore = ttf_stats.getStdDev(); break; case "unique_terms_count": constantScore = terms.size(); break; default: throw new RuntimeException("Invalid stat type specified."); } } else { constantScore = 0.0f; } return new ConstantScoreWeight(ExplorerQuery.this, constantScore) { @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { Scorer scorer = scorer(context); int newDoc = scorer.iterator().advance(doc); assert newDoc == doc; // this is a DocIdSetIterator.all return Explanation.match(scorer.score(), "Stat Score: " + type); } @Override public Scorer scorer(LeafReaderContext context) throws IOException { return new ConstantScoreScorer(this, constantScore, DocIdSetIterator.all(context.reader().maxDoc())); } @Override public boolean isCacheable(LeafReaderContext ctx) { return true; } }; } else if (type.endsWith("_raw_tf")) { // Rewrite this into a boolean query where we can inject our PostingsExplorerQuery BooleanQuery.Builder qb = new 
BooleanQuery.Builder(); for (Term t : terms) { qb.add(new BooleanClause(new PostingsExplorerQuery(t, PostingsExplorerQuery.Type.TF), BooleanClause.Occur.SHOULD)); } // FIXME: completely refactor this class and stop accepting a random query but a list of terms directly // rewriting at this point is wrong, additionally we certainly build the TermContext twice for every terms // problem is that we rely on extractTerms which happen too late in the process Query q = qb.build().rewrite(searcher.getIndexReader()); return new ExplorerQuery.ExplorerWeight(this, searcher.createWeight(q, true, boost), type); } throw new IllegalArgumentException("Unknown ExplorerQuery type [" + type + "]"); }
From source file:org.hibernate.search.filter.impl.CachingWrapperQuery.java
License:LGPL
@Override public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { final Weight weight = query.createWeight(searcher, needsScores); if (needsScores) { // our cache is not sufficient, we need scores too return weight; }/*from w w w.j ava2 s. c om*/ return new ConstantScoreWeight(weight.getQuery()) { @Override public void extractTerms(Set<Term> terms) { weight.extractTerms(terms); } @Override public Scorer scorer(LeafReaderContext context) throws IOException { DocIdSet docIdSet = getDocIdSet(context); assert docIdSet != null; if (docIdSet == DocIdSet.EMPTY) { return null; } final DocIdSetIterator disi = docIdSet.iterator(); if (disi == null) { return null; } return new ConstantScoreScorer(this, 0f, disi); } private DocIdSet getDocIdSet(LeafReaderContext context) throws IOException { final LeafReader reader = context.reader(); final Object key = reader.getCoreCacheKey(); Object cached = cache.get(key); if (cached != null) { return (DocIdSet) cached; } synchronized (cache) { cached = cache.get(key); if (cached != null) { return (DocIdSet) cached; } final DocIdSet docIdSet; final Scorer scorer = weight.scorer(context); if (scorer == null) { docIdSet = DocIdSet.EMPTY; } else { docIdSet = cacheImpl(scorer.iterator(), reader); } cache.put(key, docIdSet); return docIdSet; } } }; }
From source file:org.tallison.lucene.search.concordance.util.SimpleTargetCounter.java
License:Apache License
/** * Simple utility class to perform basic term frequency/document frequency * counts on the individual terms within a query. This relies on * IndexReader and does not perform any concordance search/retrieval; * it is, therefore, very fast./* w w w.j a v a2 s.co m*/ * <p> * If you want to visit more than basic terms (e.g. SpanNear), * see {@link TargetVisitor} * * @param query query * @param searcher searcher * @return target term results * @throws java.io.IOException if there is an IOException from the searcher */ public SimpleTargetTermResults searchSingleTerms(Query query, IndexSearcher searcher) throws IOException { Query tmpQ = query.rewrite(searcher.getIndexReader()); Set<Term> terms = new HashSet<>(); Weight weight = tmpQ.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f); weight.extractTerms(terms); Map<String, Integer> dfs = new HashMap<>(); Map<String, Integer> tfs = new HashMap<>(); for (Term t : terms) { String targ = t.text(); int docFreq = searcher.getIndexReader().docFreq(t); if (docFreq == 0) { continue; } Integer i = new Integer(docFreq); dfs.put(targ, i); long tf = searcher.getIndexReader().totalTermFreq(t); tfs.put(targ, (int) tf); } SimpleTargetTermResults results = new SimpleTargetTermResults(dfs, tfs); return results; }
From source file:org.tallison.lucene.search.queries.SpanQueryConverter.java
License:Apache License
@Override protected SpanQuery convertUnknownQuery(String field, Query query) { if (query instanceof CommonTermsQuery) { // specialized since rewriting would change the result query // this query is TermContext sensitive. CommonTermsQuery ctq = (CommonTermsQuery) query; Set<Term> terms = new HashSet<>(); try {// w w w .j a v a 2 s.com Weight w = ctq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f); w.extractTerms(terms); } catch (IOException e) { throw new RuntimeException("IOException on searcher!!!", e); } List<SpanQuery> spanQs = new LinkedList<SpanQuery>(); for (Term term : terms) { if (term.field().equals(field)) { spanQs.add(new SpanTermQuery(term)); } } if (spanQs.size() == 0) { return getEmptySpanQuery(); } else if (spanQs.size() == 1) { return spanQs.get(0); } else { return new SpanOrQuery(spanQs.toArray(new SpanQuery[spanQs.size()])); } } super.convertUnknownQuery(field, query); return null; }
From source file:uk.co.flax.luwak.util.ForceNoBulkScoringQuery.java
License:Apache License
/**
 * Creates a Weight that forwards every operation to the inner query's
 * weight.
 *
 * <p>Note that {@code bulkScorer} is deliberately NOT overridden, so the
 * default {@link Weight} implementation (built on the per-document
 * {@link #scorer}) is used — presumably this is what "forces no bulk
 * scoring"; confirm against the enclosing class docs.
 *
 * @param searcher    searcher used to build the inner weight
 * @param needsScores passed through to the inner query
 * @return a delegating weight over the inner query's weight
 * @throws IOException on index access failure
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    final Weight delegate = inner.createWeight(searcher, needsScores);
    return new Weight(ForceNoBulkScoringQuery.this) {

        @Override
        public void extractTerms(Set<Term> termSet) {
            delegate.extractTerms(termSet);
        }

        @Override
        public Explanation explain(LeafReaderContext ctx, int doc) throws IOException {
            return delegate.explain(ctx, doc);
        }

        @Override
        public float getValueForNormalization() throws IOException {
            return delegate.getValueForNormalization();
        }

        @Override
        public void normalize(float norm, float topLevelBoost) {
            delegate.normalize(norm, topLevelBoost);
        }

        @Override
        public Scorer scorer(LeafReaderContext ctx) throws IOException {
            return delegate.scorer(ctx);
        }
    };
}