Java tutorial
package uk.co.flax.luwak; import java.util.ArrayList; import java.util.Collection; import java.util.LinkedList; import java.util.List; import org.apache.lucene.search.*; /* * Copyright (c) 2014 Lemur Consulting Ltd. * <p/> * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * <p/> * http://www.apache.org/licenses/LICENSE-2.0 * <p/> * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * Split a disjunction query into its consituent parts, so that they can be indexed * and run separately in the Monitor. */ public class QueryDecomposer { /** * Split a query up into individual parts that can be indexed and run separately * @param q the query * @return a collection of subqueries */ public Collection<Query> decompose(Query q) { if (q instanceof BooleanQuery) return decomposeBoolean((BooleanQuery) q); if (q instanceof DisjunctionMaxQuery) { return ((DisjunctionMaxQuery) q).getDisjuncts(); } if (q instanceof BoostQuery) { return decomposeBoostQuery((BoostQuery) q); } return listOf(q); } private static Collection<Query> listOf(Query q) { List<Query> qs = new ArrayList<>(1); qs.add(q); return qs; } /** * Apply a boost to a query * @param q the query * @param boost the boost * @return the boosted query */ public static Query boost(Query q, float boost) { if (boost == 1.0) return q; return new BoostQuery(q, boost); } public Collection<Query> decomposeBoostQuery(BoostQuery q) { if (q.getBoost() == 1.0) return decompose(q.getQuery()); List<Query> boostedDecomposedQueries = new ArrayList<>(); for (Query subq : decompose(q.getQuery())) { boostedDecomposedQueries.add(new BoostQuery(subq, q.getBoost())); } return boostedDecomposedQueries; } /** * Decompose a {@link org.apache.lucene.search.BooleanQuery} * @param q the boolean query * @return a collection of subqueries */ public Collection<Query> decomposeBoolean(BooleanQuery q) { if (q.getMinimumNumberShouldMatch() > 1) return listOf(q); List<Query> subqueries = new LinkedList<>(); List<Query> exclusions = new LinkedList<>(); List<Query> mandatory = new LinkedList<>(); float parentBoost = q.getBoost(); for (BooleanClause clause : q) { if (clause.getOccur() == BooleanClause.Occur.MUST) mandatory.add(clause.getQuery()); else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) exclusions.add(clause.getQuery()); else { for (Query subQuery : decompose(clause.getQuery())) { subqueries.add(boost(subQuery, parentBoost)); } } } // More than one MUST clause, or a single MUST clause with disjunctions if (mandatory.size() > 1 || (mandatory.size() == 1 && subqueries.size() > 0)) return listOf(q); // If we only have a single MUST clause and no SHOULD clauses, then we can // decompose the MUST clause instead if (mandatory.size() == 1) { for (Query subQuery : decompose(mandatory.get(0))) { subqueries.add(boost(subQuery, parentBoost)); } } if (exclusions.size() == 0) return subqueries; // If there are exclusions, then we need to add them to all the decomposed // queries List<Query> rewrittenSubqueries = new ArrayList<>(subqueries.size()); for (Query subquery : subqueries) { BooleanQuery.Builder bq = new BooleanQuery.Builder(); bq.add(subquery, BooleanClause.Occur.MUST); for (Query ex : exclusions) { bq.add(ex, BooleanClause.Occur.MUST_NOT); } rewrittenSubqueries.add(bq.build()); } return rewrittenSubqueries; } }