List of usage examples for the org.apache.lucene.search.spans.SpanBoostQuery constructor SpanBoostQuery(SpanQuery, float)
public SpanBoostQuery(SpanQuery query, float boost)
From source file:com.wiseowl.WiseOwl.query.WiseOwlQParser.java
License:Apache License
@Override public Query parse() throws SyntaxError { //<start id="qqp.parse"/> Parse parse = ParserTool.parseLine(qstr, parser, 1)[0];//<co id="qqp.parseLine"/> /*//from w w w . j a va 2 s . c o m <calloutlist> <callout arearefs="qqp.parseLine"><para>Parse the question using the <classname>TreebankParser</classname>. The resulting <classname>Parse</classname> object can then be utilized by the classifier to determine the Answer Type.</para></callout> </calloutlist> */ //<end id="qqp.parse"/> //<start id="qqp.answerType"/> // String type = "P"; String type = atc.computeAnswerType(parse); String mt = atm.get(type); if (mt.equals("DESCRIPTION")) { BooleanQuery bq; BooleanQuery.Builder builder = new BooleanQuery.Builder(); //BooleanQuery bq=new BooleanQuery(false, 0); String field = "text"; SchemaField sf = req.getSchema().getFieldOrNull(field); try { Analyzer analyzer = sf.getType().getQueryAnalyzer(); TokenStream ts = analyzer.tokenStream(field, new StringReader(qstr)); ts.reset(); CharTermAttribute tok = null; while (ts.incrementToken()) {//<co id="qqp.addTerms"/> tok = ts.getAttribute(CharTermAttribute.class); String term = tok.toString(); //ts.reset(); //log.warn("terms {} ",term); builder.add(new TermQuery(new Term(field, term)), BooleanClause.Occur.SHOULD); } ts.close(); } catch (IOException e) { throw new SyntaxError(e.getLocalizedMessage()); } bq = builder.build(); return bq; //return new TermQuery(new Term("title", "she")); } else { //<end id="qqp.answerType"/> String field = "text"; //params.get(QUERY_FIELD); //String field="text"; SchemaField sp = req.getSchema().getFieldOrNull(field); if (sp == null) { throw new SolrException(ErrorCode.SERVER_ERROR, "Undefined field: " + field); } //<start id="qqp.query"/> List<SpanQuery> sql = new ArrayList<SpanQuery>(); if (mt != null) {//<co id="qqp.handleAT"/> String[] parts = mt.split("\\|"); if (parts.length == 1) { sql.add(new SpanTermQuery(new Term(field, mt.toLowerCase()))); } else { for (int pi = 0; pi < 
parts.length; pi++) { sql.add(new SpanTermQuery(new Term(field, parts[pi].toLowerCase()))); } } } log.warn("answer type mt : {} {} ", mt, type); FocusNoun fn = new FocusNoun(); String fnn[] = null; try { fnn = fn.getFocusNoun(qstr); } catch (IOException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } try { Analyzer analyzer = sp.getType().getQueryAnalyzer(); TokenStream ts = analyzer.tokenStream(field, new StringReader(qstr)); ts.reset(); CharTermAttribute tok = null; while (ts.incrementToken()) {//<co id="qqp.addTerms"/> tok = ts.getAttribute(CharTermAttribute.class); String term = tok.toString(); log.warn("terms boosted {} ", term); if (fnn != null) if (term.equals(fnn[0]) || term.equals(fnn[1])) { SpanQuery sq = new SpanTermQuery(new Term(field, term)); sql.add(new SpanBoostQuery(sq, 100f)); } else { SpanQuery sq = new SpanTermQuery(new Term(field, term)); sql.add(new SpanBoostQuery(sq, 5f)); } // sql.add(new SpanTermQuery(new Term(field, term))); } ts.close(); } catch (IOException e) { throw new SyntaxError(e.getLocalizedMessage()); } return new SpanOrQuery(sql.toArray(new SpanQuery[sql.size()])); // return new SpanNearQuery(sql.toArray(new SpanQuery[sql.size()]), params.getInt(OWLParams.SLOP, 10), true);//<co id="qqp.spanNear"/> /* <calloutlist> <callout arearefs="qqp.handleAT"><para>Add the AnswerType to the query</para></callout> <callout arearefs="qqp.addTerms"><para>Add the original query terms to the query</para></callout> <callout arearefs="qqp.spanNear"><para>Query the index looking for all of the parts near each other</para></callout> </calloutlist> */ //<end id="qqp.query"/> } }
From source file:org.codelibs.elasticsearch.index.query.SpanMultiTermQueryBuilder.java
License:Apache License
@Override protected Query doToQuery(QueryShardContext context) throws IOException { Query subQuery = multiTermQueryBuilder.toQuery(context); float boost = AbstractQueryBuilder.DEFAULT_BOOST; if (subQuery instanceof BoostQuery) { BoostQuery boostQuery = (BoostQuery) subQuery; subQuery = boostQuery.getQuery(); boost = boostQuery.getBoost();/*from ww w. j av a 2 s. co m*/ } //no MultiTermQuery extends SpanQuery, so SpanBoostQuery is not supported here assert subQuery instanceof SpanBoostQuery == false; if (subQuery instanceof MultiTermQuery == false) { throw new UnsupportedOperationException("unsupported inner query, should be " + MultiTermQuery.class.getName() + " but was " + subQuery.getClass().getName()); } SpanQuery wrapper = new SpanMultiTermQueryWrapper<>((MultiTermQuery) subQuery); if (boost != AbstractQueryBuilder.DEFAULT_BOOST) { wrapper = new SpanBoostQuery(wrapper, boost); } return wrapper; }
From source file:org.elasticsearch.index.query.AbstractQueryBuilder.java
License:Apache License
@Override public final Query toQuery(QueryShardContext context) throws IOException { Query query = doToQuery(context); if (query != null) { if (boost != DEFAULT_BOOST) { if (query instanceof SpanQuery) { query = new SpanBoostQuery((SpanQuery) query, boost); } else { query = new BoostQuery(query, boost); }//from w w w . j a v a2 s . c om } if (queryName != null) { context.addNamedQuery(queryName, query); } } return query; }
From source file:org.tallison.lucene.queryparser.spans.AbstractSpanQueryParser.java
License:Apache License
private SpanQuery buildSpanQueryClause(List<SpanQuery> queries, SQPClause clause) throws ParseException { //queries can be null //queries can contain null elements if (queries == null) { return getEmptySpanQuery(); }/* www . j a v a 2 s . c o m*/ SpanQuery q = null; if (clause instanceof SQPOrClause) { q = buildSpanOrQuery(queries); } else if (clause instanceof SQPNearClause) { int slop = ((SQPNearClause) clause).getSlop() == null ? getPhraseSlop() : ((SQPNearClause) clause).getSlop(); Boolean inOrder = ((SQPNearClause) clause).getInOrder(); boolean order = false; if (inOrder == null) { order = slop > 0 ? false : true; } else { order = inOrder.booleanValue(); } q = buildSpanNearQuery(queries, slop, order); } else if (clause instanceof SQPNotNearClause) { q = buildSpanNotNearQuery(queries, ((SQPNotNearClause) clause).getNotPre(), ((SQPNotNearClause) clause).getNotPost()); } else { //throw early and loudly. This should never happen. throw new IllegalArgumentException("clause not recognized: " + clause.getClass()); } if (clause.getBoost() != null) { q = new SpanBoostQuery(q, clause.getBoost()); } //now update boost if clause only had one child if (clause.getBoost() != null && (q instanceof SpanTermQuery || q instanceof SpanMultiTermQueryWrapper)) { q = new SpanBoostQuery(q, clause.getBoost()); } return q; }
From source file:org.tallison.lucene.queryparser.spans.SpanQueryParser.java
License:Apache License
/**
 * Builds the query for the tokens of {@code clause}, i.e. the tokens from
 * {@code clause.getTokenOffsetStart()} (inclusive) to {@code clause.getTokenOffsetEnd()}
 * (exclusive), recursing into nested or-clauses.
 *
 * @param tokens the full token list
 * @param field  field used for terms that are not preceded by a field token
 * @param clause clause whose token range is processed
 * @return the empty span query when nothing was added; the single clause's own query
 *         when there is exactly one clause and it is not MUST_NOT; otherwise a
 *         {@link BooleanQuery} over all collected clauses
 * @throws ParseException if a sub-query cannot be built or too many clauses are added
 * @throws IllegalArgumentException if a token of an unknown type is encountered
 */
private Query parseRecursively(final List<SQPToken> tokens, String field, SQPClause clause)
    throws ParseException {
  int start = clause.getTokenOffsetStart();
  int end = clause.getTokenOffsetEnd();
  testStartEnd(tokens, start, end);
  //if this is a positionRange query, it needs to be handled
  //by the span parser
  if (clause.getStartPosition() != null || clause.getEndPosition() != null) {
    return _parsePureSpanClause(tokens, field, clause);
  }
  List<BooleanClause> clauses = new ArrayList<>();
  // conj/mods/currField buffer the operator, modifier and field seen before the next
  // query-producing token; they are reset after each such token.
  int conj = CONJ_NONE;
  int mods = MOD_NONE;
  String currField = field;
  int i = start;
  while (i < end) {
    Query q = null;
    SQPToken token = tokens.get(i);
    //if boolean defaultOperator or field, update local buffers and continue
    if (token instanceof SQPBooleanOpToken) {
      SQPBooleanOpToken t = (SQPBooleanOpToken) token;
      if (t.isConj()) {
        // a new conjunction also clears any pending modifier
        conj = t.getType();
        mods = MOD_NONE;
      } else {
        mods = t.getType();
      }
      i++;
      continue;
    } else if (token instanceof SQPField) {
      currField = ((SQPField) token).getField();
      i++;
      continue;
    }
    //if or clause, recurse through tokens
    if (token instanceof SQPOrClause) {
      //recurse!
      SQPOrClause tmpOr = (SQPOrClause) token;
      q = parseRecursively(tokens, currField, tmpOr);
      //if it isn't already boosted, apply the boost from the token
      if (!(q instanceof BoostQuery) && !(q instanceof SpanBoostQuery) && tmpOr.getBoost() != null) {
        if (q instanceof SpanQuery) {
          q = new SpanBoostQuery((SpanQuery) q, tmpOr.getBoost());
        } else {
          q = new BoostQuery(q, tmpOr.getBoost());
        }
      }
      // continue at the nested clause's end offset
      i = tmpOr.getTokenOffsetEnd();
    } else if (token instanceof SQPNearClause) {
      SQPNearClause tmpNear = (SQPNearClause) token;
      q = _parsePureSpanClause(tokens, currField, tmpNear);
      i = tmpNear.getTokenOffsetEnd();
    } else if (token instanceof SQPNotNearClause) {
      SQPNotNearClause tmpNotNear = (SQPNotNearClause) token;
      q = _parsePureSpanClause(tokens, currField, tmpNotNear);
      i = tmpNotNear.getTokenOffsetEnd();
    } else if (token instanceof SQPTerminal) {
      SQPTerminal tmpTerm = (SQPTerminal) token;
      // a terminal with a position range is built via buildSpanTerminal instead
      if (tmpTerm.getStartPosition() != null || tmpTerm.getEndPosition() != null) {
        q = buildSpanTerminal(currField, tmpTerm);
      } else {
        q = buildTerminal(currField, tmpTerm);
      }
      i++;
    } else {
      //throw exception because this could lead to an infinite loop
      //if a new token type is added but not properly accounted for.
      throw new IllegalArgumentException(
          "Don't know how to process token of this type: " + token.getClass());
    }
    if (!isEmptyQuery(q)) {
      addClause(clauses, conj, mods, q);
    }
    //reset mods and conj and field
    mods = MOD_NONE;
    conj = CONJ_NONE;
    currField = field;
  }
  if (clauses.size() == 0) {
    return getEmptySpanQuery();
  }
  // a lone clause is returned unwrapped unless it is MUST_NOT
  if (clauses.size() == 1 && clauses.get(0).getOccur() != Occur.MUST_NOT) {
    return clauses.get(0).getQuery();
  }
  BooleanQuery.Builder bq = new BooleanQuery.Builder();
  try {
    for (BooleanClause bc : clauses) {
      bq.add(bc);
    }
  } catch (BooleanQuery.TooManyClauses e) {
    // NOTE(review): only the message is propagated; the original exception is not chained.
    throw new ParseException(e.getMessage());
  }
  // minimum-should-match is only read from or-clauses
  if (clause instanceof SQPOrClause) {
    SQPOrClause orClause = (SQPOrClause) clause;
    if (orClause.getMinimumNumberShouldMatch() != null) {
      bq.setMinimumNumberShouldMatch(orClause.getMinimumNumberShouldMatch());
    }
  }
  return bq.build();
}
From source file:org.tallison.lucene.queryparser.spans.SpanQueryParserBase.java
License:Apache License
/** * * Be careful: this assumes that the sqp terminal is NOT a SpanPositionRangeQuery!!! * * @param fieldName field//from w ww . j av a2 s . c o m * @param terminal terminal * @return Query that was built or <code>null</code> if a stop word * @throws ParseException if an exception is encountered */ protected Query buildTerminal(String fieldName, SQPTerminal terminal) throws ParseException { Query ret; if (terminal instanceof SQPTerm) { ret = newFieldQuery(fieldName, ((SQPTerm) terminal).getString(), ((SQPTerm) terminal).isQuoted() || autoGeneratePhraseQueries, 0); } else if (terminal instanceof SQPFuzzyTerm) { SQPFuzzyTerm ft = (SQPFuzzyTerm) terminal; int tmpPrefixLen = (ft.getPrefixLength() != null) ? ft.getPrefixLength() : getFuzzyPrefixLength(); int tmpMaxEdits = (ft.getMaxEdits() != null) ? Math.min(fuzzyMaxEdits, ft.getMaxEdits()) : getFuzzyMaxEdits(); ret = newFuzzyQuery(fieldName, ft.getString(), tmpMaxEdits, tmpPrefixLen, getMaxExpansions(), ft.isTranspositions()); } else if (terminal instanceof SQPWildcardTerm) { ret = newWildcardQuery(fieldName, terminal.getString()); } else if (terminal instanceof SQPPrefixTerm) { ret = newPrefixQuery(fieldName, terminal.getString()); } else if (terminal instanceof SQPRangeTerm) { SQPRangeTerm rt = (SQPRangeTerm) terminal; ret = newRangeQuery(fieldName, rt.getStart(), rt.getEnd(), rt.getStartInclusive(), rt.getEndInclusive()); } else if (terminal instanceof SQPRegexTerm) { ret = newRegexpQuery(fieldName, terminal.getString()); } else if (terminal instanceof SQPAllDocsTerm) { ret = new MatchAllDocsQuery(); } else { //This should never happen. Throw early and often. throw new IllegalArgumentException("Can't build Query from: " + terminal.getClass()); } if (ret != null && terminal.getBoost() != null) { if (ret instanceof SpanQuery) { ret = new SpanBoostQuery((SpanQuery) ret, terminal.getBoost()); } else { ret = new BoostQuery(ret, terminal.getBoost()); } } return ret; }
From source file:org.tallison.lucene.queryparser.spans.SpanQueryParserBase.java
License:Apache License
SpanQuery addBoostOrPositionRangeIfExists(SpanQuery spanQuery, SQPBoostableOrPositionRangeToken token) { if (spanQuery == null) { return spanQuery; }// w w w. j a v a 2 s . com if (token.getStartPosition() != null || token.getEndPosition() != null) { if (token.getStartPosition() == null) { spanQuery = new SpanFirstQuery(spanQuery, token.getEndPosition()); } else { int end = (token.getEndPosition() == null) ? Integer.MAX_VALUE : token.getEndPosition(); spanQuery = new SpanPositionRangeQuery(spanQuery, token.getStartPosition(), end); } } if (token.getBoost() != null && !(spanQuery instanceof SpanBoostQuery)) { spanQuery = new SpanBoostQuery(spanQuery, token.getBoost()); } return spanQuery; }
From source file:org.tallison.lucene.queryparser.spans.SQPTestBase.java
License:Apache License
/** * THIS IS AN UNHOLY ABOMINATION: this exists here and in LUCENE-5317 and * in the highlighter package. Please, please forgive me. * * We need to factor this out into a standalone helper class until * the difference between Query and SpanQueries disappears. * * Converts a regular query to a {@link org.apache.lucene.search.spans.SpanQuery} for use in a highlighter. * Because of subtle differences in {@link org.apache.lucene.search.spans.SpanQuery} and {@link org.apache.lucene.search.Query}, this * {@link org.apache.lucene.search.spans.SpanQuery} will not necessarily return the same documents as the * initial Query. For example, the generated SpanQuery will not include * clauses of type BooleanClause.Occur.MUST_NOT. Also, the * {@link org.apache.lucene.search.spans.SpanQuery} will only cover a single field, whereas the {@link org.apache.lucene.search.Query} * might contain multiple fields.//from w w w . j a va 2s . com * <p> * Returns an empty SpanQuery if the {@link org.apache.lucene.search.Query} is a class that * is handled, but for some reason can't be converted from a {@link org.apache.lucene.search.Query} to a * {@link org.apache.lucene.search.spans.SpanQuery}. This can happen for many reasons: e.g. if the Query * contains no terms in the requested "field" or the Query is a MatchAllDocsQuery. * <p> * Throws IllegalArgumentException if the Query is a class that is * is not yet handled. * <p> * This class does not rewrite the SpanQuery before returning it. * Clients are required to rewrite if necessary. * <p> * Much of this code is copied directly from * oal.search.highlight.WeightedSpanTermExtractor. There are some subtle * differences. 
* * @param field single field to extract SpanQueries for * @param query query to convert * @return SpanQuery for use in highlighting; can return empty SpanQuery * @throws java.io.IOException for an underlying * IOException in the IndexReader or an IllegalArgumentException if the query type is not recognized */ public SpanQuery convert(String field, Query query) throws IOException { /* * copied nearly verbatim from * org.apache.lucene.search.highlight.WeightedSpanTermExtractor * TODO:refactor to avoid duplication of code if possible. * Beware: there are some subtle differences. */ if (query instanceof SpanQuery) { SpanQuery sq = (SpanQuery) query; if (sq.getField() != null && sq.getField().equals(field)) { return (SpanQuery) query; } else { return getEmptySpanQuery(); } } else if (query instanceof BooleanQuery) { List<BooleanClause> queryClauses = ((BooleanQuery) query).clauses(); List<SpanQuery> spanQs = new ArrayList<SpanQuery>(); for (int i = 0; i < queryClauses.size(); i++) { if (!queryClauses.get(i).isProhibited()) { tryToAdd(field, convert(field, queryClauses.get(i).getQuery()), spanQs); } } if (spanQs.size() == 0) { return getEmptySpanQuery(); } else if (spanQs.size() == 1) { return spanQs.get(0); } else { return new SpanOrQuery(spanQs.toArray(new SpanQuery[spanQs.size()])); } } else if (query instanceof PhraseQuery) { PhraseQuery phraseQuery = ((PhraseQuery) query); Term[] phraseQueryTerms = phraseQuery.getTerms(); if (phraseQueryTerms.length == 0) { return getEmptySpanQuery(); } else if (!phraseQueryTerms[0].field().equals(field)) { return getEmptySpanQuery(); } SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length]; for (int i = 0; i < phraseQueryTerms.length; i++) { clauses[i] = new SpanTermQuery(phraseQueryTerms[i]); } int slop = phraseQuery.getSlop(); int[] positions = phraseQuery.getPositions(); // sum position increments (>1) and add to slop if (positions.length > 0) { int lastPos = positions[0]; int sz = positions.length; for (int i = 1; i < 
sz; i++) { int pos = positions[i]; int inc = pos - lastPos - 1; slop += inc; lastPos = pos; } } boolean inorder = false; if (phraseQuery.getSlop() == 0) { inorder = true; } SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder); if (query instanceof BoostQuery) { return new SpanBoostQuery(sp, ((BoostQuery) query).getBoost()); } else { return sp; } } else if (query instanceof TermQuery) { TermQuery tq = (TermQuery) query; if (tq.getTerm().field().equals(field)) { return new SpanTermQuery(tq.getTerm()); } else { return getEmptySpanQuery(); } } else if (query instanceof ConstantScoreQuery) { return convert(field, ((ConstantScoreQuery) query).getQuery()); } else if (query instanceof DisjunctionMaxQuery) { List<SpanQuery> spanQs = new ArrayList<SpanQuery>(); for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) { tryToAdd(field, convert(field, iterator.next()), spanQs); } if (spanQs.size() == 0) { return getEmptySpanQuery(); } else if (spanQs.size() == 1) { return spanQs.get(0); } else { return new SpanOrQuery(spanQs.toArray(new SpanQuery[spanQs.size()])); } } else if (query instanceof MatchAllDocsQuery) { return getEmptySpanQuery(); } else if (query instanceof MultiPhraseQuery) { final MultiPhraseQuery mpq = (MultiPhraseQuery) query; final Term[][] termArrays = mpq.getTermArrays(); //test for empty or wrong field if (termArrays.length == 0) { return getEmptySpanQuery(); } else if (termArrays.length > 1) { Term[] ts = termArrays[0]; if (ts.length > 0) { Term t = ts[0]; if (!t.field().equals(field)) { return getEmptySpanQuery(); } } } final int[] positions = mpq.getPositions(); if (positions.length > 0) { int maxPosition = positions[positions.length - 1]; for (int i = 0; i < positions.length - 1; ++i) { if (positions[i] > maxPosition) { maxPosition = positions[i]; } } @SuppressWarnings("unchecked") final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1]; int distinctPositions = 0; for (int i = 0; i < 
termArrays.length; ++i) { final Term[] termArray = termArrays[i]; List<SpanQuery> disjuncts = disjunctLists[positions[i]]; if (disjuncts == null) { disjuncts = (disjunctLists[positions[i]] = new ArrayList<SpanQuery>(termArray.length)); ++distinctPositions; } for (int j = 0; j < termArray.length; ++j) { disjuncts.add(new SpanTermQuery(termArray[j])); } } int positionGaps = 0; int position = 0; final SpanQuery[] clauses = new SpanQuery[distinctPositions]; for (int i = 0; i < disjunctLists.length; ++i) { List<SpanQuery> disjuncts = disjunctLists[i]; if (disjuncts != null) { if (disjuncts.size() == 1) { clauses[position++] = disjuncts.get(0); } else { clauses[position++] = new SpanOrQuery( disjuncts.toArray(new SpanQuery[disjuncts.size()])); } } else { ++positionGaps; } } final int slop = mpq.getSlop(); final boolean inorder = (slop == 0); SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder); if (query instanceof BoostQuery) { return new SpanBoostQuery(sp, ((BoostQuery) query).getBoost()); } else { return sp; } } } else if (query instanceof MultiTermQuery) { return new SpanMultiTermQueryWrapper<>((MultiTermQuery) query); } else if (query instanceof SynonymQuery) { List<SpanQuery> clauses = new ArrayList<>(); for (Term term : ((SynonymQuery) query).getTerms()) { clauses.add(new SpanTermQuery(term)); } return new SpanOrQuery(clauses.toArray(new SpanQuery[clauses.size()])); } throw new IllegalArgumentException("Can't convert query of type: " + query.getClass()); }
From source file:org.tallison.lucene.queryparser.spans.TestQPTestBaseSpanQuery.java
License:Apache License
@Override public void testCJKBoostedPhrase() throws Exception { // individual CJK chars as terms SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); List<SpanQuery> clauses = new ArrayList<>(); clauses.add(new SpanTermQuery(new Term("field", ""))); clauses.add(new SpanTermQuery(new Term("field", ""))); SpanQuery expected = new SpanNearQuery(clauses.toArray(new SpanQuery[clauses.size()]), 0, true); expected = new SpanBoostQuery(expected, 0.5f); assertEquals(expected, getQuery("\"\"^0.5", analyzer)); }
From source file:org.tallison.lucene.queryparser.spans.TestQPTestBaseSpanQuery.java
License:Apache License
/**
 * Checks synonym expansion: an unquoted term parses to a {@link SynonymQuery}, a quoted
 * term to a {@link SpanOrQuery} over the synonyms, under both the initial and the AND
 * default operator, and a trailing boost wraps each form in its matching boost query.
 */
public void testSynonyms() throws Exception {
  SpanQuery expectedSpan = new SpanOrQuery(new SpanQuery[] {
      new SpanTermQuery(new Term("field", "dog")),
      new SpanTermQuery(new Term("field", "dogs")) });
  Query expected = new SynonymQuery(new Term("field", "dog"), new Term("field", "dogs"));

  SpanQueryParser qp = new SpanQueryParser("field", new MockSynonymAnalyzer(), null);
  assertEquals(expected, qp.parse("dogs"));
  assertEquals(expectedSpan, qp.parse("\"dogs\""));

  // The default operator must not change how a single term is expanded.
  qp.setDefaultOperator(Operator.AND);
  assertEquals(expected, qp.parse("dogs"));
  assertEquals(expectedSpan, qp.parse("\"dogs\""));

  expected = new BoostQuery(expected, 2f);
  expectedSpan = new SpanBoostQuery(expectedSpan, 2f);
  assertEquals(expected, qp.parse("dogs^2"));
  assertEquals(expectedSpan, qp.parse("\"dogs\"^2"));
}