List of usage examples for org.apache.lucene.search BooleanQuery setMaxClauseCount
public static void setMaxClauseCount(int maxClauseCount)
From source file:it.unipd.dei.ims.falcon.ranking.SegmentQuery.java
License:Apache License
/**
 * Creates an empty {@link SegmentQuery}.
 * A {@link SegmentQuery} is a {@link org.apache.lucene.search.BooleanQuery}
 * with a number of {@link org.apache.lucene.search.BooleanClause} that is
 * at most "segmentLength", that is, the number of hashes in a segment of the query.
 *
 * <p>The clause limit of {@link org.apache.lucene.search.BooleanQuery} is a static
 * setting shared by every boolean query, so this constructor only ever raises it;
 * it never lowers a limit that other code may already have increased.
 *
 * @param segmentLength number of hashes in a segment of the query; must be at least 1
 * @throws IllegalArgumentException if {@code segmentLength} is smaller than 1
 */
public SegmentQuery(int segmentLength) {
    // create a Lucene Boolean Query
    super(true);
    // Same failure mode as handing an invalid value to setMaxClauseCount directly.
    if (segmentLength < 1) {
        throw new IllegalArgumentException("maxClauseCount must be >= 1");
    }
    // Default limit is 1024, but the number of hashes per segment can be
    // segmentLength. Only raise the shared limit: lowering it would break
    // unrelated, larger queries running in the same process.
    if (segmentLength > BooleanQuery.getMaxClauseCount()) {
        BooleanQuery.setMaxClauseCount(segmentLength);
    }
}
From source file:lucene.LuceneCandidateProvider.java
License:Apache License
public ResEntry[] getCandidates(int queryNum, String query, int maxQty) throws Exception { ArrayList<String> toks = new ArrayList<String>(); for (String s : mSpaceSplit.split(query)) { toks.add(s);/*from ww w. j av a 2 s. co m*/ } if (2 * toks.size() > BooleanQuery.getMaxClauseCount()) { // This a heuristic, but it should work fine in many cases BooleanQuery.setMaxClauseCount(2 * toks.size()); } ArrayList<ResEntry> resArr = new ArrayList<ResEntry>(); Query queryParsed = mParser.parse(query); TopDocs hits = mSearcher.search(queryParsed, maxQty); ScoreDoc[] scoreDocs = hits.scoreDocs; for (ScoreDoc oneHit : scoreDocs) { Document doc = mSearcher.doc(oneHit.doc); String id = doc.get(UtilConst.FIELD_ID); float score = oneHit.score; resArr.add(new ResEntry(id, score)); } ResEntry[] results = resArr.toArray(new ResEntry[resArr.size()]); Arrays.sort(results); return results; }
From source file:net.hillsdon.reviki.web.dispatching.impl.DispatcherServlet.java
License:Apache License
@Override public void init(final ServletConfig config) throws ServletException { super.init(config); // This package cycle is fundamental... I figure we'd fix it by // putting the impl class name in the web.xml so this is a reasonable // temporary step to get back to zero cycles. try {//from www. jav a2 s . c o m final ApplicationSession applicationSession = new PicoBuilder().build() .addComponent(Class.forName("net.hillsdon.reviki.di.impl.ApplicationSessionImpl")) .addComponent(config).addComponent(config.getServletContext()) .getComponent(ApplicationSession.class); applicationSession.start(); _dispatcher = applicationSession.getDispatcher(); // Default limit of 1024 clauses was too small in some cases (e.g. when searching for "story"). // Therefore this limit was increased in r1202, to allow searching for common terms. // This limit was tested on search with 2500 results and worked well. // Should be more than enough for the internal wikis at the moment. BooleanQuery.setMaxClauseCount(4096); } catch (ClassNotFoundException e) { throw new ServletException("Root session class not found", e); } }
From source file:net.semanticmetadata.lire.impl.searcher.VisualWordsImageSearcher.java
License:Open Source License
public VisualWordsImageSearcher(int numMaxHits, Similarity similarity, String fieldName) { this.similarity = similarity; this.numMaxHits = numMaxHits; this.fieldName = fieldName; qp = new QueryParser(LuceneUtils.LUCENE_VERSION, fieldName, new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION)); BooleanQuery.setMaxClauseCount(10000); }
From source file:net.semanticmetadata.lire.impl.searcher.VisualWordsImageSearcher.java
License:Open Source License
public VisualWordsImageSearcher(int numMaxHits, String fieldName) { this.numMaxHits = numMaxHits; this.fieldName = fieldName; qp = new QueryParser(LuceneUtils.LUCENE_VERSION, fieldName, new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION)); BooleanQuery.setMaxClauseCount(10000); }
From source file:net.semanticmetadata.lire.searchers.VisualWordsImageSearcher.java
License:Open Source License
public VisualWordsImageSearcher(int numMaxHits, Similarity similarity, String fieldName) { this.similarity = similarity; this.numMaxHits = numMaxHits; this.fieldName = fieldName; qp = new QueryParser(fieldName, new WhitespaceAnalyzer()); BooleanQuery.setMaxClauseCount(10000); }
From source file:net.semanticmetadata.lire.searchers.VisualWordsImageSearcher.java
License:Open Source License
public VisualWordsImageSearcher(int numMaxHits, String fieldName) { this.numMaxHits = numMaxHits; this.fieldName = fieldName; qp = new QueryParser(fieldName, new WhitespaceAnalyzer()); BooleanQuery.setMaxClauseCount(10000); }
From source file:net.sf.zekr.engine.search.lucene.QuranTextSearcher.java
/** * Main search method, for internal use. * //from ww w . j a va 2 s .c o m * @param q query string * @return a list of highlighted string objects. * @throws SearchException */ private List<SearchResultItem> internalSearch(String q) throws SearchException { IndexSearcher is = null; try { is = new IndexSearcher(zekrIndexReader.indexReader); // analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); // resultTokenStream = new StandardTokenizer(Version.LUCENE_CURRENT, reader); QueryParser parser = QueryParserFactory.create(Version.LUCENE_CURRENT, QuranTextIndexer.CONTENTS_FIELD, analyzer); // allow search terms like "*foo" with leading star parser.setAllowLeadingWildcard(true); // parser.setFuzzyPrefixLength(10); // if this line is not set, highlighter doesn't work in in wildcard queries while query.rewrite() is done. // and sorting also doesn't work correctly for wildcard queries. parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); logger.debug("Parse query."); query = parser.parse(q); BooleanQuery.setMaxClauseCount(maxClauseCount); logger.debug("Rewrite query."); query = query.rewrite(zekrIndexReader.indexReader); // required to expand search terms logger.debug("Searching for: " + query.toString()); // Hits hits; TopFieldDocs tops = null; is.setDefaultFieldSortScoring(true, true); if (searchScope != null && searchScope.getScopeItems().size() > 0) { String scopeQuery = makeSearchScope(); logger.debug("Scope is: " + scopeQuery); // hits = is.search(query, new QuranRangeFilter(searchScope), sortResultOrder); tops = is.search(query, new QuranRangeFilter(searchScope), maxSearchResult, sortResultOrder); } else { // hits = is.search(query, new QueryWrapperFilter(query), 20, sortResultOrder); tops = is.search(query, new QueryWrapperFilter(query), maxSearchResult, sortResultOrder); } logger.debug("Highlight search result."); Highlighter highlighter = new Highlighter(highlightFormatter, new QueryScorer(query)); // 
highlighter.setFragmentScorer(new QueryTermScorer(query)); int total = Math.min(maxSearchResult, tops.totalHits); List<SearchResultItem> res = new ArrayList<SearchResultItem>(total); for (int i = 0; i < total; i++) { ScoreDoc[] sd = tops.scoreDocs; Document doc = is.doc(sd[i].doc); final String contents = doc.get(QuranTextIndexer.CONTENTS_FIELD); final IQuranLocation location = new QuranLocation(doc.get(QuranTextIndexer.LOCATION_FIELD)); TokenStream tokenStream = analyzer.tokenStream(QuranTextIndexer.CONTENTS_FIELD, new StringReader(contents)); // String resultStr = highlighter.getBestFragment(tokenStream, contents); String resultStr = highlighter.getBestFragments(tokenStream, contents, 100, "..."); SearchResultItem sri = new SearchResultItem(resultStr, location); res.add(sri); } matchedItemCount = highlightFormatter.getHighlightCount(); // highlightedTermList = highlightFormatter.getHighlightedTermList(); return res; } catch (Exception e) { throw new SearchException(e); } finally { if (is != null) { try { is.close(); } catch (IOException e) { } } } }
From source file:nicta.com.au.failureanalysis.optimalquery.OptPatentQuery.java
/**
 * Builds the Lucene query for this patent query.
 *
 * <p>Every field from index 1 onwards that has a non-empty query text and a
 * non-zero boost contributes an optional (SHOULD) clause; the queries at
 * indices 4 and 6 are parsed against the preceding field. When {@code filter}
 * is set, the query for field 0 is added as a mandatory (MUST) clause. The
 * static {@link BooleanQuery} clause limit is set to {@link Integer#MAX_VALUE}.
 *
 * @return the combined boolean query
 * @throws ParseException if one of the query strings cannot be parsed
 * @throws IllegalArgumentException if {@code queries} and {@code fields} differ in length
 */
public Query parse() throws ParseException {
    if (queries.length != fields.length) {
        throw new IllegalArgumentException("queries.length != fields.length");
    }

    BooleanQuery combined = new BooleanQuery();
    BooleanQuery perFieldQueries = new BooleanQuery();
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);

    for (int i = 1; i < fields.length; i++) {
        // Nothing to contribute for missing/empty query text or a zero boost.
        if (queries[i] == null || queries[i].equals("") || boosts.get(fields[i]) == 0) {
            continue;
        }

        // Queries 4 and 6 are run against the field just before them.
        String targetField = (i == 4 || i == 6) ? fields[i - 1] : fields[i];
        QueryParser fieldParser = new QueryParser(Version.LUCENE_48, targetField,
                new StandardAnalyzer(Version.LUCENE_48));

        Query fieldQuery = fieldParser.parse(queries[i]);
        fieldQuery.setBoost(boosts.get(fields[i]));

        // A boolean query without clauses would add nothing; skip it.
        boolean emptyBoolean = fieldQuery instanceof BooleanQuery
                && ((BooleanQuery) fieldQuery).getClauses().length <= 0;
        if (!emptyBoolean) {
            perFieldQueries.add(fieldQuery, BooleanClause.Occur.SHOULD);
        }
    }

    if (filter) {
        QueryParser filterParser = new QueryParser(Version.LUCENE_48, fields[0],
                new StandardAnalyzer(Version.LUCENE_48));
        Query filterQuery = filterParser.parse(queries[0]);
        filterQuery.setBoost(boosts.get(fields[0]));
        combined.add(filterQuery, BooleanClause.Occur.MUST);
    }

    combined.add(perFieldQueries, BooleanClause.Occur.MUST);
    return combined;
}
From source file:nicta.com.au.patent.queryexpansion.PatentMMRQueryExpansion.java
License:Apache License
/** * Performs Rocchio's query expansion with pseudo feedback for each fields * separatlly qm = alpha * query + ( beta / relevanDocsCount ) * Sum ( rel * docs vector )/*w ww. j av a 2 s . c o m*/ * * @param query * * @return expandedQuery * * @throws IOException * @throws ParseException */ @Override public Query expandQuery(PatentQuery query) throws ParseException, IOException { IndexReader ir = searcher.getIndexReader(); BooleanQuery bQuery = new BooleanQuery(); BooleanQuery bQueryFieldsExpanded = new BooleanQuery(); BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE); //***************************************************************** //**************** Compute the PRF for field (i)******************* //***************************************************************** TotalHitCountCollector collector = new TotalHitCountCollector(); searcher.search(query.parse(), collector); TopDocs hits = searcher.search(query.parse(), Math.max(1, collector.getTotalHits())); // Compute PRF set Map<String, Float> parameters = new HashMap<>(); parameters.put(RocchioQueryExpansion.ROCCHIO_ALPHA_FLD, (float) 1.0); parameters.put(RocchioQueryExpansion.ROCCHIO_BETA_FLD, (float) 1.0); parameters.put(RocchioQueryExpansion.ROCCHIO_GAMMA_FLD, (float) 1.0); parameters.put(RocchioQueryExpansion.DECAY_FLD, (float) 0.0); Map<String, Float> rocchioVector = null; if (model.equals("tfidf")) { RocchioQueryExpansion rocchioQueryExpansion = new RocchioQueryExpansion(hits, ir, parameters, source, Nbr_Docs, Nbr_Terms); rocchioVector = rocchioQueryExpansion.getRocchioVector(PatentQuery.getFields()[source]); } // System.err.println(hits.totalHits + " total matching documents for field " + query.getFields()[i] + "."); Query expandedQuery = null; MMRQueryExpansion qe = new MMRQueryExpansion(hits, ir, rocchioVector, MMRQE_ALPHA, MMRQE_LAMBDA, PatentQuery.getFields()[source], Nbr_Docs, Nbr_Terms); for (int i = 1; i < PatentQuery.getFields().length; i++) { if (query.getQueries()[i] != null && 
!query.getQueries()[i].equals("") && (i != 4 || i != 6) && query.getBoosts().get(PatentQuery.getFields()[i]) != 0) { QueryParser qp = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[i], new StandardAnalyzer(Version.LUCENE_48)); // BooleanQuery bQueryFields = new BooleanQuery();// Contain a field to make the PRF field by field Query q = qp.parse(query.getQueries()[i]); // if (query.isFilter()) { // Query filter = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[0], // new StandardAnalyzer(Version.LUCENE_48)).parse(query.getQueries()[0]); // bQueryFields.add(filter, BooleanClause.Occur.MUST); // } // if (!(q instanceof BooleanQuery) || ((BooleanQuery) q).getClauses().length > 0) { // bQueryFields.add(q, BooleanClause.Occur.MUST); // } if (expandedQuery == null) { expandedQuery = qe.expandQuery(q, PatentQuery.getFields()[i]); } else { BooleanQuery bq = ((BooleanQuery) expandedQuery).clone(); BooleanQuery bq2 = new BooleanQuery(); for (BooleanClause bc : bq.clauses()) { TermQuery tq = (TermQuery) bc.getQuery(); Term term = new Term(PatentQuery.getFields()[i], tq.getTerm().text()); TermQuery tq2 = new TermQuery(term); tq2.setBoost(tq.getBoost()); bq2.add(tq2, BooleanClause.Occur.SHOULD); } expandedQuery = bq2; } bQueryFieldsExpanded.add(expandedQuery, BooleanClause.Occur.SHOULD);// Compute the new expanded query based on PRF set // System.err.println("Expanded Query: " + expandedQuery); // hits = searcher.search(expandedQuery, 100); // System.err.println(hits.totalHits + " total matching documents"+ query.getFields()[i] + "."); } } if (query.isFilter()) { Query q = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[0], new StandardAnalyzer(Version.LUCENE_48)).parse(query.getQueries()[0]); q.setBoost(query.getBoosts().get(PatentQuery.getFields()[0])); bQuery.add(q, BooleanClause.Occur.MUST); } bQuery.add(bQueryFieldsExpanded, BooleanClause.Occur.MUST); // TopDocs hits = searcher.search(bQuery, 100); // System.err.println(hits.totalHits + " 
total matching documents."); return bQuery; }