List of usage examples for org.apache.lucene.search BooleanQuery setMaxClauseCount
public static void setMaxClauseCount(int maxClauseCount)
From source file:com.sindicetech.siren.search.node.TestNodeNumericRangeQuery32.java
License:Open Source License
private void testRandomTrieAndClassicRangeQuery(final int precisionStep) throws Exception { final String field = "field" + precisionStep; int totalTermCountT = 0, totalTermCountC = 0, termCountT, termCountC; final int num = TestUtil.nextInt(random(), 10, 20); BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE); for (int i = 0; i < num; i++) { int lower = (int) (random().nextDouble() * noDocs * distance) + startOffset; int upper = (int) (random().nextDouble() * noDocs * distance) + startOffset; if (lower > upper) { final int a = lower; lower = upper;/*from w ww. j a v a2 s .c o m*/ upper = a; } /* * In SIREn, the numeric type and the precision step are prepended to the * indexed numeric terms. */ final BytesRef lowerBytes = new BytesRef(NumericType.INT.toString() + precisionStep); final BytesRef upperBytes = new BytesRef(NumericType.INT.toString() + precisionStep); final BytesRef lBytes = new BytesRef(NumericUtils.BUF_SIZE_INT); final BytesRef uBytes = new BytesRef(NumericUtils.BUF_SIZE_INT); NumericUtils.intToPrefixCoded(lower, 0, lBytes); NumericUtils.intToPrefixCoded(upper, 0, uBytes); lowerBytes.append(lBytes); upperBytes.append(uBytes); // test inclusive range MultiNodeTermQuery tq = (MultiNodeTermQuery) nmqInt(field, precisionStep, lower, upper, true, true) .getQuery(); MultiNodeTermQuery cq = new NodeTermRangeQuery(field, lowerBytes, upperBytes, true, true); TopDocs tTopDocs = index.searcher.search(dq(tq), 1); TopDocs cTopDocs = index.searcher.search(dq(cq), 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits); totalTermCountT += termCountT = this.countTerms(tq); totalTermCountC += termCountC = this.countTerms(cq); this.checkTermCounts(precisionStep, termCountT, termCountC); // test exclusive range tq = (MultiNodeTermQuery) nmqInt(field, precisionStep, lower, upper, false, false).getQuery(); cq = new NodeTermRangeQuery(field, lowerBytes, upperBytes, false, false); tTopDocs = 
index.searcher.search(dq(tq), 1); cTopDocs = index.searcher.search(dq(cq), 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits); totalTermCountT += termCountT = this.countTerms(tq); totalTermCountC += termCountC = this.countTerms(cq); this.checkTermCounts(precisionStep, termCountT, termCountC); // test left exclusive range tq = (MultiNodeTermQuery) nmqInt(field, precisionStep, lower, upper, false, true).getQuery(); cq = new NodeTermRangeQuery(field, lowerBytes, upperBytes, false, true); tTopDocs = index.searcher.search(dq(tq), 1); cTopDocs = index.searcher.search(dq(cq), 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits); totalTermCountT += termCountT = this.countTerms(tq); totalTermCountC += termCountC = this.countTerms(cq); this.checkTermCounts(precisionStep, termCountT, termCountC); // test right exclusive range tq = (MultiNodeTermQuery) nmqInt(field, precisionStep, lower, upper, true, false).getQuery(); cq = new NodeTermRangeQuery(field, lowerBytes, upperBytes, true, false); tTopDocs = index.searcher.search(dq(tq), 1); cTopDocs = index.searcher.search(dq(cq), 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits); totalTermCountT += termCountT = this.countTerms(tq); totalTermCountC += termCountC = this.countTerms(cq); this.checkTermCounts(precisionStep, termCountT, termCountC); } this.checkTermCounts(precisionStep, totalTermCountT, totalTermCountC); if (VERBOSE && precisionStep != Integer.MAX_VALUE) { System.out.println("Average number of terms during random search on '" + field + "':"); System.out.println(" Numeric query: " + (((double) totalTermCountT) / (num * 4))); System.out.println(" Classical query: " + (((double) totalTermCountC) / (num * 4))); } }
From source file:com.sindicetech.siren.search.node.TestNodeNumericRangeQuery64.java
License:Open Source License
private void testRandomTrieAndClassicRangeQuery(final int precisionStep) throws Exception { final String field = "field" + precisionStep; int totalTermCountT = 0, totalTermCountC = 0, termCountT, termCountC; final int num = TestUtil.nextInt(random(), 10, 20); BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE); for (int i = 0; i < num; i++) { long lower = (long) (random().nextDouble() * noDocs * distance) + startOffset; long upper = (long) (random().nextDouble() * noDocs * distance) + startOffset; if (lower > upper) { final long a = lower; lower = upper;//from ww w . j av a2 s . c om upper = a; } /* * In SIREn, the numeric type and the precision step are prepended to the * indexed numeric terms. */ final BytesRef lowerBytes = new BytesRef(NumericType.LONG.toString() + precisionStep); final BytesRef upperBytes = new BytesRef(NumericType.LONG.toString() + precisionStep); final BytesRef lBytes = new BytesRef(NumericUtils.BUF_SIZE_LONG); final BytesRef uBytes = new BytesRef(NumericUtils.BUF_SIZE_LONG); NumericUtils.longToPrefixCoded(lower, 0, lBytes); NumericUtils.longToPrefixCoded(upper, 0, uBytes); lowerBytes.append(lBytes); upperBytes.append(uBytes); // test inclusive range MultiNodeTermQuery tq = (MultiNodeTermQuery) nmqLong(field, precisionStep, lower, upper, true, true) .getQuery(); MultiNodeTermQuery cq = new NodeTermRangeQuery(field, lowerBytes, upperBytes, true, true); TopDocs tTopDocs = index.searcher.search(dq(tq), 1); TopDocs cTopDocs = index.searcher.search(dq(cq), 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits); totalTermCountT += termCountT = this.countTerms(tq); totalTermCountC += termCountC = this.countTerms(cq); this.checkTermCounts(precisionStep, termCountT, termCountC); // test exclusive range tq = (MultiNodeTermQuery) nmqLong(field, precisionStep, lower, upper, false, false).getQuery(); cq = new NodeTermRangeQuery(field, lowerBytes, upperBytes, false, false); tTopDocs = 
index.searcher.search(dq(tq), 1); cTopDocs = index.searcher.search(dq(cq), 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits); totalTermCountT += termCountT = this.countTerms(tq); totalTermCountC += termCountC = this.countTerms(cq); this.checkTermCounts(precisionStep, termCountT, termCountC); // test left exclusive range tq = (MultiNodeTermQuery) nmqLong(field, precisionStep, lower, upper, false, true).getQuery(); cq = new NodeTermRangeQuery(field, lowerBytes, upperBytes, false, true); tTopDocs = index.searcher.search(dq(tq), 1); cTopDocs = index.searcher.search(dq(cq), 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits); totalTermCountT += termCountT = this.countTerms(tq); totalTermCountC += termCountC = this.countTerms(cq); this.checkTermCounts(precisionStep, termCountT, termCountC); // test right exclusive range tq = (MultiNodeTermQuery) nmqLong(field, precisionStep, lower, upper, true, false).getQuery(); cq = new NodeTermRangeQuery(field, lowerBytes, upperBytes, true, false); tTopDocs = index.searcher.search(dq(tq), 1); cTopDocs = index.searcher.search(dq(cq), 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits); totalTermCountT += termCountT = this.countTerms(tq); totalTermCountC += termCountC = this.countTerms(cq); this.checkTermCounts(precisionStep, termCountT, termCountC); } this.checkTermCounts(precisionStep, totalTermCountT, totalTermCountC); if (VERBOSE && precisionStep != Integer.MAX_VALUE) { System.out.println("Average number of terms during random search on '" + field + "':"); System.out.println(" Numeric query: " + (((double) totalTermCountT) / (num * 4))); System.out.println(" Classical query: " + (((double) totalTermCountC) / (num * 4))); } }
From source file:com.soebes.supose.core.search.SearchRepository.java
License:Open Source License
public TopDocs getQueryResult(String queryLine) { IndexReader reader = null;//from w w w . j a va2 s . com TopDocs result = null; try { reader = IndexReader.open(getIndexDirectory()); setReader(reader); BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE); Searcher searcher = new IndexSearcher(reader); setSearcher(searcher); SortField[] sf = { new SortField(FieldNames.REVISION.toString()), new SortField(FieldNames.FILENAME.toString()), // We use for // sorting // the // filename }; Sort sort = new Sort(sf); // Here we define the default field for searching. QueryParser parser = new CustomQueryParser(FieldNames.CONTENTS.toString(), getAnalyzer()); // We will allow using a wildcard at the beginning of the // expression. parser.setAllowLeadingWildcard(true); // The search term will not be expanded to lowercase. parser.setLowercaseExpandedTerms(true); Query query = parser.parse(queryLine); LOGGER.info("Query: " + query.toString()); // That's not the best idea...but currently i have not better // solution for this... // This is intended to get all results not only a limited number // results. TopDocs tmp = searcher.search(query, null, 20, sort); result = searcher.search(query, null, tmp.totalHits, sort); } catch (CorruptIndexException e) { LOGGER.error("Error: The index is corrupted: ", e); } catch (IOException e) { LOGGER.error("Error: IOException: ", e); } catch (Exception e) { LOGGER.error("Error: Something has gone wrong: ", e); } return result; }
From source file:com.yahoo.bard.webservice.data.config.ConfigurationLoader.java
License:Apache License
/**
 * Constructor.
 *
 * <p>Besides storing the loaders, this sets the default {@code DateTimeZone} to
 * {@code TIMEZONE} and raises the Lucene max clause count to {@link Integer#MAX_VALUE};
 * both are static settings.
 *
 * @param dimensionLoader  DimensionLoader to load dimensions from
 * @param metricLoader  MetricLoader to load metrics from
 * @param tableLoader  TableLoader to load tables from
 */
@Inject
public ConfigurationLoader(DimensionLoader dimensionLoader, MetricLoader metricLoader,
        TableLoader tableLoader) {
    DateTimeZone.setDefault(DateTimeZone.forID(TIMEZONE));

    // Set the max lucene query clauses as high as it can go
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);

    this.dimensionLoader = dimensionLoader;
    this.metricLoader = metricLoader;
    this.tableLoader = tableLoader;
}
From source file:dcu.com.ie.patent.queryreduction.PatentMagdyQueryReduction.java
License:Apache License
/** * Performs Rocchio's query expansion with pseudo feedback for each fields * separatlly qm = alpha * query + ( beta / relevanDocsCount ) * Sum ( rel * docs vector )// www.j av a 2s . co m * * @param query * * @return expandedQuery * * @throws IOException * @throws ParseException */ @Override public Query expandQuery(PatentQuery query) throws ParseException, IOException { IndexReader ir = searcher.getIndexReader(); BooleanQuery bQuery = new BooleanQuery(); BooleanQuery bQueryFieldsExpanded = new BooleanQuery(); BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE); //***************************************************************** //**************** Compute the PRF for field (i)******************* //***************************************************************** TotalHitCountCollector collector = new TotalHitCountCollector(); searcher.search(query.parse(), collector); TopDocs hits = searcher.search(query.parse(), Math.max(1, collector.getTotalHits())); // Compute PRF set // System.err.println(hits.totalHits + " total matching documents for field " + query.getFields()[i] + "."); Query expandedQuery = null; MagdyQueryReduction qe = new MagdyQueryReduction(hits, ir, PatentQuery.getFields()[source], Nbr_Docs, Nbr_Terms); for (int i = 1; i < PatentQuery.getFields().length; i++) { if (query.getQueries()[i] != null && !query.getQueries()[i].equals("") && (i != 4 || i != 6) && query.getBoosts().get(PatentQuery.getFields()[i]) != 0) { QueryParser qp = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[i], new StandardAnalyzer(Version.LUCENE_48)); // BooleanQuery bQueryFields = new BooleanQuery();// Contain a field to make the PRF field by field Query q = qp.parse(query.getQueries()[i]); // if (query.isFilter()) { // Query filter = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[0], // new StandardAnalyzer(Version.LUCENE_48)).parse(query.getQueries()[0]); // bQueryFields.add(filter, BooleanClause.Occur.MUST); // } // if (!(q instanceof BooleanQuery) || 
((BooleanQuery) q).getClauses().length > 0) { // bQueryFields.add(q, BooleanClause.Occur.MUST); // } if (expandedQuery == null) { expandedQuery = qe.reduceQuery(q, PatentQuery.getFields()[i]); } else { BooleanQuery bq = ((BooleanQuery) expandedQuery).clone(); BooleanQuery bq2 = new BooleanQuery(); for (BooleanClause bc : bq.clauses()) { TermQuery tq = (TermQuery) bc.getQuery(); Term term = new Term(PatentQuery.getFields()[i], tq.getTerm().text()); TermQuery tq2 = new TermQuery(term); tq2.setBoost(tq.getBoost()); bq2.add(tq2, BooleanClause.Occur.SHOULD); } expandedQuery = bq2; } bQueryFieldsExpanded.add(expandedQuery, BooleanClause.Occur.SHOULD);// Compute the new expanded query based on PRF set // System.err.println("Expanded Query: " + expandedQuery); // hits = searcher.search(expandedQuery, 100); // System.err.println(hits.totalHits + " total matching documents"+ query.getFields()[i] + "."); } } if (query.isFilter()) { Query q = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[0], new StandardAnalyzer(Version.LUCENE_48)).parse(query.getQueries()[0]); q.setBoost(query.getBoosts().get(PatentQuery.getFields()[0])); bQuery.add(q, BooleanClause.Occur.MUST); } bQuery.add(bQueryFieldsExpanded, BooleanClause.Occur.MUST); // TopDocs hits = searcher.search(bQuery, 100); // System.err.println(hits.totalHits + " total matching documents."); return bQuery; }
From source file:dcu.com.ie.synset.PatentSynSetQueryExpansion.java
@Override public Query expandQuery(PatentQuery query) throws ParseException, IOException { BooleanQuery bQuery = new BooleanQuery(); BooleanQuery bQueryFieldsExpanded = new BooleanQuery(); BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE); Query expandedQuery = null;//from w w w. ja v a 2 s . c o m for (int i = 1; i < PatentQuery.getFields().length; i++) { if (query.getQueries()[i] != null && !query.getQueries()[i].equals("") && (i != 4 || i != 6) && query.getBoosts().get(PatentQuery.getFields()[i]) != 0) { QueryParser qp = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[i], new StandardAnalyzer(Version.LUCENE_48)); Query q = qp.parse(query.getQueries()[i]); if (expandedQuery == null) { BooleanQuery bq; if (q instanceof BooleanQuery) { bq = ((BooleanQuery) q).clone(); } else { bq = new BooleanQuery(); bq.add(q, BooleanClause.Occur.SHOULD); } BooleanQuery bq2 = new BooleanQuery(); for (BooleanClause bc : bq.clauses()) { TermQuery tq = (TermQuery) bc.getQuery(); bq2.add(tq, BooleanClause.Occur.SHOULD); // System.err.println(tq.getTerm().text()); List<Map.Entry<String, Double>> l = synset.getSynSeyList(tq.getTerm().text(), Nbr_Terms); for (Map.Entry<String, Double> e : l) { // System.err.println("\t" + e.getKey() + " -> " + e.getValue()); Term term = new Term(PatentQuery.getFields()[i], e.getKey()); TermQuery tq2 = new TermQuery(term); float boost = tq.getBoost(); if (weigth) { boost *= e.getValue().floatValue(); } tq2.setBoost(boost); bq2.add(tq2, BooleanClause.Occur.SHOULD); } } expandedQuery = bq2; } else { BooleanQuery bq = ((BooleanQuery) expandedQuery).clone(); BooleanQuery bq2 = new BooleanQuery(); for (BooleanClause bc : bq.clauses()) { TermQuery tq = (TermQuery) bc.getQuery(); Term term = new Term(PatentQuery.getFields()[i], tq.getTerm().text()); TermQuery tq2 = new TermQuery(term); tq2.setBoost(tq.getBoost()); bq2.add(tq2, BooleanClause.Occur.SHOULD); } expandedQuery = bq2; } bQueryFieldsExpanded.add(expandedQuery, BooleanClause.Occur.SHOULD); } } 
if (query.isFilter()) { Query q = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[0], new StandardAnalyzer(Version.LUCENE_48)).parse(query.getQueries()[0]); q.setBoost(query.getBoosts().get(PatentQuery.getFields()[0])); bQuery.add(q, BooleanClause.Occur.MUST); } bQuery.add(bQueryFieldsExpanded, BooleanClause.Occur.MUST); // TopDocs hits = searcher.search(bQuery, 100); // System.err.println(hits.totalHits + " total matching documents."); return bQuery; }
From source file:de.ilias.services.lucene.search.RPCSearchHandler.java
License:Open Source License
/** * Multi field searcher/*from w w w . j av a 2s. c o m*/ * Searches in all defined fields. * @todo allow configuration of searchable fields. * * * @param clientKey * @param query */ public String search(String clientKey, String queryString, int pageNumber) { LuceneSettings luceneSettings; LocalSettings.setClientKey(clientKey); IndexSearcher searcher; FieldInfo fieldInfo; String rewrittenQuery; logger.info("Query is: " + queryString); try { long start = new java.util.Date().getTime(); fieldInfo = FieldInfo.getInstance(LocalSettings.getClientKey()); luceneSettings = LuceneSettings.getInstance(LocalSettings.getClientKey()); // Append doctype searcher = SearchHolder.getInstance().getSearcher(); // Rewrite query QueryRewriter rewriter = new QueryRewriter(QueryRewriter.MODE_SEARCH, queryString); rewrittenQuery = rewriter.rewrite(); Vector<Occur> occurs = new Vector<Occur>(); for (int i = 0; i < fieldInfo.getFieldSize(); i++) { occurs.add(BooleanClause.Occur.SHOULD); } MultiFieldQueryParser multiParser = new MultiFieldQueryParser(fieldInfo.getFieldsAsStringArray(), new StandardAnalyzer()); if (luceneSettings.getDefaultOperator() == LuceneSettings.OPERATOR_AND) { multiParser.setDefaultOperator(Operator.AND); } else { multiParser.setDefaultOperator(Operator.OR); } BooleanQuery.setMaxClauseCount(10000); BooleanQuery query = (BooleanQuery) multiParser.parse(rewrittenQuery); logger.info("Max clauses allowed: " + BooleanQuery.getMaxClauseCount()); //BooleanQuery query = (BooleanQuery) MultiFieldQueryParser.parse(rewrittenQuery, // fieldInfo.getFieldsAsStringArray(), // occurs.toArray(new Occur[0]), // new StandardAnalyzer()); for (Object f : fieldInfo.getFields()) { logger.info(((String) f).toString()); } TopDocCollector collector = new TopDocCollector(1000); long s_start = new java.util.Date().getTime(); searcher.search(query, collector); long s_end = new java.util.Date().getTime(); ScoreDoc[] hits = collector.topDocs().scoreDocs; SearchResultWriter writer = new 
SearchResultWriter(hits); writer.setOffset(SearchHolder.SEARCH_LIMIT * (pageNumber - 1)); writer.write(); long end = new java.util.Date().getTime(); logger.info("Total time: " + (end - start)); logger.info("Query time: " + (s_end - s_start)); return writer.toXML(); } catch (ConfigurationException e) { logger.error(e); } catch (IOException e) { logger.warn(e); } catch (ParseException e) { logger.info(e); } catch (Exception e) { StringWriter writer = new StringWriter(); e.printStackTrace(new PrintWriter(writer)); logger.error(writer.toString()); } return ""; }
From source file:de.innovationgate.wgpublisher.lucene.LuceneManager.java
License:Open Source License
public WGResultSet search(WGDatabase db, List<String> fields, String phrase, Map parameters, WGA wga) throws WGQueryException { if (wga == null) { wga = WGA.get(_core);/*from w w w .j a va2 s. c o m*/ } // set max clause count for boolean queries BooleanQuery.setMaxClauseCount(_booleanQueryMaxClauseCount); if (this.isRebuildingIndex()) { throw new WGQueryException(phrase, "Lucene search temporary disabled. Rebuilding lucene index ..."); } // Registering problem in that case but not cancelling the query, as this is old, expected behaviour. The query will just return no results. if (!_core.getLuceneManager().indexIsEnabled(db.getDbReference())) { _core.getProblemRegistry().addProblem( Problem.create(new TMLContext.WebTMLOccasion(), new DatabaseScope(db.getDbReference()), "webtmlProblem.luceneIndexExpected", ProblemSeverity.LOW)); } if (phrase == null || phrase.trim().equals("")) { return null; } try { BooleanQuery wholeQuery = new BooleanQuery(); int max = WGACore.DEFAULT_QUERY_MAXRESULTS; Integer maxResults = (Integer) parameters.get(WGDatabase.QUERYOPTION_MAXRESULTS); if (maxResults != null) { if (maxResults == 0 || maxResults == -1) { max = Integer.MAX_VALUE; } else { max = maxResults; } } // handle dboption EXCLUDEDOCUMENT WGContent excludeContent = (WGContent) parameters.get(WGDatabase.QUERYOPTION_EXCLUDEDOCUMENT); if (excludeContent != null) { String uniqueKey = buildUniqueIndexKey(excludeContent.getDatabase().getDbReference(), excludeContent.getDocumentKey()); wholeQuery.add(new TermQuery(new Term(INDEXFIELD_UNIQUEKEY, uniqueKey)), BooleanClause.Occur.MUST_NOT); wholeQuery.add(new TermQuery(new Term(INDEXFIELD_PARENTKEY, uniqueKey)), BooleanClause.Occur.MUST_NOT); } // list of dbs to search in String searchScope = (String) parameters.get(LuceneManager.QUERYOPTION_SEARCHSCOPE); List searchDBKeys = new ArrayList(); if (searchScope.equals(LuceneManager.SEARCHSCOPE_DB)) { searchDBKeys.add(db.getDbReference()); } if 
(searchScope.equals(LuceneManager.SEARCHSCOPE_DOMAIN)) { Iterator<WGDatabase> dbs = _core .getDatabasesForDomain((String) db.getAttribute(WGACore.DBATTRIB_DOMAIN)).iterator(); while (dbs.hasNext()) { WGDatabase currentDB = dbs.next(); if (wga.openDatabase(currentDB)) { searchDBKeys.add(currentDB.getDbReference()); } } } if (searchScope.equals(LuceneManager.SEARCHSCOPE_WGA)) { Iterator dbs = _core.getContentdbs().values().iterator(); while (dbs.hasNext()) { WGDatabase currentDB = (WGDatabase) dbs.next(); if (wga.openDatabase(currentDB)) { searchDBKeys.add(currentDB.getDbReference()); } } } if (searchScope.equals(LuceneManager.SEARCHSCOPE_DB_LIST)) { String dbListCSV = (String) parameters.get(QUERYOPTION_SEARCHDBKEYS); if (dbListCSV == null || dbListCSV.trim().equals("")) { throw new WGQueryException(phrase, "Search scope is 'dblist' but no db keys given."); } else { Iterator dbkeys = WGUtils.deserializeCollection(dbListCSV, ",").iterator(); while (dbkeys.hasNext()) { String dbkey = (String) dbkeys.next(); WGDatabase currentDB = wga.db(dbkey); if (currentDB.isSessionOpen()) { searchDBKeys.add(dbkey.trim().toLowerCase()); } } } } // Handle language selection; List<WGLanguage> languagesPriorityList = null; boolean filterLanguages = false; if (parameters.containsKey(WGDatabase.QUERYOPTION_LANGUAGES)) { List<WGLanguage> langs = (List<WGLanguage>) parameters.get(WGDatabase.QUERYOPTION_LANGUAGES); if (langs.size() > 1) { BooleanQuery langQuery = new BooleanQuery(); for (WGLanguage lang : langs) { langQuery.add(new TermQuery(new Term(WGContent.META_LANGUAGE, lang.getName())), BooleanClause.Occur.SHOULD); } wholeQuery.add(langQuery, BooleanClause.Occur.MUST); languagesPriorityList = langs; filterLanguages = true; } else if (langs.size() == 1) { wholeQuery.add(new TermQuery(new Term(WGContent.META_LANGUAGE, langs.get(0).getName())), BooleanClause.Occur.MUST); languagesPriorityList = Collections.singletonList(langs.get(0)); } } else if 
(parameters.containsKey(WGDatabase.QUERYOPTION_ONLYLANGUAGE)) { String language = (String) parameters.get(WGDatabase.QUERYOPTION_ONLYLANGUAGE); wholeQuery.add(new TermQuery(new Term(WGContent.META_LANGUAGE, language)), BooleanClause.Occur.MUST); languagesPriorityList = Collections.singletonList(db.getLanguage(language)); } if (languagesPriorityList == null) { languagesPriorityList = getLanguagesForSearchDBKeys(searchDBKeys); ; } // Handle visibility selection if (!parameters.containsKey(WGDatabase.QUERYOPTION_ENHANCE) || parameters.get(WGDatabase.QUERYOPTION_ENHANCE).equals(new Boolean(true))) { wholeQuery.add(new TermQuery(new Term(WGContent.META_VISIBLE, "true")), BooleanClause.Occur.MUST); String role = (String) parameters.get(WGDatabase.QUERYOPTION_ROLE); if (role != null) { if (!role.equalsIgnoreCase(WGContent.DISPLAYTYPE_NONE)) { wholeQuery.add(new TermQuery(new Term("HIDDENIN" + role.toUpperCase(), "false")), BooleanClause.Occur.MUST); } } } if (parameters.containsKey(WGDatabase.QUERYOPTION_ONLYRELEASED)) { wholeQuery.add(new TermQuery(new Term(WGContent.META_STATUS, WGContent.STATUS_RELEASE)), BooleanClause.Occur.MUST); } // build dbQuery (OR combination of all searchDbs indexed by lucene) BooleanQuery dbQuery = new BooleanQuery(); Iterator itSearchDBKeys = searchDBKeys.iterator(); while (itSearchDBKeys.hasNext()) { String currentDBKey = (String) itSearchDBKeys.next(); if (_indexedDbs.containsKey(currentDBKey)) { dbQuery.add(new TermQuery(new Term(INDEXFIELD_DBKEY, currentDBKey)), BooleanClause.Occur.SHOULD); } } wholeQuery.add(dbQuery, BooleanClause.Occur.MUST); // Add parsed search phrase. 
// Search in allcontent for each language using the configured analyzer // if no analyzer is configured for a language search at least with one // default analyzer boolean searchWithDefaultAnalyzer = false; //if no languages found search at least with DefaultAnalyzer if (languagesPriorityList.size() <= 0) { searchWithDefaultAnalyzer = true; } // parse native options Sort sort = null; String sortFieldName = ""; Operator defaultOperator = QueryParser.AND_OPERATOR; String nativeOptionsStr = (String) parameters.get(WGDatabase.QUERYOPTION_NATIVEOPTIONS); boolean includeVirtualContent = false; String doctype = DOCTYPE_CONTENT; if (nativeOptionsStr != null) { Iterator nativeOptions = WGUtils.deserializeCollection(nativeOptionsStr, ",", true).iterator(); while (nativeOptions.hasNext()) { String option = (String) nativeOptions.next(); if (option.startsWith("sort:")) { sortFieldName = option.substring(5).trim(); boolean reverse = false; if (sortFieldName.toLowerCase().endsWith("(asc)")) { sortFieldName = sortFieldName.substring(0, sortFieldName.length() - 5).trim(); } else if (sortFieldName.toLowerCase().endsWith("(desc)")) { sortFieldName = sortFieldName.substring(0, sortFieldName.length() - 6).trim(); reverse = true; } if (sortFieldName.length() > 0) { char first = sortFieldName.charAt(0); if (first >= 'A' && first <= 'Z') { // meta sort sortFieldName = sortFieldName.toUpperCase(); } else { // item sort sortFieldName = sortFieldName.toLowerCase(); } } // sort order currently only german sort = new Sort(new SortField(SORTITEM_PREFIX + sortFieldName, Locale.GERMANY, reverse)); } else if (option.equalsIgnoreCase(NATIVE_QUERYOPTION_INCLUDEVIRTUALCONTENT)) { includeVirtualContent = true; } else if (option.startsWith("doctype:")) { doctype = option.substring("doctype:".length()).trim(); } else if (option.startsWith("operator:")) { String op = option.substring("operator:".length()).trim(); if (op.equalsIgnoreCase("or")) defaultOperator = QueryParser.OR_OPERATOR; } } } if 
(!includeVirtualContent) { wholeQuery.add(new TermQuery(new Term(INDEXFIELD_ISVIRTUALCONTENT, String.valueOf(true))), BooleanClause.Occur.MUST_NOT); } // handle doctype option // we cannot be sure that all documents in index already contains the field DOCTYPE (introduced with OpenWGA 7.1) therefore we have to perform some excludes if (doctype.equals(DOCTYPE_CONTENT)) { wholeQuery.add(new TermQuery(new Term(INDEXFIELD_DOCTYPE, DOCTYPE_ATTACHMENT)), BooleanClause.Occur.MUST_NOT); } else if (!doctype.equals(DOCTYPE_ALL)) { wholeQuery.add(new TermQuery(new Term(INDEXFIELD_DOCTYPE, doctype)), BooleanClause.Occur.MUST); } //build phrase query BooleanQuery phraseQuery = new BooleanQuery(); phraseQuery.setBoost(10); Iterator languageList = languagesPriorityList.iterator(); List<String> searchFields = new ArrayList<String>(); Map<String, Float> searchBoosts = new HashMap<String, Float>(); for (String field : fields) { String[] parts = field.split("\\^"); searchFields.add(parts[0]); if (parts.length == 2) { searchBoosts.put(parts[0], Float.parseFloat(parts[1])); } } if (!searchFields.contains("allcontent")) searchFields.add("allcontent"); if (!searchFields.contains("TITLE")) searchFields.add("TITLE"); if (!searchFields.contains("DESCRIPTION")) searchFields.add("DESCRIPTION"); if (!searchFields.contains("KEYWORDS")) searchFields.add("KEYWORDS"); while (languageList.hasNext()) { WGLanguage languageItem = (WGLanguage) languageList.next(); Analyzer analyzer = _core.getAnalyzerForLanguageCode(languageItem.getName().substring(0, 2)); if (analyzer != null) { QueryParser parser = new IndexingRuleBasedQueryParser(searchFields.toArray(new String[0]), analyzer, searchBoosts, _indexedDbs, searchDBKeys, _metaKeywordFields); parser.setDefaultOperator(defaultOperator); Query query = parser.parse(phrase); if (filterLanguages) { BooleanQuery testPhraseAndLangQuery = new BooleanQuery(); testPhraseAndLangQuery.add(query, BooleanClause.Occur.MUST); testPhraseAndLangQuery.add( new TermQuery(new 
Term(WGContent.META_LANGUAGE, languageItem.getName())), BooleanClause.Occur.MUST); phraseQuery.add(testPhraseAndLangQuery, BooleanClause.Occur.SHOULD); } else { phraseQuery.add(query, BooleanClause.Occur.SHOULD); } } else { searchWithDefaultAnalyzer = true; } } if (searchWithDefaultAnalyzer) { QueryParser parser = new IndexingRuleBasedQueryParser(searchFields.toArray(new String[0]), _core.getDefaultAnalyzer(), searchBoosts, _indexedDbs, searchDBKeys, _metaKeywordFields); parser.setDefaultOperator(defaultOperator); Query query = parser.parse(phrase); phraseQuery.add(query, BooleanClause.Occur.SHOULD); } //LOG.info(phraseQuery.toString()); wholeQuery.add(phraseQuery, BooleanClause.Occur.MUST); TopDocs hits; //register executed query as output parameter parameters.put(WGDatabase.QUERYOPTION_RETURNQUERY, wholeQuery.toString()); // simplify query and register as taginfo parameters.put(TAGINFO_SIMPLIFIEDQUERY, rewrite(wholeQuery)); long timeBefore = System.currentTimeMillis(); if (sort != null) { try { hits = search(wholeQuery, max, sort); } catch (NullPointerException e) { // lucene bug when sorting for non existing fields with Locale throw new WGQueryException(wholeQuery.toString(), "Sortfield '" + sortFieldName + "' not indexed."); } } else { try { hits = search(wholeQuery, max, null); } catch (BooleanQuery.TooManyClauses e) { parameters.put(TAGINFO_UNSPECIFICQUERY, new Boolean(true)); throw new WGQueryException(phrase, "Too many BooleanClauses in query. " + "Please use a more specific query or increase value of " + "'booleanQueryMaxClauseCount' via WGAManager. 
Current value is '" + this.getBooleanQueryMaxClauseCount() + "'."); } } long timeAfter = System.currentTimeMillis(); long executionTime = timeAfter - timeBefore; LuceneResultSet resultSet; if (filterLanguages) { resultSet = new LuceneLanguageChoosingResultSet(hits, wga, parameters, wholeQuery, executionTime, languagesPriorityList); } else { resultSet = new LuceneMultiDBResultSet(hits, wga, parameters, wholeQuery, executionTime); } // put resultset in per thread list List rsList = (List) _resultsetList.get(); if (rsList == null) { rsList = new LinkedList(); _resultsetList.set(rsList); } rsList.add(resultSet); return resultSet; } catch (org.apache.lucene.queryParser.ParseException e) { throw new WGQueryException("Unable to parse lucene query", e.getMessage(), e); } catch (Exception e) { LOG.error("Error executing lucene search: " + e.getClass().getName() + " - " + e.getMessage(), e); throw new WGQueryException(phrase, e.getClass().getName() + ": " + e.getMessage(), e); } }
From source file:de.u808.simpleinquest.service.search.SearchManager.java
License:Apache License
public SearchResult search(String searchString) throws ParseException, IOException { Hits hits = null;/*w w w . j a v a2 s. co m*/ SearchResult searchResult = new SearchResult(); searchResult.setSearchString(searchString); searchResult.setSearchPerformed(true); if (StringUtils.isNotEmpty(searchString)) { if (searchCach.containsKey(searchString)) { return searchCach.get(searchString); } else { Element element = globalSearchCache.getCache().get(searchString); if (element != null) { hits = (Hits) element.getObjectValue(); searchResult.setHits(hits); searchCach.put(searchString, searchResult); } else { // Query query = new QueryParser(Indexer.CONTENT_FIELD_NAME, new // StandardAnalyzer()).parse(searchString); // search.setHits(indexSearchBean.getIndexSearcher().search(query)); //TEST if (indexSearchBean.getIndexSearcher() != null) { BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE); String[] fields = { Indexer.AUTOR_FIELD_NAME, Indexer.CONTENT_FIELD_NAME, Indexer.TITLE_FIELD_NAME }; Analyzer analyzer = new StandardAnalyzer(); QueryParser qp = new MultiFieldQueryParser(fields, analyzer); qp.setDefaultOperator(QueryParser.Operator.AND); Query query = qp.parse(searchString); hits = indexSearchBean.getIndexSearcher().search(query); searchResult.setHits(hits); searchCach.put(searchString, searchResult); globalSearchCache.getCache().put(new Element(searchString, hits)); } else { //TODO check lang log.warn("Index dos not exist! Returning null!"); //TODO display Info } } } } return searchResult; }
From source file:edu.cmu.geolocator.resource.gazindexing.CollaborativeIndex.CollaborativeIndex.java
License:Apache License
public CollaborativeIndex open() { try {/*from w ww .j a v a 2 s . c o m*/ stringSearcher = GetReader.getIndexSearcher(stringIndexName, stringLoad); // for setting the max clause count for search query. BooleanQuery.setMaxClauseCount(2500); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } try { infoSearcher = GetReader.getIndexSearcher(infoIndexName, infoLoad); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } return this; }