List of usage examples for org.apache.lucene.search BooleanQuery setMaxClauseCount
public static void setMaxClauseCount(int maxClauseCount)
From source file:org.geotoolkit.lucene.index.LuceneIndexSearcher.java
License:Open Source License
/** * This method proceed a lucene search and returns a list of ID. * * @param spatialQueryI The lucene query string with spatials filters. * * @return A List of metadata identifiers. * @throws SearchingException//from w w w . j av a 2 s . co m */ public Set<String> doSearch(final SpatialQuery spatialQueryI) throws SearchingException { org.geotoolkit.lucene.filter.SpatialQuery spatialQuery = (org.geotoolkit.lucene.filter.SpatialQuery) spatialQueryI; try { final long start = System.currentTimeMillis(); final Set<String> results = new LinkedHashSet<>(); spatialQuery.applyRtreeOnFilter(rTree, envelopeOnly); //we look for a cached Query if (isCacheEnabled && cachedQueries.containsKey(spatialQuery)) { final Set<String> cachedResults = cachedQueries.get(spatialQuery); LOGGER.log(logLevel, "returning result from cache ({0} matching documents)", results.size()); return cachedResults; } int maxRecords = (int) searcher.collectionStatistics("id").maxDoc(); if (maxRecords == 0) { LOGGER.warning("The index seems to be empty."); maxRecords = 1; } final String field = "title"; String stringQuery = spatialQuery.getQuery(); final QueryParser parser = new ExtendedQueryParser(field, analyzer, numericFields); parser.setDefaultOperator(Operator.AND); // remove term:* query stringQuery = removeOnlyWildchar(stringQuery); // escape '/' character stringQuery = stringQuery.replace("/", "\\/"); // we enable the leading wildcard mode if the first character of the query is a '*' if (stringQuery.indexOf(":*") != -1 || stringQuery.indexOf(":?") != -1 || stringQuery.indexOf(":(*") != -1 || stringQuery.indexOf(":(+*") != -1 || stringQuery.indexOf(":+*") != -1) { parser.setAllowLeadingWildcard(true); LOGGER.log(Level.FINER, "Allowing leading wildChar"); BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE); } //we set off the mecanism setting all the character to lower case // we do that for range queries only for now. 
TODO see if we need to set it every time if (stringQuery.contains(" TO ")) { parser.setLowercaseExpandedTerms(false); } final Query query; if (!stringQuery.isEmpty()) { query = parser.parse(stringQuery); } else { query = SIMPLE_QUERY; } LOGGER.log(Level.FINER, "QueryType:{0}", query.getClass().getName()); final Filter filter = spatialQuery.getSpatialFilter(); final LogicalFilterType operator = spatialQuery.getLogicalOperator(); final Sort sort = spatialQuery.getSort(); String sorted = ""; if (sort != null) { sorted = "\norder by: " + sort.toString(); } String f = ""; if (filter != null) { f = '\n' + filter.toString(); } String operatorValue = ""; if (!(operator == LogicalFilterType.AND || (operator == LogicalFilterType.OR && filter == null))) { operatorValue = '\n' + SerialChainFilter.valueOf(operator); } LOGGER.log(logLevel, "Searching for: " + query.toString(field) + operatorValue + f + sorted + "\nmax records: " + maxRecords); // simple query with an AND if (operator == LogicalFilterType.AND || (operator == LogicalFilterType.OR && filter == null)) { final TopDocs docs; if (sort != null) { docs = searcher.search(query, filter, maxRecords, sort); } else { docs = searcher.search(query, filter, maxRecords); } for (ScoreDoc doc : docs.scoreDocs) { addToResult(results, doc.doc); } // for a OR we need to perform many request } else if (operator == LogicalFilterType.OR) { final TopDocs hits1; final TopDocs hits2; if (sort != null) { hits1 = searcher.search(query, null, maxRecords, sort); hits2 = searcher.search(SIMPLE_QUERY, spatialQuery.getSpatialFilter(), maxRecords, sort); } else { hits1 = searcher.search(query, maxRecords); hits2 = searcher.search(SIMPLE_QUERY, spatialQuery.getSpatialFilter(), maxRecords); } for (ScoreDoc doc : hits1.scoreDocs) { addToResult(results, doc.doc); } for (ScoreDoc doc : hits2.scoreDocs) { addToResult(results, doc.doc); } // for a NOT we need to perform many request } else if (operator == LogicalFilterType.NOT) { final TopDocs hits1; if 
(sort != null) { hits1 = searcher.search(query, filter, maxRecords, sort); } else { hits1 = searcher.search(query, filter, maxRecords); } final Set<String> unWanteds = new LinkedHashSet<>(); for (ScoreDoc doc : hits1.scoreDocs) { addToResult(unWanteds, doc.doc); } final TopDocs hits2; if (sort != null) { hits2 = searcher.search(SIMPLE_QUERY, null, maxRecords, sort); } else { hits2 = searcher.search(SIMPLE_QUERY, maxRecords); } for (ScoreDoc doc : hits2.scoreDocs) { final String id = identifiers.get(doc.doc); if (id != null && !unWanteds.contains(id)) { results.add(id); } } } else { throw new IllegalArgumentException("unsupported logical Operator"); } // if we have some subQueries we execute it separely and merge the result if (spatialQuery.getSubQueries().size() > 0) { if (operator == LogicalFilterType.OR && query.equals(SIMPLE_QUERY)) { results.clear(); } for (SpatialQuery sub : spatialQuery.getSubQueries()) { final Set<String> subResults = doSearch(sub); if (operator == LogicalFilterType.AND) { final Set<String> toRemove = new HashSet<>(); for (String r : results) { if (!subResults.contains(r)) { toRemove.add(r); } } results.removeAll(toRemove); } else if (operator == LogicalFilterType.OR) { results.addAll(subResults); } else { LOGGER.warning("unimplemented case in doSearch"); } } } //we put the query in cache putInCache(spatialQuery, results); LOGGER.log(logLevel, results.size() + " total matching documents (" + (System.currentTimeMillis() - start) + "ms)"); return results; } catch (ParseException ex) { throw new SearchingException("Parse Exception while performing lucene request", ex); } catch (IOException ex) { throw new SearchingException("IO Exception while performing lucene request", ex); } }
From source file:org.getopt.luke.Luke.java
License:Apache License
/**
 * Create a Query instance that corresponds to values selected in the UI,
 * such as analyzer class name and arguments, and default field.
 *
 * <p>As a side effect this sets the JVM-wide {@code BooleanQuery} max clause count
 * to the value entered in the UI; a value that cannot be parsed, or that is smaller
 * than 1, is replaced by the default of 1024.</p>
 *
 * @param queryString the query text; parsed as an XML query when the XML parser
 *        option is selected, otherwise by the {@code QueryParser}
 * @return the parsed query, or {@code null} when no analyzer could be created
 * @throws Exception if the query cannot be parsed
 */
public Query createQuery(String queryString) throws Exception {
    Object srchOpts = find("srchOptTabs");
    Analyzer analyzer = createAnalyzer(srchOpts);
    if (analyzer == null) {
        return null;
    }
    String defField = getDefaultField(srchOpts);
    QueryParser qp = new QueryParser(LV, defField, analyzer);
    Object ckXmlParser = find(srchOpts, "ckXmlParser");
    Object ckWild = find(srchOpts, "ckWild");
    Object ckPosIncr = find(srchOpts, "ckPosIncr");
    Object ckLoExp = find(srchOpts, "ckLoExp");
    Object cbDateRes = find(srchOpts, "cbDateRes");
    DateTools.Resolution resolution = Util.getResolution(getString(cbDateRes, "text"));
    Object cbOp = find(srchOpts, "cbOp");
    Object bqMaxCount = find(srchOpts, "bqMaxCount");

    final int defaultMaxCount = 1024;
    int maxCount = defaultMaxCount;
    try {
        // trim so that stray whitespace in the text field does not invalidate the number
        maxCount = Integer.parseInt(getString(bqMaxCount, "text").trim());
    } catch (Exception e) {
        e.printStackTrace();
        showStatus("Invalid BooleanQuery max clause count, using default 1024");
    }
    if (maxCount < 1) {
        // BooleanQuery.setMaxClauseCount rejects values below 1 with an
        // IllegalArgumentException, so fall back to the default instead
        showStatus("Invalid BooleanQuery max clause count, using default 1024");
        maxCount = defaultMaxCount;
    }
    BooleanQuery.setMaxClauseCount(maxCount);

    QueryParser.Operator op;
    String opString = getString(cbOp, "text");
    if (opString.equalsIgnoreCase("OR")) {
        op = QueryParser.OR_OPERATOR;
    } else {
        op = QueryParser.AND_OPERATOR;
    }
    qp.setAllowLeadingWildcard(getBoolean(ckWild, "selected"));
    qp.setEnablePositionIncrements(getBoolean(ckPosIncr, "selected"));
    qp.setLowercaseExpandedTerms(getBoolean(ckLoExp, "selected"));
    qp.setDateResolution(resolution);
    qp.setDefaultOperator(op);

    if (getBoolean(ckXmlParser, "selected")) {
        // the query string is an XML document handled by the XML query parser
        CoreParser cp = createParser(defField, analyzer);
        return cp.parse(new ByteArrayInputStream(queryString.getBytes("UTF-8")));
    } else {
        return qp.parse(queryString);
    }
}
From source file:org.hibernate.search.test.perf.SearcherThread.java
License:Open Source License
private Query getQuery() throws ParseException { QueryParser qp = new QueryParser(SearchTestCase.getTargetLuceneVersion(), "t", SearchTestCase.standardAnalyzer); qp.setLowercaseExpandedTerms(true);/*ww w. ja va 2 s . com*/ // Parse the query Query q = qp.parse(queryString); if (q instanceof BooleanQuery) { BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE); } return q; }
From source file:org.hippoecm.repository.query.lucene.AuthorizationQuery.java
License:Apache License
public AuthorizationQuery(final Subject subject, final NamespaceMappings nsMappings, final ServicingIndexingConfiguration indexingConfig, final NodeTypeManager ntMgr, final Session session) throws RepositoryException { // set the max clauses for booleans higher than the default 1024. BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE); if (!(session instanceof InternalHippoSession)) { throw new RepositoryException("Session is not an instance of o.a.j.core.SessionImpl"); }// w w w . ja v a 2 s . co m if (!subject.getPrincipals(SystemPrincipal.class).isEmpty()) { this.query = new BooleanQuery(true); this.query.add(new MatchAllDocsQuery(), Occur.MUST); } else { final Set<String> memberships = new HashSet<String>(); for (GroupPrincipal groupPrincipal : subject.getPrincipals(GroupPrincipal.class)) { memberships.add(groupPrincipal.getName()); } final Set<String> userIds = new HashSet<String>(); for (UserPrincipal userPrincipal : subject.getPrincipals(UserPrincipal.class)) { userIds.add(userPrincipal.getName()); } long start = System.currentTimeMillis(); this.query = initQuery(subject.getPrincipals(FacetAuthPrincipal.class), subject.getPrincipals(AuthorizationFilterPrincipal.class), userIds, memberships, (InternalHippoSession) session, indexingConfig, nsMappings, ntMgr); log.info("Creating authorization query took {} ms. Query: {}", String.valueOf(System.currentTimeMillis() - start), query); } }
From source file:org.hupo.psi.mi.psicquic.ws.IndexBasedPsicquicService.java
License:Apache License
public IndexBasedPsicquicService() { BooleanQuery.setMaxClauseCount(200 * 1000); }
From source file:org.index.TermScore.java
public List<SOQuery> constructQueries() throws Exception { List<SOQuery> queryList = new LinkedList<SOQuery>(); boolean allTerms = true; BooleanQuery.setMaxClauseCount(20000); String queryFile = prop.getProperty("query.tsv.file"); if (queryFile == null) { // the weighted tsv files queryFile = prop.getProperty("query.wtsv.file"); allTerms = false;//from ww w .ja v a 2s. c om } FileReader fr = new FileReader(queryFile); BufferedReader br = new BufferedReader(fr); String line; List<String> terms; while ((line = br.readLine()) != null) { String[] tokens = line.split("\t"); String tags = tokens[1]; String[] tagTerms = tokens[1].split("\\,"); // the first token is the question id, the second is a list // of labels... if (allTerms) terms = getBagOfWords(tokens[2]); else terms = getSelectedTerms(tokens[2]); Query q = construct(terms, tagTerms); queryList.add(new SOQuery(Integer.parseInt(tokens[0]), q)); } return queryList; }
From source file:org.intermine.web.autocompletion.LuceneSearchEngine.java
License:GNU General Public License
/** * Perform the lucene search./*w w w . j av a2s .co m*/ * @param queryString * the string for what you search in the indexes * @param toSearch * the field in which you search * @return Hits list of documents (search results) * @throws IOException * IOException * @throws ParseException * IOException */ public TopDocs performSearch(String queryString, String toSearch) throws IOException, ParseException { QueryParser parser = new QueryParser(Version.LUCENE_30, toSearch, analyzer); BooleanQuery.setMaxClauseCount(4096); if (!"".equals(queryString) && !queryString.trim().startsWith("*")) { Query query; if (queryString.endsWith(" ")) { queryString = queryString.substring(0, queryString.length() - 1); } String[] tmp; if (queryString.contains(" ")) { tmp = queryString.replaceAll(" +", " ").trim().split(" "); queryString = new String(); for (int i = 0; i < tmp.length; i++) { queryString += tmp[i]; if (i < tmp.length - 1) { queryString += "* AND "; } } } query = parser.parse(queryString + "*"); return indexSearch.search(query, 500); // FIXME: hardcoded maximum // number of results } return null; }
From source file:org.intermine.web.autocompletion.LuceneSearchEngine.java
License:GNU General Public License
/** * Perform the search but only return n results. * @param queryS/*from w w w . j ava 2 s .co m*/ * the string for what you search in the indexes * @param toSearch * the field in which you search * @param n * first n results * @return array of ScoreDoc[] with n elements */ public String[] fastSearch(String queryS, String toSearch, int n) { QueryParser parser = new QueryParser(Version.LUCENE_30, toSearch, analyzer); BooleanQuery.setMaxClauseCount(4096); String status = "true"; String[] results = null; if (!"".equals(queryS) && !queryS.trim().startsWith("*")) { Query query = null; if (queryS.endsWith(" ")) { queryS = queryS.substring(0, queryS.length() - 1); } String[] tmp; if (queryS.contains(" ")) { tmp = queryS.replaceAll(" +", " ").trim().split(" "); queryS = new String(); for (int i = 0; i < tmp.length; i++) { queryS += tmp[i]; if (i < tmp.length - 1) { queryS += "* AND "; } } } try { query = parser.parse(queryS + "*"); TopDocs topDoc = null; try { topDoc = indexSearch.search(query, null, n); ScoreDoc[] docs = topDoc.scoreDocs; results = new String[docs.length + 1]; for (int i = 1; i < docs.length + 1; i++) { try { results[i] = indexSearch.doc(docs[i - 1].doc).get(toSearch); } catch (IOException e) { status = "No results! Please try again."; } } results[0] = status; } catch (IOException e) { status = "Please type in more characters to get results."; results = new String[1]; results[0] = status; } catch (Throwable e1) { status = "Please type in more characters to get results."; results = new String[1]; results[0] = status; } } catch (ParseException e) { status = "No results! Please try again."; } return results; } return null; }
From source file:org.jetbrains.idea.maven.server.embedder.Maven2ServerIndexerImpl.java
License:Apache License
@Override public Set<MavenArtifactInfo> search(int indexId, Query query, int maxResult) throws MavenServerIndexerException { try {//from www .j av a 2 s.c o m IndexingContext index = getIndex(indexId); TopDocs docs = null; try { BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE); docs = index.getIndexSearcher().search(query, null, maxResult); } catch (BooleanQuery.TooManyClauses ignore) { // this exception occurs when too wide wildcard is used on too big data. } if (docs == null || docs.scoreDocs.length == 0) { return Collections.emptySet(); } Set<MavenArtifactInfo> result = new HashSet<MavenArtifactInfo>(); for (int i = 0; i < docs.scoreDocs.length; i++) { int docIndex = docs.scoreDocs[i].doc; Document doc = index.getIndexReader().document(docIndex); ArtifactInfo a = IndexUtils.constructArtifactInfo(doc, index); if (a == null) { continue; } a.repository = getRepositoryPathOrUrl(index); result.add(Maven2ModelConverter.convertArtifactInfo(a)); } return result; } catch (Exception e) { throw new MavenServerIndexerException(wrapException(e)); } }
From source file:org.jetbrains.idea.maven.server.Maven3ServerIndexerImpl.java
License:Apache License
@Override public Set<MavenArtifactInfo> search(int indexId, Query query, int maxResult) throws RemoteException, MavenServerIndexerException { try {//w w w.j ava 2 s . c o m IndexingContext index = getIndex(indexId); TopDocs docs = null; try { BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE); docs = index.getIndexSearcher().search(query, null, maxResult); } catch (BooleanQuery.TooManyClauses ignore) { // this exception occurs when too wide wildcard is used on too big data. } if (docs == null || docs.scoreDocs.length == 0) { return Collections.emptySet(); } Set<MavenArtifactInfo> result = new HashSet<MavenArtifactInfo>(); for (int i = 0; i < docs.scoreDocs.length; i++) { int docIndex = docs.scoreDocs[i].doc; Document doc = index.getIndexReader().document(docIndex); ArtifactInfo a = IndexUtils.constructArtifactInfo(doc, index); if (a == null) { continue; } a.repository = getRepositoryPathOrUrl(index); result.add(MavenModelConverter.convertArtifactInfo(a)); } return result; } catch (Exception e) { throw new MavenServerIndexerException(wrapException(e)); } }