List of usage examples for the org.apache.lucene.search.BooleanQuery constructor BooleanQuery()
BooleanQuery
From source file:de.ingrid.search.utils.facet.DummyQueryParsers.java
License:EUPL
public Query parse(IngridQuery ingridQuery) { BooleanQuery booleanQuery = new BooleanQuery(); ClauseQuery[] clauses = ingridQuery.getClauses(); for (ClauseQuery clauseQuery : clauses) { final Query sc = parse(clauseQuery); if (!sc.equals(new BooleanQuery())) { Occur occur = transform(clauseQuery.isRequred(), clauseQuery.isProhibited()); booleanQuery.add(sc, occur); }//from w w w . j a va2 s . c o m } parse(ingridQuery, booleanQuery); return booleanQuery; }
From source file:de.ingrid.search.utils.facet.FacetClassProducer.java
License:EUPL
/**
 * Wraps a query in a {@link BooleanQuery} that additionally requires all
 * "partner", "provider" and "datatype" field queries of the IngridQuery.
 *
 * @param q  the base query; added as a required clause unless its string
 *           representation is empty
 * @param iq the IngridQuery supplying the partner/provider/datatype fields
 * @return a new boolean query in which every added clause is {@code MUST}
 */
@SuppressWarnings("unchecked")
private BooleanQuery addSpecialFields(Query q, IngridQuery iq) {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Add special fields (partner, provider, datatype) from IngridQuery: " + iq);
    }

    BooleanQuery combined = new BooleanQuery();
    // The three special fields are handled identically, in this order.
    for (String key : new String[] { "partner", "provider", "datatype" }) {
        List<FieldQuery> fieldQueries = iq.getArrayList(key);
        if (fieldQueries == null) {
            continue;
        }
        for (FieldQuery fieldQuery : fieldQueries) {
            Term term = new Term(fieldQuery.getFieldName(), fieldQuery.getFieldValue());
            combined.add(new TermQuery(term), Occur.MUST);
        }
    }

    if (q.toString().length() != 0) {
        combined.add(q, Occur.MUST);
    }
    return combined;
}
From source file:de.ingrid.search.utils.facet.FacetClassProducerTest.java
License:EUPL
/**
 * Produces a facet class from a boolean query requiring {@code partner:bund}
 * and checks the class name and that the first bit set has at least two bits set.
 */
@Test
public final void testProduceClassFromQuery() throws IOException {
    BooleanQuery partnerQuery = new BooleanQuery();
    partnerQuery.add(new TermQuery(new Term("partner", "bund")), Occur.MUST);

    FacetClass facetClass = fcp.produceClassFromQuery("partner:bund", partnerQuery);

    Assert.assertEquals("partner:bund", facetClass.getFacetClassName());
    Assert.assertTrue(2 <= facetClass.getBitSets()[0].cardinality());
}
From source file:de.ingrid.search.utils.facet.LuceneBitSetSearchNonDeprecatedTest.java
License:EUPL
/**
 * Returns the base query for these tests: a term query for "wasser" in the
 * "content" field.
 *
 * <p>A single-clause {@code BooleanQuery} wrapper used to be built here but was
 * never returned, so it had no effect and has been dropped. The returned
 * query is unchanged.
 *
 * @return the term query {@code content:wasser}
 */
private Query getBaseQuery() {
    return new TermQuery(new Term("content", "wasser"));
}
From source file:de.ingrid.search.utils.facet.LuceneBitSetSearchTest.java
License:EUPL
private Query getBaseQuery() { BooleanQuery bQuery = new BooleanQuery(); Query query = null;//from w ww .j a v a 2 s .c om query = new TermQuery(new Term("content", "waldbrand ")); bQuery.add(query, Occur.MUST); return query; }
From source file:de.ingrid.search.utils.facet.LuceneSearchTest.java
License:EUPL
/**
 * Creates a term query for the given field.
 *
 * <p>A single-clause {@code BooleanQuery} wrapper used to be built here but was
 * never returned, so it had no effect and has been dropped. The returned
 * query is unchanged.
 *
 * @param field the field name of the term
 * @param value the term text; when {@code null} the term is created from the
 *              field name alone
 * @return a {@code TermQuery} for the resulting term
 */
private Query getQuery(String field, String value) {
    Term term = (value == null) ? new Term(field) : new Term(field, value);
    return new TermQuery(term);
}
From source file:de.innovationgate.wgpublisher.lucene.LuceneManager.java
License:Open Source License
public WGResultSet search(WGDatabase db, List<String> fields, String phrase, Map parameters, WGA wga) throws WGQueryException { if (wga == null) { wga = WGA.get(_core);/*from ww w. j a va2 s . c om*/ } // set max clause count for boolean queries BooleanQuery.setMaxClauseCount(_booleanQueryMaxClauseCount); if (this.isRebuildingIndex()) { throw new WGQueryException(phrase, "Lucene search temporary disabled. Rebuilding lucene index ..."); } // Registering problem in that case but not cancelling the query, as this is old, expected behaviour. The query will just return no results. if (!_core.getLuceneManager().indexIsEnabled(db.getDbReference())) { _core.getProblemRegistry().addProblem( Problem.create(new TMLContext.WebTMLOccasion(), new DatabaseScope(db.getDbReference()), "webtmlProblem.luceneIndexExpected", ProblemSeverity.LOW)); } if (phrase == null || phrase.trim().equals("")) { return null; } try { BooleanQuery wholeQuery = new BooleanQuery(); int max = WGACore.DEFAULT_QUERY_MAXRESULTS; Integer maxResults = (Integer) parameters.get(WGDatabase.QUERYOPTION_MAXRESULTS); if (maxResults != null) { if (maxResults == 0 || maxResults == -1) { max = Integer.MAX_VALUE; } else { max = maxResults; } } // handle dboption EXCLUDEDOCUMENT WGContent excludeContent = (WGContent) parameters.get(WGDatabase.QUERYOPTION_EXCLUDEDOCUMENT); if (excludeContent != null) { String uniqueKey = buildUniqueIndexKey(excludeContent.getDatabase().getDbReference(), excludeContent.getDocumentKey()); wholeQuery.add(new TermQuery(new Term(INDEXFIELD_UNIQUEKEY, uniqueKey)), BooleanClause.Occur.MUST_NOT); wholeQuery.add(new TermQuery(new Term(INDEXFIELD_PARENTKEY, uniqueKey)), BooleanClause.Occur.MUST_NOT); } // list of dbs to search in String searchScope = (String) parameters.get(LuceneManager.QUERYOPTION_SEARCHSCOPE); List searchDBKeys = new ArrayList(); if (searchScope.equals(LuceneManager.SEARCHSCOPE_DB)) { searchDBKeys.add(db.getDbReference()); } if 
(searchScope.equals(LuceneManager.SEARCHSCOPE_DOMAIN)) { Iterator<WGDatabase> dbs = _core .getDatabasesForDomain((String) db.getAttribute(WGACore.DBATTRIB_DOMAIN)).iterator(); while (dbs.hasNext()) { WGDatabase currentDB = dbs.next(); if (wga.openDatabase(currentDB)) { searchDBKeys.add(currentDB.getDbReference()); } } } if (searchScope.equals(LuceneManager.SEARCHSCOPE_WGA)) { Iterator dbs = _core.getContentdbs().values().iterator(); while (dbs.hasNext()) { WGDatabase currentDB = (WGDatabase) dbs.next(); if (wga.openDatabase(currentDB)) { searchDBKeys.add(currentDB.getDbReference()); } } } if (searchScope.equals(LuceneManager.SEARCHSCOPE_DB_LIST)) { String dbListCSV = (String) parameters.get(QUERYOPTION_SEARCHDBKEYS); if (dbListCSV == null || dbListCSV.trim().equals("")) { throw new WGQueryException(phrase, "Search scope is 'dblist' but no db keys given."); } else { Iterator dbkeys = WGUtils.deserializeCollection(dbListCSV, ",").iterator(); while (dbkeys.hasNext()) { String dbkey = (String) dbkeys.next(); WGDatabase currentDB = wga.db(dbkey); if (currentDB.isSessionOpen()) { searchDBKeys.add(dbkey.trim().toLowerCase()); } } } } // Handle language selection; List<WGLanguage> languagesPriorityList = null; boolean filterLanguages = false; if (parameters.containsKey(WGDatabase.QUERYOPTION_LANGUAGES)) { List<WGLanguage> langs = (List<WGLanguage>) parameters.get(WGDatabase.QUERYOPTION_LANGUAGES); if (langs.size() > 1) { BooleanQuery langQuery = new BooleanQuery(); for (WGLanguage lang : langs) { langQuery.add(new TermQuery(new Term(WGContent.META_LANGUAGE, lang.getName())), BooleanClause.Occur.SHOULD); } wholeQuery.add(langQuery, BooleanClause.Occur.MUST); languagesPriorityList = langs; filterLanguages = true; } else if (langs.size() == 1) { wholeQuery.add(new TermQuery(new Term(WGContent.META_LANGUAGE, langs.get(0).getName())), BooleanClause.Occur.MUST); languagesPriorityList = Collections.singletonList(langs.get(0)); } } else if 
(parameters.containsKey(WGDatabase.QUERYOPTION_ONLYLANGUAGE)) { String language = (String) parameters.get(WGDatabase.QUERYOPTION_ONLYLANGUAGE); wholeQuery.add(new TermQuery(new Term(WGContent.META_LANGUAGE, language)), BooleanClause.Occur.MUST); languagesPriorityList = Collections.singletonList(db.getLanguage(language)); } if (languagesPriorityList == null) { languagesPriorityList = getLanguagesForSearchDBKeys(searchDBKeys); ; } // Handle visibility selection if (!parameters.containsKey(WGDatabase.QUERYOPTION_ENHANCE) || parameters.get(WGDatabase.QUERYOPTION_ENHANCE).equals(new Boolean(true))) { wholeQuery.add(new TermQuery(new Term(WGContent.META_VISIBLE, "true")), BooleanClause.Occur.MUST); String role = (String) parameters.get(WGDatabase.QUERYOPTION_ROLE); if (role != null) { if (!role.equalsIgnoreCase(WGContent.DISPLAYTYPE_NONE)) { wholeQuery.add(new TermQuery(new Term("HIDDENIN" + role.toUpperCase(), "false")), BooleanClause.Occur.MUST); } } } if (parameters.containsKey(WGDatabase.QUERYOPTION_ONLYRELEASED)) { wholeQuery.add(new TermQuery(new Term(WGContent.META_STATUS, WGContent.STATUS_RELEASE)), BooleanClause.Occur.MUST); } // build dbQuery (OR combination of all searchDbs indexed by lucene) BooleanQuery dbQuery = new BooleanQuery(); Iterator itSearchDBKeys = searchDBKeys.iterator(); while (itSearchDBKeys.hasNext()) { String currentDBKey = (String) itSearchDBKeys.next(); if (_indexedDbs.containsKey(currentDBKey)) { dbQuery.add(new TermQuery(new Term(INDEXFIELD_DBKEY, currentDBKey)), BooleanClause.Occur.SHOULD); } } wholeQuery.add(dbQuery, BooleanClause.Occur.MUST); // Add parsed search phrase. 
// Search in allcontent for each language using the configured analyzer // if no analyzer is configured for a language search at least with one // default analyzer boolean searchWithDefaultAnalyzer = false; //if no languages found search at least with DefaultAnalyzer if (languagesPriorityList.size() <= 0) { searchWithDefaultAnalyzer = true; } // parse native options Sort sort = null; String sortFieldName = ""; Operator defaultOperator = QueryParser.AND_OPERATOR; String nativeOptionsStr = (String) parameters.get(WGDatabase.QUERYOPTION_NATIVEOPTIONS); boolean includeVirtualContent = false; String doctype = DOCTYPE_CONTENT; if (nativeOptionsStr != null) { Iterator nativeOptions = WGUtils.deserializeCollection(nativeOptionsStr, ",", true).iterator(); while (nativeOptions.hasNext()) { String option = (String) nativeOptions.next(); if (option.startsWith("sort:")) { sortFieldName = option.substring(5).trim(); boolean reverse = false; if (sortFieldName.toLowerCase().endsWith("(asc)")) { sortFieldName = sortFieldName.substring(0, sortFieldName.length() - 5).trim(); } else if (sortFieldName.toLowerCase().endsWith("(desc)")) { sortFieldName = sortFieldName.substring(0, sortFieldName.length() - 6).trim(); reverse = true; } if (sortFieldName.length() > 0) { char first = sortFieldName.charAt(0); if (first >= 'A' && first <= 'Z') { // meta sort sortFieldName = sortFieldName.toUpperCase(); } else { // item sort sortFieldName = sortFieldName.toLowerCase(); } } // sort order currently only german sort = new Sort(new SortField(SORTITEM_PREFIX + sortFieldName, Locale.GERMANY, reverse)); } else if (option.equalsIgnoreCase(NATIVE_QUERYOPTION_INCLUDEVIRTUALCONTENT)) { includeVirtualContent = true; } else if (option.startsWith("doctype:")) { doctype = option.substring("doctype:".length()).trim(); } else if (option.startsWith("operator:")) { String op = option.substring("operator:".length()).trim(); if (op.equalsIgnoreCase("or")) defaultOperator = QueryParser.OR_OPERATOR; } } } if 
(!includeVirtualContent) { wholeQuery.add(new TermQuery(new Term(INDEXFIELD_ISVIRTUALCONTENT, String.valueOf(true))), BooleanClause.Occur.MUST_NOT); } // handle doctype option // we cannot be sure that all documents in index already contains the field DOCTYPE (introduced with OpenWGA 7.1) therefore we have to perform some excludes if (doctype.equals(DOCTYPE_CONTENT)) { wholeQuery.add(new TermQuery(new Term(INDEXFIELD_DOCTYPE, DOCTYPE_ATTACHMENT)), BooleanClause.Occur.MUST_NOT); } else if (!doctype.equals(DOCTYPE_ALL)) { wholeQuery.add(new TermQuery(new Term(INDEXFIELD_DOCTYPE, doctype)), BooleanClause.Occur.MUST); } //build phrase query BooleanQuery phraseQuery = new BooleanQuery(); phraseQuery.setBoost(10); Iterator languageList = languagesPriorityList.iterator(); List<String> searchFields = new ArrayList<String>(); Map<String, Float> searchBoosts = new HashMap<String, Float>(); for (String field : fields) { String[] parts = field.split("\\^"); searchFields.add(parts[0]); if (parts.length == 2) { searchBoosts.put(parts[0], Float.parseFloat(parts[1])); } } if (!searchFields.contains("allcontent")) searchFields.add("allcontent"); if (!searchFields.contains("TITLE")) searchFields.add("TITLE"); if (!searchFields.contains("DESCRIPTION")) searchFields.add("DESCRIPTION"); if (!searchFields.contains("KEYWORDS")) searchFields.add("KEYWORDS"); while (languageList.hasNext()) { WGLanguage languageItem = (WGLanguage) languageList.next(); Analyzer analyzer = _core.getAnalyzerForLanguageCode(languageItem.getName().substring(0, 2)); if (analyzer != null) { QueryParser parser = new IndexingRuleBasedQueryParser(searchFields.toArray(new String[0]), analyzer, searchBoosts, _indexedDbs, searchDBKeys, _metaKeywordFields); parser.setDefaultOperator(defaultOperator); Query query = parser.parse(phrase); if (filterLanguages) { BooleanQuery testPhraseAndLangQuery = new BooleanQuery(); testPhraseAndLangQuery.add(query, BooleanClause.Occur.MUST); testPhraseAndLangQuery.add( new TermQuery(new 
Term(WGContent.META_LANGUAGE, languageItem.getName())), BooleanClause.Occur.MUST); phraseQuery.add(testPhraseAndLangQuery, BooleanClause.Occur.SHOULD); } else { phraseQuery.add(query, BooleanClause.Occur.SHOULD); } } else { searchWithDefaultAnalyzer = true; } } if (searchWithDefaultAnalyzer) { QueryParser parser = new IndexingRuleBasedQueryParser(searchFields.toArray(new String[0]), _core.getDefaultAnalyzer(), searchBoosts, _indexedDbs, searchDBKeys, _metaKeywordFields); parser.setDefaultOperator(defaultOperator); Query query = parser.parse(phrase); phraseQuery.add(query, BooleanClause.Occur.SHOULD); } //LOG.info(phraseQuery.toString()); wholeQuery.add(phraseQuery, BooleanClause.Occur.MUST); TopDocs hits; //register executed query as output parameter parameters.put(WGDatabase.QUERYOPTION_RETURNQUERY, wholeQuery.toString()); // simplify query and register as taginfo parameters.put(TAGINFO_SIMPLIFIEDQUERY, rewrite(wholeQuery)); long timeBefore = System.currentTimeMillis(); if (sort != null) { try { hits = search(wholeQuery, max, sort); } catch (NullPointerException e) { // lucene bug when sorting for non existing fields with Locale throw new WGQueryException(wholeQuery.toString(), "Sortfield '" + sortFieldName + "' not indexed."); } } else { try { hits = search(wholeQuery, max, null); } catch (BooleanQuery.TooManyClauses e) { parameters.put(TAGINFO_UNSPECIFICQUERY, new Boolean(true)); throw new WGQueryException(phrase, "Too many BooleanClauses in query. " + "Please use a more specific query or increase value of " + "'booleanQueryMaxClauseCount' via WGAManager. 
Current value is '" + this.getBooleanQueryMaxClauseCount() + "'."); } } long timeAfter = System.currentTimeMillis(); long executionTime = timeAfter - timeBefore; LuceneResultSet resultSet; if (filterLanguages) { resultSet = new LuceneLanguageChoosingResultSet(hits, wga, parameters, wholeQuery, executionTime, languagesPriorityList); } else { resultSet = new LuceneMultiDBResultSet(hits, wga, parameters, wholeQuery, executionTime); } // put resultset in per thread list List rsList = (List) _resultsetList.get(); if (rsList == null) { rsList = new LinkedList(); _resultsetList.set(rsList); } rsList.add(resultSet); return resultSet; } catch (org.apache.lucene.queryParser.ParseException e) { throw new WGQueryException("Unable to parse lucene query", e.getMessage(), e); } catch (Exception e) { LOG.error("Error executing lucene search: " + e.getClass().getName() + " - " + e.getMessage(), e); throw new WGQueryException(phrase, e.getClass().getName() + ": " + e.getMessage(), e); } }
From source file:de.innovationgate.wgpublisher.lucene.LuceneManager.java
License:Open Source License
/** * // www . j a v a 2 s.c om * @param db * @throws IOException * @throws WGIllegalArgumentException * @throws InterruptedException * @deprecated pending feature - F00003426 */ public void performCustomDBIndexDeletions(WGDatabase db) throws IOException, WGIllegalArgumentException, InterruptedException { // check if db is not a full contentstore // this method is only supported for none fullcontentstores if (db.hasFeature(WGDatabase.FEATURE_FULLCONTENTFEATURES)) { throw new WGIllegalArgumentException( "Method performCustomDBIndexDeletions() is unsupported for full featured contentstores."); } // check if db is already indexed if (!_indexedDbs.containsKey(db.getDbReference())) { throw new WGIllegalArgumentException("Cannot perform deletions on database '" + db.getDbReference() + "'. Database is not yet indexed. Ensure performCustomDBIndexUpdates() has been called first."); } // collect currently indexed document keys for this db Set currentlyIndexedDocuments = new HashSet(); BooleanQuery query = new BooleanQuery(); query.add(new TermQuery(new Term(INDEXFIELD_DBKEY, db.getDbReference())), BooleanClause.Occur.MUST); TopDocs indexedDocuments = search(query, 500, null); for (ScoreDoc scoreDoc : indexedDocuments.scoreDocs) { org.apache.lucene.document.Document luceneDoc = getDocument(scoreDoc.doc); String documentKey = luceneDoc.get(LuceneManager.INDEXFIELD_DOCUMENTKEY); currentlyIndexedDocuments.add(documentKey); } }
From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java
License:Open Source License
/**
 * Builds a query that matches the query string in any known field.
 *
 * <p>The query string is parsed once per field name reported by the analyzer
 * cache; each per-field query is added as a {@code SHOULD} clause and at
 * least one of them has to match.
 *
 * @param aQueryString the query string to parse
 * @return the combined boolean query
 * @throws IOException if parsing the query string for a field fails
 */
private BooleanQuery computeBooleanQueryFor(String aQueryString) throws IOException {
    QueryParser theQueryParser = new QueryParser(analyzer);

    BooleanQuery theCombinedQuery = new BooleanQuery();
    theCombinedQuery.setMinimumNumberShouldMatch(1);

    for (String theField : analyzerCache.getAllFieldNames()) {
        theCombinedQuery.add(theQueryParser.parse(aQueryString, theField), BooleanClause.Occur.SHOULD);
    }
    return theCombinedQuery;
}
From source file:de.mirkosertic.desktopsearch.QueryParser.java
License:Open Source License
/**
 * Translates a query string into a Lucene query for one search field.
 *
 * <p>When there are required terms, the result contains as {@code SHOULD}
 * clauses an in-order span query with slop 0 (boost 61) and unordered span
 * queries for slops 0 to 9 (boosts 60 down to 51), plus the required terms
 * as {@code MUST} clauses. Terms that are not required are always added as
 * {@code MUST_NOT} clauses.
 *
 * @param aQuery       the query string to tokenize
 * @param aSearchField the field the resulting query searches in
 * @return the assembled boolean query
 * @throws IOException
 */
public Query parse(String aQuery, String aSearchField) throws IOException {

    QueryTokenizer theQueryTokenizer = new QueryTokenizer(aQuery);

    // The terms are known now, so the query can be assembled
    BooleanQuery theQuery = new BooleanQuery();

    if (!theQueryTokenizer.getRequiredTerms().isEmpty()) {

        List<SpanQuery> theSpanQueries = new ArrayList<>();
        for (String theRequiredTerm : theQueryTokenizer.getRequiredTerms()) {
            if (QueryUtils.isWildCard(theRequiredTerm)) {
                theSpanQueries.add(new SpanMultiTermQueryWrapper<>(
                        new WildcardQuery(new Term(aSearchField, theRequiredTerm))));
            } else if (QueryUtils.isFuzzy(theRequiredTerm)) {
                theSpanQueries.add(new SpanMultiTermQueryWrapper<>(
                        new FuzzyQuery(new Term(aSearchField, theRequiredTerm))));
            } else {
                // The token might be dropped by the analysis (stopwords and so on)
                String theToken = toToken(theRequiredTerm, aSearchField);
                if (!StringUtils.isEmpty(theToken)) {
                    theSpanQueries.add(new SpanTermQuery(new Term(aSearchField, theToken)));
                }
            }
        }

        SpanQuery[] theClauses = theSpanQueries.toArray(new SpanQuery[theSpanQueries.size()]);

        // The terms in their original order get the highest boost
        SpanQuery theInOrderQuery = new SpanNearQuery(theClauses, 0, true);
        theInOrderQuery.setBoost(61);
        theQuery.add(theInOrderQuery, BooleanClause.Occur.SHOULD);

        // Terms in any order, up to a distance of 10 between them;
        // the closer the terms, the higher the boost
        int theMaxDistance = 10;
        int theSlop = 0;
        while (theSlop < theMaxDistance) {
            SpanQuery theUnorderedQuery = new SpanNearQuery(theClauses, theSlop, false);
            theUnorderedQuery.setBoost(50 + theMaxDistance - theSlop);
            theQuery.add(theUnorderedQuery, BooleanClause.Occur.SHOULD);
            theSlop++;
        }

        // Unboosted term queries make sure the searched terms
        // are present in the document at all
        addToBooleanQuery(theQueryTokenizer.getRequiredTerms(), aSearchField, theQuery, BooleanClause.Occur.MUST);
    }

    // Terms that must not occur in the search result
    addToBooleanQuery(theQueryTokenizer.getNotRequiredTerms(), aSearchField, theQuery,
            BooleanClause.Occur.MUST_NOT);

    return theQuery;
}