List of usage examples for org.apache.lucene.search.BooleanQuery (the `BooleanQuery()` constructor)
From source file:de.unihildesheim.iw.lucene.query.TryExactTermsQuery.java
License:Open Source License
/** * New instance using the supplied query. * * @param analyzer Query analyzer/*w w w . jav a 2 s. c o m*/ * @param queryStr Query string * @param fields Fields to query * @throws ParseException Thrown, if the query could not be parsed */ public TryExactTermsQuery(@NotNull final Analyzer analyzer, @NotNull final String queryStr, @NotNull final String... fields) throws ParseException { if (fields.length == 0) { throw new IllegalArgumentException("Empty fields list."); } if (StringUtils.isStrippedEmpty(queryStr)) { throw new IllegalArgumentException("Empty query."); } this.queryTerms = QueryUtils.tokenizeQueryString(queryStr, analyzer); final QueryParser qParser = new MultiFieldQueryParser(fields, analyzer); this.query = new BooleanQuery(); this.uniqueQueryTerms = new HashSet<>(this.queryTerms); for (final String term : this.uniqueQueryTerms) { @SuppressWarnings("ObjectAllocationInLoop") final BooleanClause bc = new BooleanClause(qParser.parse(QueryParserBase.escape(term)), Occur.SHOULD); this.query.add(bc); } this.query.setMinimumNumberShouldMatch(this.uniqueQueryTerms.size()); if (LOG.isDebugEnabled()) { LOG.debug("TEQ {} uQt={}", this.query, this.uniqueQueryTerms); } }
From source file:de.walware.statet.r.internal.core.rhelp.index.SearchQuery.java
License:Open Source License
public static SearchQuery compile(final RHelpSearchQuery query) throws CoreException { try {/*from www . j a v a 2 s. c o m*/ final BooleanQuery q = new BooleanQuery(); q.add(REnvIndexReader.DOCTYPE_PAGE_QUERY, Occur.MUST); String[] fieldNames = NO_FIELDS; if (query.getSearchString().length() > 0) { switch (query.getSearchType()) { case RHelpSearchQuery.TOPIC_SEARCH: fieldNames = TOPIC_SEARCH_FIELDS; q.add(createMainQuery(fieldNames, query.getSearchString()), Occur.MUST); break; case RHelpSearchQuery.FIELD_SEARCH: fieldNames = sortFields(query.getEnabledFields()); if (fieldNames.length == 0) { break; } q.add(createMainQuery(fieldNames, query.getSearchString()), Occur.MUST); break; case RHelpSearchQuery.DOC_SEARCH: fieldNames = DOC_SEARCH_FIELDS; q.add(createMainQuery(fieldNames, query.getSearchString()), Occur.MUST); break; default: break; } } final List<String> keywords = query.getKeywords(); if (!keywords.isEmpty()) { q.add(createOrQuery(KEYWORD_FIELD_NAME, keywords), Occur.MUST); } final List<String> packages = query.getPackages(); if (!packages.isEmpty()) { q.add(createOrQuery(PACKAGE_FIELD_NAME, packages), Occur.MUST); } return new SearchQuery(fieldNames, q); } catch (final QueryNodeParseException e) { RCorePlugin.log(new Status(IStatus.ERROR, RCore.PLUGIN_ID, -1, NLS.bind("An error occurred when creating the Lucene query for: {0}.", //$NON-NLS-1$ query.toString()), e)); throw new CoreException(new Status(IStatus.ERROR, RCore.PLUGIN_ID, -1, "The search string is invalid: " + e.getLocalizedMessage(), null)); } catch (final Exception e) { RCorePlugin.log(new Status(IStatus.ERROR, RCore.PLUGIN_ID, -1, NLS.bind("An error occurred when creating the Lucene query for {0}.", //$NON-NLS-1$ query.toString()), e)); throw new CoreException(new Status(IStatus.ERROR, RCore.PLUGIN_ID, -1, "An error occurred when preparing the R help query.", null)); } }
From source file:de.walware.statet.r.internal.core.rhelp.index.SearchQuery.java
License:Open Source License
/**
 * Builds a disjunction (OR) of term queries over the given field.
 *
 * @param field the index field to match against
 * @param terms the terms to OR together
 * @return a single {@link TermQuery} for one term, a SHOULD-only
 *     {@link BooleanQuery} for several, or {@code null} for an empty list
 */
private static Query createOrQuery(final String field, final List<String> terms) {
    if (terms.isEmpty()) {
        return null;
    }
    if (terms.size() == 1) {
        // Single term: no boolean wrapper needed.
        return new TermQuery(new Term(field, terms.get(0)));
    }
    final BooleanQuery anyOf = new BooleanQuery();
    for (final String term : terms) {
        anyOf.add(new TermQuery(new Term(field, term)), Occur.SHOULD);
    }
    return anyOf;
}
From source file:di.uniba.it.tee2.search.TemporalEventSearch.java
License:Open Source License
/** * @param query/*from w w w . j a va2 s .c o m*/ * @param timeRange * @param maxResults * @return * @throws java.lang.Exception * */ public List<SearchResult> naturalSearch(String query, String timeRange, int maxResults) throws Exception { QueryParser contentParser = new QueryParser(Version.LUCENE_48, "content", analyzer); QueryParser titleParser = new QueryParser(Version.LUCENE_48, "title", analyzer); QueryParser contextParser = new QueryParser(Version.LUCENE_48, "context", analyzer); QueryParser timeParser = new QueryParser(Version.LUCENE_48, "time", analyzer); String timeQueryString = null; if (timeRange.length() > 0) { timeQueryString = normalizeTimeQuery(timeRange); } Query contentQuery = null; Query titleQuery = null; Query contextQuery = null; if (query.length() > 0) { titleQuery = titleParser.parse(query); contentQuery = contentParser.parse(query); contextQuery = contextParser.parse(query); } Query timeConstraint = null; if (timeQueryString != null && timeQueryString.length() > 0) { timeConstraint = timeParser.parse(timeQueryString); } //BooleanQuery idQuery = new BooleanQuery(); BooleanQuery docQuery = new BooleanQuery(); if (titleQuery != null) { docQuery.add(titleQuery, BooleanClause.Occur.SHOULD); } if (contentQuery != null) { docQuery.add(contentQuery, BooleanClause.Occur.SHOULD); } Map<String, Float> docScoreMap = new HashMap<>(); if (titleQuery != null || contentQuery != null) { Logger.getLogger(TemporalEventSearch.class.getName()).log(Level.INFO, "Doc query: {0}", docQuery.toString()); TopDocs topDocs = doc_searcher.search(contentQuery, 1000); for (ScoreDoc sd : topDocs.scoreDocs) { String docid = doc_searcher.doc(sd.doc).get("id"); docScoreMap.put(docid, sd.score + 1); //idQuery.add(new TermQuery(new Term("id", docid)), BooleanClause.Occur.SHOULD); } } BooleanQuery timeQuery = new BooleanQuery(); if (timeConstraint != null) { timeQuery.add(timeConstraint, BooleanClause.Occur.MUST); } if (contextQuery != null) { timeQuery.add(contextQuery, 
BooleanClause.Occur.MUST); } /*if (timeConstraint != null || contextQuery != null) { timeQuery.add(idQuery, BooleanClause.Occur.MUST); }*/ Logger.getLogger(TemporalEventSearch.class.getName()).log(Level.INFO, "Time query: {0}", timeQuery.toString()); TopDocs timeDocs = time_searcher.search(timeQuery, 1000); List<SearchResult> results = new ArrayList<>(); for (ScoreDoc sd : timeDocs.scoreDocs) { Document timedoc = time_searcher.doc(sd.doc); String docId = timedoc.get("id"); Document document = getDocument(docId); if (document != null && document.get("content") != null) { SearchResult sr = new SearchResult(sd.doc, docId); sr.setStartOffset(timedoc.getField("offset_start").numericValue().intValue()); sr.setEndOffset(timedoc.getField("offset_end").numericValue().intValue()); String snip = createSnippet(document.get("content"), sr.getStartOffset(), sr.getEndOffset()); sr.setSnip(snip); sr.setTitle(document.get("title")); Float score = docScoreMap.get(docId); if (score != null) { sr.setScore(sd.score * score); results.add(sr); } else { sr.setScore(sd.score); results.add(sr); } } else { logger.log(Level.WARNING, "No text for doc: {0}", docId); } } Collections.sort(results); if (results.size() > maxResults) { return results.subList(0, maxResults); } else { return results; } }
From source file:di.uniba.it.tee2.search.TemporalEventSearch.java
License:Open Source License
/**
 * Searches documents by free text and a raw Lucene time-field query.
 * <p>
 * Same pipeline as {@code naturalSearch}, except {@code timeRange} is parsed
 * directly by the time-field parser instead of being normalized first.
 *
 * @param query free-text query (must be non-null; may be empty)
 * @param timeRange raw time-field query; null or empty disables the time constraint
 * @param maxResults maximum number of results to return
 * @return scored search results, best first
 * @throws Exception if query parsing or searching fails
 */
public List<SearchResult> search(String query, String timeRange, int maxResults) throws Exception {
    QueryParser contentParser = new QueryParser(Version.LUCENE_48, "content", analyzer);
    QueryParser titleParser = new QueryParser(Version.LUCENE_48, "title", analyzer);
    QueryParser contextParser = new QueryParser(Version.LUCENE_48, "context", analyzer);
    QueryParser timeParser = new QueryParser(Version.LUCENE_48, "time", analyzer);
    Query contentQuery = null;
    Query titleQuery = null;
    Query contextQuery = null;
    if (query.length() > 0) {
        titleQuery = titleParser.parse(query);
        contentQuery = contentParser.parse(query);
        contextQuery = contextParser.parse(query);
    }
    Query timeConstraint = null;
    if (timeRange != null && timeRange.length() > 0) {
        timeConstraint = timeParser.parse(timeRange);
    }
    // Title OR content must match for a document-level hit.
    BooleanQuery docQuery = new BooleanQuery();
    if (titleQuery != null) {
        docQuery.add(titleQuery, BooleanClause.Occur.SHOULD);
    }
    if (contentQuery != null) {
        docQuery.add(contentQuery, BooleanClause.Occur.SHOULD);
    }
    Map<String, Float> docScoreMap = new HashMap<>();
    if (titleQuery != null || contentQuery != null) {
        Logger.getLogger(TemporalEventSearch.class.getName()).log(Level.INFO, "Doc query: {0}",
                docQuery.toString());
        // FIX: search with the combined docQuery that was just built and logged;
        // the original searched contentQuery only, silently dropping the title clause.
        TopDocs topDocs = doc_searcher.search(docQuery, 1000);
        for (ScoreDoc sd : topDocs.scoreDocs) {
            String docid = doc_searcher.doc(sd.doc).get("id");
            // +1 so a matching document always boosts the time-index score.
            docScoreMap.put(docid, sd.score + 1);
        }
    }
    BooleanQuery timeQuery = new BooleanQuery();
    if (timeConstraint != null) {
        timeQuery.add(timeConstraint, BooleanClause.Occur.MUST);
    }
    if (contextQuery != null) {
        timeQuery.add(contextQuery, BooleanClause.Occur.MUST);
    }
    Logger.getLogger(TemporalEventSearch.class.getName()).log(Level.INFO, "Time query: {0}",
            timeQuery.toString());
    TopDocs timeDocs = time_searcher.search(timeQuery, 1000);
    List<SearchResult> results = new ArrayList<>();
    for (ScoreDoc sd : timeDocs.scoreDocs) {
        Document timedoc = time_searcher.doc(sd.doc);
        String docId = timedoc.get("id");
        Document document = getDocument(docId);
        if (document != null && document.get("content") != null) {
            SearchResult sr = new SearchResult(sd.doc, docId);
            sr.setStartOffset(timedoc.getField("offset_start").numericValue().intValue());
            sr.setEndOffset(timedoc.getField("offset_end").numericValue().intValue());
            String snip = createSnippet(document.get("content"), sr.getStartOffset(), sr.getEndOffset());
            sr.setSnip(snip);
            sr.setTitle(document.get("title"));
            // Multiply by the document-level boost when available.
            Float score = docScoreMap.get(docId);
            if (score != null) {
                sr.setScore(sd.score * score);
            } else {
                sr.setScore(sd.score);
            }
            results.add(sr);
        } else {
            logger.log(Level.WARNING, "No text for doc: {0}", docId);
        }
    }
    Collections.sort(results);
    if (results.size() > maxResults) {
        return results.subList(0, maxResults);
    } else {
        return results;
    }
}
From source file:dk.dbc.opensearch.fedora.search.LuceneFieldIndex.java
License:Open Source License
private Query constructQuery(final FieldSearchQuery fsq) throws ParseException { BooleanQuery booleanQuery = new BooleanQuery(); if (fsq.getType() == FieldSearchQuery.CONDITIONS_TYPE && fsq.getConditions().isEmpty()) { return new AllFieldsQuery("*"); }/*from w ww.j a va 2 s. c o m*/ if (fsq.getType() == FieldSearchQuery.CONDITIONS_TYPE && !fsq.getConditions().isEmpty()) { log.trace("Building map from conditions"); for (Condition cond : fsq.getConditions()) { String searchField = cond.getProperty().toUpperCase(); Operator operator = cond.getOperator(); String value = cond.getValue(); log.info("Raw condition: {}{}{}", new Object[] { searchField, operator.getSymbol(), value }); if (!(searchField.equals(FedoraFieldName.CDATE.name()) || searchField.equals(FedoraFieldName.DATE.name()) || searchField.equals(FedoraFieldName.DCMDATE.name()) || searchField.equals(FedoraFieldName.MDATE.name()) || searchField.equals(FedoraFieldName.PID.name()))) { log.trace("Lowercasing {} ({})", value, searchField); value = value.toLowerCase(); } if ((operator.equals(Operator.CONTAINS)) && value.trim().isEmpty()) { value = "*"; } String debugQuery = String.format("Building query: '%s %s %s'", searchField.toLowerCase(), operator, value); log.debug(debugQuery); try { booleanQuery.add(buildQueryFromClause(searchField.toLowerCase(), operator, value), Occur.MUST); } catch (IllegalArgumentException ex) { log.warn("Could not add query {}: {}", debugQuery, ex.getMessage()); } } } else if (fsq.getType() == FieldSearchQuery.TERMS_TYPE) { log.trace("Building map from terms"); String value = fsq.getTerms(); // See the javadoc for #buildQueryFromClause 2) b) and d) (and 3)) if (isSpecialCaseQuery("dummy_value", value)) { return new AllFieldsQuery("*"); } for (FedoraFieldName fieldName : FedoraFieldName.values()) { try { booleanQuery.add(buildQueryFromClause(fieldName.toString(), Operator.CONTAINS, value), Occur.SHOULD); } catch (IllegalArgumentException ex) { log.warn("Could not add query {}{}{}: {}", new 
Object[] { fieldName.toString(), "~", value, ex.getMessage() }); } } booleanQuery.setMinimumNumberShouldMatch(1); } return booleanQuery; }
From source file:document_search.MultiQuerySearch.java
License:Open Source License
public static List<MultiQueryResults> search(Index i, int docLimit, String searchField, Analyzer a, String... queries) {//from w w w.j a v a 2 s . c o m ParseWrapper parser = new ParseWrapper(new QueryParser(searchField, a)); // Parse all available results List<QueryPair> queryList = Arrays.asList(queries).stream().map(parser::parseQuery) .filter(query -> query != null).collect(Collectors.toList()); // Create the overall query BooleanQuery query = new BooleanQuery(); for (QueryPair pair : queryList) { query.add(pair.query, BooleanClause.Occur.SHOULD); } // FIXME: We should not have index searchers here, but we still do! :-( Fuck Lucene and its vast plots of, features... IndexSearcher searcher = new IndexSearcher(i.getIndexer().getReader()); // TODO: Refactor this because it looks terrible, functional style is better, but the functional isn't good... List<MultiQueryResults> queryResults = Collections.checkedList(new ArrayList<>(), MultiQueryResults.class); for (Index.IndexDocument doc : i.runQuery(query, docLimit)) { // Create new multi query results MultiQueryResults results = new MultiQueryResults(doc.id, doc.score, Arrays.asList(queries)); // Explain the individual query results for (QueryPair queryPair : queryList) { try { double score = searcher.explain(queryPair.query, doc.lucene_id).getValue(); QueryResults individualResults = new QueryResults(doc.id, queryPair.term, score); results.addQueryResult(individualResults); } catch (IOException e) { e.printStackTrace(); } } // Add the Multi-query results to the overall list queryResults.add(results); } return queryResults; }
From source file:edu.cmu.geolocator.resource.gazindexing.CollaborativeIndex.CollaborativeIndex.java
License:Apache License
@Override public ArrayList<Document> getDocumentsByPhrase(String phrase) { if (phrase == null || phrase.length() == 0) throw new NullPointerException(); if (phrase.startsWith("#")) phrase = phrase.substring(1);/*from w w w .j a va 2 s.c o m*/ TermQuery query = new TermQuery(new Term("LOWERED-NO-WS", phrase.toLowerCase().replace(" ", ""))); TopDocs res = null; try { res = stringSearcher.search(query, 2500); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } String abIds = null; if (ResourceFactory.getCountryCode2CountryMap().isInMap(phrase.toLowerCase())) abIds = ResourceFactory.getCountryCode2CountryMap().getValue(phrase.toLowerCase()).getId(); // System.out.println(res.totalHits); if (res == null && abIds == null) return null; if (res.totalHits == 0 && abIds == null) return null; ids = new HashSet<String>(); if (res != null) try { for (ScoreDoc doc : res.scoreDocs) { ids.add(stringSearcher.doc(doc.doc).get("ID")); } } catch (Exception e) { e.printStackTrace(); } if (abIds != null) ids.add(abIds); // System.out.println(ids); // System.out.println("total number of String ids are:" + ids.size()); q = new BooleanQuery(); for (String id : ids) { q.add(new TermQuery(new Term("ID", id)), Occur.SHOULD); } // use a term filter instead of a query filter. try { TopDocs docs = infoSearcher.search(q, 2500); // System.out.println("total hits in info is:" + docs.totalHits); returnDocs = new ArrayList<Document>(docs.totalHits); for (ScoreDoc d : docs.scoreDocs) { returnDocs.add(infoSearcher.doc(d.doc)); } return returnDocs; } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return null; }
From source file:edu.harvard.iq.dvn.core.index.DvnQuery.java
License:Apache License
/**
 * Builds the final Lucene query for this search request and stores it in
 * {@code this.query}. Search terms are first split into study-level,
 * variable, and file-metadata groups; the study-level terms are AND-ed into
 * the base query, which is then scoped by whichever collection/dataverse/
 * subnetwork restriction applies.
 */
public void constructQuery() {
    logger.fine("in constructQuery...");
    BooleanQuery searchQuery = null;
    // FIXME: how much of this logic do we need? from indexService.searchwithFacets()...
    List<BooleanQuery> searchParts = new ArrayList();
    // "study-level search" is our "normal", default search, performed on the
    // study metadata keywords.
    boolean studyLevelSearch = false;
    boolean containsStudyLevelAndTerms = false;
    // Variable and file-level-metadata searches are tracked separately because
    // of their different granularity (variables vs. files).
    boolean variableSearch = false;
    boolean fileMetadataSearch = false;
    // Any file-level search, i.e. either a variable or file metadata search. -- L.A.
    boolean fileLevelSearch = false;
    List<SearchTerm> studyLevelSearchTerms = new ArrayList();
    List<SearchTerm> variableSearchTerms = new ArrayList();
    List<SearchTerm> fileMetadataSearchTerms = new ArrayList();
    Indexer indexer = Indexer.getInstance();
    // Partition the search terms into the three groups.
    for (Iterator it = searchTerms.iterator(); it.hasNext();) {
        SearchTerm elem = (SearchTerm) it.next();
        logger.fine("elem field name = " + elem.getFieldName().toString());
        if (elem.getFieldName().equals("variable")) {
            // SearchTerm st = dvnTokenizeSearchTerm(elem);
            // variableSearchTerms.add(st);
            variableSearchTerms.add(elem);
            variableSearch = true;
        } else if (indexer.isFileMetadataField(elem.getFieldName())) {
            fileMetadataSearch = true;
            fileMetadataSearchTerms.add(elem);
        } else {
            // SearchTerm nvst = dvnTokenizeSearchTerm(elem);
            // nonVariableSearchTerms.add(nvst);
            if (elem.getOperator().equals("=")) {
                containsStudyLevelAndTerms = true;
            }
            studyLevelSearchTerms.add(elem);
            studyLevelSearch = true;
        }
    }
    // NOTE(review): only studyLevelSearchTerms is used below; the flags and the
    // variable/file term lists are populated but never read in this method —
    // presumably remnants of indexService.searchWithFacets(); confirm before removing.
    BooleanQuery searchTermsQuery = indexer.andSearchTermClause(studyLevelSearchTerms);
    searchParts.add(searchTermsQuery);
    searchQuery = indexer.andQueryClause(searchParts);
    if (!collectionQueries.isEmpty() || dvOwnerIdQuery != null) {
        // (match in any collection) OR (match owned by this dataverse).
        BooleanQuery queryAcrossAllCollections = new BooleanQuery();
        BooleanQuery allCollections = new BooleanQuery();
        BooleanQuery submittedAndInCollection = new BooleanQuery();
        for (Query collectionQuery : collectionQueries) {
            allCollections.add(collectionQuery, BooleanClause.Occur.SHOULD);
        }
        submittedAndInCollection.add(searchQuery, BooleanClause.Occur.MUST);
        submittedAndInCollection.add(allCollections, BooleanClause.Occur.MUST);
        queryAcrossAllCollections.add(submittedAndInCollection, BooleanClause.Occur.SHOULD);
        BooleanQuery dvSpecific = new BooleanQuery();
        dvSpecific.add(searchQuery, BooleanClause.Occur.MUST);
        // NOTE(review): if collectionQueries is non-empty while dvOwnerIdQuery is
        // null, add(null, MUST) below will likely NPE — confirm the invariant that
        // dvOwnerIdQuery is always set whenever this branch is entered.
        dvSpecific.add(dvOwnerIdQuery, BooleanClause.Occur.MUST);
        queryAcrossAllCollections.add(dvSpecific, BooleanClause.Occur.SHOULD);
        searchQuery = queryAcrossAllCollections;
    } else if (singleCollectionQuery != null) {
        logger.fine("single collection will be queried");
        BooleanQuery submittedAndInCollection = new BooleanQuery();
        submittedAndInCollection.add(searchQuery, BooleanClause.Occur.MUST);
        submittedAndInCollection.add(singleCollectionQuery, BooleanClause.Occur.MUST);
        searchQuery = submittedAndInCollection;
    } else if (!multipleCollectionQueries.isEmpty()) {
        logger.fine("adding multipleCollection queries...");
        // OR together (base query AND collection) for each collection.
        BooleanQuery queryMultipleCollections = new BooleanQuery();
        for (Query collectionQuery : multipleCollectionQueries) {
            BooleanQuery submittedAndInCollection = new BooleanQuery();
            submittedAndInCollection.add(searchQuery, BooleanClause.Occur.MUST);
            submittedAndInCollection.add(collectionQuery, BooleanClause.Occur.MUST);
            queryMultipleCollections.add(submittedAndInCollection, BooleanClause.Occur.SHOULD);
        }
        searchQuery = queryMultipleCollections;
    } else if (subNetworkQuery != null) {
        /*
         * When a user is in the context of a subnetwork any search that is
         * performed will return studies that are owned by dataverses in
         * that subnetwork along with any studies from outside dataverses
         * that are included in collections.
         */
        BooleanQuery combinedSubNetworkQuery = new BooleanQuery();
        combinedSubNetworkQuery.add(searchQuery, BooleanClause.Occur.MUST);
        combinedSubNetworkQuery.add(subNetworkQuery, BooleanClause.Occur.MUST);
        searchQuery = combinedSubNetworkQuery;
    }
    /*
     * NOTE(review): a large commented-out alternative implementation of subnetwork
     * searching (Phil's version, which AND-ed and ran every dataverse-member and
     * collection query in the subnetwork) previously sat here. Per the original
     * author (L.A.) it was kept only for reference/comparison and its performance
     * was expected to be atrocious on large subnetworks; it has been condensed to
     * this note — recover the full text from version control if needed.
     */
    else {
        logger.fine("DVN-wide search will be made");
    }
    query = searchQuery;
}
From source file:edu.harvard.iq.dvn.core.index.Indexer.java
License:Apache License
List<Long> findStudiesInCollections(VDC vdc) { List<Long> linkedStudyIds = null; List<Query> collectionQueries = getCollectionQueriesForSubnetworkIndexing(vdc); if (collectionQueries != null && collectionQueries.size() > 0) { logger.fine("running combined collections query for the vdc id " + vdc.getId() + ", " + vdc.getName() + "; " + collectionQueries.size() + " queries total."); BooleanQuery queryAcrossAllCollections = new BooleanQuery(); for (Query collectionQuery : collectionQueries) { queryAcrossAllCollections.add(collectionQuery, BooleanClause.Occur.SHOULD); }//w w w.java 2s. co m try { linkedStudyIds = getHitIds(queryAcrossAllCollections); } catch (Exception ex) { logger.warning("Caught exception while executing combined colleciton query on VDC " + vdc.getId()); ex.printStackTrace(); } } return linkedStudyIds; }