Usage examples for org.apache.solr.client.solrj.SolrQuery#addFilterQuery
public SolrQuery addFilterQuery(String... fq)
From source file:au.org.ala.biocache.dao.SearchDAOImpl.java
License:Open Source License
/** * Perform SOLR query - takes a SolrQuery and search params * * @param solrQuery//from w w w . ja v a 2 s . c o m * @param requestParams * @return * @throws SolrServerException */ private QueryResponse runSolrQuery(SolrQuery solrQuery, SearchRequestParams requestParams) throws SolrServerException { if (requestParams.getFormattedFq() != null) { for (String fq : requestParams.getFormattedFq()) { if (StringUtils.isNotEmpty(fq)) { solrQuery.addFilterQuery(fq); } } } //include null facets solrQuery.setFacetMissing(true); solrQuery.setRows(requestParams.getPageSize()); solrQuery.setStart(requestParams.getStart()); solrQuery.setSortField(requestParams.getSort(), ORDER.valueOf(requestParams.getDir())); if (logger.isDebugEnabled()) { logger.debug("runSolrQuery: " + solrQuery.toString()); } QueryResponse qr = query(solrQuery, queryMethod); // can throw exception if (logger.isDebugEnabled()) { logger.debug("runSolrQuery: " + solrQuery.toString() + " qtime:" + qr.getQTime()); if (qr.getResults() == null) { logger.debug("no results"); } else { logger.debug("matched records: " + qr.getResults().getNumFound()); } } return qr; }
From source file:au.org.ala.biocache.dao.SearchDAOImpl.java
License:Open Source License
/** * Get a distinct list of species and their counts using a facet search * * @param queryString/*from ww w.j av a 2 s . c o m*/ * @param pageSize * @param sortField * @param sortDirection * @return * @throws SolrServerException */ protected List<TaxaCountDTO> getSpeciesCounts(String queryString, List<String> filterQueries, List<String> facetFields, Integer pageSize, Integer startIndex, String sortField, String sortDirection) throws SolrServerException { List<TaxaCountDTO> speciesCounts = new ArrayList<TaxaCountDTO>(); SolrQuery solrQuery = new SolrQuery(); solrQuery.setQueryType("standard"); solrQuery.setQuery(queryString); if (filterQueries != null && filterQueries.size() > 0) { //solrQuery.addFilterQuery("(" + StringUtils.join(filterQueries, " OR ") + ")"); for (String fq : filterQueries) { solrQuery.addFilterQuery(fq); } } solrQuery.setRows(0); solrQuery.setFacet(true); solrQuery.setFacetSort(sortField); for (String facet : facetFields) { solrQuery.addFacetField(facet); if (logger.isDebugEnabled()) { logger.debug("adding facetField: " + facet); } } //set the facet starting point based on the paging information solrQuery.setFacetMinCount(1); solrQuery.setFacetLimit(pageSize); // unlimited = -1 | pageSize solrQuery.add("facet.offset", Integer.toString(startIndex)); if (logger.isDebugEnabled()) { logger.debug("getSpeciesCount query :" + solrQuery.getQuery()); } QueryResponse qr = runSolrQuery(solrQuery, null, 1, 0, "score", sortDirection); if (logger.isInfoEnabled()) { logger.info("SOLR query: " + solrQuery.getQuery() + "; total hits: " + qr.getResults().getNumFound()); } List<FacetField> facets = qr.getFacetFields(); java.util.regex.Pattern p = java.util.regex.Pattern.compile("\\|"); if (facets != null && facets.size() > 0) { if (logger.isDebugEnabled()) { logger.debug("Facets: " + facets.size() + "; facet #1: " + facets.get(0).getName()); } for (FacetField facet : facets) { List<FacetField.Count> facetEntries = facet.getValues(); if ((facetEntries != null) && 
(facetEntries.size() > 0)) { for (FacetField.Count fcount : facetEntries) { TaxaCountDTO tcDTO = null; String name = fcount.getName() != null ? fcount.getName() : ""; if (fcount.getFacetField().getName().equals(NAMES_AND_LSID)) { String[] values = p.split(name, 5); if (values.length >= 5) { if (!"||||".equals(name)) { tcDTO = new TaxaCountDTO(values[0], fcount.getCount()); tcDTO.setGuid(StringUtils.trimToNull(values[1])); tcDTO.setCommonName(values[2]); tcDTO.setKingdom(values[3]); tcDTO.setFamily(values[4]); if (StringUtils.isNotEmpty(tcDTO.getGuid())) tcDTO.setRank(searchUtils.getTaxonSearch(tcDTO.getGuid())[1].split(":")[0]); } } else { if (logger.isDebugEnabled()) { logger.debug("The values length: " + values.length + " :" + name); } tcDTO = new TaxaCountDTO(name, fcount.getCount()); } //speciesCounts.add(i, tcDTO); if (tcDTO != null && tcDTO.getCount() > 0) speciesCounts.add(tcDTO); } else if (fcount.getFacetField().getName().equals(COMMON_NAME_AND_LSID)) { String[] values = p.split(name, 6); if (values.length >= 5) { if (!"|||||".equals(name)) { tcDTO = new TaxaCountDTO(values[1], fcount.getCount()); tcDTO.setGuid(StringUtils.trimToNull(values[2])); tcDTO.setCommonName(values[0]); //cater for the bug of extra vernacular name in the result tcDTO.setKingdom(values[values.length - 2]); tcDTO.setFamily(values[values.length - 1]); if (StringUtils.isNotEmpty(tcDTO.getGuid())) tcDTO.setRank(searchUtils.getTaxonSearch(tcDTO.getGuid())[1].split(":")[0]); } } else { if (logger.isDebugEnabled()) { logger.debug("The values length: " + values.length + " :" + name); } tcDTO = new TaxaCountDTO(name, fcount.getCount()); } //speciesCounts.add(i, tcDTO); if (tcDTO != null && tcDTO.getCount() > 0) { speciesCounts.add(tcDTO); } } } } } } return speciesCounts; }
From source file:au.org.ala.biocache.dao.TaxonDAOImpl.java
License:Open Source License
private List<FacetField.Count> extractFacet(String queryString, String[] filterQueries, String facetName) throws Exception { SolrQuery query = new SolrQuery(queryString); query.setFacet(true);/* w ww .j av a2 s . c om*/ query.addFacetField(facetName); query.setRows(0); query.setFacetLimit(200000); query.setStart(0); query.setFacetMinCount(1); query.setFacetSort("index"); //query.setFacet if (filterQueries != null) { for (String fq : filterQueries) query.addFilterQuery(fq); } QueryResponse response = server.query(query); List<FacetField.Count> fc = response.getFacetField(facetName).getValues(); if (fc == null) { fc = new ArrayList<FacetField.Count>(); } return fc; }
From source file:bamboo.trove.rule.RuleChangeUpdateManager.java
License:Apache License
/** * Search solr for documents that are effected by this rule and send to be * rechecked./*from ww w . j av a 2s .c om*/ * <p/> * Depend on what has changed will decide on what and how many searched we do.<br/> * First we need to search for records that have been set by this rule(search * for the rule id) and then * <ul> * <li>Changed URL we will also need to search for records that match the * url(search for url).</li> * <li>Embargo changed(and gotten longer) we will also need to search in the * embargo period(search url and capture date).</li> * <li>Capture range changed(time extends earlier start or later end) we also need to search for capture date in the range(search url and capture date).</li> * <li>Retrieve date changed we also need to search if now is with in the range.</li> * </ul> * * @param currentRule The current rule in place * @param newRule The rule that will replace it * @throws IOException If network errors occur * @throws SolrServerException If errors occur inside the Solr servers */ private WorkLog findDocuments(CdxRule currentRule, CdxRule newRule) throws SolrServerException, IOException { log.debug("Find docs for rule {}", currentRule != null ? currentRule.getId() : newRule.getId()); // query part to stop records being processed more that once String notLastIndexed = SolrEnum.LAST_INDEXED + ":[* TO " + format(CdxRestrictionService.TODAY) + "]"; if (currentRule == null) { // this is a new rule search by url and possibly date return findDocumentsNewRule(newRule, notLastIndexed); } if (newRule == null) { // this is a current rule search date change processing return findDocumentsDateRule(currentRule, notLastIndexed); } // Changed rules WorkLog workLog = new WorkLog(currentRule.getId()); // Step 1.. find everything that is already impacted by this rule and reindex it SolrQuery query = createQuery(SolrEnum.RULE + ":" + currentRule.getId()); query.addFilterQuery(notLastIndexed); processQuery(query, workLog); // Step 2.. 
find anything that would be covered by the new rule that hasn't already been re-indexed query = convertRuleToSearch(newRule, notLastIndexed); processQuery(query, workLog); // Job done return workLog; }
From source file:bamboo.trove.rule.RuleChangeUpdateManager.java
License:Apache License
private WorkLog findDocumentsDateRule(CdxRule dateBasedRule, String notLastIndexed) throws SolrServerException, IOException { WorkLog workLog = new WorkLog(dateBasedRule.getId()); // these are from no change to the rule so we are checking date coming into or going out of range boolean urlSearchNeeded = false; // ******************* // Access dates CdxDateRange accessDates = dateBasedRule.getAccessed(); if (accessDates != null && accessDates.hasData()) { if (accessDates.contains(CdxRestrictionService.TODAY)) { // now is in range so we need to search by url urlSearchNeeded = true;//from ww w .j ava 2 s . co m } else { // Rule is no longer applicable. Look for records set by the rule to re-process them SolrQuery query = createQuery(SolrEnum.RULE + ":" + dateBasedRule.getId()); query.addFilterQuery(notLastIndexed); processQuery(query, workLog); // Job done... this rule will no longer apply to anything in the index return workLog; } } // ******************* // Embargoes if (dateBasedRule.getPeriod() != null && !dateBasedRule.getPeriod().isZero()) { // Any capture dates older than TODAY - embargo period should be checked for possible release Calendar c = Calendar.getInstance(); c.setTime(CdxRestrictionService.TODAY); c.add(Calendar.YEAR, -dateBasedRule.getPeriod().getYears()); c.add(Calendar.MONTH, -dateBasedRule.getPeriod().getMonths()); c.add(Calendar.DAY_OF_YEAR, -dateBasedRule.getPeriod().getDays()); SolrQuery query = createQuery(SolrEnum.RULE + ":" + dateBasedRule.getId()); query.addFilterQuery(SolrEnum.DATE + ":[* TO " + format.format(c.getTime()) + "]"); query.addFilterQuery(notLastIndexed); processQuery(query, workLog); } // ******************* // URL based search if (urlSearchNeeded) { SolrQuery query = convertRuleToSearch(dateBasedRule, notLastIndexed); processQuery(query, workLog); } return workLog; }
From source file:bamboo.trove.rule.RuleChangeUpdateManager.java
License:Apache License
@VisibleForTesting public SolrQuery convertRuleToSearch(CdxRule rule, String notLastIndexed) { // URL complexity first List<String> urlQueries = new ArrayList<>(); for (String url : rule.getUrlPatterns()) { if (!url.trim().isEmpty()) { urlQueries.add(urlSearch(url)); }/*from w ww. j a v a 2 s . c o m*/ } if (urlQueries.isEmpty()) { urlQueries.add("*:*"); } SolrQuery query = createQuery("(" + StringUtils.join(urlQueries, ") OR (") + ")"); // Filter out stuff we have touched already this run query.addFilterQuery(notLastIndexed); // Filter for Embargo if (rule.getPeriod() != null && !rule.getPeriod().isZero()) { // TODAY +/- embargo period ZonedDateTime today = ZonedDateTime.ofInstant(CdxRestrictionService.TODAY.toInstant(), TZ); Date embargoStart = Date.from(today.minus(rule.getPeriod()).toInstant()); query.addFilterQuery(SolrEnum.DATE + ":[" + format.format(embargoStart) + " TO *]"); } // Filter for Capture date if (rule.getCaptured() != null && rule.getCaptured().hasData()) { query.addFilterQuery(SolrEnum.DATE + ":[" + format.format(rule.getCaptured().start) + " TO " + format.format(rule.getCaptured().end) + "]"); } // Worth noting we don't filter for access date because it is one of the // deciding data points in whether or not to run this query at all. return query; }
From source file:com.databasepreservation.visualization.utils.SolrUtils.java
public static <T extends Serializable> IndexResult<T> find(SolrClient index, Class<T> classToRetrieve, Filter filter, Sorter sorter, Sublist sublist, Facets facets, RodaUser user, boolean showInactive) throws GenericException, RequestNotValidException { IndexResult<T> ret;//from w w w . j a v a 2s . c o m SolrQuery query = new SolrQuery(); query.setQuery(parseFilter(filter)); query.setSorts(parseSorter(sorter)); query.setStart(sublist.getFirstElementIndex()); query.setRows(sublist.getMaximumElementCount()); parseAndConfigureFacets(facets, query); if (hasPermissionFilters(classToRetrieve)) { query.addFilterQuery(getFilterQueries(user, showInactive)); } try { QueryResponse response = index.query(getIndexName(classToRetrieve), query); ret = queryResponseToIndexResult(response, classToRetrieve, facets); } catch (SolrServerException | IOException e) { throw new GenericException("Could not query index", e); } return ret; }
From source file:com.databasepreservation.visualization.utils.SolrUtils.java
/**
 * Translates the application-level {@code Facets} definition into SOLR facet
 * parameters on the given query. Only {@code SimpleFacetParameter} is
 * supported; any other parameter type is logged as an error and ignored.
 * Selected facet values are OR-ed together into a single filter query.
 *
 * @param facets the facet definition; a null value configures nothing
 * @param query the query to configure (mutated in place)
 */
private static void parseAndConfigureFacets(Facets facets, SolrQuery query) {
    if (facets == null) {
        return;
    }
    query.setFacetSort(FacetParams.FACET_SORT_INDEX);
    if (!"".equals(facets.getQuery())) {
        query.addFacetQuery(facets.getQuery());
    }
    StringBuilder filterQuery = new StringBuilder();
    for (Map.Entry<String, FacetParameter> parameter : facets.getParameters().entrySet()) {
        FacetParameter facetParameter = parameter.getValue();
        if (facetParameter instanceof SimpleFacetParameter) {
            setQueryFacetParameter(query, (SimpleFacetParameter) facetParameter);
            appendValuesUsingOROperator(filterQuery, facetParameter.getName(),
                    ((SimpleFacetParameter) facetParameter).getValues());
        } else {
            // FIX: collapsed the duplicated "instanceof RangeFacetParameter"
            // and else branches, which logged the identical message; neither
            // type was supported anyway.
            LOGGER.error("Unsupported facet parameter class: {}", facetParameter.getClass().getName());
        }
    }
    if (filterQuery.length() > 0) {
        query.addFilterQuery(filterQuery.toString());
        LOGGER.trace("Query after defining facets: " + query.toString());
    }
}
From source file:com.digitalpebble.storm.crawler.solr.persistence.SolrSpout.java
License:Apache License
private void populateBuffer() { // TODO Sames as the ElasticSearchSpout? // TODO Use the cursor feature? // https://cwiki.apache.org/confluence/display/solr/Pagination+of+Results SolrQuery query = new SolrQuery(); query.setQuery("*:*").addFilterQuery("nextFetchDate:[* TO NOW]").setStart(lastStartOffset) .setRows(this.bufferSize); if (StringUtils.isNotBlank(diversityField)) { query.addFilterQuery(String.format("{!collapse field=%s}", diversityField)); query.set("expand", "true").set("expand.rows", diversityBucketSize); }/*from www .j ava 2 s . c om*/ try { QueryResponse response = connection.getClient().query(query); SolrDocumentList docs = new SolrDocumentList(); if (StringUtils.isNotBlank(diversityField)) { // Add the main documents collapsed by the CollapsingQParser // plugin docs.addAll(response.getResults()); Map<String, SolrDocumentList> expandedResults = response.getExpandedResults(); for (String key : expandedResults.keySet()) { docs.addAll(expandedResults.get(key)); } } else { docs = response.getResults(); } int numhits = response.getResults().size(); // no more results? if (numhits == 0) lastStartOffset = 0; else lastStartOffset += numhits; for (SolrDocument doc : docs) { String url = (String) doc.get("url"); // is already being processed - skip it! if (beingProcessed.containsKey(url)) continue; Metadata metadata = new Metadata(); String mdAsString = (String) doc.get("metadata"); // get the serialized metadata information if (mdAsString != null) { // parse the string and generate the MD accordingly // url.path: http://www.lemonde.fr/ // depth: 1 String[] kvs = mdAsString.split("\n"); for (String pair : kvs) { String[] kv = pair.split(": "); if (kv.length != 2) { LOG.info("Invalid key value pair {}", pair); continue; } metadata.addValue(kv[0], kv[1]); } } buffer.add(new Values(url, metadata)); } } catch (Exception e) { LOG.error("Can't query Solr: {}", e); } }
From source file:com.digitalpebble.stormcrawler.solr.persistence.SolrSpout.java
License:Apache License
private void populateBuffer() { // TODO Sames as the ElasticSearchSpout? // TODO Use the cursor feature? // https://cwiki.apache.org/confluence/display/solr/Pagination+of+Results SolrQuery query = new SolrQuery(); query.setQuery("*:*").addFilterQuery("nextFetchDate:[* TO NOW]").setStart(lastStartOffset) .setRows(this.bufferSize); if (StringUtils.isNotBlank(diversityField)) { query.addFilterQuery(String.format("{!collapse field=%s}", diversityField)); query.set("expand", "true").set("expand.rows", diversityBucketSize); }/*w w w. ja v a 2s . c om*/ try { QueryResponse response = connection.getClient().query(query); SolrDocumentList docs = new SolrDocumentList(); if (StringUtils.isNotBlank(diversityField)) { // Add the main documents collapsed by the CollapsingQParser // plugin docs.addAll(response.getResults()); Map<String, SolrDocumentList> expandedResults = response.getExpandedResults(); for (String key : expandedResults.keySet()) { docs.addAll(expandedResults.get(key)); } } else { docs = response.getResults(); } int numhits = response.getResults().size(); // no more results? if (numhits == 0) lastStartOffset = 0; else lastStartOffset += numhits; String prefix = mdPrefix.concat("."); for (SolrDocument doc : docs) { String url = (String) doc.get("url"); // is already being processed - skip it! if (beingProcessed.containsKey(url)) continue; Metadata metadata = new Metadata(); Iterator<String> keyIterators = doc.getFieldNames().iterator(); while (keyIterators.hasNext()) { String key = keyIterators.next(); if (key.startsWith(prefix)) { Collection<Object> values = doc.getFieldValues(key); key = StringUtils.replace(key, prefix, "", 1); Iterator<Object> valueIterator = values.iterator(); while (valueIterator.hasNext()) { String value = (String) valueIterator.next(); metadata.addValue(key, value); } } } buffer.add(new Values(url, metadata)); } } catch (Exception e) { LOG.error("Can't query Solr: {}", e); } }