List of usage examples for org.apache.solr.client.solrj.response QueryResponse getExpandedResults
public Map<String, SolrDocumentList> getExpandedResults()
From source file:com.digitalpebble.storm.crawler.solr.persistence.SolrSpout.java
License:Apache License
private void populateBuffer() { // TODO Sames as the ElasticSearchSpout? // TODO Use the cursor feature? // https://cwiki.apache.org/confluence/display/solr/Pagination+of+Results SolrQuery query = new SolrQuery(); query.setQuery("*:*").addFilterQuery("nextFetchDate:[* TO NOW]").setStart(lastStartOffset) .setRows(this.bufferSize); if (StringUtils.isNotBlank(diversityField)) { query.addFilterQuery(String.format("{!collapse field=%s}", diversityField)); query.set("expand", "true").set("expand.rows", diversityBucketSize); }/* w ww . ja v a 2 s . c o m*/ try { QueryResponse response = connection.getClient().query(query); SolrDocumentList docs = new SolrDocumentList(); if (StringUtils.isNotBlank(diversityField)) { // Add the main documents collapsed by the CollapsingQParser // plugin docs.addAll(response.getResults()); Map<String, SolrDocumentList> expandedResults = response.getExpandedResults(); for (String key : expandedResults.keySet()) { docs.addAll(expandedResults.get(key)); } } else { docs = response.getResults(); } int numhits = response.getResults().size(); // no more results? if (numhits == 0) lastStartOffset = 0; else lastStartOffset += numhits; for (SolrDocument doc : docs) { String url = (String) doc.get("url"); // is already being processed - skip it! if (beingProcessed.containsKey(url)) continue; Metadata metadata = new Metadata(); String mdAsString = (String) doc.get("metadata"); // get the serialized metadata information if (mdAsString != null) { // parse the string and generate the MD accordingly // url.path: http://www.lemonde.fr/ // depth: 1 String[] kvs = mdAsString.split("\n"); for (String pair : kvs) { String[] kv = pair.split(": "); if (kv.length != 2) { LOG.info("Invalid key value pair {}", pair); continue; } metadata.addValue(kv[0], kv[1]); } } buffer.add(new Values(url, metadata)); } } catch (Exception e) { LOG.error("Can't query Solr: {}", e); } }
From source file:com.digitalpebble.stormcrawler.solr.persistence.SolrSpout.java
License:Apache License
private void populateBuffer() { // TODO Sames as the ElasticSearchSpout? // TODO Use the cursor feature? // https://cwiki.apache.org/confluence/display/solr/Pagination+of+Results SolrQuery query = new SolrQuery(); query.setQuery("*:*").addFilterQuery("nextFetchDate:[* TO NOW]").setStart(lastStartOffset) .setRows(this.bufferSize); if (StringUtils.isNotBlank(diversityField)) { query.addFilterQuery(String.format("{!collapse field=%s}", diversityField)); query.set("expand", "true").set("expand.rows", diversityBucketSize); }/*from ww w . ja v a 2 s . com*/ try { QueryResponse response = connection.getClient().query(query); SolrDocumentList docs = new SolrDocumentList(); if (StringUtils.isNotBlank(diversityField)) { // Add the main documents collapsed by the CollapsingQParser // plugin docs.addAll(response.getResults()); Map<String, SolrDocumentList> expandedResults = response.getExpandedResults(); for (String key : expandedResults.keySet()) { docs.addAll(expandedResults.get(key)); } } else { docs = response.getResults(); } int numhits = response.getResults().size(); // no more results? if (numhits == 0) lastStartOffset = 0; else lastStartOffset += numhits; String prefix = mdPrefix.concat("."); for (SolrDocument doc : docs) { String url = (String) doc.get("url"); // is already being processed - skip it! if (beingProcessed.containsKey(url)) continue; Metadata metadata = new Metadata(); Iterator<String> keyIterators = doc.getFieldNames().iterator(); while (keyIterators.hasNext()) { String key = keyIterators.next(); if (key.startsWith(prefix)) { Collection<Object> values = doc.getFieldValues(key); key = StringUtils.replace(key, prefix, "", 1); Iterator<Object> valueIterator = values.iterator(); while (valueIterator.hasNext()) { String value = (String) valueIterator.next(); metadata.addValue(key, value); } } } buffer.add(new Values(url, metadata)); } } catch (Exception e) { LOG.error("Can't query Solr: {}", e); } }