List of usage examples for org.apache.solr.client.solrj SolrQuery setRows
public SolrQuery setRows(Integer rows)
From source file:kbSRU.kbSRU.java
License:Open Source License
public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { response.setContentType(XML_RESPONSE_HEADER); // Talkback happens in XML form. response.setCharacterEncoding("UTF-8"); // Unicode++ request.setCharacterEncoding("UTF-8"); PrintWriter out = null; // The talkback buffer. // handle startrecord Integer startRecord = 0;/*from www. j a v a2s .c o m*/ if (!(request.getParameter("startRecord") == null)) { try { startRecord = Integer.parseInt(request.getParameter("startRecord")) - 1; } catch (NumberFormatException e) { startRecord = 0; } } // maximumrecords Integer maximumRecords = Integer.parseInt(this.config.getProperty("default_maximumRecords")); if (!(request.getParameter("maximumRecords") == null)) { maximumRecords = Integer.parseInt(request.getParameter("maximumRecords")); } // operation String operation = request.getParameter("operation"); // x_collection String x_collection = request.getParameter("x-collection"); if (x_collection == null) x_collection = this.config.getProperty("default_x_collection"); if (x_collection == null) operation = null; // sortkeys String sortKeys = request.getParameter("sortKeys"); // sortorder String sortOrder = request.getParameter("sortOrder"); // recordschema String recordSchema = request.getParameter("recordSchema"); if (recordSchema == null) recordSchema = "dc"; if (recordSchema.equalsIgnoreCase("dcx")) { recordSchema = "dcx"; } if (recordSchema.equalsIgnoreCase("solr")) { recordSchema = "solr"; } // query request String query = request.getParameter("query"); String q = request.getParameter("q"); // who is requestor ? String remote_ip = request.getHeader("X-FORWARDED-FOR"); if (remote_ip == null) { remote_ip = request.getRemoteAddr().trim(); } else { remote_ip = request.getHeader("X-FORWARDED-FOR"); } // handle debug Boolean debug = Boolean.parseBoolean(request.getParameter("debug")); if (!debug) { out = new PrintWriter(new OutputStreamWriter(response.getOutputStream(), "UTF8"), true); } // handle query if ((query == null) && (q != null)) { query = q; } else { if ((query != null) && (q == null)) { q = query; } else { operation = null; } } // handle operation if (operation == null) { if (query != null) { operation = "searchRetrieve"; } else { operation = "explain"; } } // searchRetrieve if (operation.equalsIgnoreCase("searchRetrieve")) { if (query == null) { operation = "explain"; log.debug(operation + ":" + query); } } // start talking back. String[] sq = { "" }; String solrquery = ""; // facet String facet = null; List<FacetField> fct = null; if (request.getParameter("facet") != null) { facet = request.getParameter("facet"); log.debug("facet : " + facet); } if (operation == null) { operation = "searchretrieve"; } else { // explain response if (operation.equalsIgnoreCase("explain")) { log.debug("operation = explain"); out.write("<srw:explainResponse xmlns:srw=\"http://www.loc.gov/zing/srw/\">"); out.write("</srw:explainResponse>"); } else { // DEBUG routine operation = "searchretrieve"; String triplequery = null; if (query.matches(".*?\\[.+?\\].*?")) { // New symantic syntax triplequery = symantic_query(query); query = query.split("\\[")[0] + " " + triplequery; log.fatal(triplequery); solrquery = CQLtoLucene.translate(query, log, config); } else { solrquery = CQLtoLucene.translate(query, log, config); } log.debug(solrquery); if (debug == true) { response.setContentType(HTML_RESPONSE_HEADER); out = new PrintWriter(new OutputStreamWriter(response.getOutputStream(), "UTF8"), true); out.write("<html><body>\n\n"); out.write("'" + remote_ip + "'<br>\n"); out.write("<form action='http://www.kbresearch.nl/kbSRU'>"); out.write("<input type=text name=q value='" + query + "' size=120>"); out.write("<input type=hidden name=debug value=True>"); out.write("<input type=submit>"); out.write("<table border=1><tr><td>"); out.write("q</td><td>" + query + "</td></tr><tr>"); out.write("<td>query out</td><td>" + URLDecoder.decode(solrquery) + "</td></tr>"); out.write("<tr><td>SOLR_URL</td><td> <a href='" + this.config.getProperty("collection." + x_collection.toLowerCase() + ".solr_baseurl") + "/?q=" + solrquery + "'>" + this.config.getProperty("collection." + x_collection.toLowerCase() + ".solr_baseurl") + "/select/?q=" + solrquery + "</a><br>" + this.config.getProperty("solr_url") + solrquery + "</td></tr>"); out.write("<b>SOLR_QUERY</b> : <BR> <iframe width=900 height=400 src='" + this.config.getProperty("collection." + x_collection.toLowerCase() + ".solr_baseurl") + "/../?q=" + solrquery + "'></iframe><BR>"); out.write("<b>SRU_QUERY</b> : <BR> <a href=" + this.config.getProperty("baseurl") + "?q=" + query + "'>" + this.config.getProperty("baseurl") + "?q=" + query + "</a><br><iframe width=901 height=400 src='http://www.kbresearch.nl/kbSRU/?q=" + query + "'></iframe><BR>"); out.write("<br><b>JSRU_QUERY</b> : <BR><a href='http://jsru.kb.nl/sru/?query=" + query + "&x-collection=" + x_collection + "'>http://jsru.kb.nl/sru/?query=" + query + "&x-collection=GGC</a><br><iframe width=900 height=400 src='http://jsru.kb.nl/sru/?query=" + query + "&x-collection=GGC'></iframe>"); } else { // XML SearchRetrieve response String url = this.config .getProperty("collection." + x_collection.toLowerCase() + ".solr_baseurl"); String buffer = ""; CommonsHttpSolrServer server = null; server = new CommonsHttpSolrServer(url); log.fatal("URSING " + url); server.setParser(new XMLResponseParser()); int numfound = 0; try { SolrQuery do_query = new SolrQuery(); do_query.setQuery(solrquery); do_query.setRows(maximumRecords); do_query.setStart(startRecord); if ((sortKeys != null) && (sortKeys.length() > 1)) { if (sortOrder != null) { if (sortOrder.equals("asc")) { do_query.setSortField(sortKeys, SolrQuery.ORDER.asc); } if (sortOrder.equals("desc")) { do_query.setSortField(sortKeys, SolrQuery.ORDER.desc); } } else { for (String str : sortKeys.trim().split(",")) { str = str.trim(); if (str.length() > 1) { if (str.equals("date")) { do_query.setSortField("date_date", SolrQuery.ORDER.desc); log.debug("SORTORDERDEBUG | DATE! " + str + " | "); break; } else { do_query.setSortField(str + "_str", SolrQuery.ORDER.asc); log.debug("SORTORDERDEBUG | " + str + " | "); break; } } } } } if (facet != null) { if (facet.indexOf(",") > 1) { for (String str : facet.split(",")) { if (str.indexOf("date") > 1) { do_query.addFacetField(str); } else { do_query.addFacetField(str); } //do_query.setParam("facet.method", "enum"); } //q.setFacetSort(false); } else { do_query.addFacetField(facet); } do_query.setFacet(true); do_query.setFacetMinCount(1); do_query.setFacetLimit(-1); } log.fatal(solrquery); QueryResponse rsp = null; boolean do_err = false; boolean do_sugg = false; SolrDocumentList sdl = null; String diag = ""; StringBuffer suggest = new StringBuffer(""); String content = "1"; SolrQuery spellq = do_query; try { rsp = server.query(do_query); } catch (SolrServerException e) { String header = this.SRW_HEADER.replaceAll("\\$numberOfRecords", "0"); out.write(header); diag = this.SRW_DIAG.replaceAll("\\$error", e.getMessage()); do_err = true; rsp = null; } log.fatal("query done.."); if (!(do_err)) { // XML dc response SolrDocumentList docs = rsp.getResults(); numfound = (int) docs.getNumFound(); int count = startRecord; String header = this.SRW_HEADER.replaceAll("\\$numberOfRecords", Integer.toString(numfound)); out.write(header); out.write("<srw:records>"); Iterator<SolrDocument> iter = rsp.getResults().iterator(); while (iter.hasNext()) { count += 1; if (recordSchema.equalsIgnoreCase("dc")) { SolrDocument resultDoc = iter.next(); content = (String) resultDoc.getFieldValue("id"); out.write("<srw:record>"); out.write("<srw:recordPacking>xml</srw:recordPacking>"); out.write("<srw:recordSchema>info:srw/schema/1/dc-v1.1</srw:recordSchema>"); out.write( "<srw:recordData xmlns:srw_dc=\"info:srw/schema/1/dc-v1.1\" xmlns:mods=\"http://www.loc.gov/mods\" xmlns:dcterms=\"http://purl.org/dc/terms/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:dcx=\"http://krait.kb.nl/coop/tel/handbook/telterms.html\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:marcrel=\"http://www.loc.gov/loc.terms/relators/OTH\" xmlns:facets=\"info:srw/extension/4/facets\" >"); StringBuffer result = new StringBuffer(""); construct_lucene_dc(result, resultDoc); out.write(result.toString()); out.write("</srw:recordData>"); out.write("<srw:recordPosition>" + Integer.toString(count) + "</srw:recordPosition>"); out.write("</srw:record>"); } if (recordSchema.equalsIgnoreCase("solr")) { SolrDocument resultDoc = iter.next(); content = (String) resultDoc.getFieldValue("id"); out.write("<srw:record>"); out.write("<srw:recordPacking>xml</srw:recordPacking>"); out.write("<srw:recordSchema>info:srw/schema/1/solr</srw:recordSchema>"); out.write("<srw:recordData xmlns:expand=\"http://www.kbresearch.nl/expand\">"); StringBuffer result = new StringBuffer(""); construct_lucene_solr(result, resultDoc); out.write(result.toString()); out.write("</srw:recordData>"); out.write("<srw:recordPosition>" + Integer.toString(count) + "</srw:recordPosition>"); out.write("</srw:record>"); } if (recordSchema.equalsIgnoreCase("dcx")) { // XML dcx response out.write("<srw:record>"); out.write("<srw:recordPacking>xml</srw:recordPacking>"); out.write("<srw:recordSchema>info:srw/schema/1/dc-v1.1</srw:recordSchema>"); out.write( "<srw:recordData><srw_dc:dc xmlns:srw_dc=\"info:srw/schema/1/dc-v1.1\" xmlns:mods=\"http://www.loc.gov/mods\" xmlns:dcterms=\"http://purl.org/dc/terms/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:dcx=\"http://krait.kb.nl/coop/tel/handbook/telterms.html\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:marcrel=\"http://www.loc.gov/marc.relators/\" xmlns:expand=\"http://www.kbresearch.nl/expand\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" >"); SolrDocument resultDoc = iter.next(); content = (String) resultDoc.getFieldValue("id"); String dcx_data = helpers.getOAIdcx( "http://services.kb.nl/mdo/oai?verb=GetRecord&identifier=" + content, log); if (x_collection.equalsIgnoreCase("ggc-thes")) { dcx_data = helpers.getOAIdcx( "http://serviceso.kb.nl/mdo/oai?verb=GetRecord&identifier=" + content, log); } if (!(dcx_data.length() == 0)) { out.write(dcx_data); } else { // Should not do this!! out.write("<srw:record>"); out.write("<srw:recordPacking>xml</srw:recordPacking>"); out.write("<srw:recordSchema>info:srw/schema/1/dc-v1.1</srw:recordSchema>"); out.write( "<srw:recordData xmlns:srw_dc=\"info:srw/schema/1/dc-v1.1\" xmlns:mods=\"http://www.loc.gov/mods\" xmlns:dcterms=\"http://purl.org/dc/terms/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:dcx=\"http://krait.kb.nl/coop/tel/handbook/telterms.html\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:marcrel=\"http://www.loc.gov/loc.terms/relators/OTH\" >"); StringBuffer result = new StringBuffer(""); construct_lucene_dc(result, resultDoc); out.write(result.toString()); out.write("</srw:recordData>"); out.write("<srw:recordPosition>" + Integer.toString(count) + "</srw:recordPosition>"); out.write("</srw:record>"); } out.write("</srw_dc:dc>"); StringBuffer expand_data; boolean expand = false; if (content.startsWith("GGC-THES:AC:")) { String tmp_content = ""; tmp_content = content.replaceFirst("GGC-THES:AC:", ""); log.fatal("calling get"); expand_data = new StringBuffer( helpers.getExpand("http://www.kbresearch.nl/general/lod_new/get/" + tmp_content + "?format=rdf", log)); log.fatal("get finini"); if (expand_data.toString().length() > 4) { out.write( "<srw_dc:expand xmlns:srw_dc=\"info:srw/schema/1/dc-v1.1\" xmlns:expand=\"http://www.kbresearch.nl/expand\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" >"); out.write(expand_data.toString()); expand = true; } } else { expand_data = new StringBuffer(helpers .getExpand("http://www.kbresearch.nl/ANP.cgi?q=" + content, log)); if (expand_data.toString().length() > 0) { if (!expand) { out.write( "<srw_dc:expand xmlns:srw_dc=\"info:srw/schema/1/dc-v1.1\" xmlns:expand=\"http://www.kbresearch.nl/expand\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" >"); expand = true; } out.write(expand_data.toString()); } } if (expand) { out.write("</srw_dc:expand>"); } out.write("</srw:recordData>"); out.write("<srw:recordPosition>" + Integer.toString(count) + "</srw:recordPosition>"); out.write("</srw:record>"); } } } if ((do_err) || (numfound == 0)) { log.fatal("I haz suggestions"); try { spellq.setParam("spellcheck", true); spellq.setQueryType("/spell"); server = new CommonsHttpSolrServer(url); rsp = server.query(spellq); sdl = rsp.getResults(); SpellCheckResponse spell; spell = rsp.getSpellCheckResponse(); List<SpellCheckResponse.Suggestion> suggestions = spell.getSuggestions(); if (suggestions.isEmpty() == false) { suggest.append("<srw:extraResponseData>"); suggest.append("<suggestions>"); for (SpellCheckResponse.Suggestion sugg : suggestions) { suggest.append("<suggestionfor>" + sugg.getToken() + "</suggestionfor>"); for (String item : sugg.getSuggestions()) { suggest.append("<suggestion>" + item + "</suggestion>"); } suggest.append("</suggestions>"); suggest.append("</srw:extraResponseData>"); } do_sugg = true; } } catch (Exception e) { rsp = null; //log.fatal(e.toString()); } ; } ; if (!do_err) { if (facet != null) { try { fct = rsp.getFacetFields(); out.write("<srw:facets>"); for (String str : facet.split(",")) { out.write("<srw:facet>"); out.write("<srw:facetType>"); out.write(str); out.write("</srw:facetType>"); for (FacetField f : fct) { log.debug(f.getName()); //if (f.getName().equals(str+"_str") || (f.getName().equals(str+"_date")) ) { List<FacetField.Count> facetEnties = f.getValues(); for (FacetField.Count fcount : facetEnties) { out.write("<srw:facetValue>"); out.write("<srw:valueString>"); out.write(helpers.xmlEncode(fcount.getName())); out.write("</srw:valueString>"); out.write("<srw:count>"); out.write(Double.toString(fcount.getCount())); out.write("</srw:count>"); out.write("</srw:facetValue>"); // } } } out.write("</srw:facet>"); } out.write("</srw:facets>"); startRecord += 1; } catch (Exception e) { } //log.fatal(e.toString()); } } } else { out.write(diag); } out.write("</srw:records>"); // SearchRetrieve response footer String footer = this.SRW_FOOTER.replaceAll("\\$query", helpers.xmlEncode(query)); footer = footer.replaceAll("\\$startRecord", (startRecord).toString()); footer = footer.replaceAll("\\$maximumRecords", maximumRecords.toString()); footer = footer.replaceAll("\\$recordSchema", recordSchema); if (do_sugg) { out.write(suggest.toString()); } out.write(footer); } catch (MalformedURLException e) { out.write(e.getMessage()); } catch (IOException e) { out.write("TO ERR is Human"); } } } } out.close(); }
From source file:net.peacesoft.nutch.crawl.RaovatPostSignature.java
License:Apache License
public void dedup(String solrUrl, boolean noCommit) throws IOException { SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); long start = System.currentTimeMillis(); LOG.info("RaovatPostSignature: starting at " + sdf.format(start)); LOG.info("RaovatPostSignature: Solr url: " + solrUrl); JobConf job = new NutchJob(getConf()); job.set(ReSolrConstants.SERVER_URL, solrUrl); // job.setBoolean("noCommit", noCommit); // job.setInputFormat(RaovatPostSignature.SolrInputFormat.class); // job.setOutputFormat(NullOutputFormat.class); // job.setMapOutputKeyClass(Text.class); // job.setMapOutputValueClass(RaovatPostSignature.SolrRecord.class); // job.setMapperClass(IdentityMapper.class); // job.setReducerClass(RaovatPostSignature.class); ////from www .ja v a 2 s. c om // JobClient.runJob(job); solr = SolrUtils.getCommonsHttpSolrServer(job); SolrQuery solrQuery = new SolrQuery(SOLR_GET_ALL_QUERY); solrQuery.setFields(ReSolrConstants.ID_FIELD); solrQuery.setRows(1); QueryResponse response; try { response = solr.query(solrQuery); } catch (final SolrServerException e) { throw new IOException(e); } int numResults = (int) response.getResults().getNumFound(); LOG.info("Total items:" + numResults); int s = 1; int value = 200; int page = numResults / value + 1; updateRequest = new UpdateRequest(); for (int i = 0; i < page; i++) { solrQuery = new SolrQuery(SOLR_GET_ALL_QUERY); solrQuery.setStart(s); solrQuery.setRows(value); try { response = solr.query(solrQuery); } catch (final SolrServerException e) { throw new IOException(e); } SolrDocumentList solrDocs = response.getResults(); for (SolrDocument solrDocument : solrDocs) { updateRequest.add(ClientUtils.toSolrInputDocument(solrDocument)); } s += value; } try { solr.commit(); } catch (SolrServerException ex) { } long end = System.currentTimeMillis(); LOG.info("RaovatPostSignature: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end)); }
From source file:net.yacy.cora.federate.solr.connector.AbstractSolrConnector.java
License:Open Source License
public static SolrQuery getSolrQuery(final String querystring, final String sort, final int offset, final int count, final String... fields) { // construct query final SolrQuery params = new SolrQuery(); //if (count < 2 && querystring.startsWith("{!raw f=")) { // params.setQuery("*:*"); // params.addFilterQuery(querystring); //} else {// w ww . j a v a 2 s . co m params.setQuery(querystring); //} params.clearSorts(); if (sort != null) { params.set(CommonParams.SORT, sort); } params.setRows(count); params.setStart(offset); params.setFacet(false); if (fields != null && fields.length > 0) params.setFields(fields); params.setIncludeScore(false); params.setParam("defType", "edismax"); params.setParam(DisMaxParams.QF, CollectionSchema.text_t.getSolrFieldName() + "^1.0"); return params; }
From source file:net.yacy.cora.federate.solr.connector.AbstractSolrConnector.java
License:Open Source License
/** * check if a given document, identified by url hash as document id exists * @param id the url hash and document id * @return metadata if any entry in solr exists, null otherwise * @throws IOException//from w ww.j a v a2 s.c om */ @Override public LoadTimeURL getLoadTimeURL(String id) throws IOException { // construct raw query final SolrQuery params = new SolrQuery(); //params.setQuery(CollectionSchema.id.getSolrFieldName() + ":\"" + id + "\""); String q = "{!cache=false raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id; params.setQuery(q); params.setRows(1); params.setStart(0); params.setFacet(false); params.clearSorts(); params.setFields(CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName(), CollectionSchema.load_date_dt.getSolrFieldName()); params.setIncludeScore(false); // query the server final SolrDocumentList sdl = getDocumentListByParams(params); if (sdl == null || sdl.getNumFound() <= 0) return null; SolrDocument doc = sdl.iterator().next(); LoadTimeURL md = getLoadTimeURL(doc); return md; }
From source file:net.yacy.cora.federate.solr.connector.AbstractSolrConnector.java
License:Open Source License
/** * get the number of results when this query is done. * This should only be called if the actual result is never used, and only the count is interesting * @param querystring//from w ww .j a va2s . c om * @return the number of results for this query */ @Override public long getCountByQuery(String querystring) throws IOException { // construct query final SolrQuery params = new SolrQuery(); params.setQuery(querystring); params.setRows(0); // essential to just get count params.setStart(0); params.setFacet(false); params.clearSorts(); params.setFields(CollectionSchema.id.getSolrFieldName()); params.setIncludeScore(false); // query the server final SolrDocumentList sdl = getDocumentListByParams(params); return sdl == null ? 0 : sdl.getNumFound(); }
From source file:net.yacy.cora.federate.solr.connector.AbstractSolrConnector.java
License:Open Source License
/** * get facets of the index: a list of lists with values that are most common in a specific field * @param query a query which is performed to get the facets * @param fields the field names which are selected as facet * @param maxresults the maximum size of the resulting maps * @return a map with key = facet field name, value = an ordered map of field values for that field * @throws IOException// w w w. j a v a 2 s . c om */ @Override public LinkedHashMap<String, ReversibleScoreMap<String>> getFacets(String query, int maxresults, final String... fields) throws IOException { // construct query assert fields.length > 0; final SolrQuery params = new SolrQuery(); params.setQuery(query); params.setRows(0); params.setStart(0); params.setFacet(true); params.setFacetMinCount(1); // there are many 0-count facets in the uninverted index cache params.setFacetLimit(maxresults); params.setFacetSort(FacetParams.FACET_SORT_COUNT); params.setParam(FacetParams.FACET_METHOD, FacetParams.FACET_METHOD_fc /*FACET_METHOD_fcs*/); params.setFields(fields); params.clearSorts(); params.setIncludeScore(false); for (String field : fields) params.addFacetField(field); // query the server QueryResponse rsp = getResponseByParams(params); LinkedHashMap<String, ReversibleScoreMap<String>> facets = new LinkedHashMap<String, ReversibleScoreMap<String>>( fields.length); for (String field : fields) { FacetField facet = rsp.getFacetField(field); ReversibleScoreMap<String> result = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator); List<Count> values = facet.getValues(); if (values == null) continue; for (Count ff : values) if (ff.getCount() > 0) result.set(ff.getName(), (int) ff.getCount()); facets.put(field, result); } return facets; }
From source file:net.yacy.cora.federate.solr.connector.AbstractSolrConnector.java
License:Open Source License
@Override public SolrDocument getDocumentById(final String id, final String... fields) throws IOException { assert id.length() == Word.commonHashLength : "wrong id: " + id; final SolrQuery query = new SolrQuery(); // construct query StringBuilder sb = new StringBuilder(23); sb.append("{!cache=false raw f=").append(CollectionSchema.id.getSolrFieldName()).append('}').append(id); query.setQuery(sb.toString());//from ww w .j a v a 2s . co m //query.setQuery("*:*"); //query.addFilterQuery(sb.toString()); query.clearSorts(); query.setRows(1); query.setStart(0); if (fields != null && fields.length > 0) query.setFields(fields); query.setIncludeScore(false); // query the server try { final SolrDocumentList docs = getDocumentListByParams(query); if (docs == null || docs.isEmpty()) return null; SolrDocument doc = docs.get(0); return doc; } catch (final Throwable e) { clearCaches(); // we clear the in case that this is caused by OOM throw new IOException(e.getMessage(), e); } }
From source file:net.yacy.peers.Protocol.java
License:Open Source License
protected static int solrQuery(final SearchEvent event, final SolrQuery solrQuery, final int offset, final int count, final Seed target, final int partitions, final Blacklist blacklist) { if (event.query.getQueryGoal().getQueryString(false) == null || event.query.getQueryGoal().getQueryString(false).length() == 0) { return -1; // we cannot query solr only with word hashes, there is no clear text string }/*from w ww .j a v a2s. c om*/ event.addExpectedRemoteReferences(count); if (partitions > 0) solrQuery.set("partitions", partitions); solrQuery.setStart(offset); solrQuery.setRows(count); // set highlighting query attributes if (event.query.contentdom == Classification.ContentDomain.TEXT || event.query.contentdom == Classification.ContentDomain.ALL) { solrQuery.setHighlight(true); solrQuery.setHighlightFragsize(SearchEvent.SNIPPET_MAX_LENGTH); //solrQuery.setHighlightRequireFieldMatch(); solrQuery.setHighlightSimplePost("</b>"); solrQuery.setHighlightSimplePre("<b>"); solrQuery.setHighlightSnippets(5); for (CollectionSchema field : snippetFields) solrQuery.addHighlightField(field.getSolrFieldName()); } else { solrQuery.setHighlight(false); } boolean localsearch = target == null || target.equals(event.peers.mySeed()); Map<String, ReversibleScoreMap<String>> facets = new HashMap<String, ReversibleScoreMap<String>>( event.query.facetfields.size()); Map<String, LinkedHashSet<String>> snippets = new HashMap<String, LinkedHashSet<String>>(); // this will be a list of urlhash-snippet entries final QueryResponse[] rsp = new QueryResponse[] { null }; final SolrDocumentList[] docList = new SolrDocumentList[] { null }; String ip = target.getIP(); {// encapsulate expensive solr QueryResponse object if (localsearch && !Switchboard.getSwitchboard() .getConfigBool(SwitchboardConstants.DEBUG_SEARCH_REMOTE_SOLR_TESTLOCAL, false)) { // search the local index try { SolrConnector sc = event.getQuery().getSegment().fulltext().getDefaultConnector(); if (!sc.isClosed()) { rsp[0] = sc.getResponseByParams(solrQuery); docList[0] = rsp[0].getResults(); } } catch (final Throwable e) { Network.log.info("SEARCH failed (solr), localpeer (" + e.getMessage() + ")", e); return -1; } } else { try { final boolean myseed = target == event.peers.mySeed(); if (!myseed && !target.getFlagSolrAvailable()) { // skip if peer.dna has flag that last try resulted in error Network.log.info("SEARCH skip (solr), remote Solr interface not accessible, peer=" + target.getName()); return -1; } final String address = myseed ? "localhost:" + target.getPort() : target.getPublicAddress(ip); final int solrtimeout = Switchboard.getSwitchboard() .getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_TIMEOUT, 6000); Thread remoteRequest = new Thread() { @Override public void run() { this.setName("Protocol.solrQuery(" + solrQuery.getQuery() + " to " + target.hash + ")"); try { RemoteInstance instance = new RemoteInstance("http://" + address, null, "solr", solrtimeout); // this is a 'patch configuration' which considers 'solr' as default collection try { SolrConnector solrConnector = new RemoteSolrConnector(instance, myseed ? true : target.getVersion() >= 1.63, "solr"); if (!solrConnector.isClosed()) try { rsp[0] = solrConnector.getResponseByParams(solrQuery); docList[0] = rsp[0].getResults(); } catch (Throwable e) { } finally { solrConnector.close(); } } catch (Throwable ee) { } finally { instance.close(); } } catch (Throwable eee) { } } }; remoteRequest.start(); remoteRequest.join(solrtimeout); // just wait until timeout appears if (remoteRequest.isAlive()) { try { remoteRequest.interrupt(); } catch (Throwable e) { } Network.log.info("SEARCH failed (solr), remote Peer: " + target.getName() + "/" + target.getPublicAddress(ip) + " does not answer (time-out)"); target.setFlagSolrAvailable(false || myseed); return -1; // give up, leave remoteRequest abandoned. } // no need to close this here because that sends a commit to remote solr which is not wanted here } catch (final Throwable e) { Network.log.info("SEARCH failed (solr), remote Peer: " + target.getName() + "/" + target.getPublicAddress(ip) + " (" + e.getMessage() + ")"); target.setFlagSolrAvailable(false || localsearch); return -1; } } if (rsp[0] == null || docList[0] == null) { Network.log.info("SEARCH failed (solr), remote Peer: " + target.getName() + "/" + target.getPublicAddress(ip) + " returned null"); target.setFlagSolrAvailable(false || localsearch); return -1; } // evaluate facets for (String field : event.query.facetfields) { FacetField facet = rsp[0].getFacetField(field); ReversibleScoreMap<String> result = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator); List<Count> values = facet == null ? null : facet.getValues(); if (values == null) continue; for (Count ff : values) { int c = (int) ff.getCount(); if (c == 0) continue; if (ff.getName().length() == 0) continue; // facet entry without text is not useful result.set(ff.getName(), c); } if (result.size() > 0) facets.put(field, result); } // evaluate snippets Map<String, Map<String, List<String>>> rawsnippets = rsp[0].getHighlighting(); // a map from the urlhash to a map with key=field and value = list of snippets if (rawsnippets != null) { nextsnippet: for (Map.Entry<String, Map<String, List<String>>> re : rawsnippets.entrySet()) { Map<String, List<String>> rs = re.getValue(); for (CollectionSchema field : snippetFields) { if (rs.containsKey(field.getSolrFieldName())) { List<String> s = rs.get(field.getSolrFieldName()); if (s.size() > 0) { LinkedHashSet<String> ls = new LinkedHashSet<String>(); ls.addAll(s); snippets.put(re.getKey(), ls); continue nextsnippet; } } } // no snippet found :( --we don't assign a value here by default; that can be done as an evaluation outside this method } } rsp[0] = null; } // evaluate result if (docList == null || docList[0].size() == 0) { Network.log.info("SEARCH (solr), returned 0 out of 0 documents from " + (target == null ? "shard" : ("peer " + target.hash + ":" + target.getName())) + " query = " + solrQuery.toString()); return 0; } List<URIMetadataNode> container = new ArrayList<URIMetadataNode>(); Network.log.info("SEARCH (solr), returned " + docList[0].size() + " out of " + docList[0].getNumFound() + " documents and " + facets.size() + " facets " + facets.keySet().toString() + " from " + (target == null ? "shard" : ("peer " + target.hash + ":" + target.getName()))); int term = count; Collection<SolrInputDocument> docs; if (event.addResultsToLocalIndex) { // only needed to store remote results docs = new ArrayList<SolrInputDocument>(docList[0].size()); } else docs = null; for (final SolrDocument doc : docList[0]) { if (term-- <= 0) { break; // do not process more that requested (in case that evil peers fill us up with rubbish) } // get one single search result if (doc == null) { continue; } URIMetadataNode urlEntry = new URIMetadataNode(doc); if (blacklist.isListed(BlacklistType.SEARCH, urlEntry.url())) { if (Network.log.isInfo()) { if (localsearch) { Network.log.info("local search (solr): filtered blacklisted url " + urlEntry.url()); } else { Network.log.info("remote search (solr): filtered blacklisted url " + urlEntry.url() + " from " + (target == null ? "shard" : ("peer " + target.hash + ":" + target.getName()))); } } continue; // block with blacklist } final String urlRejectReason = Switchboard.getSwitchboard().crawlStacker .urlInAcceptedDomain(urlEntry.url()); if (urlRejectReason != null) { if (Network.log.isInfo()) { if (localsearch) { Network.log.info("local search (solr): rejected url '" + urlEntry.url() + "' (" + urlRejectReason + ")"); } else { Network.log.info("remote search (solr): rejected url '" + urlEntry.url() + "' (" + urlRejectReason + ") from peer " + target.getName()); } } continue; // reject url outside of our domain } // passed all checks, store url if (!localsearch) { // put the remote documents to the local index. We must convert the solr document to a solr input document: if (event.addResultsToLocalIndex) { final SolrInputDocument sid = event.query.getSegment().fulltext().getDefaultConfiguration() .toSolrInputDocument(doc); // the input document stays untouched because it contains top-level cloned objects docs.add(sid); // will be stored to index, and is a full solr document, can be added to firstseen event.query.getSegment().setFirstSeenTime(urlEntry.hash(), Math.min(urlEntry.moddate().getTime(), System.currentTimeMillis())); } // after this conversion we can remove the largest and not used field text_t and synonyms_sxt from the document // because that goes into a search cache and would take a lot of memory in the search cache //doc.removeFields(CollectionSchema.text_t.getSolrFieldName()); doc.removeFields(CollectionSchema.synonyms_sxt.getSolrFieldName()); ResultURLs.stack(ASCII.String(urlEntry.url().hash()), urlEntry.url().getHost(), event.peers.mySeed().hash.getBytes(), UTF8.getBytes(target.hash), EventOrigin.QUERIES); } // add the url entry to the word indexes container.add(urlEntry); } final int dls = docList[0].size(); final int numFound = (int) docList[0].getNumFound(); docList[0].clear(); docList[0] = null; if (localsearch) { event.addNodes(container, facets, snippets, true, "localpeer", numFound); event.addFinalize(); event.addExpectedRemoteReferences(-count); Network.log.info( "local search (solr): localpeer sent " + container.size() + "/" + numFound + " references"); } else { if (event.addResultsToLocalIndex) { for (SolrInputDocument doc : docs) { event.query.getSegment().putDocument(doc); } docs.clear(); docs = null; } event.addNodes(container, facets, snippets, false, target.getName() + "/" + target.hash, numFound); event.addFinalize(); event.addExpectedRemoteReferences(-count); Network.log.info("remote search (solr): peer " + target.getName() + " sent " + (container.size() == 0 ? 0 : container.size()) + "/" + numFound + " references"); } return dls; }
From source file:net.yacy.search.index.ErrorCache.java
License:Open Source License
public ErrorCache(final Fulltext fulltext) { this.fulltext = fulltext; this.cache = new LinkedHashMap<String, CollectionConfiguration.FailDoc>(); // concurrently fill stack with latest values new Thread() { @Override/*from w w w . ja v a 2 s . c o m*/ public void run() { final SolrQuery params = new SolrQuery(); params.setParam("defType", "edismax"); params.setStart(0); params.setRows(1000); params.setFacet(false); params.setSort( new SortClause(CollectionSchema.last_modified.getSolrFieldName(), SolrQuery.ORDER.desc)); params.setFields(CollectionSchema.id.getSolrFieldName()); params.setQuery( CollectionSchema.failreason_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM); params.set(CommonParams.DF, CollectionSchema.id.getSolrFieldName()); // DisMaxParams.QF or CommonParams.DF must be given SolrDocumentList docList; try { docList = fulltext.getDefaultConnector().getDocumentListByParams(params); if (docList != null) for (int i = docList.size() - 1; i >= 0; i--) { SolrDocument doc = docList.get(i); String hash = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName()); cache.put(hash, null); } } catch (IOException e) { ConcurrentLog.logException(e); } } }.start(); }
From source file:net.yacy.search.index.ErrorCacheFiller.java
License:Open Source License
/** * Fills the error cache with recently failed document hashes found in the index *///from www .j ava 2 s. c om @Override public void run() { final SolrQuery params = new SolrQuery(); params.setParam("defType", "edismax"); params.setStart(0); params.setRows(1000); params.setFacet(false); params.setSort(new SortClause(CollectionSchema.load_date_dt.getSolrFieldName(), SolrQuery.ORDER.desc)); // load_date_dt = faildate params.setFields(CollectionSchema.id.getSolrFieldName()); params.setQuery(CollectionSchema.failreason_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM); params.set(CommonParams.DF, CollectionSchema.id.getSolrFieldName()); // DisMaxParams.QF or CommonParams.DF must be given SolrDocumentList docList; try { docList = this.sb.index.fulltext().getDefaultConnector().getDocumentListByParams(params); if (docList != null) for (int i = docList.size() - 1; i >= 0; i--) { SolrDocument doc = docList.get(i); String hash = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName()); cache.putHashOnly(hash); } } catch (IOException e) { ConcurrentLog.logException(e); } }