Example usage for org.apache.solr.client.solrj SolrQuery setRows

List of usage examples for org.apache.solr.client.solrj SolrQuery setRows

Introduction

On this page you can find example usages of org.apache.solr.client.solrj SolrQuery setRows.

Prototype

public SolrQuery setRows(Integer rows) 

Source Link

Usage

From source file:kbSRU.kbSRU.java

License:Open Source License

/**
 * Handles an HTTP GET on the SRU endpoint and writes one of three responses:
 * an (empty) SRW explain response, an HTML debug page, or an SRW searchRetrieve
 * XML response built from the result of a Solr query.
 *
 * <p>Request parameters read here: {@code startRecord}, {@code maximumRecords},
 * {@code operation}, {@code x-collection}, {@code sortKeys}, {@code sortOrder},
 * {@code recordSchema}, {@code query} / {@code q}, {@code debug} and {@code facet}.
 * The {@code X-FORWARDED-FOR} header (or the remote address) is only echoed on the
 * debug page.
 *
 * @param request  the incoming servlet request carrying the SRU parameters
 * @param response the servlet response; content type and encoding are set here and
 *                 the body is written through a {@link PrintWriter} on its output stream
 * @throws ServletException declared by the signature; not thrown explicitly in this method
 * @throws IOException      propagated from servlet I/O calls made outside the inner
 *                          try block (e.g. {@code response.getOutputStream()})
 */
public void doGet(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    response.setContentType(XML_RESPONSE_HEADER); // Talkback happens in XML form.
    response.setCharacterEncoding("UTF-8"); // Unicode++
    request.setCharacterEncoding("UTF-8");

    PrintWriter out = null; // The talkback buffer.

    // handle startrecord: the request value is 1-based, the local value is 0-based
    // (it is passed to SolrQuery.setStart further down).
    Integer startRecord = 0;

    if (!(request.getParameter("startRecord") == null)) {
        try {
            startRecord = Integer.parseInt(request.getParameter("startRecord")) - 1;
        } catch (NumberFormatException e) {
            startRecord = 0;
        }
    }

    // maximumrecords
    // NOTE(review): unlike startRecord, neither parseInt below is guarded; a missing
    // "default_maximumRecords" property or a non-numeric request value raises an
    // uncaught NumberFormatException.
    Integer maximumRecords = Integer.parseInt(this.config.getProperty("default_maximumRecords"));
    if (!(request.getParameter("maximumRecords") == null)) {
        maximumRecords = Integer.parseInt(request.getParameter("maximumRecords"));
    }

    // operation 
    String operation = request.getParameter("operation");

    // x_collection: request value, else configured default; with neither, the
    // requested operation is discarded and re-derived below.
    String x_collection = request.getParameter("x-collection");
    if (x_collection == null)
        x_collection = this.config.getProperty("default_x_collection");
    if (x_collection == null)
        operation = null;

    // sortkeys
    String sortKeys = request.getParameter("sortKeys");

    // sortorder
    String sortOrder = request.getParameter("sortOrder");

    // recordschema: defaults to "dc"; "dcx" and "solr" are normalised to lower case.
    // Any other value is kept as supplied.
    String recordSchema = request.getParameter("recordSchema");
    if (recordSchema == null)
        recordSchema = "dc";

    if (recordSchema.equalsIgnoreCase("dcx")) {
        recordSchema = "dcx";
    }

    if (recordSchema.equalsIgnoreCase("solr")) {
        recordSchema = "solr";
    }

    // query request: "query" and "q" are treated as aliases (reconciled below).
    String query = request.getParameter("query");
    String q = request.getParameter("q");

    // who is requestor ?
    String remote_ip = request.getHeader("X-FORWARDED-FOR");

    if (remote_ip == null) {
        remote_ip = request.getRemoteAddr().trim();
    } else {
        // Re-reads the same header; the value is already in remote_ip.
        remote_ip = request.getHeader("X-FORWARDED-FOR");
    }

    // handle debug 
    // The XML writer is only created when debug is off; in debug mode the writer is
    // created later, inside the searchRetrieve/debug branch.
    // NOTE(review): with debug=true and an explain operation, `out` is still null when
    // the explain branch calls out.write(...), which would throw NullPointerException.
    Boolean debug = Boolean.parseBoolean(request.getParameter("debug"));
    if (!debug) {
        out = new PrintWriter(new OutputStreamWriter(response.getOutputStream(), "UTF8"), true);
    }

    // handle query: exactly one of query/q supplied -> copy it to the other;
    // both supplied or both missing -> discard the requested operation.
    if ((query == null) && (q != null)) {
        query = q;
    } else {
        if ((query != null) && (q == null)) {
            q = query;
        } else {
            operation = null;
        }
    }

    // handle operation: derive a default from the presence of a query.
    if (operation == null) {
        if (query != null) {
            operation = "searchRetrieve";
        } else {
            operation = "explain";
        }
    }

    //  searchRetrieve without a query falls back to explain.
    if (operation.equalsIgnoreCase("searchRetrieve")) {
        if (query == null) {
            operation = "explain";
            log.debug(operation + ":" + query);
        }
    }

    // start talking back.
    String[] sq = { "" }; // not used in this method
    String solrquery = "";

    // facet: optional, comma-separated list of facet field names.

    String facet = null;
    List<FacetField> fct = null;

    if (request.getParameter("facet") != null) {
        facet = request.getParameter("facet");
        log.debug("facet : " + facet);
    }

    // NOTE(review): operation was assigned a non-null value above whenever it was null,
    // so this first branch appears unreachable.
    if (operation == null) {
        operation = "searchretrieve";
    } else { // explain response
        if (operation.equalsIgnoreCase("explain")) {
            log.debug("operation = explain");
            out.write("<srw:explainResponse xmlns:srw=\"http://www.loc.gov/zing/srw/\">");
            out.write("</srw:explainResponse>");
        } else { // DEBUG routine
            // Every non-explain operation value is handled as a searchRetrieve.
            operation = "searchretrieve";

            String triplequery = null;

            // Translate the CQL query to a Solr/Lucene query string. A query containing
            // a bracketed part is first rewritten through symantic_query().
            if (query.matches(".*?\\[.+?\\].*?")) { // New semantic syntax
                triplequery = symantic_query(query);
                query = query.split("\\[")[0] + " " + triplequery;
                log.fatal(triplequery);

                solrquery = CQLtoLucene.translate(query, log, config);
            } else {
                solrquery = CQLtoLucene.translate(query, log, config);
            }
            log.debug(solrquery);

            // NOTE(review): x_collection can still be null here (no request value and no
            // configured default); both branches below call x_collection.toLowerCase().
            if (debug == true) {
                // HTML debug page: echoes the query, the translated Solr query and
                // embeds the backend responses in iframes.
                // NOTE(review): request-supplied values (query, x_collection) are
                // concatenated into the HTML without escaping.
                response.setContentType(HTML_RESPONSE_HEADER);
                out = new PrintWriter(new OutputStreamWriter(response.getOutputStream(), "UTF8"), true);
                out.write("<html><body>\n\n");
                out.write("'" + remote_ip + "'<br>\n");
                out.write("<form action='http://www.kbresearch.nl/kbSRU'>");
                out.write("<input type=text name=q value='" + query + "' size=120>");
                out.write("<input type=hidden name=debug value=True>");
                out.write("<input type=submit>");
                out.write("<table border=1><tr><td>");
                out.write("q</td><td>" + query + "</td></tr><tr>");
                out.write("<td>query out</td><td>" + URLDecoder.decode(solrquery) + "</td></tr>");
                out.write("<tr><td>SOLR_URL</td><td> <a href='"
                        + this.config.getProperty("collection." + x_collection.toLowerCase() + ".solr_baseurl")
                        + "/?q=" + solrquery + "'>"
                        + this.config.getProperty("collection." + x_collection.toLowerCase() + ".solr_baseurl")
                        + "/select/?q=" + solrquery + "</a><br>" + this.config.getProperty("solr_url")
                        + solrquery + "</td></tr>");
                out.write("<b>SOLR_QUERY</b> : <BR> <iframe width=900 height=400 src='"
                        + this.config.getProperty("collection." + x_collection.toLowerCase() + ".solr_baseurl")
                        + "/../?q=" + solrquery + "'></iframe><BR>");
                out.write("<b>SRU_QUERY</b> : <BR> <a href=" + this.config.getProperty("baseurl") + "?q="
                        + query + "'>" + this.config.getProperty("baseurl") + "?q=" + query
                        + "</a><br><iframe width=901 height=400 src='http://www.kbresearch.nl/kbSRU/?q=" + query
                        + "'></iframe><BR>");
                out.write("<br><b>JSRU_QUERY</b> : <BR><a href='http://jsru.kb.nl/sru/?query=" + query
                        + "&x-collection=" + x_collection + "'>http://jsru.kb.nl/sru/?query=" + query
                        + "&x-collection=GGC</a><br><iframe width=900 height=400 src='http://jsru.kb.nl/sru/?query="
                        + query + "&x-collection=GGC'></iframe>");

            } else { // XML SearchRetrieve response
                // The Solr base URL is configured per collection.
                String url = this.config
                        .getProperty("collection." + x_collection.toLowerCase() + ".solr_baseurl");
                String buffer = ""; // not used in this method
                CommonsHttpSolrServer server = null;
                server = new CommonsHttpSolrServer(url);
                log.fatal("URSING " + url);
                server.setParser(new XMLResponseParser());
                int numfound = 0;
                try {
                    // Build the Solr query: translated query string plus paging window.
                    SolrQuery do_query = new SolrQuery();
                    do_query.setQuery(solrquery);
                    do_query.setRows(maximumRecords);
                    do_query.setStart(startRecord);

                    // Sorting. With an explicit sortOrder, sortKeys is used verbatim as the
                    // field name (values other than "asc"/"desc" set no sort). Without a
                    // sortOrder, only the first key longer than one character is used:
                    // "date" sorts on date_date descending, anything else on <key>_str ascending.
                    if ((sortKeys != null) && (sortKeys.length() > 1)) {
                        if (sortOrder != null) {
                            if (sortOrder.equals("asc")) {
                                do_query.setSortField(sortKeys, SolrQuery.ORDER.asc);
                            }
                            if (sortOrder.equals("desc")) {
                                do_query.setSortField(sortKeys, SolrQuery.ORDER.desc);
                            }
                        } else {
                            for (String str : sortKeys.trim().split(",")) {
                                str = str.trim();
                                if (str.length() > 1) {
                                    if (str.equals("date")) {
                                        do_query.setSortField("date_date", SolrQuery.ORDER.desc);
                                        log.debug("SORTORDERDEBUG | DATE! " + str + " | ");
                                        break;
                                    } else {
                                        do_query.setSortField(str + "_str", SolrQuery.ORDER.asc);
                                        log.debug("SORTORDERDEBUG | " + str + " | ");
                                        break;
                                    }
                                }
                            }
                        }
                    }

                    // Faceting: register each requested field, minimum count 1, no limit.
                    if (facet != null) {
                        // NOTE(review): "> 1" means a comma at index 0 or 1 is not treated as
                        // a separator and the whole string is added as one facet field.
                        if (facet.indexOf(",") > 1) {
                            for (String str : facet.split(",")) {
                                // Both branches are currently identical.
                                if (str.indexOf("date") > 1) {
                                    do_query.addFacetField(str);
                                } else {
                                    do_query.addFacetField(str);
                                }
                                //do_query.setParam("facet.method", "enum");
                            }
                            //q.setFacetSort(false); 
                        } else {
                            do_query.addFacetField(facet);
                        }
                        do_query.setFacet(true);
                        do_query.setFacetMinCount(1);
                        do_query.setFacetLimit(-1);
                    }

                    log.fatal(solrquery);

                    QueryResponse rsp = null;
                    boolean do_err = false; // set when the main Solr query failed
                    boolean do_sugg = false; // set when spelling suggestions were collected
                    SolrDocumentList sdl = null;
                    String diag = ""; // diagnostic XML emitted instead of records on error
                    StringBuffer suggest = new StringBuffer("");

                    String content = "1";

                    // Same object as do_query (an alias, not a copy); the spell-check
                    // parameters set further down therefore modify do_query as well.
                    SolrQuery spellq = do_query;
                    try {
                        rsp = server.query(do_query);
                    } catch (SolrServerException e) {
                        // Query failed: emit a header reporting zero records and keep a
                        // diagnostic for later output.
                        // NOTE(review): String.replaceAll interprets '$' and '\' in the
                        // replacement; an exception message containing them may misbehave.
                        String header = this.SRW_HEADER.replaceAll("\\$numberOfRecords", "0");
                        out.write(header);
                        diag = this.SRW_DIAG.replaceAll("\\$error", e.getMessage());
                        do_err = true;
                        rsp = null;
                    }

                    log.fatal("query done..");
                    if (!(do_err)) { // XML dc response

                        SolrDocumentList docs = rsp.getResults();
                        numfound = (int) docs.getNumFound();
                        int count = startRecord; // incremented before use -> 1-based record position
                        String header = this.SRW_HEADER.replaceAll("\\$numberOfRecords",
                                Integer.toString(numfound));
                        out.write(header);
                        out.write("<srw:records>");

                        Iterator<SolrDocument> iter = rsp.getResults().iterator();

                        // NOTE(review): iter.next() is only called inside the "dc", "solr" and
                        // "dcx" branches. For any other recordSchema value the iterator never
                        // advances, so this loop would not terminate when results exist.
                        while (iter.hasNext()) {
                            count += 1;
                            if (recordSchema.equalsIgnoreCase("dc")) {
                                // Dublin Core record rendered by construct_lucene_dc().
                                SolrDocument resultDoc = iter.next();
                                content = (String) resultDoc.getFieldValue("id");
                                out.write("<srw:record>");
                                out.write("<srw:recordPacking>xml</srw:recordPacking>");
                                out.write("<srw:recordSchema>info:srw/schema/1/dc-v1.1</srw:recordSchema>");
                                out.write(
                                        "<srw:recordData xmlns:srw_dc=\"info:srw/schema/1/dc-v1.1\" xmlns:mods=\"http://www.loc.gov/mods\" xmlns:dcterms=\"http://purl.org/dc/terms/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:dcx=\"http://krait.kb.nl/coop/tel/handbook/telterms.html\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:marcrel=\"http://www.loc.gov/loc.terms/relators/OTH\" xmlns:facets=\"info:srw/extension/4/facets\" >");
                                StringBuffer result = new StringBuffer("");

                                construct_lucene_dc(result, resultDoc);

                                out.write(result.toString());
                                out.write("</srw:recordData>");
                                out.write("<srw:recordPosition>" + Integer.toString(count)
                                        + "</srw:recordPosition>");
                                out.write("</srw:record>");
                            }

                            if (recordSchema.equalsIgnoreCase("solr")) {
                                // Solr-flavoured record rendered by construct_lucene_solr().
                                SolrDocument resultDoc = iter.next();
                                content = (String) resultDoc.getFieldValue("id");
                                out.write("<srw:record>");
                                out.write("<srw:recordPacking>xml</srw:recordPacking>");
                                out.write("<srw:recordSchema>info:srw/schema/1/solr</srw:recordSchema>");
                                out.write("<srw:recordData xmlns:expand=\"http://www.kbresearch.nl/expand\">");
                                StringBuffer result = new StringBuffer("");
                                construct_lucene_solr(result, resultDoc);
                                out.write(result.toString());

                                out.write("</srw:recordData>");
                                out.write("<srw:recordPosition>" + Integer.toString(count)
                                        + "</srw:recordPosition>");
                                out.write("</srw:record>");
                            }

                            if (recordSchema.equalsIgnoreCase("dcx")) { // XML dcx response
                                out.write("<srw:record>");
                                out.write("<srw:recordPacking>xml</srw:recordPacking>");
                                out.write("<srw:recordSchema>info:srw/schema/1/dc-v1.1</srw:recordSchema>");
                                out.write(
                                        "<srw:recordData><srw_dc:dc xmlns:srw_dc=\"info:srw/schema/1/dc-v1.1\" xmlns:mods=\"http://www.loc.gov/mods\" xmlns:dcterms=\"http://purl.org/dc/terms/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:dcx=\"http://krait.kb.nl/coop/tel/handbook/telterms.html\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:marcrel=\"http://www.loc.gov/marc.relators/\" xmlns:expand=\"http://www.kbresearch.nl/expand\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" >");
                                SolrDocument resultDoc = iter.next();
                                content = (String) resultDoc.getFieldValue("id");

                                // Fetch the dcx payload for this id over HTTP; the "ggc-thes"
                                // collection uses a different host (a second fetch replaces the first).
                                String dcx_data = helpers.getOAIdcx(
                                        "http://services.kb.nl/mdo/oai?verb=GetRecord&identifier=" + content,
                                        log);
                                if (x_collection.equalsIgnoreCase("ggc-thes")) {
                                    dcx_data = helpers.getOAIdcx(
                                            "http://serviceso.kb.nl/mdo/oai?verb=GetRecord&identifier="
                                                    + content,
                                            log);
                                }

                                if (!(dcx_data.length() == 0)) {
                                    out.write(dcx_data);
                                } else {
                                    // Should not do this!!
                                    // Fallback when no dcx payload came back: a complete <srw:record>
                                    // built from the Solr document is written inside the record that
                                    // is already open.

                                    out.write("<srw:record>");
                                    out.write("<srw:recordPacking>xml</srw:recordPacking>");
                                    out.write("<srw:recordSchema>info:srw/schema/1/dc-v1.1</srw:recordSchema>");
                                    out.write(
                                            "<srw:recordData xmlns:srw_dc=\"info:srw/schema/1/dc-v1.1\" xmlns:mods=\"http://www.loc.gov/mods\" xmlns:dcterms=\"http://purl.org/dc/terms/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:dcx=\"http://krait.kb.nl/coop/tel/handbook/telterms.html\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:marcrel=\"http://www.loc.gov/loc.terms/relators/OTH\" >");
                                    StringBuffer result = new StringBuffer("");

                                    construct_lucene_dc(result, resultDoc);

                                    out.write(result.toString());
                                    out.write("</srw:recordData>");
                                    out.write("<srw:recordPosition>" + Integer.toString(count)
                                            + "</srw:recordPosition>");
                                    out.write("</srw:record>");

                                }

                                out.write("</srw_dc:dc>");

                                // Optional "expand" section with extra data fetched over HTTP.
                                StringBuffer expand_data;
                                boolean expand = false; // true once the <srw_dc:expand> tag is open

                                if (content.startsWith("GGC-THES:AC:")) {
                                    String tmp_content = "";
                                    tmp_content = content.replaceFirst("GGC-THES:AC:", "");
                                    log.fatal("calling get");
                                    expand_data = new StringBuffer(
                                            helpers.getExpand("http://www.kbresearch.nl/general/lod_new/get/"
                                                    + tmp_content + "?format=rdf", log));
                                    log.fatal("get finini");

                                    if (expand_data.toString().length() > 4) {

                                        out.write(
                                                "<srw_dc:expand xmlns:srw_dc=\"info:srw/schema/1/dc-v1.1\" xmlns:expand=\"http://www.kbresearch.nl/expand\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" >");
                                        out.write(expand_data.toString());
                                        expand = true;
                                    }
                                } else {
                                    expand_data = new StringBuffer(helpers
                                            .getExpand("http://www.kbresearch.nl/ANP.cgi?q=" + content, log));
                                    if (expand_data.toString().length() > 0) {
                                        // expand is always false at this point in this branch.
                                        if (!expand) {
                                            out.write(
                                                    "<srw_dc:expand xmlns:srw_dc=\"info:srw/schema/1/dc-v1.1\" xmlns:expand=\"http://www.kbresearch.nl/expand\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" >");
                                            expand = true;
                                        }
                                        out.write(expand_data.toString());
                                    }
                                }
                                if (expand) {
                                    out.write("</srw_dc:expand>");
                                }

                                out.write("</srw:recordData>");
                                out.write("<srw:recordPosition>" + Integer.toString(count)
                                        + "</srw:recordPosition>");
                                out.write("</srw:record>");
                            }
                        }
                    }

                    // No hits, or the query failed: ask the "/spell" handler for suggestions.
                    if ((do_err) || (numfound == 0)) {
                        log.fatal("I haz suggestions");

                        try {
                            spellq.setParam("spellcheck", true);
                            spellq.setQueryType("/spell");
                            server = new CommonsHttpSolrServer(url);
                            // rsp is overwritten with the spell-check response here.
                            rsp = server.query(spellq);
                            sdl = rsp.getResults();
                            SpellCheckResponse spell;
                            spell = rsp.getSpellCheckResponse();
                            List<SpellCheckResponse.Suggestion> suggestions = spell.getSuggestions();
                            if (suggestions.isEmpty() == false) {
                                suggest.append("<srw:extraResponseData>");
                                suggest.append("<suggestions>");

                                for (SpellCheckResponse.Suggestion sugg : suggestions) {
                                    suggest.append("<suggestionfor>" + sugg.getToken() + "</suggestionfor>");
                                    for (String item : sugg.getSuggestions()) {
                                        suggest.append("<suggestion>" + item + "</suggestion>");
                                    }
                                    // NOTE(review): the closing tags are appended inside the loop,
                                    // once per suggestion entry, while the opening tags are written
                                    // once; more than one entry yields unbalanced XML.
                                    suggest.append("</suggestions>");
                                    suggest.append("</srw:extraResponseData>");
                                }
                                do_sugg = true;
                            }
                        } catch (Exception e) {
                            // Any failure here is swallowed; rsp becomes null.
                            rsp = null;
                            //log.fatal(e.toString());
                        }
                        ;
                    }
                    ;

                    if (!do_err) {
                        if (facet != null) {

                            // Facet section. rsp may be the spell-check response, or null, when
                            // the spell-check path above ran; a resulting exception is caught
                            // by the empty catch below.
                            try {
                                fct = rsp.getFacetFields();
                                out.write("<srw:facets>");

                                for (String str : facet.split(",")) {
                                    out.write("<srw:facet>");
                                    out.write("<srw:facetType>");
                                    out.write(str);
                                    out.write("</srw:facetType>");

                                    // The name filter is commented out, so the values of every
                                    // returned facet field are written under each requested name.
                                    for (FacetField f : fct) {
                                        log.debug(f.getName());
                                        //if (f.getName().equals(str+"_str") || (f.getName().equals(str+"_date")) ) {
                                        List<FacetField.Count> facetEnties = f.getValues();
                                        for (FacetField.Count fcount : facetEnties) {
                                            out.write("<srw:facetValue>");
                                            out.write("<srw:valueString>");
                                            out.write(helpers.xmlEncode(fcount.getName()));
                                            out.write("</srw:valueString>");
                                            out.write("<srw:count>");
                                            // Formatted via Double.toString, i.e. with a decimal point.
                                            out.write(Double.toString(fcount.getCount()));
                                            out.write("</srw:count>");
                                            out.write("</srw:facetValue>");
                                            //   }
                                        }

                                    }
                                    out.write("</srw:facet>");
                                }
                                out.write("</srw:facets>");
                                // NOTE(review): startRecord is only converted back to 1-based when
                                // facets were written; otherwise the footer receives the 0-based value.
                                startRecord += 1;
                            } catch (Exception e) {
                                // Swallowed; a partially written facet section is left as is.
                            }

                            //log.fatal(e.toString()); }
                        }
                    } else {
                        out.write(diag);
                    }
                    // NOTE(review): on the error path "<srw:records>" was never opened, yet the
                    // closing tag is written unconditionally.
                    out.write("</srw:records>"); // SearchRetrieve response footer
                    // NOTE(review): replaceAll treats '$' and '\' in the replacement specially;
                    // a query containing them may throw or be mangled.
                    String footer = this.SRW_FOOTER.replaceAll("\\$query", helpers.xmlEncode(query));
                    footer = footer.replaceAll("\\$startRecord", (startRecord).toString());
                    footer = footer.replaceAll("\\$maximumRecords", maximumRecords.toString());
                    footer = footer.replaceAll("\\$recordSchema", recordSchema);
                    if (do_sugg) {
                        out.write(suggest.toString());
                    }
                    out.write(footer);
                } catch (MalformedURLException e) {
                    out.write(e.getMessage());
                } catch (IOException e) {
                    out.write("TO ERR is Human");
                }
            }
        }
    }
    out.close();
}

From source file:net.peacesoft.nutch.crawl.RaovatPostSignature.java

License:Apache License

/**
 * Pages through every document of the Solr index at {@code solrUrl} and queues each one
 * on the shared {@code updateRequest}, then commits on the Solr server.
 *
 * <p>The original MapReduce job wiring was disabled in favour of paging the index
 * directly; the {@link JobConf} is now only used to carry the server URL.
 *
 * <p>NOTE(review): {@code noCommit} is currently not consulted - the commit always
 * runs. Confirm with callers before changing that. The queued {@code updateRequest}
 * is also not processed inside this method.
 *
 * @param solrUrl  base URL of the Solr server to read from
 * @param noCommit accepted for interface compatibility; not used by this method
 * @throws IOException if any Solr query fails (wraps the {@code SolrServerException})
 */
public void dedup(String solrUrl, boolean noCommit) throws IOException {
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    LOG.info("RaovatPostSignature: starting at " + sdf.format(start));
    LOG.info("RaovatPostSignature: Solr url: " + solrUrl);

    JobConf job = new NutchJob(getConf());
    job.set(ReSolrConstants.SERVER_URL, solrUrl);

    solr = SolrUtils.getCommonsHttpSolrServer(job);

    // Cheap probe query (one row, id only) just to learn the total document count.
    SolrQuery countQuery = new SolrQuery(SOLR_GET_ALL_QUERY);
    countQuery.setFields(ReSolrConstants.ID_FIELD);
    countQuery.setRows(1);

    QueryResponse response;
    try {
        response = solr.query(countQuery);
    } catch (final SolrServerException e) {
        throw new IOException(e);
    }

    int numResults = (int) response.getResults().getNumFound();
    LOG.info("Total items:" + numResults);

    final int pageSize = 200;
    updateRequest = new UpdateRequest();

    // Solr's "start" parameter is a zero-based offset, so paging must begin at 0;
    // beginning at 1 silently skipped the first document. Looping on the offset
    // also avoids a trailing empty query when the total is a multiple of pageSize.
    for (int offset = 0; offset < numResults; offset += pageSize) {
        SolrQuery pageQuery = new SolrQuery(SOLR_GET_ALL_QUERY);
        pageQuery.setStart(offset);
        pageQuery.setRows(pageSize);

        try {
            response = solr.query(pageQuery);
        } catch (final SolrServerException e) {
            throw new IOException(e);
        }

        for (SolrDocument solrDocument : response.getResults()) {
            updateRequest.add(ClientUtils.toSolrInputDocument(solrDocument));
        }
    }

    try {
        solr.commit();
    } catch (SolrServerException ex) {
        // Best effort as before (no rethrow), but no longer silent.
        LOG.warn("RaovatPostSignature: Solr commit failed", ex);
    }

    long end = System.currentTimeMillis();
    LOG.info("RaovatPostSignature: finished at " + sdf.format(end) + ", elapsed: "
            + TimingUtil.elapsedTime(start, end));
}

From source file:net.yacy.cora.federate.solr.connector.AbstractSolrConnector.java

License:Open Source License

/**
 * Assembles an edismax {@link SolrQuery} for the given query string.
 *
 * @param querystring the query text
 * @param sort        sort clause to set, or {@code null} for no explicit sort
 * @param offset      index of the first result to return
 * @param count       number of rows to return
 * @param fields      fields to return; when {@code null} or empty no field list is set
 * @return the configured query: faceting off, score excluded, text field as query field
 */
public static SolrQuery getSolrQuery(final String querystring, final String sort, final int offset,
        final int count, final String... fields) {
    final SolrQuery solrQuery = new SolrQuery();

    // A raw-query special case ("{!raw f=" with count < 2 routed through a filter
    // query on "*:*") was disabled upstream; the query string is always used as is.
    solrQuery.setQuery(querystring);

    // Sorting: drop whatever is set, then apply the caller's clause if there is one.
    solrQuery.clearSorts();
    if (sort != null) {
        solrQuery.set(CommonParams.SORT, sort);
    }

    // Result window.
    solrQuery.setRows(count);
    solrQuery.setStart(offset);

    solrQuery.setFacet(false);

    // Field list first, then strip the score from it.
    final boolean hasFields = fields != null && fields.length > 0;
    if (hasFields) {
        solrQuery.setFields(fields);
    }
    solrQuery.setIncludeScore(false);

    // Query parser and query field.
    final String queryFields = CollectionSchema.text_t.getSolrFieldName() + "^1.0";
    solrQuery.setParam("defType", "edismax");
    solrQuery.setParam(DisMaxParams.QF, queryFields);
    return solrQuery;
}

From source file:net.yacy.cora.federate.solr.connector.AbstractSolrConnector.java

License:Open Source License

/**
 * Looks up a single document by its id (the url hash) and returns its load-time metadata.
 *
 * @param id the url hash, used as the document id
 * @return the metadata of the matching entry, or {@code null} when Solr reports no match
 * @throws IOException if the lookup fails
 */
@Override
public LoadTimeURL getLoadTimeURL(String id) throws IOException {
    // Raw, uncached query on the id field.
    final String rawQuery = "{!cache=false raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id;

    final SolrQuery lookup = new SolrQuery();
    lookup.setQuery(rawQuery);
    lookup.setRows(1);
    lookup.setStart(0);
    lookup.setFacet(false);
    lookup.clearSorts();
    // Only the fields needed to build the result are requested.
    lookup.setFields(
            CollectionSchema.id.getSolrFieldName(),
            CollectionSchema.sku.getSolrFieldName(),
            CollectionSchema.load_date_dt.getSolrFieldName());
    lookup.setIncludeScore(false);

    // Ask the server.
    final SolrDocumentList hits = getDocumentListByParams(lookup);
    final boolean found = hits != null && hits.getNumFound() > 0;
    if (!found) {
        return null;
    }
    final SolrDocument first = hits.iterator().next();
    return getLoadTimeURL(first);
}

From source file:net.yacy.cora.federate.solr.connector.AbstractSolrConnector.java

License:Open Source License

/**
 * Returns how many documents match a query without fetching any of them.
 * Intended for callers that need only the count, never the documents.
 *
 * @param querystring the query to count matches for
 * @return the number of matches, or 0 when no document list comes back
 * @throws IOException if the query fails
 */
@Override
public long getCountByQuery(String querystring) throws IOException {
    final SolrQuery countQuery = new SolrQuery();
    countQuery.setQuery(querystring);
    // Zero rows: only the hit count is of interest.
    countQuery.setRows(0);
    countQuery.setStart(0);
    countQuery.setFacet(false);
    countQuery.clearSorts();
    countQuery.setFields(CollectionSchema.id.getSolrFieldName());
    countQuery.setIncludeScore(false);

    // Ask the server.
    final SolrDocumentList hits = getDocumentListByParams(countQuery);
    if (hits == null) {
        return 0;
    }
    return hits.getNumFound();
}

From source file:net.yacy.cora.federate.solr.connector.AbstractSolrConnector.java

License:Open Source License

/**
 * Gets facets of the index: for each requested field, the most common values of that
 * field among the documents matching a query.
 *
 * @param query      the query which is performed to get the facets
 * @param maxresults the maximum number of values returned per facet field
 * @param fields     the field names which are selected as facets; must not be empty
 * @return a map with key = facet field name, value = score map of field values to their
 *         counts; fields for which the response carries no facet data are omitted
 * @throws IOException if the query fails
 */
@Override
public LinkedHashMap<String, ReversibleScoreMap<String>> getFacets(String query, int maxresults,
        final String... fields) throws IOException {
    // construct query
    assert fields.length > 0;
    final SolrQuery params = new SolrQuery();
    params.setQuery(query);
    params.setRows(0); // only the facet counts are needed, no documents
    params.setStart(0);
    params.setFacet(true);
    params.setFacetMinCount(1); // there are many 0-count facets in the uninverted index cache
    params.setFacetLimit(maxresults);
    params.setFacetSort(FacetParams.FACET_SORT_COUNT);
    params.setParam(FacetParams.FACET_METHOD, FacetParams.FACET_METHOD_fc /*FACET_METHOD_fcs*/);
    params.setFields(fields);
    params.clearSorts();
    params.setIncludeScore(false);
    for (String field : fields) {
        params.addFacetField(field);
    }

    // query the server
    QueryResponse rsp = getResponseByParams(params);
    LinkedHashMap<String, ReversibleScoreMap<String>> facets = new LinkedHashMap<String, ReversibleScoreMap<String>>(
            fields.length);
    for (String field : fields) {
        FacetField facet = rsp.getFacetField(field);
        if (facet == null) {
            // The response has no entry for this field; skip it instead of
            // dereferencing null.
            continue;
        }
        List<Count> values = facet.getValues();
        if (values == null) {
            continue;
        }
        ReversibleScoreMap<String> result = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
        for (Count ff : values) {
            if (ff.getCount() > 0) {
                // Clamp rather than wrap when the count exceeds the int range.
                result.set(ff.getName(), (int) Math.min(Integer.MAX_VALUE, ff.getCount()));
            }
        }
        facets.put(field, result);
    }
    return facets;
}

From source file:net.yacy.cora.federate.solr.connector.AbstractSolrConnector.java

License:Open Source License

/**
 * Load a single document by its id using a raw, uncached term query on the id field.
 * @param id the document id; asserted to have the common hash length
 * @param fields the fields to retrieve; if null or empty, no field list is set on the query
 * @return the first matching document, or null if no document was found
 * @throws IOException wrapping any Throwable raised while querying
 */
@Override
public SolrDocument getDocumentById(final String id, final String... fields) throws IOException {
    assert id.length() == Word.commonHashLength : "wrong id: " + id;

    // assemble the request: one row, no sorting, no score
    final SolrQuery idQuery = new SolrQuery();
    final String rawIdQuery = "{!cache=false raw f=" + CollectionSchema.id.getSolrFieldName() + '}' + id;
    idQuery.setQuery(rawIdQuery);
    idQuery.clearSorts();
    idQuery.setRows(1);
    idQuery.setStart(0);
    final boolean fieldsGiven = fields != null && fields.length > 0;
    if (fieldsGiven) {
        idQuery.setFields(fields);
    }
    idQuery.setIncludeScore(false);

    // ask the server
    try {
        final SolrDocumentList found = getDocumentListByParams(idQuery);
        final boolean nothingFound = found == null || found.isEmpty();
        return nothingFound ? null : found.get(0);
    } catch (final Throwable e) {
        clearCaches(); // clear the caches in case this was caused by an OOM
        throw new IOException(e.getMessage(), e);
    }
}

From source file:net.yacy.peers.Protocol.java

License:Open Source License

/**
 * Run a Solr query for a search event, either on the local index or on the Solr interface of a
 * remote peer, and feed the resulting documents, facets and snippets into the event.
 *
 * @param event the search event which receives the results
 * @param solrQuery the query; start, rows, highlighting and (optionally) "partitions" are set on it here
 * @param offset index of the first result to request
 * @param count number of results to request; also the upper bound of documents processed from the response
 * @param target the peer to query; null or the own seed selects the local index (for the own seed
 *        only if DEBUG_SEARCH_REMOTE_SOLR_TESTLOCAL is not set)
 * @param partitions if greater than 0, passed to Solr as parameter "partitions"
 * @param blacklist urls listed here for BlacklistType.SEARCH are dropped from the result
 * @return -1 if the query could not be performed, 0 if no documents were returned,
 *         otherwise the number of documents in the Solr response (before filtering)
 */
protected static int solrQuery(final SearchEvent event, final SolrQuery solrQuery, final int offset,
        final int count, final Seed target, final int partitions, final Blacklist blacklist) {

    if (event.query.getQueryGoal().getQueryString(false) == null
            || event.query.getQueryGoal().getQueryString(false).length() == 0) {
        return -1; // we cannot query solr only with word hashes, there is no clear text string
    }
    event.addExpectedRemoteReferences(count);
    if (partitions > 0)
        solrQuery.set("partitions", partitions);
    solrQuery.setStart(offset);
    solrQuery.setRows(count);

    // set highlighting query attributes
    if (event.query.contentdom == Classification.ContentDomain.TEXT
            || event.query.contentdom == Classification.ContentDomain.ALL) {
        solrQuery.setHighlight(true);
        solrQuery.setHighlightFragsize(SearchEvent.SNIPPET_MAX_LENGTH);
        //solrQuery.setHighlightRequireFieldMatch();
        solrQuery.setHighlightSimplePost("</b>");
        solrQuery.setHighlightSimplePre("<b>");
        solrQuery.setHighlightSnippets(5);
        for (CollectionSchema field : snippetFields)
            solrQuery.addHighlightField(field.getSolrFieldName());
    } else {
        solrQuery.setHighlight(false);
    }
    boolean localsearch = target == null || target.equals(event.peers.mySeed());
    Map<String, ReversibleScoreMap<String>> facets = new HashMap<String, ReversibleScoreMap<String>>(
            event.query.facetfields.size());
    Map<String, LinkedHashSet<String>> snippets = new HashMap<String, LinkedHashSet<String>>(); // this will be a list of urlhash-snippet entries
    final QueryResponse[] rsp = new QueryResponse[] { null };
    final SolrDocumentList[] docList = new SolrDocumentList[] { null };
    // a null target is a legal "local/shard" search (see localsearch above), so it must not be dereferenced here
    final String ip = target == null ? null : target.getIP();
    {// encapsulate expensive solr QueryResponse object
        // without a target there is no remote peer to talk to: always use the local index then
        if (localsearch && (target == null || !Switchboard.getSwitchboard()
                .getConfigBool(SwitchboardConstants.DEBUG_SEARCH_REMOTE_SOLR_TESTLOCAL, false))) {
            // search the local index
            try {
                SolrConnector sc = event.getQuery().getSegment().fulltext().getDefaultConnector();
                if (!sc.isClosed()) {
                    rsp[0] = sc.getResponseByParams(solrQuery);
                    docList[0] = rsp[0].getResults();
                }
            } catch (final Throwable e) {
                Network.log.info("SEARCH failed (solr), localpeer (" + e.getMessage() + ")", e);
                return -1;
            }
        } else {
            // target is never null in this branch (see condition above)
            try {
                final boolean myseed = target == event.peers.mySeed();
                if (!myseed && !target.getFlagSolrAvailable()) { // skip if peer.dna has flag that last try resulted in error
                    Network.log.info("SEARCH skip (solr), remote Solr interface not accessible, peer="
                            + target.getName());
                    return -1;
                }
                final String address = myseed ? "localhost:" + target.getPort() : target.getPublicAddress(ip);
                final int solrtimeout = Switchboard.getSwitchboard()
                        .getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_TIMEOUT, 6000);
                Thread remoteRequest = new Thread() {
                    @Override
                    public void run() {
                        this.setName("Protocol.solrQuery(" + solrQuery.getQuery() + " to " + target.hash + ")");
                        try {
                            RemoteInstance instance = new RemoteInstance("http://" + address, null, "solr",
                                    solrtimeout); // this is a 'patch configuration' which considers 'solr' as default collection
                            try {
                                SolrConnector solrConnector = new RemoteSolrConnector(instance,
                                        myseed ? true : target.getVersion() >= 1.63, "solr");
                                if (!solrConnector.isClosed())
                                    try {
                                        rsp[0] = solrConnector.getResponseByParams(solrQuery);
                                        docList[0] = rsp[0].getResults();
                                    } catch (Throwable e) {
                                        // deliberately ignored: a failed request leaves rsp[0]/docList[0] null,
                                        // which is reported by the null check after the join below
                                    } finally {
                                        solrConnector.close();
                                    }
                            } catch (Throwable ee) {
                                // deliberately ignored, see above
                            } finally {
                                instance.close();
                            }
                        } catch (Throwable eee) {
                            // deliberately ignored, see above
                        }
                    }
                };
                remoteRequest.start();
                remoteRequest.join(solrtimeout); // just wait until timeout appears
                if (remoteRequest.isAlive()) {
                    try {
                        remoteRequest.interrupt();
                    } catch (Throwable e) {
                        // best effort only; the request thread is abandoned anyway
                    }
                    Network.log.info("SEARCH failed (solr), remote Peer: " + target.getName() + "/"
                            + target.getPublicAddress(ip) + " does not answer (time-out)");
                    target.setFlagSolrAvailable(myseed);
                    return -1; // give up, leave remoteRequest abandoned.
                }
                // no need to close this here because that sends a commit to remote solr which is not wanted here
            } catch (final Throwable e) {
                Network.log.info("SEARCH failed (solr), remote Peer: " + target.getName() + "/"
                        + target.getPublicAddress(ip) + " (" + e.getMessage() + ")");
                target.setFlagSolrAvailable(localsearch);
                return -1;
            }
        }

        if (rsp[0] == null || docList[0] == null) {
            Network.log.info("SEARCH failed (solr), remote Peer: "
                    + (target == null ? "shard" : (target.getName() + "/" + target.getPublicAddress(ip)))
                    + " returned null");
            if (target != null)
                target.setFlagSolrAvailable(localsearch);
            return -1;
        }

        // evaluate facets
        for (String field : event.query.facetfields) {
            FacetField facet = rsp[0].getFacetField(field);
            ReversibleScoreMap<String> result = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
            List<Count> values = facet == null ? null : facet.getValues();
            if (values == null)
                continue;
            for (Count ff : values) {
                int c = (int) ff.getCount();
                if (c == 0)
                    continue;
                if (ff.getName().length() == 0)
                    continue; // facet entry without text is not useful
                result.set(ff.getName(), c);
            }
            if (result.size() > 0)
                facets.put(field, result);
        }

        // evaluate snippets
        Map<String, Map<String, List<String>>> rawsnippets = rsp[0].getHighlighting(); // a map from the urlhash to a map with key=field and value = list of snippets
        if (rawsnippets != null) {
            nextsnippet: for (Map.Entry<String, Map<String, List<String>>> re : rawsnippets.entrySet()) {
                Map<String, List<String>> rs = re.getValue();
                for (CollectionSchema field : snippetFields) {
                    if (rs.containsKey(field.getSolrFieldName())) {
                        List<String> s = rs.get(field.getSolrFieldName());
                        if (s.size() > 0) {
                            LinkedHashSet<String> ls = new LinkedHashSet<String>();
                            ls.addAll(s);
                            snippets.put(re.getKey(), ls);
                            continue nextsnippet;
                        }
                    }
                }
                // no snippet found :( --we don't assign a value here by default; that can be done as an evaluation outside this method
            }
        }
        rsp[0] = null; // release the expensive response object early
    }

    // evaluate result
    if (docList[0] == null || docList[0].size() == 0) {
        Network.log.info("SEARCH (solr), returned 0 out of 0 documents from "
                + (target == null ? "shard" : ("peer " + target.hash + ":" + target.getName())) + " query = "
                + solrQuery.toString());
        return 0;
    }

    List<URIMetadataNode> container = new ArrayList<URIMetadataNode>();
    Network.log.info("SEARCH (solr), returned " + docList[0].size() + " out of " + docList[0].getNumFound()
            + " documents and " + facets.size() + " facets " + facets.keySet().toString() + " from "
            + (target == null ? "shard" : ("peer " + target.hash + ":" + target.getName())));
    int term = count;
    Collection<SolrInputDocument> docs;
    if (event.addResultsToLocalIndex) { // only needed to store remote results
        docs = new ArrayList<SolrInputDocument>(docList[0].size());
    } else
        docs = null;
    for (final SolrDocument doc : docList[0]) {
        if (term-- <= 0) {
            break; // do not process more that requested (in case that evil peers fill us up with rubbish)
        }
        // get one single search result
        if (doc == null) {
            continue;
        }
        URIMetadataNode urlEntry = new URIMetadataNode(doc);

        if (blacklist.isListed(BlacklistType.SEARCH, urlEntry.url())) {
            if (Network.log.isInfo()) {
                if (localsearch) {
                    Network.log.info("local search (solr): filtered blacklisted url " + urlEntry.url());
                } else {
                    Network.log.info("remote search (solr): filtered blacklisted url " + urlEntry.url()
                            + " from "
                            + (target == null ? "shard" : ("peer " + target.hash + ":" + target.getName())));
                }
            }
            continue; // block with blacklist
        }

        final String urlRejectReason = Switchboard.getSwitchboard().crawlStacker
                .urlInAcceptedDomain(urlEntry.url());
        if (urlRejectReason != null) {
            if (Network.log.isInfo()) {
                if (localsearch) {
                    Network.log.info("local search (solr): rejected url '" + urlEntry.url() + "' ("
                            + urlRejectReason + ")");
                } else {
                    Network.log.info("remote search (solr): rejected url '" + urlEntry.url() + "' ("
                            + urlRejectReason + ") from peer " + target.getName());
                }
            }
            continue; // reject url outside of our domain
        }

        // passed all checks, store url
        if (!localsearch) {

            // put the remote documents to the local index. We must convert the solr document to a solr input document:
            if (event.addResultsToLocalIndex) {
                final SolrInputDocument sid = event.query.getSegment().fulltext().getDefaultConfiguration()
                        .toSolrInputDocument(doc);

                // the input document stays untouched because it contains top-level cloned objects
                docs.add(sid);
                // will be stored to index, and is a full solr document, can be added to firstseen
                event.query.getSegment().setFirstSeenTime(urlEntry.hash(),
                        Math.min(urlEntry.moddate().getTime(), System.currentTimeMillis()));
            }

            // after this conversion we can remove the largest and not used field text_t and synonyms_sxt from the document
            // because that goes into a search cache and would take a lot of memory in the search cache
            //doc.removeFields(CollectionSchema.text_t.getSolrFieldName());
            doc.removeFields(CollectionSchema.synonyms_sxt.getSolrFieldName());

            ResultURLs.stack(ASCII.String(urlEntry.url().hash()), urlEntry.url().getHost(),
                    event.peers.mySeed().hash.getBytes(), UTF8.getBytes(target.hash), EventOrigin.QUERIES);
        }

        // add the url entry to the word indexes
        container.add(urlEntry);
    }
    final int dls = docList[0].size();
    final int numFound = (int) docList[0].getNumFound();
    docList[0].clear();
    docList[0] = null;
    if (localsearch) {
        event.addNodes(container, facets, snippets, true, "localpeer", numFound);
        event.addFinalize();
        event.addExpectedRemoteReferences(-count);
        Network.log.info(
                "local search (solr): localpeer sent " + container.size() + "/" + numFound + " references");
    } else {
        if (event.addResultsToLocalIndex) {
            for (SolrInputDocument doc : docs) {
                event.query.getSegment().putDocument(doc);
            }
            docs.clear();
            docs = null;
        }
        event.addNodes(container, facets, snippets, false, target.getName() + "/" + target.hash, numFound);
        event.addFinalize();
        event.addExpectedRemoteReferences(-count);
        Network.log.info("remote search (solr): peer " + target.getName() + " sent "
                + container.size() + "/" + numFound + " references");
    }
    return dls;
}

From source file:net.yacy.search.index.ErrorCache.java

License:Open Source License

/**
 * Create the error cache and start a background thread which pre-loads it with the ids of
 * up to 1000 indexed documents that carry a fail reason, queried in descending last_modified order.
 * @param fulltext the index which is asked for failed documents
 */
public ErrorCache(final Fulltext fulltext) {
    this.fulltext = fulltext;
    this.cache = new LinkedHashMap<String, CollectionConfiguration.FailDoc>();
    // fill the cache concurrently with the latest values from the index
    final Thread filler = new Thread() {
        @Override
        public void run() {
            final SolrQuery failQuery = new SolrQuery();
            failQuery.setParam("defType", "edismax");
            failQuery.setStart(0);
            failQuery.setRows(1000);
            failQuery.setFacet(false);
            final SortClause byLastModifiedDesc = new SortClause(
                    CollectionSchema.last_modified.getSolrFieldName(), SolrQuery.ORDER.desc);
            failQuery.setSort(byLastModifiedDesc);
            failQuery.setFields(CollectionSchema.id.getSolrFieldName());
            final String anyFailReason = CollectionSchema.failreason_s.getSolrFieldName()
                    + AbstractSolrConnector.CATCHALL_DTERM;
            failQuery.setQuery(anyFailReason);
            // DisMaxParams.QF or CommonParams.DF must be given
            failQuery.set(CommonParams.DF, CollectionSchema.id.getSolrFieldName());
            try {
                final SolrDocumentList failed = fulltext.getDefaultConnector().getDocumentListByParams(failQuery);
                if (failed == null) {
                    return;
                }
                // walk the result backwards so that the top-sorted document is inserted last
                int pos = failed.size();
                while (--pos >= 0) {
                    final SolrDocument doc = failed.get(pos);
                    final String hash = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName());
                    cache.put(hash, null);
                }
            } catch (IOException e) {
                ConcurrentLog.logException(e);
            }
        }
    };
    filler.start();
}

From source file:net.yacy.search.index.ErrorCacheFiller.java

License:Open Source License

/**
 * Fills the error cache with the hashes of recently failed documents found in the index:
 * up to 1000 documents carrying a fail reason, queried in descending load_date_dt order.
 */
@Override
public void run() {
    final SolrQuery failQuery = new SolrQuery();
    failQuery.setParam("defType", "edismax");
    failQuery.setStart(0);
    failQuery.setRows(1000);
    failQuery.setFacet(false);
    // load_date_dt = faildate
    final SortClause byFailDateDesc = new SortClause(CollectionSchema.load_date_dt.getSolrFieldName(),
            SolrQuery.ORDER.desc);
    failQuery.setSort(byFailDateDesc);
    failQuery.setFields(CollectionSchema.id.getSolrFieldName());
    final String anyFailReason = CollectionSchema.failreason_s.getSolrFieldName()
            + AbstractSolrConnector.CATCHALL_DTERM;
    failQuery.setQuery(anyFailReason);
    // DisMaxParams.QF or CommonParams.DF must be given
    failQuery.set(CommonParams.DF, CollectionSchema.id.getSolrFieldName());
    try {
        final SolrDocumentList failed = this.sb.index.fulltext().getDefaultConnector()
                .getDocumentListByParams(failQuery);
        if (failed == null) {
            return;
        }
        // walk the result backwards so that the top-sorted document is inserted last
        int pos = failed.size();
        while (--pos >= 0) {
            final SolrDocument doc = failed.get(pos);
            final String hash = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName());
            cache.putHashOnly(hash);
        }
    } catch (IOException e) {
        ConcurrentLog.logException(e);
    }
}