Example usage for org.apache.solr.client.solrj SolrQuery setStart

Introduction

On this page you can find example usages for org.apache.solr.client.solrj SolrQuery setStart.

Prototype

public SolrQuery setStart(Integer start) 
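
setStart sets the zero-based offset of the first document Solr returns and is normally combined with setRows, the page size, so that advancing the start value pages through a result set. Below is a minimal, self-contained sketch of that pattern; it is not taken from the projects listed under Usage, and the Solr URL, core name and the SolrJ HttpSolrClient builder are illustrative assumptions.

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;

public class SetStartPagingExample {

    public static void main(String[] args) throws Exception {
        // Placeholder URL and core name; point this at your own Solr instance.
        SolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/mycore").build();

        SolrQuery query = new SolrQuery("*:*");
        int pageSize = 100;
        query.setRows(pageSize);       // documents per page

        int start = 0;
        long numFound;
        do {
            query.setStart(start);     // zero-based offset of the first document to return
            QueryResponse response = client.query(query);
            numFound = response.getResults().getNumFound();
            for (SolrDocument doc : response.getResults()) {
                System.out.println(doc.getFieldValue("id"));
            }
            start += pageSize;         // advance to the next page
        } while (start < numFound);

        client.close();
    }
}

For very deep result sets, cursor-based paging (cursorMark) usually scales better than large start offsets, but setStart is the simplest approach and is the one the examples below rely on.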

Usage

From source file:edu.vt.vbi.patric.portlets.TranscriptomicsGene.java

License:Apache License

public void serveResource(ResourceRequest request, ResourceResponse response)
        throws PortletException, IOException {
    String callType = request.getParameter("callType");

    if (callType != null) {
        switch (callType) {
        case "saveParams": {

            String keyword = request.getParameter("keyword");
            DataApiHandler dataApi = new DataApiHandler(request);

            Map<String, String> key = new HashMap<>();
            key.put("keyword", "locus_tag:(" + keyword + ") OR refseq_locus_tag:(" + keyword + ") ");
            key.put("fields", "pid");

            SolrQuery query = dataApi.buildSolrQuery(key, null, null, 0, -1, false);

            String apiResponse = dataApi.solrQuery(SolrCore.TRANSCRIPTOMICS_GENE, query);

            Map resp = jsonReader.readValue(apiResponse);
            Map respBody = (Map) resp.get("response");

            List<Map> sdl = (List<Map>) respBody.get("docs");

            Set<String> sampleIds = new HashSet<>();

            for (Map doc : sdl) {
                sampleIds.add(doc.get("pid").toString());
            }

            String sId = StringUtils.join(sampleIds, ",");

            key = new HashMap();
            if (!keyword.equals("")) {
                key.put("keyword", keyword);
            }

            response.setContentType("text/html");
            PrintWriter writer = response.getWriter();

            if (!sId.equals("")) {
                key.put("sampleId", sId);
                long pk = (new Random()).nextLong();

                SessionHandler.getInstance().set(SessionHandler.PREFIX + pk,
                        jsonWriter.writeValueAsString(key));

                writer.write("" + pk);
            } else {
                writer.write("");
            }
            writer.close();

            break;
        }
        case "getTables": {

            String expId = request.getParameter("expId");
            String sampleId = request.getParameter("sampleId");
            String wsExperimentId = request.getParameter("wsExperimentId");
            String wsSampleId = request.getParameter("wsSampleId");
            String keyword = request.getParameter("keyword");
            DataApiHandler dataApi = new DataApiHandler(request);

            JSONArray sample = new JSONArray();

            if ((sampleId != null && !sampleId.equals("")) || (expId != null && !expId.equals(""))) {

                String query_keyword = "";

                if (expId != null && !expId.equals("")) {
                    query_keyword += "eid:(" + expId.replaceAll(",", " OR ") + ")";
                }

                if (sampleId != null && !sampleId.equals("")) {
                    if (query_keyword.length() > 0) {
                        query_keyword += " AND ";
                    }
                    query_keyword += "pid:(" + sampleId.replaceAll(",", " OR ") + ")";
                }

                Map<String, String> key = new HashMap<>();
                key.put("keyword", query_keyword);
                key.put("fields", "pid,expname,expmean,timepoint,mutant,strain,condition");

                SolrQuery query = dataApi.buildSolrQuery(key, null, null, 0, -1, false);

                String apiResponse = dataApi.solrQuery(SolrCore.TRANSCRIPTOMICS_COMPARISON, query);

                Map resp = jsonReader.readValue(apiResponse);
                Map respBody = (Map) resp.get("response");

                List<Map> sdl = (List<Map>) respBody.get("docs");

                for (final Map doc : sdl) {
                    final JSONObject item = new JSONObject(doc);
                    sample.add(item);
                }
            }

            // Read from JSON if collection parameter is there
            ExpressionDataCollection parser = null;
            if (wsExperimentId != null && !wsExperimentId.equals("")) {
                String token = getAuthorizationToken(request);

                parser = new ExpressionDataCollection(wsExperimentId, token);
                parser.read(ExpressionDataCollection.CONTENT_SAMPLE);
                if (wsSampleId != null && !wsSampleId.equals("")) {
                    parser.filter(wsSampleId, ExpressionDataCollection.CONTENT_SAMPLE);
                }
                // Append samples from collection to samples from DB
                sample = parser.append(sample, ExpressionDataCollection.CONTENT_SAMPLE);
            }

            String sampleList = "";
            sampleList += ((JSONObject) sample.get(0)).get("pid");

            for (int i = 1; i < sample.size(); i++) {
                sampleList += "," + ((JSONObject) sample.get(i)).get("pid");
            }

            JSONObject jsonResult = new JSONObject();
            jsonResult.put(ExpressionDataCollection.CONTENT_SAMPLE + "Total", sample.size());
            jsonResult.put(ExpressionDataCollection.CONTENT_SAMPLE, sample);
            JSONArray expression = new JSONArray();

            if ((sampleId != null && !sampleId.equals("")) || (expId != null && !expId.equals(""))) {

                String query_keyword = "";

                if (keyword != null && !keyword.equals("")) {
                    query_keyword += "(alt_locus_tag:(" + keyword + ") OR refseq_locus_tag:(" + keyword + ")) ";
                }

                if (expId != null && !expId.equals("")) {
                    if (query_keyword.length() > 0) {
                        query_keyword += " AND ";
                    }
                    query_keyword += "eid:(" + expId.replaceAll(",", " OR ") + ")";
                }

                if (sampleId != null && !sampleId.equals("")) {
                    if (query_keyword.length() > 0) {
                        query_keyword += " AND ";
                    }
                    query_keyword += "pid:(" + sampleId.replaceAll(",", " OR ") + ")";
                }

                Map<String, String> key = new HashMap<>();
                key.put("keyword", query_keyword);
                key.put("fields", "pid,refseq_locus_tag,feature_id,log_ratio,z_score");

                SolrQuery query = dataApi.buildSolrQuery(key, null, null, 0, -1, false);

                LOGGER.trace("getTables: [{}] {}", SolrCore.TRANSCRIPTOMICS_GENE.getSolrCoreName(), query);

                String apiResponse = dataApi.solrQuery(SolrCore.TRANSCRIPTOMICS_GENE, query);

                Map resp = jsonReader.readValue(apiResponse);
                Map respBody = (Map) resp.get("response");

                List<Map> sdl = (List<Map>) respBody.get("docs");

                for (final Map doc : sdl) {
                    final JSONObject item = new JSONObject(doc);
                    expression.add(item);
                }

                // TODO: re-implement when data API removes limit 25k records
                int start = 0;
                int fetchedSize = sdl.size();
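                // the data API caps each response at 25,000 docs, so keep advancing the
                // offset with setStart() until a short page signals the last batch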
                while (fetchedSize == 25000) {
                    start += 25000;
                    query.setStart(start);

                    LOGGER.trace("getTables: [{}] {}", SolrCore.TRANSCRIPTOMICS_GENE.getSolrCoreName(), query);

                    final String apiResponseSub = dataApi.solrQuery(SolrCore.TRANSCRIPTOMICS_GENE, query);
                    final Map respSub = jsonReader.readValue(apiResponseSub);
                    final Map respBodySub = (Map) respSub.get("response");

                    sdl = (List<Map>) respBodySub.get("docs");
                    fetchedSize = sdl.size();

                    for (final Map doc : sdl) {
                        final JSONObject item = new JSONObject(doc);
                        expression.add(item);
                    }
                }
            }

            if (wsExperimentId != null && !wsExperimentId.equals("")) {

                parser.read(ExpressionDataCollection.CONTENT_EXPRESSION);
                if (wsSampleId != null && !wsSampleId.equals(""))
                    parser.filter(wsSampleId, ExpressionDataCollection.CONTENT_EXPRESSION);

                // Append expression from collection to expression from DB
                expression = parser.append(expression, ExpressionDataCollection.CONTENT_EXPRESSION);
            }

            JSONArray stats = getExperimentStats(dataApi, expression, sampleList, sample);
            jsonResult.put(ExpressionDataCollection.CONTENT_EXPRESSION + "Total", stats.size());
            jsonResult.put(ExpressionDataCollection.CONTENT_EXPRESSION, stats);

            response.setContentType("application/json");
            PrintWriter writer = response.getWriter();
            jsonResult.writeJSONString(writer);
            writer.close();

            break;
        }
        case "doClustering": {

            String data = request.getParameter("data");
            String g = request.getParameter("g");
            String e = request.getParameter("e");
            String m = request.getParameter("m");
            String ge = request.getParameter("ge");
            String pk = request.getParameter("pk");
            String action = request.getParameter("action");

            String folder = "/tmp/";
            String filename = folder + "tmp_" + pk + ".txt";
            String output_filename = folder + "cluster_tmp_" + pk;
            try {

                PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(filename, true)));
                out.write(data);
                out.close();

            } catch (Exception es) {
                LOGGER.error(es.getMessage(), es);
            }

            response.setContentType("text/html");
            PrintWriter writer = response.getWriter();
            if (action.equals("Run"))
                writer.write(doCLustering(filename, output_filename, g, e, m, ge).toString());

            writer.close();

            break;
        }
        case "saveState": {

            String keyType = request.getParameter("keyType");
            String pageAt = request.getParameter("pageAt");
            String sampleFilter = request.getParameter("sampleFilter");
            String regex = request.getParameter("regex");
            String regexGN = request.getParameter("regexGN");
            String upFold = request.getParameter("upFold");
            String downFold = request.getParameter("downFold");
            String upZscore = request.getParameter("upZscore");
            String downZscore = request.getParameter("downZscore");
            String significantGenes = request.getParameter("significantGenes");
            String ClusterColumnOrder = request.getParameter("ClusterColumnOrder");
            String ClusterRowOrder = request.getParameter("ClusterRowOrder");
            String heatmapState = request.getParameter("heatmapState");
            String heatmapAxis = request.getParameter("heatmapAxis");
            String colorScheme = request.getParameter("colorScheme");
            String filterOffset = request.getParameter("filterOffset");

            Map<String, String> key = new HashMap<>();
            key.put("sampleFilter", (sampleFilter == null) ? "" : sampleFilter);
            key.put("pageAt", (pageAt == null) ? "" : pageAt);
            key.put("regex", (regex == null) ? "" : regex);
            key.put("regexGN", (regexGN == null) ? "" : regexGN);
            key.put("upFold", (upFold == null) ? "" : upFold);
            key.put("downFold", (downFold == null) ? "" : downFold);
            key.put("upZscore", (upZscore == null) ? "" : upZscore);
            key.put("downZscore", (downZscore == null) ? "" : downZscore);
            key.put("significantGenes", (significantGenes == null) ? "" : significantGenes);
            key.put("ClusterRowOrder", (ClusterRowOrder == null) ? "" : ClusterRowOrder);
            key.put("ClusterColumnOrder", (ClusterColumnOrder == null) ? "" : ClusterColumnOrder);
            key.put("heatmapState", (heatmapState == null) ? "" : heatmapState);
            key.put("heatmapAxis", (heatmapAxis == null) ? "" : heatmapAxis);
            key.put("colorScheme", (colorScheme == null) ? "" : colorScheme);
            key.put("filterOffset", (filterOffset == null) ? "" : filterOffset);

            long pk = (new Random()).nextLong();
            SessionHandler.getInstance().set(SessionHandler.PREFIX + pk, jsonWriter.writeValueAsString(key));

            response.setContentType("text/html");
            PrintWriter writer = response.getWriter();
            writer.write("" + pk);
            writer.close();

            break;
        }
        case "getState": {

            String keyType = request.getParameter("keyType");
            String pk = request.getParameter("random");

            if ((pk != null) && (keyType != null)) {
                JSONArray results = new JSONArray();
                JSONObject a = new JSONObject();
                Map<String, String> key = jsonReader
                        .readValue(SessionHandler.getInstance().get(SessionHandler.PREFIX + pk));
                if (key != null) {
                    a.put("sampleFilter", key.get("sampleFilter"));
                    a.put("pageAt", key.get("pageAt"));
                    a.put("regex", key.get("regex"));
                    a.put("regexGN", key.get("regexGN"));
                    a.put("upFold", key.get("upFold"));
                    a.put("downFold", key.get("downFold"));
                    a.put("upZscore", key.get("upZscore"));
                    a.put("downZscore", key.get("downZscore"));
                    a.put("significantGenes", key.get("significantGenes"));
                    a.put("ClusterRowOrder", key.get("ClusterRowOrder"));
                    a.put("ClusterColumnOrder", key.get("ClusterColumnOrder"));
                    a.put("heatmapState", key.get("heatmapState"));
                    a.put("heatmapAxis", key.get("heatmapAxis"));
                    a.put("colorScheme", key.get("colorScheme"));
                    a.put("filterOffset", key.get("filterOffset"));
                }
                results.add(a);
                response.setContentType("application/json");
                PrintWriter writer = response.getWriter();
                results.writeJSONString(writer);
                writer.close();
            }
            break;
        }
        case "downloadFeatures": {
            String featureIds = request.getParameter("featureIds");
            String fileFormat = request.getParameter("fileFormat");
            String fileName = "Table_Gene";

            List<String> tableHeader = DownloadHelper.getHeaderForFeatures();
            List<String> tableField = DownloadHelper.getFieldsForFeatures();
            JSONArray tableSource = new JSONArray();

            DataApiHandler dataApi = new DataApiHandler(request);

            SolrQuery query = new SolrQuery("feature_id:(" + featureIds.replaceAll(",", " OR ") + ")");
            query.setFields(StringUtils.join(DownloadHelper.getFieldsForFeatures(), ","));
            query.setRows(dataApi.MAX_ROWS);

            LOGGER.trace("downloadFeatures: [{}] {}", SolrCore.FEATURE.getSolrCoreName(), query);

            final String apiResponse = dataApi.solrQuery(SolrCore.FEATURE, query);

            final Map resp = jsonReader.readValue(apiResponse);
            final Map respBody = (Map) resp.get("response");

            final List<GenomeFeature> features = (List) dataApi.bindDocuments((List) respBody.get("docs"),
                    GenomeFeature.class);

            for (final GenomeFeature feature : features) {
                tableSource.add(feature.toJSONObject());
            }

            final ExcelHelper excel = new ExcelHelper("xssf", tableHeader, tableField, tableSource);
            excel.buildSpreadsheet();

            if (fileFormat.equalsIgnoreCase("xlsx")) {
                response.setContentType("application/octetstream");
                response.addProperty("Content-Disposition",
                        "attachment; filename=\"" + fileName + "." + fileFormat + "\"");

                excel.writeSpreadsheettoBrowser(response.getPortletOutputStream());
            } else {
                response.setContentType("application/octetstream");
                response.addProperty("Content-Disposition",
                        "attachment; filename=\"" + fileName + "." + fileFormat + "\"");

                response.getWriter().write(excel.writeToTextFile());
            }
        }
        }
    }
}

From source file:edu.vt.vbi.patric.proteinfamily.FIGfamData.java

License:Apache License

public String getGenomeIdsForTaxon(ResourceRequest request) throws IOException {
    String taxon = request.getParameter("taxonId");
    String genomeFilter = request.getParameter("genomeFilter");
    List<String> gIds = new ArrayList<>();

    SolrQuery query = new SolrQuery("patric_cds:[1 TO *] AND taxon_lineage_ids:" + taxon);
    if (genomeFilter != null && !genomeFilter.equals("")) {
        query.addFilterQuery(genomeFilter);
    }
    query.addField("genome_id");
    query.setSort("genome_name", SolrQuery.ORDER.asc);
    query.setRows(DataApiHandler.MAX_ROWS);

    LOGGER.trace("getGenomeIdsForTaxon: [{}] {}", SolrCore.GENOME.getSolrCoreName(), query);

    String apiResponse = dataApiHandler.solrQuery(SolrCore.GENOME, query);
    Map resp = jsonReader.readValue(apiResponse);
    Map respBody = (Map) resp.get("response");

    List<Genome> genomes = dataApiHandler.bindDocuments((List<Map>) respBody.get("docs"), Genome.class);

    for (final Genome g : genomes) {
        gIds.add(g.getId());
    }

    // TODO: remove this when data API limit is removed
    if (gIds.size() == 25000) {
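        // a full 25,000-row page means the response was truncated by the data API limit,
        // so fetch one more page starting at offset 25,000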
        query.setStart(25000);

        apiResponse = dataApiHandler.solrQuery(SolrCore.GENOME, query);
        resp = jsonReader.readValue(apiResponse);
        respBody = (Map) resp.get("response");

        genomes = dataApiHandler.bindDocuments((List<Map>) respBody.get("docs"), Genome.class);

        for (final Genome g : genomes) {
            gIds.add(g.getId());
        }
    }

    return StringUtils.join(gIds, ",");
}

From source file:edu.vt.vbi.patric.proteinfamily.FIGfamData.java

License:Apache License

@SuppressWarnings("unchecked")
public void getGroupStats(ResourceRequest request, PrintWriter writer) throws IOException {

    DataApiHandler dataApi = new DataApiHandler(request);

    JSONObject figfams = new JSONObject();
    Set<String> figfamIdList = new HashSet<>();
    List<String> genomeIdList = new LinkedList<>();
    // get family Type
    final String familyType = request.getParameter("familyType");
    final String familyId = familyType + "_id";

    // get genome list in order
    String genomeIds = request.getParameter("genomeIds");
    try {
        SolrQuery query = new SolrQuery("genome_id:(" + genomeIds.replaceAll(",", " OR ") + ")");
        query.addSort("genome_name", SolrQuery.ORDER.asc).addField("genome_id")
                .setRows(DataApiHandler.MAX_ROWS);

        LOGGER.trace("[{}] {}", SolrCore.GENOME.getSolrCoreName(), query);

        String apiResponse = dataApi.solrQuery(SolrCore.GENOME, query);
        Map resp = jsonReader.readValue(apiResponse);
        Map respBody = (Map) resp.get("response");

        List<Genome> genomes = dataApi.bindDocuments((List<Map>) respBody.get("docs"), Genome.class);

        for (final Genome genome : genomes) {
            genomeIdList.add(genome.getId());
        }

        if (genomeIdList.size() == 25000) {
            query.setStart(25000);

            apiResponse = dataApi.solrQuery(SolrCore.GENOME, query);
            resp = jsonReader.readValue(apiResponse);
            respBody = (Map) resp.get("response");

            genomes = dataApi.bindDocuments((List<Map>) respBody.get("docs"), Genome.class);

            for (final Genome genome : genomes) {
                genomeIdList.add(genome.getId());
            }
        }
    } catch (IOException e) {
        LOGGER.error(e.getMessage(), e);
    }

    //      LOGGER.debug("genomeIdList: {}", genomeIdList);

    // getting genome counts per figfamID (figfam)
    // {stat:{field:{field:figfam_id,limit:-1,facet:{min:"min(aa_length)",max:"max(aa_length)",mean:"avg(aa_length)",ss:"sumsq(aa_length)",sum:"sum(aa_length)",dist:"percentile(aa_length,50,75,99,99.9)",field:{field:genome_id}}}}}

    try {
        long start = System.currentTimeMillis();
        SolrQuery query = new SolrQuery("annotation:PATRIC AND feature_type:CDS");
        //         query.addFilterQuery("end:[3200 TO 4300] OR end:[4400 TO 4490] OR end:[4990 TO 4999]");
        query.addFilterQuery(getSolrQuery(request));
        query.addFilterQuery("!" + familyId + ":\"\"");
        query.setRows(0).setFacet(true).set("facet.threads", 15);
        query.add("json.facet", "{stat:{type:field,field:genome_id,limit:-1,facet:{figfams:{type:field,field:"
                + familyId + ",limit:-1,sort:{index:asc}}}}}");

        LOGGER.trace("getGroupStats() 1/3: [{}] {}", SolrCore.FEATURE.getSolrCoreName(), query);
        String apiResponse = dataApi.solrQuery(SolrCore.FEATURE, query);

        long point = System.currentTimeMillis();
        LOGGER.debug("1st query: {} ms", (point - start));
        start = point;

        Map resp = jsonReader.readValue(apiResponse);
        Map facets = (Map) resp.get("facets");
        Map stat = (Map) facets.get("stat");

        final Map<String, String> figfamGenomeIdStr = new LinkedHashMap<>();
        final Map<String, Integer> figfamGenomeCount = new LinkedHashMap<>();

        final int genomeTotal = genomeIdList.size();
        final Map<String, Integer> genomePosMap = new LinkedHashMap<>();
        for (String genomeId : genomeIdList) {
            genomePosMap.put(genomeId, genomeIdList.indexOf(genomeId));
        }

        final Map<String, List> figfamGenomeIdCountMap = new ConcurrentHashMap<>();
        final Map<String, Set> figfamGenomeIdSet = new ConcurrentHashMap<>();

        List<Map> genomeBuckets = (List<Map>) stat.get("buckets");

        for (final Map bucket : genomeBuckets) {

            final String genomeId = (String) bucket.get("val");
            final List<Map> figfamBucket = (List<Map>) ((Map) bucket.get("figfams")).get("buckets");

            for (final Map figfam : figfamBucket) {
                final String figfamId = (String) figfam.get("val");
                final String genomeCount = String.format("%02x", (Integer) figfam.get("count"));

                if (figfamGenomeIdCountMap.containsKey(figfamId)) {
                    figfamGenomeIdCountMap.get(figfamId).set(genomePosMap.get(genomeId), genomeCount);
                } else {
                    final List<String> genomeIdCount = new LinkedList<>(Collections.nCopies(genomeTotal, "00"));
                    genomeIdCount.set(genomePosMap.get(genomeId), genomeCount);
                    figfamGenomeIdCountMap.put(figfamId, genomeIdCount);
                }

                if (figfamGenomeIdSet.containsKey(figfamId)) {
                    figfamGenomeIdSet.get(figfamId).add(genomeId);
                } else {
                    final Set<String> genomeIdSet = new HashSet<>();
                    genomeIdSet.add(genomeId);
                    figfamGenomeIdSet.put(figfamId, genomeIdSet);
                }
            }
        }

        for (String figfamId : figfamGenomeIdCountMap.keySet()) {
            final List genomeIdStr = figfamGenomeIdCountMap.get(figfamId);
            figfamGenomeIdStr.put(figfamId, StringUtils.join(genomeIdStr, ""));
            figfamGenomeCount.put(figfamId, figfamGenomeIdSet.get(figfamId).size());
        }

        point = System.currentTimeMillis();
        LOGGER.debug("1st query process : {} ms, figfamGenomeIdStr:{}, figfamGenomeCount:{}", (point - start),
                figfamGenomeIdStr.size(), figfamGenomeCount.size());

        long start2nd = System.currentTimeMillis();
        // 2nd query

        query.set("json.facet", "{stat:{type:field,field:" + familyId
                + ",limit:-1,facet:{min:\"min(aa_length)\",max:\"max(aa_length)\",mean:\"avg(aa_length)\",ss:\"sumsq(aa_length)\",sum:\"sum(aa_length)\"}}}");

        LOGGER.trace("getGroupStats() 2/3: [{}] {}", SolrCore.FEATURE.getSolrCoreName(), query);
        apiResponse = dataApi.solrQuery(SolrCore.FEATURE, query);

        point = System.currentTimeMillis();
        LOGGER.debug("2st query: {} ms", (point - start2nd));
        start2nd = point;

        resp = jsonReader.readValue(apiResponse);
        facets = (Map) resp.get("facets");
        stat = (Map) facets.get("stat");

        List<Map> buckets = (List<Map>) stat.get("buckets");

        for (Map bucket : buckets) {
            final String figfamId = (String) bucket.get("val");
            final int count = (Integer) bucket.get("count");

            double min, max, mean, sumsq, sum;
            if (bucket.get("min") instanceof Double) {
                min = (Double) bucket.get("min");
            } else if (bucket.get("min") instanceof Integer) {
                min = ((Integer) bucket.get("min")).doubleValue();
            } else {
                min = 0;
            }
            if (bucket.get("max") instanceof Double) {
                max = (Double) bucket.get("max");
            } else if (bucket.get("max") instanceof Integer) {
                max = ((Integer) bucket.get("max")).doubleValue();
            } else {
                max = 0;
            }
            if (bucket.get("mean") instanceof Double) {
                mean = (Double) bucket.get("mean");
            } else if (bucket.get("mean") instanceof Integer) {
                mean = ((Integer) bucket.get("mean")).doubleValue();
            } else {
                mean = 0;
            }
            if (bucket.get("ss") instanceof Double) {
                sumsq = (Double) bucket.get("ss");
            } else if (bucket.get("ss") instanceof Integer) {
                sumsq = ((Integer) bucket.get("ss")).doubleValue();
            } else {
                sumsq = 0;
            }
            if (bucket.get("sum") instanceof Double) {
                sum = (Double) bucket.get("sum");
            } else if (bucket.get("sum") instanceof Integer) {
                sum = ((Integer) bucket.get("sum")).doubleValue();
            } else {
                sum = 0;
            }

            //            LOGGER.debug("bucket:{}, sumsq:{}, count: {}", bucket, sumsq, count);
            double std;
            if (count > 1) {
                // std = Math.sqrt(sumsq / (count - 1));
                final double realSq = sumsq - (sum * sum) / count;
                std = Math.sqrt(realSq / (count - 1));
            } else {
                std = 0;
            }
            final JSONObject aaLength = new JSONObject();
            aaLength.put("min", min);
            aaLength.put("max", max);
            aaLength.put("mean", mean);
            aaLength.put("stddev", std);

            figfamIdList.add(figfamId);

            final JSONObject figfam = new JSONObject();
            figfam.put("genomes", figfamGenomeIdStr.get(figfamId));
            figfam.put("genome_count", figfamGenomeCount.get(figfamId));
            figfam.put("feature_count", count);
            figfam.put("stats", aaLength);

            figfams.put(figfamId, figfam);
        }

        point = System.currentTimeMillis();
        LOGGER.debug("2st query process: {} ms", (point - start2nd));
    } catch (IOException e) {
        LOGGER.error(e.getMessage(), e);
    }

    // getting distinct figfam_product
    if (!figfamIdList.isEmpty()) {

        figfamIdList.remove("");

        try {
            SolrQuery query = new SolrQuery("family_id:(" + StringUtils.join(figfamIdList, " OR ") + ")");
            query.addFilterQuery("family_type:" + familyType);
            query.addField("family_id,family_product").setRows(figfamIdList.size());

            LOGGER.debug("getGroupStats() 3/3: [{}] {}", SolrCore.FIGFAM_DIC.getSolrCoreName(), query);

            String apiResponse = dataApi.solrQuery(SolrCore.FIGFAM_DIC, query);

            Map resp = jsonReader.readValue(apiResponse);
            Map respBody = (Map) resp.get("response");

            List<Map> sdl = (List<Map>) respBody.get("docs");

            for (final Map doc : sdl) {
                final JSONObject figfam = (JSONObject) figfams.get(doc.get("family_id"));
                figfam.put("description", doc.get("family_product"));
                figfams.put(doc.get("family_id").toString(), figfam);
            }

            int i = 1;
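            // keep requesting further 25,000-doc pages, moving the offset forward each
            // iteration, until a partial page indicates the end of the result set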
            while (sdl.size() == 25000) {
                query.setStart(25000 * i);

                apiResponse = dataApi.solrQuery(SolrCore.FIGFAM_DIC, query);
                resp = jsonReader.readValue(apiResponse);
                respBody = (Map) resp.get("response");

                sdl = (List<Map>) respBody.get("docs");

                for (final Map doc : sdl) {
                    final JSONObject figfam = (JSONObject) figfams.get(doc.get("family_id"));
                    figfam.put("description", doc.get("family_product"));
                    figfams.put(doc.get("family_id").toString(), figfam);
                }
                i++;
            }
        } catch (IOException e) {
            LOGGER.error(e.getMessage(), e);
            LOGGER.debug("::getGroupStats() 3/3, params: {}", request.getParameterMap().toString());
        }
        figfams.writeJSONString(writer);
    }
}

From source file:eu.annocultor.converters.solr.SolrDocumentTagger.java

License:Apache License

public int tag() throws Exception {
    int recordsPassed = 0;
    for (int page = 0; true; page++) {
        int queryStart = page * DOCUMENTS_PER_READ;
        int queryEnd = queryStart + DOCUMENTS_PER_READ;
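        // process a page only if it reaches past the (externally configured) start offset;
        // earlier pages are skipped below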
        if (queryEnd > start) {

            SolrQuery solrQuery = new SolrQuery(query);
            solrQuery.setStart(queryStart);
            solrQuery.setRows(DOCUMENTS_PER_READ);
            if (FIELDS_TO_EXTRACT.length > 0) {
                solrQuery.setFields(FIELDS_TO_EXTRACT);
            }
            QueryResponse response = solrServerFrom.query(solrQuery);
            SolrDocumentList sourceDocs = response.getResults();
            System.out.println("retrieved document query OK: " + sourceDocs.getNumFound());
            if (sourceDocs.isEmpty() || page > MAX_PAGES_TO_TAG) {
                log.flush();
                report();
                return recordsPassed;
            }

            int retry = 0;
            while (retry < MAX_RETRIES) {
                try {
                    List<SolrInputDocument> destDocs = new ArrayList<SolrInputDocument>();
                    tagDocumentList(sourceDocs, destDocs);
                    recordsPassed += sourceDocs.size();
                    System.out.println("Let's try");
                    solrServerTo.add(destDocs);
                    log.println(new Date() + "Completed " + recordsPassed + " SOLR documents");
                    log.println("Passed " + originalWordCount + " original words and added "
                            + enrichmentWordCount + " enrichment words");
                    break;
                } catch (Exception e) {
                    e.printStackTrace();
                    log.println(new Date() + "Stopped at document " + recordsPassed);
                    log.println(new Date() + "Retry " + retry + " failed, keep trying");
                    log.flush();
                    Thread.sleep(60000 * retry);
                    retry++;
                } finally {
                    log.flush();
                }
            }

            if (retry >= MAX_RETRIES) {
                throw new Exception("Failed completely.");
            }
        } else {
            log.println("Skipping page " + page);
            log.flush();
        }
    }
}

From source file:eu.annocultor.data.sources.SolrServerDataSource.java

License:Apache License

@Override
protected boolean parseQuery(DefaultHandler handler, String query, Path recordSeparatingPath,
        Path recordIdentifyingPath) throws Exception {

    ConverterHandlerDataObjects flatHandler = makeHandler(handler, recordSeparatingPath);

    boolean passedARecord = false;

    SolrQuery solrQuery = new SolrQuery();
    solrQuery.setQueryType("advanced");
    solrQuery.setQuery(query);
    solrQuery.setRows(500);
    solrQuery.setStart(0);
    solrQuery.setParam("spellcheck", false);

    System.out.println("query: " + solrQuery);
    QueryResponse response = server.query(solrQuery);
    System.out.println(response.getResponseHeader());
    System.out.println(response.getResults().size());
    for (SolrDocument doc : response.getResults()) {

        flatHandler.startDocument();
        passedARecord = true;
        String id = doc.getFirstValue(idField).toString();
        flatHandler.attemptDataObjectChange(id);

        for (String fieldName : doc.getFieldNames()) {

            for (Object value : doc.getFieldValues(fieldName)) {

                String preprocessedValue = preprocessValue(fieldName, value.toString());
                if (preprocessedValue != null) {
                    flatHandler.addField(fieldName, new LiteralValue(preprocessedValue));
                    System.out.println(id + "-" + fieldName + "-" + preprocessedValue);
                }
            }
        }
        flatHandler.endDocument();
    }
    return passedARecord;
}

From source file:eu.clarin.cmdi.vlo.importer.MetadataImporter.java

/**
 * Update "days since last import" field for all Solr records of dataRoot.
 * Notice that it will not touch records that have a "last seen" value newer
 * than today. Therefore this should be called <em>after</em> normal 
 * processing of data root!
 *
 * @param dataRoot
 * @throws SolrServerException
 * @throws IOException
 */
private void updateDaysSinceLastImport(DataRoot dataRoot) throws SolrServerException, IOException {
    LOG.info("Updating \"days since last import\" in Solr for: {}", dataRoot.getOriginName());

    SolrQuery query = new SolrQuery();
    query.setQuery(
            //we're going to process all records in the current data root...
            FacetConstants.FIELD_DATA_PROVIDER + ":" + ClientUtils.escapeQueryChars(dataRoot.getOriginName())
                    + " AND "
                    // ...that have a "last seen" value _older_ than today (on update/initialisation all records get 0 so we can skip the rest)
                    + FacetConstants.FIELD_LAST_SEEN + ":[* TO NOW-1DAY]");
    query.setFields(FacetConstants.FIELD_ID, FacetConstants.FIELD_LAST_SEEN);
    int fetchSize = 1000;
    query.setRows(fetchSize);
    QueryResponse rsp = solrServer.query(query);

    final long totalResults = rsp.getResults().getNumFound();
    final LocalDate nowDate = LocalDate.now();

    final int docsListSize = config.getMaxDocsInList();
    List<SolrInputDocument> updateDocs = new ArrayList<>(docsListSize);

    Boolean updatedDocs = false;
    int offset = 0;

    while (offset < totalResults) {
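        // each pass moves the query offset forward by fetchSize, walking the full
        // result set one window at a time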
        query.setStart(offset);
        query.setRows(fetchSize);

        for (SolrDocument doc : solrServer.query(query).getResults()) {
            updatedDocs = true;

            String recordId = (String) doc.getFieldValue(FacetConstants.FIELD_ID);
            Date lastImportDate = (Date) doc.getFieldValue(FacetConstants.FIELD_LAST_SEEN);
            LocalDate oldDate = lastImportDate.toInstant().atZone(ZoneId.systemDefault()).toLocalDate();
            long daysSinceLastSeen = DAYS.between(oldDate, nowDate);

            SolrInputDocument updateDoc = new SolrInputDocument();
            updateDoc.setField(FacetConstants.FIELD_ID, recordId);

            Map<String, Long> partialUpdateMap = new HashMap<>();
            partialUpdateMap.put("set", daysSinceLastSeen);
            updateDoc.setField(FacetConstants.FIELD_DAYS_SINCE_LAST_SEEN, partialUpdateMap);

            updateDocs.add(updateDoc);

            if (updateDocs.size() == docsListSize) {
                solrServer.add(updateDocs);
                if (serverError != null) {
                    throw new SolrServerException(serverError);
                }
                updateDocs = new ArrayList<>(docsListSize);
            }
        }
        offset += fetchSize;
        LOG.info("Updating \"days since last import\": {} out of {} records updated", offset, totalResults);
    }

    if (!updateDocs.isEmpty()) {
        solrServer.add(updateDocs);
        if (serverError != null) {
            throw new SolrServerException(serverError);
        }
    }

    if (updatedDocs) {
        solrServer.commit();
    }

    LOG.info("Updating \"days since last import\" done.");
}

From source file:eu.europeana.core.BeanQueryModelFactory.java

License:EUPL

@Override
public List<?> getDocIdList(Map<String, String[]> params, Locale locale)
        throws EuropeanaQueryException, SolrServerException {
    SolrQuery solrQuery = createFromQueryParams(params, locale);
    Integer start = solrQuery.getStart();
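    // back up two positions (when the offset allows) and fetch three rows, presumably so
    // the previous, current and next record ids are returned together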
    if (start > 1) {
        solrQuery.setStart(start - 2);
    }
    solrQuery.setRows(3);
    solrQuery.setFields("europeana_uri");
    // Fetch results from server
    final PortalTheme theme = ThemeFilter.getTheme();
    if (theme != null) {
        solrServer.setBaseURL(theme.getSolrSelectUrl());
    }
    QueryResponse queryResponse = solrServer.query(solrQuery);
    // fetch beans
    return getDocIdsFromQueryResponse(queryResponse);
}

From source file:eu.europeana.core.BeanQueryModelFactory.java

License:EUPL

/**
 * Get records from Solr for a particular collection for the sitemap.
 *
 * @param europeanaCollectionName the europeana collectionName as stored in the EuropeanaCollection Domain object
 * @param rowsReturned            number of rows to be returned from Solr
 * @param pageNumber              which page of the sitemap per collection will be returned.
 * @return list of IdBeans
 * @throws EuropeanaQueryException
 * @throws SolrServerException
 */
@Override
public SiteMapBeanView getSiteMapBeanView(String europeanaCollectionName, int rowsReturned, int pageNumber)
        throws EuropeanaQueryException, SolrServerException {
    SolrQuery solrQuery = new SolrQuery("PROVIDER:\"" + europeanaCollectionName + "\"");
    solrQuery.setRows(rowsReturned);
    solrQuery.setFields("europeana_uri", "timestamp");
    solrQuery.setStart(pageNumber * rowsReturned);
    final PortalTheme theme = ThemeFilter.getTheme();
    if (theme != null) {
        solrServer.setBaseURL(theme.getSolrSelectUrl());
    }
    QueryResponse queryResponse = solrServer.query(solrQuery);
    return new SiteMapBeanViewImpl(europeanaCollectionName, queryResponse, rowsReturned);
}

From source file:eu.europeana.core.BeanQueryModelFactory.java

License:EUPL

private QueryResponse getSolrResponseFromServer(SolrQuery solrQuery, boolean decrementStart)
        throws EuropeanaQueryException {
    if (solrQuery.getStart() != null && solrQuery.getStart() < 0) {
        solrQuery.setStart(0);
        log.warn("Solr Start cannot be negative");
    }
    // solr query is 0 based
    if (decrementStart && solrQuery.getStart() != null && solrQuery.getStart() > 0) {
        solrQuery.setStart(solrQuery.getStart() - 1);
    }
    QueryResponse queryResponse;
    // todo: add view limitation to query
    try {
        final PortalTheme theme = ThemeFilter.getTheme();
        if (theme != null) {
            solrServer.setBaseURL(theme.getSolrSelectUrl());
        }
        queryResponse = solrServer.query(solrQuery);
    } catch (SolrException e) {
        log.error("unable to execute SolrQuery", e);
        throw new EuropeanaQueryException(QueryProblem.MALFORMED_QUERY.toString(), e);
    } catch (SolrServerException e) {
        //todo determine which errors the SolrServer can throw
        log.error("Unable to fetch result", e);
        if (e.getMessage().equalsIgnoreCase("Error executing query")) {
            throw new EuropeanaQueryException(QueryProblem.MALFORMED_QUERY.toString(), e);
        } else {
            throw new EuropeanaQueryException(QueryProblem.SOLR_UNREACHABLE.toString(), e);
        }
    }
    return queryResponse;
}

From source file:eu.europeana.core.BeanQueryModelFactory.java

License:EUPL

@Override
public QueryResponse getSolrResponse(SolrQuery solrQuery, boolean isBriefDoc, Map<String, String[]> params)
        throws EuropeanaQueryException { // add bean to ???
    // since we make a defensive copy before the start is decremented we must do it here
    if (solrQuery.getStart() != null && solrQuery.getStart() > 0) {
        solrQuery.setStart(solrQuery.getStart() - 1);
    }
    // set facets
    if (isBriefDoc) {
        // only show spelling-suggestion on the first result page
        if ((solrQuery.getStart() == null || solrQuery.getStart() == 0)
                && solrQuery.getFilterQueries() == null) {
            // give spelling suggestions
            solrQuery.setParam("spellcheck", true);
            solrQuery.setParam("spellcheck.collate", true);
            solrQuery.setParam("spellcheck.extendedResults", true);
            solrQuery.setParam("spellcheck.onlyMorePopular", true);
            //                solrQuery.setParam("spellcheck.count", "4");
        }
        solrQuery.setFacet(true);
        solrQuery.setFacetMinCount(1);
        //solrQuery.setFacetLimit(100); solr default is 100 so doesn't need to be set explicitly
        if (solrQuery.getRows() == null) {
            solrQuery.setRows(12);
        }
        solrQuery.addFacetField(ThemeFilter.getTheme().getRecordDefinition().getFacetFieldStrings());
        // todo now hard-coded but these values must be retrieved from the RecordDefinition later
        if (solrQuery.getFields() == null) {
            solrQuery.setFields(
                    "europeana_uri,dc_title,europeana_object,dc_creator,europeana_year,europeana_provider,"
                            + "europeana_dataProvider,europeana_language,europeana_type,dc_description,dc_type");
            //            solrQuery.setFields("*,score");
            //            solrQuery.setFields(metadataModel.getRecordDefinition().getFieldStrings());
        }
        if (solrQuery.getQueryType().equalsIgnoreCase(QueryType.SIMPLE_QUERY.toString())) {
            solrQuery.setQueryType(queryAnalyzer
                    .findSolrQueryType(solrQuery.getQuery(), ThemeFilter.getTheme().getRecordDefinition())
                    .toString());
        }
    }
    SolrQuery dCopy = addHiddenQueryFilters(solrQuery, params);
    return getSolrResponseFromServer(dCopy, false);
}