List of usage examples for org.apache.solr.client.solrj SolrQuery setStart
public SolrQuery setStart(Integer start)
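Before the examples: setStart sets the zero-based offset of the first row Solr returns, and is almost always paired with setRows (the page size). A minimal sketch, with a placeholder query string:

import org.apache.solr.client.solrj.SolrQuery;

// Request the third page of 20 results, i.e. rows 40..59 of the match set.
SolrQuery query = new SolrQuery("title:solr"); // hypothetical query
query.setRows(20);   // page size
query.setStart(40);  // zero-based offset: skip the first two pages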
From source file:edu.vt.vbi.patric.portlets.TranscriptomicsGene.java
License:Apache License
public void serveResource(ResourceRequest request, ResourceResponse response) throws PortletException, IOException {
    String callType = request.getParameter("callType");
    if (callType != null) {
        switch (callType) {
        case "saveParams": {
            String keyword = request.getParameter("keyword");
            DataApiHandler dataApi = new DataApiHandler(request);

            Map<String, String> key = new HashMap<>();
            key.put("keyword", "locus_tag:(" + keyword + ") OR refseq_locus_tag:(" + keyword + ") ");
            key.put("fields", "pid");
            SolrQuery query = dataApi.buildSolrQuery(key, null, null, 0, -1, false);
            String apiResponse = dataApi.solrQuery(SolrCore.TRANSCRIPTOMICS_GENE, query);

            Map resp = jsonReader.readValue(apiResponse);
            Map respBody = (Map) resp.get("response");
            List<Map> sdl = (List<Map>) respBody.get("docs");

            Set<String> sampleIds = new HashSet<>();
            for (Map doc : sdl) {
                sampleIds.add(doc.get("pid").toString());
            }
            String sId = StringUtils.join(sampleIds, ",");

            key = new HashMap<>();
            if (!keyword.equals("")) {
                key.put("keyword", keyword);
            }
            response.setContentType("text/html");
            PrintWriter writer = response.getWriter();
            if (!sId.equals("")) {
                key.put("sampleId", sId);
                long pk = (new Random()).nextLong();
                SessionHandler.getInstance().set(SessionHandler.PREFIX + pk, jsonWriter.writeValueAsString(key));
                writer.write("" + pk);
            } else {
                writer.write("");
            }
            writer.close();
            break;
        }
        case "getTables": {
            String expId = request.getParameter("expId");
            String sampleId = request.getParameter("sampleId");
            String wsExperimentId = request.getParameter("wsExperimentId");
            String wsSampleId = request.getParameter("wsSampleId");
            String keyword = request.getParameter("keyword");
            DataApiHandler dataApi = new DataApiHandler(request);

            JSONArray sample = new JSONArray();
            if ((sampleId != null && !sampleId.equals("")) || (expId != null && !expId.equals(""))) {
                String query_keyword = "";
                if (expId != null && !expId.equals("")) {
                    query_keyword += "eid:(" + expId.replaceAll(",", " OR ") + ")";
                }
                if (sampleId != null && !sampleId.equals("")) {
                    if (query_keyword.length() > 0) {
                        query_keyword += " AND ";
                    }
                    query_keyword += "pid:(" + sampleId.replaceAll(",", " OR ") + ")";
                }
                Map<String, String> key = new HashMap<>();
                key.put("keyword", query_keyword);
                key.put("fields", "pid,expname,expmean,timepoint,mutant,strain,condition");
                SolrQuery query = dataApi.buildSolrQuery(key, null, null, 0, -1, false);
                String apiResponse = dataApi.solrQuery(SolrCore.TRANSCRIPTOMICS_COMPARISON, query);

                Map resp = jsonReader.readValue(apiResponse);
                Map respBody = (Map) resp.get("response");
                List<Map> sdl = (List<Map>) respBody.get("docs");
                for (final Map doc : sdl) {
                    final JSONObject item = new JSONObject(doc);
                    sample.add(item);
                }
            }

            // Read from JSON if collection parameter is there
            ExpressionDataCollection parser = null;
            if (wsExperimentId != null && !wsExperimentId.equals("")) {
                String token = getAuthorizationToken(request);
                parser = new ExpressionDataCollection(wsExperimentId, token);
                parser.read(ExpressionDataCollection.CONTENT_SAMPLE);
                if (wsSampleId != null && !wsSampleId.equals("")) {
                    parser.filter(wsSampleId, ExpressionDataCollection.CONTENT_SAMPLE);
                }
                // Append samples from collection to samples from DB
                sample = parser.append(sample, ExpressionDataCollection.CONTENT_SAMPLE);
            }

            String sampleList = "";
            sampleList += ((JSONObject) sample.get(0)).get("pid");
            for (int i = 1; i < sample.size(); i++) {
                sampleList += "," + ((JSONObject) sample.get(i)).get("pid");
            }

            JSONObject jsonResult = new JSONObject();
            jsonResult.put(ExpressionDataCollection.CONTENT_SAMPLE + "Total", sample.size());
            jsonResult.put(ExpressionDataCollection.CONTENT_SAMPLE, sample);

            JSONArray expression = new JSONArray();
            if ((sampleId != null && !sampleId.equals("")) || (expId != null && !expId.equals(""))) {
                String query_keyword = "";
                if (keyword != null && !keyword.equals("")) {
                    query_keyword += "(alt_locus_tag:(" + keyword + ") OR refseq_locus_tag:(" + keyword + ")) ";
                }
                if (expId != null && !expId.equals("")) {
                    if (query_keyword.length() > 0) {
                        query_keyword += " AND ";
                    }
                    query_keyword += "eid:(" + expId.replaceAll(",", " OR ") + ")";
                }
                if (sampleId != null && !sampleId.equals("")) {
                    if (query_keyword.length() > 0) {
                        query_keyword += " AND ";
                    }
                    query_keyword += "pid:(" + sampleId.replaceAll(",", " OR ") + ")";
                }
                Map<String, String> key = new HashMap<>();
                key.put("keyword", query_keyword);
                key.put("fields", "pid,refseq_locus_tag,feature_id,log_ratio,z_score");
                SolrQuery query = dataApi.buildSolrQuery(key, null, null, 0, -1, false);
                LOGGER.trace("getTables: [{}] {}", SolrCore.TRANSCRIPTOMICS_GENE.getSolrCoreName(), query);
                String apiResponse = dataApi.solrQuery(SolrCore.TRANSCRIPTOMICS_GENE, query);

                Map resp = jsonReader.readValue(apiResponse);
                Map respBody = (Map) resp.get("response");
                List<Map> sdl = (List<Map>) respBody.get("docs");
                for (final Map doc : sdl) {
                    final JSONObject item = new JSONObject(doc);
                    expression.add(item);
                }

                // TODO: re-implement when data API removes limit 25k records
                int start = 0;
                int fetchedSize = sdl.size();
                while (fetchedSize == 25000) {
                    start += 25000;
                    query.setStart(start);
                    LOGGER.trace("getTables: [{}] {}", SolrCore.TRANSCRIPTOMICS_GENE.getSolrCoreName(), query);
                    final String apiResponseSub = dataApi.solrQuery(SolrCore.TRANSCRIPTOMICS_GENE, query);
                    final Map respSub = jsonReader.readValue(apiResponseSub);
                    final Map respBodySub = (Map) respSub.get("response");
                    sdl = (List<Map>) respBodySub.get("docs");
                    fetchedSize = sdl.size();
                    for (final Map doc : sdl) {
                        final JSONObject item = new JSONObject(doc);
                        expression.add(item);
                    }
                }
            }

            if (wsExperimentId != null && !wsExperimentId.equals("")) {
                parser.read(ExpressionDataCollection.CONTENT_EXPRESSION);
                if (wsSampleId != null && !wsSampleId.equals(""))
                    parser.filter(wsSampleId, ExpressionDataCollection.CONTENT_EXPRESSION);
                // Append expression from collection to expression from DB
                expression = parser.append(expression, ExpressionDataCollection.CONTENT_EXPRESSION);
            }

            JSONArray stats = getExperimentStats(dataApi, expression, sampleList, sample);
            jsonResult.put(ExpressionDataCollection.CONTENT_EXPRESSION + "Total", stats.size());
            jsonResult.put(ExpressionDataCollection.CONTENT_EXPRESSION, stats);

            response.setContentType("application/json");
            PrintWriter writer = response.getWriter();
            jsonResult.writeJSONString(writer);
            writer.close();
            break;
        }
        case "doClustering": {
            String data = request.getParameter("data");
            String g = request.getParameter("g");
            String e = request.getParameter("e");
            String m = request.getParameter("m");
            String ge = request.getParameter("ge");
            String pk = request.getParameter("pk");
            String action = request.getParameter("action");
            String folder = "/tmp/";
            String filename = folder + "tmp_" + pk + ".txt";
            String output_filename = folder + "cluster_tmp_" + pk;
            try {
                PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(filename, true)));
                out.write(data);
                out.close();
            } catch (Exception es) {
                LOGGER.error(es.getMessage(), es);
            }
            response.setContentType("text/html");
            PrintWriter writer = response.getWriter();
            if (action.equals("Run"))
                writer.write(doCLustering(filename, output_filename, g, e, m, ge).toString());
            writer.close();
            break;
        }
        case "saveState": {
            String keyType = request.getParameter("keyType");
            String pageAt = request.getParameter("pageAt");
            String sampleFilter = request.getParameter("sampleFilter");
            String regex = request.getParameter("regex");
            String regexGN = request.getParameter("regexGN");
            String upFold = request.getParameter("upFold");
            String downFold = request.getParameter("downFold");
            String upZscore = request.getParameter("upZscore");
            String downZscore = request.getParameter("downZscore");
            String significantGenes = request.getParameter("significantGenes");
            String ClusterColumnOrder = request.getParameter("ClusterColumnOrder");
            String ClusterRowOrder = request.getParameter("ClusterRowOrder");
            String heatmapState = request.getParameter("heatmapState");
            String heatmapAxis = request.getParameter("heatmapAxis");
            String colorScheme = request.getParameter("colorScheme");
            String filterOffset = request.getParameter("filterOffset");

            Map<String, String> key = new HashMap<>();
            key.put("sampleFilter", (sampleFilter == null) ? "" : sampleFilter);
            key.put("pageAt", (pageAt == null) ? "" : pageAt);
            key.put("regex", (regex == null) ? "" : regex);
            key.put("regexGN", (regexGN == null) ? "" : regexGN);
            key.put("upFold", (upFold == null) ? "" : upFold);
            key.put("downFold", (downFold == null) ? "" : downFold);
            key.put("upZscore", (upZscore == null) ? "" : upZscore);
            key.put("downZscore", (downZscore == null) ? "" : downZscore);
            key.put("significantGenes", (significantGenes == null) ? "" : significantGenes);
            key.put("ClusterRowOrder", (ClusterRowOrder == null) ? "" : ClusterRowOrder);
            key.put("ClusterColumnOrder", (ClusterColumnOrder == null) ? "" : ClusterColumnOrder);
            key.put("heatmapState", (heatmapState == null) ? "" : heatmapState);
            key.put("heatmapAxis", (heatmapAxis == null) ? "" : heatmapAxis);
            key.put("colorScheme", (colorScheme == null) ? "" : colorScheme);
            key.put("filterOffset", (filterOffset == null) ? "" : filterOffset);

            long pk = (new Random()).nextLong();
            SessionHandler.getInstance().set(SessionHandler.PREFIX + pk, jsonWriter.writeValueAsString(key));

            response.setContentType("text/html");
            PrintWriter writer = response.getWriter();
            writer.write("" + pk);
            writer.close();
            break;
        }
        case "getState": {
            String keyType = request.getParameter("keyType");
            String pk = request.getParameter("random");
            if ((pk != null) && (keyType != null)) {
                JSONArray results = new JSONArray();
                JSONObject a = new JSONObject();
                Map<String, String> key = jsonReader
                        .readValue(SessionHandler.getInstance().get(SessionHandler.PREFIX + pk));
                if (key != null) {
                    a.put("sampleFilter", key.get("sampleFilter"));
                    a.put("pageAt", key.get("pageAt"));
                    a.put("regex", key.get("regex"));
                    a.put("regexGN", key.get("regexGN"));
                    a.put("upFold", key.get("upFold"));
                    a.put("downFold", key.get("downFold"));
                    a.put("upZscore", key.get("upZscore"));
                    a.put("downZscore", key.get("downZscore"));
                    a.put("significantGenes", key.get("significantGenes"));
                    a.put("ClusterRowOrder", key.get("ClusterRowOrder"));
                    a.put("ClusterColumnOrder", key.get("ClusterColumnOrder"));
                    a.put("heatmapState", key.get("heatmapState"));
                    a.put("heatmapAxis", key.get("heatmapAxis"));
                    a.put("colorScheme", key.get("colorScheme"));
                    a.put("filterOffset", key.get("filterOffset"));
                }
                results.add(a);
                response.setContentType("application/json");
                PrintWriter writer = response.getWriter();
                results.writeJSONString(writer);
                writer.close();
            }
            break;
        }
        case "downloadFeatures": {
            String featureIds = request.getParameter("featureIds");
            String fileFormat = request.getParameter("fileFormat");
            String fileName = "Table_Gene";

            List<String> tableHeader = DownloadHelper.getHeaderForFeatures();
            List<String> tableField = DownloadHelper.getFieldsForFeatures();
            JSONArray tableSource = new JSONArray();

            DataApiHandler dataApi = new DataApiHandler(request);
            SolrQuery query = new SolrQuery("feature_id:(" + featureIds.replaceAll(",", " OR ") + ")");
            query.setFields(StringUtils.join(DownloadHelper.getFieldsForFeatures(), ","));
            query.setRows(dataApi.MAX_ROWS);
            LOGGER.trace("downloadFeatures: [{}] {}", SolrCore.FEATURE.getSolrCoreName(), query);

            final String apiResponse = dataApi.solrQuery(SolrCore.FEATURE, query);
            final Map resp = jsonReader.readValue(apiResponse);
            final Map respBody = (Map) resp.get("response");
            final List<GenomeFeature> features = (List) dataApi.bindDocuments((List) respBody.get("docs"), GenomeFeature.class);
            for (final GenomeFeature feature : features) {
                tableSource.add(feature.toJSONObject());
            }

            final ExcelHelper excel = new ExcelHelper("xssf", tableHeader, tableField, tableSource);
            excel.buildSpreadsheet();
            if (fileFormat.equalsIgnoreCase("xlsx")) {
                response.setContentType("application/octetstream");
                response.addProperty("Content-Disposition", "attachment; filename=\"" + fileName + "." + fileFormat + "\"");
                excel.writeSpreadsheettoBrowser(response.getPortletOutputStream());
            } else {
                response.setContentType("application/octetstream");
                response.addProperty("Content-Disposition", "attachment; filename=\"" + fileName + "." + fileFormat + "\"");
                response.getWriter().write(excel.writeToTextFile());
            }
        }
        }
    }
}
From source file:edu.vt.vbi.patric.proteinfamily.FIGfamData.java
License:Apache License
public String getGenomeIdsForTaxon(ResourceRequest request) throws IOException {
    String taxon = request.getParameter("taxonId");
    String genomeFilter = request.getParameter("genomeFilter");
    List<String> gIds = new ArrayList<>();

    SolrQuery query = new SolrQuery("patric_cds:[1 TO *] AND taxon_lineage_ids:" + taxon);
    if (genomeFilter != null && !genomeFilter.equals("")) {
        query.addFilterQuery(genomeFilter);
    }
    query.addField("genome_id");
    query.setSort("genome_name", SolrQuery.ORDER.asc);
    query.setRows(DataApiHandler.MAX_ROWS);
    LOGGER.trace("getGenomeIdsForTaxon: [{}] {}", SolrCore.GENOME.getSolrCoreName(), query);

    String apiResponse = dataApiHandler.solrQuery(SolrCore.GENOME, query);
    Map resp = jsonReader.readValue(apiResponse);
    Map respBody = (Map) resp.get("response");
    List<Genome> genomes = dataApiHandler.bindDocuments((List<Map>) respBody.get("docs"), Genome.class);
    for (final Genome g : genomes) {
        gIds.add(g.getId());
    }

    // TODO: remove this when data API limit is removed
    if (gIds.size() == 25000) {
        query.setStart(25000);
        apiResponse = dataApiHandler.solrQuery(SolrCore.GENOME, query);
        resp = jsonReader.readValue(apiResponse);
        respBody = (Map) resp.get("response");
        genomes = dataApiHandler.bindDocuments((List<Map>) respBody.get("docs"), Genome.class);
        for (final Genome g : genomes) {
            gIds.add(g.getId());
        }
    }
    return StringUtils.join(gIds, ",");
}
From source file:edu.vt.vbi.patric.proteinfamily.FIGfamData.java
License:Apache License
@SuppressWarnings("unchecked")
public void getGroupStats(ResourceRequest request, PrintWriter writer) throws IOException {
    DataApiHandler dataApi = new DataApiHandler(request);
    JSONObject figfams = new JSONObject();
    Set<String> figfamIdList = new HashSet<>();
    List<String> genomeIdList = new LinkedList<>();
    // get family Type
    final String familyType = request.getParameter("familyType");
    final String familyId = familyType + "_id";
    // get genome list in order
    String genomeIds = request.getParameter("genomeIds");
    try {
        SolrQuery query = new SolrQuery("genome_id:(" + genomeIds.replaceAll(",", " OR ") + ")");
        query.addSort("genome_name", SolrQuery.ORDER.asc).addField("genome_id").setRows(DataApiHandler.MAX_ROWS);
        LOGGER.trace("[{}] {}", SolrCore.GENOME.getSolrCoreName(), query);

        String apiResponse = dataApi.solrQuery(SolrCore.GENOME, query);
        Map resp = jsonReader.readValue(apiResponse);
        Map respBody = (Map) resp.get("response");
        List<Genome> genomes = dataApi.bindDocuments((List<Map>) respBody.get("docs"), Genome.class);
        for (final Genome genome : genomes) {
            genomeIdList.add(genome.getId());
        }
        if (genomeIdList.size() == 25000) {
            query.setStart(25000);
            apiResponse = dataApi.solrQuery(SolrCore.GENOME, query);
            resp = jsonReader.readValue(apiResponse);
            respBody = (Map) resp.get("response");
            genomes = dataApi.bindDocuments((List<Map>) respBody.get("docs"), Genome.class);
            for (final Genome genome : genomes) {
                genomeIdList.add(genome.getId());
            }
        }
    } catch (IOException e) {
        LOGGER.error(e.getMessage(), e);
    }
    // LOGGER.debug("genomeIdList: {}", genomeIdList);

    // getting genome counts per figfamID (figfam)
    // {stat:{field:{field:figfam_id,limit:-1,facet:{min:"min(aa_length)",max:"max(aa_length)",mean:"avg(aa_length)",ss:"sumsq(aa_length)",sum:"sum(aa_length)",dist:"percentile(aa_length,50,75,99,99.9)",field:{field:genome_id}}}}}
    try {
        long start = System.currentTimeMillis();
        SolrQuery query = new SolrQuery("annotation:PATRIC AND feature_type:CDS");
        // query.addFilterQuery("end:[3200 TO 4300] OR end:[4400 TO 4490] OR end:[4990 TO 4999]");
        query.addFilterQuery(getSolrQuery(request));
        query.addFilterQuery("!" + familyId + ":\"\"");
        query.setRows(0).setFacet(true).set("facet.threads", 15);
        query.add("json.facet", "{stat:{type:field,field:genome_id,limit:-1,facet:{figfams:{type:field,field:" + familyId + ",limit:-1,sort:{index:asc}}}}}");
        LOGGER.trace("getGroupStats() 1/3: [{}] {}", SolrCore.FEATURE.getSolrCoreName(), query);

        String apiResponse = dataApi.solrQuery(SolrCore.FEATURE, query);
        long point = System.currentTimeMillis();
        LOGGER.debug("1st query: {} ms", (point - start));
        start = point;

        Map resp = jsonReader.readValue(apiResponse);
        Map facets = (Map) resp.get("facets");
        Map stat = (Map) facets.get("stat");

        final Map<String, String> figfamGenomeIdStr = new LinkedHashMap<>();
        final Map<String, Integer> figfamGenomeCount = new LinkedHashMap<>();
        final int genomeTotal = genomeIdList.size();
        final Map<String, Integer> genomePosMap = new LinkedHashMap<>();
        for (String genomeId : genomeIdList) {
            genomePosMap.put(genomeId, genomeIdList.indexOf(genomeId));
        }
        final Map<String, List> figfamGenomeIdCountMap = new ConcurrentHashMap<>();
        final Map<String, Set> figfamGenomeIdSet = new ConcurrentHashMap<>();

        List<Map> genomeBuckets = (List<Map>) stat.get("buckets");
        for (final Map bucket : genomeBuckets) {
            final String genomeId = (String) bucket.get("val");
            final List<Map> figfamBucket = (List<Map>) ((Map) bucket.get("figfams")).get("buckets");
            for (final Map figfam : figfamBucket) {
                final String figfamId = (String) figfam.get("val");
                final String genomeCount = String.format("%02x", (Integer) figfam.get("count"));
                if (figfamGenomeIdCountMap.containsKey(figfamId)) {
                    figfamGenomeIdCountMap.get(figfamId).set(genomePosMap.get(genomeId), genomeCount);
                } else {
                    final List<String> genomeIdCount = new LinkedList<>(Collections.nCopies(genomeTotal, "00"));
                    genomeIdCount.set(genomePosMap.get(genomeId), genomeCount);
                    figfamGenomeIdCountMap.put(figfamId, genomeIdCount);
                }
                if (figfamGenomeIdSet.containsKey(figfamId)) {
                    figfamGenomeIdSet.get(figfamId).add(genomeId);
                } else {
                    final Set<String> genomeIdSet = new HashSet<>();
                    genomeIdSet.add(genomeId);
                    figfamGenomeIdSet.put(figfamId, genomeIdSet);
                }
            }
        }
        for (String figfamId : figfamGenomeIdCountMap.keySet()) {
            final List genomeIdStr = figfamGenomeIdCountMap.get(figfamId);
            figfamGenomeIdStr.put(figfamId, StringUtils.join(genomeIdStr, ""));
            figfamGenomeCount.put(figfamId, figfamGenomeIdSet.get(figfamId).size());
        }
        point = System.currentTimeMillis();
        LOGGER.debug("1st query process : {} ms, figfamGenomeIdStr:{}, figfamGenomeCount:{}", (point - start), figfamGenomeIdStr.size(), figfamGenomeCount.size());

        long start2nd = System.currentTimeMillis();
        // 2nd query
        query.set("json.facet", "{stat:{type:field,field:" + familyId + ",limit:-1,facet:{min:\"min(aa_length)\",max:\"max(aa_length)\",mean:\"avg(aa_length)\",ss:\"sumsq(aa_length)\",sum:\"sum(aa_length)\"}}}");
        LOGGER.trace("getGroupStats() 2/3: [{}] {}", SolrCore.FEATURE.getSolrCoreName(), query);

        apiResponse = dataApi.solrQuery(SolrCore.FEATURE, query);
        point = System.currentTimeMillis();
        LOGGER.debug("2nd query: {} ms", (point - start2nd));
        start2nd = point;

        resp = jsonReader.readValue(apiResponse);
        facets = (Map) resp.get("facets");
        stat = (Map) facets.get("stat");

        List<Map> buckets = (List<Map>) stat.get("buckets");
        for (Map bucket : buckets) {
            final String figfamId = (String) bucket.get("val");
            final int count = (Integer) bucket.get("count");
            double min, max, mean, sumsq, sum;
            if (bucket.get("min") instanceof Double) {
                min = (Double) bucket.get("min");
            } else if (bucket.get("min") instanceof Integer) {
                min = ((Integer) bucket.get("min")).doubleValue();
            } else {
                min = 0;
            }
            if (bucket.get("max") instanceof Double) {
                max = (Double) bucket.get("max");
            } else if (bucket.get("max") instanceof Integer) {
                max = ((Integer) bucket.get("max")).doubleValue();
            } else {
                max = 0;
            }
            if (bucket.get("mean") instanceof Double) {
                mean = (Double) bucket.get("mean");
            } else if (bucket.get("mean") instanceof Integer) {
                mean = ((Integer) bucket.get("mean")).doubleValue();
            } else {
                mean = 0;
            }
            if (bucket.get("ss") instanceof Double) {
                sumsq = (Double) bucket.get("ss");
            } else if (bucket.get("ss") instanceof Integer) {
                sumsq = ((Integer) bucket.get("ss")).doubleValue();
            } else {
                sumsq = 0;
            }
            if (bucket.get("sum") instanceof Double) {
                sum = (Double) bucket.get("sum");
            } else if (bucket.get("sum") instanceof Integer) {
                sum = ((Integer) bucket.get("sum")).doubleValue();
            } else {
                sum = 0;
            }
            // LOGGER.debug("bucket:{}, sumsq:{}, count: {}", bucket, sumsq, count);
            double std;
            if (count > 1) {
                // std = Math.sqrt(sumsq / (count - 1));
                final double realSq = sumsq - (sum * sum) / count;
                std = Math.sqrt(realSq / (count - 1));
            } else {
                std = 0;
            }
            final JSONObject aaLength = new JSONObject();
            aaLength.put("min", min);
            aaLength.put("max", max);
            aaLength.put("mean", mean);
            aaLength.put("stddev", std);

            figfamIdList.add(figfamId);

            final JSONObject figfam = new JSONObject();
            figfam.put("genomes", figfamGenomeIdStr.get(figfamId));
            figfam.put("genome_count", figfamGenomeCount.get(figfamId));
            figfam.put("feature_count", count);
            figfam.put("stats", aaLength);
            figfams.put(figfamId, figfam);
        }
        point = System.currentTimeMillis();
        LOGGER.debug("2nd query process: {} ms", (point - start2nd));
    } catch (IOException e) {
        LOGGER.error(e.getMessage(), e);
    }

    // getting distinct figfam_product
    if (!figfamIdList.isEmpty()) {
        figfamIdList.remove("");
        try {
            SolrQuery query = new SolrQuery("family_id:(" + StringUtils.join(figfamIdList, " OR ") + ")");
            query.addFilterQuery("family_type:" + familyType);
            query.addField("family_id,family_product").setRows(figfamIdList.size());
            LOGGER.debug("getGroupStats() 3/3: [{}] {}", SolrCore.FIGFAM_DIC.getSolrCoreName(), query);

            String apiResponse = dataApi.solrQuery(SolrCore.FIGFAM_DIC, query);
            Map resp = jsonReader.readValue(apiResponse);
            Map respBody = (Map) resp.get("response");
            List<Map> sdl = (List<Map>) respBody.get("docs");
            for (final Map doc : sdl) {
                final JSONObject figfam = (JSONObject) figfams.get(doc.get("family_id"));
                figfam.put("description", doc.get("family_product"));
                figfams.put(doc.get("family_id").toString(), figfam);
            }
            int i = 1;
            while (sdl.size() == 25000) {
                query.setStart(25000 * i);
                apiResponse = dataApi.solrQuery(SolrCore.FIGFAM_DIC, query);
                resp = jsonReader.readValue(apiResponse);
                respBody = (Map) resp.get("response");
                sdl = (List<Map>) respBody.get("docs");
                for (final Map doc : sdl) {
                    final JSONObject figfam = (JSONObject) figfams.get(doc.get("family_id"));
                    figfam.put("description", doc.get("family_product"));
                    figfams.put(doc.get("family_id").toString(), figfam);
                }
                i++;
            }
        } catch (IOException e) {
            LOGGER.error(e.getMessage(), e);
            LOGGER.debug("::getGroupStats() 3/3, params: {}", request.getParameterMap().toString());
        }
        figfams.writeJSONString(writer);
    }
}
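The standard-deviation step in the second query derives the sample variance from Solr's sum and sumsq facet aggregates via the identity sum((x - mean)^2) = sumsq - sum^2/n. The same computation in isolation, as a small sketch:

// sum = Σx, sumsq = Σx², count = n, for the aa_length values in one bucket.
static double sampleStdDev(double sum, double sumsq, int count) {
    if (count <= 1) {
        return 0; // matches the code above: a single value has no spread
    }
    double centeredSumSq = sumsq - (sum * sum) / count; // Σ(x - mean)²
    return Math.sqrt(centeredSumSq / (count - 1));      // sample standard deviation
}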
From source file:eu.annocultor.converters.solr.SolrDocumentTagger.java
License:Apache License
public int tag() throws Exception {
    int recordsPassed = 0;
    for (int page = 0; true; page++) {
        int queryStart = page * DOCUMENTS_PER_READ;
        int queryEnd = queryStart + DOCUMENTS_PER_READ;
        if (queryEnd > start) {
            SolrQuery solrQuery = new SolrQuery(query);
            solrQuery.setStart(queryStart);
            solrQuery.setRows(DOCUMENTS_PER_READ);
            if (FIELDS_TO_EXTRACT.length > 0) {
                solrQuery.setFields(FIELDS_TO_EXTRACT);
            }
            QueryResponse response = solrServerFrom.query(solrQuery);
            SolrDocumentList sourceDocs = response.getResults();
            System.out.println("retrieved document query OK: " + sourceDocs.getNumFound());
            if (sourceDocs.isEmpty() || page > MAX_PAGES_TO_TAG) {
                log.flush();
                report();
                return recordsPassed;
            }
            int retry = 0;
            while (retry < MAX_RETRIES) {
                try {
                    List<SolrInputDocument> destDocs = new ArrayList<SolrInputDocument>();
                    tagDocumentList(sourceDocs, destDocs);
                    recordsPassed += sourceDocs.size();
                    System.out.println("Let's try");
                    solrServerTo.add(destDocs);
                    log.println(new Date() + "Completed " + recordsPassed + " SOLR documents");
                    log.println("Passed " + originalWordCount + " original words and added " + enrichmentWordCount + " enrichment words");
                    break;
                } catch (Exception e) {
                    e.printStackTrace();
                    log.println(new Date() + "Stopped at document " + recordsPassed);
                    log.println(new Date() + "Retry " + retry + " failed, keep trying");
                    log.flush();
                    Thread.sleep(60000 * retry);
                    retry++;
                } finally {
                    log.flush();
                }
            }
            if (retry >= MAX_RETRIES) {
                throw new Exception("Failed completely.");
            }
        } else {
            log.println("Skipping page " + page);
            log.flush();
        }
    }
}
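Offset paging like the loop above gets slower as setStart grows, because Solr must collect and discard every preceding row on each request. On Solr 4.7+, cursor paging avoids that cost; a sketch under the assumption that the collection's unique key is "id" (cursors require a sort on the unique key and must not be combined with setStart):

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.params.CursorMarkParams;

public static void scanAll(HttpSolrClient client, String q) throws Exception {
    SolrQuery query = new SolrQuery(q);
    query.setRows(500);
    query.setSort("id", SolrQuery.ORDER.asc); // a unique-key sort is mandatory for cursors
    String cursorMark = CursorMarkParams.CURSOR_MARK_START;
    boolean done = false;
    while (!done) {
        query.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
        QueryResponse rsp = client.query(query);
        // process rsp.getResults() here ...
        String next = rsp.getNextCursorMark();
        done = cursorMark.equals(next); // an unchanged mark signals the end
        cursorMark = next;
    }
}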
From source file:eu.annocultor.data.sources.SolrServerDataSource.java
License:Apache License
@Override
protected boolean parseQuery(DefaultHandler handler, String query, Path recordSeparatingPath, Path recordIdentifyingPath) throws Exception {
    ConverterHandlerDataObjects flatHandler = makeHandler(handler, recordSeparatingPath);
    boolean passedARecord = false;

    SolrQuery solrQuery = new SolrQuery();
    solrQuery.setQueryType("advanced");
    solrQuery.setQuery(query);
    solrQuery.setRows(500);
    solrQuery.setStart(0);
    solrQuery.setParam("spellcheck", false);
    System.out.println("query: " + solrQuery);

    QueryResponse response = server.query(solrQuery);
    System.out.println(response.getResponseHeader());
    System.out.println(response.getResults().size());
    for (SolrDocument doc : response.getResults()) {
        flatHandler.startDocument();
        passedARecord = true;
        String id = doc.getFirstValue(idField).toString();
        flatHandler.attemptDataObjectChange(id);
        for (String fieldName : doc.getFieldNames()) {
            for (Object value : doc.getFieldValues(fieldName)) {
                String preprocessedValue = preprocessValue(fieldName, value.toString());
                if (preprocessedValue != null) {
                    flatHandler.addField(fieldName, new LiteralValue(preprocessedValue));
                    System.out.println(id + "-" + fieldName + "-" + preprocessedValue);
                }
            }
        }
        flatHandler.endDocument();
    }
    return passedARecord;
}
From source file:eu.clarin.cmdi.vlo.importer.MetadataImporter.java
/**
 * Update "days since last import" field for all Solr records of dataRoot.
 * Notice that it will not touch records that have a "last seen" value newer
 * than today. Therefore this should be called <em>after</em> normal
 * processing of data root!
 *
 * @param dataRoot
 * @throws SolrServerException
 * @throws IOException
 */
private void updateDaysSinceLastImport(DataRoot dataRoot) throws SolrServerException, IOException {
    LOG.info("Updating \"days since last import\" in Solr for: {}", dataRoot.getOriginName());

    SolrQuery query = new SolrQuery();
    query.setQuery(
            //we're going to process all records in the current data root...
            FacetConstants.FIELD_DATA_PROVIDER + ":" + ClientUtils.escapeQueryChars(dataRoot.getOriginName())
                    + " AND "
                    // ...that have a "last seen" value _older_ than today (on update/initialisation all records get 0 so we can skip the rest)
                    + FacetConstants.FIELD_LAST_SEEN + ":[* TO NOW-1DAY]");
    query.setFields(FacetConstants.FIELD_ID, FacetConstants.FIELD_LAST_SEEN);
    int fetchSize = 1000;
    query.setRows(fetchSize);
    QueryResponse rsp = solrServer.query(query);

    final long totalResults = rsp.getResults().getNumFound();
    final LocalDate nowDate = LocalDate.now();
    final int docsListSize = config.getMaxDocsInList();
    List<SolrInputDocument> updateDocs = new ArrayList<>(docsListSize);

    Boolean updatedDocs = false;
    int offset = 0;
    while (offset < totalResults) {
        query.setStart(offset);
        query.setRows(fetchSize);
        for (SolrDocument doc : solrServer.query(query).getResults()) {
            updatedDocs = true;
            String recordId = (String) doc.getFieldValue(FacetConstants.FIELD_ID);
            Date lastImportDate = (Date) doc.getFieldValue(FacetConstants.FIELD_LAST_SEEN);
            LocalDate oldDate = lastImportDate.toInstant().atZone(ZoneId.systemDefault()).toLocalDate();
            long daysSinceLastSeen = DAYS.between(oldDate, nowDate);

            SolrInputDocument updateDoc = new SolrInputDocument();
            updateDoc.setField(FacetConstants.FIELD_ID, recordId);
            Map<String, Long> partialUpdateMap = new HashMap<>();
            partialUpdateMap.put("set", daysSinceLastSeen);
            updateDoc.setField(FacetConstants.FIELD_DAYS_SINCE_LAST_SEEN, partialUpdateMap);

            updateDocs.add(updateDoc);
            if (updateDocs.size() == docsListSize) {
                solrServer.add(updateDocs);
                if (serverError != null) {
                    throw new SolrServerException(serverError);
                }
                updateDocs = new ArrayList<>(docsListSize);
            }
        }
        offset += fetchSize;
        LOG.info("Updating \"days since last import\": {} out of {} records updated", offset, totalResults);
    }
    if (!updateDocs.isEmpty()) {
        solrServer.add(updateDocs);
        if (serverError != null) {
            throw new SolrServerException(serverError);
        }
    }
    if (updatedDocs) {
        solrServer.commit();
    }
    LOG.info("Updating \"days since last import\" done.");
}
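One caveat with the while (offset < totalResults) sweep above: every setStart call re-executes the query, so records added, removed, or re-scored between pages can be skipped or visited twice. Pinning a stable, unique sort order at least makes consecutive windows line up on a static index; a hypothetical one-line adjustment ("id" stands in for the collection's unique key field):

// Give the sweep a deterministic order so successive setStart() windows
// neither overlap nor leave gaps when the default score ordering ties.
query.setSort("id", SolrQuery.ORDER.asc);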
From source file:eu.europeana.core.BeanQueryModelFactory.java
License:EUPL
@Override
public List<?> getDocIdList(Map<String, String[]> params, Locale locale) throws EuropeanaQueryException, SolrServerException {
    SolrQuery solrQuery = createFromQueryParams(params, locale);
    Integer start = solrQuery.getStart();
    // guard against an unset start to avoid an auto-unboxing NullPointerException
    if (start != null && start > 1) {
        solrQuery.setStart(start - 2);
    }
    solrQuery.setRows(3);
    solrQuery.setFields("europeana_uri");
    // Fetch results from server
    final PortalTheme theme = ThemeFilter.getTheme();
    if (theme != null) {
        solrServer.setBaseURL(theme.getSolrSelectUrl());
    }
    QueryResponse queryResponse = solrServer.query(solrQuery);
    // fetch beans
    return getDocIdsFromQueryResponse(queryResponse);
}
From source file:eu.europeana.core.BeanQueryModelFactory.java
License:EUPL
/**
 * Get records from Solr for a particular collection for the siteMap.
 *
 * @param europeanaCollectionName the europeana collectionName as stored in the EuropeanaCollection Domain object
 * @param rowsReturned number of rows to be returned from Solr
 * @param pageNumber which page of the sitemap per collection will be returned.
 * @return list of IdBeans
 * @throws EuropeanaQueryException
 * @throws SolrServerException
 */
@Override
public SiteMapBeanView getSiteMapBeanView(String europeanaCollectionName, int rowsReturned, int pageNumber) throws EuropeanaQueryException, SolrServerException {
    SolrQuery solrQuery = new SolrQuery("PROVIDER:\"" + europeanaCollectionName + "\"");
    solrQuery.setRows(rowsReturned);
    solrQuery.setFields("europeana_uri", "timestamp");
    solrQuery.setStart(pageNumber * rowsReturned);
    final PortalTheme theme = ThemeFilter.getTheme();
    if (theme != null) {
        solrServer.setBaseURL(theme.getSolrSelectUrl());
    }
    QueryResponse queryResponse = solrServer.query(solrQuery);
    return new SiteMapBeanViewImpl(europeanaCollectionName, queryResponse, rowsReturned);
}
From source file:eu.europeana.core.BeanQueryModelFactory.java
License:EUPL
private QueryResponse getSolrResponseFromServer(SolrQuery solrQuery, boolean decrementStart) throws EuropeanaQueryException {
    if (solrQuery.getStart() != null && solrQuery.getStart() < 0) {
        solrQuery.setStart(0);
        log.warn("Solr Start cannot be negative");
    }
    // solr query is 0 based
    if (decrementStart && solrQuery.getStart() != null && solrQuery.getStart() > 0) {
        solrQuery.setStart(solrQuery.getStart() - 1);
    }
    QueryResponse queryResponse;
    // todo: add view limitation to query
    try {
        final PortalTheme theme = ThemeFilter.getTheme();
        if (theme != null) {
            solrServer.setBaseURL(theme.getSolrSelectUrl());
        }
        queryResponse = solrServer.query(solrQuery);
    } catch (SolrException e) {
        log.error("unable to execute SolrQuery", e);
        throw new EuropeanaQueryException(QueryProblem.MALFORMED_QUERY.toString(), e);
    } catch (SolrServerException e) {
        //todo determine which errors the SolrServer can throw
        log.error("Unable to fetch result", e);
        if (e.getMessage().equalsIgnoreCase("Error executing query")) {
            throw new EuropeanaQueryException(QueryProblem.MALFORMED_QUERY.toString(), e);
        } else {
            throw new EuropeanaQueryException(QueryProblem.SOLR_UNREACHABLE.toString(), e);
        }
    }
    return queryResponse;
}
From source file:eu.europeana.core.BeanQueryModelFactory.java
License:EUPL
@Override
public QueryResponse getSolrResponse(SolrQuery solrQuery, boolean isBriefDoc, Map<String, String[]> params) throws EuropeanaQueryException {
    // add bean to ???
    // since we make a defensive copy before the start is decremented we must do it here
    if (solrQuery.getStart() != null && solrQuery.getStart() > 0) {
        solrQuery.setStart(solrQuery.getStart() - 1);
    }
    // set facets
    if (isBriefDoc) {
        // only show spelling-suggestion on the first result page
        if ((solrQuery.getStart() == null || solrQuery.getStart() == 0) && solrQuery.getFilterQueries() == null) {
            // give spelling suggestions
            solrQuery.setParam("spellcheck", true);
            solrQuery.setParam("spellcheck.collate", true);
            solrQuery.setParam("spellcheck.extendedResults", true);
            solrQuery.setParam("spellcheck.onlyMorePopular", true);
            // solrQuery.setParam("spellcheck.count", "4");
        }
        solrQuery.setFacet(true);
        solrQuery.setFacetMinCount(1);
        //solrQuery.setFacetLimit(100); solr default is 100 so doesn't need to be set explicitly
        if (solrQuery.getRows() == null) {
            solrQuery.setRows(12);
        }
        solrQuery.addFacetField(ThemeFilter.getTheme().getRecordDefinition().getFacetFieldStrings());
        // todo now hard-coded but these values must be retrieved from the RecordDefinition later
        if (solrQuery.getFields() == null) {
            solrQuery.setFields("europeana_uri,dc_title,europeana_object,dc_creator,europeana_year,europeana_provider,"
                    + "europeana_dataProvider,europeana_language,europeana_type,dc_description,dc_type");
            // solrQuery.setFields("*,score");
            // solrQuery.setFields(metadataModel.getRecordDefinition().getFieldStrings());
        }
        if (solrQuery.getQueryType().equalsIgnoreCase(QueryType.SIMPLE_QUERY.toString())) {
            solrQuery.setQueryType(queryAnalyzer
                    .findSolrQueryType(solrQuery.getQuery(), ThemeFilter.getTheme().getRecordDefinition())
                    .toString());
        }
    }
    SolrQuery dCopy = addHiddenQueryFilters(solrQuery, params);
    return getSolrResponseFromServer(dCopy, false);
}
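A recurring detail in these BeanQueryModelFactory methods is the setStart(getStart() - 1) adjustment: the portal passes a 1-based start position while Solr offsets are 0-based. The related page-to-offset arithmetic, as a small standalone sketch (applyPage is a hypothetical helper, not part of the class above):

import org.apache.solr.client.solrj.SolrQuery;

// Convert a 1-based UI page number and a page size into Solr's 0-based offset.
static void applyPage(SolrQuery query, int pageNumber, int pageSize) {
    int offset = (pageNumber - 1) * pageSize; // page 1 -> offset 0
    query.setStart(Math.max(0, offset));      // clamp, as getSolrResponseFromServer does
    query.setRows(pageSize);
}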