List of usage examples for org.apache.solr.client.solrj.response.FacetField#getValueCount
public int getValueCount()
From source file:at.pagu.soldockr.core.ResultHelper.java
License:Apache License
static Map<Field, Page<FacetEntry>> convertFacetQueryResponseToFacetPageMap(FacetQuery query, QueryResponse response) {// w w w .j ava2s . co m Assert.notNull(query, "Cannot convert response for 'null', query"); if (!query.hasFacetOptions() || response == null) { return Collections.emptyMap(); } Map<Field, Page<FacetEntry>> facetResult = new HashMap<Field, Page<FacetEntry>>(); if (CollectionUtils.isNotEmpty(response.getFacetFields())) { int initalPageSize = query.getFacetOptions().getPageable().getPageSize(); for (FacetField facetField : response.getFacetFields()) { if (facetField != null && StringUtils.isNotBlank(facetField.getName())) { Field field = new SimpleField(facetField.getName()); if (CollectionUtils.isNotEmpty(facetField.getValues())) { List<FacetEntry> pageEntries = new ArrayList<FacetEntry>(initalPageSize); for (Count count : facetField.getValues()) { if (count != null) { pageEntries.add(new SimpleFacetEntry(field, count.getName(), count.getCount())); } } facetResult.put(field, new FacetPage<FacetEntry>(pageEntries, query.getFacetOptions().getPageable(), facetField.getValueCount())); } else { facetResult.put(field, new FacetPage<FacetEntry>(Collections.<FacetEntry>emptyList(), query.getFacetOptions().getPageable(), 0)); } } } } return facetResult; }
From source file:au.org.ala.biocache.dao.SearchDAOImpl.java
License:Open Source License
/** * Writes the values for the first supplied facet to output stream * * @param includeCount true when the count should be included in the download * @param lookupName true when a name lsid should be looked up in the bie *//*from ww w. j a v a2 s . c o m*/ public void writeFacetToStream(SpatialSearchRequestParams searchParams, boolean includeCount, boolean lookupName, boolean includeSynonyms, boolean includeLists, OutputStream out, DownloadDetailsDTO dd) throws Exception { //set to unlimited facets searchParams.setFlimit(-1); queryFormatUtils.formatSearchQuery(searchParams); String queryString = searchParams.getFormattedQuery(); SolrQuery solrQuery = initSolrQuery(searchParams, false, null); solrQuery.setQuery(queryString); //don't want any results returned solrQuery.setRows(0); searchParams.setPageSize(0); solrQuery.setFacetLimit(FACET_PAGE_SIZE); int offset = 0; boolean shouldLookup = lookupName && (searchParams.getFacets()[0].contains("_guid") || searchParams.getFacets()[0].contains("_lsid")); if (dd != null) { dd.resetCounts(); } QueryResponse qr = runSolrQuery(solrQuery, searchParams); if (logger.isDebugEnabled()) { logger.debug("Retrieved facet results from server..."); } if (qr.getResults().getNumFound() > 0) { FacetField ff = qr.getFacetField(searchParams.getFacets()[0]); //write the header line if (ff != null) { String[] header = new String[] { ff.getName() }; // out.write(ff.getName().getBytes()); if (shouldLookup) { header = speciesLookupService.getHeaderDetails(ff.getName(), includeCount, includeSynonyms); } else if (includeCount) { //out.write(",Count".getBytes()); header = (String[]) ArrayUtils.add(header, "count"); } if (includeLists) { header = (String[]) ArrayUtils.addAll(header, listsService.getTypes().toArray(new String[] {})); } CSVRecordWriter writer = new CSVRecordWriter(new CloseShieldOutputStream(out), header); try { boolean addedNullFacet = false; //out.write("\n".getBytes()); //PAGE through the facets until we reach the end. 
//do not continue when null facet is already added and the next facet is only null while (ff.getValueCount() > 1 || !addedNullFacet || (ff.getValueCount() == 1 && ff.getValues().get(0).getName() != null)) { //process the "species_guid_ facet by looking up the list of guids if (shouldLookup) { List<String> guids = new ArrayList<String>(); List<Long> counts = new ArrayList<Long>(); List<String[]> speciesLists = new ArrayList<String[]>(); if (logger.isDebugEnabled()) { logger.debug("Downloading " + ff.getValueCount() + " species guids"); } for (FacetField.Count value : ff.getValues()) { //only add null facet once if (value.getName() == null) addedNullFacet = true; if (value.getCount() == 0 || (value.getName() == null && addedNullFacet)) continue; guids.add(value.getName()); if (includeCount) { counts.add(value.getCount()); } //Only want to send a sub set of the list so that the URI is not too long for BIE if (guids.size() == 30) { //now get the list of species from the web service TODO may need to move this code //handle null values being returned from the service... writeTaxonDetailsToStream(guids, counts, includeCount, includeSynonyms, includeLists, writer); guids.clear(); counts.clear(); } } //now write any guids that remain at the end of the looping writeTaxonDetailsToStream(guids, counts, includeCount, includeSynonyms, includeLists, writer); } else { //default processing of facets for (FacetField.Count value : ff.getValues()) { //only add null facet once if (value.getName() == null) addedNullFacet = true; if (value.getCount() == 0 || (value.getName() == null && addedNullFacet)) continue; String name = value.getName() != null ? value.getName() : ""; String[] row = includeCount ? 
new String[] { name, Long.toString(value.getCount()) } : new String[] { name }; writer.write(row); } } offset += FACET_PAGE_SIZE; if (dd != null) { dd.updateCounts(FACET_PAGE_SIZE); } //get the next values solrQuery.remove("facet.offset"); solrQuery.add("facet.offset", Integer.toString(offset)); qr = runSolrQuery(solrQuery, searchParams); ff = qr.getFacetField(searchParams.getFacets()[0]); } } finally { writer.finalise(); } } } }
From source file:au.org.ala.biocache.dao.SearchDAOImpl.java
License:Open Source License
/** * Writes all the distinct latitude and longitude in the index to the supplied * output stream./* w w w.j a va 2 s . co m*/ * * @param out * @throws Exception */ public void writeCoordinatesToStream(SearchRequestParams searchParams, OutputStream out) throws Exception { //generate the query to obtain the lat,long as a facet SearchRequestParams srp = new SearchRequestParams(); SearchUtils.setDefaultParams(srp); srp.setFacets(searchParams.getFacets()); SolrQuery solrQuery = initSolrQuery(srp, false, null); //We want all the facets so we can dump all the coordinates solrQuery.setFacetLimit(-1); solrQuery.setFacetSort("count"); solrQuery.setRows(0); solrQuery.setQuery(searchParams.getQ()); QueryResponse qr = runSolrQuery(solrQuery, srp); if (qr.getResults().size() > 0) { FacetField ff = qr.getFacetField(searchParams.getFacets()[0]); if (ff != null && ff.getValueCount() > 0) { out.write("latitude,longitude\n".getBytes()); //write the facets to file for (FacetField.Count value : ff.getValues()) { //String[] slatlon = value.getName().split(","); if (value.getName() != null && value.getCount() > 0) { out.write(value.getName().getBytes()); out.write("\n".getBytes()); } } } } }
From source file:au.org.ala.biocache.dao.SearchDAOImpl.java
License:Open Source License
/**
 * Writes the index fields to the supplied output stream in CSV format.
 * <p>
 * DM: refactored to split the query by month to improve performance.
 * Further enhancements possible:
 * 1) Multi threaded
 * 2) More filtering, by year or decade..
 * <p>
 * Overview of what the method body does:
 * <ol>
 * <li>resolves the requested fields and headers (optionally adding the sensitive fields);</li>
 * <li>runs one facet query to obtain the total record count, the "month" facet used to split
 *     the download, and (optionally) the "assertions" facet;</li>
 * <li>builds the record writer for the requested file type (csv, tsv, otherwise shapefile);</li>
 * <li>starts a single writer thread that drains a bounded queue into that record writer;</li>
 * <li>submits one callable per split query to {@code nextExecutor}; each pages through its
 *     query and pushes rows onto the queue via a queue-backed {@code RecordWriter};</li>
 * <li>polls the futures, propagating interrupts/timeouts through a shared flag;</li>
 * <li>shuts the queue and writer down (soft, then hard) and flushes {@code out}.</li>
 * </ol>
 * A {@code SolrServerException} is logged and not rethrown; the statistics gathered so far are returned.
 *
 * @param downloadParams   supplies the fields, extra fields, QA option, file type and query
 * @param out              the stream the records are written to
 * @param includeSensitive when true the sensitive latitude/longitude/locality fields are added
 *                         to the requested fields
 * @param dd The details of the download
 *           NOTE(review): dd is null-checked in some places but dereferenced unconditionally in
 *           others (custom header, total records, sensitive fq) - confirm callers never pass null
 * @param checkLimit       when true the download is limited to the maximum download size; it is
 *                         switched off when the total record count is below that limit
 * @param nextExecutor The ExecutorService to use to process results on different threads
 * @return the uid statistics map; it also carries the "infoFields..." and "infoHeaders..." keys
 *         (with values -1 and -2) describing the output columns
 * @throws Exception
 */
@Override
public ConcurrentMap<String, AtomicInteger> writeResultsFromIndexToStream(
        final DownloadRequestParams downloadParams, final OutputStream out, final boolean includeSensitive,
        final DownloadDetailsDTO dd, boolean checkLimit, final ExecutorService nextExecutor) throws Exception {
    expandRequestedFields(downloadParams, true);

    if (dd != null) {
        dd.resetCounts();
    }

    // Start time is used for both the overall timeout and the final throughput log line.
    long start = System.currentTimeMillis();
    final ConcurrentMap<String, AtomicInteger> uidStats = new ConcurrentHashMap<>();
    getServer();

    try {
        SolrQuery solrQuery = new SolrQuery();
        queryFormatUtils.formatSearchQuery(downloadParams);

        String dFields = downloadParams.getFields();

        if (includeSensitive) {
            //include raw latitude and longitudes
            // NOTE(review): replaceFirst takes a regex, so the '.' in these patterns matches any character.
            if (dFields.contains("decimalLatitude.p")) {
                dFields = dFields.replaceFirst("decimalLatitude.p",
                        "sensitive_latitude,sensitive_longitude,decimalLatitude.p");
            } else if (dFields.contains("decimalLatitude")) {
                dFields = dFields.replaceFirst("decimalLatitude",
                        "sensitive_latitude,sensitive_longitude,decimalLatitude");
            }
            if (dFields.contains(",locality,")) {
                dFields = dFields.replaceFirst(",locality,", ",locality,sensitive_locality,");
            }
            if (dFields.contains(",locality.p,")) {
                dFields = dFields.replaceFirst(",locality.p,", ",locality.p,sensitive_locality,");
            }
        }

        StringBuilder sb = new StringBuilder(dFields);
        if (!downloadParams.getExtra().isEmpty()) {
            sb.append(",").append(downloadParams.getExtra());
        }

        String[] requestedFields = sb.toString().split(",");

        // indexedFields layout, as used below:
        //   [0] fields included in the download    [1] fields excluded from the download
        //   [2] headers in the download            [3] field names recorded under "infoFields"
        //   [4] analysis headers                   [5] analysis fields
        List<String>[] indexedFields;
        if (downloadFields == null) {
            //default to include everything
            java.util.List<String> mappedNames = new java.util.LinkedList<String>();
            for (int i = 0; i < requestedFields.length; i++)
                mappedNames.add(requestedFields[i]);

            // Note: slots 0, 2 and 3 share the same list instance here.
            indexedFields = new List[] { mappedNames, new java.util.LinkedList<String>(), mappedNames, mappedNames,
                    new ArrayList(), new ArrayList() };
        } else {
            indexedFields = downloadFields.getIndexFields(requestedFields, downloadParams.getDwcHeaders(),
                    downloadParams.getLayersServiceUrl());
        }

        //apply custom header
        // The custom header is read as a flat list of "field,title" pairs.
        String[] customHeader = dd.getRequestParams().getCustomHeader().split(",");
        for (int i = 0; i + 1 < customHeader.length; i += 2) {
            for (int j = 0; j < indexedFields[0].size(); j++) {
                if (customHeader[i].equals(indexedFields[0].get(j))) {
                    indexedFields[2].set(j, customHeader[i + 1]);
                }
            }
            // Iterates by the size of [4] while reading [5]; presumably the two lists are parallel - TODO confirm.
            for (int j = 0; j < indexedFields[4].size(); j++) {
                if (customHeader[i].equals(indexedFields[5].get(j))) {
                    indexedFields[4].set(j, customHeader[i + 1]);
                }
            }
        }
        if (logger.isDebugEnabled()) {
            logger.debug("Fields included in download: " + indexedFields[0]);
            logger.debug("Fields excluded from download: " + indexedFields[1]);
            logger.debug("The headers in downloads: " + indexedFields[2]);
            logger.debug("Analysis headers: " + indexedFields[4]);
            logger.debug("Analysis fields: " + indexedFields[5]);
        }

        //set the fields to the ones that are available in the index
        String[] fields = indexedFields[0].toArray(new String[] {});
        solrQuery.setFields(fields);
        StringBuilder qasb = new StringBuilder();
        if (!"none".equals(downloadParams.getQa())) {
            solrQuery.addField("assertions");
            if (!"all".equals(downloadParams.getQa()) && !"includeall".equals(downloadParams.getQa())) {
                //add all the qa fields
                qasb.append(downloadParams.getQa());
            }
        }
        solrQuery.addField("institution_uid").addField("collection_uid").addField("data_resource_uid")
                .addField("data_provider_uid");

        solrQuery.setQuery(downloadParams.getFormattedQuery());
        solrQuery.setFacetMinCount(1);
        solrQuery.setFacetLimit(-1);

        //get the assertion facets to add them to the download fields
        boolean getAssertionsFromFacets = "all".equals(downloadParams.getQa())
                || "includeall".equals(downloadParams.getQa());
        SolrQuery monthAssertionsQuery = getAssertionsFromFacets
                ? solrQuery.getCopy().addFacetField("month", "assertions")
                : solrQuery.getCopy().addFacetField("month");
        if (getAssertionsFromFacets) {
            //set the order for the facet to be based on the index - this will force the assertions to be returned in the same order each time
            //based on alphabetical sort.  The number of QA's may change between searches so we can't guarantee that the order won't change
            monthAssertionsQuery.add("f.assertions.facet.sort", "index");
        }
        // Zero-row query: only the facets and the total hit count are of interest here.
        QueryResponse facetQuery = runSolrQuery(monthAssertionsQuery, downloadParams.getFormattedFq(), 0, 0,
                "score", "asc");

        //set the totalrecords for the download details
        dd.setTotalRecords(facetQuery.getResults().getNumFound());

        //use a separately configured and smaller limit when output will be unzipped
        final long maxDownloadSize;
        if (MAX_DOWNLOAD_SIZE > unzippedLimit && out instanceof OptionalZipOutputStream
                && ((OptionalZipOutputStream) out).getType() == OptionalZipOutputStream.Type.unzipped) {
            maxDownloadSize = unzippedLimit;
        } else {
            maxDownloadSize = MAX_DOWNLOAD_SIZE;
        }

        // No need to enforce the limit when the whole result set is smaller than it.
        if (checkLimit && dd.getTotalRecords() < maxDownloadSize) {
            checkLimit = false;
        }

        //get the month facets to split the download by, and the assertion facets to add to the download fields
        List<Count> splitByFacet = null;

        for (FacetField facet : facetQuery.getFacetFields()) {
            if (facet.getName().equals("assertions") && facet.getValueCount() > 0) {
                qasb.append(getQAFromFacet(facet));
            }
            if (facet.getName().equals("month") && facet.getValueCount() > 0) {
                splitByFacet = facet.getValues();
            }
        }

        if ("includeall".equals(downloadParams.getQa())) {
            qasb = getAllQAFields();
        }

        String qas = qasb.toString();

        //include sensitive fields in the header when the output will be partially sensitive
        final String[] sensitiveFields;
        final String[] notSensitiveFields;
        if (dd.getSensitiveFq() != null) {
            List<String>[] sensitiveHdr = downloadFields.getIndexFields(sensitiveSOLRHdr,
                    downloadParams.getDwcHeaders(), downloadParams.getLayersServiceUrl());

            //header for the output file
            indexedFields[2].addAll(sensitiveHdr[2]);

            //lookup for fields from sensitive queries
            sensitiveFields = org.apache.commons.lang3.ArrayUtils.addAll(
                    indexedFields[0].toArray(new String[] {}), sensitiveHdr[0].toArray(new String[] {}));

            //use general fields when sensitive data is not permitted
            notSensitiveFields = org.apache.commons.lang3.ArrayUtils
                    .addAll(indexedFields[0].toArray(new String[] {}), notSensitiveSOLRHdr);
        } else {
            sensitiveFields = new String[0];
            notSensitiveFields = fields;
        }

        //add analysis headers
        indexedFields[2].addAll(indexedFields[4]);
        final String[] analysisFields = indexedFields[5].toArray(new String[0]);

        final String[] qaFields = qas.equals("") ? new String[] {} : qas.split(",");
        String[] qaTitles = downloadFields.getHeader(qaFields, false, false);

        String[] header = org.apache.commons.lang3.ArrayUtils.addAll(indexedFields[2].toArray(new String[] {}),
                qaTitles);

        //retain output header fields and field names for inclusion of header info in the download
        // These are stored as KEYS of uidStats (with marker values -1 and -2), after collapsing
        // any empty entries (",,") left by blank field names.
        StringBuilder infoFields = new StringBuilder("infoFields");
        for (String h : indexedFields[3])
            infoFields.append(",").append(h);
        for (String h : qaFields)
            infoFields.append(",").append(h);

        StringBuilder infoHeader = new StringBuilder("infoHeaders");
        for (String h : header)
            infoHeader.append(",").append(h);

        String info = infoFields.toString();
        while (info.contains(",,"))
            info = info.replace(",,", ",");
        uidStats.put(info, new AtomicInteger(-1));
        String hdr = infoHeader.toString();
        while (hdr.contains(",,"))
            hdr = hdr.replace(",,", ",");
        uidStats.put(hdr, new AtomicInteger(-2));

        //construct correct RecordWriter based on the supplied fileType
        // Any file type other than "csv" or "tsv" falls through to the shapefile writer.
        final RecordWriterError rw = downloadParams.getFileType().equals("csv")
                ? new CSVRecordWriter(out, header, downloadParams.getSep(), downloadParams.getEsc())
                : (downloadParams.getFileType().equals("tsv") ? new TSVRecordWriter(out, header)
                        : new ShapeFileRecordWriter(tmpShapefileDir, downloadParams.getFile(), out,
                                (String[]) ArrayUtils.addAll(fields, qaFields)));

        // Requirement to be able to propagate interruptions to all other threads for this execution
        // Doing this via this variable
        final AtomicBoolean interruptFound = dd != null ? dd.getInterrupt() : new AtomicBoolean(false);

        // Create a fixed length blocking queue for buffering results before they are written
        // This also creates a push-back effect to throttle the results generating threads
        // when it fills and offers to it are delayed until the writer consumes elements from the queue
        final BlockingQueue<String[]> queue = new ArrayBlockingQueue<>(resultsQueueLength);
        // Create a sentinel that we can check for reference equality to signal the end of the queue
        final String[] sentinel = new String[0];
        // An implementation of RecordWriter that adds to an in-memory queue
        final RecordWriter concurrentWrapper = new RecordWriter() {
            // Set as soon as finalise() has been entered once; guards against repeated finalisation.
            private AtomicBoolean finalised = new AtomicBoolean(false);
            // Set once the first finalise() call has finished trying to enqueue the sentinel.
            private AtomicBoolean finalisedComplete = new AtomicBoolean(false);

            /**
             * Offers the row to the queue, retrying on timeout until it is accepted or an
             * interrupt/finalisation is observed, in which case the wrapper is finalised
             * and the row is dropped.
             */
            @Override
            public void write(String[] nextLine) {
                try {
                    if (Thread.currentThread().isInterrupted() || interruptFound.get() || finalised.get()) {
                        finalise();
                        return;
                    }
                    while (!queue.offer(nextLine, writerTimeoutWaitMillis, TimeUnit.MILLISECONDS)) {
                        if (Thread.currentThread().isInterrupted() || interruptFound.get()
                                || finalised.get()) {
                            finalise();
                            break;
                        }
                    }
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    interruptFound.set(true);
                    if (logger.isDebugEnabled()) {
                        logger.debug(
                                "Queue failed to accept the next record due to a thread interrupt, calling finalise the cleanup: ",
                                e);
                    }
                    // If we were interrupted then we should call finalise to cleanup
                    finalise();
                }
            }

            /**
             * Enqueues the end-of-stream sentinel; only the first call has any effect.
             */
            @Override
            public void finalise() {
                if (finalised.compareAndSet(false, true)) {
                    try {
                        // Offer the sentinel at least once, even when the thread is interrupted
                        while (!queue.offer(sentinel, writerTimeoutWaitMillis, TimeUnit.MILLISECONDS)) {
                            // If the thread is interrupted then the queue may not have any active consumers,
                            // so don't loop forever waiting for capacity in this case
                            // The hard shutdown phase will use queue.clear to ensure that the
                            // sentinel gets onto the queue at least once
                            if (Thread.currentThread().isInterrupted() || interruptFound.get()) {
                                break;
                            }
                        }
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                        interruptFound.set(true);
                        if (logger.isDebugEnabled()) {
                            logger.debug(
                                    "Queue failed to accept the sentinel in finalise due to a thread interrupt: ",
                                    e);
                        }
                    } finally {
                        finalisedComplete.set(true);
                    }
                }
            }

            @Override
            public boolean finalised() {
                return finalisedComplete.get();
            }
        };

        // A single thread that consumes elements put onto the queue until it sees the sentinel, finalising after the sentinel or an interrupt
        Runnable writerRunnable = new Runnable() {
            @Override
            public void run() {
                try {
                    long counter = 0;
                    while (true) {
                        counter = counter + 1;

                        if (Thread.currentThread().isInterrupted() || interruptFound.get()) {
                            break;
                        }

                        String[] take = queue.take();
                        // Sentinel object equality check to see if we are done
                        if (take == sentinel || Thread.currentThread().isInterrupted()
                                || interruptFound.get()) {
                            break;
                        }
                        // Otherwise write to the wrapped record writer
                        rw.write(take);

                        //test for errors. This can contain a flush so only test occasionally
                        if (counter % resultsQueueLength == 0 && rw.hasError()) {
                            throw RecordWriterException.newRecordWriterException(dd, downloadParams, true, rw);
                        }

                    }
                } catch (RecordWriterException e) {
                    //no trace information is available to print for these errors
                    logger.error(e.getMessage());
                    interruptFound.set(true);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    interruptFound.set(true);
                } catch (Exception e) {
                    // Reuse interruptFound variable to signal that the writer had issues
                    interruptFound.set(true);
                    logger.error("Download writer failed.", e);
                } finally {
                    rw.finalise();
                }
            }
        };
        Thread writerThread = new Thread(writerRunnable);
        writerThread.start();

        try {
            if (rw instanceof ShapeFileRecordWriter) {
                dd.setHeaderMap(((ShapeFileRecordWriter) rw).getHeaderMappings());
            }

            //order the query by _docid_ for faster paging
            solrQuery.addSortField("_docid_", ORDER.asc);

            //for each month create a separate query that pages through downloadBatchSize records per page
            List<SolrQuery> queries = new ArrayList<SolrQuery>();
            if (splitByFacet != null) {
                for (Count facet : splitByFacet) {
                    if (facet.getCount() > 0) {
                        SolrQuery splitByFacetQuery;
                        //do not add remainderQuery here
                        if (facet.getName() != null) {
                            splitByFacetQuery = solrQuery.getCopy()
                                    .addFilterQuery(facet.getFacetField().getName() + ":" + facet.getName());
                            splitByFacetQuery.setFacet(false);
                            queries.add(splitByFacetQuery);
                        }

                    }
                }
                // The remainder query picks up the records that have no value for the split facet;
                // it is placed first in the list.
                if (splitByFacet.size() > 0) {
                    SolrQuery remainderQuery = solrQuery.getCopy()
                            .addFilterQuery("-" + splitByFacet.get(0).getFacetField().getName() + ":[* TO *]");
                    queries.add(0, remainderQuery);
                }
            } else {
                queries.add(0, solrQuery);
            }

            //split into sensitive and non-sensitive queries when
            // - not including all sensitive values
            // - there is a sensitive fq
            final List<SolrQuery> sensitiveQ = new ArrayList<SolrQuery>();
            if (!includeSensitive && dd.getSensitiveFq() != null) {
                sensitiveQ.addAll(
                        splitQueries(queries, dd.getSensitiveFq(), sensitiveSOLRHdr, notSensitiveSOLRHdr));
            }

            // Shared across all callables so the limit applies to the download as a whole.
            final AtomicInteger resultsCount = new AtomicInteger(0);
            // Effectively-final copy of checkLimit for use inside the anonymous Callable.
            final boolean threadCheckLimit = checkLimit;

            List<Callable<Integer>> solrCallables = new ArrayList<>(queries.size());
            // execute each query, writing the results to stream
            for (final SolrQuery splitByFacetQuery : queries) {
                // define a thread
                Callable<Integer> solrCallable = new Callable<Integer>() {
                    /**
                     * Pages through one split query and returns the number of records it processed.
                     */
                    @Override
                    public Integer call() throws Exception {
                        int startIndex = 0;
                        // Randomise the wakeup time so they don't all wakeup on a periodic cycle
                        long localThrottle = throttle + Math.round(Math.random() * throttle);

                        // First page: combine the request's fq with the filter queries of this split query.
                        String[] fq = downloadParams.getFormattedFq();
                        if (splitByFacetQuery.getFilterQueries() != null
                                && splitByFacetQuery.getFilterQueries().length > 0) {
                            if (fq == null) {
                                fq = new String[0];
                            }
                            fq = org.apache.commons.lang3.ArrayUtils.addAll(fq,
                                    splitByFacetQuery.getFilterQueries());
                        }

                        QueryResponse qr = runSolrQuery(splitByFacetQuery, fq, downloadBatchSize, startIndex,
                                "_docid_", "asc");
                        AtomicInteger recordsForThread = new AtomicInteger(0);
                        if (logger.isDebugEnabled()) {
                            logger.debug(
                                    splitByFacetQuery.getQuery() + " - results: " + qr.getResults().size());
                        }

                        while (qr != null && !qr.getResults().isEmpty() && !interruptFound.get()) {
                            if (logger.isDebugEnabled()) {
                                logger.debug(
                                        "Start index: " + startIndex + ", " + splitByFacetQuery.getQuery());
                            }
                            int count = 0;
                            if (sensitiveQ.contains(splitByFacetQuery)) {
                                count = processQueryResults(uidStats, sensitiveFields, qaFields,
                                        concurrentWrapper, qr, dd, threadCheckLimit, resultsCount,
                                        maxDownloadSize, analysisFields);
                            } else {
                                // write non-sensitive values into sensitive fields when not authorised for their sensitive values
                                count = processQueryResults(uidStats, notSensitiveFields, qaFields,
                                        concurrentWrapper, qr, dd, threadCheckLimit, resultsCount,
                                        maxDownloadSize, analysisFields);
                            }
                            recordsForThread.addAndGet(count);
                            startIndex += downloadBatchSize;
                            // we have already set the Filter query the first time the query was constructed
                            // rerun with the same params but different startIndex
                            if (!threadCheckLimit || resultsCount.get() < maxDownloadSize) {
                                if (!threadCheckLimit) {
                                    // throttle the download by sleeping
                                    Thread.sleep(localThrottle);
                                }
                                qr = runSolrQuery(splitByFacetQuery, null, downloadBatchSize, startIndex,
                                        "_docid_", "asc");
                            } else {
                                // Limit reached: a null response ends the paging loop.
                                qr = null;
                            }
                        }
                        return recordsForThread.get();
                    }
                };
                solrCallables.add(solrCallable);
            }

            List<Future<Integer>> futures = new ArrayList<>(solrCallables.size());
            for (Callable<Integer> nextCallable : solrCallables) {
                futures.add(nextExecutor.submit(nextCallable));
            }

            // Busy wait because we need to be able to respond to an interrupt on any callable
            // and propagate it to all of the others for this particular query
            // Because the executor service is shared to prevent too many concurrent threads being run,
            // this requires a busy wait loop on the main thread to monitor state
            boolean waitAgain = false;
            do {
                waitAgain = false;
                for (Future<Integer> future : futures) {
                    if (!future.isDone()) {
                        // Wait again even if an interrupt flag is set, as it may have been set partway through the iteration
                        // The calls to future.cancel will occur next time if the interrupt is setup partway through an iteration
                        waitAgain = true;
                        // If one thread finds an interrupt it is propagated to others using the interruptFound AtomicBoolean
                        if (interruptFound.get()) {
                            future.cancel(true);
                        }
                    }
                }
                // Don't trigger the timeout interrupt if we don't have to wait again as we are already done at this point
                if (waitAgain && (System.currentTimeMillis() - start) > downloadMaxTime) {
                    interruptFound.set(true);
                    break;
                }

                if (waitAgain) {
                    Thread.sleep(downloadCheckBusyWaitSleep);
                }
            } while (waitAgain);

            // NOTE(review): totalDownload is accumulated but not read afterwards in this method;
            // future.get() may still surface an exception thrown by a callable.
            AtomicInteger totalDownload = new AtomicInteger(0);
            for (Future<Integer> future : futures) {
                if (future.isDone()) {
                    totalDownload.addAndGet(future.get());
                } else {
                    // All incomplete futures that survived the loop above are cancelled here
                    future.cancel(true);
                }
            }

            long finish = System.currentTimeMillis();
            long timeTakenInSecs = (finish - start) / 1000;
            // Avoid a division by zero in the rate calculation below.
            if (timeTakenInSecs <= 0)
                timeTakenInSecs = 1;
            if (logger.isInfoEnabled()) {
                logger.info("Download of " + resultsCount + " records in " + timeTakenInSecs
                        + " seconds. Record/sec: " + resultsCount.intValue() / timeTakenInSecs);
            }

        } finally {
            try {
                // Once we get here, we need to finalise starting at the concurrent wrapper,
                // as there are no more non-sentinel records to be added to the queue
                // This eventually triggers finalisation of the underlying writer when the queue empties
                // This is a soft shutdown, and hence we wait below for this stage to complete in normal circumstances
                // Note, this blocks for writerTimeoutWaitMillis trying to legitimately add the sentinel to the end of the queue
                // We force the sentinel to be added in the hard shutdown phase below
                concurrentWrapper.finalise();
            } finally {
                try {
                    // Track the current time right now so we can abort after downloadMaxCompletionTime milliseconds in this phase
                    final long completionStartTime = System.currentTimeMillis();
                    // Busy wait check for finalised to be called in the RecordWriter or something is interrupted
                    // By this stage, there are at maximum download.internal.queue.size items remaining (default 1000)
                    while (writerThread.isAlive() && !writerThread.isInterrupted() && !interruptFound.get()
                            && !Thread.currentThread().isInterrupted() && !rw.finalised()
                            && !((System.currentTimeMillis()
                                    - completionStartTime) > downloadMaxCompletionTime)) {
                        Thread.sleep(downloadCheckBusyWaitSleep);
                    }
                } finally {
                    try {
                        // Attempt all actions that could trigger the writer thread to finalise, as by this stage we are in hard shutdown mode

                        // Signal that we are in hard shutdown mode
                        interruptFound.set(true);

                        // Add the sentinel or clear the queue and try again until it gets onto the queue
                        // We are in hard shutdown mode, so only priority is that the queue either
                        // gets the sentinel or the thread is interrupted to clean up resources
                        while (!queue.offer(sentinel)) {
                            queue.clear();
                        }

                        // Interrupt the single writer thread
                        writerThread.interrupt();

                        // Explicitly call finalise on the RecordWriter as a backup
                        // In normal circumstances it is called via the sentinel or the interrupt
                        // This will not block if finalise has been called previously in the current three implementations
                        rw.finalise();
                    } finally {
                        // NOTE(review): rw has already been dereferenced above, so the null check here cannot be false.
                        if (rw != null && rw.hasError()) {
                            throw RecordWriterException.newRecordWriterException(dd, downloadParams, true, rw);
                        } else {
                            // Flush whatever output was still pending for more deterministic debugging
                            out.flush();
                        }
                    }
                }
            }
        }
    } catch (SolrServerException ex) {
        // Logged but not rethrown: the caller receives whatever statistics were gathered so far.
        logger.error("Problem communicating with SOLR server while processing download. " + ex.getMessage(),
                ex);
    }
    return uidStats;
}
From source file:au.org.ala.biocache.dao.SearchDAOImpl.java
License:Open Source License
/**
 * Note - this method extracts from CASSANDRA rather than the Index.
 * <p>
 * The index is only queried for the total record count and the facets (assertions and
 * per-data-resource counts); the records themselves are written by {@code downloadRecords}.
 * When download limits apply, the limited data resources are downloaded first, one at a time,
 * followed by everything else in a single pass. A {@code SolrServerException} is logged and not
 * rethrown.
 *
 * @param downloadParams   supplies the fields, extra fields, QA option, file type and query;
 *                         its fq is overwritten while limited data resources are processed
 * @param out              the stream the records are written to
 * @param i                NOTE(review): the incoming value is never read; the parameter is only
 *                         reused as the loop index of the custom header loop
 * @param includeSensitive when true the raw coordinate and sensitive locality fields are added
 *                         to the requested fields
 * @param dd               the details of the download; dereferenced without a null check
 * @param limit            passed through to {@code downloadRecords}
 * @return the uid statistics map; it also carries the "infoFields..." and "infoHeaders..." keys
 *         (with values -1 and -2) describing the output columns
 * @throws Exception
 */
public ConcurrentMap<String, AtomicInteger> writeResultsToStream(DownloadRequestParams downloadParams,
        OutputStream out, int i, boolean includeSensitive, DownloadDetailsDTO dd, boolean limit)
        throws Exception {
    expandRequestedFields(downloadParams, false);

    int resultsCount = 0;
    ConcurrentMap<String, AtomicInteger> uidStats = new ConcurrentHashMap<>();
    //stores the remaining limit for data resources that have a download limit
    Map<String, Integer> downloadLimit = new HashMap<>();

    try {
        SolrQuery solrQuery = initSolrQuery(downloadParams, false, null);
        //ensure that the qa facet is being ordered alphabetically so that the order is consistent.
        boolean getAssertionsFromFacets = "all".equals(downloadParams.getQa())
                || "includeall".equals(downloadParams.getQa());
        if (getAssertionsFromFacets) {
            //set the order for the facet to be based on the index - this will force the assertions to be returned in the same order each time
            //based on alphabetical sort.  The number of QA's may change between searches so we can't guarantee that the order won't change
            solrQuery.add("f.assertions.facet.sort", "index");
        }
        queryFormatUtils.formatSearchQuery(downloadParams);
        if (logger.isInfoEnabled()) {
            logger.info("search query: " + downloadParams.getFormattedQuery());
        }
        solrQuery.setQuery(downloadParams.getFormattedQuery());
        //Only the fields specified below will be included in the results from the SOLR Query
        solrQuery.setFields("row_key", "institution_uid", "collection_uid", "data_resource_uid",
                "data_provider_uid");

        String dFields = downloadParams.getFields();

        if (includeSensitive) {
            //include raw latitude and longitudes
            // NOTE(review): replaceFirst takes a regex, so the '.' in "decimalLatitude.p" matches any character.
            dFields = dFields
                    .replaceFirst("decimalLatitude.p", "decimalLatitude,decimalLongitude,decimalLatitude.p")
                    .replaceFirst(",locality,", ",locality,sensitive_locality,");
        }

        StringBuilder sb = new StringBuilder(dFields);
        if (downloadParams.getExtra().length() > 0) {
            sb.append(",").append(downloadParams.getExtra());
        }
        StringBuilder qasb = new StringBuilder();

        solrQuery.setFacet(true);
        // Zero-row query: only the facets and the total hit count are of interest here.
        QueryResponse qr = runSolrQuery(solrQuery, downloadParams.getFormattedFq(), 0, 0, "_docid_", "asc");
        dd.setTotalRecords(qr.getResults().getNumFound());
        //get the assertion facets to add them to the download fields
        List<FacetField> facets = qr.getFacetFields();
        for (FacetField facet : facets) {
            if (facet.getName().equals("assertions") && facet.getValueCount() > 0) {
                qasb.append(getQAFromFacet(facet));
            } else if (facet.getName().equals("data_resource_uid") && checkDownloadLimits) {
                //populate the download limit
                initDownloadLimits(downloadLimit, facet);
            }
        }

        if ("includeall".equals(downloadParams.getQa())) {
            qasb = getAllQAFields();
        }

        //Write the header line
        String qas = qasb.toString();

        // Only slots [2] (headers, for the sensitive header below), [4] (analysis headers) and
        // [5] (analysis fields) of this array are read in this method.
        List<String>[] indexedFields = downloadFields.getIndexFields(downloadParams.getFields().split(","),
                false, downloadParams.getLayersServiceUrl());

        String[] fields = sb.toString().split(",");

        //avoid analysis field duplicates
        for (String s : indexedFields[5])
            fields = (String[]) ArrayUtils.removeElement(fields, s);

        String[] qaFields = qas.equals("") ? new String[] {} : qas.split(",");
        String[] qaTitles = downloadFields.getHeader(qaFields, false, false);
        String[] titles = downloadFields.getHeader(fields, true, downloadParams.getDwcHeaders());
        String[] analysisHeaders = indexedFields[4].toArray(new String[0]);
        String[] analysisFields = indexedFields[5].toArray(new String[0]);

        //apply custom header
        // The custom header is read as a flat list of "field,title" pairs.
        String[] customHeader = dd.getRequestParams().getCustomHeader().split(",");
        for (i = 0; i + 1 < customHeader.length; i += 2) {
            for (int j = 0; j < analysisFields.length; j++) {
                if (customHeader[i].equals(analysisFields[j])) {
                    // NOTE(review): this overwrites the analysis FIELD name with the custom title, whereas the
                    // two loops below overwrite the corresponding title arrays - possibly meant analysisHeaders[j]; confirm.
                    analysisFields[j] = customHeader[i + 1];
                }
            }
            for (int j = 0; j < qaFields.length; j++) {
                if (customHeader[i].equals(qaFields[j])) {
                    qaTitles[j] = customHeader[i + 1];
                }
            }
            for (int j = 0; j < fields.length; j++) {
                if (customHeader[i].equals(fields[j])) {
                    titles[j] = customHeader[i + 1];
                }
            }
        }

        //append sensitive fields for the header only
        if (!includeSensitive && dd.getSensitiveFq() != null) {
            //sensitive headers do not have a DwC name, always set getIndexFields dwcHeader=false
            List<String>[] sensitiveHdr = downloadFields.getIndexFields(sensitiveSOLRHdr, false,
                    downloadParams.getLayersServiceUrl());

            titles = org.apache.commons.lang3.ArrayUtils.addAll(titles,
                    sensitiveHdr[2].toArray(new String[] {}));
        }
        // Column order of the output: field titles, then QA titles, then analysis headers.
        String[] header = org.apache.commons.lang3.ArrayUtils
                .addAll(org.apache.commons.lang3.ArrayUtils.addAll(titles, qaTitles), analysisHeaders);
        //Create the Writer that will be used to format the records
        //construct correct RecordWriter based on the supplied fileType
        // Any file type other than "csv" or "tsv" falls through to the shapefile writer.
        final RecordWriterError rw = downloadParams.getFileType().equals("csv")
                ? new CSVRecordWriter(out, header, downloadParams.getSep(), downloadParams.getEsc())
                : (downloadParams.getFileType().equals("tsv") ? new TSVRecordWriter(out, header)
                        : new ShapeFileRecordWriter(tmpShapefileDir, downloadParams.getFile(), out,
                                (String[]) ArrayUtils.addAll(fields, qaFields)));

        try {
            if (rw instanceof ShapeFileRecordWriter) {
                dd.setHeaderMap(((ShapeFileRecordWriter) rw).getHeaderMappings());
            }

            //retain output header fields and field names for inclusion of header info in the download
            // These are stored as KEYS of uidStats (with marker values -1 and -2). The doubled comma
            // produced by the trailing "," in the prefix is collapsed by the loops below.
            StringBuilder infoFields = new StringBuilder("infoFields,");
            for (String h : fields)
                infoFields.append(",").append(h);
            for (String h : analysisFields)
                infoFields.append(",").append(h);
            for (String h : qaFields)
                infoFields.append(",").append(h);

            StringBuilder infoHeader = new StringBuilder("infoHeaders,");
            for (String h : header)
                infoHeader.append(",").append(h);

            String info = infoFields.toString();
            while (info.contains(",,"))
                info = info.replace(",,", ",");
            uidStats.put(info, new AtomicInteger(-1));
            String hdr = infoHeader.toString();
            while (hdr.contains(",,"))
                hdr = hdr.replace(",,", ",");
            uidStats.put(hdr, new AtomicInteger(-2));

            //download the records that have limits first...
            if (downloadLimit.size() > 0) {
                String[] originalFq = downloadParams.getFormattedFq();
                // Accumulates a negated OR-clause of all limited data resources, used afterwards
                // to select "everything else".
                StringBuilder fqBuilder = new StringBuilder("-(");
                for (String dr : downloadLimit.keySet()) {
                    //add another fq to the search for data_resource_uid
                    downloadParams.setFq((String[]) ArrayUtils.add(originalFq, "data_resource_uid:" + dr));
                    resultsCount = downloadRecords(downloadParams, rw, downloadLimit, uidStats, fields,
                            qaFields, resultsCount, dr, includeSensitive, dd, limit, analysisFields);
                    // Length 2 is the bare "-(" prefix, i.e. no data resource has been appended yet.
                    if (fqBuilder.length() > 2) {
                        fqBuilder.append(" OR ");
                    }
                    fqBuilder.append("data_resource_uid:").append(dr);
                }
                fqBuilder.append(")");
                //now include the rest of the data resources
                //add extra fq for the remaining records
                downloadParams.setFq((String[]) ArrayUtils.add(originalFq, fqBuilder.toString()));
                resultsCount = downloadRecords(downloadParams, rw, downloadLimit, uidStats, fields, qaFields,
                        resultsCount, null, includeSensitive, dd, limit, analysisFields);
            } else {
                //download all at once
                downloadRecords(downloadParams, rw, downloadLimit, uidStats, fields, qaFields, resultsCount,
                        null, includeSensitive, dd, limit, analysisFields);
            }
        } finally {
            rw.finalise();
        }
    } catch (SolrServerException ex) {
        // Logged but not rethrown: the caller receives whatever statistics were gathered so far.
        logger.error("Problem communicating with SOLR server. " + ex.getMessage(), ex);
    }

    return uidStats;
}
From source file:au.org.ala.biocache.dao.SearchDAOImpl.java
License:Open Source License
private List<FacetResultDTO> getMultivalueFacetCounts(SolrQuery query, SpatialSearchRequestParams searchParams, List<String> facet) throws Exception { for (String s : facet) { query.addFacetField(s);/* www .j a v a 2 s .com*/ } int flimit = searchParams.getFlimit(); query.setFacetLimit(-1); QueryResponse response = runSolrQuery(query, searchParams); SearchResultDTO searchResults = processSolrResponse(searchParams, response, query, OccurrenceIndex.class); //include paged facets when flimit > 0 Collection<FacetResultDTO> facetResults = new ArrayList<FacetResultDTO>(); if (searchParams.getFlimit() > 0) { facetResults = searchResults.getFacetResults(); if (facetResults != null) { for (FacetResultDTO fr : facetResults) { if (fr.getFieldResult() != null && fr.getFieldResult().size() > 0) { for (FacetField ff : response.getFacetFields()) { fr.setCount(0); if (ff != null && StringUtils.equals(ff.getName(), fr.getFieldName())) { fr.setCount(ff.getValueCount()); } } //sort and apply limit and offset Collections.sort(fr.getFieldResult(), new Comparator<FieldResultDTO>() { @Override public int compare(FieldResultDTO o1, FieldResultDTO o2) { long result = o1.getCount() - o2.getCount(); if (result == 0) { return o1.getLabel() != null ? o1.getLabel().compareTo(o2.getLabel()) : (o2.getLabel() == null ? 0 : 1); } else { return result > 0 ? -1 : 1; } } }); int from = Math.min(fr.getFieldResult().size() - 1, searchParams.getFoffset()); int to = Math.min(fr.getFieldResult().size(), searchParams.getFoffset() + searchParams.getFlimit()); fr.setFieldResult(new ArrayList(fr.getFieldResult().subList(from, to))); } } } } else { //only return totals counts for (FacetField ff : response.getFacetFields()) { facetResults.add(new FacetResultDTO(ff.getName(), null, ff.getValueCount())); } } query.setFacetLimit(flimit); return new ArrayList<FacetResultDTO>(facetResults); }
From source file:au.org.ala.biocache.web.WMSController.java
License:Open Source License
ImgObj drawHighlight(SpatialSearchRequestParams requestParams, WmsEnv vars, PointType pointType, int width, int height, double[] pbbox, double width_mult, double height_mult, ImgObj imgObj, String[] originalFqs, String[] boundingBoxFqs, boolean is4326, double[] tilebbox) throws Exception { String[] fqs = new String[originalFqs.length + 3]; System.arraycopy(originalFqs, 0, fqs, 3, originalFqs.length); fqs[0] = vars.highlight;/*w w w.j a va 2 s.c o m*/ fqs[1] = boundingBoxFqs[0]; fqs[2] = boundingBoxFqs[1]; requestParams.setFq(fqs); requestParams.setFlimit(-1); requestParams.setFormattedQuery(null); FacetField ps = searchDAO.getFacetPointsShort(requestParams, pointType.getLabel()); if (ps != null && ps.getValueCount() > 0) { if (imgObj == null || imgObj.img == null) { //when vars.alpha == 0 img is null imgObj = ImgObj.create(width, height); } int highightRadius = vars.size + HIGHLIGHT_RADIUS; int highlightWidth = highightRadius * 2; imgObj.g.setStroke(new BasicStroke(2)); imgObj.g.setColor(new Color(255, 0, 0, 255)); int x, y; //for 4326 double top = tilebbox[3]; double bottom = tilebbox[1]; double left = tilebbox[0]; double right = tilebbox[2]; for (int i = 0; i < ps.getValueCount(); i++) { //extract lat lng if (ps.getValues().get(i).getName() != null) { String[] lat_lng = ps.getValues().get(i).getName().split(","); float lng = Float.parseFloat(lat_lng[1]); float lat = Float.parseFloat(lat_lng[0]); if (is4326) { x = convertLngToPixel4326(lng, left, right, width); y = convertLatToPixel4326(lat, top, bottom, height); } else { x = (int) ((convertLngToPixel(lng) - pbbox[0]) * width_mult); y = (int) ((convertLatToPixel(lat) - pbbox[3]) * height_mult); } imgObj.g.drawOval(x - highightRadius, y - highightRadius, highlightWidth, highlightWidth); } } } return imgObj; }
From source file:com.databasepreservation.visualization.utils.SolrUtils.java
/**
 * Converts SolrJ facet fields into {@code FacetFieldResult} objects.
 *
 * <p>Each result is built from the facet's name, its value count and the values of the facet
 * parameter registered under the same name in {@code facets}. Every facet value is added with
 * its name used for both of the first two arguments of {@code addFacetValue}.
 *
 * @param facets      the requested facets, looked up by facet field name
 * @param facetFields the facet fields of the Solr response, may be {@code null}
 * @return one result per facet field, in order; empty when {@code facetFields} is {@code null}
 */
private static List<FacetFieldResult> processFacetFields(Facets facets, List<FacetField> facetFields) {
    List<FacetFieldResult> results = new ArrayList<FacetFieldResult>();
    if (facetFields == null) {
        return results;
    }

    for (FacetField facetField : facetFields) {
        LOGGER.trace("facet:{} count:{}", facetField.getName(), facetField.getValueCount());

        FacetFieldResult fieldResult = new FacetFieldResult(facetField.getName(), facetField.getValueCount(),
                facets.getParameters().get(facetField.getName()).getValues());

        for (FacetField.Count value : facetField.getValues()) {
            LOGGER.trace(" value:{} value:{}", value.getName(), value.getCount());
            fieldResult.addFacetValue(value.getName(), value.getName(), value.getCount());
        }

        results.add(fieldResult);
    }

    return results;
}
From source file:com.doculibre.constellio.servlets.SolrJExampleMain.java
License:Open Source License
/** * Print documents and facets/* ww w.j a v a 2s . c o m*/ * * @param response */ @SuppressWarnings("unchecked") public static void print(QueryResponse response) { SolrDocumentList docs = response.getResults(); if (docs != null) { System.out.println(docs.getNumFound() + " documents found, " + docs.size() + " returned : "); for (int i = 0; i < docs.size(); i++) { SolrDocument doc = docs.get(i); System.out.println("\t" + doc.toString()); } } List<FacetField> fieldFacets = response.getFacetFields(); if (fieldFacets != null && fieldFacets.isEmpty()) { System.out.println("\nField Facets : "); for (FacetField fieldFacet : fieldFacets) { System.out.print("\t" + fieldFacet.getName() + " :\t"); if (fieldFacet.getValueCount() > 0) { for (Count count : fieldFacet.getValues()) { System.out.print(count.getName() + "[" + count.getCount() + "]\t"); } } System.out.println(""); } } Map<String, Integer> queryFacets = response.getFacetQuery(); if (queryFacets != null && !queryFacets.isEmpty()) { System.out.println("\nQuery facets : "); for (String queryFacet : queryFacets.keySet()) { System.out.println("\t" + queryFacet + "\t[" + queryFacets.get(queryFacet) + "]"); } System.out.println(""); } NamedList<NamedList<Object>> spellCheckResponse = (NamedList<NamedList<Object>>) response.getResponse() .get("spellcheck"); if (spellCheckResponse != null) { Iterator<Entry<String, NamedList<Object>>> wordsIterator = spellCheckResponse.iterator(); while (wordsIterator.hasNext()) { Entry<String, NamedList<Object>> entry = wordsIterator.next(); String word = entry.getKey(); NamedList<Object> spellCheckWordResponse = entry.getValue(); boolean correct = spellCheckWordResponse.get("frequency").equals(1); System.out.println("Word: " + word + ",\tCorrect?: " + correct); NamedList<Integer> suggestions = (NamedList<Integer>) spellCheckWordResponse.get("suggestions"); if (suggestions != null && suggestions.size() > 0) { System.out.println("Suggestions : "); Iterator<Entry<String, Integer>> 
suggestionsIterator = suggestions.iterator(); while (suggestionsIterator.hasNext()) { System.out.println("\t" + suggestionsIterator.next().getKey()); } } System.out.println(""); } } }
From source file:com.francelabs.datafari.statistics.StatsProcessor.java
License:Apache License
/**
 * Replaces the response of a statistics query with one document per value of the {@code q}
 * facet, each carrying the count and frequency of that value plus the aggregated hit, click
 * and query-time statistics found under the {@code q} facet of the field stats.
 *
 * <p>The new response contains the original response header and a document list whose
 * {@code numFound} is the number of {@code q} facet values (0 when the facet is absent).
 * Documents are only built when the original response found at least one result and the
 * {@code q} facet is present.
 *
 * <p>NOTE(review): the per-field stats lists are read by the same index as the {@code QTime}
 * list, so they are presumably all ordered by query in the same way; confirm against the
 * Solr stats request.
 *
 * @param queryResponse the response to rewrite in place
 * @throws Exception if a required statistic is missing or has an unexpected type
 */
public static void processStatsResponse(final QueryResponse queryResponse) throws Exception {
    final NamedList responseHeader = queryResponse.getResponseHeader();
    final FacetField qFacet = queryResponse.getFacetField("q");
    final long numTot = queryResponse.getResults().getNumFound();

    final SolrDocumentList solrDocumentList = new SolrDocumentList();
    solrDocumentList.setNumFound(qFacet == null ? 0 : qFacet.getValueCount());
    solrDocumentList.setStart(0);

    if (numTot != 0 && qFacet != null) {
        final Map<String, FieldStatsInfo> stats = queryResponse.getFieldStatsInfo();
        final List<FieldStatsInfo> noHitsStats = stats.get("noHits").getFacets().get("q");
        final List<FieldStatsInfo> qTimeStats = stats.get("QTime").getFacets().get("q");

        // positionClickTot is optional: without it the average click position is reported as "-"
        List<FieldStatsInfo> positionClickTotStats = null;
        final FieldStatsInfo positionClickTot = stats.get("positionClickTot");
        if (positionClickTot != null && positionClickTot.getFacets() != null) {
            positionClickTotStats = positionClickTot.getFacets().get("q");
        }

        final List<FieldStatsInfo> clickStats = stats.get("click").getFacets().get("q");
        final List<FieldStatsInfo> numClicksStats = stats.get("numClicks").getFacets().get("q");
        final List<FieldStatsInfo> numFoundStats = stats.get("numFound").getFacets().get("q");

        // one document per distinct query, indexed by query string for the stats pass below
        final Map<String, SolrDocument> mapDocuments = new HashMap<String, SolrDocument>();
        for (final Count facetValue : qFacet.getValues()) {
            final String query = facetValue.getName();
            final double count = facetValue.getCount();
            final double frequency = StatsUtils.round(count * 100 / numTot, 2, BigDecimal.ROUND_HALF_UP);

            final SolrDocument doc = new SolrDocument();
            doc.addField("query", query);
            doc.addField("count", count);
            doc.addField("frequency", frequency);

            mapDocuments.put(query, doc);
            solrDocumentList.add(doc);
        }

        for (int i = 0; i < qTimeStats.size(); i++) {
            final SolrDocument doc = mapDocuments.get(qTimeStats.get(i).getName());
            if (doc == null) {
                // stats entry for a query that is not among the facet values
                continue;
            }

            final int avgHits = ((Number) numFoundStats.get(i).getMean()).intValue();
            final Double noHits = Double.valueOf(((Number) noHitsStats.get(i).getSum()).doubleValue());
            final int avgQTime = ((Number) qTimeStats.get(i).getMean()).intValue();
            final int maxQTime = ((Number) qTimeStats.get(i).getMax()).intValue();
            final double click = ((Number) clickStats.get(i).getSum()).doubleValue();
            final double queryCount = ((Number) doc.getFirstValue("count")).doubleValue();
            final double clickRatio = StatsUtils.round(click * 100 / queryCount, 2, BigDecimal.ROUND_HALF_UP);

            if (click > 0 && positionClickTotStats != null && i < positionClickTotStats.size()) {
                final double positionSum = ((Number) positionClickTotStats.get(i).getSum()).doubleValue();
                final double clicksSum = ((Number) numClicksStats.get(i).getSum()).doubleValue();
                // truncated to a whole position, stored as a double
                final double avgClickPosition = (int) (positionSum / clicksSum);
                doc.addField("AVGClickPosition", avgClickPosition);
            } else {
                doc.addField("AVGClickPosition", "-");
            }

            doc.addField("withClickRatio", clickRatio);
            doc.addField("AVGHits", avgHits);
            doc.addField("numNoHits", noHits);
            doc.addField("withClick", click);
            doc.addField("AVGQTime", avgQTime);
            doc.addField("MaxQTime", maxQTime);
        }
    }

    final NamedList<Object> response = new SimpleOrderedMap<Object>();
    response.add("responseHeader", responseHeader);
    response.add("response", solrDocumentList);
    queryResponse.setResponse(response);
}