Example usage for org.apache.solr.client.solrj SolrQuery setFacet

Introduction

On this page you can find example usages of the setFacet method of org.apache.solr.client.solrj SolrQuery.

Prototype

public SolrQuery setFacet(boolean b)

Source Link

Document

Enables or disables faceting.

Usage

From source file:at.pagu.soldockr.core.QueryParser.java

License:Apache License

/**
 * Transfers the field-faceting settings of {@code query} onto {@code solrQuery}.
 * <p>
 * The Solr query is left untouched when the facet query has no facet options or when those
 * options report no fields. Otherwise faceting is enabled and the facet fields, the minimum
 * count and the limit (taken from the pageable's page size) are applied. The facet offset is
 * only set when the page number is greater than zero, and the facet sort is only set when
 * index ordering was requested.
 *
 * @param solrQuery the Solr query to configure
 * @param query     the facet query supplying the facet options
 */
private void appendFacetingOnFields(SolrQuery solrQuery, FacetQuery query) {
    final FacetOptions options = query.getFacetOptions();
    final boolean facetingRequested = options != null && options.hasFields();
    if (!facetingRequested) {
        return;
    }

    // Core facet switches, applied in a fixed order.
    solrQuery.setFacet(true);
    solrQuery.addFacetField(convertFieldListToStringArray(options.getFacetOnFields()));
    solrQuery.setFacetMinCount(options.getFacetMinCount());
    solrQuery.setFacetLimit(options.getPageable().getPageSize());

    // An explicit offset is only sent when the page number is above zero.
    final boolean pageNumberAboveZero = options.getPageable().getPageNumber() > 0;
    if (pageNumberAboveZero) {
        solrQuery.set(FacetParams.FACET_OFFSET, options.getPageable().getOffset());
    }

    // The sort is only overridden for index ordering; any other value leaves it unset here.
    final boolean sortByIndex = FacetOptions.FacetSort.INDEX.equals(options.getFacetSort());
    if (sortByIndex) {
        solrQuery.setFacetSort(FacetParams.FACET_SORT_INDEX);
    }
}

From source file:au.org.ala.biocache.dao.SearchDAOImpl.java

License:Open Source License

/**
 * Writes the index records matching a download request to the supplied output stream.
 * <p>
 * The record writer is chosen from {@code downloadParams.getFileType()}: {@code "csv"} and
 * {@code "tsv"} select the CSV and TSV writers, any other value selects the shapefile writer.
 * <p>
 * DM: refactored to split the query by month to improve performance.
 * Further enhancements possible:
 * 1) Multi threaded
 * 2) More filtering, by year or decade..
 * <p>
 * Outline of the implementation:
 * <ol>
 * <li>Resolve the requested fields and headers, applying any custom header pairs.</li>
 * <li>Run one facet query (zero rows) to obtain the total record count, the "month" facet
 *     used to split the work and, when requested, the "assertions" facet.</li>
 * <li>Start a single writer thread that drains a bounded queue into the record writer.</li>
 * <li>Submit one paging callable per split query to {@code nextExecutor}; each pushes its
 *     rows onto the queue through a queue-backed {@code RecordWriter}.</li>
 * <li>Poll the futures, propagating interrupts and the maximum download time, then shut the
 *     writer down in a soft phase followed by a hard phase.</li>
 * </ol>
 *
 * @param downloadParams   the download request; its fields, extra fields, QA selection, file
 *                         type, separator/escape characters and formatted query are read here
 * @param out              the stream the records are written to
 * @param includeSensitive when true the sensitive latitude/longitude and locality fields are
 *                         added to the requested fields
 * @param dd               The details of the download. NOTE(review): null-checked in some
 *                         places but dereferenced unconditionally in others, so callers
 *                         should currently treat it as required - confirm
 * @param checkLimit       whether the maximum download size should be enforced; switched off
 *                         locally when the total record count is below that maximum
 * @param nextExecutor     The ExecutorService to use to process results on different threads
 * @return the statistics map handed to {@code processQueryResults}; it also carries two
 *         bookkeeping entries whose keys start with "infoFields" and "infoHeaders" and whose
 *         values are -1 and -2
 * @throws Exception on writer errors or failures of the worker callables; a
 *                   {@code SolrServerException} is logged and not rethrown
 */
@Override
public ConcurrentMap<String, AtomicInteger> writeResultsFromIndexToStream(
        final DownloadRequestParams downloadParams, final OutputStream out, final boolean includeSensitive,
        final DownloadDetailsDTO dd, boolean checkLimit, final ExecutorService nextExecutor) throws Exception {
    expandRequestedFields(downloadParams, true);

    if (dd != null) {
        dd.resetCounts();
    }

    // Start time is used both for the overall timeout check and the final throughput log line
    long start = System.currentTimeMillis();
    final ConcurrentMap<String, AtomicInteger> uidStats = new ConcurrentHashMap<>();
    // Return value is discarded; presumably this makes sure the SOLR server is initialised - TODO confirm
    getServer();

    try {
        SolrQuery solrQuery = new SolrQuery();
        queryFormatUtils.formatSearchQuery(downloadParams);

        String dFields = downloadParams.getFields();

        if (includeSensitive) {
            //include raw latitude and longitudes
            // NOTE(review): replaceFirst takes a regex, so the '.' in these patterns matches any character
            if (dFields.contains("decimalLatitude.p")) {
                dFields = dFields.replaceFirst("decimalLatitude.p",
                        "sensitive_latitude,sensitive_longitude,decimalLatitude.p");
            } else if (dFields.contains("decimalLatitude")) {
                dFields = dFields.replaceFirst("decimalLatitude",
                        "sensitive_latitude,sensitive_longitude,decimalLatitude");
            }
            // The locality replacements only match when the field is surrounded by commas,
            // i.e. it is neither the first nor the last requested field
            if (dFields.contains(",locality,")) {
                dFields = dFields.replaceFirst(",locality,", ",locality,sensitive_locality,");
            }
            if (dFields.contains(",locality.p,")) {
                dFields = dFields.replaceFirst(",locality.p,", ",locality.p,sensitive_locality,");
            }
        }

        // Requested fields = (possibly extended) field list plus any "extra" fields
        StringBuilder sb = new StringBuilder(dFields);
        if (!downloadParams.getExtra().isEmpty()) {
            sb.append(",").append(downloadParams.getExtra());
        }

        String[] requestedFields = sb.toString().split(",");
        // Layout as used below: [0] index fields included in the download, [1] excluded fields,
        // [2] output headers, [3] field names recorded in the "infoFields" entry,
        // [4] analysis headers, [5] analysis fields
        List<String>[] indexedFields;
        if (downloadFields == null) {
            //default to include everything
            java.util.List<String> mappedNames = new java.util.LinkedList<String>();
            for (int i = 0; i < requestedFields.length; i++)
                mappedNames.add(requestedFields[i]);

            // NOTE(review): slots 0, 2 and 3 share the same list instance in this branch, so the
            // set/addAll calls made on indexedFields[2] further down also change [0] and [3]
            indexedFields = new List[] { mappedNames, new java.util.LinkedList<String>(), mappedNames,
                    mappedNames, new ArrayList(), new ArrayList() };
        } else {
            indexedFields = downloadFields.getIndexFields(requestedFields, downloadParams.getDwcHeaders(),
                    downloadParams.getLayersServiceUrl());
        }
        //apply custom header
        // The custom header is read as consecutive (field name, replacement header) pairs; a trailing
        // unpaired element is ignored.
        // NOTE(review): dd is dereferenced here without the null check used at the top of the method
        String[] customHeader = dd.getRequestParams().getCustomHeader().split(",");
        for (int i = 0; i + 1 < customHeader.length; i += 2) {
            for (int j = 0; j < indexedFields[0].size(); j++) {
                if (customHeader[i].equals(indexedFields[0].get(j))) {
                    indexedFields[2].set(j, customHeader[i + 1]);
                }
            }
            // Bounded by the size of [4] while reading [5]: assumes both lists have the same length - TODO confirm
            for (int j = 0; j < indexedFields[4].size(); j++) {
                if (customHeader[i].equals(indexedFields[5].get(j))) {
                    indexedFields[4].set(j, customHeader[i + 1]);
                }
            }
        }
        if (logger.isDebugEnabled()) {
            logger.debug("Fields included in download: " + indexedFields[0]);
            logger.debug("Fields excluded from download: " + indexedFields[1]);
            logger.debug("The headers in downloads: " + indexedFields[2]);
            logger.debug("Analysis headers: " + indexedFields[4]);
            logger.debug("Analysis fields: " + indexedFields[5]);
        }

        //set the fields to the ones that are available in the index
        String[] fields = indexedFields[0].toArray(new String[] {});
        solrQuery.setFields(fields);
        // Collects the comma separated QA (assertion) field names to output
        StringBuilder qasb = new StringBuilder();
        if (!"none".equals(downloadParams.getQa())) {
            solrQuery.addField("assertions");
            if (!"all".equals(downloadParams.getQa()) && !"includeall".equals(downloadParams.getQa())) {
                //add all the qa fields
                qasb.append(downloadParams.getQa());
            }
        }
        // The uid fields are always requested in addition to the download fields
        solrQuery.addField("institution_uid").addField("collection_uid").addField("data_resource_uid")
                .addField("data_provider_uid");

        solrQuery.setQuery(downloadParams.getFormattedQuery());
        solrQuery.setFacetMinCount(1);
        solrQuery.setFacetLimit(-1);

        //get the assertion facets to add them to the download fields
        boolean getAssertionsFromFacets = "all".equals(downloadParams.getQa())
                || "includeall".equals(downloadParams.getQa());
        // The facet query works on a copy so that solrQuery itself gets no facet fields
        SolrQuery monthAssertionsQuery = getAssertionsFromFacets
                ? solrQuery.getCopy().addFacetField("month", "assertions")
                : solrQuery.getCopy().addFacetField("month");
        if (getAssertionsFromFacets) {
            //set the order for the facet to be based on the index - this will force the assertions to be returned in the same order each time
            //based on alphabetical sort.  The number of QA's may change between searches so we can't guarantee that the order won't change
            monthAssertionsQuery.add("f.assertions.facet.sort", "index");
        }
        // The two zeros sit in the argument positions that the paging calls below use for the
        // batch size and the start index
        QueryResponse facetQuery = runSolrQuery(monthAssertionsQuery, downloadParams.getFormattedFq(), 0, 0,
                "score", "asc");

        //set the totalrecords for the download details
        dd.setTotalRecords(facetQuery.getResults().getNumFound());

        //use a separately configured and smaller limit when output will be unzipped
        final long maxDownloadSize;
        if (MAX_DOWNLOAD_SIZE > unzippedLimit && out instanceof OptionalZipOutputStream
                && ((OptionalZipOutputStream) out).getType() == OptionalZipOutputStream.Type.unzipped) {
            maxDownloadSize = unzippedLimit;
        } else {
            maxDownloadSize = MAX_DOWNLOAD_SIZE;
        }

        // No need to enforce the limit when the whole result set is smaller than it
        if (checkLimit && dd.getTotalRecords() < maxDownloadSize) {
            checkLimit = false;
        }

        //get the month facets to add them to the download fields get the assertion facets.
        // Stays null when the "month" facet returned no values; the query is then run unsplit
        List<Count> splitByFacet = null;

        for (FacetField facet : facetQuery.getFacetFields()) {
            if (facet.getName().equals("assertions") && facet.getValueCount() > 0) {
                qasb.append(getQAFromFacet(facet));
            }
            if (facet.getName().equals("month") && facet.getValueCount() > 0) {
                splitByFacet = facet.getValues();
            }
        }

        // "includeall" replaces whatever was collected so far with the full QA field list
        if ("includeall".equals(downloadParams.getQa())) {
            qasb = getAllQAFields();
        }

        String qas = qasb.toString();

        //include sensitive fields in the header when the output will be partially sensitive
        final String[] sensitiveFields;
        final String[] notSensitiveFields;
        if (dd.getSensitiveFq() != null) {
            // NOTE(review): downloadFields is used without a null check here and for getHeader below,
            // although the field resolution above handles downloadFields == null
            List<String>[] sensitiveHdr = downloadFields.getIndexFields(sensitiveSOLRHdr,
                    downloadParams.getDwcHeaders(), downloadParams.getLayersServiceUrl());

            //header for the output file
            indexedFields[2].addAll(sensitiveHdr[2]);

            //lookup for fields from sensitive queries
            sensitiveFields = org.apache.commons.lang3.ArrayUtils.addAll(
                    indexedFields[0].toArray(new String[] {}), sensitiveHdr[0].toArray(new String[] {}));

            //use general fields when sensitive data is not permitted
            notSensitiveFields = org.apache.commons.lang3.ArrayUtils
                    .addAll(indexedFields[0].toArray(new String[] {}), notSensitiveSOLRHdr);
        } else {
            sensitiveFields = new String[0];
            notSensitiveFields = fields;
        }

        //add analysis headers
        indexedFields[2].addAll(indexedFields[4]);
        final String[] analysisFields = indexedFields[5].toArray(new String[0]);

        final String[] qaFields = qas.equals("") ? new String[] {} : qas.split(",");
        String[] qaTitles = downloadFields.getHeader(qaFields, false, false);

        // Final output header: field headers followed by the QA titles
        String[] header = org.apache.commons.lang3.ArrayUtils.addAll(indexedFields[2].toArray(new String[] {}),
                qaTitles);

        //retain output header fields and field names for inclusion of header info in the download
        StringBuilder infoFields = new StringBuilder("infoFields");
        for (String h : indexedFields[3])
            infoFields.append(",").append(h);
        for (String h : qaFields)
            infoFields.append(",").append(h);

        StringBuilder infoHeader = new StringBuilder("infoHeaders");
        for (String h : header)
            infoHeader.append(",").append(h);

        // Collapse empty entries (",,") and store both strings as map keys; the values -1 and -2
        // mark them as bookkeeping entries rather than record counts
        String info = infoFields.toString();
        while (info.contains(",,"))
            info = info.replace(",,", ",");
        uidStats.put(info, new AtomicInteger(-1));
        String hdr = infoHeader.toString();
        while (hdr.contains(",,"))
            hdr = hdr.replace(",,", ",");
        uidStats.put(hdr, new AtomicInteger(-2));

        //construct correct RecordWriter based on the supplied fileType
        // "csv" -> CSV writer, "tsv" -> TSV writer, anything else -> shapefile writer
        final RecordWriterError rw = downloadParams.getFileType().equals("csv")
                ? new CSVRecordWriter(out, header, downloadParams.getSep(), downloadParams.getEsc())
                : (downloadParams.getFileType().equals("tsv") ? new TSVRecordWriter(out, header)
                        : new ShapeFileRecordWriter(tmpShapefileDir, downloadParams.getFile(), out,
                                (String[]) ArrayUtils.addAll(fields, qaFields)));

        // Requirement to be able to propagate interruptions to all other threads for this execution
        // Doing this via this variable
        final AtomicBoolean interruptFound = dd != null ? dd.getInterrupt() : new AtomicBoolean(false);

        // Create a fixed length blocking queue for buffering results before they are written
        // This also creates a push-back effect to throttle the results generating threads
        // when it fills and offers to it are delayed until the writer consumes elements from the queue
        final BlockingQueue<String[]> queue = new ArrayBlockingQueue<>(resultsQueueLength);
        // Create a sentinel that we can check for reference equality to signal the end of the queue
        final String[] sentinel = new String[0];
        // An implementation of RecordWriter that adds to an in-memory queue
        final RecordWriter concurrentWrapper = new RecordWriter() {
            // Set as soon as finalise() has been entered, so the sentinel is only offered by one caller
            private AtomicBoolean finalised = new AtomicBoolean(false);
            // Set once the finalise() attempt has finished, whether or not the sentinel was queued
            private AtomicBoolean finalisedComplete = new AtomicBoolean(false);

            /**
             * Queues a row for the writer thread, retrying the timed offer until it is accepted.
             * Finalises instead when an interrupt is seen or finalise() has already started.
             */
            @Override
            public void write(String[] nextLine) {
                try {
                    if (Thread.currentThread().isInterrupted() || interruptFound.get() || finalised.get()) {
                        finalise();
                        return;
                    }
                    while (!queue.offer(nextLine, writerTimeoutWaitMillis, TimeUnit.MILLISECONDS)) {
                        if (Thread.currentThread().isInterrupted() || interruptFound.get() || finalised.get()) {
                            finalise();
                            break;
                        }
                    }
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    interruptFound.set(true);
                    if (logger.isDebugEnabled()) {
                        logger.debug(
                                "Queue failed to accept the next record due to a thread interrupt, calling finalise the cleanup: ",
                                e);
                    }
                    // If we were interrupted then we should call finalise to cleanup
                    finalise();
                }
            }

            /**
             * Offers the sentinel to the queue; only the first call does any work.
             */
            @Override
            public void finalise() {
                if (finalised.compareAndSet(false, true)) {
                    try {
                        // Offer the sentinel at least once, even when the thread is interrupted
                        while (!queue.offer(sentinel, writerTimeoutWaitMillis, TimeUnit.MILLISECONDS)) {
                            // If the thread is interrupted then the queue may not have any active consumers,
                            // so don't loop forever waiting for capacity in this case
                            // The hard shutdown phase will use queue.clear to ensure that the
                            // sentinel gets onto the queue at least once
                            if (Thread.currentThread().isInterrupted() || interruptFound.get()) {
                                break;
                            }
                        }
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                        interruptFound.set(true);
                        if (logger.isDebugEnabled()) {
                            logger.debug(
                                    "Queue failed to accept the sentinel in finalise due to a thread interrupt: ",
                                    e);
                        }
                    } finally {
                        finalisedComplete.set(true);
                    }
                }
            }

            /**
             * Reports whether a finalise() attempt has run to completion.
             */
            @Override
            public boolean finalised() {
                return finalisedComplete.get();
            }

        };

        // A single thread that consumes elements put onto the queue until it sees the sentinel, finalising after the sentinel or an interrupt
        Runnable writerRunnable = new Runnable() {
            @Override
            public void run() {
                try {
                    // Number of loop iterations, used to space out the error checks below
                    long counter = 0;
                    while (true) {
                        counter = counter + 1;

                        if (Thread.currentThread().isInterrupted() || interruptFound.get()) {
                            break;
                        }

                        String[] take = queue.take();
                        // Sentinel object equality check to see if we are done
                        if (take == sentinel || Thread.currentThread().isInterrupted()
                                || interruptFound.get()) {
                            break;
                        }
                        // Otherwise write to the wrapped record writer
                        rw.write(take);

                        //test for errors. This can contain a flush so only test occasionally
                        if (counter % resultsQueueLength == 0 && rw.hasError()) {
                            throw RecordWriterException.newRecordWriterException(dd, downloadParams, true, rw);
                        }

                    }
                } catch (RecordWriterException e) {
                    //no trace information is available to print for these errors
                    logger.error(e.getMessage());
                    interruptFound.set(true);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    interruptFound.set(true);
                } catch (Exception e) {
                    // Reuse interruptFound variable to signal that the writer had issues
                    interruptFound.set(true);
                    logger.error("Download writer failed.", e);
                } finally {
                    // The underlying writer is always finalised when the writer thread exits
                    rw.finalise();
                }
            }
        };
        Thread writerThread = new Thread(writerRunnable);
        writerThread.start();

        try {
            if (rw instanceof ShapeFileRecordWriter) {
                dd.setHeaderMap(((ShapeFileRecordWriter) rw).getHeaderMappings());
            }

            //order the query by _docid_ for faster paging
            solrQuery.addSortField("_docid_", ORDER.asc);

            //for each month create a separate query that pages through 500 records per page
            List<SolrQuery> queries = new ArrayList<SolrQuery>();
            if (splitByFacet != null) {
                for (Count facet : splitByFacet) {
                    if (facet.getCount() > 0) {
                        SolrQuery splitByFacetQuery;
                        //do not add remainderQuery here
                        if (facet.getName() != null) {
                            splitByFacetQuery = solrQuery.getCopy()
                                    .addFilterQuery(facet.getFacetField().getName() + ":" + facet.getName());
                            splitByFacetQuery.setFacet(false);
                            queries.add(splitByFacetQuery);
                        }

                    }
                }
                // The remainder query selects the records that have no value for the split field
                // and is placed first in the list
                if (splitByFacet.size() > 0) {
                    SolrQuery remainderQuery = solrQuery.getCopy()
                            .addFilterQuery("-" + splitByFacet.get(0).getFacetField().getName() + ":[* TO *]");
                    queries.add(0, remainderQuery);
                }
            } else {
                queries.add(0, solrQuery);
            }

            //split into sensitive and non-sensitive queries when
            // - not including all sensitive values
            // - there is a sensitive fq
            // NOTE(review): splitQueries receives the queries list and may modify it; only its
            // returned list is known here to hold the sensitive queries - confirm against splitQueries
            final List<SolrQuery> sensitiveQ = new ArrayList<SolrQuery>();
            if (!includeSensitive && dd.getSensitiveFq() != null) {
                sensitiveQ.addAll(
                        splitQueries(queries, dd.getSensitiveFq(), sensitiveSOLRHdr, notSensitiveSOLRHdr));
            }

            //Set<Future<Integer>> futures = new HashSet<Future<Integer>>();
            // Shared across all callables so the download size limit applies to the whole download
            final AtomicInteger resultsCount = new AtomicInteger(0);
            // Final copy of checkLimit so it can be captured by the anonymous callables
            final boolean threadCheckLimit = checkLimit;

            List<Callable<Integer>> solrCallables = new ArrayList<>(queries.size());
            // execute each query, writing the results to stream
            for (final SolrQuery splitByFacetQuery : queries) {
                // define a thread
                Callable<Integer> solrCallable = new Callable<Integer>() {
                    /**
                     * Pages through one split query and returns the number of records it processed.
                     */
                    @Override
                    public Integer call() throws Exception {
                        int startIndex = 0;
                        // Randomise the wakeup time so they don't all wakeup on a periodic cycle
                        long localThrottle = throttle + Math.round(Math.random() * throttle);

                        // Combine the request's filter queries with those attached to this split query
                        String[] fq = downloadParams.getFormattedFq();
                        if (splitByFacetQuery.getFilterQueries() != null
                                && splitByFacetQuery.getFilterQueries().length > 0) {
                            if (fq == null) {
                                fq = new String[0];
                            }
                            fq = org.apache.commons.lang3.ArrayUtils.addAll(fq,
                                    splitByFacetQuery.getFilterQueries());
                        }

                        QueryResponse qr = runSolrQuery(splitByFacetQuery, fq, downloadBatchSize, startIndex,
                                "_docid_", "asc");
                        AtomicInteger recordsForThread = new AtomicInteger(0);
                        if (logger.isDebugEnabled()) {
                            logger.debug(
                                    splitByFacetQuery.getQuery() + " - results: " + qr.getResults().size());
                        }

                        // Keep paging until a page comes back empty, the limit is reached or an interrupt is flagged
                        while (qr != null && !qr.getResults().isEmpty() && !interruptFound.get()) {
                            if (logger.isDebugEnabled()) {
                                logger.debug(
                                        "Start index: " + startIndex + ", " + splitByFacetQuery.getQuery());
                            }
                            int count = 0;
                            if (sensitiveQ.contains(splitByFacetQuery)) {
                                count = processQueryResults(uidStats, sensitiveFields, qaFields,
                                        concurrentWrapper, qr, dd, threadCheckLimit, resultsCount,
                                        maxDownloadSize, analysisFields);
                            } else {
                                // write non-sensitive values into sensitive fields when not authorised for their sensitive values
                                count = processQueryResults(uidStats, notSensitiveFields, qaFields,
                                        concurrentWrapper, qr, dd, threadCheckLimit, resultsCount,
                                        maxDownloadSize, analysisFields);
                            }
                            recordsForThread.addAndGet(count);
                            startIndex += downloadBatchSize;
                            // we have already set the Filter query the first time the query was constructed
                            // rerun with the same params but different startIndex
                            if (!threadCheckLimit || resultsCount.get() < maxDownloadSize) {
                                if (!threadCheckLimit) {
                                    // throttle the download by sleeping
                                    Thread.sleep(localThrottle);
                                }
                                qr = runSolrQuery(splitByFacetQuery, null, downloadBatchSize, startIndex,
                                        "_docid_", "asc");
                            } else {
                                // Limit reached: stop paging for this query
                                qr = null;
                            }
                        }
                        return recordsForThread.get();
                    }
                };
                solrCallables.add(solrCallable);
            }

            List<Future<Integer>> futures = new ArrayList<>(solrCallables.size());
            for (Callable<Integer> nextCallable : solrCallables) {
                futures.add(nextExecutor.submit(nextCallable));
            }

            // Busy wait because we need to be able to respond to an interrupt on any callable
            // and propagate it to all of the others for this particular query
            // Because the executor service is shared to prevent too many concurrent threads being run,
            // this requires a busy wait loop on the main thread to monitor state
            boolean waitAgain = false;
            do {
                waitAgain = false;
                for (Future<Integer> future : futures) {
                    if (!future.isDone()) {
                        // Wait again even if an interrupt flag is set, as it may have been set partway through the iteration
                        // The calls to future.cancel will occur next time if the interrupt is setup partway through an iteration
                        waitAgain = true;
                        // If one thread finds an interrupt it is propagated to others using the interruptFound AtomicBoolean
                        if (interruptFound.get()) {
                            future.cancel(true);
                        }
                    }
                }
                // Don't trigger the timeout interrupt if we don't have to wait again as we are already done at this point
                if (waitAgain && (System.currentTimeMillis() - start) > downloadMaxTime) {
                    interruptFound.set(true);
                    break;
                }

                if (waitAgain) {
                    Thread.sleep(downloadCheckBusyWaitSleep);
                }
            } while (waitAgain);

            // NOTE(review): Future.isDone() is also true for cancelled futures, and get() on a cancelled
            // future throws CancellationException; futures cancelled in the loop above would therefore
            // make this loop throw - confirm whether that is intended
            AtomicInteger totalDownload = new AtomicInteger(0);
            for (Future<Integer> future : futures) {
                if (future.isDone()) {
                    totalDownload.addAndGet(future.get());
                } else {
                    // All incomplete futures that survived the loop above are cancelled here
                    future.cancel(true);
                }
            }

            long finish = System.currentTimeMillis();
            long timeTakenInSecs = (finish - start) / 1000;
            // Avoid a division by zero in the rate calculation below
            if (timeTakenInSecs <= 0)
                timeTakenInSecs = 1;
            if (logger.isInfoEnabled()) {
                logger.info("Download of " + resultsCount + " records in " + timeTakenInSecs
                        + " seconds. Record/sec: " + resultsCount.intValue() / timeTakenInSecs);
            }

        } finally {
            try {
                // Once we get here, we need to finalise starting at the concurrent wrapper,
                // as there are no more non-sentinel records to be added to the queue
                // This eventually triggers finalisation of the underlying writer when the queue empties
                // This is a soft shutdown, and hence we wait below for this stage to complete in normal circumstances
                // Note, this blocks for writerTimeoutWaitMillis trying to legitimately add the sentinel to the end of the queue
                // We force the sentinel to be added in the hard shutdown phase below
                concurrentWrapper.finalise();
            } finally {
                try {
                    // Track the current time right now so we can abort after downloadMaxCompletionTime milliseconds in this phase
                    final long completionStartTime = System.currentTimeMillis();
                    // Busy wait check for finalised to be called in the RecordWriter or something is interrupted
                    // By this stage, there are at maximum download.internal.queue.size items remaining (default 1000)
                    while (writerThread.isAlive() && !writerThread.isInterrupted() && !interruptFound.get()
                            && !Thread.currentThread().isInterrupted() && !rw.finalised()
                            && !((System.currentTimeMillis()
                                    - completionStartTime) > downloadMaxCompletionTime)) {
                        Thread.sleep(downloadCheckBusyWaitSleep);
                    }
                } finally {
                    try {
                        // Attempt all actions that could trigger the writer thread to finalise, as by this stage we are in hard shutdown mode

                        // Signal that we are in hard shutdown mode
                        interruptFound.set(true);

                        // Add the sentinel or clear the queue and try again until it gets onto the queue
                        // We are in hard shutdown mode, so only priority is that the queue either
                        // gets the sentinel or the thread is interrupted to clean up resources
                        while (!queue.offer(sentinel)) {
                            queue.clear();
                        }

                        // Interrupt the single writer thread
                        writerThread.interrupt();

                        // Explicitly call finalise on the RecordWriter as a backup
                        // In normal circumstances it is called via the sentinel or the interrupt
                        // This will not block if finalise has been called previously in the current three implementations
                        rw.finalise();
                    } finally {
                        // A writer error takes precedence over flushing; throwing from this finally block
                        // replaces any exception that was already propagating
                        if (rw != null && rw.hasError()) {
                            throw RecordWriterException.newRecordWriterException(dd, downloadParams, true, rw);
                        } else {
                            // Flush whatever output was still pending for more deterministic debugging
                            out.flush();
                        }
                    }
                }
            }
        }
    } catch (SolrServerException ex) {
        // The SOLR failure is only logged; the method still returns the statistics gathered so far
        logger.error("Problem communicating with SOLR server while processing download. " + ex.getMessage(),
                ex);
    }
    return uidStats;
}

From source file:au.org.ala.biocache.dao.SearchDAOImpl.java

License:Open Source License

/**
 * Note - this method extracts from CASSANDRA rather than the Index.
 */
public ConcurrentMap<String, AtomicInteger> writeResultsToStream(DownloadRequestParams downloadParams,
        OutputStream out, int i, boolean includeSensitive, DownloadDetailsDTO dd, boolean limit)
        throws Exception {
    expandRequestedFields(downloadParams, false);

    int resultsCount = 0;
    ConcurrentMap<String, AtomicInteger> uidStats = new ConcurrentHashMap<>();
    //stores the remaining limit for data resources that have a download limit
    Map<String, Integer> downloadLimit = new HashMap<>();

    try {
        SolrQuery solrQuery = initSolrQuery(downloadParams, false, null);
        //ensure that the qa facet is being ordered alphabetically so that the order is consistent.
        boolean getAssertionsFromFacets = "all".equals(downloadParams.getQa())
                || "includeall".equals(downloadParams.getQa());
        if (getAssertionsFromFacets) {
            //set the order for the facet to be based on the index - this will force the assertions to be returned in the same order each time
            //based on alphabetical sort.  The number of QA's may change between searches so we can't guarantee that the order won't change
            solrQuery.add("f.assertions.facet.sort", "index");
        }
        queryFormatUtils.formatSearchQuery(downloadParams);
        if (logger.isInfoEnabled()) {
            logger.info("search query: " + downloadParams.getFormattedQuery());
        }
        solrQuery.setQuery(downloadParams.getFormattedQuery());
        //Only the fields specified below will be included in the results from the SOLR Query
        solrQuery.setFields("row_key", "institution_uid", "collection_uid", "data_resource_uid",
                "data_provider_uid");

        String dFields = downloadParams.getFields();

        if (includeSensitive) {
            //include raw latitude and longitudes
            dFields = dFields
                    .replaceFirst("decimalLatitude.p", "decimalLatitude,decimalLongitude,decimalLatitude.p")
                    .replaceFirst(",locality,", ",locality,sensitive_locality,");
        }

        StringBuilder sb = new StringBuilder(dFields);
        if (downloadParams.getExtra().length() > 0) {
            sb.append(",").append(downloadParams.getExtra());
        }
        StringBuilder qasb = new StringBuilder();

        solrQuery.setFacet(true);
        QueryResponse qr = runSolrQuery(solrQuery, downloadParams.getFormattedFq(), 0, 0, "_docid_", "asc");
        dd.setTotalRecords(qr.getResults().getNumFound());
        //get the assertion facets to add them to the download fields
        List<FacetField> facets = qr.getFacetFields();
        for (FacetField facet : facets) {
            if (facet.getName().equals("assertions") && facet.getValueCount() > 0) {
                qasb.append(getQAFromFacet(facet));
            } else if (facet.getName().equals("data_resource_uid") && checkDownloadLimits) {
                //populate the download limit
                initDownloadLimits(downloadLimit, facet);
            }
        }

        if ("includeall".equals(downloadParams.getQa())) {
            qasb = getAllQAFields();
        }

        //Write the header line
        String qas = qasb.toString();

        List<String>[] indexedFields = downloadFields.getIndexFields(downloadParams.getFields().split(","),
                false, downloadParams.getLayersServiceUrl());

        String[] fields = sb.toString().split(",");

        //avoid analysis field duplicates
        for (String s : indexedFields[5])
            fields = (String[]) ArrayUtils.removeElement(fields, s);

        String[] qaFields = qas.equals("") ? new String[] {} : qas.split(",");
        String[] qaTitles = downloadFields.getHeader(qaFields, false, false);
        String[] titles = downloadFields.getHeader(fields, true, downloadParams.getDwcHeaders());
        String[] analysisHeaders = indexedFields[4].toArray(new String[0]);
        String[] analysisFields = indexedFields[5].toArray(new String[0]);

        //apply custom header
        String[] customHeader = dd.getRequestParams().getCustomHeader().split(",");
        for (i = 0; i + 1 < customHeader.length; i += 2) {
            for (int j = 0; j < analysisFields.length; j++) {
                if (customHeader[i].equals(analysisFields[j])) {
                    analysisFields[j] = customHeader[i + 1];
                }
            }
            for (int j = 0; j < qaFields.length; j++) {
                if (customHeader[i].equals(qaFields[j])) {
                    qaTitles[j] = customHeader[i + 1];
                }
            }
            for (int j = 0; j < fields.length; j++) {
                if (customHeader[i].equals(fields[j])) {
                    titles[j] = customHeader[i + 1];
                }
            }
        }

        //append sensitive fields for the header only
        if (!includeSensitive && dd.getSensitiveFq() != null) {
            //sensitive headers do not have a DwC name, always set getIndexFields dwcHeader=false
            List<String>[] sensitiveHdr = downloadFields.getIndexFields(sensitiveSOLRHdr, false,
                    downloadParams.getLayersServiceUrl());

            titles = org.apache.commons.lang3.ArrayUtils.addAll(titles,
                    sensitiveHdr[2].toArray(new String[] {}));
        }
        String[] header = org.apache.commons.lang3.ArrayUtils
                .addAll(org.apache.commons.lang3.ArrayUtils.addAll(titles, qaTitles), analysisHeaders);
        //Create the Writer that will be used to format the records
        //construct correct RecordWriter based on the supplied fileType
        final RecordWriterError rw = downloadParams.getFileType().equals("csv")
                ? new CSVRecordWriter(out, header, downloadParams.getSep(), downloadParams.getEsc())
                : (downloadParams.getFileType().equals("tsv") ? new TSVRecordWriter(out, header)
                        : new ShapeFileRecordWriter(tmpShapefileDir, downloadParams.getFile(), out,
                                (String[]) ArrayUtils.addAll(fields, qaFields)));

        try {
            if (rw instanceof ShapeFileRecordWriter) {
                dd.setHeaderMap(((ShapeFileRecordWriter) rw).getHeaderMappings());
            }

            //retain output header fields and field names for inclusion of header info in the download
            StringBuilder infoFields = new StringBuilder("infoFields,");
            for (String h : fields)
                infoFields.append(",").append(h);
            for (String h : analysisFields)
                infoFields.append(",").append(h);
            for (String h : qaFields)
                infoFields.append(",").append(h);

            StringBuilder infoHeader = new StringBuilder("infoHeaders,");
            for (String h : header)
                infoHeader.append(",").append(h);

            String info = infoFields.toString();
            while (info.contains(",,"))
                info = info.replace(",,", ",");
            uidStats.put(info, new AtomicInteger(-1));
            String hdr = infoHeader.toString();
            while (hdr.contains(",,"))
                hdr = hdr.replace(",,", ",");
            uidStats.put(hdr, new AtomicInteger(-2));

            //download the records that have limits first...
            if (downloadLimit.size() > 0) {
                String[] originalFq = downloadParams.getFormattedFq();
                StringBuilder fqBuilder = new StringBuilder("-(");
                for (String dr : downloadLimit.keySet()) {
                    //add another fq to the search for data_resource_uid
                    downloadParams.setFq((String[]) ArrayUtils.add(originalFq, "data_resource_uid:" + dr));
                    resultsCount = downloadRecords(downloadParams, rw, downloadLimit, uidStats, fields,
                            qaFields, resultsCount, dr, includeSensitive, dd, limit, analysisFields);
                    if (fqBuilder.length() > 2) {
                        fqBuilder.append(" OR ");
                    }
                    fqBuilder.append("data_resource_uid:").append(dr);
                }
                fqBuilder.append(")");
                //now include the rest of the data resources
                //add extra fq for the remaining records
                downloadParams.setFq((String[]) ArrayUtils.add(originalFq, fqBuilder.toString()));
                resultsCount = downloadRecords(downloadParams, rw, downloadLimit, uidStats, fields, qaFields,
                        resultsCount, null, includeSensitive, dd, limit, analysisFields);
            } else {
                //download all at once
                downloadRecords(downloadParams, rw, downloadLimit, uidStats, fields, qaFields, resultsCount,
                        null, includeSensitive, dd, limit, analysisFields);
            }
        } finally {
            rw.finalise();
        }
    } catch (SolrServerException ex) {
        logger.error("Problem communicating with SOLR server. " + ex.getMessage(), ex);
    }

    return uidStats;
}

From source file:au.org.ala.biocache.dao.SearchDAOImpl.java

License:Open Source License

/**
 * Builds the list of occurrence points for a search by faceting on the field named by the point type.
 * <p>
 * Each facet value is expected to be a comma separated coordinate string. The parsed values are reversed
 * before being stored on the point (the original code notes the result "must be long, lat order").
 *
 * @param searchParams the search whose formatted query and fq are sent to SOLR
 * @param pointType    supplies the facet field name and is stored on every point created
 * @param max          the facet limit (-1 means unlimited)
 * @return the points found; empty when the response carries no facets
 * @throws Exception if formatting or running the query fails
 */
private List<OccurrencePoint> getPoints(SpatialSearchRequestParams searchParams, PointType pointType, int max)
        throws Exception {
    List<OccurrencePoint> result = new ArrayList<OccurrencePoint>();
    queryFormatUtils.formatSearchQuery(searchParams);
    if (logger.isInfoEnabled()) {
        logger.info("search query: " + searchParams.getFormattedQuery());
    }

    // Facet-only request: no documents are asked for on the query itself
    SolrQuery facetQuery = new SolrQuery();
    facetQuery.setQueryType("standard");
    facetQuery.setQuery(searchParams.getFormattedQuery());
    facetQuery.setRows(0);
    facetQuery.setFacet(true);
    facetQuery.addFacetField(pointType.getLabel());
    facetQuery.setFacetMinCount(1);
    facetQuery.setFacetLimit(max); // unlimited = -1

    QueryResponse response = runSolrQuery(facetQuery, searchParams.getFormattedFq(), 1, 0, "score", "asc");
    List<FacetField> facetFields = response.getFacetFields();
    if (facetFields == null) {
        return result;
    }

    for (FacetField facetField : facetFields) {
        List<FacetField.Count> entries = facetField.getValues();
        boolean matchesPointType = facetField.getName().contains(pointType.getLabel());
        if (!matchesPointType || entries == null || entries.size() == 0) {
            continue;
        }

        for (FacetField.Count entry : entries) {
            if (StringUtils.isEmpty(entry.getName()) || entry.getCount() <= 0) {
                continue;
            }

            OccurrencePoint occurrencePoint = new OccurrencePoint(pointType);
            occurrencePoint.setCount(entry.getCount());

            // Parse every comma separated token; tokens that are not numbers are logged and dropped
            List<Float> parsed = new ArrayList<Float>();
            for (String token : StringUtils.split(entry.getName(), ',')) {
                try {
                    parsed.add(Float.parseFloat(token));
                } catch (NumberFormatException numberFormatException) {
                    logger.warn("Error parsing Float for Lat/Long: "
                            + numberFormatException.getMessage(), numberFormatException);
                }
            }

            if (parsed.isEmpty()) {
                continue;
            }
            Collections.reverse(parsed); // must be long, lat order
            occurrencePoint.setCoordinates(parsed);
            result.add(occurrencePoint);
        }
    }
    return result;
}

From source file:au.org.ala.biocache.dao.SearchDAOImpl.java

License:Open Source License

/**
 * Runs a facet-only query on the supplied point field and hands back the raw facet.
 *
 * @param searchParams the search; its formatted query, fq and facet limit ({@code getFlimit()}) are used
 * @param pointType    the name of the field to facet on
 * @return the first facet in the response, or {@code null} when the response has none
 * @throws Exception if formatting or running the query fails
 * @see au.org.ala.biocache.dao.SearchDAO#getFacetPointsShort(au.org.ala.biocache.dto.SpatialSearchRequestParams, String)
 */
@Override
public FacetField getFacetPointsShort(SpatialSearchRequestParams searchParams, String pointType)
        throws Exception {
    queryFormatUtils.formatSearchQuery(searchParams);
    if (logger.isInfoEnabled()) {
        logger.info("search query: " + searchParams.getFormattedQuery());
    }

    SolrQuery facetQuery = new SolrQuery();
    facetQuery.setRequestHandler("standard");
    facetQuery.setQuery(searchParams.getFormattedQuery());
    facetQuery.setRows(0);
    facetQuery.setFacet(true);
    facetQuery.addFacetField(pointType);
    facetQuery.setFacetMinCount(1);
    facetQuery.setFacetLimit(searchParams.getFlimit()); // unlimited = -1

    QueryResponse response = runSolrQuery(facetQuery, searchParams.getFormattedFq(), 0, 0, "_docid_", "asc");
    List<FacetField> facetFields = response.getFacetFields();

    // Only one facet field was requested, so at most one is expected back
    if (facetFields == null || facetFields.isEmpty()) {
        return null;
    }
    return facetFields.get(0);
}

From source file:au.org.ala.biocache.dao.SearchDAOImpl.java

License:Open Source License

/**
 * Returns one point per occurrence record found by the search.
 * <p>
 * The page size of {@code searchParams} is forced to 100 before the query is run. Each returned point
 * carries the occurrence uuid and its coordinates in longitude, latitude order. Occurrences that have
 * no longitude or no latitude are skipped, because a point cannot be built for them.
 *
 * @param searchParams the search to run; its page size is overwritten with 100
 * @param pointType    supplies the name of the facet field added to the query
 * @param colourBy     not used by this implementation
 * @return the points for the occurrences on the page; never {@code null}
 * @throws Exception if formatting or running the query fails
 * @see au.org.ala.biocache.dao.SearchDAO#getOccurrences(au.org.ala.biocache.dto.SpatialSearchRequestParams, au.org.ala.biocache.dto.PointType, String)
 */
@Override
public List<OccurrencePoint> getOccurrences(SpatialSearchRequestParams searchParams, PointType pointType,
        String colourBy) throws Exception {

    List<OccurrencePoint> points = new ArrayList<OccurrencePoint>();
    searchParams.setPageSize(100);

    queryFormatUtils.formatSearchQuery(searchParams);
    String queryString = searchParams.getFormattedQuery();

    if (logger.isInfoEnabled()) {
        logger.info("search query: " + queryString);
    }
    SolrQuery solrQuery = new SolrQuery();
    solrQuery.setQueryType("standard");
    solrQuery.setQuery(queryString);
    solrQuery.setRows(0);
    solrQuery.setFacet(true);
    solrQuery.addFacetField(pointType.getLabel());
    solrQuery.setFacetMinCount(1);
    solrQuery.setFacetLimit(MAX_DOWNLOAD_SIZE); // unlimited = -1

    QueryResponse qr = runSolrQuery(solrQuery, searchParams);
    SearchResultDTO searchResults = processSolrResponse(searchParams, qr, solrQuery, OccurrenceIndex.class);
    List<OccurrenceIndex> ocs = searchResults.getOccurrences();

    if (ocs == null) {
        return points;
    }

    for (OccurrenceIndex oc : ocs) {
        // The coordinate getters return objects, so they may be null for records without a location;
        // dereferencing them used to abort the whole request with a NullPointerException
        if (oc.getDecimalLongitude() == null || oc.getDecimalLatitude() == null) {
            continue;
        }

        List<Float> coords = new ArrayList<Float>();
        coords.add(oc.getDecimalLongitude().floatValue());
        coords.add(oc.getDecimalLatitude().floatValue());

        OccurrencePoint point = new OccurrencePoint();
        point.setCoordinates(coords);
        point.setOccurrenceUid(oc.getUuid());

        points.add(point);
    }

    return points;
}

From source file:au.org.ala.biocache.dao.SearchDAOImpl.java

License:Open Source License

/**
 * Counts the occurrence records held for each data provider by faceting on the provider uid and the
 * provider name over all records.
 * <p>
 * Example request from the original documentation (the field names in it differ from those used below):
 * http://ala-biocache1.vm.csiro.au:8080/solr/select?q=*:*&rows=0&facet=true&facet.field=data_provider_id&facet.field=data_provider&facet.sort=data_provider_id
 * <p>
 * NOTE(review): the uid facet and the name facet are paired purely by position, which assumes SOLR
 * returns both facets in the same order - confirm. Entries that cannot be paired are ignored.
 * <p>
 * NOTE(review): the original author questioned whether this method is still used by anything.
 *
 * @return one DTO per provider with a count above zero; empty when the response does not hold exactly
 *         two facets
 * @throws Exception if running the query fails
 * @see au.org.ala.biocache.dao.SearchDAO#getDataProviderCounts()
 */
@Override
public List<DataProviderCountDTO> getDataProviderCounts() throws Exception {

    List<DataProviderCountDTO> dpDTOs = new ArrayList<DataProviderCountDTO>();
    SolrQuery solrQuery = new SolrQuery();
    solrQuery.setQueryType("standard");
    solrQuery.setQuery("*:*");
    solrQuery.setRows(0);
    solrQuery.setFacet(true);
    solrQuery.addFacetField("data_provider_uid");
    solrQuery.addFacetField("data_provider");
    solrQuery.setFacetMinCount(1);
    QueryResponse qr = runSolrQuery(solrQuery, null, 1, 0, "data_provider", "asc");
    List<FacetField> facets = qr.getFacetFields();

    if (facets != null && facets.size() == 2) {

        // Facets come back in the order they were requested: uid first, then name
        FacetField dataProviderIdFacet = facets.get(0);
        FacetField dataProviderNameFacet = facets.get(1);

        List<FacetField.Count> dpIdEntries = dataProviderIdFacet.getValues();
        List<FacetField.Count> dpNameEntries = dataProviderNameFacet.getValues();

        if (dpIdEntries != null && dpNameEntries != null) {
            if (dpIdEntries.size() != dpNameEntries.size()) {
                logger.warn("Data provider facets differ in size: " + dpIdEntries.size() + " uids but "
                        + dpNameEntries.size() + " names");
            }

            // Only walk the entries present in both lists; indexing the name list by the size of the
            // uid list used to throw when the two facets had different lengths
            int pairCount = Math.min(dpIdEntries.size(), dpNameEntries.size());
            for (int i = 0; i < pairCount; i++) {

                FacetField.Count dpIdEntry = dpIdEntries.get(i);
                FacetField.Count dpNameEntry = dpNameEntries.get(i);

                String dataProviderId = dpIdEntry.getName();
                String dataProviderName = dpNameEntry.getName();
                long count = dpIdEntry.getCount();

                if (count > 0) {
                    DataProviderCountDTO dto = new DataProviderCountDTO(dataProviderId, dataProviderName,
                            count);
                    dpDTOs.add(dto);
                }
            }
        }
    }
    if (logger.isInfoEnabled()) {
        logger.info("Find data providers = " + dpDTOs.size());
    }
    return dpDTOs;
}

From source file:au.org.ala.biocache.dao.SearchDAOImpl.java

License:Open Source License

/**
 * @see au.org.ala.biocache.dao.SearchDAO#findRecordByStateFor(java.lang.String)
 * IS THIS BEGIN USED OR NECESSARY/*from   w ww  .  j  a  va 2s.  c o  m*/
 */
@Override
public List<FieldResultDTO> findRecordByStateFor(String query) throws Exception {
    List<FieldResultDTO> fDTOs = new ArrayList<FieldResultDTO>(); // new OccurrencePoint(PointType.POINT);
    SolrQuery solrQuery = new SolrQuery();
    solrQuery.setQueryType("standard");
    solrQuery.setQuery(query);
    solrQuery.setRows(0);
    solrQuery.setFacet(true);
    solrQuery.addFacetField("state");
    solrQuery.setFacetMinCount(1);
    QueryResponse qr = runSolrQuery(solrQuery, null, 1, 0, "data_provider", "asc");
    List<FacetField> facets = qr.getFacetFields();
    FacetField ff = qr.getFacetField("state");
    if (ff != null) {
        for (Count count : ff.getValues()) {
            //only start adding counts when we hit a decade with some results.
            if (count.getCount() > 0) {
                FieldResultDTO f = new FieldResultDTO(count.getName(), count.getCount());
                fDTOs.add(f);
            }
        }
    }
    return fDTOs;
}

From source file:au.org.ala.biocache.dao.SearchDAOImpl.java

License:Open Source License

/**
 * Calculates the breakdown of the supplied query based on the supplied params.
 * <p>
 * The query is faceted either on the explicit level given in the params or on a list of ranks obtained
 * from {@code searchUtils}. When a positive max is supplied the facets are walked in reverse order and
 * the first one with no more than max values is used; otherwise, when a rank or a level is supplied,
 * the first facet that has any values is used.
 *
 * @param queryParams the breakdown request; its page size is set to 0 and, when both name and rank are
 *                    present, a "rank:name" filter is appended to its formatted fq
 * @return the breakdown, or {@code null} when nothing was found or neither max, rank nor level was
 *         supplied; the taxa of the returned DTO are only set when a facet satisfied the rule above
 * @throws Exception if formatting or running the query fails
 */
public TaxaRankCountDTO calculateBreakdown(BreakdownRequestParams queryParams) throws Exception {
    if (logger.isDebugEnabled()) {
        logger.debug("Attempting to find the counts for " + queryParams);
    }

    SolrQuery facetQuery = new SolrQuery();
    facetQuery.setQueryType("standard");
    queryFormatUtils.formatSearchQuery(queryParams);
    facetQuery.setQuery(queryParams.getFormattedQuery());
    queryParams.setPageSize(0);
    facetQuery.setFacet(true);
    facetQuery.setFacetMinCount(1);
    facetQuery.setFacetSort("count");
    facetQuery.setFacetLimit(-1);

    // Restrict the search to the named taxon when both its rank and name are known
    if (StringUtils.isNotEmpty(queryParams.getName()) && StringUtils.isNotEmpty(queryParams.getRank())) {
        String[] rankFilter = new String[] { queryParams.getRank() + ":" + queryParams.getName() };
        queryParams.setFormattedFq((String[]) ArrayUtils.addAll(queryParams.getFormattedFq(), rankFilter));
    }

    // Choose the facet fields: the exact level supplied by the user, otherwise a list of ranks
    if (queryParams.getLevel() != null) {
        facetQuery.addFacetField(queryParams.getLevel());
    } else {
        List<String> rankFields;
        if (queryParams.getRank() != null) {
            rankFields = searchUtils.getNextRanks(queryParams.getRank(), queryParams.getName() == null);
        } else {
            rankFields = searchUtils.getRanks();
        }
        for (String rankField : rankFields) {
            facetQuery.addFacetField(rankField);
        }
    }

    QueryResponse response = runSolrQuery(facetQuery, queryParams);
    TaxaRankCountDTO breakdown = null;

    if (queryParams.getMax() != null && queryParams.getMax() > 0) {
        // Walk the facets in reverse order until one has no more than max values
        if (response.getResults().getNumFound() > 0) {
            List<FacetField> reversedFacets = response.getFacetFields();
            Collections.reverse(reversedFacets);
            for (FacetField facet : reversedFacets) {
                breakdown = new TaxaRankCountDTO(facet.getName());
                List<Count> values = facet.getValues();
                if (values == null || values.size() > queryParams.getMax()) {
                    continue;
                }
                List<FieldResultDTO> taxa = new ArrayList<FieldResultDTO>();
                for (Count value : values) {
                    if (value.getCount() > 0) {
                        taxa.add(new FieldResultDTO(value.getName(), value.getCount()));
                    }
                }
                breakdown.setTaxa(taxa);
                break;
            }
        }
    } else if (queryParams.getRank() != null || queryParams.getLevel() != null) {
        // Walk the facets in the order returned and stop at the first one that has values
        if (response.getResults().getNumFound() > 0) {
            for (FacetField facet : response.getFacetFields()) {
                breakdown = new TaxaRankCountDTO(facet.getName());
                List<Count> values = facet.getValues();
                if (values == null || values.isEmpty()) {
                    continue;
                }
                List<FieldResultDTO> taxa = new ArrayList<FieldResultDTO>();
                for (Count value : values) {
                    if (value.getCount() > 0) {
                        taxa.add(new FieldResultDTO(value.getName(), value.getCount()));
                    }
                }
                breakdown.setTaxa(taxa);
                break;
            }
        }
    }
    return breakdown;
}

From source file:au.org.ala.biocache.dao.SearchDAOImpl.java

License:Open Source License

/**
 * Finds the taxon counts for the supplied query, faceting on the explicit level from the params when
 * one is given and otherwise on the ranks returned by {@code searchUtils.getNextRanks}.
 * <p>
 * The ranks are checked in order and the first one whose facet has values supplies the taxa.
 *
 * @param breakdownParams supplies the level, rank, name and query context
 * @param query           the SOLR query to run
 * @return the breakdown, or {@code null} when there are no ranks to facet on or the query returned
 *         no results
 * @throws Exception if running the query fails
 * @see au.org.ala.biocache.dao.SearchDAO#findTaxonCountForUid(au.org.ala.biocache.dto.BreakdownRequestParams, String)
 * @deprecated use {@link #calculateBreakdown(BreakdownRequestParams)} instead
 */
@Deprecated
public TaxaRankCountDTO findTaxonCountForUid(BreakdownRequestParams breakdownParams, String query)
        throws Exception {
    List<String> rankFields;
    if (breakdownParams.getLevel() == null) {
        rankFields = searchUtils.getNextRanks(breakdownParams.getRank(), breakdownParams.getName() == null);
    } else {
        rankFields = new ArrayList<String>();
        rankFields.add(breakdownParams.getLevel());
    }
    if (rankFields == null || rankFields.isEmpty()) {
        return null;
    }

    SolrQuery facetQuery = new SolrQuery();
    facetQuery.setQueryType("standard");
    facetQuery.setQuery(query);
    facetQuery.setRows(0);
    facetQuery.setFacet(true);
    facetQuery.setFacetMinCount(1);
    facetQuery.setFacetSort("count");
    facetQuery.setFacetLimit(-1); //we want all facets
    for (String rankField : rankFields) {
        facetQuery.addFacetField(rankField);
    }

    String[] queryContext = queryFormatUtils.getQueryContextAsArray(breakdownParams.getQc());
    QueryResponse response = runSolrQuery(facetQuery, queryContext, 1, 0, breakdownParams.getRank(), "asc");

    TaxaRankCountDTO breakdown = null;
    if (response.getResults().size() > 0) {
        for (String rankField : rankFields) {
            breakdown = new TaxaRankCountDTO(rankField);
            FacetField facet = response.getFacetField(rankField);
            if (facet == null || facet.getValues() == null) {
                continue;
            }
            List<Count> values = facet.getValues();
            if (values.isEmpty()) {
                continue;
            }
            List<FieldResultDTO> taxa = new ArrayList<FieldResultDTO>();
            for (Count value : values) {
                taxa.add(new FieldResultDTO(value.getName(), value.getCount()));
            }
            breakdown.setTaxa(taxa);
            break;
        }
    }
    return breakdown;
}