List of usage examples for the org.apache.solr.client.solrj.SolrQuery constructor
public SolrQuery()
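Before the per-project examples below, a minimal sketch of the no-argument constructor in use. The base URL and the collection name "techproducts" are illustrative assumptions, not taken from any example on this page; the client API assumes SolrJ 6+.

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class SolrQueryExample {
    public static void main(String[] args) throws Exception {
        // Base URL and collection name are assumptions for illustration only
        try (SolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/techproducts").build()) {
            SolrQuery query = new SolrQuery();   // no-arg constructor: an empty parameter set
            query.setQuery("*:*");               // match all documents
            query.setStart(0);                   // first page
            query.setRows(10);                   // page size
            QueryResponse response = client.query(query);
            System.out.println("Found " + response.getResults().getNumFound() + " documents");
        }
    }
}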
From source file:SolrUpdate.java
License:Apache License
public void getAllSolrRecords() {
    try {
        // Core reload request (prepared here but never actually sent to the server)
        CoreAdminRequest adminRequest = new CoreAdminRequest();
        adminRequest.setAction(CoreAdminAction.RELOAD);

        SolrServer solr = new HttpSolrServer("http://localhost:8983/solr");

        // Fetch up to 10000 documents that have a pmid field
        SolrQuery theq = new SolrQuery();
        theq.setQuery("pmid:*");
        theq.setStart(0);
        theq.setRows(10000);

        QueryResponse response = solr.query(theq);
        SolrDocumentList list = response.getResults();
        int docnum = 1;

        for (SolrDocument doc : list) {
            Publication currlist = new Publication();
            List<String> fullnames = new ArrayList<String>();
            String currepubsum1 = "", currepubsum2 = "";

            // Copy each stored field into the Publication bean, guarding against missing values
            if (doc.getFieldValue("abstract") != null) {
                currlist.setAbstract(doc.getFieldValue("abstract").toString());
            }
            if (doc.getFieldValue("ptitle") != null) {
                currlist.setTitle(doc.getFieldValue("ptitle").toString());
            }
            if (doc.getFieldValue("author_fullname_list") != null) {
                currlist.setFirst5authors(doc.getFieldValue("author_fullname_list").toString());
            }
            if (doc.getFieldValue("pmid") != null) {
                currlist.setPmid(Integer.valueOf(doc.getFieldValue("pmid").toString()));
            }
            if (doc.getFieldValue("completion") != null) {
                currlist.setCompletion(Boolean.valueOf(doc.getFieldValue("completion").toString()));
            }
            if (doc.getFieldValue("lruid") != null) {
                currlist.setLruid(doc.getFieldValue("lruid").toString());
            }
            if (doc.getFieldValue("draftpoint") != null) {
                currlist.setDraftpoint(Integer.valueOf(doc.getFieldValue("draftpoint").toString()));
            }
            if (doc.getFieldValue("journalname") != null) {
                currlist.setJournalname(doc.getFieldValue("journalname").toString());
            }
            if (doc.getFieldValue("journalyear") != null) {
                currlist.setJournalyear(doc.getFieldValue("journalyear").toString());
            }
            if (doc.getFieldValue("journalday") != null) {
                currlist.setJournalday(doc.getFieldValue("journalday").toString());
            }
            if (doc.getFieldValue("journalmonth") != null) {
                currlist.setJournalmonth(doc.getFieldValue("journalmonth").toString());
            }
            if (doc.getFieldValue("journalpage") != null) {
                currlist.setJournalstartpg(doc.getFieldValue("journalpage").toString());
            }
            if (doc.getFieldValue("journalissue") != null) {
                currlist.setJournalissue(doc.getFieldValue("journalissue").toString());
            }
            if (doc.getFieldValue("journalvolume") != null) {
                currlist.setJournalvolume(doc.getFieldValue("journalvolume").toString());
            }
            if (doc.getFieldValue("publicationdate_year") != null) {
                currlist.setYear(doc.getFieldValue("publicationdate_year").toString());
            }
            if (doc.getFieldValue("doi") != null) {
                currlist.setDoi(doc.getFieldValue("doi").toString());
            }
            if (doc.getFieldValues("pfileinfo") != null) {
                Collection<Object> currcoll = doc.getFieldValues("pfileinfo");
                for (Object currobj : currcoll) {
                    currlist.getFilesanddata().add(currobj.toString());
                }
            }
            if (doc.getFieldValue("author_firstname") != null) {
                currlist.setFauthors((List<String>) doc.getFieldValue("author_firstname"));
            }
            if (doc.getFieldValue("author_lastname") != null) {
                currlist.setLauthors((List<String>) doc.getFieldValue("author_lastname"));
            }
            if (doc.getFieldValue("epubmonth") != null) {
                currlist.setEpubmonth(doc.getFieldValue("epubmonth").toString());
            }
            if (doc.getFieldValue("epubyear") != null) {
                currlist.setEpubyear(doc.getFieldValue("epubyear").toString());
            }
            if (doc.getFieldValue("epubday") != null) {
                currlist.setEpubday(doc.getFieldValue("epubday").toString());
            }

            // Pair first names with last names to build full author names
            int counter = 0;
            for (String currstring : currlist.getFauthors()) {
                currstring += " " + currlist.getLauthors().get(counter);
                fullnames.add(currstring);
                counter++;
            }
            currlist.setFullnames(fullnames);

            // Build the citation summary, e.g. "12(3):45."
            if (currlist.getJournalvolume().length() > 0) {
                currepubsum2 += currlist.getJournalvolume();
            }
            if (currlist.getJournalissue().length() > 0) {
                currepubsum2 += "(" + currlist.getJournalissue() + ")" + ":";
            }
            if (currlist.getJournalstartpg().length() > 0) {
                currepubsum2 += currlist.getJournalstartpg() + ".";
            }

            // Build the e-publication summary
            if (currlist.getEpubday().length() < 1 && currlist.getEpubmonth().length() < 1
                    && currlist.getEpubyear().length() < 1) {
                currepubsum1 = "[Epub ahead of print]";
            } else if (currlist.getEpubyear().length() > 0) {
                currepubsum1 = "Epub " + currlist.getEpubyear() + " " + currlist.getEpubmonth() + " "
                        + currlist.getEpubday();
            } else {
                currepubsum1 = "";
            }
            currlist.setEpubsum(currepubsum1);
            currlist.setEpubsum2(currepubsum2);
            currlist.setIndex(docnum);

            currlist.setComp(currlist.getCompletion() ? "Visible" : "Hidden");

            solrrecords.add(currlist);
            docnum++;
        }
    } catch (Exception ex) {
        System.out.println(ex);
    }
    System.out.println("There are a total of this many records gathered: " + solrrecords.size());
}
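The example above uses the SolrServer/HttpSolrServer classes, which were deprecated in later SolrJ releases in favour of SolrClient/HttpSolrClient. A minimal sketch of the same query against the newer API, assuming SolrJ 6+; the base URL and query are carried over from the example, everything else is standard SolrJ:

// Sketch only: the same "pmid:*" query with the non-deprecated SolrJ client.
// Imports: org.apache.solr.client.solrj.*, org.apache.solr.client.solrj.impl.HttpSolrClient,
// org.apache.solr.common.SolrDocumentList
public SolrDocumentList getAllSolrRecordsModern() throws Exception {
    try (SolrClient solr = new HttpSolrClient.Builder("http://localhost:8983/solr").build()) {
        SolrQuery theq = new SolrQuery();
        theq.setQuery("pmid:*");
        theq.setStart(0);
        theq.setRows(10000);
        QueryResponse response = solr.query(theq);
        return response.getResults();
    }
}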
From source file:QueryLoop.java
License:GNU General Public License
@Override
public void run() {
    Random ran = new Random();
    long querycount = 1;
    long resultcount = 1;
    long oldresultcount = 1;
    long time = System.currentTimeMillis();
    double timesum = 0;
    double docsum = 0;
    int seccount = 0;
    System.out.println("Starting");
    BufferedWriter out = null;
    try {
        InetAddress addr = InetAddress.getLocalHost();
        String hostname = addr.getHostName();
        String fname = date + "_" + hostname + "_N" + node_num + "_S" + shard_num + "_Z" + zk_num
                + "_T" + thread_num + ".log";
        System.out.println(fname);
        FileWriter fstream = new FileWriter(fname, false);
        out = new BufferedWriter(fstream);
        while (true) {
            SolrQuery query = new SolrQuery();
            query.setQuery(qs[ran.nextInt(len)]);
            // If we want to restrict other fields:
            // query.setQuery("test sony");
            // query.addFilterQuery("cat:electronics", "store:amazon.com");
            // query.addFilterQuery("blogurl:" + "http\\" + "://www.bloomberg.com");
            // query.setFields("id", "blogurl", "url", "published");
            // query.set("defType", "edismax");
            // query.setStart(0);
            // System.out.println(query.toString());
            SolrDocumentList results = null;
            QueryResponse response = solr_node.query(query);
            results = response.getResults();
            querycount++;
            for (int i = 0; i < results.size(); ++i) {
                // System.out.println(results.get(i));
                resultcount++;
            }
            if ((querycount % 1000) == 1) {
                long newtime = System.currentTimeMillis();
                System.out.println("ThreadID: " + thread_num + ", 1000 Queries, Time passed: "
                        + (newtime - time) + ", #doc results: " + (resultcount - oldresultcount));
                System.out.println(query);
                // System.out.println(results);
                out.write("ThreadID: " + thread_num + ", 1000 Queries, Time passed: " + (newtime - time)
                        + ", #doc results: " + (resultcount - oldresultcount) + "\n");
                out.flush();
                timesum += newtime - time;
                docsum += (resultcount - oldresultcount);
                time = newtime;
                oldresultcount = resultcount;
                seccount++;
                if (seccount >= 60) {
                    out.write("Average Time:" + timesum / seccount + "\n");
                    out.flush();
                    System.out.println("Average Time:" + timesum / seccount + ", Average Docs: "
                            + docsum / seccount + "\n");
                    break;
                }
            }
        } // while
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
    }
    try {
        if (out != null) {
            out.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:actors.SolrActor.java
License:Apache License
public void indexUpdated(SolrIndexEvent msg) {
    try {
        System.out.println("SolrIndexEvent");
        SolrInputDocument doc = msg.getDocuement();

        // Real-time GET: look the document up by id/rev through the /get handler
        System.out.println("GET");
        SolrQuery parameters = new SolrQuery();
        parameters.setRequestHandler("/get");
        String f1 = doc.getFieldValue("literal.id").toString();
        String f2 = doc.getFieldValue("literal.rev").toString();
        parameters.set("id", f1);
        parameters.set("rev", f2);
        QueryResponse response = server.query(parameters);
        NamedList<Object> result = response.getResponse();

        // Only index the document if it does not exist yet
        if (result == null || result.get("doc") == null) {
            System.out.println("/update/extract");
            ContentStreamUpdateRequest req = new ContentStreamUpdateRequest("/update/extract");

            // Stream the file content from its Dropbox URL
            URL url = new URL(doc.getFieldValue("literal.links").toString());
            ContentStreamBase content = new ContentStreamBase.URLStream(url);
            System.out.println("ContentStreamBase");
            req.addContentStream(content);

            // Additional metadata
            req.setParam("literal.id", doc.getFieldValue("literal.id").toString());
            req.setParam("literal.title", doc.getFieldValue("literal.title").toString());
            req.setParam("literal.rev", doc.getFieldValue("literal.rev").toString());
            req.setParam("literal.when", doc.getFieldValue("literal.when").toString());
            req.setParam("literal.path", doc.getFieldValue("literal.path").toString());
            req.setParam("literal.icon", doc.getFieldValue("literal.icon").toString());
            req.setParam("literal.size", doc.getFieldValue("literal.size").toString());
            req.setParam("literal.url", doc.getFieldValue("literal.links").toString());
            req.setParam("uprefix", "attr_");
            req.setParam("fmap.content", "attr_content");
            req.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);

            // Send the request to Solr
            result = server.request(req);
        } else {
            System.out.println("It's already updated");
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:actors.SolrActor.java
License:Apache License
public void performedSearch(SolrSearchEvent msg) {
    try {
        System.out.println("SolrSearchEvent");
        System.out.println("");
        SolrQuery parameters = new SolrQuery();
        // parameters.setRequestHandler("/get");
        parameters.set("q", msg.getQuery());
        parameters.setFields("title", "id", "path", "when", "icon", "size", "content_type");
        parameters.set("defType", "edismax");
        parameters.addFilterQuery("title", "content"); // adds "title" and "content" verbatim as filter queries
        parameters.setStart(0);
        parameters.setHighlight(true).setHighlightSnippets(1);
        // Set other params as needed
        parameters.setParam("hl.fl", "content");
        parameters.setParam("wt", "json");
        server.query(parameters);
        System.out.println(parameters);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
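The example fires the query but discards the response. If the highlight snippets it requests are wanted, a minimal sketch of reading them back through the standard SolrJ API; only field names already used above ("id", "content") are assumed, plus java.util.Map/List imports:

// Sketch: read the highlight snippets requested above.
QueryResponse response = server.query(parameters);
// Map of document id -> (field name -> snippets), as returned by SolrJ
Map<String, Map<String, List<String>>> highlighting = response.getHighlighting();
for (SolrDocument doc : response.getResults()) {
    String id = (String) doc.getFieldValue("id");
    Map<String, List<String>> snippets = highlighting.get(id);
    if (snippets != null && snippets.get("content") != null) {
        System.out.println(id + ": " + snippets.get("content"));
    }
}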
From source file:at.kc.tugraz.ss.service.solr.impl.fct.SSSolrFct.java
License:Apache License
public static SolrDocumentList query(final SolrClient solrQuerier, final SSSolrQueryPars queryPars)
        throws Exception {
    final SolrQuery solrQuery = new SolrQuery();
    solrQuery.setQuery(queryPars.query);
    solrQuery.setRows(queryPars.numRows);
    return solrQuerier.query(solrQuery).getResults();
}
From source file:at.newmedialab.lmf.search.services.indexing.SolrCoreRuntime.java
License:Apache License
/**
 * Ask the server to retrieve all documents that depend on the resource passed as argument;
 * this query is carried out by querying the dependencies field of a document.
 *
 * @param resource
 * @return
 */
public Collection<URI> listDependent(ValueFactory valueFactory, URI resource) {
    SolrQuery query = new SolrQuery();
    query.setQuery("lmf.dependencies:\"" + resource.stringValue() + "\"");
    query.setFields("lmf.uri");
    query.setRows(Integer.MAX_VALUE);
    try {
        SolrDocumentList docs = server.query(query).getResults();
        Set<URI> result = new HashSet<URI>();
        for (SolrDocument doc : docs) {
            result.add(valueFactory.createURI((String) doc.getFirstValue("lmf.uri")));
        }
        return result;
    } catch (SolrServerException e) {
        return Collections.emptyList();
    }
}
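The phrase quoting above works as long as the URI contains no embedded double quotes. For user-supplied values, escaping the Lucene special characters instead can be safer; a minimal sketch using SolrJ's own helper (ClientUtils is standard SolrJ; the field name is carried over from the example):

// Sketch: escape special query characters instead of relying on phrase quoting.
// Uses org.apache.solr.client.solrj.util.ClientUtils from standard SolrJ.
String safe = ClientUtils.escapeQueryChars(resource.stringValue());
SolrQuery query = new SolrQuery();
query.setQuery("lmf.dependencies:" + safe);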
From source file:at.pagu.soldockr.core.QueryParser.java
License:Apache License
/**
 * Convert given Query into a SolrQuery executable via {@link SolrServer}
 *
 * @param query
 * @return
 */
public final SolrQuery constructSolrQuery(SolDockRQuery query) {
    Assert.notNull(query, "Cannot construct solrQuery from null value.");
    Assert.notNull(query.getCriteria(), "Query has to have a criteria.");

    SolrQuery solrQuery = new SolrQuery();
    solrQuery.setParam(CommonParams.Q, getQueryString(query));
    if (query instanceof Query) {
        processQueryOptions(solrQuery, (Query) query);
    }
    if (query instanceof FacetQuery) {
        processFacetOptions(solrQuery, (FacetQuery) query);
    }
    return solrQuery;
}
From source file:at.tugraz.sss.servs.db.impl.SSDBNoSQLSolrImpl.java
License:Apache License
@Override
public List<String> search(final SSDBNoSQLSearchPar par) throws SSErr {
    try {
        final List<String> searchResults = new ArrayList<>();
        final SolrQuery query = new SolrQuery();
        final String globalSearchOp = par.globalSearchOp.toUpperCase();
        final String localSearchOp = par.localSearchOp.toUpperCase();
        String queryString = "";

        if (par.wheres.isEmpty()) {
            throw new Exception("no search fields given");
        }

        // Build one parenthesised clause per search field, e.g. field:(v1 OR v2),
        // and join the clauses with the global operator.
        for (Map.Entry<SSSolrSearchFieldE, List<SSSolrKeywordLabel>> where : par.wheres.entrySet()) {
            if (where.getValue().isEmpty()) {
                throw new Exception("values for search field empty");
            }
            queryString += where.getKey().val + SSStrU.colon + "(";
            for (SSSolrKeywordLabel value : where.getValue()) {
                if (par.useFuzzySearch && !SSStrU.contains(value, SSStrU.blank)) {
                    // single-word values get a fuzzy match suffix (~)
                    queryString += value + SSStrU.tilde + SSStrU.blank + localSearchOp + SSStrU.blank;
                } else {
                    // multi-word values are matched as quoted phrases
                    queryString += SSStrU.doubleQuote + value + SSStrU.doubleQuote + SSStrU.blank
                            + localSearchOp + SSStrU.blank;
                }
            }
            queryString = SSStrU.removeTrailingString(queryString,
                    SSStrU.blank + localSearchOp + SSStrU.blank) + ")" + SSStrU.blank + globalSearchOp
                    + SSStrU.blank;
        }
        queryString = SSStrU.removeTrailingString(queryString,
                SSStrU.blank + globalSearchOp + SSStrU.blank);

        query.setQuery(queryString);
        query.setRows(par.maxResults);

        for (SSSolrSearchResult result : SSSolrSearchResult.get(solrServer.query(query).getResults())) {
            searchResults.add(result.id);
        }
        return searchResults;
    } catch (Exception error) {
        SSServErrReg.regErrThrow(error);
        return null;
    }
}
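For illustration only (the field name and values are hypothetical, and the SSStrU constants are assumed to be the obvious ":", "~", " " and double-quote strings): with wheres = {label: [tree, green tree]}, useFuzzySearch = true, localSearchOp = OR and globalSearchOp = AND, the loop yields the query string

label:(tree~ OR "green tree")

where the single word gets the fuzzy suffix and the multi-word value is phrase-quoted.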
From source file:au.edu.usq.fascinator.access.solr.SolrAccessControl.java
License:Open Source License
private List<AccessControlSchema> search(String recordId, String role) throws Exception {
    // An empty response
    List<AccessControlSchema> schemas = new ArrayList<AccessControlSchema>();
    SolrSchema schema;

    // Run the query
    String q = writeQueryString(recordId, role);
    SolrQuery query = new SolrQuery().setQuery(q);
    QueryResponse result = core.query(query);

    // Loop through the responses
    Iterator<SolrDocument> iter = result.getResults().iterator();
    while (iter.hasNext()) {
        // Get the details
        SolrDocument resultDoc = iter.next();
        String rRecord = (String) resultDoc.getFieldValue("recordId");
        String rRole = (String) resultDoc.getFieldValue("role");

        // Build a schema from details
        schema = new SolrSchema();
        schema.init(rRecord);
        schema.set("role", rRole);

        // Add to our response
        schemas.add(schema);
    }
    return schemas;
}
From source file:au.org.ala.biocache.dao.SearchDAOImpl.java
License:Open Source License
/**
 * Writes the index fields to the supplied output stream in CSV format.
 * <p>
 * DM: refactored to split the query by month to improve performance.
 * Further enhancements possible:
 * 1) Multi threaded
 * 2) More filtering, by year or decade..
 *
 * @param downloadParams
 * @param out
 * @param includeSensitive
 * @param dd The details of the download
 * @param checkLimit
 * @param nextExecutor The ExecutorService to use to process results on different threads
 * @throws Exception
 */
@Override
public ConcurrentMap<String, AtomicInteger> writeResultsFromIndexToStream(
        final DownloadRequestParams downloadParams, final OutputStream out, final boolean includeSensitive,
        final DownloadDetailsDTO dd, boolean checkLimit, final ExecutorService nextExecutor) throws Exception {

    expandRequestedFields(downloadParams, true);

    if (dd != null) {
        dd.resetCounts();
    }

    long start = System.currentTimeMillis();
    final ConcurrentMap<String, AtomicInteger> uidStats = new ConcurrentHashMap<>();
    getServer();

    try {
        SolrQuery solrQuery = new SolrQuery();
        queryFormatUtils.formatSearchQuery(downloadParams);

        String dFields = downloadParams.getFields();

        if (includeSensitive) {
            // include raw latitude and longitudes
            if (dFields.contains("decimalLatitude.p")) {
                dFields = dFields.replaceFirst("decimalLatitude.p",
                        "sensitive_latitude,sensitive_longitude,decimalLatitude.p");
            } else if (dFields.contains("decimalLatitude")) {
                dFields = dFields.replaceFirst("decimalLatitude",
                        "sensitive_latitude,sensitive_longitude,decimalLatitude");
            }
            if (dFields.contains(",locality,")) {
                dFields = dFields.replaceFirst(",locality,", ",locality,sensitive_locality,");
            }
            if (dFields.contains(",locality.p,")) {
                dFields = dFields.replaceFirst(",locality.p,", ",locality.p,sensitive_locality,");
            }
        }

        StringBuilder sb = new StringBuilder(dFields);
        if (!downloadParams.getExtra().isEmpty()) {
            sb.append(",").append(downloadParams.getExtra());
        }

        String[] requestedFields = sb.toString().split(",");
        List<String>[] indexedFields;
        if (downloadFields == null) {
            // default to include everything
            java.util.List<String> mappedNames = new java.util.LinkedList<String>();
            for (int i = 0; i < requestedFields.length; i++)
                mappedNames.add(requestedFields[i]);

            indexedFields = new List[] { mappedNames, new java.util.LinkedList<String>(), mappedNames,
                    mappedNames, new ArrayList(), new ArrayList() };
        } else {
            indexedFields = downloadFields.getIndexFields(requestedFields, downloadParams.getDwcHeaders(),
                    downloadParams.getLayersServiceUrl());
        }

        // apply custom header
        String[] customHeader = dd.getRequestParams().getCustomHeader().split(",");
        for (int i = 0; i + 1 < customHeader.length; i += 2) {
            for (int j = 0; j < indexedFields[0].size(); j++) {
                if (customHeader[i].equals(indexedFields[0].get(j))) {
                    indexedFields[2].set(j, customHeader[i + 1]);
                }
            }
            for (int j = 0; j < indexedFields[4].size(); j++) {
                if (customHeader[i].equals(indexedFields[5].get(j))) {
                    indexedFields[4].set(j, customHeader[i + 1]);
                }
            }
        }

        if (logger.isDebugEnabled()) {
            logger.debug("Fields included in download: " + indexedFields[0]);
            logger.debug("Fields excluded from download: " + indexedFields[1]);
            logger.debug("The headers in downloads: " + indexedFields[2]);
            logger.debug("Analysis headers: " + indexedFields[4]);
            logger.debug("Analysis fields: " + indexedFields[5]);
        }

        // set the fields to the ones that are available in the index
        String[] fields = indexedFields[0].toArray(new String[] {});
        solrQuery.setFields(fields);

        StringBuilder qasb = new StringBuilder();
        if (!"none".equals(downloadParams.getQa())) {
            solrQuery.addField("assertions");
            if (!"all".equals(downloadParams.getQa()) && !"includeall".equals(downloadParams.getQa())) {
                // add all the qa fields
                qasb.append(downloadParams.getQa());
            }
        }
        solrQuery.addField("institution_uid").addField("collection_uid").addField("data_resource_uid")
                .addField("data_provider_uid");

        solrQuery.setQuery(downloadParams.getFormattedQuery());
        solrQuery.setFacetMinCount(1);
        solrQuery.setFacetLimit(-1);

        // get the assertion facets to add them to the download fields
        boolean getAssertionsFromFacets = "all".equals(downloadParams.getQa())
                || "includeall".equals(downloadParams.getQa());
        SolrQuery monthAssertionsQuery = getAssertionsFromFacets
                ? solrQuery.getCopy().addFacetField("month", "assertions")
                : solrQuery.getCopy().addFacetField("month");
        if (getAssertionsFromFacets) {
            // set the order for the facet to be based on the index - this will force the assertions
            // to be returned in the same order each time based on alphabetical sort. The number of
            // QA's may change between searches so we can't guarantee that the order won't change
            monthAssertionsQuery.add("f.assertions.facet.sort", "index");
        }
        QueryResponse facetQuery = runSolrQuery(monthAssertionsQuery, downloadParams.getFormattedFq(), 0, 0,
                "score", "asc");

        // set the totalrecords for the download details
        dd.setTotalRecords(facetQuery.getResults().getNumFound());

        // use a separately configured and smaller limit when output will be unzipped
        final long maxDownloadSize;
        if (MAX_DOWNLOAD_SIZE > unzippedLimit && out instanceof OptionalZipOutputStream
                && ((OptionalZipOutputStream) out).getType() == OptionalZipOutputStream.Type.unzipped) {
            maxDownloadSize = unzippedLimit;
        } else {
            maxDownloadSize = MAX_DOWNLOAD_SIZE;
        }

        if (checkLimit && dd.getTotalRecords() < maxDownloadSize) {
            checkLimit = false;
        }

        // get the month facets to add them to the download fields; get the assertion facets
        List<Count> splitByFacet = null;
        for (FacetField facet : facetQuery.getFacetFields()) {
            if (facet.getName().equals("assertions") && facet.getValueCount() > 0) {
                qasb.append(getQAFromFacet(facet));
            }
            if (facet.getName().equals("month") && facet.getValueCount() > 0) {
                splitByFacet = facet.getValues();
            }
        }

        if ("includeall".equals(downloadParams.getQa())) {
            qasb = getAllQAFields();
        }
        String qas = qasb.toString();

        // include sensitive fields in the header when the output will be partially sensitive
        final String[] sensitiveFields;
        final String[] notSensitiveFields;
        if (dd.getSensitiveFq() != null) {
            List<String>[] sensitiveHdr = downloadFields.getIndexFields(sensitiveSOLRHdr,
                    downloadParams.getDwcHeaders(), downloadParams.getLayersServiceUrl());

            // header for the output file
            indexedFields[2].addAll(sensitiveHdr[2]);

            // lookup for fields from sensitive queries
            sensitiveFields = org.apache.commons.lang3.ArrayUtils.addAll(
                    indexedFields[0].toArray(new String[] {}), sensitiveHdr[0].toArray(new String[] {}));

            // use general fields when sensitive data is not permitted
            notSensitiveFields = org.apache.commons.lang3.ArrayUtils
                    .addAll(indexedFields[0].toArray(new String[] {}), notSensitiveSOLRHdr);
        } else {
            sensitiveFields = new String[0];
            notSensitiveFields = fields;
        }

        // add analysis headers
        indexedFields[2].addAll(indexedFields[4]);
        final String[] analysisFields = indexedFields[5].toArray(new String[0]);

        final String[] qaFields = qas.equals("") ? new String[] {} : qas.split(",");
        String[] qaTitles = downloadFields.getHeader(qaFields, false, false);

        String[] header = org.apache.commons.lang3.ArrayUtils
                .addAll(indexedFields[2].toArray(new String[] {}), qaTitles);

        // retain output header fields and field names for inclusion of header info in the download
        StringBuilder infoFields = new StringBuilder("infoFields");
        for (String h : indexedFields[3])
            infoFields.append(",").append(h);
        for (String h : qaFields)
            infoFields.append(",").append(h);

        StringBuilder infoHeader = new StringBuilder("infoHeaders");
        for (String h : header)
            infoHeader.append(",").append(h);

        String info = infoFields.toString();
        while (info.contains(",,"))
            info = info.replace(",,", ",");
        uidStats.put(info, new AtomicInteger(-1));

        String hdr = infoHeader.toString();
        while (hdr.contains(",,"))
            hdr = hdr.replace(",,", ",");
        uidStats.put(hdr, new AtomicInteger(-2));

        // construct correct RecordWriter based on the supplied fileType
        final RecordWriterError rw = downloadParams.getFileType().equals("csv")
                ? new CSVRecordWriter(out, header, downloadParams.getSep(), downloadParams.getEsc())
                : (downloadParams.getFileType().equals("tsv") ? new TSVRecordWriter(out, header)
                        : new ShapeFileRecordWriter(tmpShapefileDir, downloadParams.getFile(), out,
                                (String[]) ArrayUtils.addAll(fields, qaFields)));

        // Requirement to be able to propagate interruptions to all other threads for this execution
        // Doing this via this variable
        final AtomicBoolean interruptFound = dd != null ? dd.getInterrupt() : new AtomicBoolean(false);

        // Create a fixed length blocking queue for buffering results before they are written
        // This also creates a push-back effect to throttle the results generating threads
        // when it fills and offers to it are delayed until the writer consumes elements from the queue
        final BlockingQueue<String[]> queue = new ArrayBlockingQueue<>(resultsQueueLength);

        // Create a sentinel that we can check for reference equality to signal the end of the queue
        final String[] sentinel = new String[0];

        // An implementation of RecordWriter that adds to an in-memory queue
        final RecordWriter concurrentWrapper = new RecordWriter() {
            private AtomicBoolean finalised = new AtomicBoolean(false);
            private AtomicBoolean finalisedComplete = new AtomicBoolean(false);

            @Override
            public void write(String[] nextLine) {
                try {
                    if (Thread.currentThread().isInterrupted() || interruptFound.get() || finalised.get()) {
                        finalise();
                        return;
                    }
                    while (!queue.offer(nextLine, writerTimeoutWaitMillis, TimeUnit.MILLISECONDS)) {
                        if (Thread.currentThread().isInterrupted() || interruptFound.get()
                                || finalised.get()) {
                            finalise();
                            break;
                        }
                    }
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    interruptFound.set(true);
                    if (logger.isDebugEnabled()) {
                        logger.debug("Queue failed to accept the next record due to a thread interrupt, "
                                + "calling finalise the cleanup: ", e);
                    }
                    // If we were interrupted then we should call finalise to cleanup
                    finalise();
                }
            }

            @Override
            public void finalise() {
                if (finalised.compareAndSet(false, true)) {
                    try {
                        // Offer the sentinel at least once, even when the thread is interrupted
                        while (!queue.offer(sentinel, writerTimeoutWaitMillis, TimeUnit.MILLISECONDS)) {
                            // If the thread is interrupted then the queue may not have any active
                            // consumers, so don't loop forever waiting for capacity in this case
                            // The hard shutdown phase will use queue.clear to ensure that the
                            // sentinel gets onto the queue at least once
                            if (Thread.currentThread().isInterrupted() || interruptFound.get()) {
                                break;
                            }
                        }
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                        interruptFound.set(true);
                        if (logger.isDebugEnabled()) {
                            logger.debug("Queue failed to accept the sentinel in finalise due to "
                                    + "a thread interrupt: ", e);
                        }
                    } finally {
                        finalisedComplete.set(true);
                    }
                }
            }

            @Override
            public boolean finalised() {
                return finalisedComplete.get();
            }
        };

        // A single thread that consumes elements put onto the queue until it sees the sentinel,
        // finalising after the sentinel or an interrupt
        Runnable writerRunnable = new Runnable() {
            @Override
            public void run() {
                try {
                    long counter = 0;
                    while (true) {
                        counter = counter + 1;
                        if (Thread.currentThread().isInterrupted() || interruptFound.get()) {
                            break;
                        }

                        String[] take = queue.take();
                        // Sentinel object equality check to see if we are done
                        if (take == sentinel || Thread.currentThread().isInterrupted()
                                || interruptFound.get()) {
                            break;
                        }
                        // Otherwise write to the wrapped record writer
                        rw.write(take);

                        // test for errors. This can contain a flush so only test occasionally
                        if (counter % resultsQueueLength == 0 && rw.hasError()) {
                            throw RecordWriterException.newRecordWriterException(dd, downloadParams, true, rw);
                        }
                    }
                } catch (RecordWriterException e) {
                    // no trace information is available to print for these errors
                    logger.error(e.getMessage());
                    interruptFound.set(true);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    interruptFound.set(true);
                } catch (Exception e) {
                    // Reuse interruptFound variable to signal that the writer had issues
                    interruptFound.set(true);
                    logger.error("Download writer failed.", e);
                } finally {
                    rw.finalise();
                }
            }
        };
        Thread writerThread = new Thread(writerRunnable);
        writerThread.start();

        try {
            if (rw instanceof ShapeFileRecordWriter) {
                dd.setHeaderMap(((ShapeFileRecordWriter) rw).getHeaderMappings());
            }

            // order the query by _docid_ for faster paging
            solrQuery.addSortField("_docid_", ORDER.asc);

            // for each month create a separate query that pages through 500 records per page
            List<SolrQuery> queries = new ArrayList<SolrQuery>();
            if (splitByFacet != null) {
                for (Count facet : splitByFacet) {
                    if (facet.getCount() > 0) {
                        SolrQuery splitByFacetQuery;
                        // do not add remainderQuery here
                        if (facet.getName() != null) {
                            splitByFacetQuery = solrQuery.getCopy()
                                    .addFilterQuery(facet.getFacetField().getName() + ":" + facet.getName());
                            splitByFacetQuery.setFacet(false);
                            queries.add(splitByFacetQuery);
                        }
                    }
                }
                if (splitByFacet.size() > 0) {
                    SolrQuery remainderQuery = solrQuery.getCopy()
                            .addFilterQuery("-" + splitByFacet.get(0).getFacetField().getName() + ":[* TO *]");
                    queries.add(0, remainderQuery);
                }
            } else {
                queries.add(0, solrQuery);
            }

            // split into sensitive and non-sensitive queries when
            //  - not including all sensitive values
            //  - there is a sensitive fq
            final List<SolrQuery> sensitiveQ = new ArrayList<SolrQuery>();
            if (!includeSensitive && dd.getSensitiveFq() != null) {
                sensitiveQ.addAll(
                        splitQueries(queries, dd.getSensitiveFq(), sensitiveSOLRHdr, notSensitiveSOLRHdr));
            }

            // Set<Future<Integer>> futures = new HashSet<Future<Integer>>();
            final AtomicInteger resultsCount = new AtomicInteger(0);
            final boolean threadCheckLimit = checkLimit;

            List<Callable<Integer>> solrCallables = new ArrayList<>(queries.size());
            // execute each query, writing the results to stream
            for (final SolrQuery splitByFacetQuery : queries) {
                // define a thread
                Callable<Integer> solrCallable = new Callable<Integer>() {
                    @Override
                    public Integer call() throws Exception {
                        int startIndex = 0;
                        // Randomise the wakeup time so they don't all wakeup on a periodic cycle
                        long localThrottle = throttle + Math.round(Math.random() * throttle);

                        String[] fq = downloadParams.getFormattedFq();
                        if (splitByFacetQuery.getFilterQueries() != null
                                && splitByFacetQuery.getFilterQueries().length > 0) {
                            if (fq == null) {
                                fq = new String[0];
                            }
                            fq = org.apache.commons.lang3.ArrayUtils.addAll(fq,
                                    splitByFacetQuery.getFilterQueries());
                        }

                        QueryResponse qr = runSolrQuery(splitByFacetQuery, fq, downloadBatchSize, startIndex,
                                "_docid_", "asc");
                        AtomicInteger recordsForThread = new AtomicInteger(0);
                        if (logger.isDebugEnabled()) {
                            logger.debug(splitByFacetQuery.getQuery() + " - results: "
                                    + qr.getResults().size());
                        }

                        while (qr != null && !qr.getResults().isEmpty() && !interruptFound.get()) {
                            if (logger.isDebugEnabled()) {
                                logger.debug("Start index: " + startIndex + ", "
                                        + splitByFacetQuery.getQuery());
                            }
                            int count = 0;
                            if (sensitiveQ.contains(splitByFacetQuery)) {
                                count = processQueryResults(uidStats, sensitiveFields, qaFields,
                                        concurrentWrapper, qr, dd, threadCheckLimit, resultsCount,
                                        maxDownloadSize, analysisFields);
                            } else {
                                // write non-sensitive values into sensitive fields when not
                                // authorised for their sensitive values
                                count = processQueryResults(uidStats, notSensitiveFields, qaFields,
                                        concurrentWrapper, qr, dd, threadCheckLimit, resultsCount,
                                        maxDownloadSize, analysisFields);
                            }
                            recordsForThread.addAndGet(count);
                            startIndex += downloadBatchSize;

                            // we have already set the Filter query the first time the query was
                            // constructed; rerun with the same params but different startIndex
                            if (!threadCheckLimit || resultsCount.get() < maxDownloadSize) {
                                if (!threadCheckLimit) {
                                    // throttle the download by sleeping
                                    Thread.sleep(localThrottle);
                                }
                                qr = runSolrQuery(splitByFacetQuery, null, downloadBatchSize, startIndex,
                                        "_docid_", "asc");
                            } else {
                                qr = null;
                            }
                        }
                        return recordsForThread.get();
                    }
                };
                solrCallables.add(solrCallable);
            }

            List<Future<Integer>> futures = new ArrayList<>(solrCallables.size());
            for (Callable<Integer> nextCallable : solrCallables) {
                futures.add(nextExecutor.submit(nextCallable));
            }

            // Busy wait because we need to be able to respond to an interrupt on any callable
            // and propagate it to all of the others for this particular query
            // Because the executor service is shared to prevent too many concurrent threads being run,
            // this requires a busy wait loop on the main thread to monitor state
            boolean waitAgain = false;
            do {
                waitAgain = false;
                for (Future<Integer> future : futures) {
                    if (!future.isDone()) {
                        // Wait again even if an interrupt flag is set, as it may have been set
                        // partway through the iteration
                        // The calls to future.cancel will occur next time if the interrupt is
                        // setup partway through an iteration
                        waitAgain = true;
                        // If one thread finds an interrupt it is propagated to others using the
                        // interruptFound AtomicBoolean
                        if (interruptFound.get()) {
                            future.cancel(true);
                        }
                    }
                }
                // Don't trigger the timeout interrupt if we don't have to wait again
                // as we are already done at this point
                if (waitAgain && (System.currentTimeMillis() - start) > downloadMaxTime) {
                    interruptFound.set(true);
                    break;
                }
                if (waitAgain) {
                    Thread.sleep(downloadCheckBusyWaitSleep);
                }
            } while (waitAgain);

            AtomicInteger totalDownload = new AtomicInteger(0);
            for (Future<Integer> future : futures) {
                if (future.isDone()) {
                    totalDownload.addAndGet(future.get());
                } else {
                    // All incomplete futures that survived the loop above are cancelled here
                    future.cancel(true);
                }
            }

            long finish = System.currentTimeMillis();
            long timeTakenInSecs = (finish - start) / 1000;
            if (timeTakenInSecs <= 0)
                timeTakenInSecs = 1;
            if (logger.isInfoEnabled()) {
                logger.info("Download of " + resultsCount + " records in " + timeTakenInSecs
                        + " seconds. Record/sec: " + resultsCount.intValue() / timeTakenInSecs);
            }
        } finally {
            try {
                // Once we get here, we need to finalise starting at the concurrent wrapper,
                // as there are no more non-sentinel records to be added to the queue
                // This eventually triggers finalisation of the underlying writer when the queue empties
                // This is a soft shutdown, and hence we wait below for this stage to complete in
                // normal circumstances
                // Note, this blocks for writerTimeoutWaitMillis trying to legitimately add the
                // sentinel to the end of the queue
                // We force the sentinel to be added in the hard shutdown phase below
                concurrentWrapper.finalise();
            } finally {
                try {
                    // Track the current time right now so we can abort after
                    // downloadMaxCompletionTime milliseconds in this phase
                    final long completionStartTime = System.currentTimeMillis();
                    // Busy wait check for finalised to be called in the RecordWriter
                    // or something is interrupted
                    // By this stage, there are at maximum download.internal.queue.size items
                    // remaining (default 1000)
                    while (writerThread.isAlive() && !writerThread.isInterrupted() && !interruptFound.get()
                            && !Thread.currentThread().isInterrupted() && !rw.finalised()
                            && !((System.currentTimeMillis() - completionStartTime) > downloadMaxCompletionTime)) {
                        Thread.sleep(downloadCheckBusyWaitSleep);
                    }
                } finally {
                    try {
                        // Attempt all actions that could trigger the writer thread to finalise,
                        // as by this stage we are in hard shutdown mode
                        // Signal that we are in hard shutdown mode
                        interruptFound.set(true);
                        // Add the sentinel or clear the queue and try again until it gets onto the queue
                        // We are in hard shutdown mode, so only priority is that the queue either
                        // gets the sentinel or the thread is interrupted to clean up resources
                        while (!queue.offer(sentinel)) {
                            queue.clear();
                        }
                        // Interrupt the single writer thread
                        writerThread.interrupt();
                        // Explicitly call finalise on the RecordWriter as a backup
                        // In normal circumstances it is called via the sentinel or the interrupt
                        // This will not block if finalise has been called previously in the
                        // current three implementations
                        rw.finalise();
                    } finally {
                        if (rw != null && rw.hasError()) {
                            throw RecordWriterException.newRecordWriterException(dd, downloadParams, true, rw);
                        } else {
                            // Flush whatever output was still pending for more deterministic debugging
                            out.flush();
                        }
                    }
                }
            }
        }
    } catch (SolrServerException ex) {
        logger.error("Problem communicating with SOLR server while processing download. "
                + ex.getMessage(), ex);
    }
    return uidStats;
}