List of usage examples for org.apache.solr.client.solrj SolrQuery setQuery
public SolrQuery setQuery(String query)
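SolrQuery.setQuery(String) sets the main query string (the "q" parameter sent to Solr) and returns the SolrQuery itself, so calls can be chained. Before the longer, project-specific examples below, a minimal standalone sketch may help; the core URL is hypothetical, and the client (HttpSolrServer) matches the SolrJ 4.x era of the examples on this page:

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;

public class SetQueryExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical core URL; adjust to your own Solr instance.
        HttpSolrServer solr = new HttpSolrServer("http://localhost:8983/solr/collection1");
        SolrQuery query = new SolrQuery();
        query.setQuery("title:solr"); // the main query ("q" parameter)
        query.setRows(10);            // page size
        QueryResponse rsp = solr.query(query);
        for (SolrDocument doc : rsp.getResults()) {
            System.out.println(doc.getFieldValue("id"));
        }
        solr.shutdown();
    }
}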
From source file:org.dspace.discovery.SolrServiceImpl.java
License:BSD License
/**
 * Iterates over all documents in the Lucene index and verifies that they
 * exist in the database; if not, they are removed from the index.
 *
 * @param force whether or not to force a clean index
 * @throws IOException IO exception
 * @throws SQLException SQL exception
 * @throws SearchServiceException occurs when something went wrong with querying the Solr server
 */
@Override
public void cleanIndex(boolean force) throws IOException, SQLException, SearchServiceException {
    Context context = new Context();
    context.turnOffAuthorisationSystem();

    try {
        if (getSolr() == null) {
            return;
        }
        if (force) {
            getSolr().deleteByQuery("search.resourcetype:[2 TO 4]");
        } else {
            SolrQuery query = new SolrQuery();
            query.setQuery("search.resourcetype:[2 TO 4]");
            QueryResponse rsp = getSolr().query(query);
            SolrDocumentList docs = rsp.getResults();

            Iterator iter = docs.iterator();
            while (iter.hasNext()) {
                SolrDocument doc = (SolrDocument) iter.next();

                String handle = (String) doc.getFieldValue("handle");
                DSpaceObject o = HandleManager.resolveToObject(context, handle);

                if (o == null) {
                    log.info("Deleting: " + handle);
                    /*
                     * Use IndexWriter to delete; it's easier to manage
                     * write.lock
                     */
                    unIndexContent(context, handle);
                } else {
                    context.removeCached(o, o.getID());
                    log.debug("Keeping: " + handle);
                }
            }
        }
    } catch (Exception e) {
        throw new SearchServiceException(e.getMessage(), e);
    } finally {
        context.abort();
    }
}
From source file:org.dspace.discovery.SolrServiceImpl.java
License:BSD License
/**
 * Checks the lastModified timestamp in the database and in the index to
 * determine whether the index entry is stale.
 *
 * @param handle the handle of the DSO
 * @param lastModified the last modified date of the DSpace object
 * @return a boolean indicating whether the DSO should be re-indexed
 * @throws SQLException SQL exception
 * @throws IOException IO exception
 * @throws SearchServiceException if something went wrong with querying the Solr server
 */
protected boolean requiresIndexing(String handle, Date lastModified)
        throws SQLException, IOException, SearchServiceException {
    boolean reindexItem = false;
    boolean inIndex = false;

    SolrQuery query = new SolrQuery();
    query.setQuery("handle:" + handle);
    QueryResponse rsp;

    try {
        if (getSolr() == null) {
            return false;
        }
        rsp = getSolr().query(query);
    } catch (SolrServerException e) {
        throw new SearchServiceException(e.getMessage(), e);
    }

    for (SolrDocument doc : rsp.getResults()) {
        inIndex = true;
        Object value = doc.getFieldValue(LAST_INDEXED_FIELD);
        if (value instanceof Date) {
            Date lastIndexed = (Date) value;
            if (lastIndexed.before(lastModified)) {
                reindexItem = true;
            }
        }
    }
    return reindexItem || !inIndex;
}
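Note that requiresIndexing concatenates the handle into the query string unescaped. When adapting this pattern for values that may contain Lucene special characters, a hedged variant using ClientUtils.escapeQueryChars (the same helper the sharding examples further down this page already use) is safer:

// Sketch only: escape the user-supplied value before embedding it in the query string.
// ClientUtils is org.apache.solr.client.solrj.util.ClientUtils.
SolrQuery query = new SolrQuery();
query.setQuery("handle:" + ClientUtils.escapeQueryChars(handle));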
From source file:org.dspace.discovery.SolrServiceImpl.java
License:BSD License
protected SolrQuery resolveToSolrQuery(Context context, DiscoverQuery discoveryQuery,
        boolean includeUnDiscoverable) {
    SolrQuery solrQuery = new SolrQuery();

    String query = "*:*";
    if (discoveryQuery.getQuery() != null) {
        query = discoveryQuery.getQuery();
    }
    solrQuery.setQuery(query);

    if (discoveryQuery.isSpellCheck()) {
        solrQuery.setParam(SpellingParams.SPELLCHECK_Q, query);
        solrQuery.setParam(SpellingParams.SPELLCHECK_COLLATE, Boolean.TRUE);
        solrQuery.setParam("spellcheck", Boolean.TRUE);
    }

    if (!includeUnDiscoverable) {
        solrQuery.addFilterQuery("NOT(withdrawn:true)");
        solrQuery.addFilterQuery("NOT(discoverable:false)");
    }

    for (int i = 0; i < discoveryQuery.getFilterQueries().size(); i++) {
        String filterQuery = discoveryQuery.getFilterQueries().get(i);
        solrQuery.addFilterQuery(filterQuery);
    }
    if (discoveryQuery.getDSpaceObjectFilter() != -1) {
        solrQuery.addFilterQuery("search.resourcetype:" + discoveryQuery.getDSpaceObjectFilter());
    }

    for (int i = 0; i < discoveryQuery.getFieldPresentQueries().size(); i++) {
        String filterQuery = discoveryQuery.getFieldPresentQueries().get(i);
        solrQuery.addFilterQuery(filterQuery + ":[* TO *]");
    }

    if (discoveryQuery.getStart() != -1) {
        solrQuery.setStart(discoveryQuery.getStart());
    }
    if (discoveryQuery.getMaxResults() != -1) {
        solrQuery.setRows(discoveryQuery.getMaxResults());
    }

    if (discoveryQuery.getSortField() != null) {
        SolrQuery.ORDER order = SolrQuery.ORDER.asc;
        if (discoveryQuery.getSortOrder().equals(DiscoverQuery.SORT_ORDER.desc)) {
            order = SolrQuery.ORDER.desc;
        }
        solrQuery.addSortField(discoveryQuery.getSortField(), order);
    }

    for (String property : discoveryQuery.getProperties().keySet()) {
        List<String> values = discoveryQuery.getProperties().get(property);
        solrQuery.add(property, values.toArray(new String[values.size()]));
    }

    List<DiscoverFacetField> facetFields = discoveryQuery.getFacetFields();
    if (0 < facetFields.size()) {
        // Only add facet information if there are any facets
        for (DiscoverFacetField facetFieldConfig : facetFields) {
            String field = transformFacetField(facetFieldConfig, facetFieldConfig.getField(), false);
            solrQuery.addFacetField(field);

            // Setting the facet limit in this fashion ensures that each facet can have its own max
            solrQuery.add("f." + field + "." + FacetParams.FACET_LIMIT,
                    String.valueOf(facetFieldConfig.getLimit()));
            String facetSort;
            if (DiscoveryConfigurationParameters.SORT.COUNT.equals(facetFieldConfig.getSortOrder())) {
                facetSort = FacetParams.FACET_SORT_COUNT;
            } else {
                facetSort = FacetParams.FACET_SORT_INDEX;
            }
            solrQuery.add("f." + field + "." + FacetParams.FACET_SORT, facetSort);

            if (facetFieldConfig.getOffset() != -1) {
                solrQuery.setParam("f." + field + "." + FacetParams.FACET_OFFSET,
                        String.valueOf(facetFieldConfig.getOffset()));
            }
            if (facetFieldConfig.getPrefix() != null) {
                solrQuery.setFacetPrefix(field, facetFieldConfig.getPrefix());
            }
        }

        List<String> facetQueries = discoveryQuery.getFacetQueries();
        for (String facetQuery : facetQueries) {
            solrQuery.addFacetQuery(facetQuery);
        }

        if (discoveryQuery.getFacetMinCount() != -1) {
            solrQuery.setFacetMinCount(discoveryQuery.getFacetMinCount());
        }

        solrQuery.setParam(FacetParams.FACET_OFFSET, String.valueOf(discoveryQuery.getFacetOffset()));
    }

    if (0 < discoveryQuery.getHitHighlightingFields().size()) {
        solrQuery.setHighlight(true);
        solrQuery.add(HighlightParams.USE_PHRASE_HIGHLIGHTER, Boolean.TRUE.toString());
        for (DiscoverHitHighlightingField highlightingField : discoveryQuery.getHitHighlightingFields()) {
            solrQuery.addHighlightField(highlightingField.getField() + "_hl");
            solrQuery.add("f." + highlightingField.getField() + "_hl." + HighlightParams.FRAGSIZE,
                    String.valueOf(highlightingField.getMaxChars()));
            solrQuery.add("f." + highlightingField.getField() + "_hl." + HighlightParams.SNIPPETS,
                    String.valueOf(highlightingField.getMaxSnippets()));
        }
    }

    // Add any configured search plugins!
    List<SolrServiceSearchPlugin> solrServiceSearchPlugins = new DSpace().getServiceManager()
            .getServicesByType(SolrServiceSearchPlugin.class);
    for (SolrServiceSearchPlugin searchPlugin : solrServiceSearchPlugins) {
        searchPlugin.additionalSearchParameters(context, discoveryQuery, solrQuery);
    }
    return solrQuery;
}
From source file:org.dspace.discovery.SolrServiceImpl.java
License:BSD License
public List<DSpaceObject> search(Context context, String query, String orderfield, boolean ascending,
        int offset, int max, String... filterquery) {
    try {
        if (getSolr() == null) {
            return Collections.emptyList();
        }

        SolrQuery solrQuery = new SolrQuery();
        solrQuery.setQuery(query);
        solrQuery.setFields("search.resourceid", "search.resourcetype");
        solrQuery.setStart(offset);
        solrQuery.setRows(max);
        if (orderfield != null) {
            solrQuery.setSortField(orderfield, ascending ? SolrQuery.ORDER.asc : SolrQuery.ORDER.desc);
        }
        if (filterquery != null) {
            solrQuery.addFilterQuery(filterquery);
        }
        QueryResponse rsp = getSolr().query(solrQuery);
        SolrDocumentList docs = rsp.getResults();

        Iterator iter = docs.iterator();
        List<DSpaceObject> result = new ArrayList<DSpaceObject>();
        while (iter.hasNext()) {
            SolrDocument doc = (SolrDocument) iter.next();

            DSpaceObject o = DSpaceObject.find(context, (Integer) doc.getFirstValue("search.resourcetype"),
                    (Integer) doc.getFirstValue("search.resourceid"));

            if (o != null) {
                result.add(o);
            }
        }
        return result;
    } catch (Exception e) {
        // Ignore any exception we get: we do NOT want any crash to be shown to the user.
        log.error(LogManager.getHeader(context, "Error while querying solr", "Query: " + query), e);
        return new ArrayList<DSpaceObject>(0);
    }
}
From source file:org.dspace.discovery.SolrServiceImpl.java
License:BSD License
@Override
public List<Item> getRelatedItems(Context context, Item item, DiscoveryMoreLikeThisConfiguration mltConfig) {
    List<Item> results = new ArrayList<Item>();
    try {
        SolrQuery solrQuery = new SolrQuery();
        // Set the query to the handle, since this is unique
        solrQuery.setQuery("handle: " + item.getHandle());

        // Add the more-like-this parameters!
        solrQuery.setParam(MoreLikeThisParams.MLT, true);
        // Add a comma-separated list of the similar fields
        @SuppressWarnings("unchecked")
        java.util.Collection<String> similarityMetadataFields = CollectionUtils
                .collect(mltConfig.getSimilarityMetadataFields(), new Transformer() {
                    @Override
                    public Object transform(Object input) {
                        // Add the mlt suffix!
                        return input + "_mlt";
                    }
                });

        solrQuery.setParam(MoreLikeThisParams.SIMILARITY_FIELDS,
                StringUtils.join(similarityMetadataFields, ','));
        solrQuery.setParam(MoreLikeThisParams.MIN_TERM_FREQ, String.valueOf(mltConfig.getMinTermFrequency()));
        solrQuery.setParam(MoreLikeThisParams.DOC_COUNT, String.valueOf(mltConfig.getMax()));
        solrQuery.setParam(MoreLikeThisParams.MIN_WORD_LEN, String.valueOf(mltConfig.getMinWordLength()));

        if (getSolr() == null) {
            return Collections.emptyList();
        }
        QueryResponse rsp = getSolr().query(solrQuery);
        NamedList mltResults = (NamedList) rsp.getResponse().get("moreLikeThis");
        if (mltResults != null && mltResults.get(item.getType() + "-" + item.getID()) != null) {
            SolrDocumentList relatedDocs = (SolrDocumentList) mltResults
                    .get(item.getType() + "-" + item.getID());
            for (Object relatedDoc : relatedDocs) {
                SolrDocument relatedDocument = (SolrDocument) relatedDoc;
                DSpaceObject relatedItem = findDSpaceObject(context, relatedDocument);
                if (relatedItem.getType() == Constants.ITEM) {
                    results.add((Item) relatedItem);
                }
            }
        }
    } catch (Exception e) {
        log.error(LogManager.getHeader(context, "Error while retrieving related items",
                "Handle: " + item.getHandle()), e);
    }
    return results;
}
From source file:org.dspace.statistics.SolrLogger.java
License:BSD License
public static void shardSolrIndex() throws IOException, SolrServerException {
    /*
     * Start by faceting by year so we can include each year in a separate core!
     */
    SolrQuery yearRangeQuery = new SolrQuery();
    yearRangeQuery.setQuery("*:*");
    yearRangeQuery.setRows(0);
    yearRangeQuery.setFacet(true);
    yearRangeQuery.add(FacetParams.FACET_RANGE, "time");
    // We go back to the year 2000. This is a bit of overkill, but this way we ensure we have everything.
    // The alternative would be to sort, but that isn't recommended since it would be a very costly query!
    yearRangeQuery.add(FacetParams.FACET_RANGE_START,
            "NOW/YEAR-" + (Calendar.getInstance().get(Calendar.YEAR) - 2000) + "YEARS");
    // Add the +0YEARS to ensure that we do NOT include the current year
    yearRangeQuery.add(FacetParams.FACET_RANGE_END, "NOW/YEAR+0YEARS");
    yearRangeQuery.add(FacetParams.FACET_RANGE_GAP, "+1YEAR");
    yearRangeQuery.add(FacetParams.FACET_MINCOUNT, String.valueOf(1));

    // Create a temp directory to store our files in!
    File tempDirectory = new File(
            ConfigurationManager.getProperty("dspace.dir") + File.separator + "temp" + File.separator);
    tempDirectory.mkdirs();

    QueryResponse queryResponse = solr.query(yearRangeQuery);
    // We only have one range query!
    List<RangeFacet.Count> yearResults = queryResponse.getFacetRanges().get(0).getCounts();
    for (RangeFacet.Count count : yearResults) {
        long totalRecords = count.getCount();

        // Create a range query from this!
        // We start with our current year
        DCDate dcStart = new DCDate(count.getValue());
        Calendar endDate = Calendar.getInstance();
        // Advance one year for the start of the next one!
        endDate.setTime(dcStart.toDate());
        endDate.add(Calendar.YEAR, 1);
        DCDate dcEndDate = new DCDate(endDate.getTime());

        StringBuilder filterQuery = new StringBuilder();
        filterQuery.append("time:([");
        filterQuery.append(ClientUtils.escapeQueryChars(dcStart.toString()));
        filterQuery.append(" TO ");
        filterQuery.append(ClientUtils.escapeQueryChars(dcEndDate.toString()));
        filterQuery.append("]");
        // The next part of the filter query excludes the content from midnight of the next year!
        filterQuery.append(" NOT ").append(ClientUtils.escapeQueryChars(dcEndDate.toString()));
        filterQuery.append(")");

        Map<String, String> yearQueryParams = new HashMap<String, String>();
        yearQueryParams.put(CommonParams.Q, "*:*");
        yearQueryParams.put(CommonParams.ROWS, String.valueOf(10000));
        yearQueryParams.put(CommonParams.FQ, filterQuery.toString());
        yearQueryParams.put(CommonParams.WT, "csv");

        // Start by creating a new core
        String coreName = "statistics-" + dcStart.getYear();
        HttpSolrServer statisticsYearServer = createCore(solr, coreName);

        System.out.println("Moving: " + totalRecords + " into core " + coreName);
        log.info("Moving: " + totalRecords + " records into core " + coreName);

        List<File> filesToUpload = new ArrayList<File>();
        for (int i = 0; i < totalRecords; i += 10000) {
            String solrRequestUrl = solr.getBaseURL() + "/select";
            solrRequestUrl = generateURL(solrRequestUrl, yearQueryParams);

            HttpGet get = new HttpGet(solrRequestUrl);
            HttpResponse response = new DefaultHttpClient().execute(get);
            InputStream csvInputstream = response.getEntity().getContent();
            // Write the csv output to a file!
            File csvFile = new File(tempDirectory.getPath() + File.separatorChar + "temp." + dcStart.getYear()
                    + "." + i + ".csv");
            FileUtils.copyInputStreamToFile(csvInputstream, csvFile);
            filesToUpload.add(csvFile);
            // Add 10000 & start over again
            yearQueryParams.put(CommonParams.START, String.valueOf(i + 10000));
        }

        for (File tempCsv : filesToUpload) {
            // Upload the data in the csv files to our new solr core
            ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest("/update/csv");
            contentStreamUpdateRequest.setParam("stream.contentType", "text/plain;charset=utf-8");
            contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
            contentStreamUpdateRequest.addFile(tempCsv, "text/plain;charset=utf-8");

            statisticsYearServer.request(contentStreamUpdateRequest);
        }
        statisticsYearServer.commit(true, true);

        // Delete the contents of this year from our year query!
        solr.deleteByQuery(filterQuery.toString());
        solr.commit(true, true);

        log.info("Moved " + totalRecords + " records into core: " + coreName);
    }
    FileUtils.deleteDirectory(tempDirectory);
}
From source file:org.dspace.statistics.SolrLogger.java
License:BSD License
public static void reindexBitstreamHits(boolean removeDeletedBitstreams) throws Exception {
    Context context = new Context();

    try {
        // First of all, retrieve the total number of records to be updated
        SolrQuery query = new SolrQuery();
        query.setQuery("*:*");
        query.addFilterQuery("type:" + Constants.BITSTREAM);
        // Only retrieve records which do not have a bundle name
        query.addFilterQuery("-bundleName:[* TO *]");
        query.setRows(0);
        addAdditionalSolrYearCores(query);
        long totalRecords = solr.query(query).getResults().getNumFound();

        File tempDirectory = new File(
                ConfigurationManager.getProperty("dspace.dir") + File.separator + "temp" + File.separator);
        tempDirectory.mkdirs();
        List<File> tempCsvFiles = new ArrayList<File>();
        for (int i = 0; i < totalRecords; i += 10000) {
            Map<String, String> params = new HashMap<String, String>();
            params.put(CommonParams.Q, "*:*");
            params.put(CommonParams.FQ, "-bundleName:[* TO *] AND type:" + Constants.BITSTREAM);
            params.put(CommonParams.WT, "csv");
            params.put(CommonParams.ROWS, String.valueOf(10000));
            params.put(CommonParams.START, String.valueOf(i));

            String solrRequestUrl = solr.getBaseURL() + "/select";
            solrRequestUrl = generateURL(solrRequestUrl, params);

            HttpGet get = new HttpGet(solrRequestUrl);
            HttpResponse response = new DefaultHttpClient().execute(get);
            InputStream csvOutput = response.getEntity().getContent();
            Reader csvReader = new InputStreamReader(csvOutput);
            List<String[]> rows = new CSVReader(csvReader).readAll();
            String[][] csvParsed = rows.toArray(new String[rows.size()][]);
            String[] header = csvParsed[0];
            // Attempt to find the bitstream id index!
            int idIndex = 0;
            for (int j = 0; j < header.length; j++) {
                if (header[j].equals("id")) {
                    idIndex = j;
                }
            }

            File tempCsv = new File(tempDirectory.getPath() + File.separatorChar + "temp." + i + ".csv");
            tempCsvFiles.add(tempCsv);
            CSVWriter csvp = new CSVWriter(new FileWriter(tempCsv));
            //csvp.setAlwaysQuote(false);

            // Write the header!
            csvp.writeNext((String[]) ArrayUtils.add(header, "bundleName"));

            Map<Integer, String> bitBundleCache = new HashMap<Integer, String>();
            // Loop over each line (skip the header though)!
            for (int j = 1; j < csvParsed.length; j++) {
                String[] csvLine = csvParsed[j];
                // Write the default line!
                int bitstreamId = Integer.parseInt(csvLine[idIndex]);
                // Attempt to retrieve our bundle name from the cache!
                String bundleName = bitBundleCache.get(bitstreamId);
                if (bundleName == null) {
                    // Nothing found; retrieve the bitstream
                    Bitstream bitstream = Bitstream.find(context, bitstreamId);
                    // Attempt to retrieve our bitstream!
                    if (bitstream != null) {
                        Bundle[] bundles = bitstream.getBundles();
                        if (bundles != null && 0 < bundles.length) {
                            Bundle bundle = bundles[0];
                            bundleName = bundle.getName();
                            context.removeCached(bundle, bundle.getID());
                        } else {
                            // No bundle found; we are either a collection or a community logo, check for it!
                            DSpaceObject parentObject = bitstream.getParentObject();
                            if (parentObject instanceof Collection) {
                                bundleName = "LOGO-COLLECTION";
                            } else if (parentObject instanceof Community) {
                                bundleName = "LOGO-COMMUNITY";
                            }
                            if (parentObject != null) {
                                context.removeCached(parentObject, parentObject.getID());
                            }
                        }
                        // Cache the bundle name
                        bitBundleCache.put(bitstream.getID(), bundleName);
                        // Remove the bitstream from cache
                        context.removeCached(bitstream, bitstreamId);
                    }
                    // Check if we don't have a bundle name.
                    // If we don't have one & we do not need to delete the deleted bitstreams,
                    // ensure that a BITSTREAM_DELETED bundle name is given!
                    if (bundleName == null && !removeDeletedBitstreams) {
                        bundleName = "BITSTREAM_DELETED";
                    }
                }
                csvp.writeNext((String[]) ArrayUtils.add(csvLine, bundleName));
            } // Loop over our parsed csv
            csvp.flush();
            csvp.close();
        }

        // Add all the separate csv files
        for (File tempCsv : tempCsvFiles) {
            ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest("/update/csv");
            contentStreamUpdateRequest.setParam("stream.contentType", "text/plain;charset=utf-8");
            contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
            contentStreamUpdateRequest.addFile(tempCsv, "text/plain;charset=utf-8");

            solr.request(contentStreamUpdateRequest);
        }

        // Now that all our new bitstream stats are in place, delete all the old ones!
        solr.deleteByQuery("-bundleName:[* TO *] AND type:" + Constants.BITSTREAM);
        // Commit everything to wrap up
        solr.commit(true, true);
        // Clean up our directory!
        FileUtils.deleteDirectory(tempDirectory);
    } catch (Exception e) {
        log.error("Error while updating the bitstream statistics", e);
        throw e;
    } finally {
        context.abort();
    }
}
From source file:org.dspace.statistics.SolrLogger.java
License:BSD License
/**
 * Export all SOLR usage statistics for viewing/downloading content to flat text files.
 * The export is written as a series of CSV files in the temp directory.
 *
 * @throws Exception
 */
public static void exportHits() throws Exception {
    Context context = new Context();

    File tempDirectory = new File(
            ConfigurationManager.getProperty("dspace.dir") + File.separator + "temp" + File.separator);
    tempDirectory.mkdirs();

    try {
        // First of all, retrieve the total number of records to be exported
        SolrQuery query = new SolrQuery();
        query.setQuery("*:*");

        ModifiableSolrParams solrParams = new ModifiableSolrParams();
        solrParams.set(CommonParams.Q, "statistics_type:view OR (*:* AND -statistics_type:*)");
        solrParams.set(CommonParams.WT, "javabin");
        solrParams.set(CommonParams.ROWS, String.valueOf(10000));

        addAdditionalSolrYearCores(query);
        long totalRecords = solr.query(query).getResults().getNumFound();
        System.out.println("There are " + totalRecords + " usage events in SOLR for download/view.");

        for (int i = 0; i < totalRecords; i += 10000) {
            solrParams.set(CommonParams.START, String.valueOf(i));
            QueryResponse queryResponse = solr.query(solrParams);
            SolrDocumentList docs = queryResponse.getResults();

            File exportOutput = new File(
                    tempDirectory.getPath() + File.separatorChar + "usagestats_" + i + ".csv");
            exportOutput.delete();

            // Export docs
            addDocumentsToFile(context, docs, exportOutput);
            System.out.println("Export hits [" + i + " - " + String.valueOf(i + 9999) + "] to "
                    + exportOutput.getCanonicalPath());
        }
    } catch (Exception e) {
        log.error("Error while exporting SOLR data", e);
        throw e;
    } finally {
        context.abort();
    }
}
From source file:org.dspace.statistics.SolrLoggerServiceImpl.java
License:BSD License
@Override
public void shardSolrIndex() throws IOException, SolrServerException {
    /*
     * Start by faceting by year so we can include each year in a separate core!
     */
    SolrQuery yearRangeQuery = new SolrQuery();
    yearRangeQuery.setQuery("*:*");
    yearRangeQuery.setRows(0);
    yearRangeQuery.setFacet(true);
    yearRangeQuery.add(FacetParams.FACET_RANGE, "time");
    // We go back to the year 2000. This is a bit of overkill, but this way we ensure we have everything.
    // The alternative would be to sort, but that isn't recommended since it would be a very costly query!
    yearRangeQuery.add(FacetParams.FACET_RANGE_START,
            "NOW/YEAR-" + (Calendar.getInstance().get(Calendar.YEAR) - 2000) + "YEARS");
    // Add the +0YEARS to ensure that we do NOT include the current year
    yearRangeQuery.add(FacetParams.FACET_RANGE_END, "NOW/YEAR+0YEARS");
    yearRangeQuery.add(FacetParams.FACET_RANGE_GAP, "+1YEAR");
    yearRangeQuery.add(FacetParams.FACET_MINCOUNT, String.valueOf(1));

    // Create a temp directory to store our files in!
    File tempDirectory = new File(
            configurationService.getProperty("dspace.dir") + File.separator + "temp" + File.separator);
    tempDirectory.mkdirs();

    QueryResponse queryResponse = solr.query(yearRangeQuery);
    // We only have one range query!
    List<RangeFacet.Count> yearResults = queryResponse.getFacetRanges().get(0).getCounts();
    for (RangeFacet.Count count : yearResults) {
        long totalRecords = count.getCount();

        // Create a range query from this!
        // We start with our current year
        DCDate dcStart = new DCDate(count.getValue());
        Calendar endDate = Calendar.getInstance();
        // Advance one year for the start of the next one!
        endDate.setTime(dcStart.toDate());
        endDate.add(Calendar.YEAR, 1);
        DCDate dcEndDate = new DCDate(endDate.getTime());

        StringBuilder filterQuery = new StringBuilder();
        filterQuery.append("time:([");
        filterQuery.append(ClientUtils.escapeQueryChars(dcStart.toString()));
        filterQuery.append(" TO ");
        filterQuery.append(ClientUtils.escapeQueryChars(dcEndDate.toString()));
        filterQuery.append("]");
        // The next part of the filter query excludes the content from midnight of the next year!
        filterQuery.append(" NOT ").append(ClientUtils.escapeQueryChars(dcEndDate.toString()));
        filterQuery.append(")");

        Map<String, String> yearQueryParams = new HashMap<String, String>();
        yearQueryParams.put(CommonParams.Q, "*:*");
        yearQueryParams.put(CommonParams.ROWS, String.valueOf(10000));
        yearQueryParams.put(CommonParams.FQ, filterQuery.toString());
        yearQueryParams.put(CommonParams.WT, "csv");

        // Start by creating a new core
        String coreName = "statistics-" + dcStart.getYear();
        HttpSolrServer statisticsYearServer = createCore(solr, coreName);

        System.out.println("Moving: " + totalRecords + " into core " + coreName);
        log.info("Moving: " + totalRecords + " records into core " + coreName);

        List<File> filesToUpload = new ArrayList<File>();
        for (int i = 0; i < totalRecords; i += 10000) {
            String solrRequestUrl = solr.getBaseURL() + "/select";
            solrRequestUrl = generateURL(solrRequestUrl, yearQueryParams);

            HttpGet get = new HttpGet(solrRequestUrl);
            HttpResponse response = new DefaultHttpClient().execute(get);
            InputStream csvInputstream = response.getEntity().getContent();
            // Write the csv output to a file!
            File csvFile = new File(tempDirectory.getPath() + File.separatorChar + "temp." + dcStart.getYear()
                    + "." + i + ".csv");
            FileUtils.copyInputStreamToFile(csvInputstream, csvFile);
            filesToUpload.add(csvFile);
            // Add 10000 & start over again
            yearQueryParams.put(CommonParams.START, String.valueOf(i + 10000));
        }

        for (File tempCsv : filesToUpload) {
            // Upload the data in the csv files to our new solr core
            ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest("/update/csv");
            contentStreamUpdateRequest.setParam("stream.contentType", "text/plain;charset=utf-8");
            contentStreamUpdateRequest.setParam("skip", "_version_");
            contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
            contentStreamUpdateRequest.addFile(tempCsv, "text/plain;charset=utf-8");

            statisticsYearServer.request(contentStreamUpdateRequest);
        }
        statisticsYearServer.commit(true, true);

        // Delete the contents of this year from our year query!
        solr.deleteByQuery(filterQuery.toString());
        solr.commit(true, true);

        log.info("Moved " + totalRecords + " records into core: " + coreName);
    }
    FileUtils.deleteDirectory(tempDirectory);
}
From source file:org.dspace.statistics.SolrLoggerServiceImpl.java
License:BSD License
@Override
public void reindexBitstreamHits(boolean removeDeletedBitstreams) throws Exception {
    Context context = new Context();

    try {
        // First of all, retrieve the total number of records to be updated
        SolrQuery query = new SolrQuery();
        query.setQuery("*:*");
        query.addFilterQuery("type:" + Constants.BITSTREAM);
        // Only retrieve records which do not have a bundle name
        query.addFilterQuery("-bundleName:[* TO *]");
        query.setRows(0);
        addAdditionalSolrYearCores(query);
        long totalRecords = solr.query(query).getResults().getNumFound();

        File tempDirectory = new File(
                configurationService.getProperty("dspace.dir") + File.separator + "temp" + File.separator);
        tempDirectory.mkdirs();
        List<File> tempCsvFiles = new ArrayList<File>();
        for (int i = 0; i < totalRecords; i += 10000) {
            Map<String, String> params = new HashMap<String, String>();
            params.put(CommonParams.Q, "*:*");
            params.put(CommonParams.FQ, "-bundleName:[* TO *] AND type:" + Constants.BITSTREAM);
            params.put(CommonParams.WT, "csv");
            params.put(CommonParams.ROWS, String.valueOf(10000));
            params.put(CommonParams.START, String.valueOf(i));

            String solrRequestUrl = solr.getBaseURL() + "/select";
            solrRequestUrl = generateURL(solrRequestUrl, params);

            HttpGet get = new HttpGet(solrRequestUrl);
            HttpResponse response = new DefaultHttpClient().execute(get);
            InputStream csvOutput = response.getEntity().getContent();
            Reader csvReader = new InputStreamReader(csvOutput);
            List<String[]> rows = new CSVReader(csvReader).readAll();
            String[][] csvParsed = rows.toArray(new String[rows.size()][]);
            String[] header = csvParsed[0];
            // Attempt to find the bitstream id index!
            int idIndex = 0;
            for (int j = 0; j < header.length; j++) {
                if (header[j].equals("id")) {
                    idIndex = j;
                }
            }

            File tempCsv = new File(tempDirectory.getPath() + File.separatorChar + "temp." + i + ".csv");
            tempCsvFiles.add(tempCsv);
            CSVWriter csvp = new CSVWriter(new FileWriter(tempCsv));
            //csvp.setAlwaysQuote(false);

            // Write the header!
            csvp.writeNext((String[]) ArrayUtils.add(header, "bundleName"));

            Map<String, String> bitBundleCache = new HashMap<>();
            // Loop over each line (skip the header though)!
            for (int j = 1; j < csvParsed.length; j++) {
                String[] csvLine = csvParsed[j];
                // Write the default line!
                String bitstreamId = csvLine[idIndex];
                // Attempt to retrieve our bundle name from the cache!
                String bundleName = bitBundleCache.get(bitstreamId);
                if (bundleName == null) {
                    // Nothing found; retrieve the bitstream
                    Bitstream bitstream = bitstreamService.findByIdOrLegacyId(context, bitstreamId);
                    // Attempt to retrieve our bitstream!
                    if (bitstream != null) {
                        List<Bundle> bundles = bitstream.getBundles();
                        if (bundles != null && 0 < bundles.size()) {
                            Bundle bundle = bundles.get(0);
                            bundleName = bundle.getName();
                        } else {
                            // No bundle found; we are either a collection or a community logo, check for it!
                            DSpaceObject parentObject = bitstreamService.getParentObject(context, bitstream);
                            if (parentObject instanceof Collection) {
                                bundleName = "LOGO-COLLECTION";
                            } else if (parentObject instanceof Community) {
                                bundleName = "LOGO-COMMUNITY";
                            }
                        }
                        // Cache the bundle name
                        bitBundleCache.put(bitstream.getID().toString(), bundleName);
                    }
                    // Check if we don't have a bundle name.
                    // If we don't have one & we do not need to delete the deleted bitstreams,
                    // ensure that a BITSTREAM_DELETED bundle name is given!
                    if (bundleName == null && !removeDeletedBitstreams) {
                        bundleName = "BITSTREAM_DELETED";
                    }
                }
                csvp.writeNext((String[]) ArrayUtils.add(csvLine, bundleName));
            } // Loop over our parsed csv
            csvp.flush();
            csvp.close();
        }

        // Add all the separate csv files
        for (File tempCsv : tempCsvFiles) {
            ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest("/update/csv");
            contentStreamUpdateRequest.setParam("stream.contentType", "text/plain;charset=utf-8");
            contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
            contentStreamUpdateRequest.addFile(tempCsv, "text/plain;charset=utf-8");

            solr.request(contentStreamUpdateRequest);
        }

        // Now that all our new bitstream stats are in place, delete all the old ones!
        solr.deleteByQuery("-bundleName:[* TO *] AND type:" + Constants.BITSTREAM);
        // Commit everything to wrap up
        solr.commit(true, true);
        // Clean up our directory!
        FileUtils.deleteDirectory(tempDirectory);
    } catch (Exception e) {
        log.error("Error while updating the bitstream statistics", e);
        throw e;
    } finally {
        context.abort();
    }
}