Example usage for org.apache.solr.client.solrj SolrQuery setQuery

List of usage examples for org.apache.solr.client.solrj SolrQuery setQuery

Introduction

On this page you can find example usages of org.apache.solr.client.solrj SolrQuery setQuery.

Prototype

public SolrQuery setQuery(String query) 

Source Link

Usage

From source file:org.dspace.discovery.SolrServiceImpl.java

License:BSD License

/**
 * Iterates over all documents in the Lucene index and verifies they are in
 * database, if not, they are removed./*from  w ww.  j av  a2s.c o m*/
 *
 * @param force whether or not to force a clean index
 * @throws IOException IO exception
 * @throws SQLException sql exception
 * @throws SearchServiceException occurs when something went wrong with querying the solr server
 */
@Override
public void cleanIndex(boolean force) throws IOException, SQLException, SearchServiceException {

    Context context = new Context();
    context.turnOffAuthorisationSystem();

    try {
        if (getSolr() == null) {
            return;
        }
        if (force) {
            getSolr().deleteByQuery("search.resourcetype:[2 TO 4]");
        } else {
            SolrQuery query = new SolrQuery();
            query.setQuery("search.resourcetype:[2 TO 4]");
            QueryResponse rsp = getSolr().query(query);
            SolrDocumentList docs = rsp.getResults();

            Iterator iter = docs.iterator();
            while (iter.hasNext()) {

                SolrDocument doc = (SolrDocument) iter.next();

                String handle = (String) doc.getFieldValue("handle");

                DSpaceObject o = HandleManager.resolveToObject(context, handle);

                if (o == null) {
                    log.info("Deleting: " + handle);
                    /*
                          * Use IndexWriter to delete, its easier to manage
                          * write.lock
                          */
                    unIndexContent(context, handle);
                } else {
                    context.removeCached(o, o.getID());
                    log.debug("Keeping: " + handle);
                }
            }
        }
    } catch (Exception e) {

        throw new SearchServiceException(e.getMessage(), e);
    } finally {
        context.abort();
    }

}

From source file:org.dspace.discovery.SolrServiceImpl.java

License:BSD License

/**
 * Is stale checks the lastModified time stamp in the database and the index
 * to determine if the index is stale./*from  w w w  .j  a v  a  2s  . c  om*/
 *
 * @param handle the handle of the dso
 * @param lastModified the last modified date of the DSpace object
 * @return a boolean indicating if the dso should be re indexed again
 * @throws SQLException sql exception
 * @throws IOException io exception
 * @throws SearchServiceException if something went wrong with querying the solr server
 */
protected boolean requiresIndexing(String handle, Date lastModified)
        throws SQLException, IOException, SearchServiceException {

    boolean reindexItem = false;
    boolean inIndex = false;

    SolrQuery query = new SolrQuery();
    query.setQuery("handle:" + handle);
    QueryResponse rsp;

    try {
        if (getSolr() == null) {
            return false;
        }
        rsp = getSolr().query(query);
    } catch (SolrServerException e) {
        throw new SearchServiceException(e.getMessage(), e);
    }

    for (SolrDocument doc : rsp.getResults()) {

        inIndex = true;

        Object value = doc.getFieldValue(LAST_INDEXED_FIELD);

        if (value instanceof Date) {
            Date lastIndexed = (Date) value;

            if (lastIndexed.before(lastModified)) {

                reindexItem = true;
            }
        }
    }

    return reindexItem || !inIndex;
}

From source file:org.dspace.discovery.SolrServiceImpl.java

License:BSD License

/**
 * Builds the {@link SolrQuery} that corresponds to the given discovery query.
 * <p>
 * Copies over the query text (defaulting to {@code *:*}), spell-check settings, filter
 * queries, paging, sorting, arbitrary extra properties, facet and hit-highlighting
 * configuration, and finally lets every registered {@code SolrServiceSearchPlugin} add
 * its own parameters.
 *
 * @param context the DSpace context, handed to the search plugins
 * @param discoveryQuery the discovery query to translate
 * @param includeUnDiscoverable when {@code false}, filter queries are added that exclude
 *                              withdrawn and non-discoverable documents
 * @return the populated solr query
 */
protected SolrQuery resolveToSolrQuery(Context context, DiscoverQuery discoveryQuery,
        boolean includeUnDiscoverable) {
    SolrQuery solrQuery = new SolrQuery();

    // Match everything when the caller supplied no query text.
    String queryText = discoveryQuery.getQuery() != null ? discoveryQuery.getQuery() : "*:*";
    solrQuery.setQuery(queryText);

    if (discoveryQuery.isSpellCheck()) {
        solrQuery.setParam(SpellingParams.SPELLCHECK_Q, queryText);
        solrQuery.setParam(SpellingParams.SPELLCHECK_COLLATE, Boolean.TRUE);
        solrQuery.setParam("spellcheck", Boolean.TRUE);
    }

    if (!includeUnDiscoverable) {
        solrQuery.addFilterQuery("NOT(withdrawn:true)");
        solrQuery.addFilterQuery("NOT(discoverable:false)");
    }

    for (String filterQuery : discoveryQuery.getFilterQueries()) {
        solrQuery.addFilterQuery(filterQuery);
    }
    if (discoveryQuery.getDSpaceObjectFilter() != -1) {
        solrQuery.addFilterQuery("search.resourcetype:" + discoveryQuery.getDSpaceObjectFilter());
    }

    // Each "field present" entry becomes an open range filter on that field.
    for (String presentField : discoveryQuery.getFieldPresentQueries()) {
        solrQuery.addFilterQuery(presentField + ":[* TO *]");
    }

    // -1 means "not set" for the paging values.
    if (discoveryQuery.getStart() != -1) {
        solrQuery.setStart(discoveryQuery.getStart());
    }
    if (discoveryQuery.getMaxResults() != -1) {
        solrQuery.setRows(discoveryQuery.getMaxResults());
    }

    if (discoveryQuery.getSortField() != null) {
        SolrQuery.ORDER order = discoveryQuery.getSortOrder().equals(DiscoverQuery.SORT_ORDER.desc)
                ? SolrQuery.ORDER.desc
                : SolrQuery.ORDER.asc;
        solrQuery.addSortField(discoveryQuery.getSortField(), order);
    }

    for (String property : discoveryQuery.getProperties().keySet()) {
        List<String> values = discoveryQuery.getProperties().get(property);
        solrQuery.add(property, values.toArray(new String[values.size()]));
    }

    List<DiscoverFacetField> facetFields = discoveryQuery.getFacetFields();
    if (!facetFields.isEmpty()) {
        // Facet information is only added when at least one facet field is configured.
        for (DiscoverFacetField facetFieldConfig : facetFields) {
            String field = transformFacetField(facetFieldConfig, facetFieldConfig.getField(), false);
            String perFieldPrefix = "f." + field + ".";
            solrQuery.addFacetField(field);

            // Per-field parameters let every facet have its own limit and sort order.
            solrQuery.add(perFieldPrefix + FacetParams.FACET_LIMIT,
                    String.valueOf(facetFieldConfig.getLimit()));

            boolean sortByCount = DiscoveryConfigurationParameters.SORT.COUNT
                    .equals(facetFieldConfig.getSortOrder());
            solrQuery.add(perFieldPrefix + FacetParams.FACET_SORT,
                    sortByCount ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX);

            if (facetFieldConfig.getOffset() != -1) {
                solrQuery.setParam(perFieldPrefix + FacetParams.FACET_OFFSET,
                        String.valueOf(facetFieldConfig.getOffset()));
            }
            if (facetFieldConfig.getPrefix() != null) {
                solrQuery.setFacetPrefix(field, facetFieldConfig.getPrefix());
            }
        }

        for (String facetQuery : discoveryQuery.getFacetQueries()) {
            solrQuery.addFacetQuery(facetQuery);
        }

        if (discoveryQuery.getFacetMinCount() != -1) {
            solrQuery.setFacetMinCount(discoveryQuery.getFacetMinCount());
        }

        solrQuery.setParam(FacetParams.FACET_OFFSET, String.valueOf(discoveryQuery.getFacetOffset()));
    }

    if (discoveryQuery.getHitHighlightingFields().size() > 0) {
        solrQuery.setHighlight(true);
        solrQuery.add(HighlightParams.USE_PHRASE_HIGHLIGHTER, Boolean.TRUE.toString());
        for (DiscoverHitHighlightingField highlightingField : discoveryQuery.getHitHighlightingFields()) {
            // Highlighting is requested on the "_hl" variant of each configured field.
            String hlField = highlightingField.getField() + "_hl";
            solrQuery.addHighlightField(hlField);
            solrQuery.add("f." + hlField + "." + HighlightParams.FRAGSIZE,
                    String.valueOf(highlightingField.getMaxChars()));
            solrQuery.add("f." + hlField + "." + HighlightParams.SNIPPETS,
                    String.valueOf(highlightingField.getMaxSnippets()));
        }
    }

    // Give every configured search plugin the chance to add its own parameters.
    List<SolrServiceSearchPlugin> searchPlugins = new DSpace().getServiceManager()
            .getServicesByType(SolrServiceSearchPlugin.class);
    for (SolrServiceSearchPlugin plugin : searchPlugins) {
        plugin.additionalSearchParameters(context, discoveryQuery, solrQuery);
    }
    return solrQuery;
}

From source file:org.dspace.discovery.SolrServiceImpl.java

License:BSD License

/**
 * Runs a query against the solr index and resolves every hit to its DSpace object.
 * <p>
 * Any exception is logged and swallowed on purpose, so that a search failure never
 * surfaces to the user; an empty list is returned instead.
 *
 * @param context the DSpace context used to load the objects
 * @param query the solr query text
 * @param orderfield the field to sort on, or {@code null} for no explicit sorting
 * @param ascending {@code true} to sort ascending, {@code false} for descending
 * @param offset index of the first hit to return
 * @param max maximum number of hits to return
 * @param filterquery optional filter queries, may be {@code null}
 * @return the objects found for the hits (hits that resolve to no object are skipped);
 *         empty when no solr server is available or when an error occurred
 */
public List<DSpaceObject> search(Context context, String query, String orderfield, boolean ascending,
        int offset, int max, String... filterquery) {

    try {
        if (getSolr() == null) {
            return Collections.emptyList();
        }

        SolrQuery solrQuery = new SolrQuery();
        solrQuery.setQuery(query);
        // Only the identifying fields are needed to load the objects afterwards.
        solrQuery.setFields("search.resourceid", "search.resourcetype");
        solrQuery.setStart(offset);
        solrQuery.setRows(max);
        if (orderfield != null) {
            SolrQuery.ORDER direction = ascending ? SolrQuery.ORDER.asc : SolrQuery.ORDER.desc;
            solrQuery.setSortField(orderfield, direction);
        }
        if (filterquery != null) {
            solrQuery.addFilterQuery(filterquery);
        }

        SolrDocumentList hits = getSolr().query(solrQuery).getResults();
        List<DSpaceObject> found = new ArrayList<DSpaceObject>();
        for (SolrDocument hit : hits) {
            DSpaceObject dso = DSpaceObject.find(context, (Integer) hit.getFirstValue("search.resourcetype"),
                    (Integer) hit.getFirstValue("search.resourceid"));
            if (dso != null) {
                found.add(dso);
            }
        }
        return found;
    } catch (Exception e) {
        // Deliberately swallowed: a failing search must not crash the caller.
        log.error(LogManager.getHeader(context, "Error while quering solr", "Queyr: " + query), e);
        return new ArrayList<DSpaceObject>(0);
    }
}

From source file:org.dspace.discovery.SolrServiceImpl.java

License:BSD License

/**
 * Looks up items related to the given item through Solr's "more like this" support.
 * <p>
 * The item is located in the index by its handle; the similarity fields are the configured
 * metadata fields with the {@code _mlt} suffix appended. Errors are logged and result in
 * whatever was collected so far being returned.
 *
 * @param context the DSpace context used to load the related objects
 * @param item the item to find related items for
 * @param mltConfig the more-like-this configuration (similarity fields, minimum term
 *                  frequency, maximum number of results, minimum word length)
 * @return the related items; empty when there are none, when no solr server is available,
 *         or when an error occurred before any item was collected
 */
@Override
public List<Item> getRelatedItems(Context context, Item item, DiscoveryMoreLikeThisConfiguration mltConfig) {
    List<Item> results = new ArrayList<Item>();
    try {
        // Check for a solr server first so no query is built when it cannot be executed.
        if (getSolr() == null) {
            return Collections.emptyList();
        }

        SolrQuery solrQuery = new SolrQuery();
        //Set the query to handle since this is unique
        solrQuery.setQuery("handle: " + item.getHandle());
        //Add the more like this parameters !
        solrQuery.setParam(MoreLikeThisParams.MLT, true);
        //Add a comma separated list of the similar fields
        @SuppressWarnings("unchecked")
        java.util.Collection<String> similarityMetadataFields = CollectionUtils
                .collect(mltConfig.getSimilarityMetadataFields(), new Transformer() {
                    @Override
                    public Object transform(Object input) {
                        //Add the mlt appendix !
                        return input + "_mlt";
                    }
                });

        solrQuery.setParam(MoreLikeThisParams.SIMILARITY_FIELDS,
                StringUtils.join(similarityMetadataFields, ','));
        solrQuery.setParam(MoreLikeThisParams.MIN_TERM_FREQ, String.valueOf(mltConfig.getMinTermFrequency()));
        solrQuery.setParam(MoreLikeThisParams.DOC_COUNT, String.valueOf(mltConfig.getMax()));
        solrQuery.setParam(MoreLikeThisParams.MIN_WORD_LEN, String.valueOf(mltConfig.getMinWordLength()));

        QueryResponse rsp = getSolr().query(solrQuery);
        NamedList mltResults = (NamedList) rsp.getResponse().get("moreLikeThis");
        if (mltResults != null) {
            // The more-like-this section is keyed by "<type>-<id>" of the source item.
            String mltKey = item.getType() + "-" + item.getID();
            SolrDocumentList relatedDocs = (SolrDocumentList) mltResults.get(mltKey);
            if (relatedDocs != null) {
                for (SolrDocument relatedDocument : relatedDocs) {
                    DSpaceObject relatedItem = findDSpaceObject(context, relatedDocument);
                    // NOTE(review): findDSpaceObject presumably returns null when the indexed object
                    // no longer exists; skip such hits instead of letting an NPE discard the rest.
                    if (relatedItem != null && relatedItem.getType() == Constants.ITEM) {
                        results.add((Item) relatedItem);
                    }
                }
            }
        }

    } catch (Exception e) {
        log.error(LogManager.getHeader(context, "Error while retrieving related items",
                "Handle: " + item.getHandle()), e);
    }
    return results;
}

From source file:org.dspace.statistics.SolrLogger.java

License:BSD License

/**
 * Moves the statistics records of past years out of the main statistics core, each year
 * into its own core named {@code statistics-<year>}.
 * <p>
 * The years that hold records are found with a range facet on the {@code time} field,
 * going back to the year 2000 and excluding the current year. For every such year the
 * records are exported as CSV in pages of 10000 into a temp directory under
 * {@code dspace.dir}, uploaded into the core returned by {@code createCore}, and then
 * deleted from the main core. The temp directory is removed once all years are processed.
 *
 * @throws IOException if exporting, writing or uploading the CSV files fails
 * @throws SolrServerException if a solr request fails
 */
public static void shardSolrIndex() throws IOException, SolrServerException {
    /*
    Start by faceting by year so we can include each year in a separate core !
     */
    SolrQuery yearRangeQuery = new SolrQuery();
    yearRangeQuery.setQuery("*:*");
    yearRangeQuery.setRows(0);
    yearRangeQuery.setFacet(true);
    yearRangeQuery.add(FacetParams.FACET_RANGE, "time");
    //We go back to the year 2000, this is a bit overkill but this way we ensure we have everything
    //The alternative would be to sort but that isn't recommended since it would be a very costly query !
    yearRangeQuery.add(FacetParams.FACET_RANGE_START,
            "NOW/YEAR-" + (Calendar.getInstance().get(Calendar.YEAR) - 2000) + "YEARS");
    //Add the +0year to ensure that we DO NOT include the current year
    yearRangeQuery.add(FacetParams.FACET_RANGE_END, "NOW/YEAR+0YEARS");
    yearRangeQuery.add(FacetParams.FACET_RANGE_GAP, "+1YEAR");
    yearRangeQuery.add(FacetParams.FACET_MINCOUNT, String.valueOf(1));

    //Create a temp directory to store our files in !
    File tempDirectory = new File(
            ConfigurationManager.getProperty("dspace.dir") + File.separator + "temp" + File.separator);
    tempDirectory.mkdirs();

    QueryResponse queryResponse = solr.query(yearRangeQuery);
    //We only have one range query !
    List<RangeFacet.Count> yearResults = queryResponse.getFacetRanges().get(0).getCounts();

    // One HTTP client serves every export request and is shut down when we are done;
    // creating a fresh client per page left each client's connection manager open.
    DefaultHttpClient httpClient = new DefaultHttpClient();
    try {
        for (RangeFacet.Count count : yearResults) {
            long totalRecords = count.getCount();

            //Create a range query from this !
            //We start with our current year
            DCDate dcStart = new DCDate(count.getValue());
            Calendar endDate = Calendar.getInstance();
            //Advance one year for the start of the next one !
            endDate.setTime(dcStart.toDate());
            endDate.add(Calendar.YEAR, 1);
            DCDate dcEndDate = new DCDate(endDate.getTime());

            StringBuilder filterQuery = new StringBuilder();
            filterQuery.append("time:([");
            filterQuery.append(ClientUtils.escapeQueryChars(dcStart.toString()));
            filterQuery.append(" TO ");
            filterQuery.append(ClientUtils.escapeQueryChars(dcEndDate.toString()));
            filterQuery.append("]");
            //The next part of the filter query excludes the content from midnight of the next year !
            filterQuery.append(" NOT ").append(ClientUtils.escapeQueryChars(dcEndDate.toString()));
            filterQuery.append(")");

            Map<String, String> yearQueryParams = new HashMap<String, String>();
            yearQueryParams.put(CommonParams.Q, "*:*");
            yearQueryParams.put(CommonParams.ROWS, String.valueOf(10000));
            yearQueryParams.put(CommonParams.FQ, filterQuery.toString());
            yearQueryParams.put(CommonParams.WT, "csv");

            //Start by creating a new core
            String coreName = "statistics-" + dcStart.getYear();
            HttpSolrServer statisticsYearServer = createCore(solr, coreName);

            System.out.println("Moving: " + totalRecords + " into core " + coreName);
            log.info("Moving: " + totalRecords + " records into core " + coreName);

            List<File> filesToUpload = new ArrayList<File>();
            for (int i = 0; i < totalRecords; i += 10000) {
                String solrRequestUrl = solr.getBaseURL() + "/select";
                solrRequestUrl = generateURL(solrRequestUrl, yearQueryParams);

                HttpGet get = new HttpGet(solrRequestUrl);
                HttpResponse response = httpClient.execute(get);
                InputStream csvInputstream = response.getEntity().getContent();
                //Write the csv output to a file (copyInputStreamToFile also closes the stream)
                File csvFile = new File(tempDirectory.getPath() + File.separatorChar + "temp." + dcStart.getYear()
                        + "." + i + ".csv");
                FileUtils.copyInputStreamToFile(csvInputstream, csvFile);
                filesToUpload.add(csvFile);

                //Add 10000 & start over again
                yearQueryParams.put(CommonParams.START, String.valueOf((i + 10000)));
            }

            for (File tempCsv : filesToUpload) {
                //Upload the data in the csv files to our new solr core
                ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest(
                        "/update/csv");
                contentStreamUpdateRequest.setParam("stream.contentType", "text/plain;charset=utf-8");
                contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
                contentStreamUpdateRequest.addFile(tempCsv, "text/plain;charset=utf-8");

                statisticsYearServer.request(contentStreamUpdateRequest);
            }
            statisticsYearServer.commit(true, true);

            //Delete contents of this year from our year query !
            solr.deleteByQuery(filterQuery.toString());
            solr.commit(true, true);

            log.info("Moved " + totalRecords + " records into core: " + coreName);
        }
    } finally {
        httpClient.getConnectionManager().shutdown();
    }

    FileUtils.deleteDirectory(tempDirectory);
}

From source file:org.dspace.statistics.SolrLogger.java

License:BSD License

/**
 * Adds a {@code bundleName} value to every bitstream statistics record that lacks one.
 * <p>
 * The affected records are exported as CSV in pages of 10000, a {@code bundleName} column
 * is appended to each row, the resulting files are uploaded again, and finally all
 * bitstream records that still have no {@code bundleName} are deleted.
 * <p>
 * The bundle name is the name of the bitstream's first bundle, or {@code LOGO-COLLECTION} /
 * {@code LOGO-COMMUNITY} when the bitstream has no bundle and its parent is a collection or
 * community.
 *
 * @param removeDeletedBitstreams when {@code false}, records for which no bundle name could
 *                                be determined get the name {@code BITSTREAM_DELETED}; when
 *                                {@code true} they are written without a bundle name
 * @throws Exception if querying, exporting, rewriting or uploading fails; the error is
 *                   logged before being rethrown
 */
public static void reindexBitstreamHits(boolean removeDeletedBitstreams) throws Exception {
    Context context = new Context();
    // One HTTP client for all export requests; shut down in the finally block below.
    DefaultHttpClient httpClient = null;

    try {
        httpClient = new DefaultHttpClient();

        //First of all retrieve the total number of records to be updated
        SolrQuery query = new SolrQuery();
        query.setQuery("*:*");
        query.addFilterQuery("type:" + Constants.BITSTREAM);
        //Only retrieve records which do not have a bundle name
        query.addFilterQuery("-bundleName:[* TO *]");
        query.setRows(0);
        addAdditionalSolrYearCores(query);
        long totalRecords = solr.query(query).getResults().getNumFound();

        File tempDirectory = new File(
                ConfigurationManager.getProperty("dspace.dir") + File.separator + "temp" + File.separator);
        tempDirectory.mkdirs();
        List<File> tempCsvFiles = new ArrayList<File>();
        for (int i = 0; i < totalRecords; i += 10000) {
            Map<String, String> params = new HashMap<String, String>();
            params.put(CommonParams.Q, "*:*");
            params.put(CommonParams.FQ, "-bundleName:[* TO *] AND type:" + Constants.BITSTREAM);
            params.put(CommonParams.WT, "csv");
            params.put(CommonParams.ROWS, String.valueOf(10000));
            params.put(CommonParams.START, String.valueOf(i));

            String solrRequestUrl = solr.getBaseURL() + "/select";
            solrRequestUrl = generateURL(solrRequestUrl, params);

            HttpGet get = new HttpGet(solrRequestUrl);
            HttpResponse response = httpClient.execute(get);

            // Decode as UTF-8 explicitly: the files are uploaded as charset=utf-8 below, so
            // relying on the platform default charset could corrupt non-ASCII values.
            InputStream csvOutput = response.getEntity().getContent();
            CSVReader csvReader = new CSVReader(new InputStreamReader(csvOutput, "UTF-8"));
            List<String[]> rows;
            try {
                rows = csvReader.readAll();
            } finally {
                csvReader.close();
            }
            if (rows.isEmpty()) {
                // Without even a header row the page cannot be rewritten. Fail loudly, because
                // the final delete below would otherwise remove records that were never copied.
                throw new IOException("Empty CSV response from solr for request: " + solrRequestUrl);
            }
            String[] header = rows.get(0);
            //Attempt to find the bitstream id index !
            int idIndex = 0;
            for (int j = 0; j < header.length; j++) {
                if (header[j].equals("id")) {
                    idIndex = j;
                }
            }

            File tempCsv = new File(tempDirectory.getPath() + File.separatorChar + "temp." + i + ".csv");
            tempCsvFiles.add(tempCsv);
            CSVWriter csvp = new CSVWriter(
                    new java.io.OutputStreamWriter(new java.io.FileOutputStream(tempCsv), "UTF-8"));
            try {
                //Write the header !
                csvp.writeNext((String[]) ArrayUtils.add(header, "bundleName"));
                Map<Integer, String> bitBundleCache = new HashMap<Integer, String>();
                //Loop over each line (skip the headers though)!
                for (int j = 1; j < rows.size(); j++) {
                    String[] csvLine = rows.get(j);
                    int bitstreamId = Integer.parseInt(csvLine[idIndex]);
                    //Attempt to retrieve our bundle name from the cache !
                    String bundleName = bitBundleCache.get(bitstreamId);
                    if (bundleName == null) {
                        //Nothing found retrieve the bitstream
                        Bitstream bitstream = Bitstream.find(context, bitstreamId);
                        //Attempt to retrieve our bitstream !
                        if (bitstream != null) {
                            Bundle[] bundles = bitstream.getBundles();
                            if (bundles != null && 0 < bundles.length) {
                                Bundle bundle = bundles[0];
                                bundleName = bundle.getName();
                                context.removeCached(bundle, bundle.getID());
                            } else {
                                //No bundle found, we are either a collection or a community logo, check for it !
                                DSpaceObject parentObject = bitstream.getParentObject();
                                if (parentObject instanceof Collection) {
                                    bundleName = "LOGO-COLLECTION";
                                } else if (parentObject instanceof Community) {
                                    bundleName = "LOGO-COMMUNITY";
                                }
                                if (parentObject != null) {
                                    context.removeCached(parentObject, parentObject.getID());
                                }

                            }
                            //Cache the bundle name
                            bitBundleCache.put(bitstream.getID(), bundleName);
                            //Remove the bitstream from cache
                            context.removeCached(bitstream, bitstreamId);
                        }
                        //Check if we don't have a bundlename
                        //If we don't have one & we do not need to delete the deleted bitstreams ensure that a BITSTREAM_DELETED bundle name is given !
                        if (bundleName == null && !removeDeletedBitstreams) {
                            bundleName = "BITSTREAM_DELETED";
                        }
                    }
                    csvp.writeNext((String[]) ArrayUtils.add(csvLine, bundleName));
                }

                csvp.flush();
            } finally {
                // Always release the file handle, also when a row could not be processed.
                csvp.close();
            }
        }

        //Add all the separate csv files
        for (File tempCsv : tempCsvFiles) {
            ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest(
                    "/update/csv");
            contentStreamUpdateRequest.setParam("stream.contentType", "text/plain;charset=utf-8");
            contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
            contentStreamUpdateRequest.addFile(tempCsv, "text/plain;charset=utf-8");

            solr.request(contentStreamUpdateRequest);
        }

        //Now that all our new bitstream stats are in place, delete all the old ones !
        solr.deleteByQuery("-bundleName:[* TO *] AND type:" + Constants.BITSTREAM);
        //Commit everything to wrap up
        solr.commit(true, true);
        //Clean up our directory !
        FileUtils.deleteDirectory(tempDirectory);
    } catch (Exception e) {
        log.error("Error while updating the bitstream statistics", e);
        throw e;
    } finally {
        try {
            if (httpClient != null) {
                httpClient.getConnectionManager().shutdown();
            }
        } finally {
            context.abort();
        }
    }
}

From source file:org.dspace.statistics.SolrLogger.java

License:BSD License

/**
 * Export all SOLR usage statistics for viewing/downloading content to a flat text file.
 * The file goes to a series/*  w w  w .  j ava2 s. c  o  m*/
 *
 * @throws Exception
 */
public static void exportHits() throws Exception {
    Context context = new Context();

    File tempDirectory = new File(
            ConfigurationManager.getProperty("dspace.dir") + File.separator + "temp" + File.separator);
    tempDirectory.mkdirs();

    try {
        //First of all retrieve the total number of records to be updated
        SolrQuery query = new SolrQuery();
        query.setQuery("*:*");

        ModifiableSolrParams solrParams = new ModifiableSolrParams();
        solrParams.set(CommonParams.Q, "statistics_type:view OR (*:* AND -statistics_type:*)");
        solrParams.set(CommonParams.WT, "javabin");
        solrParams.set(CommonParams.ROWS, String.valueOf(10000));

        addAdditionalSolrYearCores(query);
        long totalRecords = solr.query(query).getResults().getNumFound();
        System.out.println("There are " + totalRecords + " usage events in SOLR for download/view.");

        for (int i = 0; i < totalRecords; i += 10000) {
            solrParams.set(CommonParams.START, String.valueOf(i));
            QueryResponse queryResponse = solr.query(solrParams);
            SolrDocumentList docs = queryResponse.getResults();

            File exportOutput = new File(
                    tempDirectory.getPath() + File.separatorChar + "usagestats_" + i + ".csv");
            exportOutput.delete();

            //export docs
            addDocumentsToFile(context, docs, exportOutput);
            System.out.println("Export hits [" + i + " - " + String.valueOf(i + 9999) + "] to "
                    + exportOutput.getCanonicalPath());
        }
    } catch (Exception e) {
        log.error("Error while exporting SOLR data", e);
        throw e;
    } finally {
        context.abort();
    }
}

From source file:org.dspace.statistics.SolrLoggerServiceImpl.java

License:BSD License

/**
 * Moves the statistics records of past years out of the main statistics core, each year
 * into its own core named {@code statistics-<year>}.
 * <p>
 * The years that hold records are found with a range facet on the {@code time} field,
 * going back to the year 2000 and excluding the current year. For every such year the
 * records are exported as CSV in pages of 10000 into a temp directory under
 * {@code dspace.dir}, uploaded into the core returned by {@code createCore} (skipping the
 * {@code _version_} column), and then deleted from the main core. The temp directory is
 * removed once all years are processed.
 *
 * @throws IOException if exporting, writing or uploading the CSV files fails
 * @throws SolrServerException if a solr request fails
 */
@Override
public void shardSolrIndex() throws IOException, SolrServerException {
    /*
    Start by faceting by year so we can include each year in a separate core !
     */
    SolrQuery yearRangeQuery = new SolrQuery();
    yearRangeQuery.setQuery("*:*");
    yearRangeQuery.setRows(0);
    yearRangeQuery.setFacet(true);
    yearRangeQuery.add(FacetParams.FACET_RANGE, "time");
    //We go back to the year 2000, this is a bit overkill but this way we ensure we have everything
    //The alternative would be to sort but that isn't recommended since it would be a very costly query !
    yearRangeQuery.add(FacetParams.FACET_RANGE_START,
            "NOW/YEAR-" + (Calendar.getInstance().get(Calendar.YEAR) - 2000) + "YEARS");
    //Add the +0year to ensure that we DO NOT include the current year
    yearRangeQuery.add(FacetParams.FACET_RANGE_END, "NOW/YEAR+0YEARS");
    yearRangeQuery.add(FacetParams.FACET_RANGE_GAP, "+1YEAR");
    yearRangeQuery.add(FacetParams.FACET_MINCOUNT, String.valueOf(1));

    //Create a temp directory to store our files in !
    File tempDirectory = new File(
            configurationService.getProperty("dspace.dir") + File.separator + "temp" + File.separator);
    tempDirectory.mkdirs();

    QueryResponse queryResponse = solr.query(yearRangeQuery);
    //We only have one range query !
    List<RangeFacet.Count> yearResults = queryResponse.getFacetRanges().get(0).getCounts();

    // One HTTP client serves every export request and is shut down when we are done;
    // creating a fresh client per page left each client's connection manager open.
    DefaultHttpClient httpClient = new DefaultHttpClient();
    try {
        for (RangeFacet.Count count : yearResults) {
            long totalRecords = count.getCount();

            //Create a range query from this !
            //We start with our current year
            DCDate dcStart = new DCDate(count.getValue());
            Calendar endDate = Calendar.getInstance();
            //Advance one year for the start of the next one !
            endDate.setTime(dcStart.toDate());
            endDate.add(Calendar.YEAR, 1);
            DCDate dcEndDate = new DCDate(endDate.getTime());

            StringBuilder filterQuery = new StringBuilder();
            filterQuery.append("time:([");
            filterQuery.append(ClientUtils.escapeQueryChars(dcStart.toString()));
            filterQuery.append(" TO ");
            filterQuery.append(ClientUtils.escapeQueryChars(dcEndDate.toString()));
            filterQuery.append("]");
            //The next part of the filter query excludes the content from midnight of the next year !
            filterQuery.append(" NOT ").append(ClientUtils.escapeQueryChars(dcEndDate.toString()));
            filterQuery.append(")");

            Map<String, String> yearQueryParams = new HashMap<String, String>();
            yearQueryParams.put(CommonParams.Q, "*:*");
            yearQueryParams.put(CommonParams.ROWS, String.valueOf(10000));
            yearQueryParams.put(CommonParams.FQ, filterQuery.toString());
            yearQueryParams.put(CommonParams.WT, "csv");

            //Start by creating a new core
            String coreName = "statistics-" + dcStart.getYear();
            HttpSolrServer statisticsYearServer = createCore(solr, coreName);

            System.out.println("Moving: " + totalRecords + " into core " + coreName);
            log.info("Moving: " + totalRecords + " records into core " + coreName);

            List<File> filesToUpload = new ArrayList<File>();
            for (int i = 0; i < totalRecords; i += 10000) {
                String solrRequestUrl = solr.getBaseURL() + "/select";
                solrRequestUrl = generateURL(solrRequestUrl, yearQueryParams);

                HttpGet get = new HttpGet(solrRequestUrl);
                HttpResponse response = httpClient.execute(get);
                InputStream csvInputstream = response.getEntity().getContent();
                //Write the csv output to a file (copyInputStreamToFile also closes the stream)
                File csvFile = new File(tempDirectory.getPath() + File.separatorChar + "temp." + dcStart.getYear()
                        + "." + i + ".csv");
                FileUtils.copyInputStreamToFile(csvInputstream, csvFile);
                filesToUpload.add(csvFile);

                //Add 10000 & start over again
                yearQueryParams.put(CommonParams.START, String.valueOf((i + 10000)));
            }

            for (File tempCsv : filesToUpload) {
                //Upload the data in the csv files to our new solr core
                ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest(
                        "/update/csv");
                contentStreamUpdateRequest.setParam("stream.contentType", "text/plain;charset=utf-8");
                contentStreamUpdateRequest.setParam("skip", "_version_");
                contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
                contentStreamUpdateRequest.addFile(tempCsv, "text/plain;charset=utf-8");

                statisticsYearServer.request(contentStreamUpdateRequest);
            }
            statisticsYearServer.commit(true, true);

            //Delete contents of this year from our year query !
            solr.deleteByQuery(filterQuery.toString());
            solr.commit(true, true);

            log.info("Moved " + totalRecords + " records into core: " + coreName);
        }
    } finally {
        httpClient.getConnectionManager().shutdown();
    }

    FileUtils.deleteDirectory(tempDirectory);
}

From source file:org.dspace.statistics.SolrLoggerServiceImpl.java

License:BSD License

/**
 * Adds a {@code bundleName} value to bitstream statistics records that do not have one yet.
 * <p>
 * The matching records ({@code type:BITSTREAM} without a {@code bundleName}) are exported from
 * Solr as CSV in pages of 10000 rows. Every page is rewritten to a temporary CSV file with an
 * extra {@code bundleName} column, the files are posted back through {@code /update/csv}, and
 * finally all records that still lack a {@code bundleName} are deleted and the index is committed.
 *
 * @param removeDeletedBitstreams when {@code false}, rows whose bundle name cannot be determined
 *                                are written with the bundle name {@code BITSTREAM_DELETED};
 *                                when {@code true}, such rows are written without a bundle name
 *                                (NOTE(review): presumably they are then removed by the final
 *                                delete query - confirm against the Solr CSV loader settings)
 * @throws Exception any failure while querying Solr, resolving bitstreams or writing the
 *                   temporary files; the error is logged and rethrown unchanged
 */
@Override
public void reindexBitstreamHits(boolean removeDeletedBitstreams) throws Exception {
    Context context = new Context();
    //A single client serves every export request and is shut down in the finally block below
    DefaultHttpClient httpClient = new DefaultHttpClient();

    try {
        //First of all retrieve the total number of records to be updated
        SolrQuery query = new SolrQuery();
        query.setQuery("*:*");
        query.addFilterQuery("type:" + Constants.BITSTREAM);
        //Only retrieve records which do not have a bundle name
        query.addFilterQuery("-bundleName:[* TO *]");
        query.setRows(0);
        addAdditionalSolrYearCores(query);
        long totalRecords = solr.query(query).getResults().getNumFound();

        File tempDirectory = new File(
                configurationService.getProperty("dspace.dir") + File.separator + "temp" + File.separator);
        tempDirectory.mkdirs();
        List<File> tempCsvFiles = new ArrayList<File>();
        for (int i = 0; i < totalRecords; i += 10000) {
            Map<String, String> params = new HashMap<String, String>();
            params.put(CommonParams.Q, "*:*");
            params.put(CommonParams.FQ, "-bundleName:[* TO *] AND type:" + Constants.BITSTREAM);
            params.put(CommonParams.WT, "csv");
            params.put(CommonParams.ROWS, String.valueOf(10000));
            params.put(CommonParams.START, String.valueOf(i));

            String solrRequestUrl = generateURL(solr.getBaseURL() + "/select", params);

            HttpResponse response = httpClient.execute(new HttpGet(solrRequestUrl));
            int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode != 200) {
                //An error body must never be parsed as CSV: records get deleted further down
                throw new java.io.IOException("Solr export request failed with HTTP status " + statusCode
                        + " for start=" + i);
            }

            //Decode explicitly as UTF-8 and always close the stream so the connection is released
            List<String[]> rows;
            try (Reader csvReader = new InputStreamReader(response.getEntity().getContent(),
                    java.nio.charset.StandardCharsets.UTF_8)) {
                rows = new CSVReader(csvReader).readAll();
            }
            if (rows.isEmpty()) {
                //Not even a header line came back, nothing to rewrite for this page
                continue;
            }

            String[] header = rows.get(0);
            //Attempt to find the bitstream id index !
            int idIndex = -1;
            for (int j = 0; j < header.length; j++) {
                if ("id".equals(header[j])) {
                    idIndex = j;
                    break;
                }
            }
            if (idIndex < 0) {
                //Falling back to an arbitrary column would resolve the wrong bitstreams
                throw new IllegalStateException("No \"id\" column in Solr CSV export for start=" + i);
            }

            File tempCsv = new File(tempDirectory, "temp." + i + ".csv");
            tempCsvFiles.add(tempCsv);
            //The file is uploaded as charset=utf-8 below, so it has to be written as UTF-8 too
            try (CSVWriter csvp = new CSVWriter(new java.io.OutputStreamWriter(
                    new java.io.FileOutputStream(tempCsv), java.nio.charset.StandardCharsets.UTF_8))) {
                //Write the header !
                csvp.writeNext((String[]) ArrayUtils.add(header, "bundleName"));
                //Keyed by the id exactly as it appears in the CSV, so legacy ids and
                //deleted bitstreams are only looked up once per page
                Map<String, String> bitBundleCache = new HashMap<>();
                //Loop over each line (skip the headers though)!
                for (int j = 1; j < rows.size(); j++) {
                    String[] csvLine = rows.get(j);
                    String bitstreamId = csvLine[idIndex];

                    String bundleName;
                    if (bitBundleCache.containsKey(bitstreamId)) {
                        bundleName = bitBundleCache.get(bitstreamId);
                    } else {
                        bundleName = null;
                        Bitstream bitstream = bitstreamService.findByIdOrLegacyId(context, bitstreamId);
                        if (bitstream != null) {
                            List<Bundle> bundles = bitstream.getBundles();
                            if (bundles != null && 0 < bundles.size()) {
                                bundleName = bundles.get(0).getName();
                            } else {
                                //No bundle found, we are either a collection or a community logo
                                DSpaceObject parentObject = bitstreamService.getParentObject(context, bitstream);
                                if (parentObject instanceof Collection) {
                                    bundleName = "LOGO-COLLECTION";
                                } else if (parentObject instanceof Community) {
                                    bundleName = "LOGO-COMMUNITY";
                                }
                            }
                        }
                        //If we don't have a bundle name & we do not need to delete the deleted
                        //bitstreams ensure that a BITSTREAM_DELETED bundle name is given !
                        if (bundleName == null && !removeDeletedBitstreams) {
                            bundleName = "BITSTREAM_DELETED";
                        }
                        bitBundleCache.put(bitstreamId, bundleName);
                    }
                    csvp.writeNext((String[]) ArrayUtils.add(csvLine, bundleName));
                }
            }
        }

        //Add all the separate csv files
        for (File tempCsv : tempCsvFiles) {
            ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest(
                    "/update/csv");
            contentStreamUpdateRequest.setParam("stream.contentType", "text/plain;charset=utf-8");
            contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
            contentStreamUpdateRequest.addFile(tempCsv, "text/plain;charset=utf-8");

            solr.request(contentStreamUpdateRequest);
        }

        //Now that all our new bitstream stats are in place, delete all the old ones !
        solr.deleteByQuery("-bundleName:[* TO *] AND type:" + Constants.BITSTREAM);
        //Commit everything to wrap up
        solr.commit(true, true);
        //Clean up our directory !
        FileUtils.deleteDirectory(tempDirectory);
    } catch (Exception e) {
        log.error("Error while updating the bitstream statistics", e);
        throw e;
    } finally {
        try {
            //Release the pooled connection(s) held by the export client
            httpClient.getConnectionManager().shutdown();
        } finally {
            context.abort();
        }
    }
}