Example usage for org.apache.solr.client.solrj.request.ContentStreamUpdateRequest#setAction

List of usage examples for org.apache.solr.client.solrj.request.ContentStreamUpdateRequest#setAction

Introduction

This page collects example usages of the setAction method of org.apache.solr.client.solrj.request.ContentStreamUpdateRequest.

Prototype

public AbstractUpdateRequest setAction(ACTION action, boolean waitFlush, boolean waitSearcher) 

Source Link

Document

Sets appropriate parameters for the given ACTION

Usage

From source file:actors.SolrActor.java

License:Apache License

/**
 * Handles an index event: looks the event's document up with a realtime GET
 * and, when no {@code doc} entry comes back, submits it to the
 * {@code /update/extract} handler with an immediate commit.
 *
 * <p>Any exception raised along the way is printed to standard error and
 * otherwise ignored.
 *
 * @param msg event carrying the Solr input document to index
 */
public void indexUpdated(SolrIndexEvent msg) {
    try {
        System.out.println("SolrIndexEvent");
        SolrInputDocument doc = msg.getDocuement();

        // Realtime GET keyed on the document's id and revision.
        System.out.println("GET");
        SolrQuery lookup = new SolrQuery();
        lookup.setRequestHandler("/get");
        String id = doc.getFieldValue("literal.id").toString();
        String rev = doc.getFieldValue("literal.rev").toString();
        lookup.set("id", id);
        lookup.set("rev", rev);

        NamedList<Object> result = server.query(lookup).getResponse();

        // Nothing to do when the lookup already returned the document.
        boolean alreadyIndexed = result != null && result.get("doc") != null;
        if (alreadyIndexed) {
            System.out.println("It's already update");
            return;
        }

        System.out.println("/update/extract");
        ContentStreamUpdateRequest extractRequest = new ContentStreamUpdateRequest("/update/extract");

        // The content to extract is streamed from the URL held in literal.links.
        URL contentUrl = new URL(doc.getFieldValue("literal.links").toString());
        ContentStreamBase content = new ContentStreamBase.URLStream(contentUrl);
        System.out.println("ContentStreamBase");
        extractRequest.addContentStream(content);

        // Additional metadata copied one-to-one from the input document.
        String[] copiedFields = { "literal.id", "literal.title", "literal.rev", "literal.when", "literal.path",
                "literal.icon", "literal.size" };
        for (String field : copiedFields) {
            extractRequest.setParam(field, doc.getFieldValue(field).toString());
        }
        extractRequest.setParam("literal.url", doc.getFieldValue("literal.links").toString());

        extractRequest.setParam("uprefix", "attr_");
        extractRequest.setParam("fmap.content", "attr_content");
        extractRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);

        // Send the extraction request to Solr.
        server.request(extractRequest);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:at.kc.tugraz.ss.service.solr.impl.SSSolrImpl.java

License:Apache License

/**
 * Sends the file found at {@code localWorkPath + par.id} to Solr's
 * {@code /update/extract} handler, using the same id as {@code literal.id},
 * and commits.
 *
 * <p>Per the original authors' note, adding a document whose ID already
 * exists in the index replaces that document (see
 * http://stackoverflow.com/questions/8494923/solr-block-updating-of-existing-document or
 * http://lucene.apache.org/solr/api-4_0_0-ALPHA/doc-files/tutorial.html).
 *
 * @param parA service parameter wrapped into an {@code SSSolrAddDocPar}
 * @throws Exception declared by the interface; failures are handed to
 *         {@code SSServErrReg.regErrThrow}
 */
@Override
public void solrAddDoc(final SSServPar parA) throws Exception {

    try {
        final SSSolrAddDocPar par = new SSSolrAddDocPar(parA);
        final ContentStreamUpdateRequest extractRequest = new ContentStreamUpdateRequest("/update/extract");
        final File docFile = new File(localWorkPath + par.id);

        extractRequest.addContentStream(new ContentStreamBase.FileStream(docFile));
        extractRequest.setParam("literal.id", par.id);
        extractRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);

        solrUpdater.request(extractRequest);

        SSLogU.info("document w/ id " + par.id + " added successfully. ");
    } catch (Exception error) {
        SSServErrReg.regErrThrow(error);
    }
}

From source file:at.tugraz.sss.servs.db.impl.SSDBNoSQLSolrImpl.java

License:Apache License

/**
 * Sends the file found at {@code SSConf.getLocalWorkPath() + par.id} to
 * Solr's {@code /update/extract} handler, using the same id as
 * {@code literal.id}, and commits.
 *
 * <p>Per the original authors' note, adding a document whose ID already
 * exists in the index replaces that document (see
 * http://stackoverflow.com/questions/8494923/solr-block-updating-of-existing-document or
 * http://lucene.apache.org/solr/api-4_0_0-ALPHA/doc-files/tutorial.html).
 *
 * @param par parameter object providing the document id
 * @throws SSErr declared by the interface; failures are handed to
 *         {@code SSServErrReg.regErrThrow}
 */
@Override
public void addDoc(final SSDBNoSQLAddDocPar par) throws SSErr {

    try {
        final ContentStreamUpdateRequest extractRequest = new ContentStreamUpdateRequest("/update/extract");
        final File docFile = new File(SSConf.getLocalWorkPath() + par.id);

        extractRequest.addContentStream(new ContentStreamBase.FileStream(docFile));
        extractRequest.setParam("literal.id", par.id);
        extractRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);

        solrServer.request(extractRequest);

        SSLogU.info("document w/ id " + par.id + " added successfully. ");
    } catch (Exception error) {
        SSServErrReg.regErrThrow(error);
    }
}

From source file:org.craftercms.search.service.impl.SolrSearchService.java

License:Open Source License

/**
 * Indexes a file through a content stream update request and commits.
 *
 * <p>If Solr rejects the content stream request, a warning is logged and
 * only the document metadata (including the file name field) is added
 * instead.
 *
 * @param indexId          target index; the default index id is used when empty
 * @param site             site the content belongs to
 * @param id               content id; its last path segment is used as file name
 * @param file             file whose content is sent to Solr
 * @param additionalFields extra fields passed to the document builder
 * @throws SearchException on I/O errors, or when the metadata-only fallback fails
 */
@Override
public void updateContent(String indexId, String site, String id, File file,
        Map<String, List<String>> additionalFields) throws SearchException {
    if (StringUtils.isEmpty(indexId)) {
        indexId = defaultIndexId;
    }

    String fileName = FilenameUtils.getName(id);
    String contentType = mimeTypesMap.getContentType(fileName);
    String finalId = site + ":" + id;
    ContentStreamUpdateRequest request = new ContentStreamUpdateRequest(SOLR_CONTENT_STREAM_UPDATE_URL);
    NamedList<Object> response;

    try {
        ModifiableSolrParams params = solrDocumentBuilder.buildParams(site, id,
                ExtractingParams.LITERALS_PREFIX, null, additionalFields);
        params.set(ExtractingParams.LITERALS_PREFIX + fileNameFieldName, fileName);

        request.setParams(params);
        request.addFile(file, contentType);
        request.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);

        response = solrClient.request(request, indexId);
    } catch (SolrServerException e) {
        logger.warn("{}Unable to update file through content stream request: {}. Attempting to perform just "
                + "the metadata update", getIndexPrefix(indexId), e.getMessage());

        response = updateMetadataOnly(indexId, site, id, finalId, fileName, additionalFields);
    } catch (IOException e) {
        throw new SearchException(indexId, "I/O error while executing update file for " + finalId, e);
    }

    if (logger.isDebugEnabled()) {
        logger.debug(getSuccessfulMessage(indexId, finalId, "Update file", response));
    }
}

/** Adds only the built document plus the file name field, without the file content. */
private NamedList<Object> updateMetadataOnly(String indexId, String site, String id, String finalId,
        String fileName, Map<String, List<String>> additionalFields) throws SearchException {
    try {
        SolrInputDocument inputDocument = solrDocumentBuilder.build(site, id, additionalFields);
        inputDocument.setField(fileNameFieldName, fileName);

        return solrClient.add(indexId, inputDocument).getResponse();
    } catch (IOException e) {
        throw new SearchException(indexId, "I/O error while executing update file for " + finalId, e);
    } catch (SolrServerException e) {
        throw new SearchException(indexId, e.getMessage(), e);
    }
}

From source file:org.dspace.discovery.SolrServiceImpl.java

License:BSD License

/**
 * Write the document to the index, together with any attached content streams.
 *
 * <p>Nothing is written when {@code getSolr()} returns {@code null}. Without
 * streams the document is added directly. With streams, every document field
 * is passed as a literal parameter of a {@code /update/extract} request that
 * carries the streams and commits. A {@link SolrServerException} is logged
 * and not rethrown.
 *
 * @param doc the solr document to be written to the server
 * @param streams content streams to send along with the document; may be empty
 * @throws IOException IO exception
 */
protected void writeDocument(SolrInputDocument doc, List<BitstreamContentStream> streams) throws IOException {

    try {
        if (getSolr() == null) {
            return;
        }

        if (CollectionUtils.isEmpty(streams)) {
            getSolr().add(doc);
            return;
        }

        ContentStreamUpdateRequest extractRequest = new ContentStreamUpdateRequest("/update/extract");
        for (BitstreamContentStream stream : streams) {
            extractRequest.addContentStream(stream);
        }

        // Every field value of the document becomes a literal.<field> parameter.
        ModifiableSolrParams literals = new ModifiableSolrParams();
        for (String fieldName : doc.getFieldNames()) {
            String literalName = ExtractingParams.LITERALS_PREFIX + fieldName;
            for (Object value : doc.getFieldValues(fieldName)) {
                literals.add(literalName, value.toString());
            }
        }

        extractRequest.setParams(literals);
        extractRequest.setParam(ExtractingParams.UNKNOWN_FIELD_PREFIX, "attr_");
        extractRequest.setParam(ExtractingParams.MAP_PREFIX + "content", "fulltext");
        extractRequest.setParam(ExtractingParams.EXTRACT_FORMAT, "text");
        extractRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
        extractRequest.process(getSolr());
    } catch (SolrServerException e) {
        log.error(e.getMessage(), e);
    }
}

From source file:org.dspace.statistics.SolrLogger.java

License:BSD License

public static void shardSolrIndex() throws IOException, SolrServerException {
    /*/*from  w ww  .  jav a 2  s . c  o m*/
    Start by faceting by year so we can include each year in a separate core !
     */
    SolrQuery yearRangeQuery = new SolrQuery();
    yearRangeQuery.setQuery("*:*");
    yearRangeQuery.setRows(0);
    yearRangeQuery.setFacet(true);
    yearRangeQuery.add(FacetParams.FACET_RANGE, "time");
    //We go back to 2000 the year 2000, this is a bit overkill but this way we ensure we have everything
    //The alternative would be to sort but that isn't recommended since it would be a very costly query !
    yearRangeQuery.add(FacetParams.FACET_RANGE_START,
            "NOW/YEAR-" + (Calendar.getInstance().get(Calendar.YEAR) - 2000) + "YEARS");
    //Add the +0year to ensure that we DO NOT include the current year
    yearRangeQuery.add(FacetParams.FACET_RANGE_END, "NOW/YEAR+0YEARS");
    yearRangeQuery.add(FacetParams.FACET_RANGE_GAP, "+1YEAR");
    yearRangeQuery.add(FacetParams.FACET_MINCOUNT, String.valueOf(1));

    //Create a temp directory to store our files in !
    File tempDirectory = new File(
            ConfigurationManager.getProperty("dspace.dir") + File.separator + "temp" + File.separator);
    tempDirectory.mkdirs();

    QueryResponse queryResponse = solr.query(yearRangeQuery);
    //We only have one range query !
    List<RangeFacet.Count> yearResults = queryResponse.getFacetRanges().get(0).getCounts();
    for (RangeFacet.Count count : yearResults) {
        long totalRecords = count.getCount();

        //Create a range query from this !
        //We start with out current year
        DCDate dcStart = new DCDate(count.getValue());
        Calendar endDate = Calendar.getInstance();
        //Advance one year for the start of the next one !
        endDate.setTime(dcStart.toDate());
        endDate.add(Calendar.YEAR, 1);
        DCDate dcEndDate = new DCDate(endDate.getTime());

        StringBuilder filterQuery = new StringBuilder();
        filterQuery.append("time:([");
        filterQuery.append(ClientUtils.escapeQueryChars(dcStart.toString()));
        filterQuery.append(" TO ");
        filterQuery.append(ClientUtils.escapeQueryChars(dcEndDate.toString()));
        filterQuery.append("]");
        //The next part of the filter query excludes the content from midnight of the next year !
        filterQuery.append(" NOT ").append(ClientUtils.escapeQueryChars(dcEndDate.toString()));
        filterQuery.append(")");

        Map<String, String> yearQueryParams = new HashMap<String, String>();
        yearQueryParams.put(CommonParams.Q, "*:*");
        yearQueryParams.put(CommonParams.ROWS, String.valueOf(10000));
        yearQueryParams.put(CommonParams.FQ, filterQuery.toString());
        yearQueryParams.put(CommonParams.WT, "csv");

        //Start by creating a new core
        String coreName = "statistics-" + dcStart.getYear();
        HttpSolrServer statisticsYearServer = createCore(solr, coreName);

        System.out.println("Moving: " + totalRecords + " into core " + coreName);
        log.info("Moving: " + totalRecords + " records into core " + coreName);

        List<File> filesToUpload = new ArrayList<File>();
        for (int i = 0; i < totalRecords; i += 10000) {
            String solrRequestUrl = solr.getBaseURL() + "/select";
            solrRequestUrl = generateURL(solrRequestUrl, yearQueryParams);

            HttpGet get = new HttpGet(solrRequestUrl);
            HttpResponse response = new DefaultHttpClient().execute(get);
            InputStream csvInputstream = response.getEntity().getContent();
            //Write the csv ouput to a file !
            File csvFile = new File(tempDirectory.getPath() + File.separatorChar + "temp." + dcStart.getYear()
                    + "." + i + ".csv");
            FileUtils.copyInputStreamToFile(csvInputstream, csvFile);
            filesToUpload.add(csvFile);

            //Add 10000 & start over again
            yearQueryParams.put(CommonParams.START, String.valueOf((i + 10000)));
        }

        for (File tempCsv : filesToUpload) {
            //Upload the data in the csv files to our new solr core
            ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest(
                    "/update/csv");
            contentStreamUpdateRequest.setParam("stream.contentType", "text/plain;charset=utf-8");
            contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
            contentStreamUpdateRequest.addFile(tempCsv, "text/plain;charset=utf-8");

            statisticsYearServer.request(contentStreamUpdateRequest);
        }
        statisticsYearServer.commit(true, true);

        //Delete contents of this year from our year query !
        solr.deleteByQuery(filterQuery.toString());
        solr.commit(true, true);

        log.info("Moved " + totalRecords + " records into core: " + coreName);
    }

    FileUtils.deleteDirectory(tempDirectory);
}

From source file:org.dspace.statistics.SolrLogger.java

License:BSD License

/**
 * Adds a {@code bundleName} value to the bitstream statistics records that
 * do not have one yet.
 *
 * <p>The affected records are exported as CSV in pages of 10000 rows. Each
 * row gets a {@code bundleName} column appended: the name of the bitstream's
 * first bundle, {@code LOGO-COLLECTION} / {@code LOGO-COMMUNITY} for logos,
 * or {@code BITSTREAM_DELETED}. The pages are uploaded again through
 * {@code /update/csv}, after which all bitstream records still lacking a
 * bundle name are deleted and the temporary files are removed.
 *
 * @param removeDeletedBitstreams when {@code false}, rows for which no
 *        bundle name could be determined are labelled
 *        {@code BITSTREAM_DELETED}; when {@code true} they are written with
 *        a {@code null} bundle name (presumably so the final delete query
 *        removes them; confirm against the CSV loader's empty-field handling)
 * @throws Exception any failure, after it has been logged
 */
public static void reindexBitstreamHits(boolean removeDeletedBitstreams) throws Exception {
    Context context = new Context();

    try {
        //First of all retrieve the total number of records to be updated
        SolrQuery query = new SolrQuery();
        query.setQuery("*:*");
        query.addFilterQuery("type:" + Constants.BITSTREAM);
        //Only retrieve records which do not have a bundle name
        query.addFilterQuery("-bundleName:[* TO *]");
        query.setRows(0);
        addAdditionalSolrYearCores(query);
        long totalRecords = solr.query(query).getResults().getNumFound();

        File tempDirectory = new File(
                ConfigurationManager.getProperty("dspace.dir") + File.separator + "temp" + File.separator);
        tempDirectory.mkdirs();
        List<File> tempCsvFiles = new ArrayList<File>();
        for (int i = 0; i < totalRecords; i += 10000) {
            Map<String, String> params = new HashMap<String, String>();
            params.put(CommonParams.Q, "*:*");
            params.put(CommonParams.FQ, "-bundleName:[* TO *] AND type:" + Constants.BITSTREAM);
            params.put(CommonParams.WT, "csv");
            params.put(CommonParams.ROWS, String.valueOf(10000));
            params.put(CommonParams.START, String.valueOf(i));

            String solrRequestUrl = solr.getBaseURL() + "/select";
            solrRequestUrl = generateURL(solrRequestUrl, params);

            HttpGet get = new HttpGet(solrRequestUrl);
            List<String[]> rows;
            DefaultHttpClient httpClient = new DefaultHttpClient();
            try {
                HttpResponse response = httpClient.execute(get);

                InputStream csvOutput = response.getEntity().getContent();
                //Decode explicitly as UTF-8 instead of the platform charset: the page is
                //uploaded again below declared as charset=utf-8
                Reader csvReader = new InputStreamReader(csvOutput, "UTF-8");
                rows = new CSVReader(csvReader).readAll();
            } finally {
                //One client is created per page, so release its connections once the page is parsed
                httpClient.getConnectionManager().shutdown();
            }
            String[][] csvParsed = rows.toArray(new String[rows.size()][]);
            String[] header = csvParsed[0];
            //Attempt to find the bitstream id index !
            int idIndex = 0;
            for (int j = 0; j < header.length; j++) {
                if (header[j].equals("id")) {
                    idIndex = j;
                }
            }

            File tempCsv = new File(tempDirectory.getPath() + File.separatorChar + "temp." + i + ".csv");
            tempCsvFiles.add(tempCsv);
            //Write as UTF-8 so the file content matches the charset announced on upload
            CSVWriter csvp = new CSVWriter(
                    new java.io.OutputStreamWriter(new java.io.FileOutputStream(tempCsv), "UTF-8"));
            try {
                //Write the header !
                csvp.writeNext((String[]) ArrayUtils.add(header, "bundleName"));
                Map<Integer, String> bitBundleCache = new HashMap<Integer, String>();
                //Loop over each line (skip the headers though)!
                for (int j = 1; j < csvParsed.length; j++) {
                    String[] csvLine = csvParsed[j];
                    //Write the default line !
                    int bitstreamId = Integer.parseInt(csvLine[idIndex]);
                    //Attempt to retrieve our bundle name from the cache !
                    String bundleName = bitBundleCache.get(bitstreamId);
                    if (bundleName == null) {
                        //Nothing found retrieve the bitstream
                        Bitstream bitstream = Bitstream.find(context, bitstreamId);
                        //Attempt to retrieve our bitstream !
                        if (bitstream != null) {
                            Bundle[] bundles = bitstream.getBundles();
                            if (bundles != null && 0 < bundles.length) {
                                Bundle bundle = bundles[0];
                                bundleName = bundle.getName();
                                context.removeCached(bundle, bundle.getID());
                            } else {
                                //No bundle found, we are either a collection or a community logo, check for it !
                                DSpaceObject parentObject = bitstream.getParentObject();
                                if (parentObject instanceof Collection) {
                                    bundleName = "LOGO-COLLECTION";
                                } else if (parentObject instanceof Community) {
                                    bundleName = "LOGO-COMMUNITY";
                                }
                                if (parentObject != null) {
                                    context.removeCached(parentObject, parentObject.getID());
                                }

                            }
                            //Cache the bundle name
                            bitBundleCache.put(bitstream.getID(), bundleName);
                            //Remove the bitstream from cache
                            context.removeCached(bitstream, bitstreamId);
                        }
                        //Check if we don't have a bundlename
                        //If we don't have one & we do not need to delete the deleted bitstreams ensure that a BITSTREAM_DELETED bundle name is given !
                        if (bundleName == null && !removeDeletedBitstreams) {
                            bundleName = "BITSTREAM_DELETED";
                        }
                    }
                    csvp.writeNext((String[]) ArrayUtils.add(csvLine, bundleName));
                }

                csvp.flush();
            } finally {
                //Close the file even when a row fails so the handle is not leaked
                csvp.close();
            }
        }

        //Add all the separate csv files
        for (File tempCsv : tempCsvFiles) {
            ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest(
                    "/update/csv");
            contentStreamUpdateRequest.setParam("stream.contentType", "text/plain;charset=utf-8");
            contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
            contentStreamUpdateRequest.addFile(tempCsv, "text/plain;charset=utf-8");

            solr.request(contentStreamUpdateRequest);
        }

        //Now that all our new bitstream stats are in place, delete all the old ones !
        solr.deleteByQuery("-bundleName:[* TO *] AND type:" + Constants.BITSTREAM);
        //Commit everything to wrap up
        solr.commit(true, true);
        //Clean up our directory !
        FileUtils.deleteDirectory(tempDirectory);
    } catch (Exception e) {
        log.error("Error while updating the bitstream statistics", e);
        throw e;
    } finally {
        context.abort();
    }
}

From source file:org.dspace.statistics.SolrLoggerServiceImpl.java

License:BSD License

@Override
public void shardSolrIndex() throws IOException, SolrServerException {
    /*/*from w  w w  . j  a  v  a  2  s .  co  m*/
    Start by faceting by year so we can include each year in a separate core !
     */
    SolrQuery yearRangeQuery = new SolrQuery();
    yearRangeQuery.setQuery("*:*");
    yearRangeQuery.setRows(0);
    yearRangeQuery.setFacet(true);
    yearRangeQuery.add(FacetParams.FACET_RANGE, "time");
    //We go back to 2000 the year 2000, this is a bit overkill but this way we ensure we have everything
    //The alternative would be to sort but that isn't recommended since it would be a very costly query !
    yearRangeQuery.add(FacetParams.FACET_RANGE_START,
            "NOW/YEAR-" + (Calendar.getInstance().get(Calendar.YEAR) - 2000) + "YEARS");
    //Add the +0year to ensure that we DO NOT include the current year
    yearRangeQuery.add(FacetParams.FACET_RANGE_END, "NOW/YEAR+0YEARS");
    yearRangeQuery.add(FacetParams.FACET_RANGE_GAP, "+1YEAR");
    yearRangeQuery.add(FacetParams.FACET_MINCOUNT, String.valueOf(1));

    //Create a temp directory to store our files in !
    File tempDirectory = new File(
            configurationService.getProperty("dspace.dir") + File.separator + "temp" + File.separator);
    tempDirectory.mkdirs();

    QueryResponse queryResponse = solr.query(yearRangeQuery);
    //We only have one range query !
    List<RangeFacet.Count> yearResults = queryResponse.getFacetRanges().get(0).getCounts();
    for (RangeFacet.Count count : yearResults) {
        long totalRecords = count.getCount();

        //Create a range query from this !
        //We start with out current year
        DCDate dcStart = new DCDate(count.getValue());
        Calendar endDate = Calendar.getInstance();
        //Advance one year for the start of the next one !
        endDate.setTime(dcStart.toDate());
        endDate.add(Calendar.YEAR, 1);
        DCDate dcEndDate = new DCDate(endDate.getTime());

        StringBuilder filterQuery = new StringBuilder();
        filterQuery.append("time:([");
        filterQuery.append(ClientUtils.escapeQueryChars(dcStart.toString()));
        filterQuery.append(" TO ");
        filterQuery.append(ClientUtils.escapeQueryChars(dcEndDate.toString()));
        filterQuery.append("]");
        //The next part of the filter query excludes the content from midnight of the next year !
        filterQuery.append(" NOT ").append(ClientUtils.escapeQueryChars(dcEndDate.toString()));
        filterQuery.append(")");

        Map<String, String> yearQueryParams = new HashMap<String, String>();
        yearQueryParams.put(CommonParams.Q, "*:*");
        yearQueryParams.put(CommonParams.ROWS, String.valueOf(10000));
        yearQueryParams.put(CommonParams.FQ, filterQuery.toString());
        yearQueryParams.put(CommonParams.WT, "csv");

        //Start by creating a new core
        String coreName = "statistics-" + dcStart.getYear();
        HttpSolrServer statisticsYearServer = createCore(solr, coreName);

        System.out.println("Moving: " + totalRecords + " into core " + coreName);
        log.info("Moving: " + totalRecords + " records into core " + coreName);

        List<File> filesToUpload = new ArrayList<File>();
        for (int i = 0; i < totalRecords; i += 10000) {
            String solrRequestUrl = solr.getBaseURL() + "/select";
            solrRequestUrl = generateURL(solrRequestUrl, yearQueryParams);

            HttpGet get = new HttpGet(solrRequestUrl);
            HttpResponse response = new DefaultHttpClient().execute(get);
            InputStream csvInputstream = response.getEntity().getContent();
            //Write the csv ouput to a file !
            File csvFile = new File(tempDirectory.getPath() + File.separatorChar + "temp." + dcStart.getYear()
                    + "." + i + ".csv");
            FileUtils.copyInputStreamToFile(csvInputstream, csvFile);
            filesToUpload.add(csvFile);

            //Add 10000 & start over again
            yearQueryParams.put(CommonParams.START, String.valueOf((i + 10000)));
        }

        for (File tempCsv : filesToUpload) {
            //Upload the data in the csv files to our new solr core
            ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest(
                    "/update/csv");
            contentStreamUpdateRequest.setParam("stream.contentType", "text/plain;charset=utf-8");
            contentStreamUpdateRequest.setParam("skip", "_version_");
            contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
            contentStreamUpdateRequest.addFile(tempCsv, "text/plain;charset=utf-8");

            statisticsYearServer.request(contentStreamUpdateRequest);
        }
        statisticsYearServer.commit(true, true);

        //Delete contents of this year from our year query !
        solr.deleteByQuery(filterQuery.toString());
        solr.commit(true, true);

        log.info("Moved " + totalRecords + " records into core: " + coreName);
    }

    FileUtils.deleteDirectory(tempDirectory);
}

From source file:org.dspace.statistics.SolrLoggerServiceImpl.java

License:BSD License

/**
 * Adds a {@code bundleName} value to the bitstream statistics records that
 * do not have one yet.
 *
 * <p>The affected records are exported as CSV in pages of 10000 rows. Each
 * row gets a {@code bundleName} column appended: the name of the bitstream's
 * first bundle, {@code LOGO-COLLECTION} / {@code LOGO-COMMUNITY} for logos,
 * or {@code BITSTREAM_DELETED}. The pages are uploaded again through
 * {@code /update/csv}, after which all bitstream records still lacking a
 * bundle name are deleted and the temporary files are removed.
 *
 * @param removeDeletedBitstreams when {@code false}, rows for which no
 *        bundle name could be determined are labelled
 *        {@code BITSTREAM_DELETED}; when {@code true} they are written with
 *        a {@code null} bundle name (presumably so the final delete query
 *        removes them; confirm against the CSV loader's empty-field handling)
 * @throws Exception any failure, after it has been logged
 */
@Override
public void reindexBitstreamHits(boolean removeDeletedBitstreams) throws Exception {
    Context context = new Context();

    try {
        //First of all retrieve the total number of records to be updated
        SolrQuery query = new SolrQuery();
        query.setQuery("*:*");
        query.addFilterQuery("type:" + Constants.BITSTREAM);
        //Only retrieve records which do not have a bundle name
        query.addFilterQuery("-bundleName:[* TO *]");
        query.setRows(0);
        addAdditionalSolrYearCores(query);
        long totalRecords = solr.query(query).getResults().getNumFound();

        File tempDirectory = new File(
                configurationService.getProperty("dspace.dir") + File.separator + "temp" + File.separator);
        tempDirectory.mkdirs();
        List<File> tempCsvFiles = new ArrayList<File>();
        for (int i = 0; i < totalRecords; i += 10000) {
            Map<String, String> params = new HashMap<String, String>();
            params.put(CommonParams.Q, "*:*");
            params.put(CommonParams.FQ, "-bundleName:[* TO *] AND type:" + Constants.BITSTREAM);
            params.put(CommonParams.WT, "csv");
            params.put(CommonParams.ROWS, String.valueOf(10000));
            params.put(CommonParams.START, String.valueOf(i));

            String solrRequestUrl = solr.getBaseURL() + "/select";
            solrRequestUrl = generateURL(solrRequestUrl, params);

            HttpGet get = new HttpGet(solrRequestUrl);
            List<String[]> rows;
            DefaultHttpClient httpClient = new DefaultHttpClient();
            try {
                HttpResponse response = httpClient.execute(get);

                InputStream csvOutput = response.getEntity().getContent();
                //Decode explicitly as UTF-8 instead of the platform charset: the page is
                //uploaded again below declared as charset=utf-8
                Reader csvReader = new InputStreamReader(csvOutput, java.nio.charset.StandardCharsets.UTF_8);
                rows = new CSVReader(csvReader).readAll();
            } finally {
                //One client is created per page, so release its connections once the page is parsed
                httpClient.getConnectionManager().shutdown();
            }
            String[][] csvParsed = rows.toArray(new String[rows.size()][]);
            String[] header = csvParsed[0];
            //Attempt to find the bitstream id index !
            int idIndex = 0;
            for (int j = 0; j < header.length; j++) {
                if (header[j].equals("id")) {
                    idIndex = j;
                }
            }

            File tempCsv = new File(tempDirectory.getPath() + File.separatorChar + "temp." + i + ".csv");
            tempCsvFiles.add(tempCsv);
            //Write as UTF-8 so the file content matches the charset announced on upload
            CSVWriter csvp = new CSVWriter(new java.io.OutputStreamWriter(
                    new java.io.FileOutputStream(tempCsv), java.nio.charset.StandardCharsets.UTF_8));
            try {
                //Write the header !
                csvp.writeNext((String[]) ArrayUtils.add(header, "bundleName"));
                Map<String, String> bitBundleCache = new HashMap<>();
                //Loop over each line (skip the headers though)!
                for (int j = 1; j < csvParsed.length; j++) {
                    String[] csvLine = csvParsed[j];
                    //Write the default line !
                    String bitstreamId = csvLine[idIndex];
                    //Attempt to retrieve our bundle name from the cache !
                    String bundleName = bitBundleCache.get(bitstreamId);
                    if (bundleName == null) {
                        //Nothing found retrieve the bitstream
                        Bitstream bitstream = bitstreamService.findByIdOrLegacyId(context, bitstreamId);
                        //Attempt to retrieve our bitstream !
                        if (bitstream != null) {
                            List<Bundle> bundles = bitstream.getBundles();
                            if (bundles != null && 0 < bundles.size()) {
                                Bundle bundle = bundles.get(0);
                                bundleName = bundle.getName();
                            } else {
                                //No bundle found, we are either a collection or a community logo, check for it !
                                DSpaceObject parentObject = bitstreamService.getParentObject(context, bitstream);
                                if (parentObject instanceof Collection) {
                                    bundleName = "LOGO-COLLECTION";
                                } else if (parentObject instanceof Community) {
                                    bundleName = "LOGO-COMMUNITY";
                                }

                            }
                            //Cache the bundle name
                            bitBundleCache.put(bitstream.getID().toString(), bundleName);
                        }
                        //Check if we don't have a bundlename
                        //If we don't have one & we do not need to delete the deleted bitstreams ensure that a BITSTREAM_DELETED bundle name is given !
                        if (bundleName == null && !removeDeletedBitstreams) {
                            bundleName = "BITSTREAM_DELETED";
                        }
                    }
                    csvp.writeNext((String[]) ArrayUtils.add(csvLine, bundleName));
                }

                csvp.flush();
            } finally {
                //Close the file even when a row fails so the handle is not leaked
                csvp.close();
            }
        }

        //Add all the separate csv files
        for (File tempCsv : tempCsvFiles) {
            ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest(
                    "/update/csv");
            contentStreamUpdateRequest.setParam("stream.contentType", "text/plain;charset=utf-8");
            contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
            contentStreamUpdateRequest.addFile(tempCsv, "text/plain;charset=utf-8");

            solr.request(contentStreamUpdateRequest);
        }

        //Now that all our new bitstream stats are in place, delete all the old ones !
        solr.deleteByQuery("-bundleName:[* TO *] AND type:" + Constants.BITSTREAM);
        //Commit everything to wrap up
        solr.commit(true, true);
        //Clean up our directory !
        FileUtils.deleteDirectory(tempDirectory);
    } catch (Exception e) {
        log.error("Error while updating the bitstream statistics", e);
        throw e;
    } finally {
        context.abort();
    }
}

From source file:org.dspace.util.SolrImportExport.java

License:BSD License

/**
 * Import previously exported documents (or externally created CSV files that have the appropriate structure)
 * into the specified index.
 *
 * <p>Files are imported in sorted name order, one {@code /update/csv} request with commit per file.
 * If no matching file is found a warning is logged and nothing is imported.
 *
 * @param indexName the index to import.
 * @param fromDir the source directory. Must exist and be readable.
 *                   The importer will look for files whose name starts with {@code indexName}
 *                   and ends with .csv (to match what is generated by #makeExportFilename).
 * @param solrUrl The solr URL for the index to import into. Must not be blank.
 * @param clear if true, clear the index before importing.
 * @param overwrite if true, skip _version_ field on import to disable Solr's optimistic concurrency functionality
 * @throws IOException if there is a problem reading the files or communicating with Solr.
 * @throws SolrServerException if there is a problem reading the files or communicating with Solr.
 * @throws SolrImportExportException if the solr URL is blank or the source directory doesn't exist or
 *                   isn't readable.
 */
public static void importIndex(final String indexName, File fromDir, String solrUrl, boolean clear,
        boolean overwrite) throws IOException, SolrServerException, SolrImportExportException {
    if (StringUtils.isBlank(solrUrl)) {
        throw new SolrImportExportException(
                "Could not construct solr URL for index " + indexName + ", aborting import.");
    }

    if (!fromDir.exists() || !fromDir.canRead()) {
        throw new SolrImportExportException("Source directory " + fromDir
                + " doesn't exist or isn't readable, aborting import of index " + indexName);
    }

    HttpSolrServer solr = new HttpSolrServer(solrUrl);

    // must get multivalue fields before clearing
    List<String> multivaluedFields = getMultiValuedFields(solr);

    if (clear) {
        clearIndex(solrUrl);
    }

    File[] files = fromDir.listFiles(new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            return name.startsWith(indexName) && name.endsWith(".csv");
        }
    });

    if (files == null || files.length == 0) {
        log.warn(
                "No export files found in directory " + fromDir.getCanonicalPath() + " for index " + indexName);
        return;
    }

    // import in a deterministic (file name) order
    Arrays.sort(files);

    for (File file : files) {
        log.info("Importing file " + file.getCanonicalPath());
        ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest("/update/csv");
        if (overwrite) {
            contentStreamUpdateRequest.setParam("skip", "_version_");
        }
        // multivalued fields are split on import, using backslash as the escape character
        for (String mvField : multivaluedFields) {
            contentStreamUpdateRequest.setParam("f." + mvField + ".split", "true");
            contentStreamUpdateRequest.setParam("f." + mvField + ".escape", "\\");
        }
        contentStreamUpdateRequest.setParam("stream.contentType", "text/csv;charset=utf-8");
        contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
        contentStreamUpdateRequest.addFile(file, "text/csv;charset=utf-8");

        solr.request(contentStreamUpdateRequest);
    }

    solr.commit(true, true);
}