Example usage for org.apache.commons.compress.archivers.zip ScatterZipOutputStream fileBased

List of usage examples for org.apache.commons.compress.archivers.zip ScatterZipOutputStream fileBased

Introduction

In this page you can find the example usage for org.apache.commons.compress.archivers.zip ScatterZipOutputStream fileBased.

Prototype

public static ScatterZipOutputStream fileBased(final File file) throws FileNotFoundException 

Source Link

Document

Create a ScatterZipOutputStream with default compression level that is backed by a file

Usage

From source file: org.sead.nds.repository.BagGenerator.java

/**
 * Assembles the complete BagIt bag for the current publication request and
 * streams it, as a zip archive, to the supplied output stream.
 *
 * <p>The method: stages directory entries in a file-backed scatter stream,
 * copies request preferences (License, Purpose, Access Rights) onto the
 * OREMap aggregation, writes the BagIt tag/manifest files, mints a DOI,
 * serializes the OREMap, zips everything to {@code outputStream}, and
 * finally validates that every aggregated resource was actually included.
 *
 * @param outputStream destination for the finished zip archive; the caller
 *                     retains ownership of the underlying sink, but the
 *                     zip stream wrapped around it is closed here
 * @return {@code true} on success; {@code false} if a valid bag name could
 *         not be derived from the aggregation identifier
 * @throws Exception on I/O or metadata-retrieval failures
 */
public boolean generateBag(OutputStream outputStream) throws Exception {
    log.info("Generating: Bag to the Future!");
    pubRequest = RO.getPublicationRequest();
    RO.sendStatus(C3PRPubRequestFacade.PENDING_STAGE, Repository.getID() + " is now processing this request");

    // File-backed scatter stream that holds directory entries until the
    // final zip is written; mark the backing file for cleanup on JVM exit.
    File tmp = File.createTempFile("sead-scatter-dirs", "tmp");
    tmp.deleteOnExit();
    dirs = ScatterZipOutputStream.fileBased(tmp);

    JSONObject oremap = RO.getOREMap();
    JSONObject aggregation = oremap.getJSONObject("describes");

    // Transfer statistics to oremap for preservation - note that the #
    // files, totalsize are checked after the zip is written
    // so any error will be recorded in the zip, but caught in the log.
    // Other elements are not currently checked.
    JSONObject aggStats = ((JSONObject) pubRequest.get("Aggregation Statistics"));
    aggregation.put("Aggregation Statistics", aggStats);

    // Hoist the repeated preferences lookup/cast into a single local.
    JSONObject preferences = (JSONObject) pubRequest.get(PubRequestFacade.PREFERENCES);

    if (preferences.has("License")) {
        license = preferences.getString("License");
    }
    // Accept license preference and add it as the license on the
    // aggregation
    aggregation.put("License", license);

    if (preferences.has("Purpose")) {
        purpose = preferences.getString("Purpose");
    }
    // Accept the purpose and add it to the map and aggregation (both are
    // for this purpose)
    aggregation.put("Purpose", purpose);
    oremap.put("Purpose", purpose);

    // check whether Access Rights set, if so, add it to aggregation
    if (preferences.has("Access Rights")) {
        String accessRights = preferences.getString("Access Rights");
        aggregation.put("Access Rights", accessRights);
    }

    bagID = aggregation.getString("Identifier");
    String bagName = bagID;
    try {
        // Create valid filename from identifier and extend path with
        // two levels of hash-based subdirs to help distribute files
        bagName = getValidName(bagName);
    } catch (Exception e) {
        log.error("Couldn't create valid filename: " + e.getLocalizedMessage());
        return false;
    }
    // Create data dir in bag, also creates parent bagName dir
    String currentPath = bagName + "/data/";
    createDir(currentPath);

    aggregates = aggregation.getJSONArray("aggregates");

    if (aggregates != null) {
        // Add container and data entries
        // Setup global index of the aggregation and all aggregated
        // resources by Identifier
        resourceIndex = indexResources(bagID, aggregates);
        // Setup global list of succeed(true), fail(false), notused
        // (null) flags
        resourceUsed = new Boolean[aggregates.length() + 1];
        // Process current container (the aggregation itself) and its
        // children
        processContainer(aggregation, currentPath);
    }
    // Create manifest files
    // pid-mapping.txt - a DataOne recommendation to connect ids and
    // in-bag path/names
    StringBuilder pidStringBuffer = new StringBuilder();
    boolean first = true;
    for (Entry<String, String> pidEntry : pidMap.entrySet()) {
        if (!first) {
            pidStringBuffer.append("\n");
        } else {
            first = false;
        }
        pidStringBuffer.append(pidEntry.getKey() + " " + pidEntry.getValue());
    }
    createFileFromString(bagName + "/pid-mapping.txt", pidStringBuffer.toString());
    // Hash manifest - a hash manifest is required
    // by Bagit spec
    StringBuilder sha1StringBuffer = new StringBuilder();
    first = true;
    for (Entry<String, String> sha1Entry : sha1Map.entrySet()) {
        if (!first) {
            sha1StringBuffer.append("\n");
        } else {
            first = false;
        }
        sha1StringBuffer.append(sha1Entry.getValue() + " " + sha1Entry.getKey());
    }
    if (hashtype != null) {
        // Map the request's hash-type label to a manifest filename suffix;
        // an unrecognized label leaves suffix null and the manifest is
        // skipped (previously a broken "manifest-" file was still written).
        String manifestSuffix = null;
        if (hashtype.equals("SHA1 Hash")) {
            manifestSuffix = "sha1.txt";
        } else if (hashtype.equals("SHA512 Hash")) {
            manifestSuffix = "sha512.txt";
        } else {
            log.warn("Unsupported Hash type: " + hashtype);
        }
        if (manifestSuffix != null) {
            createFileFromString(bagName + "/manifest-" + manifestSuffix, sha1StringBuffer.toString());
        }
    } else {
        log.warn("No Hash values sent - Bag File does not meet BagIT specification requirement");
    }
    // bagit.txt - Required by spec
    createFileFromString(bagName + "/bagit.txt", "BagIt-Version: 0.97\nTag-File-Character-Encoding: UTF-8");

    if (oremap.getJSONObject("describes").has("Creator")) {
        aggregation.put("Creator",
                RO.expandPeople(RO.normalizeValues(oremap.getJSONObject("describes").get("Creator"))));
    }
    if (oremap.getJSONObject("describes").has("Contact")) {
        aggregation.put("Contact",
                RO.expandPeople(RO.normalizeValues(oremap.getJSONObject("describes").get("Contact"))));
    }

    // Generate DOI:
    oremap.getJSONObject("describes").put(PubRequestFacade.EXTERNAL_IDENTIFIER,
            Repository.createDOIForRO(bagID, RO));

    oremap.getJSONObject("describes").put("Publication Date",
            new SimpleDateFormat("yyyy-MM-dd").format(Calendar.getInstance().getTime()));

    Object context = oremap.get("@context");
    // FixMe - should test that these labels don't have a different
    // definition (currently we're just checking to see if they are
    // already defined)
    addIfNeeded(context, "License", "http://purl.org/dc/terms/license");
    addIfNeeded(context, "Purpose", "http://sead-data.net/vocab/publishing#Purpose");
    addIfNeeded(context, "Access Rights", "http://purl.org/dc/terms/accessRights");
    addIfNeeded(context, PubRequestFacade.EXTERNAL_IDENTIFIER, "http://purl.org/dc/terms/identifier");
    addIfNeeded(context, "Publication Date", "http://purl.org/dc/terms/issued");

    // Aggregation Statistics
    // For keys in Agg Stats:
    for (String key : ((Set<String>) aggStats.keySet())) {
        addIfNeeded(context, key, getURIForKey(pubRequest.get("@context"), key));
    }

    oremap.put("@id", linkRewriter.rewriteOREMapLink(oremap.getString("@id"), bagID));
    aggregation.put("@id", linkRewriter.rewriteAggregationLink(aggregation.getString("@id"), bagID));
    // Serialize oremap itself (pretty printed) - SEAD recommendation
    // (DataOne distributes metadata files within the bag
    // FixMe - add missing hash values if needed and update context
    // (read and cache files or read twice?)
    createFileFromString(bagName + "/oremap.jsonld.txt", oremap.toString(2));

    // Add a bag-info file
    createFileFromString(bagName + "/bag-info.txt", generateInfoFile(pubRequest, oremap));

    log.info("Creating bag: " + bagName);

    // try-with-resources guarantees the zip's central directory is written
    // and the stream released even if writeTo throws (previously the
    // stream leaked on any exception between construction and close()).
    try (ZipArchiveOutputStream zipArchiveOutputStream = new ZipArchiveOutputStream(outputStream)) {
        // Add all the waiting contents - dirs created first, then data
        // files
        // are retrieved via URLs in parallel (defaults to one thread per
        // processor)
        // directly to the zip file
        log.debug("Starting write");
        writeTo(zipArchiveOutputStream);
        log.info("Zipfile Written");
        // Finish
    }
    log.debug("Closed");

    // Validate oremap - all entries are part of the collection
    for (int i = 0; i < resourceUsed.length; i++) {
        Boolean b = resourceUsed[i];
        if (b == null) {
            RO.sendStatus("Problem", pidMap.get(resourceIndex.get(i)) + " was not used");
        } else if (!b) {
            RO.sendStatus("Problem", pidMap.get(resourceIndex.get(i)) + " was not included successfully");
        } else {
            // Successfully included - now check for hash value and
            // generate if needed
            if (i > 0) { // Not root container
                if (!sha1Map.containsKey(pidMap.get(resourceIndex.get(i)))) {

                    if (!RO.childIsContainer(i - 1))
                        log.warn("Missing sha1 hash for: " + resourceIndex.get(i));
                    // FixMe - actually generate it before adding the
                    // oremap
                    // to the zip
                }
            }
        }

    }
    return true;

}