List of usage examples for org.apache.commons.compress.archivers.zip.ScatterZipOutputStream.fileBased
public static ScatterZipOutputStream fileBased(final File file) throws FileNotFoundException
From source file: org.sead.nds.repository.BagGenerator.java
public boolean generateBag(OutputStream outputStream) throws Exception { log.info("Generating: Bag to the Future!"); pubRequest = RO.getPublicationRequest(); RO.sendStatus(C3PRPubRequestFacade.PENDING_STAGE, Repository.getID() + " is now processing this request"); File tmp = File.createTempFile("sead-scatter-dirs", "tmp"); dirs = ScatterZipOutputStream.fileBased(tmp); JSONObject oremap = RO.getOREMap();/*from ww w . jav a 2s . c om*/ JSONObject aggregation = oremap.getJSONObject("describes"); // Transfer statistics to oremap for preservation - note that the # // files, totalsize are checked after the zip is written // so any error will be recorded in the zip, but caught in the log. // Other elements are not curently checked. JSONObject aggStats = ((JSONObject) pubRequest.get("Aggregation Statistics")); aggregation.put("Aggregation Statistics", aggStats); if (((JSONObject) pubRequest.get(PubRequestFacade.PREFERENCES)).has("License")) { license = ((JSONObject) pubRequest.get(PubRequestFacade.PREFERENCES)).getString("License"); } // Accept license preference and add it as the license on the // aggregation aggregation.put("License", license); if (((JSONObject) pubRequest.get(PubRequestFacade.PREFERENCES)).has("Purpose")) { purpose = ((JSONObject) pubRequest.get(PubRequestFacade.PREFERENCES)).getString("Purpose"); } // Accept the purpose and add it to the map and aggregation (both are // for this purpose) aggregation.put("Purpose", purpose); oremap.put("Purpose", purpose); // check whether Access Rights set, if so, add it to aggregation if (((JSONObject) pubRequest.get(PubRequestFacade.PREFERENCES)).has("Access Rights")) { String accessRights = ((JSONObject) pubRequest.get(PubRequestFacade.PREFERENCES)) .getString("Access Rights"); aggregation.put("Access Rights", accessRights); } bagID = aggregation.getString("Identifier"); String bagName = bagID; try { // Create valid filename from identifier and extend path with // two levels of hash-based subdirs to help distribute 
files bagName = getValidName(bagName); } catch (Exception e) { log.error("Couldn't create valid filename: " + e.getLocalizedMessage()); return false; } // Create data dir in bag, also creates parent bagName dir String currentPath = bagName + "/data/"; createDir(currentPath); aggregates = aggregation.getJSONArray("aggregates"); if (aggregates != null) { // Add container and data entries // Setup global index of the aggregation and all aggregated // resources by Identifier resourceIndex = indexResources(bagID, aggregates); // Setup global list of succeed(true), fail(false), notused // (null) flags resourceUsed = new Boolean[aggregates.length() + 1]; // Process current container (the aggregation itself) and its // children processContainer(aggregation, currentPath); } // Create mainifest files // pid-mapping.txt - a DataOne recommendation to connect ids and // in-bag path/names StringBuffer pidStringBuffer = new StringBuffer(); boolean first = true; for (Entry<String, String> pidEntry : pidMap.entrySet()) { if (!first) { pidStringBuffer.append("\n"); } else { first = false; } pidStringBuffer.append(pidEntry.getKey() + " " + pidEntry.getValue()); } createFileFromString(bagName + "/pid-mapping.txt", pidStringBuffer.toString()); // Hash manifest - a hash manifest is required // by Bagit spec StringBuffer sha1StringBuffer = new StringBuffer(); first = true; for (Entry<String, String> sha1Entry : sha1Map.entrySet()) { if (!first) { sha1StringBuffer.append("\n"); } else { first = false; } sha1StringBuffer.append(sha1Entry.getValue() + " " + sha1Entry.getKey()); } if (!(hashtype == null)) { String manifestName = bagName + "/manifest-"; if (hashtype.equals("SHA1 Hash")) { manifestName = manifestName + "sha1.txt"; } else if (hashtype.equals("SHA512 Hash")) { manifestName = manifestName + "sha512.txt"; } else { log.warn("Unsupported Hash type: " + hashtype); } createFileFromString(manifestName, sha1StringBuffer.toString()); } else { log.warn("No Hash values sent - Bag File does 
not meet BagIT specification requirement"); } // bagit.txt - Required by spec createFileFromString(bagName + "/bagit.txt", "BagIt-Version: 0.97\nTag-File-Character-Encoding: UTF-8"); if (oremap.getJSONObject("describes").has("Creator")) { aggregation.put("Creator", RO.expandPeople(RO.normalizeValues(oremap.getJSONObject("describes").get("Creator")))); } if (oremap.getJSONObject("describes").has("Contact")) { aggregation.put("Contact", RO.expandPeople(RO.normalizeValues(oremap.getJSONObject("describes").get("Contact")))); } // Generate DOI: oremap.getJSONObject("describes").put(PubRequestFacade.EXTERNAL_IDENTIFIER, Repository.createDOIForRO(bagID, RO)); oremap.getJSONObject("describes").put("Publication Date", new SimpleDateFormat("yyyy-MM-dd").format(Calendar.getInstance().getTime())); Object context = oremap.get("@context"); // FixMe - should test that these labels don't have a different // definition (currently we're just checking to see if they a // already defined) addIfNeeded(context, "License", "http://purl.org/dc/terms/license"); addIfNeeded(context, "Purpose", "http://sead-data.net/vocab/publishing#Purpose"); addIfNeeded(context, "Access Rights", "http://purl.org/dc/terms/accessRights"); addIfNeeded(context, PubRequestFacade.EXTERNAL_IDENTIFIER, "http://purl.org/dc/terms/identifier"); addIfNeeded(context, "Publication Date", "http://purl.org/dc/terms/issued"); // Aggregation Statistics // For keys in Agg Stats: for (String key : ((Set<String>) aggStats.keySet())) { addIfNeeded(context, key, getURIForKey(pubRequest.get("@context"), key)); } oremap.put("@id", linkRewriter.rewriteOREMapLink(oremap.getString("@id"), bagID)); aggregation.put("@id", linkRewriter.rewriteAggregationLink(aggregation.getString("@id"), bagID)); // Serialize oremap itself (pretty printed) - SEAD recommendation // (DataOne distributes metadata files within the bag // FixMe - add missing hash values if needed and update context // (read and cache files or read twice?) 
createFileFromString(bagName + "/oremap.jsonld.txt", oremap.toString(2)); // Add a bag-info file createFileFromString(bagName + "/bag-info.txt", generateInfoFile(pubRequest, oremap)); log.info("Creating bag: " + bagName); ZipArchiveOutputStream zipArchiveOutputStream = new ZipArchiveOutputStream(outputStream); // Add all the waiting contents - dirs created first, then data // files // are retrieved via URLs in parallel (defaults to one thread per // processor) // directly to the zip file log.debug("Starting write"); writeTo(zipArchiveOutputStream); log.info("Zipfile Written"); // Finish zipArchiveOutputStream.close(); log.debug("Closed"); // Validate oremap - all entries are part of the collection for (int i = 0; i < resourceUsed.length; i++) { Boolean b = resourceUsed[i]; if (b == null) { RO.sendStatus("Problem", pidMap.get(resourceIndex.get(i)) + " was not used"); } else if (!b) { RO.sendStatus("Problem", pidMap.get(resourceIndex.get(i)) + " was not included successfully"); } else { // Successfully included - now check for hash value and // generate if needed if (i > 0) { // Not root container if (!sha1Map.containsKey(pidMap.get(resourceIndex.get(i)))) { if (!RO.childIsContainer(i - 1)) log.warn("Missing sha1 hash for: " + resourceIndex.get(i)); // FixMe - actually generate it before adding the // oremap // to the zip } } } } return true; }