eu.planets_project.services.utils.DigitalObjectUtils.java Source code

Java tutorial

Introduction

Here is the source code for eu.planets_project.services.utils.DigitalObjectUtils.java

Source

/*******************************************************************************
 * Copyright (c) 2007, 2010 The Planets Project Partners.
 *
 * All rights reserved. This program and the accompanying 
 * materials are made available under the terms of the 
 * Apache License, Version 2.0 which accompanies 
 * this distribution, and is available at 
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 *******************************************************************************/
/**
 * 
 */
package eu.planets_project.services.utils;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.logging.Logger;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;

import eu.planets_project.ifr.core.techreg.formats.FormatRegistry;
import eu.planets_project.ifr.core.techreg.formats.FormatRegistryFactory;
import eu.planets_project.services.datatypes.Checksum;
import eu.planets_project.services.datatypes.Content;
import eu.planets_project.services.datatypes.DigitalObject;
import eu.planets_project.services.datatypes.Event;
import eu.planets_project.services.datatypes.Metadata;

/**
 * Utils for handling digital objects.
 * @author <a href="mailto:Andrew.Jackson@bl.uk">Andy Jackson</a>
 */
public final class DigitalObjectUtils {
    /** Path of the JVM's temporary directory, read once from the {@code java.io.tmpdir} system property. */
    static final String SYSTEM_TEMP_DIR = System.getProperty("java.io.tmpdir");

    /** Private constructor: this class only offers static helpers. */
    private DigitalObjectUtils() {
        /* Util classes are not instantiated */}

    /** Logger named after this class. */
    private static final Logger log = Logger.getLogger(DigitalObjectUtils.class.getName());

    /** Format registry used to translate between format URIs and file extensions. */
    private final static FormatRegistry format = FormatRegistryFactory.getFormatRegistry();

    /** Format URI for the "zip" extension; {@link #isZipType(DigitalObject)} compares against it. */
    private static final URI zipType = format.createExtensionUri("zip");

    /**
     * Working directory, below the system temp directory, in which this class creates its temporary
     * files and folders.
     * <p>
     * The directory name is spelled out as a literal instead of being produced by
     * {@code "dig-ob-utils-tmp".toUpperCase()}: {@link String#toUpperCase()} uses the default locale,
     * so e.g. under a Turkish locale the letter 'i' would be mapped to a dotted capital and the
     * directory name would differ from machine to machine.
     */
    private static final File utils_tmp = new File(SYSTEM_TEMP_DIR, "DIG-OB-UTILS-TMP");

    static {
        try {
            // Create the working directory eagerly so later calls can rely on it being present.
            FileUtils.forceMkdir(utils_tmp);
        } catch (IOException e) {
            // Best effort: class loading must not fail just because the temp folder is unavailable.
            e.printStackTrace();
        }
    }

    /**
     * Counts the bytes of a digital object's content by reading its input stream to the end.
     * The stream obtained for counting is always closed before this method returns.
     *
     * @param dob The digital object whose content should be measured
     * @return The total size, in bytes, of the bytestream contained or referred
     *         to by this Digital Object. Does not include the size of any
     *         associated metadata, or the Java objects etc. Returns 0 if the object
     *         has no content or no input stream; if reading fails part-way, the
     *         number of bytes read so far is returned.
     */
    public static long getContentSize(final DigitalObject dob) {
        if (dob.getContent() == null) {
            return 0;
        }
        final InputStream inputStream = dob.getContent().getInputStream();
        if (inputStream == null) {
            return 0;
        }
        long bytes = 0;
        final byte[] buf = new byte[1024];
        try {
            int length;
            while ((length = inputStream.read(buf)) != -1) {
                bytes += length;
            }
        } catch (IOException e) {
            // Best effort: report what could be read instead of failing the caller.
            e.printStackTrace();
        } finally {
            try {
                // The stream was opened only for counting, so release it here.
                inputStream.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing fails; the count is already final.
            }
        }
        return bytes;
    }

    /**
     * Copies the content of a digital object into a newly created temporary file. The file is
     * registered for deletion on JVM exit.
     *
     * @param object The digital object to copy to a temporary file
     * @return The temporary file the digital object's byte stream was written to
     * @throws IllegalStateException if the temporary file could not be created; the underlying
     *         {@link IOException} is attached as the cause
     */
    public static File toFile(final DigitalObject object) {
        try {
            /* TODO: use format registry to set the extension? The framework should not presume user needs or perform preservation actions silently. */
            /* TODO: use data registry to store the content? Maybe, but TMP files are needed too: toTmpFile.*/
            File file = File.createTempFile("planets", null);
            file.deleteOnExit();
            toFile(object, file);
            return file;
        } catch (IOException e) {
            // Keep the original failure as the cause so callers can see why the copy failed.
            throw new IllegalStateException("Could not copy digital object: " + object, e);
        }
    }

    /**
     * Copies the content of a digital object into the given file. Both the content stream and the
     * file stream are closed before this method returns, whether or not the copy succeeded.
     *
     * @param object The digital object to copy to a file
     * @param file The file to copy the digital object's byte stream to
     * @return The number of bytes copied, or 0 if the file could not be opened or the copy failed
     */
    public static long toFile(final DigitalObject object, final File file) {
        FileOutputStream fOut = null;
        InputStream in = null;
        try {
            fOut = new FileOutputStream(file);
            in = object.getContent().getInputStream();
            long bytesCopied = IOUtils.copyLarge(in, fOut);
            // Close explicitly on the success path so a failing close is still reported below.
            fOut.close();
            return bytesCopied;
        } catch (IOException e) {
            // Covers FileNotFoundException as well; failures are reported and signalled by returning 0.
            e.printStackTrace();
        } finally {
            // Release both streams on every path (closing an already closed stream is harmless).
            IOUtils.closeQuietly(in);
            IOUtils.closeQuietly(fOut);
        }
        return 0;
    }

    /**
     * Small manual check: parses one relative and one absolute URI and prints each of them
     * together with the result of {@link URI#isAbsolute()}.
     * <p>
     * Design notes on the cases to be handled: <br/>
     * - A compound DO, zip as Content, with MD outside the zip, pointing into
     * it via Title. This is to pass between services.<br/>
     * - A zip file containing CDO, MD inside the zip, pointing to the binaries
     * via the Title. This is a pure file 'IP', in effect.<br/>
     * - A compound DO, pulled from such a CDO zip file, with inputstreams for
     * content. Okay, two formats, different contexts and packing/unpacking
     * options.<br/>
     * - (CDO[zip] or CDO) i.e. If no Content, look up to root and unpack?<br/>
     * - DOIP - a special ZIP file containing CDOs. <br/>
     * Operations:<br/>
     * - Packing one or more CDOs into a DOIP, optionally embedding referenced
     * resources. (Value) resources always to be embedded.<br/>
     * - Unpacking a DOIP and getting N CDOs out, optionally embedding binaries,
     * using ZipInputStreams, or unpacking into Files?<br/>
     * TODO Should DO use URI internally for Content.reference, to allow
     * relative resolution?
     *
     * @param args Ignored
     */
    public static void main(String args[]) {
        final String[] samples = { "FAQ.html", "http://localhost/FAQ.html" };
        try {
            for (final String sample : samples) {
                final URI parsed = new URI(sample);
                System.out.println("Got " + parsed);
                System.out.println("Got " + parsed.isAbsolute());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes the content of every digital object in the given list to its own file inside the
     * target folder. The file name of each object is derived via
     * {@link #getFileNameFromDigObject(DigitalObject, URI)}.
     *
     * @param listOfDigObjs The digital objects to create files from
     * @param targetFolder The folder to store result files in
     * @return One file per given digital object, in list order
     */
    public static List<File> getDigitalObjectsAsFiles(final List<DigitalObject> listOfDigObjs,
            final File targetFolder) {
        log.info("received list of dig obj with lengh: " + listOfDigObjs.size());
        final List<File> written = new ArrayList<File>();
        // An empty list simply skips the loop and yields an empty result.
        for (final DigitalObject digOb : listOfDigObjs) {
            final String fileName = getFileNameFromDigObject(digOb, null);
            log.info("name of dig obj is: " + fileName);
            final File target = new File(targetFolder, fileName);
            toFile(digOb, target);
            written.add(target);
        }
        log.info(String.format("Returning %s files", written.size()));
        return written;
    }

    /**
     * Builds a Zip-type DigitalObject from the given input. An existing zip file is wrapped
     * directly; anything else is handed to the folder-based creation, which zips it first.
     *
     * @param zip_Or_Folder a zip file, or a folder to be zipped
     * @param destZipName the name of the created zip file (only used when a new zip is created)
     * @param createByReference Create the Content of the DO by reference or by value
     * @param withChecksum Create a checksum for the zip file to create
     * @param compress compress the content of the zip file (only used when a new zip is created)
     * @return The digital object representing the zipped files
     */
    public static DigitalObject createZipTypeDigitalObject(File zip_Or_Folder, String destZipName,
            boolean createByReference, boolean withChecksum, boolean compress) {
        final boolean alreadyZipped = zip_Or_Folder.isFile() && ZipUtils.isZipFile(zip_Or_Folder);
        if (!alreadyZipped) {
            return createZipTypeDigitalObjectFromFolder(zip_Or_Folder, destZipName, createByReference,
                    withChecksum, compress);
        }
        return createZipTypeDigitalObjectFromZip(zip_Or_Folder, createByReference, withChecksum);
    }

    /**
     * Extracts a single entry ("fragment") from a zip-type DigitalObject and wraps it in a new
     * DigitalObject. The zip is first written to a temporary file, and the fragment is extracted
     * into a freshly named temporary folder.
     *
     * @param digOb the zip type DigOb to get the fragment from
     * @param fragment the fragment (file in the zip) to retrieve
     * @param createByReference create by reference (true) or as stream (false)
     * @return a new DigitalObject containing the extracted fragment as content, or null if the
     *         passed DigOb is not a zip-type DigOb
     */
    public static DigitalObject getFragment(DigitalObject digOb, String fragment, boolean createByReference) {
        if (isZipType(digOb)) {
            // Each call gets its own randomly named extraction folder below the utils temp dir.
            final File extractionDir = new File(utils_tmp,
                    randomizeFileName(getFolderNameFromDigObject(digOb)));
            try {
                FileUtils.forceMkdir(extractionDir);
            } catch (IOException e) {
                e.printStackTrace();
            }
            final File zipFile = getZipAsFile(digOb);
            final File extracted = ZipUtils.getFileFrom(zipFile, fragment, extractionDir);
            return createDigitalObject(extracted, createByReference);
        }
        log.severe(
                "The DigitalObject you have passed is NOT a Zip type DigOb. No Fragment could be retrieved!");
        return null;
    }

    /**
     * Inserts a file into the zip held by a zip-type DigitalObject and returns a new zip-type
     * DigitalObject (created without checksum) for the modified zip.
     *
     * @param zipTypeDigOb the zip-type DigitalObject to insert into
     * @param fragmentFile the file to insert
     * @param targetPathInZip the path inside the zip, passed on to {@code ZipUtils.insertFileInto}
     * @param createByReference create the result by reference (true) or as stream (false)
     * @return the DigitalObject for the modified zip, or null if the passed DigOb is not a
     *         zip-type DigOb
     */
    public static DigitalObject insertFragment(DigitalObject zipTypeDigOb, File fragmentFile,
            String targetPathInZip, boolean createByReference) {
        if (!isZipType(zipTypeDigOb)) {
            // Message names the operation that actually failed (it used to say "retrieved").
            log.severe(
                    "The DigitalObject you have passed is NOT a Zip type DigOb. No Fragment could be inserted!");
            return null;
        }

        File zip = getZipAsFile(zipTypeDigOb);

        File modifiedZip = ZipUtils.insertFileInto(zip, fragmentFile, targetPathInZip);
        return createZipTypeDigitalObjectFromZip(modifiedZip, createByReference, false);
    }

    /**
     * Removes an entry from the zip held by a zip-type DigitalObject and returns a new zip-type
     * DigitalObject (created without checksum) for the modified zip.
     *
     * @param zipTypeDigOb the zip-type DigitalObject to remove from
     * @param targetPathInZip the path inside the zip, passed on to {@code ZipUtils.removeFileFrom}
     * @param createByReference create the result by reference (true) or as stream (false)
     * @return the DigitalObject for the modified zip, or null if the passed DigOb is not a
     *         zip-type DigOb
     */
    public static DigitalObject removeFragment(DigitalObject zipTypeDigOb, String targetPathInZip,
            boolean createByReference) {
        if (!isZipType(zipTypeDigOb)) {
            // Message names the operation that actually failed (it used to say "retrieved").
            log.severe(
                    "The DigitalObject you have passed is NOT a Zip type DigOb. No Fragment could be removed!");
            return null;
        }

        File zip = getZipAsFile(zipTypeDigOb);

        File modifiedZip = ZipUtils.removeFileFrom(zip, targetPathInZip);
        return createZipTypeDigitalObjectFromZip(modifiedZip, createByReference, false);
    }

    /**
     * Returns the fragments recorded on a zip-type DigitalObject.
     *
     * @param digOb the DigitalObject to list the fragments of
     * @return the fragments of the given DigOb, or null if it is not a zip-type DigOb
     */
    public static List<String> listFragments(DigitalObject digOb) {
        if (isZipType(digOb)) {
            return digOb.getFragments();
        }
        log.severe("This DigitalObject is NOT a Zip-type DigOb! No Fragments to return, sorry!!!");
        return null;
    }

    /**
     * Checks whether the format of a DigitalObject equals the zip extension format URI.
     *
     * @param digOb the DigitalObject to test
     * @return true if the DigOb's format is the zip format, false if it is a different format
     *         or no format is set
     */
    public static boolean isZipType(DigitalObject digOb) {
        final URI declaredFormat = digOb.getFormat();
        return declaredFormat != null && declaredFormat.equals(zipType);
    }

    /**
     * Derives a folder name from the title of a DigitalObject: blanks become underscores and
     * everything from the last dot onwards is cut off. The title ".svn" (in any letter case) is
     * returned unchanged.
     *
     * @param digOb to get the folder name from
     * @return the folder name based on "title" in the passed digOb, or null if it has no title
     */
    public static String getFolderNameFromDigObject(DigitalObject digOb) {
        final String rawTitle = digOb.getTitle();
        if (rawTitle == null) {
            return null;
        }
        final String folderName = rawTitle.replace(' ', '_');
        if (folderName.equalsIgnoreCase(".svn")) {
            return folderName;
        }
        final int lastDot = folderName.lastIndexOf('.');
        return (lastDot >= 0) ? folderName.substring(0, lastDot) : folderName;
    }

    /**
     * Derives a file name from the title of a DigitalObject.
     * <p>
     * Blanks in the title become underscores. A title that already contains a dot is returned
     * as it is; this also applies to the default name "default_input.&lt;ext&gt;" used when the
     * object has no title. A title without a dot gets the extension appended and is then passed
     * through {@link #randomizeFileName(String)}.
     * <p>
     * The extension is looked up in the format registry for {@code supposedFormatURI} if given,
     * otherwise for the object's own format; "bin" is used when neither is available.
     *
     * @param digOb to get the file name from
     * @param supposedFormatURI This could be the format you believe the file has. Used to create a
     *        proper file name. May be null.
     * @return the file name based on "title" in the passed digOb
     */
    public static String getFileNameFromDigObject(DigitalObject digOb, URI supposedFormatURI) {
        String fileName = digOb.getTitle();

        // Workaround for the zip handling in this class: fall back to the object's own format.
        final URI formatUri = (supposedFormatURI != null) ? supposedFormatURI : digOb.getFormat();
        final String ext = (formatUri == null) ? "bin" : format.getFirstExtension(formatUri);

        if (fileName == null) {
            fileName = "default_input" + "." + ext;
        }
        fileName = fileName.replace(' ', '_');

        if (fileName.indexOf('.') >= 0) {
            return fileName;
        }
        return randomizeFileName(fileName + "." + ext);
    }

    /**
     * Empties the temporary working directory of this class.
     *
     * @return true if the directory was cleaned, false if cleaning raised an IOException
     */
    public static boolean cleanDigObUtilsTmp() {
        boolean cleaned;
        try {
            FileUtils.cleanDirectory(utils_tmp);
            cleaned = true;
        } catch (IOException e) {
            e.printStackTrace();
            cleaned = false;
        }
        return cleaned;
    }

    /**
     * Zips the given folder into a new temporary directory and wraps the resulting zip file in a
     * ZIP-type DigitalObject whose fragments list the entries of that zip.
     * <p>
     * NOTE(review): both the "by reference" and the "as stream" branch use
     * {@code Content.byReference}; they differ only in passing a URL versus the File itself.
     *
     * @param folder the folder to create a zip from and build the DigitalObject
     * @param destZipName the name the zip file should have; anything after its last dot is replaced
     *        by ".zip". If no name is specified, the name of the folder will be used.
     * @param createByReference a flag to set whether you want to create the DigObs by Reference or as stream...
     * @param withChecksum creates a zip with a checksum and attaches that checksum to the content
     * @param compress compress the zip content or not.
     * @return a DigitalObject containing the zip file created from "folder" and a list of the files
     *         inside the zip as "fragments".
     */
    private static DigitalObject createZipTypeDigitalObjectFromFolder(File folder, String destZipName,
            boolean createByReference, boolean withChecksum, boolean compress) {
        // Work out the zip file name: always "<base>.zip".
        final String baseName;
        if (destZipName == null) {
            baseName = folder.getName();
        } else {
            final int lastDot = destZipName.lastIndexOf('.');
            baseName = (lastDot >= 0) ? destZipName.substring(0, lastDot) : destZipName;
        }
        final String zipName = baseName + ".zip";

        final File workDir = new File(utils_tmp, randomizeFileName("zip_from_folder_tmp"));
        try {
            FileUtils.forceMkdir(workDir);
        } catch (IOException e) {
            e.printStackTrace();
        }

        // Only the content differs between the four cases; title/format/fragments are shared below.
        final File zipFile;
        final DigitalObject.Builder builder;
        if (withChecksum) {
            final ZipResult checked = ZipUtils.createZipAndCheck(folder, workDir, zipName, compress);
            zipFile = checked.getZipFile();
            if (createByReference) {
                builder = new DigitalObject.Builder(
                        Content.byReference(getUrlFromFile(zipFile)).withChecksum(checked.getChecksum()));
            } else {
                builder = new DigitalObject.Builder(
                        Content.byReference(zipFile).withChecksum(checked.getChecksum()));
            }
        } else {
            zipFile = ZipUtils.createZip(folder, workDir, zipName, compress);
            if (createByReference) {
                builder = new DigitalObject.Builder(Content.byReference(getUrlFromFile(zipFile)));
            } else {
                builder = new DigitalObject.Builder(Content.byReference(zipFile));
            }
        }
        return builder.title(zipName).format(format.createExtensionUri("zip"))
                .fragments(ZipUtils.getAllFragments(zipFile)).build();
    }

    /**
     * Wraps an existing zip file in a ZIP-type DigitalObject. The object is titled with the file's
     * name and its fragments list the entries of the zip.
     * <p>
     * NOTE(review): the checksum value is built with {@code Arrays.toString} on the result of
     * {@code Checksums.md5}; confirm that this is the intended string form. If computing the
     * checksum fails, a null checksum is attached.
     *
     * @param zipFile the zip file to create a DigitalObject with
     * @param createByReference a flag to set whether you want to create the DigObs by Reference or as stream...
     * @param withChecksum create DigOb with checksum or not?
     * @return a DigitalObject containing the zip file and a list of the contained files in this zip as "fragments".
     */
    private static DigitalObject createZipTypeDigitalObjectFromZip(File zipFile, boolean createByReference,
            boolean withChecksum) {
        Checksum checksum = null;
        if (withChecksum) {
            try {
                checksum = new Checksum("MD5", Arrays.toString(Checksums.md5(zipFile)));
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        // Only the content differs between the four cases; title/format/fragments are shared below.
        final DigitalObject.Builder builder;
        if (createByReference) {
            final URL zipUrl = getUrlFromFile(zipFile);
            if (withChecksum) {
                builder = new DigitalObject.Builder(Content.byReference(zipUrl).withChecksum(checksum));
            } else {
                builder = new DigitalObject.Builder(Content.byReference(zipUrl));
            }
        } else {
            if (withChecksum) {
                builder = new DigitalObject.Builder(Content.byReference(zipFile).withChecksum(checksum));
            } else {
                builder = new DigitalObject.Builder(Content.byReference(zipFile));
            }
        }
        return builder.title(zipFile.getName()).format(format.createExtensionUri("zip"))
                // lists all entries in this zip file and includes them as "fragments"
                .fragments(ZipUtils.getAllFragments(zipFile)).build();
    }

    /**
     * Writes the content of a DigitalObject to a file inside a freshly named temporary folder.
     * Both the content stream and the file stream are closed before this method returns, whether
     * or not the copy succeeded.
     *
     * @param digOb the DigitalObject whose content should be written out
     * @return the file the content was written to; null if the temporary folder could not be
     *         created. If the copy itself fails, the (possibly incomplete) file is still returned.
     */
    private static File getZipAsFile(DigitalObject digOb) {
        String folderName = randomizeFileName(getFolderNameFromDigObject(digOb));

        File tmpFolder = new File(utils_tmp, folderName);
        File zip = null;
        FileOutputStream out = null;
        InputStream in = null;
        try {
            FileUtils.forceMkdir(tmpFolder);

            zip = new File(tmpFolder, getFileNameFromDigObject(digOb, null));

            out = new FileOutputStream(zip);
            in = digOb.getContent().getInputStream();
            IOUtils.copyLarge(in, out);
            // Close explicitly on the success path so a failing close is still reported below.
            out.close();
        } catch (IOException e) {
            // Covers FileNotFoundException as well.
            e.printStackTrace();
        } finally {
            // Release both streams on every path (closing an already closed stream is harmless).
            IOUtils.closeQuietly(in);
            IOUtils.closeQuietly(out);
        }

        return zip;
    }

    /**
     * Makes a file or folder name unlikely to clash by adding an underscore and a random
     * non-negative number to it. If the name contains a dot, the number is inserted in front of
     * the last dot so the extension stays at the end; otherwise it is appended.
     *
     * @param name the name to randomize; null is treated like the empty string
     * @return the name with "_&lt;random number&gt;" added
     */
    static String randomizeFileName(String name) {
        final Random generator = new Random();
        final String base = (name == null) ? "" : name;
        final int lastDot = base.lastIndexOf('.');
        final int number = generator.nextInt(Integer.MAX_VALUE);
        if (lastDot < 0) {
            return base + "_" + number;
        }
        return base.substring(0, lastDot) + "_" + number + base.substring(lastDot);
    }

    /**
     * Creates a DigitalObject from a given file, by reference or as stream. A directory is zipped
     * (with checksum and compression) into a ZIP-type DigitalObject, a zip file becomes a ZIP-type
     * DigitalObject without checksum, and any other file becomes a plain DigitalObject titled
     * with the file's name.
     *
     * @param file the file to create the DigitalObject from
     * @param createByReference create by reference (true) or as stream (false)
     * @return the DigitalObject created for the file
     */
    private static DigitalObject createDigitalObject(File file, boolean createByReference) {
        if (file.isDirectory()) {
            return createZipTypeDigitalObjectFromFolder(file, FilenameUtils.getBaseName(file.getName()),
                    createByReference, true, true);
        }
        if (ZipUtils.isZipFile(file)) {
            return createZipTypeDigitalObjectFromZip(file, createByReference, false);
        }
        final DigitalObject.Builder builder;
        if (createByReference) {
            builder = new DigitalObject.Builder(Content.byReference(getUrlFromFile(file)));
        } else {
            builder = new DigitalObject.Builder(Content.byReference(file));
        }
        return builder.title(file.getName()).build();
    }

    /**
     * Converts a file to a URL via {@code toURI().toURL()} without forcing the caller to handle
     * the checked {@link MalformedURLException}; a failure is printed instead.
     *
     * @param file the file to convert
     * @return The URL for the given file, or null if the conversion failed
     */
    private static URL getUrlFromFile(File file) {
        URL converted = null;
        try {
            converted = file.toURI().toURL();
        } catch (MalformedURLException e) {
            e.printStackTrace();
        }
        return converted;
    }

    /**
     * Builds a copy of a digital object that carries one additional event. Content, title,
     * permanent URI, format, manifestation-of and metadata are taken over from the given object.
     * <p>
     * The event list of the given object is copied before the new event is appended, so the
     * list returned by {@code digitalObject.getEvents()} is never modified. The new event is
     * added even if the given object has no event list yet.
     * <p>
     * NOTE(review): other properties of the given object (e.g. its fragments) are not copied
     * here; confirm whether they should be.
     *
     * @param digitalObject
     *        The digital object to be updated
     * @param newEvent
     *        The event to add to the digital object
     * @return changed digital object with new event, or null if either argument is null
     */
    public static DigitalObject addEvent(DigitalObject digitalObject, Event newEvent) {
        if (digitalObject == null || newEvent == null) {
            return null;
        }

        DigitalObject.Builder b = new DigitalObject.Builder(digitalObject.getContent());
        if (digitalObject.getTitle() != null) {
            b.title(digitalObject.getTitle());
        }
        if (digitalObject.getPermanentUri() != null) {
            b.permanentUri(digitalObject.getPermanentUri());
        }
        if (digitalObject.getFormat() != null) {
            b.format(digitalObject.getFormat());
        }
        if (digitalObject.getManifestationOf() != null) {
            b.manifestationOf(digitalObject.getManifestationOf());
        }
        if (digitalObject.getMetadata() != null) {
            b.metadata(digitalObject.getMetadata().toArray(new Metadata[0]));
        }

        // Work on a private copy: the source object's list must not change (and may be unmodifiable).
        List<Event> eventList = new ArrayList<Event>();
        if (digitalObject.getEvents() != null) {
            eventList.addAll(digitalObject.getEvents());
        }
        eventList.add(newEvent);
        b.events(eventList.toArray(new Event[0]));

        return b.build();
    }

    /**
     * Looks up an event of a digital object by its summary. If several events carry the same
     * summary, the last one in the event list is returned. Null entries in the list, events
     * without a summary and a missing event list are tolerated.
     *
     * @param initObj
     *        The initial digital object
     * @param summary
     *        Event property we are looking for
     * @return The found event for the particular summary, or null if there is none or if
     *         either argument is null
     */
    public static Event getEventBySummary(DigitalObject initObj, String summary) {
        if (summary == null || initObj == null) {
            return null;
        }
        List<Event> events = initObj.getEvents();
        if (events == null) {
            return null;
        }

        Event res = null;
        for (Event event : events) {
            // summary is known to be non-null, so comparing from its side is safe for null summaries.
            if (event != null && summary.equals(event.getSummary())) {
                res = event;
            }
        }
        return res;
    }

    /**
     * Checks whether a digital object has an event with the given summary. Null entries in the
     * event list, events without a summary and a missing event list are tolerated.
     *
     * @param obj
     *        The digital object
     * @param summary
     *        The summary of the event
     * @return true if the digital object contains an event with that summary, otherwise false
     *         (also if either argument is null)
     */
    public static boolean hasEvent(DigitalObject obj, String summary) {
        if (obj == null || summary == null) {
            return false;
        }
        List<Event> events = obj.getEvents();
        if (events == null) {
            return false;
        }

        for (Event event : events) {
            // summary is known to be non-null, so comparing from its side is safe for null summaries.
            if (event != null && summary.equals(event.getSummary())) {
                return true;
            }
        }
        return false;
    }
}