de.uzk.hki.da.cb.UnpackNoBagitAction.java Source code

Java tutorial

Introduction

Here is the source code for de.uzk.hki.da.cb.UnpackNoBagitAction.java

Source

/*
  DA-NRW Software Suite | ContentBroker
  Copyright (C) 2013 Historisch-Kulturwissenschaftliche Informationsverarbeitung
  Universität zu Köln
    
  This program is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
    
  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
    
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

package de.uzk.hki.da.cb;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.xml.sax.SAXException;

import de.uzk.hki.da.action.AbstractAction;
import de.uzk.hki.da.core.IngestGate;
import de.uzk.hki.da.core.PreconditionsNotMetException;
import de.uzk.hki.da.core.UserException;
import de.uzk.hki.da.core.UserException.UserExceptionId;
import de.uzk.hki.da.model.ObjectPremisXmlReader;
import de.uzk.hki.da.model.PremisXmlValidator;
import de.uzk.hki.da.util.ConfigurationException;
import de.uzk.hki.da.utils.C;
import de.uzk.hki.da.utils.FolderUtils;
import de.uzk.hki.da.utils.FriendlyFilesUtils;
import de.uzk.hki.da.utils.Path;

/**
 * If there is sufficient space on the WorkArea, fetches the container (named object.package.containername)
 * from the user's (object.contractor) IngestArea space and puts it to work. There the action unpacks the
 * contents and checks the SIP for consistency. Deletes the container after proving that it is valid so that
 * the original SIP remains. If the package has proven valid, then the original SIP on the IngestArea gets 
 * removed. 
 * 
 * Accepted container formats [.tar,.tar.gz,.tgz,.zip].
 * 
 * The package is expected to conform to our SIP-Specification.
 * @see <a href="https://github.com/da-nrw/DNSCore/blob/master/ContentBroker/src/main/markdown/specification_sip.de.md">
 * SIP-Spezifikation
 * </a>
 * 
 * @author Daniel M. de Oliveira
 * @author Sebastian Cuy
 * @author Thomas Kleinke
 * 
 */
public class UnpackNoBagitAction extends AbstractAction {

    /** Address of the SIP specification page that is embedded into {@code HELP_SUMMARY}. */
    private static final String SIP_SPEC_URL = "https://github.com/da-nrw/DNSCore/blob/master/ContentBroker/src/main/markdown/sip_specification.md";
    /** Help text appended to the error message that is raised when duplicate document names are found. */
    private static final String HELP_SUMMARY = "Make sure there exists always only one file with the same document name (which is the file path relative from the SIPs data path, excluding the file extension). "
            + "For help refer to the SIP-Specification page at " + SIP_SPEC_URL + ".";

    /**
     * Creates the action and sets {@code SUPPRESS_OBJECT_CONSISTENCY_CHECK} to {@code true}.
     * NOTE(review): the flag is declared outside this file (presumably in AbstractAction);
     * confirm its exact effect there.
     */
    public UnpackNoBagitAction() {
        SUPPRESS_OBJECT_CONSISTENCY_CHECK = true;
    }

    /**
     * Gate that is asked whether a SIP of a given size can be handled before the SIP is ingested.
     * Has to be set before the action runs; {@code checkConfiguration()} rejects a {@code null} value.
     */
    private IngestGate ingestGateNoBagit;

    /**
     * Verifies that the ingest gate has been injected.
     *
     * @throws ConfigurationException if {@code ingestGateNoBagit} has not been set
     */
    @Override
    public void checkConfiguration() {
        if (ingestGateNoBagit != null) {
            return;
        }
        throw new ConfigurationException("ingestGateNoBagit");
    }

    /**
     * Ensures that the SIP container is present in the ingest area and that the object's
     * folder in the work area does not exist yet.
     *
     * @throws PreconditionsNotMetException if the container is missing or the object folder
     *         already exists
     */
    @Override
    public void checkPreconditions() {
        final boolean containerPresent = sipContainerInIngestAreaNoBagit().exists();
        if (!containerPresent) {
            throw new PreconditionsNotMetException("Missing file: " + sipContainerInIngestAreaNoBagitPath());
        }

        final boolean objectFolderPresent = wa.objectPath().toFile().exists();
        if (objectFolderPresent) {
            throw new PreconditionsNotMetException("Should not exist: " + wa.objectPath());
        }
    }

    /**
     * Ingests the SIP container into the work area, validates it and, once it has proven
     * valid, removes the original from the ingest area.
     *
     * @return {@code false} if the ingest gate refuses a package of this size (the method
     *         returns before the SIP is ingested), {@code true} once the SIP has been
     *         ingested, validated and its removal from the ingest area has been attempted
     * @throws IOException if one of the underlying file operations fails
     * @throws UserException if the SIP contains duplicate document names or its PREMIS
     *         file is missing, invalid or unreadable
     */
    @Override
    public boolean implementation() throws IOException {
        final File sipContainer = sipContainerInIngestAreaNoBagit();

        final long size = sipContainerOnIngestAreaIsDir()
                ? FileUtils.sizeOfDirectory(sipContainer)
                : sipContainer.length();

        if (!ingestGateNoBagit.canHandle(size)) {
            logger.warn(
                    "ResourceMonitor prevents further processing of package due to space limitations. Setting job back to start state.");
            return false;
        }

        wa.ingestSIP(sipContainer);

        moveSipDir();

        throwUserExceptionIfDuplicatesExist();
        throwUserExceptionIfNotPremisConsistent();

        // Removal is the very last step: it must only happen after validity has been proven.
        logger.info("Removing SIP from IngestArea");
        if (sipContainerOnIngestAreaIsDir()) {
            FolderUtils.deleteDirectorySafe(sipContainer);
        } else if (!sipContainer.delete() && sipContainer.exists()) {
            // File.delete() only signals failure through its return value. The SIP itself has
            // already been ingested successfully, so report the leftover instead of failing.
            logger.warn("Could not remove SIP from IngestArea: " + sipContainer.getAbsolutePath());
        }
        return true;
    }

    /**
     * @return the SIP container's location in the ingest area as a {@link File}, derived from
     *         {@code sipContainerInIngestAreaNoBagitPath()}
     */
    private File sipContainerInIngestAreaNoBagit() {
        return sipContainerInIngestAreaNoBagitPath().toFile();
    }

    /**
     * @return {@code true} if the SIP container in the ingest area currently is a directory
     */
    private boolean sipContainerOnIngestAreaIsDir() {
        final File container = sipContainerInIngestAreaNoBagit();
        return container.isDirectory();
    }

    /**
     * Creates the data folder in the work area and, if the ingested SIP consists of exactly
     * one top-level entry, moves the files and folders inside that entry into the data folder
     * and deletes the entry afterwards.
     * <br>
     * If the SIP has no or more than one top-level entry, nothing is moved.
     *
     * @throws RuntimeException if the data folder cannot be created, if the SIP folder or its
     *         single top-level entry cannot be listed (e.g. because it is not a directory),
     *         or if moving or deleting fails
     * @throws IOException never thrown by the current implementation; kept in the signature
     *         for callers
     */
    private void moveSipDir() throws IOException {

        final File dataDir = wa.dataPath().toFile();
        if (!dataDir.mkdirs()) {
            throw new RuntimeException("couldn't create directory: " + dataDir);
        }

        // listFiles() returns null (not an empty array) when the path is not a directory
        // or cannot be read.
        final File[] topLevelEntries = wa.sipFile().listFiles();
        if (topLevelEntries == null) {
            throw new RuntimeException("couldn't list contents of " + wa.sipFile());
        }
        if (topLevelEntries.length != 1) {
            return;
        }

        final File topLevelFolder = topLevelEntries[0];
        final File[] children = topLevelFolder.listFiles();
        if (children == null) {
            throw new RuntimeException("couldn't list contents of " + topLevelFolder.getAbsolutePath());
        }

        for (File child : children) {
            if (child.isFile()) {
                try {
                    FileUtils.moveFileToDirectory(child, dataDir, false);
                } catch (IOException e) {
                    throw new RuntimeException("couldn't move file " + child.getAbsolutePath() + " to folder "
                            + dataDir, e);
                }
            } else if (child.isDirectory()) {
                try {
                    FileUtils.moveDirectoryToDirectory(child, dataDir, false);
                } catch (IOException e) {
                    throw new RuntimeException("couldn't move folder " + child.getAbsolutePath() + " to folder "
                            + dataDir, e);
                }
            }
        }

        try {
            FolderUtils.deleteDirectorySafe(topLevelFolder);
        } catch (IOException e) {
            // Keep the cause so the reason for the failed deletion is not lost.
            throw new RuntimeException("couldn't delete folder " + topLevelFolder.getAbsolutePath(), e);
        }
    }

    /**
     * Builds the path of the SIP container in the ingest area from the root path returned by
     * {@code n.getIngestAreaNoBagitRootPath()}, the short name of the object's contractor and
     * the container name of the object's latest package.
     * NOTE(review): {@code n} and {@code o} are declared outside this file - presumably the
     * local node and the object being processed; confirm in AbstractAction.
     *
     * @return the path of the SIP container in the ingest area
     */
    private Path sipContainerInIngestAreaNoBagitPath() {
        return Path.make(n.getIngestAreaNoBagitRootPath(), o.getContractor().getShort_name(),
                o.getLatestPackage().getContainerName());
    }

    /**
     * Undoes the effects of this action: deletes the object's folder in the work area,
     * deletes the SIP file in the work area unless the container in the ingest area is a
     * directory, clears the file list of the latest package and resets the job's rep name.
     *
     * @throws IOException if the object's folder cannot be deleted
     */
    @Override
    public void rollback() throws IOException {

        // 1. remove whatever has been created for the object in the work area
        FolderUtils.deleteDirectorySafe(wa.objectPath().toFile());

        // 2. remove the SIP file itself, unless the container is a directory
        final boolean containerIsNoDirectory = !sipContainerOnIngestAreaIsDir();
        if (containerIsNoDirectory) {
            wa.sipFile().delete();
        }

        // 3. reset the model state touched by this action
        o.getLatestPackage().getFiles().clear();
        j.setRep_name("");
    }

    /**
     * Validates the PREMIS file located in the data folder and then tries to deserialize it.
     *
     * @throws UserException with id {@code INVALID_SIP_PREMIS} if validation fails,
     *         {@code SIP_PREMIS_NOT_FOUND} if the file does not exist, or
     *         {@code READ_SIP_PREMIS_ERROR} if the file cannot be deserialized
     * @throws IOException if validation fails on the I/O level for another reason than a
     *         missing file
     */
    private void throwUserExceptionIfNotPremisConsistent() throws IOException {

        try {
            if (!PremisXmlValidator.validatePremisFile(Path.make(wa.dataPath(), C.PREMIS_XML).toFile())) {
                throw new UserException(UserExceptionId.INVALID_SIP_PREMIS, "PREMIS Datei nicht valide.");
            }
        } catch (FileNotFoundException e) {
            throw new UserException(UserExceptionId.SIP_PREMIS_NOT_FOUND, "PREMIS Datei nicht gefunden.", e);
        } catch (SAXException e) {
            logger.error(e.getMessage());
            // Pass the parser exception on as cause so its stack trace is not lost.
            throw new UserException(UserExceptionId.INVALID_SIP_PREMIS,
                    "PREMIS Datei nicht valide.: " + e.getMessage(), e);
        }

        try {
            // Dry run only: parse the values and perform the XML-to-object mapping;
            // the result is discarded.
            new ObjectPremisXmlReader().deserialize(Path.makeFile(wa.dataPath(), C.PREMIS_XML));
        } catch (Exception e) {
            throw new UserException(UserExceptionId.READ_SIP_PREMIS_ERROR,
                    "Konnte PREMIS Datei nicht erfolgreich einlesen.", e);
        }
    }

    /**
     * Looks for document names that are shared by more than one file in the data folder.
     * <br>
     * A shared document name is tolerated only if exactly two files share it and at least
     * one of them is a friendly (sidecar) file, i.e. carries one of the object's friendly
     * file extensions. Every other shared name counts as an error.
     *
     * @throws UserException with id {@code DUPLICATE_DOCUMENT_NAMES} if at least one
     *         document name is shared in a way that is not tolerated
     */
    private void throwUserExceptionIfDuplicatesExist() {

        // document name <-> files sharing that document name (names used only once are dropped)
        Map<String, List<File>> sharedNames = purgeUnicates(generateDocumentsToFilesMap());

        StringBuilder report = new StringBuilder();
        int errorCount = 0;
        for (Map.Entry<String, List<File>> entry : sharedNames.entrySet()) {
            if (isTolerableSidecarPair(entry.getValue())) {
                continue;
            }
            report.append("Mehr als ein Dokument gefunden mit dem Namen \"");
            report.append(entry.getKey());
            report.append("\".\n");
            errorCount++;
        }

        if (errorCount == 0) {
            return;
        }
        report.append(HELP_SUMMARY).append(" Gefundene Fehler: ").append(errorCount);
        throw new UserException(UserException.UserExceptionId.DUPLICATE_DOCUMENT_NAMES, report.toString());
    }

    /**
     * @param sharingFiles the files sharing one document name
     * @return {@code true} if one of the files is a friendly file and exactly one other file
     *         remains once it is subtracted
     */
    private boolean isTolerableSidecarPair(List<File> sharingFiles) {
        for (File candidate : sharingFiles) {
            if (FriendlyFilesUtils.isFriendlyFile(candidate.getAbsolutePath(), o.getFriendlyFileExtensions())
                    && (sharingFiles.size() - 1) == 1) {
                return true;
            }
        }
        return false;
    }

    /**
     * Removes, in place, every document name that maps to exactly one file, so that only
     * names shared by several files remain.
     *
     * @param documentsToFiles document name <-> files carrying that document name; gets modified
     * @return the very same map instance that was passed in
     */
    private Map<String, List<File>> purgeUnicates(Map<String, List<File>> documentsToFiles) {

        // Removing through the iterator avoids collecting the keys in a separate list first.
        for (java.util.Iterator<List<File>> it = documentsToFiles.values().iterator(); it.hasNext();) {
            if (it.next().size() == 1) {
                it.remove();
            }
        }
        return documentsToFiles;
    }

    /**
     * Groups all files below the data folder (recursively) by their document name. The
     * document name of a file is its path relative to the data folder with the file
     * extension removed.
     *
     * @return document name <-> all files carrying that document name
     */
    private Map<String, List<File>> generateDocumentsToFilesMap() {

        Map<String, List<File>> documentsToFiles = new HashMap<String, List<File>>();

        final File dataDir = wa.dataPath().toFile();
        final String dataDirPath = dataDir.getAbsolutePath();

        Collection<File> files = FileUtils.listFiles(dataDir, TrueFileFilter.INSTANCE,
                TrueFileFilter.INSTANCE);
        for (File file : files) {
            String document = file.getAbsolutePath();
            // Strip the data folder only as a leading prefix. String.replace would also remove
            // every later occurrence of that path inside the file's path.
            if (document.startsWith(dataDirPath)) {
                document = document.substring(dataDirPath.length());
            }
            if (document.startsWith(File.separator)) {
                document = document.substring(File.separator.length());
            }
            document = FilenameUtils.removeExtension(document);

            List<File> sharingFiles = documentsToFiles.get(document);
            if (sharingFiles == null) {
                sharingFiles = new ArrayList<File>();
                documentsToFiles.put(document, sharingFiles);
            }
            sharingFiles.add(file);
        }
        return documentsToFiles;
    }

    /**
     * Tells whether the given folder contains all of the entries {@code data},
     * {@code bagit.txt} and {@code bag-info.txt}.
     *
     * @param packageContent the folder holding the unpacked package content
     * @return {@code true} only if all three entries exist
     */
    boolean isStandardPackage(File packageContent) {

        final String root = packageContent.getAbsolutePath();
        final String[] requiredEntries = { "/data", "/bagit.txt", "/bag-info.txt" };

        for (String entry : requiredEntries) {
            if (!new File(root + entry).exists()) {
                return false;
            }
        }
        return true;
    }

    /**
     * @return the ingest gate currently set for this action, or {@code null} if none has been set
     */
    public IngestGate getIngestGateNoBagit() {
        return ingestGateNoBagit;
    }

    /**
     * @param ingestGateNoBagit the ingest gate this action asks whether a SIP of a given size
     *        can be handled
     */
    public void setIngestGateNoBagit(IngestGate ingestGateNoBagit) {
        this.ingestGateNoBagit = ingestGateNoBagit;
    }

}