edu.unc.lib.dl.ingest.sip.METSPackageFileValidator.java Source code

Java tutorial

Introduction

Here is the source code for edu.unc.lib.dl.ingest.sip.METSPackageFileValidator.java

Source

/**
 * Copyright 2008 The University of North Carolina at Chapel Hill
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.unc.lib.dl.ingest.sip;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.xpath.XPath;

import edu.unc.lib.dl.ingest.IngestException;
import edu.unc.lib.dl.util.Checksum;
import edu.unc.lib.dl.xml.JDOMNamespaceUtil;

/**
 * Checks the integrity of the files of a METS submission package (SIP) against the METS manifest.
 * {@link #validateFiles(Document, METSPackageSIP)} reports a failure under these conditions:
 * <ul>
 * <li>a packaged file referenced in the METS manifest cannot be located in the SIP</li>
 * <li>a checksum supplied in the manifest doesn't match the one computed locally</li>
 * <li>there are extra files in the SIP that the manifest does not mention</li>
 * </ul>
 *
 * @author count0
 */
public class METSPackageFileValidator {
    private static final Log log = LogFactory.getLog(METSPackageFileValidator.class);

    private static final Namespace _METSNamespace = Namespace.getNamespace("m", JDOMNamespaceUtil.METS_NS.getURI());
    // The file finder expression uses the "xlink:" prefix, so that prefix has to be bound as well.
    private static final Namespace _XLinkNamespace = Namespace.getNamespace("xlink",
            JDOMNamespaceUtil.XLINK_NS.getURI());

    private static final String fileFinderXpathStr = "//m:file[m:FLocat/@xlink:href = $locator]";
    private static final String allFilesXpathStr = "//m:file";

    // Not used by the validation below; retained from the original class.
    private static final XPath fileFinderXpath = compileXPath(fileFinderXpathStr);
    private static final XPath allFilesXpath = compileXPath(allFilesXpathStr);

    /**
     * Compiles an XPath expression and binds the namespace prefixes used by the expressions of this class.
     *
     * @param expression
     *            the XPath expression to compile
     * @return the compiled expression
     * @throws ExceptionInInitializerError
     *             if the expression cannot be compiled (only called during class initialization)
     */
    private static XPath compileXPath(String expression) {
        try {
            XPath xpath = XPath.newInstance(expression);
            xpath.addNamespace(_METSNamespace);
            xpath.addNamespace(_XLinkNamespace);
            return xpath;
        } catch (JDOMException e) {
            throw new java.lang.ExceptionInInitializerError(e);
        }
    }

    /**
     * Validates the files in the package against the <code>file</code> elements of the METS manifest.
     * <p>
     * For every manifest entry whose location has no URI scheme or a scheme containing "file", the file is looked up
     * in the package and must exist. If the entry carries a <code>CHECKSUM</code> attribute, it is compared
     * (ignoring case) with the digest computed by {@link Checksum}. Entries with any other URI scheme are skipped and
     * are not accessed. Entries without a location are logged and skipped. Finally, when the package has a data
     * directory, every data file that is neither referenced by the manifest nor the METS file itself is reported as
     * an extra file.
     *
     * @param mets
     *            the METS manifest document
     * @param metsPack
     *            the submission package whose files are validated
     * @throws IngestException
     *             if a file cannot be read while computing its checksum; a
     *             {@link FilesDoNotMatchManifestException} listing the missing, corrupt and extra files is thrown
     *             when the package contents do not match the manifest
     */
    public void validateFiles(Document mets, METSPackageSIP metsPack) throws IngestException {
        List<File> manifestFiles = new ArrayList<File>();
        List<String> missingFiles = new ArrayList<String>();
        List<String> badChecksumFiles = new ArrayList<String>();

        // find missing or corrupt files listed in manifest
        for (Element fileEl : selectFileElements(mets)) {
            String href = findPackagedLocator(fileEl, missingFiles);
            if (href == null) {
                continue;
            }

            // locate the file and check that it exists
            File file;
            try {
                log.debug("Looking in SIP");
                file = metsPack.getFileForLocator(href);
            } catch (IOException e) {
                log.warn("Unable to resolve file location: " + href, e);
                missingFiles.add(href);
                // Without a file there is nothing to checksum.
                continue;
            }
            if (file == null) {
                missingFiles.add(href);
                continue;
            }
            // Recorded before the existence check: a file named by the manifest is never an "extra" file.
            manifestFiles.add(file);
            log.debug("FILE IS IN METSPackage: " + file.getPath());
            if (!file.exists()) {
                missingFiles.add(href);
                continue;
            }

            if (!checksumMatches(fileEl, file, href)) {
                badChecksumFiles.add(href);
            }
        }

        // TODO: account for local (not inline xmlData) MODS files
        // see if there are extra files in the SIP
        List<String> extraFiles = findExtraFiles(metsPack, manifestFiles);

        if (missingFiles.size() > 0 || badChecksumFiles.size() > 0 || extraFiles.size() > 0) {
            // We have an error here...
            String msg = "The files submitted do not match those listed in the METS manifest.";
            FilesDoNotMatchManifestException e = new FilesDoNotMatchManifestException(msg);
            e.setBadChecksumFiles(badChecksumFiles);
            e.setExtraFiles(extraFiles);
            e.setMissingFiles(missingFiles);
            throw e;
        }
    }

    /**
     * Selects every METS <code>file</code> element of the manifest.
     *
     * @param mets
     *            the METS manifest document
     * @return the matching elements
     */
    @SuppressWarnings("unchecked")
    private static List<Element> selectFileElements(Document mets) {
        try {
            // JDOM's XPath API returns a raw List; the expression only selects elements.
            return (List<Element>) allFilesXpath.selectNodes(mets);
        } catch (JDOMException e) {
            throw new Error("Unexpected JDOM Exception", e);
        }
    }

    /**
     * Extracts the locator of a manifest entry if it refers to a file that should be inside the package.
     *
     * @param fileEl
     *            a METS <code>file</code> element
     * @param missingFiles
     *            receives the locator when it cannot be parsed as a URI
     * @return the locator, or <code>null</code> if the entry has no location, has an unparsable location, or uses a
     *         URI scheme that does not contain "file"
     */
    private static String findPackagedLocator(Element fileEl, List<String> missingFiles) {
        Element flocat = fileEl.getChild("FLocat", JDOMNamespaceUtil.METS_NS);
        String href = (flocat == null) ? null : flocat.getAttributeValue("href", JDOMNamespaceUtil.XLINK_NS);
        if (href == null) {
            // Not counted as a missing file, as before; the problem is only made visible in the log.
            log.warn("A file location is missing for file ID: " + fileEl.getAttributeValue("ID"));
            return null;
        }
        try {
            String scheme = new URI(href).getScheme();
            if (scheme != null && !scheme.contains("file")) {
                // Not a packaged file; such references are not validated here.
                return null;
            }
        } catch (URISyntaxException e) {
            log.warn("Cannot parse file location: " + e.getLocalizedMessage() + " (" + href + ")");
            missingFiles.add(href);
            return null;
        }
        return href;
    }

    /**
     * Compares the checksum recorded in the manifest, if any, with the one computed for the file.
     * NOTE(review): the CHECKSUMTYPE attribute is not consulted; the digest is whatever {@link Checksum} computes.
     *
     * @param fileEl
     *            the METS <code>file</code> element
     * @param file
     *            the existing packaged file
     * @param href
     *            the locator of the file, used for reporting
     * @return <code>false</code> if the manifest supplies a checksum that differs from the computed one,
     *         <code>true</code> otherwise (including when no checksum is supplied)
     * @throws IngestException
     *             if the file cannot be read to compute its checksum
     */
    private static boolean checksumMatches(Element fileEl, File file, String href) throws IngestException {
        String checksum = fileEl.getAttributeValue("CHECKSUM");
        if (checksum == null) {
            return true;
        }
        log.debug("found a checksum in METS");
        String sum;
        try {
            sum = new Checksum().getChecksum(file);
        } catch (IOException e) {
            // Log the cause here so it is not lost when the exception is translated.
            log.error("Checksum failed to find file: " + href, e);
            throw new IngestException("Checksum failed to find file: " + href);
        }
        // Hex digests are case-insensitive; this also avoids locale-sensitive lower-casing.
        if (checksum.equalsIgnoreCase(sum)) {
            log.debug("METS manifest checksum was verified for file: " + href);
            return true;
        }
        log.debug("Checksum failed for file: " + href + " (METS says " + checksum + ", but we got " + sum + ")");
        return false;
    }

    /**
     * Lists the data files of the package that are neither referenced by the manifest nor the METS file itself.
     *
     * @param metsPack
     *            the submission package
     * @param manifestFiles
     *            the files referenced by the manifest
     * @return <code>file://</code> locators of the extra files, relative to the data directory where possible; empty
     *         if the package has no data directory
     * @throws IngestException
     *             declared so that any such exception raised by the package accessors can propagate
     */
    private static List<String> findExtraFiles(METSPackageSIP metsPack, List<File> manifestFiles)
            throws IngestException {
        List<String> extraFiles = new ArrayList<String>();
        File dataDir = metsPack.getSIPDataDir();
        if (dataDir == null) {
            return extraFiles;
        }
        try {
            String dataDirPath = dataDir.getCanonicalPath();
            File metsFile = metsPack.getMetsFile();
            for (File received : metsPack.getDataFiles()) {
                // File.equals is defined as compareTo(..) == 0 and tolerates a null METS file.
                if (manifestFiles.contains(received) || received.equals(metsFile)) {
                    continue;
                }
                String path = received.getCanonicalPath();
                // Only strip the prefix when the file really lies under the data directory.
                if (path.startsWith(dataDirPath)) {
                    path = path.substring(dataDirPath.length());
                }
                extraFiles.add("file://" + path);
            }
        } catch (IOException e) {
            throw new Error("Unexpected IO Exception trying to get path of a known file.", e);
        }
        return extraFiles;
    }
}