org.mustangproject.ZUGFeRD.ZUGFeRDImporter.java Source code

Introduction

Here is the source code for org.mustangproject.ZUGFeRD.ZUGFeRDImporter.java
Source

/** **********************************************************************
 *
 * Copyright 2018 Jochen Staerk
 *
 * Use is subject to license terms.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy
 * of the License at http://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *********************************************************************** */
package org.mustangproject.ZUGFeRD;
/**
 * Mustangproject's ZUGFeRD implementation
 * ZUGFeRD importer
 * Licensed under the APLv2
 * @date 2014-07-07
 * @version 1.1.0
 * @author jstaerk
 * */

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode;
import org.apache.pdfbox.pdmodel.common.PDNameTreeNode;
import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile;
import org.mustangproject.toecount.Toecount;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Reads the invoice XML that is embedded in a ZUGFeRD / Factur-X PDF and
 * exposes selected values of it through XPath based getters.
 *
 * <p>On construction the PDF is loaded, its XMP metadata (if any) is kept as
 * a string, and the embedded files are scanned for an invoice XML
 * ({@code ZUGFeRD-invoice.xml}, {@code zugferd-invoice.xml} or
 * {@code factur-x.xml}) and for attachments whose name starts with
 * {@code additional_data}.
 *
 * <p>Text conversions of the invoice XML always use UTF-8 so that results do
 * not depend on the platform default charset.
 */
public class ZUGFeRDImporter {

    private static final Logger LOGGER = Logger.getLogger(ZUGFeRDImporter.class.getName());

    /**
     * Upper bound for the nesting depth followed in the embedded-files name
     * tree; protects against malformed (e.g. cyclic) trees in untrusted PDFs.
     */
    private static final int MAX_NAME_TREE_DEPTH = 32;

    /** Length in bytes of the UTF-8 byte order mark. */
    private static final int UTF8_BOM_LENGTH = 3;

    /**
     * Whether an embedded invoice XML has been found in the PDF.
     */
    private boolean containsMeta = false;
    /**
     * Content of all embedded files whose name starts with
     * {@code additional_data}, keyed by their file name.
     */
    private final HashMap<String, byte[]> additionalXMLs = new HashMap<>();
    /**
     * Raw XML form of the extracted data - may be directly obtained.
     */
    private byte[] rawXML = null;
    /**
     * XMP metadata of the PDF, {@code null} if the PDF has none.
     */
    private String xmpString = null;

    /**
     * Imports the invoice data from a PDF file.
     *
     * @param pdfFilename path of the PDF file to read
     * @throws ZUGFeRDExportException if the file cannot be read or parsed as PDF
     */
    public ZUGFeRDImporter(String pdfFilename) {
        // try-with-resources so the file handle is released even if extraction fails
        try (InputStream in = new BufferedInputStream(new FileInputStream(pdfFilename))) {
            extractLowLevel(in);
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, null, e);
            throw new ZUGFeRDExportException(e);
        }
    }

    /**
     * Imports the invoice data from a PDF supplied as stream. The stream is
     * not closed by this constructor; it remains owned by the caller.
     *
     * @param pdfStream stream delivering the PDF file
     * @throws ZUGFeRDExportException if the stream cannot be read or parsed as PDF
     */
    public ZUGFeRDImporter(InputStream pdfStream) {
        try {
            extractLowLevel(pdfStream);
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, null, e);
            throw new ZUGFeRDExportException(e);
        }
    }

    /**
     * Extracts a ZUGFeRD invoice from a PDF document represented by an input
     * stream. Errors are reported via exception handling.
     *
     * @param pdfStream a inputstream of a pdf file
     * @throws IOException if the PDF or one of its embedded streams cannot be read
     */
    private void extractLowLevel(InputStream pdfStream) throws IOException {
        try (PDDocument doc = PDDocument.load(pdfStream)) {
            xmpString = readXmp(doc);

            PDDocumentNameDictionary names = new PDDocumentNameDictionary(doc.getDocumentCatalog());
            PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles();
            if (embeddedFiles != null) {
                extractFromNameTree(embeddedFiles, 0);
            }
        }
    }

    /**
     * Reads the XMP metadata of the document.
     *
     * @param doc the loaded PDF
     * @return the XMP packet decoded as UTF-8, or {@code null} if the catalog
     *         has no metadata entry
     * @throws IOException if the metadata stream cannot be opened or closed
     */
    private static String readXmp(PDDocument doc) throws IOException {
        // PDFs without a /Metadata entry are legal; the catalog returns null for them
        if (doc.getDocumentCatalog().getMetadata() == null) {
            return null;
        }
        try (InputStream xmp = doc.getDocumentCatalog().getMetadata().exportXMPMetadata()) {
            return convertStreamToString(xmp);
        }
    }

    /**
     * Walks the embedded-files name tree: a node either carries names
     * directly or refers to kid nodes, which are followed recursively.
     *
     * @param node  current node of the name tree, may be {@code null}
     * @param depth nesting depth of {@code node}, 0 for the root
     * @throws IOException if an embedded file cannot be read
     */
    private void extractFromNameTree(PDNameTreeNode<PDComplexFileSpecification> node, int depth)
            throws IOException {
        if (node == null || depth > MAX_NAME_TREE_DEPTH) {
            return;
        }
        Map<String, PDComplexFileSpecification> names = node.getNames();
        if (names != null) {
            extractFiles(names); // see
            // https://memorynotfound.com/apache-pdfbox-extract-embedded-file-pdf-document/
            return;
        }
        List<PDNameTreeNode<PDComplexFileSpecification>> kids = node.getKids();
        if (kids == null) {
            return;
        }
        for (PDNameTreeNode<PDComplexFileSpecification> kid : kids) {
            extractFromNameTree(kid, depth + 1);
        }
    }

    /**
     * Picks the invoice XML and the additional data attachments out of a set
     * of embedded files.
     *
     * <p>Currently (in the release candidate of version 1) only one attached
     * file with the name ZUGFeRD-invoice.xml is allowed; the lower case
     * variant and factur-x.xml are accepted as well.
     *
     * @param names embedded file specifications keyed by file name
     * @throws IOException if an embedded stream cannot be read
     */
    private void extractFiles(Map<String, PDComplexFileSpecification> names) throws IOException {
        for (Map.Entry<String, PDComplexFileSpecification> entry : names.entrySet()) {
            String filename = entry.getKey();
            boolean isInvoice = filename.equals("ZUGFeRD-invoice.xml")
                    || filename.equals("zugferd-invoice.xml")
                    || filename.equals("factur-x.xml");
            boolean isAdditionalData = filename.startsWith("additional_data");
            if (!isInvoice && !isAdditionalData) {
                continue;
            }

            PDComplexFileSpecification fileSpec = entry.getValue();
            PDEmbeddedFile embeddedFile = (fileSpec == null) ? null : fileSpec.getEmbeddedFile();
            if (embeddedFile == null) {
                // file specification without an embedded stream: nothing to extract
                continue;
            }

            byte[] content = embeddedFile.toByteArray();
            if (isInvoice) {
                // keep the bytes exactly as embedded; a detour through String
                // could damage them depending on the platform charset
                rawXML = content;
                containsMeta = true;
            }
            if (isAdditionalData) {
                additionalXMLs.put(filename, content);
            }
        }
    }

    /**
     * Debugging aid: serializes the document without XML declaration and
     * prints it to {@code System.err}.
     *
     * @param document the DOM to print
     * @throws TransformerException if no transformer can be created or the
     *                              serialization fails
     */
    private void prettyPrint(Document document) throws TransformerException {
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        StringWriter writer = new StringWriter();
        transformer.transform(new DOMSource(document), new StreamResult(writer));
        System.err.println(writer.toString());
    }

    /**
     * Parses {@link #rawXML} into a DOM. Parsing is not namespace aware, which
     * is what allows the getters to use XPath expressions without prefixes.
     *
     * @return the parsed invoice document
     * @throws ParserConfigurationException if the parser cannot be configured
     * @throws IOException                  if the XML bytes cannot be read
     * @throws SAXException                 if the XML is malformed or contains a DOCTYPE
     */
    private Document getDocument() throws ParserConfigurationException, IOException, SAXException {
        DocumentBuilderFactory xmlFact = DocumentBuilderFactory.newInstance();
        xmlFact.setNamespaceAware(false);
        // The XML originates from a PDF of unknown provenance: forbid DTDs and
        // entity expansion to rule out XXE / entity expansion attacks.
        xmlFact.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
        xmlFact.setExpandEntityReferences(false);
        DocumentBuilder builder = xmlFact.newDocumentBuilder();
        return builder.parse(new ByteArrayInputStream(rawXML));
    }

    /**
     * Evaluates an XPath expression against the invoice XML.
     *
     * @param xpathStr the XPath expression
     * @return the string value of the expression, empty if nothing matches
     * @throws ZUGFeRDExportException if no invoice XML was found in the PDF or
     *                                the XML / expression cannot be processed
     */
    private String extractString(String xpathStr) {
        if (!containsMeta) {
            throw new ZUGFeRDExportException("No suitable data/ZUGFeRD file could be found.");
        }
        try {
            Document document = getDocument();
            XPath xpath = XPathFactory.newInstance().newXPath();
            return xpath.evaluate(xpathStr, document);
        } catch (ParserConfigurationException | IOException | SAXException | XPathExpressionException e) {
            LOGGER.log(Level.SEVERE, null, e);
            throw new ZUGFeRDExportException(e);
        }
    }

    /**
     * @return the reference (purpose) the sender specified for this invoice
     */
    public String getForeignReference() {
        String result = extractString("//ApplicableHeaderTradeSettlement/PaymentReference");
        if (result == null || result.isEmpty()) {
            result = extractString("//ApplicableSupplyChainTradeSettlement/PaymentReference");
        }
        return result;
    }

    /**
     * @return the document code
     */
    public String getDocumentCode() {
        return extractString("//HeaderExchangedDocument/TypeCode");
    }

    /**
     * @return the referred document
     */
    public String getReference() {
        return extractString("//ApplicableHeaderTradeAgreement/BuyerReference");
    }

    /**
     * @return the sender's bank's BLZ code
     */
    public String getBLZ() {
        return extractString("//PayeeSpecifiedCreditorFinancialInstitution/GermanBankleitzahlID");
    }

    /**
     * @return the sender's bank's BIC code
     */
    public String getBIC() {
        return extractString("//PayeeSpecifiedCreditorFinancialInstitution/BICID");
    }

    /**
     * @return the sender's bankname
     */
    public String getBankName() {
        return extractString(
                "/CrossIndustryInvoice/SupplyChainTradeTransaction/ApplicableHeaderTradeSettlement/SpecifiedTradeSettlementPaymentMeans/PayeeSpecifiedCreditorFinancialInstitution/Name");
    }

    /**
     * @return the value of the payee account's {@code IBANID} element
     */
    public String getIBAN() {
        return extractString("//PayeePartyCreditorFinancialAccount/IBANID");
    }

    /**
     * @return the value of the payee account's {@code ProprietaryID} element
     */
    public String getKTO() {
        return extractString("//PayeePartyCreditorFinancialAccount/ProprietaryID");
    }

    /**
     * @return the value of the seller trade party's {@code Name} element
     */
    public String getHolder() {
        return extractString("//SellerTradeParty/Name");
    }

    /**
     * @return the total payable amount
     */
    public String getAmount() {
        String result = extractString("//SpecifiedTradeSettlementHeaderMonetarySummation/DuePayableAmount");
        if (result == null || result.isEmpty()) {
            result = extractString("//SpecifiedTradeSettlementMonetarySummation/GrandTotalAmount");
        }
        return result;
    }

    /**
     * @return when the payment is due
     */
    public String getDueDate() {
        return extractString("//SpecifiedTradePaymentTerms/DueDateDateTime/DateTimeString");
    }

    /**
     * @return the embedded files whose name starts with {@code additional_data},
     *         keyed by file name; this is the importer's internal map, not a copy
     */
    public HashMap<String, byte[]> getAdditionalData() {
        return additionalXMLs;
    }

    /**
     * get xmp metadata of the PDF, null if not available
     *
     * @return string
     */
    public String getXMP() {
        return xmpString;
    }

    /**
     * @return if export found parseable ZUGFeRD data
     */
    public boolean containsMeta() {
        return containsMeta;
    }

    /**
     * @param meta raw XML to be set; it is stored UTF-8 encoded
     */
    public void setMeta(String meta) {
        this.rawXML = meta.getBytes(StandardCharsets.UTF_8);
    }

    /**
     * @return raw XML of the invoice decoded as UTF-8, null if there is none
     */
    public String getMeta() {
        if (rawXML == null) {
            return null;
        }

        return new String(rawXML, StandardCharsets.UTF_8);
    }

    /**
     * @return return UTF8 XML (without BOM) of the invoice, null if there is none
     */
    public String getUTF8() {
        if (rawXML == null) {
            return null;
        }

        boolean startsWithBom = rawXML.length >= UTF8_BOM_LENGTH
                && rawXML[0] == (byte) 0xEF
                && rawXML[1] == (byte) 0xBB
                && rawXML[2] == (byte) 0xBF;
        // I don't like BOMs, lets skip it
        int offset = startsWithBom ? UTF8_BOM_LENGTH : 0;
        return new String(rawXML, offset, rawXML.length - offset, StandardCharsets.UTF_8);
    }

    /**
     * Returns the raw XML data as extracted from the ZUGFeRD PDF file.
     *
     * @return the internal byte array (not a copy), null if there is none
     */
    public byte[] getRawXML() {
        return rawXML;
    }

    /**
     * will return true if the metadata (just extract-ed or set with setMeta)
     * contains ZUGFeRD XML
     *
     * @return true if the invoice contains ZUGFeRD XML
     */
    public boolean canParse() {
        // ExchangedDocumentContext (ZF2) is in the schema, so a relatively good
        // indication if zugferd is present - better than just invoice. The ZF1
        // name SpecifiedExchangedDocumentContext contains the ZF2 name, so a
        // single substring test covers both versions.
        String meta = getMeta();
        return meta != null && meta.contains("ExchangedDocumentContext");
    }

    /**
     * Reads a stream completely and decodes it as UTF-8. The stream is not
     * closed.
     *
     * @param is the stream to read
     * @return the content of the stream, the empty string if the stream is empty
     */
    static String convertStreamToString(java.io.InputStream is) {
        // source https://stackoverflow.com/questions/309424/how-do-i-read-convert-an-inputstream-into-a-string-in-java referring to
        // https://community.oracle.com/blogs/pat/2004/10/23/stupid-scanner-tricks
        // "\\A" matches only at the beginning of input, so the first token is the whole stream
        Scanner s = new Scanner(is, "UTF-8").useDelimiter("\\A");
        return s.hasNext() ? s.next() : "";
    }

}