Java XML String Transform extractAndNormalizeEmbedPictures(String xmlFile, String odtFile, String parentDir, String imgBaseDir)

Description

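Extract and normalize picture names, i.e. convert xlink:href values like 'Pictures/100000000000034300000273CAF76237.png' into values like 'images/0.png'.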

License

Open Source License

Parameter

Parameter	Description
xmlFile	The path to the XML file (merged XML files inside the ODF file).
odtFile	The path to the ODF file.
parentDir	The path prefix to which imgBaseDir is appended to form the directory the images are extracted into.
imgBaseDir	The new directory for the images.

Exception

Parameter	Description

Return

LinkedHashMap where keys are old file names and values are new file names.

Declaration

public static LinkedHashMap<String, String> extractAndNormalizeEmbedPictures(String xmlFile, String odtFile,
        String parentDir, String imgBaseDir) throws SAXException, IOException, ParserConfigurationException,
        TransformerConfigurationException, TransformerException

Method Source Code

//package com.java2s;
/**
 *  odt2daisy - OpenDocument to DAISY XML/Audio
 *
 *  (c) Copyright 2008 - 2012 by Vincent Spiewak, All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation; either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Utilities for extracting the embedded pictures of an ODF package and
 * rewriting the picture references in its (merged) XML content.
 */
public class Main {
    /** Name prefix of the ZIP entries that hold embedded pictures in an ODF package. */
    public static final String PICTURE_FOLDER = "Pictures/";
    private static final Logger logger = Logger.getLogger("com.versusoft.packages.jodl.odtutils");

    /**
     * Extract and normalize picture names, i.e.
     * converts <code>xlink:href</code> values like
     * 'Pictures/100000000000034300000273CAF76237.png'
     * into values like 'images/0.png'.
     * <p>
     * Every <code>draw:image</code> element whose <code>xlink:href</code> names a picture
     * stored in the ODF package is rewritten to <code>imgBaseDir + index + extension</code>,
     * where <code>index</code> is the position of the picture in {@link #getPictures(String)}
     * and <code>extension</code> is the original extension in lower case (empty if the
     * picture has none). The picture itself is copied to
     * <code>parentDir + imgBaseDir + index + extension</code>. Elements without an
     * <code>xlink:href</code> attribute, or whose reference is not a picture of the package,
     * are left untouched. Finally the modified document is written back to
     * <code>xmlFile</code>.
     * <p>
     * If the package contains no pictures at all, nothing is read or written and an empty
     * map is returned.
     *
     * @param xmlFile The path to the XML file (merged XML files inside the ODF file).
     * @param odtFile The path to the ODF file.
     * @param parentDir The path prefix to which <code>imgBaseDir</code> is appended to form the image directory.
     * @param imgBaseDir The new directory for the images; also used as prefix of the new <code>xlink:href</code> values.
     * @return LinkedHashMap where keys are old file names and values are new file names.
     * @throws org.xml.sax.SAXException If the XML content cannot be parsed.
     * @throws java.io.IOException If the image directory is actually a file or cannot be created, or if the ODF file, the XML file or an image cannot be read or written.
     * @throws javax.xml.parsers.ParserConfigurationException If a DocumentBuilder cannot be created which satisfies the configuration requested.
     * @throws javax.xml.transform.TransformerConfigurationException If a serious configuration error occurred while preparing to save the document.
     * @throws javax.xml.transform.TransformerException If the modified document could not be written back to <code>xmlFile</code>.
     */
    public static LinkedHashMap<String, String> extractAndNormalizeEmbedPictures(String xmlFile, String odtFile,
            String parentDir, String imgBaseDir) throws SAXException, IOException, ParserConfigurationException,
            TransformerConfigurationException, TransformerException {

        logger.fine("entering");

        LinkedHashMap<String, String> oldAndNewImgNames = new LinkedHashMap<String, String>();

        ArrayList<String> pics = getPictures(odtFile);
        if (pics.isEmpty()) {
            // Nothing to extract: leave the XML file and the file system untouched.
            return oldAndNewImgNames;
        }

        logger.fine("imgBaseDir: " + imgBaseDir + "\n");
        logger.fine("parentDir: " + parentDir + "\n");

        File imgDir = new File(parentDir + imgBaseDir);
        if (imgDir.isFile()) {
            throw new IOException("Wrong argument: image directory is a file: " + imgDir);
        }
        // Re-check isDirectory() after a failed mkdirs() in case the directory appeared concurrently.
        if (!imgDir.isDirectory() && !imgDir.mkdirs() && !imgDir.isDirectory()) {
            throw new IOException("Could not create image directory: " + imgDir);
        }

        Document contentDoc = parseIgnoringExternalEntities(xmlFile);
        Element root = contentDoc.getDocumentElement();
        NodeList images = root.getElementsByTagName("draw:image");

        ZipFile zip = new ZipFile(odtFile);
        try {
            // for every draw:image element in the merged XML:
            for (int i = 0; i < images.getLength(); i++) {

                Node hrefNode = images.item(i).getAttributes().getNamedItem("xlink:href");
                if (hrefNode == null) {
                    // An image without a link (e.g. inline binary data) has nothing to normalize.
                    continue;
                }

                String imagePath = hrefNode.getTextContent();
                logger.fine("Image path: " + imagePath);

                // Only references to pictures stored in the ODF file are rewritten.
                int id = pics.indexOf(imagePath);
                if (id < 0) {
                    continue;
                }

                // New xlink:href value: image directory + image index + extension (lower case).
                String newXlinkHref = imgBaseDir + id + lowerCaseExtension(imagePath);
                hrefNode.setTextContent(newXlinkHref);

                // A picture referenced several times only needs to be extracted once.
                if (!oldAndNewImgNames.containsKey(imagePath)) {
                    extractEntry(zip, imagePath, parentDir + newXlinkHref);
                    oldAndNewImgNames.put(imagePath, newXlinkHref);
                    logger.fine("extracted image: " + newXlinkHref + "\n");
                }
            }
        } finally {
            zip.close();
        }

        if (logger.isLoggable(Level.FINE)) {
            logger.fine("Image mapping = " + oldAndNewImgNames);
        }

        // Use the throwing variant so a failed save is reported to the caller
        // instead of leaving extracted images next to an XML file with stale links.
        writeDOM(contentDoc, xmlFile);
        logger.fine("done");
        return oldAndNewImgNames;
    }

    /**
     * Parses the XML file without validation and with every external entity
     * (such as an external DTD) replaced by an empty document, so that parsing
     * never fetches anything but the file itself.
     */
    private static Document parseIgnoringExternalEntities(String xmlFile)
            throws SAXException, IOException, ParserConfigurationException {
        DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
        docFactory.setValidating(false);

        DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
        docBuilder.setEntityResolver(new EntityResolver() {

            public InputSource resolveEntity(String publicId, String systemId)
                    throws SAXException, IOException {
                // Explicit charset: the bytes must match the encoding named in the declaration.
                return new InputSource(
                        new ByteArrayInputStream("<?xml version='1.0' encoding='UTF-8'?>".getBytes("UTF-8")));
            }
        });

        return docBuilder.parse(xmlFile);
    }

    /**
     * Returns the extension of the last path segment (including the leading dot)
     * in lower case, or an empty string if that segment has no extension.
     */
    private static String lowerCaseExtension(String path) {
        int dot = path.lastIndexOf('.');
        if (dot < 0 || dot < path.lastIndexOf('/')) {
            return "";
        }
        // Locale.ROOT keeps the result independent of the default locale (e.g. Turkish 'I').
        return path.substring(dot).toLowerCase(Locale.ROOT);
    }

    /** Copies one ZIP entry to the given target file, closing both streams in every case. */
    private static void extractEntry(ZipFile zip, String entryName, String target) throws IOException {
        InputStream in = zip.getInputStream(zip.getEntry(entryName));
        OutputStream out;
        try {
            out = new FileOutputStream(target);
        } catch (IOException ex) {
            in.close();
            throw ex;
        }
        copyInputStream(in, out);
    }

    /**
     * Return an ArrayList of image(s) path(s) included in the ODF file,
     * i.e. a ArrayList of Strings like 'Pictures/100000000000034300000273CAF76237.png'.
     * Directory entries are not included. The paths are returned in the order
     * in which the ZIP file enumerates its entries.
     *
     * @param odtFile The path to the ODF file.
     * @return ArrayList of image(s) path(s)
     * @throws java.io.IOException If an exceptional condition occurred while creating a ZipFile based on the path to the ODF file.
     */
    public static ArrayList<String> getPictures(String odtFile) throws IOException {
        ArrayList<String> ret = new ArrayList<String>();

        ZipFile zf = new ZipFile(odtFile);
        try {
            Enumeration<? extends ZipEntry> entries = zf.entries();
            while (entries.hasMoreElements()) {
                ZipEntry entry = entries.nextElement();
                if (!entry.isDirectory() && entry.getName().startsWith(PICTURE_FOLDER)) {
                    ret.add(entry.getName());
                }
            }
        } finally {
            // Release the file handle even if enumerating the entries fails.
            zf.close();
        }

        return ret;
    }

    /**
     * Copies all bytes from <code>in</code> to <code>out</code> and closes both
     * streams, whether or not the copy succeeds.
     */
    private static void copyInputStream(InputStream in, OutputStream out) throws IOException {
        try {
            try {
                byte[] buffer = new byte[1024];
                int len;

                while ((len = in.read(buffer)) >= 0) {
                    out.write(buffer, 0, len);
                }
            } finally {
                in.close();
            }
        } finally {
            out.close();
        }
    }

    /**
     * Saves a document object model (DOM) as a file with the chosen file name.
     * A failure of the transformation is logged and reported through the return
     * value; other runtime failures propagate to the caller.
     * 
     * @param doc DOM representation of XML instance.
     * @param filename The file to which the DOM should be saved. The String must conform to the URI syntax.
     * @return true if the file could be saved; false if the file could not be saved.
     */
    public static boolean saveDOM(Document doc, String filename) {
        try {
            writeDOM(doc, filename);
            return true;
        } catch (TransformerException ex) {
            // Also covers TransformerConfigurationException, which is a subclass.
            logger.log(Level.SEVERE, "Could not save DOM to " + filename, ex);
            return false;
        }
    }

    /** Serializes the DOM as indented UTF-8 XML to the given file name, reporting failures as exceptions. */
    private static void writeDOM(Document doc, String filename) throws TransformerException {
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
        transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3");

        transformer.transform(new DOMSource(doc), new StreamResult(filename));
    }
}

Java XML String Transform extractAndNormalizeEmbedPictures(String xmlFile, String odtFile, String parentDir, String imgBaseDir)

Description

License

Parameter

Exception

Return

Declaration

Method Source Code

Related