Utility class for xml/sax handling : SAX « XML « Java

Utility class for xml/sax handling

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.io.*;
import java.util.*;

import javax.xml.transform.*;
import javax.xml.transform.sax.*;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

import org.xml.sax.*;
import org.xml.sax.helpers.AttributesImpl;

/**
 * Utility class for xml/sax handling.
 * It provides support for "older" sax implementations (like the default one shipped with JDK 1.4.2)
 * which have bugs in the namespace handling.
 */
public class IOUtils {

    /** The transformer factory. */
    private static final SAXTransformerFactory FACTORY = (SAXTransformerFactory) TransformerFactory.newInstance();

    /** The URI for xml namespaces */
    private static final String XML_NAMESPACE_URI = "http://www.w3.org/XML/1998/namespace";

    /**
     * Parse a file and send the sax events to the content handler.
     * <p>
     * The file is handed to the parser as a byte stream so that the encoding
     * is taken from the XML declaration instead of the platform default
     * charset. The stream is always closed before this method returns.
     *
     * @param file    The xml file to read.
     * @param handler The content handler receiving the sax events.
     * @throws IOException          If the file cannot be opened or closed.
     * @throws TransformerException If the document cannot be parsed or the
     *                              handler fails.
     */
    public static final void parse(File file, ContentHandler handler)
    throws IOException, TransformerException {
        final Transformer transformer = FACTORY.newTransformer();
        final InputStream in = new FileInputStream(file);
        try {
            transformer.transform(new StreamSource(in),
                    new SAXResult(handler));
        } finally {
            in.close();
        }
    }

    /**
     * Create a content handler that serializes the sax events it receives
     * as indented UTF-8 xml into the given file.
     * <p>
     * The serializer writes to a byte stream, so the bytes on disk match the
     * <code>UTF-8</code> encoding announced in the xml declaration. If the
     * used Trax implementation needs namespace declarations as attributes,
     * the returned handler is a {@link NamespaceAsAttributes} wrapper.
     * <p>
     * The stream is closed by this method only when setting up the
     * serializer fails. NOTE(review): on success nothing visible here closes
     * it; the returned handler offers no close operation.
     *
     * @param file The file to write to.
     * @return The content handler to send the sax events to.
     * @throws IOException          If the file cannot be opened for writing.
     * @throws TransformerException If the serializer cannot be created or the
     *                              namespace check fails.
     */
    public static ContentHandler getSerializer(File file)
    throws IOException, TransformerException {
        final TransformerHandler transformerHandler = FACTORY.newTransformerHandler();
        final Transformer transformer = transformerHandler.getTransformer();

        final Properties format = new Properties();
        format.put(OutputKeys.METHOD, "xml");
        format.put(OutputKeys.OMIT_XML_DECLARATION, "no");
        format.put(OutputKeys.ENCODING, "UTF-8");
        format.put(OutputKeys.INDENT, "yes");
        transformer.setOutputProperties(format);

        // Open the file last, so nothing is leaked if the handler cannot be created.
        final OutputStream out = new FileOutputStream(file);
        boolean success = false;
        try {
            transformerHandler.setResult(new StreamResult(out));

            final ContentHandler serializer;
            if ( needsNamespacesAsAttributes(format) ) {
                serializer = new NamespaceAsAttributes(transformerHandler);
            } else {
                serializer = transformerHandler;
            }
            success = true;
            return serializer;
        } catch (SAXException se) {
            throw new TransformerException("Unable to detect if namespace support for sax works properly.", se);
        } finally {
            if ( !success ) {
                out.close();
            }
        }
    }

    /**
     * Checks if the used Trax implementation correctly handles namespaces set using
     * <code>startPrefixMapping()</code>, but wants them also as 'xmlns:' attributes.
     * <p>
     * The check consists in sending SAX events representing a minimal namespaced document
     * with namespaces defined only with calls to <code>startPrefixMapping</code> (no
     * xmlns:xxx attributes) and check if they are present in the resulting text.
     *
     * @param format The output properties applied to the test serializer.
     * @return <code>true</code> if the namespace declaration is missing from
     *         the serialized text, i.e. xmlns attributes have to be added.
     * @throws TransformerException If the test serializer cannot be created.
     * @throws SAXException         If the test serializer rejects the events.
     */
    protected static boolean needsNamespacesAsAttributes(Properties format)
    throws TransformerException, SAXException {
        // Serialize a minimal document to check how namespaces are handled.
        final StringWriter writer = new StringWriter();

        final String uri = "namespaceuri";
        final String prefix = "nsp";
        final String check = "xmlns:" + prefix + "='" + uri + "'";

        final TransformerHandler handler = FACTORY.newTransformerHandler();

        handler.getTransformer().setOutputProperties(format);
        handler.setResult(new StreamResult(writer));

        // Output a single element
        handler.startDocument();
        handler.startPrefixMapping(prefix, uri);
        handler.startElement(uri, "element", "element", new AttributesImpl());
        handler.endElement(uri, "element", "element");
        handler.endPrefixMapping(prefix);
        handler.endDocument();

        final String text = writer.toString();

        // Check if the namespace is there (replace " by ' to be sure of what we search in)
        boolean needsIt = (text.replace('"', '\'').indexOf(check) == -1);

        return needsIt;
    }

    /**
     * A pipe that ensures that all namespace prefixes are also present as
     * 'xmlns:' attributes. This used to circumvent Xalan's serialization behaviour
     * which is to ignore namespaces if they're not present as 'xmlns:xxx' attributes.
     */
    public static class NamespaceAsAttributes implements ContentHandler {

        /** The wrapped content handler. */
        private final ContentHandler contentHandler;

        /**
         * The prefixes of startPrefixMapping() declarations for the coming element.
         */
        private List prefixList = new ArrayList();

        /**
         * The URIs of startPrefixMapping() declarations for the coming element.
         */
        private List uriList = new ArrayList();

        /**
         * Maps of URI<->prefix mappings. Used to work around a bug in the Xalan
         * serializer.
         */
        private Map uriToPrefixMap = new HashMap();
        private Map prefixToUriMap = new HashMap();

        /**
         * True if there has been some startPrefixMapping() for the coming element.
         */
        private boolean hasMappings = false;

        /**
         * Create a new pipe in front of the given handler.
         * @param ch The content handler receiving all events.
         */
        public NamespaceAsAttributes(ContentHandler ch) {
            this.contentHandler = ch;
        }

        /**
         * Reset all namespace state and forward the event.
         * @see org.xml.sax.ContentHandler#startDocument()
         */
        public void startDocument() throws SAXException {
            // Cleanup
            this.uriToPrefixMap.clear();
            this.prefixToUriMap.clear();
            clearMappings();
            this.contentHandler.startDocument();
        }

        /**
         * Track mappings to be able to add <code>xmlns:</code> attributes
         * in <code>startElement()</code>.
         */
        public void startPrefixMapping(String prefix, String uri) throws SAXException {
            // Store the mappings to reconstitute xmlns:attributes
            // except prefixes starting with "xml": these are reserved
            // VG: (uri != null) fixes NPE in startElement
            if (uri != null && !prefix.startsWith("xml")) {
                this.hasMappings = true;
                // remember the declaration for the coming element
                this.prefixList.add(prefix);
                this.uriList.add(uri);

                // append the prefix colon now, in order to save concatenations later, but
                // only for non-empty prefixes.
                if (prefix.length() > 0) {
                    this.uriToPrefixMap.put(uri, prefix + ":");
                } else {
                    this.uriToPrefixMap.put(uri, prefix);
                }

                this.prefixToUriMap.put(prefix, uri);
            }
            this.contentHandler.startPrefixMapping(prefix, uri);
        }

        /**
         * Ensure all namespace declarations are present as <code>xmlns:</code> attributes
         * and add those needed before calling superclass. This is a workaround for a Xalan bug
         * (at least in version 2.0.1) : <code>org.apache.xalan.serialize.SerializerToXML</code>
         * ignores <code>start/endPrefixMapping()</code>.
         *
         * @throws SAXException If an existing xmlns attribute declares a different
         *                      URI than the prefix mapping, or the wrapped handler fails.
         */
        public void startElement(String eltUri, String eltLocalName, String eltQName, Attributes attrs)
                throws SAXException {

            // try to restore the qName. The map already contains the colon
            if (null != eltUri && eltUri.length() != 0 && this.uriToPrefixMap.containsKey(eltUri)) {
                eltQName = this.uriToPrefixMap.get(eltUri) + eltLocalName;
            }
            if (this.hasMappings) {
                // Add xmlns* attributes where needed

                // New Attributes if we have to add some.
                AttributesImpl newAttrs = null;

                int mappingCount = this.prefixList.size();
                int attrCount = attrs.getLength();

                for (int mapping = 0; mapping < mappingCount; mapping++) {

                    // Build infos for this namespace
                    String uri = (String) this.uriList.get(mapping);
                    String prefix = (String) this.prefixList.get(mapping);
                    String qName = prefix.length() == 0 ? "xmlns" : ("xmlns:" + prefix);

                    // Search for the corresponding xmlns* attribute
                    boolean found = false;
                    for (int attr = 0; attr < attrCount; attr++) {
                        if (qName.equals(attrs.getQName(attr))) {
                            // Check if mapping and attribute URI match
                            if (!uri.equals(attrs.getValue(attr))) {
                                throw new SAXException("URI in prefix mapping and attribute do not match");
                            }
                            found = true;
                            break;
                        }
                    }

                    if (!found) {
                        // Need to add this namespace
                        if (newAttrs == null) {
                            // Need to test if attrs is empty or we go into an infinite loop...
                            // Well know SAX bug which I spent 3 hours to remind of :-(
                            if (attrCount == 0) {
                                newAttrs = new AttributesImpl();
                            } else {
                                newAttrs = new AttributesImpl(attrs);
                            }
                        }

                        if (prefix.length() == 0) {
                            newAttrs.addAttribute(XML_NAMESPACE_URI, "xmlns", "xmlns", "CDATA", uri);
                        } else {
                            newAttrs.addAttribute(XML_NAMESPACE_URI, prefix, qName, "CDATA", uri);
                        }
                    }
                } // end for mapping

                // Cleanup for the next element
                clearMappings();

                // Start element with new attributes, if any
                this.contentHandler.startElement(eltUri, eltLocalName, eltQName, newAttrs == null ? attrs : newAttrs);
            } else {
                // Normal job
                this.contentHandler.startElement(eltUri, eltLocalName, eltQName, attrs);
            }
        }

        /**
         * Receive notification of the end of an element.
         * Try to restore the element qName.
         */
        public void endElement(String eltUri, String eltLocalName, String eltQName) throws SAXException {
            // try to restore the qName. The map already contains the colon
            if (null != eltUri && eltUri.length() != 0 && this.uriToPrefixMap.containsKey(eltUri)) {
                eltQName = this.uriToPrefixMap.get(eltUri) + eltLocalName;
            }
            this.contentHandler.endElement(eltUri, eltLocalName, eltQName);
        }

        /**
         * End the scope of a prefix-URI mapping:
         * remove entry from mapping tables.
         */
        public void endPrefixMapping(String prefix) throws SAXException {
            // remove mappings for xalan-bug-workaround.
            // Unfortunately, we're not passed the uri, but the prefix here,
            // so we need to maintain maps in both directions.
            if (this.prefixToUriMap.containsKey(prefix)) {
                this.uriToPrefixMap.remove(this.prefixToUriMap.get(prefix));
                this.prefixToUriMap.remove(prefix);
            }

            if (hasMappings) {
                // most of the time, start/endPrefixMapping calls have an element event between them,
                // which will clear the hasMapping flag and so this code will only be executed in the
                // rather rare occasion when there are start/endPrefixMapping calls with no element
                // event in between. If we wouldn't remove the items from the prefixList and uriList here,
                // the namespace would be incorrectly declared on the next element following the
                // endPrefixMapping call.
                int pos = prefixList.lastIndexOf(prefix);
                if (pos != -1) {
                    prefixList.remove(pos);
                    uriList.remove(pos);
                }
            }

            this.contentHandler.endPrefixMapping(prefix);
        }

        /**
         * Reset all namespace state and forward the event.
         * @see org.xml.sax.ContentHandler#endDocument()
         */
        public void endDocument() throws SAXException {
            // Cleanup
            this.uriToPrefixMap.clear();
            this.prefixToUriMap.clear();
            clearMappings();
            this.contentHandler.endDocument();
        }

        /**
         * Forget the declarations collected for the coming element.
         */
        private void clearMappings() {
            this.hasMappings = false;
            this.prefixList.clear();
            this.uriList.clear();
        }

        /**
         * @see org.xml.sax.ContentHandler#characters(char[], int, int)
         */
        public void characters(char[] ch, int start, int length) throws SAXException {
            contentHandler.characters(ch, start, length);
        }

        /**
         * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
         */
        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
            contentHandler.ignorableWhitespace(ch, start, length);
        }

        /**
         * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String, java.lang.String)
         */
        public void processingInstruction(String target, String data) throws SAXException {
            contentHandler.processingInstruction(target, data);
        }

        /**
         * @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator)
         */
        public void setDocumentLocator(Locator locator) {
            contentHandler.setDocumentLocator(locator);
        }

        /**
         * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String)
         */
        public void skippedEntity(String name) throws SAXException {
            contentHandler.skippedEntity(name);
        }
    }

    /**
     * Helper method to add an attribute.
     * This implementation adds a new attribute with the given name
     * and value. Before adding the value is checked for non-null.
     * @param ai    The attributes impl receiving the additional attribute.
     * @param name  The name of the attribute.
     * @param value The value of the attribute.
     */
    protected static void addAttribute(AttributesImpl ai, String name, Object value) {
        if ( value != null ) {
            ai.addAttribute("", name, name, "CDATA", value.toString());
        }
    }

    /**
     * Helper method writing out a string.
     * A <code>null</code> text is silently ignored.
     * @param ch    The content handler.
     * @param text  The text to send as a characters event.
     * @throws SAXException If the content handler fails.
     */
    protected static void text(ContentHandler ch, String text)
    throws SAXException {
        if ( text != null ) {
            final char[] c = text.toCharArray();
            ch.characters(c, 0, c.length);
        }
    }

    /**
     * Helper method to indent the xml elements.
     * Each level is indented with four spaces.
     * @param ch    The content handler.
     * @param level The level of indention.
     * @throws SAXException If the content handler fails.
     */
    protected static void indent(ContentHandler ch, int level)
    throws SAXException {
        for(int i=0;i<level;i++) {
            IOUtils.text(ch, "    ");
        }
    }

    /**
     * Helper method to create a new line.
     * @param ch    The content handler.
     * @throws SAXException If the content handler fails.
     */
    protected static void newline(ContentHandler ch)
    throws SAXException {
        IOUtils.text(ch, "\n");
    }
}


Related examples in the same category

1.Parsing XML Files with SAX
2.SAX Demo
3.Duplicates XML Files
4.A Program to Display the Input from a SAX Parser
5.SAX Checker
6.A Content Handler to Output a Sorted List as HTML
7.A Content Handler to Output a Sorted List
8.Simple lister - extract name and children tags
9.SAX Tree Validator
10.SAX Tree Viewer
11.Accessing character data (CDATA) of XML element
12.Accessing features of the SAX parser implementation
13.Configuring SAX parser factory to produce alternate parser
14.Extracting attribute values from XML elements
15.Handling SAX errors during parsing
16.Using XML locator to indicate current parser position
17.Filter to write an XML document from a SAX event stream
18.XML utility methods that only depend on the JDK
19.Create Xml Reader
20.Sax to DOM converter
21.Produce a SAX stream from a DOM Document
22.SAX2 writer: register a SAX2 ContentHandler and receive the callbacks in order to print a document that is parsed.
23.Register a SAX2 ContentHandler and receive callbacks to print information about the document.
24.Provides a complete trace of SAX2 events for files parsed.
25.The program prints all hyperlinks links of an XHTML web page