com.opengamma.strata.collect.io.XmlFile.java Source code

Java tutorial

Introduction

Here is the source code for com.opengamma.strata.collect.io.XmlFile.java

Source

/**
 * Copyright (C) 2015 - present by OpenGamma Inc. and the OpenGamma group of companies
 * 
 * Please see distribution for license.
 */
package com.opengamma.strata.collect.io;

import java.io.InputStream;
import java.io.UncheckedIOException;
import java.util.HashMap;
import java.util.Map;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.io.ByteSource;
import com.opengamma.strata.collect.ArgChecker;
import com.opengamma.strata.collect.Unchecked;

/**
 * An XML file.
 * <p>
 * Represents an XML file together with the ability to parse it from a {@link ByteSource}.
 * <p>
 * This uses the standard StAX API to parse the file.
 * Once parsed, the XML is represented as a DOM-like structure, see {@link XmlElement}.
 * This approach is suitable for XML files where the size of the parsed XML file is
 * known to be manageable in memory.
 * <p>
 * Note that the {@link XmlElement} representation does not express all XML features.
 * No support is provided for processing instructions, comments or mixed content.
 * In addition, it is not possible to determine the difference between empty content and no children.
 * <p>
 * There is no support for namespaces.
 * All namespace prefixes are dropped.
 * There are cases where this can be a problem, but most of the time lenient parsing is helpful.
 */
public final class XmlFile {

    /**
     * The root element.
     */
    private final XmlElement root;
    /**
     * The map of references, keyed by the value of the reference attribute.
     */
    private final ImmutableMap<String, XmlElement> refs;

    //-----------------------------------------------------------------------
    /**
     * Parses the specified source as an XML file to an in-memory DOM-like structure.
     * <p>
     * This parses the specified byte source expecting an XML file format.
     * The resulting instance can be queried for the root element.
     * <p>
     * No references are captured by this method, thus {@link #getReferences()} will be empty.
     * 
     * @param source  the XML source data
     * @return the parsed file
     * @throws UncheckedIOException if an IO exception occurs
     * @throws IllegalArgumentException if the file cannot be parsed
     */
    public static XmlFile of(ByteSource source) {
        // an XML attribute cannot have an empty name, so "" never matches and nothing is captured
        return of(source, "");
    }

    /**
     * Parses the specified source as an XML file to an in-memory DOM-like structure.
     * <p>
     * This parses the specified byte source expecting an XML file format.
     * The resulting instance can be queried for the root element.
     * <p>
     * This supports capturing attribute references, such as an id/href pair.
     * Wherever the parser finds an attribute with the specified name, the element is added
     * to the internal map, accessible by calling {@link #getReferences()}.
     * If the same reference value occurs more than once, the element parsed last is retained.
     * <p>
     * For example, if one part of the XML has {@code <foo id="fooId">}, the references map will
     * contain an entry mapping "fooId" to the parsed element {@code <foo>}.
     * 
     * @param source  the XML source data
     * @param refAttrName  the attribute name that should be parsed as a reference
     * @return the parsed file
     * @throws UncheckedIOException if an IO exception occurs
     * @throws IllegalArgumentException if the file cannot be parsed
     */
    public static XmlFile of(ByteSource source, String refAttrName) {
        ArgChecker.notNull(source, "source");
        return Unchecked.wrap(() -> {
            // try-with-resources closes the underlying stream
            try (InputStream in = source.openBufferedStream()) {
                XMLStreamReader xmlReader = xmlInputFactory().createXMLStreamReader(in);
                // XMLStreamReader is not AutoCloseable, so it is released explicitly
                try {
                    Map<String, XmlElement> refs = new HashMap<>();
                    XmlElement root = parse(xmlReader, refAttrName, refs);
                    return new XmlFile(root, refs);
                } finally {
                    xmlReader.close();
                }
            }
        });
    }

    //-------------------------------------------------------------------------
    /**
     * Parses the tree from the StAX stream reader, capturing references.
     * <p>
     * The reader should be created using the factory returned from {@link #xmlInputFactory()}.
     * <p>
     * This method supports capturing attribute references, such as an id/href pair.
     * Wherever the parser finds an attribute with the specified name, the element is added
     * to the specified map. Note that the map is mutated.
     * Reference capture is skipped if either the attribute name or the map is null.
     * <p>
     * An element that has at least one child element is created from its children only,
     * any text found between the children is discarded.
     * An element without child elements is created from its text content, which may be empty.
     * <p>
     * This method is recursive, with one level of recursion per level of element nesting.
     * 
     * @param reader  the StAX stream reader, positioned at or before the element to be parsed
     * @param refAttr  the attribute name that should be parsed as a reference, null if not applicable
     * @param refs  the mutable map of references to update, null if not applicable
     * @return the parsed element
     * @throws IllegalArgumentException if the input cannot be parsed
     */
    private static XmlElement parse(XMLStreamReader reader, String refAttr, Map<String, XmlElement> refs) {
        try {
            // parse start element
            String elementName = parseElementName(reader);
            ImmutableMap<String, String> attrs = parseAttributes(reader);

            // parse children or content
            ImmutableList.Builder<XmlElement> childBuilder = ImmutableList.builder();
            // IS_COALESCING=true merges adjacent text, but text separated by child elements
            // or comments still arrives as multiple events, so accumulate in a builder
            StringBuilder content = new StringBuilder();
            int event = reader.next();
            while (event != XMLStreamConstants.END_ELEMENT) {
                switch (event) {
                // parse child when start element found
                // the recursive call consumes everything up to the END_ELEMENT of the child
                case XMLStreamConstants.START_ELEMENT:
                    childBuilder.add(parse(reader, refAttr, refs));
                    break;
                // append content when characters found
                case XMLStreamConstants.CHARACTERS:
                case XMLStreamConstants.CDATA:
                    content.append(reader.getText());
                    break;
                // all other events, such as comments and processing instructions, are ignored
                default:
                    break;
                }
                event = reader.next();
            }
            ImmutableList<XmlElement> children = childBuilder.build();
            XmlElement parsed = children.isEmpty()
                    ? XmlElement.ofContent(elementName, attrs, content.toString())
                    : XmlElement.ofChildren(elementName, attrs, children);

            // capture the reference, honouring the documented "null if not applicable" contract
            if (refAttr != null && refs != null) {
                String ref = attrs.get(refAttr);
                if (ref != null) {
                    refs.put(ref, parsed);
                }
            }
            return parsed;

        } catch (XMLStreamException ex) {
            throw new IllegalArgumentException(ex);
        }
    }

    /**
     * Advances the reader to the next start element, if not already on one, and returns its name.
     * <p>
     * The local name is returned, thus any namespace prefix is dropped.
     * 
     * @param reader  the StAX stream reader
     * @return the local name of the start element
     * @throws XMLStreamException if the underlying parser fails
     * @throws IllegalArgumentException if the input ends before a start element is found
     */
    private static String parseElementName(XMLStreamReader reader) throws XMLStreamException {
        int event = reader.getEventType();
        while (event != XMLStreamConstants.START_ELEMENT) {
            // calling next() on an exhausted reader throws an unchecked NoSuchElementException,
            // so convert that case to the exception type documented by the public API
            if (!reader.hasNext()) {
                throw new IllegalArgumentException("Unable to parse XML: no start element found");
            }
            event = reader.next();
        }
        return reader.getLocalName();
    }

    /**
     * Parses the attributes of the current start element into a map.
     * <p>
     * The map is keyed by the local name of the attribute, thus any namespace prefix is dropped.
     * Namespace declarations are not included.
     * Note that two attributes differing only by prefix share a local name, which the
     * Guava map builder is expected to reject with an {@code IllegalArgumentException}.
     * 
     * @param reader  the StAX stream reader, positioned on a start element
     * @return the attributes, keyed by local name, empty if there are none
     */
    private static ImmutableMap<String, String> parseAttributes(XMLStreamReader reader) {
        int attributeCount = reader.getAttributeCount();
        if (attributeCount == 0) {
            return ImmutableMap.of();
        }
        ImmutableMap.Builder<String, String> builder = ImmutableMap.builder();
        for (int i = 0; i < attributeCount; i++) {
            builder.put(reader.getAttributeLocalName(i), reader.getAttributeValue(i));
        }
        return builder.build();
    }

    //-------------------------------------------------------------------------
    /**
     * Creates the XML input factory.
     * <p>
     * The factory is recreated each time to avoid JDK-8028111.
     * DTD support and external entities are switched off, which provides some protection
     * against malicious XML input, such as entity expansion and external entity attacks.
     * Coalescing is switched on so that adjacent character data is reported as a single event.
     * 
     * @return the new factory
     */
    private static XMLInputFactory xmlInputFactory() {
        XMLInputFactory factory = XMLInputFactory.newFactory();
        factory.setProperty(XMLInputFactory.IS_COALESCING, true);
        factory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, true);
        factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
        factory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
        return factory;
    }

    //-------------------------------------------------------------------------
    /**
     * Restricted constructor.
     * 
     * @param root  the root element, not null
     * @param refs  the map of references, copied into an immutable map
     */
    private XmlFile(XmlElement root, Map<String, XmlElement> refs) {
        this.root = ArgChecker.notNull(root, "root");
        this.refs = ImmutableMap.copyOf(refs);
    }

    //-------------------------------------------------------------------------
    /**
     * Gets the root element of this file.
     * 
     * @return the root element
     */
    public XmlElement getRoot() {
        return root;
    }

    /**
     * Gets the reference map of id to element.
     * <p>
     * This is used to decode references, such as an id/href pair.
     * <p>
     * For example, if one part of the XML has {@code <foo id="fooId">}, the map will
     * contain an entry mapping "fooId" to the parsed element {@code <foo>}.
     * 
     * @return the map of id to element
     */
    public ImmutableMap<String, XmlElement> getReferences() {
        return refs;
    }

    //-------------------------------------------------------------------------
    /**
     * Checks if this file equals another.
     * <p>
     * The comparison checks the content and reference map.
     * 
     * @param obj  the other file, null returns false
     * @return true if equal
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (obj instanceof XmlFile) {
            XmlFile other = (XmlFile) obj;
            return root.equals(other.root) && refs.equals(other.refs);
        }
        return false;
    }

    /**
     * Returns a suitable hash code for the file.
     * <p>
     * The hash code combines the root element and the reference map, consistent with {@code equals}.
     * 
     * @return the hash code
     */
    @Override
    public int hashCode() {
        return root.hashCode() ^ refs.hashCode();
    }

    /**
     * Returns a string describing the file.
     * <p>
     * This is the string form of the root element.
     * 
     * @return the descriptive string
     */
    @Override
    public String toString() {
        return root.toString();
    }

}