com.ibm.jaql.lang.expr.xml.XmlToJsonFn.java Source code

Java tutorial

Introduction

Here is the source code for com.ibm.jaql.lang.expr.xml.XmlToJsonFn.java

Source

/*
 * Copyright (C) IBM Corp. 2009.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.ibm.jaql.lang.expr.xml;

import java.io.IOException;
import java.io.StringReader;
import java.lang.reflect.UndeclaredThrowableException;
import java.util.Deque;
import java.util.LinkedList;
import java.util.Stack;

import javax.xml.parsers.SAXParserFactory;

import org.apache.commons.lang.StringUtils;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

import com.ibm.jaql.json.type.BufferedJsonRecord;
import com.ibm.jaql.json.type.JsonString;
import com.ibm.jaql.json.type.JsonValue;
import com.ibm.jaql.json.type.SpilledJsonArray;
import com.ibm.jaql.lang.core.Context;
import com.ibm.jaql.lang.expr.core.Expr;
import com.ibm.jaql.lang.expr.function.DefaultBuiltInFunctionDescriptor;
import com.ibm.jaql.util.IntArray;

/**
 * An expression for converting XML to JSON.
 * 
 * @see JsonToXmlFn
 */
public class XmlToJsonFn extends Expr {
    public static class Descriptor extends DefaultBuiltInFunctionDescriptor.Par11 {
        public Descriptor() {
            super("xmlToJson", XmlToJsonFn.class);
        }
    }

    private XMLReader parser;
    private XmlToJsonHandler handler;

    /**
     * string xmlToJson( string xml )
     * 
     * @param exprs
     */
    public XmlToJsonFn(Expr[] exprs) {
        super(exprs);
    }

    @Override
    public JsonValue eval(Context context) throws Exception {
        if (parser == null) {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            factory.setNamespaceAware(true);
            parser = factory.newSAXParser().getXMLReader();
            handler = new XmlToJsonHandler2();
            parser.setContentHandler(handler);
        }

        JsonString s = (JsonString) exprs[0].eval(context);
        if (s == null) {
            return null;
        }
        parser.parse(new InputSource(new StringReader(s.toString())));
        return handler.result;
    }
}

class XmlToJsonHandler extends DefaultHandler // TODO: move, make much faster
{
    public JsonValue result;
}

/**
 * This guy produces a record for every xml node, children and attributes
 * are in the same array as the value of the node's name.  Namespaces are
 * included in a node's record as a field called xmlns.  Text nodes have
 * the field name "text()", an attribute called "name" has a field name "@name". 
 * 
 */
class XmlToJsonHandler1 extends XmlToJsonHandler // TODO: move, make much faster, add schema support
{
    public static final JsonString S_XML = new JsonString("xml");
    public static final JsonString S_XMLNS = new JsonString("xmlns");
    public static final JsonString S_TEXT = new JsonString("text()");

    protected Stack<SpilledJsonArray> stack = new Stack<SpilledJsonArray>();
    protected StringBuffer text = new StringBuffer();

    @Override
    public void startDocument() throws SAXException {
        result = null;
        stack.clear();
        stack.push(new SpilledJsonArray());
        text.setLength(0);
    }

    @Override
    public void endDocument() throws SAXException {
        SpilledJsonArray a = stack.pop();
        assert stack.empty();
        BufferedJsonRecord r = new BufferedJsonRecord();
        r.add(S_XML, a);
        result = r;
    }

    @Override
    public void startElement(String uri, String localName, String name, Attributes attrs) throws SAXException {
        try {
            SpilledJsonArray ca = new SpilledJsonArray();
            int n = attrs.getLength();
            for (int i = 0; i < n; i++) {
                name = "@" + attrs.getLocalName(i);
                uri = attrs.getURI(i);
                String v = attrs.getValue(i);
                BufferedJsonRecord r = new BufferedJsonRecord();
                if (uri != null && uri.length() > 0) {
                    r.add(S_XMLNS, new JsonString(uri));
                }
                r.add(new JsonString(name), new JsonString(v));
                ca.addCopy(r);
            }
            stack.push(ca);
        } catch (IOException e) {
            throw new UndeclaredThrowableException(e);
        }
    }

    @Override
    public void endElement(String uri, String localName, String name) throws SAXException {
        try {
            endText();
            SpilledJsonArray ca = (SpilledJsonArray) stack.pop();
            BufferedJsonRecord r = new BufferedJsonRecord();
            if (uri != null && uri.length() > 0) {
                r.add(new JsonString("xmlns"), new JsonString(uri));
            }
            r.add(new JsonString(localName), ca);
            SpilledJsonArray pa = (SpilledJsonArray) stack.peek();
            pa.addCopy(r);
        } catch (IOException e) {
            throw new UndeclaredThrowableException(e);
        }
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        text.append(ch, start, length);
    }

    public void endText() throws IOException {
        if (text.length() > 0) {
            BufferedJsonRecord r = new BufferedJsonRecord();
            r.add(S_TEXT, new JsonString(text.toString()));
            SpilledJsonArray pa = (SpilledJsonArray) stack.peek();
            pa.addCopy(r);
            text.setLength(0);
        }
    }
}

/**
 * This guy places all children in a single record.  If there are multiple
 * children with the same name, an array is created to hold all the children.
 * Therefore, node order is not preserved.
 * 
 * This has an annoyance for nodes that occur once under some node and twice
 * under another node because the value is sometimes an array and sometimes
 * a single value, which makes query writing painful. This would be fixed with
 * schema support.
 *
 * Text nodes are not records, they are just strings. All text _directly_ under
 * one node is concatenated into one string value.  Therefore, mixed content is messed up! // TODO: mixed content...
 * If a node has string content and attributes or children, a field called "text()" is created
 * to store the string value.  (Again painful when it sometimes happens.)
 *  
 * Mixed-content nodes are not handled well.  Nodes are out of order and text messed up.
 * When we know it's mixed content (e.g. from schema) we could:
 *    1) make $."text()" be the concat of all text indirectly below (the string value in xquery)
 *    2) make an array [ "la", {y:["di"]}, "da", {z:["oh"]}, "my"]
 *    3) leave as xml string
 *    4) just extract string value, ignore elements below
 *    5) ...
 *
 * Schema support is also important for creating non-string values.
 * 
 * If a node is in a namespace, two records are created:
 *    { 'http://namespace': { a:'1', b:'2' }}
 * All nodes in that namespace are below the namespace field.
 * 
 */
class XmlToJsonHandler2 extends XmlToJsonHandler // TODO: move, make much faster, add schema support
{
    public static final JsonString textName = new JsonString("text()");

    /**
     * <code>LinkedList</code> is used for performance reason. Stack operations only happens at
     * one end of the list.
     */
    protected Deque<BufferedJsonRecord> stack = new LinkedList<BufferedJsonRecord>();
    /** Stores all the characters received during parsing */
    protected StringBuilder text = new StringBuilder();
    protected IntArray lengths = new IntArray();

    @Override
    public void startDocument() throws SAXException {
        result = null;
        text.setLength(0);
        stack.clear();
        stack.push(new BufferedJsonRecord());
        lengths.add(0);
    }

    @Override
    public void endDocument() throws SAXException {
        BufferedJsonRecord r = stack.pop();
        assert stack.size() == 0;
        lengths.pop();
        assert lengths.empty();
        if (r.size() == 0) {
            result = null;
        } else {
            result = r;
        }
    }

    /**
     * Create a JSON record for new node. The node attributes are processed.
     */
    @Override
    public void startElement(String uri, String localName, String name, Attributes attrs) throws SAXException {
        BufferedJsonRecord node = new BufferedJsonRecord();
        stack.push(node);
        lengths.add(text.length());
        int n = attrs.getLength();
        BufferedJsonRecord container;
        for (int i = 0; i < n; i++) {
            uri = attrs.getURI(i);

            // If attribute has a uri
            if (uri != null && uri.length() > 0) {
                JsonString jUri = new JsonString(uri);
                BufferedJsonRecord ur = (BufferedJsonRecord) node.get(jUri);
                if (ur == null) {
                    ur = new BufferedJsonRecord();
                    node.add(jUri, ur);
                }
                container = ur;
            } else {
                container = node;
            }

            String attrName = "@" + attrs.getLocalName(i);
            JsonString jName = new JsonString(attrName);
            String v = attrs.getValue(i);
            if (container.containsKey(jName)) {
                throw new RuntimeException("duplicate attribute name: " + attrName);
            }
            container.add(jName, new JsonString(v));
        }
    }

    /**
     * 
     */
    @Override
    public void endElement(String uri, String localName, String name) throws SAXException {
        try {
            BufferedJsonRecord r = stack.pop();
            int textEnd = text.length();
            int textStart = lengths.pop();
            JsonValue me = r;

            if (textStart < textEnd) {
                JsonString s = new JsonString(text.substring(textStart, textEnd));
                text.setLength(textStart);
                if (r.size() == 0) {
                    me = s;
                } else if (!StringUtils.isBlank(s.toString())) {
                    r.add(textName, s);
                }
            }

            BufferedJsonRecord parent = stack.peek();
            // uri is not empty, use namespace record as parent.
            if (uri != null && uri.length() > 0) {
                JsonString jUri = new JsonString(uri);
                BufferedJsonRecord ur = (BufferedJsonRecord) parent.get(jUri, null);
                if (ur == null) {
                    ur = new BufferedJsonRecord();
                    parent.add(jUri, ur);
                }
                parent = ur;
            }

            JsonString jLocalName = new JsonString(localName);
            if (!parent.containsKey(jLocalName)) {
                parent.add(jLocalName, me);
            } else {
                JsonValue v = parent.get(jLocalName);
                SpilledJsonArray a;
                if (v instanceof SpilledJsonArray) {
                    a = (SpilledJsonArray) v;
                } else {
                    a = new SpilledJsonArray();
                    a.addCopy(v);
                    parent.set(jLocalName, a);
                }
                a.addCopy(me);
            }
        } catch (IOException e) {
            throw new UndeclaredThrowableException(e);
        }
    }

    /**
     * Collects all the characters received so far.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        text.append(ch, start, length);
    }
}