org.apache.shindig.gadgets.parse.caja.CajaHtmlParser.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.shindig.gadgets.parse.caja.CajaHtmlParser.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */
package org.apache.shindig.gadgets.parse.caja;

import java.util.LinkedList;

import com.google.caja.lexer.CharProducer;
import com.google.caja.lexer.HtmlLexer;
import com.google.caja.lexer.InputSource;
import com.google.caja.lexer.ParseException;
import com.google.caja.parser.html.DomParser;
import com.google.caja.parser.html.Namespaces;
import com.google.caja.reporting.Message;
import com.google.caja.reporting.MessageLevel;
import com.google.caja.reporting.MessageQueue;
import com.google.caja.reporting.SimpleMessageQueue;
import com.google.common.collect.Lists;
import com.google.inject.Inject;

import org.apache.shindig.gadgets.GadgetException;
import org.apache.shindig.gadgets.http.HttpResponse;
import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Node;

public class CajaHtmlParser extends GadgetHtmlParser {
    @Inject
    public CajaHtmlParser(DOMImplementation documentFactory) {
        super(documentFactory);
    }

    @Override
    protected Document parseDomImpl(String source) throws GadgetException {
        DocumentFragment fragment = parseFragmentImpl(source);

        // TODO: remove parseDomImpl() altogether; only have subclasses
        // support parseFragmentImpl() with base class cleaning up.
        Document document = fragment.getOwnerDocument();
        Node html = null;
        LinkedList<Node> beforeHtml = Lists.newLinkedList();
        while (fragment.hasChildNodes()) {
            Node child = fragment.removeChild(fragment.getFirstChild());
            if (child.getNodeType() == Node.ELEMENT_NODE && "html".equalsIgnoreCase(child.getNodeName())) {
                if (html == null) {
                    html = child;
                } else {
                    // Ignore the current (duplicated) html node but add its children
                    transferChildren(html, child);
                }
            } else if (html != null) {
                html.appendChild(child);
            } else {
                beforeHtml.add(child);
            }
        }

        if (html == null) {
            html = document.createElement("html");
        }

        prependToNode(html, beforeHtml);

        // Ensure document.getDocumentElement() is html node.
        document.appendChild(html);

        return document;
    }

    @Override
    protected DocumentFragment parseFragmentImpl(String source) throws GadgetException {
        try {
            MessageQueue mq = makeMessageQueue();
            // Newline works around Caja parser issue with certain short-form
            // HTML - the internal Lexer gets confused. A bug has been filed w/ Caja.
            // Even so, adding the newline is innocuous for any HTML.
            DomParser parser = getDomParser(source + '\n', mq);
            DocumentFragment fragment = parser.parseFragment();
            // Get rid of the newline, if maintained.
            Node lastChild = fragment != null ? fragment.getLastChild() : null;
            if (lastChild != null && lastChild.getNodeType() == Node.TEXT_NODE) {
                String lastText = lastChild.getTextContent();
                if ("\n".equals(lastText)) {
                    fragment.removeChild(lastChild);
                } else if (lastText.endsWith("\n")) {
                    lastChild.setTextContent(lastText.substring(0, lastText.length() - 1));
                }
            }
            if (mq.hasMessageAtLevel(MessageLevel.ERROR)) {
                StringBuilder err = new StringBuilder();
                for (Message m : mq.getMessages()) {
                    err.append(m.toString()).append('\n');
                }
                throw new GadgetException(GadgetException.Code.HTML_PARSE_ERROR, err.toString(),
                        HttpResponse.SC_BAD_REQUEST);
            }
            return fragment;
        } catch (ParseException e) {
            throw new GadgetException(GadgetException.Code.HTML_PARSE_ERROR, e.getCajaMessage().toString(),
                    HttpResponse.SC_BAD_REQUEST);
        }
    }

    protected InputSource getInputSource() {
        // Returns a default/dummy InputSource.
        // We might consider adding the gadget URI to the GadgetHtmlParser API,
        // but in the meantime this method is protected to allow overriding this
        // with request-scoped retrieval of this same data.
        return InputSource.UNKNOWN;
    }

    protected MessageQueue makeMessageQueue() {
        return new SimpleMessageQueue();
    }

    protected boolean needsDebugData() {
        return false;
    }

    private DomParser getDomParser(String source, final MessageQueue mq) throws ParseException {
        InputSource is = getInputSource();
        HtmlLexer lexer = new HtmlLexer(CharProducer.Factory.fromString(source, is));
        final Namespaces ns = Namespaces.HTML_DEFAULT; // Includes OpenSocial
        final boolean needsDebugData = needsDebugData();
        DomParser parser = new DomParser(lexer, /* wantsComments */ true, is, ns, mq);
        parser.setDomImpl(documentFactory);
        parser.setNeedsDebugData(needsDebugData);
        return parser;
    }
}