org.apache.shindig.gadgets.parse.ParseTreeSerializerBenchmark.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.shindig.gadgets.parse.ParseTreeSerializerBenchmark.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */
package org.apache.shindig.gadgets.parse;

import org.apache.commons.io.IOUtils;
import org.apache.shindig.gadgets.GadgetException;
import org.apache.shindig.gadgets.parse.caja.CajaHtmlParser;
import org.apache.shindig.gadgets.parse.nekohtml.NekoSimplifiedHtmlParser;

import org.w3c.dom.DOMImplementation;

import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
import java.io.FileInputStream;
import java.io.StringWriter;

/**
 * Standalone benchmark for HTML parsing and serialization.
 *
 * <p>Loads an HTML file and, for both the Caja and the Neko simplified parsers, times:
 * <ul>
 *   <li>parsing the content into a W3C DOM,</li>
 *   <li>serializing that DOM with {@link HtmlSerialization}, and</li>
 *   <li>serializing that DOM with a JAXP identity {@link Transformer}.</li>
 * </ul>
 *
 * <p>A short warm-up pass is executed first with its output suppressed, followed by a
 * pause, and only then the measured pass with the requested number of runs.
 *
 * <p>Usage: {@code ParseTreeSerializerBenchmark <input-file> <num-runs>}
 */
public class ParseTreeSerializerBenchmark {
    private static final DOMImplementation DOCUMENT_PROVIDER = new ParseModule.DOMImplementationProvider().get();

    /** Number of iterations per timing loop during the silent warm-up pass. */
    private static final int WARMUP_RUNS = 10;

    /** Pause between the warm-up pass and the measured pass, in milliseconds. */
    private static final long WARMUP_PAUSE_MILLIS = 10000L;

    private static final long NANOS_PER_MILLI = 1000000L;

    private final GadgetHtmlParser nekoSimpleParser = new NekoSimplifiedHtmlParser(DOCUMENT_PROVIDER);

    private final GadgetHtmlParser cajaParser = new CajaHtmlParser(DOCUMENT_PROVIDER);

    /** The HTML text being parsed and serialized. */
    private final String content;

    /** Iterations per timing loop for the pass currently executing. */
    private int numRuns;

    /** While true, {@link #output(String)} prints nothing. */
    private boolean warmup;

    /**
     * Loads the input file and runs the complete benchmark (warm-up pass, pause, measured pass).
     *
     * <p>Terminates the JVM with status 1 if the file does not exist or is not readable.
     *
     * @param file path of the HTML file to benchmark against
     * @param numRuns iterations per timing loop in the measured pass
     * @throws Exception if reading the file, parsing, or serializing fails, or if the
     *     pause between passes is interrupted
     */
    private ParseTreeSerializerBenchmark(String file, int numRuns) throws Exception {
        File inputFile = new File(file);
        if (!inputFile.exists() || !inputFile.canRead()) {
            System.err.println("Input file: " + file + " not found or can't be read.");
            System.exit(1);
        }

        // Close the stream deterministically, and decode with an explicit charset so the
        // benchmark input does not vary with the platform default encoding.
        // NOTE(review): assumes the input file is UTF-8 encoded - confirm for your inputs.
        FileInputStream in = new FileInputStream(inputFile);
        try {
            content = new String(IOUtils.toByteArray(in), "UTF-8");
        } finally {
            in.close();
        }

        // Warm-up pass: few iterations, output suppressed.
        this.numRuns = WARMUP_RUNS;
        warmup = true;
        runCaja();
        runNekoSimple();

        //Sleep to let JIT kick in
        Thread.sleep(WARMUP_PAUSE_MILLIS);

        // Measured pass: requested number of iterations, results printed.
        this.numRuns = numRuns;
        warmup = false;
        runCaja();
        runNekoSimple();
    }

    /** Runs the parse and serialize timings with the Neko simplified parser. */
    private void runNekoSimple() throws Exception {
        output("NekoSimple-----------------");
        timeParseDom(nekoSimpleParser);
        timeParseDomSerialize(nekoSimpleParser);
    }

    /** Runs the parse and serialize timings with the Caja parser. */
    private void runCaja() throws Exception {
        output("Caja-----------------");
        timeParseDom(cajaParser);
        timeParseDomSerialize(cajaParser);
    }

    /**
     * Prints a line to standard output unless the warm-up pass is in progress.
     *
     * @param string the line to print
     */
    private void output(String string) {
        if (!warmup) {
            System.out.println(string);
        }
    }

    /**
     * Reports the time elapsed since {@code startNanos} as a whole-millisecond total and a
     * per-run average over {@link #numRuns}.
     *
     * @param label description of the operation that was timed
     * @param startNanos value of {@link System#nanoTime()} taken just before the timed loop
     */
    private void report(String label, long startNanos) {
        // nanoTime is monotonic, unlike currentTimeMillis which follows the wall clock.
        long elapsedMillis = (System.nanoTime() - startNanos) / NANOS_PER_MILLI;
        output(label + " [" + elapsedMillis + " ms total: " + ((double) elapsedMillis) / numRuns + "ms/run]");
    }

    /**
     * Times {@link #numRuns} parses of the content into a W3C DOM.
     *
     * @param parser the parser under test
     * @throws GadgetException if the parser fails
     */
    private void timeParseDom(GadgetHtmlParser parser) throws GadgetException {
        long start = System.nanoTime();
        for (int i = 0; i < numRuns; ++i) {
            parser.parseDom(content);
        }
        report("Parsing W3C DOM", start);
    }

    /**
     * Parses the content once (untimed), then times {@link #numRuns} serializations of the
     * resulting document, first with {@link HtmlSerialization} and then with a JAXP
     * identity transformer using the "html" output method.
     *
     * @param parser the parser used to build the document being serialized
     * @throws GadgetException if parsing fails, or wrapping any exception raised while
     *     serializing (reported as {@code HTML_PARSE_ERROR})
     */
    private void timeParseDomSerialize(GadgetHtmlParser parser) throws GadgetException {
        org.w3c.dom.Document document = parser.parseDom(content);
        try {
            long start = System.nanoTime();
            for (int i = 0; i < numRuns; ++i) {
                HtmlSerialization.serialize(document);
            }
            report("Serializing", start);
        } catch (Exception e) {
            throw new GadgetException(GadgetException.Code.HTML_PARSE_ERROR, e);
        }

        try {
            // Create an "identity" transformer - copies input to output
            Transformer t = TransformerFactory.newInstance().newTransformer();
            t.setOutputProperty(OutputKeys.METHOD, "html");

            long start = System.nanoTime();
            for (int i = 0; i < numRuns; ++i) {
                // Presize the buffer to roughly 110% of the input length.
                StringWriter sw = new StringWriter((content.length() * 11) / 10);
                t.transform(new DOMSource(document), new StreamResult(sw));
                // Result is discarded on purpose; the call keeps String creation in the timing.
                sw.toString();
            }
            report("Serializing DOM Transformer", start);
        } catch (Exception e) {
            throw new GadgetException(GadgetException.Code.HTML_PARSE_ERROR, e);
        }
    }

    /**
     * Command-line entry point.
     *
     * <p>Exits with status 1 on a wrong argument count, a {@code num-runs} value that is not
     * a positive integer, or any failure while running the benchmark.
     *
     * @param args {@code <input-file> <num-runs>}
     */
    public static void main(String[] args) {
        // Can be run as a standalone program to measure parsing and serialization
        // performance with both the Caja and the Neko simplified parsers.
        if (args.length != 2) {
            System.err.println("Args: <input-file> <num-runs>");
            System.exit(1);
        }

        String fileArg = args[0];
        String runsArg = args[1];
        int numRuns = -1;
        try {
            numRuns = Integer.parseInt(runsArg);
        } catch (NumberFormatException e) {
            System.err.println("Invalid num-runs argument: " + runsArg + ", reason: " + e);
            // Do not fall through: a run count of -1 would produce meaningless results.
            System.exit(1);
        }
        if (numRuns <= 0) {
            System.err.println("Invalid num-runs argument: " + runsArg + ", reason: must be a positive integer");
            System.exit(1);
        }

        try {
            new ParseTreeSerializerBenchmark(fileArg, numRuns);
        } catch (Exception e) {
            e.printStackTrace();
            // Signal the failure to the calling shell or script.
            System.exit(1);
        }
    }
}