org.apache.cocoon.transformation.SparqlTransformer.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.cocoon.transformation.SparqlTransformer.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cocoon.transformation;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.avalon.framework.component.Component;
import org.apache.avalon.framework.parameters.Parameters;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.cocoon.ProcessingException;
import org.apache.cocoon.environment.SourceResolver;
import org.apache.cocoon.xml.AttributesImpl;
import org.apache.cocoon.xml.IncludeXMLConsumer;
import org.apache.commons.httpclient.*;
import org.apache.commons.httpclient.auth.AuthScope;
import org.apache.commons.httpclient.methods.*;
import org.apache.excalibur.source.SourceParameters;
import org.apache.excalibur.xmlizer.XMLizer;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;

/**
 * This transformer can send queries to a ReST SPARQL endpoint.
 * It can also be used as a general HTTP client.
 *
 * This transformer triggers for the element <code>query</code> in the namespace "http://apache.org/cocoon/sparql/1.0".
 * These elements must not be nested.
 * 
 * The mandatory <code>src</code> attribute contains the url of a SPARQL endpoint.
 * 
 * The optional <code>method</code> attribute contains the HTTP method for the request (default is GET).
 * 
 * The optional <code>credentials</code> attribute contains a username and password, separated by a tab character (&#x9;).
 * The credentials will be sent to <a href="http://hc.apache.org/httpclient-3.x/authentication.html">any authentication realm</a>,
 * so be careful to send them only to the intended service!
 * 
 * For POST requests, parameters are sent in the body if the attribute <code>http:Content-Type</code> is
 * "application/x-www-form-urlencoded".
 * (Note that the header name "Content-Type" is case sensitive!)
 * In this case, the content of the <code>query</code> element is passed as the value of a parameter,
 * which has the name specified by the <code>parameter-name</code> attribute (default is "query").
 * 
 * Otherwise, the content of the <code>query</code> element (text or XML) goes into the request body.
 * 
 * The optional <code>content</code> attribute indicates if the content of the <code>query</code> element is "text"
 * (default for SPARQL queries), or "xml" (useful if you PUT RDF triples).
 * Unfortunately, if you use content="xml" you may run into namespace problems.
 * 
 * The optional <code>parse</code> attribute indicates how the response should be parsed.
 * It can be "xml" or "text". Default is "xml". Text will be wrapped in an XML element.
 * 
 * The optional <code>showErrors</code> attribute can be "true" (default; generate XML elements for HTTP errors)
 * or false (throw exceptions for HTTP errors).
 * 
 * Attributes in the "http://www.w3.org/2006/http#" namespace are used as request headers.
 * The header name is the local name of the attribute.
 * 
 * Attributes in the "http://apache.org/cocoon/sparql/1.0" (sparql:) namespace are used as request parameters.
 * The parameter name is the local name of the attribute. Note: This does not allow for multivalued parameters.
 * 
 * The text content of the <code>query</code> element is passed as the value of the 'query' parameter in GET and
 * POST (www-form-urlencoded data) requests.
 * In PUT requests, it is the request entity (body). Note that this is text, even if you put RDF statements in it,
 * so XML must be escaped.
 * 
 * Example XML input, with content and parse attributes set to their default values:
 * <pre>
 *   <sparql:query
 *     xmlns:sparql="http://apache.org/cocoon/sparql/1.0"
 *     xmlns:http="http://www.w3.org/2006/http#"
 *     src="http://dbpedia.org/sparql"
 *     method="POST"
 *     content="text"
 *     parse="xml"
 *     http:Content-Type="application/x-www-form-urlencoded"
 *     http:Accept="application/sparql-results+xml"
 *     sparql:maxrows="25" sparql:format="XML"
 *   >
 *   <![CDATA[
 *     PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
 *     SELECT *
 *     FROM <http://dbpedia.org>
 *     WHERE {
 *       ?person rdf:type <http://dbpedia.org/class/yago/Person100007846> .
 *     }
 *   ]]>
 *   </sparql:query>
 * </pre>
 * 
 * @author Nico Verwer (nverwer@rakensi.com)
 *
 */
public class SparqlTransformer extends AbstractSAXPipelineTransformer {

    /** Namespace of the triggering element and of attributes that are sent as request parameters. */
    public static final String SPARQL_NAMESPACE_URI = "http://apache.org/cocoon/sparql/1.0";
    /** Namespace of attributes that are sent as HTTP request headers. */
    public static final String HTTP_NAMESPACE_URI = "http://www.w3.org/2006/http#";
    /** Local name of the element that triggers a request. */
    public static final String QUERY_ELEMENT = "query";
    /** Attribute holding the HTTP method (defaults to "GET"). */
    public static final String METHOD_ATTR = "method";
    /** Attribute holding username and password, separated by a tab character. */
    public static final String CREDENTIALS_ATTR = "credentials";
    /** Attribute telling whether the element content is recorded as "text" or "xml". */
    public static final String CONTENT_ATTR = "content";
    /** Attribute telling whether the response is parsed as "xml" or wrapped as "text". */
    public static final String PARSE_ATTR = "parse";
    /** Attribute telling whether HTTP errors become XML elements or exceptions. */
    public static final String SHOW_ERRORS_ATTR = "showErrors";
    /** Mandatory attribute holding the URL of the endpoint. */
    public static final String SRC_ATTR = "src";
    /** Attribute holding the name of the parameter that carries the element content. */
    public static final String PARAMETER_NAME_ATTR = "parameter-name";
    /** Default value for {@link #PARAMETER_NAME_ATTR}. */
    public static final String DEFAULT_QUERY_PARAM = "query";
    /** Header name that is inspected to decide how a POST body is built (case sensitive). */
    public static final String HTTP_CONTENT_TYPE = "Content-Type";

    /** True between the start and end of a query element; used to reject nesting. */
    private boolean inQuery;
    private String src;
    private String method;
    private String credentials;
    /** Value of the <code>content</code> attribute ("text" or "xml"); not an HTTP content type. */
    private String contentType;
    private String parameterName;
    private String parse;
    private boolean showErrors;
    /** Request headers collected from attributes in the HTTP namespace. */
    private Map<String, String> httpHeaders;
    /** Request parameters collected from attributes in the SPARQL namespace, plus the query itself. */
    private SourceParameters requestParameters;

    /**
     * Creates the transformer and makes the SPARQL namespace its default namespace.
     */
    public SparqlTransformer() {
        this.defaultNamespaceURI = SPARQL_NAMESPACE_URI;
    }

    /**
     * Delegates to the superclass and resets the nesting flag for a new pipeline run.
     */
    public void setup(SourceResolver resolver, Map objectModel, String src, Parameters params)
            throws ProcessingException, SAXException, IOException {
        super.setup(resolver, objectModel, src, params);
        inQuery = false;
    }

    /**
     * Returns the value of the attribute with the given qualified name,
     * or <code>defaultValue</code> when the attribute is absent.
     */
    private String getAttribute(Attributes attr, String name, String defaultValue) {
        return (attr.getIndex(name) >= 0) ? attr.getValue(name) : defaultValue;
    }

    /**
     * Handles the start of a <code>query</code> element: reads its attributes into the
     * request state and starts recording the element content.
     * Elements with any other local name are ignored here.
     *
     * @throws ProcessingException if queries are nested, the <code>src</code> attribute is
     *         missing, or the <code>content</code> attribute has an unsupported value.
     */
    public void startTransformingElement(String uri, String name, String raw, Attributes attr)
            throws ProcessingException, IOException, SAXException {
        if (!name.equals(QUERY_ELEMENT)) {
            return;
        }
        if (inQuery) {
            throw new ProcessingException("Nested SPARQL queries are not allowed.");
        }
        inQuery = true;
        src = getAttribute(attr, SRC_ATTR, null);
        if (src == null) {
            throw new ProcessingException(
                    "The " + SRC_ATTR + " attribute is mandatory for " + QUERY_ELEMENT + " elements.");
        }
        method = getAttribute(attr, METHOD_ATTR, "GET");
        credentials = getAttribute(attr, CREDENTIALS_ATTR, "");
        contentType = getAttribute(attr, CONTENT_ATTR, "text");
        parameterName = getAttribute(attr, PARAMETER_NAME_ATTR, DEFAULT_QUERY_PARAM);
        parse = getAttribute(attr, PARSE_ATTR, "xml");
        // Any value starting with 't' counts as true. startsWith (rather than charAt(0))
        // keeps an empty attribute value from raising StringIndexOutOfBoundsException.
        showErrors = getAttribute(attr, SHOW_ERRORS_ATTR, "true").startsWith("t");
        requestParameters = new SourceParameters();
        httpHeaders = new HashMap<String, String>();
        // Namespaced attributes become request headers (http:) or request parameters (sparql:).
        for (int i = 0; i < attr.getLength(); ++i) {
            String attributeUri = attr.getURI(i);
            if (HTTP_NAMESPACE_URI.equals(attributeUri)) {
                httpHeaders.put(attr.getLocalName(i), attr.getValue(i));
            } else if (SPARQL_NAMESPACE_URI.equals(attributeUri)) {
                requestParameters.setParameter(attr.getLocalName(i), attr.getValue(i));
            }
        }
        if (contentType.equals("text")) {
            startTextRecording();
        } else if (contentType.equals("xml")) {
            startSerializedXMLRecording(null);
        } else {
            throw new ProcessingException("Unsupported query content type: " + contentType);
        }
    }

    /**
     * Handles the end of a <code>query</code> element: stops recording, stores the recorded
     * content as the query parameter and executes the HTTP request.
     */
    public void endTransformingElement(String uri, String name, String raw)
            throws ProcessingException, IOException, SAXException {
        if (!name.equals(QUERY_ELEMENT)) {
            return;
        }
        inQuery = false;
        String query = null;
        if (contentType.equals("text")) {
            query = endTextRecording();
        } else if (contentType.equals("xml")) {
            query = endSerializedXMLRecording();
        }
        requestParameters.setParameter(parameterName, query);
        executeRequest(src, method, httpHeaders, requestParameters);
    }

    /**
     * Sends the request and streams the outcome to the XML consumer.
     * <ul>
     * <li>Non-2xx status: a <code>sparql:error</code> element (when showErrors is set) or an exception.</li>
     * <li>204: a <code>sparql:result</code> element containing the status line.</li>
     * <li>Otherwise the body is parsed as XML or wrapped as text in <code>sparql:result</code>,
     *     depending on the <code>parse</code> attribute.</li>
     * </ul>
     * The connection is always released once the request has been executed.
     *
     * @param url               the endpoint URL.
     * @param method            HTTP method name (GET, POST, PUT or DELETE, case insensitive).
     * @param httpHeaders       request headers to add.
     * @param requestParameters request parameters, including the query parameter.
     * @throws ProcessingException for unsupported methods or parse types, malformed credentials,
     *         proxy configuration failures, a missing XML response body, a missing XMLizer,
     *         or (when showErrors is off) a non-2xx response.
     */
    private void executeRequest(String url, String method, Map<String, String> httpHeaders,
            SourceParameters requestParameters) throws ProcessingException, IOException, SAXException {
        HttpClient httpclient = new HttpClient();
        configureProxy(httpclient, url);
        HttpMethod httpMethod = createHttpMethod(url, method, httpHeaders, requestParameters);
        applyCredentials(httpclient, httpMethod);
        for (Map.Entry<String, String> header : httpHeaders.entrySet()) {
            httpMethod.addRequestHeader(header.getKey(), header.getValue());
        }
        // Declared outside the try-block so that it can be released in the finally-block.
        XMLizer xmlizer = null;
        try {
            int responseCode = httpclient.executeMethod(httpMethod);
            if (responseCode < 200 || responseCode >= 300) {
                if (!showErrors) {
                    throw new ProcessingException("Received HTTP status code " + responseCode + " "
                            + httpMethod.getStatusText() + ":\n" + httpMethod.getResponseBodyAsString());
                }
                AttributesImpl attrs = new AttributesImpl();
                attrs.addCDATAAttribute("status", "" + responseCode);
                sendTextElement("error", attrs, httpMethod.getStatusText());
                return; // The finally-block still releases the connection.
            }
            if (responseCode == 204) { // No content: report the status line instead.
                sendTextElement("result", EMPTY_ATTRIBUTES, httpMethod.getStatusLine().toString());
            } else if (parse.equalsIgnoreCase("xml")) {
                InputStream responseBodyStream = httpMethod.getResponseBodyAsStream();
                if (responseBodyStream == null) {
                    // Fail with a clear message instead of handing a null stream to the XMLizer.
                    throw new ProcessingException("Received HTTP status code " + responseCode
                            + " without a response body; cannot parse the response as XML.");
                }
                xmlizer = (XMLizer) manager.lookup(XMLizer.ROLE);
                xmlizer.toSAX(responseBodyStream, "text/xml", httpMethod.getURI().toString(),
                        new IncludeXMLConsumer(xmlConsumer));
                responseBodyStream.close();
            } else if (parse.equalsIgnoreCase("text")) {
                sendTextElement("result", EMPTY_ATTRIBUTES, httpMethod.getResponseBodyAsString());
            } else {
                throw new ProcessingException("Unknown parse type: " + parse);
            }
        } catch (ServiceException e) {
            throw new ProcessingException("Cannot find the right XMLizer for " + XMLizer.ROLE, e);
        } finally {
            if (xmlizer != null) {
                manager.release((Component) xmlizer);
            }
            httpMethod.releaseConnection();
        }
    }

    /**
     * Routes the client through the proxy named by the <code>http.proxyHost</code> and
     * <code>http.proxyPort</code> (default 80) system properties, unless no proxy host is set
     * or the URL matches <code>http.nonProxyHosts</code>.
     */
    private void configureProxy(HttpClient httpclient, String url) throws ProcessingException {
        String proxyHost = System.getProperty("http.proxyHost");
        if (proxyHost == null || bypassesProxy(url)) {
            return;
        }
        try {
            HostConfiguration hostConfiguration = httpclient.getHostConfiguration();
            hostConfiguration.setProxy(proxyHost, Integer.parseInt(System.getProperty("http.proxyPort", "80")));
            httpclient.setHostConfiguration(hostConfiguration);
        } catch (Exception e) {
            throw new ProcessingException("Cannot set proxy!", e);
        }
    }

    /**
     * Tells whether the URL matches one of the '|'-separated host patterns in the
     * <code>http.nonProxyHosts</code> system property, where '*' is a wildcard
     * (for example <code>-Dhttp.nonProxyHosts=10.*|localhost|62.112.232.245</code>).
     */
    private static boolean bypassesProxy(String url) {
        String nonProxyHosts = System.getProperty("http.nonProxyHosts", "");
        if (nonProxyHosts.length() == 0) {
            return false;
        }
        // Turn the host patterns into regular expressions: escape dots, expand wildcards.
        String[] hostPatterns = nonProxyHosts.replaceAll("\\.", "\\\\.").replaceAll("\\*", ".*").split("\\|");
        StringBuilder regex = new StringBuilder();
        for (String hostPattern : hostPatterns) {
            if (regex.length() > 0) {
                regex.append('|');
            }
            regex.append("(^https?://").append(hostPattern).append(".*$)");
        }
        // A property that holds only separators yields no patterns, hence no bypass.
        return regex.length() > 0 && url.matches(regex.toString());
    }

    /**
     * Builds the HttpClient method object for the requested HTTP method and attaches the
     * request parameters as query string and/or request body.
     *
     * @throws ProcessingException if the HTTP method is not GET, POST, PUT or DELETE.
     */
    private HttpMethod createHttpMethod(String url, String method, Map<String, String> httpHeaders,
            SourceParameters requestParameters) throws ProcessingException {
        // Do not send an empty query parameter.
        String queryValue = requestParameters.getParameter(parameterName);
        if (queryValue != null && queryValue.trim().length() == 0) {
            requestParameters.removeParameter(parameterName);
        }
        if ("GET".equalsIgnoreCase(method)) {
            HttpMethod httpGetMethod = new GetMethod(url);
            String queryString = encodedQueryString(requestParameters);
            httpGetMethod.setQueryString(queryString != null ? queryString : "");
            return httpGetMethod;
        }
        if ("POST".equalsIgnoreCase(method)) {
            PostMethod httpPostMethod = new PostMethod(url);
            String requestContentType = httpHeaders.get(HTTP_CONTENT_TYPE);
            if (requestContentType != null
                    && requestContentType.startsWith("application/x-www-form-urlencoded")) {
                // Encode all parameters in the POST body.
                Iterator parNames = requestParameters.getParameterNames();
                while (parNames.hasNext()) {
                    String parName = (String) parNames.next();
                    httpPostMethod.addParameter(parName, requestParameters.getParameter(parName));
                }
            } else {
                // The query parameter is the POST body; the other parameters go into the query string.
                httpPostMethod.setRequestBody(requestParameters.getParameter(parameterName));
                requestParameters.removeParameter(parameterName);
                String queryString = encodedQueryString(requestParameters);
                httpPostMethod.setQueryString(queryString != null ? queryString : "");
            }
            return httpPostMethod;
        }
        if ("PUT".equalsIgnoreCase(method)) {
            // The query parameter is the PUT body; the other parameters go into the query string.
            PutMethod httpPutMethod = new PutMethod(url);
            httpPutMethod.setRequestBody(requestParameters.getParameter(parameterName));
            requestParameters.removeParameter(parameterName);
            httpPutMethod.setQueryString(encodedQueryString(requestParameters));
            return httpPutMethod;
        }
        if ("DELETE".equalsIgnoreCase(method)) {
            HttpMethod httpDeleteMethod = new DeleteMethod(url);
            httpDeleteMethod.setQueryString(encodedQueryString(requestParameters));
            return httpDeleteMethod;
        }
        throw new ProcessingException("Unsupported method: " + method);
    }

    /**
     * Returns the encoded query string for the parameters with double quotes escaped as
     * <code>%22</code>, or <code>null</code> when the parameters yield no query string.
     */
    private static String encodedQueryString(SourceParameters parameters) {
        String encoded = parameters.getEncodedQueryString();
        return (encoded == null) ? null : encoded.replace("\"", "%22");
    }

    /**
     * Registers the optional credentials for the host and port of the request and switches
     * on preemptive authentication. Does nothing when no credentials were given.
     *
     * @throws ProcessingException if the credentials do not contain a tab separator.
     */
    private void applyCredentials(HttpClient httpclient, HttpMethod httpMethod)
            throws ProcessingException, IOException {
        if (credentials == null || credentials.length() == 0) {
            return;
        }
        // Limit -1 keeps trailing empty strings, so an empty password ("user<TAB>") is accepted.
        String[] unpw = credentials.split("\t", -1);
        if (unpw.length < 2) {
            throw new ProcessingException("The " + CREDENTIALS_ATTR
                    + " attribute must contain a username and a password, separated by a tab character.");
        }
        httpclient.getParams().setAuthenticationPreemptive(true);
        httpclient.getState().setCredentials(
                new AuthScope(httpMethod.getURI().getHost(), httpMethod.getURI().getPort(), AuthScope.ANY_REALM),
                new UsernamePasswordCredentials(unpw[0], unpw[1]));
    }

    /**
     * Sends an element in the SPARQL namespace (qualified name <code>sparql:localName</code>)
     * with the given attributes and text content to the XML consumer.
     * A <code>null</code> text is sent as empty content.
     */
    private void sendTextElement(String localName, Attributes attrs, String text) throws SAXException {
        String content = (text != null) ? text : "";
        String qualifiedName = "sparql:" + localName;
        xmlConsumer.startElement(SPARQL_NAMESPACE_URI, localName, qualifiedName, attrs);
        xmlConsumer.characters(content.toCharArray(), 0, content.length());
        xmlConsumer.endElement(SPARQL_NAMESPACE_URI, localName, qualifiedName);
    }

}