com.clarkparsia.empire.codegen.BeanGenerator.java Source code

Introduction

Here is the source code for com.clarkparsia.empire.codegen.BeanGenerator.java, the code generator that turns an RDF schema, OWL ontology, or plain RDF data into Empire-compatible Java bean interfaces.
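
Before the full listing, here is a minimal usage sketch. The package name, ontology path, and output directory are hypothetical placeholders; generateSourceFiles(...) is the public entry point shown in the listing below.

import com.clarkparsia.empire.codegen.BeanGenerator;
import org.openrdf.rio.RDFFormat;

import java.io.File;
import java.net.URL;

public class GenerateBeansExample {
    public static void main(String[] args) throws Exception {
        // hypothetical inputs: a local Turtle ontology and a target source directory
        URL aOntology = new File("ontologies/myontology.ttl").toURI().toURL();
        File aOutputDir = new File("generated-src/my/domain");

        // writes one <ClassName>.java interface per class found in the ontology
        BeanGenerator.generateSourceFiles("my.domain", aOntology, RDFFormat.TURTLE, aOutputDir);
    }
}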

Source

/*
 * Copyright (c) 2009-2012 Clark & Parsia, LLC. <http://www.clarkparsia.com>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.clarkparsia.empire.codegen;

import com.complexible.common.collect.Iterables2;
import com.complexible.common.collect.Iterators2;
import com.complexible.common.openrdf.model.Statements;
import com.complexible.common.openrdf.repository.Repositories;
import com.google.common.collect.Iterables;
import org.openrdf.model.Resource;
import org.openrdf.model.URI;
import org.openrdf.model.Statement;
import org.openrdf.model.Value;
import org.openrdf.model.Literal;
import org.openrdf.model.BNode;
import org.openrdf.model.vocabulary.OWL;
import org.openrdf.model.vocabulary.XMLSchema;
import org.openrdf.model.vocabulary.RDFS;
import org.openrdf.model.vocabulary.RDF;

import org.openrdf.model.impl.ValueFactoryImpl;

import org.openrdf.query.QueryLanguage;
import org.openrdf.repository.Repository;
import org.openrdf.rio.RDFFormat;

import org.openrdf.query.BindingSet;
import org.openrdf.query.TupleQueryResult;

import com.complexible.common.openrdf.util.AdunaIterations;

import com.complexible.common.collect.MultiIterator;

import com.complexible.common.net.NetUtils;
import com.complexible.common.base.Functions2;
import com.google.common.base.Predicate;
import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.base.Functions;
import com.google.common.collect.Collections2;
import com.google.common.collect.Sets;
import com.google.common.collect.Iterators;
import com.google.common.io.Files;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collection;
import java.util.Map;
import java.util.List;
import java.util.Arrays;
import java.util.HashSet;
import java.util.HashMap;

import java.io.File;
import java.io.IOException;

import java.net.URL;

/**
 * <p>Generate a set of Java beans which are compatible with Empire from a given RDF schema, OWL ontology, or blob
 * of RDF data.  The generated source code will map to the domain represented in the RDF.</p>
 *
 * @author   Michael Grove
 * @since   0.5.1
 * @version   0.7.3
 */
public final class BeanGenerator {
    /**
     * The logger
     */
    private static final Logger LOGGER = LoggerFactory.getLogger(BeanGenerator.class);

    /**
     * URI constant for the owl:Thing concept
     */
    private static final URI OWL_THING = ValueFactoryImpl.getInstance().createURI(OWL.NAMESPACE + "Thing");

    /**
     * The list of xsd datatypes which map to Integer
     */
    private static final List<URI> integerTypes = Arrays.asList(XMLSchema.INT, XMLSchema.INTEGER,
            XMLSchema.POSITIVE_INTEGER, XMLSchema.NEGATIVE_INTEGER, XMLSchema.NON_NEGATIVE_INTEGER,
            XMLSchema.NON_POSITIVE_INTEGER, XMLSchema.UNSIGNED_INT);

    /**
     * The list of xsd datatypes which map to Long
     */
    private static final List<URI> longTypes = Arrays.asList(XMLSchema.LONG, XMLSchema.UNSIGNED_LONG);

    /**
     * The list of xsd datatypes which map to Float
     */
    private static final List<URI> floatTypes = Arrays.asList(XMLSchema.FLOAT, XMLSchema.DECIMAL);

    /**
     * The list of xsd datatypes which map to Short
     */
    private static final List<URI> shortTypes = Arrays.asList(XMLSchema.SHORT, XMLSchema.UNSIGNED_SHORT);

    /**
     * The list of xsd datatypes which map to Byte
     */
    private static final List<URI> byteTypes = Arrays.asList(XMLSchema.BYTE, XMLSchema.UNSIGNED_BYTE);

    private static final Map<Resource, String> NAMES = new HashMap<Resource, String>();
    private static final Map<String, Integer> NAMES_TO_COUNT = new HashMap<String, Integer>();

    /**
     * NO instances
     */
    private BeanGenerator() {
    }

    /**
     * Return the Java bean source code that represents the given RDF class
     * @param thePackageName the name of the package the source will be in
     * @param theGraph the repository containing information about the class
     * @param theClass the class that is to be turned into Java source
     * @param theMap the map of classes to the properties in their domain
     * @return a string of the source code of the equivalent Java bean
     * @throws Exception if there is an error while converting
     */
    private static String toSource(final String thePackageName, final Repository theGraph, final Resource theClass,
            final Map<Resource, Collection<URI>> theMap) throws Exception {
        StringBuffer aSrc = new StringBuffer();

        aSrc.append("package ").append(thePackageName).append(";\n\n");

        aSrc.append("import java.util.*;\n");
        aSrc.append("import javax.persistence.Entity;\n");
        aSrc.append("import com.clarkparsia.empire.SupportsRdfId;\n");
        aSrc.append("import com.clarkparsia.empire.annotation.*;\n\n");

        // TODO: more imports? less?

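        // collect the values of rdfs:subClassOf for this class; they become the super-interfaces of the generated bean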
        Iterable<Resource> aSupers = Iterables2.present(Iterables.transform(
                AdunaIterations.iterable(Repositories.getStatements(theGraph, theClass, RDFS.SUBCLASSOF, null)),
                Statements.objectAsResource()));

        aSrc.append("@Entity\n");
        aSrc.append("@RdfsClass(\"").append(theClass).append("\")\n");
        aSrc.append("public interface ").append(className(theClass));

        aSupers = Collections2.filter(Sets.newHashSet(aSupers), new Predicate<Resource>() {
            public boolean apply(final Resource theValue) {
                return theValue != null && !theValue.toString().startsWith(OWL.NAMESPACE)
                        && !theValue.toString().startsWith(RDFS.NAMESPACE)
                        && !theValue.toString().startsWith(RDF.NAMESPACE);
            }
        });

        boolean aNeedsComma = false;
        aSrc.append(" extends");

        if (aSupers.iterator().hasNext()) {
            for (Resource aSuper : aSupers) {
                if (aNeedsComma) {
                    aSrc.append(",");
                } else {
                    aNeedsComma = true;
                }

                aSrc.append(" ").append(className(aSuper));
            }
        }

        if (aNeedsComma) {
            aSrc.append(",");
        }

        aSrc.append(" SupportsRdfId");

        aSrc.append(" { \n\n");

        Collection<URI> aProps = props(theClass, theMap);

        for (URI aProp : aProps) {
            aSrc.append("@RdfProperty(\"").append(aProp).append("\")\n");
            aSrc.append("public ").append(functionType(theGraph, aProp)).append(" get").append(functionName(aProp))
                    .append("();\n");
            aSrc.append("public void set").append(functionName(aProp)).append("(")
                    .append(functionType(theGraph, aProp)).append(" theValue);\n\n");
        }

        aSrc.append("}");

        return aSrc.toString();
    }

    /**
     * Return the type of the function (getter & setter), i.e. the bean property type, for the given rdf:Property
     * @param theRepo the graph of the ontology/data
     * @param theProp the property
     * @return the String representation of the property type
     * @throws Exception if there is an error querying the data
     */
    private static String functionType(final Repository theRepo, final URI theProp) throws Exception {
        String aType;

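        // first look for an explicit rdfs:range declared on the property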
        Resource aRangeRes = Statements
                .objectAsResource().apply(AdunaIterations
                        .singleResult(Repositories.getStatements(theRepo, theProp, RDFS.RANGE, null)).orNull())
                .orNull();

        if (aRangeRes instanceof BNode) {
            // we can't handle bnodes very well, so we're just going to assume Object
            return "Object";
        }

        URI aRange = (URI) aRangeRes;

        if (aRange == null) {
            // no explicit range, try to infer it...
            try {
                TupleQueryResult aResults = Repositories.selectQuery(theRepo, QueryLanguage.SERQL,
                        "select distinct r from {s} <" + theProp + "> {o}, {o} rdf:type {r}");

                if (aResults.hasNext()) {
                    URI aTempRange = (URI) aResults.next().getValue("r");
                    if (!aResults.hasNext()) {
                        aRange = aTempRange;
                    } else {
                        // TODO: leave the range as null; the property is used with values of several different types.
                        // Here we should try to find the common superclass of all the values and use that as the range.
                    }
                }

                aResults.close();

                if (aRange == null) {
                    // could not get it from type usage, so maybe it's a literal and we can guess the range from its datatype

                    aResults = Repositories.selectQuery(theRepo, QueryLanguage.SERQL,
                            "select distinct datatype(o) as dt from {s} <" + theProp + "> {o} where isLiteral(o)");

                    if (aResults.hasNext()) {
                        URI aTempRange = null;
                        while (aTempRange == null && aResults.hasNext()) {
                            // the SeRQL query projects the datatype itself, bound to 'dt' (not 'o')
                            Value aDatatype = aResults.next().getValue("dt");
                            if (aDatatype instanceof URI) {
                                aTempRange = (URI) aDatatype;
                            }
                        }

                        if (!aResults.hasNext()) {
                            aRange = aTempRange;
                        } else {
                            // TODO: do something here, literals of multiple types used
                        }
                    }

                    aResults.close();
                }
            } catch (Exception e) {
                // don't worry about it
                e.printStackTrace();
            }
        }

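        // map the resolved range onto a Java type; a missing range or owl:Thing becomes Object, anything else refers to another generated bean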
        if (XMLSchema.STRING.equals(aRange) || RDFS.LITERAL.equals(aRange)) {
            aType = "String";
        } else if (XMLSchema.BOOLEAN.equals(aRange)) {
            aType = "Boolean";
        } else if (integerTypes.contains(aRange)) {
            aType = "Integer";
        } else if (longTypes.contains(aRange)) {
            aType = "Long";
        } else if (XMLSchema.DOUBLE.equals(aRange)) {
            aType = "Double";
        } else if (floatTypes.contains(aRange)) {
            aType = "Float";
        } else if (shortTypes.contains(aRange)) {
            aType = "Short";
        } else if (byteTypes.contains(aRange)) {
            aType = "Byte";
        } else if (XMLSchema.ANYURI.equals(aRange)) {
            aType = "java.net.URI";
        } else if (XMLSchema.DATE.equals(aRange) || XMLSchema.DATETIME.equals(aRange)) {
            aType = "Date";
        } else if (XMLSchema.TIME.equals(aRange)) {
            aType = "Date";
        } else if (aRange == null || aRange.equals(OWL_THING)) {
            aType = "Object";
        } else {
            aType = className(aRange);
        }

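        // multi-valued properties are exposed as a Collection rather than a single value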
        if (isCollection(theRepo, theProp)) {
            aType = "Collection<? extends " + aType + ">";
        }

        return aType;
    }

    /**
     * Determine whether or not the property's values form a collection.  This inspects the ontology for cardinality
     * restrictions and, when none are available, falls back to the actual structure of the data.
     * @param theRepo the graph of the ontology/data
     * @param theProp the property
     * @return true if the property has a collection as its value, false if it is a single-valued property
     * @throws Exception if there is an error querying the data
     */
    private static boolean isCollection(final Repository theRepo, final URI theProp) throws Exception {
        // TODO: this is not foolproof.

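        // first look in the ontology for an owl cardinality restriction on this property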
        String aCardQuery = "select distinct ?card where {\n" + "?s rdf:type owl:Restriction.\n"
                + "?s owl:onProperty <" + theProp + ">.\n" + "?s ?cardProp ?card.\n"
                + "FILTER (?cardProp = owl:cardinality || ?cardProp = owl:minCardinality || ?cardProp = owl:maxCardinality)\n"
                + "}";
        TupleQueryResult aResults = Repositories.selectQuery(theRepo, QueryLanguage.SPARQL, aCardQuery);
        if (aResults.hasNext()) {
            Literal aCard = (Literal) aResults.next().getValue("card");

            try {
                return Integer.parseInt(aCard.getLabel()) > 1;
            } catch (NumberFormatException e) {
                LOGGER.error("Unparseable cardinality value for '" + theProp + "' of '" + aCard + "'", e);
            }
        }

        aResults.close();

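        // no usable restriction found; fall back to checking whether any subject in the data has more than one value for the property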
        try {
            aResults = Repositories.selectQuery(theRepo, QueryLanguage.SPARQL,
                    "select distinct ?s where  { ?s <" + theProp + "> ?o}");
            for (BindingSet aBinding : AdunaIterations.iterable(aResults)) {

                Collection aCollection = Sets
                        .newHashSet(Iterators2.present(Iterators.transform(
                                AdunaIterations.iterator(Repositories.getStatements(theRepo,
                                        (Resource) aBinding.getValue("s"), theProp, null)),
                                Statements.objectOptional())));
                if (aCollection.size() > 1) {
                    return true;
                }
            }

            return false;
        } finally {
            aResults.close();
        }
    }

    /**
     * Return the name of the function (the bean property) for this rdf:Property
     * @param theProp the rdf:Property
     * @return the name of the Java property/function name
     */
    private static String functionName(final URI theProp) {
        return className(theProp);
    }

    /**
     * Return all the properties for the given resource.  This will return only the properties which are directly
     * associated with the class, not any properties from its parent, or otherwise inferred from the data.
     * @param theRes the resource
     * @param theMap the map of resources to properties
     * @return a collection of the properties associated with the class
     */
    private static Collection<URI> props(final Resource theRes, final Map<Resource, Collection<URI>> theMap) {
        Collection<URI> aProps = new HashSet<URI>();

        if (theMap.containsKey(theRes)) {
            aProps.addAll(theMap.get(theRes));
        }

        return aProps;
    }

    /**
     * Given a Resource, return the Java class name for that resource
     * @param theClass the resource
     * @return the name of the Java class
     */
    private static String className(Resource theClass) {
        if (NAMES.containsKey(theClass)) {
            return NAMES.get(theClass);
        }

        String aLabel;

        if (theClass instanceof URI) {
            aLabel = ((URI) theClass).getLocalName();
        } else {
            aLabel = theClass.stringValue();
        }

        aLabel = String.valueOf(aLabel.charAt(0)).toUpperCase() + aLabel.substring(1);

        aLabel = aLabel.replaceAll(" ", "");

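        // if this simple name has already been used, append a counter so generated class names stay unique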
        if (NAMES_TO_COUNT.containsKey(aLabel)) {
            String aNewLabel = aLabel + NAMES_TO_COUNT.get(aLabel);

            NAMES_TO_COUNT.put(aLabel, NAMES_TO_COUNT.get(aLabel) + 1);

            aLabel = aNewLabel;
        } else {
            NAMES_TO_COUNT.put(aLabel, 0);
        }

        NAMES.put(theClass, aLabel);

        return aLabel;
    }

    /**
     * Given an ontology/schema, generate Empire compatible Java beans for each class in the ontology.
     * @param thePackageName the name of the package the source should belong to
     * @param theOntology the location of the ontology to load
     * @param theFormat the RDF format the ontology is in
     * @param theDirToSave where to save the generated source code
     * @throws Exception if there is an error while generating the source
     */
    public static void generateSourceFiles(String thePackageName, URL theOntology, RDFFormat theFormat,
            File theDirToSave) throws Exception {
        NAMES_TO_COUNT.clear();

        Repository aRepository = Repositories.createInMemoryRepo();

        Repositories.add(aRepository, theOntology.openStream(), theFormat);

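        // gather everything declared as an rdfs:Class or owl:Class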
        Collection<Resource> aClasses = Sets.newHashSet(Iterators.transform(new MultiIterator<Statement>(
                AdunaIterations.iterator(Repositories.getStatements(aRepository, null, RDF.TYPE, RDFS.CLASS)),
                AdunaIterations.iterator(Repositories.getStatements(aRepository, null, RDF.TYPE, OWL.CLASS))),
                new StatementToSubject()));

        aClasses = Collections2.filter(aClasses, new Predicate<Resource>() {
            public boolean apply(Resource theRes) {
                return theRes instanceof URI;
            }
        });

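        // also collect every class used as the object of rdf:type, to cover instance data without schema declarations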
        Collection<Resource> aIndClasses = Sets.newHashSet(Iterators.transform(
                AdunaIterations.iterator(Repositories.getStatements(aRepository, null, RDF.TYPE, null)),
                Functions.compose(Functions2.<Value, Resource>cast(Resource.class), new StatementToObject())));

        aClasses.addAll(aIndClasses);

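        // ignore classes from the RDF, RDFS and OWL vocabularies themselves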
        aClasses = Collections2.filter(aClasses, new Predicate<Resource>() {
            public boolean apply(final Resource theValue) {
                return !theValue.stringValue().startsWith(RDFS.NAMESPACE)
                        && !theValue.stringValue().startsWith(RDF.NAMESPACE)
                        && !theValue.stringValue().startsWith(OWL.NAMESPACE);
            }
        });

        Map<Resource, Collection<URI>> aMap = new HashMap<Resource, Collection<URI>>();

        for (Resource aClass : aClasses) {
            if (aClass instanceof BNode) {
                continue;
            }
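            // start with properties whose rdfs:domain is this class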
            Collection<URI> aProps = Sets.newHashSet(Iterators.transform(
                    AdunaIterations.iterator(Repositories.getStatements(aRepository, null, RDFS.DOMAIN, aClass)),
                    Functions.compose(Functions2.<Resource, URI>cast(URI.class), new StatementToSubject())));

            // infer properties based on usage in actual instance data
            for (BindingSet aBinding : AdunaIterations.iterable(Repositories.selectQuery(aRepository,
                    QueryLanguage.SPARQL, "select distinct ?p where { ?s rdf:type <" + aClass + ">. ?s ?p ?o }"))) {
                aProps.add((URI) aBinding.getValue("p"));
            }

            // don't include rdf:type as a property
            aProps = Collections2.filter(aProps, new Predicate<URI>() {
                public boolean apply(final URI theValue) {
                    return !RDF.TYPE.equals(theValue);
                }
            });

            aMap.put(aClass, aProps);
        }

        if (!theDirToSave.exists()) {
            if (!theDirToSave.mkdirs()) {
                throw new IOException("Could not create output directory");
            }
        }

        for (Resource aClass : aMap.keySet()) {
            String aSrc = toSource(thePackageName, aRepository, aClass, aMap);

            if (aSrc == null) {
                continue;
            }

            File aFile = new File(theDirToSave, className(aClass) + ".java");

            System.out.println("Writing source to file: " + aFile.getName());

            Files.write(aSrc, aFile, Charsets.UTF_8);
        }
    }

    public static void main(String[] args) throws Exception {
        //aGraph.read(new URL("http://xmlns.com/foaf/spec/index.rdf").openStream());
        //      File aOut = new File("/Users/mhgrove/work/GitHub/empire/core/src/com/clarkparsia/empire/codegen/test/");
        //
        //      generateSourceFiles("com.clarkparsia.empire.codegen.test", new File("test/data/nasa.nt").toURI().toURL(), RDFFormat.NTRIPLES, aOut);

        if (args.length < 4) {
            System.err.println(
                    "Must provide four arguments to the program, the package name, ontology URL, rdf format of the ontology (rdf/xml|turtle|ntriples), and the output directory for the source code.\n");
            System.err.println("For example:\n");
            System.err.println(
                    "\tBeanGenerator my.package.domain /usr/local/files/myontology.ttl turtle /usr/local/code/src/my/package/domain");

            return;
        }

        URL aURL;

        if (NetUtils.isURL(args[1])) {
            aURL = new URL(args[1]);
        } else {
            aURL = new File(args[1]).toURI().toURL();
        }

        generateSourceFiles(args[0], aURL, RDFFormat.valueOf(args[2]), new File(args[3]));
    }

    private static class StatementToObject implements Function<Statement, Value> {
        public Value apply(final Statement theIn) {
            return theIn.getObject();
        }
    }

    private static class StatementToSubject implements Function<Statement, Resource> {
        public Resource apply(final Statement theIn) {
            return theIn.getSubject();
        }
    }
}
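
For reference, given an ontology class such as foaf:Person with a foaf:name property, toSource(...) above would emit an interface shaped roughly like the one below (whitespace tidied for readability; the FOAF URIs are only an illustration, and the actual superinterfaces and property set depend on the ontology):

package my.domain;

import java.util.*;
import javax.persistence.Entity;
import com.clarkparsia.empire.SupportsRdfId;
import com.clarkparsia.empire.annotation.*;

@Entity
@RdfsClass("http://xmlns.com/foaf/0.1/Person")
public interface Person extends SupportsRdfId {

    @RdfProperty("http://xmlns.com/foaf/0.1/name")
    public String getName();
    public void setName(String theValue);
}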