eu.larkc.iris.Main.java Source code

Introduction

Here is the source code for eu.larkc.iris.Main.java, the command-line entry point of the IRIS distributed reasoner. The class is a Hadoop Tool that imports RDF or N-Triple facts, evaluates Datalog or RIF rules with a distributed bottom-up strategy built on Cascading, and exports the inferred results.

Source

/*
 * Copyright 2010 Softgress - http://www.softgress.com/
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package eu.larkc.iris;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.deri.iris.EvaluationException;
import org.deri.iris.api.basics.IQuery;
import org.deri.iris.api.basics.IRule;
import org.deri.iris.api.terms.IVariable;
import org.deri.iris.compiler.Parser;
import org.deri.iris.compiler.ParserException;
import org.deri.iris.evaluation.IEvaluationStrategy;
import org.deri.iris.storage.IRelation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import at.sti2.rif4j.parser.xml.XmlParser;
import at.sti2.rif4j.rule.Document;
import at.sti2.rif4j.translator.iris.RifToIrisTranslator;
import cascading.flow.FlowConnector;
import cascading.flow.MultiMapReducePlanner;
import cascading.operation.DebugLevel;
import cascading.tap.Hfs;
import cascading.tap.Tap;
import cascading.tuple.Fields;
import cascading.tuple.TupleEntry;
import cascading.tuple.TupleEntryIterator;
import eu.larkc.iris.evaluation.bottomup.DistributedBottomUpEvaluationStrategyFactory;
import eu.larkc.iris.evaluation.bottomup.naive.DistributedEvaluatorFactory;
import eu.larkc.iris.exports.Exporter;
import eu.larkc.iris.imports.Importer;
import eu.larkc.iris.indexing.DistributedFileSystemManager;
import eu.larkc.iris.indexing.PredicateData;

/**
 * Command-line entry point of the IRIS distributed reasoner. Implements the
 * Hadoop Tool interface and dispatches to the import, rule-processing,
 * export, test and configuration-viewing operations selected on the command
 * line.
 *
 * @author valer
 */
@SuppressWarnings("deprecation")
public class Main extends Configured implements Tool {

    private static final Logger logger = LoggerFactory.getLogger(Main.class);

    public enum RULES_TYPE {
        DATALOG, RIF
    }

    private String project;

    private String importName = null;

    private boolean rdfImporter = false;
    private boolean ntripleImporter = false;
    private boolean tester = false;
    private boolean processor = false;
    private boolean rdfExporter = false;
    private boolean ntripleExporter = false;
    private boolean viewConfig = false;

    //rdf importer args
    private String storageId = null;

    //ntriple importer
    private String inPath = null;

    //ntriple exporter
    private String outPath = null;

    //test args
    private String sourcePath = null;

    private RULES_TYPE rulesType;
    private String rulesFile;
    private boolean keepResults = false;
    private String resultsName;

    private eu.larkc.iris.Configuration defaultConfiguration;
    //transient private static Map<Object, Object> properties = new HashMap<Object, Object>();

    protected List<IRule> rules;

    private void printUsage() {
        System.out.println("<project_name> <-importRdf storage_id import_name | "
                + "-importNTriple path_to_file import_name | "
                + "-process rules_type:<DATALOG|RIF> rules_file_path keep_results:<true|false> results_name | "
                + "-exportRdf storage_id results_name | "
                + "-exportNTriple path_to_export_file results_name | "
                + "-test source_path | -viewConfig>");
    }
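
    /*
     * For example, "-process DATALOG /path/rules.dlog true inferred" selects
     * the Datalog parser, reads /path/rules.dlog, keeps the results and stores
     * them under the name "inferred" (the path shown here is illustrative).
     */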

    private void processUserArguments(String[] args) {
        // at least <project_name> and an operation flag are required
        if (args.length < 2) {
            printUsage();
            return;
        }

        project = args[0];

        String operation = args[1];
        if (operation.equalsIgnoreCase("-importRdf")) {
            rdfImporter = true;
            storageId = args[2];
            importName = args[3];
        } else if (operation.equalsIgnoreCase("-importNTriple")) {
            ntripleImporter = true;
            inPath = args[2];
            importName = args[3];
        } else if (operation.equalsIgnoreCase("-process")) {
            processer = true;
            rulesType = RULES_TYPE.valueOf(args[2].toUpperCase());
            rulesFile = args[3];
            resultsName = args[4];
        } else if (operation.equalsIgnoreCase("-exportRdf")) {
            rdfExporter = true;
            storageId = args[2];
            resultsName = args[3];
        } else if (operation.equalsIgnoreCase("-exportNTriple")) {
            ntripleExporter = true;
            outPath = args[2];
            resultsName = args[3];
        } else if (operation.equals("-test")) {
            tester = true;
            sourcePath = args[2];
        } else if (operation.equals("-viewConfig")) {
            viewConfig = true;
        }
    }

    private JobConf setupJob(Configuration conf) {
        JobConf jobConf = new JobConf(conf, Main.class);

        // run the job here.

        // settings tuned for a real cluster
        jobConf.set("dfs.blocksize", "536870912"); // 512 MB HDFS blocks
        jobConf.set("dfs.namenode.handler.count", "40");
        //jobConf.set("dfs.replication", "1");
        jobConf.set("mapreduce.reduce.shuffle.parallelcopies", "10");
        jobConf.set("mapreduce.task.io.sort.factor", "100");
        jobConf.set("mapreduce.task.io.sort.mb", "1024"); // 1 GB sort buffer
        jobConf.set("io.file.buffer.size", "131072"); // 128 KB
        jobConf.set("mapred.child.java.opts", "-Xmx2560m");
        jobConf.set("mapred.child.ulimit", "4194304"); // 4 GB virtual memory limit per task
        jobConf.set("mapred.min.split.size", "536870912"); // 512 MB minimum input splits
        jobConf.set("mapreduce.input.fileinputformat.split.minsize", "536870912");
        jobConf.set("mapreduce.reduce.merge.inmem.threshold", "0");

        /* compression settings (currently disabled)
        jobConf.set("mapreduce.map.output.compress", "false");
        jobConf.set("mapreduce.output.fileoutputformat.compress", "true");
        jobConf.set("mapreduce.output.fileoutputformat.compression.type", "BLOCK");
        */

        // IMPORTANT: recurse into nested input directories; without this the job fails with:
        // java.io.FileNotFoundException: File does not exist: hdfs://ec2-50-19-191-200.compute-1.amazonaws.com:8020/user/root/lubm/facts/lubm50/data
        jobConf.setBoolean("mapred.input.dir.recursive", true);

        jobConf.set("cascading.serialization.tokens",
                "130=eu.larkc.iris.storage.IRIWritable,131=eu.larkc.iris.storage.StringTermWritable");
        defaultConfiguration.flowProperties.put("cascading.serialization.tokens",
                "130=eu.larkc.iris.storage.IRIWritable,131=eu.larkc.iris.storage.StringTermWritable");

        /*
         if( System.getProperty("log4j.logger") != null )
            defaultConfiguration.flowProperties.put( "log4j.logger", System.getProperty("log4j.logger") );
        */

        //jobConf.set("mapred.min.split.size", "134217728");
        //jobConf.set("mapred.child.java.opts", "-Xms64m -Xmx512m");
        jobConf.setMapSpeculativeExecution(false);
        jobConf.setReduceSpeculativeExecution(false);

        //FIXME
        //jobConf.setNumMapTasks(8);
        jobConf.setNumReduceTasks(32);

        FlowConnector.setDebugLevel(defaultConfiguration.flowProperties, DebugLevel.VERBOSE);
        MultiMapReducePlanner.setJobConf(defaultConfiguration.flowProperties, jobConf);

        //Flow.setJobPollingInterval(defaultConfiguration.flowProperties, 500);

        return jobConf;
    }
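
    // Note: setupJob() runs after the generic Hadoop options have been parsed
    // in run(), so the values it sets programmatically take precedence over
    // -D overrides for the same keys.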

    public int doRdfImport(eu.larkc.iris.Configuration configuration) {
        new Importer(configuration).importFromRdf(storageId, importName);
        return 0;
    }

    public int doNTripleImport(eu.larkc.iris.Configuration configuration) {
        new Importer(configuration).importFromFile(inPath, importName);
        return 0;
    }

    public int doRdfExport(eu.larkc.iris.Configuration configuration) {
        new Exporter(configuration).exportToRdf(storageId, resultsName);
        return 0;
    }

    public int doNTripleExport(eu.larkc.iris.Configuration configuration) {
        new Exporter(configuration).exportToFile(outPath, resultsName);
        return 0;
    }

    public int doTester(eu.larkc.iris.Configuration configuration) {
        logger.info("do tester ...");

        //JobConf jobConf = new JobConf();
        //jobConf.set("cascading.serialization.tokens", "130=eu.larkc.iris.storage.IRIWritable,131=eu.larkc.iris.storage.PredicateWritable,132=eu.larkc.iris.storage.StringTermWritable");
        //jobConf.setBoolean("mapred.input.dir.recursive", true);

        // open the raw tuples stored under the project's source path on HDFS
        Tap source = new Hfs(Fields.ALL, project + "/" + sourcePath);
        TupleEntryIterator tei = null;
        try {
            tei = source.openForRead(configuration.jobConf);
        } catch (IOException e) {
            logger.error("io exception", e);
            return -1;
        }

        // print the first ten tuples for inspection
        int i = 0;
        while (tei.hasNext() && i < 10) {
            TupleEntry te = tei.next();
            logger.info(te.toString());
            i++;
        }

        return 0;
    }

    public int doViewConfig(eu.larkc.iris.Configuration configuration) {
        DistributedFileSystemManager distributedFileSystemManager = new DistributedFileSystemManager(configuration);
        List<PredicateData> predicatesData = distributedFileSystemManager.getPredicateData();
        for (PredicateData predicateData : predicatesData) {
            logger.info(predicateData.toString());
        }
        return 0;
    }

    public int doProcess() {
        // evaluate the parsed rules bottom-up; no explicit query is posed
        defaultConfiguration.keepResults = keepResults;
        defaultConfiguration.resultsName = resultsName;
        try {
            evaluate(null, new ArrayList<IVariable>(), defaultConfiguration);
        } catch (EvaluationException e) {
            logger.error("evaluation exception", e);
            return -1;
        }

        return 0;
    }

    @Override
    public int run(String[] args) throws Exception {
        GenericOptionsParser gop = new GenericOptionsParser(getConf(), new org.apache.commons.cli.Options(), args);

        processUserArguments(gop.getRemainingArgs());

        rules = createRules();

        Configuration hadoopConf = gop.getConfiguration();
        defaultConfiguration.hadoopConfiguration = hadoopConf;
        defaultConfiguration.jobConf = setupJob(hadoopConf);
        defaultConfiguration.project = project;

        logger.info("predicate indexing is " + (defaultConfiguration.doPredicateIndexing ? "ON" : "OFF"));

        if (rdfImporter) {
            return doRdfImport(defaultConfiguration);
        } else if (ntripleImporter) {
            return doNTripleImport(defaultConfiguration);
        } else if (tester) {
            return doTester(defaultConfiguration);
        } else if (processor) {
            return doProcess();
        } else if (rdfExporter) {
            return doRdfExport(defaultConfiguration);
        } else if (ntripleExporter) {
            return doNTripleExport(defaultConfiguration);
        } else if (viewConfig) {
            return doViewConfig(defaultConfiguration);
        }

        return -1;
    }

    protected List<IRule> createRules() {
        List<IRule> rules = null;
        if (rulesType == RULES_TYPE.DATALOG) {
            Collection<String> expressions = new ArrayList<String>();
            BufferedReader br = null;
            try {
                br = new BufferedReader(new FileReader(new File(rulesFile)));
                String line = null;
                while ((line = br.readLine()) != null) {
                    logger.info(line);
                    expressions.add(line);
                }
            } catch (FileNotFoundException e) {
                logger.error("the rules file can not be located", e);
                throw new RuntimeException("the rules file can not be located", e);
            } catch (IOException e) {
                logger.error("io exception reading the rules file", e);
                throw new RuntimeException("io exception reading the rules file", e);
            } finally {
                if (br != null) {
                    try {
                        br.close();
                    } catch (IOException e) {
                        logger.warn("could not close the rules file reader", e);
                    }
                }
            }
            //expressions.add("subClassOf( ?X, ?Z ) :- subClassOf( ?X, ?Y ), subClassOf( ?Y, ?Z ).");
            //expressions.add("type( ?X, ?Z ) :- type( ?X, ?Y ), subClassOf( ?Y, ?Z ).");

            Parser parser = new Parser();
            StringBuilder buffer = new StringBuilder();

            // join the expressions, one rule per line, before handing them to the parser
            for (String expression : expressions) {
                buffer.append(expression).append('\n');
            }

            try {
                parser.parse(buffer.toString());
            } catch (ParserException e) {
                logger.error("rules parser exception", e);
                throw new RuntimeException("rules parser exception", e);
            }

            rules = parser.getRules();
        }

        if (rulesType == RULES_TYPE.RIF) {
            XmlParser parser = new XmlParser(true);
            Document rifDocument;
            try {
                rifDocument = parser.parseDocument(new FileReader(new File(rulesFile)));
            } catch (Exception e) {
                logger.error("exception reading/parsing rules RIF file!", e);
                throw new RuntimeException("exception reading/parsing rules RIF file!", e);
            }

            RifToIrisTranslator translator = new RifToIrisTranslator();
            translator.translate(rifDocument);
            rules = translator.getRules();
        }

        return rules;
    }
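
    /*
     * For reference, a Datalog rules file contains one rule per line in the
     * IRIS syntax shown in the commented-out examples above, e.g.:
     *
     *   subClassOf( ?X, ?Z ) :- subClassOf( ?X, ?Y ), subClassOf( ?Y, ?Z ).
     *   type( ?X, ?Z ) :- type( ?X, ?Y ), subClassOf( ?Y, ?Z ).
     */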

    public Main() {
        logger.info("start iris distributed reasoner ...");

        org.apache.hadoop.conf.Configuration configuration = new org.apache.hadoop.conf.Configuration();
        setConf(configuration);

        defaultConfiguration = new eu.larkc.iris.Configuration();
        // default to the distributed bottom-up evaluation strategy with the naive evaluator
        defaultConfiguration.evaluationStrategyFactory = new DistributedBottomUpEvaluationStrategyFactory(
                new DistributedEvaluatorFactory());
    }

    public static void main(String[] args) throws Exception {
        int ret = ToolRunner.run(new Main(), args); // ToolRunner parses generic Hadoop options, then calls run()
        System.exit(ret);
    }

    private void evaluate(IQuery query, List<IVariable> outputVariables, eu.larkc.iris.Configuration configuration)
            throws EvaluationException {
        //IRelation relation = evaluate(FactsFactory.getInstance("default"), "?- p(?X, ?Y).");

        IEvaluationStrategy strategy = configuration.evaluationStrategyFactory.createEvaluator(rules,
                configuration);

        @SuppressWarnings("unused")
        IRelation relation = strategy.evaluateQuery(query, outputVariables);
    }

}
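
Usage notes

Main is launched through ToolRunner, so generic Hadoop options (for example -conf, -D, -fs, -jt) may precede the application arguments; note that keys set programmatically in setupJob() override -D values for the same keys. A hypothetical end-to-end run, with an illustrative jar name and paths (only the argument layout comes from printUsage()):

hadoop jar larkc-iris.jar eu.larkc.iris.Main lubm -importNTriple /data/lubm.nt facts
hadoop jar larkc-iris.jar eu.larkc.iris.Main lubm -process DATALOG /data/rules.dlog true inferred
hadoop jar larkc-iris.jar eu.larkc.iris.Main lubm -exportNTriple /data/inferred.nt inferred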