Example usage for org.apache.commons.cli Option UNLIMITED_VALUES

Introduction

This page collects example usages of the org.apache.commons.cli Option.UNLIMITED_VALUES constant, taken from real source files.

Prototype

int UNLIMITED_VALUES

Document

Constant that specifies that an option accepts an unlimited number of argument values.

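For orientation, here is a minimal, self-contained sketch (hypothetical class and option names, assuming Commons CLI 1.3+) of how an option configured with Option.UNLIMITED_VALUES collects every value supplied for it:

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;

public class UnlimitedValuesExample {
    public static void main(String[] args) throws ParseException {
        Options options = new Options();

        // -f/--files takes any number of values, e.g. -f a.xml b.xml c.xml
        Option files = new Option("f", "files", true, "Input files");
        files.setArgs(Option.UNLIMITED_VALUES);
        options.addOption(files);

        CommandLine cmd = new DefaultParser().parse(options, args);

        // getOptionValues returns every value given for the option (null if absent)
        String[] values = cmd.getOptionValues("files");
        if (values != null) {
            for (String value : values) {
                System.out.println(value);
            }
        }
    }
}

Invoked as "java UnlimitedValuesExample -f a.xml b.xml c.xml", the sketch prints a.xml, b.xml, and c.xml.
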
Usage

From source file: fr.tpt.s3.mcdag.scheduling.Main.java

public static void main(String[] args) throws IOException, InterruptedException {

    /* Command line options */
    Options options = new Options();

    Option input = new Option("i", "input", true, "MC-DAG XML Models");
    input.setRequired(true);
    input.setArgs(Option.UNLIMITED_VALUES); // Accept an unlimited number of input XML model files
    options.addOption(input);

    Option outSched = new Option("os", "out-scheduler", false, "Write the scheduling tables into a file.");
    outSched.setRequired(false);
    options.addOption(outSched);

    Option outPrism = new Option("op", "out-prism", false, "Write PRISM model into a file.");
    outPrism.setRequired(false);
    options.addOption(outPrism);

    Option jobs = new Option("j", "jobs", true, "Number of threads to be launched.");
    jobs.setRequired(false);
    options.addOption(jobs);

    Option debugOpt = new Option("d", "debug", false, "Enabling debug.");
    debugOpt.setRequired(false);
    options.addOption(debugOpt);

    Option preemptOpt = new Option("p", "preempt", false, "Count for preemptions.");
    preemptOpt.setRequired(false);
    options.addOption(preemptOpt);

    CommandLineParser parser = new DefaultParser();
    HelpFormatter formatter = new HelpFormatter();
    CommandLine cmd;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        System.err.println(e.getMessage());
        formatter.printHelp("MC-DAG framework", options);

        System.exit(1);
        return;
    }

    String inputFilePath[] = cmd.getOptionValues("input");
    boolean bOutSched = cmd.hasOption("out-scheduler");
    boolean bOutPrism = cmd.hasOption("out-prism");
    boolean debug = cmd.hasOption("debug");
    boolean preempt = cmd.hasOption("preempt");
    boolean levels = cmd.hasOption("n-levels");
    int nbFiles = inputFilePath.length;

    int nbJobs = 1;
    if (cmd.hasOption("jobs"))
        nbJobs = Integer.parseInt(cmd.getOptionValue("jobs"));

    if (debug)
        System.out.println("[DEBUG] Launching " + inputFilePath.length + " thread(s).");

    int i_files = 0;
    ExecutorService executor = Executors.newFixedThreadPool(nbJobs);

    /* Launch threads to solve allocation */
    while (i_files != nbFiles) {
        SchedulingThread ft = new SchedulingThread(inputFilePath[i_files], bOutSched, bOutPrism, debug,
                preempt);

        ft.setLevels(levels);
        executor.execute(ft);
        i_files++;
    }

    executor.shutdown();
    executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
    System.out.println("[FRAMEWORK Main] DONE");
}

From source file: fr.tpt.s3.mcdag.bench.MainBench.java

public static void main(String[] args) throws IOException, InterruptedException {

    // Command line options
    Options options = new Options();

    Option input = new Option("i", "input", true, "MC-DAG XML models");
    input.setRequired(true);
    input.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(input);

    Option output = new Option("o", "output", true, "Folder where results have to be written.");
    output.setRequired(true);
    options.addOption(output);

    Option uUti = new Option("u", "utilization", true, "Utilization.");
    uUti.setRequired(true);
    options.addOption(uUti);

    Option output2 = new Option("ot", "output-total", true, "File where total results are being written");
    output2.setRequired(true);
    options.addOption(output2);

    Option oCores = new Option("c", "cores", true, "Cores given to the test");
    oCores.setRequired(true);
    options.addOption(oCores);

    Option oLvls = new Option("l", "levels", true, "Levels tested for the system");
    oLvls.setRequired(true);
    options.addOption(oLvls);

    Option jobs = new Option("j", "jobs", true, "Number of threads to be launched.");
    jobs.setRequired(false);
    options.addOption(jobs);

    Option debug = new Option("d", "debug", false, "Debug logs.");
    debug.setRequired(false);
    options.addOption(debug);

    /*
     * Parsing of the command line
     */
    CommandLineParser parser = new DefaultParser();
    HelpFormatter formatter = new HelpFormatter();
    CommandLine cmd;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        System.err.println(e.getMessage());
        formatter.printHelp("Benchmarks MultiDAG", options);
        System.exit(1);
        return;
    }

    String inputFilePath[] = cmd.getOptionValues("input");
    String outputFilePath = cmd.getOptionValue("output");
    String outputFilePathTotal = cmd.getOptionValue("output-total");
    double utilization = Double.parseDouble(cmd.getOptionValue("utilization"));
    boolean boolDebug = cmd.hasOption("debug");
    int nbLvls = Integer.parseInt(cmd.getOptionValue("levels"));
    int nbJobs = 1;
    int nbFiles = inputFilePath.length;

    if (cmd.hasOption("jobs"))
        nbJobs = Integer.parseInt(cmd.getOptionValue("jobs"));

    int nbCores = Integer.parseInt(cmd.getOptionValue("cores"));

    /*
     *  While files need to be allocated
     *  run the tests in the pool of threads
     */

    // For dual-criticality systems we call a specific thread
    if (nbLvls == 2) {

        System.out.println(">>>>>>>>>>>>>>>>>>>>> NB levels " + nbLvls);

        int i_files2 = 0;
        String outFile = outputFilePath.substring(0, outputFilePath.lastIndexOf('.'))
                .concat("-schedulability.csv");
        PrintWriter writer = new PrintWriter(outFile, "UTF-8");
        writer.println(
                "Thread; File; FSched (%); FPreempts; FAct; LSched (%); LPreempts; LAct; ESched (%); EPreempts; EAct; HSched(%); HPreempts; HAct; Utilization");
        writer.close();

        ExecutorService executor2 = Executors.newFixedThreadPool(nbJobs);
        while (i_files2 != nbFiles) {
            BenchThreadDualCriticality bt2 = new BenchThreadDualCriticality(inputFilePath[i_files2], outFile,
                    nbCores, boolDebug);

            executor2.execute(bt2);
            i_files2++;
        }

        executor2.shutdown();
        executor2.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);

        int fedTotal = 0;
        int laxTotal = 0;
        int edfTotal = 0;
        int hybridTotal = 0;
        int fedPreempts = 0;
        int laxPreempts = 0;
        int edfPreempts = 0;
        int hybridPreempts = 0;
        int fedActiv = 0;
        int laxActiv = 0;
        int edfActiv = 0;
        int hybridActiv = 0;
        // Read lines in file and do average
        int i = 0;
        File f = new File(outFile);
        @SuppressWarnings("resource")
        Scanner line = new Scanner(f);
        while (line.hasNextLine()) {
            String s = line.nextLine();
            if (i > 0) { // To skip the first line
                try (Scanner inLine = new Scanner(s).useDelimiter("; ")) {
                    int j = 0;

                    while (inLine.hasNext()) {
                        String val = inLine.next();
                        if (j == 2) {
                            fedTotal += Integer.parseInt(val);
                        } else if (j == 3) {
                            fedPreempts += Integer.parseInt(val);
                        } else if (j == 4) {
                            fedActiv += Integer.parseInt(val);
                        } else if (j == 5) {
                            laxTotal += Integer.parseInt(val);
                        } else if (j == 6) {
                            laxPreempts += Integer.parseInt(val);
                        } else if (j == 7) {
                            laxActiv += Integer.parseInt(val);
                        } else if (j == 8) {
                            edfTotal += Integer.parseInt(val);
                        } else if (j == 9) {
                            edfPreempts += Integer.parseInt(val);
                        } else if (j == 10) {
                            edfActiv += Integer.parseInt(val);
                        } else if (j == 11) {
                            hybridTotal += Integer.parseInt(val);
                        } else if (j == 12) {
                            hybridPreempts += Integer.parseInt(val);
                        } else if (j == 13) {
                            hybridActiv += Integer.parseInt(val);
                        }
                        j++;
                    }
                }
            }
            i++;
        }

        // Write percentage
        double fedPerc = (double) fedTotal / nbFiles;
        double laxPerc = (double) laxTotal / nbFiles;
        double edfPerc = (double) edfTotal / nbFiles;
        double hybridPerc = (double) hybridTotal / nbFiles;

        double fedPercPreempts = (double) fedPreempts / fedActiv;
        double laxPercPreempts = (double) laxPreempts / laxActiv;
        double edfPercPreempts = (double) edfPreempts / edfActiv;
        double hybridPercPreempts = (double) hybridPreempts / hybridActiv;

        Writer wOutput = new BufferedWriter(new FileWriter(outputFilePathTotal, true));
        wOutput.write(Thread.currentThread().getName() + "; " + utilization + "; " + fedPerc + "; "
                + fedPreempts + "; " + fedActiv + "; " + fedPercPreempts + "; " + laxPerc + "; " + laxPreempts
                + "; " + laxActiv + "; " + laxPercPreempts + "; " + edfPerc + "; " + edfPreempts + "; "
                + edfActiv + "; " + edfPercPreempts + "; " + hybridPerc + "; " + hybridPreempts + "; "
                + hybridActiv + "; " + hybridPercPreempts + "\n");
        wOutput.close();

    } else if (nbLvls > 2) {
        int i_files2 = 0;
        String outFile = outputFilePath.substring(0, outputFilePath.lastIndexOf('.'))
                .concat("-schedulability.csv");
        PrintWriter writer = new PrintWriter(outFile, "UTF-8");
        writer.println(
                "Thread; File; LSched (%); LPreempts; LAct; ESched (%); EPreempts; EAct; HSched(%); HPreempts; HAct; Utilization");
        writer.close();

        ExecutorService executor2 = Executors.newFixedThreadPool(nbJobs);
        while (i_files2 != nbFiles) {
            BenchThreadNLevels bt2 = new BenchThreadNLevels(inputFilePath[i_files2], outFile, nbCores,
                    boolDebug);

            executor2.execute(bt2);
            i_files2++;
        }

        executor2.shutdown();
        executor2.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);

        int laxTotal = 0;
        int edfTotal = 0;
        int hybridTotal = 0;
        int laxPreempts = 0;
        int edfPreempts = 0;
        int hybridPreempts = 0;
        int laxActiv = 0;
        int edfActiv = 0;
        int hybridActiv = 0;
        // Read lines in file and do average
        int i = 0;
        File f = new File(outFile);
        @SuppressWarnings("resource")
        Scanner line = new Scanner(f);
        while (line.hasNextLine()) {
            String s = line.nextLine();
            if (i > 0) { // To skip the first line
                try (Scanner inLine = new Scanner(s).useDelimiter("; ")) {
                    int j = 0;

                    while (inLine.hasNext()) {
                        String val = inLine.next();
                        if (j == 2) {
                            laxTotal += Integer.parseInt(val);
                        } else if (j == 3) {
                            laxPreempts += Integer.parseInt(val);
                        } else if (j == 4) {
                            laxActiv += Integer.parseInt(val);
                        } else if (j == 5) {
                            edfTotal += Integer.parseInt(val);
                        } else if (j == 6) {
                            edfPreempts += Integer.parseInt(val);
                        } else if (j == 7) {
                            edfActiv += Integer.parseInt(val);
                        } else if (j == 8) {
                            hybridTotal += Integer.parseInt(val);
                        } else if (j == 9) {
                            hybridPreempts += Integer.parseInt(val);
                        } else if (j == 10) {
                            hybridActiv += Integer.parseInt(val);
                        }
                        j++;
                    }
                }
            }
            i++;
        }

        // Write percentage
        double laxPerc = (double) laxTotal / nbFiles;
        double edfPerc = (double) edfTotal / nbFiles;
        double hybridPerc = (double) hybridTotal / nbFiles;

        double laxPercPreempts = (double) laxPreempts / laxActiv;
        double edfPercPreempts = (double) edfPreempts / edfActiv;
        double hybridPercPreempts = (double) hybridPreempts / hybridActiv;

        Writer wOutput = new BufferedWriter(new FileWriter(outputFilePathTotal, true));
        wOutput.write(Thread.currentThread().getName() + "; " + utilization + "; " + laxPerc + "; "
                + laxPreempts + "; " + laxActiv + "; " + laxPercPreempts + "; " + edfPerc + "; " + edfPreempts
                + "; " + edfActiv + "; " + edfPercPreempts + "; " + hybridPerc + "; " + hybridPreempts + "; "
                + hybridActiv + "; " + hybridPercPreempts + "\n");
        wOutput.close();

    } else {
        System.err.println("Wrong number of levels");
        System.exit(-1);
    }

    System.out.println("[BENCH Main] Done benchmarking U = " + utilization + " Levels " + nbLvls);
}

From source file: edu.wisc.doit.tcrypt.cli.TokenCrypt.java

public static void main(String[] args) throws IOException {
    // create Options object
    final Options options = new Options();

    // operation opt group
    final OptionGroup cryptTypeGroup = new OptionGroup();
    cryptTypeGroup.addOption(new Option("e", "encrypt", false, "Encrypt a token"));
    cryptTypeGroup.addOption(new Option("d", "decrypt", false, "Decrypt a token"));
    cryptTypeGroup
            .addOption(new Option("c", "check", false, "Check if the string looks like an encrypted token"));
    cryptTypeGroup.setRequired(true);
    options.addOptionGroup(cryptTypeGroup);

    // token source opt group
    final OptionGroup tokenGroup = new OptionGroup();
    final Option tokenOpt = new Option("t", "token", true, "The token(s) to operate on");
    tokenOpt.setArgs(Option.UNLIMITED_VALUES);
    tokenGroup.addOption(tokenOpt);
    final Option tokenFileOpt = new Option("f", "file", true,
            "A file with one token per line to operate on, if - is specified stdin is used");
    tokenGroup.addOption(tokenFileOpt);
    tokenGroup.setRequired(true);
    options.addOptionGroup(tokenGroup);

    final Option keyOpt = new Option("k", "keyFile", true,
            "Key file to use. Must be a private key for decryption and a public key for encryption");
    keyOpt.setRequired(true);
    options.addOption(keyOpt);

    // create the parser
    final CommandLineParser parser = new GnuParser();
    CommandLine line = null;
    try {
        // parse the command line arguments
        line = parser.parse(options, args);
    } catch (ParseException exp) {
        // automatically generate the help statement
        System.err.println(exp.getMessage());
        final HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("java " + TokenCrypt.class.getName(), options, true);
        System.exit(1);
    }

    final Reader keyReader = createKeyReader(line);

    final TokenHandler tokenHandler = createTokenHandler(line, keyReader);

    if (line.hasOption("t")) {
        //tokens on cli
        final String[] tokens = line.getOptionValues("t");
        for (final String token : tokens) {
            handleToken(tokenHandler, token);
        }
    } else {
        //tokens from a file
        final String tokenFile = line.getOptionValue("f");
        final BufferedReader fileReader;
        if ("-".equals(tokenFile)) {
            fileReader = new BufferedReader(new InputStreamReader(System.in));
        } else {
            fileReader = new BufferedReader(new FileReader(tokenFile));
        }

        while (true) {
            final String token = fileReader.readLine();
            if (token == null) {
                break;
            }

            handleToken(tokenHandler, token);
        }
    }
}

From source file: com.versusoft.packages.ooo.odt2daisy.gui.CommandLineGUI.java

public static void main(String args[]) throws IOException {

    Handler fh = new FileHandler(LOG_FILENAME_PATTERN);
    fh.setFormatter(new SimpleFormatter());

    //removeAllLoggersHandlers(Logger.getLogger(""));

    Logger.getLogger("").addHandler(fh);
    Logger.getLogger("").setLevel(Level.FINEST);

    Options options = new Options();

    Option option1 = new Option("in", "name of ODT file (required)");
    option1.setRequired(true);
    option1.setArgs(1);

    Option option2 = new Option("out", "name of DAISY DTB file (required)");
    option2.setRequired(true);
    option2.setArgs(1);

    Option option3 = new Option("h", "show this help");
    option3.setArgs(Option.UNLIMITED_VALUES);

    Option option4 = new Option("alt", "use alternate Level Markup");

    Option option5 = new Option("u", "UID of DAISY DTB (optional)");
    option5.setArgs(1);

    Option option6 = new Option("t", "Title of DAISY DTB");
    option6.setArgs(1);

    Option option7 = new Option("c", "Creator of DAISY DTB");
    option7.setArgs(1);

    Option option8 = new Option("p", "Publisher of DAISY DTB");
    option8.setArgs(1);

    Option option9 = new Option("pr", "Producer of DAISY DTB");
    option9.setArgs(1);

    Option option10 = new Option("pic", "set Picture directory");
    option10.setArgs(1);

    Option option11 = new Option("page", "enable pagination");
    option11.setArgs(0);

    Option option12 = new Option("css", "write CSS file");
    option12.setArgs(0);

    options.addOption(option1);
    options.addOption(option2);
    options.addOption(option3);
    options.addOption(option4);
    options.addOption(option5);
    options.addOption(option6);
    options.addOption(option7);
    options.addOption(option8);
    options.addOption(option9);
    options.addOption(option10);
    options.addOption(option11);
    options.addOption(option12);

    CommandLineParser parser = new BasicParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        //System.out.println("***ERROR: " + e.getClass() + ": " + e.getMessage());

        printHelp();
        return;
    }

    if (cmd.hasOption("help")) {
        printHelp();
        return;
    }

    try {

        Odt2Daisy odt2daisy = new Odt2Daisy(cmd.getOptionValue("in")); //@todo add initial output directory URL?
        odt2daisy.init();

        if (odt2daisy.isEmptyDocument()) {
            logger.info("Cannot convert empty documents. Export Aborted...");
            System.exit(1);
        }

        //System.out.println("Metadatas");
        //System.out.println("- title: " + odt2daisy.getTitleMeta());
        //System.out.println("- creator: " + odt2daisy.getCreatorMeta());

        if (!odt2daisy.isUsingHeadings()) {
            logger.info("You SHOULD use Heading styles in your document. Export in a single level.");
        }
        //@todo Warning for incompatible image formats should go here. See UnoGui.java.

        if (cmd.hasOption("u")) {
            //System.out.println("arg uid:"+cmd.getOptionValue("u"));
            odt2daisy.setUidParam(cmd.getOptionValue("u"));
        }

        if (cmd.hasOption("t")) {
            //System.out.println("arg title:"+cmd.getOptionValue("t"));
            odt2daisy.setTitleParam(cmd.getOptionValue("t"));
        }

        if (cmd.hasOption("c")) {
            //System.out.println("arg creator:"+cmd.getOptionValue("c"));
            odt2daisy.setCreatorParam(cmd.getOptionValue("c"));
        }

        if (cmd.hasOption("p")) {
            //System.out.println("arg publisher:"+cmd.getOptionValue("p"));
            odt2daisy.setPublisherParam(cmd.getOptionValue("p"));
        }

        if (cmd.hasOption("pr")) {
            //System.out.println("arg producer:"+cmd.getOptionValue("pr"));
            odt2daisy.setProducerParam(cmd.getOptionValue("pr"));
        }

        if (cmd.hasOption("alt")) {
            //System.out.println("arg alt:"+cmd.getOptionValue("alt"));
            odt2daisy.setUseAlternateLevelParam(true);
        }

        if (cmd.hasOption("css")) {
            odt2daisy.setWriteCSSParam(true);
        }

        if (cmd.hasOption("page")) {
            odt2daisy.paginationProcessing();
        }

        odt2daisy.correctionProcessing();

        if (cmd.hasOption("pic")) {

            odt2daisy.convertAsDTBook(cmd.getOptionValue("out"), cmd.getOptionValue("pic"));

        } else {

            logger.info("Language detected: " + odt2daisy.getLangParam());
            odt2daisy.convertAsDTBook(cmd.getOptionValue("out"), Configuration.DEFAULT_IMAGE_DIR);
        }

        boolean valid = odt2daisy.validateDTD(cmd.getOptionValue("out"));

        if (valid) {

            logger.info("DAISY DTBook produced is valid against DTD - Congratulations !");

        } else {

            logger.info("DAISY Book produced isn't valid against DTD - You SHOULD NOT use this DAISY Book !");
            logger.info("Error at line: " + odt2daisy.getErrorHandler().getLineNumber());
            logger.info("Error Message: " + odt2daisy.getErrorHandler().getMessage());
        }

    } catch (Exception e) {

        e.printStackTrace();

    } finally {

        if (fh != null) {
            fh.flush();
            fh.close();
        }
    }

}

From source file: com.akana.demo.freemarker.templatetester.App.java

public static void main(String[] args) {

    final Options options = new Options();

    @SuppressWarnings("static-access")
    Option optionContentType = OptionBuilder.withArgName("content-type").hasArg()
            .withDescription("content type of model").create("content");
    @SuppressWarnings("static-access")
    Option optionUrlPath = OptionBuilder.withArgName("httpRequestLine").hasArg()
            .withDescription("url path and parameters in HTTP Request Line format").create("url");
    @SuppressWarnings("static-access")
    Option optionRootMessageName = OptionBuilder.withArgName("messageName").hasArg()
            .withDescription("root data object name, defaults to 'message'").create("root");
    @SuppressWarnings("static-access")
    Option optionAdditionalMessages = OptionBuilder.withArgName("dataModelPaths")
            .hasArgs(Option.UNLIMITED_VALUES).withDescription("additional message object data sources")
            .create("messages");
    @SuppressWarnings("static-access")
    Option optionDebugMessages = OptionBuilder.hasArg(false)
            .withDescription("Shows debug information about template processing").create("debug");

    Option optionHelp = new Option("help", "print this message");

    options.addOption(optionHelp);
    options.addOption(optionContentType);
    options.addOption(optionUrlPath);
    options.addOption(optionRootMessageName);
    options.addOption(optionAdditionalMessages);
    options.addOption(optionDebugMessages);

    CommandLineParser parser = new DefaultParser();

    CommandLine cmd;
    try {
        cmd = parser.parse(options, args);

        // Check for help flag
        if (cmd.hasOption("help")) {
            showHelp(options);
            return;
        }

        String[] remainingArguments = cmd.getArgs();
        if (remainingArguments.length < 2) {
            showHelp(options);
            return;
        }
        String ftlPath, dataPath = "none";

        ftlPath = remainingArguments[0];
        dataPath = remainingArguments[1];

        String contentType = "text/xml";
        // Discover content type from file extension
        String ext = FilenameUtils.getExtension(dataPath);
        if (ext.equals("json")) {
            contentType = "json";
        } else if (ext.equals("txt")) {
            contentType = "txt";
        }
        // Override discovered content type
        if (cmd.hasOption("content")) {
            contentType = cmd.getOptionValue("content");
        }
        // Root data model name
        String rootMessageName = "message";
        if (cmd.hasOption("root")) {
            rootMessageName = cmd.getOptionValue("root");
        }
        // Additional data models
        String[] additionalModels = new String[0];
        if (cmd.hasOption("messages")) {
            additionalModels = cmd.getOptionValues("messages");
        }
        // Debug Info
        if (cmd.hasOption("debug")) {
            System.out.println(" Processing ftl   : " + ftlPath);
            System.out.println("   with data model: " + dataPath);
            System.out.println(" with content-type: " + contentType);
            System.out.println(" data model object: " + rootMessageName);
            if (cmd.hasOption("messages")) {
                System.out.println("additional models: " + additionalModels.length);
            }
        }

        Configuration cfg = new Configuration(Configuration.VERSION_2_3_23);
        cfg.setDirectoryForTemplateLoading(new File("."));
        cfg.setDefaultEncoding("UTF-8");
        cfg.setTemplateExceptionHandler(TemplateExceptionHandler.RETHROW_HANDLER);

        /* Create the primary data-model */
        Map<String, Object> message = new HashMap<String, Object>();
        if (contentType.contains("json") || contentType.contains("txt")) {
            message.put("contentAsString",
                    FileUtils.readFileToString(new File(dataPath), StandardCharsets.UTF_8));
        } else {
            message.put("contentAsXml", freemarker.ext.dom.NodeModel.parse(new File(dataPath)));
        }

        if (cmd.hasOption("url")) {
            message.put("getProperty", new AkanaGetProperty(cmd.getOptionValue("url")));
        }

        Map<String, Object> root = new HashMap<String, Object>();
        root.put(rootMessageName, message);
        if (additionalModels.length > 0) {
            for (int i = 0; i < additionalModels.length; i++) {
                Map<String, Object> m = createMessageFromFile(additionalModels[i], contentType);
                root.put("message" + i, m);
            }
        }

        /* Get the template (uses cache internally) */
        Template temp = cfg.getTemplate(ftlPath);

        /* Merge data-model with template */
        Writer out = new OutputStreamWriter(System.out);
        temp.process(root, out);

    } catch (ParseException e) {
        showHelp(options);
        System.exit(1);
    } catch (IOException e) {
        System.out.println("Unable to parse ftl.");
        e.printStackTrace();
    } catch (SAXException e) {
        System.out.println("XML parsing issue.");
        e.printStackTrace();
    } catch (ParserConfigurationException e) {
        System.out.println("Unable to configure parser.");
        e.printStackTrace();
    } catch (TemplateException e) {
        System.out.println("Unable to parse template.");
        e.printStackTrace();
    }

}

From source file: edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation.java

/**
 * @param args
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the aggregate functions " + "even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option gOption = new Option("g", "group", true, "set group of datasets for which the aggregate functions"
            + " will be computed, followed by their temporal and spatial attribute indices");
    gOption.setRequired(true);
    gOption.setArgName("GROUP");
    gOption.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(gOption);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println(
                    "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS.");
            formatter.printHelp(
                    "hadoop jar data-polygamy.jar "
                            + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation",
                    options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    String datasetNames = "";
    String datasetIds = "";
    String preProcessingDatasets = "";

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> shortDatasetAggregation = new ArrayList<String>();
    HashMap<String, String> datasetTempAtt = new HashMap<String, String>();
    HashMap<String, String> datasetSpatialAtt = new HashMap<String, String>();
    HashMap<String, String> preProcessingDataset = new HashMap<String, String>();
    HashMap<String, String> datasetId = new HashMap<String, String>();

    boolean removeExistingFiles = cmd.hasOption("f");
    String[] datasetArgs = cmd.getOptionValues("g");

    for (int i = 0; i < datasetArgs.length; i += 3) {
        String dataset = datasetArgs[i];

        // getting pre-processing
        String tempPreProcessing = FrameworkUtils.searchPreProcessing(dataset, s3conf, s3);
        if (tempPreProcessing == null) {
            System.out.println("No pre-processing available for " + dataset);
            continue;
        }
        preProcessingDataset.put(dataset, tempPreProcessing);

        shortDataset.add(dataset);
        datasetTempAtt.put(dataset, ((datasetArgs[i + 1] == "null") ? null : datasetArgs[i + 1]));
        datasetSpatialAtt.put(dataset, ((datasetArgs[i + 2] == "null") ? null : datasetArgs[i + 2]));

        datasetId.put(dataset, null);
    }

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    // getting dataset id

    Path path = null;
    FileSystem fs = null;

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        fs = FileSystem.get(new Configuration());
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    FrameworkUtils.createDir(s3bucket + FrameworkUtils.aggregatesDir, s3conf, s3);

    // getting smallest resolution

    HashMap<String, String> tempResMap = new HashMap<String, String>();
    HashMap<String, String> spatialResMap = new HashMap<String, String>();

    HashMap<String, String> datasetTemporalStrMap = new HashMap<String, String>();
    HashMap<String, String> datasetSpatialStrMap = new HashMap<String, String>();

    HashSet<String> input = new HashSet<String>();

    for (String dataset : shortDataset) {

        String[] datasetArray = preProcessingDataset.get(dataset).split("-");

        String datasetTemporalStr = datasetArray[datasetArray.length - 2];
        int datasetTemporal = utils.temporalResolution(datasetTemporalStr);

        String datasetSpatialStr = datasetArray[datasetArray.length - 1];
        int datasetSpatial = utils.spatialResolution(datasetSpatialStr);

        // finding all possible resolutions

        String[] temporalResolutions = FrameworkUtils.getAggTempResolutions(datasetTemporal);
        String[] spatialResolutions = FrameworkUtils.getAggSpatialResolutions(datasetSpatial);

        String temporalResolution = "";
        String spatialResolution = "";

        String tempRes = "";
        String spatialRes = "";

        boolean dataAdded = false;

        for (int i = 0; i < temporalResolutions.length; i++) {
            for (int j = 0; j < spatialResolutions.length; j++) {

                temporalResolution = temporalResolutions[i];
                spatialResolution = spatialResolutions[j];

                String aggregatesOutputFileName = s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset + "/";

                if (removeExistingFiles) {
                    FrameworkUtils.removeFile(aggregatesOutputFileName, s3conf, s3);
                }

                if (!FrameworkUtils.fileExists(aggregatesOutputFileName, s3conf, s3)) {

                    dataAdded = true;

                    tempRes += temporalResolution + "-";
                    spatialRes += spatialResolution + "-";
                }
            }
        }

        if (dataAdded) {
            input.add(s3bucket + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset));
            shortDatasetAggregation.add(dataset);

            tempResMap.put(dataset, tempRes.substring(0, tempRes.length() - 1));
            spatialResMap.put(dataset, spatialRes.substring(0, spatialRes.length() - 1));

            datasetTemporalStrMap.put(dataset, datasetTemporalStr);
            datasetSpatialStrMap.put(dataset, datasetSpatialStr);
        }
    }

    if (input.isEmpty()) {
        System.out.println("All the input datasets have aggregates.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    it = input.iterator();
    while (it.hasNext()) {
        preProcessingDatasets += it.next() + ",";
    }

    Job aggJob = null;
    String aggregatesOutputDir = s3bucket + FrameworkUtils.aggregatesDir + "/tmp/";
    String jobName = "aggregates";

    FrameworkUtils.removeFile(aggregatesOutputDir, s3conf, s3);

    Configuration aggConf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    aggConf.set("dataset-name", datasetNames);
    aggConf.set("dataset-id", datasetIds);

    for (int i = 0; i < shortDatasetAggregation.size(); i++) {
        String dataset = shortDatasetAggregation.get(i);
        String id = datasetId.get(dataset);
        aggConf.set("dataset-" + id + "-temporal-resolutions", tempResMap.get(dataset));
        aggConf.set("dataset-" + id + "-spatial-resolutions", spatialResMap.get(dataset));
        aggConf.set("dataset-" + id + "-temporal-att", datasetTempAtt.get(dataset));
        aggConf.set("dataset-" + id + "-spatial-att", datasetSpatialAtt.get(dataset));
        aggConf.set("dataset-" + id + "-temporal", datasetTemporalStrMap.get(dataset));
        aggConf.set("dataset-" + id + "-spatial", datasetSpatialStrMap.get(dataset));

        if (s3)
            aggConf.set("dataset-" + id,
                    s3bucket + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset));
        else
            aggConf.set("dataset-" + id, FileSystem.get(new Configuration()).getHomeDirectory() + "/"
                    + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset));
    }

    aggConf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    aggConf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    aggConf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    aggConf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    aggConf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    aggConf.set("mapreduce.task.io.sort.mb", "200");
    aggConf.set("mapreduce.task.io.sort.factor", "100");
    machineConf.setMachineConfiguration(aggConf);

    if (s3) {
        machineConf.setMachineConfiguration(aggConf);
        aggConf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        aggConf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
    }

    if (snappyCompression) {
        aggConf.set("mapreduce.map.output.compress", "true");
        aggConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //aggConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        aggConf.set("mapreduce.map.output.compress", "true");
        aggConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //aggConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    aggJob = new Job(aggConf);
    aggJob.setJobName(jobName);

    aggJob.setMapOutputKeyClass(SpatioTemporalWritable.class);
    aggJob.setMapOutputValueClass(AggregationArrayWritable.class);
    aggJob.setOutputKeyClass(SpatioTemporalWritable.class);
    aggJob.setOutputValueClass(FloatArrayWritable.class);
    //aggJob.setOutputKeyClass(Text.class);
    //aggJob.setOutputValueClass(Text.class);

    aggJob.setMapperClass(AggregationMapper.class);
    aggJob.setCombinerClass(AggregationCombiner.class);
    aggJob.setReducerClass(AggregationReducer.class);
    aggJob.setNumReduceTasks(machineConf.getNumberReduces());

    aggJob.setInputFormatClass(SequenceFileInputFormat.class);
    //aggJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    LazyOutputFormat.setOutputFormatClass(aggJob, SequenceFileOutputFormat.class);
    //LazyOutputFormat.setOutputFormatClass(aggJob, TextOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(aggJob, true);
    SequenceFileOutputFormat.setOutputCompressionType(aggJob, CompressionType.BLOCK);

    FileInputFormat.setInputDirRecursive(aggJob, true);
    FileInputFormat.setInputPaths(aggJob,
            preProcessingDatasets.substring(0, preProcessingDatasets.length() - 1));
    FileOutputFormat.setOutputPath(aggJob, new Path(aggregatesOutputDir));

    aggJob.setJarByClass(Aggregation.class);

    long start = System.currentTimeMillis();
    aggJob.submit();
    aggJob.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to right place
    for (String dataset : shortDatasetAggregation) {
        String from = s3bucket + FrameworkUtils.aggregatesDir + "/tmp/" + dataset + "/";
        String to = s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset + "/";
        FrameworkUtils.renameFile(from, to, s3conf, s3);
    }

}

From source file: edu.nyu.vida.data_polygamy.feature_identification.IndexCreation.java

/**
 * @param args
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the index and events " + "even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option thresholdOption = new Option("t", "use-custom-thresholds", false,
            "use custom thresholds for regular and rare events, defined in HDFS_HOME/"
                    + FrameworkUtils.thresholdDir + " file");
    thresholdOption.setRequired(false);
    options.addOption(thresholdOption);

    Option gOption = new Option("g", "group", true,
            "set group of datasets for which the indices and events" + " will be computed");
    gOption.setRequired(true);
    gOption.setArgName("GROUP");
    gOption.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(gOption);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp("hadoop jar data-polygamy.jar "
                + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println(
                    "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS.");
            formatter.printHelp("hadoop jar data-polygamy.jar "
                    + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    String datasetNames = "";
    String datasetIds = "";

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> shortDatasetIndex = new ArrayList<String>();
    HashMap<String, String> datasetAgg = new HashMap<String, String>();
    HashMap<String, String> datasetId = new HashMap<String, String>();
    HashMap<String, HashMap<Integer, Double>> datasetRegThreshold = new HashMap<String, HashMap<Integer, Double>>();
    HashMap<String, HashMap<Integer, Double>> datasetRareThreshold = new HashMap<String, HashMap<Integer, Double>>();

    Path path = null;
    FileSystem fs = FileSystem.get(new Configuration());
    BufferedReader br;

    boolean removeExistingFiles = cmd.hasOption("f");
    boolean isThresholdUserDefined = cmd.hasOption("t");

    for (String dataset : cmd.getOptionValues("g")) {

        // getting aggregates
        String[] aggregate = FrameworkUtils.searchAggregates(dataset, s3conf, s3);
        if (aggregate.length == 0) {
            System.out.println("No aggregates found for " + dataset + ".");
            continue;
        }

        // getting aggregates header
        String aggregatesHeaderFileName = FrameworkUtils.searchAggregatesHeader(dataset, s3conf, s3);
        if (aggregatesHeaderFileName == null) {
            System.out.println("No aggregate header for " + dataset);
            continue;
        }

        String aggregatesHeader = s3bucket + FrameworkUtils.preProcessingDir + "/" + aggregatesHeaderFileName;

        shortDataset.add(dataset);
        datasetId.put(dataset, null);

        if (s3) {
            path = new Path(aggregatesHeader);
            fs = FileSystem.get(path.toUri(), s3conf);
        } else {
            path = new Path(fs.getHomeDirectory() + "/" + aggregatesHeader);
        }

        br = new BufferedReader(new InputStreamReader(fs.open(path)));
        datasetAgg.put(dataset, br.readLine().split("\t")[1]);
        br.close();
        if (s3)
            fs.close();
    }

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    // getting dataset id

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    // getting user defined thresholds

    if (isThresholdUserDefined) {
        if (s3) {
            path = new Path(s3bucket + FrameworkUtils.thresholdDir);
            fs = FileSystem.get(path.toUri(), s3conf);
        } else {
            path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.thresholdDir);
        }
        br = new BufferedReader(new InputStreamReader(fs.open(path)));
        line = br.readLine();
        while (line != null) {
            // getting dataset name
            String dataset = line.trim();
            HashMap<Integer, Double> regThresholds = new HashMap<Integer, Double>();
            HashMap<Integer, Double> rareThresholds = new HashMap<Integer, Double>();
            line = br.readLine();
            while ((line != null) && (line.split("\t").length > 1)) {
                // getting attribute ids and thresholds
                String[] keyVals = line.trim().split("\t");
                int att = Integer.parseInt(keyVals[0].trim());
                regThresholds.put(att, Double.parseDouble(keyVals[1].trim()));
                rareThresholds.put(att, Double.parseDouble(keyVals[2].trim()));
                line = br.readLine();
            }
            datasetRegThreshold.put(dataset, regThresholds);
            datasetRareThreshold.put(dataset, rareThresholds);
        }
        br.close();
    }
    if (s3)
        fs.close();

    // datasets that will use existing merge tree
    ArrayList<String> useMergeTree = new ArrayList<String>();

    // creating index for each spatio-temporal resolution

    FrameworkUtils.createDir(s3bucket + FrameworkUtils.indexDir, s3conf, s3);

    HashSet<String> input = new HashSet<String>();

    for (String dataset : shortDataset) {

        String indexCreationOutputFileName = s3bucket + FrameworkUtils.indexDir + "/" + dataset + "/";
        String mergeTreeFileName = s3bucket + FrameworkUtils.mergeTreeDir + "/" + dataset + "/";

        if (removeExistingFiles) {
            FrameworkUtils.removeFile(indexCreationOutputFileName, s3conf, s3);
            FrameworkUtils.removeFile(mergeTreeFileName, s3conf, s3);
            FrameworkUtils.createDir(mergeTreeFileName, s3conf, s3);
        } else if (datasetRegThreshold.containsKey(dataset)) {
            FrameworkUtils.removeFile(indexCreationOutputFileName, s3conf, s3);
            if (FrameworkUtils.fileExists(mergeTreeFileName, s3conf, s3)) {
                useMergeTree.add(dataset);
            }
        }

        if (!FrameworkUtils.fileExists(indexCreationOutputFileName, s3conf, s3)) {
            input.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset);
            shortDatasetIndex.add(dataset);
        }

    }

    if (input.isEmpty()) {
        System.out.println("All the input datasets have indices.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    String aggregateDatasets = "";
    it = input.iterator();
    while (it.hasNext()) {
        aggregateDatasets += it.next() + ",";
    }

    Job icJob = null;
    Configuration icConf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    String jobName = "index";
    String indexOutputDir = s3bucket + FrameworkUtils.indexDir + "/tmp/";

    FrameworkUtils.removeFile(indexOutputDir, s3conf, s3);

    icConf.set("dataset-name", datasetNames);
    icConf.set("dataset-id", datasetIds);

    if (!useMergeTree.isEmpty()) {
        String useMergeTreeStr = "";
        for (String dt : useMergeTree) {
            useMergeTreeStr += dt + ",";
        }
        icConf.set("use-merge-tree", useMergeTreeStr.substring(0, useMergeTreeStr.length() - 1));
    }

    for (int i = 0; i < shortDataset.size(); i++) {
        String dataset = shortDataset.get(i);
        String id = datasetId.get(dataset);
        icConf.set("dataset-" + id + "-aggregates", datasetAgg.get(dataset));
        if (datasetRegThreshold.containsKey(dataset)) {
            HashMap<Integer, Double> regThresholds = datasetRegThreshold.get(dataset);
            String thresholds = "";
            for (int att : regThresholds.keySet()) {
                thresholds += String.valueOf(att) + "-" + String.valueOf(regThresholds.get(att)) + ",";
            }
            icConf.set("regular-" + id, thresholds.substring(0, thresholds.length() - 1));
        }

        if (datasetRareThreshold.containsKey(dataset)) {
            HashMap<Integer, Double> rareThresholds = datasetRareThreshold.get(dataset);
            String thresholds = "";
            for (int att : rareThresholds.keySet()) {
                thresholds += String.valueOf(att) + "-" + String.valueOf(rareThresholds.get(att)) + ",";
            }
            icConf.set("rare-" + id, thresholds.substring(0, thresholds.length() - 1));
        }
    }

    icConf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    icConf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    icConf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    icConf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    icConf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    icConf.set("mapreduce.task.io.sort.mb", "200");
    icConf.set("mapreduce.task.io.sort.factor", "100");
    //icConf.set("mapreduce.task.timeout", "1800000");
    machineConf.setMachineConfiguration(icConf);

    if (s3) {
        machineConf.setMachineConfiguration(icConf);
        icConf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        icConf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        icConf.set("bucket", s3bucket);
    }

    if (snappyCompression) {
        icConf.set("mapreduce.map.output.compress", "true");
        icConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //icConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        icConf.set("mapreduce.map.output.compress", "true");
        icConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //icConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    icJob = new Job(icConf);
    icJob.setJobName(jobName);

    icJob.setMapOutputKeyClass(AttributeResolutionWritable.class);
    icJob.setMapOutputValueClass(SpatioTemporalFloatWritable.class);
    icJob.setOutputKeyClass(AttributeResolutionWritable.class);
    icJob.setOutputValueClass(TopologyTimeSeriesWritable.class);
    //icJob.setOutputKeyClass(Text.class);
    //icJob.setOutputValueClass(Text.class);

    icJob.setMapperClass(IndexCreationMapper.class);
    icJob.setReducerClass(IndexCreationReducer.class);
    icJob.setNumReduceTasks(machineConf.getNumberReduces());

    icJob.setInputFormatClass(SequenceFileInputFormat.class);
    //icJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    LazyOutputFormat.setOutputFormatClass(icJob, SequenceFileOutputFormat.class);
    //LazyOutputFormat.setOutputFormatClass(icJob, TextOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(icJob, true);
    SequenceFileOutputFormat.setOutputCompressionType(icJob, CompressionType.BLOCK);

    FileInputFormat.setInputDirRecursive(icJob, true);
    FileInputFormat.setInputPaths(icJob, aggregateDatasets.substring(0, aggregateDatasets.length() - 1));
    FileOutputFormat.setOutputPath(icJob, new Path(indexOutputDir));

    icJob.setJarByClass(IndexCreation.class);

    long start = System.currentTimeMillis();
    icJob.submit();
    icJob.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to the right place
    for (String dataset : shortDatasetIndex) {
        String from = s3bucket + FrameworkUtils.indexDir + "/tmp/" + dataset + "/";
        String to = s3bucket + FrameworkUtils.indexDir + "/" + dataset + "/";
        FrameworkUtils.renameFile(from, to, s3conf, s3);
    }

}
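
The comma-separated dataset list used above is assembled from the values of a multi-valued option declared earlier in this listing. As a self-contained reminder of the pattern this page documents, here is a minimal sketch of declaring and reading such an option with Option.UNLIMITED_VALUES; the option names and the parser choice are illustrative, not taken from the source above.

    // Minimal sketch (illustrative names; classes are from org.apache.commons.cli):
    Options options = new Options();
    Option input = new Option("i", "input", true, "one or more input datasets");
    input.setArgs(Option.UNLIMITED_VALUES);                       // accept any number of values for -i
    options.addOption(input);

    CommandLine cmd = new DefaultParser().parse(options, args);   // may throw ParseException
    String[] datasets = cmd.getOptionValues("input");             // every value supplied for -i
    String inputPaths = String.join(",", datasets);               // comma-separated list, as built above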

From source file:com.hortonworks.streamline.storage.tool.SQLScriptRunner.java

public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(option(1, "c", OPTION_CONFIG_FILE_PATH, "Config file path"));
    options.addOption(option(Option.UNLIMITED_VALUES, "f", OPTION_SCRIPT_PATH, "Script path to execute"));
    options.addOption(option(Option.UNLIMITED_VALUES, "m", OPTION_MYSQL_JAR_URL_PATH,
            "Mysql client jar url to download"));
    CommandLineParser parser = new BasicParser();
    CommandLine commandLine = parser.parse(options, args);

    if (!commandLine.hasOption(OPTION_CONFIG_FILE_PATH) || !commandLine.hasOption(OPTION_SCRIPT_PATH)
            || commandLine.getOptionValues(OPTION_SCRIPT_PATH).length <= 0) {
        usage(options);
        System.exit(1);
    }

    String confFilePath = commandLine.getOptionValue(OPTION_CONFIG_FILE_PATH);
    String[] scripts = commandLine.getOptionValues(OPTION_SCRIPT_PATH);
    String mysqlJarUrl = commandLine.getOptionValue(OPTION_MYSQL_JAR_URL_PATH);

    try {
        Map<String, Object> conf = Utils.readStreamlineConfig(confFilePath);

        StorageProviderConfigurationReader confReader = new StorageProviderConfigurationReader();
        StorageProviderConfiguration storageProperties = confReader.readStorageConfig(conf);

        String bootstrapDirPath = System.getProperty("bootstrap.dir");

        MySqlDriverHelper.downloadMySQLJarIfNeeded(storageProperties, bootstrapDirPath, mysqlJarUrl);

        SQLScriptRunner sqlScriptRunner = new SQLScriptRunner(storageProperties);
        try {
            sqlScriptRunner.initializeDriver();
        } catch (ClassNotFoundException e) {
            System.err.println(
                    "Driver class is not found in classpath. Please ensure that driver is in classpath.");
            System.exit(1);
        }

        for (String script : scripts) {
            sqlScriptRunner.runScriptWithReplaceDBType(script);
        }
    } catch (IOException e) {
        System.err.println("Error occurred while reading config file: " + confFilePath);
        System.exit(1);
    }
}
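
The option(...) helper used above is not part of this excerpt. Judging from its call sites, it appears to wrap the Commons CLI Option constructor and setArgs; the following is a hypothetical sketch under that assumption, not the original implementation.

    // Hypothetical helper, inferred from the call sites above (not the original source):
    private static Option option(int numArgs, String shortName, String longName, String description) {
        Option option = new Option(shortName, longName, true, description);
        option.setArgs(numArgs);   // Option.UNLIMITED_VALUES means an unbounded number of values
        return option;
    }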

From source file:edu.nyu.vida.data_polygamy.pre_processing.PreProcessing.java

/**
 * @param args
 * @throws IOException 
 * @throws ClassNotFoundException 
 * @throws InterruptedException 
 */
@SuppressWarnings("deprecation")
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option nameOption = new Option("dn", "name", true, "the name of the dataset");
    nameOption.setRequired(true);
    nameOption.setArgName("DATASET NAME");
    options.addOption(nameOption);

    Option headerOption = new Option("dh", "header", true, "the file that contains the header of the dataset");
    headerOption.setRequired(true);
    headerOption.setArgName("DATASET HEADER FILE");
    options.addOption(headerOption);

    Option defaultsOption = new Option("dd", "defaults", true,
            "the file that contains the default values of the dataset");
    defaultsOption.setRequired(true);
    defaultsOption.setArgName("DATASET DEFAULTS FILE");
    options.addOption(defaultsOption);

    Option tempResOption = new Option("t", "temporal", true,
            "desired temporal resolution (hour, day, week, or month)");
    tempResOption.setRequired(true);
    tempResOption.setArgName("TEMPORAL RESOLUTION");
    options.addOption(tempResOption);

    Option spatialResOption = new Option("s", "spatial", true,
            "desired spatial resolution (points, nbhd, zip, grid, or city)");
    spatialResOption.setRequired(true);
    spatialResOption.setArgName("SPATIAL RESOLUTION");
    options.addOption(spatialResOption);

    Option currentSpatialResOption = new Option("cs", "current-spatial", true,
            "current spatial resolution (points, nbhd, zip, grid, or city)");
    currentSpatialResOption.setRequired(true);
    currentSpatialResOption.setArgName("CURRENT SPATIAL RESOLUTION");
    options.addOption(currentSpatialResOption);

    Option indexResOption = new Option("i", "index", true, "indexes of the temporal and spatial attributes");
    indexResOption.setRequired(true);
    indexResOption.setArgName("INDEX OF SPATIO-TEMPORAL RESOLUTIONS");
    indexResOption.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(indexResOption);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp(
                "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.pre_processing.PreProcessing",
                options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp(
                "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.pre_processing.PreProcessing",
                options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println(
                    "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS.");
            formatter.printHelp(
                    "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.pre_processing.PreProcessing",
                    options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    Configuration conf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);
    String dataset = cmd.getOptionValue("dn");
    String header = cmd.getOptionValue("dh");
    String defaults = cmd.getOptionValue("dd");
    String temporalResolution = cmd.getOptionValue("t");
    String spatialResolution = cmd.getOptionValue("s");
    String gridResolution = "";
    String currentSpatialResolution = cmd.getOptionValue("cs");

    if (spatialResolution.contains("grid")) {
        String[] res = spatialResolution.split("-");
        spatialResolution = res[0];
        gridResolution = res[1];
    }

    conf.set("header", s3bucket + FrameworkUtils.dataDir + "/" + header);
    conf.set("defaults", s3bucket + FrameworkUtils.dataDir + "/" + defaults);
    conf.set("temporal-resolution", temporalResolution);
    conf.set("spatial-resolution", spatialResolution);
    conf.set("grid-resolution", gridResolution);
    conf.set("current-spatial-resolution", currentSpatialResolution);

    String[] indexes = cmd.getOptionValues("i");
    String temporalPos = "";
    Integer sizeSpatioTemp = 0;
    if (!(currentSpatialResolution.equals("points"))) {
        String spatialPos = "";
        for (int i = 0; i < indexes.length; i++) {
            temporalPos += indexes[i] + ",";
            spatialPos += indexes[++i] + ",";
            sizeSpatioTemp++;
        }
        conf.set("spatial-pos", spatialPos);
    } else {
        String xPositions = "", yPositions = "";
        for (int i = 0; i < indexes.length; i++) {
            temporalPos += indexes[i] + ",";
            xPositions += indexes[++i] + ",";
            yPositions += indexes[++i] + ",";
            sizeSpatioTemp++;
        }
        conf.set("xPositions", xPositions);
        conf.set("yPositions", yPositions);
    }
    conf.set("temporal-pos", temporalPos);

    conf.set("size-spatio-temporal", sizeSpatioTemp.toString());

    // checking resolutions

    if (utils.spatialResolution(spatialResolution) < 0) {
        System.out.println("Invalid spatial resolution: " + spatialResolution);
        System.exit(-1);
    }

    if (utils.spatialResolution(spatialResolution) == FrameworkUtils.POINTS) {
        System.out.println("The data needs to be reduced at least to neighborhoods or grid.");
        System.exit(-1);
    }

    if (utils.spatialResolution(currentSpatialResolution) < 0) {
        System.out.println("Invalid spatial resolution: " + currentSpatialResolution);
        System.exit(-1);
    }

    if (utils.spatialResolution(currentSpatialResolution) > utils.spatialResolution(spatialResolution)) {
        System.out.println("The current spatial resolution is coarser than "
                + "the desired one. You can only navigate from a fine resolution" + " to a coarser one.");
        System.exit(-1);
    }

    if (utils.temporalResolution(temporalResolution) < 0) {
        System.out.println("Invalid temporal resolution: " + temporalResolution);
        System.exit(-1);
    }

    String fileName = s3bucket + FrameworkUtils.preProcessingDir + "/" + dataset + "-" + temporalResolution
            + "-" + spatialResolution + gridResolution;
    conf.set("aggregates", fileName + ".aggregates");

    // making sure both files are removed, if they exist
    FrameworkUtils.removeFile(fileName, s3conf, s3);
    FrameworkUtils.removeFile(fileName + ".aggregates", s3conf, s3);

    /**
     * Hadoop Parameters
     * sources: http://www.slideshare.net/ImpetusInfo/ppt-on-advanced-hadoop-tuning-n-optimisation
     *          https://cloudcelebrity.wordpress.com/2013/08/14/12-key-steps-to-keep-your-hadoop-cluster-running-strong-and-performing-optimum/
     */

    conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    conf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    conf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    conf.set("mapreduce.task.io.sort.mb", "200");
    conf.set("mapreduce.task.io.sort.factor", "100");

    // using SnappyCodec for intermediate and output data ?
    // TODO: for now, using SnappyCodec -- what about LZO + Protocol Buffer serialization?
    //   LZO - http://www.oberhumer.com/opensource/lzo/#download
    //   Hadoop-LZO - https://github.com/twitter/hadoop-lzo
    //   Protocol Buffer - https://github.com/twitter/elephant-bird
    //   General Info - http://www.devx.com/Java/Article/47913
    //   Compression - http://comphadoop.weebly.com/index.html
    if (snappyCompression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        conf.set("mapreduce.output.fileoutputformat.compress.codec",
                "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        conf.set("mapreduce.output.fileoutputformat.compress.codec",
                "org.apache.hadoop.io.compress.BZip2Codec");
    }

    // TODO: this is dangerous!
    if (s3) {
        conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
    }

    Job job = new Job(conf);
    job.setJobName(dataset + "-" + temporalResolution + "-" + spatialResolution);

    job.setMapOutputKeyClass(MultipleSpatioTemporalWritable.class);
    job.setMapOutputValueClass(AggregationArrayWritable.class);

    job.setOutputKeyClass(MultipleSpatioTemporalWritable.class);
    job.setOutputValueClass(AggregationArrayWritable.class);

    job.setMapperClass(PreProcessingMapper.class);
    job.setCombinerClass(PreProcessingCombiner.class);
    job.setReducerClass(PreProcessingReducer.class);
    job.setNumReduceTasks(machineConf.getNumberReduces());
    //job.setNumReduceTasks(1);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    FileInputFormat.setInputPaths(job, new Path(s3bucket + FrameworkUtils.dataDir + "/" + dataset));
    FileOutputFormat.setOutputPath(job, new Path(fileName));

    job.setJarByClass(PreProcessing.class);

    long start = System.currentTimeMillis();
    job.submit();
    job.waitForCompletion(true);
    System.out.println(fileName + "\t" + (System.currentTimeMillis() - start));

}
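
In this listing, the index option -i is declared with setArgs(Option.UNLIMITED_VALUES), while options such as -m and -n are limited to a single argument with setArgs(1). Reading the values reflects that difference: getOptionValues returns every value supplied, whereas getOptionValue returns only the first one. A brief illustration (the command-line values below are made up):

    // Assuming the program was invoked with: ... -i 0 3 4 ...
    String[] indexes = cmd.getOptionValues("i");   // {"0", "3", "4"} -- all values of -i
    String first = cmd.getOptionValue("i");        // "0" -- only the first value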

From source file:edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques.java

/**
 * @param args
 * @throws ParseException 
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the relationship " + "even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option g1Option = new Option("g1", "first-group", true, "set first group of datasets");
    g1Option.setRequired(true);
    g1Option.setArgName("FIRST GROUP");
    g1Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g1Option);

    Option g2Option = new Option("g2", "second-group", true, "set second group of datasets");
    g2Option.setRequired(false);
    g2Option.setArgName("SECOND GROUP");
    g2Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g2Option);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp(
                "hadoop jar data-polygamy.jar "
                        + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques",
                options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp(
                "hadoop jar data-polygamy.jar "
                        + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques",
                options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println(
                    "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS.");
            formatter.printHelp(
                    "hadoop jar data-polygamy.jar "
                            + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques",
                    options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    Path path = null;
    FileSystem fs = FileSystem.get(new Configuration());

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> firstGroup = new ArrayList<String>();
    ArrayList<String> secondGroup = new ArrayList<String>();
    HashMap<String, String> datasetAgg = new HashMap<String, String>();

    boolean removeExistingFiles = cmd.hasOption("f");

    String[] firstGroupCmd = cmd.getOptionValues("g1");
    String[] secondGroupCmd = cmd.hasOption("g2") ? cmd.getOptionValues("g2") : new String[0];
    addDatasets(firstGroupCmd, firstGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);
    addDatasets(secondGroupCmd, secondGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    if (firstGroup.isEmpty()) {
        System.out.println("First group of datasets (G1) is empty. " + "Doing G1 = G2.");
        firstGroup.addAll(secondGroup);
    }

    if (secondGroup.isEmpty()) {
        System.out.println("Second group of datasets (G2) is empty. " + "Doing G2 = G1.");
        secondGroup.addAll(firstGroup);
    }

    // getting dataset ids

    String datasetNames = "";
    String datasetIds = "";
    HashMap<String, String> datasetId = new HashMap<String, String>();
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        datasetId.put(it.next(), null);
    }

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);
    it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    String firstGroupStr = "";
    String secondGroupStr = "";
    for (String dataset : firstGroup) {
        firstGroupStr += datasetId.get(dataset) + ",";
    }
    for (String dataset : secondGroup) {
        secondGroupStr += datasetId.get(dataset) + ",";
    }
    firstGroupStr = firstGroupStr.substring(0, firstGroupStr.length() - 1);
    secondGroupStr = secondGroupStr.substring(0, secondGroupStr.length() - 1);

    FrameworkUtils.createDir(s3bucket + FrameworkUtils.correlationTechniquesDir, s3conf, s3);

    String dataAttributesInputDirs = "";
    String noRelationship = "";

    HashSet<String> dirs = new HashSet<String>();

    String dataset1;
    String dataset2;
    String datasetId1;
    String datasetId2;
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {

            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }

            datasetId1 = datasetId.get(dataset1);
            datasetId2 = datasetId.get(dataset2);

            if (dataset1.equals(dataset2))
                continue;
            String correlationOutputFileName = s3bucket + FrameworkUtils.correlationTechniquesDir + "/"
                    + dataset1 + "-" + dataset2 + "/";

            if (removeExistingFiles) {
                FrameworkUtils.removeFile(correlationOutputFileName, s3conf, s3);
            }
            if (!FrameworkUtils.fileExists(correlationOutputFileName, s3conf, s3)) {
                dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset1);
                dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset2);
            } else {
                noRelationship += datasetId1 + "-" + datasetId2 + ",";
            }
        }
    }

    if (dirs.isEmpty()) {
        System.out.println("All the relationships were already computed.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    for (String dir : dirs) {
        dataAttributesInputDirs += dir + ",";
    }

    Configuration conf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    String jobName = "correlation";
    String correlationOutputDir = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/";

    FrameworkUtils.removeFile(correlationOutputDir, s3conf, s3);

    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg", datasetAgg.get(shortDataset.get(i)));
    }
    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg-size",
                Integer.toString(datasetAgg.get(shortDataset.get(i)).split(",").length));
    }
    conf.set("dataset-keys", datasetIds);
    conf.set("dataset-names", datasetNames);
    conf.set("first-group", firstGroupStr);
    conf.set("second-group", secondGroupStr);
    conf.set("main-dataset-id", datasetId.get(shortDataset.get(0)));
    if (noRelationship.length() > 0) {
        conf.set("no-relationship", noRelationship.substring(0, noRelationship.length() - 1));
    }

    conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    conf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    conf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    conf.set("mapreduce.task.io.sort.mb", "200");
    conf.set("mapreduce.task.io.sort.factor", "100");
    conf.set("mapreduce.task.timeout", "2400000");

    if (s3) {
        machineConf.setMachineConfiguration(conf);
        conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        conf.set("bucket", s3bucket);
    }

    if (snappyCompression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    Job job = new Job(conf);
    job.setJobName(jobName);

    job.setMapOutputKeyClass(PairAttributeWritable.class);
    job.setMapOutputValueClass(SpatioTemporalValueWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(CorrelationTechniquesMapper.class);
    job.setReducerClass(CorrelationTechniquesReducer.class);
    job.setNumReduceTasks(machineConf.getNumberReduces());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job,
            dataAttributesInputDirs.substring(0, dataAttributesInputDirs.length() - 1));
    FileOutputFormat.setOutputPath(job, new Path(correlationOutputDir));

    job.setJarByClass(CorrelationTechniques.class);

    long start = System.currentTimeMillis();
    job.submit();
    job.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to the right place
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {

            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }

            if (dataset1.equals(dataset2))
                continue;

            String from = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/" + dataset1 + "-"
                    + dataset2 + "/";
            String to = s3bucket + FrameworkUtils.correlationTechniquesDir + "/" + dataset1 + "-" + dataset2
                    + "/";
            FrameworkUtils.renameFile(from, to, s3conf, s3);
        }
    }
}
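
One detail worth noting about the multi-valued -g2 option above: CommandLine.getOptionValues returns null when an option was not supplied on the command line, which is why the code guards the call with hasOption and falls back to an empty array. A minimal sketch of that guard (option name as in the listing):

    // getOptionValues returns null for an absent option, so guard before iterating:
    String[] secondGroupCmd = cmd.hasOption("g2") ? cmd.getOptionValues("g2") : new String[0];
    for (String dataset : secondGroupCmd) {
        // process each value of the multi-valued -g2 option
    }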