List of usage examples for org.apache.commons.cli Option UNLIMITED_VALUES
public static final int UNLIMITED_VALUES — the constant that marks an option as accepting an unlimited number of argument values; it is typically passed to Option.setArgs(int) or OptionBuilder.hasArgs(int), and the collected values are read back with CommandLine.getOptionValues().
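Before the project excerpts below, here is a minimal, self-contained sketch of the typical pattern; the class name UnlimitedValuesSketch, the -i/--input option, and its description are placeholders for illustration and are not taken from any of the projects listed:

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;

public class UnlimitedValuesSketch {
    public static void main(String[] args) throws ParseException {
        Options options = new Options();

        Option input = new Option("i", "input", true, "one or more input files");
        // UNLIMITED_VALUES makes the parser keep consuming tokens as values
        // for this option until the next option token is reached.
        input.setArgs(Option.UNLIMITED_VALUES);
        options.addOption(input);

        CommandLineParser parser = new DefaultParser();
        CommandLine cmd = parser.parse(options, args);

        if (cmd.hasOption("i")) {
            for (String value : cmd.getOptionValues("i")) {
                System.out.println(value);
            }
        }
    }
}

Running it as, for example, java UnlimitedValuesSketch -i a.xml b.xml c.xml prints all three values, because the parser keeps assigning tokens to -i until it encounters another option.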
From source file:fr.tpt.s3.mcdag.scheduling.Main.java
public static void main(String[] args) throws IOException, InterruptedException {
    /* Command line options */
    Options options = new Options();

    Option input = new Option("i", "input", true, "MC-DAG XML Models");
    input.setRequired(true);
    input.setArgs(Option.UNLIMITED_VALUES); // Sets maximum number of threads to be launched
    options.addOption(input);

    Option outSched = new Option("os", "out-scheduler", false, "Write the scheduling tables into a file.");
    outSched.setRequired(false);
    options.addOption(outSched);

    Option outPrism = new Option("op", "out-prism", false, "Write PRISM model into a file.");
    outPrism.setRequired(false);
    options.addOption(outPrism);

    Option jobs = new Option("j", "jobs", true, "Number of threads to be launched.");
    jobs.setRequired(false);
    options.addOption(jobs);

    Option debugOpt = new Option("d", "debug", false, "Enabling debug.");
    debugOpt.setRequired(false);
    options.addOption(debugOpt);

    Option preemptOpt = new Option("p", "preempt", false, "Count for preemptions.");
    preemptOpt.setRequired(false);
    options.addOption(preemptOpt);

    CommandLineParser parser = new DefaultParser();
    HelpFormatter formatter = new HelpFormatter();
    CommandLine cmd;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        System.err.println(e.getMessage());
        formatter.printHelp("MC-DAG framework", options);
        System.exit(1);
        return;
    }

    String inputFilePath[] = cmd.getOptionValues("input");
    boolean bOutSched = cmd.hasOption("out-scheduler");
    boolean bOutPrism = cmd.hasOption("out-prism");
    boolean debug = cmd.hasOption("debug");
    boolean preempt = cmd.hasOption("preempt");
    boolean levels = cmd.hasOption("n-levels");
    int nbFiles = inputFilePath.length;
    int nbJobs = 1;

    if (cmd.hasOption("jobs"))
        nbJobs = Integer.parseInt(cmd.getOptionValue("jobs"));

    if (debug)
        System.out.println("[DEBUG] Launching " + inputFilePath.length + " thread(s).");

    int i_files = 0;
    ExecutorService executor = Executors.newFixedThreadPool(nbJobs);

    /* Launch threads to solve allocation */
    while (i_files != nbFiles) {
        SchedulingThread ft = new SchedulingThread(inputFilePath[i_files], bOutSched, bOutPrism, debug,
                preempt);
        ft.setLevels(levels);
        executor.execute(ft);
        i_files++;
    }

    executor.shutdown();
    executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
    System.out.println("[FRAMEWORK Main] DONE");
}
From source file:fr.tpt.s3.mcdag.bench.MainBench.java
public static void main(String[] args) throws IOException, InterruptedException { // Command line options Options options = new Options(); Option input = new Option("i", "input", true, "MC-DAG XML models"); input.setRequired(true);//from ww w.j av a 2 s. co m input.setArgs(Option.UNLIMITED_VALUES); options.addOption(input); Option output = new Option("o", "output", true, "Folder where results have to be written."); output.setRequired(true); options.addOption(output); Option uUti = new Option("u", "utilization", true, "Utilization."); uUti.setRequired(true); options.addOption(uUti); Option output2 = new Option("ot", "output-total", true, "File where total results are being written"); output2.setRequired(true); options.addOption(output2); Option oCores = new Option("c", "cores", true, "Cores given to the test"); oCores.setRequired(true); options.addOption(oCores); Option oLvls = new Option("l", "levels", true, "Levels tested for the system"); oLvls.setRequired(true); options.addOption(oLvls); Option jobs = new Option("j", "jobs", true, "Number of threads to be launched."); jobs.setRequired(false); options.addOption(jobs); Option debug = new Option("d", "debug", false, "Debug logs."); debug.setRequired(false); options.addOption(debug); /* * Parsing of the command line */ CommandLineParser parser = new DefaultParser(); HelpFormatter formatter = new HelpFormatter(); CommandLine cmd; try { cmd = parser.parse(options, args); } catch (ParseException e) { System.err.println(e.getMessage()); formatter.printHelp("Benchmarks MultiDAG", options); System.exit(1); return; } String inputFilePath[] = cmd.getOptionValues("input"); String outputFilePath = cmd.getOptionValue("output"); String outputFilePathTotal = cmd.getOptionValue("output-total"); double utilization = Double.parseDouble(cmd.getOptionValue("utilization")); boolean boolDebug = cmd.hasOption("debug"); int nbLvls = Integer.parseInt(cmd.getOptionValue("levels")); int nbJobs = 1; int nbFiles = inputFilePath.length; if (cmd.hasOption("jobs")) nbJobs = Integer.parseInt(cmd.getOptionValue("jobs")); int nbCores = Integer.parseInt(cmd.getOptionValue("cores")); /* * While files need to be allocated * run the tests in the pool of threads */ // For dual-criticality systems we call a specific thread if (nbLvls == 2) { System.out.println(">>>>>>>>>>>>>>>>>>>>> NB levels " + nbLvls); int i_files2 = 0; String outFile = outputFilePath.substring(0, outputFilePath.lastIndexOf('.')) .concat("-schedulability.csv"); PrintWriter writer = new PrintWriter(outFile, "UTF-8"); writer.println( "Thread; File; FSched (%); FPreempts; FAct; LSched (%); LPreempts; LAct; ESched (%); EPreempts; EAct; HSched(%); HPreempts; HAct; Utilization"); writer.close(); ExecutorService executor2 = Executors.newFixedThreadPool(nbJobs); while (i_files2 != nbFiles) { BenchThreadDualCriticality bt2 = new BenchThreadDualCriticality(inputFilePath[i_files2], outFile, nbCores, boolDebug); executor2.execute(bt2); i_files2++; } executor2.shutdown(); executor2.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS); int fedTotal = 0; int laxTotal = 0; int edfTotal = 0; int hybridTotal = 0; int fedPreempts = 0; int laxPreempts = 0; int edfPreempts = 0; int hybridPreempts = 0; int fedActiv = 0; int laxActiv = 0; int edfActiv = 0; int hybridActiv = 0; // Read lines in file and do average int i = 0; File f = new File(outFile); @SuppressWarnings("resource") Scanner line = new Scanner(f); while (line.hasNextLine()) { String s = line.nextLine(); if (i > 0) { // To skip the first line try (Scanner inLine = 
new Scanner(s).useDelimiter("; ")) { int j = 0; while (inLine.hasNext()) { String val = inLine.next(); if (j == 2) { fedTotal += Integer.parseInt(val); } else if (j == 3) { fedPreempts += Integer.parseInt(val); } else if (j == 4) { fedActiv += Integer.parseInt(val); } else if (j == 5) { laxTotal += Integer.parseInt(val); } else if (j == 6) { laxPreempts += Integer.parseInt(val); } else if (j == 7) { laxActiv += Integer.parseInt(val); } else if (j == 8) { edfTotal += Integer.parseInt(val); } else if (j == 9) { edfPreempts += Integer.parseInt(val); } else if (j == 10) { edfActiv += Integer.parseInt(val); } else if (j == 11) { hybridTotal += Integer.parseInt(val); } else if (j == 12) { hybridPreempts += Integer.parseInt(val); } else if (j == 13) { hybridActiv += Integer.parseInt(val); } j++; } } } i++; } // Write percentage double fedPerc = (double) fedTotal / nbFiles; double laxPerc = (double) laxTotal / nbFiles; double edfPerc = (double) edfTotal / nbFiles; double hybridPerc = (double) hybridTotal / nbFiles; double fedPercPreempts = (double) fedPreempts / fedActiv; double laxPercPreempts = (double) laxPreempts / laxActiv; double edfPercPreempts = (double) edfPreempts / edfActiv; double hybridPercPreempts = (double) hybridPreempts / hybridActiv; Writer wOutput = new BufferedWriter(new FileWriter(outputFilePathTotal, true)); wOutput.write(Thread.currentThread().getName() + "; " + utilization + "; " + fedPerc + "; " + fedPreempts + "; " + fedActiv + "; " + fedPercPreempts + "; " + laxPerc + "; " + laxPreempts + "; " + laxActiv + "; " + laxPercPreempts + "; " + edfPerc + "; " + edfPreempts + "; " + edfActiv + "; " + edfPercPreempts + "; " + hybridPerc + "; " + hybridPreempts + "; " + hybridActiv + "; " + hybridPercPreempts + "\n"); wOutput.close(); } else if (nbLvls > 2) { int i_files2 = 0; String outFile = outputFilePath.substring(0, outputFilePath.lastIndexOf('.')) .concat("-schedulability.csv"); PrintWriter writer = new PrintWriter(outFile, "UTF-8"); writer.println( "Thread; File; LSched (%); LPreempts; LAct; ESched (%); EPreempts; EAct; HSched(%); HPreempts; HAct; Utilization"); writer.close(); ExecutorService executor2 = Executors.newFixedThreadPool(nbJobs); while (i_files2 != nbFiles) { BenchThreadNLevels bt2 = new BenchThreadNLevels(inputFilePath[i_files2], outFile, nbCores, boolDebug); executor2.execute(bt2); i_files2++; } executor2.shutdown(); executor2.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS); int laxTotal = 0; int edfTotal = 0; int hybridTotal = 0; int laxPreempts = 0; int edfPreempts = 0; int hybridPreempts = 0; int laxActiv = 0; int edfActiv = 0; int hybridActiv = 0; // Read lines in file and do average int i = 0; File f = new File(outFile); @SuppressWarnings("resource") Scanner line = new Scanner(f); while (line.hasNextLine()) { String s = line.nextLine(); if (i > 0) { // To skip the first line try (Scanner inLine = new Scanner(s).useDelimiter("; ")) { int j = 0; while (inLine.hasNext()) { String val = inLine.next(); if (j == 2) { laxTotal += Integer.parseInt(val); } else if (j == 3) { laxPreempts += Integer.parseInt(val); } else if (j == 4) { laxActiv += Integer.parseInt(val); } else if (j == 5) { edfTotal += Integer.parseInt(val); } else if (j == 6) { edfPreempts += Integer.parseInt(val); } else if (j == 7) { edfActiv += Integer.parseInt(val); } else if (j == 8) { hybridTotal += Integer.parseInt(val); } else if (j == 9) { hybridPreempts += Integer.parseInt(val); } else if (j == 10) { hybridActiv += Integer.parseInt(val); } j++; } } } i++; } // Write percentage 
double laxPerc = (double) laxTotal / nbFiles; double edfPerc = (double) edfTotal / nbFiles; double hybridPerc = (double) hybridTotal / nbFiles; double laxPercPreempts = (double) laxPreempts / laxActiv; double edfPercPreempts = (double) edfPreempts / edfActiv; double hybridPercPreempts = (double) hybridPreempts / hybridActiv; Writer wOutput = new BufferedWriter(new FileWriter(outputFilePathTotal, true)); wOutput.write(Thread.currentThread().getName() + "; " + utilization + "; " + laxPerc + "; " + laxPreempts + "; " + laxActiv + "; " + laxPercPreempts + "; " + edfPerc + "; " + edfPreempts + "; " + edfActiv + "; " + edfPercPreempts + "; " + hybridPerc + "; " + hybridPreempts + "; " + hybridActiv + "; " + hybridPercPreempts + "\n"); wOutput.close(); } else { System.err.println("Wrong number of levels"); System.exit(-1); } System.out.println("[BENCH Main] Done benchmarking U = " + utilization + " Levels " + nbLvls); }
From source file:edu.wisc.doit.tcrypt.cli.TokenCrypt.java
public static void main(String[] args) throws IOException {
    // create Options object
    final Options options = new Options();

    // operation opt group
    final OptionGroup cryptTypeGroup = new OptionGroup();
    cryptTypeGroup.addOption(new Option("e", "encrypt", false, "Encrypt a token"));
    cryptTypeGroup.addOption(new Option("d", "decrypt", false, "Decrypt a token"));
    cryptTypeGroup.addOption(new Option("c", "check", false, "Check if the string looks like an encrypted token"));
    cryptTypeGroup.setRequired(true);
    options.addOptionGroup(cryptTypeGroup);

    // token source opt group
    final OptionGroup tokenGroup = new OptionGroup();
    final Option tokenOpt = new Option("t", "token", true, "The token(s) to operate on");
    tokenOpt.setArgs(Option.UNLIMITED_VALUES);
    tokenGroup.addOption(tokenOpt);
    final Option tokenFileOpt = new Option("f", "file", true,
            "A file with one token per line to operate on, if - is specified stdin is used");
    tokenGroup.addOption(tokenFileOpt);
    tokenGroup.setRequired(true);
    options.addOptionGroup(tokenGroup);

    final Option keyOpt = new Option("k", "keyFile", true,
            "Key file to use. Must be a private key for decryption and a public key for encryption");
    keyOpt.setRequired(true);
    options.addOption(keyOpt);

    // create the parser
    final CommandLineParser parser = new GnuParser();
    CommandLine line = null;
    try {
        // parse the command line arguments
        line = parser.parse(options, args);
    } catch (ParseException exp) {
        // automatically generate the help statement
        System.err.println(exp.getMessage());
        final HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("java " + TokenCrypt.class.getName(), options, true);
        System.exit(1);
    }

    final Reader keyReader = createKeyReader(line);
    final TokenHandler tokenHandler = createTokenHandler(line, keyReader);

    if (line.hasOption("t")) {
        // tokens on cli
        final String[] tokens = line.getOptionValues("t");
        for (final String token : tokens) {
            handleToken(tokenHandler, token);
        }
    } else {
        // tokens from a file
        final String tokenFile = line.getOptionValue("f");
        final BufferedReader fileReader;
        if ("-".equals(tokenFile)) {
            fileReader = new BufferedReader(new InputStreamReader(System.in));
        } else {
            fileReader = new BufferedReader(new FileReader(tokenFile));
        }

        while (true) {
            final String token = fileReader.readLine();
            if (token == null) {
                break;
            }
            handleToken(tokenHandler, token);
        }
    }
}
From source file:com.versusoft.packages.ooo.odt2daisy.gui.CommandLineGUI.java
public static void main(String args[]) throws IOException { Handler fh = new FileHandler(LOG_FILENAME_PATTERN); fh.setFormatter(new SimpleFormatter()); //removeAllLoggersHandlers(Logger.getLogger("")); Logger.getLogger("").addHandler(fh); Logger.getLogger("").setLevel(Level.FINEST); Options options = new Options(); Option option1 = new Option("in", "name of ODT file (required)"); option1.setRequired(true);/* w w w . j av a2 s . c o m*/ option1.setArgs(1); Option option2 = new Option("out", "name of DAISY DTB file (required)"); option2.setRequired(true); option2.setArgs(1); Option option3 = new Option("h", "show this help"); option3.setArgs(Option.UNLIMITED_VALUES); Option option4 = new Option("alt", "use alternate Level Markup"); Option option5 = new Option("u", "UID of DAISY DTB (optional)"); option5.setArgs(1); Option option6 = new Option("t", "Title of DAISY DTB"); option6.setArgs(1); Option option7 = new Option("c", "Creator of DAISY DTB"); option7.setArgs(1); Option option8 = new Option("p", "Publisher of DAISY DTB"); option8.setArgs(1); Option option9 = new Option("pr", "Producer of DAISY DTB"); option9.setArgs(1); Option option10 = new Option("pic", "set Picture directory"); option10.setArgs(1); Option option11 = new Option("page", "enable pagination"); option11.setArgs(0); Option option12 = new Option("css", "write CSS file"); option12.setArgs(0); options.addOption(option1); options.addOption(option2); options.addOption(option3); options.addOption(option4); options.addOption(option5); options.addOption(option6); options.addOption(option7); options.addOption(option8); options.addOption(option9); options.addOption(option10); options.addOption(option11); options.addOption(option12); CommandLineParser parser = new BasicParser(); CommandLine cmd = null; try { cmd = parser.parse(options, args); } catch (ParseException e) { //System.out.println("***ERROR: " + e.getClass() + ": " + e.getMessage()); printHelp(); return; } if (cmd.hasOption("help")) { printHelp(); return; } try { Odt2Daisy odt2daisy = new Odt2Daisy(cmd.getOptionValue("in")); //@todo add initial output directory URL? odt2daisy.init(); if (odt2daisy.isEmptyDocument()) { logger.info("Cannot convert empty documents. Export Aborted..."); System.exit(1); } //System.out.println("Metadatas"); //System.out.println("- title: " + odt2daisy.getTitleMeta()); //System.out.println("- creator: " + odt2daisy.getCreatorMeta()); if (!odt2daisy.isUsingHeadings()) { logger.info("You SHOULD use Heading styles in your document. Export in a single level."); } //@todo Warning for incompatible image formats should go here. See UnoGui.java. 
if (cmd.hasOption("u")) { //System.out.println("arg uid:"+cmd.getOptionValue("u")); odt2daisy.setUidParam(cmd.getOptionValue("u")); } if (cmd.hasOption("t")) { //System.out.println("arg title:"+cmd.getOptionValue("t")); odt2daisy.setTitleParam(cmd.getOptionValue("t")); } if (cmd.hasOption("c")) { //System.out.println("arg creator:"+cmd.getOptionValue("c")); odt2daisy.setCreatorParam(cmd.getOptionValue("c")); } if (cmd.hasOption("p")) { //System.out.println("arg publisher:"+cmd.getOptionValue("p")); odt2daisy.setPublisherParam(cmd.getOptionValue("p")); } if (cmd.hasOption("pr")) { //System.out.println("arg producer:"+cmd.getOptionValue("pr")); odt2daisy.setProducerParam(cmd.getOptionValue("pr")); } if (cmd.hasOption("alt")) { //System.out.println("arg alt:"+cmd.getOptionValue("alt")); odt2daisy.setUseAlternateLevelParam(true); } if (cmd.hasOption("css")) { odt2daisy.setWriteCSSParam(true); } if (cmd.hasOption("page")) { odt2daisy.paginationProcessing(); } odt2daisy.correctionProcessing(); if (cmd.hasOption("pic")) { odt2daisy.convertAsDTBook(cmd.getOptionValue("out"), cmd.getOptionValue("pic")); } else { logger.info("Language detected: " + odt2daisy.getLangParam()); odt2daisy.convertAsDTBook(cmd.getOptionValue("out"), Configuration.DEFAULT_IMAGE_DIR); } boolean valid = odt2daisy.validateDTD(cmd.getOptionValue("out")); if (valid) { logger.info("DAISY DTBook produced is valid against DTD - Congratulations !"); } else { logger.info("DAISY Book produced isn't valid against DTD - You SHOULD NOT use this DAISY Book !"); logger.info("Error at line: " + odt2daisy.getErrorHandler().getLineNumber()); logger.info("Error Message: " + odt2daisy.getErrorHandler().getMessage()); } } catch (Exception e) { e.printStackTrace(); } finally { if (fh != null) { fh.flush(); fh.close(); } } }
From source file:com.akana.demo.freemarker.templatetester.App.java
public static void main(String[] args) { final Options options = new Options(); @SuppressWarnings("static-access") Option optionContentType = OptionBuilder.withArgName("content-type").hasArg() .withDescription("content type of model").create("content"); @SuppressWarnings("static-access") Option optionUrlPath = OptionBuilder.withArgName("httpRequestLine").hasArg() .withDescription("url path and parameters in HTTP Request Line format").create("url"); @SuppressWarnings("static-access") Option optionRootMessageName = OptionBuilder.withArgName("messageName").hasArg() .withDescription("root data object name, defaults to 'message'").create("root"); @SuppressWarnings("static-access") Option optionAdditionalMessages = OptionBuilder.withArgName("dataModelPaths") .hasArgs(Option.UNLIMITED_VALUES).withDescription("additional message object data sources") .create("messages"); @SuppressWarnings("static-access") Option optionDebugMessages = OptionBuilder.hasArg(false) .withDescription("Shows debug information about template processing").create("debug"); Option optionHelp = new Option("help", "print this message"); options.addOption(optionHelp);/*from ww w. j a va 2 s . co m*/ options.addOption(optionContentType); options.addOption(optionUrlPath); options.addOption(optionRootMessageName); options.addOption(optionAdditionalMessages); options.addOption(optionDebugMessages); CommandLineParser parser = new DefaultParser(); CommandLine cmd; try { cmd = parser.parse(options, args); // Check for help flag if (cmd.hasOption("help")) { showHelp(options); return; } String[] remainingArguments = cmd.getArgs(); if (remainingArguments.length < 2) { showHelp(options); return; } String ftlPath, dataPath = "none"; ftlPath = remainingArguments[0]; dataPath = remainingArguments[1]; String contentType = "text/xml"; // Discover content type from file extension String ext = FilenameUtils.getExtension(dataPath); if (ext.equals("json")) { contentType = "json"; } else if (ext.equals("txt")) { contentType = "txt"; } // Override discovered content type if (cmd.hasOption("content")) { contentType = cmd.getOptionValue("content"); } // Root data model name String rootMessageName = "message"; if (cmd.hasOption("root")) { rootMessageName = cmd.getOptionValue("root"); } // Additional data models String[] additionalModels = new String[0]; if (cmd.hasOption("messages")) { additionalModels = cmd.getOptionValues("messages"); } // Debug Info if (cmd.hasOption("debug")) { System.out.println(" Processing ftl : " + ftlPath); System.out.println(" with data model: " + dataPath); System.out.println(" with content-type: " + contentType); System.out.println(" data model object: " + rootMessageName); if (cmd.hasOption("messages")) { System.out.println("additional models: " + additionalModels.length); } } Configuration cfg = new Configuration(Configuration.VERSION_2_3_23); cfg.setDirectoryForTemplateLoading(new File(".")); cfg.setDefaultEncoding("UTF-8"); cfg.setTemplateExceptionHandler(TemplateExceptionHandler.RETHROW_HANDLER); /* Create the primary data-model */ Map<String, Object> message = new HashMap<String, Object>(); if (contentType.contains("json") || contentType.contains("txt")) { message.put("contentAsString", FileUtils.readFileToString(new File(dataPath), StandardCharsets.UTF_8)); } else { message.put("contentAsXml", freemarker.ext.dom.NodeModel.parse(new File(dataPath))); } if (cmd.hasOption("url")) { message.put("getProperty", new AkanaGetProperty(cmd.getOptionValue("url"))); } Map<String, Object> root = new HashMap<String, Object>(); 
root.put(rootMessageName, message); if (additionalModels.length > 0) { for (int i = 0; i < additionalModels.length; i++) { Map<String, Object> m = createMessageFromFile(additionalModels[i], contentType); root.put("message" + i, m); } } /* Get the template (uses cache internally) */ Template temp = cfg.getTemplate(ftlPath); /* Merge data-model with template */ Writer out = new OutputStreamWriter(System.out); temp.process(root, out); } catch (ParseException e) { showHelp(options); System.exit(1); } catch (IOException e) { System.out.println("Unable to parse ftl."); e.printStackTrace(); } catch (SAXException e) { System.out.println("XML parsing issue."); e.printStackTrace(); } catch (ParserConfigurationException e) { System.out.println("Unable to configure parser."); e.printStackTrace(); } catch (TemplateException e) { System.out.println("Unable to parse template."); e.printStackTrace(); } }
From source file:edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation.java
/** * @param args/*from w w w .j av a 2s. c o m*/ */ @SuppressWarnings({ "deprecation" }) public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Options options = new Options(); Option forceOption = new Option("f", "force", false, "force the computation of the aggregate functions " + "even if files already exist"); forceOption.setRequired(false); options.addOption(forceOption); Option gOption = new Option("g", "group", true, "set group of datasets for which the aggregate functions" + " will be computed, followed by their temporal and spatial attribute indices"); gOption.setRequired(true); gOption.setArgName("GROUP"); gOption.setArgs(Option.UNLIMITED_VALUES); options.addOption(gOption); Option machineOption = new Option("m", "machine", true, "machine identifier"); machineOption.setRequired(true); machineOption.setArgName("MACHINE"); machineOption.setArgs(1); options.addOption(machineOption); Option nodesOption = new Option("n", "nodes", true, "number of nodes"); nodesOption.setRequired(true); nodesOption.setArgName("NODES"); nodesOption.setArgs(1); options.addOption(nodesOption); Option s3Option = new Option("s3", "s3", false, "data on Amazon S3"); s3Option.setRequired(false); options.addOption(s3Option); Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true, "aws access key id; " + "this is required if the execution is on aws"); awsAccessKeyIdOption.setRequired(false); awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID"); awsAccessKeyIdOption.setArgs(1); options.addOption(awsAccessKeyIdOption); Option awsSecretAccessKeyOption = new Option("aws_key", "aws-id", true, "aws secrect access key; " + "this is required if the execution is on aws"); awsSecretAccessKeyOption.setRequired(false); awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY"); awsSecretAccessKeyOption.setArgs(1); options.addOption(awsSecretAccessKeyOption); Option bucketOption = new Option("b", "s3-bucket", true, "bucket on s3; " + "this is required if the execution is on aws"); bucketOption.setRequired(false); bucketOption.setArgName("S3-BUCKET"); bucketOption.setArgs(1); options.addOption(bucketOption); Option helpOption = new Option("h", "help", false, "display this message"); helpOption.setRequired(false); options.addOption(helpOption); HelpFormatter formatter = new HelpFormatter(); CommandLineParser parser = new PosixParser(); CommandLine cmd = null; try { cmd = parser.parse(options, args); } catch (ParseException e) { formatter.printHelp("hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true); System.exit(0); } if (cmd.hasOption("h")) { formatter.printHelp("hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true); System.exit(0); } boolean s3 = cmd.hasOption("s3"); String s3bucket = ""; String awsAccessKeyId = ""; String awsSecretAccessKey = ""; if (s3) { if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) { System.out.println( "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS."); formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.scalar_function_computation.Aggregation", options, true); System.exit(0); } s3bucket = cmd.getOptionValue("b"); awsAccessKeyId = cmd.getOptionValue("aws_id"); awsSecretAccessKey = cmd.getOptionValue("aws_key"); } boolean snappyCompression = false; boolean bzip2Compression = false; String machine = 
cmd.getOptionValue("m"); int nbNodes = Integer.parseInt(cmd.getOptionValue("n")); Configuration s3conf = new Configuration(); if (s3) { s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); s3conf.set("bucket", s3bucket); } String datasetNames = ""; String datasetIds = ""; String preProcessingDatasets = ""; ArrayList<String> shortDataset = new ArrayList<String>(); ArrayList<String> shortDatasetAggregation = new ArrayList<String>(); HashMap<String, String> datasetTempAtt = new HashMap<String, String>(); HashMap<String, String> datasetSpatialAtt = new HashMap<String, String>(); HashMap<String, String> preProcessingDataset = new HashMap<String, String>(); HashMap<String, String> datasetId = new HashMap<String, String>(); boolean removeExistingFiles = cmd.hasOption("f"); String[] datasetArgs = cmd.getOptionValues("g"); for (int i = 0; i < datasetArgs.length; i += 3) { String dataset = datasetArgs[i]; // getting pre-processing String tempPreProcessing = FrameworkUtils.searchPreProcessing(dataset, s3conf, s3); if (tempPreProcessing == null) { System.out.println("No pre-processing available for " + dataset); continue; } preProcessingDataset.put(dataset, tempPreProcessing); shortDataset.add(dataset); datasetTempAtt.put(dataset, ((datasetArgs[i + 1] == "null") ? null : datasetArgs[i + 1])); datasetSpatialAtt.put(dataset, ((datasetArgs[i + 2] == "null") ? null : datasetArgs[i + 2])); datasetId.put(dataset, null); } if (shortDataset.size() == 0) { System.out.println("No datasets to process."); System.exit(0); } // getting dataset id Path path = null; FileSystem fs = null; if (s3) { path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir); fs = FileSystem.get(path.toUri(), s3conf); } else { fs = FileSystem.get(new Configuration()); path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir); } BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path))); String line = br.readLine(); while (line != null) { String[] dt = line.split("\t"); if (datasetId.containsKey(dt[0])) { datasetId.put(dt[0], dt[1]); datasetNames += dt[0] + ","; datasetIds += dt[1] + ","; } line = br.readLine(); } br.close(); if (s3) fs.close(); datasetNames = datasetNames.substring(0, datasetNames.length() - 1); datasetIds = datasetIds.substring(0, datasetIds.length() - 1); Iterator<String> it = shortDataset.iterator(); while (it.hasNext()) { String dataset = it.next(); if (datasetId.get(dataset) == null) { System.out.println("No dataset id for " + dataset); System.exit(0); } } FrameworkUtils.createDir(s3bucket + FrameworkUtils.aggregatesDir, s3conf, s3); // getting smallest resolution HashMap<String, String> tempResMap = new HashMap<String, String>(); HashMap<String, String> spatialResMap = new HashMap<String, String>(); HashMap<String, String> datasetTemporalStrMap = new HashMap<String, String>(); HashMap<String, String> datasetSpatialStrMap = new HashMap<String, String>(); HashSet<String> input = new HashSet<String>(); for (String dataset : shortDataset) { String[] datasetArray = preProcessingDataset.get(dataset).split("-"); String datasetTemporalStr = datasetArray[datasetArray.length - 2]; int datasetTemporal = utils.temporalResolution(datasetTemporalStr); String datasetSpatialStr = datasetArray[datasetArray.length - 1]; int datasetSpatial = utils.spatialResolution(datasetSpatialStr); // finding all possible resolutions String[] temporalResolutions = FrameworkUtils.getAggTempResolutions(datasetTemporal); String[] spatialResolutions = 
FrameworkUtils.getAggSpatialResolutions(datasetSpatial); String temporalResolution = ""; String spatialResolution = ""; String tempRes = ""; String spatialRes = ""; boolean dataAdded = false; for (int i = 0; i < temporalResolutions.length; i++) { for (int j = 0; j < spatialResolutions.length; j++) { temporalResolution = temporalResolutions[i]; spatialResolution = spatialResolutions[j]; String aggregatesOutputFileName = s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset + "/"; if (removeExistingFiles) { FrameworkUtils.removeFile(aggregatesOutputFileName, s3conf, s3); } if (!FrameworkUtils.fileExists(aggregatesOutputFileName, s3conf, s3)) { dataAdded = true; tempRes += temporalResolution + "-"; spatialRes += spatialResolution + "-"; } } } if (dataAdded) { input.add(s3bucket + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset)); shortDatasetAggregation.add(dataset); tempResMap.put(dataset, tempRes.substring(0, tempRes.length() - 1)); spatialResMap.put(dataset, spatialRes.substring(0, spatialRes.length() - 1)); datasetTemporalStrMap.put(dataset, datasetTemporalStr); datasetSpatialStrMap.put(dataset, datasetSpatialStr); } } if (input.isEmpty()) { System.out.println("All the input datasets have aggregates."); System.out.println("Use -f in the beginning of the command line to force the computation."); System.exit(0); } it = input.iterator(); while (it.hasNext()) { preProcessingDatasets += it.next() + ","; } Job aggJob = null; String aggregatesOutputDir = s3bucket + FrameworkUtils.aggregatesDir + "/tmp/"; String jobName = "aggregates"; FrameworkUtils.removeFile(aggregatesOutputDir, s3conf, s3); Configuration aggConf = new Configuration(); Machine machineConf = new Machine(machine, nbNodes); aggConf.set("dataset-name", datasetNames); aggConf.set("dataset-id", datasetIds); for (int i = 0; i < shortDatasetAggregation.size(); i++) { String dataset = shortDatasetAggregation.get(i); String id = datasetId.get(dataset); aggConf.set("dataset-" + id + "-temporal-resolutions", tempResMap.get(dataset)); aggConf.set("dataset-" + id + "-spatial-resolutions", spatialResMap.get(dataset)); aggConf.set("dataset-" + id + "-temporal-att", datasetTempAtt.get(dataset)); aggConf.set("dataset-" + id + "-spatial-att", datasetSpatialAtt.get(dataset)); aggConf.set("dataset-" + id + "-temporal", datasetTemporalStrMap.get(dataset)); aggConf.set("dataset-" + id + "-spatial", datasetSpatialStrMap.get(dataset)); if (s3) aggConf.set("dataset-" + id, s3bucket + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset)); else aggConf.set("dataset-" + id, FileSystem.get(new Configuration()).getHomeDirectory() + "/" + FrameworkUtils.preProcessingDir + "/" + preProcessingDataset.get(dataset)); } aggConf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); aggConf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); aggConf.set("mapreduce.jobtracker.maxtasks.perjob", "-1"); aggConf.set("mapreduce.reduce.shuffle.parallelcopies", "20"); aggConf.set("mapreduce.input.fileinputformat.split.minsize", "0"); aggConf.set("mapreduce.task.io.sort.mb", "200"); aggConf.set("mapreduce.task.io.sort.factor", "100"); machineConf.setMachineConfiguration(aggConf); if (s3) { machineConf.setMachineConfiguration(aggConf); aggConf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); aggConf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); } if (snappyCompression) { aggConf.set("mapreduce.map.output.compress", "true"); 
aggConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); //aggConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); } if (bzip2Compression) { aggConf.set("mapreduce.map.output.compress", "true"); aggConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); //aggConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); } aggJob = new Job(aggConf); aggJob.setJobName(jobName); aggJob.setMapOutputKeyClass(SpatioTemporalWritable.class); aggJob.setMapOutputValueClass(AggregationArrayWritable.class); aggJob.setOutputKeyClass(SpatioTemporalWritable.class); aggJob.setOutputValueClass(FloatArrayWritable.class); //aggJob.setOutputKeyClass(Text.class); //aggJob.setOutputValueClass(Text.class); aggJob.setMapperClass(AggregationMapper.class); aggJob.setCombinerClass(AggregationCombiner.class); aggJob.setReducerClass(AggregationReducer.class); aggJob.setNumReduceTasks(machineConf.getNumberReduces()); aggJob.setInputFormatClass(SequenceFileInputFormat.class); //aggJob.setOutputFormatClass(SequenceFileOutputFormat.class); LazyOutputFormat.setOutputFormatClass(aggJob, SequenceFileOutputFormat.class); //LazyOutputFormat.setOutputFormatClass(aggJob, TextOutputFormat.class); SequenceFileOutputFormat.setCompressOutput(aggJob, true); SequenceFileOutputFormat.setOutputCompressionType(aggJob, CompressionType.BLOCK); FileInputFormat.setInputDirRecursive(aggJob, true); FileInputFormat.setInputPaths(aggJob, preProcessingDatasets.substring(0, preProcessingDatasets.length() - 1)); FileOutputFormat.setOutputPath(aggJob, new Path(aggregatesOutputDir)); aggJob.setJarByClass(Aggregation.class); long start = System.currentTimeMillis(); aggJob.submit(); aggJob.waitForCompletion(true); System.out.println(jobName + "\t" + (System.currentTimeMillis() - start)); // moving files to right place for (String dataset : shortDatasetAggregation) { String from = s3bucket + FrameworkUtils.aggregatesDir + "/tmp/" + dataset + "/"; String to = s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset + "/"; FrameworkUtils.renameFile(from, to, s3conf, s3); } }
From source file:edu.nyu.vida.data_polygamy.feature_identification.IndexCreation.java
/** * @param args/*from ww w . ja v a 2 s . c om*/ */ @SuppressWarnings({ "deprecation" }) public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Options options = new Options(); Option forceOption = new Option("f", "force", false, "force the computation of the index and events " + "even if files already exist"); forceOption.setRequired(false); options.addOption(forceOption); Option thresholdOption = new Option("t", "use-custom-thresholds", false, "use custom thresholds for regular and rare events, defined in HDFS_HOME/" + FrameworkUtils.thresholdDir + " file"); thresholdOption.setRequired(false); options.addOption(thresholdOption); Option gOption = new Option("g", "group", true, "set group of datasets for which the indices and events" + " will be computed"); gOption.setRequired(true); gOption.setArgName("GROUP"); gOption.setArgs(Option.UNLIMITED_VALUES); options.addOption(gOption); Option machineOption = new Option("m", "machine", true, "machine identifier"); machineOption.setRequired(true); machineOption.setArgName("MACHINE"); machineOption.setArgs(1); options.addOption(machineOption); Option nodesOption = new Option("n", "nodes", true, "number of nodes"); nodesOption.setRequired(true); nodesOption.setArgName("NODES"); nodesOption.setArgs(1); options.addOption(nodesOption); Option s3Option = new Option("s3", "s3", false, "data on Amazon S3"); s3Option.setRequired(false); options.addOption(s3Option); Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true, "aws access key id; " + "this is required if the execution is on aws"); awsAccessKeyIdOption.setRequired(false); awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID"); awsAccessKeyIdOption.setArgs(1); options.addOption(awsAccessKeyIdOption); Option awsSecretAccessKeyOption = new Option("aws_key", "aws-id", true, "aws secrect access key; " + "this is required if the execution is on aws"); awsSecretAccessKeyOption.setRequired(false); awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY"); awsSecretAccessKeyOption.setArgs(1); options.addOption(awsSecretAccessKeyOption); Option bucketOption = new Option("b", "s3-bucket", true, "bucket on s3; " + "this is required if the execution is on aws"); bucketOption.setRequired(false); bucketOption.setArgName("S3-BUCKET"); bucketOption.setArgs(1); options.addOption(bucketOption); Option helpOption = new Option("h", "help", false, "display this message"); helpOption.setRequired(false); options.addOption(helpOption); HelpFormatter formatter = new HelpFormatter(); CommandLineParser parser = new PosixParser(); CommandLine cmd = null; try { cmd = parser.parse(options, args); } catch (ParseException e) { formatter.printHelp("hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true); System.exit(0); } if (cmd.hasOption("h")) { formatter.printHelp("hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true); System.exit(0); } boolean s3 = cmd.hasOption("s3"); String s3bucket = ""; String awsAccessKeyId = ""; String awsSecretAccessKey = ""; if (s3) { if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) { System.out.println( "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS."); formatter.printHelp("hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.feature_identification.IndexCreation", options, true); System.exit(0); } s3bucket = cmd.getOptionValue("b"); 
awsAccessKeyId = cmd.getOptionValue("aws_id"); awsSecretAccessKey = cmd.getOptionValue("aws_key"); } boolean snappyCompression = false; boolean bzip2Compression = false; String machine = cmd.getOptionValue("m"); int nbNodes = Integer.parseInt(cmd.getOptionValue("n")); Configuration s3conf = new Configuration(); if (s3) { s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); s3conf.set("bucket", s3bucket); } String datasetNames = ""; String datasetIds = ""; ArrayList<String> shortDataset = new ArrayList<String>(); ArrayList<String> shortDatasetIndex = new ArrayList<String>(); HashMap<String, String> datasetAgg = new HashMap<String, String>(); HashMap<String, String> datasetId = new HashMap<String, String>(); HashMap<String, HashMap<Integer, Double>> datasetRegThreshold = new HashMap<String, HashMap<Integer, Double>>(); HashMap<String, HashMap<Integer, Double>> datasetRareThreshold = new HashMap<String, HashMap<Integer, Double>>(); Path path = null; FileSystem fs = FileSystem.get(new Configuration()); BufferedReader br; boolean removeExistingFiles = cmd.hasOption("f"); boolean isThresholdUserDefined = cmd.hasOption("t"); for (String dataset : cmd.getOptionValues("g")) { // getting aggregates String[] aggregate = FrameworkUtils.searchAggregates(dataset, s3conf, s3); if (aggregate.length == 0) { System.out.println("No aggregates found for " + dataset + "."); continue; } // getting aggregates header String aggregatesHeaderFileName = FrameworkUtils.searchAggregatesHeader(dataset, s3conf, s3); if (aggregatesHeaderFileName == null) { System.out.println("No aggregate header for " + dataset); continue; } String aggregatesHeader = s3bucket + FrameworkUtils.preProcessingDir + "/" + aggregatesHeaderFileName; shortDataset.add(dataset); datasetId.put(dataset, null); if (s3) { path = new Path(aggregatesHeader); fs = FileSystem.get(path.toUri(), s3conf); } else { path = new Path(fs.getHomeDirectory() + "/" + aggregatesHeader); } br = new BufferedReader(new InputStreamReader(fs.open(path))); datasetAgg.put(dataset, br.readLine().split("\t")[1]); br.close(); if (s3) fs.close(); } if (shortDataset.size() == 0) { System.out.println("No datasets to process."); System.exit(0); } // getting dataset id if (s3) { path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir); fs = FileSystem.get(path.toUri(), s3conf); } else { path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir); } br = new BufferedReader(new InputStreamReader(fs.open(path))); String line = br.readLine(); while (line != null) { String[] dt = line.split("\t"); if (datasetId.containsKey(dt[0])) { datasetId.put(dt[0], dt[1]); datasetNames += dt[0] + ","; datasetIds += dt[1] + ","; } line = br.readLine(); } br.close(); datasetNames = datasetNames.substring(0, datasetNames.length() - 1); datasetIds = datasetIds.substring(0, datasetIds.length() - 1); Iterator<String> it = shortDataset.iterator(); while (it.hasNext()) { String dataset = it.next(); if (datasetId.get(dataset) == null) { System.out.println("No dataset id for " + dataset); System.exit(0); } } // getting user defined thresholds if (isThresholdUserDefined) { if (s3) { path = new Path(s3bucket + FrameworkUtils.thresholdDir); fs = FileSystem.get(path.toUri(), s3conf); } else { path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.thresholdDir); } br = new BufferedReader(new InputStreamReader(fs.open(path))); line = br.readLine(); while (line != null) { // getting dataset name String dataset = line.trim(); 
HashMap<Integer, Double> regThresholds = new HashMap<Integer, Double>(); HashMap<Integer, Double> rareThresholds = new HashMap<Integer, Double>(); line = br.readLine(); while ((line != null) && (line.split("\t").length > 1)) { // getting attribute ids and thresholds String[] keyVals = line.trim().split("\t"); int att = Integer.parseInt(keyVals[0].trim()); regThresholds.put(att, Double.parseDouble(keyVals[1].trim())); rareThresholds.put(att, Double.parseDouble(keyVals[2].trim())); line = br.readLine(); } datasetRegThreshold.put(dataset, regThresholds); datasetRareThreshold.put(dataset, rareThresholds); } br.close(); } if (s3) fs.close(); // datasets that will use existing merge tree ArrayList<String> useMergeTree = new ArrayList<String>(); // creating index for each spatio-temporal resolution FrameworkUtils.createDir(s3bucket + FrameworkUtils.indexDir, s3conf, s3); HashSet<String> input = new HashSet<String>(); for (String dataset : shortDataset) { String indexCreationOutputFileName = s3bucket + FrameworkUtils.indexDir + "/" + dataset + "/"; String mergeTreeFileName = s3bucket + FrameworkUtils.mergeTreeDir + "/" + dataset + "/"; if (removeExistingFiles) { FrameworkUtils.removeFile(indexCreationOutputFileName, s3conf, s3); FrameworkUtils.removeFile(mergeTreeFileName, s3conf, s3); FrameworkUtils.createDir(mergeTreeFileName, s3conf, s3); } else if (datasetRegThreshold.containsKey(dataset)) { FrameworkUtils.removeFile(indexCreationOutputFileName, s3conf, s3); if (FrameworkUtils.fileExists(mergeTreeFileName, s3conf, s3)) { useMergeTree.add(dataset); } } if (!FrameworkUtils.fileExists(indexCreationOutputFileName, s3conf, s3)) { input.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset); shortDatasetIndex.add(dataset); } } if (input.isEmpty()) { System.out.println("All the input datasets have indices."); System.out.println("Use -f in the beginning of the command line to force the computation."); System.exit(0); } String aggregateDatasets = ""; it = input.iterator(); while (it.hasNext()) { aggregateDatasets += it.next() + ","; } Job icJob = null; Configuration icConf = new Configuration(); Machine machineConf = new Machine(machine, nbNodes); String jobName = "index"; String indexOutputDir = s3bucket + FrameworkUtils.indexDir + "/tmp/"; FrameworkUtils.removeFile(indexOutputDir, s3conf, s3); icConf.set("dataset-name", datasetNames); icConf.set("dataset-id", datasetIds); if (!useMergeTree.isEmpty()) { String useMergeTreeStr = ""; for (String dt : useMergeTree) { useMergeTreeStr += dt + ","; } icConf.set("use-merge-tree", useMergeTreeStr.substring(0, useMergeTreeStr.length() - 1)); } for (int i = 0; i < shortDataset.size(); i++) { String dataset = shortDataset.get(i); String id = datasetId.get(dataset); icConf.set("dataset-" + id + "-aggregates", datasetAgg.get(dataset)); if (datasetRegThreshold.containsKey(dataset)) { HashMap<Integer, Double> regThresholds = datasetRegThreshold.get(dataset); String thresholds = ""; for (int att : regThresholds.keySet()) { thresholds += String.valueOf(att) + "-" + String.valueOf(regThresholds.get(att)) + ","; } icConf.set("regular-" + id, thresholds.substring(0, thresholds.length() - 1)); } if (datasetRareThreshold.containsKey(dataset)) { HashMap<Integer, Double> rareThresholds = datasetRareThreshold.get(dataset); String thresholds = ""; for (int att : rareThresholds.keySet()) { thresholds += String.valueOf(att) + "-" + String.valueOf(rareThresholds.get(att)) + ","; } icConf.set("rare-" + id, thresholds.substring(0, thresholds.length() - 1)); } } 
icConf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); icConf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); icConf.set("mapreduce.jobtracker.maxtasks.perjob", "-1"); icConf.set("mapreduce.reduce.shuffle.parallelcopies", "20"); icConf.set("mapreduce.input.fileinputformat.split.minsize", "0"); icConf.set("mapreduce.task.io.sort.mb", "200"); icConf.set("mapreduce.task.io.sort.factor", "100"); //icConf.set("mapreduce.task.timeout", "1800000"); machineConf.setMachineConfiguration(icConf); if (s3) { machineConf.setMachineConfiguration(icConf); icConf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); icConf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); icConf.set("bucket", s3bucket); } if (snappyCompression) { icConf.set("mapreduce.map.output.compress", "true"); icConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); //icConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); } if (bzip2Compression) { icConf.set("mapreduce.map.output.compress", "true"); icConf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); //icConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); } icJob = new Job(icConf); icJob.setJobName(jobName); icJob.setMapOutputKeyClass(AttributeResolutionWritable.class); icJob.setMapOutputValueClass(SpatioTemporalFloatWritable.class); icJob.setOutputKeyClass(AttributeResolutionWritable.class); icJob.setOutputValueClass(TopologyTimeSeriesWritable.class); //icJob.setOutputKeyClass(Text.class); //icJob.setOutputValueClass(Text.class); icJob.setMapperClass(IndexCreationMapper.class); icJob.setReducerClass(IndexCreationReducer.class); icJob.setNumReduceTasks(machineConf.getNumberReduces()); icJob.setInputFormatClass(SequenceFileInputFormat.class); //icJob.setOutputFormatClass(SequenceFileOutputFormat.class); LazyOutputFormat.setOutputFormatClass(icJob, SequenceFileOutputFormat.class); //LazyOutputFormat.setOutputFormatClass(icJob, TextOutputFormat.class); SequenceFileOutputFormat.setCompressOutput(icJob, true); SequenceFileOutputFormat.setOutputCompressionType(icJob, CompressionType.BLOCK); FileInputFormat.setInputDirRecursive(icJob, true); FileInputFormat.setInputPaths(icJob, aggregateDatasets.substring(0, aggregateDatasets.length() - 1)); FileOutputFormat.setOutputPath(icJob, new Path(indexOutputDir)); icJob.setJarByClass(IndexCreation.class); long start = System.currentTimeMillis(); icJob.submit(); icJob.waitForCompletion(true); System.out.println(jobName + "\t" + (System.currentTimeMillis() - start)); // moving files to right place for (String dataset : shortDatasetIndex) { String from = s3bucket + FrameworkUtils.indexDir + "/tmp/" + dataset + "/"; String to = s3bucket + FrameworkUtils.indexDir + "/" + dataset + "/"; FrameworkUtils.renameFile(from, to, s3conf, s3); } }
From source file:com.hortonworks.streamline.storage.tool.SQLScriptRunner.java
public static void main(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(option(1, "c", OPTION_CONFIG_FILE_PATH, "Config file path"));
    options.addOption(option(Option.UNLIMITED_VALUES, "f", OPTION_SCRIPT_PATH, "Script path to execute"));
    options.addOption(option(Option.UNLIMITED_VALUES, "m", OPTION_MYSQL_JAR_URL_PATH,
            "Mysql client jar url to download"));

    CommandLineParser parser = new BasicParser();
    CommandLine commandLine = parser.parse(options, args);

    if (!commandLine.hasOption(OPTION_CONFIG_FILE_PATH) || !commandLine.hasOption(OPTION_SCRIPT_PATH)
            || commandLine.getOptionValues(OPTION_SCRIPT_PATH).length <= 0) {
        usage(options);
        System.exit(1);
    }

    String confFilePath = commandLine.getOptionValue(OPTION_CONFIG_FILE_PATH);
    String[] scripts = commandLine.getOptionValues(OPTION_SCRIPT_PATH);
    String mysqlJarUrl = commandLine.getOptionValue(OPTION_MYSQL_JAR_URL_PATH);

    try {
        Map<String, Object> conf = Utils.readStreamlineConfig(confFilePath);
        StorageProviderConfigurationReader confReader = new StorageProviderConfigurationReader();
        StorageProviderConfiguration storageProperties = confReader.readStorageConfig(conf);

        String bootstrapDirPath = System.getProperty("bootstrap.dir");
        MySqlDriverHelper.downloadMySQLJarIfNeeded(storageProperties, bootstrapDirPath, mysqlJarUrl);

        SQLScriptRunner sqlScriptRunner = new SQLScriptRunner(storageProperties);
        try {
            sqlScriptRunner.initializeDriver();
        } catch (ClassNotFoundException e) {
            System.err.println(
                    "Driver class is not found in classpath. Please ensure that driver is in classpath.");
            System.exit(1);
        }

        for (String script : scripts) {
            sqlScriptRunner.runScriptWithReplaceDBType(script);
        }
    } catch (IOException e) {
        System.err.println("Error occurred while reading config file: " + confFilePath);
        System.exit(1);
    }
}
From source file:edu.nyu.vida.data_polygamy.pre_processing.PreProcessing.java
/** * @param args/* w w w. jav a 2s .co m*/ * @throws IOException * @throws ClassNotFoundException * @throws InterruptedException */ @SuppressWarnings("deprecation") public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Options options = new Options(); Option nameOption = new Option("dn", "name", true, "the name of the dataset"); nameOption.setRequired(true); nameOption.setArgName("DATASET NAME"); options.addOption(nameOption); Option headerOption = new Option("dh", "header", true, "the file that contains the header of the dataset"); headerOption.setRequired(true); headerOption.setArgName("DATASET HEADER FILE"); options.addOption(headerOption); Option deafultsOption = new Option("dd", "defaults", true, "the file that contains the default values of the dataset"); deafultsOption.setRequired(true); deafultsOption.setArgName("DATASET DEFAULTS FILE"); options.addOption(deafultsOption); Option tempResOption = new Option("t", "temporal", true, "desired temporal resolution (hour, day, week, or month)"); tempResOption.setRequired(true); tempResOption.setArgName("TEMPORAL RESOLUTION"); options.addOption(tempResOption); Option spatialResOption = new Option("s", "spatial", true, "desired spatial resolution (points, nbhd, zip, grid, or city)"); spatialResOption.setRequired(true); spatialResOption.setArgName("SPATIAL RESOLUTION"); options.addOption(spatialResOption); Option currentSpatialResOption = new Option("cs", "current-spatial", true, "current spatial resolution (points, nbhd, zip, grid, or city)"); currentSpatialResOption.setRequired(true); currentSpatialResOption.setArgName("CURRENT SPATIAL RESOLUTION"); options.addOption(currentSpatialResOption); Option indexResOption = new Option("i", "index", true, "indexes of the temporal and spatial attributes"); indexResOption.setRequired(true); indexResOption.setArgName("INDEX OF SPATIO-TEMPORAL RESOLUTIONS"); indexResOption.setArgs(Option.UNLIMITED_VALUES); options.addOption(indexResOption); Option machineOption = new Option("m", "machine", true, "machine identifier"); machineOption.setRequired(true); machineOption.setArgName("MACHINE"); machineOption.setArgs(1); options.addOption(machineOption); Option nodesOption = new Option("n", "nodes", true, "number of nodes"); nodesOption.setRequired(true); nodesOption.setArgName("NODES"); nodesOption.setArgs(1); options.addOption(nodesOption); Option s3Option = new Option("s3", "s3", false, "data on Amazon S3"); s3Option.setRequired(false); options.addOption(s3Option); Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true, "aws access key id; " + "this is required if the execution is on aws"); awsAccessKeyIdOption.setRequired(false); awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID"); awsAccessKeyIdOption.setArgs(1); options.addOption(awsAccessKeyIdOption); Option awsSecretAccessKeyOption = new Option("aws_key", "aws-id", true, "aws secrect access key; " + "this is required if the execution is on aws"); awsSecretAccessKeyOption.setRequired(false); awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY"); awsSecretAccessKeyOption.setArgs(1); options.addOption(awsSecretAccessKeyOption); Option bucketOption = new Option("b", "s3-bucket", true, "bucket on s3; " + "this is required if the execution is on aws"); bucketOption.setRequired(false); bucketOption.setArgName("S3-BUCKET"); bucketOption.setArgs(1); options.addOption(bucketOption); Option helpOption = new Option("h", "help", false, "display this message"); helpOption.setRequired(false); 
options.addOption(helpOption); HelpFormatter formatter = new HelpFormatter(); CommandLineParser parser = new PosixParser(); CommandLine cmd = null; try { cmd = parser.parse(options, args); } catch (ParseException e) { formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.pre_processing.PreProcessing", options, true); System.exit(0); } if (cmd.hasOption("h")) { formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.pre_processing.PreProcessing", options, true); System.exit(0); } boolean s3 = cmd.hasOption("s3"); String s3bucket = ""; String awsAccessKeyId = ""; String awsSecretAccessKey = ""; if (s3) { if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) { System.out.println( "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS."); formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.pre_processing.PreProcessing", options, true); System.exit(0); } s3bucket = cmd.getOptionValue("b"); awsAccessKeyId = cmd.getOptionValue("aws_id"); awsSecretAccessKey = cmd.getOptionValue("aws_key"); } boolean snappyCompression = false; boolean bzip2Compression = false; String machine = cmd.getOptionValue("m"); int nbNodes = Integer.parseInt(cmd.getOptionValue("n")); Configuration s3conf = new Configuration(); if (s3) { s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); s3conf.set("bucket", s3bucket); } Configuration conf = new Configuration(); Machine machineConf = new Machine(machine, nbNodes); String dataset = cmd.getOptionValue("dn"); String header = cmd.getOptionValue("dh"); String defaults = cmd.getOptionValue("dd"); String temporalResolution = cmd.getOptionValue("t"); String spatialResolution = cmd.getOptionValue("s"); String gridResolution = ""; String currentSpatialResolution = cmd.getOptionValue("cs"); if (spatialResolution.contains("grid")) { String[] res = spatialResolution.split("-"); spatialResolution = res[0]; gridResolution = res[1]; } conf.set("header", s3bucket + FrameworkUtils.dataDir + "/" + header); conf.set("defaults", s3bucket + FrameworkUtils.dataDir + "/" + defaults); conf.set("temporal-resolution", temporalResolution); conf.set("spatial-resolution", spatialResolution); conf.set("grid-resolution", gridResolution); conf.set("current-spatial-resolution", currentSpatialResolution); String[] indexes = cmd.getOptionValues("i"); String temporalPos = ""; Integer sizeSpatioTemp = 0; if (!(currentSpatialResolution.equals("points"))) { String spatialPos = ""; for (int i = 0; i < indexes.length; i++) { temporalPos += indexes[i] + ","; spatialPos += indexes[++i] + ","; sizeSpatioTemp++; } conf.set("spatial-pos", spatialPos); } else { String xPositions = "", yPositions = ""; for (int i = 0; i < indexes.length; i++) { temporalPos += indexes[i] + ","; xPositions += indexes[++i] + ","; yPositions += indexes[++i] + ","; sizeSpatioTemp++; } conf.set("xPositions", xPositions); conf.set("yPositions", yPositions); } conf.set("temporal-pos", temporalPos); conf.set("size-spatio-temporal", sizeSpatioTemp.toString()); // checking resolutions if (utils.spatialResolution(spatialResolution) < 0) { System.out.println("Invalid spatial resolution: " + spatialResolution); System.exit(-1); } if (utils.spatialResolution(spatialResolution) == FrameworkUtils.POINTS) { System.out.println("The data needs to be reduced at least to neighborhoods or grid."); System.exit(-1); } if 
(utils.spatialResolution(currentSpatialResolution) < 0) { System.out.println("Invalid spatial resolution: " + currentSpatialResolution); System.exit(-1); } if (utils.spatialResolution(currentSpatialResolution) > utils.spatialResolution(spatialResolution)) { System.out.println("The current spatial resolution is coarser than " + "the desired one. You can only navigate from a fine resolution" + " to a coarser one."); System.exit(-1); } if (utils.temporalResolution(temporalResolution) < 0) { System.out.println("Invalid temporal resolution: " + temporalResolution); System.exit(-1); } String fileName = s3bucket + FrameworkUtils.preProcessingDir + "/" + dataset + "-" + temporalResolution + "-" + spatialResolution + gridResolution; conf.set("aggregates", fileName + ".aggregates"); // making sure both files are removed, if they exist FrameworkUtils.removeFile(fileName, s3conf, s3); FrameworkUtils.removeFile(fileName + ".aggregates", s3conf, s3); /** * Hadoop Parameters * sources: http://www.slideshare.net/ImpetusInfo/ppt-on-advanced-hadoop-tuning-n-optimisation * https://cloudcelebrity.wordpress.com/2013/08/14/12-key-steps-to-keep-your-hadoop-cluster-running-strong-and-performing-optimum/ */ conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1"); conf.set("mapreduce.reduce.shuffle.parallelcopies", "20"); conf.set("mapreduce.input.fileinputformat.split.minsize", "0"); conf.set("mapreduce.task.io.sort.mb", "200"); conf.set("mapreduce.task.io.sort.factor", "100"); // using SnappyCodec for intermediate and output data ? // TODO: for now, using SnappyCodec -- what about LZO + Protocol Buffer serialization? // LZO - http://www.oberhumer.com/opensource/lzo/#download // Hadoop-LZO - https://github.com/twitter/hadoop-lzo // Protocol Buffer - https://github.com/twitter/elephant-bird // General Info - http://www.devx.com/Java/Article/47913 // Compression - http://comphadoop.weebly.com/index.html if (snappyCompression) { conf.set("mapreduce.map.output.compress", "true"); conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); } if (bzip2Compression) { conf.set("mapreduce.map.output.compress", "true"); conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); } // TODO: this is dangerous! 
if (s3) { conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); } Job job = new Job(conf); job.setJobName(dataset + "-" + temporalResolution + "-" + spatialResolution); job.setMapOutputKeyClass(MultipleSpatioTemporalWritable.class); job.setMapOutputValueClass(AggregationArrayWritable.class); job.setOutputKeyClass(MultipleSpatioTemporalWritable.class); job.setOutputValueClass(AggregationArrayWritable.class); job.setMapperClass(PreProcessingMapper.class); job.setCombinerClass(PreProcessingCombiner.class); job.setReducerClass(PreProcessingReducer.class); job.setNumReduceTasks(machineConf.getNumberReduces()); //job.setNumReduceTasks(1); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setCompressOutput(job, true); SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); FileInputFormat.setInputPaths(job, new Path(s3bucket + FrameworkUtils.dataDir + "/" + dataset)); FileOutputFormat.setOutputPath(job, new Path(fileName)); job.setJarByClass(PreProcessing.class); long start = System.currentTimeMillis(); job.submit(); job.waitForCompletion(true); System.out.println(fileName + "\t" + (System.currentTimeMillis() - start)); }
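The PreProcessing example above uses setArgs(Option.UNLIMITED_VALUES) so that a single -i/--index flag absorbs every following token, which getOptionValues later returns as a String[]. The stand-alone sketch below is not part of the Data Polygamy sources; the class name and sample values are made up for illustration, and it assumes the -i values arrive as interleaved (temporal, spatial) index pairs, as PreProcessing expects.

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;

public class UnlimitedValuesSketch {
    public static void main(String[] args) throws ParseException {
        Options options = new Options();

        // -i/--index accepts an unbounded list of values, e.g. "-i 0 3 1 4"
        Option indexOption = new Option("i", "index", true, "indexes of the temporal and spatial attributes");
        indexOption.setRequired(true);
        indexOption.setArgs(Option.UNLIMITED_VALUES);
        options.addOption(indexOption);

        CommandLine cmd = new DefaultParser().parse(options, args);

        // Every token supplied to -i comes back in one array
        String[] indexes = cmd.getOptionValues("i");

        // Split the interleaved (temporal, spatial) pairs; an even count is assumed
        StringBuilder temporalPos = new StringBuilder();
        StringBuilder spatialPos = new StringBuilder();
        for (int i = 0; i + 1 < indexes.length; i += 2) {
            temporalPos.append(indexes[i]).append(',');
            spatialPos.append(indexes[i + 1]).append(',');
        }
        System.out.println("temporal positions: " + temporalPos);
        System.out.println("spatial positions: " + spatialPos);
    }
}

Run as java UnlimitedValuesSketch -i 0 3 1 4, it prints temporal positions: 0,1, and spatial positions: 3,4, matching the comma-terminated strings the original code stores in the Hadoop Configuration.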
From source file:edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques.java
/** * @param args * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ @SuppressWarnings({ "deprecation" }) public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Options options = new Options(); Option forceOption = new Option("f", "force", false, "force the computation of the relationship " + "even if files already exist"); forceOption.setRequired(false); options.addOption(forceOption); Option g1Option = new Option("g1", "first-group", true, "set first group of datasets"); g1Option.setRequired(true); g1Option.setArgName("FIRST GROUP"); g1Option.setArgs(Option.UNLIMITED_VALUES); options.addOption(g1Option); Option g2Option = new Option("g2", "second-group", true, "set second group of datasets"); g2Option.setRequired(false); g2Option.setArgName("SECOND GROUP"); g2Option.setArgs(Option.UNLIMITED_VALUES); options.addOption(g2Option); Option machineOption = new Option("m", "machine", true, "machine identifier"); machineOption.setRequired(true); machineOption.setArgName("MACHINE"); machineOption.setArgs(1); options.addOption(machineOption); Option nodesOption = new Option("n", "nodes", true, "number of nodes"); nodesOption.setRequired(true); nodesOption.setArgName("NODES"); nodesOption.setArgs(1); options.addOption(nodesOption); Option s3Option = new Option("s3", "s3", false, "data on Amazon S3"); s3Option.setRequired(false); options.addOption(s3Option); Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true, "aws access key id; " + "this is required if the execution is on aws"); awsAccessKeyIdOption.setRequired(false); awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID"); awsAccessKeyIdOption.setArgs(1); options.addOption(awsAccessKeyIdOption); Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true, "aws secret access key; " + "this is required if the execution is on aws"); awsSecretAccessKeyOption.setRequired(false); awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY"); awsSecretAccessKeyOption.setArgs(1); options.addOption(awsSecretAccessKeyOption); Option bucketOption = new Option("b", "s3-bucket", true, "bucket on s3; " + "this is required if the execution is on aws"); bucketOption.setRequired(false); bucketOption.setArgName("S3-BUCKET"); bucketOption.setArgs(1); options.addOption(bucketOption); Option helpOption = new Option("h", "help", false, "display this message"); helpOption.setRequired(false); options.addOption(helpOption); HelpFormatter formatter = new HelpFormatter(); CommandLineParser parser = new PosixParser(); CommandLine cmd = null; try { cmd = parser.parse(options, args); } catch (ParseException e) { formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true); System.exit(0); } if (cmd.hasOption("h")) { formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true); System.exit(0); } boolean s3 = cmd.hasOption("s3"); String s3bucket = ""; String awsAccessKeyId = ""; String awsSecretAccessKey = ""; if (s3) { if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) { System.out.println( "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS."); formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques", options, true); System.exit(0); } s3bucket = cmd.getOptionValue("b"); awsAccessKeyId = 
cmd.getOptionValue("aws_id"); awsSecretAccessKey = cmd.getOptionValue("aws_key"); } boolean snappyCompression = false; boolean bzip2Compression = false; String machine = cmd.getOptionValue("m"); int nbNodes = Integer.parseInt(cmd.getOptionValue("n")); Configuration s3conf = new Configuration(); if (s3) { s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); s3conf.set("bucket", s3bucket); } Path path = null; FileSystem fs = FileSystem.get(new Configuration()); ArrayList<String> shortDataset = new ArrayList<String>(); ArrayList<String> firstGroup = new ArrayList<String>(); ArrayList<String> secondGroup = new ArrayList<String>(); HashMap<String, String> datasetAgg = new HashMap<String, String>(); boolean removeExistingFiles = cmd.hasOption("f"); String[] firstGroupCmd = cmd.getOptionValues("g1"); String[] secondGroupCmd = cmd.hasOption("g2") ? cmd.getOptionValues("g2") : new String[0]; addDatasets(firstGroupCmd, firstGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket); addDatasets(secondGroupCmd, secondGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket); if (shortDataset.size() == 0) { System.out.println("No datasets to process."); System.exit(0); } if (firstGroup.isEmpty()) { System.out.println("First group of datasets (G1) is empty. " + "Doing G1 = G2."); firstGroup.addAll(secondGroup); } if (secondGroup.isEmpty()) { System.out.println("Second group of datasets (G2) is empty. " + "Doing G2 = G1."); secondGroup.addAll(firstGroup); } // getting dataset ids String datasetNames = ""; String datasetIds = ""; HashMap<String, String> datasetId = new HashMap<String, String>(); Iterator<String> it = shortDataset.iterator(); while (it.hasNext()) { datasetId.put(it.next(), null); } if (s3) { path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir); fs = FileSystem.get(path.toUri(), s3conf); } else { path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir); } BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path))); String line = br.readLine(); while (line != null) { String[] dt = line.split("\t"); if (datasetId.containsKey(dt[0])) { datasetId.put(dt[0], dt[1]); datasetNames += dt[0] + ","; datasetIds += dt[1] + ","; } line = br.readLine(); } br.close(); if (s3) fs.close(); datasetNames = datasetNames.substring(0, datasetNames.length() - 1); datasetIds = datasetIds.substring(0, datasetIds.length() - 1); it = shortDataset.iterator(); while (it.hasNext()) { String dataset = it.next(); if (datasetId.get(dataset) == null) { System.out.println("No dataset id for " + dataset); System.exit(0); } } String firstGroupStr = ""; String secondGroupStr = ""; for (String dataset : firstGroup) { firstGroupStr += datasetId.get(dataset) + ","; } for (String dataset : secondGroup) { secondGroupStr += datasetId.get(dataset) + ","; } firstGroupStr = firstGroupStr.substring(0, firstGroupStr.length() - 1); secondGroupStr = secondGroupStr.substring(0, secondGroupStr.length() - 1); FrameworkUtils.createDir(s3bucket + FrameworkUtils.correlationTechniquesDir, s3conf, s3); String dataAttributesInputDirs = ""; String noRelationship = ""; HashSet<String> dirs = new HashSet<String>(); String dataset1; String dataset2; String datasetId1; String datasetId2; for (int i = 0; i < firstGroup.size(); i++) { for (int j = 0; j < secondGroup.size(); j++) { if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer .parseInt(datasetId.get(secondGroup.get(j)))) { dataset1 = firstGroup.get(i); dataset2 = 
secondGroup.get(j); } else { dataset1 = secondGroup.get(j); dataset2 = firstGroup.get(i); } datasetId1 = datasetId.get(dataset1); datasetId2 = datasetId.get(dataset2); if (dataset1.equals(dataset2)) continue; String correlationOutputFileName = s3bucket + FrameworkUtils.correlationTechniquesDir + "/" + dataset1 + "-" + dataset2 + "/"; if (removeExistingFiles) { FrameworkUtils.removeFile(correlationOutputFileName, s3conf, s3); } if (!FrameworkUtils.fileExists(correlationOutputFileName, s3conf, s3)) { dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset1); dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset2); } else { noRelationship += datasetId1 + "-" + datasetId2 + ","; } } } if (dirs.isEmpty()) { System.out.println("All the relationships were already computed."); System.out.println("Use -f in the beginning of the command line to force the computation."); System.exit(0); } for (String dir : dirs) { dataAttributesInputDirs += dir + ","; } Configuration conf = new Configuration(); Machine machineConf = new Machine(machine, nbNodes); String jobName = "correlation"; String correlationOutputDir = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/"; FrameworkUtils.removeFile(correlationOutputDir, s3conf, s3); for (int i = 0; i < shortDataset.size(); i++) { conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg", datasetAgg.get(shortDataset.get(i))); } for (int i = 0; i < shortDataset.size(); i++) { conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg-size", Integer.toString(datasetAgg.get(shortDataset.get(i)).split(",").length)); } conf.set("dataset-keys", datasetIds); conf.set("dataset-names", datasetNames); conf.set("first-group", firstGroupStr); conf.set("second-group", secondGroupStr); conf.set("main-dataset-id", datasetId.get(shortDataset.get(0))); if (noRelationship.length() > 0) { conf.set("no-relationship", noRelationship.substring(0, noRelationship.length() - 1)); } conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1"); conf.set("mapreduce.reduce.shuffle.parallelcopies", "20"); conf.set("mapreduce.input.fileinputformat.split.minsize", "0"); conf.set("mapreduce.task.io.sort.mb", "200"); conf.set("mapreduce.task.io.sort.factor", "100"); conf.set("mapreduce.task.timeout", "2400000"); if (s3) { machineConf.setMachineConfiguration(conf); conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); conf.set("bucket", s3bucket); } if (snappyCompression) { conf.set("mapreduce.map.output.compress", "true"); conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); } if (bzip2Compression) { conf.set("mapreduce.map.output.compress", "true"); conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); } Job job = new Job(conf); job.setJobName(jobName); job.setMapOutputKeyClass(PairAttributeWritable.class); job.setMapOutputValueClass(SpatioTemporalValueWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(CorrelationTechniquesMapper.class); 
job.setReducerClass(CorrelationTechniquesReducer.class); job.setNumReduceTasks(machineConf.getNumberReduces()); job.setInputFormatClass(SequenceFileInputFormat.class); LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); FileInputFormat.setInputDirRecursive(job, true); FileInputFormat.setInputPaths(job, dataAttributesInputDirs.substring(0, dataAttributesInputDirs.length() - 1)); FileOutputFormat.setOutputPath(job, new Path(correlationOutputDir)); job.setJarByClass(CorrelationTechniques.class); long start = System.currentTimeMillis(); job.submit(); job.waitForCompletion(true); System.out.println(jobName + "\t" + (System.currentTimeMillis() - start)); // moving files to right place for (int i = 0; i < firstGroup.size(); i++) { for (int j = 0; j < secondGroup.size(); j++) { if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer .parseInt(datasetId.get(secondGroup.get(j)))) { dataset1 = firstGroup.get(i); dataset2 = secondGroup.get(j); } else { dataset1 = secondGroup.get(j); dataset2 = firstGroup.get(i); } if (dataset1.equals(dataset2)) continue; String from = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/" + dataset1 + "-" + dataset2 + "/"; String to = s3bucket + FrameworkUtils.correlationTechniquesDir + "/" + dataset1 + "-" + dataset2 + "/"; FrameworkUtils.renameFile(from, to, s3conf, s3); } } }
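CorrelationTechniques applies the same UNLIMITED_VALUES idiom twice, once per dataset group, and then walks the cross product of the two groups. The minimal sketch below reproduces that flow outside Hadoop; it is not taken from the sources, the class and dataset names are illustrative, and the id-based ordering used by the real pairing loop is omitted.

import java.util.Arrays;
import java.util.List;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;

public class GroupPairingSketch {
    public static void main(String[] args) throws ParseException {
        Options options = new Options();

        // Both groups take an unbounded list of dataset names
        Option g1 = new Option("g1", "first-group", true, "set first group of datasets");
        g1.setRequired(true);
        g1.setArgs(Option.UNLIMITED_VALUES);
        options.addOption(g1);

        Option g2 = new Option("g2", "second-group", true, "set second group of datasets");
        g2.setRequired(false);
        g2.setArgs(Option.UNLIMITED_VALUES);
        options.addOption(g2);

        CommandLine cmd = new DefaultParser().parse(options, args);

        List<String> firstGroup = Arrays.asList(cmd.getOptionValues("g1"));
        // If -g2 is omitted, compare the first group against itself
        List<String> secondGroup = cmd.hasOption("g2")
                ? Arrays.asList(cmd.getOptionValues("g2"))
                : firstGroup;

        // Enumerate cross-group pairs, skipping self-pairs, as the correlation job does
        for (String d1 : firstGroup) {
            for (String d2 : secondGroup) {
                if (d1.equals(d2))
                    continue;
                System.out.println(d1 + "-" + d2);
            }
        }
    }
}

Invoked as java GroupPairingSketch -g1 taxi weather -g2 crime, this prints taxi-crime and weather-crime.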