List of usage examples for org.apache.hadoop.mapred JobConf setNumMapTasks
public void setNumMapTasks(int n)
From source file:org.mitre.ccv.mapred.CalculateCosineDistanceMatrix.java
License:Open Source License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf()); String phylip = null;// w w w . j a v a2 s . c o m String packedRow = null; int fractionDigits = 6; //String userJarLocation = "/path/to/jar"; //conf.setJar(userJarLocation); //were conf is the JobConf object ArrayList<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try { if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else if ("-D".equals(args[i])) { String[] props = args[++i].split("="); conf.set(props[0], props[1]); } else if ("-libjars".equals(args[i])) { conf.set("tmpjars", FileUtils.validateFiles(args[++i], conf)); URL[] libjars = FileUtils.getLibJars(conf); if (libjars != null && libjars.length > 0) { // Add libjars to client/tasks classpath conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader())); // Adds libjars to our classpath Thread.currentThread().setContextClassLoader( new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader())); } } else if ("-phylip".equals(args[i])) { phylip = args[++i]; } else if ("-packedRow".equals(args[i])) { packedRow = args[++i]; } else if ("-digits".equals(args[i])) { fractionDigits = Integer.parseInt(args[++i]); } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } boolean writeMatrix = (phylip != null || packedRow != null) ? true : false; // Make sure there are exactly 3 parameters left. if ((other_args.size() != 2 && !writeMatrix) || (other_args.size() == 0 && writeMatrix)) { System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2."); return printUsage(); } int ret = 0; if (other_args.size() == 2) { ret = this.initJob(conf, other_args.get(0), other_args.get(1)); } // check writing out in Phylip format if (ret == 0 && other_args.size() == 1 && phylip != null) { printPhylipSquare(conf, other_args.get(0), phylip, fractionDigits); } else if (ret == 0 && other_args.size() == 2 && phylip != null) { printPhylipSquare(conf, other_args.get(1), phylip, fractionDigits); } // check writing out in row packed order if (ret == 0 && other_args.size() == 1 && packedRow != null) { printRowMajorMatrix(conf, other_args.get(0), packedRow, fractionDigits); } else if (ret == 0 && other_args.size() == 2 && packedRow != null) { printRowMajorMatrix(conf, other_args.get(1), packedRow, fractionDigits); } return ret; }
From source file:org.mitre.ccv.mapred.CalculateKmerCounts.java
License:Open Source License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf()); int start = DEFAULT_START; int end = DEFAULT_END; // @TODO: use commons getopts List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try {/*from w w w . ja v a 2 s . c o m*/ if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else if ("-s".equals(args[i])) { start = Integer.parseInt(args[++i]); } else if ("-e".equals(args[i])) { end = Integer.parseInt(args[++i]); } else if ("-f".equals(args[i])) { conf.get(FAST_MAP, "true"); } else if ("-libjars".equals(args[i])) { conf.set("tmpjars", FileUtils.validateFiles(args[++i], conf)); URL[] libjars = FileUtils.getLibJars(conf); if (libjars != null && libjars.length > 0) { // Add libjars to client/tasks classpath conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader())); // Adds libjars to our classpath Thread.currentThread().setContextClassLoader( new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader())); } } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2."); return printUsage(); } return initJob(conf, start, end, other_args.get(0), other_args.get(1)); }
From source file:org.mitre.ccv.mapred.CalculateKmerPiValues.java
License:Open Source License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf()); boolean cleanLogs = false; Integer start = CalculateKmerCounts.DEFAULT_START; Integer end = CalculateKmerCounts.DEFAULT_END; List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try {//from www . ja v a 2s.c om if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else if ("-c".equals(args[i])) { cleanLogs = true; } else if ("-s".equals(args[i])) { start = Integer.parseInt(args[++i]); } else if ("-e".equals(args[i])) { end = Integer.parseInt(args[++i]); } else if ("-libjars".equals(args[i])) { conf.set("tmpjars", FileUtils.validateFiles(args[++i], conf)); URL[] libjars = FileUtils.getLibJars(conf); if (libjars != null && libjars.length > 0) { // Add libjars to client/tasks classpath conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader())); // Adds libjars to our classpath Thread.currentThread().setContextClassLoader( new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader())); } } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2."); return printUsage(); } return initJob(conf, start, end, other_args.get(0), other_args.get(1), cleanLogs); }
From source file:org.mitre.ccv.mapred.CalculateKmerProbabilities.java
License:Open Source License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf()); boolean cleanLogs = false; int start = CalculateKmerCounts.DEFAULT_START; int end = CalculateKmerCounts.DEFAULT_END; int length = -1; // @TODO: use commons getopts List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try {//from w w w .ja va 2 s . c o m if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else if ("-s".equals(args[i])) { start = Integer.parseInt(args[++i]); } else if ("-e".equals(args[i])) { end = Integer.parseInt(args[++i]); } else if ("-c".equals(args[i])) { cleanLogs = true; } else if ("-l".equals(args[i])) { length = Integer.parseInt(args[++i]); } else if ("-libjars".equals(args[i])) { conf.set("tmpjars", FileUtils.validateFiles(args[++i], conf)); URL[] libjars = FileUtils.getLibJars(conf); if (libjars != null && libjars.length > 0) { // Add libjars to client/tasks classpath conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader())); // Adds libjars to our classpath Thread.currentThread().setContextClassLoader( new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader())); } } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2."); return printUsage(); } if (length <= 0) { System.out.println("ERROR: Requires total length of sequence to be > 0"); return printUsage(); } //return initJob(conf, inTable, sb.toString().trim(), new Path(other_args.get(1))); return initJob(conf, start, end, length, other_args.get(0), other_args.get(1), cleanLogs); }
From source file:org.mitre.ccv.mapred.CalculateKmerRevisedRelativeEntropy.java
License:Open Source License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf()); boolean cleanLogs = false; // @TODO: use commons getopts List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try {//from w ww.j av a 2s .c o m if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else if ("-c".equals(args[i])) { cleanLogs = true; } else if ("-t".equals(args[i])) { conf.setBoolean(TEXT_OUTPUT, true); } else if ("-libjars".equals(args[i])) { conf.set("tmpjars", FileUtils.validateFiles(args[++i], conf)); URL[] libjars = FileUtils.getLibJars(conf); if (libjars != null && libjars.length > 0) { // Add libjars to client/tasks classpath conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader())); // Adds libjars to our classpath Thread.currentThread().setContextClassLoader( new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader())); } } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 3) { System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 3."); return printUsage(); } //return initJob(conf, inTable, sb.toString().trim(), new Path(other_args.get(1))); return initJob(conf, other_args.get(0), other_args.get(1), other_args.get(2), cleanLogs); }
From source file:org.mitre.ccv.mapred.CompleteCompositionVectors.java
License:Open Source License
/** * * The JSO data will be the same as {@link org.mitre.ccv.CompleteMatrix#jsonCompleteMatrix}, but the features * will be in a different order. This version, by default sorts, only by entropy values, whereas the * ccv in-memory version sorts by the k-mer natural order (i.e., lexigraphic). * @param argv//from w ww .ja v a2 s .c o m * @return * @throws java.lang.Exception */ @Override @SuppressWarnings("static-access") // For OptionBuilder public int run(String[] argv) throws Exception { JobConf conf = new JobConf(getConf()); String cli_title = "CompleteCompositionVectorHadoop"; int start = CalculateKmerCounts.DEFAULT_START; int end = CalculateKmerCounts.DEFAULT_END; int topkmers = 0; String input = null; String output = null; String vectorJsonOutput = null; //String kmerJsonOutput = null; boolean cleanLogs = false; /** create the Options */ Options options = new Options(); /** Hadoop Options */ options.addOption( OptionBuilder.withArgName("number").hasArg(true).withDescription("number of maps").create("m")); options.addOption( OptionBuilder.withArgName("number").hasArg(true).withDescription("number of reducers").create("r")); // org.hadoop.util.GenericOptionsParser should captures this, but it doesn't options.addOption(OptionBuilder.withArgName("property=value").hasArg(true).withValueSeparator() .withDescription("use value for given property").create("D")); /** CompleteCompositionVector Options */ options.addOption(OptionBuilder.withArgName("number").hasArg(true) .withDescription("number of top k-mers to use in calculations").create("topKmers")); options.addOption(OptionBuilder.withArgName("start").hasArg(true).withDescription("starting length of tile") .create("start")); options.addOption(OptionBuilder.withArgName("end").hasArg(true).withDescription("ending length of title") .create("end")); options.addOption(OptionBuilder.hasArg(true).withArgName("file") .withDescription("JSON file to write out k-mers to").create("kmersfile")); options.addOption(OptionBuilder.hasArg(true).withArgName("file") .withDescription("JSON file to write out feature vectors to " + "(Overrides kmersout, only one file will be written).") .create("vectorsfile")); options.addOption(OptionBuilder.withArgName("number").hasArg(true) .withDescription("What preference to use: 0-min 1-median 2-avg(min,med): default is median") .create("prefval")); options.addOption(OptionBuilder.withArgName("help").hasArg(false).withDescription("print this message") .create("help")); // automatically generate the help statement HelpFormatter formatter = new HelpFormatter(); //GenericOptionsParser gop = new GenericOptionsParser(conf, options, argv); GenericOptionsParser gop = new GenericOptionsParser(conf, argv); String[] remaining_args = gop.getRemainingArgs(); // create the parser CommandLineParser parser = new GnuParser(); //CommandLine line = gop.getCommandLine(); String[] other_args = new String[] {}; try { CommandLine line = parser.parse(options, remaining_args); other_args = line.getArgs(); // Make sure there is a parameter left. if (other_args.length == 0) { System.out.println(cli_title); System.out.println("Missing input path!"); formatter.printHelp("hccv [options] <input> [<output>] ", options); GenericOptionsParser.printGenericCommandUsage(System.out); return -1; } Option[] opts = line.getOptions(); if (line.hasOption("help")) { System.out.println(cli_title); formatter.printHelp("hccv [options] <input> [<output>] ", options); GenericOptionsParser.printGenericCommandUsage(System.out); return -1; } // could also use line.iterator() for (Option opt : opts) { if (opt.getOpt().equals("m")) { conf.setNumMapTasks(Integer.parseInt(opt.getValue())); } if (opt.getOpt().equals("r")) { conf.setNumReduceTasks(Integer.parseInt(opt.getValue())); } if (opt.getOpt().equals("D")) { // We can have multiple properties we want to set String[] properties = opt.getValues(); for (String property : properties) { String[] keyval = property.split("="); conf.set(keyval[0], keyval[1]); } } if (opt.getOpt().equals("start")) { start = Integer.parseInt(opt.getValue()); } if (opt.getOpt().equals("end")) { end = Integer.parseInt(opt.getValue()); } if (opt.getOpt().equals("topKmers")) { topkmers = Integer.parseInt(opt.getValue()); } if (opt.getOpt().equals("vectorsfile")) { vectorJsonOutput = opt.getValue(); } } } catch (ParseException e) { LOG.warn("options parsing faild: " + e.getMessage()); System.out.println(cli_title); formatter.printHelp("hccv [options] <input> [<output>] ", options); GenericOptionsParser.printGenericCommandUsage(System.out); } if (start <= 2) { throw new IllegalArgumentException("Value of 'start' argument must be larger than 2"); } input = other_args[0]; if (other_args.length < 2) { output = input + "_" + FileUtils.getSimpleDate(); } else { output = other_args[2]; } /** * Check output path. Either needs to exist as a directory or not exist */ Path outputPath = new Path(output); FileSystem fs = outputPath.getFileSystem(conf); if (!fs.exists(outputPath)) { fs.mkdirs(outputPath); } else if (fs.exists(outputPath) || !fs.getFileStatus(outputPath).isDir()) { LOG.fatal(String.format("Output directory %s already exists", outputPath.makeQualified(fs))); throw new FileAlreadyExistsException( String.format("Output directory %s already exists", outputPath.makeQualified(fs))); } String outputDir = output + Path.SEPARATOR; int res; /** * Zero, CalculateCompositionVectors */ LOG.info("Starting CalculateCompositionVectors Map-Reduce job"); CalculateCompositionVectors cv = new CalculateCompositionVectors(); res = cv.initJob(conf, start, end, input, outputDir + COMPOSITION_VECTORS, cleanLogs); if (res != 0) { LOG.info("CalculateCompositionVectors returned non-zero result!"); return res; } // We can stop now or continue to reduce dimensionallity using RRE or other means /** * First, CalculateKmerCounts */ LOG.info("Starting CalculateKmerCounts Map-Reduce job"); // FastMap option for CalculateKmers!?! CalculateKmerCounts ckc = new CalculateKmerCounts(); res = ckc.initJob(conf, start, end, input, outputDir + KMER_COUNTS); if (res != 0) { LOG.fatal("CalculateKmerCounts returned non-zero result!"); return res; } /** * Second, TotalSequenceLength */ LOG.info("Starting TotalSequenceLength Map-Reduce job"); TotalSequenceLength tsl = new TotalSequenceLength(); res = tsl.initJob(conf, input, outputDir + TOTAL_LENGTH, cleanLogs); if (res != 0) { LOG.fatal("TotalSequenceLength returned non-zero result!"); return res; } int length = tsl.getCount(conf, outputDir + TOTAL_LENGTH); if (length < 3) { LOG.fatal("TotalSequenceLength returned a total sequence length of less than 3."); return -1; } else { LOG.info(String.format("TotalSequenceLength returned a total sequence length of %d.", length)); } /** * Third, CalculateKmerProbabilities */ LOG.info("Starting CalculateKmerProbabilities Map-Reduce job"); CalculateKmerProbabilities ckp = new CalculateKmerProbabilities(); res = ckp.initJob(conf, start, end, length, outputDir + KMER_COUNTS, outputDir + KMER_PROBABILITIES, cleanLogs); if (res != 0) { LOG.fatal("CalculateKmerProbabilities returned non-zero result!"); return res; } /** * Fourth, InvertKmerProbabilities */ LOG.info("Starting InvertKmerProbabilities Map-Reduce job"); InvertKmerProbabilities ikp = new InvertKmerProbabilities(); res = ikp.initJob(conf, outputDir + KMER_PROBABILITIES, outputDir + INVERTED_KMER_PROBABILITIES, cleanLogs); if (res != 0) { LOG.fatal("InvertKmerProbabilities returned non-zero result!"); return res; } /** * Fifth, CalculateKmerPiValues */ LOG.info("Starting CalculateKmerPiValues Map-Reduce job"); CalculateKmerPiValues kpv = new CalculateKmerPiValues(); res = kpv.initJob(conf, start, end, outputDir + INVERTED_KMER_PROBABILITIES, outputDir + KMER_PI_VALUES, cleanLogs); if (res != 0) { LOG.fatal("CalculateKmerPiValues returned non-zero result!"); return res; } /** * Sixth,CalculateKmerRevisedRelativeEntropy */ LOG.info("Starting CalculateKmerRevisedRelativeEntropy Map-Reduce job"); CalculateKmerRevisedRelativeEntropy krre = new CalculateKmerRevisedRelativeEntropy(); res = krre.initJob(conf, outputDir + KMER_PI_VALUES, outputDir + COMPOSITION_VECTORS, outputDir + ENTROPY_VALUES, cleanLogs); if (res != 0) { LOG.fatal("CalculateKmerRevisedRelativeEntropy returned non-zero result!"); return res; } /** * Seventh, SortKmerRevisedRelativeEntropies */ SortKmerRevisedRelativeEntropies srre = new SortKmerRevisedRelativeEntropies(); res = srre.initJob(conf, outputDir + ENTROPY_VALUES, outputDir + SORTED_ENTROPY_VALUES, cleanLogs); if (res != 0) { LOG.fatal("SortKmerRevisedRelativeEntropies returned non-zero result!"); return res; } /** * Eigth, GenerateFeatureVectors * * Generate a flatten list to add to the cache to be distributed to the map-tasks. */ Path listOutputPath = new Path(outputDir + Integer.toString(topkmers) + KMER_ENTROPY_SET); LOG.info(String.format("Loading %d sorted k-mers from %s to %s", topkmers, outputDir + SORTED_ENTROPY_VALUES, listOutputPath.toString())); int num = CompleteCompositionVectorUtils.flattenKmerEntropySequenceFile(conf, topkmers, outputDir + SORTED_ENTROPY_VALUES, listOutputPath.toString(), cleanLogs); if (num != topkmers) { LOG.fatal(String.format("Requested %d k-mers, but got %d. Using %d", topkmers, num, num)); topkmers = num; } GenerateFeatureVectors fv = new GenerateFeatureVectors(); res = fv.initJob(conf, listOutputPath.toString(), topkmers, outputDir + COMPOSITION_VECTORS, outputDir + FEATURE_VECTORS, cleanLogs); if (res != 0) { LOG.fatal("GenerateFeatureVectors returned non-zero result!"); return res; } /** * Save feature vectors, features (k-mers), and properties to a JSON file. * * The data will be the same as {@link org.mitre.ccv.CompleteMatrix#jsonCompleteMatrix}, but the features * will be in a different order. This version, by default sorts, only by entropy values, whereas the * ccv in-memory version sorts by the k-mer natural order (i.e., lexigraphic). */ if (vectorJsonOutput != null && vectorJsonOutput.length() > 0) { LOG.info("Writing features out to " + vectorJsonOutput); CompleteCompositionVectorUtils.featureVectors2Json(conf, start, end, topkmers, outputDir + SORTED_ENTROPY_VALUES, outputDir + FEATURE_VECTORS, vectorJsonOutput); } LOG.info("All done generating complete composition vectors and feature vectors."); return res; }
From source file:org.mitre.ccv.mapred.CompleteCompositionVectorUtils.java
License:Open Source License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf()); ArrayList<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try {//from w w w. j a v a 2 s . c o m if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else if ("-libjars".equals(args[i])) { conf.set("tmpjars", FileUtils.validateFiles(args[++i], conf)); URL[] libjars = FileUtils.getLibJars(conf); if (libjars != null && libjars.length > 0) { // Add libjars to client/tasks classpath conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader())); // Adds libjars to our classpath Thread.currentThread().setContextClassLoader( new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader())); } } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Make sure there are exactly 2 parameters left. if (other_args.size() < 1) { System.out.println("ERROR: Require ONE argument!"); return printUsage(); } String cmd = other_args.get(0); if (cmd.equals("featureVectors2Json")) { if (other_args.size() >= 7) { try { int start = Integer.parseInt(other_args.get(1)); int end = Integer.parseInt(other_args.get(2)); int kmers = Integer.parseInt(other_args.get(3)); featureVectors2Json(conf, start, end, kmers, other_args.get(4), other_args.get(5), other_args.get(6)); } catch (NumberFormatException except) { System.err.println("Woops. Error converting number!"); return -1; } } else { System.err.println("We need more arguments!"); return -1; } } else if (cmd.equals("featureVectors2rows")) { int digits = 6; if (other_args.size() > 3) { try { digits = Integer.parseInt(other_args.get(1)); featureVectors2RowMajorMatrix(conf, other_args.get(2), other_args.get(3), digits); } catch (NumberFormatException except) { System.err.println("Woops. Error converting number!"); return -1; } } else { featureVectors2RowMajorMatrix(conf, other_args.get(1), other_args.get(2), digits); } } else { System.out.println("Unknown command:" + cmd); return -1; } return 0; }
From source file:org.mitre.ccv.mapred.GenerateFeatureVectors.java
License:Open Source License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf()); int cardinality = Integer.MAX_VALUE; boolean cleanLogs = false; String listInput = null;//w w w.j a va 2 s .com // @TODO: use commons getopts, org.apache.hadoop.util.GenericOptionsParser used it ArrayList<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try { if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else if ("-c".equals(args[i])) { cleanLogs = true; } else if ("-l".equals(args[i])) { listInput = args[++i]; } else if ("-t".equals(args[i])) { cardinality = Integer.parseInt(args[++i]); } else if ("-libjars".equals(args[i])) { conf.set("tmpjars", FileUtils.validateFiles(args[++i], conf)); URL[] libjars = FileUtils.getLibJars(conf); if (libjars != null && libjars.length > 0) { // Add libjars to client/tasks classpath conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader())); // Adds libjars to our classpath Thread.currentThread().setContextClassLoader( new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader())); } } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 3."); return printUsage(); } if (listInput == null || listInput.length() == 0) { System.out.println("Need kmer sequence file path!"); return printUsage(); } long now = System.currentTimeMillis(); Path listInputPath = new Path(listInput); Path listOutputPath = new Path(listInputPath.getParent(), "kmer_" + Long.toHexString(now) + "_tmp"); LOG.info(String.format("Loading %d sorted k-mers from %s to %s", cardinality, listInputPath.toString(), listOutputPath.toString())); int num = CompleteCompositionVectorUtils.flattenKmerEntropySequenceFile(conf, cardinality, listInputPath.toString(), listOutputPath.toString(), cleanLogs); initJob(conf, listOutputPath.toString(), num, other_args.get(0), other_args.get(1), cleanLogs); return 0; }
From source file:org.mitre.ccv.mapred.InvertKmerProbabilities.java
License:Open Source License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf()); boolean cleanLogs = false; List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try {/* ww w. j ava 2 s .co m*/ if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else if ("-c".equals(args[i])) { cleanLogs = true; } else if ("-libjars".equals(args[i])) { conf.set("tmpjars", FileUtils.validateFiles(args[++i], conf)); URL[] libjars = FileUtils.getLibJars(conf); if (libjars != null && libjars.length > 0) { // Add libjars to client/tasks classpath conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader())); // Adds libjars to our classpath Thread.currentThread().setContextClassLoader( new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader())); } } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2."); return printUsage(); } return initJob(conf, other_args.get(0), other_args.get(1), cleanLogs); }
From source file:org.mitre.ccv.mapred.SortKmerRevisedRelativeEntropies.java
License:Open Source License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf()); boolean cleanLogs = false; // @TODO: use commons getopts, org.apache.hadoop.util.GenericOptionsParser used it ArrayList<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try {// w w w .j ava2 s . c o m if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else if ("-c".equals(args[i])) { cleanLogs = true; } else if ("-t".equals(args[i])) { conf.setBoolean(TEXT_OUTPUT, true); } else if ("-libjars".equals(args[i])) { conf.set("tmpjars", FileUtils.validateFiles(args[++i], conf)); URL[] libjars = FileUtils.getLibJars(conf); if (libjars != null && libjars.length > 0) { // Add libjars to client/tasks classpath conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader())); // Adds libjars to our classpath Thread.currentThread().setContextClassLoader( new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader())); } } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 3."); return printUsage(); } return initJob(conf, other_args.get(0), other_args.get(1), cleanLogs); }