List of usage examples for org.apache.hadoop.mapred JobConf setNumReduceTasks
public void setNumReduceTasks(int n)
From source file:org.mitre.bio.mapred.Fasta2SequenceFile.java
License:Open Source License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf()); boolean cleanLogs = false; List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try {// ww w. j a va 2 s . com if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else if ("-c".equals(args[i])) { cleanLogs = true; } else if ("-n".equals(args[i])) { conf.setInt(HEADER_FORMAT, Integer.parseInt(args[++i])); } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2."); return printUsage(); } return initJob(conf, other_args.get(0), other_args.get(1), cleanLogs); }
From source file:org.mitre.bio.mapred.TotalSequenceLength.java
License:Open Source License
/** * Init the job with the given parameters and run it. * * @param jobConf the hadoop job configuration * @param input input {@link SequenceFile} path * @param output output path (this will contain ONE part with the length) * @return zero if successful/* ww w. j a va 2 s .c o m*/ * @throws java.lang.Exception */ public int initJob(JobConf jobConf, String input, String output, boolean cleanLogs) throws Exception { JobConf conf = new JobConf(jobConf, TotalSequenceLength.class); conf.setJobName("TotalSequenceLength"); // We can only handle one reducer if (conf.getNumReduceTasks() != 1) { conf.setNumReduceTasks(1); LOG.info("Setting number of reducers to ONE!"); } SequenceFileInputFormat.setInputPaths(conf, new Path(input)); conf.setInputFormat(SequenceFileInputFormat.class); conf.setMapperClass(SequenceMapClass.class); conf.setOutputKeyClass(IntWritable.class); // map output key class conf.setOutputValueClass(IntWritable.class); // map output value class conf.setCombinerClass(LengthReduceClass.class); conf.setReducerClass(LengthReduceClass.class); FileOutputFormat.setOutputPath(conf, new Path(output)); JobClient.runJob(conf); if (cleanLogs) { LOG.info("removing log directory"); Path path = new Path(output, "_logs"); FileSystem fs = path.getFileSystem(jobConf); fs.delete(path, true); } return 0; }
From source file:org.mitre.bio.mapred.TotalSequenceLength.java
License:Open Source License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf()); boolean cleanLogs = false; List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try {/*from www . j a va2s . co m*/ if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else if ("-c".equals(args[i])) { cleanLogs = true; } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2."); return printUsage(); } int res = initJob(conf, other_args.get(0), other_args.get(1), cleanLogs); int cnt = this.getCount(conf, other_args.get(1)); System.out.printf("Total length of sequences is %d\n", cnt); return res; }
From source file:org.mitre.ccv.mapred.CalculateCompositionVectors.java
License:Open Source License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf()); int start = CalculateKmerCounts.DEFAULT_START; int end = CalculateKmerCounts.DEFAULT_END; boolean cleanLogs = false; // @TODO: use commons getopts List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try {/*ww w . ja va2 s . co m*/ if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else if ("-s".equals(args[i])) { start = Integer.parseInt(args[++i]); } else if ("-e".equals(args[i])) { end = Integer.parseInt(args[++i]); } else if ("-c".equals(args[i])) { cleanLogs = true; } else if ("-libjars".equals(args[i])) { conf.set("tmpjars", FileUtils.validateFiles(args[++i], conf)); URL[] libjars = FileUtils.getLibJars(conf); if (libjars != null && libjars.length > 0) { // Add libjars to client/tasks classpath conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader())); // Adds libjars to our classpath Thread.currentThread().setContextClassLoader( new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader())); } } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2."); return printUsage(); } return initJob(conf, start, end, other_args.get(0), other_args.get(1), cleanLogs); }
From source file:org.mitre.ccv.mapred.CalculateCosineDistanceMatrix.java
License:Open Source License
public int initJob(JobConf jobConf, String input, String output) throws Exception { JobConf conf = new JobConf(jobConf, CalculateCosineDistanceMatrix.class); final Path inputPath = new Path(input); final FileSystem fs = inputPath.getFileSystem(conf); final Path qInputPath = fs.makeQualified(inputPath); /**//from w ww . j av a2s. co m * Need to get all of the sample names/labels */ JobConf cacheConf = new JobConf(jobConf, CalculateCosineDistanceMatrix.class); cacheConf.setJobName("CacheNorm2MapReduce"); cacheConf.setNumReduceTasks(1); // Want ONE part file // Set up IdentityMapper SequenceFileInputFormat.setInputPaths(cacheConf, new Path(input)); cacheConf.setInputFormat(SequenceFileInputFormat.class); cacheConf.setMapperClass(Norm2Mapper.class); cacheConf.setOutputKeyClass(StringDoublePairWritable.class); cacheConf.setOutputValueClass(SparseVectorWritable.class); // Set up IdentityReducer cacheConf.setReducerClass(IdentityReducer.class); cacheConf.setOutputFormat(SequenceFileOutputFormat.class); cacheConf.setNumReduceTasks(1); Path sfPath = FileUtils.createRemoteTempPath(fs, qInputPath.getParent()); LOG.info(String.format("Generating feature vector SequenceFile path %s", sfPath.toString())); SequenceFileOutputFormat.setOutputPath(cacheConf, sfPath); JobClient.runJob(cacheConf); Path cachePath = new Path(sfPath.toString() + Path.SEPARATOR + "part-00000"); // need to know the size (the reducer might be able to send this back via the Reporter, but how do we grab that info? StringDoublePairWritable key = new StringDoublePairWritable(); int size = 0; SequenceFile.Reader reader = new SequenceFile.Reader(fs, cachePath, conf); boolean hasNext = reader.next(key); while (hasNext) { size += 1; hasNext = reader.next(key); } try { reader.close(); } catch (IOException ioe) { // closing the SequenceFile.Reader will throw an exception if the file is over some unknown size LOG.debug("Probably caused by closing the SequenceFile.Reader. All is well", ioe); } //LOG.info(String.format("Caching model file %s", qInputPath.toString())); URI listURI = new URI(fs.makeQualified(cachePath).toString()); DistributedCache.addCacheFile(listURI, conf); LOG.info(String.format("SequenceFile cache path %s (%s) with %d labels", listURI.toString(), cachePath.getName(), size)); conf.set(CACHE_PATH, cachePath.getName()); conf.setInt(DISTANCE_MATRIX_SIZE, size); /** * Main MapReduce Task of generating dot products */ LOG.info("Generating distances"); JobConf distanceConf = new JobConf(conf, CalculateCosineDistanceMatrix.class); distanceConf.setJobName("DistanceMapReduce"); // Set up distance mapper SequenceFileInputFormat.setInputPaths(distanceConf, new Path(input)); distanceConf.setInputFormat(SequenceFileInputFormat.class); distanceConf.setMapperClass(DistanceMap.class); distanceConf.setMapOutputKeyClass(Text.class); distanceConf.setMapOutputValueClass(SparseVectorWritable.class); // Set up reducer to merge lower-triangle results into a single dense distance vector distanceConf.setReducerClass(DistanceReducer.class); distanceConf.setOutputKeyClass(Text.class); distanceConf.setOutputValueClass(DenseVectorWritable.class); distanceConf.setOutputFormat(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputPath(distanceConf, new Path(output)); JobClient.runJob(distanceConf); return 0; }
From source file:org.mitre.ccv.mapred.CalculateCosineDistanceMatrix.java
License:Open Source License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf()); String phylip = null;//from w w w. jav a2 s.c o m String packedRow = null; int fractionDigits = 6; //String userJarLocation = "/path/to/jar"; //conf.setJar(userJarLocation); //were conf is the JobConf object ArrayList<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try { if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else if ("-D".equals(args[i])) { String[] props = args[++i].split("="); conf.set(props[0], props[1]); } else if ("-libjars".equals(args[i])) { conf.set("tmpjars", FileUtils.validateFiles(args[++i], conf)); URL[] libjars = FileUtils.getLibJars(conf); if (libjars != null && libjars.length > 0) { // Add libjars to client/tasks classpath conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader())); // Adds libjars to our classpath Thread.currentThread().setContextClassLoader( new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader())); } } else if ("-phylip".equals(args[i])) { phylip = args[++i]; } else if ("-packedRow".equals(args[i])) { packedRow = args[++i]; } else if ("-digits".equals(args[i])) { fractionDigits = Integer.parseInt(args[++i]); } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } boolean writeMatrix = (phylip != null || packedRow != null) ? true : false; // Make sure there are exactly 3 parameters left. if ((other_args.size() != 2 && !writeMatrix) || (other_args.size() == 0 && writeMatrix)) { System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2."); return printUsage(); } int ret = 0; if (other_args.size() == 2) { ret = this.initJob(conf, other_args.get(0), other_args.get(1)); } // check writing out in Phylip format if (ret == 0 && other_args.size() == 1 && phylip != null) { printPhylipSquare(conf, other_args.get(0), phylip, fractionDigits); } else if (ret == 0 && other_args.size() == 2 && phylip != null) { printPhylipSquare(conf, other_args.get(1), phylip, fractionDigits); } // check writing out in row packed order if (ret == 0 && other_args.size() == 1 && packedRow != null) { printRowMajorMatrix(conf, other_args.get(0), packedRow, fractionDigits); } else if (ret == 0 && other_args.size() == 2 && packedRow != null) { printRowMajorMatrix(conf, other_args.get(1), packedRow, fractionDigits); } return ret; }
From source file:org.mitre.ccv.mapred.CalculateKmerCounts.java
License:Open Source License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf()); int start = DEFAULT_START; int end = DEFAULT_END; // @TODO: use commons getopts List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try {/* w ww .j a v a 2s .c o m*/ if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else if ("-s".equals(args[i])) { start = Integer.parseInt(args[++i]); } else if ("-e".equals(args[i])) { end = Integer.parseInt(args[++i]); } else if ("-f".equals(args[i])) { conf.get(FAST_MAP, "true"); } else if ("-libjars".equals(args[i])) { conf.set("tmpjars", FileUtils.validateFiles(args[++i], conf)); URL[] libjars = FileUtils.getLibJars(conf); if (libjars != null && libjars.length > 0) { // Add libjars to client/tasks classpath conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader())); // Adds libjars to our classpath Thread.currentThread().setContextClassLoader( new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader())); } } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2."); return printUsage(); } return initJob(conf, start, end, other_args.get(0), other_args.get(1)); }
From source file:org.mitre.ccv.mapred.CalculateKmerPiValues.java
License:Open Source License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf()); boolean cleanLogs = false; Integer start = CalculateKmerCounts.DEFAULT_START; Integer end = CalculateKmerCounts.DEFAULT_END; List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try {//from w w w . j av a2 s .c o m if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else if ("-c".equals(args[i])) { cleanLogs = true; } else if ("-s".equals(args[i])) { start = Integer.parseInt(args[++i]); } else if ("-e".equals(args[i])) { end = Integer.parseInt(args[++i]); } else if ("-libjars".equals(args[i])) { conf.set("tmpjars", FileUtils.validateFiles(args[++i], conf)); URL[] libjars = FileUtils.getLibJars(conf); if (libjars != null && libjars.length > 0) { // Add libjars to client/tasks classpath conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader())); // Adds libjars to our classpath Thread.currentThread().setContextClassLoader( new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader())); } } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2."); return printUsage(); } return initJob(conf, start, end, other_args.get(0), other_args.get(1), cleanLogs); }
From source file:org.mitre.ccv.mapred.CalculateKmerProbabilities.java
License:Open Source License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf()); boolean cleanLogs = false; int start = CalculateKmerCounts.DEFAULT_START; int end = CalculateKmerCounts.DEFAULT_END; int length = -1; // @TODO: use commons getopts List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try {/*www. j a v a2s.c om*/ if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else if ("-s".equals(args[i])) { start = Integer.parseInt(args[++i]); } else if ("-e".equals(args[i])) { end = Integer.parseInt(args[++i]); } else if ("-c".equals(args[i])) { cleanLogs = true; } else if ("-l".equals(args[i])) { length = Integer.parseInt(args[++i]); } else if ("-libjars".equals(args[i])) { conf.set("tmpjars", FileUtils.validateFiles(args[++i], conf)); URL[] libjars = FileUtils.getLibJars(conf); if (libjars != null && libjars.length > 0) { // Add libjars to client/tasks classpath conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader())); // Adds libjars to our classpath Thread.currentThread().setContextClassLoader( new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader())); } } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2."); return printUsage(); } if (length <= 0) { System.out.println("ERROR: Requires total length of sequence to be > 0"); return printUsage(); } //return initJob(conf, inTable, sb.toString().trim(), new Path(other_args.get(1))); return initJob(conf, start, end, length, other_args.get(0), other_args.get(1), cleanLogs); }