Example usage for org.apache.hadoop.mapred JobConf setNumReduceTasks

List of usage examples for org.apache.hadoop.mapred JobConf setNumReduceTasks

Introduction

This page collects usage examples for the method org.apache.hadoop.mapred.JobConf.setNumReduceTasks.

Prototype

public void setNumReduceTasks(int n) 

Source Link

Document

Set the requisite number of reduce tasks for this job.

Usage

From source file:org.mitre.bio.mapred.Fasta2SequenceFile.java

License:Open Source License

/**
 * Parses the command line and starts the job through {@code initJob}.
 *
 * <p>Recognised options: {@code -m <n>} (number of map tasks), {@code -r <n>}
 * (number of reduce tasks), {@code -c} (sets the clean-logs flag handed to
 * {@code initJob}) and {@code -n <n>} (stored under {@code HEADER_FORMAT}).
 * Every other argument is treated as positional; exactly two are required.
 *
 * @param args raw command-line arguments
 * @return the result of {@code initJob}, or of {@code printUsage()} when the
 *         arguments are invalid
 * @throws Exception whatever {@code initJob} throws
 */
@Override
public int run(String[] args) throws Exception {
    final JobConf jobConf = new JobConf(getConf());
    final List<String> positional = new ArrayList<String>();
    boolean removeLogs = false;

    int idx = 0;
    while (idx < args.length) {
        try {
            final String option = args[idx];
            if ("-m".equals(option)) {
                jobConf.setNumMapTasks(Integer.parseInt(args[++idx]));
            } else if ("-r".equals(option)) {
                jobConf.setNumReduceTasks(Integer.parseInt(args[++idx]));
            } else if ("-c".equals(option)) {
                removeLogs = true;
            } else if ("-n".equals(option)) {
                jobConf.setInt(HEADER_FORMAT, Integer.parseInt(args[++idx]));
            } else {
                positional.add(option);
            }
        } catch (NumberFormatException badNumber) {
            // idx already points at the value that failed to parse.
            System.out.println("ERROR: Integer expected instead of " + args[idx]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException missingValue) {
            // idx ran past the end, so the option lacking its value is one slot back.
            System.out.println("ERROR: Required parameter missing from " + args[idx - 1]);
            return printUsage();
        }
        idx++;
    }

    // Exactly two positional arguments must remain.
    final int remaining = positional.size();
    if (remaining != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + remaining + " instead of 2.");
        return printUsage();
    }

    final String first = positional.get(0);
    final String second = positional.get(1);
    return initJob(jobConf, first, second, removeLogs);
}

From source file:org.mitre.bio.mapred.TotalSequenceLength.java

License:Open Source License

/**
 * Init the job with the given parameters and run it.
 *
 * @param jobConf   the hadoop job configuration
 * @param input     input {@link SequenceFile} path
 * @param output    output path (this will contain ONE part with the length)
 * @return zero if successful/* ww  w. j  a  va  2 s  .c  o m*/
 * @throws java.lang.Exception
 */
public int initJob(JobConf jobConf, String input, String output, boolean cleanLogs) throws Exception {
    JobConf conf = new JobConf(jobConf, TotalSequenceLength.class);
    conf.setJobName("TotalSequenceLength");

    // We can only handle one reducer
    if (conf.getNumReduceTasks() != 1) {
        conf.setNumReduceTasks(1);
        LOG.info("Setting number of reducers to ONE!");
    }

    SequenceFileInputFormat.setInputPaths(conf, new Path(input));
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapperClass(SequenceMapClass.class);
    conf.setOutputKeyClass(IntWritable.class); // map output key class
    conf.setOutputValueClass(IntWritable.class); // map output value class

    conf.setCombinerClass(LengthReduceClass.class);
    conf.setReducerClass(LengthReduceClass.class);
    FileOutputFormat.setOutputPath(conf, new Path(output));

    JobClient.runJob(conf);

    if (cleanLogs) {
        LOG.info("removing log directory");
        Path path = new Path(output, "_logs");
        FileSystem fs = path.getFileSystem(jobConf);
        fs.delete(path, true);
    }

    return 0;
}

From source file:org.mitre.bio.mapred.TotalSequenceLength.java

License:Open Source License

/**
 * Parses the command line, runs the job through {@code initJob} and prints the
 * total reported by {@code getCount}.
 *
 * <p>Recognised options: {@code -m <n>} (number of map tasks), {@code -r <n>}
 * (number of reduce tasks) and {@code -c} (sets the clean-logs flag handed to
 * {@code initJob}). Every other argument is positional; exactly two are required.
 *
 * @param args raw command-line arguments
 * @return the result of {@code initJob}, or of {@code printUsage()} when the
 *         arguments are invalid
 * @throws Exception whatever {@code initJob} or {@code getCount} throws
 */
@Override
public int run(String[] args) throws Exception {
    final JobConf jobConf = new JobConf(getConf());
    final List<String> positional = new ArrayList<String>();
    boolean removeLogs = false;

    int idx = 0;
    while (idx < args.length) {
        try {
            final String option = args[idx];
            if ("-m".equals(option)) {
                jobConf.setNumMapTasks(Integer.parseInt(args[++idx]));
            } else if ("-r".equals(option)) {
                jobConf.setNumReduceTasks(Integer.parseInt(args[++idx]));
            } else if ("-c".equals(option)) {
                removeLogs = true;
            } else {
                positional.add(option);
            }
        } catch (NumberFormatException badNumber) {
            // idx already points at the value that failed to parse.
            System.out.println("ERROR: Integer expected instead of " + args[idx]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException missingValue) {
            // idx ran past the end, so the option lacking its value is one slot back.
            System.out.println("ERROR: Required parameter missing from " + args[idx - 1]);
            return printUsage();
        }
        idx++;
    }

    // Exactly two positional arguments must remain.
    final int remaining = positional.size();
    if (remaining != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + remaining + " instead of 2.");
        return printUsage();
    }

    final String outputArg = positional.get(1);
    final int status = initJob(jobConf, positional.get(0), outputArg, removeLogs);
    final int total = this.getCount(jobConf, outputArg);
    System.out.printf("Total length of sequences is %d\n", total);
    return status;
}

From source file:org.mitre.ccv.mapred.CalculateCompositionVectors.java

License:Open Source License

/**
 * Parses the command line and starts the job through {@code initJob}.
 *
 * <p>Recognised options: {@code -m <n>} (number of map tasks), {@code -r <n>}
 * (number of reduce tasks), {@code -s <n>} / {@code -e <n>} (start and end
 * values, defaulting to the {@code CalculateKmerCounts} constants), {@code -c}
 * (sets the clean-logs flag handed to {@code initJob}) and
 * {@code -libjars <jars>}. Every other argument is positional; exactly two are
 * required.
 *
 * @param args raw command-line arguments
 * @return the result of {@code initJob}, or of {@code printUsage()} when the
 *         arguments are invalid
 * @throws Exception whatever option handling or {@code initJob} throws
 */
@Override
public int run(String[] args) throws Exception {
    final JobConf jobConf = new JobConf(getConf());
    final List<String> positional = new ArrayList<String>();
    int first = CalculateKmerCounts.DEFAULT_START;
    int last = CalculateKmerCounts.DEFAULT_END;
    boolean removeLogs = false;

    // @TODO: use commons getopts
    int idx = 0;
    while (idx < args.length) {
        try {
            final String option = args[idx];
            if ("-m".equals(option)) {
                jobConf.setNumMapTasks(Integer.parseInt(args[++idx]));
            } else if ("-r".equals(option)) {
                jobConf.setNumReduceTasks(Integer.parseInt(args[++idx]));
            } else if ("-s".equals(option)) {
                first = Integer.parseInt(args[++idx]);
            } else if ("-e".equals(option)) {
                last = Integer.parseInt(args[++idx]);
            } else if ("-c".equals(option)) {
                removeLogs = true;
            } else if ("-libjars".equals(option)) {
                jobConf.set("tmpjars", FileUtils.validateFiles(args[++idx], jobConf));

                final URL[] jars = FileUtils.getLibJars(jobConf);
                if (jars != null && jars.length > 0) {
                    // Expose the jars through the configuration's class loader ...
                    final ClassLoader confLoader = new URLClassLoader(jars, jobConf.getClassLoader());
                    jobConf.setClassLoader(confLoader);
                    // ... and through this thread's context class loader.
                    final Thread current = Thread.currentThread();
                    current.setContextClassLoader(new URLClassLoader(jars, current.getContextClassLoader()));
                }
            } else {
                positional.add(option);
            }
        } catch (NumberFormatException badNumber) {
            // idx already points at the value that failed to parse.
            System.out.println("ERROR: Integer expected instead of " + args[idx]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException missingValue) {
            // idx ran past the end, so the option lacking its value is one slot back.
            System.out.println("ERROR: Required parameter missing from " + args[idx - 1]);
            return printUsage();
        }
        idx++;
    }

    // Exactly two positional arguments must remain.
    final int remaining = positional.size();
    if (remaining != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + remaining + " instead of 2.");
        return printUsage();
    }

    return initJob(jobConf, first, last, positional.get(0), positional.get(1), removeLogs);
}

From source file:org.mitre.ccv.mapred.CalculateCosineDistanceMatrix.java

License:Open Source License

public int initJob(JobConf jobConf, String input, String output) throws Exception {
    JobConf conf = new JobConf(jobConf, CalculateCosineDistanceMatrix.class);

    final Path inputPath = new Path(input);
    final FileSystem fs = inputPath.getFileSystem(conf);
    final Path qInputPath = fs.makeQualified(inputPath);

    /**//from   w ww . j av a2s.  co m
     * Need to get all of the sample names/labels
     */
    JobConf cacheConf = new JobConf(jobConf, CalculateCosineDistanceMatrix.class);
    cacheConf.setJobName("CacheNorm2MapReduce");
    cacheConf.setNumReduceTasks(1); // Want ONE part file

    // Set up IdentityMapper
    SequenceFileInputFormat.setInputPaths(cacheConf, new Path(input));
    cacheConf.setInputFormat(SequenceFileInputFormat.class);
    cacheConf.setMapperClass(Norm2Mapper.class);
    cacheConf.setOutputKeyClass(StringDoublePairWritable.class);
    cacheConf.setOutputValueClass(SparseVectorWritable.class);

    // Set up IdentityReducer
    cacheConf.setReducerClass(IdentityReducer.class);
    cacheConf.setOutputFormat(SequenceFileOutputFormat.class);
    cacheConf.setNumReduceTasks(1);
    Path sfPath = FileUtils.createRemoteTempPath(fs, qInputPath.getParent());
    LOG.info(String.format("Generating feature vector SequenceFile path %s", sfPath.toString()));
    SequenceFileOutputFormat.setOutputPath(cacheConf, sfPath);
    JobClient.runJob(cacheConf);

    Path cachePath = new Path(sfPath.toString() + Path.SEPARATOR + "part-00000");

    // need to know the size (the reducer might be able to send this back via the Reporter, but how do we grab that info?
    StringDoublePairWritable key = new StringDoublePairWritable();
    int size = 0;
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, cachePath, conf);
    boolean hasNext = reader.next(key);
    while (hasNext) {
        size += 1;
        hasNext = reader.next(key);
    }
    try {
        reader.close();
    } catch (IOException ioe) {
        // closing the SequenceFile.Reader will throw an exception if the file is over some unknown size
        LOG.debug("Probably caused by closing the SequenceFile.Reader. All is well", ioe);
    }

    //LOG.info(String.format("Caching model file %s", qInputPath.toString()));
    URI listURI = new URI(fs.makeQualified(cachePath).toString());
    DistributedCache.addCacheFile(listURI, conf);
    LOG.info(String.format("SequenceFile cache path %s (%s) with %d labels", listURI.toString(),
            cachePath.getName(), size));
    conf.set(CACHE_PATH, cachePath.getName());
    conf.setInt(DISTANCE_MATRIX_SIZE, size);

    /**
     * Main MapReduce Task of generating dot products
     */
    LOG.info("Generating distances");
    JobConf distanceConf = new JobConf(conf, CalculateCosineDistanceMatrix.class);
    distanceConf.setJobName("DistanceMapReduce");
    // Set up distance mapper
    SequenceFileInputFormat.setInputPaths(distanceConf, new Path(input));
    distanceConf.setInputFormat(SequenceFileInputFormat.class);
    distanceConf.setMapperClass(DistanceMap.class);
    distanceConf.setMapOutputKeyClass(Text.class);
    distanceConf.setMapOutputValueClass(SparseVectorWritable.class);

    // Set up reducer to merge lower-triangle results into a single dense distance vector
    distanceConf.setReducerClass(DistanceReducer.class);
    distanceConf.setOutputKeyClass(Text.class);
    distanceConf.setOutputValueClass(DenseVectorWritable.class);
    distanceConf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(distanceConf, new Path(output));
    JobClient.runJob(distanceConf);

    return 0;
}

From source file:org.mitre.ccv.mapred.CalculateCosineDistanceMatrix.java

License:Open Source License

/**
 * Parses the command line, optionally runs the distance job, and optionally
 * writes the matrix in Phylip and/or row-packed form.
 *
 * <p>Recognised options: {@code -m <n>} (number of map tasks), {@code -r <n>}
 * (number of reduce tasks), {@code -D key=value} (sets a configuration
 * property), {@code -libjars <jars>}, {@code -phylip <arg>},
 * {@code -packedRow <arg>} and {@code -digits <n>} (fraction digits, default 6).
 *
 * <p>Accepted positional forms:
 * <ul>
 *   <li>two arguments: {@code initJob} is run on them, then the second argument
 *       is handed to the print helpers if {@code -phylip}/{@code -packedRow}
 *       was given;</li>
 *   <li>one argument together with {@code -phylip} and/or {@code -packedRow}:
 *       no job is run and that argument is handed to the print helpers
 *       (presumably the path of an existing result; confirm against
 *       {@code printPhylipSquare}/{@code printRowMajorMatrix}).</li>
 * </ul>
 *
 * @param args raw command-line arguments
 * @return the result of {@code initJob} (zero when no job was run), or of
 *         {@code printUsage()} when the arguments are invalid
 * @throws Exception whatever option handling, {@code initJob} or the print helpers throw
 */
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf());

    String phylip = null;
    String packedRow = null;
    int fractionDigits = 6;

    ArrayList<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-D".equals(args[i])) {
                // Split on the first '=' only, so the value itself may contain '='.
                String[] props = args[++i].split("=", 2);
                if (props.length != 2) {
                    System.out.println("ERROR: Expected key=value instead of " + args[i]);
                    return printUsage();
                }
                conf.set(props[0], props[1]);
            } else if ("-libjars".equals(args[i])) {
                conf.set("tmpjars", FileUtils.validateFiles(args[++i], conf));

                URL[] libjars = FileUtils.getLibJars(conf);
                if (libjars != null && libjars.length > 0) {
                    // Add libjars to client/tasks classpath
                    conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader()));
                    // Adds libjars to our classpath
                    Thread.currentThread().setContextClassLoader(
                            new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader()));
                }
            } else if ("-phylip".equals(args[i])) {
                phylip = args[++i];
            } else if ("-packedRow".equals(args[i])) {
                packedRow = args[++i];
            } else if ("-digits".equals(args[i])) {
                fractionDigits = Integer.parseInt(args[++i]);
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            // i already points at the value that failed to parse.
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            // i ran past the end, so the option lacking its value is one slot back.
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    final boolean writeMatrix = phylip != null || packedRow != null;
    final int remaining = other_args.size();

    // Two positional parameters are always valid; a single one only when a matrix
    // is to be written. Anything else is rejected instead of being silently ignored.
    if (remaining != 2 && !(remaining == 1 && writeMatrix)) {
        System.out.println("ERROR: Wrong number of parameters: " + remaining + " instead of 2.");

        return printUsage();
    }

    int ret = 0;
    if (remaining == 2) {
        ret = this.initJob(conf, other_args.get(0), other_args.get(1));
    }

    if (ret == 0 && writeMatrix) {
        // The last positional argument is the one handed to the print helpers:
        // the job's output when a job was run, otherwise the single argument given.
        final String matrixSource = other_args.get(remaining - 1);
        if (phylip != null) {
            printPhylipSquare(conf, matrixSource, phylip, fractionDigits);
        }
        if (packedRow != null) {
            printRowMajorMatrix(conf, matrixSource, packedRow, fractionDigits);
        }
    }

    return ret;
}

From source file:org.mitre.ccv.mapred.CalculateKmerCounts.java

License:Open Source License

/**
 * Parses the command line and starts the job through {@code initJob}.
 *
 * <p>Recognised options: {@code -m <n>} (number of map tasks), {@code -r <n>}
 * (number of reduce tasks), {@code -s <n>} / {@code -e <n>} (start and end
 * values, defaulting to {@code DEFAULT_START} / {@code DEFAULT_END}),
 * {@code -f} (sets the {@code FAST_MAP} property to {@code "true"}) and
 * {@code -libjars <jars>}. Every other argument is positional; exactly two are
 * required.
 *
 * @param args raw command-line arguments
 * @return the result of {@code initJob}, or of {@code printUsage()} when the
 *         arguments are invalid
 * @throws Exception whatever option handling or {@code initJob} throws
 */
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf());
    int start = DEFAULT_START;
    int end = DEFAULT_END;

    // @TODO: use commons getopts
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-s".equals(args[i])) {
                start = Integer.parseInt(args[++i]);
            } else if ("-e".equals(args[i])) {
                end = Integer.parseInt(args[++i]);
            } else if ("-f".equals(args[i])) {
                // Must be set(), not get(): get() only reads the property and the
                // flag would otherwise have no effect on the configuration.
                conf.set(FAST_MAP, "true");
            } else if ("-libjars".equals(args[i])) {
                conf.set("tmpjars", FileUtils.validateFiles(args[++i], conf));

                URL[] libjars = FileUtils.getLibJars(conf);
                if (libjars != null && libjars.length > 0) {
                    // Add libjars to client/tasks classpath
                    conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader()));
                    // Adds libjars to our classpath
                    Thread.currentThread().setContextClassLoader(
                            new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader()));
                }
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            // i already points at the value that failed to parse.
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            // i ran past the end, so the option lacking its value is one slot back.
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");

        return printUsage();
    }

    return initJob(conf, start, end, other_args.get(0), other_args.get(1));
}

From source file:org.mitre.ccv.mapred.CalculateKmerPiValues.java

License:Open Source License

/**
 * Parses the command line and starts the job through {@code initJob}.
 *
 * <p>Recognised options: {@code -m <n>} (number of map tasks), {@code -r <n>}
 * (number of reduce tasks), {@code -c} (sets the clean-logs flag handed to
 * {@code initJob}), {@code -s <n>} / {@code -e <n>} (start and end values,
 * defaulting to the {@code CalculateKmerCounts} constants) and
 * {@code -libjars <jars>}. Every other argument is positional; exactly two are
 * required.
 *
 * @param args raw command-line arguments
 * @return the result of {@code initJob}, or of {@code printUsage()} when the
 *         arguments are invalid
 * @throws Exception whatever option handling or {@code initJob} throws
 */
@Override
public int run(String[] args) throws Exception {
    final JobConf jobConf = new JobConf(getConf());
    final List<String> positional = new ArrayList<String>();
    boolean removeLogs = false;
    // Kept boxed: the parameter types of initJob are not visible from here.
    Integer first = CalculateKmerCounts.DEFAULT_START;
    Integer last = CalculateKmerCounts.DEFAULT_END;

    int idx = 0;
    while (idx < args.length) {
        try {
            final String option = args[idx];
            if ("-m".equals(option)) {
                jobConf.setNumMapTasks(Integer.parseInt(args[++idx]));
            } else if ("-r".equals(option)) {
                jobConf.setNumReduceTasks(Integer.parseInt(args[++idx]));
            } else if ("-c".equals(option)) {
                removeLogs = true;
            } else if ("-s".equals(option)) {
                first = Integer.parseInt(args[++idx]);
            } else if ("-e".equals(option)) {
                last = Integer.parseInt(args[++idx]);
            } else if ("-libjars".equals(option)) {
                jobConf.set("tmpjars", FileUtils.validateFiles(args[++idx], jobConf));

                final URL[] jars = FileUtils.getLibJars(jobConf);
                if (jars != null && jars.length > 0) {
                    // Expose the jars through the configuration's class loader ...
                    final ClassLoader confLoader = new URLClassLoader(jars, jobConf.getClassLoader());
                    jobConf.setClassLoader(confLoader);
                    // ... and through this thread's context class loader.
                    final Thread current = Thread.currentThread();
                    current.setContextClassLoader(new URLClassLoader(jars, current.getContextClassLoader()));
                }
            } else {
                positional.add(option);
            }
        } catch (NumberFormatException badNumber) {
            // idx already points at the value that failed to parse.
            System.out.println("ERROR: Integer expected instead of " + args[idx]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException missingValue) {
            // idx ran past the end, so the option lacking its value is one slot back.
            System.out.println("ERROR: Required parameter missing from " + args[idx - 1]);
            return printUsage();
        }
        idx++;
    }

    // Exactly two positional arguments must remain.
    final int remaining = positional.size();
    if (remaining != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + remaining + " instead of 2.");
        return printUsage();
    }

    return initJob(jobConf, first, last, positional.get(0), positional.get(1), removeLogs);
}

From source file:org.mitre.ccv.mapred.CalculateKmerProbabilities.java

License:Open Source License

/**
 * Parses the command line and starts the job through {@code initJob}.
 *
 * <p>Recognised options: {@code -m <n>} (number of map tasks), {@code -r <n>}
 * (number of reduce tasks), {@code -s <n>} / {@code -e <n>} (start and end
 * values, defaulting to the {@code CalculateKmerCounts} constants), {@code -c}
 * (sets the clean-logs flag handed to {@code initJob}), {@code -l <n>} (total
 * sequence length, mandatory and required to be greater than zero) and
 * {@code -libjars <jars>}. Every other argument is positional; exactly two are
 * required.
 *
 * @param args raw command-line arguments
 * @return the result of {@code initJob}, or of {@code printUsage()} when the
 *         arguments are invalid
 * @throws Exception whatever option handling or {@code initJob} throws
 */
@Override
public int run(String[] args) throws Exception {
    final JobConf jobConf = new JobConf(getConf());
    final List<String> positional = new ArrayList<String>();
    boolean removeLogs = false;
    int first = CalculateKmerCounts.DEFAULT_START;
    int last = CalculateKmerCounts.DEFAULT_END;
    int totalLength = -1;

    // @TODO: use commons getopts
    int idx = 0;
    while (idx < args.length) {
        try {
            final String option = args[idx];
            if ("-m".equals(option)) {
                jobConf.setNumMapTasks(Integer.parseInt(args[++idx]));
            } else if ("-r".equals(option)) {
                jobConf.setNumReduceTasks(Integer.parseInt(args[++idx]));
            } else if ("-s".equals(option)) {
                first = Integer.parseInt(args[++idx]);
            } else if ("-e".equals(option)) {
                last = Integer.parseInt(args[++idx]);
            } else if ("-c".equals(option)) {
                removeLogs = true;
            } else if ("-l".equals(option)) {
                totalLength = Integer.parseInt(args[++idx]);
            } else if ("-libjars".equals(option)) {
                jobConf.set("tmpjars", FileUtils.validateFiles(args[++idx], jobConf));

                final URL[] jars = FileUtils.getLibJars(jobConf);
                if (jars != null && jars.length > 0) {
                    // Expose the jars through the configuration's class loader ...
                    final ClassLoader confLoader = new URLClassLoader(jars, jobConf.getClassLoader());
                    jobConf.setClassLoader(confLoader);
                    // ... and through this thread's context class loader.
                    final Thread current = Thread.currentThread();
                    current.setContextClassLoader(new URLClassLoader(jars, current.getContextClassLoader()));
                }
            } else {
                positional.add(option);
            }
        } catch (NumberFormatException badNumber) {
            // idx already points at the value that failed to parse.
            System.out.println("ERROR: Integer expected instead of " + args[idx]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException missingValue) {
            // idx ran past the end, so the option lacking its value is one slot back.
            System.out.println("ERROR: Required parameter missing from " + args[idx - 1]);
            return printUsage();
        }
        idx++;
    }

    // Exactly two positional arguments must remain.
    final int remaining = positional.size();
    if (remaining != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + remaining + " instead of 2.");
        return printUsage();
    }

    // -l keeps its -1 default when absent, which this check rejects as well.
    if (!(totalLength > 0)) {
        System.out.println("ERROR: Requires total length of sequence to be > 0");
        return printUsage();
    }

    return initJob(jobConf, first, last, totalLength, positional.get(0), positional.get(1), removeLogs);
}