Example usage for org.apache.hadoop.mapred JobConf setNumMapTasks

List of usage examples for org.apache.hadoop.mapred JobConf setNumMapTasks

Introduction

On this page you can find example usages of org.apache.hadoop.mapred JobConf setNumMapTasks.

Prototype

public void setNumMapTasks(int n) 

Source Link

Document

Set the number of map tasks for this job.

Usage

From source file:org.mitre.ccv.mapred.CalculateCosineDistanceMatrix.java

License:Open Source License

/**
 * Parses the command line, runs the job when two positional arguments are supplied, and
 * optionally hands the resulting matrix to the Phylip and/or row-major writers.
 *
 * <p>Recognized options: {@code -m <maps>}, {@code -r <reduces>}, {@code -D <property=value>},
 * {@code -libjars <jars>}, {@code -phylip <file>}, {@code -packedRow <file>} and
 * {@code -digits <number>}. Every other argument is treated as positional.
 *
 * @param args raw command-line arguments
 * @return the value of {@code printUsage()} on a command-line error; otherwise the result of
 *         {@code initJob}, or 0 when no job was started
 * @throws Exception if the job or one of the matrix writers fails
 */
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf());

    String phylip = null;
    String packedRow = null;
    int fractionDigits = 6;

    ArrayList<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-D".equals(args[i])) {
                // Split on the first '=' only, so property values may themselves contain '='.
                String[] props = args[++i].split("=", 2);
                if (props.length != 2) {
                    System.out.println("ERROR: Expected property=value instead of " + args[i]);
                    return printUsage();
                }
                conf.set(props[0], props[1]);
            } else if ("-libjars".equals(args[i])) {
                conf.set("tmpjars", FileUtils.validateFiles(args[++i], conf));

                URL[] libjars = FileUtils.getLibJars(conf);
                if (libjars != null && libjars.length > 0) {
                    // Add libjars to client/tasks classpath
                    conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader()));
                    // Adds libjars to our classpath
                    Thread.currentThread().setContextClassLoader(
                            new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader()));
                }
            } else if ("-phylip".equals(args[i])) {
                phylip = args[++i];
            } else if ("-packedRow".equals(args[i])) {
                packedRow = args[++i];
            } else if ("-digits".equals(args[i])) {
                fractionDigits = Integer.parseInt(args[++i]);
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            // args[i] is the value that failed to parse (i was already advanced past the flag).
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            // ++i ran past the end of args, so args[i - 1] is the flag that lacks its value.
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    boolean writeMatrix = phylip != null || packedRow != null;

    // Without a matrix writer exactly two positional arguments are required; with a writer
    // at least one positional argument (the matrix path) must be present.
    if ((other_args.size() != 2 && !writeMatrix) || (other_args.isEmpty() && writeMatrix)) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");

        return printUsage();
    }

    int ret = 0;
    if (other_args.size() == 2) {
        ret = this.initJob(conf, other_args.get(0), other_args.get(1));
    }

    // The matrix handed to the writers is the last positional argument: the only one when no
    // job was run, or the second one (presumably the job output) after a successful job.
    // NOTE(review): with three or more positional arguments nothing is run or written.
    if (ret == 0 && (other_args.size() == 1 || other_args.size() == 2)) {
        String matrix = other_args.get(other_args.size() - 1);
        if (phylip != null) {
            printPhylipSquare(conf, matrix, phylip, fractionDigits);
        }
        if (packedRow != null) {
            printRowMajorMatrix(conf, matrix, packedRow, fractionDigits);
        }
    }

    return ret;
}

From source file:org.mitre.ccv.mapred.CalculateKmerCounts.java

License:Open Source License

/**
 * Parses the command line and starts the job via {@code initJob}.
 *
 * <p>Recognized options: {@code -m <maps>}, {@code -r <reduces>}, {@code -s <start>},
 * {@code -e <end>}, {@code -f} (sets the {@code FAST_MAP} property to {@code "true"}) and
 * {@code -libjars <jars>}. Exactly two positional arguments must remain.
 *
 * @param args raw command-line arguments
 * @return the value of {@code printUsage()} on a command-line error, otherwise the result
 *         of {@code initJob}
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf());
    int start = DEFAULT_START;
    int end = DEFAULT_END;

    // @TODO: use commons getopts
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-s".equals(args[i])) {
                start = Integer.parseInt(args[++i]);
            } else if ("-e".equals(args[i])) {
                end = Integer.parseInt(args[++i]);
            } else if ("-f".equals(args[i])) {
                // Was conf.get(...), which only read the property and discarded the result,
                // so the flag never had any effect.
                conf.set(FAST_MAP, "true");
            } else if ("-libjars".equals(args[i])) {
                conf.set("tmpjars", FileUtils.validateFiles(args[++i], conf));

                URL[] libjars = FileUtils.getLibJars(conf);
                if (libjars != null && libjars.length > 0) {
                    // Add libjars to client/tasks classpath
                    conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader()));
                    // Adds libjars to our classpath
                    Thread.currentThread().setContextClassLoader(
                            new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader()));
                }
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            // args[i] is the value that failed to parse (i was already advanced past the flag).
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            // ++i ran past the end of args, so args[i - 1] is the flag that lacks its value.
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");

        return printUsage();
    }

    return initJob(conf, start, end, other_args.get(0), other_args.get(1));

}

From source file:org.mitre.ccv.mapred.CalculateKmerPiValues.java

License:Open Source License

/**
 * Reads the command-line switches into the job configuration and launches the job.
 *
 * <p>Switches: {@code -m <maps>}, {@code -r <reduces>}, {@code -c} (sets the
 * {@code cleanLogs} flag handed to {@code initJob}), {@code -s <start>}, {@code -e <end>}
 * and {@code -libjars <jars>}. Two positional arguments are required.
 *
 * @param args raw command-line arguments
 * @return {@code printUsage()} on a malformed command line, else the {@code initJob} result
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf());
    boolean cleanLogs = false;
    Integer start = CalculateKmerCounts.DEFAULT_START;
    Integer end = CalculateKmerCounts.DEFAULT_END;

    List<String> positional = new ArrayList<String>();
    int idx = 0;
    while (idx < args.length) {
        try {
            String flag = args[idx];
            if ("-m".equals(flag)) {
                conf.setNumMapTasks(Integer.parseInt(args[++idx]));
            } else if ("-r".equals(flag)) {
                conf.setNumReduceTasks(Integer.parseInt(args[++idx]));
            } else if ("-c".equals(flag)) {
                cleanLogs = true;
            } else if ("-s".equals(flag)) {
                start = Integer.parseInt(args[++idx]);
            } else if ("-e".equals(flag)) {
                end = Integer.parseInt(args[++idx]);
            } else if ("-libjars".equals(flag)) {
                conf.set("tmpjars", FileUtils.validateFiles(args[++idx], conf));

                URL[] jarUrls = FileUtils.getLibJars(conf);
                if (jarUrls != null && jarUrls.length > 0) {
                    // Make the jars visible to the configuration (client and tasks) ...
                    ClassLoader confLoader = new URLClassLoader(jarUrls, conf.getClassLoader());
                    conf.setClassLoader(confLoader);
                    // ... and to the thread that is running this tool.
                    Thread current = Thread.currentThread();
                    current.setContextClassLoader(
                            new URLClassLoader(jarUrls, current.getContextClassLoader()));
                }
            } else {
                positional.add(flag);
            }
        } catch (NumberFormatException badNumber) {
            // idx already points at the value that could not be parsed.
            System.out.println("ERROR: Integer expected instead of " + args[idx]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException missingValue) {
            // idx moved past the end, so the flag lacking its value sits at idx - 1.
            System.out.println("ERROR: Required parameter missing from " + args[idx - 1]);
            return printUsage();
        }
        ++idx;
    }

    // Exactly two positional arguments must remain.
    if (positional.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + positional.size() + " instead of 2.");
        return printUsage();
    }

    return initJob(conf, start, end, positional.get(0), positional.get(1), cleanLogs);
}

From source file:org.mitre.ccv.mapred.CalculateKmerProbabilities.java

License:Open Source License

/**
 * Reads the command-line switches into the job configuration and launches the job.
 *
 * <p>Switches: {@code -m <maps>}, {@code -r <reduces>}, {@code -s <start>},
 * {@code -e <end>}, {@code -c} (sets the {@code cleanLogs} flag handed to {@code initJob}),
 * {@code -l <length>} (required, must be positive) and {@code -libjars <jars>}.
 * Two positional arguments are required.
 *
 * @param args raw command-line arguments
 * @return {@code printUsage()} on a malformed command line, else the {@code initJob} result
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf());
    boolean cleanLogs = false;
    int start = CalculateKmerCounts.DEFAULT_START;
    int end = CalculateKmerCounts.DEFAULT_END;
    int length = -1;

    List<String> positional = new ArrayList<String>();
    int idx = 0;
    while (idx < args.length) {
        try {
            String flag = args[idx];
            if ("-m".equals(flag)) {
                conf.setNumMapTasks(Integer.parseInt(args[++idx]));
            } else if ("-r".equals(flag)) {
                conf.setNumReduceTasks(Integer.parseInt(args[++idx]));
            } else if ("-s".equals(flag)) {
                start = Integer.parseInt(args[++idx]);
            } else if ("-e".equals(flag)) {
                end = Integer.parseInt(args[++idx]);
            } else if ("-c".equals(flag)) {
                cleanLogs = true;
            } else if ("-l".equals(flag)) {
                length = Integer.parseInt(args[++idx]);
            } else if ("-libjars".equals(flag)) {
                conf.set("tmpjars", FileUtils.validateFiles(args[++idx], conf));

                URL[] jarUrls = FileUtils.getLibJars(conf);
                if (jarUrls != null && jarUrls.length > 0) {
                    // Make the jars visible to the configuration (client and tasks) ...
                    ClassLoader confLoader = new URLClassLoader(jarUrls, conf.getClassLoader());
                    conf.setClassLoader(confLoader);
                    // ... and to the thread that is running this tool.
                    Thread current = Thread.currentThread();
                    current.setContextClassLoader(
                            new URLClassLoader(jarUrls, current.getContextClassLoader()));
                }
            } else {
                positional.add(flag);
            }
        } catch (NumberFormatException badNumber) {
            // idx already points at the value that could not be parsed.
            System.out.println("ERROR: Integer expected instead of " + args[idx]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException missingValue) {
            // idx moved past the end, so the flag lacking its value sits at idx - 1.
            System.out.println("ERROR: Required parameter missing from " + args[idx - 1]);
            return printUsage();
        }
        ++idx;
    }

    // Exactly two positional arguments must remain.
    if (positional.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + positional.size() + " instead of 2.");
        return printUsage();
    }

    // The total sequence length has no usable default; it must be given and positive.
    if (length < 1) {
        System.out.println("ERROR: Requires total length of sequence to be > 0");
        return printUsage();
    }

    return initJob(conf, start, end, length, positional.get(0), positional.get(1), cleanLogs);
}

From source file:org.mitre.ccv.mapred.CalculateKmerRevisedRelativeEntropy.java

License:Open Source License

/**
 * Reads the command-line switches into the job configuration and launches the job.
 *
 * <p>Switches: {@code -m <maps>}, {@code -r <reduces>}, {@code -c} (sets the
 * {@code cleanLogs} flag handed to {@code initJob}), {@code -t} (sets the boolean
 * {@code TEXT_OUTPUT} property) and {@code -libjars <jars>}. Three positional arguments
 * are required.
 *
 * @param args raw command-line arguments
 * @return {@code printUsage()} on a malformed command line, else the {@code initJob} result
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf());
    boolean cleanLogs = false;

    List<String> positional = new ArrayList<String>();
    int idx = 0;
    while (idx < args.length) {
        try {
            String flag = args[idx];
            if ("-m".equals(flag)) {
                conf.setNumMapTasks(Integer.parseInt(args[++idx]));
            } else if ("-r".equals(flag)) {
                conf.setNumReduceTasks(Integer.parseInt(args[++idx]));
            } else if ("-c".equals(flag)) {
                cleanLogs = true;
            } else if ("-t".equals(flag)) {
                conf.setBoolean(TEXT_OUTPUT, true);
            } else if ("-libjars".equals(flag)) {
                conf.set("tmpjars", FileUtils.validateFiles(args[++idx], conf));

                URL[] jarUrls = FileUtils.getLibJars(conf);
                if (jarUrls != null && jarUrls.length > 0) {
                    // Make the jars visible to the configuration (client and tasks) ...
                    ClassLoader confLoader = new URLClassLoader(jarUrls, conf.getClassLoader());
                    conf.setClassLoader(confLoader);
                    // ... and to the thread that is running this tool.
                    Thread current = Thread.currentThread();
                    current.setContextClassLoader(
                            new URLClassLoader(jarUrls, current.getContextClassLoader()));
                }
            } else {
                positional.add(flag);
            }
        } catch (NumberFormatException badNumber) {
            // idx already points at the value that could not be parsed.
            System.out.println("ERROR: Integer expected instead of " + args[idx]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException missingValue) {
            // idx moved past the end, so the flag lacking its value sits at idx - 1.
            System.out.println("ERROR: Required parameter missing from " + args[idx - 1]);
            return printUsage();
        }
        ++idx;
    }

    // Exactly three positional arguments must remain.
    if (positional.size() != 3) {
        System.out.println("ERROR: Wrong number of parameters: " + positional.size() + " instead of 3.");
        return printUsage();
    }

    return initJob(conf, positional.get(0), positional.get(1), positional.get(2), cleanLogs);
}

From source file:org.mitre.ccv.mapred.CompleteCompositionVectors.java

License:Open Source License

/**
 * Runs the chain of map-reduce jobs that produces complete composition vectors and feature
 * vectors, stopping at the first job that reports a non-zero result.
 *
 * <p>The JSON data will be the same as {@link org.mitre.ccv.CompleteMatrix#jsonCompleteMatrix},
 * but the features will be in a different order. This version, by default, sorts only by
 * entropy values, whereas the ccv in-memory version sorts by the k-mer natural order
 * (i.e., lexicographic).
 *
 * @param argv command line of the form {@code [options] <input> [<output>]}; when
 *        {@code <output>} is omitted it defaults to {@code <input>_<FileUtils.getSimpleDate()>}
 * @return -1 on a usage or option-parsing error or when the total sequence length is below 3;
 *         otherwise the result of the last job that was run (non-zero for the first failed job)
 * @throws java.lang.Exception if {@code start} is not larger than 2, if the output path
 *         already exists, or if a job fails with an exception
 */
@Override
@SuppressWarnings("static-access") // For OptionBuilder
public int run(String[] argv) throws Exception {
    JobConf conf = new JobConf(getConf());
    String cli_title = "CompleteCompositionVectorHadoop";

    int start = CalculateKmerCounts.DEFAULT_START;
    int end = CalculateKmerCounts.DEFAULT_END;
    int topkmers = 0;

    String input = null;
    String output = null;
    String vectorJsonOutput = null;
    //String kmerJsonOutput = null;

    boolean cleanLogs = false;

    /** create the Options */
    Options options = new Options();

    /** Hadoop Options */
    options.addOption(
            OptionBuilder.withArgName("number").hasArg(true).withDescription("number of maps").create("m"));
    options.addOption(
            OptionBuilder.withArgName("number").hasArg(true).withDescription("number of reducers").create("r"));

    // org.hadoop.util.GenericOptionsParser should captures this, but it doesn't
    options.addOption(OptionBuilder.withArgName("property=value").hasArg(true).withValueSeparator()
            .withDescription("use value for given property").create("D"));

    /** CompleteCompositionVector Options */
    options.addOption(OptionBuilder.withArgName("number").hasArg(true)
            .withDescription("number of top k-mers to use in calculations").create("topKmers"));
    options.addOption(OptionBuilder.withArgName("start").hasArg(true).withDescription("starting length of tile")
            .create("start"));
    options.addOption(OptionBuilder.withArgName("end").hasArg(true).withDescription("ending length of title")
            .create("end"));
    options.addOption(OptionBuilder.hasArg(true).withArgName("file")
            .withDescription("JSON file to write out k-mers to").create("kmersfile"));

    options.addOption(OptionBuilder.hasArg(true).withArgName("file")
            .withDescription("JSON file to write out feature vectors to "
                    + "(Overrides kmersout, only one file will be written).")
            .create("vectorsfile"));

    options.addOption(OptionBuilder.withArgName("number").hasArg(true)
            .withDescription("What preference to use: 0-min 1-median 2-avg(min,med): default is median")
            .create("prefval"));

    options.addOption(OptionBuilder.withArgName("help").hasArg(false).withDescription("print this message")
            .create("help"));

    // automatically generate the help statement
    HelpFormatter formatter = new HelpFormatter();

    //GenericOptionsParser gop = new GenericOptionsParser(conf, options, argv);
    GenericOptionsParser gop = new GenericOptionsParser(conf, argv);

    String[] remaining_args = gop.getRemainingArgs();

    // create the parser
    CommandLineParser parser = new GnuParser();
    //CommandLine line = gop.getCommandLine();
    String[] other_args = new String[] {};

    try {
        CommandLine line = parser.parse(options, remaining_args);
        other_args = line.getArgs();

        // Make sure there is a parameter left.
        if (other_args.length == 0) {
            System.out.println(cli_title);
            System.out.println("Missing input path!");
            formatter.printHelp("hccv [options] <input> [<output>] ", options);
            GenericOptionsParser.printGenericCommandUsage(System.out);
            return -1;
        }

        Option[] opts = line.getOptions();
        if (line.hasOption("help")) {
            System.out.println(cli_title);
            formatter.printHelp("hccv [options] <input> [<output>] ", options);
            GenericOptionsParser.printGenericCommandUsage(System.out);
            return -1;
        }

        // could also use line.iterator()
        for (Option opt : opts) {
            if (opt.getOpt().equals("m")) {
                conf.setNumMapTasks(Integer.parseInt(opt.getValue()));
            }
            if (opt.getOpt().equals("r")) {
                conf.setNumReduceTasks(Integer.parseInt(opt.getValue()));
            }
            if (opt.getOpt().equals("D")) {
                // We can have multiple properties we want to set
                String[] properties = opt.getValues();
                for (String property : properties) {
                    String[] keyval = property.split("=");
                    conf.set(keyval[0], keyval[1]);
                }
            }
            if (opt.getOpt().equals("start")) {
                start = Integer.parseInt(opt.getValue());
            }
            if (opt.getOpt().equals("end")) {
                end = Integer.parseInt(opt.getValue());
            }
            if (opt.getOpt().equals("topKmers")) {
                topkmers = Integer.parseInt(opt.getValue());
            }
            if (opt.getOpt().equals("vectorsfile")) {
                vectorJsonOutput = opt.getValue();
            }
        }
    } catch (ParseException e) {
        LOG.warn("options parsing faild: " + e.getMessage());
        System.out.println(cli_title);
        formatter.printHelp("hccv [options] <input> [<output>] ", options);
        GenericOptionsParser.printGenericCommandUsage(System.out);
        // Stop here: other_args is still empty, so continuing would fail on other_args[0].
        return -1;
    }
    if (start <= 2) {
        throw new IllegalArgumentException("Value of 'start' argument must be larger than 2");
    }

    input = other_args[0];
    if (other_args.length < 2) {
        output = input + "_" + FileUtils.getSimpleDate();
    } else {
        // The optional output path is the second positional argument (index 1); index 2
        // was out of bounds for the documented "<input> [<output>]" form.
        output = other_args[1];
    }

    /**
     * Check output path. Either needs to exist as a directory or not exist
     */
    // NOTE(review): inside the else branch fs.exists() is already true, so this condition
    // always holds and an existing directory is rejected too, unlike the comment above
    // suggests. Left unchanged; confirm the intended behaviour.
    Path outputPath = new Path(output);
    FileSystem fs = outputPath.getFileSystem(conf);
    if (!fs.exists(outputPath)) {
        fs.mkdirs(outputPath);
    } else if (fs.exists(outputPath) || !fs.getFileStatus(outputPath).isDir()) {
        LOG.fatal(String.format("Output directory %s already exists", outputPath.makeQualified(fs)));
        throw new FileAlreadyExistsException(
                String.format("Output directory %s already exists", outputPath.makeQualified(fs)));
    }

    String outputDir = output + Path.SEPARATOR;

    int res;
    /**
     * Zero, CalculateCompositionVectors
     */
    LOG.info("Starting CalculateCompositionVectors Map-Reduce job");
    CalculateCompositionVectors cv = new CalculateCompositionVectors();
    res = cv.initJob(conf, start, end, input, outputDir + COMPOSITION_VECTORS, cleanLogs);
    if (res != 0) {
        LOG.info("CalculateCompositionVectors returned non-zero result!");
        return res;
    }
    // We can stop now or continue to reduce dimensionality using RRE or other means

    /**
     * First, CalculateKmerCounts
     */
    LOG.info("Starting CalculateKmerCounts Map-Reduce job");
    // FastMap option for CalculateKmers!?!
    CalculateKmerCounts ckc = new CalculateKmerCounts();
    res = ckc.initJob(conf, start, end, input, outputDir + KMER_COUNTS);
    if (res != 0) {
        LOG.fatal("CalculateKmerCounts returned non-zero result!");
        return res;
    }

    /**
     * Second, TotalSequenceLength
     */
    LOG.info("Starting TotalSequenceLength Map-Reduce job");
    TotalSequenceLength tsl = new TotalSequenceLength();
    res = tsl.initJob(conf, input, outputDir + TOTAL_LENGTH, cleanLogs);
    if (res != 0) {
        LOG.fatal("TotalSequenceLength returned non-zero result!");
        return res;
    }
    int length = tsl.getCount(conf, outputDir + TOTAL_LENGTH);

    if (length < 3) {
        LOG.fatal("TotalSequenceLength returned a total sequence length of less than 3.");
        return -1;
    } else {
        LOG.info(String.format("TotalSequenceLength returned a total sequence length of %d.", length));
    }

    /**
     * Third, CalculateKmerProbabilities
     */
    LOG.info("Starting CalculateKmerProbabilities Map-Reduce job");
    CalculateKmerProbabilities ckp = new CalculateKmerProbabilities();
    res = ckp.initJob(conf, start, end, length, outputDir + KMER_COUNTS, outputDir + KMER_PROBABILITIES,
            cleanLogs);
    if (res != 0) {
        LOG.fatal("CalculateKmerProbabilities returned non-zero result!");
        return res;
    }

    /**
     * Fourth, InvertKmerProbabilities
     */
    LOG.info("Starting InvertKmerProbabilities Map-Reduce job");
    InvertKmerProbabilities ikp = new InvertKmerProbabilities();
    res = ikp.initJob(conf, outputDir + KMER_PROBABILITIES, outputDir + INVERTED_KMER_PROBABILITIES, cleanLogs);
    if (res != 0) {
        LOG.fatal("InvertKmerProbabilities returned non-zero result!");
        return res;
    }

    /**
     * Fifth, CalculateKmerPiValues
     */
    LOG.info("Starting CalculateKmerPiValues Map-Reduce job");
    CalculateKmerPiValues kpv = new CalculateKmerPiValues();
    res = kpv.initJob(conf, start, end, outputDir + INVERTED_KMER_PROBABILITIES, outputDir + KMER_PI_VALUES,
            cleanLogs);
    if (res != 0) {
        LOG.fatal("CalculateKmerPiValues returned non-zero result!");
        return res;
    }

    /**
     * Sixth, CalculateKmerRevisedRelativeEntropy
     */
    LOG.info("Starting CalculateKmerRevisedRelativeEntropy Map-Reduce job");
    CalculateKmerRevisedRelativeEntropy krre = new CalculateKmerRevisedRelativeEntropy();
    res = krre.initJob(conf, outputDir + KMER_PI_VALUES, outputDir + COMPOSITION_VECTORS,
            outputDir + ENTROPY_VALUES, cleanLogs);
    if (res != 0) {
        LOG.fatal("CalculateKmerRevisedRelativeEntropy returned non-zero result!");
        return res;
    }

    /**
     * Seventh, SortKmerRevisedRelativeEntropies
     */
    SortKmerRevisedRelativeEntropies srre = new SortKmerRevisedRelativeEntropies();
    res = srre.initJob(conf, outputDir + ENTROPY_VALUES, outputDir + SORTED_ENTROPY_VALUES, cleanLogs);
    if (res != 0) {
        LOG.fatal("SortKmerRevisedRelativeEntropies returned non-zero result!");
        return res;
    }

    /**
     * Eighth, GenerateFeatureVectors
     *
     * Generate a flattened list to add to the cache to be distributed to the map-tasks.
     */
    Path listOutputPath = new Path(outputDir + Integer.toString(topkmers) + KMER_ENTROPY_SET);
    LOG.info(String.format("Loading %d sorted k-mers from %s to %s", topkmers,
            outputDir + SORTED_ENTROPY_VALUES, listOutputPath.toString()));
    int num = CompleteCompositionVectorUtils.flattenKmerEntropySequenceFile(conf, topkmers,
            outputDir + SORTED_ENTROPY_VALUES, listOutputPath.toString(), cleanLogs);

    // Fewer k-mers than requested is not fatal: continue with the number actually obtained.
    if (num != topkmers) {
        LOG.fatal(String.format("Requested %d k-mers, but got %d. Using %d", topkmers, num, num));
        topkmers = num;
    }
    GenerateFeatureVectors fv = new GenerateFeatureVectors();
    res = fv.initJob(conf, listOutputPath.toString(), topkmers, outputDir + COMPOSITION_VECTORS,
            outputDir + FEATURE_VECTORS, cleanLogs);
    if (res != 0) {
        LOG.fatal("GenerateFeatureVectors returned non-zero result!");
        return res;
    }

    /**
     * Save feature vectors, features (k-mers), and properties to a JSON file.
     *
     * The data will be the same as {@link org.mitre.ccv.CompleteMatrix#jsonCompleteMatrix}, but the features
     * will be in a different order. This version, by default, sorts only by entropy values, whereas the
     * ccv in-memory version sorts by the k-mer natural order (i.e., lexicographic).
     */
    if (vectorJsonOutput != null && vectorJsonOutput.length() > 0) {
        LOG.info("Writing features out to " + vectorJsonOutput);
        CompleteCompositionVectorUtils.featureVectors2Json(conf, start, end, topkmers,
                outputDir + SORTED_ENTROPY_VALUES, outputDir + FEATURE_VECTORS, vectorJsonOutput);
    }

    LOG.info("All done generating complete composition vectors and feature vectors.");
    return res;
}

From source file:org.mitre.ccv.mapred.CompleteCompositionVectorUtils.java

License:Open Source License

/**
 * Command-line front end that dispatches to one of the utility commands.
 *
 * <p>Options {@code -m <maps>}, {@code -r <reduces>} and {@code -libjars <jars>} are applied
 * to the configuration; the first remaining argument names the command:
 * <ul>
 *   <li>{@code featureVectors2Json <start> <end> <kmers> <arg> <arg> <arg>} &ndash; the three
 *       numbers are parsed as integers and all six values are passed to
 *       {@code featureVectors2Json}</li>
 *   <li>{@code featureVectors2rows [<digits>] <arg> <arg>} &ndash; {@code digits} defaults
 *       to 6 and is only read when at least three values follow the command</li>
 * </ul>
 *
 * @param args raw command-line arguments
 * @return 0 on success, -1 for an unknown command, too few arguments or an unparsable
 *         number, or the value of {@code printUsage()} on an option error
 * @throws Exception if the selected command fails
 */
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf());

    ArrayList<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-libjars".equals(args[i])) {
                conf.set("tmpjars", FileUtils.validateFiles(args[++i], conf));

                URL[] libjars = FileUtils.getLibJars(conf);
                if (libjars != null && libjars.length > 0) {
                    // Add libjars to client/tasks classpath
                    conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader()));
                    // Adds libjars to our classpath
                    Thread.currentThread().setContextClassLoader(
                            new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader()));
                }
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            // args[i] is the value that failed to parse (i was already advanced past the flag).
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            // ++i ran past the end of args, so args[i - 1] is the flag that lacks its value.
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // At least the command name must be left.
    if (other_args.size() < 1) {
        System.out.println("ERROR: Require ONE argument!");
        return printUsage();
    }

    String cmd = other_args.get(0);
    if (cmd.equals("featureVectors2Json")) {
        if (other_args.size() >= 7) {
            try {
                int start = Integer.parseInt(other_args.get(1));
                int end = Integer.parseInt(other_args.get(2));
                int kmers = Integer.parseInt(other_args.get(3));
                featureVectors2Json(conf, start, end, kmers, other_args.get(4), other_args.get(5),
                        other_args.get(6));
            } catch (NumberFormatException except) {
                System.err.println("Woops. Error converting number!");
                return -1;
            }
        } else {
            System.err.println("We need more arguments!");
            return -1;
        }
    } else if (cmd.equals("featureVectors2rows")) {
        int digits = 6;
        if (other_args.size() > 3) {
            try {
                digits = Integer.parseInt(other_args.get(1));
                featureVectors2RowMajorMatrix(conf, other_args.get(2), other_args.get(3), digits);
            } catch (NumberFormatException except) {
                System.err.println("Woops. Error converting number!");
                return -1;
            }
        } else if (other_args.size() == 3) {
            featureVectors2RowMajorMatrix(conf, other_args.get(1), other_args.get(2), digits);
        } else {
            // Previously this fell through to get(1)/get(2) and died with an
            // IndexOutOfBoundsException; report it like the sibling command does.
            System.err.println("We need more arguments!");
            return -1;
        }
    } else {
        System.out.println("Unknown command:" + cmd);
        return -1;
    }
    return 0;
}

From source file:org.mitre.ccv.mapred.GenerateFeatureVectors.java

License:Open Source License

/**
 * Parses the command line, flattens the k-mer list into a temporary file next to the list
 * input, and starts the job via {@code initJob}.
 *
 * <p>Recognized options: {@code -m <maps>}, {@code -r <reduces>}, {@code -c} (sets the
 * {@code cleanLogs} flag), {@code -l <kmer list path>} (required), {@code -t <cardinality>}
 * (defaults to {@code Integer.MAX_VALUE}) and {@code -libjars <jars>}. Exactly two
 * positional arguments must remain.
 *
 * @param args raw command-line arguments
 * @return the value of {@code printUsage()} on a command-line error, otherwise the result
 *         of {@code initJob}
 * @throws Exception if flattening the list or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf());
    int cardinality = Integer.MAX_VALUE;
    boolean cleanLogs = false;
    String listInput = null;

    // @TODO: use commons getopts, org.apache.hadoop.util.GenericOptionsParser used it
    ArrayList<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-c".equals(args[i])) {
                cleanLogs = true;
            } else if ("-l".equals(args[i])) {
                listInput = args[++i];
            } else if ("-t".equals(args[i])) {
                cardinality = Integer.parseInt(args[++i]);
            } else if ("-libjars".equals(args[i])) {
                conf.set("tmpjars", FileUtils.validateFiles(args[++i], conf));

                URL[] libjars = FileUtils.getLibJars(conf);
                if (libjars != null && libjars.length > 0) {
                    // Add libjars to client/tasks classpath
                    conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader()));
                    // Adds libjars to our classpath
                    Thread.currentThread().setContextClassLoader(
                            new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader()));
                }
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            // args[i] is the value that failed to parse (i was already advanced past the flag).
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            // ++i ran past the end of args, so args[i - 1] is the flag that lacks its value.
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        // The message used to claim 3 were expected although the check requires 2.
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    if (listInput == null || listInput.length() == 0) {
        System.out.println("Need kmer sequence file path!");
        return printUsage();
    }

    // The flattened list goes into a uniquely named sibling of the list input.
    long now = System.currentTimeMillis();
    Path listInputPath = new Path(listInput);
    Path listOutputPath = new Path(listInputPath.getParent(), "kmer_" + Long.toHexString(now) + "_tmp");
    LOG.info(String.format("Loading %d sorted k-mers from %s to %s", cardinality, listInputPath.toString(),
            listOutputPath.toString()));
    int num = CompleteCompositionVectorUtils.flattenKmerEntropySequenceFile(conf, cardinality,
            listInputPath.toString(), listOutputPath.toString(), cleanLogs);

    // Propagate the job result instead of always reporting success.
    return initJob(conf, listOutputPath.toString(), num, other_args.get(0), other_args.get(1), cleanLogs);
}

From source file:org.mitre.ccv.mapred.InvertKmerProbabilities.java

License:Open Source License

/**
 * Builds a {@code JobConf} from the command line and passes it to {@code initJob}.
 *
 * <p>Recognized options:
 * <ul>
 *   <li>{@code -m <int>} sets the number of map tasks;</li>
 *   <li>{@code -r <int>} sets the number of reduce tasks;</li>
 *   <li>{@code -c} turns on the clean-logs flag passed to {@code initJob};</li>
 *   <li>{@code -libjars <value>} is validated through {@code FileUtils.validateFiles},
 *       stored under the {@code tmpjars} key, and any resulting URLs are appended to
 *       both the configuration's and the current thread's class loaders.</li>
 * </ul>
 * Every other token is collected as a positional argument; exactly two must remain.
 *
 * @param args raw command-line tokens
 * @return the value of {@code printUsage()} when the arguments are rejected,
 *         otherwise the value returned by {@code initJob}
 * @throws Exception anything not handled here propagates to the caller
 */
@Override
public int run(String[] args) throws Exception {
    final JobConf jobConf = new JobConf(getConf());
    final List<String> positional = new ArrayList<String>();
    boolean cleanLogs = false;

    int idx = 0;
    while (idx < args.length) {
        try {
            final String token = args[idx];
            if ("-m".equals(token)) {
                final int mapTasks = Integer.parseInt(args[++idx]);
                jobConf.setNumMapTasks(mapTasks);
            } else if ("-r".equals(token)) {
                final int reduceTasks = Integer.parseInt(args[++idx]);
                jobConf.setNumReduceTasks(reduceTasks);
            } else if ("-c".equals(token)) {
                cleanLogs = true;
            } else if ("-libjars".equals(token)) {
                jobConf.set("tmpjars", FileUtils.validateFiles(args[++idx], jobConf));

                final URL[] jarUrls = FileUtils.getLibJars(jobConf);
                final boolean hasJars = jarUrls != null && jarUrls.length > 0;
                if (hasJars) {
                    // Make the jars visible through the job configuration ...
                    final ClassLoader confParent = jobConf.getClassLoader();
                    jobConf.setClassLoader(new URLClassLoader(jarUrls, confParent));
                    // ... and through the calling thread's context class loader.
                    final Thread self = Thread.currentThread();
                    final ClassLoader threadParent = self.getContextClassLoader();
                    self.setContextClassLoader(new URLClassLoader(jarUrls, threadParent));
                }
            } else {
                positional.add(token);
            }
        } catch (NumberFormatException badNumber) {
            // idx was already advanced by ++idx, so it points at the offending value.
            System.out.println("ERROR: Integer expected instead of " + args[idx]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException missingValue) {
            // ++idx ran off the end; idx - 1 is the option that lacked its value.
            System.out.println("ERROR: Required parameter missing from " + args[idx - 1]);
            return printUsage();
        }
        ++idx;
    }

    // Exactly two positional arguments must be left over.
    final int leftover = positional.size();
    if (leftover != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + leftover + " instead of 2.");
        return printUsage();
    }

    return initJob(jobConf, positional.get(0), positional.get(1), cleanLogs);
}

From source file:org.mitre.ccv.mapred.SortKmerRevisedRelativeEntropies.java

License:Open Source License

/**
 * Parses the command line into a {@code JobConf} and delegates to {@code initJob}.
 *
 * <p>Recognized options:
 * <ul>
 *   <li>{@code -m <int>} sets the number of map tasks;</li>
 *   <li>{@code -r <int>} sets the number of reduce tasks;</li>
 *   <li>{@code -c} turns on the clean-logs flag passed to {@code initJob};</li>
 *   <li>{@code -t} sets the {@code TEXT_OUTPUT} configuration flag to {@code true};</li>
 *   <li>{@code -libjars <value>} is validated through {@code FileUtils.validateFiles},
 *       stored under the {@code tmpjars} key, and any resulting URLs are appended to
 *       both the configuration's and the current thread's class loaders.</li>
 * </ul>
 * Every other token is kept as a positional argument; exactly two must remain
 * (presumably the input and output locations; confirm against {@code initJob}).
 *
 * @param args raw command-line tokens
 * @return the value of {@code printUsage()} when the arguments are rejected,
 *         otherwise the value returned by {@code initJob}
 * @throws Exception anything not handled here propagates to the caller
 */
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf());
    boolean cleanLogs = false;

    // @TODO: use commons getopts, org.apache.hadoop.util.GenericOptionsParser used it
    ArrayList<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-c".equals(args[i])) {
                cleanLogs = true;
            } else if ("-t".equals(args[i])) {
                conf.setBoolean(TEXT_OUTPUT, true);
            } else if ("-libjars".equals(args[i])) {
                conf.set("tmpjars", FileUtils.validateFiles(args[++i], conf));

                URL[] libjars = FileUtils.getLibJars(conf);
                if (libjars != null && libjars.length > 0) {
                    // Add libjars to client/tasks classpath
                    conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader()));
                    // Adds libjars to our classpath
                    Thread.currentThread().setContextClassLoader(
                            new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader()));
                }
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            // ++i already advanced the index, so args[i] is the value that failed to parse.
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            // ++i ran past the end of args; args[i - 1] is the option missing its value.
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        // The expected count in the message must match the check above (2, not 3).
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    return initJob(conf, other_args.get(0), other_args.get(1), cleanLogs);
}