Example usage for org.apache.commons.cli2 CommandLine getValue

Introduction

This page collects usage examples of org.apache.commons.cli2 CommandLine getValue, all drawn from the Apache Mahout code base.

Prototype

Object getValue(final Option option) throws IllegalStateException;

Document

Retrieves the single Argument value associated with the specified Option.
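
Before the full drivers below, here is a minimal, self-contained sketch of the typical call sequence: build a single-argument Option, parse the arguments, then read the value back with getValue. The class and option names are invented for illustration; the API calls mirror the Mahout examples that follow.

import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;

public class GetValueSketch {
    public static void main(String[] args) throws OptionException {
        // An option with exactly one argument, so getValue returns that single value.
        Option inputOpt = new DefaultOptionBuilder().withLongName("input").withShortName("i")
                .withRequired(true)
                .withArgument(new ArgumentBuilder().withName("input").withMinimum(1).withMaximum(1).create())
                .withDescription("Input path").create();

        Group group = new GroupBuilder().withName("Options").withOption(inputOpt).create();

        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args); // throws OptionException on bad input

        // getValue returns Object; the examples below convert it with toString().
        String input = cmdLine.getValue(inputOpt).toString();
        System.out.println("input = " + input);
    }
}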

Usage

From source file:org.apache.mahout.classifier.svm.algorithm.parallelalgorithms.ParallelClassifierDriver.java

public static void main(String[] args)
        throws IOException, InterruptedException, ClassNotFoundException, OptionException {

    // example args:
    // -if /user/maximzhao/dataset/rcv1_test.binary -of
    // /user/maximzhao/rcv.result
    // -m /user/maximzhao/rcv1.model -nor 1 -ms 241572968 -mhs -Xmx500M -ttt
    // 1080
    log.info("[job] " + JOB_NAME);
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option testFileOpt = obuilder.withLongName("testFile").withRequired(true)
            .withArgument(abuilder.withName("testFile").withMinimum(1).withMaximum(1).create())
            .withDescription("Name of test data file (default = noTestFile)").withShortName("if").create();

    Option outputFileOpt = obuilder.withLongName("output").withRequired(true)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("Out put file name: ").withShortName("of").create();

    Option hdfsServerOpt = obuilder.withLongName("HDFSServer").withRequired(false)
            .withArgument(abuilder.withName("HDFSServer").withMinimum(1).withMaximum(1).create())
            .withDescription("HDFS Server's Address (default = null) ").withShortName("hdfs").create();

    Option modelFileOpt = obuilder.withLongName("modelFile").withRequired(true)
            .withArgument(abuilder.withName("modelFile").withMinimum(1).withMaximum(1).create())
            .withDescription("Name of model file (default = noModelFile) ").withShortName("m").create();

    Option mapSplitSizeOpt = obuilder.withLongName("mapSplitSize").withRequired(false)
            .withArgument(abuilder.withName("mapSplitSize").withMinimum(1).withMaximum(1).create())
            .withDescription("Max map Split size ").withShortName("ms").create();

    Option maxHeapSizeOpt = obuilder.withLongName("maxHeapSize").withRequired(false)
            .withArgument(abuilder.withName("maxHeapSize").withMinimum(1).withMaximum(1).create())
            .withDescription("Max Heap Size: ").withShortName("mhs").create();

    Option numberofReducersOpt = obuilder.withLongName("numberofReducers").withRequired(false)
            .withArgument(abuilder.withName("numberofReducers").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of Reducers: (defaults = 0)").withShortName("nor").create();

    Option taskTimeoutOpt = obuilder.withLongName("taskTimeout").withRequired(false)
            .withArgument(abuilder.withName("taskTimeout").withMinimum(1).withMaximum(1).create())
            .withDescription("Task Time out ( Minutes ) : ").withShortName("ttt").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(modelFileOpt).withOption(testFileOpt)
            .withOption(mapSplitSizeOpt).withOption(hdfsServerOpt).withOption(outputFileOpt)
            .withOption(maxHeapSizeOpt).withOption(taskTimeoutOpt).withOption(numberofReducersOpt)
            .withOption(helpOpt).create();
    SVMParameters para = new SVMParameters();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        para.setTestFile(cmdLine.getValue(testFileOpt).toString());
        para.setOutFile(cmdLine.getValue(outputFileOpt).toString());
        para.setModelFileName(cmdLine.getValue(modelFileOpt).toString());

        // hdfs server address
        if (cmdLine.hasOption(hdfsServerOpt)) {
            para.setHdfsServerAddr(cmdLine.getValue(hdfsServerOpt).toString());
        }

        if (cmdLine.hasOption(mapSplitSizeOpt)) {
            para.setMapSplitSize(Long.parseLong(cmdLine.getValue(mapSplitSizeOpt).toString()));
        }

        if (cmdLine.hasOption(numberofReducersOpt)) {
            para.setNumberReducers(Integer.parseInt(cmdLine.getValue(numberofReducersOpt).toString()));
        }

        if (cmdLine.hasOption(maxHeapSizeOpt)) {
            para.setMaxHeapSize(cmdLine.getValue(maxHeapSizeOpt).toString());
        }
        if (cmdLine.hasOption(taskTimeoutOpt)) {
            para.setTaskTimeout(Long.parseLong(cmdLine.getValue(taskTimeoutOpt).toString()));
        }

    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
    // set parameters for the mapper, combiner, reducer

    // create a job
    Job job = new Job(new Configuration());

    // step 1.1 set job static parameters
    ParallelClassifierJob.setJobParameters(job);

    // step 1.2 set mapper parameters
    ParallelClassifierJob.setMapperParameters(job.getConfiguration(), para.getHdfsServerAddr(),
            para.getModelFileName());

    // set general parameters related to a job
    MapReduceUtil.setJobParameters(job, para.getTestFile(), para.getOutFile(), para.getMapSplitSize(),
            para.getNumberReducers(), para.getMaxHeapSize(), para.getTaskTimeout());

    // submit a job
    log.info("job completed: " + MapReduceUtil.submitJob(job));
}
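
Every driver on this page repeats the same three-step dance per option: hasOption to test presence, getValue(...).toString() to fetch, then Integer.parseInt or Long.parseLong to convert. A few hypothetical helpers (the names are invented; nothing like this ships with commons-cli2 or Mahout) condense that pattern:

// Invented convenience helpers wrapping the repeated pattern above.
static String stringValue(CommandLine cmdLine, Option opt) {
    return cmdLine.getValue(opt).toString();
}

static int intValue(CommandLine cmdLine, Option opt, int fallback) {
    return cmdLine.hasOption(opt) ? Integer.parseInt(cmdLine.getValue(opt).toString()) : fallback;
}

static long longValue(CommandLine cmdLine, Option opt, long fallback) {
    return cmdLine.hasOption(opt) ? Long.parseLong(cmdLine.getValue(opt).toString()) : fallback;
}

With these in scope, the optional-parameter block above shrinks to calls such as para.setMapSplitSize(longValue(cmdLine, mapSplitSizeOpt, para.getMapSplitSize())).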

From source file:org.apache.mahout.classifier.svm.algorithm.parallelalgorithms.ParallelMultiClassifierTrainDriver.java

public static void main(String[] args)
        throws IOException, InterruptedException, ClassNotFoundException, OptionException {
    // args = new String [] {"-if","infile","-of","outfile","m",
    // "-nm","10","--nr","11"};
    log.info("[job] " + JOB_NAME);

    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option trainFileOpt = obuilder.withLongName("trainFile").withRequired(true)
            .withArgument(abuilder.withName("trainFile").withMinimum(1).withMaximum(1).create())
            .withDescription("Training data set file").withShortName("if").create();

    Option outputFileOpt = obuilder.withLongName("output").withRequired(true)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("Out put file name: ").withShortName("of").create();

    Option lambdaOpt = obuilder.withLongName("lambda").withRequired(false)
            .withArgument(abuilder.withName("lambda").withMinimum(1).withMaximum(1).create())
            .withDescription("Regularization parameter (default = 0.01) ").withShortName("l").create();

    Option iterOpt = obuilder.withLongName("iter").withRequired(false)
            .withArgument(abuilder.withName("iter").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of iterations (default = 10/lambda) ").withShortName("i").create();

    Option kOpt = obuilder.withLongName("k").withRequired(false)
            .withArgument(abuilder.withName("k").withMinimum(1).withMaximum(1).create())
            .withDescription("Size of block for stochastic gradient (default = 1)").withShortName("v").create();

    Option sampleNumOpt = obuilder.withLongName("trainSampleNum").withRequired(false)
            .withArgument(abuilder.withName("trainSampleNum").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "Number of Samples in traindata set, for large-scale dataset optimization (default = 0) ")
            .withShortName("tsn").create();

    Option classNumOpt = obuilder.withLongName("classNum").withRequired(true)
            .withArgument(abuilder.withName("classNum").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of classes (Categories in multi-classification) ").withShortName("c")
            .create();

    Option startingClassIndexOpt = obuilder.withLongName("startingClassIndex").withRequired(false)
            .withArgument(abuilder.withName("startingClassIndex").withMinimum(1).withMaximum(1).create())
            .withDescription("The starting index of class (default = 0) or 1").withShortName("sci").create();

    Option hdfsServerOpt = obuilder.withLongName("HDFSServer").withRequired(false)
            .withArgument(abuilder.withName("HDFSServer").withMinimum(1).withMaximum(1).create())
            .withDescription("HDFS Server's Address (default = null) ").withShortName("hdfs").create();

    Option svmTypeOpt = obuilder.withLongName("svmType").withRequired(false)
            .withArgument(abuilder.withName("svmType").withMinimum(1).withMaximum(1).create())
            .withDescription("0 -> Binary Classfication, 1 -> Regression, "
                    + "2 -> Multi-Classification (one-vs.-one), 3 -> Multi-Classification (one-vs.-others) ")
            .withShortName("s").create();

    Option modelFileOpt = obuilder.withLongName("modelFile").withRequired(true)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("Name of model file (default = noModelFile) ").withShortName("m").create();

    // hadoop system setting.
    Option mapSplitSizeOpt = obuilder.withLongName("mapSplitSize").withRequired(false)
            .withArgument(abuilder.withName("mapSplitSize").withMinimum(1).withMaximum(1).create())
            .withDescription("Max map Split size ").withShortName("ms").create();

    Option maxHeapSizeOpt = obuilder.withLongName("maxHeapSize").withRequired(false)
            .withArgument(abuilder.withName("maxHeapSize").withMinimum(1).withMaximum(1).create())
            .withDescription("Max Heap Size: ").withShortName("mhs").create();

    Option numberofReducersOpt = obuilder.withLongName("numberofReducers").withRequired(false)
            .withArgument(abuilder.withName("numberofReducers").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of Reducers: (defaults = 0)").withShortName("nor").create();

    Option taskTimeoutOpt = obuilder.withLongName("taskTimeout").withRequired(false)
            .withArgument(abuilder.withName("taskTimeout").withMinimum(1).withMaximum(1).create())
            .withDescription("Task Time out ( Minutes ) : ").withShortName("ttt").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(trainFileOpt).withOption(outputFileOpt)
            .withOption(lambdaOpt).withOption(iterOpt).withOption(kOpt).withOption(svmTypeOpt)
            .withOption(classNumOpt).withOption(hdfsServerOpt).withOption(modelFileOpt)
            .withOption(startingClassIndexOpt).withOption(sampleNumOpt).withOption(mapSplitSizeOpt)
            .withOption(maxHeapSizeOpt).withOption(taskTimeoutOpt).withOption(numberofReducersOpt)
            .withOption(helpOpt).create();

    SVMParameters para = new SVMParameters();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        para.setTrainFile(cmdLine.getValue(trainFileOpt).toString());
        para.setOutFile(cmdLine.getValue(outputFileOpt).toString());

        // lambda
        if (cmdLine.hasOption(lambdaOpt)) {
            para.setLambda(Double.parseDouble(cmdLine.getValue(lambdaOpt).toString()));
        }
        // iteration
        if (cmdLine.hasOption(iterOpt)) {
            para.setMaxIter(Integer.parseInt(cmdLine.getValue(iterOpt).toString()));
        }
        // k
        if (cmdLine.hasOption(kOpt)) {
            para.setExamplesPerIter(Integer.parseInt(cmdLine.getValue(kOpt).toString()));
        }
        // class number
        para.setClassNum(Integer.parseInt(cmdLine.getValue(classNumOpt).toString()));
        // number of samples in training data set.
        if (cmdLine.hasOption(sampleNumOpt)) {
            para.setTrainSampleNumber(Integer.parseInt(cmdLine.getValue(sampleNumOpt).toString()));
        }

        if (cmdLine.hasOption(startingClassIndexOpt)) {
            para.setStartingClassIndex(Integer.parseInt(cmdLine.getValue(startingClassIndexOpt).toString()));
        }
        // models' path
        para.setModelFileName(cmdLine.getValue(modelFileOpt).toString());
        // hdfs server address
        if (cmdLine.hasOption(hdfsServerOpt)) {
            para.setHdfsServerAddr(cmdLine.getValue(hdfsServerOpt).toString());
        }
        // multi classification classificationType
        if (cmdLine.hasOption(svmTypeOpt)) {
            para.setClassificationType(Integer.parseInt(cmdLine.getValue(svmTypeOpt).toString()));
        }
        // MapReduce system setting.
        if (cmdLine.hasOption(mapSplitSizeOpt)) {
            para.setMapSplitSize(Long.parseLong(cmdLine.getValue(mapSplitSizeOpt).toString()));
        }
        if (cmdLine.hasOption(numberofReducersOpt)) {
            para.setNumberReducers(Integer.parseInt(cmdLine.getValue(numberofReducersOpt).toString()));
        }
        if (cmdLine.hasOption(maxHeapSizeOpt)) {
            para.setMaxHeapSize(cmdLine.getValue(maxHeapSizeOpt).toString());
        }
        if (cmdLine.hasOption(taskTimeoutOpt)) {
            para.setTaskTimeout(Long.parseLong(cmdLine.getValue(taskTimeoutOpt).toString()));
        }

    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }

    // set parameters for the mapper, combiner, reducer

    // create a job
    Job job = new Job(new Configuration());

    // step 1.1 set job static parameters
    ParallelMultiClassifierTrainJob.setJobParameters(job);

    // step 1.2 set mapper parameters
    ParallelMultiClassifierTrainJob.setMapperParameters(job.getConfiguration(), para.getMaxIter(),
            para.getTrainSampleNumber(), para.getClassNum(), para.getClassificationType(),
            para.getStartingClassIndex());

    ParallelMultiClassifierTrainJob.setReducerParameters(job.getConfiguration(), (float) para.getLambda(),
            para.getExamplesPerIter(), para.getModelFileName(), para.getHdfsServerAddr());

    // set general parameters related to a job
    MapReduceUtil.setJobParameters(job, para.getTrainFile(), para.getOutFile(), para.getMapSplitSize(),
            para.getNumberReducers(), para.getMaxHeapSize(), para.getTaskTimeout());

    // submit a job
    log.info("job completed: " + MapReduceUtil.submitJob(job));
}

From source file:org.apache.mahout.classifier.svm.algorithm.parallelalgorithms.ParallelMultiClassPredictionDriver.java

public static void main(String[] args)
        throws IOException, InterruptedException, ClassNotFoundException, OptionException {

    // example args:
    // -if /user/maximzhao/dataset/rcv1_test.binary -of
    // /user/maximzhao/rcv.result
    // -m /user/maximzhao/rcv1.model -nor 1 -ms 241572968 -mhs -Xmx500M -ttt
    // 1080
    log.info("[job] " + JOB_NAME);
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option testFileOpt = obuilder.withLongName("testFile").withRequired(true)
            .withArgument(abuilder.withName("testFile").withMinimum(1).withMaximum(1).create())
            .withDescription("Name of test data file (default = noTestFile)").withShortName("if").create();

    Option outputFileOpt = obuilder.withLongName("output").withRequired(true)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("Out put file name: ").withShortName("of").create();

    Option modelFileOpt = obuilder.withLongName("modelFilePath").withRequired(true)
            .withArgument(abuilder.withName("modelFilePath").withMinimum(1).withMaximum(1).create())
            .withDescription("Name of model files Path (default = /user) ").withShortName("m").create();

    Option classNumOpt = obuilder.withLongName("classNum").withRequired(true)
            .withArgument(abuilder.withName("classNum").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of classes (Categories in multi-classification) ").withShortName("c")
            .create();

    Option hdfsServerOpt = obuilder.withLongName("HDFSServer").withRequired(false)
            .withArgument(abuilder.withName("HDFSServer").withMinimum(1).withMaximum(1).create())
            .withDescription("HDFS Server's Address (default = null) ").withShortName("hdfs").create();

    // system setup
    Option mapSplitSizeOpt = obuilder.withLongName("mapSplitSize").withRequired(false)
            .withArgument(abuilder.withName("mapSplitSize").withMinimum(1).withMaximum(1).create())
            .withDescription("Max map Split size ").withShortName("ms").create();

    Option maxHeapSizeOpt = obuilder.withLongName("maxHeapSize").withRequired(false)
            .withArgument(abuilder.withName("maxHeapSize").withMinimum(1).withMaximum(1).create())
            .withDescription("Max Heap Size: ").withShortName("mhs").create();

    Option numberofReducersOpt = obuilder.withLongName("numberofReducers").withRequired(false)
            .withArgument(abuilder.withName("numberofReducers").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of Reducers: (defaults = 0)").withShortName("nor").create();

    Option taskTimeoutOpt = obuilder.withLongName("taskTimeout").withRequired(false)
            .withArgument(abuilder.withName("taskTimeout").withMinimum(1).withMaximum(1).create())
            .withDescription("Task Time out ( Minutes ) : ").withShortName("ttt").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(modelFileOpt).withOption(testFileOpt)
            .withOption(mapSplitSizeOpt).withOption(classNumOpt).withOption(outputFileOpt)
            .withOption(maxHeapSizeOpt).withOption(hdfsServerOpt).withOption(taskTimeoutOpt)
            .withOption(numberofReducersOpt).withOption(helpOpt).create();

    SVMParameters para = new SVMParameters();

    try {

        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        para.setTestFile(cmdLine.getValue(testFileOpt).toString());
        para.setOutFile(cmdLine.getValue(outputFileOpt).toString());
        // models' path
        para.setModelFileName(cmdLine.getValue(modelFileOpt).toString());
        // class number
        para.setClassNum(Integer.parseInt(cmdLine.getValue(classNumOpt).toString()));

        // hdfs server address
        if (cmdLine.hasOption(hdfsServerOpt)) {
            para.setHdfsServerAddr(cmdLine.getValue(hdfsServerOpt).toString());
        }

        if (cmdLine.hasOption(mapSplitSizeOpt)) {
            para.setMapSplitSize(Long.parseLong(cmdLine.getValue(mapSplitSizeOpt).toString()));
        }

        if (cmdLine.hasOption(numberofReducersOpt)) {
            para.setNumberReducers(Integer.parseInt(cmdLine.getValue(numberofReducersOpt).toString()));
        }

        if (cmdLine.hasOption(maxHeapSizeOpt)) {
            para.setMaxHeapSize(cmdLine.getValue(maxHeapSizeOpt).toString());
        }
        if (cmdLine.hasOption(taskTimeoutOpt)) {
            para.setTaskTimeout(Long.parseLong(cmdLine.getValue(taskTimeoutOpt).toString()));
        }
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }

    // create a job
    Job job = new Job(new Configuration());

    // step 1.1 set job static parameters
    ParallelMultiClassPredictionJob.setJobParameters(job);

    // step 1.2 set mapper parameters
    ParallelMultiClassPredictionJob.setMapperParameters(job.getConfiguration(), para.getModelFileName(),
            para.getHdfsServerAddr(), para.getClassNum(), para.getClassificationType());

    // set general parameters related to a job
    MapReduceUtil.setJobParameters(job, para.getTestFile(), para.getOutFile(), para.getMapSplitSize(),
            para.getNumberReducers(), para.getMaxHeapSize(), para.getTaskTimeout());

    // submit a job
    log.info("job completed: " + MapReduceUtil.submitJob(job));
}

From source file:org.apache.mahout.classifier.svm.algorithm.sequentialalgorithms.SVMSequentialPrediction.java

public static void main(String[] args) throws IOException, OptionException {
    if (args.length < 1) {
        args = new String[] { "-te", "../examples/src/test/resources/svmdataset/test.dat", "-m",
                "../examples/src/test/resources/svmdataset/SVM.model" };
        // args = new String[] {
        // "-te",
        // "/media/Data/MachineLearningDataset/triazines_scale.t",
        // "-m", "/home/maximzhao/SVMregression.model", "-s",
        // "1"};//from  w  w w  .  j av a 2  s . c o m
        // args = new String[] {
        // "-te",
        // "/media/Data/MachineLearningDataset/rcv1_train.binary",
        // "-m", "/home/maximzhao/SVMrcv1.model"};
        // args = new String[] {"-te",
        // "/media/Data/MachineLearningDataset/protein.t",
        // "-m", "/home/maximzhao/sectormulti/SVMprotein.model",
        // "-s", "2"};
        // args = new String[] {"-te",
        // "/media/Data/MachineLearningDataset/poker.t",
        // "-m", "/home/maximzhao/sectormulti/SVMpoker.model",
        // "-s", "3"};
        // args = new String[] {"-te", "/media/Data/MachineLearningDataset/poker",
        // "-m", "/user/maximzhao/pokerpro", "-s", "3",
        // "-hdfs", "hdfs://localhost:12009"};
    }

    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option testFileOpt = obuilder.withLongName("testFile").withRequired(true)
            .withArgument(abuilder.withName("testFile").withMinimum(1).withMaximum(1).create())
            .withDescription("Name of test data file (default = noTestFile)").withShortName("te").create();

    Option svmTypeOpt = obuilder.withLongName("svmType").withRequired(false)
            .withArgument(abuilder.withName("svmType").withMinimum(1).withMaximum(1).create())
            .withDescription("0 -> Binary Classfication, 1 -> Regression, "
                    + "2 -> Multi-Classification (one-vs.-one), 3 -> Multi-Classification (one-vs.-others) ")
            .withShortName("s").create();

    Option modelFileOpt = obuilder.withLongName("modelFile").withRequired(true)
            .withArgument(abuilder.withName("modelFile").withMinimum(1).withMaximum(1).create())
            .withDescription("Name of model file (default = noModelFile) ").withShortName("m").create();

    Option hdfsServerOpt = obuilder.withLongName("HDFSServer").withRequired(false)
            .withArgument(abuilder.withName("HDFSServer").withMinimum(1).withMaximum(1).create())
            .withDescription("HDFS Server's Address (default = null) ").withShortName("hdfs").create();

    Option predictedFileOpt = obuilder.withLongName("predictedFile").withRequired(false)
            .withArgument(abuilder.withName("predictedFile").withMinimum(1).withMaximum(1).create())
            .withDescription("File to store predicted label(default = testFile.predict) ").withShortName("p")
            .create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(modelFileOpt).withOption(predictedFileOpt)
            .withOption(testFileOpt).withOption(svmTypeOpt).withOption(helpOpt).withOption(hdfsServerOpt)
            .create();

    SVMParameters para = new SVMParameters();
    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }
        para.setTestFile(cmdLine.getValue(testFileOpt).toString());
        para.setModelFileName(cmdLine.getValue(modelFileOpt).toString());

        // svm classificationType
        if (cmdLine.hasOption(svmTypeOpt)) {
            para.setClassificationType(Integer.parseInt(cmdLine.getValue(svmTypeOpt).toString()));
        } else {
            para.setClassificationType(0); // default classification
        }

        if (cmdLine.hasOption(predictedFileOpt)) {
            para.setOutFile(cmdLine.getValue(predictedFileOpt).toString());
        } else {
            para.setOutFile(para.getTestFile() + ".predict");
        }

        // hdfs server address
        if (cmdLine.hasOption(hdfsServerOpt)) {
            para.setHdfsServerAddr(cmdLine.getValue(hdfsServerOpt).toString());
        } else {
            para.setHdfsServerAddr(null);
        }

    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }

    // load test data set
    DataSetHandler test = new DataSetHandler(para.getTestFile());

    Prediction predictor = PredictionFactory.getInstance(para.getClassificationType());
    predictor.prediction(test, para);
    para.report(para.getClassificationType());
    log.info("Done!");
}
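
The else branches in this driver exist only to supply defaults. The commons-cli2 CommandLine interface also declares a two-argument overload, Object getValue(Option option, Object defaultValue); assuming the bundled version includes it, the predicted-file block above collapses to a single call:

// Equivalent to the hasOption/else pair above, assuming the
// getValue(Option, Object) overload is available in your build.
para.setOutFile(cmdLine.getValue(predictedFileOpt, para.getTestFile() + ".predict").toString());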

From source file:org.apache.mahout.classifier.svm.algorithm.sequentialalgorithms.SVMSequentialTraining.java

public static void main(String[] args) throws IOException, OptionException {
    if (args.length < 1) {
        args = new String[] { "-tr", "../examples/src/test/resources/svmdataset/train.dat", "-m",
                "../examples/src/test/resources/svmdataset/SVM.model" };
        //      args = new String[] {
        //                           "-tr",
        //                           "/media/Data/MachineLearningDataset/triazines_scale",
        //                           "-m", "/home/maximzhao/SVMregression.model", "-s",
        //                           "1"};
        //      // for rcv1
        //      args = new String[] {
        //                           "-tr",
        //                           "/media/Data/MachineLearningDataset/rcv1_test.binary",
        //                           "-m", "/home/maximzhao/SVMrcv1.model", "-ts",
        //                           "677399"};
        //      args = new String[] {"-tr", "/media/Data/MachineLearningDataset/protein",
        //                           "-m", "/home/maximzhao/sectormulti/SVMprotein.model",
        //                           "-s", "2"};
        //        args = new String[] {"-tr", "/media/Data/MachineLearningDataset/poker",
        //                           "-m", "/home/maximzhao/sectormulti/SVMpoker.model",
        //                           "-s", "3"};
        //      args = new String[] {"-tr", "/user/maximzhao/dataset/train.dat", "-hdfs",
        //                           "hdfs://localhost:12009", "-m",
        //                           "../examples/src/test/resources/svmdataset/SVM.model"};
    }
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option trainFileOpt = obuilder.withLongName("trainFile").withRequired(true)
            .withArgument(abuilder.withName("trainFile").withMinimum(1).withMaximum(1).create())
            .withDescription("Training data set file").withShortName("tr").create();

    Option modelFileOpt = obuilder.withLongName("modelFile").withRequired(false)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("Name of model file (default = noModelFile) ").withShortName("m").create();

    Option svmTypeOpt = obuilder.withLongName("svmType").withRequired(false)
            .withArgument(abuilder.withName("svmType").withMinimum(1).withMaximum(1).create())
            .withDescription("0 -> Binary Classfication, 1 -> Regression, "
                    + "2 -> Multi-Classification (one-vs.-one), 3 -> Multi-Classification (one-vs.-others) ")
            .withShortName("s").create();

    Option epsilonOpt = obuilder.withLongName("epsilon").withRequired(false)
            .withArgument(abuilder.withName("epsilon").withMinimum(1).withMaximum(1).create())
            .withDescription("epsilon for regression (default = 0.1) ").withShortName("e").create();

    Option lambdaOpt = obuilder.withLongName("lambda").withRequired(false)
            .withArgument(abuilder.withName("lambda").withMinimum(1).withMaximum(1).create())
            .withDescription("Regularization parameter (default = 0.01) ").withShortName("l").create();

    Option iterOpt = obuilder.withLongName("iter").withRequired(false)
            .withArgument(abuilder.withName("iter").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of iterations (default = 10/lambda) ").withShortName("i").create();

    Option validateExampleNumberOpt = obuilder.withLongName("validateExampleNumber").withRequired(false)
            .withArgument(abuilder.withName("validateExampleNumber").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of validate Examples (default = Maximum iteration / 10) ")
            .withShortName("ven").create();

    Option kOpt = obuilder.withLongName("k").withRequired(false)
            .withArgument(abuilder.withName("k").withMinimum(1).withMaximum(1).create())
            .withDescription("Size of block for stochastic gradient (default = 1)").withShortName("v").create();

    Option sampleNumOpt = obuilder.withLongName("trainSampleNum").withRequired(false)
            .withArgument(abuilder.withName("trainSampleNum").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "Number of Samples in traindata set, for large-scale dataset optimization (default = 0) ")
            .withShortName("ts").create();

    Option hdfsServerOpt = obuilder.withLongName("HDFSServer").withRequired(false)
            .withArgument(abuilder.withName("HDFSServer").withMinimum(1).withMaximum(1).create())
            .withDescription("HDFS Server's Address (default = null) ").withShortName("hdfs").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(trainFileOpt).withOption(validateExampleNumberOpt)
            .withOption(modelFileOpt).withOption(svmTypeOpt).withOption(lambdaOpt).withOption(hdfsServerOpt)
            .withOption(iterOpt).withOption(epsilonOpt).withOption(kOpt).withOption(sampleNumOpt)
            .withOption(helpOpt).create();

    SVMParameters para = new SVMParameters();
    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }
        para.setTrainFile(cmdLine.getValue(trainFileOpt).toString());

        // svm classificationType
        if (cmdLine.hasOption(svmTypeOpt)) {
            para.setClassificationType(Integer.parseInt(cmdLine.getValue(svmTypeOpt).toString()));
        }

        // epsilon
        if (cmdLine.hasOption(epsilonOpt)) {
            para.setEpsilon(Double.parseDouble(cmdLine.getValue(epsilonOpt).toString()));
        }

        // lambda
        if (cmdLine.hasOption(lambdaOpt)) {
            para.setLambda(Double.parseDouble(cmdLine.getValue(lambdaOpt).toString()));
        }

        // iteration
        if (cmdLine.hasOption(iterOpt)) {
            para.setMaxIter(Integer.parseInt(cmdLine.getValue(iterOpt).toString()));
        }

        // number of validation examples
        if (cmdLine.hasOption(validateExampleNumberOpt)) {
            para.setValidateExampleNumber(
                    Integer.parseInt(cmdLine.getValue(validateExampleNumberOpt).toString()));
        } else {
            para.setValidateExampleNumber(para.getMaxIter() / 10);
        }

        // k
        if (cmdLine.hasOption(kOpt)) {
            para.setExamplesPerIter(Integer.parseInt(cmdLine.getValue(kOpt).toString()));
        }

        if (cmdLine.hasOption(modelFileOpt)) {
            para.setModelFileName(cmdLine.getValue(modelFileOpt).toString());
        } else {
            para.setModelFileName("SVM.model");
        }

        // number of samples in training data set.
        if (cmdLine.hasOption(sampleNumOpt)) {
            para.setTrainSampleNumber(Integer.parseInt(cmdLine.getValue(sampleNumOpt).toString()));
        }

        // hdfs server address
        if (cmdLine.hasOption(hdfsServerOpt)) {
            para.setHdfsServerAddr(cmdLine.getValue(hdfsServerOpt).toString());
        } else {
            para.setHdfsServerAddr(null);
        }
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }

    DataSetHandler train = new DataSetHandler(para.getTrainFile());

    // Get data set
    train.getData(para);

    Training classifier = TrainingFactory.getInstance(para.getClassificationType());
    classifier.training(train, para);
    para.report(para.getClassificationType());
    log.info("All Processes are Finished!!");
}

From source file:org.apache.mahout.clustering.canopy.CanopyClusteringJob.java

/**
 * @param args command-line arguments
 */
public static void main(String[] args) throws IOException, ClassNotFoundException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = obuilder.withLongName("input").withRequired(true)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The Path for input Vectors. Must be a SequenceFile of Writable, Vector")
            .withShortName("i").create();

    Option outputOpt = obuilder.withLongName("output").withRequired(true)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("The Path to put the output in").withShortName("o").create();

    Option measureClassOpt = obuilder.withLongName("distance").withRequired(false)
            .withArgument(abuilder.withName("distance").withMinimum(1).withMaximum(1).create())
            .withDescription("The Distance Measure to use.  Default is SquaredEuclidean").withShortName("m")
            .create();

    Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false)
            .withArgument(abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create())
            .withDescription("The Vector implementation class name.  Default is SparseVector.class")
            .withShortName("v").create();
    Option t1Opt = obuilder.withLongName("t1").withRequired(true)
            .withArgument(abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).withDescription("t1")
            .withShortName("t1").create();
    Option t2Opt = obuilder.withLongName("t2").withRequired(true)
            .withArgument(abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).withDescription("t2")
            .withShortName("t2").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt)
            .withOption(measureClassOpt).withOption(vectorClassOpt).withOption(t1Opt).withOption(t2Opt)
            .withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String input = cmdLine.getValue(inputOpt).toString();
        String output = cmdLine.getValue(outputOpt).toString();
        String measureClass = SquaredEuclideanDistanceMeasure.class.getName();
        if (cmdLine.hasOption(measureClassOpt)) {
            measureClass = cmdLine.getValue(measureClassOpt).toString();
        }

        Class<? extends Vector> vectorClass = cmdLine.hasOption(vectorClassOpt)
                ? (Class<? extends Vector>) Class.forName(cmdLine.getValue(vectorClassOpt).toString())
                : SparseVector.class;
        double t1 = Double.parseDouble(cmdLine.getValue(t1Opt).toString());
        double t2 = Double.parseDouble(cmdLine.getValue(t2Opt).toString());

        runJob(input, output, measureClass, t1, t2, vectorClass);

    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
}
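
The vectorClass assignment above relies on an unchecked cast of the Class.forName result. A checked alternative (a sketch, not from the Mahout source) uses Class.asSubclass, which fails fast with a ClassCastException if the named class does not implement Vector:

// Checked variant of the raw cast above; asSubclass validates the type at load time.
Class<? extends Vector> vectorClass = cmdLine.hasOption(vectorClassOpt)
        ? Class.forName(cmdLine.getValue(vectorClassOpt).toString()).asSubclass(Vector.class)
        : SparseVector.class;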

From source file:org.apache.mahout.clustering.canopy.CanopyDriver.java

public static void main(String[] args) throws IOException {
    Option helpOpt = DefaultOptionCreator.helpOption();
    Option inputOpt = DefaultOptionCreator.inputOption().create();
    Option outputOpt = DefaultOptionCreator.outputOption().create();
    Option measureClassOpt = DefaultOptionCreator.distanceMeasureOption().create();
    Option t1Opt = DefaultOptionCreator.t1Option().create();
    Option t2Opt = DefaultOptionCreator.t2Option().create();

    Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
    Option clusteringOpt = DefaultOptionCreator.clusteringOption().create();

    Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(outputOpt)
            .withOption(overwriteOutput).withOption(measureClassOpt).withOption(t1Opt).withOption(t2Opt)
            .withOption(clusteringOpt).withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        Path input = new Path(cmdLine.getValue(inputOpt).toString());
        Path output = new Path(cmdLine.getValue(outputOpt).toString());
        if (cmdLine.hasOption(overwriteOutput)) {
            HadoopUtil.overwriteOutput(output);
        }
        String measureClass = cmdLine.getValue(measureClassOpt).toString();
        double t1 = Double.parseDouble(cmdLine.getValue(t1Opt).toString());
        double t2 = Double.parseDouble(cmdLine.getValue(t2Opt).toString());

        runJob(input, output, measureClass, t1, t2, cmdLine.hasOption(clusteringOpt));
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);

    }
}

From source file:org.apache.mahout.clustering.canopy.ClusterDriver.java

public static void main(String[] args) throws IOException, ClassNotFoundException {

    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false)
            .withArgument(abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create())
            .withDescription("The Vector implementation class name.  Default is SparseVector.class")
            .withShortName("v").create();
    Option t1Opt = obuilder.withLongName("t1").withRequired(true)
            .withArgument(abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).withDescription("t1")
            .withShortName("t1").create();
    Option t2Opt = obuilder.withLongName("t2").withRequired(true)
            .withArgument(abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).withDescription("t2")
            .withShortName("t2").create();

    Option pointsOpt = obuilder.withLongName("points").withRequired(true)
            .withArgument(abuilder.withName("points").withMinimum(1).withMaximum(1).create())
            .withDescription("The path containing the points").withShortName("p").create();

    Option canopiesOpt = obuilder.withLongName("canopies").withRequired(true)
            .withArgument(abuilder.withName("canopies").withMinimum(1).withMaximum(1).create())
            .withDescription("The location of the canopies, as a Path").withShortName("c").create();

    Option measureClassOpt = obuilder.withLongName("distance").withRequired(false)
            .withArgument(abuilder.withName("distance").withMinimum(1).withMaximum(1).create())
            .withDescription("The Distance Measure to use.  Default is SquaredEuclidean").withShortName("m")
            .create();

    Option outputOpt = obuilder.withLongName("output").withRequired(true)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("The Path to put the output in").withShortName("o").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(vectorClassOpt).withOption(t1Opt).withOption(t2Opt)
            .withOption(pointsOpt).withOption(canopiesOpt).withOption(measureClassOpt).withOption(outputOpt)
            .withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String measureClass = SquaredEuclideanDistanceMeasure.class.getName();
        if (cmdLine.hasOption(measureClassOpt)) {
            measureClass = cmdLine.getValue(measureClassOpt).toString();
        }
        String output = cmdLine.getValue(outputOpt).toString();
        String canopies = cmdLine.getValue(canopiesOpt).toString();
        String points = cmdLine.getValue(pointsOpt).toString();
        Class<? extends Vector> vectorClass = cmdLine.hasOption(vectorClassOpt)
                ? (Class<? extends Vector>) Class.forName(cmdLine.getValue(vectorClassOpt).toString())
                : SparseVector.class;
        double t1 = Double.parseDouble(cmdLine.getValue(t1Opt).toString());
        double t2 = Double.parseDouble(cmdLine.getValue(t2Opt).toString());

        runJob(points, canopies, output, measureClass, t1, t2, vectorClass);

    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }

}

From source file:org.apache.mahout.clustering.cdbw.CDbwDriver.java

public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = DefaultOptionCreator.inputOption().create();
    Option outputOpt = DefaultOptionCreator.outputOption().create();
    Option maxIterOpt = DefaultOptionCreator.maxIterationsOption().create();
    Option helpOpt = DefaultOptionCreator.helpOption();

    Option modelOpt = obuilder.withLongName("modelClass").withRequired(true).withShortName("d")
            .withArgument(abuilder.withName("modelClass").withMinimum(1).withMaximum(1).create())
            .withDescription("The ModelDistribution class name. "
                    + "Defaults to org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution")
            .create();

    Option numRedOpt = obuilder.withLongName("maxRed").withRequired(true).withShortName("r")
            .withArgument(abuilder.withName("maxRed").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of reduce tasks.").create();

    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt)
            .withOption(maxIterOpt).withOption(helpOpt).withOption(numRedOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        Path input = new Path(cmdLine.getValue(inputOpt).toString());
        Path output = new Path(cmdLine.getValue(outputOpt).toString());
        String modelFactory = "org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution";
        if (cmdLine.hasOption(modelOpt)) {
            modelFactory = cmdLine.getValue(modelOpt).toString();
        }
        int numReducers = Integer.parseInt(cmdLine.getValue(numRedOpt).toString());
        int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
        runJob(input, null, output, modelFactory, maxIterations, numReducers);
    } catch (OptionException e) {
        log.error("Exception parsing command line: ", e);
        CommandLineUtil.printHelp(group);
    }
}

From source file:org.apache.mahout.clustering.dirichlet.DirichletDriver.java

public static void main(String[] args) throws Exception {
    Option helpOpt = DefaultOptionCreator.helpOption();
    Option inputOpt = DefaultOptionCreator.inputOption().create();
    Option outputOpt = DefaultOptionCreator.outputOption().create();
    Option maxIterOpt = DefaultOptionCreator.maxIterationsOption().create();
    Option kOpt = DefaultOptionCreator.kOption().withRequired(true).create();
    Option overwriteOutput = DefaultOptionCreator.overwriteOption().create();
    Option clusteringOpt = DefaultOptionCreator.clusteringOption().create();
    Option alphaOpt = DefaultOptionCreator.alphaOption().create();
    Option modelDistOpt = DefaultOptionCreator.modelDistributionOption().create();
    Option prototypeOpt = DefaultOptionCreator.modelPrototypeOption().create();
    Option numRedOpt = DefaultOptionCreator.numReducersOption().create();
    Option emitMostLikelyOpt = DefaultOptionCreator.emitMostLikelyOption().create();
    Option thresholdOpt = DefaultOptionCreator.thresholdOption().create();

    Group group = new GroupBuilder().withName("Options").withOption(inputOpt).withOption(outputOpt)
            .withOption(overwriteOutput).withOption(modelDistOpt).withOption(prototypeOpt)
            .withOption(maxIterOpt).withOption(alphaOpt).withOption(kOpt).withOption(helpOpt)
            .withOption(numRedOpt).withOption(clusteringOpt).withOption(emitMostLikelyOpt)
            .withOption(thresholdOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        Path input = new Path(cmdLine.getValue(inputOpt).toString());
        Path output = new Path(cmdLine.getValue(outputOpt).toString());
        if (cmdLine.hasOption(overwriteOutput)) {
            HadoopUtil.overwriteOutput(output);
        }
        String modelFactory = cmdLine.getValue(modelDistOpt).toString();
        String modelPrototype = cmdLine.getValue(prototypeOpt).toString();
        int numModels = Integer.parseInt(cmdLine.getValue(kOpt).toString());
        int numReducers = Integer.parseInt(cmdLine.getValue(numRedOpt).toString());
        int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
        boolean emitMostLikely = Boolean.parseBoolean(cmdLine.getValue(emitMostLikelyOpt).toString());
        double threshold = Double.parseDouble(cmdLine.getValue(thresholdOpt).toString());
        double alpha_0 = Double.parseDouble(cmdLine.getValue(alphaOpt).toString());

        runJob(input, output, modelFactory, modelPrototype, numModels, maxIterations, alpha_0, numReducers,
                cmdLine.hasOption(clusteringOpt), emitMostLikely, threshold);
    } catch (OptionException e) {
        log.error("Exception parsing command line: ", e);
        CommandLineUtil.printHelp(group);
    }
}
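
One parsing quirk in this last driver is worth flagging: unlike Integer.parseInt and Double.parseDouble, Boolean.parseBoolean never throws; anything other than a case-insensitive "true" silently becomes false, so a mistyped --emitMostLikely value is swallowed rather than reported. A stricter sketch:

// Strict boolean parsing (a sketch); rejects values that are neither "true" nor "false".
String raw = cmdLine.getValue(emitMostLikelyOpt).toString();
if (!"true".equalsIgnoreCase(raw) && !"false".equalsIgnoreCase(raw)) {
    throw new IllegalArgumentException("emitMostLikely must be true or false, got: " + raw);
}
boolean emitMostLikely = Boolean.parseBoolean(raw);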