List of usage examples for org.apache.commons.cli2 CommandLine getValue
Object getValue(final Option option) throws IllegalStateException;
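Before the full examples, here is a minimal sketch of the pattern they all share: declare an Option with the builders, parse the arguments with Parser, guard with hasOption(), then cast the Object returned by getValue(). This sketch is not taken from any of the source files below; the "--input" option name is illustrative, and it assumes the standard commons-cli2 packages (org.apache.commons.cli2.*).

// Minimal, hypothetical usage sketch of CommandLine.getValue(Option).
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;

public class GetValueSketch {
  public static void main(String[] args) {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    // Declare an option that takes exactly one argument value.
    Option inputOpt = obuilder.withLongName("input").withShortName("i").withRequired(true)
        .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
        .withDescription("Input path").create();
    Group group = gbuilder.withName("Options").withOption(inputOpt).create();

    Parser parser = new Parser();
    parser.setGroup(group);
    try {
      CommandLine cmdLine = parser.parse(args);
      // getValue() returns Object, so callers cast to the expected type;
      // guard with hasOption() when the option is not required.
      if (cmdLine.hasOption(inputOpt)) {
        String input = (String) cmdLine.getValue(inputOpt);
        System.out.println("input = " + input);
      }
    } catch (OptionException e) {
      System.err.println(e.getMessage());
    }
  }
}

Every example below follows this same parse / hasOption / getValue flow; they differ mainly in the options declared and in how the returned String is converted (Integer.parseInt, new Path, Charset.forName, and so on).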
From source file:org.apache.mahout.classifier.bayes.TrainClassifier.java
public static void main(String[] args) throws Exception {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option helpOpt = DefaultOptionCreator.helpOption();
  Option inputDirOpt = DefaultOptionCreator.inputOption().create();
  Option outputOpt = DefaultOptionCreator.outputOption().create();
  Option gramSizeOpt = obuilder.withLongName("gramSize").withRequired(false)
      .withArgument(abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create())
      .withDescription("Size of the n-gram. Default Value: 1 ").withShortName("ng").create();
  Option minDfOpt = obuilder.withLongName("minDf").withRequired(false)
      .withArgument(abuilder.withName("minDf").withMinimum(1).withMaximum(1).create())
      .withDescription("Minimum Term Document Frequency: 1 ").withShortName("mf").create();
  Option minSupportOpt = obuilder.withLongName("minSupport").withRequired(false)
      .withArgument(abuilder.withName("minSupport").withMinimum(1).withMaximum(1).create())
      .withDescription("Minimum Support (Term Frequency): 1 ").withShortName("ms").create();
  Option alphaOpt = obuilder.withLongName("alpha").withRequired(false)
      .withArgument(abuilder.withName("a").withMinimum(1).withMaximum(1).create())
      .withDescription("Smoothing parameter Default Value: 1.0").withShortName("a").create();
  Option typeOpt = obuilder.withLongName("classifierType").withRequired(false)
      .withArgument(abuilder.withName("classifierType").withMinimum(1).withMaximum(1).create())
      .withDescription("Type of classifier: bayes|cbayes. Default: bayes").withShortName("type").create();
  Option dataSourceOpt = obuilder.withLongName("dataSource").withRequired(false)
      .withArgument(abuilder.withName("dataSource").withMinimum(1).withMaximum(1).create())
      .withDescription("Location of model: hdfs. Default Value: hdfs").withShortName("source").create();
  Option skipCleanupOpt = obuilder.withLongName("skipCleanup").withRequired(false)
      .withDescription("Skip cleanup of feature extraction output").withShortName("sc").create();
  Option compressOpt = obuilder.withLongName("compress").withRequired(false)
      .withArgument(abuilder.withName("compress").withDefault("0").withMinimum(0).withMaximum(1).create())
      .withDescription("True if the output should be compressed. Default is false").withShortName("comp").create();
  Option compressCodecOpt = obuilder.withLongName("codec").withRequired(false)
      .withArgument(abuilder.withName("codec").withDefault("org.apache.hadoop.io.compress.DefaultCodec")
          .withMinimum(0).withMaximum(1).create())
      .withDescription("Compress codec Default Value: org.apache.hadoop.io.compress.DefaultCodec")
      .withShortName("co").create();

  Group group = gbuilder.withName("Options").withOption(gramSizeOpt).withOption(helpOpt)
      .withOption(inputDirOpt).withOption(outputOpt).withOption(typeOpt).withOption(dataSourceOpt)
      .withOption(alphaOpt).withOption(minDfOpt).withOption(minSupportOpt).withOption(skipCleanupOpt)
      .withOption(compressOpt).withOption(compressCodecOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    parser.setHelpOption(helpOpt);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(group);
      return;
    }
    String classifierType = (String) cmdLine.getValue(typeOpt);
    String dataSourceType = (String) cmdLine.getValue(dataSourceOpt);

    BayesParameters params = new BayesParameters();
    // Setting all the default parameter values
    params.setGramSize(1);
    params.setMinDF(1);
    params.set("alpha_i", "1.0");
    params.set("dataSource", "hdfs");

    if (cmdLine.hasOption(gramSizeOpt)) {
      params.setGramSize(Integer.parseInt((String) cmdLine.getValue(gramSizeOpt)));
    }
    if (cmdLine.hasOption(minDfOpt)) {
      params.setMinDF(Integer.parseInt((String) cmdLine.getValue(minDfOpt)));
    }
    if (cmdLine.hasOption(minSupportOpt)) {
      params.setMinSupport(Integer.parseInt((String) cmdLine.getValue(minSupportOpt)));
    }
    if (cmdLine.hasOption(skipCleanupOpt)) {
      params.setSkipCleanup(true);
    }
    if (cmdLine.hasOption(alphaOpt)) {
      params.set("alpha_i", (String) cmdLine.getValue(alphaOpt));
    }
    if (cmdLine.hasOption(dataSourceOpt)) {
      params.set("dataSource", dataSourceType);
    }
    if (cmdLine.hasOption(compressOpt) && cmdLine.getValue(compressOpt).toString().equals("1")) {
      params.set("compress", "true");
    } else {
      params.set("compress", "false");
    }
    if (cmdLine.hasOption(compressCodecOpt)) {
      params.set("codec", (String) cmdLine.getValue(compressCodecOpt));
    }

    Path inputPath = new Path((String) cmdLine.getValue(inputDirOpt));
    Path outputPath = new Path((String) cmdLine.getValue(outputOpt));
    if ("cbayes".equalsIgnoreCase(classifierType)) {
      log.info("Training Complementary Bayes Classifier");
      trainCNaiveBayes(inputPath, outputPath, params);
    } else {
      log.info("Training Bayes Classifier");
      // setup the HDFS and copy the files there, then run the trainer
      trainNaiveBayes(inputPath, outputPath, params);
    }
  } catch (OptionException e) {
    log.error("Error while parsing options", e);
    CommandLineUtil.printHelp(group);
  }
}
From source file:org.apache.mahout.classifier.bayes.WikipediaDatasetCreatorDriver.java
/**
 * Takes in two arguments:
 * <ol>
 * <li>The input {@link org.apache.hadoop.fs.Path} where the input documents live</li>
 * <li>The output {@link org.apache.hadoop.fs.Path} where to write the classifier as a
 * {@link org.apache.hadoop.io.SequenceFile}</li>
 * </ol>
 */
public static void main(String[] args) throws IOException, InterruptedException {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option dirInputPathOpt = DefaultOptionCreator.inputOption().create();
  Option dirOutputPathOpt = DefaultOptionCreator.outputOption().create();
  Option categoriesOpt = obuilder.withLongName("categories").withRequired(true)
      .withArgument(abuilder.withName("categories").withMinimum(1).withMaximum(1).create())
      .withDescription("Location of the categories file. One entry per line. "
          + "Will be used to make a string match in Wikipedia Category field")
      .withShortName("c").create();
  Option exactMatchOpt = obuilder.withLongName("exactMatch")
      .withDescription("If set, then the category name must exactly match the "
          + "entry in the categories file. Default is false")
      .withShortName("e").create();
  Option analyzerOpt = obuilder.withLongName("analyzer").withRequired(false)
      .withArgument(abuilder.withName("analyzer").withMinimum(1).withMaximum(1).create())
      .withDescription("The analyzer to use, must have a no argument constructor").withShortName("a").create();
  Option helpOpt = DefaultOptionCreator.helpOption();

  Group group = gbuilder.withName("Options").withOption(categoriesOpt).withOption(dirInputPathOpt)
      .withOption(dirOutputPathOpt).withOption(exactMatchOpt).withOption(analyzerOpt).withOption(helpOpt)
      .create();

  Parser parser = new Parser();
  parser.setGroup(group);
  try {
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(group);
      return;
    }
    String inputPath = (String) cmdLine.getValue(dirInputPathOpt);
    String outputPath = (String) cmdLine.getValue(dirOutputPathOpt);
    String catFile = (String) cmdLine.getValue(categoriesOpt);
    Class<? extends Analyzer> analyzerClass = WikipediaAnalyzer.class;
    if (cmdLine.hasOption(analyzerOpt)) {
      String className = cmdLine.getValue(analyzerOpt).toString();
      analyzerClass = Class.forName(className).asSubclass(Analyzer.class);
      // try instantiating it, b/c there isn't any point in setting it if
      // you can't instantiate it
      ClassUtils.instantiateAs(analyzerClass, Analyzer.class);
    }
    runJob(inputPath, outputPath, catFile, cmdLine.hasOption(exactMatchOpt), analyzerClass);
  } catch (OptionException e) {
    log.error("Exception", e);
    CommandLineUtil.printHelp(group);
  } catch (ClassNotFoundException e) {
    log.error("Exception", e);
    CommandLineUtil.printHelp(group);
  }
}
From source file:org.apache.mahout.classifier.bayes.WikipediaXmlSplitter.java
public static void main(String[] args) throws IOException {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option dumpFileOpt = obuilder.withLongName("dumpFile").withRequired(true)
      .withArgument(abuilder.withName("dumpFile").withMinimum(1).withMaximum(1).create())
      .withDescription("The path to the wikipedia dump file (.bz2 or uncompressed)").withShortName("d").create();
  Option outputDirOpt = obuilder.withLongName("outputDir").withRequired(true)
      .withArgument(abuilder.withName("outputDir").withMinimum(1).withMaximum(1).create())
      .withDescription("The output directory to place the splits in:\n"
          + "local files:\n\t/var/data/wikipedia-xml-chunks or\n\tfile:///var/data/wikipedia-xml-chunks\n"
          + "Hadoop DFS:\n\thdfs://wikipedia-xml-chunks\n"
          + "AWS S3 (blocks):\n\ts3://bucket-name/wikipedia-xml-chunks\n"
          + "AWS S3 (native files):\n\ts3n://bucket-name/wikipedia-xml-chunks\n")
      .withShortName("o").create();
  Option s3IdOpt = obuilder.withLongName("s3ID").withRequired(false)
      .withArgument(abuilder.withName("s3Id").withMinimum(1).withMaximum(1).create())
      .withDescription("Amazon S3 ID key").withShortName("i").create();
  Option s3SecretOpt = obuilder.withLongName("s3Secret").withRequired(false)
      .withArgument(abuilder.withName("s3Secret").withMinimum(1).withMaximum(1).create())
      .withDescription("Amazon S3 secret key").withShortName("s").create();
  Option chunkSizeOpt = obuilder.withLongName("chunkSize").withRequired(true)
      .withArgument(abuilder.withName("chunkSize").withMinimum(1).withMaximum(1).create())
      .withDescription("The Size of the chunk, in megabytes").withShortName("c").create();
  Option numChunksOpt = obuilder.withLongName("numChunks").withRequired(false)
      .withArgument(abuilder.withName("numChunks").withMinimum(1).withMaximum(1).create())
      .withDescription("The maximum number of chunks to create. "
          + "If specified, program will only create a subset of the chunks")
      .withShortName("n").create();

  Group group = gbuilder.withName("Options").withOption(dumpFileOpt).withOption(outputDirOpt)
      .withOption(chunkSizeOpt).withOption(numChunksOpt).withOption(s3IdOpt).withOption(s3SecretOpt).create();

  Parser parser = new Parser();
  parser.setGroup(group);
  CommandLine cmdLine;
  try {
    cmdLine = parser.parse(args);
  } catch (OptionException e) {
    log.error("Error while parsing options", e);
    CommandLineUtil.printHelp(group);
    return;
  }

  Configuration conf = new Configuration();
  String dumpFilePath = (String) cmdLine.getValue(dumpFileOpt);
  String outputDirPath = (String) cmdLine.getValue(outputDirOpt);

  if (cmdLine.hasOption(s3IdOpt)) {
    String id = (String) cmdLine.getValue(s3IdOpt);
    conf.set("fs.s3n.awsAccessKeyId", id);
    conf.set("fs.s3.awsAccessKeyId", id);
  }
  if (cmdLine.hasOption(s3SecretOpt)) {
    String secret = (String) cmdLine.getValue(s3SecretOpt);
    conf.set("fs.s3n.awsSecretAccessKey", secret);
    conf.set("fs.s3.awsSecretAccessKey", secret);
  }
  // do not compute crc file when using local FS
  conf.set("fs.file.impl", "org.apache.hadoop.fs.RawLocalFileSystem");
  FileSystem fs = FileSystem.get(URI.create(outputDirPath), conf);

  int chunkSize = 1024 * 1024 * Integer.parseInt((String) cmdLine.getValue(chunkSizeOpt));
  int numChunks = Integer.MAX_VALUE;
  if (cmdLine.hasOption(numChunksOpt)) {
    numChunks = Integer.parseInt((String) cmdLine.getValue(numChunksOpt));
  }

  String header = "<mediawiki xmlns=\"http://www.mediawiki.org/xml/export-0.3/\" "
      + "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
      + "xsi:schemaLocation=\"http://www.mediawiki.org/xml/export-0.3/ "
      + "http://www.mediawiki.org/xml/export-0.3.xsd\" "
      + "version=\"0.3\" "
      + "xml:lang=\"en\">\n"
      + " <siteinfo>\n"
      + "<sitename>Wikipedia</sitename>\n"
      + " <base>http://en.wikipedia.org/wiki/Main_Page</base>\n"
      + " <generator>MediaWiki 1.13alpha</generator>\n"
      + " <case>first-letter</case>\n"
      + " <namespaces>\n"
      + " <namespace key=\"-2\">Media</namespace>\n"
      + " <namespace key=\"-1\">Special</namespace>\n"
      + " <namespace key=\"0\" />\n"
      + " <namespace key=\"1\">Talk</namespace>\n"
      + " <namespace key=\"2\">User</namespace>\n"
      + " <namespace key=\"3\">User talk</namespace>\n"
      + " <namespace key=\"4\">Wikipedia</namespace>\n"
      + " <namespace key=\"5\">Wikipedia talk</namespace>\n"
      + " <namespace key=\"6\">Image</namespace>\n"
      + " <namespace key=\"7\">Image talk</namespace>\n"
      + " <namespace key=\"8\">MediaWiki</namespace>\n"
      + " <namespace key=\"9\">MediaWiki talk</namespace>\n"
      + " <namespace key=\"10\">Template</namespace>\n"
      + " <namespace key=\"11\">Template talk</namespace>\n"
      + " <namespace key=\"12\">Help</namespace>\n"
      + " <namespace key=\"13\">Help talk</namespace>\n"
      + " <namespace key=\"14\">Category</namespace>\n"
      + " <namespace key=\"15\">Category talk</namespace>\n"
      + " <namespace key=\"100\">Portal</namespace>\n"
      + " <namespace key=\"101\">Portal talk</namespace>\n"
      + " </namespaces>\n"
      + " </siteinfo>\n";

  StringBuilder content = new StringBuilder();
  content.append(header);
  NumberFormat decimalFormatter = new DecimalFormat("0000");
  File dumpFile = new File(dumpFilePath);
  FileLineIterator it;
  if (dumpFilePath.endsWith(".bz2")) {
    // default compression format from http://download.wikimedia.org
    CompressionCodec codec = new BZip2Codec();
    it = new FileLineIterator(codec.createInputStream(new FileInputStream(dumpFile)));
  } else {
    // assume the user has previously de-compressed the dump file
    it = new FileLineIterator(dumpFile);
  }
  int filenumber = 0;
  while (it.hasNext()) {
    String thisLine = it.next();
    if (thisLine.trim().startsWith("<page>")) {
      boolean end = false;
      while (!thisLine.trim().startsWith("</page>")) {
        content.append(thisLine).append('\n');
        if (it.hasNext()) {
          thisLine = it.next();
        } else {
          end = true;
          break;
        }
      }
      content.append(thisLine).append('\n');
      if (content.length() > chunkSize || end) {
        content.append("</mediawiki>");
        filenumber++;
        String filename = outputDirPath + "/chunk-" + decimalFormatter.format(filenumber) + ".xml";
        BufferedWriter chunkWriter = new BufferedWriter(
            new OutputStreamWriter(fs.create(new Path(filename)), "UTF-8"));
        try {
          chunkWriter.write(content.toString(), 0, content.length());
        } finally {
          Closeables.closeQuietly(chunkWriter);
        }
        if (filenumber >= numChunks) {
          break;
        }
        content = new StringBuilder();
        content.append(header);
      }
    }
  }
}
From source file:org.apache.mahout.classifier.BayesFileFormatter.java
/**
 * Run the FileFormatter
 *
 * @param args
 *          The input args. Run with -h to see the help
 * @throws ClassNotFoundException
 *           if the Analyzer can't be found
 * @throws IllegalAccessException
 *           if the Analyzer can't be constructed
 * @throws InstantiationException
 *           if the Analyzer can't be constructed
 * @throws IOException
 *           if the files can't be dealt with properly
 */
public static void main(String[] args) throws Exception {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option inputOpt = DefaultOptionCreator.inputOption().create();
  Option outputOpt = DefaultOptionCreator.outputOption().create();
  Option labelOpt = obuilder.withLongName("label").withRequired(true)
      .withArgument(abuilder.withName("label").withMinimum(1).withMaximum(1).create())
      .withDescription("The label of the file").withShortName("l").create();
  Option analyzerOpt = obuilder.withLongName("analyzer")
      .withArgument(abuilder.withName("analyzer").withMinimum(1).withMaximum(1).create())
      .withDescription("The fully qualified class name of the analyzer to use. "
          + "Must have a no-arg constructor. Default is the StandardAnalyzer")
      .withShortName("a").create();
  Option charsetOpt = obuilder.withLongName("charset")
      .withArgument(abuilder.withName("charset").withMinimum(1).withMaximum(1).create())
      .withDescription("The character encoding of the input file").withShortName("c").create();
  Option collapseOpt = obuilder.withLongName("collapse").withRequired(true)
      .withArgument(abuilder.withName("collapse").withMinimum(1).withMaximum(1).create())
      .withDescription("Collapse a whole directory to a single file, one doc per line").withShortName("p").create();
  Option helpOpt = DefaultOptionCreator.helpOption();

  Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(labelOpt)
      .withOption(analyzerOpt).withOption(charsetOpt).withOption(collapseOpt).withOption(helpOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption(helpOpt)) {
      return;
    }
    File input = new File((String) cmdLine.getValue(inputOpt));
    File output = new File((String) cmdLine.getValue(outputOpt));
    String label = (String) cmdLine.getValue(labelOpt);
    Analyzer analyzer;
    if (cmdLine.hasOption(analyzerOpt)) {
      analyzer = ClassUtils.instantiateAs((String) cmdLine.getValue(analyzerOpt), Analyzer.class);
    } else {
      analyzer = new StandardAnalyzer(Version.LUCENE_31);
    }
    Charset charset = Charsets.UTF_8;
    if (cmdLine.hasOption(charsetOpt)) {
      charset = Charset.forName((String) cmdLine.getValue(charsetOpt));
    }
    boolean collapse = cmdLine.hasOption(collapseOpt);
    if (collapse) {
      collapse(label, analyzer, input, charset, output);
    } else {
      format(label, analyzer, input, charset, output);
    }
  } catch (OptionException e) {
    log.error("Exception", e);
  }
}
From source file:org.apache.mahout.classifier.chi_rwcs.mapreduce.BuildModel.java
@Override
public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
      .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
      .withDescription("Data path").create();
  Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
      .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
      .withDescription("The path of the file descriptor of the dataset").create();
  Option timeOpt = obuilder.withLongName("time").withShortName("tm").withRequired(false)
      .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
      .withDescription("Time path").create();
  Option outputOpt = obuilder.withLongName("output").withShortName("o").withRequired(true)
      .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
      .withDescription("Output path, will contain the Decision Forest").create();
  Option labelsOpt = obuilder.withLongName("labels").withShortName("l").withRequired(true)
      .withArgument(abuilder.withName("labels").withMinimum(1).withMaximum(1).create())
      .withDescription("Number of Labels").create();
  Option combinationTypeOpt = obuilder.withLongName("combinationType").withShortName("t").withRequired(true)
      .withArgument(abuilder.withName("combinationType").withMinimum(1).withMaximum(1).create())
      .withDescription("T-norm for the computation of the compatibility degree").create();
  Option rule_weightOpt = obuilder.withLongName("rule_weight").withShortName("r").withRequired(true)
      .withArgument(abuilder.withName("rule_weight").withMinimum(1).withMaximum(1).create())
      .withDescription("Rule Weight").create();
  Option fuzzy_r_mOpt = obuilder.withLongName("fuzzy_r_m").withShortName("f").withRequired(true)
      .withArgument(abuilder.withName("fuzzy_r_m").withMinimum(1).withMaximum(1).create())
      .withDescription("Fuzzy Reasoning Method").create();
  Option helpOpt = obuilder.withLongName("help").withShortName("h").withDescription("Print out help").create();

  Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt).withOption(timeOpt)
      .withOption(outputOpt).withOption(labelsOpt).withOption(combinationTypeOpt)
      .withOption(rule_weightOpt).withOption(fuzzy_r_mOpt).withOption(helpOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption("help")) {
      CommandLineUtil.printHelp(group);
      return -1;
    }

    dataName = cmdLine.getValue(dataOpt).toString();
    String datasetName = cmdLine.getValue(datasetOpt).toString();
    String outputName = cmdLine.getValue(outputOpt).toString();
    nLabels = Integer.parseInt(cmdLine.getValue(labelsOpt).toString());
    String combinationType_aux = cmdLine.getValue(combinationTypeOpt).toString();
    String ruleWeight_aux = cmdLine.getValue(rule_weightOpt).toString();
    String inferenceType_aux = cmdLine.getValue(fuzzy_r_mOpt).toString();
    if (cmdLine.hasOption(timeOpt)) {
      buildTimeIsStored = true;
      timeName = cmdLine.getValue(timeOpt).toString();
    }

    if (log.isDebugEnabled()) {
      log.debug("data : {}", dataName);
      log.debug("dataset : {}", datasetName);
      log.debug("output : {}", outputName);
      log.debug("labels : {}", nLabels);
      log.debug("t_norm : {}", combinationType_aux);
      log.debug("rule_weight : {}", ruleWeight_aux);
      log.debug("fuzzy_r_m : {}", inferenceType_aux);
      log.debug("time : {}", timeName);
    }

    dataPath = new Path(dataName);
    datasetPath = new Path(datasetName);
    outputPath = new Path(outputName);
    if (buildTimeIsStored)
      timePath = new Path(timeName);

    combinationType = PRODUCT;
    if (combinationType_aux.compareToIgnoreCase("minimum") == 0) {
      combinationType = MINIMUM;
    }
    ruleWeight = PCF_IV;
    if (ruleWeight_aux.compareToIgnoreCase("Certainty_Factor") == 0) {
      ruleWeight = CF;
    } else if (ruleWeight_aux.compareToIgnoreCase("Average_Penalized_Certainty_Factor") == 0) {
      ruleWeight = PCF_II;
    } else if (ruleWeight_aux.compareToIgnoreCase("No_Weights") == 0) {
      ruleWeight = NO_RW;
    }
    inferenceType = WINNING_RULE;
    if (inferenceType_aux.compareToIgnoreCase("Additive_Combination") == 0) {
      inferenceType = ADDITIVE_COMBINATION;
    }
  } catch (OptionException e) {
    log.error("Exception", e);
    CommandLineUtil.printHelp(group);
    return -1;
  }

  buildModel();
  return 0;
}
From source file:org.apache.mahout.classifier.chi_rwcs.mapreduce.TestModel.java
@Override
public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option inputOpt = DefaultOptionCreator.inputOption().create();
  Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
      .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
      .withDescription("Dataset path").create();
  Option modelOpt = obuilder.withLongName("model").withShortName("m").withRequired(true)
      .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
      .withDescription("Path to the Model").create();
  Option outputOpt = DefaultOptionCreator.outputOption().create();
  Option helpOpt = DefaultOptionCreator.helpOption();

  Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(datasetOpt).withOption(modelOpt)
      .withOption(outputOpt).withOption(helpOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption("help")) {
      CommandLineUtil.printHelp(group);
      return -1;
    }

    dataName = cmdLine.getValue(inputOpt).toString();
    String datasetName = cmdLine.getValue(datasetOpt).toString();
    String modelName = cmdLine.getValue(modelOpt).toString();
    String outputName = cmdLine.hasOption(outputOpt) ? cmdLine.getValue(outputOpt).toString() : null;

    if (log.isDebugEnabled()) {
      log.debug("input : {}", dataName);
      log.debug("dataset : {}", datasetName);
      log.debug("model : {}", modelName);
      log.debug("output : {}", outputName);
    }

    dataPath = new Path(dataName);
    datasetPath = new Path(datasetName);
    modelPath = new Path(modelName);
    if (outputName != null) {
      outputPath = new Path(outputName);
    }
  } catch (OptionException e) {
    log.warn(e.toString(), e);
    CommandLineUtil.printHelp(group);
    return -1;
  }

  time = System.currentTimeMillis();
  testModel();
  time = System.currentTimeMillis() - time;
  writeToFileClassifyTime(Chi_RWCSUtils.elapsedTime(time));

  return 0;
}
From source file:org.apache.mahout.classifier.Classify.java
public static void main(String[] args) throws Exception {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option pathOpt = obuilder.withLongName("path").withRequired(true)
      .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
      .withDescription("The local file system path").withShortName("m").create();
  Option classifyOpt = obuilder.withLongName("classify").withRequired(true)
      .withArgument(abuilder.withName("classify").withMinimum(1).withMaximum(1).create())
      .withDescription("The doc to classify").withShortName("").create();
  Option encodingOpt = obuilder.withLongName("encoding").withRequired(true)
      .withArgument(abuilder.withName("encoding").withMinimum(1).withMaximum(1).create())
      .withDescription("The file encoding. Default: UTF-8").withShortName("e").create();
  Option analyzerOpt = obuilder.withLongName("analyzer").withRequired(true)
      .withArgument(abuilder.withName("analyzer").withMinimum(1).withMaximum(1).create())
      .withDescription("The Analyzer to use").withShortName("a").create();
  Option defaultCatOpt = obuilder.withLongName("defaultCat").withRequired(true)
      .withArgument(abuilder.withName("defaultCat").withMinimum(1).withMaximum(1).create())
      .withDescription("The default category").withShortName("d").create();
  Option gramSizeOpt = obuilder.withLongName("gramSize").withRequired(true)
      .withArgument(abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create())
      .withDescription("Size of the n-gram").withShortName("ng").create();
  Option typeOpt = obuilder.withLongName("classifierType").withRequired(true)
      .withArgument(abuilder.withName("classifierType").withMinimum(1).withMaximum(1).create())
      .withDescription("Type of classifier").withShortName("type").create();
  Option dataSourceOpt = obuilder.withLongName("dataSource").withRequired(true)
      .withArgument(abuilder.withName("dataSource").withMinimum(1).withMaximum(1).create())
      .withDescription("Location of model: hdfs").withShortName("source").create();

  Group options = gbuilder.withName("Options").withOption(pathOpt).withOption(classifyOpt)
      .withOption(encodingOpt).withOption(analyzerOpt).withOption(defaultCatOpt).withOption(gramSizeOpt)
      .withOption(typeOpt).withOption(dataSourceOpt).create();

  Parser parser = new Parser();
  parser.setGroup(options);
  CommandLine cmdLine = parser.parse(args);

  int gramSize = 1;
  if (cmdLine.hasOption(gramSizeOpt)) {
    gramSize = Integer.parseInt((String) cmdLine.getValue(gramSizeOpt));
  }

  BayesParameters params = new BayesParameters();
  params.setGramSize(gramSize);
  String modelBasePath = (String) cmdLine.getValue(pathOpt);
  params.setBasePath(modelBasePath);
  log.info("Loading model from: {}", params.print());

  Algorithm algorithm;
  Datastore datastore;
  String classifierType = (String) cmdLine.getValue(typeOpt);
  String dataSource = (String) cmdLine.getValue(dataSourceOpt);
  if ("hdfs".equals(dataSource)) {
    if ("bayes".equalsIgnoreCase(classifierType)) {
      log.info("Using Bayes Classifier");
      algorithm = new BayesAlgorithm();
      datastore = new InMemoryBayesDatastore(params);
    } else if ("cbayes".equalsIgnoreCase(classifierType)) {
      log.info("Using Complementary Bayes Classifier");
      algorithm = new CBayesAlgorithm();
      datastore = new InMemoryBayesDatastore(params);
    } else {
      throw new IllegalArgumentException("Unrecognized classifier type: " + classifierType);
    }
  } else {
    throw new IllegalArgumentException("Unrecognized dataSource type: " + dataSource);
  }

  ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
  classifier.initialize();

  String defaultCat = "unknown";
  if (cmdLine.hasOption(defaultCatOpt)) {
    defaultCat = (String) cmdLine.getValue(defaultCatOpt);
  }
  File docPath = new File((String) cmdLine.getValue(classifyOpt));
  String encoding = "UTF-8";
  if (cmdLine.hasOption(encodingOpt)) {
    encoding = (String) cmdLine.getValue(encodingOpt);
  }
  Analyzer analyzer = null;
  if (cmdLine.hasOption(analyzerOpt)) {
    analyzer = ClassUtils.instantiateAs((String) cmdLine.getValue(analyzerOpt), Analyzer.class);
  }
  if (analyzer == null) {
    analyzer = new StandardAnalyzer(Version.LUCENE_31);
  }

  log.info("Converting input document to proper format");
  String[] document = BayesFileFormatter.readerToDocument(analyzer,
      Files.newReader(docPath, Charset.forName(encoding)));
  StringBuilder line = new StringBuilder();
  for (String token : document) {
    line.append(token).append(' ');
  }
  List<String> doc = new NGrams(line.toString(), gramSize).generateNGramsWithoutLabel();
  log.info("Done converting");

  log.info("Classifying document: {}", docPath);
  ClassifierResult category = classifier.classifyDocument(doc.toArray(new String[doc.size()]), defaultCat);
  log.info("Category for {} is {}", docPath, category);
}
From source file:org.apache.mahout.classifier.df.BreimanExample.java
@Override
public int run(String[] args) throws IOException {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
      .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
      .withDescription("Data path").create();
  Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
      .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
      .withDescription("Dataset path").create();
  Option nbtreesOpt = obuilder.withLongName("nbtrees").withShortName("t").withRequired(true)
      .withArgument(abuilder.withName("nbtrees").withMinimum(1).withMaximum(1).create())
      .withDescription("Number of trees to grow, each iteration").create();
  Option nbItersOpt = obuilder.withLongName("iterations").withShortName("i").withRequired(true)
      .withArgument(abuilder.withName("numIterations").withMinimum(1).withMaximum(1).create())
      .withDescription("Number of times to repeat the test").create();
  Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h").create();

  Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt).withOption(nbItersOpt)
      .withOption(nbtreesOpt).withOption(helpOpt).create();

  Path dataPath;
  Path datasetPath;
  int nbTrees;
  int nbIterations;
  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption("help")) {
      CommandLineUtil.printHelp(group);
      return -1;
    }

    String dataName = cmdLine.getValue(dataOpt).toString();
    String datasetName = cmdLine.getValue(datasetOpt).toString();
    nbTrees = Integer.parseInt(cmdLine.getValue(nbtreesOpt).toString());
    nbIterations = Integer.parseInt(cmdLine.getValue(nbItersOpt).toString());

    dataPath = new Path(dataName);
    datasetPath = new Path(datasetName);
  } catch (OptionException e) {
    log.error("Error while parsing options", e);
    CommandLineUtil.printHelp(group);
    return -1;
  }

  // load the data
  FileSystem fs = dataPath.getFileSystem(new Configuration());
  Dataset dataset = Dataset.load(getConf(), datasetPath);
  Data data = DataLoader.loadData(dataset, fs, dataPath);

  // take m to be the first integer less than log2(M) + 1, where M is the
  // number of inputs
  int m = (int) Math.floor(FastMath.log(2.0, data.getDataset().nbAttributes()) + 1);

  Random rng = RandomUtils.getRandom();
  for (int iteration = 0; iteration < nbIterations; iteration++) {
    log.info("Iteration {}", iteration);
    runIteration(rng, data, m, nbTrees);
  }

  log.info("********************************************");
  log.info("Random Input Test Error : {}", sumTestErrM / nbIterations);
  log.info("Single Input Test Error : {}", sumTestErrOne / nbIterations);
  log.info("Mean Random Input Time : {}", DFUtils.elapsedTime(sumTimeM / nbIterations));
  log.info("Mean Single Input Time : {}", DFUtils.elapsedTime(sumTimeOne / nbIterations));
  log.info("Mean Random Input Num Nodes : {}", numNodesM / nbIterations);
  log.info("Mean Single Input Num Nodes : {}", numNodesOne / nbIterations);

  return 0;
}
From source file:org.apache.mahout.classifier.df.mapreduce.Resampling.java
public int run(String[] args) throws Exception, ClassNotFoundException, InterruptedException {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
      .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
      .withDescription("Data path").create();
  Option dataPreprocessingOpt = obuilder.withLongName("dataPreprocessing").withShortName("dp").withRequired(true)
      .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
      .withDescription("Data Preprocessing path").create();
  Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
      .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
      .withDescription("Dataset path").create();
  Option timeOpt = obuilder.withLongName("time").withShortName("tm").withRequired(false)
      .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
      .withDescription("Time path").create();
  Option helpOpt = obuilder.withLongName("help").withShortName("h").withDescription("Print out help").create();
  Option resamplingOpt = obuilder.withLongName("resampling").withShortName("rs").withRequired(true)
      .withArgument(abuilder.withName("resampling").withMinimum(1).withMaximum(1).create())
      .withDescription("The resampling technique (oversampling (overs), undersampling (unders) or SMOTE (smote))")
      .create();
  Option nbpartitionsOpt = obuilder.withLongName("nbpartitions").withShortName("p").withRequired(true)
      .withArgument(abuilder.withName("nbpartitions").withMinimum(1).withMaximum(1).create())
      .withDescription("Number of partitions").create();
  Option nposOpt = obuilder.withLongName("npos").withShortName("npos").withRequired(true)
      .withArgument(abuilder.withName("npos").withMinimum(1).withMaximum(1).create())
      .withDescription("Number of instances of the positive class").create();
  Option nnegOpt = obuilder.withLongName("nneg").withShortName("nneg").withRequired(true)
      .withArgument(abuilder.withName("nneg").withMinimum(1).withMaximum(1).create())
      .withDescription("Number of instances of the negative class").create();
  Option negclassOpt = obuilder.withLongName("negclass").withShortName("negclass").withRequired(true)
      .withArgument(abuilder.withName("negclass").withMinimum(1).withMaximum(1).create())
      .withDescription("Name of the negative class").create();
  Option posclassOpt = obuilder.withLongName("posclass").withShortName("posclass").withRequired(true)
      .withArgument(abuilder.withName("posclass").withMinimum(1).withMaximum(1).create())
      .withDescription("Name of the positive class").create();

  Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt).withOption(timeOpt)
      .withOption(helpOpt).withOption(resamplingOpt).withOption(dataPreprocessingOpt)
      .withOption(nbpartitionsOpt).withOption(nposOpt).withOption(nnegOpt).withOption(negclassOpt)
      .withOption(posclassOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption("help")) {
      CommandLineUtil.printHelp(group);
      return -1;
    }

    dataName = cmdLine.getValue(dataOpt).toString();
    String datasetName = cmdLine.getValue(datasetOpt).toString();
    dataPreprocessing = cmdLine.getValue(dataPreprocessingOpt).toString();
    String resampling = cmdLine.getValue(resamplingOpt).toString();
    partitions = Integer.parseInt(cmdLine.getValue(nbpartitionsOpt).toString());
    npos = Integer.parseInt(cmdLine.getValue(nposOpt).toString());
    nneg = Integer.parseInt(cmdLine.getValue(nnegOpt).toString());
    negclass = cmdLine.getValue(negclassOpt).toString();
    posclass = cmdLine.getValue(posclassOpt).toString();

    if (resampling.equalsIgnoreCase("overs")) {
      withOversampling = true;
    } else if (resampling.equalsIgnoreCase("unders")) {
      withUndersampling = true;
    } else if (resampling.equalsIgnoreCase("smote")) {
      withSmote = true;
    }
    if (cmdLine.hasOption(timeOpt)) {
      preprocessingTimeIsStored = true;
      timeName = cmdLine.getValue(timeOpt).toString();
    }

    if (log.isDebugEnabled()) {
      log.debug("data : {}", dataName);
      log.debug("dataset : {}", datasetName);
      log.debug("time : {}", timeName);
      log.debug("Oversampling : {}", withOversampling);
      log.debug("Undersampling : {}", withUndersampling);
      log.debug("SMOTE : {}", withSmote);
    }

    dataPath = new Path(dataName);
    datasetPath = new Path(datasetName);
    dataPreprocessingPath = new Path(dataPreprocessing);
    if (preprocessingTimeIsStored)
      timePath = new Path(timeName);
  } catch (OptionException e) {
    log.error("Exception", e);
    CommandLineUtil.printHelp(group);
    return -1;
  }

  if (withOversampling) {
    overSampling();
  } else if (withUndersampling) {
    underSampling();
  } else if (withSmote) {
    smote();
  }

  return 0;
}
From source file:org.apache.mahout.classifier.df.tools.ForestVisualizer.java
public static void main(String[] args) {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
      .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
      .withDescription("Dataset path").create();
  Option modelOpt = obuilder.withLongName("model").withShortName("m").withRequired(true)
      .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
      .withDescription("Path to the Decision Forest").create();
  Option attrNamesOpt = obuilder.withLongName("names").withShortName("n").withRequired(false)
      .withArgument(abuilder.withName("names").withMinimum(1).create())
      .withDescription("Optional, Attribute names").create();
  Option helpOpt = obuilder.withLongName("help").withShortName("h").withDescription("Print out help").create();

  Group group = gbuilder.withName("Options").withOption(datasetOpt).withOption(modelOpt)
      .withOption(attrNamesOpt).withOption(helpOpt).create();

  try {
    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption("help")) {
      CommandLineUtil.printHelp(group);
      return;
    }

    String datasetName = cmdLine.getValue(datasetOpt).toString();
    String modelName = cmdLine.getValue(modelOpt).toString();
    String[] attrNames = null;
    if (cmdLine.hasOption(attrNamesOpt)) {
      Collection<String> names = (Collection<String>) cmdLine.getValues(attrNamesOpt);
      if (!names.isEmpty()) {
        attrNames = new String[names.size()];
        names.toArray(attrNames);
      }
    }
    print(modelName, datasetName, attrNames);
  } catch (Exception e) {
    log.error("Exception", e);
    CommandLineUtil.printHelp(group);
  }
}