Example usage for org.apache.mahout.common.commandline DefaultOptionCreator helpOption

List of usage examples for org.apache.mahout.common.commandline DefaultOptionCreator helpOption

Introduction

In this page you can find the example usage for org.apache.mahout.common.commandline DefaultOptionCreator helpOption.

Prototype

public static Option helpOption() 

Source Link

Document

Returns a default command line option for help.

Usage

From source file:chapter7.src.InputDriver.java

License:Apache License

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = DefaultOptionCreator.inputOption().withRequired(false).create();
    Option outputOpt = DefaultOptionCreator.outputOption().withRequired(false).create();
    Option vectorOpt = obuilder.withLongName("vector").withRequired(false)
            .withArgument(abuilder.withName("v").withMinimum(1).withMaximum(1).create())
            .withDescription("The vector implementation to use.").withShortName("v").create();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(vectorOpt)
            .withOption(helpOpt).create();

    try {/*from   ww  w .j  av  a2s  .  c  o  m*/
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        Path input = new Path(cmdLine.getValue(inputOpt, "testdata").toString());
        Path output = new Path(cmdLine.getValue(outputOpt, "output").toString());
        String vectorClassName = cmdLine.getValue(vectorOpt, "org.apache.mahout.math.RandomAccessSparseVector")
                .toString();
        //runJob(input, output, vectorClassName);
    } catch (OptionException e) {
        InputDriver.log.error("Exception parsing command line: ", e);
        CommandLineUtil.printHelp(group);
    }
}

From source file:com.elex.dmp.lda.InMemoryCollapsedVariationalBayes0.java

License:Apache License

public static int main2(String[] args, Configuration conf) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Option inputDirOpt = obuilder.withLongName("input").withRequired(true)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The Directory on HDFS containing the collapsed, properly formatted files having "
                    + "one doc per line")
            .withShortName("i").create();

    Option dictOpt = obuilder.withLongName("dictionary").withRequired(false)
            .withArgument(abuilder.withName("dictionary").withMinimum(1).withMaximum(1).create())
            .withDescription("The path to the term-dictionary format is ... ").withShortName("d").create();

    Option dfsOpt = obuilder.withLongName("dfs").withRequired(false)
            .withArgument(abuilder.withName("dfs").withMinimum(1).withMaximum(1).create())
            .withDescription("HDFS namenode URI").withShortName("dfs").create();

    Option numTopicsOpt = obuilder.withLongName("numTopics").withRequired(true)
            .withArgument(abuilder.withName("numTopics").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of topics to learn").withShortName("top").create();

    Option outputTopicFileOpt = obuilder.withLongName("topicOutputFile").withRequired(true)
            .withArgument(abuilder.withName("topicOutputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("File to write out p(term | topic)").withShortName("to").create();

    Option outputDocFileOpt = obuilder.withLongName("docOutputFile").withRequired(true)
            .withArgument(abuilder.withName("docOutputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("File to write out p(topic | docid)").withShortName("do").create();

    Option alphaOpt = obuilder.withLongName("alpha").withRequired(false)
            .withArgument(abuilder.withName("alpha").withMinimum(1).withMaximum(1).withDefault("0.1").create())
            .withDescription("Smoothing parameter for p(topic | document) prior").withShortName("a").create();

    Option etaOpt = obuilder.withLongName("eta").withRequired(false)
            .withArgument(abuilder.withName("eta").withMinimum(1).withMaximum(1).withDefault("0.1").create())
            .withDescription("Smoothing parameter for p(term | topic)").withShortName("e").create();

    Option maxIterOpt = obuilder.withLongName("maxIterations").withRequired(false)
            .withArgument(/*from ww  w  .ja va 2s  .com*/
                    abuilder.withName("maxIterations").withMinimum(1).withMaximum(1).withDefault(10).create())
            .withDescription("Maximum number of training passes").withShortName("m").create();

    Option modelCorpusFractionOption = obuilder.withLongName("modelCorpusFraction").withRequired(false)
            .withArgument(abuilder.withName("modelCorpusFraction").withMinimum(1).withMaximum(1)
                    .withDefault(0.0).create())
            .withShortName("mcf").withDescription("For online updates, initial value of |model|/|corpus|")
            .create();

    Option burnInOpt = obuilder.withLongName("burnInIterations").withRequired(false)
            .withArgument(
                    abuilder.withName("burnInIterations").withMinimum(1).withMaximum(1).withDefault(5).create())
            .withDescription("Minimum number of iterations").withShortName("b").create();

    Option convergenceOpt = obuilder.withLongName("convergence").withRequired(false)
            .withArgument(
                    abuilder.withName("convergence").withMinimum(1).withMaximum(1).withDefault("0.0").create())
            .withDescription("Fractional rate of perplexity to consider convergence").withShortName("c")
            .create();

    Option reInferDocTopicsOpt = obuilder.withLongName("reInferDocTopics").withRequired(false)
            .withArgument(abuilder.withName("reInferDocTopics").withMinimum(1).withMaximum(1).withDefault("no")
                    .create())
            .withDescription("re-infer p(topic | doc) : [no | randstart | continue]").withShortName("rdt")
            .create();

    Option numTrainThreadsOpt = obuilder
            .withLongName("numTrainThreads").withRequired(false).withArgument(abuilder
                    .withName("numTrainThreads").withMinimum(1).withMaximum(1).withDefault("1").create())
            .withDescription("number of threads to train with").withShortName("ntt").create();

    Option numUpdateThreadsOpt = obuilder.withLongName("numUpdateThreads").withRequired(false)
            .withArgument(abuilder.withName("numUpdateThreads").withMinimum(1).withMaximum(1).withDefault("1")
                    .create())
            .withDescription("number of threads to update the model with").withShortName("nut").create();

    Option verboseOpt = obuilder.withLongName("verbose").withRequired(false)
            .withArgument(
                    abuilder.withName("verbose").withMinimum(1).withMaximum(1).withDefault("false").create())
            .withDescription("print verbose information, like top-terms in each topic, during iteration")
            .withShortName("v").create();

    Group group = gbuilder.withName("Options").withOption(inputDirOpt).withOption(numTopicsOpt)
            .withOption(alphaOpt).withOption(etaOpt).withOption(maxIterOpt).withOption(burnInOpt)
            .withOption(convergenceOpt).withOption(dictOpt).withOption(reInferDocTopicsOpt)
            .withOption(outputDocFileOpt).withOption(outputTopicFileOpt).withOption(dfsOpt)
            .withOption(numTrainThreadsOpt).withOption(numUpdateThreadsOpt)
            .withOption(modelCorpusFractionOption).withOption(verboseOpt).create();

    try {
        Parser parser = new Parser();

        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        String inputDirString = (String) cmdLine.getValue(inputDirOpt);
        String dictDirString = cmdLine.hasOption(dictOpt) ? (String) cmdLine.getValue(dictOpt) : null;
        int numTopics = Integer.parseInt((String) cmdLine.getValue(numTopicsOpt));
        double alpha = Double.parseDouble((String) cmdLine.getValue(alphaOpt));
        double eta = Double.parseDouble((String) cmdLine.getValue(etaOpt));
        int maxIterations = Integer.parseInt((String) cmdLine.getValue(maxIterOpt));
        int burnInIterations = (Integer) cmdLine.getValue(burnInOpt);
        double minFractionalErrorChange = Double.parseDouble((String) cmdLine.getValue(convergenceOpt));
        int numTrainThreads = Integer.parseInt((String) cmdLine.getValue(numTrainThreadsOpt));
        int numUpdateThreads = Integer.parseInt((String) cmdLine.getValue(numUpdateThreadsOpt));
        String topicOutFile = (String) cmdLine.getValue(outputTopicFileOpt);
        String docOutFile = (String) cmdLine.getValue(outputDocFileOpt);
        String reInferDocTopics = (String) cmdLine.getValue(reInferDocTopicsOpt);
        boolean verbose = Boolean.parseBoolean((String) cmdLine.getValue(verboseOpt));
        double modelCorpusFraction = (Double) cmdLine.getValue(modelCorpusFractionOption);

        long start = System.nanoTime();

        if (conf.get("fs.default.name") == null) {
            String dfsNameNode = (String) cmdLine.getValue(dfsOpt);
            conf.set("fs.default.name", dfsNameNode);
        }
        String[] terms = loadDictionary(dictDirString, conf);
        logTime("dictionary loading", System.nanoTime() - start);
        start = System.nanoTime();
        Matrix corpus = loadVectors(inputDirString, conf);
        logTime("vector seqfile corpus loading", System.nanoTime() - start);
        start = System.nanoTime();
        InMemoryCollapsedVariationalBayes0 cvb0 = new InMemoryCollapsedVariationalBayes0(corpus, terms,
                numTopics, alpha, eta, numTrainThreads, numUpdateThreads, modelCorpusFraction, 1234);
        logTime("cvb0 init", System.nanoTime() - start);

        start = System.nanoTime();
        cvb0.setVerbose(verbose);
        cvb0.iterateUntilConvergence(minFractionalErrorChange, maxIterations, burnInIterations);
        logTime("total training time", System.nanoTime() - start);

        if ("randstart".equalsIgnoreCase(reInferDocTopics)) {
            cvb0.inferDocuments(0.0, 100, true);
        } else if ("continue".equalsIgnoreCase(reInferDocTopics)) {
            cvb0.inferDocuments(0.0, 100, false);
        }

        start = System.nanoTime();
        cvb0.writeModel(new Path(topicOutFile));
        DistributedRowMatrixWriter.write(new Path(docOutFile), conf, cvb0.docTopicCounts);
        logTime("printTopics", System.nanoTime() - start);
    } catch (OptionException e) {
        log.error("Error while parsing options", e);
        CommandLineUtil.printHelp(group);
    }
    return 0;
}

From source file:com.tamingtext.classifier.maxent.TestMaxent.java

License:Apache License

/**
 * @param args/*w w  w .  jav a2  s.  co  m*/
 */
public static void main(String[] args) throws IOException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Option inputDirOpt = obuilder.withLongName("input").withRequired(true)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The input directory").withShortName("i").create();

    Option modelOpt = obuilder.withLongName("model").withRequired(true)
            .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create())
            .withDescription("The directory containing the index model").withShortName("m").create();

    Group group = gbuilder.withName("Options").withOption(helpOpt).withOption(inputDirOpt).withOption(modelOpt)
            .create();

    try {
        Parser parser = new Parser();

        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String inputPath = (String) cmdLine.getValue(inputDirOpt);
        File f = new File(inputPath);
        if (!f.isDirectory()) {
            throw new IllegalArgumentException(f + " is not a directory or does not exit");
        }
        File[] inputFiles = FileUtil.buildFileList(f);

        File modelDir = new File((String) cmdLine.getValue(modelOpt));
        execute(inputFiles, modelDir);
    } catch (OptionException e) {
        log.error("Error while parsing options", e);
    }

}

From source file:com.tamingtext.classifier.maxent.TrainMaxent.java

License:Apache License

public static void main(String[] args) throws Exception {

    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Option inputDirOpt = obuilder.withLongName("input").withRequired(true)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The input directory, containing properly formatted files: "
                    + "One doc per line, first entry on the line is the label, rest is the evidence")
            .withShortName("i").create();

    Option outputOpt = obuilder.withLongName("output").withRequired(true)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("The output directory").withShortName("o").create();

    Group group = gbuilder.withName("Options").withOption(helpOpt).withOption(inputDirOpt).withOption(outputOpt)
            .create();//from   w w w  . j  a va 2 s . c o m

    //.withOption(gramSizeOpt).withOption(typeOpt)

    try {
        Parser parser = new Parser();

        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String inputPath = (String) cmdLine.getValue(inputDirOpt);
        String outputPath = (String) cmdLine.getValue(outputOpt);
        TrainMaxent trainer = new TrainMaxent();
        trainer.train(inputPath, outputPath);
    } catch (OptionException e) {
        log.error("Error while parsing options", e);
    }
}

From source file:com.tamingtext.classifier.mlt.MoreLikeThisCategorizer.java

License:Apache License

public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Option inputDirOpt = obuilder.withLongName("input").withRequired(true)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The input file to classify").withShortName("i").create();

    Option modelOpt = obuilder.withLongName("model").withRequired(true)
            .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create())
            .withDescription("The directory containing the index model").withShortName("m").create();

    Option categoryFieldOpt = obuilder.withLongName("categoryField").withRequired(true)
            .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create())
            .withDescription("Name of the field containing category information").withShortName("catf")
            .create();/*w w w.j  ava  2  s. com*/

    Option contentFieldOpt = obuilder.withLongName("contentField").withRequired(true)
            .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create())
            .withDescription("Name of the field containing content information").withShortName("contf")
            .create();

    Option maxResultsOpt = obuilder.withLongName("maxResults").withRequired(false)
            .withArgument(abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of results to retrive, default: 10 ").withShortName("r").create();

    Option gramSizeOpt = obuilder.withLongName("gramSize").withRequired(false)
            .withArgument(abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create())
            .withDescription("Size of the n-gram. Default Value: 1 ").withShortName("ng").create();

    Option typeOpt = obuilder.withLongName("classifierType").withRequired(false)
            .withArgument(abuilder.withName("classifierType").withMinimum(1).withMaximum(1).create())
            .withDescription("Type of classifier: knn|tfidf. Default: bayes").withShortName("type").create();

    Group group = gbuilder.withName("Options").withOption(gramSizeOpt).withOption(helpOpt)
            .withOption(inputDirOpt).withOption(modelOpt).withOption(typeOpt).withOption(contentFieldOpt)
            .withOption(categoryFieldOpt).withOption(maxResultsOpt).create();

    try {
        Parser parser = new Parser();

        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String classifierType = (String) cmdLine.getValue(typeOpt);

        if (cmdLine.hasOption(gramSizeOpt)) {

        }

        int gramSize = 1;
        if (cmdLine.hasOption(gramSizeOpt)) {
            gramSize = Integer.parseInt((String) cmdLine.getValue(gramSizeOpt));
        }

        int maxResults = 10;
        if (cmdLine.hasOption(maxResultsOpt)) {
            maxResults = Integer.parseInt((String) cmdLine.getValue(maxResultsOpt));
        }

        String inputPath = (String) cmdLine.getValue(inputDirOpt);
        String modelPath = (String) cmdLine.getValue(modelOpt);
        String categoryField = (String) cmdLine.getValue(categoryFieldOpt);
        String contentField = (String) cmdLine.getValue(contentFieldOpt);

        MatchMode mode;

        if ("knn".equalsIgnoreCase(classifierType)) {
            mode = MatchMode.KNN;
        } else if ("tfidf".equalsIgnoreCase(classifierType)) {
            mode = MatchMode.TFIDF;
        } else {
            throw new IllegalArgumentException("Unkown classifierType: " + classifierType);
        }

        Reader reader = new FileReader(inputPath);
        Directory directory = FSDirectory.open(new File(modelPath));
        IndexReader indexReader = IndexReader.open(directory);
        MoreLikeThisCategorizer categorizer = new MoreLikeThisCategorizer(indexReader, categoryField);
        categorizer.setMatchMode(mode);
        categorizer.setFieldNames(new String[] { contentField });
        categorizer.setMaxResults(maxResults);

        if (gramSize > 1)
            categorizer.setNgramSize(gramSize);

        CategoryHits[] categories = categorizer.categorize(reader);
        for (CategoryHits c : categories) {
            System.out.println(c.getLabel() + "\t" + c.getHits() + "\t" + c.getScore());
        }

    } catch (OptionException e) {
        log.error("Error while parsing options", e);
    }
}

From source file:com.tamingtext.classifier.mlt.TestMoreLikeThis.java

License:Apache License

public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Option inputDirOpt = obuilder.withLongName("input").withRequired(true)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The input directory").withShortName("i").create();

    Option modelOpt = obuilder.withLongName("model").withRequired(true)
            .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create())
            .withDescription("The directory containing the index model").withShortName("m").create();

    Option categoryFieldOpt = obuilder.withLongName("categoryField").withRequired(true)
            .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create())
            .withDescription("Name of the field containing category information").withShortName("catf")
            .create();/*from ww w .  j  av  a 2  s.co  m*/

    Option contentFieldOpt = obuilder.withLongName("contentField").withRequired(true)
            .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create())
            .withDescription("Name of the field containing content information").withShortName("contf")
            .create();

    Option maxResultsOpt = obuilder.withLongName("maxResults").withRequired(false)
            .withArgument(abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of results to retrive, default: 10 ").withShortName("r").create();

    Option gramSizeOpt = obuilder.withLongName("gramSize").withRequired(false)
            .withArgument(abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create())
            .withDescription("Size of the n-gram. Default Value: 1 ").withShortName("ng").create();

    Option typeOpt = obuilder.withLongName("classifierType").withRequired(false)
            .withArgument(abuilder.withName("classifierType").withMinimum(1).withMaximum(1).create())
            .withDescription("Type of classifier: knn|tfidf. Default: bayes").withShortName("type").create();

    Group group = gbuilder.withName("Options").withOption(gramSizeOpt).withOption(helpOpt)
            .withOption(inputDirOpt).withOption(modelOpt).withOption(typeOpt).withOption(contentFieldOpt)
            .withOption(categoryFieldOpt).withOption(maxResultsOpt).create();

    try {
        Parser parser = new Parser();

        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String classifierType = (String) cmdLine.getValue(typeOpt);

        int gramSize = 1;
        if (cmdLine.hasOption(gramSizeOpt)) {
            gramSize = Integer.parseInt((String) cmdLine.getValue(gramSizeOpt));
        }

        int maxResults = 10;
        if (cmdLine.hasOption(maxResultsOpt)) {
            maxResults = Integer.parseInt((String) cmdLine.getValue(maxResultsOpt));
        }

        String inputPath = (String) cmdLine.getValue(inputDirOpt);
        String modelPath = (String) cmdLine.getValue(modelOpt);
        String categoryField = (String) cmdLine.getValue(categoryFieldOpt);
        String contentField = (String) cmdLine.getValue(contentFieldOpt);

        MatchMode mode;

        if ("knn".equalsIgnoreCase(classifierType)) {
            mode = MatchMode.KNN;
        } else if ("tfidf".equalsIgnoreCase(classifierType)) {
            mode = MatchMode.TFIDF;
        } else {
            throw new IllegalArgumentException("Unkown classifierType: " + classifierType);
        }

        Directory directory = FSDirectory.open(new File(modelPath));
        IndexReader indexReader = IndexReader.open(directory);
        Analyzer analyzer //<co id="mlt.analyzersetup"/>
                = new EnglishAnalyzer(Version.LUCENE_36);

        MoreLikeThisCategorizer categorizer = new MoreLikeThisCategorizer(indexReader, categoryField);
        categorizer.setAnalyzer(analyzer);
        categorizer.setMatchMode(mode);
        categorizer.setFieldNames(new String[] { contentField });
        categorizer.setMaxResults(maxResults);
        categorizer.setNgramSize(gramSize);

        File f = new File(inputPath);
        if (!f.isDirectory()) {
            throw new IllegalArgumentException(f + " is not a directory or does not exit");
        }

        File[] inputFiles = FileUtil.buildFileList(f);

        String line = null;
        //<start id="lucene.examples.mlt.test"/>
        final ClassifierResult UNKNOWN = new ClassifierResult("unknown", 1.0);

        ResultAnalyzer resultAnalyzer = //<co id="co.mlt.ra"/>
                new ResultAnalyzer(categorizer.getCategories(), UNKNOWN.getLabel());

        for (File ff : inputFiles) { //<co id="co.mlt.read"/>
            BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(ff), "UTF-8"));
            while ((line = in.readLine()) != null) {
                String[] parts = line.split("\t");
                if (parts.length != 2) {
                    continue;
                }

                CategoryHits[] hits //<co id="co.mlt.cat"/>
                        = categorizer.categorize(new StringReader(parts[1]));
                ClassifierResult result = hits.length > 0 ? hits[0] : UNKNOWN;
                resultAnalyzer.addInstance(parts[0], result); //<co id="co.mlt.an"/>
            }

            in.close();
        }

        System.out.println(resultAnalyzer.toString());//<co id="co.mlt.print"/>
        /*
        <calloutlist>
          <callout arearefs="co.mlt.ra">Create <classname>ResultAnalyzer</classname></callout>
          <callout arearefs="co.mlt.read">Read Test data</callout>
          <callout arearefs="co.mlt.cat">Categorize</callout>
          <callout arearefs="co.mlt.an">Collect Results</callout>
          <callout arearefs="co.mlt.print">Display Results</callout>
        </calloutlist>
        */
        //<end id="lucene.examples.mlt.test"/>
    } catch (OptionException e) {
        log.error("Error while parsing options", e);
    }
}

From source file:com.tamingtext.classifier.mlt.TrainMoreLikeThis.java

License:Apache License

public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Option inputDirOpt = obuilder.withLongName("input").withRequired(true)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The input directory, containing properly formatted files: "
                    + "One doc per line, first entry on the line is the label, rest is the evidence")
            .withShortName("i").create();

    Option outputOpt = obuilder.withLongName("output").withRequired(true)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("The output directory").withShortName("o").create();

    Option gramSizeOpt = obuilder.withLongName("gramSize").withRequired(false)
            .withArgument(abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create())
            .withDescription("Size of the n-gram. Default Value: 1 ").withShortName("ng").create();

    Option typeOpt = obuilder.withLongName("classifierType").withRequired(false)
            .withArgument(abuilder.withName("classifierType").withMinimum(1).withMaximum(1).create())
            .withDescription("Type of classifier: knn|tfidf.").withShortName("type").create();

    Group group = gbuilder.withName("Options").withOption(gramSizeOpt).withOption(helpOpt)
            .withOption(inputDirOpt).withOption(outputOpt).withOption(typeOpt).create();

    try {//from   ww w  .j  av a 2  s .c  o  m
        Parser parser = new Parser();

        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String classifierType = (String) cmdLine.getValue(typeOpt);

        int gramSize = 1;
        if (cmdLine.hasOption(gramSizeOpt)) {
            gramSize = Integer.parseInt((String) cmdLine.getValue(gramSizeOpt));
        }

        String inputPath = (String) cmdLine.getValue(inputDirOpt);
        String outputPath = (String) cmdLine.getValue(outputOpt);
        TrainMoreLikeThis trainer = new TrainMoreLikeThis();
        MatchMode mode;

        if ("knn".equalsIgnoreCase(classifierType)) {
            mode = MatchMode.KNN;
        } else if ("tfidf".equalsIgnoreCase(classifierType)) {
            mode = MatchMode.TFIDF;
        } else {
            throw new IllegalArgumentException("Unkown classifierType: " + classifierType);
        }

        if (gramSize > 1)
            trainer.setNGramSize(gramSize);

        trainer.train(inputPath, outputPath, mode);

    } catch (OptionException e) {
        log.error("Error while parsing options", e);
    }
}

From source file:com.tamingtext.tagrecommender.CountStackOverflowTags.java

License:Apache License

public boolean parseArgs(String[] args) {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();
    Option helpOpt = DefaultOptionCreator.helpOption();

    Option inputFileOpt = obuilder.withLongName("inputFile").withRequired(true)
            .withArgument(abuilder.withName("inputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("The input file").withShortName("i").create();

    Option outputFileOpt = obuilder.withLongName("outputFile").withRequired(true)
            .withArgument(abuilder.withName("outputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("The output file").withShortName("o").create();

    Option limitOpt = obuilder.withLongName("limit").withRequired(false)
            .withArgument(abuilder.withName("limit").withMinimum(1).withMaximum(1).create())
            .withDescription("Emit this many of the most frequent tags").withShortName("l").create();

    Option cutoffOpt = obuilder.withLongName("cutoff").withRequired(false)
            .withArgument(abuilder.withName("cutoff").withMinimum(1).withMaximum(1).create())
            .withDescription("Drop tags with a count less than this number").withShortName("c").create();

    Group group = gbuilder.withName("Options").withOption(inputFileOpt).withOption(outputFileOpt)
            .withOption(limitOpt).withOption(cutoffOpt).create();

    try {// w ww  .  jav  a 2s. c o m
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return false;
        }

        inputFile = new File((String) cmdLine.getValue(inputFileOpt));
        countFile = new File((String) cmdLine.getValue(outputFileOpt));

        if (cmdLine.hasOption(limitOpt)) {
            limit = Integer.parseInt((String) cmdLine.getValue(limitOpt));
        }

        if (cmdLine.hasOption(cutoffOpt)) {
            cutoff = Integer.parseInt((String) cmdLine.getValue(cutoffOpt));
        }

    } catch (OptionException e) {
        log.error("Command-line option Exception", e);
        CommandLineUtil.printHelp(group);
        return false;
    }

    validate();
    return true;
}

From source file:com.tamingtext.tagrecommender.ExtractStackOverflowData.java

License:Apache License

public boolean parseArgs(String[] args) {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();
    Option helpOpt = DefaultOptionCreator.helpOption();

    Option inputFileOpt = obuilder.withLongName("inputFile").withRequired(true)
            .withArgument(abuilder.withName("inputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("The input file").withShortName("i").create();

    Option trainingOutputOpt = obuilder.withLongName("trainingOutputFile").withRequired(true)
            .withArgument(abuilder.withName("trainingOutputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("The training data output file").withShortName("tr").create();

    Option testOutputOpt = obuilder.withLongName("testOutputFile").withRequired(true)
            .withArgument(abuilder.withName("testOutputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("The test data output file").withShortName("te").create();

    Option trainingDataSizeOpt = obuilder.withLongName("trainingDataSize").withRequired(false)
            .withArgument(abuilder.withName("trainingDataSize").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of questions to extract for training data").withShortName("trs")
            .create();//from  www.  jav a  2s.  com

    Option testDataSizeOpt = obuilder.withLongName("testDataSize").withRequired(false)
            .withArgument(abuilder.withName("testDataSize").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of questions to extract for training data").withShortName("tes")
            .create();

    Group group = gbuilder.withName("Options").withOption(inputFileOpt).withOption(trainingOutputOpt)
            .withOption(testOutputOpt).withOption(trainingDataSizeOpt).withOption(testDataSizeOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return false;
        }

        inputFile = new File((String) cmdLine.getValue(inputFileOpt));
        trainingOutputFile = new File((String) cmdLine.getValue(trainingOutputOpt));
        testOutputFile = new File((String) cmdLine.getValue(testOutputOpt));

        if (cmdLine.hasOption(trainingDataSizeOpt)) {
            trainingDataSize = Integer.parseInt((String) cmdLine.getValue(trainingDataSizeOpt));
        }

        if (cmdLine.hasOption(testDataSizeOpt)) {
            testDataSize = Integer.parseInt((String) cmdLine.getValue(testDataSizeOpt));
        }

    } catch (OptionException e) {
        log.error("Command-line option Exception", e);
        CommandLineUtil.printHelp(group);
        return false;
    }

    validate();
    return true;
}

From source file:com.tamingtext.tagrecommender.TestStackOverflowTagger.java

License:Apache License

public boolean parseArgs(String[] args) {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();
    Option helpOpt = DefaultOptionCreator.helpOption();

    Option inputFileOpt = obuilder.withLongName("inputFile").withRequired(true)
            .withArgument(abuilder.withName("inputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("The input file").withShortName("i").create();

    Option countFileOpt = obuilder.withLongName("countFile").withRequired(true)
            .withArgument(abuilder.withName("countFile").withMinimum(1).withMaximum(1).create())
            .withDescription("The tag count file").withShortName("c").create();

    Option outputFileOpt = obuilder.withLongName("outputFile").withRequired(true)
            .withArgument(abuilder.withName("outputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("The output file").withShortName("c").create();

    Option solrUrlOpt = obuilder.withLongName("solrUrl").withRequired(true)
            .withArgument(abuilder.withName("solrUrl").withMinimum(1).withMaximum(1).create())
            .withDescription("URL of the solr server").withShortName("s").create();

    Group group = gbuilder.withName("Options").withOption(inputFileOpt).withOption(countFileOpt)
            .withOption(outputFileOpt).withOption(solrUrlOpt).create();

    try {/*from  w w  w  . j  av a 2 s  .c o  m*/
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return false;
        }

        inputFile = new File((String) cmdLine.getValue(inputFileOpt));
        countFile = new File((String) cmdLine.getValue(countFileOpt));
        outputFile = new File((String) cmdLine.getValue(outputFileOpt));
        solrUrl = (String) cmdLine.getValue(solrUrlOpt);
        client = new TagRecommenderClient(solrUrl);
    } catch (OptionException e) {
        log.error("Command-line option Exception", e);
        CommandLineUtil.printHelp(group);
        return false;
    } catch (MalformedURLException e) {
        log.error("MalformedURLException", e);
        return false;
    }

    validate();
    return true;
}