List of usage examples for org.apache.mahout.common.commandline DefaultOptionCreator helpOption
public static Option helpOption()
From source file:chapter7.src.InputDriver.java
License:Apache License
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option inputOpt = DefaultOptionCreator.inputOption().withRequired(false).create(); Option outputOpt = DefaultOptionCreator.outputOption().withRequired(false).create(); Option vectorOpt = obuilder.withLongName("vector").withRequired(false) .withArgument(abuilder.withName("v").withMinimum(1).withMaximum(1).create()) .withDescription("The vector implementation to use.").withShortName("v").create(); Option helpOpt = DefaultOptionCreator.helpOption(); Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(vectorOpt) .withOption(helpOpt).create(); try {/*from ww w .j av a2s . c o m*/ Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } Path input = new Path(cmdLine.getValue(inputOpt, "testdata").toString()); Path output = new Path(cmdLine.getValue(outputOpt, "output").toString()); String vectorClassName = cmdLine.getValue(vectorOpt, "org.apache.mahout.math.RandomAccessSparseVector") .toString(); //runJob(input, output, vectorClassName); } catch (OptionException e) { InputDriver.log.error("Exception parsing command line: ", e); CommandLineUtil.printHelp(group); } }
From source file:com.elex.dmp.lda.InMemoryCollapsedVariationalBayes0.java
License:Apache License
public static int main2(String[] args, Configuration conf) throws Exception { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option helpOpt = DefaultOptionCreator.helpOption(); Option inputDirOpt = obuilder.withLongName("input").withRequired(true) .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create()) .withDescription("The Directory on HDFS containing the collapsed, properly formatted files having " + "one doc per line") .withShortName("i").create(); Option dictOpt = obuilder.withLongName("dictionary").withRequired(false) .withArgument(abuilder.withName("dictionary").withMinimum(1).withMaximum(1).create()) .withDescription("The path to the term-dictionary format is ... ").withShortName("d").create(); Option dfsOpt = obuilder.withLongName("dfs").withRequired(false) .withArgument(abuilder.withName("dfs").withMinimum(1).withMaximum(1).create()) .withDescription("HDFS namenode URI").withShortName("dfs").create(); Option numTopicsOpt = obuilder.withLongName("numTopics").withRequired(true) .withArgument(abuilder.withName("numTopics").withMinimum(1).withMaximum(1).create()) .withDescription("Number of topics to learn").withShortName("top").create(); Option outputTopicFileOpt = obuilder.withLongName("topicOutputFile").withRequired(true) .withArgument(abuilder.withName("topicOutputFile").withMinimum(1).withMaximum(1).create()) .withDescription("File to write out p(term | topic)").withShortName("to").create(); Option outputDocFileOpt = obuilder.withLongName("docOutputFile").withRequired(true) .withArgument(abuilder.withName("docOutputFile").withMinimum(1).withMaximum(1).create()) .withDescription("File to write out p(topic | docid)").withShortName("do").create(); Option alphaOpt = obuilder.withLongName("alpha").withRequired(false) .withArgument(abuilder.withName("alpha").withMinimum(1).withMaximum(1).withDefault("0.1").create()) .withDescription("Smoothing parameter for p(topic | document) prior").withShortName("a").create(); Option etaOpt = obuilder.withLongName("eta").withRequired(false) .withArgument(abuilder.withName("eta").withMinimum(1).withMaximum(1).withDefault("0.1").create()) .withDescription("Smoothing parameter for p(term | topic)").withShortName("e").create(); Option maxIterOpt = obuilder.withLongName("maxIterations").withRequired(false) .withArgument(/*from ww w .ja va 2s .com*/ abuilder.withName("maxIterations").withMinimum(1).withMaximum(1).withDefault(10).create()) .withDescription("Maximum number of training passes").withShortName("m").create(); Option modelCorpusFractionOption = obuilder.withLongName("modelCorpusFraction").withRequired(false) .withArgument(abuilder.withName("modelCorpusFraction").withMinimum(1).withMaximum(1) .withDefault(0.0).create()) .withShortName("mcf").withDescription("For online updates, initial value of |model|/|corpus|") .create(); Option burnInOpt = obuilder.withLongName("burnInIterations").withRequired(false) .withArgument( abuilder.withName("burnInIterations").withMinimum(1).withMaximum(1).withDefault(5).create()) .withDescription("Minimum number of iterations").withShortName("b").create(); Option convergenceOpt = obuilder.withLongName("convergence").withRequired(false) .withArgument( abuilder.withName("convergence").withMinimum(1).withMaximum(1).withDefault("0.0").create()) .withDescription("Fractional rate of perplexity to consider convergence").withShortName("c") .create(); Option reInferDocTopicsOpt = obuilder.withLongName("reInferDocTopics").withRequired(false) .withArgument(abuilder.withName("reInferDocTopics").withMinimum(1).withMaximum(1).withDefault("no") .create()) .withDescription("re-infer p(topic | doc) : [no | randstart | continue]").withShortName("rdt") .create(); Option numTrainThreadsOpt = obuilder .withLongName("numTrainThreads").withRequired(false).withArgument(abuilder .withName("numTrainThreads").withMinimum(1).withMaximum(1).withDefault("1").create()) .withDescription("number of threads to train with").withShortName("ntt").create(); Option numUpdateThreadsOpt = obuilder.withLongName("numUpdateThreads").withRequired(false) .withArgument(abuilder.withName("numUpdateThreads").withMinimum(1).withMaximum(1).withDefault("1") .create()) .withDescription("number of threads to update the model with").withShortName("nut").create(); Option verboseOpt = obuilder.withLongName("verbose").withRequired(false) .withArgument( abuilder.withName("verbose").withMinimum(1).withMaximum(1).withDefault("false").create()) .withDescription("print verbose information, like top-terms in each topic, during iteration") .withShortName("v").create(); Group group = gbuilder.withName("Options").withOption(inputDirOpt).withOption(numTopicsOpt) .withOption(alphaOpt).withOption(etaOpt).withOption(maxIterOpt).withOption(burnInOpt) .withOption(convergenceOpt).withOption(dictOpt).withOption(reInferDocTopicsOpt) .withOption(outputDocFileOpt).withOption(outputTopicFileOpt).withOption(dfsOpt) .withOption(numTrainThreadsOpt).withOption(numUpdateThreadsOpt) .withOption(modelCorpusFractionOption).withOption(verboseOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); parser.setHelpOption(helpOpt); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return -1; } String inputDirString = (String) cmdLine.getValue(inputDirOpt); String dictDirString = cmdLine.hasOption(dictOpt) ? (String) cmdLine.getValue(dictOpt) : null; int numTopics = Integer.parseInt((String) cmdLine.getValue(numTopicsOpt)); double alpha = Double.parseDouble((String) cmdLine.getValue(alphaOpt)); double eta = Double.parseDouble((String) cmdLine.getValue(etaOpt)); int maxIterations = Integer.parseInt((String) cmdLine.getValue(maxIterOpt)); int burnInIterations = (Integer) cmdLine.getValue(burnInOpt); double minFractionalErrorChange = Double.parseDouble((String) cmdLine.getValue(convergenceOpt)); int numTrainThreads = Integer.parseInt((String) cmdLine.getValue(numTrainThreadsOpt)); int numUpdateThreads = Integer.parseInt((String) cmdLine.getValue(numUpdateThreadsOpt)); String topicOutFile = (String) cmdLine.getValue(outputTopicFileOpt); String docOutFile = (String) cmdLine.getValue(outputDocFileOpt); String reInferDocTopics = (String) cmdLine.getValue(reInferDocTopicsOpt); boolean verbose = Boolean.parseBoolean((String) cmdLine.getValue(verboseOpt)); double modelCorpusFraction = (Double) cmdLine.getValue(modelCorpusFractionOption); long start = System.nanoTime(); if (conf.get("fs.default.name") == null) { String dfsNameNode = (String) cmdLine.getValue(dfsOpt); conf.set("fs.default.name", dfsNameNode); } String[] terms = loadDictionary(dictDirString, conf); logTime("dictionary loading", System.nanoTime() - start); start = System.nanoTime(); Matrix corpus = loadVectors(inputDirString, conf); logTime("vector seqfile corpus loading", System.nanoTime() - start); start = System.nanoTime(); InMemoryCollapsedVariationalBayes0 cvb0 = new InMemoryCollapsedVariationalBayes0(corpus, terms, numTopics, alpha, eta, numTrainThreads, numUpdateThreads, modelCorpusFraction, 1234); logTime("cvb0 init", System.nanoTime() - start); start = System.nanoTime(); cvb0.setVerbose(verbose); cvb0.iterateUntilConvergence(minFractionalErrorChange, maxIterations, burnInIterations); logTime("total training time", System.nanoTime() - start); if ("randstart".equalsIgnoreCase(reInferDocTopics)) { cvb0.inferDocuments(0.0, 100, true); } else if ("continue".equalsIgnoreCase(reInferDocTopics)) { cvb0.inferDocuments(0.0, 100, false); } start = System.nanoTime(); cvb0.writeModel(new Path(topicOutFile)); DistributedRowMatrixWriter.write(new Path(docOutFile), conf, cvb0.docTopicCounts); logTime("printTopics", System.nanoTime() - start); } catch (OptionException e) { log.error("Error while parsing options", e); CommandLineUtil.printHelp(group); } return 0; }
From source file:com.tamingtext.classifier.maxent.TestMaxent.java
License:Apache License
/** * @param args/*w w w . jav a2 s. co m*/ */ public static void main(String[] args) throws IOException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option helpOpt = DefaultOptionCreator.helpOption(); Option inputDirOpt = obuilder.withLongName("input").withRequired(true) .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create()) .withDescription("The input directory").withShortName("i").create(); Option modelOpt = obuilder.withLongName("model").withRequired(true) .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create()) .withDescription("The directory containing the index model").withShortName("m").create(); Group group = gbuilder.withName("Options").withOption(helpOpt).withOption(inputDirOpt).withOption(modelOpt) .create(); try { Parser parser = new Parser(); parser.setGroup(group); parser.setHelpOption(helpOpt); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } String inputPath = (String) cmdLine.getValue(inputDirOpt); File f = new File(inputPath); if (!f.isDirectory()) { throw new IllegalArgumentException(f + " is not a directory or does not exit"); } File[] inputFiles = FileUtil.buildFileList(f); File modelDir = new File((String) cmdLine.getValue(modelOpt)); execute(inputFiles, modelDir); } catch (OptionException e) { log.error("Error while parsing options", e); } }
From source file:com.tamingtext.classifier.maxent.TrainMaxent.java
License:Apache License
public static void main(String[] args) throws Exception { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option helpOpt = DefaultOptionCreator.helpOption(); Option inputDirOpt = obuilder.withLongName("input").withRequired(true) .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create()) .withDescription("The input directory, containing properly formatted files: " + "One doc per line, first entry on the line is the label, rest is the evidence") .withShortName("i").create(); Option outputOpt = obuilder.withLongName("output").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("The output directory").withShortName("o").create(); Group group = gbuilder.withName("Options").withOption(helpOpt).withOption(inputDirOpt).withOption(outputOpt) .create();//from w w w . j a va 2 s . c o m //.withOption(gramSizeOpt).withOption(typeOpt) try { Parser parser = new Parser(); parser.setGroup(group); parser.setHelpOption(helpOpt); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } String inputPath = (String) cmdLine.getValue(inputDirOpt); String outputPath = (String) cmdLine.getValue(outputOpt); TrainMaxent trainer = new TrainMaxent(); trainer.train(inputPath, outputPath); } catch (OptionException e) { log.error("Error while parsing options", e); } }
From source file:com.tamingtext.classifier.mlt.MoreLikeThisCategorizer.java
License:Apache License
public static void main(String[] args) throws Exception { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option helpOpt = DefaultOptionCreator.helpOption(); Option inputDirOpt = obuilder.withLongName("input").withRequired(true) .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create()) .withDescription("The input file to classify").withShortName("i").create(); Option modelOpt = obuilder.withLongName("model").withRequired(true) .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create()) .withDescription("The directory containing the index model").withShortName("m").create(); Option categoryFieldOpt = obuilder.withLongName("categoryField").withRequired(true) .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create()) .withDescription("Name of the field containing category information").withShortName("catf") .create();/*w w w.j ava 2 s. com*/ Option contentFieldOpt = obuilder.withLongName("contentField").withRequired(true) .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create()) .withDescription("Name of the field containing content information").withShortName("contf") .create(); Option maxResultsOpt = obuilder.withLongName("maxResults").withRequired(false) .withArgument(abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create()) .withDescription("Number of results to retrive, default: 10 ").withShortName("r").create(); Option gramSizeOpt = obuilder.withLongName("gramSize").withRequired(false) .withArgument(abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create()) .withDescription("Size of the n-gram. Default Value: 1 ").withShortName("ng").create(); Option typeOpt = obuilder.withLongName("classifierType").withRequired(false) .withArgument(abuilder.withName("classifierType").withMinimum(1).withMaximum(1).create()) .withDescription("Type of classifier: knn|tfidf. Default: bayes").withShortName("type").create(); Group group = gbuilder.withName("Options").withOption(gramSizeOpt).withOption(helpOpt) .withOption(inputDirOpt).withOption(modelOpt).withOption(typeOpt).withOption(contentFieldOpt) .withOption(categoryFieldOpt).withOption(maxResultsOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); parser.setHelpOption(helpOpt); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } String classifierType = (String) cmdLine.getValue(typeOpt); if (cmdLine.hasOption(gramSizeOpt)) { } int gramSize = 1; if (cmdLine.hasOption(gramSizeOpt)) { gramSize = Integer.parseInt((String) cmdLine.getValue(gramSizeOpt)); } int maxResults = 10; if (cmdLine.hasOption(maxResultsOpt)) { maxResults = Integer.parseInt((String) cmdLine.getValue(maxResultsOpt)); } String inputPath = (String) cmdLine.getValue(inputDirOpt); String modelPath = (String) cmdLine.getValue(modelOpt); String categoryField = (String) cmdLine.getValue(categoryFieldOpt); String contentField = (String) cmdLine.getValue(contentFieldOpt); MatchMode mode; if ("knn".equalsIgnoreCase(classifierType)) { mode = MatchMode.KNN; } else if ("tfidf".equalsIgnoreCase(classifierType)) { mode = MatchMode.TFIDF; } else { throw new IllegalArgumentException("Unkown classifierType: " + classifierType); } Reader reader = new FileReader(inputPath); Directory directory = FSDirectory.open(new File(modelPath)); IndexReader indexReader = IndexReader.open(directory); MoreLikeThisCategorizer categorizer = new MoreLikeThisCategorizer(indexReader, categoryField); categorizer.setMatchMode(mode); categorizer.setFieldNames(new String[] { contentField }); categorizer.setMaxResults(maxResults); if (gramSize > 1) categorizer.setNgramSize(gramSize); CategoryHits[] categories = categorizer.categorize(reader); for (CategoryHits c : categories) { System.out.println(c.getLabel() + "\t" + c.getHits() + "\t" + c.getScore()); } } catch (OptionException e) { log.error("Error while parsing options", e); } }
From source file:com.tamingtext.classifier.mlt.TestMoreLikeThis.java
License:Apache License
public static void main(String[] args) throws Exception { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option helpOpt = DefaultOptionCreator.helpOption(); Option inputDirOpt = obuilder.withLongName("input").withRequired(true) .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create()) .withDescription("The input directory").withShortName("i").create(); Option modelOpt = obuilder.withLongName("model").withRequired(true) .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create()) .withDescription("The directory containing the index model").withShortName("m").create(); Option categoryFieldOpt = obuilder.withLongName("categoryField").withRequired(true) .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create()) .withDescription("Name of the field containing category information").withShortName("catf") .create();/*from ww w . j av a 2 s.co m*/ Option contentFieldOpt = obuilder.withLongName("contentField").withRequired(true) .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create()) .withDescription("Name of the field containing content information").withShortName("contf") .create(); Option maxResultsOpt = obuilder.withLongName("maxResults").withRequired(false) .withArgument(abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create()) .withDescription("Number of results to retrive, default: 10 ").withShortName("r").create(); Option gramSizeOpt = obuilder.withLongName("gramSize").withRequired(false) .withArgument(abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create()) .withDescription("Size of the n-gram. Default Value: 1 ").withShortName("ng").create(); Option typeOpt = obuilder.withLongName("classifierType").withRequired(false) .withArgument(abuilder.withName("classifierType").withMinimum(1).withMaximum(1).create()) .withDescription("Type of classifier: knn|tfidf. Default: bayes").withShortName("type").create(); Group group = gbuilder.withName("Options").withOption(gramSizeOpt).withOption(helpOpt) .withOption(inputDirOpt).withOption(modelOpt).withOption(typeOpt).withOption(contentFieldOpt) .withOption(categoryFieldOpt).withOption(maxResultsOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); parser.setHelpOption(helpOpt); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } String classifierType = (String) cmdLine.getValue(typeOpt); int gramSize = 1; if (cmdLine.hasOption(gramSizeOpt)) { gramSize = Integer.parseInt((String) cmdLine.getValue(gramSizeOpt)); } int maxResults = 10; if (cmdLine.hasOption(maxResultsOpt)) { maxResults = Integer.parseInt((String) cmdLine.getValue(maxResultsOpt)); } String inputPath = (String) cmdLine.getValue(inputDirOpt); String modelPath = (String) cmdLine.getValue(modelOpt); String categoryField = (String) cmdLine.getValue(categoryFieldOpt); String contentField = (String) cmdLine.getValue(contentFieldOpt); MatchMode mode; if ("knn".equalsIgnoreCase(classifierType)) { mode = MatchMode.KNN; } else if ("tfidf".equalsIgnoreCase(classifierType)) { mode = MatchMode.TFIDF; } else { throw new IllegalArgumentException("Unkown classifierType: " + classifierType); } Directory directory = FSDirectory.open(new File(modelPath)); IndexReader indexReader = IndexReader.open(directory); Analyzer analyzer //<co id="mlt.analyzersetup"/> = new EnglishAnalyzer(Version.LUCENE_36); MoreLikeThisCategorizer categorizer = new MoreLikeThisCategorizer(indexReader, categoryField); categorizer.setAnalyzer(analyzer); categorizer.setMatchMode(mode); categorizer.setFieldNames(new String[] { contentField }); categorizer.setMaxResults(maxResults); categorizer.setNgramSize(gramSize); File f = new File(inputPath); if (!f.isDirectory()) { throw new IllegalArgumentException(f + " is not a directory or does not exit"); } File[] inputFiles = FileUtil.buildFileList(f); String line = null; //<start id="lucene.examples.mlt.test"/> final ClassifierResult UNKNOWN = new ClassifierResult("unknown", 1.0); ResultAnalyzer resultAnalyzer = //<co id="co.mlt.ra"/> new ResultAnalyzer(categorizer.getCategories(), UNKNOWN.getLabel()); for (File ff : inputFiles) { //<co id="co.mlt.read"/> BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(ff), "UTF-8")); while ((line = in.readLine()) != null) { String[] parts = line.split("\t"); if (parts.length != 2) { continue; } CategoryHits[] hits //<co id="co.mlt.cat"/> = categorizer.categorize(new StringReader(parts[1])); ClassifierResult result = hits.length > 0 ? hits[0] : UNKNOWN; resultAnalyzer.addInstance(parts[0], result); //<co id="co.mlt.an"/> } in.close(); } System.out.println(resultAnalyzer.toString());//<co id="co.mlt.print"/> /* <calloutlist> <callout arearefs="co.mlt.ra">Create <classname>ResultAnalyzer</classname></callout> <callout arearefs="co.mlt.read">Read Test data</callout> <callout arearefs="co.mlt.cat">Categorize</callout> <callout arearefs="co.mlt.an">Collect Results</callout> <callout arearefs="co.mlt.print">Display Results</callout> </calloutlist> */ //<end id="lucene.examples.mlt.test"/> } catch (OptionException e) { log.error("Error while parsing options", e); } }
From source file:com.tamingtext.classifier.mlt.TrainMoreLikeThis.java
License:Apache License
public static void main(String[] args) throws Exception { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option helpOpt = DefaultOptionCreator.helpOption(); Option inputDirOpt = obuilder.withLongName("input").withRequired(true) .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create()) .withDescription("The input directory, containing properly formatted files: " + "One doc per line, first entry on the line is the label, rest is the evidence") .withShortName("i").create(); Option outputOpt = obuilder.withLongName("output").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("The output directory").withShortName("o").create(); Option gramSizeOpt = obuilder.withLongName("gramSize").withRequired(false) .withArgument(abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create()) .withDescription("Size of the n-gram. Default Value: 1 ").withShortName("ng").create(); Option typeOpt = obuilder.withLongName("classifierType").withRequired(false) .withArgument(abuilder.withName("classifierType").withMinimum(1).withMaximum(1).create()) .withDescription("Type of classifier: knn|tfidf.").withShortName("type").create(); Group group = gbuilder.withName("Options").withOption(gramSizeOpt).withOption(helpOpt) .withOption(inputDirOpt).withOption(outputOpt).withOption(typeOpt).create(); try {//from ww w .j av a 2 s .c o m Parser parser = new Parser(); parser.setGroup(group); parser.setHelpOption(helpOpt); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } String classifierType = (String) cmdLine.getValue(typeOpt); int gramSize = 1; if (cmdLine.hasOption(gramSizeOpt)) { gramSize = Integer.parseInt((String) cmdLine.getValue(gramSizeOpt)); } String inputPath = (String) cmdLine.getValue(inputDirOpt); String outputPath = (String) cmdLine.getValue(outputOpt); TrainMoreLikeThis trainer = new TrainMoreLikeThis(); MatchMode mode; if ("knn".equalsIgnoreCase(classifierType)) { mode = MatchMode.KNN; } else if ("tfidf".equalsIgnoreCase(classifierType)) { mode = MatchMode.TFIDF; } else { throw new IllegalArgumentException("Unkown classifierType: " + classifierType); } if (gramSize > 1) trainer.setNGramSize(gramSize); trainer.train(inputPath, outputPath, mode); } catch (OptionException e) { log.error("Error while parsing options", e); } }
From source file:com.tamingtext.tagrecommender.CountStackOverflowTags.java
License:Apache License
public boolean parseArgs(String[] args) { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option helpOpt = DefaultOptionCreator.helpOption(); Option inputFileOpt = obuilder.withLongName("inputFile").withRequired(true) .withArgument(abuilder.withName("inputFile").withMinimum(1).withMaximum(1).create()) .withDescription("The input file").withShortName("i").create(); Option outputFileOpt = obuilder.withLongName("outputFile").withRequired(true) .withArgument(abuilder.withName("outputFile").withMinimum(1).withMaximum(1).create()) .withDescription("The output file").withShortName("o").create(); Option limitOpt = obuilder.withLongName("limit").withRequired(false) .withArgument(abuilder.withName("limit").withMinimum(1).withMaximum(1).create()) .withDescription("Emit this many of the most frequent tags").withShortName("l").create(); Option cutoffOpt = obuilder.withLongName("cutoff").withRequired(false) .withArgument(abuilder.withName("cutoff").withMinimum(1).withMaximum(1).create()) .withDescription("Drop tags with a count less than this number").withShortName("c").create(); Group group = gbuilder.withName("Options").withOption(inputFileOpt).withOption(outputFileOpt) .withOption(limitOpt).withOption(cutoffOpt).create(); try {// w ww . jav a 2s. c o m Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return false; } inputFile = new File((String) cmdLine.getValue(inputFileOpt)); countFile = new File((String) cmdLine.getValue(outputFileOpt)); if (cmdLine.hasOption(limitOpt)) { limit = Integer.parseInt((String) cmdLine.getValue(limitOpt)); } if (cmdLine.hasOption(cutoffOpt)) { cutoff = Integer.parseInt((String) cmdLine.getValue(cutoffOpt)); } } catch (OptionException e) { log.error("Command-line option Exception", e); CommandLineUtil.printHelp(group); return false; } validate(); return true; }
From source file:com.tamingtext.tagrecommender.ExtractStackOverflowData.java
License:Apache License
public boolean parseArgs(String[] args) { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option helpOpt = DefaultOptionCreator.helpOption(); Option inputFileOpt = obuilder.withLongName("inputFile").withRequired(true) .withArgument(abuilder.withName("inputFile").withMinimum(1).withMaximum(1).create()) .withDescription("The input file").withShortName("i").create(); Option trainingOutputOpt = obuilder.withLongName("trainingOutputFile").withRequired(true) .withArgument(abuilder.withName("trainingOutputFile").withMinimum(1).withMaximum(1).create()) .withDescription("The training data output file").withShortName("tr").create(); Option testOutputOpt = obuilder.withLongName("testOutputFile").withRequired(true) .withArgument(abuilder.withName("testOutputFile").withMinimum(1).withMaximum(1).create()) .withDescription("The test data output file").withShortName("te").create(); Option trainingDataSizeOpt = obuilder.withLongName("trainingDataSize").withRequired(false) .withArgument(abuilder.withName("trainingDataSize").withMinimum(1).withMaximum(1).create()) .withDescription("The number of questions to extract for training data").withShortName("trs") .create();//from www. jav a 2s. com Option testDataSizeOpt = obuilder.withLongName("testDataSize").withRequired(false) .withArgument(abuilder.withName("testDataSize").withMinimum(1).withMaximum(1).create()) .withDescription("The number of questions to extract for training data").withShortName("tes") .create(); Group group = gbuilder.withName("Options").withOption(inputFileOpt).withOption(trainingOutputOpt) .withOption(testOutputOpt).withOption(trainingDataSizeOpt).withOption(testDataSizeOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return false; } inputFile = new File((String) cmdLine.getValue(inputFileOpt)); trainingOutputFile = new File((String) cmdLine.getValue(trainingOutputOpt)); testOutputFile = new File((String) cmdLine.getValue(testOutputOpt)); if (cmdLine.hasOption(trainingDataSizeOpt)) { trainingDataSize = Integer.parseInt((String) cmdLine.getValue(trainingDataSizeOpt)); } if (cmdLine.hasOption(testDataSizeOpt)) { testDataSize = Integer.parseInt((String) cmdLine.getValue(testDataSizeOpt)); } } catch (OptionException e) { log.error("Command-line option Exception", e); CommandLineUtil.printHelp(group); return false; } validate(); return true; }
From source file:com.tamingtext.tagrecommender.TestStackOverflowTagger.java
License:Apache License
public boolean parseArgs(String[] args) { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option helpOpt = DefaultOptionCreator.helpOption(); Option inputFileOpt = obuilder.withLongName("inputFile").withRequired(true) .withArgument(abuilder.withName("inputFile").withMinimum(1).withMaximum(1).create()) .withDescription("The input file").withShortName("i").create(); Option countFileOpt = obuilder.withLongName("countFile").withRequired(true) .withArgument(abuilder.withName("countFile").withMinimum(1).withMaximum(1).create()) .withDescription("The tag count file").withShortName("c").create(); Option outputFileOpt = obuilder.withLongName("outputFile").withRequired(true) .withArgument(abuilder.withName("outputFile").withMinimum(1).withMaximum(1).create()) .withDescription("The output file").withShortName("c").create(); Option solrUrlOpt = obuilder.withLongName("solrUrl").withRequired(true) .withArgument(abuilder.withName("solrUrl").withMinimum(1).withMaximum(1).create()) .withDescription("URL of the solr server").withShortName("s").create(); Group group = gbuilder.withName("Options").withOption(inputFileOpt).withOption(countFileOpt) .withOption(outputFileOpt).withOption(solrUrlOpt).create(); try {/*from w w w . j av a 2 s .c o m*/ Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return false; } inputFile = new File((String) cmdLine.getValue(inputFileOpt)); countFile = new File((String) cmdLine.getValue(countFileOpt)); outputFile = new File((String) cmdLine.getValue(outputFileOpt)); solrUrl = (String) cmdLine.getValue(solrUrlOpt); client = new TagRecommenderClient(solrUrl); } catch (OptionException e) { log.error("Command-line option Exception", e); CommandLineUtil.printHelp(group); return false; } catch (MalformedURLException e) { log.error("MalformedURLException", e); return false; } validate(); return true; }