Example usage for org.apache.commons.cli2 CommandLine getValue

List of usage examples for org.apache.commons.cli2 CommandLine getValue

Introduction

In this page you can find the example usage for org.apache.commons.cli2 CommandLine getValue.

Prototype

Object getValue(final Option option) throws IllegalStateException;

Source Link

Document

Retrieves the single Argument value associated with the specified Option

Usage

From source file:com.grantingersoll.intell.index.Indexer.java

public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option wikipediaFileOpt = obuilder.withLongName("wikiFile").withRequired(true)
            .withArgument(abuilder.withName("wikiFile").withMinimum(1).withMaximum(1).create())
            .withDescription(//from   w w  w  . ja  va2s.c  om
                    "The path to the wikipedia dump file.  Maybe a directory containing wikipedia dump files."
                            + "  If a directory is specified, only .xml files are used.")
            .withShortName("w").create();

    Option numDocsOpt = obuilder.withLongName("numDocs").withRequired(false)
            .withArgument(abuilder.withName("numDocs").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of docs to index").withShortName("n").create();

    Option solrURLOpt = obuilder.withLongName("solrURL").withRequired(false)
            .withArgument(abuilder.withName("solrURL").withMinimum(1).withMaximum(1).create())
            .withDescription("The URL where Solr lives").withShortName("s").create();

    Option solrBatchOpt = obuilder.withLongName("batch").withRequired(false)
            .withArgument(abuilder.withName("batch").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of docs to include in each indexing batch").withShortName("b")
            .create();

    Group group = gbuilder.withName("Options").withOption(wikipediaFileOpt).withOption(numDocsOpt)
            .withOption(solrURLOpt).withOption(solrBatchOpt).create();

    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);

    File file;
    file = new File(cmdLine.getValue(wikipediaFileOpt).toString());
    File[] dumpFiles;
    if (file.isDirectory()) {
        dumpFiles = file.listFiles(new FilenameFilter() {
            public boolean accept(File file, String s) {
                return s.endsWith(".xml");
            }
        });
    } else {
        dumpFiles = new File[] { file };
    }

    int numDocs = Integer.MAX_VALUE;
    if (cmdLine.hasOption(numDocsOpt)) {
        numDocs = Integer.parseInt(cmdLine.getValue(numDocsOpt).toString());
    }
    String url = DEFAULT_SOLR_URL;
    if (cmdLine.hasOption(solrURLOpt)) {
        url = cmdLine.getValue(solrURLOpt).toString();
    }
    int batch = 100;
    if (cmdLine.hasOption(solrBatchOpt)) {
        batch = Integer.parseInt(cmdLine.getValue(solrBatchOpt).toString());
    }
    Indexer indexer = new Indexer(new CommonsHttpSolrServer(url));
    int total = 0;
    for (int i = 0; i < dumpFiles.length && total < numDocs; i++) {
        File dumpFile = dumpFiles[i];
        log.info("Indexing: " + file + " Num files to index: " + (numDocs - total));
        long start = System.currentTimeMillis();
        int totalFile = indexer.index(dumpFile, numDocs - total, batch);
        long finish = System.currentTimeMillis();
        if (log.isInfoEnabled()) {
            log.info("Indexing " + dumpFile + " took " + (finish - start) + " ms");
        }
        total += totalFile;
        log.info("Done Indexing: " + file + ". Indexed " + totalFile + " docs for that file and " + total
                + " overall.");

    }
    log.info("Indexed " + total + " docs overall.");
}

From source file:com.tamingtext.qa.WikipediaIndexer.java

public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option wikipediaFileOpt = obuilder.withLongName("wikiFile").withRequired(true)
            .withArgument(abuilder.withName("wikiFile").withMinimum(1).withMaximum(1).create())
            .withDescription(/*w  w  w . j  a v  a 2 s  .  c om*/
                    "The path to the wikipedia dump file.  Maybe a directory containing wikipedia dump files."
                            + "  If a directory is specified, only .xml files are used.")
            .withShortName("w").create();

    Option numDocsOpt = obuilder.withLongName("numDocs").withRequired(false)
            .withArgument(abuilder.withName("numDocs").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of docs to index").withShortName("n").create();

    Option solrURLOpt = obuilder.withLongName("solrURL").withRequired(false)
            .withArgument(abuilder.withName("solrURL").withMinimum(1).withMaximum(1).create())
            .withDescription("The URL where Solr lives").withShortName("s").create();

    Option solrBatchOpt = obuilder.withLongName("batch").withRequired(false)
            .withArgument(abuilder.withName("batch").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of docs to include in each indexing batch").withShortName("b")
            .create();

    Group group = gbuilder.withName("Options").withOption(wikipediaFileOpt).withOption(numDocsOpt)
            .withOption(solrURLOpt).withOption(solrBatchOpt).create();

    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);

    File file;
    file = new File(cmdLine.getValue(wikipediaFileOpt).toString());
    File[] dumpFiles;
    if (file.isDirectory()) {
        dumpFiles = file.listFiles(new FilenameFilter() {
            public boolean accept(File file, String s) {
                return s.endsWith(".xml");
            }
        });
    } else {
        dumpFiles = new File[] { file };
    }

    int numDocs = Integer.MAX_VALUE;
    if (cmdLine.hasOption(numDocsOpt)) {
        numDocs = Integer.parseInt(cmdLine.getValue(numDocsOpt).toString());
    }
    String url = DEFAULT_SOLR_URL;
    if (cmdLine.hasOption(solrURLOpt)) {
        url = cmdLine.getValue(solrURLOpt).toString();
    }
    int batch = 100;
    if (cmdLine.hasOption(solrBatchOpt)) {
        batch = Integer.parseInt(cmdLine.getValue(solrBatchOpt).toString());
    }
    WikipediaIndexer indexer = new WikipediaIndexer(new CommonsHttpSolrServer(url));
    int total = 0;
    for (int i = 0; i < dumpFiles.length && total < numDocs; i++) {
        File dumpFile = dumpFiles[i];
        log.info("Indexing: " + file + " Num files to index: " + (numDocs - total));
        long start = System.currentTimeMillis();
        int totalFile = indexer.index(dumpFile, numDocs - total, batch);
        long finish = System.currentTimeMillis();
        if (log.isInfoEnabled()) {
            log.info("Indexing " + dumpFile + " took " + (finish - start) + " ms");
        }
        total += totalFile;
        log.info("Done Indexing: " + file + ". Indexed " + totalFile + " docs for that file and " + total
                + " overall.");

    }
    log.info("Indexed " + total + " docs overall.");
}

From source file:com.tamingtext.classifier.maxent.TrainMaxent.java

public static void main(String[] args) throws Exception {

    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Option inputDirOpt = obuilder.withLongName("input").withRequired(true)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The input directory, containing properly formatted files: "
                    + "One doc per line, first entry on the line is the label, rest is the evidence")
            .withShortName("i").create();

    Option outputOpt = obuilder.withLongName("output").withRequired(true)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("The output directory").withShortName("o").create();

    Group group = gbuilder.withName("Options").withOption(helpOpt).withOption(inputDirOpt).withOption(outputOpt)
            .create();//  www .java2s.co m

    //.withOption(gramSizeOpt).withOption(typeOpt)

    try {
        Parser parser = new Parser();

        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String inputPath = (String) cmdLine.getValue(inputDirOpt);
        String outputPath = (String) cmdLine.getValue(outputOpt);
        TrainMaxent trainer = new TrainMaxent();
        trainer.train(inputPath, outputPath);
    } catch (OptionException e) {
        log.error("Error while parsing options", e);
    }
}

From source file:net.hubs1.mahout.cluster.CRLFSeparatedToSequenceFile.java

/**
 * Takes in two arguments:/*from   w  ww .ja v  a 2  s. c  o  m*/
 * <ol>
 * <li>The input {@link org.apache.hadoop.fs.Path} where the input documents live</li>
 * <li>The output {@link org.apache.hadoop.fs.Path} where to write the classifier as a
 * {@link org.apache.hadoop.io.SequenceFile}</li>
 * </ol>
 */
public static void main(String[] args) throws IOException {

    GroupBuilder gbuilder = new GroupBuilder();

    Option dirInputPathOpt = DefaultOptionCreator.inputOption().create();

    Option dirOutputPathOpt = DefaultOptionCreator.outputOption().create();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Group group = gbuilder.withName("Options").withOption(dirInputPathOpt).withOption(dirOutputPathOpt)
            .withOption(helpOpt).create();

    Parser parser = new Parser();
    parser.setGroup(group);
    parser.setHelpOption(helpOpt);
    try {
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String inputPath = (String) cmdLine.getValue(dirInputPathOpt);
        String outputPath = (String) cmdLine.getValue(dirOutputPathOpt);

        runJob(inputPath, outputPath);
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    } catch (InterruptedException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    } catch (ClassNotFoundException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
}

From source file:imageClassify.Describe.java

public static void main(String[] args) throws IOException, DescriptorException {

    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option pathOpt = obuilder.withLongName("path").withShortName("p").withRequired(true)
            .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
            .withDescription("Data path").create();

    Option descriptorOpt = obuilder.withLongName("descriptor").withShortName("d").withRequired(true)
            .withArgument(abuilder.withName("descriptor").withMinimum(1).create())
            .withDescription("data descriptor").create();

    Option descPathOpt = obuilder.withLongName("file").withShortName("f").withRequired(true)
            .withArgument(abuilder.withName("file").withMinimum(1).withMaximum(1).create())
            .withDescription("Path to generated descriptor file").create();

    Option regOpt = obuilder.withLongName("regression").withDescription("Regression Problem").withShortName("r")
            .create();//from  ww  w .j  a  v  a 2  s  . c  o  m

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(pathOpt).withOption(descPathOpt)
            .withOption(descriptorOpt).withOption(regOpt).withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String dataPath = cmdLine.getValue(pathOpt).toString();
        String descPath = cmdLine.getValue(descPathOpt).toString();
        List<String> descriptor = convert(cmdLine.getValues(descriptorOpt));
        boolean regression = cmdLine.hasOption(regOpt);

        log.debug("Data path : {}", dataPath);
        log.debug("Descriptor path : {}", descPath);
        log.debug("Descriptor : {}", descriptor);
        log.debug("Regression : {}", regression);

        runTool(dataPath, descriptor, descPath, regression);
    } catch (OptionException e) {
        log.warn(e.toString());
        CommandLineUtil.printHelp(group);
    }
}

From source file:com.tamingtext.classifier.bayes.ClassifyDocument.java

public static void main(String[] args) {
    log.info("Command-line arguments: " + Arrays.toString(args));

    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = obuilder.withLongName("input").withRequired(true)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("Input file").withShortName("i").create();

    Option modelOpt = obuilder.withLongName("model").withRequired(true)
            .withArgument(abuilder.withName("model").withMinimum(1).withMaximum(1).create())
            .withDescription("Model to use when classifying data").withShortName("m").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();//from  w ww. j  a v  a2  s  . co  m

    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(modelOpt).withOption(helpOpt)
            .create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        File inputFile = new File(cmdLine.getValue(inputOpt).toString());

        if (!inputFile.isFile()) {
            throw new IllegalArgumentException(inputFile + " does not exist or is not a file");
        }

        File modelDir = new File(cmdLine.getValue(modelOpt).toString());

        if (!modelDir.isDirectory()) {
            throw new IllegalArgumentException(modelDir + " does not exist or is not a directory");
        }

        BayesParameters p = new BayesParameters();
        p.set("basePath", modelDir.getCanonicalPath());
        Datastore ds = new InMemoryBayesDatastore(p);
        Algorithm a = new BayesAlgorithm();
        ClassifierContext ctx = new ClassifierContext(a, ds);
        ctx.initialize();

        //TODO: make the analyzer configurable
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        TokenStream ts = analyzer.tokenStream(null,
                new InputStreamReader(new FileInputStream(inputFile), "UTF-8"));

        ArrayList<String> tokens = new ArrayList<String>(1000);
        while (ts.incrementToken()) {
            tokens.add(ts.getAttribute(CharTermAttribute.class).toString());
        }
        String[] document = tokens.toArray(new String[tokens.size()]);

        ClassifierResult[] cr = ctx.classifyDocument(document, "unknown", 5);

        for (ClassifierResult r : cr) {
            System.err.println(r.getLabel() + "\t" + r.getScore());
        }
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    } catch (IOException e) {
        log.error("IOException", e);
    } catch (InvalidDatastoreException e) {
        log.error("InvalidDataStoreException", e);
    } finally {

    }
}

From source file:com.tamingtext.classifier.maxent.TestMaxent.java

/**
 * @param args//ww  w  .  j  a v a 2s .c o  m
 */
public static void main(String[] args) throws IOException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Option inputDirOpt = obuilder.withLongName("input").withRequired(true)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The input directory").withShortName("i").create();

    Option modelOpt = obuilder.withLongName("model").withRequired(true)
            .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create())
            .withDescription("The directory containing the index model").withShortName("m").create();

    Group group = gbuilder.withName("Options").withOption(helpOpt).withOption(inputDirOpt).withOption(modelOpt)
            .create();

    try {
        Parser parser = new Parser();

        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String inputPath = (String) cmdLine.getValue(inputDirOpt);
        File f = new File(inputPath);
        if (!f.isDirectory()) {
            throw new IllegalArgumentException(f + " is not a directory or does not exit");
        }
        File[] inputFiles = FileUtil.buildFileList(f);

        File modelDir = new File((String) cmdLine.getValue(modelOpt));
        execute(inputFiles, modelDir);
    } catch (OptionException e) {
        log.error("Error while parsing options", e);
    }

}

From source file:com.tamingtext.tagging.LuceneCategoryExtractor.java

public static void main(String[] args) throws IOException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = obuilder.withLongName("dir").withRequired(true)
            .withArgument(abuilder.withName("dir").withMinimum(1).withMaximum(1).create())
            .withDescription("The Lucene directory").withShortName("d").create();

    Option outputOpt = obuilder.withLongName("output").withRequired(false)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("The output directory").withShortName("o").create();

    Option maxOpt = obuilder.withLongName("max").withRequired(false)
            .withArgument(abuilder.withName("max").withMinimum(1).withMaximum(1).create())
            .withDescription(/*from  w w  w.  ja v  a2s.  c o m*/
                    "The maximum number of documents to analyze.  If not specified, then it will loop over all docs")
            .withShortName("m").create();

    Option fieldOpt = obuilder.withLongName("field").withRequired(true)
            .withArgument(abuilder.withName("field").withMinimum(1).withMaximum(1).create())
            .withDescription("The field in the index").withShortName("f").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(maxOpt)
            .withOption(fieldOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        File inputDir = new File(cmdLine.getValue(inputOpt).toString());

        if (!inputDir.isDirectory()) {
            throw new IllegalArgumentException(inputDir + " does not exist or is not a directory");
        }

        long maxDocs = Long.MAX_VALUE;
        if (cmdLine.hasOption(maxOpt)) {
            maxDocs = Long.parseLong(cmdLine.getValue(maxOpt).toString());
        }

        if (maxDocs < 0) {
            throw new IllegalArgumentException("maxDocs must be >= 0");
        }

        String field = cmdLine.getValue(fieldOpt).toString();

        PrintWriter out = null;
        if (cmdLine.hasOption(outputOpt)) {
            out = new PrintWriter(new FileWriter(cmdLine.getValue(outputOpt).toString()));
        } else {
            out = new PrintWriter(new OutputStreamWriter(System.out, "UTF-8"));
        }

        dumpDocumentFields(inputDir, field, maxDocs, out);

        IOUtils.close(Collections.singleton(out));
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }

}

From source file:com.tamingtext.tagging.LuceneTagExtractor.java

public static void main(String[] args) throws IOException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = obuilder.withLongName("dir").withRequired(true)
            .withArgument(abuilder.withName("dir").withMinimum(1).withMaximum(1).create())
            .withDescription("The Lucene directory").withShortName("d").create();

    Option outputOpt = obuilder.withLongName("output").withRequired(false)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("The output directory").withShortName("o").create();

    Option maxOpt = obuilder.withLongName("max").withRequired(false)
            .withArgument(abuilder.withName("max").withMinimum(1).withMaximum(1).create())
            .withDescription(/*from   w  w  w .  j a  va  2s .c  o m*/
                    "The maximum number of vectors to output.  If not specified, then it will loop over all docs")
            .withShortName("m").create();

    Option fieldOpt = obuilder.withLongName("field").withRequired(true)
            .withArgument(abuilder.withName("field").withMinimum(1).withMaximum(1).create())
            .withDescription("The field in the index").withShortName("f").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(maxOpt)
            .withOption(fieldOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        File file = new File(cmdLine.getValue(inputOpt).toString());

        if (!file.isDirectory()) {
            throw new IllegalArgumentException(file + " does not exist or is not a directory");
        }

        long maxDocs = Long.MAX_VALUE;
        if (cmdLine.hasOption(maxOpt)) {
            maxDocs = Long.parseLong(cmdLine.getValue(maxOpt).toString());
        }

        if (maxDocs < 0) {
            throw new IllegalArgumentException("maxDocs must be >= 0");
        }

        String field = cmdLine.getValue(fieldOpt).toString();

        PrintWriter out = null;
        if (cmdLine.hasOption(outputOpt)) {
            out = new PrintWriter(new FileWriter(cmdLine.getValue(outputOpt).toString()));
        } else {
            out = new PrintWriter(new OutputStreamWriter(System.out, "UTF-8"));
        }

        File output = new File("/home/drew/taming-text/delicious/training");
        output.mkdirs();

        emitTextForTags(file, output);

        IOUtils.close(Collections.singleton(out));
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }

}

From source file:com.tamingtext.qa.WikipediaWexIndexer.java

public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option wikipediaFileOpt = obuilder.withLongName("wikiFile").withRequired(true)
            .withArgument(abuilder.withName("wikiFile").withMinimum(1).withMaximum(1).create())
            .withDescription("The path to the wikipedia dump file. "
                    + "May be a directory containing wikipedia dump files. "
                    + "If a directory is specified, files starting with the prefix "
                    + "freebase-segment- are used.")
            .withShortName("w").create();

    Option numDocsOpt = obuilder.withLongName("numDocs").withRequired(false)
            .withArgument(abuilder.withName("numDocs").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of docs to index").withShortName("n").create();

    Option solrURLOpt = obuilder.withLongName("solrURL").withRequired(false)
            .withArgument(abuilder.withName("solrURL").withMinimum(1).withMaximum(1).create())
            .withDescription("The URL where Solr lives").withShortName("s").create();

    Option solrBatchOpt = obuilder.withLongName("batch").withRequired(false)
            .withArgument(abuilder.withName("batch").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of docs to include in each indexing batch").withShortName("b")
            .create();/* www  .  ja  v a2s .  c  o  m*/

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(wikipediaFileOpt).withOption(numDocsOpt)
            .withOption(solrURLOpt).withOption(solrBatchOpt).withOption(helpOpt).create();

    Parser parser = new Parser();
    parser.setGroup(group);

    try {
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        File file;
        file = new File(cmdLine.getValue(wikipediaFileOpt).toString());
        File[] dumpFiles;
        if (file.isDirectory()) {
            dumpFiles = file.listFiles(new FilenameFilter() {
                public boolean accept(File file, String s) {
                    return s.startsWith("freebase-segment-");
                }
            });
        } else {
            dumpFiles = new File[] { file };
        }

        int numDocs = Integer.MAX_VALUE;
        if (cmdLine.hasOption(numDocsOpt)) {
            numDocs = Integer.parseInt(cmdLine.getValue(numDocsOpt).toString());
        }
        String url = DEFAULT_SOLR_URL;
        if (cmdLine.hasOption(solrURLOpt)) {
            url = cmdLine.getValue(solrURLOpt).toString();
        }
        int batch = 100;
        if (cmdLine.hasOption(solrBatchOpt)) {
            batch = Integer.parseInt(cmdLine.getValue(solrBatchOpt).toString());
        }
        WikipediaWexIndexer indexer = new WikipediaWexIndexer(new CommonsHttpSolrServer(url));
        int total = 0;
        for (int i = 0; i < dumpFiles.length && total < numDocs; i++) {
            File dumpFile = dumpFiles[i];
            log.info("Indexing: " + file + " Num files to index: " + (numDocs - total));
            long start = System.currentTimeMillis();
            int totalFile = indexer.index(dumpFile, numDocs - total, batch);
            long finish = System.currentTimeMillis();
            if (log.isInfoEnabled()) {
                log.info("Indexing " + dumpFile + " took " + (finish - start) + " ms");
            }
            total += totalFile;
            log.info("Done Indexing: " + file + ". Indexed " + totalFile + " docs for that file and " + total
                    + " overall.");

        }
        log.info("Indexed " + total + " docs overall.");
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
        return;
    }
}