Example usage for org.apache.commons.cli2 CommandLine getValue

Introduction

In this page you can find the example usage for org.apache.commons.cli2 CommandLine getValue.

Prototype

Object getValue(final Option option) throws IllegalStateException;

Source Link

Document

Retrieves the single Argument value associated with the specified Option

Usage

From source file:com.grantingersoll.intell.index.Indexer.java

public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option wikipediaFileOpt = obuilder.withLongName("wikiFile").withRequired(true)
            .withArgument(abuilder.withName("wikiFile").withMinimum(1).withMaximum(1).create())
            .withDescription(//from   w w  w  . ja  va2s.c  om
                    "The path to the wikipedia dump file.  Maybe a directory containing wikipedia dump files."
                            + "  If a directory is specified, only .xml files are used.")
            .withShortName("w").create();

    Option numDocsOpt = obuilder.withLongName("numDocs").withRequired(false)
            .withArgument(abuilder.withName("numDocs").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of docs to index").withShortName("n").create();

    Option solrURLOpt = obuilder.withLongName("solrURL").withRequired(false)
            .withArgument(abuilder.withName("solrURL").withMinimum(1).withMaximum(1).create())
            .withDescription("The URL where Solr lives").withShortName("s").create();

    Option solrBatchOpt = obuilder.withLongName("batch").withRequired(false)
            .withArgument(abuilder.withName("batch").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of docs to include in each indexing batch").withShortName("b")
            .create();

    Group group = gbuilder.withName("Options").withOption(wikipediaFileOpt).withOption(numDocsOpt)
            .withOption(solrURLOpt).withOption(solrBatchOpt).create();

    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);

    File file;
    file = new File(cmdLine.getValue(wikipediaFileOpt).toString());
    File[] dumpFiles;
    if (file.isDirectory()) {
        dumpFiles = file.listFiles(new FilenameFilter() {
            public boolean accept(File file, String s) {
                return s.endsWith(".xml");
            }
        });
    } else {
        dumpFiles = new File[] { file };
    }

    int numDocs = Integer.MAX_VALUE;
    if (cmdLine.hasOption(numDocsOpt)) {
        numDocs = Integer.parseInt(cmdLine.getValue(numDocsOpt).toString());
    }
    String url = DEFAULT_SOLR_URL;
    if (cmdLine.hasOption(solrURLOpt)) {
        url = cmdLine.getValue(solrURLOpt).toString();
    }
    int batch = 100;
    if (cmdLine.hasOption(solrBatchOpt)) {
        batch = Integer.parseInt(cmdLine.getValue(solrBatchOpt).toString());
    }
    Indexer indexer = new Indexer(new CommonsHttpSolrServer(url));
    int total = 0;
    for (int i = 0; i < dumpFiles.length && total < numDocs; i++) {
        File dumpFile = dumpFiles[i];
        log.info("Indexing: " + file + " Num files to index: " + (numDocs - total));
        long start = System.currentTimeMillis();
        int totalFile = indexer.index(dumpFile, numDocs - total, batch);
        long finish = System.currentTimeMillis();
        if (log.isInfoEnabled()) {
            log.info("Indexing " + dumpFile + " took " + (finish - start) + " ms");
        }
        total += totalFile;
        log.info("Done Indexing: " + file + ". Indexed " + totalFile + " docs for that file and " + total
                + " overall.");

    }
    log.info("Indexed " + total + " docs overall.");
}

From source file:com.tamingtext.qa.WikipediaIndexer.java

public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option wikipediaFileOpt = obuilder.withLongName("wikiFile").withRequired(true)
            .withArgument(abuilder.withName("wikiFile").withMinimum(1).withMaximum(1).create())
            .withDescription(/*w  w  w . j  a v  a 2 s  .  c om*/
                    "The path to the wikipedia dump file.  Maybe a directory containing wikipedia dump files."
                            + "  If a directory is specified, only .xml files are used.")
            .withShortName("w").create();

    Option numDocsOpt = obuilder.withLongName("numDocs").withRequired(false)
            .withArgument(abuilder.withName("numDocs").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of docs to index").withShortName("n").create();

    Option solrURLOpt = obuilder.withLongName("solrURL").withRequired(false)
            .withArgument(abuilder.withName("solrURL").withMinimum(1).withMaximum(1).create())
            .withDescription("The URL where Solr lives").withShortName("s").create();

    Option solrBatchOpt = obuilder.withLongName("batch").withRequired(false)
            .withArgument(abuilder.withName("batch").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of docs to include in each indexing batch").withShortName("b")
            .create();

    Group group = gbuilder.withName("Options").withOption(wikipediaFileOpt).withOption(numDocsOpt)
            .withOption(solrURLOpt).withOption(solrBatchOpt).create();

    Parser parser = new Parser();
    parser.setGroup(group);
    CommandLine cmdLine = parser.parse(args);

    File file;
    file = new File(cmdLine.getValue(wikipediaFileOpt).toString());
    File[] dumpFiles;
    if (file.isDirectory()) {
        dumpFiles = file.listFiles(new FilenameFilter() {
            public boolean accept(File file, String s) {
                return s.endsWith(".xml");
            }
        });
    } else {
        dumpFiles = new File[] { file };
    }

    int numDocs = Integer.MAX_VALUE;
    if (cmdLine.hasOption(numDocsOpt)) {
        numDocs = Integer.parseInt(cmdLine.getValue(numDocsOpt).toString());
    }
    String url = DEFAULT_SOLR_URL;
    if (cmdLine.hasOption(solrURLOpt)) {
        url = cmdLine.getValue(solrURLOpt).toString();
    }
    int batch = 100;
    if (cmdLine.hasOption(solrBatchOpt)) {
        batch = Integer.parseInt(cmdLine.getValue(solrBatchOpt).toString());
    }
    WikipediaIndexer indexer = new WikipediaIndexer(new CommonsHttpSolrServer(url));
    int total = 0;
    for (int i = 0; i < dumpFiles.length && total < numDocs; i++) {
        File dumpFile = dumpFiles[i];
        log.info("Indexing: " + file + " Num files to index: " + (numDocs - total));
        long start = System.currentTimeMillis();
        int totalFile = indexer.index(dumpFile, numDocs - total, batch);
        long finish = System.currentTimeMillis();
        if (log.isInfoEnabled()) {
            log.info("Indexing " + dumpFile + " took " + (finish - start) + " ms");
        }
        total += totalFile;
        log.info("Done Indexing: " + file + ". Indexed " + totalFile + " docs for that file and " + total
                + " overall.");

    }
    log.info("Indexed " + total + " docs overall.");
}

From source file:com.tamingtext.classifier.maxent.TrainMaxent.java

public static void main(String[] args) throws Exception {

    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Option inputDirOpt = obuilder.withLongName("input").withRequired(true)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The input directory, containing properly formatted files: "
                    + "One doc per line, first entry on the line is the label, rest is the evidence")
            .withShortName("i").create();

    Option outputOpt = obuilder.withLongName("output").withRequired(true)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("The output directory").withShortName("o").create();

    Group group = gbuilder.withName("Options").withOption(helpOpt).withOption(inputDirOpt).withOption(outputOpt)
            .create();//  www .java2s.co m

    //.withOption(gramSizeOpt).withOption(typeOpt)

    try {
        Parser parser = new Parser();

        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String inputPath = (String) cmdLine.getValue(inputDirOpt);
        String outputPath = (String) cmdLine.getValue(outputOpt);
        TrainMaxent trainer = new TrainMaxent();
        trainer.train(inputPath, outputPath);
    } catch (OptionException e) {
        log.error("Error while parsing options", e);
    }
}

From source file:net.hubs1.mahout.cluster.CRLFSeparatedToSequenceFile.java

/**
 * Takes in two arguments:/*from   w  ww .ja v  a 2  s. c  o  m*/
 * <ol>
 * <li>The input {@link org.apache.hadoop.fs.Path} where the input documents live</li>
 * <li>The output {@link org.apache.hadoop.fs.Path} where to write the classifier as a
 * {@link org.apache.hadoop.io.SequenceFile}</li>
 * </ol>
 */
public static void main(String[] args) throws IOException {

    GroupBuilder gbuilder = new GroupBuilder();

    Option dirInputPathOpt = DefaultOptionCreator.inputOption().create();

    Option dirOutputPathOpt = DefaultOptionCreator.outputOption().create();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Group group = gbuilder.withName("Options").withOption(dirInputPathOpt).withOption(dirOutputPathOpt)
            .withOption(helpOpt).create();

    Parser parser = new Parser();
    parser.setGroup(group);
    parser.setHelpOption(helpOpt);
    try {
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String inputPath = (String) cmdLine.getValue(dirInputPathOpt);
        String outputPath = (String) cmdLine.getValue(dirOutputPathOpt);

        runJob(inputPath, outputPath);
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    } catch (InterruptedException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    } catch (ClassNotFoundException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
}

From source file:imageClassify.Describe.java

public static void main(String[] args) throws IOException, DescriptorException {

    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option pathOpt = obuilder.withLongName("path").withShortName("p").withRequired(true)
            .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
            .withDescription("Data path").create();

    Option descriptorOpt = obuilder.withLongName("descriptor").withShortName("d").withRequired(true)
            .withArgument(abuilder.withName("descriptor").withMinimum(1).create())
            .withDescription("data descriptor").create();

    Option descPathOpt = obuilder.withLongName("file").withShortName("f").withRequired(true)
            .withArgument(abuilder.withName("file").withMinimum(1).withMaximum(1).create())
            .withDescription("Path to generated descriptor file").create();

    Option regOpt = obuilder.withLongName("regression").withDescription("Regression Problem").withShortName("r")
            .create();//from  ww  w .j  a  v  a 2  s  . c  o  m

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(pathOpt).withOption(descPathOpt)
            .withOption(descriptorOpt).withOption(regOpt).withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String dataPath = cmdLine.getValue(pathOpt).toString();
        String descPath = cmdLine.getValue(descPathOpt).toString();
        List<String> descriptor = convert(cmdLine.getValues(descriptorOpt));
        boolean regression = cmdLine.hasOption(regOpt);

        log.debug("Data path : {}", dataPath);
        log.debug("Descriptor path : {}", descPath);
        log.debug("Descriptor : {}", descriptor);
        log.debug("Regression : {}", regression);

        runTool(dataPath, descriptor, descPath, regression);
    } catch (OptionException e) {
        log.warn(e.toString());
        CommandLineUtil.printHelp(group);
    }
}

From source file:com.tamingtext.classifier.bayes.ClassifyDocument.java

public static void main(String[] args) {
    log.info("Command-line arguments: " + Arrays.toString(args));

    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = obuilder.withLongName("input").withRequired(true)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("Input file").withShortName("i").create();

    Option modelOpt = obuilder.withLongName("model").withRequired(true)
            .withArgument(abuilder.withName("model").withMinimum(1).withMaximum(1).create())
            .withDescription("Model to use when classifying data").withShortName("m").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();//from  w ww. j  a v  a2  s  . co  m

    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(modelOpt).withOption(helpOpt)
            .create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        File inputFile = new File(cmdLine.getValue(inputOpt).toString());

        if (!inputFile.isFile()) {
            throw new IllegalArgumentException(inputFile + " does not exist or is not a file");
        }

        File modelDir = new File(cmdLine.getValue(modelOpt).toString());

        if (!modelDir.isDirectory()) {
            throw new IllegalArgumentException(modelDir + " does not exist or is not a directory");
        }

        BayesParameters p = new BayesParameters();
        p.set("basePath", modelDir.getCanonicalPath());
        Datastore ds = new InMemoryBayesDatastore(p);
        Algorithm a = new BayesAlgorithm();
        ClassifierContext ctx = new ClassifierContext(a, ds);
        ctx.initialize();

        //TODO: make the analyzer configurable
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        TokenStream ts = analyzer.tokenStream(null,
                new InputStreamReader(new FileInputStream(inputFile), "UTF-8"));

        ArrayList<String> tokens = new ArrayList<String>(1000);
        while (ts.incrementToken()) {
            tokens.add(ts.getAttribute(CharTermAttribute.class).toString());
        }
        String[] document = tokens.toArray(new String[tokens.size()]);

        ClassifierResult[] cr = ctx.classifyDocument(document, "unknown", 5);

        for (ClassifierResult r : cr) {
            System.err.println(r.getLabel() + "\t" + r.getScore());
        }
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    } catch (IOException e) {
        log.error("IOException", e);
    } catch (InvalidDatastoreException e) {
        log.error("InvalidDataStoreException", e);
    } finally {

    }
}

From source file:com.tamingtext.classifier.maxent.TestMaxent.java

/**
 * @param args//ww  w  .  j  a v a 2s .c o  m
 */
public static void main(String[] args) throws IOException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Option inputDirOpt = obuilder.withLongName("input").withRequired(true)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The input directory").withShortName("i").create();

    Option modelOpt = obuilder.withLongName("model").withRequired(true)
            .withArgument(abuilder.withName("index").withMinimum(1).withMaximum(1).create())
            .withDescription("The directory containing the index model").withShortName("m").create();

    Group group = gbuilder.withName("Options").withOption(helpOpt).withOption(inputDirOpt).withOption(modelOpt)
            .create();

    try {
        Parser parser = new Parser();

        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String inputPath = (String) cmdLine.getValue(inputDirOpt);
        File f = new File(inputPath);
        if (!f.isDirectory()) {
            throw new IllegalArgumentException(f + " is not a directory or does not exit");
        }
        File[] inputFiles = FileUtil.buildFileList(f);

        File modelDir = new File((String) cmdLine.getValue(modelOpt));
        execute(inputFiles, modelDir);
    } catch (OptionException e) {
        log.error("Error while parsing options", e);
    }

}

From source file:com.tamingtext.tagging.LuceneCategoryExtractor.java

public static void main(String[] args) throws IOException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = obuilder.withLongName("dir").withRequired(true)
            .withArgument(abuilder.withName("dir").withMinimum(1).withMaximum(1).create())
            .withDescription("The Lucene directory").withShortName("d").create();

    Option outputOpt = obuilder.withLongName("output").withRequired(false)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("The output directory").withShortName("o").create();

    Option maxOpt = obuilder.withLongName("max").withRequired(false)
            .withArgument(abuilder.withName("max").withMinimum(1).withMaximum(1).create())
            .withDescription(/*from  w w  w.  ja v  a2s.  c o m*/
                    "The maximum number of documents to analyze.  If not specified, then it will loop over all docs")
            .withShortName("m").create();

    Option fieldOpt = obuilder.withLongName("field").withRequired(true)
            .withArgument(abuilder.withName("field").withMinimum(1).withMaximum(1).create())
            .withDescription("The field in the index").withShortName("f").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(maxOpt)
            .withOption(fieldOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        File inputDir = new File(cmdLine.getValue(inputOpt).toString());

        if (!inputDir.isDirectory()) {
            throw new IllegalArgumentException(inputDir + " does not exist or is not a directory");
        }

        long maxDocs = Long.MAX_VALUE;
        if (cmdLine.hasOption(maxOpt)) {
            maxDocs = Long.parseLong(cmdLine.getValue(maxOpt).toString());
        }

        if (maxDocs < 0) {
            throw new IllegalArgumentException("maxDocs must be >= 0");
        }

        String field = cmdLine.getValue(fieldOpt).toString();

        PrintWriter out = null;
        if (cmdLine.hasOption(outputOpt)) {
            out = new PrintWriter(new FileWriter(cmdLine.getValue(outputOpt).toString()));
        } else {
            out = new PrintWriter(new OutputStreamWriter(System.out, "UTF-8"));
        }

        dumpDocumentFields(inputDir, field, maxDocs, out);

        IOUtils.close(Collections.singleton(out));
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }

}

From source file:com.tamingtext.tagging.LuceneTagExtractor.java

public static void main(String[] args) throws IOException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = obuilder.withLongName("dir").withRequired(true)
            .withArgument(abuilder.withName("dir").withMinimum(1).withMaximum(1).create())
            .withDescription("The Lucene directory").withShortName("d").create();

    Option outputOpt = obuilder.withLongName("output").withRequired(false)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("The output directory").withShortName("o").create();

    Option maxOpt = obuilder.withLongName("max").withRequired(false)
            .withArgument(abuilder.withName("max").withMinimum(1).withMaximum(1).create())
            .withDescription(/*from   w  w  w .  j a  va  2s .c  o m*/
                    "The maximum number of vectors to output.  If not specified, then it will loop over all docs")
            .withShortName("m").create();

    Option fieldOpt = obuilder.withLongName("field").withRequired(true)
            .withArgument(abuilder.withName("field").withMinimum(1).withMaximum(1).create())
            .withDescription("The field in the index").withShortName("f").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(maxOpt)
            .withOption(fieldOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        File file = new File(cmdLine.getValue(inputOpt).toString());

        if (!file.isDirectory()) {
            throw new IllegalArgumentException(file + " does not exist or is not a directory");
        }

        long maxDocs = Long.MAX_VALUE;
        if (cmdLine.hasOption(maxOpt)) {
            maxDocs = Long.parseLong(cmdLine.getValue(maxOpt).toString());
        }

        if (maxDocs < 0) {
            throw new IllegalArgumentException("maxDocs must be >= 0");
        }

        String field = cmdLine.getValue(fieldOpt).toString();

        PrintWriter out = null;
        if (cmdLine.hasOption(outputOpt)) {
            out = new PrintWriter(new FileWriter(cmdLine.getValue(outputOpt).toString()));
        } else {
            out = new PrintWriter(new OutputStreamWriter(System.out, "UTF-8"));
        }

        File output = new File("/home/drew/taming-text/delicious/training");
        output.mkdirs();

        emitTextForTags(file, output);

        IOUtils.close(Collections.singleton(out));
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }

}

From source file:com.tamingtext.qa.WikipediaWexIndexer.java

public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option wikipediaFileOpt = obuilder.withLongName("wikiFile").withRequired(true)
            .withArgument(abuilder.withName("wikiFile").withMinimum(1).withMaximum(1).create())
            .withDescription("The path to the wikipedia dump file. "
                    + "May be a directory containing wikipedia dump files. "
                    + "If a directory is specified, files starting with the prefix "
                    + "freebase-segment- are used.")
            .withShortName("w").create();

    Option numDocsOpt = obuilder.withLongName("numDocs").withRequired(false)
            .withArgument(abuilder.withName("numDocs").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of docs to index").withShortName("n").create();

    Option solrURLOpt = obuilder.withLongName("solrURL").withRequired(false)
            .withArgument(abuilder.withName("solrURL").withMinimum(1).withMaximum(1).create())
            .withDescription("The URL where Solr lives").withShortName("s").create();

    Option solrBatchOpt = obuilder.withLongName("batch").withRequired(false)
            .withArgument(abuilder.withName("batch").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of docs to include in each indexing batch").withShortName("b")
            .create();/* www  .  ja  v a2s .  c  o  m*/

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(wikipediaFileOpt).withOption(numDocsOpt)
            .withOption(solrURLOpt).withOption(solrBatchOpt).withOption(helpOpt).create();

    Parser parser = new Parser();
    parser.setGroup(group);

    try {
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        File file;
        file = new File(cmdLine.getValue(wikipediaFileOpt).toString());
        File[] dumpFiles;
        if (file.isDirectory()) {
            dumpFiles = file.listFiles(new FilenameFilter() {
                public boolean accept(File file, String s) {
                    return s.startsWith("freebase-segment-");
                }
            });
        } else {
            dumpFiles = new File[] { file };
        }

        int numDocs = Integer.MAX_VALUE;
        if (cmdLine.hasOption(numDocsOpt)) {
            numDocs = Integer.parseInt(cmdLine.getValue(numDocsOpt).toString());
        }
        String url = DEFAULT_SOLR_URL;
        if (cmdLine.hasOption(solrURLOpt)) {
            url = cmdLine.getValue(solrURLOpt).toString();
        }
        int batch = 100;
        if (cmdLine.hasOption(solrBatchOpt)) {
            batch = Integer.parseInt(cmdLine.getValue(solrBatchOpt).toString());
        }
        WikipediaWexIndexer indexer = new WikipediaWexIndexer(new CommonsHttpSolrServer(url));
        int total = 0;
        for (int i = 0; i < dumpFiles.length && total < numDocs; i++) {
            File dumpFile = dumpFiles[i];
            log.info("Indexing: " + file + " Num files to index: " + (numDocs - total));
            long start = System.currentTimeMillis();
            int totalFile = indexer.index(dumpFile, numDocs - total, batch);
            long finish = System.currentTimeMillis();
            if (log.isInfoEnabled()) {
                log.info("Indexing " + dumpFile + " took " + (finish - start) + " ms");
            }
            total += totalFile;
            log.info("Done Indexing: " + file + ". Indexed " + totalFile + " docs for that file and " + total
                    + " overall.");

        }
        log.info("Indexed " + total + " docs overall.");
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
        return;
    }
}