List of usage examples for org.apache.commons.cli PosixParser parse
public CommandLine parse(Options options, String[] arguments, Properties properties) throws ParseException
From source file:com.cyberway.issue.io.Arc2Warc.java
/** * Command-line interface to Arc2Warc.// ww w . j av a2 s . c om * * @param args Command-line arguments. * @throws ParseException Failed parse of the command line. * @throws IOException * @throws java.text.ParseException */ public static void main(String[] args) throws ParseException, IOException, java.text.ParseException { Options options = new Options(); options.addOption(new Option("h", "help", false, "Prints this message and exits.")); options.addOption(new Option("f", "force", false, "Force overwrite of target file.")); PosixParser parser = new PosixParser(); CommandLine cmdline = parser.parse(options, args, false); List cmdlineArgs = cmdline.getArgList(); Option[] cmdlineOptions = cmdline.getOptions(); HelpFormatter formatter = new HelpFormatter(); // If no args, print help. if (cmdlineArgs.size() <= 0) { usage(formatter, options, 0); } // Now look at options passed. boolean force = false; for (int i = 0; i < cmdlineOptions.length; i++) { switch (cmdlineOptions[i].getId()) { case 'h': usage(formatter, options, 0); break; case 'f': force = true; break; default: throw new RuntimeException("Unexpected option: " + +cmdlineOptions[i].getId()); } } // If no args, print help. if (cmdlineArgs.size() != 2) { usage(formatter, options, 0); } (new Arc2Warc()).transform(new File(cmdlineArgs.get(0).toString()), new File(cmdlineArgs.get(1).toString()), force); }
From source file:com.cyberway.issue.io.Warc2Arc.java
/** * Command-line interface to Arc2Warc./* ww w .j ava2 s .c o m*/ * * @param args Command-line arguments. * @throws ParseException Failed parse of the command line. * @throws IOException * @throws java.text.ParseException */ public static void main(String[] args) throws ParseException, IOException, java.text.ParseException { Options options = new Options(); options.addOption(new Option("h", "help", false, "Prints this message and exits.")); options.addOption(new Option("f", "force", false, "Force overwrite of target file.")); options.addOption( new Option("p", "prefix", true, "Prefix to use on created ARC files, else uses default.")); options.addOption( new Option("s", "suffix", true, "Suffix to use on created ARC files, else uses default.")); PosixParser parser = new PosixParser(); CommandLine cmdline = parser.parse(options, args, false); List cmdlineArgs = cmdline.getArgList(); Option[] cmdlineOptions = cmdline.getOptions(); HelpFormatter formatter = new HelpFormatter(); // If no args, print help. if (cmdlineArgs.size() < 0) { usage(formatter, options, 0); } // Now look at options passed. boolean force = false; String prefix = "WARC2ARC"; String suffix = null; for (int i = 0; i < cmdlineOptions.length; i++) { switch (cmdlineOptions[i].getId()) { case 'h': usage(formatter, options, 0); break; case 'f': force = true; break; case 'p': prefix = cmdlineOptions[i].getValue(); break; case 's': suffix = cmdlineOptions[i].getValue(); break; default: throw new RuntimeException("Unexpected option: " + +cmdlineOptions[i].getId()); } } // If no args, print help. if (cmdlineArgs.size() != 2) { usage(formatter, options, 0); } (new Warc2Arc()).transform(new File(cmdlineArgs.get(0).toString()), new File(cmdlineArgs.get(1).toString()), prefix, suffix, force); }
From source file:com.cyberway.issue.crawler.extractor.ExtractorTool.java
public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(new Option("h", "help", false, "Prints this message and exits.")); StringBuffer defaultExtractors = new StringBuffer(); for (int i = 0; i < DEFAULT_EXTRACTORS.length; i++) { if (i > 0) { defaultExtractors.append(", "); }/*from ww w . jav a2s.com*/ defaultExtractors.append(DEFAULT_EXTRACTORS[i]); } options.addOption(new Option("e", "extractor", true, "List of comma-separated extractor class names. " + "Run in order listed. " + "If no extractors listed, runs following: " + defaultExtractors.toString() + ".")); options.addOption( new Option("s", "scratch", true, "Directory to write scratch files to. Default: '/tmp'.")); PosixParser parser = new PosixParser(); CommandLine cmdline = parser.parse(options, args, false); List cmdlineArgs = cmdline.getArgList(); Option[] cmdlineOptions = cmdline.getOptions(); HelpFormatter formatter = new HelpFormatter(); // If no args, print help. if (cmdlineArgs.size() <= 0) { usage(formatter, options, 0); } // Now look at options passed. String[] extractors = DEFAULT_EXTRACTORS; String scratch = null; for (int i = 0; i < cmdlineOptions.length; i++) { switch (cmdlineOptions[i].getId()) { case 'h': usage(formatter, options, 0); break; case 'e': String value = cmdlineOptions[i].getValue(); if (value == null || value.length() <= 0) { // Allow saying NO extractors so we can see // how much it costs just reading through // ARCs. extractors = new String[0]; } else { extractors = value.split(","); } break; case 's': scratch = cmdlineOptions[i].getValue(); break; default: throw new RuntimeException("Unexpected option: " + +cmdlineOptions[i].getId()); } } ExtractorTool tool = new ExtractorTool(extractors, scratch); for (Iterator i = cmdlineArgs.iterator(); i.hasNext();) { tool.extract((String) i.next()); } }
From source file:asl.seedscan.DQAWeb.java
public static void main(String args[]) { db = new MetricDatabase("", "", ""); findConsoleHandler();/*www . ja va2s .c o m*/ consoleHandler.setLevel(Level.ALL); Logger.getLogger("").setLevel(Level.CONFIG); // Default locations of config and schema files File configFile = new File("dqaweb-config.xml"); File schemaFile = new File("schemas/DQAWebConfig.xsd"); boolean parseConfig = true; boolean testMode = false; ArrayList<File> schemaFiles = new ArrayList<File>(); schemaFiles.add(schemaFile); // ==== Command Line Parsing ==== Options options = new Options(); Option opConfigFile = new Option("c", "config-file", true, "The config file to use for seedscan. XML format according to SeedScanConfig.xsd."); Option opSchemaFile = new Option("s", "schema-file", true, "The schame file which should be used to verify the config file format. "); Option opTest = new Option("t", "test", false, "Run in test console mode rather than as a servlet."); OptionGroup ogConfig = new OptionGroup(); ogConfig.addOption(opConfigFile); OptionGroup ogSchema = new OptionGroup(); ogConfig.addOption(opSchemaFile); OptionGroup ogTest = new OptionGroup(); ogTest.addOption(opTest); options.addOptionGroup(ogConfig); options.addOptionGroup(ogSchema); options.addOptionGroup(ogTest); PosixParser optParser = new PosixParser(); CommandLine cmdLine = null; try { cmdLine = optParser.parse(options, args, true); } catch (org.apache.commons.cli.ParseException e) { logger.severe("Error while parsing command-line arguments."); System.exit(1); } Option opt; Iterator iter = cmdLine.iterator(); while (iter.hasNext()) { opt = (Option) iter.next(); if (opt.getOpt().equals("c")) { configFile = new File(opt.getValue()); } else if (opt.getOpt().equals("s")) { schemaFile = new File(opt.getValue()); } else if (opt.getOpt().equals("t")) { testMode = true; } } String query = ""; System.out.println("Entering Test Mode"); System.out.println("Enter a query string to view results or type \"help\" for example query strings"); InputStreamReader input = new InputStreamReader(System.in); BufferedReader reader = new BufferedReader(input); String result = ""; while (testMode == true) { try { System.out.printf("Query: "); query = reader.readLine(); if (query.equals("exit")) { testMode = false; } else if (query.equals("help")) { System.out.println("Need to add some help for people"); //TODO } else { result = processCommand(query); } System.out.println(result); } catch (IOException err) { System.err.println("Error reading line, in DQAWeb.java"); } } System.err.printf("DONE.\n"); }
From source file:com.cyberway.issue.io.warc.WARCReader.java
/** * Command-line interface to WARCReader. * * Here is the command-line interface:/* w w w. ja va 2s.c om*/ * <pre> * usage: java com.cyberway.issue.io.arc.WARCReader [--offset=#] ARCFILE * -h,--help Prints this message and exits. * -o,--offset Outputs record at this offset into arc file.</pre> * * <p>Outputs using a pseudo-CDX format as described here: * <a href="http://www.archive.org/web/researcher/cdx_legend.php">CDX * Legent</a> and here * <a href="http://www.archive.org/web/researcher/example_cdx.php">Example</a>. * Legend used in below is: 'CDX b e a m s c V (or v if uncompressed) n g'. * Hash is hard-coded straight SHA-1 hash of content. * * @param args Command-line arguments. * @throws ParseException Failed parse of the command line. * @throws IOException * @throws java.text.ParseException */ public static void main(String[] args) throws ParseException, IOException, java.text.ParseException { Options options = getOptions(); PosixParser parser = new PosixParser(); CommandLine cmdline = parser.parse(options, args, false); List cmdlineArgs = cmdline.getArgList(); Option[] cmdlineOptions = cmdline.getOptions(); HelpFormatter formatter = new HelpFormatter(); // If no args, print help. if (cmdlineArgs.size() <= 0) { usage(formatter, options, 0); } // Now look at options passed. long offset = -1; boolean digest = false; boolean strict = false; String format = CDX; for (int i = 0; i < cmdlineOptions.length; i++) { switch (cmdlineOptions[i].getId()) { case 'h': usage(formatter, options, 0); break; case 'o': offset = Long.parseLong(cmdlineOptions[i].getValue()); break; case 's': strict = true; break; case 'd': digest = getTrueOrFalse(cmdlineOptions[i].getValue()); break; case 'f': format = cmdlineOptions[i].getValue().toLowerCase(); boolean match = false; // List of supported formats. final String[] supportedFormats = { CDX, DUMP, GZIP_DUMP, CDX_FILE }; for (int ii = 0; ii < supportedFormats.length; ii++) { if (supportedFormats[ii].equals(format)) { match = true; break; } } if (!match) { usage(formatter, options, 1); } break; default: throw new RuntimeException("Unexpected option: " + +cmdlineOptions[i].getId()); } } if (offset >= 0) { if (cmdlineArgs.size() != 1) { System.out.println("Error: Pass one arcfile only."); usage(formatter, options, 1); } WARCReader r = WARCReaderFactory.get(new File((String) cmdlineArgs.get(0)), offset); r.setStrict(strict); outputRecord(r, format); } else { for (Iterator i = cmdlineArgs.iterator(); i.hasNext();) { String urlOrPath = (String) i.next(); try { WARCReader r = WARCReaderFactory.get(urlOrPath); r.setStrict(strict); r.setDigest(digest); output(r, format); } catch (RuntimeException e) { // Write out name of file we failed on to help with // debugging. Then print stack trace and try to keep // going. We do this for case where we're being fed // a bunch of ARCs; just note the bad one and move // on to the next. System.err.println("Exception processing " + urlOrPath + ": " + e.getMessage()); e.printStackTrace(System.err); System.exit(1); } } } }
From source file:asl.seedscan.SeedScan.java
public static void main(String args[]) { // Default locations of config and schema files File configFile = new File("config.xml"); File schemaFile = new File("schemas/SeedScanConfig.xsd"); boolean parseConfig = true; ArrayList<File> schemaFiles = new ArrayList<File>(); schemaFiles.add(schemaFile);/*from ww w . java2s .c o m*/ // ==== Command Line Parsing ==== Options options = new Options(); Option opConfigFile = new Option("c", "config-file", true, "The config file to use for seedscan. XML format according to SeedScanConfig.xsd."); Option opSchemaFile = new Option("s", "schema-file", true, "The xsd schema file which should be used to verify the config file format. "); OptionGroup ogConfig = new OptionGroup(); ogConfig.addOption(opConfigFile); OptionGroup ogSchema = new OptionGroup(); ogConfig.addOption(opSchemaFile); options.addOptionGroup(ogConfig); options.addOptionGroup(ogSchema); PosixParser optParser = new PosixParser(); CommandLine cmdLine = null; try { cmdLine = optParser.parse(options, args, true); } catch (org.apache.commons.cli.ParseException e) { logger.error("Error while parsing command-line arguments."); System.exit(1); } Option opt; Iterator<?> iter = cmdLine.iterator(); while (iter.hasNext()) { opt = (Option) iter.next(); if (opt.getOpt().equals("c")) { configFile = new File(opt.getValue()); } else if (opt.getOpt().equals("s")) { schemaFile = new File(opt.getValue()); } } // ==== Configuration Read and Parse Actions ==== ConfigParser parser = new ConfigParser(schemaFiles); ConfigT config = parser.parseConfig(configFile); // Print out configuration file contents Formatter formatter = new Formatter(new StringBuilder(), Locale.US); // ===== CONFIG: LOCK FILE ===== File lockFile = new File(config.getLockfile()); logger.info("SeedScan lock file is '" + lockFile + "'"); LockFile lock = new LockFile(lockFile); if (!lock.acquire()) { logger.error("Could not acquire lock."); System.exit(1); } // ===== CONFIG: LOGGING ===== // MTH: This is now done in log4j.properties file // ===== CONFIG: DATABASE ===== MetricDatabase readDB = new MetricDatabase(config.getDatabase()); MetricDatabase writeDB = new MetricDatabase(config.getDatabase()); MetricReader reader = new MetricReader(readDB); MetricInjector injector = new MetricInjector(writeDB); // ===== CONFIG: SCANS ===== Hashtable<String, Scan> scans = new Hashtable<String, Scan>(); if (config.getScans().getScan() == null) { logger.error("No scans in configuration."); System.exit(1); } else { for (ScanT scanCfg : config.getScans().getScan()) { String name = scanCfg.getName(); if (scans.containsKey(name)) { logger.error("Duplicate scan name '" + name + "' encountered."); System.exit(1); } // This should really be handled by jaxb by setting it up in schemas/SeedScanConfig.xsd if (scanCfg.getStartDay() == null && scanCfg.getStartDate() == null) { logger.error( "== SeedScan Error: Must set EITHER cfg:start_day -OR- cfg:start_date in config.xml to start Scan!"); System.exit(1); } // Configure this Scan Scan scan = new Scan(scanCfg.getName()); scan.setPathPattern(scanCfg.getPath()); scan.setDatalessDir(scanCfg.getDatalessDir()); scan.setEventsDir(scanCfg.getEventsDir()); scan.setPlotsDir(scanCfg.getPlotsDir()); scan.setDaysToScan(scanCfg.getDaysToScan().intValue()); if (scanCfg.getStartDay() != null) { scan.setStartDay(scanCfg.getStartDay().intValue()); } if (scanCfg.getStartDate() != null) { scan.setStartDate(scanCfg.getStartDate().intValue()); } if (scanCfg.getNetworkSubset() != null) { logger.debug("Filter on Network Subset=[{}]", scanCfg.getNetworkSubset()); Filter filter = new Filter(false); for (String network : scanCfg.getNetworkSubset().split(",")) { logger.debug("Network =[{}]", network); filter.addFilter(network); } scan.setNetworks(filter); } if (scanCfg.getStationSubset() != null) { logger.debug("Filter on Station Subset=[{}]", scanCfg.getStationSubset()); Filter filter = new Filter(false); for (String station : scanCfg.getStationSubset().split(",")) { logger.debug("Station =[{}]", station); filter.addFilter(station); } scan.setStations(filter); } if (scanCfg.getLocationSubset() != null) { logger.debug("Filter on Location Subset=[{}]", scanCfg.getLocationSubset()); Filter filter = new Filter(false); for (String location : scanCfg.getLocationSubset().split(",")) { logger.debug("Location =[{}]", location); filter.addFilter(location); } scan.setLocations(filter); } if (scanCfg.getChannelSubset() != null) { logger.debug("Filter on Channel Subset=[{}]", scanCfg.getChannelSubset()); Filter filter = new Filter(false); for (String channel : scanCfg.getChannelSubset().split(",")) { logger.debug("Channel =[{}]", channel); filter.addFilter(channel); } scan.setChannels(filter); } for (MetricT met : scanCfg.getMetrics().getMetric()) { try { Class<?> metricClass = Class.forName(met.getClassName()); MetricWrapper wrapper = new MetricWrapper(metricClass); for (ArgumentT arg : met.getArgument()) { wrapper.add(arg.getName(), arg.getValue()); } scan.addMetric(wrapper); } catch (ClassNotFoundException ex) { logger.error("No such metric class '" + met.getClassName() + "'"); System.exit(1); } catch (InstantiationException ex) { logger.error("Could not dynamically instantiate class '" + met.getClassName() + "'"); System.exit(1); } catch (IllegalAccessException ex) { logger.error("Illegal access while loading class '" + met.getClassName() + "'"); System.exit(1); } catch (NoSuchFieldException ex) { logger.error("Invalid dynamic argument to Metric subclass '" + met.getClassName() + "'"); System.exit(1); } } scans.put(name, scan); } } // ==== Establish Database Connection ==== // TODO: State Tracking in the Database // - Record scan started in database. // - Track our progress as we go so a new process can pick up where // we left off if our process dies. // - Mark when each date-station-channel-operation is complete //LogDatabaseHandler logDB = new LogDatabaseHandler(configuration.get // For each day ((yesterday - scanDepth) to yesterday) // scan for these channel files, only process them if // they have not yet been scanned, or if changes have // occurred to the file since its last scan. Do this for // each scan type. Do not re-scan data for each type, // launch processes for each scan and use the same data set // for each. If we can pipe the data as it is read, do so. // If we need to push all of it at once, do these in sequence // in order to preserve overall system memory resources. Scan scan = null; // ==== Perform Scans ==== scan = scans.get("daily"); //MTH: This part could/should be moved up higher except that we need to know datalessDir, which, // at this point, is configured on a per scan basis ... so we need to know what scan we're doing MetaServer metaServer = null; if (config.getMetaserver() != null) { if (config.getMetaserver().getUseRemote().equals("yes") || config.getMetaserver().getUseRemote().equals("true")) { String remoteServer = config.getMetaserver().getRemoteUri(); try { metaServer = new MetaServer(new URI(remoteServer)); } catch (Exception e) { logger.error("caught URI exception:" + e.getMessage()); } } else { metaServer = new MetaServer(scan.getDatalessDir()); } } else { // Use local MetaServer metaServer = new MetaServer(scan.getDatalessDir()); } List<Station> stations = null; if (config.getStationList() == null) { // get StationList from MetaServer logger.info("Get StationList from MetaServer"); stations = metaServer.getStationList(); } else { // read StationList from config.xml logger.info("Read StationList from config.xml"); List<String> stationList = config.getStationList().getStation(); if (stationList.size() > 0) { stations = new ArrayList<Station>(); for (String station : stationList) { String[] words = station.split("_"); if (words.length != 2) { logger.warn(String.format("stationList: station=[%s] is NOT a valid station --> Skip", station)); } else { stations.add(new Station(words[0], words[1])); logger.info("config.xml: Read station:" + station); } } } else { logger.error("Error: No valid stations read from config.xml"); } } if (stations == null) { logger.error("Found NO stations to scan --> EXITTING SeedScan"); System.exit(1); } Thread readerThread = new Thread(reader); readerThread.start(); logger.info("Reader thread started."); Thread injectorThread = new Thread(injector); injectorThread.start(); logger.info("Injector thread started."); // Loop over scans and hand each one to a ScanManager logger.info("Hand scan to ScanManager"); for (String key : scans.keySet()) { scan = scans.get(key); logger.info(String.format("Scan=[%s] startDay=%d startDate=%d daysToScan=%d\n", key, scan.getStartDay(), scan.getStartDate(), scan.getDaysToScan())); ScanManager scanManager = new ScanManager(reader, injector, stations, scan, metaServer); } logger.info("ScanManager is [ FINISHED ] --> stop the injector and reader threads"); try { injector.halt(); logger.info("All stations processed. Waiting for injector thread to finish..."); synchronized (injectorThread) { //injectorThread.wait(); injectorThread.interrupt(); } logger.info("Injector thread halted."); } catch (InterruptedException ex) { logger.warn("The injector thread was interrupted while attempting to complete requests."); } try { reader.halt(); logger.info("All stations processed. Waiting for reader thread to finish..."); synchronized (readerThread) { //readerThread.wait(); readerThread.interrupt(); } logger.info("Reader thread halted."); } catch (InterruptedException ex) { logger.warn("The reader thread was interrupted while attempting to complete requests."); } try { lock.release(); } catch (IOException e) { ; } finally { logger.info("Release seedscan lock and quit metaServer"); lock = null; metaServer.quit(); } }
From source file:com.cyberway.issue.io.arc.ARCReader.java
/** * Command-line interface to ARCReader./* w w w . j a va 2 s . c o m*/ * * Here is the command-line interface: * <pre> * usage: java com.cyberway.issue.io.arc.ARCReader [--offset=#] ARCFILE * -h,--help Prints this message and exits. * -o,--offset Outputs record at this offset into arc file.</pre> * * <p>See in <code>$HERITRIX_HOME/bin/arcreader</code> for a script that'll * take care of classpaths and the calling of ARCReader. * * <p>Outputs using a pseudo-CDX format as described here: * <a href="http://www.archive.org/web/researcher/cdx_legend.php">CDX * Legent</a> and here * <a href="http://www.archive.org/web/researcher/example_cdx.php">Example</a>. * Legend used in below is: 'CDX b e a m s c V (or v if uncompressed) n g'. * Hash is hard-coded straight SHA-1 hash of content. * * @param args Command-line arguments. * @throws ParseException Failed parse of the command line. * @throws IOException * @throws java.text.ParseException */ public static void main(String[] args) throws ParseException, IOException, java.text.ParseException { Options options = getOptions(); options.addOption(new Option("p", "parse", false, "Parse headers.")); PosixParser parser = new PosixParser(); CommandLine cmdline = parser.parse(options, args, false); List cmdlineArgs = cmdline.getArgList(); Option[] cmdlineOptions = cmdline.getOptions(); HelpFormatter formatter = new HelpFormatter(); // If no args, print help. if (cmdlineArgs.size() <= 0) { usage(formatter, options, 0); } // Now look at options passed. long offset = -1; boolean digest = false; boolean strict = false; boolean parse = false; String format = CDX; for (int i = 0; i < cmdlineOptions.length; i++) { switch (cmdlineOptions[i].getId()) { case 'h': usage(formatter, options, 0); break; case 'o': offset = Long.parseLong(cmdlineOptions[i].getValue()); break; case 's': strict = true; break; case 'p': parse = true; break; case 'd': digest = getTrueOrFalse(cmdlineOptions[i].getValue()); break; case 'f': format = cmdlineOptions[i].getValue().toLowerCase(); boolean match = false; // List of supported formats. final String[] supportedFormats = { CDX, DUMP, GZIP_DUMP, HEADER, NOHEAD, CDX_FILE }; for (int ii = 0; ii < supportedFormats.length; ii++) { if (supportedFormats[ii].equals(format)) { match = true; break; } } if (!match) { usage(formatter, options, 1); } break; default: throw new RuntimeException("Unexpected option: " + +cmdlineOptions[i].getId()); } } if (offset >= 0) { if (cmdlineArgs.size() != 1) { System.out.println("Error: Pass one arcfile only."); usage(formatter, options, 1); } ARCReader arc = ARCReaderFactory.get((String) cmdlineArgs.get(0), offset); arc.setStrict(strict); // We must parse headers if we need to skip them. if (format.equals(NOHEAD) || format.equals(HEADER)) { parse = true; } arc.setParseHttpHeaders(parse); outputRecord(arc, format); } else { for (Iterator i = cmdlineArgs.iterator(); i.hasNext();) { String urlOrPath = (String) i.next(); try { ARCReader r = ARCReaderFactory.get(urlOrPath); r.setStrict(strict); r.setParseHttpHeaders(parse); r.setDigest(digest); output(r, format); } catch (RuntimeException e) { // Write out name of file we failed on to help with // debugging. Then print stack trace and try to keep // going. We do this for case where we're being fed // a bunch of ARCs; just note the bad one and move // on to the next. System.err.println("Exception processing " + urlOrPath + ": " + e.getMessage()); e.printStackTrace(System.err); System.exit(1); } } } }
From source file:com.milaboratory.mitcr.cli.Main.java
public static void main(String[] args) { int o = 0;/*from w w w. j a v a 2s. c o m*/ BuildInformation buildInformation = BuildInformationProvider.get(); final boolean isProduction = "default".equals(buildInformation.scmBranch); // buildInformation.version != null && buildInformation.version.lastIndexOf("SNAPSHOT") < 0; orderingMap.put(PARAMETERS_SET_OPTION, o++); orderingMap.put(SPECIES_OPTION, o++); orderingMap.put(GENE_OPTION, o++); orderingMap.put(ERROR_CORECTION_LEVEL_OPTION, o++); orderingMap.put(QUALITY_THRESHOLD_OPTION, o++); orderingMap.put(AVERAGE_QUALITY_OPTION, o++); orderingMap.put(LQ_OPTION, o++); orderingMap.put(CLUSTERIZATION_OPTION, o++); orderingMap.put(INCLUDE_CYS_PHE_OPTION, o++); orderingMap.put(LIMIT_OPTION, o++); orderingMap.put(EXPORT_OPTION, o++); orderingMap.put(REPORT_OPTION, o++); orderingMap.put(REPORTING_LEVEL_OPTION, o++); orderingMap.put(PHRED33_OPTION, o++); orderingMap.put(PHRED64_OPTION, o++); orderingMap.put(THREADS_OPTION, o++); orderingMap.put(COMPRESSED_OPTION, o++); orderingMap.put(PRINT_HELP_OPTION, o++); orderingMap.put(PRINT_VERSION_OPTION, o++); orderingMap.put(PRINT_DEBUG_OPTION, o++); options.addOption(OptionBuilder.withArgName("preset name").hasArg() .withDescription("preset of pipeline parameters to use").create(PARAMETERS_SET_OPTION)); options.addOption(OptionBuilder.withArgName("species").hasArg() .withDescription("overrides species ['hs' for Homo sapiens, 'mm' for us Mus musculus] " + "(default for built-in presets is 'hs')") .create(SPECIES_OPTION)); options.addOption(OptionBuilder.withArgName("gene").hasArg() .withDescription("overrides gene: TRB or TRA (default value for built-in parameter sets is TRB)") .create(GENE_OPTION)); options.addOption(OptionBuilder.withArgName("0|1|2").hasArg() .withDescription( "overrides error correction level (0 = don't correct errors, 1 = correct sequenecing " + "errors only (see -" + QUALITY_THRESHOLD_OPTION + " and -" + LQ_OPTION + " options for details), " + "2 = also correct PCR errors (see -" + CLUSTERIZATION_OPTION + " option)") .create(ERROR_CORECTION_LEVEL_OPTION)); options.addOption(OptionBuilder.withArgName("value").hasArg().withDescription( "overrides quality threshold value for segment alignment and bad quality sequences " + "correction algorithms. 0 tells the program not to process quality information. (default is 25)") .create(QUALITY_THRESHOLD_OPTION)); if (!isProduction) options.addOption(OptionBuilder.hasArg(false) .withDescription("use this option to output average instead of " + "maximal, quality for CDR3 nucleotide sequences. (Experimental option, use with caution.)") .create(AVERAGE_QUALITY_OPTION)); options.addOption(OptionBuilder.withArgName("map | drop").hasArg() .withDescription("overrides low quality CDR3s processing strategy (drop = filter off, " + "map = map onto clonotypes created from the high quality CDR3s). This option makes no difference if " + "quality threshold (-" + QUALITY_THRESHOLD_OPTION + " option) is set to 0, or error correction " + "level (-" + ERROR_CORECTION_LEVEL_OPTION + ") is 0.") .create(LQ_OPTION)); options.addOption(OptionBuilder.withArgName("smd | ete").hasArg() .withDescription("overrides the PCR error correction algorithm: smd = \"save my diversity\", " + "ete = \"eliminate these errors\". Default value for built-in parameters is ete.") .create(CLUSTERIZATION_OPTION)); options.addOption(OptionBuilder.withArgName("0|1").hasArg() .withDescription("overrides weather include bounding Cys & Phe into CDR3 sequence") .create(INCLUDE_CYS_PHE_OPTION)); options.addOption( OptionBuilder.withArgName("# of reads").hasArg() .withDescription("limits the number of input sequencing reads, use this parameter to " + "normalize several datasets or to have a glance at the data") .create(LIMIT_OPTION)); options.addOption(OptionBuilder.withArgName("new name").hasArg() .withDescription("use this option to export presets to a local xml files").create(EXPORT_OPTION)); options.addOption(OptionBuilder.withArgName("file name").hasArg() .withDescription("use this option to write analysis report (summary) to file") .create(REPORT_OPTION)); options.addOption(OptionBuilder.withArgName("1|2|3").hasArg(true) .withDescription("output detalization level (1 = simple, 2 = medium, 3 = full, this format " + "could be deserialized using mitcr API). Affects only tab-delimited output. Default value is 3.") .create(REPORTING_LEVEL_OPTION)); options.addOption(OptionBuilder.hasArg(false).withDescription( "add this option if input file is in old illumina format with 64 byte offset for quality " + "string (MiTCR will try to automatically detect file format if one of the \"-phredXX\" options is not provided)") .create(PHRED64_OPTION)); options.addOption(OptionBuilder.hasArg(false) .withDescription("add this option if input file is in Phred+33 format for quality values " + "(MiTCR will try to automatically detect file format if one of the \"-phredXX\" options is not provided)") .create(PHRED33_OPTION)); options.addOption(OptionBuilder.withArgName("threads").hasArg() .withDescription( "specifies the number of CDR3 extraction threads (default = number of available CPU cores)") .create(THREADS_OPTION)); if (!isProduction) options.addOption(OptionBuilder.hasArg(false) .withDescription("use compressed data structures for storing individual " + "clone segments statistics (from which arises the clone segment information). This option reduces required " + "amount of memory, but introduces small stochastic errors into the algorithm which determines clone " + "segments. (Experimental option, use with caution.)") .create(COMPRESSED_OPTION)); options.addOption( OptionBuilder.hasArg(false).withDescription("print this message").create(PRINT_HELP_OPTION)); options.addOption(OptionBuilder.hasArg(false).withDescription("print version information") .create(PRINT_VERSION_OPTION)); options.addOption(OptionBuilder.hasArg(false) .withDescription("print additional information about analysis process").create(PRINT_DEBUG_OPTION)); PosixParser parser = new PosixParser(); try { long input_limit = -1; int threads = Runtime.getRuntime().availableProcessors(); int reporting_level = 3; int ec_level = 2; CommandLine cl = parser.parse(options, args, true); if (cl.hasOption(PRINT_HELP_OPTION)) { printHelp(); return; } boolean averageQuality = cl.hasOption(AVERAGE_QUALITY_OPTION), compressedAggregators = cl.hasOption(COMPRESSED_OPTION); if (cl.hasOption(PRINT_VERSION_OPTION)) { System.out.println("MiTCR by MiLaboratory, version: " + buildInformation.version); System.out.println("Branch: " + buildInformation.scmBranch); System.out.println("Built: " + buildInformation.buildDate + ", " + buildInformation.jdk + " JDK, " + "build machine: " + buildInformation.builtBy); System.out.println("SCM changeset: " + buildInformation.scmChangeset + " (" + buildInformation.scmDate.replace("\"", "") + ")"); return; } //Normal execution String paramName = cl.getOptionValue(PARAMETERS_SET_OPTION); if (paramName == null) { err.println("No parameters set is specified."); return; } Parameters params = ParametersIO.getParameters(paramName); if (params == null) { err.println("No parameters set found with name '" + paramName + "'."); return; } String value; if ((value = cl.getOptionValue(THREADS_OPTION)) != null) threads = Integer.decode(value); if ((value = cl.getOptionValue(REPORTING_LEVEL_OPTION)) != null) reporting_level = Integer.decode(value); if ((value = cl.getOptionValue(LIMIT_OPTION)) != null) input_limit = Long.decode(value); if ((value = cl.getOptionValue(GENE_OPTION)) != null) params.setGene(Gene.fromXML(value)); if ((value = cl.getOptionValue(SPECIES_OPTION)) != null) params.setSpecies(Species.getFromShortName(value)); if ((value = cl.getOptionValue(INCLUDE_CYS_PHE_OPTION)) != null) { if (value.equals("1")) params.getCDR3ExtractorParameters().setIncludeCysPhe(true); else if (value.equals("0")) params.getCDR3ExtractorParameters().setIncludeCysPhe(false); else { err.println("Illegal value for -" + INCLUDE_CYS_PHE_OPTION + " parameter."); return; } } if ((value = cl.getOptionValue(ERROR_CORECTION_LEVEL_OPTION)) != null) { int v = Integer.decode(value); ec_level = v; if (v == 0) { params.setCloneGeneratorParameters(new BasicCloneGeneratorParameters()); params.setClusterizationType(CloneClusterizationType.None); } else if (v == 1) { params.setCloneGeneratorParameters(new LQMappingCloneGeneratorParameters()); params.setClusterizationType(CloneClusterizationType.None); } else if (v == 2) { params.setCloneGeneratorParameters(new LQMappingCloneGeneratorParameters()); params.setClusterizationType(CloneClusterizationType.OneMismatch, .1f); } else throw new RuntimeException("This (" + v + ") error correction level is not supported."); } if ((value = cl.getOptionValue(QUALITY_THRESHOLD_OPTION)) != null) { int v = Integer.decode(value); if (v == 0) params.setQualityInterpretationStrategy(new DummyQualityInterpretationStrategy()); else params.setQualityInterpretationStrategy(new IlluminaQualityInterpretationStrategy((byte) v)); } if ((value = cl.getOptionValue(LQ_OPTION)) != null) if (ec_level > 0) switch (value) { case "map": params.setCloneGeneratorParameters(new LQMappingCloneGeneratorParameters( ((BasicCloneGeneratorParameters) params.getCloneGeneratorParameters()) .getSegmentInformationAggregationFactor(), 3, true)); break; case "drop": params.setCloneGeneratorParameters(new LQFilteringOffCloneGeneratorParameters( ((BasicCloneGeneratorParameters) params.getCloneGeneratorParameters()) .getSegmentInformationAggregationFactor())); break; default: throw new RuntimeException("Wrong value for -" + LQ_OPTION + " option."); } if ((value = cl.getOptionValue(CLUSTERIZATION_OPTION)) != null) if (ec_level > 1) // == 2 switch (value) { case "smd": params.setClusterizationType(CloneClusterizationType.V2D1J2T3Explicit); break; case "ete": params.setClusterizationType(CloneClusterizationType.OneMismatch); break; default: throw new RuntimeException("Wrong value for -" + CLUSTERIZATION_OPTION + " option."); } ((BasicCloneGeneratorParameters) params.getCloneGeneratorParameters()) .setAccumulatorType(AccumulatorType.get(compressedAggregators, averageQuality)); if ((value = cl.getOptionValue(EXPORT_OPTION)) != null) { //Exporting parameters ParametersIO.exportParameters(params, value); return; } String[] offArgs = cl.getArgs(); if (offArgs.length == 0) { err.println("Input file not specified."); return; } else if (offArgs.length == 1) { err.println("Output file not specified."); return; } else if (offArgs.length > 2) { err.println("Unrecognized argument."); return; } String inputFileName = offArgs[0]; String outputFileName = offArgs[1]; File input = new File(inputFileName); if (!input.exists()) { err.println("Input file not found."); return; } //TODO This also done inside SFastqReader constructor CompressionType compressionType = CompressionType.None; if (inputFileName.endsWith(".gz")) compressionType = CompressionType.GZIP; QualityFormat format = null; // If variable remains null file format will be detected automatically if (cl.hasOption(PHRED33_OPTION)) format = QualityFormat.Phred33; if (cl.hasOption(PHRED64_OPTION)) if (format == null) format = QualityFormat.Phred64; else { err.println( "Options: -" + PHRED33_OPTION + " and -" + PHRED64_OPTION + " are mutually exclusive"); return; } SFastqReader reads = format == null ? new SFastqReader(input, compressionType) : new SFastqReader(input, format, compressionType); OutputPort<SSequencingRead> inputToPipeline = reads; if (input_limit >= 0) inputToPipeline = new CountLimitingOutputPort<>(inputToPipeline, input_limit); SegmentLibrary library = DefaultSegmentLibrary.load(); AnalysisStatisticsAggregator statisticsAggregator = new AnalysisStatisticsAggregator(); FullPipeline pipeline = new FullPipeline(inputToPipeline, params, false, library); pipeline.setThreads(threads); pipeline.setAnalysisListener(statisticsAggregator); new Thread(new SmartProgressReporter(pipeline, err)).start(); // Printing status to the standard error stream pipeline.run(); if (cl.hasOption(PRINT_DEBUG_OPTION)) { err.println("Memory = " + (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())); err.println("Clusterization: " + pipeline.getQC().getReadsClusterized() + "% of reads, " + pipeline.getQC().getClonesClusterized() + " % clones"); } CloneSetClustered cloneSet = pipeline.getResult(); if ((value = cl.getOptionValue(REPORT_OPTION)) != null) { File file = new File(value); TablePrintStreamAdapter table; if (file.exists()) table = new TablePrintStreamAdapter(new FileOutputStream(file, true)); else { table = new TablePrintStreamAdapter(file); ReportExporter.printHeader(table); } //CloneSetQualityControl qc = new CloneSetQualityControl(library, params.getSpecies(), params.getGene(), cloneSet); ReportExporter.printRow(table, inputFileName, outputFileName, pipeline.getQC(), statisticsAggregator); table.close(); } if (outputFileName.endsWith(".cls")) ClsExporter.export(pipeline, outputFileName.replace(".cls", "") + " " + new Date().toString(), input.getName(), outputFileName); else { //Dry run if (outputFileName.startsWith("-")) return; ExportDetalizationLevel detalization = ExportDetalizationLevel.fromLevel(reporting_level); CompressionType compressionType1 = CompressionType.None; if (outputFileName.endsWith(".gz")) compressionType1 = CompressionType.GZIP; CloneSetIO.exportCloneSet(outputFileName, cloneSet, detalization, params, input.getAbsolutePath(), compressionType1); } } catch (ParseException | RuntimeException | IOException e) { err.println("Error occurred in the analysis pipeline."); err.println(); e.printStackTrace(); //printHelp(); } }
From source file:com.cyberway.issue.crawler.CommandLineParser.java
/** * Constructor.//from www . j av a 2 s . co m * * @param args Command-line arguments to process. * @param out PrintStream to write on. * @param version Heritrix version * * @throws ParseException Failied parse of command line. */ public CommandLineParser(String[] args, PrintWriter out, String version) throws ParseException { super(); this.out = out; this.version = version; this.options = new Options(); this.options.addOption(new Option("h", "help", false, "Prints this message and exits.")); this.options.addOption(new Option("b", "bind", true, "Comma-separated list of IP addresses or hostnames for web server " + "to listen on. Set to / to listen on all available\nnetwork " + "interfaces. Default is 127.0.0.1.")); this.options.addOption(new Option("p", "port", true, "Port to run web user interface on. Default: 8080.")); this.options.addOption(new Option("a", "admin", true, "Login and password for web user interface administration. " + "Required (unless passed via the 'heritrix.cmdline.admin'\n" + "system property). Pass value of the form 'LOGIN:PASSWORD'.")); this.options .addOption(new Option("r", "run", false, "Put heritrix into run mode. If ORDER.XML begin crawl.")); this.options.addOption( new Option("n", "nowui", false, "Put heritrix into run mode and begin crawl using ORDER.XML." + " Do not put up web user interface.")); Option option = new Option("s", "selftest", true, "Run the integrated selftests. Pass test name to test it only" + " (Case sensitive: E.g. pass 'Charset' to run charset selftest)."); option.setOptionalArg(true); this.options.addOption(option); PosixParser parser = new PosixParser(); try { this.commandLine = parser.parse(this.options, args, false); } catch (UnrecognizedOptionException e) { usage(e.getMessage(), 1); } }
From source file:is.landsbokasafn.deduplicator.indexer.CommandLineParser.java
/** * Constructor./*from w w w. j a va 2 s . c om*/ * * @param args Command-line arguments to process. * @param out PrintStream to write on. * * @throws ParseException Failed parse of command line. */ public CommandLineParser(String[] args, PrintWriter out) throws ParseException { super(); this.out = out; this.options = new Options(); this.options.addOption(new Option("h", "help", false, "Prints this message and exits.")); Option opt = new Option("u", "no-url-index", false, "Do not index the URLs. Index will only be searchable by digest. " + "Choosing this also sets --no-canonicalized."); this.options.addOption(opt); this.options.addOption(new Option("s", "no-canonicalized", false, "Do not add a canonicalized version of the URL to the index.")); this.options.addOption( new Option("e", "etag", false, "Include etags in the index (if available in the source).")); opt = new Option("m", "mime", true, "A filter on what mime types are added into the index " + "(blacklist). Default: ^text/.*"); opt.setArgName("reg.expr."); this.options.addOption(opt); this.options.addOption( new Option("w", "whitelist", false, "Make the --mime filter a whitelist instead of blacklist.")); this.options.addOption( new Option("v", "verbose", false, "Make the program print progress info to standard out.")); opt = new Option("i", "iterator", true, "An iterator suitable for the source data (default iterator " + "works WARC files)."); opt.setArgName("classname"); this.options.addOption(opt); this.options.addOption(new Option("a", "add", false, "Add source data to existing index.")); PosixParser parser = new PosixParser(); try { this.commandLine = parser.parse(this.options, args, false); } catch (UnrecognizedOptionException e) { usage(e.getMessage(), 1); } }