List of usage examples for org.apache.commons.cli PosixParser PosixParser
PosixParser
From source file:com.genentech.chemistry.openEye.apps.SDFMCSSNNFinder.java
public static void main(String... args) throws IOException { CommandLineParser parser = new PosixParser(); CommandLine cmd = null;// w ww . j ava2 s .c o m try { cmd = parser.parse(options, args); } catch (Exception e) { System.err.println(e.getMessage()); exitWithHelp(); } args = cmd.getArgs(); if (args.length > 0) { exitWithHelp("Unknown param: " + args[0]); } if (cmd.hasOption("d")) { System.err.println("Start debugger and press return:"); new BufferedReader(new InputStreamReader(System.in)).readLine(); } int nCpu = 1; int maxNeighbors = 1; double minSim = 0D; String idTag = cmd.getOptionValue("idTag"); boolean printAll = cmd.hasOption("printAll"); String d = cmd.getOptionValue("nCpu"); if (d != null) nCpu = Integer.parseInt(d); d = cmd.getOptionValue("maxNeighbors"); if (d != null) maxNeighbors = Integer.parseInt(d); d = cmd.getOptionValue("minSimilarity"); if (d != null) minSim = Double.parseDouble(d); String countAboveSimilarityStr = cmd.getOptionValue("countSimilarAbove"); String inFile = cmd.getOptionValue("in"); String outFile = cmd.getOptionValue("out"); String refFile = cmd.getOptionValue("ref"); String tabOutput = cmd.getOptionValue("tabOutput"); boolean outputDuplicates = cmd.hasOption("outputDuplicates"); if (outputDuplicates && tabOutput != null) exitWithHelp("-outputDuplicates will not work with outputVTab"); if (outputDuplicates && refFile == null) exitWithHelp("-outputDuplicates requires -ref "); if ("tab".equalsIgnoreCase(tabOutput) && refFile != null) exitWithHelp("-tabOutput tab: does not work with reference file"); if ("tab".equalsIgnoreCase(tabOutput) && maxNeighbors == 1) exitWithHelp("-tabOutput tab: does not make sense with -maxNeighbors = 1"); if (cmd.hasOption("countSimilarAbove") && tabOutput != null) exitWithHelp("-countSimilarAbove not supported for tab or vTab output"); if (printAll && !(maxNeighbors > 1 || minSim > 0)) exitWithHelp("printAll only supported if: maxNeighbors > 1 or minSim > 0"); if (printAll && tabOutput != null) System.err.println("WARNING: printAll ignored for tab output!\n"); SimComparatorFactory<OEMolBase, OEMolBase, SimComparator<OEMolBase>> compFact; compFact = getComparatorFactory(cmd); if (refFile == null) { // no reference file; run all by all comparison performMatrixNNSearch(inFile, outFile, tabOutput, compFact, minSim, maxNeighbors, idTag, nCpu, countAboveSimilarityStr, printAll); } else { // refrence file; compare inFile to refFile performReferenceSearch(inFile, refFile, outFile, tabOutput, compFact, minSim, maxNeighbors, idTag, nCpu, countAboveSimilarityStr, outputDuplicates, printAll); } }
From source file:com.genentech.chemistry.openEye.apps.SDFCatsIndexer.java
/** * @param args// w ww. j a v a 2 s . c o m */ public static void main(String... args) throws IOException { // create command line Options object Options options = new Options(); Option opt = new Option(OPT_INFILE, true, "input file oe-supported Use .sdf|.smi to specify the file type."); opt.setRequired(true); opt.setArgName("fn"); options.addOption(opt); opt = new Option(OPT_OUTFILE, true, "output file oe-supported. Use .sdf|.smi to specify the file type."); opt.setRequired(true); opt.setArgName("fn"); options.addOption(opt); opt = new Option(OPT_NORMALIZATION, true, "Normalization method: Counts|CountsPerAtom|CountsPerFeature(def) multiple allowed"); opt.setArgName("meth"); options.addOption(opt); opt = new Option(OPT_PRINTDESC, false, "Causes the descriptor for describing each linear path in a molceule to be created"); options.addOption(opt); opt = new Option(OPT_RGROUPTYPES, false, "treat RGroup attachement point ([U]) as atom type."); options.addOption(opt); CommandLineParser parser = new PosixParser(); CommandLine cmd = null; try { cmd = parser.parse(options, args); } catch (Exception e) { System.err.println(e.getMessage()); exitWithHelp(options); } String inFile = cmd.getOptionValue(OPT_INFILE); String outFile = cmd.getOptionValue(OPT_OUTFILE); AtomTyperInterface[] myTypes = CATSIndexer.typers; String tagPrefix = ""; if (cmd.hasOption(OPT_RGROUPTYPES)) { myTypes = CATSIndexer.rgroupTypers; tagPrefix = "RG"; } if (cmd.hasOption(OPT_PRINTDESC)) { SDFCatsIndexer sdfIndexer = new SDFCatsIndexer(myTypes, tagPrefix); sdfIndexer.printDescriptors(inFile, outFile); sdfIndexer.close(); return; } EnumSet<Normalization> normMeth = EnumSet.noneOf(Normalization.class); if (cmd.hasOption(OPT_NORMALIZATION)) for (String n : cmd.getOptionValues(OPT_NORMALIZATION)) normMeth.add(Normalization.valueOf(n)); else normMeth.add(Normalization.CountsPerFeature); SDFCatsIndexer sdfIndexer = new SDFCatsIndexer(myTypes, tagPrefix); sdfIndexer.run(inFile, outFile, normMeth); sdfIndexer.close(); }
From source file:com.flaptor.hounder.indexer.RmiIndexerStub.java
public static void main(String[] args) { // create the parser CommandLineParser parser = new PosixParser(); CommandLine line = null;/* w w w .j a va2s.c om*/ Options options = getOptions(); try { // parse the command line arguments line = parser.parse(options, args); } catch (ParseException exp) { // oops, something went wrong HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("RmiIndexerStub -h <hostName> -p <basePort> [options] ", options); System.exit(1); } boolean doOptimize = line.hasOption("optimize"); boolean doCheckpoint = line.hasOption("checkpoint"); boolean doStop = line.hasOption("stop"); Integer port = ((Long) line.getOptionObject("port")).intValue(); String host = line.getOptionValue("host"); try { RmiIndexerStub stub = new RmiIndexerStub(port, host); if (line.hasOption("deleteUrl")) { String url = line.getOptionValue("deleteUrl"); Document dom = generateDeleteDocument(url); indexOrFail(stub, dom, "Could not delete " + url); System.out.println("delete " + url + " command accepted by indexer"); } if (line.hasOption("deleteFile")) { BufferedReader reader = new BufferedReader(new FileReader(line.getOptionValue("deleteFile"))); while (reader.ready()) { String url = reader.readLine(); if (url.length() > 0 && url.charAt(0) != '#') { // ignore empty lines and comments Document dom = generateDeleteDocument(url); indexOrFail(stub, dom, "Could not delete " + url); System.out.println("delete " + url + " command accepted by indexer"); } } reader.close(); } if (doOptimize) { Document dom = generateCommandDocument("optimize"); indexOrFail(stub, dom, "Could not send optimize command."); System.out.println("optimize command accepted by indexer"); } if (doCheckpoint) { Document dom = generateCommandDocument("checkpoint"); indexOrFail(stub, dom, "Could not send checkpoint command."); System.out.println("checkpoint command accepted by indexer"); } if (doStop) { Document dom = generateCommandDocument("close"); indexOrFail(stub, dom, "Could not send stop command."); System.out.println("stop command accepted by indexer"); } } catch (Exception e) { System.err.println("An error occurred: " + e.getMessage()); e.printStackTrace(); } }
From source file:edu.msu.cme.rdp.kmer.KmerFilter.java
public static void main(String[] args) throws Exception { final KmerTrie kmerTrie; final SeqReader queryReader; final SequenceType querySeqType; final File queryFile; final KmerStartsWriter out; final boolean translQuery; final int wordSize; final int translTable; final boolean alignedSeqs; final List<String> refLabels = new ArrayList(); final int maxThreads; try {//from www . j a v a 2s . co m CommandLine cmdLine = new PosixParser().parse(options, args); args = cmdLine.getArgs(); if (args.length < 3) { throw new Exception("Unexpected number of arguments"); } if (cmdLine.hasOption("out")) { out = new KmerStartsWriter(cmdLine.getOptionValue("out")); } else { out = new KmerStartsWriter(System.out); } if (cmdLine.hasOption("aligned")) { alignedSeqs = true; } else { alignedSeqs = false; } if (cmdLine.hasOption("transl-table")) { translTable = Integer.valueOf(cmdLine.getOptionValue("transl-table")); } else { translTable = 11; } if (cmdLine.hasOption("threads")) { maxThreads = Integer.valueOf(cmdLine.getOptionValue("threads")); } else { maxThreads = Runtime.getRuntime().availableProcessors(); } queryFile = new File(args[1]); wordSize = Integer.valueOf(args[0]); SequenceType refSeqType = null; querySeqType = SeqUtils.guessSequenceType(queryFile); queryReader = new SequenceReader(queryFile); if (querySeqType == SequenceType.Protein) { throw new Exception("Expected nucl query sequences"); } refSeqType = SeqUtils .guessSequenceType(new File(args[2].contains("=") ? args[2].split("=")[1] : args[2])); translQuery = refSeqType == SequenceType.Protein; if (translQuery && wordSize % 3 != 0) { throw new Exception("Word size must be a multiple of 3 for nucl ref seqs"); } int trieWordSize; if (translQuery) { trieWordSize = wordSize / 3; } else { trieWordSize = wordSize; } kmerTrie = new KmerTrie(trieWordSize, translQuery); for (int index = 2; index < args.length; index++) { String refName; String refFileName = args[index]; if (refFileName.contains("=")) { String[] lexemes = refFileName.split("="); refName = lexemes[0]; refFileName = lexemes[1]; } else { String tmpName = new File(refFileName).getName(); if (tmpName.contains(".")) { refName = tmpName.substring(0, tmpName.lastIndexOf(".")); } else { refName = tmpName; } } File refFile = new File(refFileName); if (refSeqType != SeqUtils.guessSequenceType(refFile)) { throw new Exception( "Reference file " + refFile + " contains " + SeqUtils.guessFileFormat(refFile) + " sequences but expected " + refSeqType + " sequences"); } SequenceReader seqReader = new SequenceReader(refFile); Sequence seq; while ((seq = seqReader.readNextSequence()) != null) { if (seq.getSeqName().startsWith("#")) { continue; } if (alignedSeqs) { kmerTrie.addModelSequence(seq, refLabels.size()); } else { kmerTrie.addSequence(seq, refLabels.size()); } } seqReader.close(); refLabels.add(refName); } } catch (Exception e) { new HelpFormatter().printHelp("KmerSearch <word_size> <query_file> [name=]<ref_file> ...", options); System.err.println(e.getMessage()); e.printStackTrace(); System.exit(1); throw new RuntimeException("Stupid jvm"); //While this will never get thrown it is required to make sure javac doesn't get confused about uninitialized variables } long startTime = System.currentTimeMillis(); long seqCount = 0; final int maxTasks = 25000; /* * if (args.length == 4) { maxThreads = Integer.valueOf(args[3]); } else { */ //} System.err.println("Starting kmer mapping at " + new Date()); System.err.println("* Number of threads: " + maxThreads); System.err.println("* References: " + refLabels); System.err.println("* Reads file: " + queryFile); System.err.println("* Kmer length: " + kmerTrie.getWordSize()); final AtomicInteger processed = new AtomicInteger(); final AtomicInteger outstandingTasks = new AtomicInteger(); ExecutorService service = Executors.newFixedThreadPool(maxThreads); Sequence querySeq; while ((querySeq = queryReader.readNextSequence()) != null) { seqCount++; String seqString = querySeq.getSeqString(); if (seqString.length() < 3) { System.err.println("Sequence " + querySeq.getSeqName() + "'s length is less than 3"); continue; } final Sequence threadSeq = querySeq; Runnable r = new Runnable() { public void run() { try { processSeq(threadSeq, refLabels, kmerTrie, out, wordSize, translQuery, translTable, false); processSeq(threadSeq, refLabels, kmerTrie, out, wordSize, translQuery, translTable, true); } catch (IOException e) { throw new RuntimeException(e); } processed.incrementAndGet(); outstandingTasks.decrementAndGet(); } }; outstandingTasks.incrementAndGet(); service.submit(r); while (outstandingTasks.get() >= maxTasks) ; if ((processed.get() + 1) % 1000000 == 0) { System.err.println("Processed " + processed + " sequences in " + (System.currentTimeMillis() - startTime) + " ms"); } } service.shutdown(); service.awaitTermination(1, TimeUnit.DAYS); System.err.println("Finished Processed " + processed + " sequences in " + (System.currentTimeMillis() - startTime) + " ms"); out.close(); }
From source file:eqtlmappingpipeline.util.ModuleEqtlNeutrophilReplication.java
/** * @param args the command line arguments *//* w ww . jav a2s. c o m*/ public static void main(String[] args) throws IOException, LdCalculatorException { System.out.println(HEADER); System.out.println(); System.out.flush(); //flush to make sure header is before errors try { Thread.sleep(25); //Allows flush to complete } catch (InterruptedException ex) { } CommandLineParser parser = new PosixParser(); final CommandLine commandLine; try { commandLine = parser.parse(OPTIONS, args, true); } catch (ParseException ex) { System.err.println("Invalid command line arguments: " + ex.getMessage()); System.err.println(); new HelpFormatter().printHelp(" ", OPTIONS); System.exit(1); return; } final String[] genotypesBasePaths = commandLine.getOptionValues("g"); final RandomAccessGenotypeDataReaderFormats genotypeDataType; final String replicationQtlFilePath = commandLine.getOptionValue("e"); final String interactionQtlFilePath = commandLine.getOptionValue("i"); final String outputFilePath = commandLine.getOptionValue("o"); final double ldCutoff = Double.parseDouble(commandLine.getOptionValue("ld")); final int window = Integer.parseInt(commandLine.getOptionValue("w")); System.out.println("Genotype: " + Arrays.toString(genotypesBasePaths)); System.out.println("Interaction file: " + interactionQtlFilePath); System.out.println("Replication file: " + replicationQtlFilePath); System.out.println("Output: " + outputFilePath); System.out.println("LD: " + ldCutoff); System.out.println("Window: " + window); try { if (commandLine.hasOption("G")) { genotypeDataType = RandomAccessGenotypeDataReaderFormats .valueOf(commandLine.getOptionValue("G").toUpperCase()); } else { if (genotypesBasePaths[0].endsWith(".vcf")) { System.err.println( "Only vcf.gz is supported. Please see manual on how to do create a vcf.gz file."); System.exit(1); return; } try { genotypeDataType = RandomAccessGenotypeDataReaderFormats .matchFormatToPath(genotypesBasePaths[0]); } catch (GenotypeDataException e) { System.err .println("Unable to determine input 1 type based on specified path. Please specify -G"); System.exit(1); return; } } } catch (IllegalArgumentException e) { System.err.println("Error parsing --genotypesFormat \"" + commandLine.getOptionValue("G") + "\" is not a valid input data format"); System.exit(1); return; } final RandomAccessGenotypeData genotypeData; try { genotypeData = genotypeDataType.createFilteredGenotypeData(genotypesBasePaths, 100, null, null, null, 0.8); } catch (TabixFileNotFoundException e) { LOGGER.fatal("Tabix file not found for input data at: " + e.getPath() + "\n" + "Please see README on how to create a tabix file"); System.exit(1); return; } catch (IOException e) { LOGGER.fatal("Error reading input data: " + e.getMessage(), e); System.exit(1); return; } catch (IncompatibleMultiPartGenotypeDataException e) { LOGGER.fatal("Error combining the impute genotype data files: " + e.getMessage(), e); System.exit(1); return; } catch (GenotypeDataException e) { LOGGER.fatal("Error reading input data: " + e.getMessage(), e); System.exit(1); return; } ChrPosTreeMap<ArrayList<ReplicationQtl>> replicationQtls = new ChrPosTreeMap<>(); CSVReader replicationQtlReader = new CSVReader(new FileReader(replicationQtlFilePath), '\t'); replicationQtlReader.readNext();//skip header String[] replicationLine; while ((replicationLine = replicationQtlReader.readNext()) != null) { try { GeneticVariant variant = genotypeData.getSnpVariantByPos(replicationLine[REPLICATION_SNP_CHR_COL], Integer.parseInt(replicationLine[REPLICATION_SNP_POS_COL])); if (variant == null) { continue; } ReplicationQtl replicationQtl = new ReplicationQtl(replicationLine[REPLICATION_SNP_CHR_COL], Integer.parseInt(replicationLine[REPLICATION_SNP_POS_COL]), replicationLine[REPLICATION_GENE_COL], Double.parseDouble(replicationLine[REPLICATION_BETA_COL]), variant.getAlternativeAlleles().get(0).getAlleleAsString()); ArrayList<ReplicationQtl> posReplicationQtls = replicationQtls.get(replicationQtl.getChr(), replicationQtl.getPos()); if (posReplicationQtls == null) { posReplicationQtls = new ArrayList<>(); replicationQtls.put(replicationQtl.getChr(), replicationQtl.getPos(), posReplicationQtls); } posReplicationQtls.add(replicationQtl); } catch (Exception e) { System.out.println(Arrays.toString(replicationLine)); throw e; } } int interactionSnpNotInGenotypeData = 0; int noReplicationQtlsInWindow = 0; int noReplicationQtlsInLd = 0; int multipleReplicationQtlsInLd = 0; int replicationTopSnpNotInGenotypeData = 0; final CSVWriter outputWriter = new CSVWriter(new FileWriter(new File(outputFilePath)), '\t', '\0'); final String[] outputLine = new String[14]; int c = 0; outputLine[c++] = "Chr"; outputLine[c++] = "Pos"; outputLine[c++] = "SNP"; outputLine[c++] = "Gene"; outputLine[c++] = "Module"; outputLine[c++] = "DiscoveryZ"; outputLine[c++] = "ReplicationZ"; outputLine[c++] = "DiscoveryZCorrected"; outputLine[c++] = "ReplicationZCorrected"; outputLine[c++] = "DiscoveryAlleleAssessed"; outputLine[c++] = "ReplicationAlleleAssessed"; outputLine[c++] = "bestLd"; outputLine[c++] = "bestLd_dist"; outputLine[c++] = "nextLd"; outputWriter.writeNext(outputLine); HashSet<String> notFound = new HashSet<>(); CSVReader interactionQtlReader = new CSVReader(new FileReader(interactionQtlFilePath), '\t'); interactionQtlReader.readNext();//skip header String[] interactionQtlLine; while ((interactionQtlLine = interactionQtlReader.readNext()) != null) { String snp = interactionQtlLine[1]; String chr = interactionQtlLine[2]; int pos = Integer.parseInt(interactionQtlLine[3]); String gene = interactionQtlLine[4]; String alleleAssessed = interactionQtlLine[9]; String module = interactionQtlLine[12]; double discoveryZ = Double.parseDouble(interactionQtlLine[10]); GeneticVariant interactionQtlVariant = genotypeData.getSnpVariantByPos(chr, pos); if (interactionQtlVariant == null) { System.err.println("Interaction QTL SNP not found in genotype data: " + chr + ":" + pos); ++interactionSnpNotInGenotypeData; continue; } ReplicationQtl bestMatch = null; double bestMatchR2 = Double.NaN; Ld bestMatchLd = null; double nextBestR2 = Double.NaN; ArrayList<ReplicationQtl> sameSnpQtls = replicationQtls.get(chr, pos); if (sameSnpQtls != null) { for (ReplicationQtl sameSnpQtl : sameSnpQtls) { if (sameSnpQtl.getGene().equals(gene)) { bestMatch = sameSnpQtl; bestMatchR2 = 1; } } } NavigableMap<Integer, ArrayList<ReplicationQtl>> potentionalReplicationQtls = replicationQtls .getChrRange(chr, pos - window, true, pos + window, true); for (ArrayList<ReplicationQtl> potentialReplicationQtls : potentionalReplicationQtls.values()) { for (ReplicationQtl potentialReplicationQtl : potentialReplicationQtls) { if (!potentialReplicationQtl.getGene().equals(gene)) { continue; } GeneticVariant potentialReplicationQtlVariant = genotypeData .getSnpVariantByPos(potentialReplicationQtl.getChr(), potentialReplicationQtl.getPos()); if (potentialReplicationQtlVariant == null) { notFound.add(potentialReplicationQtl.getChr() + ":" + potentialReplicationQtl.getPos()); ++replicationTopSnpNotInGenotypeData; continue; } Ld ld = interactionQtlVariant.calculateLd(potentialReplicationQtlVariant); double r2 = ld.getR2(); if (r2 > 1) { r2 = 1; } if (bestMatch == null) { bestMatch = potentialReplicationQtl; bestMatchR2 = r2; bestMatchLd = ld; } else if (r2 > bestMatchR2) { bestMatch = potentialReplicationQtl; nextBestR2 = bestMatchR2; bestMatchR2 = r2; bestMatchLd = ld; } } } double replicationZ = Double.NaN; double replicationZCorrected = Double.NaN; double discoveryZCorrected = Double.NaN; String replicationAlleleAssessed = null; if (bestMatch != null) { replicationZ = bestMatch.getBeta(); replicationAlleleAssessed = bestMatch.getAssessedAllele(); if (pos != bestMatch.getPos()) { String commonHap = null; double commonHapFreq = -1; for (Map.Entry<String, Double> hapFreq : bestMatchLd.getHaplotypesFreq().entrySet()) { double f = hapFreq.getValue(); if (f > commonHapFreq) { commonHapFreq = f; commonHap = hapFreq.getKey(); } } String[] commonHapAlleles = StringUtils.split(commonHap, '/'); discoveryZCorrected = commonHapAlleles[0].equals(alleleAssessed) ? discoveryZ : discoveryZ * -1; replicationZCorrected = commonHapAlleles[1].equals(replicationAlleleAssessed) ? replicationZ : replicationZ * -1; } else { discoveryZCorrected = discoveryZ; replicationZCorrected = alleleAssessed.equals(replicationAlleleAssessed) ? replicationZ : replicationZ * -1; } } c = 0; outputLine[c++] = chr; outputLine[c++] = String.valueOf(pos); outputLine[c++] = snp; outputLine[c++] = gene; outputLine[c++] = module; outputLine[c++] = String.valueOf(discoveryZ); outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(replicationZ); outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(discoveryZCorrected); outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(replicationZCorrected); outputLine[c++] = alleleAssessed; outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(bestMatch.getAssessedAllele()); outputLine[c++] = String.valueOf(bestMatchR2); outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(Math.abs(pos - bestMatch.getPos())); outputLine[c++] = String.valueOf(nextBestR2); outputWriter.writeNext(outputLine); } outputWriter.close(); for (String e : notFound) { System.err.println("Not found: " + e); } System.out.println("interactionSnpNotInGenotypeData: " + interactionSnpNotInGenotypeData); System.out.println("noReplicationQtlsInWindow: " + noReplicationQtlsInWindow); System.out.println("noReplicationQtlsInLd: " + noReplicationQtlsInLd); System.out.println("multipleReplicationQtlsInLd: " + multipleReplicationQtlsInLd); System.out.println("replicationTopSnpNotInGenotypeData: " + replicationTopSnpNotInGenotypeData); }
From source file:edu.msu.cme.rdp.framebot.stat.TaxonAbundance.java
/** * this class group the nearest matches by phylum/class, or by match * @param args/* w w w . j a v a 2 s . c o m*/ * @throws Exception */ public static void main(String[] args) throws Exception { HashMap<String, Double> coveragetMap = null; double identity = 0.0; try { CommandLine line = new PosixParser().parse(options, args); if (line.hasOption("seqCoverage")) { String coveragefile = line.getOptionValue("seqCoverage"); coveragetMap = parseKmerCoverage(coveragefile); } if (line.hasOption("identity")) { identity = Double.parseDouble(line.getOptionValue("identity")); if (identity < 0 || identity > 100) { throw new IllegalArgumentException("identity cutoff should be in the range of 0 and 100"); } } args = line.getArgs(); if (args.length != 3) { throw new Exception(""); } } catch (Exception e) { System.out.println("Command Error: " + e.getMessage()); new HelpFormatter().printHelp(80, "[options] <FrameBot Alignment file or Dir> <seqLineage> <out file> ", "", options, "seqLineage: a tab-delimited file with ref seqID and lineage, or fasta of ref seq with lineage as the descrption" + "\nframeBot alignment file or Dir: frameBot alignment files " + "\noutfile: output with the nearest match count group by phylum/class; and by match name"); } TaxonAbundance.mapAbundance(new File(args[0]), new File(args[1]), args[2], coveragetMap, identity); }
From source file:edu.nyu.vida.data_polygamy.pre_processing.PreProcessing.java
/** * @param args/*ww w. j av a2 s. c o m*/ * @throws IOException * @throws ClassNotFoundException * @throws InterruptedException */ @SuppressWarnings("deprecation") public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Options options = new Options(); Option nameOption = new Option("dn", "name", true, "the name of the dataset"); nameOption.setRequired(true); nameOption.setArgName("DATASET NAME"); options.addOption(nameOption); Option headerOption = new Option("dh", "header", true, "the file that contains the header of the dataset"); headerOption.setRequired(true); headerOption.setArgName("DATASET HEADER FILE"); options.addOption(headerOption); Option deafultsOption = new Option("dd", "defaults", true, "the file that contains the default values of the dataset"); deafultsOption.setRequired(true); deafultsOption.setArgName("DATASET DEFAULTS FILE"); options.addOption(deafultsOption); Option tempResOption = new Option("t", "temporal", true, "desired temporal resolution (hour, day, week, or month)"); tempResOption.setRequired(true); tempResOption.setArgName("TEMPORAL RESOLUTION"); options.addOption(tempResOption); Option spatialResOption = new Option("s", "spatial", true, "desired spatial resolution (points, nbhd, zip, grid, or city)"); spatialResOption.setRequired(true); spatialResOption.setArgName("SPATIAL RESOLUTION"); options.addOption(spatialResOption); Option currentSpatialResOption = new Option("cs", "current-spatial", true, "current spatial resolution (points, nbhd, zip, grid, or city)"); currentSpatialResOption.setRequired(true); currentSpatialResOption.setArgName("CURRENT SPATIAL RESOLUTION"); options.addOption(currentSpatialResOption); Option indexResOption = new Option("i", "index", true, "indexes of the temporal and spatial attributes"); indexResOption.setRequired(true); indexResOption.setArgName("INDEX OF SPATIO-TEMPORAL RESOLUTIONS"); indexResOption.setArgs(Option.UNLIMITED_VALUES); options.addOption(indexResOption); Option machineOption = new Option("m", "machine", true, "machine identifier"); machineOption.setRequired(true); machineOption.setArgName("MACHINE"); machineOption.setArgs(1); options.addOption(machineOption); Option nodesOption = new Option("n", "nodes", true, "number of nodes"); nodesOption.setRequired(true); nodesOption.setArgName("NODES"); nodesOption.setArgs(1); options.addOption(nodesOption); Option s3Option = new Option("s3", "s3", false, "data on Amazon S3"); s3Option.setRequired(false); options.addOption(s3Option); Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true, "aws access key id; " + "this is required if the execution is on aws"); awsAccessKeyIdOption.setRequired(false); awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID"); awsAccessKeyIdOption.setArgs(1); options.addOption(awsAccessKeyIdOption); Option awsSecretAccessKeyOption = new Option("aws_key", "aws-id", true, "aws secrect access key; " + "this is required if the execution is on aws"); awsSecretAccessKeyOption.setRequired(false); awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY"); awsSecretAccessKeyOption.setArgs(1); options.addOption(awsSecretAccessKeyOption); Option bucketOption = new Option("b", "s3-bucket", true, "bucket on s3; " + "this is required if the execution is on aws"); bucketOption.setRequired(false); bucketOption.setArgName("S3-BUCKET"); bucketOption.setArgs(1); options.addOption(bucketOption); Option helpOption = new Option("h", "help", false, "display this message"); helpOption.setRequired(false); options.addOption(helpOption); HelpFormatter formatter = new HelpFormatter(); CommandLineParser parser = new PosixParser(); CommandLine cmd = null; try { cmd = parser.parse(options, args); } catch (ParseException e) { formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.pre_processing.PreProcessing", options, true); System.exit(0); } if (cmd.hasOption("h")) { formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.pre_processing.PreProcessing", options, true); System.exit(0); } boolean s3 = cmd.hasOption("s3"); String s3bucket = ""; String awsAccessKeyId = ""; String awsSecretAccessKey = ""; if (s3) { if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) { System.out.println( "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS."); formatter.printHelp( "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.pre_processing.PreProcessing", options, true); System.exit(0); } s3bucket = cmd.getOptionValue("b"); awsAccessKeyId = cmd.getOptionValue("aws_id"); awsSecretAccessKey = cmd.getOptionValue("aws_key"); } boolean snappyCompression = false; boolean bzip2Compression = false; String machine = cmd.getOptionValue("m"); int nbNodes = Integer.parseInt(cmd.getOptionValue("n")); Configuration s3conf = new Configuration(); if (s3) { s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); s3conf.set("bucket", s3bucket); } Configuration conf = new Configuration(); Machine machineConf = new Machine(machine, nbNodes); String dataset = cmd.getOptionValue("dn"); String header = cmd.getOptionValue("dh"); String defaults = cmd.getOptionValue("dd"); String temporalResolution = cmd.getOptionValue("t"); String spatialResolution = cmd.getOptionValue("s"); String gridResolution = ""; String currentSpatialResolution = cmd.getOptionValue("cs"); if (spatialResolution.contains("grid")) { String[] res = spatialResolution.split("-"); spatialResolution = res[0]; gridResolution = res[1]; } conf.set("header", s3bucket + FrameworkUtils.dataDir + "/" + header); conf.set("defaults", s3bucket + FrameworkUtils.dataDir + "/" + defaults); conf.set("temporal-resolution", temporalResolution); conf.set("spatial-resolution", spatialResolution); conf.set("grid-resolution", gridResolution); conf.set("current-spatial-resolution", currentSpatialResolution); String[] indexes = cmd.getOptionValues("i"); String temporalPos = ""; Integer sizeSpatioTemp = 0; if (!(currentSpatialResolution.equals("points"))) { String spatialPos = ""; for (int i = 0; i < indexes.length; i++) { temporalPos += indexes[i] + ","; spatialPos += indexes[++i] + ","; sizeSpatioTemp++; } conf.set("spatial-pos", spatialPos); } else { String xPositions = "", yPositions = ""; for (int i = 0; i < indexes.length; i++) { temporalPos += indexes[i] + ","; xPositions += indexes[++i] + ","; yPositions += indexes[++i] + ","; sizeSpatioTemp++; } conf.set("xPositions", xPositions); conf.set("yPositions", yPositions); } conf.set("temporal-pos", temporalPos); conf.set("size-spatio-temporal", sizeSpatioTemp.toString()); // checking resolutions if (utils.spatialResolution(spatialResolution) < 0) { System.out.println("Invalid spatial resolution: " + spatialResolution); System.exit(-1); } if (utils.spatialResolution(spatialResolution) == FrameworkUtils.POINTS) { System.out.println("The data needs to be reduced at least to neighborhoods or grid."); System.exit(-1); } if (utils.spatialResolution(currentSpatialResolution) < 0) { System.out.println("Invalid spatial resolution: " + currentSpatialResolution); System.exit(-1); } if (utils.spatialResolution(currentSpatialResolution) > utils.spatialResolution(spatialResolution)) { System.out.println("The current spatial resolution is coarser than " + "the desired one. You can only navigate from a fine resolution" + " to a coarser one."); System.exit(-1); } if (utils.temporalResolution(temporalResolution) < 0) { System.out.println("Invalid temporal resolution: " + temporalResolution); System.exit(-1); } String fileName = s3bucket + FrameworkUtils.preProcessingDir + "/" + dataset + "-" + temporalResolution + "-" + spatialResolution + gridResolution; conf.set("aggregates", fileName + ".aggregates"); // making sure both files are removed, if they exist FrameworkUtils.removeFile(fileName, s3conf, s3); FrameworkUtils.removeFile(fileName + ".aggregates", s3conf, s3); /** * Hadoop Parameters * sources: http://www.slideshare.net/ImpetusInfo/ppt-on-advanced-hadoop-tuning-n-optimisation * https://cloudcelebrity.wordpress.com/2013/08/14/12-key-steps-to-keep-your-hadoop-cluster-running-strong-and-performing-optimum/ */ conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks())); conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1"); conf.set("mapreduce.reduce.shuffle.parallelcopies", "20"); conf.set("mapreduce.input.fileinputformat.split.minsize", "0"); conf.set("mapreduce.task.io.sort.mb", "200"); conf.set("mapreduce.task.io.sort.factor", "100"); // using SnappyCodec for intermediate and output data ? // TODO: for now, using SnappyCodec -- what about LZO + Protocol Buffer serialization? // LZO - http://www.oberhumer.com/opensource/lzo/#download // Hadoop-LZO - https://github.com/twitter/hadoop-lzo // Protocol Buffer - https://github.com/twitter/elephant-bird // General Info - http://www.devx.com/Java/Article/47913 // Compression - http://comphadoop.weebly.com/index.html if (snappyCompression) { conf.set("mapreduce.map.output.compress", "true"); conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec"); } if (bzip2Compression) { conf.set("mapreduce.map.output.compress", "true"); conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec"); } // TODO: this is dangerous! if (s3) { conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId); conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey); } Job job = new Job(conf); job.setJobName(dataset + "-" + temporalResolution + "-" + spatialResolution); job.setMapOutputKeyClass(MultipleSpatioTemporalWritable.class); job.setMapOutputValueClass(AggregationArrayWritable.class); job.setOutputKeyClass(MultipleSpatioTemporalWritable.class); job.setOutputValueClass(AggregationArrayWritable.class); job.setMapperClass(PreProcessingMapper.class); job.setCombinerClass(PreProcessingCombiner.class); job.setReducerClass(PreProcessingReducer.class); job.setNumReduceTasks(machineConf.getNumberReduces()); //job.setNumReduceTasks(1); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setCompressOutput(job, true); SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); FileInputFormat.setInputPaths(job, new Path(s3bucket + FrameworkUtils.dataDir + "/" + dataset)); FileOutputFormat.setOutputPath(job, new Path(fileName)); job.setJarByClass(PreProcessing.class); long start = System.currentTimeMillis(); job.submit(); job.waitForCompletion(true); System.out.println(fileName + "\t" + (System.currentTimeMillis() - start)); }
From source file:edu.msu.cme.rdp.abundstats.cli.AbundMain.java
public static void main(String[] args) throws IOException { File inputFile;/*ww w . ja v a 2 s . c o m*/ File resultDir = new File("."); RPlotter plotter = null; boolean isClusterFile = true; List<AbundStatsCalculator> statCalcs = new ArrayList(); double clustCutoffFrom = Double.MIN_VALUE, clustCutoffTo = Double.MAX_VALUE; String usage = "Main [options] <cluster file>"; try { CommandLine line = new PosixParser().parse(options, args); if (line.hasOption("result-dir")) { resultDir = new File(line.getOptionValue("result-dir")); if (!resultDir.exists() && !resultDir.mkdirs()) { throw new Exception( "Result directory " + resultDir + " does not exist and could not be created"); } } if (line.hasOption("R-location")) { plotter = new RPlotter(); plotter.setCommandTemplate(rplotterTemplate); plotter.setRPath(line.getOptionValue("R-location")); plotter.setOutFileExt(".png"); if (!new File(plotter.getRPath()).canExecute()) { throw new Exception(plotter.getRPath() + " does not exist or is not exectuable"); } } if (line.hasOption("lower-cutoff")) { clustCutoffFrom = Double.valueOf(line.getOptionValue("lower-cutoff")); } if (line.hasOption("upper-cutoff")) { clustCutoffTo = Double.valueOf(line.getOptionValue("upper-cutoff")); } if (line.hasOption("jaccard")) { statCalcs.add(new Jaccard(true)); } if (line.hasOption("sorensen")) { statCalcs.add(new Sorensen(true)); } if (line.hasOption("otu-table")) { isClusterFile = false; } if (statCalcs.isEmpty()) { throw new Exception("Must specify at least one stat to compute (jaccard, sorensen)"); } args = line.getArgs(); if (args.length != 1) { throw new Exception("Unexpected number of command line arguments"); } inputFile = new File(args[0]); } catch (Exception e) { new HelpFormatter().printHelp(usage, options); System.err.println("Error: " + e.getMessage()); return; } if (isClusterFile) { RDPClustParser parser; parser = new RDPClustParser(inputFile); try { if (parser.getClusterSamples().size() == 1) { throw new IOException("Cluster file must have more than one sample"); } List<Cutoff> cutoffs = parser.getCutoffs(clustCutoffFrom, clustCutoffTo); if (cutoffs.isEmpty()) { throw new IOException( "No cutoffs in cluster file in range [" + clustCutoffFrom + "-" + clustCutoffTo + "]"); } for (Cutoff cutoff : cutoffs) { List<Sample> samples = new ArrayList(); for (ClusterSample clustSample : parser.getClusterSamples()) { Sample s = new Sample(clustSample.getName()); for (Cluster clust : cutoff.getClusters().get(clustSample.getName())) { s.addSpecies(clust.getNumberOfSeqs()); } samples.add(s); } processSamples(samples, statCalcs, resultDir, cutoff.getCutoff() + "_", plotter); } } finally { parser.close(); } } else { List<Sample> samples = new ArrayList(); BufferedReader reader = new BufferedReader(new FileReader(inputFile)); String line = reader.readLine(); if (line == null || line.split("\\s+").length < 2) { throw new IOException("Must be 2 or more samples for abundance statistic calculations!"); } int numSamples = line.split("\\s+").length; boolean header = true; try { Integer.valueOf(line.split("\\s+")[0]); header = false; } catch (Exception e) { } if (header) { for (String s : line.split("\\s+")) { samples.add(new Sample(s)); } } else { int sample = 0; for (String s : line.split("\\s+")) { samples.add(new Sample("" + sample)); samples.get(sample).addSpecies(Integer.valueOf(s)); sample++; } } int lineno = 2; while ((line = reader.readLine()) != null) { if (line.trim().equals("")) { continue; } int sample = 0; if (line.split("\\s+").length != numSamples) { System.err.println( "Line number " + lineno + " didn't have the expected number of samples (contained " + line.split("\\s+").length + ", expected " + numSamples + ")"); } for (String s : line.split("\\s+")) { samples.get(sample).addSpecies(Integer.valueOf(s)); sample++; } lineno++; } processSamples(samples, statCalcs, resultDir, inputFile.getName(), plotter); } }
From source file:edu.sdsc.scigraph.owlapi.loader.BatchOwlLoader.java
public static void main(String[] args) throws Exception { CommandLineParser parser = new PosixParser(); CommandLine cmd = null;/* w w w.ja va 2s. co m*/ try { cmd = parser.parse(getOptions(), args); } catch (ParseException e) { System.err.println(e.getMessage()); HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(BatchOwlLoader.class.getSimpleName(), getOptions()); System.exit(-1); } ObjectMapper mapper = new ObjectMapper(new YAMLFactory()); OwlLoadConfiguration config = mapper.readValue(new File(cmd.getOptionValue('c').trim()), OwlLoadConfiguration.class); load(config); // TODO: Is Guice causing this to hang? #44 System.exit(0); }
From source file:it.unipd.dei.ims.falcon.CmdLine.java
public static void main(String[] args) { // last argument is always index path Options options = new Options(); // one of these actions has to be specified OptionGroup actionGroup = new OptionGroup(); actionGroup.addOption(new Option("i", true, "perform indexing")); // if dir, all files, else only one file actionGroup.addOption(new Option("q", true, "perform a single query")); actionGroup.addOption(new Option("b", false, "perform a query batch (read from stdin)")); actionGroup.setRequired(true);//from www.j a v a2 s. co m options.addOptionGroup(actionGroup); // other options options.addOption(new Option("l", "segment-length", true, "length of a segment (# of chroma vectors)")); options.addOption( new Option("o", "segment-overlap", true, "overlap portion of a segment (# of chroma vectors)")); options.addOption(new Option("Q", "quantization-level", true, "quantization level for chroma vectors")); options.addOption(new Option("k", "min-kurtosis", true, "minimum kurtosis for indexing chroma vectors")); options.addOption(new Option("s", "sub-sampling", true, "sub-sampling of chroma features")); options.addOption(new Option("v", "verbose", false, "verbose output (including timing info)")); options.addOption(new Option("T", "transposition-estimator-strategy", true, "parametrization for the transposition estimator strategy")); options.addOption(new Option("t", "n-transp", true, "number of transposition; if not specified, no transposition is performed")); options.addOption(new Option("f", "force-transp", true, "force transposition by an amount of semitones")); options.addOption(new Option("p", "pruning", false, "enable query pruning; if -P is unspecified, use default strategy")); options.addOption(new Option("P", "pruning-custom", true, "custom query pruning strategy")); // parse HelpFormatter formatter = new HelpFormatter(); CommandLineParser parser = new PosixParser(); CommandLine cmd = null; try { cmd = parser.parse(options, args); if (cmd.getArgs().length != 1) throw new ParseException("no index path was specified"); } catch (ParseException ex) { System.err.println("ERROR - parsing command line:"); System.err.println(ex.getMessage()); formatter.printHelp("falcon -{i,q,b} [options] index_path", options); return; } // default values final float[] DEFAULT_TRANSPOSITION_ESTIMATOR_STRATEGY = new float[] { 0.65192807f, 0.0f, 0.0f, 0.0f, 0.3532628f, 0.4997167f, 0.0f, 0.41703504f, 0.0f, 0.16297342f, 0.0f, 0.0f }; final String DEFAULT_QUERY_PRUNING_STRATEGY = "ntf:0.340765*[0.001694,0.995720];ndf:0.344143*[0.007224,0.997113];" + "ncf:0.338766*[0.001601,0.995038];nmf:0.331577*[0.002352,0.997884];"; // TODO not the final one int hashes_per_segment = Integer.parseInt(cmd.getOptionValue("l", "150")); int overlap_per_segment = Integer.parseInt(cmd.getOptionValue("o", "50")); int nranks = Integer.parseInt(cmd.getOptionValue("Q", "3")); int subsampling = Integer.parseInt(cmd.getOptionValue("s", "1")); double minkurtosis = Float.parseFloat(cmd.getOptionValue("k", "-100.")); boolean verbose = cmd.hasOption("v"); int ntransp = Integer.parseInt(cmd.getOptionValue("t", "1")); TranspositionEstimator tpe = null; if (cmd.hasOption("t")) { if (cmd.hasOption("T")) { // TODO this if branch is yet to test Pattern p = Pattern.compile("\\d\\.\\d*"); LinkedList<Double> tokens = new LinkedList<Double>(); Matcher m = p.matcher(cmd.getOptionValue("T")); while (m.find()) tokens.addLast(new Double(cmd.getOptionValue("T").substring(m.start(), m.end()))); float[] strategy = new float[tokens.size()]; if (strategy.length != 12) { System.err.println("invalid transposition estimator strategy"); System.exit(1); } for (int i = 0; i < strategy.length; i++) strategy[i] = new Float(tokens.pollFirst()); } else { tpe = new TranspositionEstimator(DEFAULT_TRANSPOSITION_ESTIMATOR_STRATEGY); } } else if (cmd.hasOption("f")) { int[] transps = parseIntArray(cmd.getOptionValue("f")); tpe = new ForcedTranspositionEstimator(transps); ntransp = transps.length; } QueryPruningStrategy qpe = null; if (cmd.hasOption("p")) { if (cmd.hasOption("P")) { qpe = new StaticQueryPruningStrategy(cmd.getOptionValue("P")); } else { qpe = new StaticQueryPruningStrategy(DEFAULT_QUERY_PRUNING_STRATEGY); } } // action if (cmd.hasOption("i")) { try { Indexing.index(new File(cmd.getOptionValue("i")), new File(cmd.getArgs()[0]), hashes_per_segment, overlap_per_segment, subsampling, nranks, minkurtosis, tpe, verbose); } catch (IndexingException ex) { Logger.getLogger(CmdLine.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(CmdLine.class.getName()).log(Level.SEVERE, null, ex); } } if (cmd.hasOption("q")) { String queryfilepath = cmd.getOptionValue("q"); doQuery(cmd, queryfilepath, hashes_per_segment, overlap_per_segment, nranks, subsampling, tpe, ntransp, minkurtosis, qpe, verbose); } if (cmd.hasOption("b")) { try { long starttime = System.currentTimeMillis(); BufferedReader in = new BufferedReader(new InputStreamReader(System.in)); String line = null; while ((line = in.readLine()) != null && !line.trim().isEmpty()) doQuery(cmd, line, hashes_per_segment, overlap_per_segment, nranks, subsampling, tpe, ntransp, minkurtosis, qpe, verbose); in.close(); long endtime = System.currentTimeMillis(); System.out.println(String.format("total time: %ds", (endtime - starttime) / 1000)); } catch (IOException ex) { Logger.getLogger(CmdLine.class.getName()).log(Level.SEVERE, null, ex); } } }