Example usage for the org.apache.commons.cli PosixParser constructor

Introduction

On this page you can find example usage of the org.apache.commons.cli PosixParser constructor.

Prototype

PosixParser()
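
The constructor takes no arguments. A minimal sketch of the typical pattern is shown below: build an Options object, parse the argument array with PosixParser, then query the resulting CommandLine. The option names here are illustrative only; note that PosixParser is deprecated since Commons CLI 1.3 in favor of DefaultParser.

import org.apache.commons.cli.*;

public class PosixParserSketch {
    public static void main(String[] args) throws ParseException {
        Options options = new Options();
        options.addOption("v", "verbose", false, "enable verbose output");
        options.addOption("o", "out", true, "output file");

        CommandLineParser parser = new PosixParser();
        CommandLine cmd = parser.parse(options, args);

        if (cmd.hasOption("v")) {
            System.err.println("verbose mode enabled");
        }
        // the second argument is the default used when -o is absent
        String out = cmd.getOptionValue("o", "out.txt");
        System.out.println("writing to " + out);
    }
}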

Usage

From source file:com.genentech.chemistry.openEye.apps.SDFMCSSNNFinder.java

public static void main(String... args) throws IOException {
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;
    try {
        cmd = parser.parse(options, args);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        exitWithHelp();
    }

    args = cmd.getArgs();
    if (args.length > 0) {
        exitWithHelp("Unknown param: " + args[0]);
    }

    if (cmd.hasOption("d")) {
        System.err.println("Start debugger and press return:");
        new BufferedReader(new InputStreamReader(System.in)).readLine();
    }

    int nCpu = 1;
    int maxNeighbors = 1;
    double minSim = 0D;

    String idTag = cmd.getOptionValue("idTag");
    boolean printAll = cmd.hasOption("printAll");

    String d = cmd.getOptionValue("nCpu");
    if (d != null)
        nCpu = Integer.parseInt(d);

    d = cmd.getOptionValue("maxNeighbors");
    if (d != null)
        maxNeighbors = Integer.parseInt(d);

    d = cmd.getOptionValue("minSimilarity");
    if (d != null)
        minSim = Double.parseDouble(d);

    String countAboveSimilarityStr = cmd.getOptionValue("countSimilarAbove");

    String inFile = cmd.getOptionValue("in");
    String outFile = cmd.getOptionValue("out");
    String refFile = cmd.getOptionValue("ref");

    String tabOutput = cmd.getOptionValue("tabOutput");
    boolean outputDuplicates = cmd.hasOption("outputDuplicates");

    if (outputDuplicates && tabOutput != null)
        exitWithHelp("-outputDuplicates will not work with outputVTab");
    if (outputDuplicates && refFile == null)
        exitWithHelp("-outputDuplicates requires -ref ");
    if ("tab".equalsIgnoreCase(tabOutput) && refFile != null)
        exitWithHelp("-tabOutput tab: does not work with reference file");
    if ("tab".equalsIgnoreCase(tabOutput) && maxNeighbors == 1)
        exitWithHelp("-tabOutput tab: does not make sense with -maxNeighbors = 1");
    if (cmd.hasOption("countSimilarAbove") && tabOutput != null)
        exitWithHelp("-countSimilarAbove not supported for tab or vTab output");
    if (printAll && !(maxNeighbors > 1 || minSim > 0))
        exitWithHelp("printAll only supported if: maxNeighbors > 1 or minSim > 0");

    if (printAll && tabOutput != null)
        System.err.println("WARNING: printAll ignored for tab output!\n");

    SimComparatorFactory<OEMolBase, OEMolBase, SimComparator<OEMolBase>> compFact;
    compFact = getComparatorFactory(cmd);

    if (refFile == null) { // no reference file; run all by all comparison
        performMatrixNNSearch(inFile, outFile, tabOutput, compFact, minSim, maxNeighbors, idTag, nCpu,
                countAboveSimilarityStr, printAll);

    } else { // reference file; compare inFile to refFile
        performReferenceSearch(inFile, refFile, outFile, tabOutput, compFact, minSim, maxNeighbors, idTag, nCpu,
                countAboveSimilarityStr, outputDuplicates, printAll);
    }

}

From source file:com.genentech.chemistry.openEye.apps.SDFCatsIndexer.java

/**
 * @param args
 */
public static void main(String... args) throws IOException { // create command line Options object
    Options options = new Options();
    Option opt = new Option(OPT_INFILE, true,
            "input file oe-supported. Use .sdf|.smi to specify the file type.");
    opt.setRequired(true);
    opt.setArgName("fn");
    options.addOption(opt);

    opt = new Option(OPT_OUTFILE, true, "output file oe-supported. Use .sdf|.smi to specify the file type.");
    opt.setRequired(true);
    opt.setArgName("fn");
    options.addOption(opt);

    opt = new Option(OPT_NORMALIZATION, true,
            "Normalization method: Counts|CountsPerAtom|CountsPerFeature(def) multiple allowed");
    opt.setArgName("meth");
    options.addOption(opt);

    opt = new Option(OPT_PRINTDESC, false,
            "Causes the descriptor for describing each linear path in a molceule to be created");
    options.addOption(opt);

    opt = new Option(OPT_RGROUPTYPES, false, "treat RGroup attachment point ([U]) as atom type.");
    options.addOption(opt);

    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;
    try {
        cmd = parser.parse(options, args);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        exitWithHelp(options);
    }

    String inFile = cmd.getOptionValue(OPT_INFILE);
    String outFile = cmd.getOptionValue(OPT_OUTFILE);

    AtomTyperInterface[] myTypes = CATSIndexer.typers;
    String tagPrefix = "";
    if (cmd.hasOption(OPT_RGROUPTYPES)) {
        myTypes = CATSIndexer.rgroupTypers;
        tagPrefix = "RG";
    }

    if (cmd.hasOption(OPT_PRINTDESC)) {
        SDFCatsIndexer sdfIndexer = new SDFCatsIndexer(myTypes, tagPrefix);
        sdfIndexer.printDescriptors(inFile, outFile);
        sdfIndexer.close();
        return;
    }

    EnumSet<Normalization> normMeth = EnumSet.noneOf(Normalization.class);
    if (cmd.hasOption(OPT_NORMALIZATION))
        for (String n : cmd.getOptionValues(OPT_NORMALIZATION))
            normMeth.add(Normalization.valueOf(n));
    else
        normMeth.add(Normalization.CountsPerFeature);

    SDFCatsIndexer sdfIndexer = new SDFCatsIndexer(myTypes, tagPrefix);
    sdfIndexer.run(inFile, outFile, normMeth);
    sdfIndexer.close();
}

From source file:com.flaptor.hounder.indexer.RmiIndexerStub.java

public static void main(String[] args) {

    // create the parser
    CommandLineParser parser = new PosixParser();
    CommandLine line = null;
    Options options = getOptions();
    try {
        // parse the command line arguments
        line = parser.parse(options, args);
    } catch (ParseException exp) {
        // oops, something went wrong
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("RmiIndexerStub -h <hostName> -p <basePort> [options] ", options);
        System.exit(1);
    }

    boolean doOptimize = line.hasOption("optimize");
    boolean doCheckpoint = line.hasOption("checkpoint");
    boolean doStop = line.hasOption("stop");
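    // getOptionObject returns the option value parsed to its declared type;
    // 'port' is presumably declared as a Number option in getOptions(), hence the Long cast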
    Integer port = ((Long) line.getOptionObject("port")).intValue();
    String host = line.getOptionValue("host");

    try {
        RmiIndexerStub stub = new RmiIndexerStub(port, host);

        if (line.hasOption("deleteUrl")) {
            String url = line.getOptionValue("deleteUrl");
            Document dom = generateDeleteDocument(url);
            indexOrFail(stub, dom, "Could not delete " + url);
            System.out.println("delete " + url + " command accepted by indexer");
        }

        if (line.hasOption("deleteFile")) {

            BufferedReader reader = new BufferedReader(new FileReader(line.getOptionValue("deleteFile")));
            while (reader.ready()) {
                String url = reader.readLine();
                if (url.length() > 0 && url.charAt(0) != '#') { // ignore empty lines and comments
                    Document dom = generateDeleteDocument(url);
                    indexOrFail(stub, dom, "Could not delete " + url);
                    System.out.println("delete " + url + " command accepted by indexer");
                }
            }
            reader.close();
        }

        if (doOptimize) {
            Document dom = generateCommandDocument("optimize");
            indexOrFail(stub, dom, "Could not send optimize command.");
            System.out.println("optimize command accepted by indexer");
        }

        if (doCheckpoint) {
            Document dom = generateCommandDocument("checkpoint");
            indexOrFail(stub, dom, "Could not send checkpoint command.");
            System.out.println("checkpoint command accepted by indexer");

        }
        if (doStop) {
            Document dom = generateCommandDocument("close");
            indexOrFail(stub, dom, "Could not send stop command.");
            System.out.println("stop command accepted by indexer");
        }
    } catch (Exception e) {
        System.err.println("An error occurred: " + e.getMessage());
        e.printStackTrace();
    }
}

From source file:edu.msu.cme.rdp.kmer.KmerFilter.java

public static void main(String[] args) throws Exception {
    final KmerTrie kmerTrie;
    final SeqReader queryReader;
    final SequenceType querySeqType;
    final File queryFile;
    final KmerStartsWriter out;
    final boolean translQuery;
    final int wordSize;
    final int translTable;
    final boolean alignedSeqs;
    final List<String> refLabels = new ArrayList<>();
    final int maxThreads;

    try {
        CommandLine cmdLine = new PosixParser().parse(options, args);
        args = cmdLine.getArgs();

        if (args.length < 3) {
            throw new Exception("Unexpected number of arguments");
        }

        if (cmdLine.hasOption("out")) {
            out = new KmerStartsWriter(cmdLine.getOptionValue("out"));
        } else {
            out = new KmerStartsWriter(System.out);
        }

        if (cmdLine.hasOption("aligned")) {
            alignedSeqs = true;
        } else {
            alignedSeqs = false;
        }

        if (cmdLine.hasOption("transl-table")) {
            translTable = Integer.valueOf(cmdLine.getOptionValue("transl-table"));
        } else {
            translTable = 11;
        }

        if (cmdLine.hasOption("threads")) {
            maxThreads = Integer.valueOf(cmdLine.getOptionValue("threads"));
        } else {
            maxThreads = Runtime.getRuntime().availableProcessors();
        }

        queryFile = new File(args[1]);
        wordSize = Integer.valueOf(args[0]);
        SequenceType refSeqType = null;

        querySeqType = SeqUtils.guessSequenceType(queryFile);
        queryReader = new SequenceReader(queryFile);

        if (querySeqType == SequenceType.Protein) {
            throw new Exception("Expected nucl query sequences");
        }

        refSeqType = SeqUtils
                .guessSequenceType(new File(args[2].contains("=") ? args[2].split("=")[1] : args[2]));

        translQuery = refSeqType == SequenceType.Protein;

        if (translQuery && wordSize % 3 != 0) {
            throw new Exception("Word size must be a multiple of 3 for nucl ref seqs");
        }

        int trieWordSize;
        if (translQuery) {
            trieWordSize = wordSize / 3;
        } else {
            trieWordSize = wordSize;
        }
        kmerTrie = new KmerTrie(trieWordSize, translQuery);

        for (int index = 2; index < args.length; index++) {
            String refName;
            String refFileName = args[index];
            if (refFileName.contains("=")) {
                String[] lexemes = refFileName.split("=");
                refName = lexemes[0];
                refFileName = lexemes[1];
            } else {
                String tmpName = new File(refFileName).getName();
                if (tmpName.contains(".")) {
                    refName = tmpName.substring(0, tmpName.lastIndexOf("."));
                } else {
                    refName = tmpName;
                }
            }

            File refFile = new File(refFileName);

            if (refSeqType != SeqUtils.guessSequenceType(refFile)) {
                throw new Exception(
                        "Reference file " + refFile + " contains " + SeqUtils.guessFileFormat(refFile)
                                + " sequences but expected " + refSeqType + " sequences");
            }

            SequenceReader seqReader = new SequenceReader(refFile);
            Sequence seq;

            while ((seq = seqReader.readNextSequence()) != null) {
                if (seq.getSeqName().startsWith("#")) {
                    continue;
                }
                if (alignedSeqs) {
                    kmerTrie.addModelSequence(seq, refLabels.size());
                } else {
                    kmerTrie.addSequence(seq, refLabels.size());
                }
            }
            seqReader.close();

            refLabels.add(refName);
        }

    } catch (Exception e) {
        new HelpFormatter().printHelp("KmerSearch <word_size> <query_file> [name=]<ref_file> ...", options);
        System.err.println(e.getMessage());
        e.printStackTrace();
        System.exit(1);
        throw new RuntimeException("Stupid jvm"); //While this will never get thrown it is required to make sure javac doesn't get confused about uninitialized variables
    }

    long startTime = System.currentTimeMillis();
    long seqCount = 0;
    final int maxTasks = 25000;

    System.err.println("Starting kmer mapping at " + new Date());
    System.err.println("*  Number of threads:       " + maxThreads);
    System.err.println("*  References:              " + refLabels);
    System.err.println("*  Reads file:              " + queryFile);
    System.err.println("*  Kmer length:             " + kmerTrie.getWordSize());

    final AtomicInteger processed = new AtomicInteger();
    final AtomicInteger outstandingTasks = new AtomicInteger();

    ExecutorService service = Executors.newFixedThreadPool(maxThreads);

    Sequence querySeq;

    while ((querySeq = queryReader.readNextSequence()) != null) {
        seqCount++;

        String seqString = querySeq.getSeqString();

        if (seqString.length() < 3) {
            System.err.println("Sequence " + querySeq.getSeqName() + "'s length is less than 3");
            continue;
        }

        final Sequence threadSeq = querySeq;

        Runnable r = new Runnable() {

            public void run() {
                try {
                    processSeq(threadSeq, refLabels, kmerTrie, out, wordSize, translQuery, translTable, false);
                    processSeq(threadSeq, refLabels, kmerTrie, out, wordSize, translQuery, translTable, true);
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }

                processed.incrementAndGet();
                outstandingTasks.decrementAndGet();
            }
        };

        outstandingTasks.incrementAndGet();
        service.submit(r);

        // crude back-pressure: busy-wait until outstanding tasks drop below maxTasks
        while (outstandingTasks.get() >= maxTasks)
            ;

        if ((processed.get() + 1) % 1000000 == 0) {
            System.err.println("Processed " + processed + " sequences in "
                    + (System.currentTimeMillis() - startTime) + " ms");
        }
    }

    service.shutdown();
    service.awaitTermination(1, TimeUnit.DAYS);

    System.err.println("Finished Processed " + processed + " sequences in "
            + (System.currentTimeMillis() - startTime) + " ms");

    out.close();
}

From source file:eqtlmappingpipeline.util.ModuleEqtlNeutrophilReplication.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, LdCalculatorException {

    System.out.println(HEADER);
    System.out.println();
    System.out.flush(); //flush to make sure header is before errors
    try {
        Thread.sleep(25); //Allows flush to complete
    } catch (InterruptedException ex) {
    }

    CommandLineParser parser = new PosixParser();
    final CommandLine commandLine;
    try {
        commandLine = parser.parse(OPTIONS, args, true);
    } catch (ParseException ex) {
        System.err.println("Invalid command line arguments: " + ex.getMessage());
        System.err.println();
        new HelpFormatter().printHelp(" ", OPTIONS);
        System.exit(1);
        return;
    }

    final String[] genotypesBasePaths = commandLine.getOptionValues("g");
    final RandomAccessGenotypeDataReaderFormats genotypeDataType;
    final String replicationQtlFilePath = commandLine.getOptionValue("e");
    final String interactionQtlFilePath = commandLine.getOptionValue("i");
    final String outputFilePath = commandLine.getOptionValue("o");
    final double ldCutoff = Double.parseDouble(commandLine.getOptionValue("ld"));
    final int window = Integer.parseInt(commandLine.getOptionValue("w"));

    System.out.println("Genotype: " + Arrays.toString(genotypesBasePaths));
    System.out.println("Interaction file: " + interactionQtlFilePath);
    System.out.println("Replication file: " + replicationQtlFilePath);
    System.out.println("Output: " + outputFilePath);
    System.out.println("LD: " + ldCutoff);
    System.out.println("Window: " + window);

    try {
        if (commandLine.hasOption("G")) {
            genotypeDataType = RandomAccessGenotypeDataReaderFormats
                    .valueOf(commandLine.getOptionValue("G").toUpperCase());
        } else {
            if (genotypesBasePaths[0].endsWith(".vcf")) {
                System.err.println(
                        "Only vcf.gz is supported. Please see manual on how to do create a vcf.gz file.");
                System.exit(1);
                return;
            }
            try {
                genotypeDataType = RandomAccessGenotypeDataReaderFormats
                        .matchFormatToPath(genotypesBasePaths[0]);
            } catch (GenotypeDataException e) {
                System.err
                        .println("Unable to determine input 1 type based on specified path. Please specify -G");
                System.exit(1);
                return;
            }
        }
    } catch (IllegalArgumentException e) {
        System.err.println("Error parsing --genotypesFormat \"" + commandLine.getOptionValue("G")
                + "\" is not a valid input data format");
        System.exit(1);
        return;
    }

    final RandomAccessGenotypeData genotypeData;

    try {
        genotypeData = genotypeDataType.createFilteredGenotypeData(genotypesBasePaths, 100, null, null, null,
                0.8);
    } catch (TabixFileNotFoundException e) {
        LOGGER.fatal("Tabix file not found for input data at: " + e.getPath() + "\n"
                + "Please see README on how to create a tabix file");
        System.exit(1);
        return;
    } catch (IOException e) {
        LOGGER.fatal("Error reading input data: " + e.getMessage(), e);
        System.exit(1);
        return;
    } catch (IncompatibleMultiPartGenotypeDataException e) {
        LOGGER.fatal("Error combining the impute genotype data files: " + e.getMessage(), e);
        System.exit(1);
        return;
    } catch (GenotypeDataException e) {
        LOGGER.fatal("Error reading input data: " + e.getMessage(), e);
        System.exit(1);
        return;
    }

    ChrPosTreeMap<ArrayList<ReplicationQtl>> replicationQtls = new ChrPosTreeMap<>();

    CSVReader replicationQtlReader = new CSVReader(new FileReader(replicationQtlFilePath), '\t');
    replicationQtlReader.readNext();//skip header
    String[] replicationLine;
    while ((replicationLine = replicationQtlReader.readNext()) != null) {

        try {

            GeneticVariant variant = genotypeData.getSnpVariantByPos(replicationLine[REPLICATION_SNP_CHR_COL],
                    Integer.parseInt(replicationLine[REPLICATION_SNP_POS_COL]));
            if (variant == null) {
                continue;
            }

            ReplicationQtl replicationQtl = new ReplicationQtl(replicationLine[REPLICATION_SNP_CHR_COL],
                    Integer.parseInt(replicationLine[REPLICATION_SNP_POS_COL]),
                    replicationLine[REPLICATION_GENE_COL],
                    Double.parseDouble(replicationLine[REPLICATION_BETA_COL]),
                    variant.getAlternativeAlleles().get(0).getAlleleAsString());
            ArrayList<ReplicationQtl> posReplicationQtls = replicationQtls.get(replicationQtl.getChr(),
                    replicationQtl.getPos());
            if (posReplicationQtls == null) {
                posReplicationQtls = new ArrayList<>();
                replicationQtls.put(replicationQtl.getChr(), replicationQtl.getPos(), posReplicationQtls);
            }
            posReplicationQtls.add(replicationQtl);

        } catch (Exception e) {
            System.out.println(Arrays.toString(replicationLine));
            throw e;
        }
    }

    int interactionSnpNotInGenotypeData = 0;
    int noReplicationQtlsInWindow = 0;
    int noReplicationQtlsInLd = 0;
    int multipleReplicationQtlsInLd = 0;
    int replicationTopSnpNotInGenotypeData = 0;

    final CSVWriter outputWriter = new CSVWriter(new FileWriter(new File(outputFilePath)), '\t', '\0');
    final String[] outputLine = new String[14];
    int c = 0;
    outputLine[c++] = "Chr";
    outputLine[c++] = "Pos";
    outputLine[c++] = "SNP";
    outputLine[c++] = "Gene";
    outputLine[c++] = "Module";
    outputLine[c++] = "DiscoveryZ";
    outputLine[c++] = "ReplicationZ";
    outputLine[c++] = "DiscoveryZCorrected";
    outputLine[c++] = "ReplicationZCorrected";
    outputLine[c++] = "DiscoveryAlleleAssessed";
    outputLine[c++] = "ReplicationAlleleAssessed";
    outputLine[c++] = "bestLd";
    outputLine[c++] = "bestLd_dist";
    outputLine[c++] = "nextLd";
    outputWriter.writeNext(outputLine);

    HashSet<String> notFound = new HashSet<>();

    CSVReader interactionQtlReader = new CSVReader(new FileReader(interactionQtlFilePath), '\t');
    interactionQtlReader.readNext();//skip header
    String[] interactionQtlLine;
    while ((interactionQtlLine = interactionQtlReader.readNext()) != null) {

        String snp = interactionQtlLine[1];
        String chr = interactionQtlLine[2];
        int pos = Integer.parseInt(interactionQtlLine[3]);
        String gene = interactionQtlLine[4];
        String alleleAssessed = interactionQtlLine[9];
        String module = interactionQtlLine[12];
        double discoveryZ = Double.parseDouble(interactionQtlLine[10]);

        GeneticVariant interactionQtlVariant = genotypeData.getSnpVariantByPos(chr, pos);

        if (interactionQtlVariant == null) {
            System.err.println("Interaction QTL SNP not found in genotype data: " + chr + ":" + pos);
            ++interactionSnpNotInGenotypeData;
            continue;
        }

        ReplicationQtl bestMatch = null;
        double bestMatchR2 = Double.NaN;
        Ld bestMatchLd = null;
        double nextBestR2 = Double.NaN;

        ArrayList<ReplicationQtl> sameSnpQtls = replicationQtls.get(chr, pos);

        if (sameSnpQtls != null) {
            for (ReplicationQtl sameSnpQtl : sameSnpQtls) {
                if (sameSnpQtl.getGene().equals(gene)) {
                    bestMatch = sameSnpQtl;
                    bestMatchR2 = 1;
                }
            }
        }

        NavigableMap<Integer, ArrayList<ReplicationQtl>> potentionalReplicationQtls = replicationQtls
                .getChrRange(chr, pos - window, true, pos + window, true);

        for (ArrayList<ReplicationQtl> potentialReplicationQtls : potentionalReplicationQtls.values()) {

            for (ReplicationQtl potentialReplicationQtl : potentialReplicationQtls) {

                if (!potentialReplicationQtl.getGene().equals(gene)) {
                    continue;
                }

                GeneticVariant potentialReplicationQtlVariant = genotypeData
                        .getSnpVariantByPos(potentialReplicationQtl.getChr(), potentialReplicationQtl.getPos());

                if (potentialReplicationQtlVariant == null) {
                    notFound.add(potentialReplicationQtl.getChr() + ":" + potentialReplicationQtl.getPos());
                    ++replicationTopSnpNotInGenotypeData;
                    continue;
                }

                Ld ld = interactionQtlVariant.calculateLd(potentialReplicationQtlVariant);
                double r2 = ld.getR2();

                if (r2 > 1) {
                    r2 = 1;
                }

                if (bestMatch == null) {
                    bestMatch = potentialReplicationQtl;
                    bestMatchR2 = r2;
                    bestMatchLd = ld;
                } else if (r2 > bestMatchR2) {
                    bestMatch = potentialReplicationQtl;
                    nextBestR2 = bestMatchR2;
                    bestMatchR2 = r2;
                    bestMatchLd = ld;
                }

            }
        }

        double replicationZ = Double.NaN;
        double replicationZCorrected = Double.NaN;
        double discoveryZCorrected = Double.NaN;

        String replicationAlleleAssessed = null;

        if (bestMatch != null) {
            replicationZ = bestMatch.getBeta();
            replicationAlleleAssessed = bestMatch.getAssessedAllele();

            if (pos != bestMatch.getPos()) {

                String commonHap = null;
                double commonHapFreq = -1;
                for (Map.Entry<String, Double> hapFreq : bestMatchLd.getHaplotypesFreq().entrySet()) {

                    double f = hapFreq.getValue();

                    if (f > commonHapFreq) {
                        commonHapFreq = f;
                        commonHap = hapFreq.getKey();
                    }

                }

                String[] commonHapAlleles = StringUtils.split(commonHap, '/');

                discoveryZCorrected = commonHapAlleles[0].equals(alleleAssessed) ? discoveryZ : discoveryZ * -1;
                replicationZCorrected = commonHapAlleles[1].equals(replicationAlleleAssessed) ? replicationZ
                        : replicationZ * -1;

            } else {

                discoveryZCorrected = discoveryZ;
                replicationZCorrected = alleleAssessed.equals(replicationAlleleAssessed) ? replicationZ
                        : replicationZ * -1;

            }

        }

        c = 0;
        outputLine[c++] = chr;
        outputLine[c++] = String.valueOf(pos);
        outputLine[c++] = snp;
        outputLine[c++] = gene;
        outputLine[c++] = module;
        outputLine[c++] = String.valueOf(discoveryZ);
        outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(replicationZ);
        outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(discoveryZCorrected);
        outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(replicationZCorrected);
        outputLine[c++] = alleleAssessed;
        outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(bestMatch.getAssessedAllele());
        outputLine[c++] = String.valueOf(bestMatchR2);
        outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(Math.abs(pos - bestMatch.getPos()));
        outputLine[c++] = String.valueOf(nextBestR2);
        outputWriter.writeNext(outputLine);

    }

    outputWriter.close();

    for (String e : notFound) {
        System.err.println("Not found: " + e);
    }

    System.out.println("interactionSnpNotInGenotypeData: " + interactionSnpNotInGenotypeData);
    System.out.println("noReplicationQtlsInWindow: " + noReplicationQtlsInWindow);
    System.out.println("noReplicationQtlsInLd: " + noReplicationQtlsInLd);
    System.out.println("multipleReplicationQtlsInLd: " + multipleReplicationQtlsInLd);
    System.out.println("replicationTopSnpNotInGenotypeData: " + replicationTopSnpNotInGenotypeData);

}

From source file:edu.msu.cme.rdp.framebot.stat.TaxonAbundance.java

/**
 * this class groups the nearest matches by phylum/class, or by match name
 * @param args
 * @throws Exception 
 */
public static void main(String[] args) throws Exception {
    HashMap<String, Double> coveragetMap = null;
    double identity = 0.0;
    try {
        CommandLine line = new PosixParser().parse(options, args);
        if (line.hasOption("seqCoverage")) {
            String coveragefile = line.getOptionValue("seqCoverage");
            coveragetMap = parseKmerCoverage(coveragefile);
        }
        if (line.hasOption("identity")) {
            identity = Double.parseDouble(line.getOptionValue("identity"));
            if (identity < 0 || identity > 100) {
                throw new IllegalArgumentException("identity cutoff should be in the range of 0 and 100");
            }
        }

        args = line.getArgs();
        if (args.length != 3) {
            throw new Exception("");
        }

    } catch (Exception e) {
        System.out.println("Command Error: " + e.getMessage());
        new HelpFormatter().printHelp(80, "[options] <FrameBot Alignment file or Dir> <seqLineage> <out file> ",
                "", options,
                "seqLineage: a tab-delimited file with ref seqID and lineage, or fasta of ref seq with lineage as the descrption"
                        + "\nframeBot alignment file or Dir: frameBot alignment files "
                        + "\noutfile: output with the nearest match count group by phylum/class; and by match name");
    }
    TaxonAbundance.mapAbundance(new File(args[0]), new File(args[1]), args[2], coveragetMap, identity);
}

From source file:edu.nyu.vida.data_polygamy.pre_processing.PreProcessing.java

/**
 * @param args
 * @throws IOException 
 * @throws ClassNotFoundException 
 * @throws InterruptedException 
 */
@SuppressWarnings("deprecation")
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option nameOption = new Option("dn", "name", true, "the name of the dataset");
    nameOption.setRequired(true);
    nameOption.setArgName("DATASET NAME");
    options.addOption(nameOption);

    Option headerOption = new Option("dh", "header", true, "the file that contains the header of the dataset");
    headerOption.setRequired(true);
    headerOption.setArgName("DATASET HEADER FILE");
    options.addOption(headerOption);

    Option defaultsOption = new Option("dd", "defaults", true,
            "the file that contains the default values of the dataset");
    defaultsOption.setRequired(true);
    defaultsOption.setArgName("DATASET DEFAULTS FILE");
    options.addOption(defaultsOption);

    Option tempResOption = new Option("t", "temporal", true,
            "desired temporal resolution (hour, day, week, or month)");
    tempResOption.setRequired(true);
    tempResOption.setArgName("TEMPORAL RESOLUTION");
    options.addOption(tempResOption);

    Option spatialResOption = new Option("s", "spatial", true,
            "desired spatial resolution (points, nbhd, zip, grid, or city)");
    spatialResOption.setRequired(true);
    spatialResOption.setArgName("SPATIAL RESOLUTION");
    options.addOption(spatialResOption);

    Option currentSpatialResOption = new Option("cs", "current-spatial", true,
            "current spatial resolution (points, nbhd, zip, grid, or city)");
    currentSpatialResOption.setRequired(true);
    currentSpatialResOption.setArgName("CURRENT SPATIAL RESOLUTION");
    options.addOption(currentSpatialResOption);

    Option indexResOption = new Option("i", "index", true, "indexes of the temporal and spatial attributes");
    indexResOption.setRequired(true);
    indexResOption.setArgName("INDEX OF SPATIO-TEMPORAL RESOLUTIONS");
    indexResOption.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(indexResOption);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp(
                "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.pre_processing.PreProcessing",
                options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp(
                "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.pre_processing.PreProcessing",
                options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println(
                    "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS.");
            formatter.printHelp(
                    "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.pre_processing.PreProcessing",
                    options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    Configuration conf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);
    String dataset = cmd.getOptionValue("dn");
    String header = cmd.getOptionValue("dh");
    String defaults = cmd.getOptionValue("dd");
    String temporalResolution = cmd.getOptionValue("t");
    String spatialResolution = cmd.getOptionValue("s");
    String gridResolution = "";
    String currentSpatialResolution = cmd.getOptionValue("cs");

    if (spatialResolution.contains("grid")) {
        String[] res = spatialResolution.split("-");
        spatialResolution = res[0];
        gridResolution = res[1];
    }

    conf.set("header", s3bucket + FrameworkUtils.dataDir + "/" + header);
    conf.set("defaults", s3bucket + FrameworkUtils.dataDir + "/" + defaults);
    conf.set("temporal-resolution", temporalResolution);
    conf.set("spatial-resolution", spatialResolution);
    conf.set("grid-resolution", gridResolution);
    conf.set("current-spatial-resolution", currentSpatialResolution);

    String[] indexes = cmd.getOptionValues("i");
    String temporalPos = "";
    Integer sizeSpatioTemp = 0;
    if (!(currentSpatialResolution.equals("points"))) {
        String spatialPos = "";
        for (int i = 0; i < indexes.length; i++) {
            temporalPos += indexes[i] + ",";
            spatialPos += indexes[++i] + ",";
            sizeSpatioTemp++;
        }
        conf.set("spatial-pos", spatialPos);
    } else {
        String xPositions = "", yPositions = "";
        for (int i = 0; i < indexes.length; i++) {
            temporalPos += indexes[i] + ",";
            xPositions += indexes[++i] + ",";
            yPositions += indexes[++i] + ",";
            sizeSpatioTemp++;
        }
        conf.set("xPositions", xPositions);
        conf.set("yPositions", yPositions);
    }
    conf.set("temporal-pos", temporalPos);

    conf.set("size-spatio-temporal", sizeSpatioTemp.toString());

    // checking resolutions

    if (utils.spatialResolution(spatialResolution) < 0) {
        System.out.println("Invalid spatial resolution: " + spatialResolution);
        System.exit(-1);
    }

    if (utils.spatialResolution(spatialResolution) == FrameworkUtils.POINTS) {
        System.out.println("The data needs to be reduced at least to neighborhoods or grid.");
        System.exit(-1);
    }

    if (utils.spatialResolution(currentSpatialResolution) < 0) {
        System.out.println("Invalid spatial resolution: " + currentSpatialResolution);
        System.exit(-1);
    }

    if (utils.spatialResolution(currentSpatialResolution) > utils.spatialResolution(spatialResolution)) {
        System.out.println("The current spatial resolution is coarser than "
                + "the desired one. You can only navigate from a fine resolution" + " to a coarser one.");
        System.exit(-1);
    }

    if (utils.temporalResolution(temporalResolution) < 0) {
        System.out.println("Invalid temporal resolution: " + temporalResolution);
        System.exit(-1);
    }

    String fileName = s3bucket + FrameworkUtils.preProcessingDir + "/" + dataset + "-" + temporalResolution
            + "-" + spatialResolution + gridResolution;
    conf.set("aggregates", fileName + ".aggregates");

    // making sure both files are removed, if they exist
    FrameworkUtils.removeFile(fileName, s3conf, s3);
    FrameworkUtils.removeFile(fileName + ".aggregates", s3conf, s3);

    /**
     * Hadoop Parameters
     * sources: http://www.slideshare.net/ImpetusInfo/ppt-on-advanced-hadoop-tuning-n-optimisation
     *          https://cloudcelebrity.wordpress.com/2013/08/14/12-key-steps-to-keep-your-hadoop-cluster-running-strong-and-performing-optimum/
     */

    conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    conf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    conf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    conf.set("mapreduce.task.io.sort.mb", "200");
    conf.set("mapreduce.task.io.sort.factor", "100");

    // using SnappyCodec for intermediate and output data ?
    // TODO: for now, using SnappyCodec -- what about LZO + Protocol Buffer serialization?
    //   LZO - http://www.oberhumer.com/opensource/lzo/#download
    //   Hadoop-LZO - https://github.com/twitter/hadoop-lzo
    //   Protocol Buffer - https://github.com/twitter/elephant-bird
    //   General Info - http://www.devx.com/Java/Article/47913
    //   Compression - http://comphadoop.weebly.com/index.html
    if (snappyCompression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        conf.set("mapreduce.output.fileoutputformat.compress.codec",
                "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        conf.set("mapreduce.output.fileoutputformat.compress.codec",
                "org.apache.hadoop.io.compress.BZip2Codec");
    }

    // TODO: this is dangerous!
    if (s3) {
        conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
    }

    Job job = new Job(conf);
    job.setJobName(dataset + "-" + temporalResolution + "-" + spatialResolution);

    job.setMapOutputKeyClass(MultipleSpatioTemporalWritable.class);
    job.setMapOutputValueClass(AggregationArrayWritable.class);

    job.setOutputKeyClass(MultipleSpatioTemporalWritable.class);
    job.setOutputValueClass(AggregationArrayWritable.class);

    job.setMapperClass(PreProcessingMapper.class);
    job.setCombinerClass(PreProcessingCombiner.class);
    job.setReducerClass(PreProcessingReducer.class);
    job.setNumReduceTasks(machineConf.getNumberReduces());
    //job.setNumReduceTasks(1);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    FileInputFormat.setInputPaths(job, new Path(s3bucket + FrameworkUtils.dataDir + "/" + dataset));
    FileOutputFormat.setOutputPath(job, new Path(fileName));

    job.setJarByClass(PreProcessing.class);

    long start = System.currentTimeMillis();
    job.submit();
    job.waitForCompletion(true);
    System.out.println(fileName + "\t" + (System.currentTimeMillis() - start));

}

From source file:edu.msu.cme.rdp.abundstats.cli.AbundMain.java

public static void main(String[] args) throws IOException {
    File inputFile;
    File resultDir = new File(".");
    RPlotter plotter = null;
    boolean isClusterFile = true;
    List<AbundStatsCalculator> statCalcs = new ArrayList<>();
    double clustCutoffFrom = Double.MIN_VALUE, clustCutoffTo = Double.MAX_VALUE;

    String usage = "Main [options] <cluster file>";
    try {
        CommandLine line = new PosixParser().parse(options, args);

        if (line.hasOption("result-dir")) {
            resultDir = new File(line.getOptionValue("result-dir"));
            if (!resultDir.exists() && !resultDir.mkdirs()) {
                throw new Exception(
                        "Result directory " + resultDir + " does not exist and could not be created");
            }
        }

        if (line.hasOption("R-location")) {
            plotter = new RPlotter();
            plotter.setCommandTemplate(rplotterTemplate);
            plotter.setRPath(line.getOptionValue("R-location"));
            plotter.setOutFileExt(".png");

            if (!new File(plotter.getRPath()).canExecute()) {
                throw new Exception(plotter.getRPath() + " does not exist or is not executable");
            }
        }

        if (line.hasOption("lower-cutoff")) {
            clustCutoffFrom = Double.valueOf(line.getOptionValue("lower-cutoff"));
        }

        if (line.hasOption("upper-cutoff")) {
            clustCutoffTo = Double.valueOf(line.getOptionValue("upper-cutoff"));
        }

        if (line.hasOption("jaccard")) {
            statCalcs.add(new Jaccard(true));
        }

        if (line.hasOption("sorensen")) {
            statCalcs.add(new Sorensen(true));
        }

        if (line.hasOption("otu-table")) {
            isClusterFile = false;
        }

        if (statCalcs.isEmpty()) {
            throw new Exception("Must specify at least one stat to compute (jaccard, sorensen)");
        }

        args = line.getArgs();
        if (args.length != 1) {
            throw new Exception("Unexpected number of command line arguments");
        }

        inputFile = new File(args[0]);

    } catch (Exception e) {
        new HelpFormatter().printHelp(usage, options);
        System.err.println("Error: " + e.getMessage());
        return;
    }

    if (isClusterFile) {
        RDPClustParser parser;
        parser = new RDPClustParser(inputFile);

        try {
            if (parser.getClusterSamples().size() == 1) {
                throw new IOException("Cluster file must have more than one sample");
            }

            List<Cutoff> cutoffs = parser.getCutoffs(clustCutoffFrom, clustCutoffTo);
            if (cutoffs.isEmpty()) {
                throw new IOException(
                        "No cutoffs in cluster file in range [" + clustCutoffFrom + "-" + clustCutoffTo + "]");
            }

            for (Cutoff cutoff : cutoffs) {
                List<Sample> samples = new ArrayList<>();

                for (ClusterSample clustSample : parser.getClusterSamples()) {
                    Sample s = new Sample(clustSample.getName());
                    for (Cluster clust : cutoff.getClusters().get(clustSample.getName())) {
                        s.addSpecies(clust.getNumberOfSeqs());
                    }
                    samples.add(s);
                }

                processSamples(samples, statCalcs, resultDir, cutoff.getCutoff() + "_", plotter);
            }

        } finally {
            parser.close();
        }
    } else {
        List<Sample> samples = new ArrayList<>();
        BufferedReader reader = new BufferedReader(new FileReader(inputFile));
        String line = reader.readLine();

        if (line == null || line.split("\\s+").length < 2) {
            throw new IOException("Must be 2 or more samples for abundance statistic calculations!");
        }
        int numSamples = line.split("\\s+").length;

        boolean header = true;
        try {
            Integer.valueOf(line.split("\\s+")[0]);
            header = false;
        } catch (Exception e) {
        }

        if (header) {
            for (String s : line.split("\\s+")) {
                samples.add(new Sample(s));
            }
        } else {
            int sample = 0;
            for (String s : line.split("\\s+")) {
                samples.add(new Sample("" + sample));
                samples.get(sample).addSpecies(Integer.valueOf(s));
                sample++;
            }
        }

        int lineno = 2;
        while ((line = reader.readLine()) != null) {
            if (line.trim().equals("")) {
                continue;
            }
            int sample = 0;
            if (line.split("\\s+").length != numSamples) {
                System.err.println(
                        "Line number " + lineno + " didn't have the expected number of samples (contained "
                                + line.split("\\s+").length + ", expected " + numSamples + ")");
            }

            for (String s : line.split("\\s+")) {
                samples.get(sample).addSpecies(Integer.valueOf(s));
                sample++;
            }

            lineno++;
        }

        processSamples(samples, statCalcs, resultDir, inputFile.getName(), plotter);
    }
}

From source file:edu.sdsc.scigraph.owlapi.loader.BatchOwlLoader.java

public static void main(String[] args) throws Exception {
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;
    try {
        cmd = parser.parse(getOptions(), args);
    } catch (ParseException e) {
        System.err.println(e.getMessage());
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(BatchOwlLoader.class.getSimpleName(), getOptions());
        System.exit(-1);
    }

    ObjectMapper mapper = new ObjectMapper(new YAMLFactory());
    OwlLoadConfiguration config = mapper.readValue(new File(cmd.getOptionValue('c').trim()),
            OwlLoadConfiguration.class);
    load(config);
    // TODO: Is Guice causing this to hang? #44
    System.exit(0);
}

From source file:it.unipd.dei.ims.falcon.CmdLine.java

public static void main(String[] args) {

    // last argument is always index path
    Options options = new Options();
    // one of these actions has to be specified
    OptionGroup actionGroup = new OptionGroup();
    actionGroup.addOption(new Option("i", true, "perform indexing")); // if dir, all files, else only one file
    actionGroup.addOption(new Option("q", true, "perform a single query"));
    actionGroup.addOption(new Option("b", false, "perform a query batch (read from stdin)"));
    actionGroup.setRequired(true);
    options.addOptionGroup(actionGroup);

    // other options
    options.addOption(new Option("l", "segment-length", true, "length of a segment (# of chroma vectors)"));
    options.addOption(
            new Option("o", "segment-overlap", true, "overlap portion of a segment (# of chroma vectors)"));
    options.addOption(new Option("Q", "quantization-level", true, "quantization level for chroma vectors"));
    options.addOption(new Option("k", "min-kurtosis", true, "minimum kurtosis for indexing chroma vectors"));
    options.addOption(new Option("s", "sub-sampling", true, "sub-sampling of chroma features"));
    options.addOption(new Option("v", "verbose", false, "verbose output (including timing info)"));
    options.addOption(new Option("T", "transposition-estimator-strategy", true,
            "parametrization for the transposition estimator strategy"));
    options.addOption(new Option("t", "n-transp", true,
            "number of transposition; if not specified, no transposition is performed"));
    options.addOption(new Option("f", "force-transp", true, "force transposition by an amount of semitones"));
    options.addOption(new Option("p", "pruning", false,
            "enable query pruning; if -P is unspecified, use default strategy"));
    options.addOption(new Option("P", "pruning-custom", true, "custom query pruning strategy"));

    // parse
    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;
    try {
        cmd = parser.parse(options, args);
        if (cmd.getArgs().length != 1)
            throw new ParseException("no index path was specified");
    } catch (ParseException ex) {
        System.err.println("ERROR - parsing command line:");
        System.err.println(ex.getMessage());
        formatter.printHelp("falcon -{i,q,b} [options] index_path", options);
        return;
    }

    // default values
    final float[] DEFAULT_TRANSPOSITION_ESTIMATOR_STRATEGY = new float[] { 0.65192807f, 0.0f, 0.0f, 0.0f,
            0.3532628f, 0.4997167f, 0.0f, 0.41703504f, 0.0f, 0.16297342f, 0.0f, 0.0f };
    final String DEFAULT_QUERY_PRUNING_STRATEGY = "ntf:0.340765*[0.001694,0.995720];ndf:0.344143*[0.007224,0.997113];"
            + "ncf:0.338766*[0.001601,0.995038];nmf:0.331577*[0.002352,0.997884];"; // TODO not the final one

    int hashes_per_segment = Integer.parseInt(cmd.getOptionValue("l", "150"));
    int overlap_per_segment = Integer.parseInt(cmd.getOptionValue("o", "50"));
    int nranks = Integer.parseInt(cmd.getOptionValue("Q", "3"));
    int subsampling = Integer.parseInt(cmd.getOptionValue("s", "1"));
    double minkurtosis = Float.parseFloat(cmd.getOptionValue("k", "-100."));
    boolean verbose = cmd.hasOption("v");
    int ntransp = Integer.parseInt(cmd.getOptionValue("t", "1"));
    TranspositionEstimator tpe = null;
    if (cmd.hasOption("t")) {
        if (cmd.hasOption("T")) {
            // TODO this if branch is yet to test
            Pattern p = Pattern.compile("\\d\\.\\d*");
            LinkedList<Double> tokens = new LinkedList<Double>();
            Matcher m = p.matcher(cmd.getOptionValue("T"));
            while (m.find())
                tokens.addLast(new Double(cmd.getOptionValue("T").substring(m.start(), m.end())));
            float[] strategy = new float[tokens.size()];
            if (strategy.length != 12) {
                System.err.println("invalid transposition estimator strategy");
                System.exit(1);
            }
            for (int i = 0; i < strategy.length; i++)
                strategy[i] = tokens.pollFirst().floatValue();
            tpe = new TranspositionEstimator(strategy); // use the custom strategy parsed from -T
        } else {
            tpe = new TranspositionEstimator(DEFAULT_TRANSPOSITION_ESTIMATOR_STRATEGY);
        }
    } else if (cmd.hasOption("f")) {
        int[] transps = parseIntArray(cmd.getOptionValue("f"));
        tpe = new ForcedTranspositionEstimator(transps);
        ntransp = transps.length;
    }
    QueryPruningStrategy qpe = null;
    if (cmd.hasOption("p")) {
        if (cmd.hasOption("P")) {
            qpe = new StaticQueryPruningStrategy(cmd.getOptionValue("P"));
        } else {
            qpe = new StaticQueryPruningStrategy(DEFAULT_QUERY_PRUNING_STRATEGY);
        }
    }

    // action
    if (cmd.hasOption("i")) {
        try {
            Indexing.index(new File(cmd.getOptionValue("i")), new File(cmd.getArgs()[0]), hashes_per_segment,
                    overlap_per_segment, subsampling, nranks, minkurtosis, tpe, verbose);
        } catch (IndexingException ex) {
            Logger.getLogger(CmdLine.class.getName()).log(Level.SEVERE, null, ex);
        } catch (IOException ex) {
            Logger.getLogger(CmdLine.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    if (cmd.hasOption("q")) {
        String queryfilepath = cmd.getOptionValue("q");
        doQuery(cmd, queryfilepath, hashes_per_segment, overlap_per_segment, nranks, subsampling, tpe, ntransp,
                minkurtosis, qpe, verbose);
    }
    if (cmd.hasOption("b")) {
        try {
            long starttime = System.currentTimeMillis();
            BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
            String line = null;
            while ((line = in.readLine()) != null && !line.trim().isEmpty())
                doQuery(cmd, line, hashes_per_segment, overlap_per_segment, nranks, subsampling, tpe, ntransp,
                        minkurtosis, qpe, verbose);
            in.close();
            long endtime = System.currentTimeMillis();
            System.out.println(String.format("total time: %ds", (endtime - starttime) / 1000));
        } catch (IOException ex) {
            Logger.getLogger(CmdLine.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}