Example usage for org.apache.commons.cli PosixParser PosixParser

Introduction

In this page you can find the example usage for org.apache.commons.cli PosixParser PosixParser.

Prototype

PosixParser

Source Link

Usage

From source file:com.genentech.chemistry.openEye.apps.SDFMCSSNNFinder.java

public static void main(String... args) throws IOException {
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;// w ww  .  j  ava2 s .c  o m
    try {
        cmd = parser.parse(options, args);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        exitWithHelp();
    }

    args = cmd.getArgs();
    if (args.length > 0) {
        exitWithHelp("Unknown param: " + args[0]);
    }

    if (cmd.hasOption("d")) {
        System.err.println("Start debugger and press return:");
        new BufferedReader(new InputStreamReader(System.in)).readLine();
    }

    int nCpu = 1;
    int maxNeighbors = 1;
    double minSim = 0D;

    String idTag = cmd.getOptionValue("idTag");
    boolean printAll = cmd.hasOption("printAll");

    String d = cmd.getOptionValue("nCpu");
    if (d != null)
        nCpu = Integer.parseInt(d);

    d = cmd.getOptionValue("maxNeighbors");
    if (d != null)
        maxNeighbors = Integer.parseInt(d);

    d = cmd.getOptionValue("minSimilarity");
    if (d != null)
        minSim = Double.parseDouble(d);

    String countAboveSimilarityStr = cmd.getOptionValue("countSimilarAbove");

    String inFile = cmd.getOptionValue("in");
    String outFile = cmd.getOptionValue("out");
    String refFile = cmd.getOptionValue("ref");

    String tabOutput = cmd.getOptionValue("tabOutput");
    boolean outputDuplicates = cmd.hasOption("outputDuplicates");

    if (outputDuplicates && tabOutput != null)
        exitWithHelp("-outputDuplicates will not work with outputVTab");
    if (outputDuplicates && refFile == null)
        exitWithHelp("-outputDuplicates requires -ref ");
    if ("tab".equalsIgnoreCase(tabOutput) && refFile != null)
        exitWithHelp("-tabOutput tab: does not work with reference file");
    if ("tab".equalsIgnoreCase(tabOutput) && maxNeighbors == 1)
        exitWithHelp("-tabOutput tab: does not make sense with -maxNeighbors = 1");
    if (cmd.hasOption("countSimilarAbove") && tabOutput != null)
        exitWithHelp("-countSimilarAbove not supported for tab or vTab output");
    if (printAll && !(maxNeighbors > 1 || minSim > 0))
        exitWithHelp("printAll only supported if: maxNeighbors > 1 or minSim > 0");

    if (printAll && tabOutput != null)
        System.err.println("WARNING: printAll ignored for tab output!\n");

    SimComparatorFactory<OEMolBase, OEMolBase, SimComparator<OEMolBase>> compFact;
    compFact = getComparatorFactory(cmd);

    if (refFile == null) { // no reference file; run all by all comparison
        performMatrixNNSearch(inFile, outFile, tabOutput, compFact, minSim, maxNeighbors, idTag, nCpu,
                countAboveSimilarityStr, printAll);

    } else { // refrence file; compare inFile to refFile
        performReferenceSearch(inFile, refFile, outFile, tabOutput, compFact, minSim, maxNeighbors, idTag, nCpu,
                countAboveSimilarityStr, outputDuplicates, printAll);
    }

}

From source file:com.genentech.chemistry.openEye.apps.SDFCatsIndexer.java

/**
 * @param args// w ww. j a v  a  2  s  .  c  o  m
 */
public static void main(String... args) throws IOException { // create command line Options object
    Options options = new Options();
    Option opt = new Option(OPT_INFILE, true,
            "input file oe-supported Use .sdf|.smi to specify the file type.");
    opt.setRequired(true);
    opt.setArgName("fn");
    options.addOption(opt);

    opt = new Option(OPT_OUTFILE, true, "output file oe-supported. Use .sdf|.smi to specify the file type.");
    opt.setRequired(true);
    opt.setArgName("fn");
    options.addOption(opt);

    opt = new Option(OPT_NORMALIZATION, true,
            "Normalization method: Counts|CountsPerAtom|CountsPerFeature(def) multiple allowed");
    opt.setArgName("meth");
    options.addOption(opt);

    opt = new Option(OPT_PRINTDESC, false,
            "Causes the descriptor for describing each linear path in a molceule to be created");
    options.addOption(opt);

    opt = new Option(OPT_RGROUPTYPES, false, "treat RGroup attachement point ([U]) as atom type.");
    options.addOption(opt);

    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;
    try {
        cmd = parser.parse(options, args);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        exitWithHelp(options);
    }

    String inFile = cmd.getOptionValue(OPT_INFILE);
    String outFile = cmd.getOptionValue(OPT_OUTFILE);

    AtomTyperInterface[] myTypes = CATSIndexer.typers;
    String tagPrefix = "";
    if (cmd.hasOption(OPT_RGROUPTYPES)) {
        myTypes = CATSIndexer.rgroupTypers;
        tagPrefix = "RG";
    }

    if (cmd.hasOption(OPT_PRINTDESC)) {
        SDFCatsIndexer sdfIndexer = new SDFCatsIndexer(myTypes, tagPrefix);
        sdfIndexer.printDescriptors(inFile, outFile);
        sdfIndexer.close();
        return;
    }

    EnumSet<Normalization> normMeth = EnumSet.noneOf(Normalization.class);
    if (cmd.hasOption(OPT_NORMALIZATION))
        for (String n : cmd.getOptionValues(OPT_NORMALIZATION))
            normMeth.add(Normalization.valueOf(n));
    else
        normMeth.add(Normalization.CountsPerFeature);

    SDFCatsIndexer sdfIndexer = new SDFCatsIndexer(myTypes, tagPrefix);
    sdfIndexer.run(inFile, outFile, normMeth);
    sdfIndexer.close();
}

From source file:com.flaptor.hounder.indexer.RmiIndexerStub.java

public static void main(String[] args) {

    // create the parser
    CommandLineParser parser = new PosixParser();
    CommandLine line = null;/*  w w  w .j  a  va2s.c  om*/
    Options options = getOptions();
    try {
        // parse the command line arguments
        line = parser.parse(options, args);
    } catch (ParseException exp) {
        // oops, something went wrong
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("RmiIndexerStub -h <hostName> -p <basePort> [options] ", options);
        System.exit(1);
    }

    boolean doOptimize = line.hasOption("optimize");
    boolean doCheckpoint = line.hasOption("checkpoint");
    boolean doStop = line.hasOption("stop");
    Integer port = ((Long) line.getOptionObject("port")).intValue();
    String host = line.getOptionValue("host");

    try {
        RmiIndexerStub stub = new RmiIndexerStub(port, host);

        if (line.hasOption("deleteUrl")) {
            String url = line.getOptionValue("deleteUrl");
            Document dom = generateDeleteDocument(url);
            indexOrFail(stub, dom, "Could not delete " + url);
            System.out.println("delete " + url + " command accepted by indexer");
        }

        if (line.hasOption("deleteFile")) {

            BufferedReader reader = new BufferedReader(new FileReader(line.getOptionValue("deleteFile")));
            while (reader.ready()) {
                String url = reader.readLine();
                if (url.length() > 0 && url.charAt(0) != '#') { // ignore empty lines and comments
                    Document dom = generateDeleteDocument(url);
                    indexOrFail(stub, dom, "Could not delete " + url);
                    System.out.println("delete " + url + " command accepted by indexer");
                }
            }
            reader.close();
        }

        if (doOptimize) {
            Document dom = generateCommandDocument("optimize");
            indexOrFail(stub, dom, "Could not send optimize command.");
            System.out.println("optimize command accepted by indexer");
        }

        if (doCheckpoint) {
            Document dom = generateCommandDocument("checkpoint");
            indexOrFail(stub, dom, "Could not send checkpoint command.");
            System.out.println("checkpoint command accepted by indexer");

        }
        if (doStop) {
            Document dom = generateCommandDocument("close");
            indexOrFail(stub, dom, "Could not send stop command.");
            System.out.println("stop command accepted by indexer");
        }
    } catch (Exception e) {
        System.err.println("An error occurred: " + e.getMessage());
        e.printStackTrace();
    }
}

From source file:edu.msu.cme.rdp.kmer.KmerFilter.java

public static void main(String[] args) throws Exception {
    final KmerTrie kmerTrie;
    final SeqReader queryReader;
    final SequenceType querySeqType;
    final File queryFile;
    final KmerStartsWriter out;
    final boolean translQuery;
    final int wordSize;
    final int translTable;
    final boolean alignedSeqs;
    final List<String> refLabels = new ArrayList();
    final int maxThreads;

    try {//from www . j  a  v  a  2s . co m
        CommandLine cmdLine = new PosixParser().parse(options, args);
        args = cmdLine.getArgs();

        if (args.length < 3) {
            throw new Exception("Unexpected number of arguments");
        }

        if (cmdLine.hasOption("out")) {
            out = new KmerStartsWriter(cmdLine.getOptionValue("out"));
        } else {
            out = new KmerStartsWriter(System.out);
        }

        if (cmdLine.hasOption("aligned")) {
            alignedSeqs = true;
        } else {
            alignedSeqs = false;
        }

        if (cmdLine.hasOption("transl-table")) {
            translTable = Integer.valueOf(cmdLine.getOptionValue("transl-table"));
        } else {
            translTable = 11;
        }

        if (cmdLine.hasOption("threads")) {
            maxThreads = Integer.valueOf(cmdLine.getOptionValue("threads"));
        } else {
            maxThreads = Runtime.getRuntime().availableProcessors();
        }

        queryFile = new File(args[1]);
        wordSize = Integer.valueOf(args[0]);
        SequenceType refSeqType = null;

        querySeqType = SeqUtils.guessSequenceType(queryFile);
        queryReader = new SequenceReader(queryFile);

        if (querySeqType == SequenceType.Protein) {
            throw new Exception("Expected nucl query sequences");
        }

        refSeqType = SeqUtils
                .guessSequenceType(new File(args[2].contains("=") ? args[2].split("=")[1] : args[2]));

        translQuery = refSeqType == SequenceType.Protein;

        if (translQuery && wordSize % 3 != 0) {
            throw new Exception("Word size must be a multiple of 3 for nucl ref seqs");
        }

        int trieWordSize;
        if (translQuery) {
            trieWordSize = wordSize / 3;
        } else {
            trieWordSize = wordSize;
        }
        kmerTrie = new KmerTrie(trieWordSize, translQuery);

        for (int index = 2; index < args.length; index++) {
            String refName;
            String refFileName = args[index];
            if (refFileName.contains("=")) {
                String[] lexemes = refFileName.split("=");
                refName = lexemes[0];
                refFileName = lexemes[1];
            } else {
                String tmpName = new File(refFileName).getName();
                if (tmpName.contains(".")) {
                    refName = tmpName.substring(0, tmpName.lastIndexOf("."));
                } else {
                    refName = tmpName;
                }
            }

            File refFile = new File(refFileName);

            if (refSeqType != SeqUtils.guessSequenceType(refFile)) {
                throw new Exception(
                        "Reference file " + refFile + " contains " + SeqUtils.guessFileFormat(refFile)
                                + " sequences but expected " + refSeqType + " sequences");
            }

            SequenceReader seqReader = new SequenceReader(refFile);
            Sequence seq;

            while ((seq = seqReader.readNextSequence()) != null) {
                if (seq.getSeqName().startsWith("#")) {
                    continue;
                }
                if (alignedSeqs) {
                    kmerTrie.addModelSequence(seq, refLabels.size());
                } else {
                    kmerTrie.addSequence(seq, refLabels.size());
                }
            }
            seqReader.close();

            refLabels.add(refName);
        }

    } catch (Exception e) {
        new HelpFormatter().printHelp("KmerSearch <word_size> <query_file> [name=]<ref_file> ...", options);
        System.err.println(e.getMessage());
        e.printStackTrace();
        System.exit(1);
        throw new RuntimeException("Stupid jvm"); //While this will never get thrown it is required to make sure javac doesn't get confused about uninitialized variables
    }

    long startTime = System.currentTimeMillis();
    long seqCount = 0;
    final int maxTasks = 25000;

    /*
     * if (args.length == 4) { maxThreads = Integer.valueOf(args[3]); } else {
     */

    //}

    System.err.println("Starting kmer mapping at " + new Date());
    System.err.println("*  Number of threads:       " + maxThreads);
    System.err.println("*  References:              " + refLabels);
    System.err.println("*  Reads file:              " + queryFile);
    System.err.println("*  Kmer length:             " + kmerTrie.getWordSize());

    final AtomicInteger processed = new AtomicInteger();
    final AtomicInteger outstandingTasks = new AtomicInteger();

    ExecutorService service = Executors.newFixedThreadPool(maxThreads);

    Sequence querySeq;

    while ((querySeq = queryReader.readNextSequence()) != null) {
        seqCount++;

        String seqString = querySeq.getSeqString();

        if (seqString.length() < 3) {
            System.err.println("Sequence " + querySeq.getSeqName() + "'s length is less than 3");
            continue;
        }

        final Sequence threadSeq = querySeq;

        Runnable r = new Runnable() {

            public void run() {
                try {
                    processSeq(threadSeq, refLabels, kmerTrie, out, wordSize, translQuery, translTable, false);
                    processSeq(threadSeq, refLabels, kmerTrie, out, wordSize, translQuery, translTable, true);
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }

                processed.incrementAndGet();
                outstandingTasks.decrementAndGet();
            }
        };

        outstandingTasks.incrementAndGet();
        service.submit(r);

        while (outstandingTasks.get() >= maxTasks)
            ;

        if ((processed.get() + 1) % 1000000 == 0) {
            System.err.println("Processed " + processed + " sequences in "
                    + (System.currentTimeMillis() - startTime) + " ms");
        }
    }

    service.shutdown();
    service.awaitTermination(1, TimeUnit.DAYS);

    System.err.println("Finished Processed " + processed + " sequences in "
            + (System.currentTimeMillis() - startTime) + " ms");

    out.close();
}

From source file:eqtlmappingpipeline.util.ModuleEqtlNeutrophilReplication.java

/**
 * @param args the command line arguments
 *//*  w  ww  . jav a2s.  c  o m*/
public static void main(String[] args) throws IOException, LdCalculatorException {

    System.out.println(HEADER);
    System.out.println();
    System.out.flush(); //flush to make sure header is before errors
    try {
        Thread.sleep(25); //Allows flush to complete
    } catch (InterruptedException ex) {
    }

    CommandLineParser parser = new PosixParser();
    final CommandLine commandLine;
    try {
        commandLine = parser.parse(OPTIONS, args, true);
    } catch (ParseException ex) {
        System.err.println("Invalid command line arguments: " + ex.getMessage());
        System.err.println();
        new HelpFormatter().printHelp(" ", OPTIONS);
        System.exit(1);
        return;
    }

    final String[] genotypesBasePaths = commandLine.getOptionValues("g");
    final RandomAccessGenotypeDataReaderFormats genotypeDataType;
    final String replicationQtlFilePath = commandLine.getOptionValue("e");
    final String interactionQtlFilePath = commandLine.getOptionValue("i");
    final String outputFilePath = commandLine.getOptionValue("o");
    final double ldCutoff = Double.parseDouble(commandLine.getOptionValue("ld"));
    final int window = Integer.parseInt(commandLine.getOptionValue("w"));

    System.out.println("Genotype: " + Arrays.toString(genotypesBasePaths));
    System.out.println("Interaction file: " + interactionQtlFilePath);
    System.out.println("Replication file: " + replicationQtlFilePath);
    System.out.println("Output: " + outputFilePath);
    System.out.println("LD: " + ldCutoff);
    System.out.println("Window: " + window);

    try {
        if (commandLine.hasOption("G")) {
            genotypeDataType = RandomAccessGenotypeDataReaderFormats
                    .valueOf(commandLine.getOptionValue("G").toUpperCase());
        } else {
            if (genotypesBasePaths[0].endsWith(".vcf")) {
                System.err.println(
                        "Only vcf.gz is supported. Please see manual on how to do create a vcf.gz file.");
                System.exit(1);
                return;
            }
            try {
                genotypeDataType = RandomAccessGenotypeDataReaderFormats
                        .matchFormatToPath(genotypesBasePaths[0]);
            } catch (GenotypeDataException e) {
                System.err
                        .println("Unable to determine input 1 type based on specified path. Please specify -G");
                System.exit(1);
                return;
            }
        }
    } catch (IllegalArgumentException e) {
        System.err.println("Error parsing --genotypesFormat \"" + commandLine.getOptionValue("G")
                + "\" is not a valid input data format");
        System.exit(1);
        return;
    }

    final RandomAccessGenotypeData genotypeData;

    try {
        genotypeData = genotypeDataType.createFilteredGenotypeData(genotypesBasePaths, 100, null, null, null,
                0.8);
    } catch (TabixFileNotFoundException e) {
        LOGGER.fatal("Tabix file not found for input data at: " + e.getPath() + "\n"
                + "Please see README on how to create a tabix file");
        System.exit(1);
        return;
    } catch (IOException e) {
        LOGGER.fatal("Error reading input data: " + e.getMessage(), e);
        System.exit(1);
        return;
    } catch (IncompatibleMultiPartGenotypeDataException e) {
        LOGGER.fatal("Error combining the impute genotype data files: " + e.getMessage(), e);
        System.exit(1);
        return;
    } catch (GenotypeDataException e) {
        LOGGER.fatal("Error reading input data: " + e.getMessage(), e);
        System.exit(1);
        return;
    }

    ChrPosTreeMap<ArrayList<ReplicationQtl>> replicationQtls = new ChrPosTreeMap<>();

    CSVReader replicationQtlReader = new CSVReader(new FileReader(replicationQtlFilePath), '\t');
    replicationQtlReader.readNext();//skip header
    String[] replicationLine;
    while ((replicationLine = replicationQtlReader.readNext()) != null) {

        try {

            GeneticVariant variant = genotypeData.getSnpVariantByPos(replicationLine[REPLICATION_SNP_CHR_COL],
                    Integer.parseInt(replicationLine[REPLICATION_SNP_POS_COL]));
            if (variant == null) {
                continue;
            }

            ReplicationQtl replicationQtl = new ReplicationQtl(replicationLine[REPLICATION_SNP_CHR_COL],
                    Integer.parseInt(replicationLine[REPLICATION_SNP_POS_COL]),
                    replicationLine[REPLICATION_GENE_COL],
                    Double.parseDouble(replicationLine[REPLICATION_BETA_COL]),
                    variant.getAlternativeAlleles().get(0).getAlleleAsString());
            ArrayList<ReplicationQtl> posReplicationQtls = replicationQtls.get(replicationQtl.getChr(),
                    replicationQtl.getPos());
            if (posReplicationQtls == null) {
                posReplicationQtls = new ArrayList<>();
                replicationQtls.put(replicationQtl.getChr(), replicationQtl.getPos(), posReplicationQtls);
            }
            posReplicationQtls.add(replicationQtl);

        } catch (Exception e) {
            System.out.println(Arrays.toString(replicationLine));
            throw e;
        }
    }

    int interactionSnpNotInGenotypeData = 0;
    int noReplicationQtlsInWindow = 0;
    int noReplicationQtlsInLd = 0;
    int multipleReplicationQtlsInLd = 0;
    int replicationTopSnpNotInGenotypeData = 0;

    final CSVWriter outputWriter = new CSVWriter(new FileWriter(new File(outputFilePath)), '\t', '\0');
    final String[] outputLine = new String[14];
    int c = 0;
    outputLine[c++] = "Chr";
    outputLine[c++] = "Pos";
    outputLine[c++] = "SNP";
    outputLine[c++] = "Gene";
    outputLine[c++] = "Module";
    outputLine[c++] = "DiscoveryZ";
    outputLine[c++] = "ReplicationZ";
    outputLine[c++] = "DiscoveryZCorrected";
    outputLine[c++] = "ReplicationZCorrected";
    outputLine[c++] = "DiscoveryAlleleAssessed";
    outputLine[c++] = "ReplicationAlleleAssessed";
    outputLine[c++] = "bestLd";
    outputLine[c++] = "bestLd_dist";
    outputLine[c++] = "nextLd";
    outputWriter.writeNext(outputLine);

    HashSet<String> notFound = new HashSet<>();

    CSVReader interactionQtlReader = new CSVReader(new FileReader(interactionQtlFilePath), '\t');
    interactionQtlReader.readNext();//skip header
    String[] interactionQtlLine;
    while ((interactionQtlLine = interactionQtlReader.readNext()) != null) {

        String snp = interactionQtlLine[1];
        String chr = interactionQtlLine[2];
        int pos = Integer.parseInt(interactionQtlLine[3]);
        String gene = interactionQtlLine[4];
        String alleleAssessed = interactionQtlLine[9];
        String module = interactionQtlLine[12];
        double discoveryZ = Double.parseDouble(interactionQtlLine[10]);

        GeneticVariant interactionQtlVariant = genotypeData.getSnpVariantByPos(chr, pos);

        if (interactionQtlVariant == null) {
            System.err.println("Interaction QTL SNP not found in genotype data: " + chr + ":" + pos);
            ++interactionSnpNotInGenotypeData;
            continue;
        }

        ReplicationQtl bestMatch = null;
        double bestMatchR2 = Double.NaN;
        Ld bestMatchLd = null;
        double nextBestR2 = Double.NaN;

        ArrayList<ReplicationQtl> sameSnpQtls = replicationQtls.get(chr, pos);

        if (sameSnpQtls != null) {
            for (ReplicationQtl sameSnpQtl : sameSnpQtls) {
                if (sameSnpQtl.getGene().equals(gene)) {
                    bestMatch = sameSnpQtl;
                    bestMatchR2 = 1;
                }
            }
        }

        NavigableMap<Integer, ArrayList<ReplicationQtl>> potentionalReplicationQtls = replicationQtls
                .getChrRange(chr, pos - window, true, pos + window, true);

        for (ArrayList<ReplicationQtl> potentialReplicationQtls : potentionalReplicationQtls.values()) {

            for (ReplicationQtl potentialReplicationQtl : potentialReplicationQtls) {

                if (!potentialReplicationQtl.getGene().equals(gene)) {
                    continue;
                }

                GeneticVariant potentialReplicationQtlVariant = genotypeData
                        .getSnpVariantByPos(potentialReplicationQtl.getChr(), potentialReplicationQtl.getPos());

                if (potentialReplicationQtlVariant == null) {
                    notFound.add(potentialReplicationQtl.getChr() + ":" + potentialReplicationQtl.getPos());
                    ++replicationTopSnpNotInGenotypeData;
                    continue;
                }

                Ld ld = interactionQtlVariant.calculateLd(potentialReplicationQtlVariant);
                double r2 = ld.getR2();

                if (r2 > 1) {
                    r2 = 1;
                }

                if (bestMatch == null) {
                    bestMatch = potentialReplicationQtl;
                    bestMatchR2 = r2;
                    bestMatchLd = ld;
                } else if (r2 > bestMatchR2) {
                    bestMatch = potentialReplicationQtl;
                    nextBestR2 = bestMatchR2;
                    bestMatchR2 = r2;
                    bestMatchLd = ld;
                }

            }
        }

        double replicationZ = Double.NaN;
        double replicationZCorrected = Double.NaN;
        double discoveryZCorrected = Double.NaN;

        String replicationAlleleAssessed = null;

        if (bestMatch != null) {
            replicationZ = bestMatch.getBeta();
            replicationAlleleAssessed = bestMatch.getAssessedAllele();

            if (pos != bestMatch.getPos()) {

                String commonHap = null;
                double commonHapFreq = -1;
                for (Map.Entry<String, Double> hapFreq : bestMatchLd.getHaplotypesFreq().entrySet()) {

                    double f = hapFreq.getValue();

                    if (f > commonHapFreq) {
                        commonHapFreq = f;
                        commonHap = hapFreq.getKey();
                    }

                }

                String[] commonHapAlleles = StringUtils.split(commonHap, '/');

                discoveryZCorrected = commonHapAlleles[0].equals(alleleAssessed) ? discoveryZ : discoveryZ * -1;
                replicationZCorrected = commonHapAlleles[1].equals(replicationAlleleAssessed) ? replicationZ
                        : replicationZ * -1;

            } else {

                discoveryZCorrected = discoveryZ;
                replicationZCorrected = alleleAssessed.equals(replicationAlleleAssessed) ? replicationZ
                        : replicationZ * -1;

            }

        }

        c = 0;
        outputLine[c++] = chr;
        outputLine[c++] = String.valueOf(pos);
        outputLine[c++] = snp;
        outputLine[c++] = gene;
        outputLine[c++] = module;
        outputLine[c++] = String.valueOf(discoveryZ);
        outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(replicationZ);
        outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(discoveryZCorrected);
        outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(replicationZCorrected);
        outputLine[c++] = alleleAssessed;
        outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(bestMatch.getAssessedAllele());
        outputLine[c++] = String.valueOf(bestMatchR2);
        outputLine[c++] = bestMatch == null ? "NA" : String.valueOf(Math.abs(pos - bestMatch.getPos()));
        outputLine[c++] = String.valueOf(nextBestR2);
        outputWriter.writeNext(outputLine);

    }

    outputWriter.close();

    for (String e : notFound) {
        System.err.println("Not found: " + e);
    }

    System.out.println("interactionSnpNotInGenotypeData: " + interactionSnpNotInGenotypeData);
    System.out.println("noReplicationQtlsInWindow: " + noReplicationQtlsInWindow);
    System.out.println("noReplicationQtlsInLd: " + noReplicationQtlsInLd);
    System.out.println("multipleReplicationQtlsInLd: " + multipleReplicationQtlsInLd);
    System.out.println("replicationTopSnpNotInGenotypeData: " + replicationTopSnpNotInGenotypeData);

}

From source file:edu.msu.cme.rdp.framebot.stat.TaxonAbundance.java

/**
 * this class group the nearest matches by phylum/class, or by match 
 * @param args/*  w  w w  .  j a v a 2 s  .  c o m*/
 * @throws Exception 
 */
public static void main(String[] args) throws Exception {
    HashMap<String, Double> coveragetMap = null;
    double identity = 0.0;
    try {
        CommandLine line = new PosixParser().parse(options, args);
        if (line.hasOption("seqCoverage")) {
            String coveragefile = line.getOptionValue("seqCoverage");
            coveragetMap = parseKmerCoverage(coveragefile);
        }
        if (line.hasOption("identity")) {
            identity = Double.parseDouble(line.getOptionValue("identity"));
            if (identity < 0 || identity > 100) {
                throw new IllegalArgumentException("identity cutoff should be in the range of 0 and 100");
            }
        }

        args = line.getArgs();
        if (args.length != 3) {
            throw new Exception("");
        }

    } catch (Exception e) {
        System.out.println("Command Error: " + e.getMessage());
        new HelpFormatter().printHelp(80, "[options] <FrameBot Alignment file or Dir> <seqLineage> <out file> ",
                "", options,
                "seqLineage: a tab-delimited file with ref seqID and lineage, or fasta of ref seq with lineage as the descrption"
                        + "\nframeBot alignment file or Dir: frameBot alignment files "
                        + "\noutfile: output with the nearest match count group by phylum/class; and by match name");
    }
    TaxonAbundance.mapAbundance(new File(args[0]), new File(args[1]), args[2], coveragetMap, identity);
}

From source file:edu.nyu.vida.data_polygamy.pre_processing.PreProcessing.java

/**
 * @param args/*ww  w.  j  av  a2 s.  c  o m*/
 * @throws IOException 
 * @throws ClassNotFoundException 
 * @throws InterruptedException 
 */
@SuppressWarnings("deprecation")
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option nameOption = new Option("dn", "name", true, "the name of the dataset");
    nameOption.setRequired(true);
    nameOption.setArgName("DATASET NAME");
    options.addOption(nameOption);

    Option headerOption = new Option("dh", "header", true, "the file that contains the header of the dataset");
    headerOption.setRequired(true);
    headerOption.setArgName("DATASET HEADER FILE");
    options.addOption(headerOption);

    Option deafultsOption = new Option("dd", "defaults", true,
            "the file that contains the default values of the dataset");
    deafultsOption.setRequired(true);
    deafultsOption.setArgName("DATASET DEFAULTS FILE");
    options.addOption(deafultsOption);

    Option tempResOption = new Option("t", "temporal", true,
            "desired temporal resolution (hour, day, week, or month)");
    tempResOption.setRequired(true);
    tempResOption.setArgName("TEMPORAL RESOLUTION");
    options.addOption(tempResOption);

    Option spatialResOption = new Option("s", "spatial", true,
            "desired spatial resolution (points, nbhd, zip, grid, or city)");
    spatialResOption.setRequired(true);
    spatialResOption.setArgName("SPATIAL RESOLUTION");
    options.addOption(spatialResOption);

    Option currentSpatialResOption = new Option("cs", "current-spatial", true,
            "current spatial resolution (points, nbhd, zip, grid, or city)");
    currentSpatialResOption.setRequired(true);
    currentSpatialResOption.setArgName("CURRENT SPATIAL RESOLUTION");
    options.addOption(currentSpatialResOption);

    Option indexResOption = new Option("i", "index", true, "indexes of the temporal and spatial attributes");
    indexResOption.setRequired(true);
    indexResOption.setArgName("INDEX OF SPATIO-TEMPORAL RESOLUTIONS");
    indexResOption.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(indexResOption);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-id", true,
            "aws secrect access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp(
                "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.pre_processing.PreProcessing",
                options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp(
                "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.pre_processing.PreProcessing",
                options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println(
                    "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS.");
            formatter.printHelp(
                    "hadoop jar data-polygamy.jar " + "edu.nyu.vida.data_polygamy.pre_processing.PreProcessing",
                    options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    Configuration conf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);
    String dataset = cmd.getOptionValue("dn");
    String header = cmd.getOptionValue("dh");
    String defaults = cmd.getOptionValue("dd");
    String temporalResolution = cmd.getOptionValue("t");
    String spatialResolution = cmd.getOptionValue("s");
    String gridResolution = "";
    String currentSpatialResolution = cmd.getOptionValue("cs");

    if (spatialResolution.contains("grid")) {
        String[] res = spatialResolution.split("-");
        spatialResolution = res[0];
        gridResolution = res[1];
    }

    conf.set("header", s3bucket + FrameworkUtils.dataDir + "/" + header);
    conf.set("defaults", s3bucket + FrameworkUtils.dataDir + "/" + defaults);
    conf.set("temporal-resolution", temporalResolution);
    conf.set("spatial-resolution", spatialResolution);
    conf.set("grid-resolution", gridResolution);
    conf.set("current-spatial-resolution", currentSpatialResolution);

    String[] indexes = cmd.getOptionValues("i");
    String temporalPos = "";
    Integer sizeSpatioTemp = 0;
    if (!(currentSpatialResolution.equals("points"))) {
        String spatialPos = "";
        for (int i = 0; i < indexes.length; i++) {
            temporalPos += indexes[i] + ",";
            spatialPos += indexes[++i] + ",";
            sizeSpatioTemp++;
        }
        conf.set("spatial-pos", spatialPos);
    } else {
        String xPositions = "", yPositions = "";
        for (int i = 0; i < indexes.length; i++) {
            temporalPos += indexes[i] + ",";
            xPositions += indexes[++i] + ",";
            yPositions += indexes[++i] + ",";
            sizeSpatioTemp++;
        }
        conf.set("xPositions", xPositions);
        conf.set("yPositions", yPositions);
    }
    conf.set("temporal-pos", temporalPos);

    conf.set("size-spatio-temporal", sizeSpatioTemp.toString());

    // checking resolutions

    if (utils.spatialResolution(spatialResolution) < 0) {
        System.out.println("Invalid spatial resolution: " + spatialResolution);
        System.exit(-1);
    }

    if (utils.spatialResolution(spatialResolution) == FrameworkUtils.POINTS) {
        System.out.println("The data needs to be reduced at least to neighborhoods or grid.");
        System.exit(-1);
    }

    if (utils.spatialResolution(currentSpatialResolution) < 0) {
        System.out.println("Invalid spatial resolution: " + currentSpatialResolution);
        System.exit(-1);
    }

    if (utils.spatialResolution(currentSpatialResolution) > utils.spatialResolution(spatialResolution)) {
        System.out.println("The current spatial resolution is coarser than "
                + "the desired one. You can only navigate from a fine resolution" + " to a coarser one.");
        System.exit(-1);
    }

    if (utils.temporalResolution(temporalResolution) < 0) {
        System.out.println("Invalid temporal resolution: " + temporalResolution);
        System.exit(-1);
    }

    String fileName = s3bucket + FrameworkUtils.preProcessingDir + "/" + dataset + "-" + temporalResolution
            + "-" + spatialResolution + gridResolution;
    conf.set("aggregates", fileName + ".aggregates");

    // making sure both files are removed, if they exist
    FrameworkUtils.removeFile(fileName, s3conf, s3);
    FrameworkUtils.removeFile(fileName + ".aggregates", s3conf, s3);

    /**
     * Hadoop Parameters
     * sources: http://www.slideshare.net/ImpetusInfo/ppt-on-advanced-hadoop-tuning-n-optimisation
     *          https://cloudcelebrity.wordpress.com/2013/08/14/12-key-steps-to-keep-your-hadoop-cluster-running-strong-and-performing-optimum/
     */

    conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    conf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    conf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    conf.set("mapreduce.task.io.sort.mb", "200");
    conf.set("mapreduce.task.io.sort.factor", "100");

    // using SnappyCodec for intermediate and output data ?
    // TODO: for now, using SnappyCodec -- what about LZO + Protocol Buffer serialization?
    //   LZO - http://www.oberhumer.com/opensource/lzo/#download
    //   Hadoop-LZO - https://github.com/twitter/hadoop-lzo
    //   Protocol Buffer - https://github.com/twitter/elephant-bird
    //   General Info - http://www.devx.com/Java/Article/47913
    //   Compression - http://comphadoop.weebly.com/index.html
    if (snappyCompression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        conf.set("mapreduce.output.fileoutputformat.compress.codec",
                "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        conf.set("mapreduce.output.fileoutputformat.compress.codec",
                "org.apache.hadoop.io.compress.BZip2Codec");
    }

    // TODO: this is dangerous!
    if (s3) {
        conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
    }

    Job job = new Job(conf);
    job.setJobName(dataset + "-" + temporalResolution + "-" + spatialResolution);

    job.setMapOutputKeyClass(MultipleSpatioTemporalWritable.class);
    job.setMapOutputValueClass(AggregationArrayWritable.class);

    job.setOutputKeyClass(MultipleSpatioTemporalWritable.class);
    job.setOutputValueClass(AggregationArrayWritable.class);

    job.setMapperClass(PreProcessingMapper.class);
    job.setCombinerClass(PreProcessingCombiner.class);
    job.setReducerClass(PreProcessingReducer.class);
    job.setNumReduceTasks(machineConf.getNumberReduces());
    //job.setNumReduceTasks(1);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    FileInputFormat.setInputPaths(job, new Path(s3bucket + FrameworkUtils.dataDir + "/" + dataset));
    FileOutputFormat.setOutputPath(job, new Path(fileName));

    job.setJarByClass(PreProcessing.class);

    long start = System.currentTimeMillis();
    job.submit();
    job.waitForCompletion(true);
    System.out.println(fileName + "\t" + (System.currentTimeMillis() - start));

}

From source file:edu.msu.cme.rdp.abundstats.cli.AbundMain.java

public static void main(String[] args) throws IOException {
    File inputFile;/*ww  w  .  ja  v  a 2 s .  c  o m*/
    File resultDir = new File(".");
    RPlotter plotter = null;
    boolean isClusterFile = true;
    List<AbundStatsCalculator> statCalcs = new ArrayList();
    double clustCutoffFrom = Double.MIN_VALUE, clustCutoffTo = Double.MAX_VALUE;

    String usage = "Main [options] <cluster file>";
    try {
        CommandLine line = new PosixParser().parse(options, args);

        if (line.hasOption("result-dir")) {
            resultDir = new File(line.getOptionValue("result-dir"));
            if (!resultDir.exists() && !resultDir.mkdirs()) {
                throw new Exception(
                        "Result directory " + resultDir + " does not exist and could not be created");
            }
        }

        if (line.hasOption("R-location")) {
            plotter = new RPlotter();
            plotter.setCommandTemplate(rplotterTemplate);
            plotter.setRPath(line.getOptionValue("R-location"));
            plotter.setOutFileExt(".png");

            if (!new File(plotter.getRPath()).canExecute()) {
                throw new Exception(plotter.getRPath() + " does not exist or is not exectuable");
            }
        }

        if (line.hasOption("lower-cutoff")) {
            clustCutoffFrom = Double.valueOf(line.getOptionValue("lower-cutoff"));
        }

        if (line.hasOption("upper-cutoff")) {
            clustCutoffTo = Double.valueOf(line.getOptionValue("upper-cutoff"));
        }

        if (line.hasOption("jaccard")) {
            statCalcs.add(new Jaccard(true));
        }

        if (line.hasOption("sorensen")) {
            statCalcs.add(new Sorensen(true));
        }

        if (line.hasOption("otu-table")) {
            isClusterFile = false;
        }

        if (statCalcs.isEmpty()) {
            throw new Exception("Must specify at least one stat to compute (jaccard, sorensen)");
        }

        args = line.getArgs();
        if (args.length != 1) {
            throw new Exception("Unexpected number of command line arguments");
        }

        inputFile = new File(args[0]);

    } catch (Exception e) {
        new HelpFormatter().printHelp(usage, options);
        System.err.println("Error: " + e.getMessage());
        return;
    }

    if (isClusterFile) {
        RDPClustParser parser;
        parser = new RDPClustParser(inputFile);

        try {
            if (parser.getClusterSamples().size() == 1) {
                throw new IOException("Cluster file must have more than one sample");
            }

            List<Cutoff> cutoffs = parser.getCutoffs(clustCutoffFrom, clustCutoffTo);
            if (cutoffs.isEmpty()) {
                throw new IOException(
                        "No cutoffs in cluster file in range [" + clustCutoffFrom + "-" + clustCutoffTo + "]");
            }

            for (Cutoff cutoff : cutoffs) {
                List<Sample> samples = new ArrayList();

                for (ClusterSample clustSample : parser.getClusterSamples()) {
                    Sample s = new Sample(clustSample.getName());
                    for (Cluster clust : cutoff.getClusters().get(clustSample.getName())) {
                        s.addSpecies(clust.getNumberOfSeqs());
                    }
                    samples.add(s);
                }

                processSamples(samples, statCalcs, resultDir, cutoff.getCutoff() + "_", plotter);
            }

        } finally {
            parser.close();
        }
    } else {
        List<Sample> samples = new ArrayList();
        BufferedReader reader = new BufferedReader(new FileReader(inputFile));
        String line = reader.readLine();

        if (line == null || line.split("\\s+").length < 2) {
            throw new IOException("Must be 2 or more samples for abundance statistic calculations!");
        }
        int numSamples = line.split("\\s+").length;

        boolean header = true;
        try {
            Integer.valueOf(line.split("\\s+")[0]);
            header = false;
        } catch (Exception e) {
        }

        if (header) {
            for (String s : line.split("\\s+")) {
                samples.add(new Sample(s));
            }
        } else {
            int sample = 0;
            for (String s : line.split("\\s+")) {
                samples.add(new Sample("" + sample));
                samples.get(sample).addSpecies(Integer.valueOf(s));
                sample++;
            }
        }

        int lineno = 2;
        while ((line = reader.readLine()) != null) {
            if (line.trim().equals("")) {
                continue;
            }
            int sample = 0;
            if (line.split("\\s+").length != numSamples) {
                System.err.println(
                        "Line number " + lineno + " didn't have the expected number of samples (contained "
                                + line.split("\\s+").length + ", expected " + numSamples + ")");
            }

            for (String s : line.split("\\s+")) {
                samples.get(sample).addSpecies(Integer.valueOf(s));
                sample++;
            }

            lineno++;
        }

        processSamples(samples, statCalcs, resultDir, inputFile.getName(), plotter);
    }
}

From source file:edu.sdsc.scigraph.owlapi.loader.BatchOwlLoader.java

public static void main(String[] args) throws Exception {
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;/* w  w w.ja va  2s.  co  m*/
    try {
        cmd = parser.parse(getOptions(), args);
    } catch (ParseException e) {
        System.err.println(e.getMessage());
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(BatchOwlLoader.class.getSimpleName(), getOptions());
        System.exit(-1);
    }

    ObjectMapper mapper = new ObjectMapper(new YAMLFactory());
    OwlLoadConfiguration config = mapper.readValue(new File(cmd.getOptionValue('c').trim()),
            OwlLoadConfiguration.class);
    load(config);
    // TODO: Is Guice causing this to hang? #44
    System.exit(0);
}

From source file:it.unipd.dei.ims.falcon.CmdLine.java

public static void main(String[] args) {

    // last argument is always index path
    Options options = new Options();
    // one of these actions has to be specified
    OptionGroup actionGroup = new OptionGroup();
    actionGroup.addOption(new Option("i", true, "perform indexing")); // if dir, all files, else only one file
    actionGroup.addOption(new Option("q", true, "perform a single query"));
    actionGroup.addOption(new Option("b", false, "perform a query batch (read from stdin)"));
    actionGroup.setRequired(true);//from  www.j  a  v a2  s. co  m
    options.addOptionGroup(actionGroup);

    // other options
    options.addOption(new Option("l", "segment-length", true, "length of a segment (# of chroma vectors)"));
    options.addOption(
            new Option("o", "segment-overlap", true, "overlap portion of a segment (# of chroma vectors)"));
    options.addOption(new Option("Q", "quantization-level", true, "quantization level for chroma vectors"));
    options.addOption(new Option("k", "min-kurtosis", true, "minimum kurtosis for indexing chroma vectors"));
    options.addOption(new Option("s", "sub-sampling", true, "sub-sampling of chroma features"));
    options.addOption(new Option("v", "verbose", false, "verbose output (including timing info)"));
    options.addOption(new Option("T", "transposition-estimator-strategy", true,
            "parametrization for the transposition estimator strategy"));
    options.addOption(new Option("t", "n-transp", true,
            "number of transposition; if not specified, no transposition is performed"));
    options.addOption(new Option("f", "force-transp", true, "force transposition by an amount of semitones"));
    options.addOption(new Option("p", "pruning", false,
            "enable query pruning; if -P is unspecified, use default strategy"));
    options.addOption(new Option("P", "pruning-custom", true, "custom query pruning strategy"));

    // parse
    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;
    try {
        cmd = parser.parse(options, args);
        if (cmd.getArgs().length != 1)
            throw new ParseException("no index path was specified");
    } catch (ParseException ex) {
        System.err.println("ERROR - parsing command line:");
        System.err.println(ex.getMessage());
        formatter.printHelp("falcon -{i,q,b} [options] index_path", options);
        return;
    }

    // default values
    final float[] DEFAULT_TRANSPOSITION_ESTIMATOR_STRATEGY = new float[] { 0.65192807f, 0.0f, 0.0f, 0.0f,
            0.3532628f, 0.4997167f, 0.0f, 0.41703504f, 0.0f, 0.16297342f, 0.0f, 0.0f };
    final String DEFAULT_QUERY_PRUNING_STRATEGY = "ntf:0.340765*[0.001694,0.995720];ndf:0.344143*[0.007224,0.997113];"
            + "ncf:0.338766*[0.001601,0.995038];nmf:0.331577*[0.002352,0.997884];"; // TODO not the final one

    int hashes_per_segment = Integer.parseInt(cmd.getOptionValue("l", "150"));
    int overlap_per_segment = Integer.parseInt(cmd.getOptionValue("o", "50"));
    int nranks = Integer.parseInt(cmd.getOptionValue("Q", "3"));
    int subsampling = Integer.parseInt(cmd.getOptionValue("s", "1"));
    double minkurtosis = Float.parseFloat(cmd.getOptionValue("k", "-100."));
    boolean verbose = cmd.hasOption("v");
    int ntransp = Integer.parseInt(cmd.getOptionValue("t", "1"));
    TranspositionEstimator tpe = null;
    if (cmd.hasOption("t")) {
        if (cmd.hasOption("T")) {
            // TODO this if branch is yet to test
            Pattern p = Pattern.compile("\\d\\.\\d*");
            LinkedList<Double> tokens = new LinkedList<Double>();
            Matcher m = p.matcher(cmd.getOptionValue("T"));
            while (m.find())
                tokens.addLast(new Double(cmd.getOptionValue("T").substring(m.start(), m.end())));
            float[] strategy = new float[tokens.size()];
            if (strategy.length != 12) {
                System.err.println("invalid transposition estimator strategy");
                System.exit(1);
            }
            for (int i = 0; i < strategy.length; i++)
                strategy[i] = new Float(tokens.pollFirst());
        } else {
            tpe = new TranspositionEstimator(DEFAULT_TRANSPOSITION_ESTIMATOR_STRATEGY);
        }
    } else if (cmd.hasOption("f")) {
        int[] transps = parseIntArray(cmd.getOptionValue("f"));
        tpe = new ForcedTranspositionEstimator(transps);
        ntransp = transps.length;
    }
    QueryPruningStrategy qpe = null;
    if (cmd.hasOption("p")) {
        if (cmd.hasOption("P")) {
            qpe = new StaticQueryPruningStrategy(cmd.getOptionValue("P"));
        } else {
            qpe = new StaticQueryPruningStrategy(DEFAULT_QUERY_PRUNING_STRATEGY);
        }
    }

    // action
    if (cmd.hasOption("i")) {
        try {
            Indexing.index(new File(cmd.getOptionValue("i")), new File(cmd.getArgs()[0]), hashes_per_segment,
                    overlap_per_segment, subsampling, nranks, minkurtosis, tpe, verbose);
        } catch (IndexingException ex) {
            Logger.getLogger(CmdLine.class.getName()).log(Level.SEVERE, null, ex);
        } catch (IOException ex) {
            Logger.getLogger(CmdLine.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    if (cmd.hasOption("q")) {
        String queryfilepath = cmd.getOptionValue("q");
        doQuery(cmd, queryfilepath, hashes_per_segment, overlap_per_segment, nranks, subsampling, tpe, ntransp,
                minkurtosis, qpe, verbose);
    }
    if (cmd.hasOption("b")) {
        try {
            long starttime = System.currentTimeMillis();
            BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
            String line = null;
            while ((line = in.readLine()) != null && !line.trim().isEmpty())
                doQuery(cmd, line, hashes_per_segment, overlap_per_segment, nranks, subsampling, tpe, ntransp,
                        minkurtosis, qpe, verbose);
            in.close();
            long endtime = System.currentTimeMillis();
            System.out.println(String.format("total time: %ds", (endtime - starttime) / 1000));
        } catch (IOException ex) {
            Logger.getLogger(CmdLine.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}