org.mitre.ccv.CompleteCompositionVectorMain.java Source code

Java tutorial

Introduction

Here is the source code for org.mitre.ccv.CompleteCompositionVectorMain.java

Source

/**
 * Created on Nov 23, 2007
 *
 * Copyright 2010- The MITRE Corporation. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you  may not 
 * use this file except in compliance with the License. You may obtain a copy of 
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software 
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 
 * License for the specific language governing permissions andlimitations under
 * the License.
 *
 * $Id$
 */
package org.mitre.ccv;

import java.util.Properties;
import java.util.NoSuchElementException;
import java.util.ResourceBundle;
import java.util.MissingResourceException;

import java.io.FileWriter;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.PrintWriter;

import java.sql.SQLException;

import org.apache.commons.cli.Options;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.ParseException;

import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.Log;
import org.apache.log4j.Level;

import org.apache.log4j.Logger;
import org.apache.xml.serialize.XMLSerializer;
import org.apache.xml.serialize.OutputFormat;

import org.mitre.ccv.index.IndexedCompleteCompositionVectorSet;
import org.mitre.ccv.sql.AbstractVectorSetSQL;
import org.mitre.ccv.sql.EmbeddedVectorSetSQL;
import org.mitre.ccv.sql.VectorSetSQL;

import org.mitre.bio.Sequence;
import org.mitre.bio.io.FastaIterator;

import org.mitre.bio.phylo.DistanceMatrix;
import org.mitre.bio.phylo.dom.Forest;

import org.mitre.bio.phylo.tree.NeighborJoiningTree;
import org.mitre.bio.phylo.tree.NodeUtils;
import org.mitre.bio.phylo.tree.Tree;
import org.mitre.bio.phylo.tree.UPGMATree;

import org.mitre.bio.phylo.tree.io.NewickWriter;
import org.mitre.clustering.AffinityPropagation;

import weka.core.matrix.Matrix;

/**
 * Main Command Line Inteface Class for ccvs
 * 
 * @author Marc Colosimo
 * @author Matt Peterson
 */
public class CompleteCompositionVectorMain {

    /** Set-up our logger. */
    private static final Log LOG = LogFactory.getLog(CompleteCompositionVectorMain.class);
    public FastaIterator seqIter = null;
    public Integer begin = 3;
    public Integer end = 9;
    public Integer topNMers = null;
    public Integer seqNameParser = 1; // what to use for sequence name
    /** What distance calculation to use */
    public Integer distCalc = 2; // default: Cosine
    public Integer prefVal = 1; // default: Median
    public Boolean upgma = false; // default: Neighbor-Joined Tree
    public String nwkOutFile = null; // default: no output as newick
    public String xmlOutFile = null; // default: no output as PhyloXML
    public String apClusterOutfile = null; // default: Don't do ap clustering
    public String matrixOutFile = null; // default: Don't write the matrix out
    public String nmersOutFile = null; // default: Don't write nmers
    public String vectorsOutFile = null; // default: Don't write vectors to a json file
    public String vectorsInFile = null; // default: no input from a json file
    public String entOutFile = null; // default: Don't write entropy
    public Boolean embeddedSQL = false; // default: Use memory, not derby
    public Boolean useSQL = false; // default: if both SQLs set, this wins
    private Level logginLevel = Level.WARN; // default: Log level WARN

    /**
     * Set-up our static strings
     */
    private static String APPLICATION_TITLE = "CompleteCompositionVectorCLI";
    private static String APPLICATION_VERSION = "Unknown";
    private static String APPLICATION_BUILD = "Unknown";

    static {
        ResourceBundle bundle = null;
        try {
            bundle = ResourceBundle.getBundle("org.mitre.ccv.ApplicationInformation");
        } catch (MissingResourceException mre) {
            bundle = null;
        }
        if (bundle != null) {
            APPLICATION_VERSION = bundle.getString("application.version");
            APPLICATION_TITLE = APPLICATION_TITLE + " " + APPLICATION_VERSION;

            APPLICATION_BUILD = bundle.getString("subversion.version");
        }
    }

    /**
     * Returns a parsed sequence name usually from a FASTA file
     * 
     * @param seq the {@link Sequence}
     * @param seqParser 1-name(default);2-description;3-full header(name + description, which might break newick-style trees)
     */
    static public String parseSequenceName(Sequence seq, int seqParser) {
        String name = null;
        switch (seqParser) {
        case 3:
            name = seq.getName() + " " + seq.getDescription();
            break;
        case 2:
            name = seq.getDescription();
            break;
        case 1:
        default:
            name = seq.getName();
        }

        /** We need to return something */
        if (name == null) {
            if (seq.getName() == null) {
                return "[No Name]";
            } else {
                return "[Null]" + seq.getName();
            }
        } else {
            return name;
        }
    }

    /**
     * Generate the ccv set with the set parameters.
     * 
     * The parameters can be set on the command-line or read in from
     * the "ccv.properties" file.
     */
    public VectorSet generateCompleteCompositionVectorSet() throws Exception {

        VectorSet set = null;
        if (this.useSQL) {

            Properties prop;
            try {
                prop = getSQLProperties();
                set = new VectorSetSQL(this.begin, this.end, prop);
            } catch (IOException ioe) {
                throw new Exception("Unable to load ccv.properties file!");
            } catch (SQLException slqe) {
                AbstractVectorSetSQL.printSQLException(slqe);
                throw new Exception("Unable to connect to database!");
            }

        } else if (this.embeddedSQL) {
            set = new EmbeddedVectorSetSQL(this.begin, this.end);
        } else {
            set = new IndexedCompleteCompositionVectorSet(this.begin, this.end);
        }

        return set;
    }

    /**
     * Returns a {@link DistanceMatrix} using the set distance method
     * 
     * @param matrix <code>CompleteMatrix</code> of sequence vectors
     * @param nmers the top number of nmers to use 
     * (<code>null</code> value uses all).
     */
    public DistanceMatrix createDistanceMatrix(VectorSet set, CompleteMatrix matrix) {

        try {
            switch (this.distCalc) {
            case 4:
                if (this.topNMers == null) {
                    // much faster if we don't try to limit it to a set of nmers
                    return set.createJaccardDistanceMatrix(null);
                } else {
                    return set.createJaccardDistanceMatrix(matrix.getNmers());
                }
            case 3:
                return set.createESDistanceMatrix(matrix);
            case 2:
                return set.createCosineDistanceMatrix(matrix);
            case 1:
            default:
                return set.createEuclidianDistanceMatrix(matrix);
            }
        } catch (OutOfMemoryError e) {
            String errStr = "Out of memory while creating distance matrix!\n"
                    + "Try limiting top nmers or increasing heap space or "
                    + "changing the number of windows (begin/end).";
            LOG.fatal(errStr, e);
            return null;
        }
    }

    /**
     * Check to see if we are outputing a tree, if not then we do not calculate it
     * 
     * @return
     */
    public Boolean calculateTree() {
        /** Things to check: xmlOutFile, nwkOutFile */
        if (this.xmlOutFile != null || this.nwkOutFile != null) {
            return true;
        }
        return false;
    }

    /**
     * Check to see if we need to calculate distances. 
     * 
     * If we are outputing a tree, matrixOutFile, apClusterOutfile, then true.
     * @return
     */
    public Boolean calculateDistances() {
        if (calculateTree()) {
            return true;
        }
        if (this.matrixOutFile != null || this.apClusterOutfile != null) {
            return true;
        }

        return false;
    }

    /**
     * Check to see if need to calculate/generate the full vector matrix
     * @return true if we do
     */
    public Boolean calculateMatrix() {
        if (this.calculateDistances()) {
            return true;
        }
        if (this.vectorsOutFile != null || this.nmersOutFile != null) {
            return true;
        }
        return false;
    }

    /**
     * Create a tree using the given distance matrix
     * 
     * @param distMatrix
     * @return the tree (NJ is default, UPGMA optional)
     */
    public Tree createTree(DistanceMatrix distMatrix) {
        if (distMatrix == null) {
            return null;
        }

        Tree tree = null;
        if (this.upgma) {
            tree = new UPGMATree(distMatrix);
            LOG.warn("UPGMA is deprecated, should use ClusterTree!");
        } else {
            /** Build a NeighborJoined Tree */
            Tree njTree = new NeighborJoiningTree(distMatrix);
            tree = njTree.getMidpointRootedTree();
        }
        return tree;
    }

    /**
     * Calculates the median value of a 1-D array
     * 
     * @param array
     * @return
     */
    public double getPreference(double[] array, int type) {
        java.util.Arrays.sort(array);

        int arrayLength = array.length;

        if (type == 0) {
            return array[0];
        } else {

            double median;

            if (arrayLength % 2 == 0) {
                int n = (arrayLength + 1) / 2;
                median = (array[n] + array[n + 1]) / 2.0;
            } else {
                int n = arrayLength / 2;
                median = array[n];
            }

            if (type == 1) {
                return median;
            } else {
                return (median + array[0]) / 2;
            }

        }
    }

    /**
     * Writes a PAL tree in PhyloXML format to given BufferedWriter
     * 
     * @param tree the Tree
     * @param bw the BufferedWriter
     */
    public static void writePhyloXMLTree(Tree tree, BufferedWriter bw) throws IOException {

        Forest f = NodeUtils.getSingleton().convert2Forest(tree.getRoot());

        /**
         * Needs to be static because others call this: should be a util
         * Convert tree string to forest
         */
        OutputFormat of = new OutputFormat("XML", "ISO-8859-1", true);
        of.setIndent(1);
        of.setIndenting(true);

        XMLSerializer serial = new XMLSerializer(bw, of);
        if (f != null) {
            try {
                serial.serialize(f.getOwnerDocument());
            } catch (IOException ioe) {
                ioe.printStackTrace();
            }
        }
        bw.flush();
    }

    /**
     * Write the tree to a newick style file
     * 
     * @param tree the Tree.
     * @param bw BufferedWriter to write tree to.
     */
    public static void writeNwkTree(Tree tree, BufferedWriter bw) {
        try {
            NewickWriter.getSingleton().print(new PrintWriter(bw), tree.getRoot(), true, false);
            bw.flush();
        } catch (Exception ioe) {
            LOG.error("Error in writing out tree to newick file.", ioe);
        }
    }

    /**
     * Performs Affinity Propagation Clustering
     * 
     * @param dm
     * @param filename
     */
    public AffinityPropagation cluster(DistanceMatrix dm, int type) {
        double[][] mVals = dm.getClonedDistances();
        Matrix m = new Matrix(mVals);
        if (distCalc == 1 || distCalc == 3) {
            m = m.times(-1.0);
        } else if (distCalc == 2) {
            Matrix o = new Matrix(m.getRowDimension(), m.getColumnDimension(), 1.0);
            m = o.minus(m);
        }

        int total = m.getColumnDimension();
        double values[] = new double[(total * total - total) / 2];
        int count = 0;
        for (int i = 1; i < total; i++) {
            for (int j = i + 1; j < total - 1; j++) {
                values[count] = m.get(i, j);
                count++;
            }

        }

        double preference = getPreference(values, type);

        AffinityPropagation ap = new AffinityPropagation(m, 5000, 300, 0.9, preference);

        return ap;
    }

    /**
     * Write out the distance matrix to the given BufferedWriter.
     * 
     * @param dm the DistanceMatrix.
     * @param bw the BufferedWriter to write distance to.
     *
     */
    public void writeDistanceMatrix(DistanceMatrix dm, BufferedWriter bw) throws IOException {

        double[][] mVals = dm.getClonedDistances();
        Matrix m = new Matrix(mVals);
        /** was when this was writeSimilarityMatrix
        if (distCalc == 1 || distCalc == 3) {
        m = m.times(-1.0);
        } else if (distCalc == 2 || distCalc == 4) {
        Matrix o = new Matrix(m.getRowDimension(), m.getColumnDimension(), 1.0);
        m = o.minus(m);
        }
        */
        try {
            m.write(bw);
            // Write the matrix to the file.
            /**
            bw.write(this.distCalc.toString() + "\n");
            for (int i = 0; i < m.getRowDimension(); i++) {
            for (int j = 0; j < m.getColumnDimension(); j++) {
                
                String s = String.format("%03d\t%03d\t%f\n", i + 1, j + 1, m.get(i, j));
                
                bw.write(s);
            }
            }
             */
            bw.close();

        } catch (Exception ex) {
            throw new IOException(ex.getMessage());
        }
    }

    /**
     * Gets the properties for SQL access.
     * <P>
     * This <B>DOES NOT</B> check any of the fields.
     * <P>
     * @see org.mitre.bio.ccv.sql.VectorSet for field descriptions.
     * @return java.util.Properties;
     */
    public Properties getSQLProperties() throws IOException {
        Properties props = new Properties();
        props.load(new FileInputStream("ccv.properties"));
        return props;
    }

    /**
     * Loads the 'ccv.properties' file and initalizes from the fields.
     * <P>
     * This <B>DOES NOT</B> check any of the jdbc fields, only ccv fields.
     * <P>
     * @return
     */
    public Properties loadProperties() throws IOException {
        Properties props = new Properties();

        props.load(Thread.currentThread().getContextClassLoader().getResourceAsStream("ccv.properties"));

        /** Load */
        return props;
    }

    /**
     * Not the best way, but one way to set logging without changing the properties file
     * 
     * @param level
     */
    public void setLoggingLevel(Level level) {
        this.logginLevel = level;
        // using log4j's Logger!
        Logger rootLogger = Logger.getRootLogger();
        LOG.info("Setting root logger to " + level.toString());
        rootLogger.setLevel(level);
    }

    @SuppressWarnings("static-access")
    public static void main(String[] argv) throws Exception {

        CompleteCompositionVectorMain ccvm = new CompleteCompositionVectorMain();

        /** What we are */
        String cli_title = APPLICATION_TITLE + " [build: " + APPLICATION_BUILD
                + "]\n(C) Copyright 2007-2010, by The MITRE Corporation.";

        /** create the Options */
        Options options = new Options();
        options.addOption(OptionBuilder.withArgName("file").hasArg(true)
                .withDescription("use given file for generating ccv").create("file"));
        options.addOption(OptionBuilder.withArgName("number").hasArg(true)
                .withDescription("number of top nmers to use in calculations").create("topNmers"));
        options.addOption(OptionBuilder.withArgName("begin").hasArg(true).withDescription("initial length of tile")
                .create("begin"));
        options.addOption(OptionBuilder.withArgName("end").hasArg(true).withDescription("ending length of title")
                .create("end"));
        options.addOption(OptionBuilder.withArgName("number").hasArg(true).withDescription(
                "what name to use: 1-name(default);2-description;3-full header(might break newick-style trees)")
                .create("name"));
        options.addOption(
                OptionBuilder
                        .withArgName("number").hasArg(true).withDescription("what distance calculation "
                                + "to use: 1-euclidian;2-cosine(default);" + "3-ESDistance;4-Jaccard")
                        .create("distance"));
        options.addOption(OptionBuilder.withArgName("upgma").hasArg(false)
                .withDescription("Generate UPGMA tree (default is neighor-joined)").create("upgma"));

        options.addOption(OptionBuilder.withArgName("file").hasArg(true)
                .withDescription("Do affinity propagation and write cluster values to given file name")
                .create("cluster"));
        options.addOption(OptionBuilder.withArgName("file").hasArg(true)
                .withDescription("File to write entropies to").create("entfile"));

        options.addOption(OptionBuilder.withArgName("file").hasArg(true)
                .withDescription("File to write distance matrix to").create("distfile"));

        options.addOption(OptionBuilder.hasArg(true).withArgName("file")
                .withDescription("Write tree in PhyloXML format to file").create("xml"));

        options.addOption(OptionBuilder.hasArg(true).withArgName("file")
                .withDescription("Write tree in nwk format to file").create("nwk"));

        options.addOption(OptionBuilder.hasArg(false).withDescription("Use an embedded (Derby) SQL server")
                .create("embeddedSQL"));

        options.addOption(OptionBuilder.hasArg(false)
                .withDescription("Use SQL server. Properties are defined in ccv.properties.").create("sql"));

        options.addOption(OptionBuilder.hasArg(true).withArgName("file")
                .withDescription("JSON file to write out nmers to").create("nmersfile"));

        options.addOption(OptionBuilder.hasArg(true).withArgName("file").withDescription(
                "JSON file to write out vectors to " + "(Overrides nmersout, only one file will be written).")
                .create("vectorsout"));

        options.addOption(OptionBuilder.hasArg(true).withArgName("file")
                .withDescription("JSON file to read in vectors from").create("vectorsin"));

        options.addOption(OptionBuilder.withArgName("help").hasArg(false).withDescription("Print this message")
                .create("help"));

        options.addOption(OptionBuilder.withArgName("number").hasArg(true)
                .withDescription("What preference to use: 0-min 1-median 2-avg(min,med): default is median")
                .create("prefval"));
        options.addOption(OptionBuilder.withArgName("level").hasArg(true).withDescription(
                "Set the logging (verbosity) level: OFF, FATAL, ERROR, WARN, INFO, DEBUG. TRACE, ALL (none to everything)")
                .create("verbosity"));

        // automatically generate the help statement
        HelpFormatter formatter = new HelpFormatter();

        // create the parser
        CommandLineParser parser = new GnuParser();
        try {
            // parse the command line arguments
            CommandLine line = parser.parse(options, argv);
            if (line.getOptions().length == 0 || line.hasOption("help")) {
                System.out.println(cli_title);
                formatter.printHelp("ccv [options] -file ", options);
                return;
            }
            if (line.hasOption("verbosity")) {
                // if this fails then they get DEBUG!
                ccvm.setLoggingLevel(Level.toLevel(line.getOptionValue("verbosity")));
            }
            if (line.hasOption("file")) {
                String fileName = line.getOptionValue("file");
                BufferedReader br = new BufferedReader(new FileReader(fileName));
                ccvm.seqIter = new FastaIterator(br);
            }
            if (line.hasOption("vectorsin")) {
                ccvm.vectorsInFile = line.getOptionValue("vectorsin");
            }
            if (line.hasOption("topNmers")) {
                try {
                    ccvm.topNMers = Integer.parseInt(line.getOptionValue("topNmers"));
                } catch (NumberFormatException nfe) {
                    System.err.println("Error parsing topNmers option. Reason: " + nfe.getMessage());
                    return;
                }
            }
            if (line.hasOption("begin")) {
                try {
                    ccvm.begin = Integer.parseInt(line.getOptionValue("begin"));
                } catch (NumberFormatException nfe) {
                    throw new ParseException("Error parsing 'begin' option. Reason: " + nfe.getMessage());
                }
            }
            if (line.hasOption("end")) {
                try {
                    ccvm.end = Integer.parseInt(line.getOptionValue("end"));
                } catch (NumberFormatException nfe) {
                    throw new ParseException("Error parsing 'end' option. Reason: " + nfe.getMessage());
                }
            }
            if (line.hasOption("name")) {
                try {
                    ccvm.seqNameParser = Integer.parseInt(line.getOptionValue("name"));
                } catch (NumberFormatException nfe) {
                    throw new ParseException("Error parsing 'name' option. Reason: " + nfe.getMessage());
                }
            }
            if (line.hasOption("distance")) {
                try {
                    ccvm.distCalc = Integer.parseInt(line.getOptionValue("distance"));
                } catch (NumberFormatException nfe) {
                    throw new ParseException("Error parsing 'distance' option. Reason: " + nfe.getMessage());
                }
            }
            if (line.hasOption("upgma")) {
                ccvm.upgma = true;
            }
            if (line.hasOption("cluster")) {
                ccvm.apClusterOutfile = line.getOptionValue("cluster");
            }
            if (line.hasOption("distfile")) {
                ccvm.matrixOutFile = line.getOptionValue("distfile");
            }
            if (line.hasOption("xml")) {
                ccvm.xmlOutFile = line.getOptionValue("xml");
            }
            if (line.hasOption("nwk")) {
                ccvm.nwkOutFile = line.getOptionValue("nwk");
            }
            if (line.hasOption("embeddedSQL")) {
                ccvm.embeddedSQL = true;
            }
            if (line.hasOption("sql")) {
                ccvm.useSQL = true;
            }
            if (line.hasOption("entfile")) {
                ccvm.entOutFile = line.getOptionValue("entfile");
            }

            /** 
             * This is done inside creatDistanceMatrix.
             * The order of nmers and vectors is important!
             */
            if (line.hasOption("nmersfile")) {
                ccvm.nmersOutFile = line.getOptionValue("nmersfile");
            }
            if (line.hasOption("vectorsout")) {
                ccvm.vectorsOutFile = line.getOptionValue("vectorsout");
                ccvm.nmersOutFile = null;
            }

            if (line.hasOption("prefval")) {
                try {
                    ccvm.prefVal = Integer.parseInt(line.getOptionValue("prefval"));
                } catch (NumberFormatException nfe) {
                    throw new ParseException("Error parsing 'prefval' option. Reason: " + nfe.getMessage());
                }
            }

        } catch (ParseException exp) {
            // oops, something went wrong
            System.out.println(cli_title);
            System.out.println("Invalid option!  Reason: " + exp.getMessage());
            formatter.printHelp("ccv [options] -file ", options);
            return;
        }

        /** Print out who we are */
        LOG.info(cli_title);

        VectorSet set = null;
        DistanceMatrix distMatrix = null;
        CompleteMatrix matrix = null;

        if (ccvm.vectorsInFile != null) {
            LOG.info("Reading in CompleteCompositionVectors from " + ccvm.vectorsInFile);
            // we only save the data not everything that is in the vectorSet
            BufferedReader br = new BufferedReader(new FileReader(ccvm.vectorsInFile));
            matrix = CompleteMatrix.readJsonCompleteMatrix(br);
            br.close();
            ccvm.begin = matrix.getBegin();
            ccvm.end = matrix.getEnd();
            LOG.info(String.format("Loaded in %d samples and %d nmers (features)", matrix.getNames().size(),
                    matrix.getNmers().size()));

            /** just make an empty set */
            set = ccvm.generateCompleteCompositionVectorSet();

            /**
             * Need to think about this - should be able to use the nmer list
             * directly in generating the complete matrix then merge the two
             */
            if (ccvm.seqIter != null) {
                LOG.warn("Unable to process new samples when given a JSON vector set!");
            }
        } else {
            LOG.info("Generating complete composition vector set...");
            set = ccvm.generateCompleteCompositionVectorSet();

            /** If we have a sequnece iterator (i.e. a fasta file) then process those samples */
            if (ccvm.seqIter != null) {
                while (ccvm.seqIter.hasNext()) {
                    Sequence s = null;
                    try {
                        s = ccvm.seqIter.next();
                    } catch (NoSuchElementException e) {
                        LOG.fatal("Iteration error in sequence file!", e);
                        return;
                    }
                    String seqString = s.seqString();
                    set.addSequence(parseSequenceName(s, ccvm.seqNameParser), seqString);
                }
            }
        }

        if (!ccvm.calculateMatrix()) {
            LOG.info("No other operations left so finished!");
            return;
        }

        if (matrix == null) {
            LOG.info("Generating complete matrix of nmers...");
            try {
                matrix = set.getFullMatrix(ccvm.topNMers, ccvm.entOutFile);
            } catch (OutOfMemoryError e) {
                LOG.fatal("\nOut of memory while getting full matrix!\n"
                        + "Try limiting top nmers, increasing heap space, or "
                        + "changing the number of windows (begin/end).", e);
                return;
            }
        } // we loaded it in
        /**
         * set.getFullMatrix(matrix.getNmers)
         * and merge the two
         */

        BufferedWriter bw; // Used in several places

        /** Write out vectors or nmers before going on */
        if (ccvm.vectorsOutFile != null && ccvm.vectorsOutFile.length() != 0) {
            LOG.info(String.format("Writing %d nmers and %d vectors to file %s\n", matrix.getNmers().size(),
                    matrix.getNames().size(), ccvm.vectorsOutFile));
            //matrix.writeJSONObject(matrix.writeJsonCompleteMatrix(), ccvm.vectorsOutFile);
            bw = new BufferedWriter(new FileWriter(ccvm.vectorsOutFile));
            matrix.writeJsonCompleteMatrix(bw);
            bw.close();
        } else if (ccvm.nmersOutFile != null && ccvm.nmersOutFile.length() != 0) {
            LOG.info(String.format("Writing %d nmers to file %s\n", matrix.getNmers().size(), ccvm.nmersOutFile));
            //matrix.writeJSONObject(matrix.writeJsonNmers(), ccvm.nmersOutFile);
            bw = new BufferedWriter(new FileWriter(ccvm.nmersOutFile));
            matrix.writeJsonNmers(bw);
            bw.close();
        }

        /** Build distance matrix */
        if (!ccvm.calculateDistances()) {
            LOG.info("Done generating vector matrix. No other operations left so finished!");
            return;
        }
        LOG.info("Generating distance matrix...");
        distMatrix = ccvm.createDistanceMatrix(set, matrix);

        if (ccvm.calculateTree() && set.getSampleNames().size() > 2) {
            LOG.info("Creating tree");

            Tree tree = ccvm.createTree(distMatrix);
            if (ccvm.nwkOutFile != null) {
                try {
                    bw = new BufferedWriter(new FileWriter(ccvm.nwkOutFile));
                    ccvm.writeNwkTree(tree, bw);
                    bw.close();
                } catch (Exception ioe) {
                    LOG.error(String.format("Error in writing tree to newick file '%s'!", ccvm.nwkOutFile), ioe);
                }
            }

            /** We can output both */
            if (ccvm.xmlOutFile != null) {
                try {
                    bw = new BufferedWriter(new FileWriter(ccvm.xmlOutFile));
                    ccvm.writePhyloXMLTree(tree, bw);
                    bw.close();
                } catch (IOException ioe) {
                    LOG.error(String.format("Error in writing tree to phyloXML file '%s'!", ccvm.xmlOutFile), ioe);
                }
            }
        } else if (ccvm.calculateTree() && set.getSampleNames().size() <= 2) {
            LOG.error("Cannot build a tree with less than 3 samples!");
        }

        if (ccvm.matrixOutFile != null) {
            LOG.info("Writing distance matrix to file.");
            try {
                ccvm.writeDistanceMatrix(distMatrix, new BufferedWriter(new FileWriter(ccvm.matrixOutFile)));
            } catch (IOException ioe) {
                LOG.error(String.format("Error in writing distances to file '%s'!", ccvm.matrixOutFile), ioe);
            }
        }

        /** 
         * Run AP Clustering 
         * 
         * Cluster file format: sample_name(String)<tab>cluster_id(Integer)
         */
        if (ccvm.apClusterOutfile != null) {
            LOG.info("Running affinity propagation clustering...");
            AffinityPropagation ap = ccvm.cluster(distMatrix, ccvm.prefVal);
            int[] clusters = ap.getClusters();
            try {
                bw = new BufferedWriter(new FileWriter(ccvm.apClusterOutfile));
                for (int c = 0; c < clusters.length; c++) {
                    //String id = distMatrix.getIdentifier(c).getName();
                    String id = distMatrix.getIdentifier(c);
                    bw.write(id + "\t" + Integer.toString(clusters[c]) + "\n");

                }
                bw.close();
            } catch (IOException ioe) {
                LOG.error(String.format("Error in writing clustering results to the file '%s'!",
                        ccvm.apClusterOutfile), ioe);
            }
        }
        LOG.info("Finished!");
    }
}