uk.ac.ebi.mdk.domain.matrix.AbstractReactionMatrix.java Source code

Java tutorial

Introduction

Here is the source code for uk.ac.ebi.mdk.domain.matrix.AbstractReactionMatrix.java

Source

/*
 * Copyright (c) 2013. EMBL, European Bioinformatics Institute
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package uk.ac.ebi.mdk.domain.matrix;

import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import org.apache.commons.lang.mutable.MutableInt;
import org.apache.log4j.Logger;

import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintStream;
import java.io.Writer;
import java.lang.reflect.Array;
import java.security.InvalidParameterException;
import java.util.*;
import java.util.Map.Entry;

/**
 * Generic reaction matrix class
 *
 * @author johnmay
 * @param <T> Type of the matrix (Integer,Float,String)
 * @param <M> The type for the compound names (uses equals() method) this allows
 * to have the metabolites stored as InChIs,InChIKeys,IMolecules etc..
 * @param <R> The type of the reactions
 */
public abstract class AbstractReactionMatrix<T, M, R> implements ReactionMatrix<T, M, R> {

    private static final Logger LOGGER = Logger.getLogger(AbstractReactionMatrix.class);

    /**
     * Initial capacity of the matrix (molecules). The name is misspelled
     * ("INTIAL") but is public, so it is left as is.
     */
    public static final int INTIAL_MOLECULE_CAPACITY = 10;

    /**
     * Initial capacity of the matrix (reactions). The name is misspelled
     * ("INTIAL") but is public, so it is left as is.
     */
    public static final int INTIAL_REACTION_CAPACITY = 10;

    /**
     * Look-up from a molecule to its row index in the matrix
     */
    private LinkedHashMap<M, Integer> moleculeMap;

    /**
     * Multimap of reactions to matrix column indices
     */
    private Multimap<R, Integer> reactionMap;

    /**
     * Molecules define the row indices (first index of the matrix)
     */
    protected M[] molecules;

    /**
     * Reactions define the column indices (second index of the matrix)
     */
    private R[] reactions;

    /**
     * Value storage, addressed as matrix[molecule index][reaction index]
     */
    private T[][] matrix;

    /**
     * Current molecule capacity (number of allocated rows)
     */
    protected int moleculeCapacity;

    /**
     * Current reaction capacity (number of allocated columns)
     */
    protected int reactionCapacity;

    /**
     * Current molecule count (number of rows in use)
     */
    public int moleculeCount = 0;

    /**
     * Current reaction count (number of columns in use)
     */
    public int reactionCount = 0;

    /**
     * Running count of the non-null cells stored in the matrix
     */
    private int nonNullCount = 0;

    /**
     * Creates a reaction matrix sized with {@link #INTIAL_MOLECULE_CAPACITY}
     * and {@link #INTIAL_REACTION_CAPACITY}.
     */
    protected AbstractReactionMatrix() {
        this(INTIAL_MOLECULE_CAPACITY, INTIAL_REACTION_CAPACITY);
    }

    /**
     * Creates a reaction matrix with the given initial capacities. Choosing
     * capacities close to the expected final size reduces the number of
     * resizes. Only the capacities and the look-up maps are set up here; the
     * backing arrays are allocated by {@code init}.
     *
     * @param n Initial capacity of molecules
     * @param m Initial capacity of reactions
     */
    protected AbstractReactionMatrix(int n, int m) {
        this.moleculeCapacity = n;
        this.reactionCapacity = m;

        // look-up indices (molecule -> row, reaction -> columns)
        this.reactionMap = HashMultimap.create(m, 1);
        this.moleculeMap = new LinkedHashMap<M, Integer>(10);
    }

    /**
     * Allocates the backing arrays using the classes reported by
     * {@link #getMoleculeClass()}, {@link #getReactionClass()} and
     * {@link #getValueClass()}.
     *
     * @return this matrix, to allow call chaining
     */
    public AbstractReactionMatrix<T, M, R> init() {
        final Class<? extends M> moleculeType = getMoleculeClass();
        final Class<? extends R> reactionType = getReactionClass();
        final Class<? extends T> valueType = getValueClass();
        init(moleculeType, reactionType, valueType);
        return this;
    }

    /**
     * Allocates the reflective data structures (value matrix, molecule array
     * and reaction array) at the current capacities.
     *
     * @param moleculeClass component type of the molecule array
     * @param reactionClass component type of the reaction array
     * @param typeClass     component type of the value matrix
     */
    @SuppressWarnings("unchecked")
    public void init(Class<? extends M> moleculeClass, Class<? extends R> reactionClass,
            Class<? extends T> typeClass) {

        final int rows = moleculeCapacity;
        final int columns = reactionCapacity;

        // molecules index the outer array and reactions the inner one; the
        // original author chose this layout expecting more reactions than
        // molecules, so that fewer resizes are needed
        this.matrix = (T[][]) Array.newInstance(typeClass, rows, columns);

        this.molecules = (M[]) Array.newInstance(moleculeClass, rows);
        this.reactions = (R[]) Array.newInstance(reactionClass, columns);
    }

    /**
     * @return the class used as component type when the molecule array is
     *         allocated reflectively
     */
    public abstract Class<? extends M> getMoleculeClass();

    /**
     * @return the class used as component type when the reaction array is
     *         allocated reflectively
     */
    public abstract Class<? extends R> getReactionClass();

    /**
     * @return the class used as component type when value arrays (matrix rows
     *         and extracted value arrays) are allocated reflectively
     */
    public abstract Class<? extends T> getValueClass();

    /**
     * Stores a molecule at the given row index, growing the molecule count if
     * the index lies beyond it. The molecule look-up map is NOT updated by
     * this method, so {@code getIndex} and {@code contains} will not see the
     * molecule.
     *
     * @param i row index
     * @param m the molecule to store
     * @return true if the molecule was stored, false if {@code i} is beyond
     *         the current capacity (previously this always returned false)
     * @throws ArrayIndexOutOfBoundsException if {@code i} is negative
     */
    public boolean setMolecule(int i, M m) {
        if (i >= molecules.length) {
            return false;
        }
        molecules[i] = m;
        if (i >= moleculeCount) {
            moleculeCount = i + 1;
        }
        return true;
    }

    /**
     * Stores a reaction at the given column index, growing the reaction count
     * if the index lies beyond it. The reaction look-up map is NOT updated by
     * this method, so {@code getColumnIndicies} will not see the reaction.
     *
     * @param j column index
     * @param m the reaction to store
     * @return true if the reaction was stored, false if {@code j} is beyond
     *         the current capacity (previously this always returned false)
     * @throws ArrayIndexOutOfBoundsException if {@code j} is negative
     */
    public boolean setReaction(int j, R m) {
        if (j >= reactions.length) {
            return false;
        }
        reactions[j] = m;
        if (j >= reactionCount) {
            reactionCount = j + 1;
        }
        return true;
    }

    /**
     * Stores a non-null value in the cell at row {@code i}, column {@code j}
     * and keeps the non-null count up to date. Null values are ignored.
     *
     * @param i     row (molecule) index
     * @param j     column (reaction) index
     * @param value the value to store
     * @return true if the value was stored, false if the value was null or
     *         an index was beyond the current capacity (previously this always
     *         returned false)
     * @throws ArrayIndexOutOfBoundsException if an index is negative
     */
    public boolean setValue(int i, int j, T value) {
        if (value == null || i >= matrix.length || j >= matrix[i].length) {
            return false;
        }
        // only a previously empty cell adds to the non-null count
        if (matrix[i][j] == null) {
            nonNullCount++;
        }
        matrix[i][j] = value;
        return true;
    }

    /**
     * Adds a reaction to the matrix as a new column. Molecules not seen
     * before are appended as new rows. If an existing column already holds
     * exactly the same molecule/value pairs the reaction is considered a
     * duplicate: nothing is added and the index of the existing column is
     * returned.
     *
     * The matrix must have been initialised (see {@code init}) beforehand.
     *
     * @param reaction     the reaction to add
     * @param newMolecules the molecules participating in the reaction
     * @param values       the value for each molecule, parallel to
     *                     {@code newMolecules}; null entries are stored as
     *                     empty cells
     * @return the index of the reaction (or of the matching existing one)
     */
    @Override
    public int addReaction(R reaction, M[] newMolecules, T[] values) {

        // ensure there is enough space
        ensure(moleculeCount + newMolecules.length, reactionCount + 1);

        // index molecules that are new; remember if at least one was already known
        boolean intersect = false;
        for (M m : newMolecules) {
            intersect = !ensure(m) || intersect;
        }

        if (intersect) {
            // at least one molecule already existed: count, per existing
            // column, how many of the molecule/value pairs it shares
            Map<Integer, MutableInt> candidateReactions = new HashMap<Integer, MutableInt>();
            for (int i = 0; i < newMolecules.length; i++) {
                for (Entry<Integer, T> e : getReactions(newMolecules[i]).entrySet()) {
                    // stored values are never null, so comparing from the
                    // stored side tolerates a null entry in 'values'
                    if (e.getValue().equals(values[i])) {
                        MutableInt shared = candidateReactions.get(e.getKey());
                        if (shared == null) {
                            shared = new MutableInt();
                            candidateReactions.put(e.getKey(), shared);
                        }
                        shared.increment();
                    }
                }
            }
            // a column is a duplicate when it shares every pair and has no extra participants
            for (Entry<Integer, MutableInt> e : candidateReactions.entrySet()) {
                if (e.getValue().intValue() == values.length
                        && getReactionValues(e.getKey()).length == values.length) {
                    LOGGER.debug("Duplicate reaction: " + reactions[e.getKey()] + " and " + reaction);
                    return e.getKey();
                }
            }
        }

        // set the values to the matrix
        for (int i = 0; i < newMolecules.length; i++) {
            int row = moleculeMap.get(newMolecules[i]);
            T previous = matrix[row][reactionCount];
            // keep the count exact even if the same molecule is listed twice
            if (previous == null && values[i] != null) {
                nonNullCount++;
            } else if (previous != null && values[i] == null) {
                nonNullCount--;
            }
            matrix[row][reactionCount] = values[i];
        }

        // index the reaction
        int index = reactionCount;
        reactions[index] = reaction;
        reactionMap.put(reaction, index);
        reactionCount = index + 1;

        return index;

    }

    /**
     * Ensures the matrix has the desired capacity to store the specified n
     * (molecules) and m (reactions). If not, the matrix is resized
     * accordingly. Each dimension only grows when that dimension is actually
     * too small (previously a molecule resize also forced the reaction
     * capacity to more than double).
     *
     * @param n number of molecules
     * @param m number of reactions
     */
    @SuppressWarnings("unchecked")
    public void ensure(int n, int m) {

        boolean growMolecules = n >= moleculeCapacity;
        boolean growReactions = m >= reactionCapacity;
        if (!growMolecules && !growReactions) {
            return;
        }

        LOGGER.debug("Expanding capacity...");

        if (growMolecules) {
            moleculeCapacity = 1 + (Math.max(n, moleculeCapacity) * 2);
            molecules = Arrays.copyOf(molecules, moleculeCapacity);
            matrix = Arrays.copyOf(matrix, moleculeCapacity);
        }
        if (growReactions) {
            reactionCapacity = 1 + Math.max(m, reactionCapacity) * 2;
            reactions = Arrays.copyOf(reactions, reactionCapacity);
        }

        // allocate the new (null) molecule rows and bring existing rows to the
        // current reaction capacity; every row is copied, as before, so each
        // slot ends up with its own array
        for (int i = 0; i < matrix.length; i++) {
            matrix[i] = matrix[i] == null ? (T[]) Array.newInstance(getValueClass(), reactionCapacity)
                    : Arrays.copyOf(matrix[i], reactionCapacity);
        }

        LOGGER.debug("...new capacity:" + moleculeCapacity + "," + reactionCapacity);
    }

    /**
     * Makes sure the molecule is indexed. A molecule that is already known
     * leaves the structure untouched; an unknown molecule is appended at the
     * next free row, registered in the look-up map, and the molecule count is
     * incremented.
     *
     * @param molecule the molecule to look up or add
     * @return true if the molecule was added (structure modified), false if it
     *         was already present
     */
    private boolean ensure(M molecule) {
        if (moleculeMap.containsKey(molecule)) {
            return false;
        }
        final int row = moleculeCount;
        molecules[row] = molecule;
        moleculeMap.put(molecule, row);
        moleculeCount = row + 1;
        return true;
    }

    /**
     * Prints the matrix to {@code System.out} using a space as separator.
     */
    public void display() {
        final PrintStream out = System.out;
        display(out, ' ');
    }

    /**
     * Prints the matrix to the given stream using a space as separator.
     *
     * @param stream Stream to write to
     */
    public void display(PrintStream stream) {
        final char separator = ' ';
        display(stream, separator);
    }

    /**
     * Prints the matrix to the given stream with the given separator. Empty
     * cells are shown as a single space and both name widths are 4.
     *
     * @param stream    Stream to write to
     * @param separator character written before every value cell
     */
    public void display(PrintStream stream, char separator) {
        final String emptyCell = " ";
        final int nameWidth = 4;
        display(stream, separator, emptyCell, nameWidth, nameWidth);
    }

    /**
     * Prints the matrix to the given stream: a header line with the reactions
     * followed by one line per molecule holding its name and values.
     *
     * Note that the lengths are used as minimum field widths of a
     * {@code %Ns} format, so shorter names are left-padded; longer names are
     * not truncated.
     *
     * @param stream        Stream to write to
     * @param seperator     character written before every reaction/value cell
     * @param empty         The text to show in place of null values
     * @param molNameLength Field width used for molecule names
     * @param rxnNameLength Field width used for reaction names and values
     */
    public void display(PrintStream stream, char seperator, String empty, int molNameLength, int rxnNameLength) {

        final String nameCell = "%" + molNameLength + "s";
        final String dataCell = seperator + " %" + rxnNameLength + "s";

        // header: blank top-left corner followed by the reactions
        stream.printf(nameCell, "");
        for (int j = 0; j < reactionCount; j++) {
            stream.printf(dataCell, reactions[j]);
        }
        stream.println();

        // body: one line per molecule
        for (int i = 0; i < moleculeCount; i++) {
            stream.printf(nameCell, molecules[i]);
            for (int j = 0; j < reactionCount; j++) {
                final T value = matrix[i][j];
                final Object shown = value == null ? empty : value;
                stream.printf(dataCell, shown.toString());
            }
            stream.println();
        }

    }

    /**
     * Returns a copy of the values as a fixed size 2D array, trimmed to the
     * current molecule count (rows) and reaction count (columns).
     *
     * @return reaction matrix without the unused capacity
     */
    public T[][] getFixedMatrix() {
        // drop the unused rows first, then the unused columns of each row
        final T[][] trimmed = Arrays.copyOf(matrix, moleculeCount);
        int row = 0;
        for (T[] values : trimmed) {
            trimmed[row++] = Arrays.copyOf(values, reactionCount);
        }
        return trimmed;
    }

    /**
     * Reads the value stored at a given position.
     *
     * @param i row (molecule) index
     * @param j column (reaction) index
     * @return The value at the requested location (null for an empty cell)
     */
    public T get(int i, int j) {
        final T[] row = matrix[i];
        return row[j];
    }

    /**
     * Adds every reaction of the specified 'other' matrix to this one. Note:
     * the values are not deep copied.
     *
     * @param other the matrix to take the reactions from
     * @return map from the reaction index in {@code other} to the index the
     *         reaction has in this matrix
     */
    public BiMap<Integer, Integer> assign(AbstractReactionMatrix<T, M, R> other) {

        // pre-size for the worst case: all molecules and reactions of 'other'
        // are new (the molecule term previously used other's reaction count)
        this.ensure(getMoleculeCount() + other.getMoleculeCount(), getReactionCount() + other.getReactionCount());

        BiMap<Integer, Integer> map = HashBiMap.create();

        for (int j = 0; j < other.getReactionCount(); j++) {
            int added = this.addReaction(other.getReaction(j), other.getReactionMolecules(j),
                    other.getReactionValues(j));
            map.put(j, added);
        }

        return map;

    }

    /**
     * Values of a molecule (row) across all reactions in use.
     *
     * @param i row (molecule) index
     * @return fixed-size list backed by a copy of the row
     */
    public List<T> getValuesForMolecule(int i) {
        final T[] row = getRow(i);
        return Arrays.asList(row);
    }

    /**
     * @return the running count of non-null cells
     */
    public int getNonNullCount() {
        return this.nonNullCount;
    }

    /**
     * Removes the row (molecule) at index i, shifting all following rows up
     * by one and rebuilding the molecule look-up map.
     *
     * @param i index of the row to remove
     * @throws IndexOutOfBoundsException if {@code i} is not a row in use
     */
    public void removeRow(int i) {
        if (i < 0 || i >= moleculeCount) {
            throw new IndexOutOfBoundsException("Row " + i + " is outside of [0, " + moleculeCount + ")");
        }

        final T[] removed = matrix[i];
        for (int j = 0; j < reactionCount; j++) {
            if (removed[j] != null) {
                nonNullCount--;
            }
        }

        final int lastRow = matrix.length - 1;
        System.arraycopy(matrix, i + 1, matrix, i, lastRow - i);
        System.arraycopy(molecules, i + 1, molecules, i, molecules.length - i - 1);

        // the shift leaves the last slot pointing at the same array as its
        // neighbour; recycle the removed row (cleared) so no two rows are
        // aliased, and drop the stale trailing molecule
        Arrays.fill(removed, null);
        matrix[lastRow] = removed;
        molecules[molecules.length - 1] = null;

        moleculeCount--;
        buildMoleculeMap();
    }

    /**
     * Removes the column (reaction) at index j, shifting all following
     * columns left by one and rebuilding the reaction look-up map.
     *
     * @param j index of the column to remove
     * @throws IndexOutOfBoundsException if {@code j} is not a column in use
     */
    public void removeColumn(int j) {
        if (j < 0 || j >= reactionCount) {
            throw new IndexOutOfBoundsException("Column " + j + " is outside of [0, " + reactionCount + ")");
        }

        for (T[] row : matrix) {
            if (row[j] != null) {
                nonNullCount--;
            }
            final int lastColumn = row.length - 1;
            System.arraycopy(row, j + 1, row, j, lastColumn - j);
            // the shift duplicates the last cell; clear the stale copy
            row[lastColumn] = null;
        }

        System.arraycopy(reactions, j + 1, reactions, j, reactions.length - j - 1);
        reactions[reactions.length - 1] = null;

        reactionCount--;
        buildReactionMap();
    }

    /**
     * Re-creates the molecule look-up from the molecule array. The internal
     * map is cleared and refilled in place, and that same instance is
     * returned.
     *
     * @return the (internal) molecule to row index map
     */
    private Map<M, Integer> buildMoleculeMap() {
        moleculeMap.clear();
        int row = 0;
        while (row < moleculeCount) {
            moleculeMap.put(molecules[row], row);
            row++;
        }
        return moleculeMap;
    }

    /**
     * Re-creates the reaction look-up from the reaction array. The internal
     * multimap is cleared and refilled in place, and that same instance is
     * returned.
     *
     * @return the (internal) reaction to column indices multimap
     */
    private Multimap<R, Integer> buildReactionMap() {
        reactionMap.clear();
        int column = 0;
        while (column < reactionCount) {
            reactionMap.put(reactions[column], column);
            column++;
        }
        return reactionMap;
    }

    /**
     * Access to all values of a molecule, one per reaction in use.
     *
     * @param i The row to retrieve
     * @return copy of the row, trimmed to the reaction count
     */
    public T[] getRow(int i) {
        final T[] stored = matrix[i];
        return Arrays.copyOf(stored, reactionCount);
    }

    /**
     * Access to all values of a molecule, one per reaction in use.
     *
     * @param m the molecule whose row is wanted
     * @return copy of the row, trimmed to the reaction count
     * @throws InvalidParameterException if the molecule is not indexed
     */
    public T[] getRow(M m) {
        if (moleculeMap.containsKey(m)) {
            return getRow(this.moleculeMap.get(m));
        }
        throw new InvalidParameterException("Molecule not found");
    }

    /**
     * Column indices registered for a reaction in the reaction look-up.
     *
     * @param r the reaction
     * @return the indices (empty if the reaction is not registered)
     */
    public Integer[] getColumnIndicies(R r) {
        final Collection<Integer> indices = reactionMap.get(r);
        return indices.toArray(new Integer[0]);
    }

    /**
     * Access to the values for a single reaction, one per molecule in use
     * (null where the molecule does not take part).
     *
     * The array is allocated with the value class rather than as an
     * {@code Object[]}; an {@code Object[]} cast to {@code T[]} fails with a
     * ClassCastException as soon as a caller assigns it to a concrete array
     * type.
     *
     * @param j column (reaction) index
     * @return copy of the column
     */
    @SuppressWarnings("unchecked")
    public T[] getColumn(int j) {
        T[] copy = (T[]) Array.newInstance(getValueClass(), moleculeCount);
        for (int i = 0; i < moleculeCount; i++) {
            copy[i] = matrix[i][j];
        }
        return copy;
    }

    /**
     * Collects the columns registered for a reaction.
     *
     * @param r the reaction
     * @return map of column index to a copy of that column
     */
    public Map<Integer, T[]> getColumns(R r) {
        final Map<Integer, T[]> byIndex = new HashMap<Integer, T[]>();
        final Integer[] indices = getColumnIndicies(r);
        for (int k = 0; k < indices.length; k++) {
            final Integer column = indices[k];
            byIndex.put(column, getColumn(column));
        }
        return byIndex;
    }

    /**
     * Counts, for each molecule (row), the number of reactions in which it
     * has no value. Despite the method name the result is indexed by
     * molecule, not by reaction.
     *
     * @return Array holding the null count of each molecule row
     */
    public Integer[] countColumnNulls() {
        final Integer[] nulls = new Integer[moleculeCount];
        for (int i = 0; i < moleculeCount; i++) {
            int missing = 0;
            for (int j = 0; j < reactionCount; j++) {
                if (matrix[i][j] == null) {
                    missing++;
                }
            }
            nulls[i] = missing;
        }
        return nulls;
    }

    /**
     * Returns the molecules in use as a fixed size array.
     *
     * @return copy of the molecule array trimmed to the molecule count
     */
    @Override
    public M[] getMolecules() {
        final int used = moleculeCount;
        return Arrays.copyOf(molecules, used);
    }

    /**
     * Returns the reactions in use as a fixed size array.
     *
     * @return copy of the reaction array trimmed to the reaction count
     */
    @Override
    public R[] getReactions() {
        final int used = reactionCount;
        return Arrays.copyOf(reactions, used);
    }

    /**
     * Number of molecule rows in use. This reports the same counter that
     * bounds every row iteration in this class; previously it reported the
     * size of the look-up map, which differs once {@code setMolecule} has
     * been used.
     *
     * @return current molecule count
     */
    public int getMoleculeCount() {
        return moleculeCount;
    }

    /**
     * @return number of reaction columns in use
     */
    public int getReactionCount() {
        return this.reactionCount;
    }

    /**
     * Looks up the molecule stored at a row index.
     *
     * @param i row index
     * @return the molecule at that index
     */
    @Override
    public M getMolecule(Integer i) {
        return this.molecules[i];
    }

    /**
     * Looks up the row index of a molecule.
     *
     * @param molecule the molecule
     * @return the row index, or null if the molecule is not in the look-up
     */
    public Integer getIndex(M molecule) {
        return this.moleculeMap.get(molecule);
    }

    /**
     * Looks up the reaction stored at a column index.
     *
     * @param j column index
     * @return the reaction at that index
     */
    @Override
    public R getReaction(Integer j) {
        return this.reactions[j];
    }

    /**
     * Collects the non-null values of a reaction column, in row order.
     *
     * @param j column (reaction) index
     * @return array of the values of the participating molecules
     */
    @SuppressWarnings("unchecked")
    public T[] getReactionValues(Integer j) {
        final List<T> present = new ArrayList<T>();
        for (int i = 0; i < moleculeCount; i++) {
            final T value = matrix[i][j];
            if (value == null) {
                continue;
            }
            present.add(value);
        }
        final T[] target = (T[]) Array.newInstance(getValueClass(), present.size());
        return present.toArray(target);
    }

    /**
     * Collects the molecules that have a non-null value in a reaction
     * column, in row order.
     *
     * @param j column (reaction) index
     * @return array of the participating molecules
     */
    @SuppressWarnings("unchecked")
    public M[] getReactionMolecules(Integer j) {
        final List<M> participants = new ArrayList<M>();
        for (int i = 0; i < moleculeCount; i++) {
            if (matrix[i][j] == null) {
                continue;
            }
            participants.add(molecules[i]);
        }
        final M[] target = (M[]) Array.newInstance(getMoleculeClass(), participants.size());
        return participants.toArray(target);
    }

    /**
     * @param molecule the molecule to look for
     * @return true if the molecule is present in the molecule look-up
     */
    public boolean contains(M molecule) {
        return this.moleculeMap.containsKey(molecule);
    }

    /**
     * Access the reactions for a specific molecule.
     *
     * @param molecule the molecule to look up
     * @return Map of the reaction (column) index to the non-null value the
     *         molecule has in that reaction; empty if the molecule is not in
     *         the look-up (previously this threw a NullPointerException)
     */
    @Override
    public Map<Integer, T> getReactions(M molecule) {
        Map<Integer, T> subReactions = new HashMap<Integer, T>();
        Integer i = moleculeMap.get(molecule);
        if (i == null) {
            // unknown molecule: it takes part in no reaction
            return subReactions;
        }
        T[] row = matrix[i];
        for (int j = 0; j < reactionCount; j++) {
            if (row[j] != null) {
                subReactions.put(j, row[j]);
            }
        }
        return subReactions;
    }

    /**
     * Returns the set of molecules that take part in more reactions than the
     * provided threshold.
     *
     * @param threshold The number of reactions above which a molecule is
     *                  considered highly connected (0 returns an empty set)
     * @return Set of the highly connected molecules
     */
    @Override
    public Set<M> getHighlyConnectedMolecules(int threshold) {

        final Integer[] nullsPerMolecule = countColumnNulls();
        final Set<M> highlyConnected = new HashSet<M>();

        // a threshold of exactly zero disables the selection
        if (threshold != 0) {
            for (int i = 0; i < nullsPerMolecule.length; i++) {
                final int occurrences = reactionCount - nullsPerMolecule[i];
                if (occurrences > threshold) {
                    highlyConnected.add(getMolecule(i));
                }
            }
        }

        return highlyConnected;

    }

    /**
     * Writes the reaction matrix as a text file for Cytoscape. Delegates to
     * {@link #toTextFile(FileWriter, int)} with a connectivity threshold of 0
     * and discards the returned map.
     *
     * @param fw file writer to write the text file to
     * @throws java.io.IOException
     */
    public void toTextFile(FileWriter fw) throws IOException {
        final int threshold = 0;
        toTextFile(fw, threshold);
    }

    /*
     * TODO (from the original author): move me to IO
     */
    /**
     * Writes the reaction matrix as a text file for Cytoscape. Delegates to
     * {@code toSIF} with an empty edge value list and {@code split} set to
     * true.
     *
     * @param fw                     file writer to write the text file to
     * @param connectivityThreshhold threshold passed on to {@code toSIF} for
     *                               selecting highly connected molecules
     * @return the map returned by {@code toSIF}
     * @throws java.io.IOException
     */
    public Map<String, M> toTextFile(FileWriter fw, int connectivityThreshhold) throws IOException {
        final List<Object> noEdgeValues = new ArrayList<Object>();
        return toSIF(fw, connectivityThreshhold, noEdgeValues, true);
    }

    /**
     * Intended to write the reaction matrix in a text format Cytoscape can
     * read (one connection per line), skipping or splitting highly connected
     * molecules.
     *
     * NOTE: the export is currently NOT implemented. The method only
     * validates the edge values and then throws
     * {@link UnsupportedOperationException}; nothing is ever written to the
     * writer. The previous implementation is kept below, commented out, as a
     * reference; it was written against an older API and does not compile as
     * is.
     *
     * @param writer writer to write the text format to (currently unused)
     * @param connectivityThreshhold the value above which molecules are
     * considered highly connected (0 selects none)
     * @param edgeValues A list of edge values to use for the reactions, this
     * allows you to provide a connection type i.e. directionality. When
     * non-null and non-empty its size must equal the reaction count. The
     * intended behaviour (per the reference code) is to print each value with
     * toString and to fall back to the matrix value for an empty or null list
     * @param split in the reference code, whether a highly connected molecule
     * is written under a ticker-prefixed name (and added to the returned map)
     * instead of being ignored
     * @return an empty map when the number of edge values does not match the
     * number of reactions (an error is logged); otherwise the method does not
     * return normally
     * @throws UnsupportedOperationException whenever the edge values pass
     * validation
     * @throws java.io.IOException declared for the intended writing; not
     * thrown by the current code
     */
    public Map<String, M> toSIF(Writer writer, int connectivityThreshhold, List edgeValues, boolean split)
            throws IOException {

        Map<String, M> highlyConnectedMap = new HashMap<String, M>();

        // a non-empty edge value list must provide exactly one value per reaction
        if (edgeValues != null && edgeValues.size() != getReactionCount() && !edgeValues.isEmpty()) {
            LOGGER.error("number of edge values does not equal the number reactions");
            return highlyConnectedMap;
        }

        // computed for the (disabled) export below; currently unused
        Set<M> highlyConnected = getHighlyConnectedMolecules(connectivityThreshhold);
        M[] workingMolecules = getMolecules();

        throw new UnsupportedOperationException("FIXME");

        // Reference only: previous implementation, written against an older API.
        //        for (M molecule : workingMolecules) {
        //            if (highlyConnected.contains(molecule)) {
        //                if (split) {
        //                    Map<R, T> reactionMap = getReactions(molecule);
        //                    for (R r : reactionMap.keySet()) {
        //                        Object edge = edgeValues == null || edgeValues.isEmpty() ? matrix[molecules.get(molecule)][reactionsOld.get(r)] : edgeValues.get(reactionsOld.get(r));
        //                        String newName = ticker++ + "-" + molecule.toString();
        //                        writer.write(r + "\t" + edge.toString() + "\t" + newName + "\n");
        //                        highlyConnectedMap.put(newName, molecule);
        //                    }
        //                } else {
        //                    logger.debug("ignoring highly connected: " + highlyConnected);
        //                }
        //            } else {
        //                Map<R, T> reactionMap = getReactions(molecule);
        //                for (R r : reactionMap.keySet()) {
        //                    Object edge = edgeValues == null || edgeValues.isEmpty() ? matrix[molecules.get(molecule)][reactionsOld.get(r)] : edgeValues.get(reactionsOld.get(r));
        //                    writer.write(r + "\t" + edge.toString() + "\t" + molecule + "\n");
        //                }
        //            }
        //        }
        //       return highlyConnectedMap;
    }
}