us.levk.math.linear.HugeRealMatrix.java Source code

Java tutorial

Introduction

Here is the source code for the class us.levk.math.linear.HugeRealMatrix (file HugeRealMatrix.java).

Source

/**
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package us.levk.math.linear;

import static java.lang.Math.min;
import static java.nio.channels.FileChannel.MapMode.READ_WRITE;
import static us.levk.math.linear.Constants.LINEAR_MATH;
import static us.levk.math.linear.Constants.TEMP_FOLDER_KEY;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.text.NumberFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import lombok.Getter;
import lombok.SneakyThrows;

import org.apache.commons.math3.exception.NotStrictlyPositiveException;
import org.apache.commons.math3.exception.OutOfRangeException;
import org.apache.commons.math3.linear.AbstractRealMatrix;
import org.apache.commons.math3.linear.RealMatrix;

/**
 * Real matrix whose cells live in a memory-mapped temporary file rather than
 * on the Java heap.
 * <p>
 * Temporary files are created in the folder defined by the system property
 * us.levk.math.linear.tmpdir, failing to find that will default to the
 * java.io.tmpdir system property. Files are named after UUID.randomUUID()
 * with an extension of ".matrix".
 * <p>
 * Cells are stored row by row as 8-byte doubles. The file is mapped in
 * windows of at most {@code MAPPING_SIZE} bytes; since that size is a multiple
 * of 8, a single cell never straddles two windows.
 * <p>
 * Cell reads and writes take the shared side of an internal read/write lock
 * and {@link #close()} takes the exclusive side, so closing cannot overlap
 * with a cell access that is in progress. Access after {@link #close()} fails
 * with {@link IllegalStateException}.
 * 
 * @author levk
 * 
 */
public class HugeRealMatrix extends AbstractRealMatrix implements Closeable {

    /** Maximum number of bytes covered by one mapped window (1 GiB). */
    private static final int MAPPING_SIZE = 1 << 30;
    /** Matrices with more cells than this get a summary from toString(). */
    private static final int MAX_CELLS_IN_TO_STRING = 100;

    private final String name;
    private final RandomAccessFile file;
    private @Getter final int columnDimension;
    private @Getter int rowDimension;
    private final List<MappedByteBuffer> mappings = new ArrayList<>();
    private boolean closed = false;
    // Guards 'closed' and 'mappings': cell access shares the read lock,
    // close() takes the write lock
    private final ReadWriteLock lock = new ReentrantReadWriteLock();

    /**
     * Construct an empty matrix
     * <p>
     * The backing file handle is closed when the constructor returns; per the
     * contract of FileChannel.map() the mappings established here stay valid
     * after their channel has been closed. If anything fails the temporary
     * file is deleted before the failure is propagated.
     * 
     * @param width
     *            number of columns, must be at least 1
     * @param height
     *            number of rows, must be at least 1
     * @throws IOException
     *             if the backing file cannot be created or mapped
     * @throws NotStrictlyPositiveException
     *             if either dimension is less than 1
     */
    public HugeRealMatrix(int width, int height) throws IOException, NotStrictlyPositiveException {
        if (width < 1)
            throw new NotStrictlyPositiveException(width);
        if (height < 1)
            throw new NotStrictlyPositiveException(height);
        String name = randomFileName();
        try (RandomAccessFile file = new RandomAccessFile(name, "rw")) {
            this.name = name;
            this.file = file;
            this.columnDimension = width;
            this.rowDimension = height;
            mapFile(file, 8L * height * width);
        } catch (IOException | RuntimeException | Error e) {
            // Same cleanup as the other constructors: do not leave an orphaned
            // temporary file behind
            new File(name).delete();
            throw e;
        }
    }

    /**
     * Parse a matrix from a stream
     * <p>
     * The stream is consumed byte by byte; every byte that is not a delimiter
     * is appended to the current word as a single char. At each column
     * delimiter, row delimiter or end of stream the trimmed word is handed to
     * the parser and the resulting double is appended to the backing file. A
     * word that fails to parse in the first column of a row ends the matrix
     * quietly (this is what tolerates a trailing row delimiter); anywhere else
     * the ParseException is propagated. All rows must have the same number of
     * columns.
     * <p>
     * NOTE(review): for input that yields no rows at all the column dimension
     * ends up as -1 and the row dimension as 0 - confirm whether callers rely
     * on that before changing it.
     * 
     * @param stream
     *            source of the textual matrix
     * @param columnDelimiters
     *            characters separating cells within a row
     * @param rowDelimiters
     *            characters separating rows
     * @param parser
     *            converts each cell's text into a number
     * @throws IOException
     *             on read/write failure or when rows differ in length
     * @throws ParseException
     *             if a cell other than the first of a row cannot be parsed
     */
    public HugeRealMatrix(InputStream stream, char[] columnDelimiters, char[] rowDelimiters, NumberFormat parser)
            throws IOException, ParseException {
        StringBuilder word = new StringBuilder();
        Set<Integer> columnDelims = new HashSet<>();
        Set<Integer> rowDelims = new HashSet<>();
        for (char b : columnDelimiters)
            columnDelims.add((int) b);
        for (char b : rowDelimiters)
            rowDelims.add((int) b);
        String name = randomFileName();
        try (RandomAccessFile file = new RandomAccessFile(name, "rw")) {
            int columns = 0;
            int rows = 0;
            int prevColumns = -1; // -1 until the first row has been completed
            PushbackInputStream in = new PushbackInputStream(stream, 1024);
            startMatrixParse(in, columnDelims, rowDelims);
            boolean newRow = true;
            for (int c;;) {
                if (newRow) {
                    startRowParse(in, rows, columnDelims, parser);
                    newRow = false;
                }
                c = in.read();
                boolean endOfStream = c < 0;
                boolean endOfRow = rowDelims.contains(c) || endOfStream;
                if (columnDelims.contains(c) || endOfRow) {
                    try {
                        double value = parser.parse(word.toString().trim()).doubleValue();
                        file.writeDouble(value);
                    } catch (ParseException e) {
                        // An unparseable first cell terminates the matrix;
                        // anywhere else it is a genuine error
                        if (columns == 0)
                            break;
                        else
                            throw e;
                    }
                    columns++;
                    word.setLength(0);
                    if (endOfRow) {
                        rows++;
                        if (columns != prevColumns && prevColumns >= 0)
                            throw new IOException("Jagged row detected, previous row " + prevColumns
                                    + " columns, current row " + columns + " columns");
                        prevColumns = columns;
                        columns = 0;
                        if (endOfStream)
                            break;
                        else
                            newRow = true;
                    }
                } else
                    word.append((char) c);
            }
            endMatrixParse(in, prevColumns, rows);
            this.name = name;
            this.file = file;
            this.columnDimension = prevColumns;
            this.rowDimension = rows;
            mapFile(file, 8L * rowDimension * columnDimension);
        } catch (IOException | ParseException | RuntimeException | Error e) {
            new File(name).delete();
            throw e;
        }
    }

    /**
     * Called on by the parsing constructor before anything is read from the
     * stream. The default implementation does nothing.
     * 
     * @param in
     *            the stream about to be parsed
     * @param columnDelimiters
     *            column delimiter characters as int code values
     * @param rowDelimiters
     *            row delimiter characters as int code values
     * @throws IOException
     */
    protected void startMatrixParse(PushbackInputStream in, Set<Integer> columnDelimiters,
            Set<Integer> rowDelimiters) throws IOException {
    }

    /**
     * Called before a new row is parsed, this is also called before the first
     * row, immediately after startMatrixParse(). The default implementation
     * does nothing.
     * 
     * @param in
     *            the stream being parsed
     * @param row
     *            number of rows completed so far, i.e. the zero based index of
     *            the row about to be parsed
     * @param columnDelimiters
     *            column delimiter characters as int code values
     * @param parser
     *            the number parser passed to the constructor
     * @throws IOException
     */
    protected void startRowParse(PushbackInputStream in, int row, Set<Integer> columnDelimiters,
            NumberFormat parser) throws IOException {
    }

    /**
     * Called after parsing the last row of the matrix. The default
     * implementation does nothing.
     * 
     * @param in
     *            the stream that was parsed
     * @param width
     *            number of columns in the last completed row, -1 if no row was
     *            completed
     * @param height
     *            number of rows parsed
     * @throws IOException
     */
    protected void endMatrixParse(PushbackInputStream in, int width, int height) throws IOException {
    }

    /**
     * Creates a matrix from an iterator of values
     * <p>
     * Values are consumed in row-major order and written straight to the
     * backing file; the total number of values must be a multiple of
     * {@code columns}.
     * 
     * @param values
     *            cell values, row by row
     * @param columns
     *            number of columns, must be at least 1
     * @throws IOException
     *             on write failure, if the number of values is not a multiple
     *             of {@code columns}, or if the resulting row count does not
     *             fit in an int
     * @throws NotStrictlyPositiveException
     *             if {@code columns} is less than 1
     */
    public HugeRealMatrix(Iterator<Double> values, int columns) throws IOException {
        // Reject a bad width up front, before any temporary file exists;
        // zero would otherwise surface as an ArithmeticException and a
        // negative width as negative dimensions
        if (columns < 1)
            throw new NotStrictlyPositiveException(columns);
        String name = randomFileName();
        try (RandomAccessFile file = new RandomAccessFile(name, "rw")) {
            long count = 0;
            for (; values.hasNext(); count++)
                file.writeDouble(values.next());
            if (count % columns != 0)
                throw new IOException("Uneven column length, total values = " + count + " column length = "
                        + columns + " remainder = " + (count % columns));
            long rows = count / columns;
            // Row indices are ints; refuse rather than silently truncate
            if (rows > Integer.MAX_VALUE)
                throw new IOException("Too many rows: " + rows);
            this.name = name;
            this.file = file;
            this.columnDimension = columns;
            this.rowDimension = (int) rows;
            mapFile(file, 8L * rowDimension * columnDimension);
        } catch (IOException | RuntimeException | Error e) {
            new File(name).delete();
            throw e;
        }
    }

    /**
     * Maps the first {@code size} bytes of the file read/write, in
     * consecutive windows of at most MAPPING_SIZE bytes, appending each window
     * to the mappings list.
     * 
     * @param file
     *            backing file
     * @param size
     *            number of bytes to map
     * @throws IOException
     *             if a window cannot be mapped
     */
    private void mapFile(RandomAccessFile file, long size) throws IOException {
        for (long offset = 0; offset < size; offset += MAPPING_SIZE)
            mappings.add(file.getChannel().map(READ_WRITE, offset, min(size - offset, MAPPING_SIZE)));
    }

    /**
     * Releases the mappings, closes the backing file handle and deletes the
     * temporary file. Calling this more than once has no further effect.
     * 
     * @see java.io.Closeable#close()
     */
    @Override
    public void close() throws IOException {
        // Acquire outside the try so a failed acquisition never reaches unlock()
        lock.writeLock().lock();
        try {
            if (closed)
                return;
            closed = true;
            mappings.clear(); // Let gc take care of it
            try {
                file.close();
            } finally {
                // Attempt the delete even if closing the handle failed
                new File(name).delete();
            }
        } finally {
            lock.writeLock().unlock();
        }
    }

    /* (non-Javadoc)
     * @see java.lang.Object#finalize() */
    @Override
    protected void finalize() throws Throwable {
        try {
            close();
        } catch (IllegalStateException ignored) {
            // Nothing useful can be done about it during finalization
        } finally {
            super.finalize();
        }
    }

    /* (non-Javadoc)
     * @see org.apache.commons.math3.linear.AbstractRealMatrix#createMatrix(int,
     * int) */
    @SneakyThrows(IOException.class /* I guess it's cleaner than wrapping,
                                     * getting an IOException is a severe
                                     * condition anyway */)
    @Override
    public HugeRealMatrix createMatrix(int rowDimension, int columnDimension) throws NotStrictlyPositiveException {
        // Note the argument order: the constructor takes (width, height)
        return new HugeRealMatrix(columnDimension, rowDimension);
    }

    /**
     * Copies this matrix into a new file backed matrix of the same
     * dimensions. Cells that compare equal to 0.0 are not written.
     * NOTE(review): this presumably relies on a freshly mapped file reading
     * back as zeros - confirm.
     * 
     * @see org.apache.commons.math3.linear.AbstractRealMatrix#copy()
     */
    @Override
    public RealMatrix copy() {
        HugeRealMatrix result = createMatrix(rowDimension, columnDimension);
        for (int row = rowDimension; --row >= 0;)
            for (int column = columnDimension; --column >= 0;) {
                double value = getEntry(row, column);
                if (Double.compare(value, 0.0) != 0)
                    result.setEntry(row, column, value);
            }
        return result;
    }

    /**
     * Reads one cell.
     * 
     * @throws OutOfRangeException
     *             if row or column is outside the matrix
     * @throws IllegalStateException
     *             if the matrix has been closed
     * @see org.apache.commons.math3.linear.AbstractRealMatrix#getEntry(int, int)
     */
    @Override
    public double getEntry(int row, int column) throws OutOfRangeException {
        lock.readLock().lock();
        try {
            if (closed)
                throw new IllegalStateException("Already closed");
            long position = position(column, row);
            return mappings.get((int) (position / MAPPING_SIZE)).getDouble((int) (position % MAPPING_SIZE));
        } finally {
            lock.readLock().unlock();
        }
    }

    /**
     * Writes one cell. Only the shared (read) side of the lock is taken: the
     * lock exists to keep close() from overlapping with cell access, not to
     * serialise writers.
     * 
     * @throws OutOfRangeException
     *             if row or column is outside the matrix
     * @throws IllegalStateException
     *             if the matrix has been closed
     * @see org.apache.commons.math3.linear.AbstractRealMatrix#setEntry(int, int,
     *      double)
     */
    @Override
    public void setEntry(int row, int column, double value) throws OutOfRangeException {
        lock.readLock().lock();
        try {
            if (closed)
                throw new IllegalStateException("Already closed");
            long position = position(column, row);
            mappings.get((int) (position / MAPPING_SIZE)).putDouble((int) (position % MAPPING_SIZE), value);
        } finally {
            lock.readLock().unlock();
        }
    }

    /**
     * Small matrices (at most MAX_CELLS_IN_TO_STRING cells) are printed in
     * full by the superclass; larger ones are summarised as dimensions plus
     * the size of the backing file.
     * 
     * @see org.apache.commons.math3.linear.AbstractRealMatrix#toString()
     */
    @Override
    public String toString() {
        // Count cells as a long: an int product wraps for large matrices and
        // would send them down the print-every-cell path
        long cells = (long) rowDimension * columnDimension;
        if (cells <= MAX_CELLS_IN_TO_STRING)
            return super.toString();
        long num = new File(name).length();
        String[] suffix = new String[] { " Bytes", " KB", " MB", " GB", " TB" };
        int index = 0;
        // Stop at the last suffix so the index always stays inside the array
        for (; num >= 1024 && index < suffix.length - 1; num /= 1024, index++)
            ;
        String size = num + suffix[index];
        return getClass().getSimpleName() + "(" + columnDimension + " column"
                + (columnDimension == 1 ? "" : "s") + ", " + rowDimension + " row"
                + (rowDimension == 1 ? "" : "s") + ", " + size + " table file)";
    }

    /**
     * Returns position of the cell contents in the backing file
     * 
     * @param x
     *            column index
     * @param y
     *            row index
     * @return byte offset of the cell; rows are stored one after another, 8
     *         bytes per cell
     * @throws OutOfRangeException
     *             if either index is outside the matrix
     * @throws IllegalStateException
     */
    private long position(int x, int y) throws OutOfRangeException, IllegalStateException {
        checkRow(y);
        checkColumn(x);
        return (((long) y) * columnDimension + x) * 8;
    }

    /**
     * Checks if the requested row is inbounds
     * 
     * @param row
     *            zero based row index
     * @throws OutOfRangeException
     *             if row is negative or not less than the row dimension
     */
    protected final void checkRow(int row) throws OutOfRangeException {
        checkRange(row, 0, rowDimension);
    }

    /**
     * Checks if the requested column is inbounds
     * 
     * @param column
     *            zero based column index
     * @throws OutOfRangeException
     *             if column is negative or not less than the column dimension
     */
    protected final void checkColumn(int column) throws OutOfRangeException {
        checkRange(column, 0, columnDimension);
    }

    /**
     * Checks if the passed number is within the half open range [lo, hi)
     * 
     * @param num
     *            value to check
     * @param lo
     *            smallest allowed value (inclusive)
     * @param hi
     *            upper bound (exclusive)
     * @throws OutOfRangeException
     *             if num is outside the range; the exception reports the
     *             inclusive bounds lo and hi - 1
     */
    private static void checkRange(int num, int lo, int hi) throws OutOfRangeException {
        // hi is a dimension, so the index equal to it is already out of bounds
        if (num < lo || num >= hi)
            throw new OutOfRangeException(num, lo, hi - 1);
    }

    /**
     * Generates a unique file name
     * <p>
     * NOTE(review): the folder property and the file name are concatenated
     * without a path separator, so this assumes the configured folder value
     * already ends with one - verify against Constants.
     * 
     * @return configured temp folder value followed by a random UUID and
     *         ".matrix"
     */
    private static String randomFileName() {
        return LINEAR_MATH.getProperty(TEMP_FOLDER_KEY) + UUID.randomUUID() + ".matrix";
    }
}