com.ibm.bi.dml.runtime.io.MatrixReader.java Source code

Java tutorial

Introduction

Here is the source code for com.ibm.bi.dml.runtime.io.MatrixReader.java

Source

/**
 * (C) Copyright IBM Corp. 2010, 2015
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
*/

package com.ibm.bi.dml.runtime.io;

import java.io.EOFException;
import java.io.IOException;
import java.util.LinkedList;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import com.ibm.bi.dml.hops.OptimizerUtils;
import com.ibm.bi.dml.runtime.DMLRuntimeException;
import com.ibm.bi.dml.runtime.matrix.data.MatrixBlock;
import com.ibm.bi.dml.runtime.util.MapReduceTool;

/**
 * Base class for all format-specific matrix readers. Every reader is required to implement the basic 
 * read functionality but might provide additional custom functionality. Any non-default parameters
 * (e.g., CSV read properties) should be passed into custom constructors. There is also a factory
 * for creating format-specific readers. 
 * 
 */
public abstract class MatrixReader {
    //internal configuration (not referenced within this class itself;
    //visible to subclasses through protected access)
    protected static final boolean AGGREGATE_BLOCK_NNZ = true;

    /**
     * Reads a matrix from the given file and returns it as a single in-memory
     * matrix block. The concrete behavior is defined by the format-specific
     * subclass.
     * 
     * @param fname name of the input file (or directory)
     * @param rlen number of rows of the matrix
     * @param clen number of columns of the matrix
     * @param brlen presumably the number of rows per block - confirm against implementations
     * @param bclen presumably the number of columns per block - confirm against implementations
     * @param estnnz presumably the estimated number of non-zero values - confirm against callers
     * @return the matrix block that was read
     * @throws IOException
     * @throws DMLRuntimeException
     */
    public abstract MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen,
            long estnnz) throws IOException, DMLRuntimeException;

    /**
     * Expands the given path into the array of paths to be read. A path that
     * is not a directory is returned as a single-element array. For a directory,
     * all entries reported by the file system listing are returned, except
     * those whose name starts with an underscore.
     * 
     * @param fs file system used to inspect the path
     * @param file file or directory path
     * @return array of paths; for a directory, the non-internal entries it contains
     * @throws IOException
     */
    public static Path[] getSequenceFilePaths(FileSystem fs, Path file) throws IOException {
        //plain file: nothing to expand
        if (!fs.isDirectory(file)) {
            return new Path[] { file };
        }

        //directory: collect all entries except internal ones
        LinkedList<Path> collected = new LinkedList<Path>();
        for (FileStatus entry : fs.listStatus(file)) {
            Path entryPath = entry.getPath();
            if (entryPath.getName().startsWith("_")) {
                continue; //skip internal files
            }
            collected.add(entryPath);
        }

        return collected.toArray(new Path[0]);
    }

    /**
     * Creates the output matrix block for a read operation. The dimensions are
     * validated first, then the target representation (sparse or dense) is
     * derived from the dimensions and the given non-zero estimate, and finally
     * the block is optionally pre-allocated according to the two flags.
     * 
     * NOTE: mallocDense controls if the output matrix blocks is fully allocated, this can be redundant
     * if binary block read and single block. 
     * 
     * @param rlen number of rows
     * @param clen number of columns
     * @param estnnz non-zero estimate used to choose between sparse and dense representation
     * @param mallocDense if true and the dense representation is chosen, the dense block is allocated
     * @param mallocSparse if true and the sparse representation is chosen, the sparse rows block is allocated
     * @return the newly created matrix block
     * @throws DMLRuntimeException if the dimensions are not valid CP dimensions
     * @throws IOException 
     */
    protected static MatrixBlock createOutputMatrixBlock(long rlen, long clen, long estnnz, boolean mallocDense,
            boolean mallocSparse) throws IOException, DMLRuntimeException {
        //reject dimensions that are not valid for the CP runtime
        boolean validDims = OptimizerUtils.isValidCPDimensions(rlen, clen);
        if (!validDims) {
            throw new DMLRuntimeException("Matrix dimensions too large for CP runtime: " + rlen + " x " + clen);
        }

        //decide on sparse vs dense in-memory representation
        boolean useSparse = MatrixBlock.evalSparseFormatInMemory(rlen, clen, estnnz);

        //narrowing casts happen only after the dimension check above
        int numRows = (int) rlen;
        int numCols = (int) clen;

        //create the result block and pre-allocate it if requested
        MatrixBlock block = new MatrixBlock(numRows, numCols, useSparse, estnnz);
        if (useSparse) {
            if (mallocSparse) {
                block.allocateSparseRowsBlock();
            }
        } else if (mallocDense) {
            block.allocateDenseBlockUnsafe(numRows, numCols);
        }

        return block;
    }

    /**
     * Verifies that the given path can be used as input: it must exist and
     * must not be empty.
     * 
     * @param fs file system used for the checks
     * @param path path of the input file
     * @throws IOException if the path does not exist; an EOFException if the file is empty
     */
    protected static void checkValidInputFile(FileSystem fs, Path path) throws IOException {
        //the path has to exist
        boolean present = fs.exists(path);
        if (!present) {
            throw new IOException("File " + path.toString() + " does not exist on HDFS/LFS.");
        }

        //the path must not refer to an empty file
        String pathName = path.toString();
        if (MapReduceTool.isFileEmpty(fs, pathName)) {
            throw new EOFException("Empty input file " + pathName + ".");
        }
    }
}