// Java tutorial
/**
 * (C) Copyright IBM Corp. 2010, 2015
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package com.ibm.bi.dml.runtime.io;

import java.io.EOFException;
import java.io.IOException;
import java.util.LinkedList;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import com.ibm.bi.dml.hops.OptimizerUtils;
import com.ibm.bi.dml.runtime.DMLRuntimeException;
import com.ibm.bi.dml.runtime.matrix.data.MatrixBlock;
import com.ibm.bi.dml.runtime.util.MapReduceTool;

/**
 * Base class for all format-specific matrix readers. Every reader is required to implement the
 * basic read functionality but might provide additional custom functionality. Any non-default
 * parameters (e.g., CSV read properties) should be passed into custom constructors. There is
 * also a factory for creating format-specific readers.
 */
public abstract class MatrixReader {

    //internal configuration
    protected static final boolean AGGREGATE_BLOCK_NNZ = true;

    /**
     * Format-specific read of a matrix into a {@link MatrixBlock}; each concrete reader
     * supplies the implementation.
     *
     * @param fname  name of the input file or directory
     * @param rlen   row dimension of the matrix
     * @param clen   column dimension of the matrix
     * @param brlen  presumably the number of rows per block (not used in this class; confirm in subclasses)
     * @param bclen  presumably the number of columns per block (not used in this class; confirm in subclasses)
     * @param estnnz estimated number of non-zero values
     * @return the matrix block that was read
     * @throws IOException declared for implementations that fail on I/O
     * @throws DMLRuntimeException declared for implementations that fail at runtime
     */
    public abstract MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen,
            int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException;

    /**
     * Resolves the paths that have to be read for the given input. If {@code file} is a
     * directory, all directly contained entries whose name does not start with an underscore
     * are returned; otherwise the result is a single-element array holding {@code file}.
     *
     * @param fs   file system used to inspect {@code file}
     * @param file file or directory to resolve
     * @return array of paths to read, never {@code null}
     * @throws IOException if the file system calls fail
     */
    public static Path[] getSequenceFilePaths(FileSystem fs, Path file)
        throws IOException
    {
        // a plain (non-directory) path is returned as is
        if (!fs.isDirectory(file)) {
            return new Path[] { file };
        }

        LinkedList<Path> parts = new LinkedList<Path>();
        for (FileStatus entry : fs.listStatus(file)) {
            Path entryPath = entry.getPath();
            if (entryPath.getName().startsWith("_")) {
                continue; //skip internal files
            }
            parts.add(entryPath);
        }

        return parts.toArray(new Path[0]);
    }

    /**
     * Creates the matrix block that receives the read result, after validating the requested
     * dimensions. The sparse/dense representation is chosen via
     * {@code MatrixBlock.evalSparseFormatInMemory}.
     *
     * NOTE: mallocDense controls if a dense output matrix block is fully allocated up front;
     * this can be redundant if binary block read and single block. mallocSparse does the same
     * for the sparse rows of a sparse output block.
     *
     * @param rlen         row dimension of the output block
     * @param clen         column dimension of the output block
     * @param estnnz       estimated number of non-zero values
     * @param mallocDense  if true and the block is dense, allocate the dense block
     * @param mallocSparse if true and the block is sparse, allocate the sparse rows block
     * @return the newly created matrix block
     * @throws DMLRuntimeException if the dimensions are rejected by
     *         {@code OptimizerUtils.isValidCPDimensions}
     * @throws IOException declared in the signature
     */
    protected static MatrixBlock createOutputMatrixBlock(long rlen, long clen, long estnnz,
            boolean mallocDense, boolean mallocSparse)
        throws IOException, DMLRuntimeException
    {
        //check input dimension
        if (!OptimizerUtils.isValidCPDimensions(rlen, clen)) {
            throw new DMLRuntimeException("Matrix dimensions too large for CP runtime: " + rlen + " x " + clen);
        }

        // narrowed only after the dimension check above has passed
        final int numRows = (int) rlen;
        final int numCols = (int) clen;

        //determine target representation (sparse/dense)
        final boolean sparse = MatrixBlock.evalSparseFormatInMemory(rlen, clen, estnnz);

        //prepare result matrix block
        MatrixBlock ret = new MatrixBlock(numRows, numCols, sparse, estnnz);
        if (sparse) {
            if (mallocSparse) {
                ret.allocateSparseRowsBlock();
            }
        } else if (mallocDense) {
            ret.allocateDenseBlockUnsafe(numRows, numCols);
        }

        return ret;
    }

    /**
     * Verifies that the given input path exists and is not empty.
     *
     * @param fs   file system used for the checks
     * @param path path of the input file
     * @throws IOException if the path does not exist; an {@link EOFException} (a subclass of
     *         IOException) if {@code MapReduceTool.isFileEmpty} reports it as empty
     */
    protected static void checkValidInputFile(FileSystem fs, Path path)
        throws IOException
    {
        //check non-existing file
        boolean present = fs.exists(path);
        if (!present) {
            throw new IOException("File " + path.toString() + " does not exist on HDFS/LFS.");
        }

        //check for empty file
        String location = path.toString();
        if (MapReduceTool.isFileEmpty(fs, location)) {
            throw new EOFException("Empty input file " + location + ".");
        }
    }
}