// Java tutorial
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.runtime.controlprogram.caching;

import java.io.IOException;
import java.util.Arrays;
import java.util.Locale;

import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.mutable.MutableBoolean;
import org.apache.sysml.parser.DataExpression;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
import org.apache.sysml.runtime.instructions.spark.data.RDDObject;
import org.apache.sysml.runtime.io.FrameReader;
import org.apache.sysml.runtime.io.FrameReaderFactory;
import org.apache.sysml.runtime.io.FrameWriter;
import org.apache.sysml.runtime.io.FrameWriterFactory;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.MatrixDimensionsMetaData;
import org.apache.sysml.runtime.matrix.MatrixFormatMetaData;
import org.apache.sysml.runtime.matrix.MetaData;
import org.apache.sysml.runtime.matrix.data.FileFormatProperties;
import org.apache.sysml.runtime.matrix.data.FrameBlock;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
import org.apache.sysml.runtime.util.UtilFunctions;

/**
 * Cacheable handle for a {@link FrameBlock}, carrying an optional per-column
 * schema of value types in addition to the meta data managed by
 * {@link CacheableData}.
 *
 * <p>A {@code null} schema means "not set"; the accessors and readers in this
 * class then fall back to {@link ValueType#STRING} columns.
 */
public class FrameObject extends CacheableData<FrameBlock> {
    private static final long serialVersionUID = 1755082174281927785L;

    /** Per-column value types; {@code null} while no schema has been set. */
    private ValueType[] _schema = null;

    /** Creates an empty frame object of data type FRAME and value type STRING. */
    protected FrameObject() {
        super(DataType.FRAME, ValueType.STRING);
    }

    /**
     * Creates a frame object bound to the given file name.
     *
     * @param fname file name
     */
    public FrameObject(String fname) {
        this();
        setFileName(fname);
    }

    /**
     * Creates a frame object bound to the given file name and meta data.
     *
     * @param fname file name
     * @param meta meta data
     */
    public FrameObject(String fname, MetaData meta) {
        this();
        setFileName(fname);
        setMetaData(meta);
    }

    /**
     * Creates a frame object bound to the given file name, meta data, and schema.
     *
     * @param fname file name
     * @param meta meta data
     * @param schema schema of value types (stored by reference, not copied)
     */
    public FrameObject(String fname, MetaData meta, ValueType[] schema) {
        this();
        setFileName(fname);
        setMetaData(meta);
        setSchema(schema);
    }

    /**
     * Copy constructor that copies meta data but NO data.
     *
     * <p>Only the superclass copy constructor is invoked; the schema field of
     * {@code fo} is not transferred here and stays {@code null} on the new object.
     *
     * @param fo frame object
     */
    public FrameObject(FrameObject fo) {
        super(fo);
    }

    /**
     * Obtain the schema of value types.
     *
     * @return the internal schema array (not a copy), or {@code null} if no
     *         schema has been set
     */
    @Override
    public ValueType[] getSchema() {
        return _schema;
    }

    /**
     * Obtain schema of value types
     *
     * <p>If no schema is set, or the schema is too short to cover {@code cu},
     * a schema of {@code cu - cl + 1} STRING columns is returned instead.
     *
     * @param cl column lower bound, inclusive
     * @param cu column upper bound, inclusive
     * @return schema of value types
     */
    public ValueType[] getSchema(int cl, int cu) {
        return (_schema != null && _schema.length > cu)
            ? Arrays.copyOfRange(_schema, cl, cu + 1)
            : UtilFunctions.nCopies(cu - cl + 1, ValueType.STRING);
    }

    /**
     * Creates a new collection which contains the schema of the current
     * frame object concatenated with the schema of the passed frame object.
     *
     * <p>A side without a schema contributes one STRING entry per column.
     *
     * @param fo frame object
     * @return schema of value types
     */
    public ValueType[] mergeSchemas(FrameObject fo) {
        return (ValueType[]) ArrayUtils.addAll(
            (_schema != null) ? _schema
                : UtilFunctions.nCopies((int) getNumColumns(), ValueType.STRING),
            (fo._schema != null) ? fo._schema
                : UtilFunctions.nCopies((int) fo.getNumColumns(), ValueType.STRING));
    }

    /**
     * Sets the schema from its string representation.
     *
     * <p>The string {@code "*"} selects the default schema (all STRING) and is
     * applied only if the number of columns is known (greater than zero);
     * otherwise the current schema is left untouched. Any other string is split
     * on {@link DataExpression#DEFAULT_DELIM_DELIMITER} and each token is mapped
     * to a {@link ValueType} by name, ignoring case and surrounding whitespace.
     *
     * @param schema schema string, {@code "*"} or a delimited list of value type names
     * @throws IllegalArgumentException if a token does not name a value type;
     *         the previously set schema is retained in that case
     */
    public void setSchema(String schema) {
        if (schema.equals("*")) {
            //populate default schema
            int clen = (int) getNumColumns();
            if (clen > 0) //known number of cols
                _schema = UtilFunctions.nCopies(clen, ValueType.STRING);
        }
        else {
            //parse given schema into a local array first, so that a failing
            //token does not leave a partially populated schema behind
            String[] parts = schema.split(DataExpression.DEFAULT_DELIM_DELIMITER);
            ValueType[] parsed = new ValueType[parts.length];
            for (int i = 0; i < parts.length; i++) {
                //Locale.ROOT: upper-casing must not depend on the default locale
                //(e.g., Turkish maps 'i' to a dotted capital I, breaking valueOf)
                parsed[i] = ValueType.valueOf(parts[i].trim().toUpperCase(Locale.ROOT));
            }
            _schema = parsed;
        }
    }

    /**
     * Sets the schema of value types.
     *
     * @param schema schema of value types (stored by reference, not copied)
     */
    public void setSchema(ValueType[] schema) {
        _schema = schema;
    }

    /**
     * Updates dimensions, number of non-zeros, and schema from the frame block
     * currently held by this object. The number of non-zeros is set to
     * rows times columns.
     *
     * @throws CacheException if there is no data or no meta data
     */
    @Override
    public void refreshMetaData() throws CacheException {
        if (_data == null || _metaData == null) //refresh only for existing data
            throw new CacheException("Cannot refresh meta data because there is no data or meta data. ");

        //update matrix characteristics
        MatrixCharacteristics mc = ((MatrixDimensionsMetaData) _metaData).getMatrixCharacteristics();
        mc.setDimension(_data.getNumRows(), _data.getNumColumns());
        //multiply in long arithmetic to avoid overflow of the cell count
        mc.setNonZeros((long) _data.getNumRows() * _data.getNumColumns());

        //update schema information
        _schema = _data.getSchema();
    }

    /**
     * Obtain the number of rows according to the matrix characteristics.
     *
     * @return number of rows
     */
    public long getNumRows() {
        MatrixCharacteristics mc = getMatrixCharacteristics();
        return mc.getRows();
    }

    /**
     * Obtain the number of columns according to the matrix characteristics.
     *
     * @return number of columns
     */
    public long getNumColumns() {
        MatrixCharacteristics mc = getMatrixCharacteristics();
        return mc.getCols();
    }

    /**
     * Reads the frame block for the given file name via the lazy write buffer.
     *
     * @param fname file name
     * @return frame block
     * @throws IOException if the read fails
     */
    @Override
    protected FrameBlock readBlobFromCache(String fname) throws IOException {
        return (FrameBlock) LazyWriteBuffer.readBlock(fname, false);
    }

    /**
     * Reads the frame block from HDFS with a reader matching the input info of
     * the meta data. The dimensions handed to the reader come from the matrix
     * characteristics; {@code clen} is only used to size the default schema.
     *
     * @param fname file name
     * @param rlen number of rows (not used by this implementation)
     * @param clen number of columns, used for the default schema if none is set
     * @return frame block, never {@code null}
     * @throws IOException if reading fails or the reader returns no data
     */
    @Override
    protected FrameBlock readBlobFromHDFS(String fname, long rlen, long clen) throws IOException {
        MatrixFormatMetaData iimd = (MatrixFormatMetaData) _metaData;
        MatrixCharacteristics mc = iimd.getMatrixCharacteristics();

        //handle missing schema if necessary (at least one STRING column)
        ValueType[] lschema = (_schema != null) ? _schema
            : UtilFunctions.nCopies(clen >= 1 ? (int) clen : 1, ValueType.STRING);

        //read the frame block
        FrameBlock data = null;
        try {
            FrameReader reader = FrameReaderFactory.createFrameReader(
                iimd.getInputInfo(), getFileFormatProperties());
            data = reader.readFrameFromHDFS(fname, lschema, mc.getRows(), mc.getCols());
        }
        catch (DMLRuntimeException ex) {
            throw new IOException(ex);
        }

        //sanity check correct output
        if (data == null)
            throw new IOException("Unable to load frame from file: " + fname);

        return data;
    }

    /**
     * Collects the frame block from the given RDD object.
     *
     * @param rdd rdd object
     * @param status output flag, always set to {@code false} by this implementation
     * @return frame block, never {@code null}
     * @throws IOException if the collect fails or yields no data
     */
    @Override
    protected FrameBlock readBlobFromRDD(RDDObject rdd, MutableBoolean status) throws IOException {
        //note: the read of a frame block from an RDD might trigger
        //lazy evaluation of pending transformations.
        RDDObject lrdd = rdd;

        //prepare return status (by default only collect)
        status.setValue(false);

        MatrixFormatMetaData iimd = (MatrixFormatMetaData) _metaData;
        MatrixCharacteristics mc = iimd.getMatrixCharacteristics();
        int rlen = (int) mc.getRows();
        int clen = (int) mc.getCols();

        //handle missing schema if necessary (at least one STRING column)
        ValueType[] lschema = (_schema != null) ? _schema
            : UtilFunctions.nCopies(clen >= 1 ? clen : 1, ValueType.STRING);

        FrameBlock fb = null;
        try {
            //prevent unnecessary collect through rdd checkpoint
            if (rdd.allowsShortCircuitCollect()) {
                lrdd = (RDDObject) rdd.getLineageChilds().get(0);
            }

            //collect frame block from binary block RDD
            fb = SparkExecutionContext.toFrameBlock(lrdd, lschema, rlen, clen);
        }
        catch (DMLRuntimeException ex) {
            throw new IOException(ex);
        }

        //sanity check correct output
        if (fb == null) {
            throw new IOException("Unable to load frame from rdd: " + lrdd.getVarName());
        }

        return fb;
    }

    /**
     * Writes the held frame block to HDFS with a writer matching the given
     * output format.
     *
     * @param fname file name
     * @param ofmt output format string
     * @param rep replication factor (not used by this implementation)
     * @param fprop file format properties
     * @throws IOException if the write fails
     * @throws DMLRuntimeException if the writer cannot be created or fails
     */
    @Override
    protected void writeBlobToHDFS(String fname, String ofmt, int rep, FileFormatProperties fprop)
        throws IOException, DMLRuntimeException
    {
        OutputInfo oinfo = OutputInfo.stringToOutputInfo(ofmt);
        FrameWriter writer = FrameWriterFactory.createFrameWriter(oinfo, fprop);
        writer.writeFrameToHDFS(_data, fname, getNumRows(), getNumColumns());
    }

    /**
     * Writes the given RDD object to HDFS. If no output format is given, the
     * output info matching the input info of the meta data is used.
     *
     * @param rdd rdd object
     * @param fname file name
     * @param ofmt output format string, or {@code null} to derive it from the meta data
     * @throws IOException if the write fails
     * @throws DMLRuntimeException if the write fails
     */
    @Override
    protected void writeBlobFromRDDtoHDFS(RDDObject rdd, String fname, String ofmt)
        throws IOException, DMLRuntimeException
    {
        //prepare output info
        MatrixFormatMetaData iimd = (MatrixFormatMetaData) _metaData;
        OutputInfo oinfo = (ofmt != null ? OutputInfo.stringToOutputInfo(ofmt)
            : InputInfo.getMatchingOutputInfo(iimd.getInputInfo()));

        //note: the write of an RDD to HDFS might trigger
        //lazy evaluation of pending transformations.
        SparkExecutionContext.writeFrameRDDtoHDFS(rdd, fname, oinfo);
    }
}