org.apache.sysml.runtime.controlprogram.caching.FrameObject.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.sysml.runtime.controlprogram.caching.FrameObject.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.runtime.controlprogram.caching;

import java.io.IOException;
import java.util.Arrays;
import java.util.Locale;

import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.mutable.MutableBoolean;
import org.apache.sysml.parser.DataExpression;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
import org.apache.sysml.runtime.instructions.spark.data.RDDObject;
import org.apache.sysml.runtime.io.FrameReader;
import org.apache.sysml.runtime.io.FrameReaderFactory;
import org.apache.sysml.runtime.io.FrameWriter;
import org.apache.sysml.runtime.io.FrameWriterFactory;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.MatrixDimensionsMetaData;
import org.apache.sysml.runtime.matrix.MatrixFormatMetaData;
import org.apache.sysml.runtime.matrix.MetaData;
import org.apache.sysml.runtime.matrix.data.FileFormatProperties;
import org.apache.sysml.runtime.matrix.data.FrameBlock;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
import org.apache.sysml.runtime.util.UtilFunctions;

/**
 * Cacheable handle for a frame, i.e., a {@link FrameBlock} together with its
 * file name, meta data, and an optional per-column schema of value types.
 * 
 * Whenever no schema is known, the methods of this class fall back to a
 * schema that consists of {@link ValueType#STRING} for every column.
 */
public class FrameObject extends CacheableData<FrameBlock> {
    private static final long serialVersionUID = 1755082174281927785L;

    /** Per-column value types; stays {@code null} until a schema is set or refreshed. */
    private ValueType[] _schema = null;

    /**
     * Creates an empty frame object of data type FRAME and value type STRING.
     */
    protected FrameObject() {
        super(DataType.FRAME, ValueType.STRING);
    }

    /**
     * Creates a frame object bound to the given file name.
     * 
     * @param fname file name
     */
    public FrameObject(String fname) {
        this();
        setFileName(fname);
    }

    /**
     * Creates a frame object bound to the given file name and meta data.
     * 
     * @param fname file name
     * @param meta meta data
     */
    public FrameObject(String fname, MetaData meta) {
        this();
        setFileName(fname);
        setMetaData(meta);
    }

    /**
     * Creates a frame object bound to the given file name, meta data, and schema.
     * 
     * @param fname file name
     * @param meta meta data
     * @param schema schema of value types (stored by reference, not copied)
     */
    public FrameObject(String fname, MetaData meta, ValueType[] schema) {
        this();
        setFileName(fname);
        setMetaData(meta);
        setSchema(schema);
    }

    /**
     * Copy constructor that copies meta data but NO data.
     * 
     * NOTE(review): only the super-class copy constructor is invoked, so the
     * schema of the new object remains unset -- confirm that this is intended.
     * 
     * @param fo frame object
     */
    public FrameObject(FrameObject fo) {
        super(fo);
    }

    /**
     * Obtain the schema of value types.
     * 
     * @return the internal schema array (not a copy), or {@code null} if no
     *         schema has been set
     */
    @Override
    public ValueType[] getSchema() {
        return _schema;
    }

    /**
     * Obtain schema of value types for a column range. If no schema is set,
     * or the schema does not reach up to {@code cu}, a default schema of
     * {@code cu-cl+1} STRING entries is returned instead.
     * 
     * @param cl column lower bound, inclusive
     * @param cu column upper bound, inclusive
     * @return schema of value types
     */
    public ValueType[] getSchema(int cl, int cu) {
        boolean covered = (_schema != null && _schema.length > cu);
        return covered ? Arrays.copyOfRange(_schema, cl, cu + 1)
                : UtilFunctions.nCopies(cu - cl + 1, ValueType.STRING);
    }

    /**
     * Creates a new collection which contains the schema of the current
     * frame object concatenated with the schema of the passed frame object.
     * A missing schema on either side is replaced by STRING entries, one per
     * column of the respective frame object.
     * 
     * @param fo frame object
     * @return schema of value types
     */
    public ValueType[] mergeSchemas(FrameObject fo) {
        ValueType[] lhs = (_schema != null) ? _schema
                : UtilFunctions.nCopies((int) getNumColumns(), ValueType.STRING);
        ValueType[] rhs = (fo._schema != null) ? fo._schema
                : UtilFunctions.nCopies((int) fo.getNumColumns(), ValueType.STRING);
        return (ValueType[]) ArrayUtils.addAll(lhs, rhs);
    }

    /**
     * Sets the schema from its string representation. The string {@code "*"}
     * requests the default schema (STRING for every column), which is only
     * populated if the number of columns is known; otherwise the current
     * schema is left untouched. Any other string is split at
     * {@link DataExpression#DEFAULT_DELIM_DELIMITER} and every part is
     * parsed, ignoring case and surrounding whitespace, as a value type name.
     * 
     * @param schema schema string
     * @throws IllegalArgumentException if a part is not a value type name
     */
    public void setSchema(String schema) {
        if (schema.equals("*")) {
            //populate default schema
            int clen = (int) getNumColumns();
            if (clen > 0) { //known number of cols
                _schema = UtilFunctions.nCopies(clen, ValueType.STRING);
            }
        } else {
            //parse given schema
            String[] parts = schema.split(DataExpression.DEFAULT_DELIM_DELIMITER);
            _schema = new ValueType[parts.length];
            for (int i = 0; i < parts.length; i++) {
                //locale-independent upper casing (the default locale may map
                //'i' to a dotted capital I), and tolerate blanks around names
                _schema[i] = ValueType.valueOf(parts[i].trim().toUpperCase(Locale.ROOT));
            }
        }
    }

    /**
     * Sets the schema of value types.
     * 
     * @param schema schema of value types (stored by reference, not copied)
     */
    public void setSchema(ValueType[] schema) {
        _schema = schema;
    }

    /**
     * Refreshes dimensions, number of non-zeros (set to rows times columns),
     * and schema from the in-memory frame block.
     * 
     * @throws CacheException if there is no data or no meta data
     */
    @Override
    public void refreshMetaData() throws CacheException {
        if (_data == null || _metaData == null) //refresh only for existing data
            throw new CacheException("Cannot refresh meta data because there is no data or meta data. ");

        //update matrix characteristics
        MatrixCharacteristics mc = ((MatrixDimensionsMetaData) _metaData).getMatrixCharacteristics();
        mc.setDimension(_data.getNumRows(), _data.getNumColumns());
        //widen before multiplying to avoid an overflow of the cell count
        mc.setNonZeros((long) _data.getNumRows() * _data.getNumColumns());

        //update schema information
        _schema = _data.getSchema();
    }

    /**
     * Obtain the number of rows according to the matrix characteristics.
     * 
     * @return number of rows
     */
    public long getNumRows() {
        return getMatrixCharacteristics().getRows();
    }

    /**
     * Obtain the number of columns according to the matrix characteristics.
     * 
     * @return number of columns
     */
    public long getNumColumns() {
        return getMatrixCharacteristics().getCols();
    }

    /**
     * Reads the frame block with the given file name via the lazy write buffer.
     * 
     * @param fname file name
     * @return frame block
     * @throws IOException if the read fails
     */
    @Override
    protected FrameBlock readBlobFromCache(String fname) throws IOException {
        return (FrameBlock) LazyWriteBuffer.readBlock(fname, false);
    }

    /**
     * Reads the frame block from HDFS with a reader created for the input
     * info of the meta data. The dimensions handed to the reader are taken
     * from the meta data; {@code clen} is only used to size the default
     * STRING schema (at least one column) if no schema is set.
     * 
     * @param fname file name
     * @param rlen number of rows (not used by this implementation)
     * @param clen number of columns
     * @return frame block, never {@code null}
     * @throws IOException if the read fails or yields no frame block
     */
    @Override
    protected FrameBlock readBlobFromHDFS(String fname, long rlen, long clen) throws IOException {
        MatrixFormatMetaData iimd = (MatrixFormatMetaData) _metaData;
        MatrixCharacteristics mc = iimd.getMatrixCharacteristics();

        //handle missing schema if necessary
        ValueType[] lschema = (_schema != null) ? _schema
                : UtilFunctions.nCopies(clen >= 1 ? (int) clen : 1, ValueType.STRING);

        //read the frame block
        FrameBlock data = null;
        try {
            FrameReader reader = FrameReaderFactory.createFrameReader(iimd.getInputInfo(),
                    getFileFormatProperties());
            data = reader.readFrameFromHDFS(fname, lschema, mc.getRows(), mc.getCols());
        } catch (DMLRuntimeException ex) {
            throw new IOException(ex);
        }

        //sanity check correct output
        if (data == null) {
            throw new IOException("Unable to load frame from file: " + fname);
        }

        return data;
    }

    /**
     * Collects the frame block from the given RDD object, using the
     * dimensions of the meta data and a default STRING schema (at least one
     * column) if no schema is set.
     * 
     * @param rdd rdd object
     * @param status output status, always set to {@code false} by this method
     * @return frame block, never {@code null}
     * @throws IOException if the collect fails or yields no frame block
     */
    @Override
    protected FrameBlock readBlobFromRDD(RDDObject rdd, MutableBoolean status) throws IOException {
        //note: the read of a frame block from an RDD might trigger
        //lazy evaluation of pending transformations.
        RDDObject lrdd = rdd;

        //prepare return status (by default only collect)
        status.setValue(false);

        MatrixFormatMetaData iimd = (MatrixFormatMetaData) _metaData;
        MatrixCharacteristics mc = iimd.getMatrixCharacteristics();
        int rlen = (int) mc.getRows();
        int clen = (int) mc.getCols();

        //handle missing schema if necessary
        ValueType[] lschema = (_schema != null) ? _schema
                : UtilFunctions.nCopies(clen >= 1 ? clen : 1, ValueType.STRING);

        FrameBlock fb = null;
        try {
            //prevent unnecessary collect through rdd checkpoint
            if (rdd.allowsShortCircuitCollect()) {
                lrdd = (RDDObject) rdd.getLineageChilds().get(0);
            }

            //collect frame block from binary block RDD
            fb = SparkExecutionContext.toFrameBlock(lrdd, lschema, rlen, clen);
        } catch (DMLRuntimeException ex) {
            throw new IOException(ex);
        }

        //sanity check correct output
        if (fb == null) {
            throw new IOException("Unable to load frame from rdd: " + lrdd.getVarName());
        }

        return fb;
    }

    /**
     * Writes the in-memory frame block to HDFS with a writer created for the
     * given output format and file format properties.
     * 
     * @param fname file name
     * @param ofmt output format string
     * @param rep replication factor (not used by this implementation)
     * @param fprop file format properties
     * @throws IOException if the write fails
     * @throws DMLRuntimeException if the write fails
     */
    @Override
    protected void writeBlobToHDFS(String fname, String ofmt, int rep, FileFormatProperties fprop)
            throws IOException, DMLRuntimeException {
        OutputInfo oinfo = OutputInfo.stringToOutputInfo(ofmt);
        FrameWriter writer = FrameWriterFactory.createFrameWriter(oinfo, fprop);
        writer.writeFrameToHDFS(_data, fname, getNumRows(), getNumColumns());
    }

    /**
     * Writes the given RDD object to HDFS. If no output format is given, the
     * output info matching the input info of the meta data is used.
     * 
     * @param rdd rdd object
     * @param fname file name
     * @param ofmt output format string, may be {@code null}
     * @throws IOException if the write fails
     * @throws DMLRuntimeException if the write fails
     */
    @Override
    protected void writeBlobFromRDDtoHDFS(RDDObject rdd, String fname, String ofmt)
            throws IOException, DMLRuntimeException {
        //prepare output info
        MatrixFormatMetaData iimd = (MatrixFormatMetaData) _metaData;
        OutputInfo oinfo = (ofmt != null) ? OutputInfo.stringToOutputInfo(ofmt)
                : InputInfo.getMatchingOutputInfo(iimd.getInputInfo());

        //note: the write of an RDD to HDFS might trigger
        //lazy evaluation of pending transformations.
        SparkExecutionContext.writeFrameRDDtoHDFS(rdd, fname, oinfo);
    }

}