com.ibm.bi.dml.runtime.controlprogram.parfor.DataPartitionerRemoteSparkMapper.java Source code

Java tutorial

Introduction

Here is the source code for com.ibm.bi.dml.runtime.controlprogram.parfor.DataPartitionerRemoteSparkMapper.java

Source

/**
 * (C) Copyright IBM Corp. 2010, 2015
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
*/

package com.ibm.bi.dml.runtime.controlprogram.parfor;

import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

import org.apache.hadoop.io.Writable;
import org.apache.spark.api.java.function.PairFlatMapFunction;

import com.ibm.bi.dml.runtime.DMLRuntimeException;
import com.ibm.bi.dml.runtime.DMLUnsupportedOperationException;
import com.ibm.bi.dml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat;
import com.ibm.bi.dml.runtime.controlprogram.parfor.util.PairWritableBlock;
import com.ibm.bi.dml.runtime.matrix.MatrixCharacteristics;
import com.ibm.bi.dml.runtime.matrix.data.InputInfo;
import com.ibm.bi.dml.runtime.matrix.data.MatrixBlock;
import com.ibm.bi.dml.runtime.matrix.data.MatrixIndexes;
import com.ibm.bi.dml.runtime.matrix.data.OutputInfo;
import com.ibm.bi.dml.runtime.util.DataConverter;

import scala.Tuple2;

/**
 * Spark mapper for remote data partitioning: maps each input matrix block to
 * one or more (partition-key, block) pairs according to the configured
 * partition format.
 *
 * NOTE: for the moment we only support binary block here.
 * TODO: extend the implementation to binarycell and textcell.
 *
 * The output value type is the {@code Writable} interface so that both
 * {@code PairWritableBlock} and {@code PairWritableCell} can be emitted.
 */
public class DataPartitionerRemoteSparkMapper extends ParWorker
        implements PairFlatMapFunction<Tuple2<MatrixIndexes, MatrixBlock>, Long, Writable> {

    private static final long serialVersionUID = 332887624852010957L;

    //overall matrix dimensions (rows/cols) and block sizes (rows/cols per block)
    private long _rlen = -1;
    private long _clen = -1;
    private long _brlen = -1;
    private long _bclen = -1;
    //rows (ROW_BLOCK_WISE_N) or columns (COLUMN_BLOCK_WISE_N) per output partition;
    //must be set via the 5-argument constructor for the *_N formats
    private long _n = -1;
    private PDataPartitionFormat _dpf = null;

    /**
     * Creates a mapper for partition formats that do not require a partition
     * size {@code n} (i.e., all formats except ROW_BLOCK_WISE_N and
     * COLUMN_BLOCK_WISE_N). For the *_N formats, use the 5-argument
     * constructor; otherwise {@link #call(Tuple2)} fails with an explicit
     * exception instead of silently computing wrong partition keys.
     *
     * @param mc  matrix characteristics (dimensions and block sizes)
     * @param ii  input info (currently unused, kept for API compatibility)
     * @param oi  output info (currently unused, kept for API compatibility)
     * @param dpf data partition format
     */
    public DataPartitionerRemoteSparkMapper(MatrixCharacteristics mc, InputInfo ii, OutputInfo oi,
            PDataPartitionFormat dpf) throws DMLRuntimeException, DMLUnsupportedOperationException {
        this(mc, ii, oi, dpf, -1);
    }

    /**
     * Creates a mapper including the partition size {@code n} required by the
     * ROW_BLOCK_WISE_N / COLUMN_BLOCK_WISE_N formats.
     *
     * @param mc  matrix characteristics (dimensions and block sizes)
     * @param ii  input info (currently unused, kept for API compatibility)
     * @param oi  output info (currently unused, kept for API compatibility)
     * @param dpf data partition format
     * @param n   number of rows (ROW_BLOCK_WISE_N) or columns
     *            (COLUMN_BLOCK_WISE_N) per partition; ignored by other formats
     */
    public DataPartitionerRemoteSparkMapper(MatrixCharacteristics mc, InputInfo ii, OutputInfo oi,
            PDataPartitionFormat dpf, long n) throws DMLRuntimeException, DMLUnsupportedOperationException {
        _rlen = mc.getRows();
        _clen = mc.getCols();
        _brlen = mc.getRowsPerBlock();
        _bclen = mc.getColsPerBlock();
        _n = n;
        _dpf = dpf;
    }

    /**
     * Maps one input matrix block to its output (partition-key, block) pairs.
     * ROW_WISE/COLUMN_WISE emit one pair per row/column of the block; all
     * other formats emit a single pair keyed by the enclosing partition.
     *
     * @param arg0 (block indexes, block) pair of the input matrix
     * @return list of (partition key, PairWritableBlock) pairs
     * @throws Exception if the block lies outside the declared matrix
     *         dimensions, or if a *_N format is used without a valid n
     */
    @Override
    public Iterable<Tuple2<Long, Writable>> call(Tuple2<MatrixIndexes, MatrixBlock> arg0) throws Exception {
        List<Tuple2<Long, Writable>> ret = new ArrayList<Tuple2<Long, Writable>>();

        MatrixIndexes key2 = arg0._1();
        MatrixBlock value2 = arg0._2();
        //block indexes are 1-based -> convert to 0-based cell offsets
        long row_offset = (key2.getRowIndex() - 1) * _brlen;
        long col_offset = (key2.getColumnIndex() - 1) * _bclen;
        long rows = value2.getNumRows();
        long cols = value2.getNumColumns();

        //bound check per block
        if (row_offset + rows < 1 || row_offset + rows > _rlen || col_offset + cols < 1
                || col_offset + cols > _clen) {
            throw new IOException("Matrix block [" + (row_offset + 1) + ":" + (row_offset + rows) + ","
                    + (col_offset + 1) + ":" + (col_offset + cols) + "] " + "out of overall matrix range [1:"
                    + _rlen + ",1:" + _clen + "].");
        }

        //fail fast for *_N formats if n was never configured (would otherwise
        //silently divide by -1 and produce wrong partition keys)
        if ((_dpf == PDataPartitionFormat.ROW_BLOCK_WISE_N || _dpf == PDataPartitionFormat.COLUMN_BLOCK_WISE_N)
                && _n <= 0) {
            throw new DMLRuntimeException("Invalid partition size n=" + _n + " for format " + _dpf + ".");
        }

        //partition inputs according to partitioning scheme 
        switch (_dpf) {
        case ROW_WISE: { //one output pair per row
            MatrixBlock[] blks = DataConverter.convertToMatrixBlockPartitions(value2, false);
            for (int i = 0; i < rows; i++) {
                PairWritableBlock tmp = new PairWritableBlock();
                tmp.indexes = new MatrixIndexes(1, col_offset / _bclen + 1);
                tmp.block = blks[i];
                ret.add(new Tuple2<Long, Writable>(Long.valueOf(row_offset + 1 + i), tmp));
            }
            break;
        }
        case ROW_BLOCK_WISE: { //one output pair per block row
            PairWritableBlock tmp = new PairWritableBlock();
            tmp.indexes = new MatrixIndexes(1, col_offset / _bclen + 1);
            tmp.block = new MatrixBlock(value2);
            ret.add(new Tuple2<Long, Writable>(Long.valueOf(row_offset / _brlen + 1), tmp));
            break;
        }
        case ROW_BLOCK_WISE_N: { //one output pair per group of n rows
            PairWritableBlock tmp = new PairWritableBlock();
            tmp.indexes = new MatrixIndexes(((row_offset % _n) / _brlen) + 1, col_offset / _bclen + 1);
            tmp.block = new MatrixBlock(value2);
            ret.add(new Tuple2<Long, Writable>(Long.valueOf(row_offset / _n + 1), tmp));
            break;
        }
        case COLUMN_WISE: { //one output pair per column
            MatrixBlock[] blks = DataConverter.convertToMatrixBlockPartitions(value2, true);
            for (int i = 0; i < cols; i++) {
                PairWritableBlock tmp = new PairWritableBlock();
                tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, 1);
                tmp.block = blks[i];
                ret.add(new Tuple2<Long, Writable>(Long.valueOf(col_offset + 1 + i), tmp));
            }
            break;
        }
        case COLUMN_BLOCK_WISE: { //one output pair per block column
            PairWritableBlock tmp = new PairWritableBlock();
            tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, 1);
            tmp.block = new MatrixBlock(value2);
            ret.add(new Tuple2<Long, Writable>(Long.valueOf(col_offset / _bclen + 1), tmp));
            break;
        }
        case COLUMN_BLOCK_WISE_N: { //one output pair per group of n columns
            PairWritableBlock tmp = new PairWritableBlock();
            tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, ((col_offset % _n) / _bclen) + 1);
            tmp.block = new MatrixBlock(value2);
            ret.add(new Tuple2<Long, Writable>(Long.valueOf(col_offset / _n + 1), tmp));
            break;
        }

        default:
            throw new DMLRuntimeException("Unsupported partition format: " + _dpf);
        }

        return ret;
    }

}