org.apache.sysml.runtime.transform.encode.EncoderBin.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.sysml.runtime.transform.encode.EncoderBin.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.runtime.transform.encode;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.lang.ArrayUtils;
import org.apache.wink.json4j.JSONArray;
import org.apache.wink.json4j.JSONException;
import org.apache.wink.json4j.JSONObject;
import org.apache.sysml.lops.Lop;
import org.apache.sysml.runtime.matrix.data.FrameBlock;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.transform.TfUtils;
import org.apache.sysml.runtime.transform.meta.TfMetaUtils;
import org.apache.sysml.runtime.util.UtilFunctions;

public class EncoderBin extends Encoder {
    private static final long serialVersionUID = 1917445005206076078L;

    public static final String MIN_PREFIX = "min";
    public static final String MAX_PREFIX = "max";
    public static final String NBINS_PREFIX = "nbins";

    private int[] _numBins = null;
    private double[] _min = null, _max = null; // min and max among non-missing values

    //frame transform-apply attributes
    private double[][] _binMins = null;
    private double[][] _binMaxs = null;

    public EncoderBin(JSONObject parsedSpec, String[] colnames, int clen) throws JSONException, IOException {
        this(parsedSpec, colnames, clen, false);
    }

    public EncoderBin(JSONObject parsedSpec, String[] colnames, int clen, boolean colsOnly)
            throws JSONException, IOException {
        super(null, clen);
        if (!parsedSpec.containsKey(TfUtils.TXMETHOD_BIN))
            return;

        if (colsOnly) {
            List<Integer> collist = TfMetaUtils.parseBinningColIDs(parsedSpec, colnames);
            initColList(ArrayUtils.toPrimitive(collist.toArray(new Integer[0])));
        } else {
            JSONObject obj = (JSONObject) parsedSpec.get(TfUtils.TXMETHOD_BIN);
            JSONArray attrs = (JSONArray) obj.get(TfUtils.JSON_ATTRS);
            JSONArray nbins = (JSONArray) obj.get(TfUtils.JSON_NBINS);
            initColList(attrs);

            _numBins = new int[attrs.size()];
            for (int i = 0; i < _numBins.length; i++)
                _numBins[i] = UtilFunctions.toInt(nbins.get(i));

            // initialize internal transformation metadata
            _min = new double[_colList.length];
            Arrays.fill(_min, Double.POSITIVE_INFINITY);
            _max = new double[_colList.length];
            Arrays.fill(_max, Double.NEGATIVE_INFINITY);
        }
    }

    public void prepare(String[] words, TfUtils agents) {
        if (!isApplicable())
            return;

        for (int i = 0; i < _colList.length; i++) {
            int colID = _colList[i];

            String w = null;
            double d = 0;

            // equi-width
            w = UtilFunctions.unquote(words[colID - 1].trim());
            if (!TfUtils.isNA(agents.getNAStrings(), w)) {
                d = UtilFunctions.parseToDouble(w);
                if (d < _min[i])
                    _min[i] = d;
                if (d > _max[i])
                    _max[i] = d;
            }
        }
    }

    @Override
    public MatrixBlock encode(FrameBlock in, MatrixBlock out) {
        build(in);
        return apply(in, out);
    }

    @Override
    public void build(FrameBlock in) {
        // nothing to do
    }

    @Override
    public MatrixBlock apply(FrameBlock in, MatrixBlock out) {
        for (int j = 0; j < _colList.length; j++) {
            int colID = _colList[j];
            for (int i = 0; i < in.getNumRows(); i++) {
                double inVal = UtilFunctions.objectToDouble(in.getSchema()[colID - 1], in.get(i, colID - 1));
                int ix = Arrays.binarySearch(_binMaxs[j], inVal);
                int binID = ((ix < 0) ? Math.abs(ix + 1) : ix) + 1;
                out.quickSetValue(i, colID - 1, binID);
            }
        }
        return out;
    }

    @Override
    public FrameBlock getMetaData(FrameBlock meta) {
        return meta;
    }

    @Override
    public void initMetaData(FrameBlock meta) {
        _binMins = new double[_colList.length][];
        _binMaxs = new double[_colList.length][];
        for (int j = 0; j < _colList.length; j++) {
            int colID = _colList[j]; //1-based
            int nbins = (int) meta.getColumnMetadata()[colID - 1].getNumDistinct();
            _binMins[j] = new double[nbins];
            _binMaxs[j] = new double[nbins];
            for (int i = 0; i < nbins; i++) {
                String[] tmp = meta.get(i, colID - 1).toString().split(Lop.DATATYPE_PREFIX);
                _binMins[j][i] = Double.parseDouble(tmp[0]);
                _binMaxs[j][i] = Double.parseDouble(tmp[1]);
            }
        }
    }
}