Example usage for org.apache.commons.math.linear RealMatrix getColumnVector

List of usage examples for org.apache.commons.math.linear RealMatrix getColumnVector

Introduction

On this page you can find example usages of org.apache.commons.math.linear RealMatrix getColumnVector.

Prototype

RealVector getColumnVector(int column) throws MatrixIndexException;

Source Link

Document

Returns the entries in column number `column` as a vector.

Usage

From source file: de.mpicbg.knime.hcs.base.nodes.preproc.OutlierRemoval.java

/**
 * Splits the rows of the first input table into a "kept" and a "discarded" table using a
 * per-group outlier test.
 *
 * <p>The rows are grouped by the configured grouping column. For every group a lower and an upper
 * bound is computed per double-compatible parameter column:
 * <ul>
 *   <li>method "Boxplot": lower quartile minus, and upper quartile plus,
 *       {@code factor * inter-quartile range};</li>
 *   <li>any other method: {@code mean -/+ factor * sqrt(StatUtils.variance)}.</li>
 * </ul>
 * A value is an outlier when it is not within {@code [lowerBound, upperBound]}; a missing cell is
 * treated as lying inside the bounds. Depending on the {@code rule} setting a row is discarded
 * either only when all parameters are outliers, or as soon as one parameter is an outlier.
 *
 * <p>Groups for which {@code extractMatrix} yields no rows are only reported with a warning; their
 * rows are written to neither output table.
 *
 * @param inData input tables; only the table at index 0 is read
 * @param exec   execution context used to create the output containers and to report progress
 * @return two tables: index 0 holds the kept rows, index 1 the discarded rows
 * @throws Exception if one of the called table, attribute or statistics helpers fails
 */
@Override
protected BufferedDataTable[] execute(BufferedDataTable[] inData, ExecutionContext exec) throws Exception {

    BufferedDataTable input = inData[0];
    DataTableSpec inputSpec = input.getDataTableSpec();

    // Collect the selected parameters, keeping only those whose type is compatible with double values.
    List<Attribute> parameter = new ArrayList<Attribute>();
    for (String item : parameterNames.getIncludeList()) {
        Attribute attribute = new InputTableAttribute(item, input);
        if (attribute.getType().isCompatible(DoubleValue.class)) {
            parameter.add(attribute);
        } else {
            logger.warn("The parameter '" + attribute.getName()
                    + "' will not be considered for outlier removal, since it is not compatible to double.");
        }
    }

    // Get the groups defined by the nominal column.
    Attribute groupingAttribute = new InputTableAttribute(this.groupingColumn.getStringValue(), input);
    Map<Object, List<DataRow>> subsets = AttributeUtils.splitRowsGeneric(input, groupingAttribute);

    // The node settings are read once up front instead of once per column/row.
    boolean useBoxplot = "Boxplot".equals(method.getStringValue());
    double scale = factor.getDoubleValue();
    boolean discardOnlyIfAllOutliers = rule.getBooleanValue();

    // Initialize the output containers and the progress counters.
    BufferedDataContainer keepContainer = exec.createDataContainer(inputSpec);
    BufferedDataContainer discardContainer = exec.createDataContainer(inputSpec);
    int groupCount = subsets.size();
    int groupIndex = 1;

    // Outlier analysis for each subset
    for (Map.Entry<Object, List<DataRow>> subset : subsets.entrySet()) {

        Object key = subset.getKey();
        List<DataRow> rowSubset = subset.getValue();

        // Get the valid values of this group, one matrix column per parameter.
        RealMatrix data = extractMatrix(rowSubset, parameter);

        int columnCount = data.getColumnDimension();
        int rowCount = data.getRowDimension();
        if (rowCount == 0) {
            // Nothing to compute bounds from: the rows of this group end up in neither output table.
            logger.warn("The group '" + key + "' has no valid values and will be removed entirely'");
        } else {

            // Determine upper and lower outlier bounds for every parameter column.
            double[] lowerBound = new double[columnCount];
            double[] upperBound = new double[columnCount];
            for (int c = 0; c < columnCount; ++c) {
                double[] values = data.getColumnVector(c).getData();
                if (useBoxplot) {
                    DescriptiveStatistics stats = new DescriptiveStatistics();
                    for (double value : values) {
                        stats.addValue(value);
                    }
                    // The whiskers are based on the inter-quartile range, i.e. the 25th and the
                    // 75th percentile (the previous code used the 85th percentile by mistake).
                    double lowerQuartile = stats.getPercentile(25);
                    double upperQuartile = stats.getPercentile(75);
                    double whisker = scale * Math.abs(lowerQuartile - upperQuartile);
                    lowerBound[c] = lowerQuartile - whisker;
                    upperBound[c] = upperQuartile + whisker;
                } else {
                    double mean = StatUtils.mean(values);
                    double sd = Math.sqrt(StatUtils.variance(values));
                    lowerBound[c] = mean - scale * sd;
                    upperBound[c] = mean + scale * sd;
                }
            }

            // Classify every row of the group.
            for (DataRow row : rowSubset) {
                int outlierCount = 0;
                int c = 0; // index into the bounds; advanced for every parameter so it stays aligned
                for (Attribute column : parameter) {

                    DataCell valueCell = row.getCell(((InputTableAttribute) column).getColumnIndex());

                    // A missing value is treated as a data point inside the bounds.
                    boolean inside = valueCell.isMissing();
                    if (!inside) {
                        double value = ((DoubleValue) valueCell).getDoubleValue();
                        // NaN fails both comparisons and therefore counts as an outlier.
                        inside = (lowerBound[c] <= value) && (value <= upperBound[c]);
                    }
                    if (!inside) {
                        outlierCount++;
                    }
                    c++;
                }

                // Rule set: discard only if the row is an outlier in all parameters.
                // Rule not set: discard if the row is an outlier in at least one parameter.
                boolean discard = discardOnlyIfAllOutliers ? (outlierCount == columnCount) : (outlierCount > 0);
                if (discard) {
                    discardContainer.addRowToTable(row);
                } else {
                    keepContainer.addRowToTable(row);
                }
            }
        }

        BufTableUtils.updateProgress(exec, groupIndex++, groupCount);

    }

    keepContainer.close();
    discardContainer.close();
    return new BufferedDataTable[] { keepContainer.getTable(), discardContainer.getTable() };
}