List of usage examples for org.apache.commons.math.linear RealMatrix getColumnVector
RealVector getColumnVector(int column) throws MatrixIndexException;
Returns the entries of the given column as a vector. From source file: de.mpicbg.knime.hcs.base.nodes.preproc.OutlierRemoval.java
@Override protected BufferedDataTable[] execute(BufferedDataTable[] inData, ExecutionContext exec) throws Exception { BufferedDataTable input = inData[0]; DataTableSpec inputSpec = input.getDataTableSpec(); // Get the parameter and make sure there all double value columns List<Attribute> parameter = new ArrayList<Attribute>(); for (String item : parameterNames.getIncludeList()) { Attribute attribute = new InputTableAttribute(item, input); if (attribute.getType().isCompatible(DoubleValue.class)) { parameter.add(attribute);// w w w. ja va 2 s . co m } else { logger.warn("The parameter '" + attribute.getName() + "' will not be considered for outlier removal, since it is not compatible to double."); } } // Get the groups defined by the nominal column. Attribute groupingAttribute = new InputTableAttribute(this.groupingColumn.getStringValue(), input); Map<Object, List<DataRow>> subsets = AttributeUtils.splitRowsGeneric(input, groupingAttribute); // Initialize BufferedDataContainer keepContainer = exec.createDataContainer(inputSpec); BufferedDataContainer discartContainer = exec.createDataContainer(inputSpec); int S = subsets.size(); int s = 1; // Outlier analysis for each subset for (Object key : subsets.keySet()) { // Get the subset having all constraints in common List<DataRow> rowSubset = subsets.get(key); // Get the valid values RealMatrix data = extractMatrix(rowSubset, parameter); int N = data.getColumnDimension(); int M = data.getRowDimension(); if (M == 0) { logger.warn("The group '" + key + "' has no valid values and will be removed entirely'"); } else { // Determine upper and lower outlier bounds double[] lowerBound = new double[N]; double[] upperBound = new double[N]; if (method.getStringValue().equals("Boxplot")) { for (int c = 0; c < N; ++c) { RealVector vect = data.getColumnVector(c); DescriptiveStatistics stats = new DescriptiveStatistics(); for (double value : vect.getData()) { stats.addValue(value); } double lowerQuantile = stats.getPercentile(25); 
double upperQuantile = stats.getPercentile(85); double whisker = factor.getDoubleValue() * Math.abs(lowerQuantile - upperQuantile); lowerBound[c] = lowerQuantile - whisker; upperBound[c] = upperQuantile + whisker; } } else { for (int c = 0; c < N; ++c) { RealVector vect = data.getColumnVector(c); double mean = StatUtils.mean(vect.getData()); double sd = Math.sqrt(StatUtils.variance(vect.getData())); lowerBound[c] = mean - factor.getDoubleValue() * sd; upperBound[c] = mean + factor.getDoubleValue() * sd; } } // Remove The outlier if (rule.getBooleanValue()) { // The row is only discarted if the row is an outlier in all parameter. for (DataRow row : rowSubset) { int c = 0; for (Attribute column : parameter) { DataCell valueCell = row.getCell(((InputTableAttribute) column).getColumnIndex()); // a missing value will be treated as data point inside the bounds if (valueCell.isMissing()) { continue; } Double value = ((DoubleValue) valueCell).getDoubleValue(); if ((value != null) && (lowerBound[c] <= value) && (value <= upperBound[c])) { break; } else { c++; } } if (c != N) { keepContainer.addRowToTable(row); } else { discartContainer.addRowToTable(row); } } } else { // The row is discarted if it has a outlier for at least one parameter. for (DataRow row : rowSubset) { int c = 0; for (Attribute column : parameter) { DataCell valueCell = row.getCell(((InputTableAttribute) column).getColumnIndex()); // a missing value will be treated as data point inside the bounds if (valueCell.isMissing()) { c++; continue; } Double value = ((DoubleValue) valueCell).getDoubleValue(); if ((value != null) && (lowerBound[c] <= value) && (value <= upperBound[c])) { c++; } else { break; } } if (c == N) { keepContainer.addRowToTable(row); } else { discartContainer.addRowToTable(row); } } } } BufTableUtils.updateProgress(exec, s++, S); } keepContainer.close(); discartContainer.close(); return new BufferedDataTable[] { keepContainer.getTable(), discartContainer.getTable() }; }