# Source code for qikify.controllers.identifyOutliers

import numpy as np

def identifyOutliers(data, k=3):
    """Flag the rows of a dataset that fall outside mu +/- k*sigma limits.

    The limits are computed independently for every column from that
    column's own mean and standard deviation (as returned by
    ``data.mean`` / ``data.std``).  A row passes only if *all* of its
    values lie inside the inclusive limits of their column.

    Parameters
    ----------
    data : pandas.DataFrame or pandas.Series
        Numeric data, one observation per row.  A Series is treated as a
        single-column dataset.
    k : int or float, optional
        Half-width of the acceptance band in units of sigma (default 3).

    Returns
    -------
    numpy.ndarray
        Boolean vector with one element per row of ``data``; ``False``
        elements denote outliers.
    """
    # For a Series, mean()/std() return scalars; atleast_1d gives the
    # DataFrame and Series cases the same 1-D "one limit per column" shape.
    mu = np.atleast_1d(np.asarray(data.mean(axis=0), dtype=float))
    sigma = np.atleast_1d(np.asarray(data.std(axis=0), dtype=float))

    lower = mu - k * sigma
    upper = mu + k * sigma

    # Change NaNs to +/- Inf so that a column whose limits cannot be
    # computed (e.g. a single row, where sigma is NaN) does not reject
    # every row on its own.
    lower = np.where(np.isnan(lower), -np.inf, lower)
    upper = np.where(np.isnan(upper), np.inf, upper)

    values = np.asarray(data, dtype=float)
    if values.ndim == 1:
        # Series input: lift to a single column so the row-wise reduction
        # below works unchanged.
        values = values[:, np.newaxis]

    # Broadcasting compares every row against the per-column limits.
    # NaN data values compare False on both sides and are therefore
    # reported as outliers.
    passed = (values >= lower) & (values <= upper)
    return passed.all(axis=1)

def identifyOutliersSpecs(data, specs, ind, k=3):
    """Flag the rows of a dataset that fall outside expanded spec limits.

    For every column the specification limits ``(lsl, usl)`` are looked up
    in ``specs`` and widened around the column mean ``mu``::

        lower = mu - k * abs(mu - lsl)
        upper = mu + k * abs(mu - usl)

    A row passes only if *all* of its values lie inside the inclusive
    expanded limits of their column.

    Parameters
    ----------
    data : pandas.DataFrame or pandas.Series
        Numeric data, one observation per row.  A Series is treated as a
        single-column dataset whose column name is ``data.name``.
    specs : mapping
        ``specs[name]`` must unpack into the pair ``(lsl, usl)`` for the
        column called ``name``.  A NaN limit means "no limit on that side".
    ind : object
        Not used by this function; kept so existing callers keep working.
    k : int or float, optional
        Expansion factor applied to the distance between the column mean
        and each spec limit (default 3).

    Returns
    -------
    numpy.ndarray
        Boolean vector with one element per row of ``data``; ``False``
        elements denote outliers.
    """
    values = np.asarray(data, dtype=float)
    if values.ndim == 1:
        # Series input: a single column identified by the Series' name.
        names = [data.name]
        values = values[:, np.newaxis]
    else:
        names = list(data.columns)

    # Positional (not label-based) access to the per-column means.
    mu = np.atleast_1d(np.asarray(data.mean(axis=0), dtype=float))

    # Start from "no limit" so that a NaN spec leaves that side open
    # instead of turning every comparison into False.
    lower = np.full(len(names), -np.inf)
    upper = np.full(len(names), np.inf)

    for j, name in enumerate(names):
        lsl, usl = specs[name]
        if not np.isnan(lsl):
            lower[j] = mu[j] - k * abs(mu[j] - lsl)
        if not np.isnan(usl):
            upper[j] = mu[j] + k * abs(mu[j] - usl)

    # Broadcasting compares every row against the per-column limits.
    passed = (values >= lower) & (values <= upper)
    return passed.all(axis=1)