List of usage examples for weka.filters.unsupervised.attribute.Standardize
Standardize
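Every example on this page follows the same three-step pattern: construct the filter, call setInputFormat() on the dataset that should supply the scaling statistics, then push data through the static Filter.useFilter() method. A minimal, self-contained sketch of that pattern (the file name data.arff is a placeholder; Weka must be on the classpath):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Standardize;

public class StandardizeSketch {
    public static void main(String[] args) throws Exception {
        // load any ARFF/CSV dataset; "data.arff" is a placeholder
        Instances data = new DataSource("data.arff").getDataSet();

        Standardize standardize = new Standardize();
        standardize.setInputFormat(data); // fix the dataset structure
        // each numeric attribute now has zero mean and unit variance
        Instances standardized = Filter.useFilter(data, standardize);

        System.out.println(standardized.numInstances() + " instances standardized");
    }
}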
From source file:PrincipalComponents.java
License:Open Source License
private void fillCovariance() throws Exception {
    // first store the means
    m_means = new double[m_trainInstances.numAttributes()];
    m_stdDevs = new double[m_trainInstances.numAttributes()];
    for (int i = 0; i < m_trainInstances.numAttributes(); i++) {
        m_means[i] = m_trainInstances.meanOrMode(i);
        m_stdDevs[i] = Math.sqrt(Utils.variance(m_trainInstances.attributeToDoubleArray(i)));
    }

    // just center the data or standardize it?
    if (m_center) {
        m_centerFilter = new Center();
        m_centerFilter.setInputFormat(m_trainInstances);
        m_trainInstances = Filter.useFilter(m_trainInstances, m_centerFilter);
    } else {
        m_standardizeFilter = new Standardize();
        m_standardizeFilter.setInputFormat(m_trainInstances);
        m_trainInstances = Filter.useFilter(m_trainInstances, m_standardizeFilter);
    }

    // now compute the covariance matrix
    m_correlation = new UpperSymmDenseMatrix(m_numAttribs);
    for (int i = 0; i < m_numAttribs; i++) {
        for (int j = i; j < m_numAttribs; j++) {
            double cov = 0;
            for (Instance inst : m_trainInstances) {
                cov += inst.value(i) * inst.value(j);
            }
            cov /= m_trainInstances.numInstances() - 1;
            m_correlation.set(i, j, cov);
        }
    }
}
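A note on the design choice: when the data is standardized rather than just centered, every attribute ends up with unit variance, so the accumulation loop at the end yields the correlation matrix rather than the covariance matrix, which is presumably why the field is named m_correlation.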
From source file:SMO.java
License:Open Source License
/**
 * Method for building the classifier. Implements a one-against-one
 * wrapper for multi-class problems.
 *
 * @param insts the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {

    if (!m_checksTurnedOff) {
        // can classifier handle the data?
        getCapabilities().testWithFail(insts);

        // remove instances with missing class
        insts = new Instances(insts);
        insts.deleteWithMissingClass();

        /* Removes all the instances with weight equal to 0.
           MUST be done since condition (8) of Keerthi's paper
           is made with the assertion Ci > 0 (See equation (3a). */
        Instances data = new Instances(insts, insts.numInstances());
        for (int i = 0; i < insts.numInstances(); i++) {
            if (insts.instance(i).weight() > 0)
                data.add(insts.instance(i));
        }
        if (data.numInstances() == 0) {
            throw new Exception("No training instances left after removing " +
                "instances with weight 0!");
        }
        insts = data;
    }

    if (!m_checksTurnedOff) {
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    } else {
        m_Missing = null;
    }

    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
        boolean onlyNumeric = true;
        if (!m_checksTurnedOff) {
            for (int i = 0; i < insts.numAttributes(); i++) {
                if (i != insts.classIndex()) {
                    if (!insts.attribute(i).isNumeric()) {
                        onlyNumeric = false;
                        break;
                    }
                }
            }
        }

        if (!onlyNumeric) {
            m_NominalToBinary = new NominalToBinary();
            m_NominalToBinary.setInputFormat(insts);
            insts = Filter.useFilter(insts, m_NominalToBinary);
        } else {
            m_NominalToBinary = null;
        }
    } else {
        m_NominalToBinary = null;
    }

    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else {
        m_Filter = null;
    }

    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();
    m_KernelIsLinear = (m_kernel instanceof PolyKernel) && (((PolyKernel) m_kernel).getExponent() == 1.0);

    // Generate subsets representing each class
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        subsets[(int) inst.classValue()].add(inst);
    }
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i].compactify();
    }

    // Build the binary classifiers
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarySMO[insts.numClasses()][insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        for (int j = i + 1; j < insts.numClasses(); j++) {
            m_classifiers[i][j] = new BinarySMO();
            m_classifiers[i][j].setKernel(Kernel.makeCopy(getKernel()));
            Instances data = new Instances(insts, insts.numInstances());
            for (int k = 0; k < subsets[i].numInstances(); k++) {
                data.add(subsets[i].instance(k));
            }
            for (int k = 0; k < subsets[j].numInstances(); k++) {
                data.add(subsets[j].instance(k));
            }
            data.compactify();
            data.randomize(rand);
            m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}
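The FILTER_STANDARDIZE branch above is what runs when standardization is selected for SMO. With the stock weka.classifiers.functions.SMO the same choice is made through setFilterType(); a short sketch, again using the placeholder file data.arff:

import weka.classifiers.functions.SMO;
import weka.core.Instances;
import weka.core.SelectedTag;
import weka.core.converters.ConverterUtils.DataSource;

public class SMOStandardizeDemo {
    public static void main(String[] args) throws Exception {
        Instances data = new DataSource("data.arff").getDataSet(); // placeholder file
        data.setClassIndex(data.numAttributes() - 1);

        SMO smo = new SMO();
        // ask SMO to standardize the training data internally,
        // i.e. take the FILTER_STANDARDIZE branch shown above
        smo.setFilterType(new SelectedTag(SMO.FILTER_STANDARDIZE, SMO.TAGS_FILTER));
        smo.buildClassifier(data);
    }
}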
From source file:PCADetector.java
License:Apache License
public boolean runPCA(ArrayList<Double> newData, int slidewdSz, double cAlpha, int nAttrs) {
    try {
        if (m_nDims == 0) {
            m_nDims = nAttrs;
            for (int i = 0; i < this.m_nDims; i++) {
                m_oriDataMatrix.add(new ArrayList<Double>()); // one list for each attribute
            }
        }
        verifyData(newData);
        this.c_alpha = cAlpha;
        if (!prepareData(newData, slidewdSz))
            return false;

        Instances oriDataInsts = getInstances();
        if (oriDataInsts != null) {
            // standardization + PCA covariance matrix
            m_scaledInstances = new Instances(oriDataInsts);
            Standardize filter = new Standardize();
            filter.setInputFormat(m_scaledInstances);
            m_scaledInstances = Filter.useFilter(m_scaledInstances, filter); // standardization

            PrincipalComponents PCA = new PrincipalComponents();
            PCA.setVarianceCovered(1.0); // means 100%
            PCA.setMaximumAttributeNames(-1);
            PCA.setCenterData(true);
            Ranker ranker = new Ranker();
            AttributeSelection selector = new AttributeSelection();
            selector.setSearch(ranker);
            selector.setEvaluator(PCA);
            selector.SelectAttributes(m_scaledInstances);
            // Instances transformedData = selector.reduceDimensionality(m_scaledInstances);

            // get sorted eigens
            double[] eigenValues = PCA.getEigenValues();
            // eigenVectors[i][j] i: rows; j: cols
            double[][] eigenVectors = PCA.getUnsortedEigenVectors();
            Sort(eigenValues, eigenVectors);
            setEigens(eigenValues);

            // get residual start dimension
            int residualStartDimension = -1;
            double sum = 0;
            double major = 0;
            for (int ss = 0; ss < eigenValues.length; ss++) {
                sum += eigenValues[ss];
            }
            for (int ss = 0; ss < eigenValues.length; ss++) {
                major += eigenValues[ss];
                if ((residualStartDimension < 0) && (major / sum > 0.95)) {
                    residualStartDimension = ss + 1;
                    break;
                }
            }
            m_threshold = computeThreshold(eigenValues, residualStartDimension);

            // check new data abnormal or not
            boolean bAbnormal = checkSPE(eigenVectors, residualStartDimension, newData);
            computeProjPCs(eigenVectors, residualStartDimension, newData); // only for demo
            if (bAbnormal) {
                // anomaly, now to diagnosis
                // check original space using all the lists
                diagnosis(eigenVectors, residualStartDimension, newData);
            }
        }
    } catch (Exception exc) {
        // exceptions are silently swallowed; the method still reports success
    }
    return true;
}
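For context: the 0.95 cutoff splits the sorted eigenvector basis into a major subspace (the components explaining 95% of the variance) and a residual subspace starting at residualStartDimension. From the method names, checkSPE evidently projects the new sample onto the residual subspace and compares its squared prediction error (SPE) against m_threshold, which is the standard PCA-based anomaly test.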
From source file:adams.data.instancesanalysis.pls.AbstractMultiClassPLS.java
License:Open Source License
/**
 * Preprocesses the data.
 *
 * @param instances the data to process
 * @param params for storing additional parameters (e.g., the class values)
 * @return the preprocessed data
 */
protected Instances preTransform(Instances instances, Map<String, Object> params) throws Exception {
    Map<Integer, double[]> classValues;
    int i;
    int index;

    switch (m_PredictionType) {
    case ALL:
        classValues = null;
        break;
    default:
        classValues = new HashMap<>();
        for (i = 0; i < m_ClassAttributeIndices.size(); i++) {
            index = m_ClassAttributeIndices.get(i);
            classValues.put(index, instances.attributeToDoubleArray(index));
        }
    }
    if (classValues != null)
        params.put(PARAM_CLASSVALUES, classValues);

    if (!isInitialized()) {
        if (m_ReplaceMissing) {
            m_Missing = new ReplaceMissingValues();
            m_Missing.setInputFormat(instances);
        } else {
            m_Missing = null;
        }

        m_ClassMean = new HashMap<>();
        m_ClassStdDev = new HashMap<>();
        for (i = 0; i < m_ClassAttributeIndices.size(); i++) {
            index = m_ClassAttributeIndices.get(i);
            switch (m_PreprocessingType) {
            case CENTER:
                m_ClassMean.put(index, instances.meanOrMode(index));
                m_ClassStdDev.put(index, 1.0);
                m_Filter = new Center();
                ((Center) m_Filter).setIgnoreClass(true);
                break;
            case STANDARDIZE:
                m_ClassMean.put(index, instances.meanOrMode(index));
                m_ClassStdDev.put(index, StrictMath.sqrt(instances.variance(index)));
                m_Filter = new Standardize();
                ((Standardize) m_Filter).setIgnoreClass(true);
                break;
            case NONE:
                m_ClassMean.put(index, 0.0);
                m_ClassStdDev.put(index, 1.0);
                m_Filter = null;
                break;
            default:
                throw new IllegalStateException("Unhandled preprocessing type; " + m_PreprocessingType);
            }
        }
        if (m_Filter != null)
            m_Filter.setInputFormat(instances);
    }

    // filter data
    if (m_Missing != null)
        instances = Filter.useFilter(instances, m_Missing);
    if (m_Filter != null)
        instances = Filter.useFilter(instances, m_Filter);

    return instances;
}
From source file:adams.data.instancesanalysis.pls.AbstractSingleClassPLS.java
License:Open Source License
/**
 * Preprocesses the data.
 *
 * @param instances the data to process
 * @param params for storing additional parameters (e.g., the class values)
 * @return the preprocessed data
 */
protected Instances preTransform(Instances instances, Map<String, Object> params) throws Exception {
    double[] classValues;

    switch (m_PredictionType) {
    case ALL:
        classValues = null;
        break;
    default:
        classValues = instances.attributeToDoubleArray(instances.classIndex());
    }
    if (classValues != null)
        params.put(PARAM_CLASSVALUES, classValues);

    if (!isInitialized()) {
        if (m_ReplaceMissing) {
            m_Missing = new ReplaceMissingValues();
            m_Missing.setInputFormat(instances);
        } else {
            m_Missing = null;
        }

        switch (m_PreprocessingType) {
        case CENTER:
            m_ClassMean = instances.meanOrMode(instances.classIndex());
            m_ClassStdDev = 1;
            m_Filter = new Center();
            ((Center) m_Filter).setIgnoreClass(true);
            break;
        case STANDARDIZE:
            m_ClassMean = instances.meanOrMode(instances.classIndex());
            m_ClassStdDev = StrictMath.sqrt(instances.variance(instances.classIndex()));
            m_Filter = new Standardize();
            ((Standardize) m_Filter).setIgnoreClass(true);
            break;
        case NONE:
            m_ClassMean = 0;
            m_ClassStdDev = 1;
            m_Filter = null;
            break;
        default:
            throw new IllegalStateException("Unhandled preprocessing type; " + m_PreprocessingType);
        }
        if (m_Filter != null)
            m_Filter.setInputFormat(instances);
    }

    // filter data
    if (m_Missing != null)
        instances = Filter.useFilter(instances, m_Missing);
    if (m_Filter != null)
        instances = Filter.useFilter(instances, m_Filter);

    return instances;
}
From source file:assign00.ExperimentShell.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    DataSource source = new DataSource(file);
    Instances dataSet = source.getDataSet();

    // Set up data
    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random(1));

    // determine sizes
    int trainingSize = (int) Math.round(dataSet.numInstances() * .7);
    int testSize = dataSet.numInstances() - trainingSize;
    Instances training = new Instances(dataSet, 0, trainingSize);
    Instances test = new Instances(dataSet, trainingSize, testSize);

    Standardize standardizedData = new Standardize();
    standardizedData.setInputFormat(training);
    // filter the training set first so the filter's statistics come from it,
    // then apply those same statistics to the test set
    Instances newTraining = Filter.useFilter(training, standardizedData);
    Instances newTest = Filter.useFilter(test, standardizedData);

    NeuralNetworkClassifier NWC = new NeuralNetworkClassifier();
    NWC.buildClassifier(newTraining);

    Evaluation eval = new Evaluation(newTraining);
    eval.evaluateModel(NWC, newTest);
    System.out.println(eval.toSummaryString("\nResults\n======\n", false));
}
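One subtlety in this example: setInputFormat() only fixes the dataset structure. Standardize computes its means and standard deviations from the first batch pushed through Filter.useFilter() and reuses them for every later batch, so the training set must be filtered before the test set to keep test data out of the fitted statistics.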
From source file:br.com.ufu.lsi.rebfnetwork.RBFNetwork.java
License:Open Source License
/**
 * Builds the classifier
 *
 * @param instances the training data
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (instances.numAttributes() == 1) {
        System.err.println(
            "Cannot build model (only class attribute present in data!), using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(instances);
        return;
    } else {
        m_ZeroR = null;
    }

    m_standardize = new Standardize();
    m_standardize.setInputFormat(instances);
    instances = Filter.useFilter(instances, m_standardize);

    SimpleKMeans sk = new SimpleKMeans();
    sk.setNumClusters(m_numClusters);
    sk.setSeed(m_clusteringSeed);
    MakeDensityBasedClusterer dc = new MakeDensityBasedClusterer();
    dc.setClusterer(sk);
    dc.setMinStdDev(m_minStdDev);
    m_basisFilter = new ClusterMembership();
    m_basisFilter.setDensityBasedClusterer(dc);
    m_basisFilter.setInputFormat(instances);
    Instances transformed = Filter.useFilter(instances, m_basisFilter);

    if (instances.classAttribute().isNominal()) {
        m_linear = null;
        m_logistic = new Logistic();
        m_logistic.setRidge(m_ridge);
        m_logistic.setMaxIts(m_maxIts);
        m_logistic.buildClassifier(transformed);
    } else {
        m_logistic = null;
        m_linear = new LinearRegression();
        m_linear.setAttributeSelectionMethod(
            new SelectedTag(LinearRegression.SELECTION_NONE, LinearRegression.TAGS_SELECTION));
        m_linear.setRidge(m_ridge);
        m_linear.buildClassifier(transformed);
    }
}
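Standardizing before the k-means step matters here: SimpleKMeans assigns instances by Euclidean distance, so without rescaling, attributes with large numeric ranges would dominate the placement of the cluster centres that become the RBF basis functions.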
From source file:com.mycompany.id3classifier.ID3Shell.java
public static void main(String[] args) throws Exception {
    ConverterUtils.DataSource source = new ConverterUtils.DataSource("lensesData.csv");
    Instances dataSet = source.getDataSet();

    Discretize filter = new Discretize();
    filter.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, filter);

    Standardize standardize = new Standardize();
    standardize.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, standardize);

    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random(9001)); // It's over 9000!!

    int folds = 10;

    // Perform cross-validation
    Evaluation eval = new Evaluation(dataSet);
    for (int n = 0; n < folds; n++) {
        Instances trainingData = dataSet.trainCV(folds, n);
        Instances testData = dataSet.testCV(folds, n);
        ID3Classifier classifier = new ID3Classifier();
        // Id3 classifier = new Id3();
        classifier.buildClassifier(trainingData);
        eval.evaluateModel(classifier, testData);
    }
    System.out.println(eval.toSummaryString("\nResults:\n", false));
}
From source file:com.mycompany.knnclassifier.kNNShell.java
public static void main(String[] args) throws Exception {
    ConverterUtils.DataSource source = new ConverterUtils.DataSource("carData.csv");
    Instances dataSet = source.getDataSet();

    Standardize standardize = new Standardize();
    standardize.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, standardize);

    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random(9001)); // It's over 9000!!

    int trainingSize = (int) Math.round(dataSet.numInstances() * .7);
    int testSize = dataSet.numInstances() - trainingSize;
    Instances trainingData = new Instances(dataSet, 0, trainingSize);
    Instances testData = new Instances(dataSet, trainingSize, testSize);

    kNNClassifier classifier = new kNNClassifier(3);
    classifier.buildClassifier(trainingData);

    // Used to compare to Weka's built in KNN algorithm
    //Classifier classifier = new IBk(1);
    //classifier.buildClassifier(trainingData);

    Evaluation eval = new Evaluation(trainingData);
    eval.evaluateModel(classifier, testData);
    System.out.println(eval.toSummaryString("\nResults:\n", false));
}
From source file:com.mycompany.neuralnetwork.NeuralNetworkShell.java
public static void main(String[] args) throws Exception {
    ConverterUtils.DataSource source = new ConverterUtils.DataSource("irisData.csv");
    Instances dataSet = source.getDataSet();

    Standardize standardize = new Standardize();
    standardize.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, standardize);

    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random(9001)); // It's over 9000!!

    int trainingSize = (int) Math.round(dataSet.numInstances() * .7);
    int testSize = dataSet.numInstances() - trainingSize;
    Instances trainingData = new Instances(dataSet, 0, trainingSize);
    Instances testData = new Instances(dataSet, trainingSize, testSize);

    //MultilayerPerceptron classifier = new MultilayerPerceptron();
    NeuralNetworkClassifier classifier = new NeuralNetworkClassifier(3, 20000, 0.1);
    classifier.buildClassifier(trainingData);

    Evaluation eval = new Evaluation(trainingData);
    eval.evaluateModel(classifier, testData);
    System.out.println(eval.toSummaryString("\nResults:\n", false));
}