Example usage for weka.filters.unsupervised.attribute Standardize Standardize

List of usage examples for weka.filters.unsupervised.attribute Standardize Standardize

Introduction

On this page you can find example usage for weka.filters.unsupervised.attribute Standardize Standardize.

Prototype

Standardize

Source Link

Usage

From source file:PrincipalComponents.java

License:Open Source License

private void fillCovariance() throws Exception {
    // Record the mean and standard deviation of every attribute before
    // any filtering changes the data.
    final int numAttrs = m_trainInstances.numAttributes();
    m_means = new double[numAttrs];
    m_stdDevs = new double[numAttrs];
    for (int attr = 0; attr < numAttrs; attr++) {
        m_means[attr] = m_trainInstances.meanOrMode(attr);
        m_stdDevs[attr] = Math.sqrt(Utils.variance(m_trainInstances.attributeToDoubleArray(attr)));
    }

    // Either just center the data (subtract the mean) or fully standardize it.
    if (m_center) {
        m_centerFilter = new Center();
        m_centerFilter.setInputFormat(m_trainInstances);
        m_trainInstances = Filter.useFilter(m_trainInstances, m_centerFilter);
    } else {
        m_standardizeFilter = new Standardize();
        m_standardizeFilter.setInputFormat(m_trainInstances);
        m_trainInstances = Filter.useFilter(m_trainInstances, m_standardizeFilter);
    }

    // With zero-mean data the covariance of two attributes reduces to the
    // dot product of their columns over (n - 1); only the upper triangle
    // is needed because the matrix is symmetric.
    m_correlation = new UpperSymmDenseMatrix(m_numAttribs);
    final int denom = m_trainInstances.numInstances() - 1;
    for (int row = 0; row < m_numAttribs; row++) {
        for (int col = row; col < m_numAttribs; col++) {
            double dot = 0;
            for (Instance inst : m_trainInstances) {
                dot += inst.value(row) * inst.value(col);
            }
            m_correlation.set(row, col, dot / denom);
        }
    }
}

From source file:SMO.java

License:Open Source License

/**
 * Method for building the classifier. Implements a one-against-one
 * wrapper for multi-class problems.
 *
 * <p>Pipeline: optional data checks/cleanup, missing-value replacement,
 * nominal-to-binary conversion, optional normalization or standardization,
 * then one {@code BinarySMO} model per unordered pair of class values.</p>
 *
 * @param insts the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {

    if (!m_checksTurnedOff) {
        // can classifier handle the data?
        getCapabilities().testWithFail(insts);

        // remove instances with missing class
        insts = new Instances(insts);
        insts.deleteWithMissingClass();

        /* Removes all the instances with weight equal to 0.
         MUST be done since condition (8) of Keerthi's paper 
         is made with the assertion Ci > 0 (See equation (3a). */
        Instances data = new Instances(insts, insts.numInstances());
        for (int i = 0; i < insts.numInstances(); i++) {
            if (insts.instance(i).weight() > 0)
                data.add(insts.instance(i));
        }
        if (data.numInstances() == 0) {
            throw new Exception("No training instances left after removing " + "instances with weight 0!");
        }
        insts = data;
    }

    // Replace missing attribute values (skipped entirely when checks are off).
    if (!m_checksTurnedOff) {
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    } else {
        m_Missing = null;
    }

    // Convert nominal attributes to binary ones, but only when at least one
    // non-class attribute is actually nominal (with checks off we assume
    // the data is already all-numeric).
    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
        boolean onlyNumeric = true;
        if (!m_checksTurnedOff) {
            for (int i = 0; i < insts.numAttributes(); i++) {
                if (i != insts.classIndex()) {
                    if (!insts.attribute(i).isNumeric()) {
                        onlyNumeric = false;
                        break;
                    }
                }
            }
        }

        if (!onlyNumeric) {
            m_NominalToBinary = new NominalToBinary();
            m_NominalToBinary.setInputFormat(insts);
            insts = Filter.useFilter(insts, m_NominalToBinary);
        } else {
            m_NominalToBinary = null;
        }
    } else {
        m_NominalToBinary = null;
    }

    // Optional attribute scaling, as configured by m_filterType.
    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else {
        m_Filter = null;
    }

    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();
    // Linear kernel iff a polynomial kernel with exponent exactly 1.
    m_KernelIsLinear = (m_kernel instanceof PolyKernel) && (((PolyKernel) m_kernel).getExponent() == 1.0);

    // Generate subsets representing each class
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        subsets[(int) inst.classValue()].add(inst);
    }
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i].compactify();
    }

    // Build the binary classifiers: one BinarySMO for each pair (i, j),
    // i < j, trained on the union of the two class subsets.
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarySMO[insts.numClasses()][insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        for (int j = i + 1; j < insts.numClasses(); j++) {
            m_classifiers[i][j] = new BinarySMO();
            m_classifiers[i][j].setKernel(Kernel.makeCopy(getKernel()));
            Instances data = new Instances(insts, insts.numInstances());
            for (int k = 0; k < subsets[i].numInstances(); k++) {
                data.add(subsets[i].instance(k));
            }
            for (int k = 0; k < subsets[j].numInstances(); k++) {
                data.add(subsets[j].instance(k));
            }
            data.compactify();
            data.randomize(rand);
            m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}

From source file:PCADetector.java

License:Apache License

/**
 * Runs PCA-based anomaly detection over a sliding window of samples.
 *
 * <p>Standardizes the windowed data, performs principal components
 * analysis, determines the residual subspace (dimensions past 95% of the
 * total variance) and its SPE threshold, then checks whether the newest
 * sample is anomalous, triggering a diagnosis pass when it is.</p>
 *
 * @param newData   the newest sample, one value per attribute
 * @param slidewdSz the sliding-window size
 * @param cAlpha    the confidence coefficient used for the SPE threshold
 * @param nAttrs    the number of attributes per sample
 * @return true if the computation completed; false if the window is not
 *         yet ready or an error occurred
 */
public boolean runPCA(ArrayList<Double> newData, int slidewdSz, double cAlpha, int nAttrs) {
    try {
        // Lazily initialize one value-list per attribute on first call.
        if (m_nDims == 0) {
            m_nDims = nAttrs;
            for (int i = 0; i < this.m_nDims; i++) {
                m_oriDataMatrix.add(new ArrayList<Double>()); // one list for each attribute
            }
        }
        verifyData(newData);
        this.c_alpha = cAlpha;
        if (false == prepareData(newData, slidewdSz))
            return false;
        Instances oriDataInsts = getInstances();
        if (oriDataInsts != null) {
            // standardization + PCA covariance matrix
            m_scaledInstances = new Instances(oriDataInsts);
            Standardize filter = new Standardize();
            filter.setInputFormat(m_scaledInstances);
            // NOTE(review): useFilter is a static method inherited from
            // weka.filters.Filter; calling it via the Standardize subclass
            // works but Filter.useFilter would be clearer.
            m_scaledInstances = Standardize.useFilter(m_scaledInstances, filter); // standardization

            PrincipalComponents PCA = new PrincipalComponents();
            PCA.setVarianceCovered(1.0); // means 100%
            PCA.setMaximumAttributeNames(-1);
            PCA.setCenterData(true);
            Ranker ranker = new Ranker();
            AttributeSelection selector = new AttributeSelection();
            selector.setSearch(ranker);
            selector.setEvaluator(PCA);
            selector.SelectAttributes(m_scaledInstances);

            // get sorted eigens
            double[] eigenValues = PCA.getEigenValues();
            // eigenVectors[i][j]  i: rows; j: cols
            double[][] eigenVectors = PCA.getUnsortedEigenVectors();
            Sort(eigenValues, eigenVectors);
            setEigens(eigenValues);

            // Residual subspace starts at the first dimension where the
            // leading eigenvalues cover more than 95% of the total variance.
            int residualStartDimension = -1;
            double sum = 0;
            double major = 0;
            for (int ss = 0; ss < eigenValues.length; ss++) {
                sum += eigenValues[ss];
            }
            for (int ss = 0; ss < eigenValues.length; ss++) {
                major += eigenValues[ss];
                if ((residualStartDimension < 0) && (major / sum > 0.95)) {
                    residualStartDimension = ss + 1;
                    break;
                }
            }
            m_threshold = computeThreshold(eigenValues, residualStartDimension);

            // check new data abnormal or not
            boolean bAbnormal = checkSPE(eigenVectors, residualStartDimension, newData);
            computeProjPCs(eigenVectors, residualStartDimension, newData); // only for demo

            if (bAbnormal) { // anomaly, now to diagnosis
                // check original space using all the lists
                diagnosis(eigenVectors, residualStartDimension, newData);
            }
        }
    } catch (Exception exc) {
        // BUG FIX: the original empty catch silently swallowed every error
        // and the method still reported success. Surface the failure and
        // report it to the caller instead.
        exc.printStackTrace();
        return false;
    }
    return true;
}

From source file:adams.data.instancesanalysis.pls.AbstractMultiClassPLS.java

License:Open Source License

/**
 * Preprocesses the data.
 *
 * <p>Optionally snapshots the class columns into {@code params}, lazily
 * configures the missing-value and center/standardize filters on first
 * use, then applies them to the data.</p>
 *
 * @param instances the data to process
 * @param params    storage for the original class values (under
 *                  {@code PARAM_CLASSVALUES}) unless all attributes are predicted
 * @return the preprocessed data
 * @throws Exception if setting up or applying a filter fails
 */
protected Instances preTransform(Instances instances, Map<String, Object> params) throws Exception {
    Map<Integer, double[]> classValues;
    int i;
    int index;

    // Keep the original class columns so predictions can later be mapped
    // back, except when every attribute is being predicted (ALL).
    switch (m_PredictionType) {
    case ALL:
        classValues = null;
        break;
    default:
        classValues = new HashMap<>();
        for (i = 0; i < m_ClassAttributeIndices.size(); i++) {
            index = m_ClassAttributeIndices.get(i);
            classValues.put(index, instances.attributeToDoubleArray(index));
        }
    }

    if (classValues != null)
        params.put(PARAM_CLASSVALUES, classValues);

    // One-time setup of the filters and per-class statistics.
    if (!isInitialized()) {
        if (m_ReplaceMissing) {
            m_Missing = new ReplaceMissingValues();
            m_Missing.setInputFormat(instances);
        } else {
            m_Missing = null;
        }

        m_ClassMean = new HashMap<>();
        m_ClassStdDev = new HashMap<>();
        // NOTE(review): m_Filter is reassigned on every loop iteration, so
        // only the filter created for the LAST class attribute index survives
        // the loop. The mean/stddev maps are per-index, but the filter is
        // not -- confirm this is intended (it is consistent only if the
        // preprocessing type is the same for all class attributes).
        for (i = 0; i < m_ClassAttributeIndices.size(); i++) {
            index = m_ClassAttributeIndices.get(i);
            switch (m_PreprocessingType) {
            case CENTER:
                // Centering: remember the mean; stddev stays 1 (no scaling).
                m_ClassMean.put(index, instances.meanOrMode(index));
                m_ClassStdDev.put(index, 1.0);
                m_Filter = new Center();
                ((Center) m_Filter).setIgnoreClass(true);
                break;
            case STANDARDIZE:
                // Standardizing: remember both mean and stddev for rescaling.
                m_ClassMean.put(index, instances.meanOrMode(index));
                m_ClassStdDev.put(index, StrictMath.sqrt(instances.variance(index)));
                m_Filter = new Standardize();
                ((Standardize) m_Filter).setIgnoreClass(true);
                break;
            case NONE:
                // No preprocessing: identity statistics, no filter.
                m_ClassMean.put(index, 0.0);
                m_ClassStdDev.put(index, 1.0);
                m_Filter = null;
                break;
            default:
                throw new IllegalStateException("Unhandled preprocessing type; " + m_PreprocessingType);
            }
        }
        if (m_Filter != null)
            m_Filter.setInputFormat(instances);
    }

    // filter data
    if (m_Missing != null)
        instances = Filter.useFilter(instances, m_Missing);
    if (m_Filter != null)
        instances = Filter.useFilter(instances, m_Filter);

    return instances;
}

From source file:adams.data.instancesanalysis.pls.AbstractSingleClassPLS.java

License:Open Source License

/**
 * Preprocesses the data.
 *
 * <p>Optionally snapshots the class column into {@code params}, lazily
 * configures the missing-value and center/standardize filters on first
 * use, then applies them to the data.</p>
 *
 * @param instances the data to process
 * @param params    storage for the original class values (under
 *                  {@code PARAM_CLASSVALUES}) unless all attributes are predicted
 * @return the preprocessed data
 * @throws Exception if setting up or applying a filter fails
 */
protected Instances preTransform(Instances instances, Map<String, Object> params) throws Exception {
    // Keep the original class column so predictions can later be mapped
    // back, except when every attribute is being predicted (ALL).
    double[] classValues = null;
    switch (m_PredictionType) {
    case ALL:
        break;
    default:
        classValues = instances.attributeToDoubleArray(instances.classIndex());
        break;
    }

    if (classValues != null)
        params.put(PARAM_CLASSVALUES, classValues);

    // One-time setup of the filters and class statistics.
    if (!isInitialized()) {
        m_Missing = null;
        if (m_ReplaceMissing) {
            m_Missing = new ReplaceMissingValues();
            m_Missing.setInputFormat(instances);
        }

        switch (m_PreprocessingType) {
        case CENTER:
            // Centering: remember the mean; stddev stays 1 (no scaling).
            m_ClassMean = instances.meanOrMode(instances.classIndex());
            m_ClassStdDev = 1;
            Center center = new Center();
            center.setIgnoreClass(true);
            m_Filter = center;
            break;
        case STANDARDIZE:
            // Standardizing: remember both mean and stddev for rescaling.
            m_ClassMean = instances.meanOrMode(instances.classIndex());
            m_ClassStdDev = StrictMath.sqrt(instances.variance(instances.classIndex()));
            Standardize standardize = new Standardize();
            standardize.setIgnoreClass(true);
            m_Filter = standardize;
            break;
        case NONE:
            // No preprocessing: identity statistics, no filter.
            m_ClassMean = 0;
            m_ClassStdDev = 1;
            m_Filter = null;
            break;
        default:
            throw new IllegalStateException("Unhandled preprocessing type; " + m_PreprocessingType);
        }
        if (m_Filter != null)
            m_Filter.setInputFormat(instances);
    }

    // filter data
    if (m_Missing != null)
        instances = Filter.useFilter(instances, m_Missing);
    if (m_Filter != null)
        instances = Filter.useFilter(instances, m_Filter);

    return instances;
}

From source file:assign00.ExperimentShell.java

/**
 * Loads a data set, splits it 70/30, standardizes both splits using
 * statistics derived from the TRAINING split, then trains and evaluates
 * a neural-network classifier.
 *
 * @param args the command line arguments (unused)
 * @throws Exception if loading, filtering, training or evaluation fails
 */
public static void main(String[] args) throws Exception {
    // NOTE(review): 'file' is not declared in this snippet -- presumably a
    // static field of the enclosing class; confirm it names a valid source.
    DataSource source = new DataSource(file);
    Instances dataSet = source.getDataSet();

    //Set up data
    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random(1));

    //determine sizes
    int trainingSize = (int) Math.round(dataSet.numInstances() * .7);
    int testSize = dataSet.numInstances() - trainingSize;

    Instances training = new Instances(dataSet, 0, trainingSize);
    Instances test = new Instances(dataSet, trainingSize, testSize);

    Standardize standardizedData = new Standardize();
    standardizedData.setInputFormat(training);

    // BUG FIX: Standardize is a batch filter -- it computes its mean/stddev
    // statistics from the FIRST batch pushed through Filter.useFilter, not
    // from setInputFormat. The original code filtered the test set first,
    // so the TEST set's statistics were used for both splits (data leakage).
    // Filter the training data first so its statistics are reused for test.
    Instances newTraining = Filter.useFilter(training, standardizedData);
    Instances newTest = Filter.useFilter(test, standardizedData);

    NeuralNetworkClassifier NWC = new NeuralNetworkClassifier();
    NWC.buildClassifier(newTraining);

    Evaluation eval = new Evaluation(newTraining);
    eval.evaluateModel(NWC, newTest);

    System.out.println(eval.toSummaryString("\nResults\n======\n", false));
}

From source file:br.com.ufu.lsi.rebfnetwork.RBFNetwork.java

License:Open Source License

/**
 * Builds the classifier: standardizes the data, derives RBF basis
 * functions from a density-based clustering, then fits a logistic
 * regression (nominal class) or linear regression (numeric class) on
 * the transformed data.
 *
 * @param instances the training data
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {

    // Make sure the classifier can handle this data.
    getCapabilities().testWithFail(instances);

    // Work on a copy with missing-class instances removed.
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    // Degenerate case: only the class attribute present -> fall back to ZeroR.
    m_ZeroR = null;
    if (instances.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(instances);
        return;
    }

    // Standardize all attributes before clustering.
    m_standardize = new Standardize();
    m_standardize.setInputFormat(instances);
    instances = Filter.useFilter(instances, m_standardize);

    // Express every instance through cluster-membership densities, which
    // act as the radial basis functions of the network.
    SimpleKMeans kMeans = new SimpleKMeans();
    kMeans.setNumClusters(m_numClusters);
    kMeans.setSeed(m_clusteringSeed);

    MakeDensityBasedClusterer density = new MakeDensityBasedClusterer();
    density.setClusterer(kMeans);
    density.setMinStdDev(m_minStdDev);

    m_basisFilter = new ClusterMembership();
    m_basisFilter.setDensityBasedClusterer(density);
    m_basisFilter.setInputFormat(instances);
    Instances transformed = Filter.useFilter(instances, m_basisFilter);

    // Fit the output layer on the basis-function representation.
    if (instances.classAttribute().isNominal()) {
        m_linear = null;
        m_logistic = new Logistic();
        m_logistic.setRidge(m_ridge);
        m_logistic.setMaxIts(m_maxIts);
        m_logistic.buildClassifier(transformed);
    } else {
        m_logistic = null;
        m_linear = new LinearRegression();
        m_linear.setAttributeSelectionMethod(
                new SelectedTag(LinearRegression.SELECTION_NONE, LinearRegression.TAGS_SELECTION));
        m_linear.setRidge(m_ridge);
        m_linear.buildClassifier(transformed);
    }
}

From source file:com.mycompany.id3classifier.ID3Shell.java

/**
 * Loads the lenses data set, discretizes and standardizes it, then
 * estimates ID3 accuracy with 10-fold cross-validation.
 *
 * @param args the command line arguments (unused)
 * @throws Exception if loading, filtering, training or evaluation fails
 */
public static void main(String[] args) throws Exception {
    ConverterUtils.DataSource source = new ConverterUtils.DataSource("lensesData.csv");
    Instances dataSet = source.getDataSet();

    // Discretize numeric attributes into nominal bins.
    // NOTE(review): the class index is set only AFTER filtering, so the
    // class column is discretized/standardized along with the other
    // attributes -- confirm that is intended.
    Discretize filter = new Discretize();
    filter.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, filter);

    Standardize standardize = new Standardize();
    standardize.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, standardize);

    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random(9001)); //It's over 9000!!

    int folds = 10;
    // Perform cross-validation; the single Evaluation object accumulates
    // results across all folds.
    Evaluation eval = new Evaluation(dataSet);
    for (int n = 0; n < folds; n++) {
        // BUG FIX: removed unused trainingSize/testSize locals that were
        // recomputed (and ignored) on every fold -- trainCV/testCV already
        // determine the fold sizes.
        Instances trainingData = dataSet.trainCV(folds, n);
        Instances testData = dataSet.testCV(folds, n);

        ID3Classifier classifier = new ID3Classifier();
        classifier.buildClassifier(trainingData);

        eval.evaluateModel(classifier, testData);
    }
    System.out.println(eval.toSummaryString("\nResults:\n", false));
}

From source file:com.mycompany.knnclassifier.kNNShell.java

/**
 * Loads the car data set, standardizes it, performs a 70/30 train/test
 * split, then trains and evaluates a custom k-nearest-neighbour
 * classifier (k = 3).
 *
 * @param args the command line arguments (unused)
 * @throws Exception if loading, filtering, training or evaluation fails
 */
public static void main(String[] args) throws Exception {
    Instances dataSet = new ConverterUtils.DataSource("carData.csv").getDataSet();

    // Standardize the attributes across the whole data set.
    Standardize standardize = new Standardize();
    standardize.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, standardize);

    // Last attribute is the class; shuffle with a fixed seed for
    // reproducibility.
    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random(9001)); //It's over 9000!!

    // 70/30 train/test split.
    int trainCount = (int) Math.round(dataSet.numInstances() * .7);
    int testCount = dataSet.numInstances() - trainCount;
    Instances trainingData = new Instances(dataSet, 0, trainCount);
    Instances testData = new Instances(dataSet, trainCount, testCount);

    kNNClassifier classifier = new kNNClassifier(3);
    classifier.buildClassifier(trainingData);

    //Used to compare to Weka's built in KNN algorithm
    //Classifier classifier = new IBk(1);
    //classifier.buildClassifier(trainingData);

    Evaluation eval = new Evaluation(trainingData);
    eval.evaluateModel(classifier, testData);

    System.out.println(eval.toSummaryString("\nResults:\n", false));
}

From source file:com.mycompany.neuralnetwork.NeuralNetworkShell.java

/**
 * Loads the iris data set, standardizes it, performs a 70/30 train/test
 * split, then trains and evaluates a custom neural-network classifier.
 *
 * @param args the command line arguments (unused)
 * @throws Exception if loading, filtering, training or evaluation fails
 */
public static void main(String[] args) throws Exception {
    Instances dataSet = new ConverterUtils.DataSource("irisData.csv").getDataSet();

    // Standardize the attributes across the whole data set.
    Standardize standardize = new Standardize();
    standardize.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, standardize);

    // Last attribute is the class; shuffle with a fixed seed for
    // reproducibility.
    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random(9001)); //It's over 9000!!

    // 70/30 train/test split.
    int trainCount = (int) Math.round(dataSet.numInstances() * .7);
    int testCount = dataSet.numInstances() - trainCount;
    Instances trainingData = new Instances(dataSet, 0, trainCount);
    Instances testData = new Instances(dataSet, trainCount, testCount);

    //MultilayerPerceptron classifier = new MultilayerPerceptron();
    NeuralNetworkClassifier classifier = new NeuralNetworkClassifier(3, 20000, 0.1);
    classifier.buildClassifier(trainingData);

    Evaluation eval = new Evaluation(trainingData);
    eval.evaluateModel(classifier, testData);

    System.out.println(eval.toSummaryString("\nResults:\n", false));
}