Example usage for weka.filters.unsupervised.attribute Standardize setInputFormat

List of usage examples for weka.filters.unsupervised.attribute Standardize setInputFormat

Introduction

This page collects usage examples of the setInputFormat method of weka.filters.unsupervised.attribute.Standardize.

Prototype

public boolean setInputFormat(Instances instanceInfo) throws Exception 

Source Link

Document

Sets the format of the input instances.

Usage

From source file:PCADetector.java

License:Apache License

/**
 * Adds one observation to the detector and re-runs the PCA-based anomaly check.
 *
 * <p>On the first call (while {@code m_nDims} is still 0) the dimensionality is fixed to
 * {@code nAttrs} and one empty list per attribute is added to {@code m_oriDataMatrix}.
 * The method then verifies and prepares {@code newData}, standardizes a copy of the
 * instances returned by {@code getInstances()}, runs Weka's {@code PrincipalComponents}
 * evaluator, derives the residual start dimension (the first component at which the
 * cumulative eigenvalue share exceeds 0.95), recomputes {@code m_threshold} and calls
 * {@code checkSPE}; when that reports an anomaly, {@code diagnosis} is invoked.
 *
 * @param newData   the new observation; passed to {@code verifyData}, {@code prepareData}
 *                  and the SPE / projection / diagnosis helpers
 * @param slidewdSz forwarded to {@code prepareData} (presumably the sliding-window size;
 *                  confirm against that helper)
 * @param cAlpha    value stored in {@code c_alpha}
 * @param nAttrs    number of attributes; only read on the first call
 * @return {@code false} if {@code prepareData} returns {@code false}; {@code true}
 *         otherwise, including when the analysis fails with an exception (the failure
 *         is reported on {@code System.err} instead of being silently discarded)
 */
public boolean runPCA(ArrayList<Double> newData, int slidewdSz, double cAlpha, int nAttrs) {
    try {
        if (m_nDims == 0) {
            m_nDims = nAttrs;
            for (int i = 0; i < this.m_nDims; i++) {
                m_oriDataMatrix.add(new ArrayList<Double>()); // one list for each attribute
            }
        }
        verifyData(newData);
        this.c_alpha = cAlpha;
        if (!prepareData(newData, slidewdSz)) {
            return false;
        }
        Instances oriDataInsts = getInstances();
        if (oriDataInsts != null) {
            // standardization + PCA covariance matrix
            m_scaledInstances = new Instances(oriDataInsts);
            Standardize filter = new Standardize();

            filter.setInputFormat(m_scaledInstances);
            m_scaledInstances = Standardize.useFilter(m_scaledInstances, filter); // standardization

            PrincipalComponents PCA = new PrincipalComponents();
            PCA.setVarianceCovered(1.0); // means 100%
            PCA.setMaximumAttributeNames(-1);
            PCA.setCenterData(true);
            Ranker ranker = new Ranker();
            AttributeSelection selector = new AttributeSelection();
            selector.setSearch(ranker);
            selector.setEvaluator(PCA);
            selector.SelectAttributes(m_scaledInstances);

            // get sorted eigens
            double[] eigenValues = PCA.getEigenValues();
            // eigenVectors[i][j]  i: rows; j: cols
            double[][] eigenVectors = PCA.getUnsortedEigenVectors();
            Sort(eigenValues, eigenVectors);
            setEigens(eigenValues);

            // get residual start dimension: first index at which the cumulative
            // eigenvalue share exceeds 95%; stays -1 if that never happens
            int residualStartDimension = -1;
            double sum = 0;
            for (int ss = 0; ss < eigenValues.length; ss++) {
                sum += eigenValues[ss];
            }
            double major = 0;
            for (int ss = 0; ss < eigenValues.length; ss++) {
                major += eigenValues[ss];
                if (major / sum > 0.95) {
                    residualStartDimension = ss + 1;
                    break;
                }
            }
            m_threshold = computeThreshold(eigenValues, residualStartDimension);

            // check new data abnormal or not
            boolean bAbnormal = checkSPE(eigenVectors, residualStartDimension, newData);
            computeProjPCs(eigenVectors, residualStartDimension, newData); // only for demo

            if (bAbnormal) { // anomaly, now to diagnosis
                // check original space using all the lists
                diagnosis(eigenVectors, residualStartDimension, newData);
            }
        }
    } catch (Exception exc) {
        // Previously swallowed without a trace. The return value is kept as-is so existing
        // callers are unaffected, but the failure is now visible.
        System.err.println("runPCA failed: " + exc);
    }
    return true;
}

From source file:assign00.ExperimentShell.java

/**
 * Loads the data set named by {@code file}, splits it 70/30 into training and test
 * sets, standardizes both using statistics taken from the training set, trains a
 * {@code NeuralNetworkClassifier} and prints the evaluation summary.
 *
 * @param args the command line arguments (not used)
 * @throws Exception propagated from the Weka loading, filtering, training or
 *                   evaluation calls
 */
public static void main(String[] args) throws Exception {
    DataSource source = new DataSource(file);
    Instances dataSet = source.getDataSet();

    //Set up data
    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random(1));

    //determine sizes
    int trainingSize = (int) Math.round(dataSet.numInstances() * .7);
    int testSize = dataSet.numInstances() - trainingSize;

    Instances training = new Instances(dataSet, 0, trainingSize);
    Instances test = new Instances(dataSet, trainingSize, testSize);

    Standardize standardizedData = new Standardize();
    standardizedData.setInputFormat(training);

    // Order matters: the first batch pushed through the filter after setInputFormat
    // determines the mean/stddev, so the training set must be filtered first and the
    // test set is then scaled with the training statistics.
    Instances newTraining = Filter.useFilter(training, standardizedData);
    Instances newTest = Filter.useFilter(test, standardizedData);

    NeuralNetworkClassifier NWC = new NeuralNetworkClassifier();
    NWC.buildClassifier(newTraining);

    Evaluation eval = new Evaluation(newTraining);
    eval.evaluateModel(NWC, newTest);

    System.out.println(eval.toSummaryString("\nResults\n======\n", false));
}

From source file:com.mycompany.id3classifier.ID3Shell.java

/**
 * Loads {@code lensesData.csv}, discretizes and then standardizes it, shuffles it with
 * a fixed seed and evaluates an {@code ID3Classifier} over 10 cross-validation folds,
 * accumulating all folds into a single {@code Evaluation} whose summary is printed.
 *
 * @param args the command line arguments (not used)
 * @throws Exception propagated from the Weka loading, filtering, training or
 *                   evaluation calls
 */
public static void main(String[] args) throws Exception {
    ConverterUtils.DataSource source = new ConverterUtils.DataSource("lensesData.csv");
    Instances dataSet = source.getDataSet();

    Discretize filter = new Discretize();
    filter.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, filter);

    Standardize standardize = new Standardize();
    standardize.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, standardize);

    // The last attribute is the class; the seed is fixed so runs are repeatable.
    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random(9001));

    int folds = 10;
    // Perform cross-validation: each fold trains a fresh classifier and adds its
    // test results to the shared Evaluation. The fold splits come from
    // trainCV/testCV, so no manual size computation is needed.
    Evaluation eval = new Evaluation(dataSet);
    for (int n = 0; n < folds; n++) {
        Instances trainingData = dataSet.trainCV(folds, n);
        Instances testData = dataSet.testCV(folds, n);

        ID3Classifier classifier = new ID3Classifier();
        classifier.buildClassifier(trainingData);

        eval.evaluateModel(classifier, testData);
    }
    System.out.println(eval.toSummaryString("\nResults:\n", false));
}

From source file:com.mycompany.knnclassifier.kNNShell.java

/**
 * Loads {@code carData.csv}, standardizes the whole data set, shuffles it with a fixed
 * seed, splits it 70/30 and evaluates a 3-neighbour {@code kNNClassifier} on the held-out
 * part, printing the evaluation summary.
 *
 * @param args the command line arguments (not used)
 * @throws Exception propagated from the Weka loading, filtering, training or
 *                   evaluation calls
 */
public static void main(String[] args) throws Exception {
    final ConverterUtils.DataSource loader = new ConverterUtils.DataSource("carData.csv");
    Instances data = loader.getDataSet();

    // Standardize the full data set in one pass.
    final Standardize scaler = new Standardize();
    scaler.setInputFormat(data);
    data = Filter.useFilter(data, scaler);

    // Last attribute is the class; shuffle with a fixed seed.
    final int classColumn = data.numAttributes() - 1;
    data.setClassIndex(classColumn);
    data.randomize(new Random(9001));

    // First 70% (rounded) for training, the remainder for testing.
    final int trainCount = (int) Math.round(data.numInstances() * .7);
    final int testCount = data.numInstances() - trainCount;
    final Instances trainPart = new Instances(data, 0, trainCount);
    final Instances testPart = new Instances(data, trainCount, testCount);

    // Weka's built-in IBk could be substituted here to compare results.
    final kNNClassifier model = new kNNClassifier(3);
    model.buildClassifier(trainPart);

    final Evaluation evaluation = new Evaluation(trainPart);
    evaluation.evaluateModel(model, testPart);

    final String summary = evaluation.toSummaryString("\nResults:\n", false);
    System.out.println(summary);
}

From source file:com.mycompany.neuralnetwork.NeuralNetworkShell.java

/**
 * Loads {@code irisData.csv}, standardizes the whole data set, shuffles it with a fixed
 * seed, splits it 70/30 and evaluates a {@code NeuralNetworkClassifier} on the held-out
 * part, printing the evaluation summary.
 *
 * @param args the command line arguments (not used)
 * @throws Exception propagated from the Weka loading, filtering, training or
 *                   evaluation calls
 */
public static void main(String[] args) throws Exception {
    final ConverterUtils.DataSource loader = new ConverterUtils.DataSource("irisData.csv");
    Instances data = loader.getDataSet();

    // Standardize the full data set in one pass.
    final Standardize scaler = new Standardize();
    scaler.setInputFormat(data);
    data = Filter.useFilter(data, scaler);

    // Last attribute is the class; shuffle with a fixed seed.
    final int classColumn = data.numAttributes() - 1;
    data.setClassIndex(classColumn);
    data.randomize(new Random(9001));

    // First 70% (rounded) for training, the remainder for testing.
    final int trainCount = (int) Math.round(data.numInstances() * .7);
    final int testCount = data.numInstances() - trainCount;
    final Instances trainPart = new Instances(data, 0, trainCount);
    final Instances testPart = new Instances(data, trainCount, testCount);

    // Weka's MultilayerPerceptron could be substituted here to compare results.
    final NeuralNetworkClassifier model = new NeuralNetworkClassifier(3, 20000, 0.1);
    model.buildClassifier(trainPart);

    final Evaluation evaluation = new Evaluation(trainPart);
    evaluation.evaluateModel(model, testPart);

    final String summary = evaluation.toSummaryString("\nResults:\n", false);
    System.out.println(summary);
}

From source file:edu.utexas.cs.tactex.utils.RegressionUtils.java

License:Open Source License

/**
 * Builds a {@code WekaLinRegData} bundle: a {@code Standardize} filter configured for
 * {@code X}, a ridge-regularized {@code LinearRegression} trained on the normalized
 * features plus {@code yvals}, and the given timeslot.
 *
 * @param timeslot         value stored in the returned bundle
 * @param X                feature instances; used to configure the filter and passed to
 *                         {@code featureNormalize}
 * @param yvals            target values appended to the features via {@code addYforWeka}
 * @param candidateLambdas candidates handed to {@code findBestRegularizationParameter}
 * @return the filter, trained regression and timeslot wrapped in a {@code WekaLinRegData}
 * @throws Exception if configuring the filter or training fails (logged and rethrown),
 *                   or if no regularization parameter could be selected
 */
public static WekaLinRegData createWekaLinRegData(int timeslot, Instances X, Double[] yvals,
        ArrayList<Double> candidateLambdas) throws Exception {
    // Configure the standardization filter against the feature format.
    final Standardize standardizer = new Standardize();
    try {
        standardizer.setInputFormat(X);
    } catch (Exception e) {
        log.error("PolyRegCust.predictNumSubs() data standardizing exception", e);
        throw e;
    }

    final Instances scaledFeatures = RegressionUtils.featureNormalize(X, standardizer);
    log.info("normalized features " + scaledFeatures);

    // Weka expects the target column to be part of the instances.
    final Instances featuresWithTarget = RegressionUtils.addYforWeka(scaledFeatures, yvals);

    // Pick the ridge value among the candidates.
    final Double ridge = findBestRegularizationParameter(featuresWithTarget, candidateLambdas);
    if (ridge == null) {
        final String message = "best regularization parameter is null, cannot predict";
        log.error(message);
        throw new Exception(message);
    }

    // Train the regression with the selected ridge value.
    final LinearRegression model = RegressionUtils.createLinearRegression();
    model.setRidge(ridge);
    try {
        model.buildClassifier(featuresWithTarget);
        log.info("theta " + Arrays.toString(model.coefficients()));
    } catch (Exception e) {
        log.error("PolyRegCust.predictNumSubs() buildClassifier exception", e);
        throw e;
    }

    return new WekaLinRegData(standardizer, model, timeslot);
}

From source file:expshell.ExpShell.java

/**
 * Loads the iris CSV, shuffles it with a fixed seed, splits it with
 * {@code RemovePercentage}, standardizes both parts using statistics taken from the
 * training part, trains a {@code NeuralClassifier} and prints the evaluation summary.
 *
 * @param args the command line arguments (not used)
 * @throws java.lang.Exception propagated from the Weka loading, filtering, training or
 *                             evaluation calls
 */
public static void main(String[] args) throws Exception {
    String file = "C:\\Users\\YH Jonathan Kwok\\Documents\\NetBeansProjects\\ExpShell\\src\\expshell\\iris.csv";

    DataSource source = new DataSource(file);
    Instances data = source.getDataSet();

    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }

    //Randomize it
    data.randomize(new Random(1));

    // NOTE(review): RemovePercentage(70) removes 70% of the instances, so "training"
    // is the remaining 30% and the inverted selection ("test") is the 70% - confirm
    // this split direction is intended.
    RemovePercentage rp = new RemovePercentage();
    rp.setPercentage(70);

    rp.setInputFormat(data);
    Instances training = Filter.useFilter(data, rp);

    rp.setInvertSelection(true);
    rp.setInputFormat(data);
    Instances test = Filter.useFilter(data, rp);

    //standardize the data
    Standardize filter = new Standardize();
    filter.setInputFormat(training);

    // Order matters: the first batch pushed through the filter after setInputFormat
    // determines the mean/stddev, so the training set must be filtered first and the
    // test set is then scaled with the training statistics.
    Instances newTraining = Filter.useFilter(training, filter);
    Instances newTest = Filter.useFilter(test, filter);

    Classifier classifier = new NeuralClassifier();
    classifier.buildClassifier(newTraining);
    Evaluation eval = new Evaluation(newTraining);
    eval.evaluateModel(classifier, newTest);

    System.out.println(eval.toSummaryString("***** Overall results: *****", false));
}

From source file:id3classifier.Main.java

/**
 * Loads the data set named by {@code file}, discretizes and standardizes it, shuffles
 * it, splits it 70/30, evaluates an {@code ID3Classifiers} model on the held-out part,
 * prints the summary and terminates the JVM.
 *
 * @param args the command line arguments (not used)
 * @throws Exception propagated from the Weka loading, filtering, training or
 *                   evaluation calls
 */
public static void main(String[] args) throws Exception {
    final ConverterUtils.DataSource loader = new ConverterUtils.DataSource(file);
    Instances data = loader.getDataSet();

    // Step 1: discretize.
    final Discretize discretizer = new Discretize();
    discretizer.setInputFormat(data);
    data = Filter.useFilter(data, discretizer);

    // Step 2: standardize.
    final Standardize scaler = new Standardize();
    scaler.setInputFormat(data);
    data = Filter.useFilter(data, scaler);

    // Step 3: last attribute is the class; shuffle the instances.
    final int classColumn = data.numAttributes() - 1;
    data.setClassIndex(classColumn);
    data.randomize(new Debug.Random());

    // Step 4: first 70% (rounded) for training, the remainder for testing.
    final int trainCount = (int) Math.round(data.numInstances() * .7);
    final int testCount = data.numInstances() - trainCount;
    final Instances trainPart = new Instances(data, 0, trainCount);
    final Instances testPart = new Instances(data, trainCount, testCount);

    // Step 5: train on the training part.
    final ID3Classifiers model = new ID3Classifiers();
    model.buildClassifier(trainPart);

    // Step 6: evaluate on the test part (the Evaluation is initialised from the full set).
    final Evaluation evaluation = new Evaluation(data);
    evaluation.evaluateModel(model, testPart);

    // Step 7: report, then exit explicitly; per the original author this is needed
    // to stop JavaFX.
    final String summary = evaluation.toSummaryString("\nResults\n======\n", false);
    System.out.println(summary);
    System.exit(0);
}

From source file:knnclassifier.Main.java

/**
 * Loads the data set named by {@code file}, shuffles it, splits it 70/30, standardizes
 * both parts using statistics taken from the training part, trains a
 * {@code KNNClassifier} and prints the evaluation summary.
 *
 * @param args the command line arguments (not used)
 * @throws Exception propagated from the Weka loading, filtering, training or
 *                   evaluation calls
 */
public static void main(String[] args) throws Exception {

    DataSource source = new DataSource(file);
    Instances dataSet = source.getDataSet();

    //Set up data
    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random());

    int trainingSize = (int) Math.round(dataSet.numInstances() * .7);
    int testSize = dataSet.numInstances() - trainingSize;

    Instances training = new Instances(dataSet, 0, trainingSize);
    Instances test = new Instances(dataSet, trainingSize, testSize);

    Standardize standardizedData = new Standardize();
    standardizedData.setInputFormat(training);

    // Order matters: the first batch pushed through the filter after setInputFormat
    // determines the mean/stddev, so the training set must be filtered first and the
    // test set is then scaled with the training statistics.
    Instances newTraining = Filter.useFilter(training, standardizedData);
    Instances newTest = Filter.useFilter(test, standardizedData);

    KNNClassifier knn = new KNNClassifier();
    knn.buildClassifier(newTraining);

    Evaluation eval = new Evaluation(newTraining);
    eval.evaluateModel(knn, newTest);

    System.out.println(eval.toSummaryString("\nResults\n======\n", false));
}

From source file:neuralnetwork.NeuralNetwork.java

/**
 * Loads the iris CSV, shuffles it with a fixed seed, splits it with
 * {@code RemovePercentage}, standardizes both parts using statistics taken from the
 * training part, trains a {@code NeuralNet} and prints the evaluation summary.
 *
 * @param args the command line arguments (not used)
 * @throws java.lang.Exception propagated from the Weka loading, filtering, training or
 *                             evaluation calls
 */
public static void main(String[] args) throws Exception {

    ConverterUtils.DataSource source;
    source = new ConverterUtils.DataSource("C:\\Users\\Harvey\\Documents\\iris.csv");
    Instances data = source.getDataSet();

    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }

    data.randomize(new Debug.Random(1));

    // NOTE(review): RemovePercentage(70) removes 70% of the instances, so "train" is
    // the remaining 30% and the inverted selection ("test") is the 70% - confirm this
    // split direction is intended.
    RemovePercentage trainFilter = new RemovePercentage();
    trainFilter.setPercentage(70);
    trainFilter.setInputFormat(data);
    Instances train = Filter.useFilter(data, trainFilter);

    trainFilter.setInvertSelection(true);
    trainFilter.setInputFormat(data);
    Instances test = Filter.useFilter(data, trainFilter);

    Standardize filter = new Standardize();
    filter.setInputFormat(train);

    // The standardized sets were previously swapped (the model was trained on the
    // filtered test split and evaluated on the filtered train split). The training
    // split is filtered first so that it determines the mean/stddev, and the test
    // split is then scaled with those statistics.
    Instances newTrain = Filter.useFilter(train, filter);
    Instances newTest = Filter.useFilter(test, filter);

    Classifier nNet = new NeuralNet();
    nNet.buildClassifier(newTrain);
    // Initialise the evaluation from the training data rather than the test data.
    Evaluation eval = new Evaluation(newTrain);
    eval.evaluateModel(nNet, newTest);
    System.out.println(eval.toSummaryString("\nResults\n-------------\n", false));
}