Usage examples for weka.filters.unsupervised.attribute.Standardize.setInputFormat
public boolean setInputFormat(Instances instanceInfo) throws Exception
From source file:PCADetector.java
License:Apache License
public boolean runPCA(ArrayList<Double> newData, int slidewdSz, double cAlpha, int nAttrs) { try {/* w w w. j ava 2 s. com*/ if (m_nDims == 0) { m_nDims = nAttrs; for (int i = 0; i < this.m_nDims; i++) { m_oriDataMatrix.add(new ArrayList<Double>()); // one list for each attribute } } verifyData(newData); this.c_alpha = cAlpha; if (false == prepareData(newData, slidewdSz)) return false; Instances oriDataInsts = getInstances(); if (oriDataInsts != null) { // standardization + PCA covariance matrix m_scaledInstances = new Instances(oriDataInsts); Standardize filter = new Standardize(); filter.setInputFormat(m_scaledInstances); m_scaledInstances = Standardize.useFilter(m_scaledInstances, filter); // standardization PrincipalComponents PCA = new PrincipalComponents(); PCA.setVarianceCovered(1.0); // means 100% PCA.setMaximumAttributeNames(-1); PCA.setCenterData(true); Ranker ranker = new Ranker(); AttributeSelection selector = new AttributeSelection(); selector.setSearch(ranker); selector.setEvaluator(PCA); selector.SelectAttributes(m_scaledInstances); // Instances transformedData = selector.reduceDimensionality(m_scaledInstances); // get sorted eigens double[] eigenValues = PCA.getEigenValues(); // eigenVectors[i][j] i: rows; j: cols double[][] eigenVectors = PCA.getUnsortedEigenVectors(); Sort(eigenValues, eigenVectors); setEigens(eigenValues); // get residual start dimension int residualStartDimension = -1; double sum = 0; double major = 0; for (int ss = 0; ss < eigenValues.length; ss++) { sum += eigenValues[ss]; } for (int ss = 0; ss < eigenValues.length; ss++) { major += eigenValues[ss]; if ((residualStartDimension < 0) && (major / sum > 0.95)) { residualStartDimension = ss + 1; break; } } // System.out.println("residualStartDim: "+residualStartDimension); m_threshold = computeThreshold(eigenValues, residualStartDimension); // check new data abnormal or not boolean bAbnormal = checkSPE(eigenVectors, residualStartDimension, newData); computeProjPCs(eigenVectors, 
residualStartDimension, newData); // only for demo if (bAbnormal) { // anomaly, now to diagnosis // check original space using all the lists diagnosis(eigenVectors, residualStartDimension, newData); } } } catch (Exception exc) { } return true; }
From source file:assign00.ExperimentShell.java
/** * @param args the command line arguments *///from www . j a v a2 s .c om public static void main(String[] args) throws Exception { DataSource source = new DataSource(file); Instances dataSet = source.getDataSet(); //Set up data dataSet.setClassIndex(dataSet.numAttributes() - 1); dataSet.randomize(new Random(1)); //determine sizes int trainingSize = (int) Math.round(dataSet.numInstances() * .7); int testSize = dataSet.numInstances() - trainingSize; Instances training = new Instances(dataSet, 0, trainingSize); Instances test = new Instances(dataSet, trainingSize, testSize); Standardize standardizedData = new Standardize(); standardizedData.setInputFormat(training); Instances newTest = Filter.useFilter(test, standardizedData); Instances newTraining = Filter.useFilter(training, standardizedData); NeuralNetworkClassifier NWC = new NeuralNetworkClassifier(); NWC.buildClassifier(newTraining); Evaluation eval = new Evaluation(newTraining); eval.evaluateModel(NWC, newTest); System.out.println(eval.toSummaryString("\nResults\n======\n", false)); }
From source file:com.mycompany.id3classifier.ID3Shell.java
public static void main(String[] args) throws Exception { ConverterUtils.DataSource source = new ConverterUtils.DataSource("lensesData.csv"); Instances dataSet = source.getDataSet(); Discretize filter = new Discretize(); filter.setInputFormat(dataSet);/* w w w . ja v a 2s . co m*/ dataSet = Filter.useFilter(dataSet, filter); Standardize standardize = new Standardize(); standardize.setInputFormat(dataSet); dataSet = Filter.useFilter(dataSet, standardize); dataSet.setClassIndex(dataSet.numAttributes() - 1); dataSet.randomize(new Random(9001)); //It's over 9000!! int folds = 10; //Perform crossvalidation Evaluation eval = new Evaluation(dataSet); for (int n = 0; n < folds; n++) { int trainingSize = (int) Math.round(dataSet.numInstances() * .7); int testSize = dataSet.numInstances() - trainingSize; Instances trainingData = dataSet.trainCV(folds, n); Instances testData = dataSet.testCV(folds, n); ID3Classifier classifier = new ID3Classifier(); // Id3 classifier = new Id3(); classifier.buildClassifier(trainingData); eval.evaluateModel(classifier, testData); } System.out.println(eval.toSummaryString("\nResults:\n", false)); }
From source file:com.mycompany.knnclassifier.kNNShell.java
public static void main(String[] args) throws Exception { ConverterUtils.DataSource source = new ConverterUtils.DataSource("carData.csv"); Instances dataSet = source.getDataSet(); Standardize standardize = new Standardize(); standardize.setInputFormat(dataSet); dataSet = Filter.useFilter(dataSet, standardize); dataSet.setClassIndex(dataSet.numAttributes() - 1); dataSet.randomize(new Random(9001)); //It's over 9000!! int trainingSize = (int) Math.round(dataSet.numInstances() * .7); int testSize = dataSet.numInstances() - trainingSize; Instances trainingData = new Instances(dataSet, 0, trainingSize); Instances testData = new Instances(dataSet, trainingSize, testSize); kNNClassifier classifier = new kNNClassifier(3); classifier.buildClassifier(trainingData); //Used to compare to Weka's built in KNN algorithm //Classifier classifier = new IBk(1); //classifier.buildClassifier(trainingData); Evaluation eval = new Evaluation(trainingData); eval.evaluateModel(classifier, testData); System.out.println(eval.toSummaryString("\nResults:\n", false)); }
From source file:com.mycompany.neuralnetwork.NeuralNetworkShell.java
public static void main(String[] args) throws Exception { ConverterUtils.DataSource source = new ConverterUtils.DataSource("irisData.csv"); Instances dataSet = source.getDataSet(); Standardize standardize = new Standardize(); standardize.setInputFormat(dataSet); dataSet = Filter.useFilter(dataSet, standardize); dataSet.setClassIndex(dataSet.numAttributes() - 1); dataSet.randomize(new Random(9001)); //It's over 9000!! int trainingSize = (int) Math.round(dataSet.numInstances() * .7); int testSize = dataSet.numInstances() - trainingSize; Instances trainingData = new Instances(dataSet, 0, trainingSize); Instances testData = new Instances(dataSet, trainingSize, testSize); //MultilayerPerceptron classifier = new MultilayerPerceptron(); NeuralNetworkClassifier classifier = new NeuralNetworkClassifier(3, 20000, 0.1); classifier.buildClassifier(trainingData); Evaluation eval = new Evaluation(trainingData); eval.evaluateModel(classifier, testData); System.out.println(eval.toSummaryString("\nResults:\n", false)); }
From source file:edu.utexas.cs.tactex.utils.RegressionUtils.java
License:Open Source License
public static WekaLinRegData createWekaLinRegData(int timeslot, Instances X, Double[] yvals, ArrayList<Double> candidateLambdas) throws Exception { WekaLinRegData result;// w w w .j a va2 s . com // normalize Standardize standardize = new Standardize(); try { standardize.setInputFormat(X); } catch (Exception e) { log.error("PolyRegCust.predictNumSubs() data standardizing exception", e); throw e; } Instances nrmFeatures = RegressionUtils.featureNormalize(X, standardize); log.info("normalized features " + nrmFeatures); // add y to X since this is what weka expects Instances Xy = RegressionUtils.addYforWeka(nrmFeatures, yvals); // run cross validation for lambda Double bestLambda = findBestRegularizationParameter(Xy, candidateLambdas); if (null == bestLambda) { String message = "best regularization parameter is null, cannot predict"; log.error(message); throw new Exception(message); } // run linear regression LinearRegression linearRegression = RegressionUtils.createLinearRegression(); linearRegression.setRidge(bestLambda); try { linearRegression.buildClassifier(Xy); log.info("theta " + Arrays.toString(linearRegression.coefficients())); } catch (Exception e) { log.error("PolyRegCust.predictNumSubs() buildClassifier exception", e); throw e; } result = new WekaLinRegData(standardize, linearRegression, timeslot); return result; }
From source file:expshell.ExpShell.java
/** * @param args the command line arguments * @throws java.lang.Exception//from ww w . ja va2 s . c o m */ public static void main(String[] args) throws Exception { String file = "C:\\Users\\YH Jonathan Kwok\\Documents\\NetBeansProjects\\ExpShell\\src\\expshell\\iris.csv"; DataSource source = new DataSource(file); Instances data = source.getDataSet(); if (data.classIndex() == -1) data.setClassIndex(data.numAttributes() - 1); //Randomize it data.randomize(new Random(1)); RemovePercentage rp = new RemovePercentage(); rp.setPercentage(70); rp.setInputFormat(data); Instances training = Filter.useFilter(data, rp); rp.setInvertSelection(true); rp.setInputFormat(data); Instances test = Filter.useFilter(data, rp); //standardize the data Standardize filter = new Standardize(); filter.setInputFormat(training); Instances newTest = Filter.useFilter(test, filter); Instances newTraining = Filter.useFilter(training, filter); //Part 5 - Now it's a knn Classifier knn = new NeuralClassifier(); knn.buildClassifier(newTraining); Evaluation eval = new Evaluation(newTraining); eval.evaluateModel(knn, newTest); System.out.println(eval.toSummaryString("***** Overall results: *****", false)); }
From source file:id3classifier.Main.java
public static void main(String[] args) throws Exception { ConverterUtils.DataSource source = new ConverterUtils.DataSource(file); Instances dataSet = source.getDataSet(); // discretize the dataset Discretize filter = new Discretize(); filter.setInputFormat(dataSet);//from w w w .ja v a 2 s . co m dataSet = Filter.useFilter(dataSet, filter); // standardize the dataset Standardize standardizedData = new Standardize(); standardizedData.setInputFormat(dataSet); dataSet = Filter.useFilter(dataSet, standardizedData); // randomize the dataset dataSet.setClassIndex(dataSet.numAttributes() - 1); dataSet.randomize(new Debug.Random()); // get the sizes of the training and testing sets and split int trainingSize = (int) Math.round(dataSet.numInstances() * .7); int testSize = dataSet.numInstances() - trainingSize; Instances training = new Instances(dataSet, 0, trainingSize); Instances test = new Instances(dataSet, trainingSize, testSize); // set up the ID3 classifier on the training data ID3Classifiers classifier = new ID3Classifiers(); classifier.buildClassifier(training); // set up the evaluation and test using the classifier and test set Evaluation eval = new Evaluation(dataSet); eval.evaluateModel(classifier, test); // outup and kill, important to exit here to stop javaFX System.out.println(eval.toSummaryString("\nResults\n======\n", false)); System.exit(0); }
From source file:knnclassifier.Main.java
public static void main(String[] args) throws Exception { DataSource source = new DataSource(file); Instances dataSet = source.getDataSet(); //Set up data dataSet.setClassIndex(dataSet.numAttributes() - 1); dataSet.randomize(new Random()); int trainingSize = (int) Math.round(dataSet.numInstances() * .7); int testSize = dataSet.numInstances() - trainingSize; Instances training = new Instances(dataSet, 0, trainingSize); Instances test = new Instances(dataSet, trainingSize, testSize); Standardize standardizedData = new Standardize(); standardizedData.setInputFormat(training); Instances newTest = Filter.useFilter(test, standardizedData); Instances newTraining = Filter.useFilter(training, standardizedData); KNNClassifier knn = new KNNClassifier(); knn.buildClassifier(newTraining);// w w w .j a v a 2 s . com Evaluation eval = new Evaluation(newTraining); eval.evaluateModel(knn, newTest); System.out.println(eval.toSummaryString("\nResults\n======\n", false)); }
From source file:neuralnetwork.NeuralNetwork.java
/**
 * Loads the iris CSV file, shuffles it with a fixed seed, splits it with
 * {@code RemovePercentage}, standardizes both parts and prints the evaluation summary
 * of a {@code NeuralNet} classifier.
 *
 * @param args the command line arguments (not used)
 * @throws java.lang.Exception if loading, filtering, training or evaluation fails
 */
public static void main(String[] args) throws Exception {
    ConverterUtils.DataSource source;
    source = new ConverterUtils.DataSource("C:\\Users\\Harvey\\Documents\\iris.csv");
    Instances data = source.getDataSet();

    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }

    data.randomize(new Debug.Random(1));

    // RemovePercentage drops the configured percentage of instances: removing 70%
    // leaves the test portion, and inverting the selection yields the portion used
    // for training. These are the same two subsets as before; only the variable
    // names now match how each subset is actually used.
    RemovePercentage splitFilter = new RemovePercentage();
    splitFilter.setPercentage(70);
    splitFilter.setInputFormat(data);
    Instances test = Filter.useFilter(data, splitFilter);

    splitFilter.setInvertSelection(true);
    splitFilter.setInputFormat(data);
    Instances train = Filter.useFilter(data, splitFilter);

    // The first batch pushed through the filter determines its mean/stddev, so the
    // training set goes first; the test set is then scaled with those statistics.
    Standardize filter = new Standardize();
    filter.setInputFormat(train);
    Instances newTrain = Filter.useFilter(train, filter);
    Instances newTest = Filter.useFilter(test, filter);

    Classifier nNet = new NeuralNet();
    nNet.buildClassifier(newTrain);

    // Evaluation takes its header and class priors from the instances it is built
    // with, so it is given the training data rather than the test data.
    Evaluation eval = new Evaluation(newTrain);
    eval.evaluateModel(nNet, newTest);

    System.out.println(eval.toSummaryString("\nResults\n-------------\n", false));
}