List of usage examples for the weka.filters.unsupervised.attribute.Normalize constructor Normalize()
Normalize
From source file:WLSVM.java
License:Open Source License
/** * Builds the model/*from w w w .j ava 2 s .c o m*/ */ public void buildClassifier(Instances insts) throws Exception { if (normalize == 1) { if (true) System.err.println("Normalizing..."); filter = new Normalize(); filter.setInputFormat(insts); insts = Filter.useFilter(insts, filter); } if (true) System.err.println("Converting to libsvm format..."); Vector sparseData = DataToSparse(insts); Vector vy = new Vector(); Vector vx = new Vector(); int max_index = 0; if (true) System.err.println("Tokenizing libsvm data..."); for (int d = 0; d < sparseData.size(); d++) { String line = (String) sparseData.get(d); StringTokenizer st = new StringTokenizer(line, " \t\n\r\f:"); vy.addElement(st.nextToken()); int m = st.countTokens() / 2; svm_node[] x = new svm_node[m]; for (int j = 0; j < m; j++) { x[j] = new svm_node(); x[j].index = atoi(st.nextToken()); x[j].value = atof(st.nextToken()); } if (m > 0) max_index = Math.max(max_index, x[m - 1].index); vx.addElement(x); } prob = new svm_problem(); prob.l = vy.size(); prob.x = new svm_node[prob.l][]; for (int i = 0; i < prob.l; i++) prob.x[i] = (svm_node[]) vx.elementAt(i); prob.y = new double[prob.l]; for (int i = 0; i < prob.l; i++) prob.y[i] = atof((String) vy.elementAt(i)); if (param.gamma == 0) param.gamma = 1.0 / max_index; error_msg = svm.svm_check_parameter(prob, param); if (error_msg != null) { System.err.print("Error: " + error_msg + "\n"); System.exit(1); } if (true) System.err.println("Training model"); try { model = svm.svm_train(prob, param); } catch (Exception e) { e.printStackTrace(); } }
From source file:SMO.java
License:Open Source License
/** * Method for building the classifier. Implements a one-against-one * wrapper for multi-class problems.//from ww w. j ava2s .com * * @param insts the set of training instances * @throws Exception if the classifier can't be built successfully */ public void buildClassifier(Instances insts) throws Exception { if (!m_checksTurnedOff) { // can classifier handle the data? getCapabilities().testWithFail(insts); // remove instances with missing class insts = new Instances(insts); insts.deleteWithMissingClass(); /* Removes all the instances with weight equal to 0. MUST be done since condition (8) of Keerthi's paper is made with the assertion Ci > 0 (See equation (3a). */ Instances data = new Instances(insts, insts.numInstances()); for (int i = 0; i < insts.numInstances(); i++) { if (insts.instance(i).weight() > 0) data.add(insts.instance(i)); } if (data.numInstances() == 0) { throw new Exception("No training instances left after removing " + "instances with weight 0!"); } insts = data; } if (!m_checksTurnedOff) { m_Missing = new ReplaceMissingValues(); m_Missing.setInputFormat(insts); insts = Filter.useFilter(insts, m_Missing); } else { m_Missing = null; } if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) { boolean onlyNumeric = true; if (!m_checksTurnedOff) { for (int i = 0; i < insts.numAttributes(); i++) { if (i != insts.classIndex()) { if (!insts.attribute(i).isNumeric()) { onlyNumeric = false; break; } } } } if (!onlyNumeric) { m_NominalToBinary = new NominalToBinary(); m_NominalToBinary.setInputFormat(insts); insts = Filter.useFilter(insts, m_NominalToBinary); } else { m_NominalToBinary = null; } } else { m_NominalToBinary = null; } if (m_filterType == FILTER_STANDARDIZE) { m_Filter = new Standardize(); m_Filter.setInputFormat(insts); insts = Filter.useFilter(insts, m_Filter); } else if (m_filterType == FILTER_NORMALIZE) { m_Filter = new Normalize(); m_Filter.setInputFormat(insts); insts = Filter.useFilter(insts, m_Filter); } else { m_Filter = null; } 
m_classIndex = insts.classIndex(); m_classAttribute = insts.classAttribute(); m_KernelIsLinear = (m_kernel instanceof PolyKernel) && (((PolyKernel) m_kernel).getExponent() == 1.0); // Generate subsets representing each class Instances[] subsets = new Instances[insts.numClasses()]; for (int i = 0; i < insts.numClasses(); i++) { subsets[i] = new Instances(insts, insts.numInstances()); } for (int j = 0; j < insts.numInstances(); j++) { Instance inst = insts.instance(j); subsets[(int) inst.classValue()].add(inst); } for (int i = 0; i < insts.numClasses(); i++) { subsets[i].compactify(); } // Build the binary classifiers Random rand = new Random(m_randomSeed); m_classifiers = new BinarySMO[insts.numClasses()][insts.numClasses()]; for (int i = 0; i < insts.numClasses(); i++) { for (int j = i + 1; j < insts.numClasses(); j++) { m_classifiers[i][j] = new BinarySMO(); m_classifiers[i][j].setKernel(Kernel.makeCopy(getKernel())); Instances data = new Instances(insts, insts.numInstances()); for (int k = 0; k < subsets[i].numInstances(); k++) { data.add(subsets[i].instance(k)); } for (int k = 0; k < subsets[j].numInstances(); k++) { data.add(subsets[j].instance(k)); } data.compactify(); data.randomize(rand); m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed); } } }
From source file:ann.ANNOptions.java
/**
 * Creates an options object populated with the default settings:
 * fresh filters, empty neuron containers, and the default option codes
 * and training hyper-parameters listed below.
 */
public ANNOptions() {
    // pre-processing filters
    ntb = new NominalToBinary();
    normalize = new Normalize();

    // network structure containers
    layer = new ArrayList<List<Neuron>>();
    output = new ArrayList<Neuron>();
    layerNeuron = new ArrayList<Integer>();
    layerNeuron.add(3);
    hiddenLayer = 1;

    // option codes
    weightOpt = 2;
    topologyOpt = 4;
    activationFunctionOpt = 1;

    // training hyper-parameters
    learningRate = 0.3;
    momentum = 0.2;
    maxIteration = 1000;
    threshold = 0.01;
}
From source file:ANN.MultilayerPerceptron.java
public static void main(String[] args) throws Exception { ConverterUtils.DataSource source = new ConverterUtils.DataSource( ("D:\\Program Files\\Weka-3-8\\data\\iris.arff")); Instances train = source.getDataSet(); Normalize nm = new Normalize(); nm.setInputFormat(train);/*from w w w. ja v a2s.co m*/ train = Filter.useFilter(train, nm); train.setClassIndex(train.numAttributes() - 1); System.out.println(); // System.out.println(i + " "+0.8); MultilayerPerceptron slp = new MultilayerPerceptron(train, 0.1, 5000, 14); slp.buildClassifier(train); Evaluation eval = new Evaluation(train); eval.evaluateModel(slp, train); System.out.println(eval.toSummaryString()); System.out.print(eval.toMatrixString()); }
From source file:ANN.MultiplePerceptron.java
/**
 * Loads an ARFF data set, passes it through Weka's {@code Normalize}
 * filter, trains a {@code MultiplePerceptron} on it and prints the
 * evaluation summary and confusion matrix obtained on that same data.
 *
 * @param args optional; {@code args[0]} is the path of the ARFF file to
 *        load. When absent, the iris data set at its original hard-coded
 *        location is used.
 * @throws Exception if loading, filtering, training or evaluation fails
 */
public static void main(String[] args) throws Exception {
    // Allow the data set to be chosen on the command line instead of
    // relying on one machine-specific absolute path.
    String arffPath = (args != null && args.length > 0)
            ? args[0]
            : "D:\\Program Files\\Weka-3-8\\data\\iris.arff";

    ConverterUtils.DataSource source = new ConverterUtils.DataSource(arffPath);
    Instances train = source.getDataSet();

    Normalize nm = new Normalize();
    nm.setInputFormat(train);
    train = Filter.useFilter(train, nm);

    // the last attribute is used as the class
    train.setClassIndex(train.numAttributes() - 1);

    MultiplePerceptron mlp = new MultiplePerceptron(train, 20, 0.3);
    mlp.buildClassifier(train);

    // evaluated on the training data itself
    Evaluation eval = new Evaluation(train);
    eval.evaluateModel(mlp, train);
    System.out.println(eval.toSummaryString());
    System.out.print(eval.toMatrixString());
}
From source file:ann.SingleLayerPerceptron.java
@Override public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); annOptions = new ANNOptions(); annOptions = annOptions.loadConfiguration(); output = new ArrayList<Neuron>(); normalize = new Normalize(); ntb = new NominalToBinary(); output = annOptions.output;//from w ww . j a v a2 s . co m // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); //nominal to binary filter ntb.setInputFormat(data); data = new Instances(Filter.useFilter(data, ntb)); //normalize filter normalize.setInputFormat(data); data = new Instances(Filter.useFilter(data, normalize)); // do main function doPerceptron(data); }
From source file:ANN_Single.SinglelayerPerceptron.java
public static void main(String[] args) throws Exception { ConverterUtils.DataSource source = new ConverterUtils.DataSource( ("D:\\Program Files\\Weka-3-8\\data\\diabetes.arff")); Instances train = source.getDataSet(); Normalize nm = new Normalize(); nm.setInputFormat(train);// w w w. jav a2s . co m train = Filter.useFilter(train, nm); train.setClassIndex(train.numAttributes() - 1); System.out.println(); // System.out.println(i + " "+0.8); SinglelayerPerceptron slp = new SinglelayerPerceptron(train, 0.1, 5000); slp.buildClassifier(train); Evaluation eval = new Evaluation(train); // eval.crossValidateModel(slp, train, 10, new Random(1)); eval.evaluateModel(slp, train); System.out.println(eval.toSummaryString()); System.out.print(eval.toMatrixString()); }
From source file:ANN_single2.MultilayerPerceptron.java
public static void main(String[] args) throws Exception { ConverterUtils.DataSource source = new ConverterUtils.DataSource( ("D:\\Program Files\\Weka-3-8\\data\\Team.arff")); Instances train = source.getDataSet(); Normalize nm = new Normalize(); nm.setInputFormat(train);//from w ww.jav a 2 s .c om train = Filter.useFilter(train, nm); train.setClassIndex(train.numAttributes() - 1); MultilayerPerceptron slp = new MultilayerPerceptron(train, 13, 0.1, 0.5); // slp.buildClassifier(train); Evaluation eval = new Evaluation(train); eval.crossValidateModel(slp, train, 10, new Random(1)); // eval.evaluateModel(slp, train); System.out.println(eval.toSummaryString()); System.out.println(eval.toMatrixString()); }
From source file:ANN_single2.SinglelayerPerceptron.java
public static void main(String[] args) throws Exception { ConverterUtils.DataSource source = new ConverterUtils.DataSource( ("D:\\Program Files\\Weka-3-8\\data\\Team.arff")); Instances train = source.getDataSet(); Normalize nm = new Normalize(); nm.setInputFormat(train);//w ww.j ava 2s . c o m train = Filter.useFilter(train, nm); train.setClassIndex(train.numAttributes() - 1); for (int i = 100; i < 3000; i += 100) { for (double j = 0.01; j < 1; j += 0.01) { System.out.println(i + " " + j); SinglelayerPerceptron slp = new SinglelayerPerceptron(i, j, 0.00); slp.buildClassifier(train); Evaluation eval = new Evaluation(train); // eval.crossValidateModel(slp, train,10, new Random(1)); eval.evaluateModel(slp, train); System.out.println(eval.toSummaryString()); System.out.println(eval.toMatrixString()); } } }
From source file:br.com.ufu.lsi.rebfnetwork.RBFModel.java
License:Open Source License
/** * Method used to pre-process the data, perform clustering, and * set the initial parameter vector.//from w ww. ja v a 2 s. co m */ protected Instances initializeClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); data = new Instances(data); data.deleteWithMissingClass(); // Make sure data is shuffled Random random = new Random(m_Seed); if (data.numInstances() > 2) { random = data.getRandomNumberGenerator(m_Seed); } data.randomize(random); double y0 = data.instance(0).classValue(); // This stuff is not relevant in classification case int index = 1; while (index < data.numInstances() && data.instance(index).classValue() == y0) { index++; } if (index == data.numInstances()) { // degenerate case, all class values are equal // we don't want to deal with this, too much hassle throw new Exception("All class values are the same. At least two class values should be different"); } double y1 = data.instance(index).classValue(); // Replace missing values m_ReplaceMissingValues = new ReplaceMissingValues(); m_ReplaceMissingValues.setInputFormat(data); data = Filter.useFilter(data, m_ReplaceMissingValues); // Remove useless attributes m_AttFilter = new RemoveUseless(); m_AttFilter.setInputFormat(data); data = Filter.useFilter(data, m_AttFilter); // only class? 
-> build ZeroR model if (data.numAttributes() == 1) { System.err.println( "Cannot build model (only class attribute present in data after removing useless attributes!), " + "using ZeroR model instead!"); m_ZeroR = new weka.classifiers.rules.ZeroR(); m_ZeroR.buildClassifier(data); return data; } else { m_ZeroR = null; } // Transform attributes m_NominalToBinary = new NominalToBinary(); m_NominalToBinary.setInputFormat(data); data = Filter.useFilter(data, m_NominalToBinary); m_Filter = new Normalize(); ((Normalize) m_Filter).setIgnoreClass(true); m_Filter.setInputFormat(data); data = Filter.useFilter(data, m_Filter); double z0 = data.instance(0).classValue(); // This stuff is not relevant in classification case double z1 = data.instance(index).classValue(); m_x1 = (y0 - y1) / (z0 - z1); // no division by zero, since y0 != y1 guaranteed => z0 != z1 ??? m_x0 = (y0 - m_x1 * z0); // = y1 - m_x1 * z1 m_classIndex = data.classIndex(); m_numClasses = data.numClasses(); m_numAttributes = data.numAttributes(); // Run k-means SimpleKMeans skm = new SimpleKMeans(); skm.setMaxIterations(10000); skm.setNumClusters(m_numUnits); Remove rm = new Remove(); data.setClassIndex(-1); rm.setAttributeIndices((m_classIndex + 1) + ""); rm.setInputFormat(data); Instances dataRemoved = Filter.useFilter(data, rm); data.setClassIndex(m_classIndex); skm.buildClusterer(dataRemoved); Instances centers = skm.getClusterCentroids(); if (centers.numInstances() < m_numUnits) { m_numUnits = centers.numInstances(); } // Set up arrays OFFSET_WEIGHTS = 0; if (m_useAttributeWeights) { OFFSET_ATTRIBUTE_WEIGHTS = (m_numUnits + 1) * m_numClasses; OFFSET_CENTERS = OFFSET_ATTRIBUTE_WEIGHTS + m_numAttributes; } else { OFFSET_ATTRIBUTE_WEIGHTS = -1; OFFSET_CENTERS = (m_numUnits + 1) * m_numClasses; } OFFSET_SCALES = OFFSET_CENTERS + m_numUnits * m_numAttributes; switch (m_scaleOptimizationOption) { case USE_GLOBAL_SCALE: m_RBFParameters = new double[OFFSET_SCALES + 1]; break; case USE_SCALE_PER_UNIT_AND_ATTRIBUTE: 
m_RBFParameters = new double[OFFSET_SCALES + m_numUnits * m_numAttributes]; break; default: m_RBFParameters = new double[OFFSET_SCALES + m_numUnits]; break; } // Set initial radius based on distance to nearest other basis function double maxMinDist = -1; for (int i = 0; i < centers.numInstances(); i++) { double minDist = Double.MAX_VALUE; for (int j = i + 1; j < centers.numInstances(); j++) { double dist = 0; for (int k = 0; k < centers.numAttributes(); k++) { if (k != centers.classIndex()) { double diff = centers.instance(i).value(k) - centers.instance(j).value(k); dist += diff * diff; } } if (dist < minDist) { minDist = dist; } } if ((minDist != Double.MAX_VALUE) && (minDist > maxMinDist)) { maxMinDist = minDist; } } // Initialize parameters if (m_scaleOptimizationOption == USE_GLOBAL_SCALE) { m_RBFParameters[OFFSET_SCALES] = Math.sqrt(maxMinDist); } for (int i = 0; i < m_numUnits; i++) { if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT) { m_RBFParameters[OFFSET_SCALES + i] = Math.sqrt(maxMinDist); } int k = 0; for (int j = 0; j < m_numAttributes; j++) { if (k == centers.classIndex()) { k++; } if (j != data.classIndex()) { if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT_AND_ATTRIBUTE) { m_RBFParameters[OFFSET_SCALES + (i * m_numAttributes + j)] = Math.sqrt(maxMinDist); } m_RBFParameters[OFFSET_CENTERS + (i * m_numAttributes) + j] = centers.instance(i).value(k); k++; } } } if (m_useAttributeWeights) { for (int j = 0; j < m_numAttributes; j++) { if (j != data.classIndex()) { m_RBFParameters[OFFSET_ATTRIBUTE_WEIGHTS + j] = 1.0; } } } initializeOutputLayer(random); return data; }