List of usage examples for the weka.filters.unsupervised.attribute.Discretize constructor:
public Discretize()
From source file:com.mycompany.id3classifier.ID3Shell.java
public static void main(String[] args) throws Exception { ConverterUtils.DataSource source = new ConverterUtils.DataSource("lensesData.csv"); Instances dataSet = source.getDataSet(); Discretize filter = new Discretize(); filter.setInputFormat(dataSet);/*from ww w . ja v a2s .com*/ dataSet = Filter.useFilter(dataSet, filter); Standardize standardize = new Standardize(); standardize.setInputFormat(dataSet); dataSet = Filter.useFilter(dataSet, standardize); dataSet.setClassIndex(dataSet.numAttributes() - 1); dataSet.randomize(new Random(9001)); //It's over 9000!! int folds = 10; //Perform crossvalidation Evaluation eval = new Evaluation(dataSet); for (int n = 0; n < folds; n++) { int trainingSize = (int) Math.round(dataSet.numInstances() * .7); int testSize = dataSet.numInstances() - trainingSize; Instances trainingData = dataSet.trainCV(folds, n); Instances testData = dataSet.testCV(folds, n); ID3Classifier classifier = new ID3Classifier(); // Id3 classifier = new Id3(); classifier.buildClassifier(trainingData); eval.evaluateModel(classifier, testData); } System.out.println(eval.toSummaryString("\nResults:\n", false)); }
From source file:id3classifier.Main.java
public static void main(String[] args) throws Exception { ConverterUtils.DataSource source = new ConverterUtils.DataSource(file); Instances dataSet = source.getDataSet(); // discretize the dataset Discretize filter = new Discretize(); filter.setInputFormat(dataSet);/*w w w. ja v a 2 s .c om*/ dataSet = Filter.useFilter(dataSet, filter); // standardize the dataset Standardize standardizedData = new Standardize(); standardizedData.setInputFormat(dataSet); dataSet = Filter.useFilter(dataSet, standardizedData); // randomize the dataset dataSet.setClassIndex(dataSet.numAttributes() - 1); dataSet.randomize(new Debug.Random()); // get the sizes of the training and testing sets and split int trainingSize = (int) Math.round(dataSet.numInstances() * .7); int testSize = dataSet.numInstances() - trainingSize; Instances training = new Instances(dataSet, 0, trainingSize); Instances test = new Instances(dataSet, trainingSize, testSize); // set up the ID3 classifier on the training data ID3Classifiers classifier = new ID3Classifiers(); classifier.buildClassifier(training); // set up the evaluation and test using the classifier and test set Evaluation eval = new Evaluation(dataSet); eval.evaluateModel(classifier, test); // outup and kill, important to exit here to stop javaFX System.out.println(eval.toSummaryString("\nResults\n======\n", false)); System.exit(0); }
From source file:imba.classifier.NBTubes.java
/**
 * Builds the Naive Bayes model: normalizes and/or discretizes the training
 * data, then tallies per-attribute-value class counts and converts them into
 * conditional probabilities.
 *
 * @param data the training instances (class index expected to be set)
 */
@Override
public void buildClassifier(Instances data) {
    dataClassifier = new ArrayList<>();
    infoClassifier = new ArrayList<>();
    validAttribute = new ArrayList<>();
    dataset = null;
    sumClass = null;
    dataSize = 0;
    header_Instances = data;

    Filter f;
    int i, j, k, l, m;
    int sumVal; // NOTE(review): declared but never used in this method
    int numAttr = data.numAttributes(); // this count includes the class attribute, so attributes + 1

    // Scan for any numeric attribute, skipping the class attribute.
    i = 0;
    while (i < numAttr && wasNumeric == false) {
        if (i == classIdx) {
            i++;
        }
        if (i != numAttr && data.attribute(i).isNumeric()) {
            wasNumeric = true;
        }
        i++;
    }

    Instance p;

    // Apply filters: first normalize when numeric attributes are present...
    if (wasNumeric) {
        f = new Normalize();
        try {
            f.setInputFormat(data);
            for (Instance i1 : data) {
                f.input(i1);
            }
            f.batchFinished();
        } catch (Exception ex) {
            Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
        }
        dataset = f.getOutputFormat();
        while ((p = f.output()) != null) {
            dataset.add(p);
        }
    }

    // ...then convert numeric attributes to nominal, either via
    // discretization or a direct numeric-to-nominal conversion,
    // depending on the configured `filter` name.
    if (filter.equals("Discretize")) {
        f = new Discretize();
    } else {
        f = new NumericToNominal();
    }
    try {
        if (wasNumeric) {
            // Feed the normalized copy through the second filter.
            f.setInputFormat(dataset);
            for (Instance i1 : dataset) {
                f.input(i1);
            }
        } else {
            f.setInputFormat(data);
            for (Instance i1 : data) {
                f.input(i1);
            }
        }
        f.batchFinished();
    } catch (Exception ex) {
        Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
    }
    dataset = null;
    dataset = f.getOutputFormat();
    while ((p = f.output()) != null) {
        dataset.add(p);
    }

    // Build the count data structures.
    classIdx = data.classIndex();
    dataSize = data.size();

    // Fill dataClassifier/infoClassifier with zeroed tables shaped
    // [attribute][attributeValue][classValue]. m appears to map attribute
    // index j to its slot once the class attribute is skipped — the i/j
    // bookkeeping here is subtle; TODO confirm against classIdx positions.
    i = 0;
    j = i;
    while (j < numAttr) {
        if (i == classIdx) {
            i++;
        } else {
            dataClassifier.add(new ArrayList<>());
            infoClassifier.add(new ArrayList<>());
            if (j < i) {
                m = j - 1;
            } else {
                m = j;
            }
            k = 0;
            while (k < dataset.attribute(j).numValues()) {
                dataClassifier.get(m).add(new ArrayList<>());
                infoClassifier.get(m).add(new ArrayList<>());
                l = 0;
                while (l < dataset.attribute(classIdx).numValues()) {
                    dataClassifier.get(m).get(k).add(0);
                    infoClassifier.get(m).get(k).add(0.0);
                    l++;
                }
                k++;
            }
        }
        i++;
        j++;
    }

    // Fill the count table from the filtered dataset; class frequencies
    // are counted once per instance (only when m == 0).
    sumClass = new int[data.numClasses()];
    i = 0;
    while (i < dataset.size()) {
        j = 0;
        k = j;
        while (k < dataset.numAttributes()) {
            if (j == classIdx) {
                j++;
            } else {
                if (k < j) {
                    m = k - 1;
                } else {
                    m = k;
                }
                dataClassifier.get(m).get((int) dataset.get(i).value(k)).set(
                        (int) dataset.get(i).value(classIdx),
                        dataClassifier.get(m).get((int) dataset.get(i).value(k))
                                .get((int) dataset.get(i).value(classIdx)) + 1);
                if (m == 0) {
                    sumClass[(int) dataset.get(i).value(classIdx)]++;
                }
            }
            k++;
            j++;
        }
        i++;
    }

    // Convert raw counts into per-class relative frequencies.
    i = 0;
    while (i < dataClassifier.size()) {
        j = 0;
        while (j < dataClassifier.get(i).size()) {
            k = 0;
            while (k < dataClassifier.get(i).get(j).size()) {
                infoClassifier.get(i).get(j).set(k,
                        (double) dataClassifier.get(i).get(j).get(k) / sumClass[k]);
                k++;
            }
            j++;
        }
        i++;
    }
}
From source file:machinelearningq2.ExtendedNaiveBayes.java
/**
 * Discretizes the given instances with Weka's unsupervised Discretize
 * filter and tallies, into the {@code binCount} field, how often each bin
 * index occurs across all non-class attributes.
 *
 * @param instnc the instances to discretize
 * @return the discretized instances
 * @throws Exception if the filter cannot be applied
 */
public Instances discretize(Instances instnc) throws Exception {
    Discretize discretizer = new Discretize();
    discretizer.setInputFormat(instnc);
    Instances discretized = Filter.useFilter(instnc, discretizer);

    // One counter slot per bin produced by the filter.
    binCount = new double[discretizer.getBins()];

    // Skip the last attribute, which is treated as the class.
    int attributeCount = discretized.numAttributes() - 1;
    for (int row = 0; row < discretized.numInstances(); row++) {
        Instance current = discretized.instance(row);
        for (int a = 0; a < attributeCount; a++) {
            binCount[(int) current.value(a)]++;
        }
    }
    return discretized;
}
From source file:milk.classifiers.MIBoost.java
License:Open Source License
/**
 * Builds the boosted multi-instance classifier.
 *
 * @param exps the training exemplars (bags) used for generating the
 *             boosted classifier
 * @exception Exception if the classifier could not be built successfully
 */
public void buildClassifier(Exemplars exps) throws Exception {
    Exemplars train = new Exemplars(exps);

    // --- Validate the training data and configuration. ---
    if (train.classAttribute().type() != Attribute.NOMINAL) {
        throw new Exception("Class attribute must be nominal.");
    }
    if (train.checkForStringAttributes()) {
        throw new Exception("Can't handle string attributes!");
    }
    m_ClassIndex = train.classIndex();
    m_IdIndex = train.idIndex();
    m_NumClasses = train.numClasses();
    m_NumIterations = m_MaxIterations;
    if (m_NumClasses > 2) {
        throw new Exception("Not yet prepared to deal with multiple classes!");
    }
    if (m_Classifier == null)
        throw new Exception("A base classifier has not been specified!");
    if (!(m_Classifier instanceof WeightedInstancesHandler))
        throw new Exception("Base classifier cannot handle weighted instances!");

    m_Models = Classifier.makeCopies(m_Classifier, getMaxIterations());
    if (m_Debug)
        System.err.println("Base classifier: " + m_Classifier.getClass().getName());

    m_Beta = new double[m_NumIterations];
    m_Attributes = new Instances(train.exemplar(0).getInstances(), 0);

    // --- Flatten bags into one weighted data set. Each bag gets weight
    // sumNi/N, spread evenly over its instances. ---
    double N = (double) train.numExemplars(), sumNi = 0;
    Instances data = new Instances(m_Attributes, 0); // Data to learn a model
    data.deleteAttributeAt(m_IdIndex); // ID attribute useless
    Instances dataset = new Instances(data, 0);

    // Initialize weights
    for (int i = 0; i < N; i++)
        sumNi += train.exemplar(i).getInstances().numInstances();
    for (int i = 0; i < N; i++) {
        Exemplar exi = train.exemplar(i);
        exi.setWeight(sumNi / N);
        Instances insts = exi.getInstances();
        double ni = (double) insts.numInstances();
        for (int j = 0; j < ni; j++) {
            Instance ins = new Instance(insts.instance(j)); // Copy
            ins.deleteAttributeAt(m_IdIndex);
            ins.setDataset(dataset);
            ins.setWeight(exi.weight() / ni);
            data.add(ins);
        }
    }

    // Assume the order of the instances is preserved by the Discretize filter.
    if (m_DiscretizeBin > 0) {
        m_Filter = new Discretize();
        m_Filter.setInputFormat(new Instances(data, 0));
        m_Filter.setBins(m_DiscretizeBin);
        data = Filter.useFilter(data, m_Filter);
    }

    // --- Main boosting loop. ---
    int dataIdx;
    iterations: for (int m = 0; m < m_MaxIterations; m++) {
        if (m_Debug)
            System.err.println("\nIteration " + m);

        // Build a model on the current instance weights.
        m_Models[m].buildClassifier(data);

        // Weighted instance-wise 0-1 error of each bag; dataIdx walks the
        // flattened data in the same order the bags were appended.
        double[] err = new double[(int) N], weights = new double[(int) N];
        boolean perfect = true, tooWrong = true;
        dataIdx = 0;
        for (int n = 0; n < N; n++) {
            Exemplar exn = train.exemplar(n);
            double nn = (double) exn.getInstances().numInstances();
            for (int p = 0; p < nn; p++) {
                Instance testIns = data.instance(dataIdx++);
                if ((int) m_Models[m].classifyInstance(testIns) != (int) exn.classValue())
                    err[n]++;
            }
            weights[n] = exn.weight();
            err[n] /= nn;
            if (err[n] > 0.5)
                perfect = false;
            if (err[n] < 0.5)
                tooWrong = false;
        }

        if (perfect || tooWrong) { // No or 100% classification error: cannot fit beta
            if (m == 0)
                m_Beta[m] = 1.0;
            else
                m_Beta[m] = 0;
            m_NumIterations = m + 1;
            if (m_Debug)
                System.err.println("No errors");
            break iterations;
        }

        // Numerically search for this model's weight c (= beta), starting
        // at 0 with unbounded search range (NaN bounds).
        double[] x = new double[1];
        x[0] = 0;
        double[][] b = new double[2][x.length];
        b[0][0] = Double.NaN;
        b[1][0] = Double.NaN;

        OptEng opt = new OptEng();
        opt.setWeights(weights);
        opt.setErrs(err);
        if (m_Debug)
            System.out.println("Start searching for c... ");
        x = opt.findArgmin(x, b);
        while (x == null) {
            // Optimizer ran out of iterations; resume from its last point.
            x = opt.getVarbValues();
            if (m_Debug)
                System.out.println("200 iterations finished, not enough!");
            x = opt.findArgmin(x, b);
        }
        if (m_Debug)
            System.out.println("Finished.");
        m_Beta[m] = x[0];

        if (m_Debug)
            System.err.println("c = " + m_Beta[m]);

        // Stop if error too small, or too big — then ignore this model.
        if (Double.isInfinite(m_Beta[m]) || Utils.smOrEq(m_Beta[m], 0)) {
            if (m == 0)
                m_Beta[m] = 1.0;
            else
                m_Beta[m] = 0;
            m_NumIterations = m + 1;
            if (m_Debug)
                System.err.println("Errors out of range!");
            break iterations;
        }

        // Update bag weights exponentially by error, then renormalize and
        // push the new weights down to the individual instances.
        dataIdx = 0;
        double totWeights = 0;
        for (int r = 0; r < N; r++) {
            Exemplar exr = train.exemplar(r);
            exr.setWeight(weights[r] * Math.exp(m_Beta[m] * (2.0 * err[r] - 1.0)));
            totWeights += exr.weight();
        }
        if (m_Debug)
            System.err.println("Total weights = " + totWeights);
        for (int r = 0; r < N; r++) {
            Exemplar exr = train.exemplar(r);
            double num = (double) exr.getInstances().numInstances();
            exr.setWeight(sumNi * exr.weight() / totWeights);
            for (int s = 0; s < num; s++) {
                Instance inss = data.instance(dataIdx);
                inss.setWeight(exr.weight() / num);
                if (Double.isNaN(inss.weight()))
                    throw new Exception("instance " + s + " in bag " + r + " has weight NaN!");
                dataIdx++;
            }
        }
    }
}
From source file:NaiveBayes.Atribut.java
/**
 * Builds one attribute model: the attribute's name plus a Nilai entry for
 * each of its (possibly discretized) values.
 *
 * @param ints       the training instances
 * @param i          index of the attribute to model
 * @param classindex index of the class attribute
 * @throws Exception if the Discretize filter fails
 */
public Atribut(Instances ints, int i, int classindex) throws Exception {
    if (ints.attribute(i).isNumeric()) {
        // Numeric attribute: discretize a copy to learn how many value
        // bins the attribute has; each Nilai re-derives its own interval
        // from the original (unfiltered) instances.
        Instances newData = new Instances(ints);
        Discretize f = new Discretize();
        f.setInputFormat(newData);
        newData = Filter.useFilter(newData, f);
        // NOTE(review): unlike the nominal branch below, the name is NOT
        // whitespace-stripped here — confirm whether that is intended.
        name = ints.attribute(i).name();
        listNilai = new ArrayList<>();
        for (int j = 0; j < newData.attribute(i).numValues(); j++) {
            listNilai.add(new Nilai(ints, i, j, classindex));
        }
    } else {
        // Nominal attribute: strip whitespace from the name and add one
        // Nilai per declared value.
        name = ints.attribute(i).name().replaceAll("\\s+", "");
        listNilai = new ArrayList<>();
        for (int j = 0; j < ints.attribute(i).numValues(); j++) {
            listNilai.add(new Nilai(ints, i, j, classindex));
        }
    }
}
From source file:NaiveBayes.Nilai.java
/**
 * Represents one value of an attribute together with per-class counts.
 * For a numeric attribute the value is a discretized interval
 * [lower, upper); for a nominal attribute it is the literal value string.
 *
 * @param inst       the training instances
 * @param i          index of the attribute this value belongs to
 * @param j          index of the value within the attribute
 * @param classindex index of the class attribute
 * @throws Exception if the Discretize filter fails
 */
public Nilai(Instances inst, int i, int j, int classindex) throws Exception {
    Instances newData = new Instances(inst);
    numClass = NaiveBayes.getNumEachClass(newData);
    lower = 0;
    upper = 0;
    kelas = new ArrayList<>();
    if (newData.attribute(i).isNumeric()) {
        // Numeric attribute: discretize, then derive the [lower, upper)
        // interval of bin j from the filter's cut points.
        Discretize f = new Discretize();
        f.setInputFormat(newData);
        newData = Filter.useFilter(newData, f);
        name = newData.attribute(i).value(j);
        if (f.getCutPoints(i) != null) {
            if (j == 0) {
                lower = Double.NEGATIVE_INFINITY;
                upper = f.getCutPoints(i)[j];
            } else {
                // NOTE(review): this reads attribute(0) rather than
                // attribute(i) — looks like a bug; confirm before fixing.
                if (j != newData.attribute(0).numValues() - 1) {
                    lower = f.getCutPoints(i)[j - 1];
                    upper = f.getCutPoints(i)[j];
                } else {
                    lower = f.getCutPoints(i)[j - 1];
                    upper = Double.POSITIVE_INFINITY;
                }
            }
        } else {
            // No cut points: a single bin covering the whole range.
            lower = Double.NEGATIVE_INFINITY;
            upper = Double.POSITIVE_INFINITY;
        }
        for (int k = 0; k < inst.attribute(classindex).numValues(); k++) { // for each class label
            double cnt = 1; // count starts at 1 (add-one smoothing, presumably — confirm)
            int countClass = 0;
            for (int l = 0; l < inst.numInstances(); l++) { // over all instances
                double val = inst.get(l).value(i);
                if (countClass <= numClass[k]) {
                    if (inst.attribute(classindex).value(k).equalsIgnoreCase(
                            inst.get(l).toString(classindex).replaceAll("'", ""))) { // class label matches
                        // Count instances of class k whose raw value falls
                        // in [lower, upper).
                        if (val >= lower && val < upper) {
                            cnt += 1;
                        }
                        countClass++;
                    }
                } else
                    break;
            }
            kelas.add(new Kelas(newData.attribute(classindex).value(k), cnt));
        }
    } else {
        // Nominal attribute: match the literal value, whitespace stripped.
        name = newData.attribute(i).value(j).replaceAll("\\s", "");
        for (int k = 0; k < inst.attribute(classindex).numValues(); k++) { // for each class label
            double cnt = 1; // count starts at 1 (add-one smoothing, presumably — confirm)
            int countClass = 0;
            for (int l = 0; l < inst.numInstances(); l++) { // over all instances
                if (countClass <= numClass[k]) {
                    if (inst.attribute(classindex).value(k).replaceAll("\\s", "")
                            .equalsIgnoreCase(inst.get(l).toString(classindex).replaceAll("\\s", "")) // class matches
                            && inst.attribute(i).value(j).replaceAll("\\s", "") // and attribute value matches
                                    .equalsIgnoreCase(inst.get(l).toString(i).replaceAll("\\s", ""))) {
                        cnt += 1;
                        countClass++;
                    }
                } else
                    break;
            }
            kelas.add(new Kelas(newData.attribute(classindex).value(k).replaceAll("\\s+", ""), cnt));
        }
    }
}
From source file:NaiveBayesPckge.NaiveBayesMain.java
public static Instances useFilterDiscritize(Instances dataSet) throws Exception { //set options String[] optionsFilter = new String[4]; //choose the number of intervals, e.g 2: optionsFilter[0] = "-B"; optionsFilter[1] = "6"; //choose the range of attributes on which to apply the filter: optionsFilter[2] = "-R"; optionsFilter[3] = "first-last"; System.out.println("> Filtering dataset using Discretize\n"); //Apply Discretization Discretize discretize = new Discretize(); discretize.setOptions(optionsFilter); discretize.setInputFormat(dataSet);/*w w w .j ava2 s .c o m*/ Instances newDataTemp = Filter.useFilter(dataSet, discretize); return newDataTemp; }
From source file:NaiveBayesPckge.NaiveBayesMain.java
/**
 * Reads one new instance's attribute values from stdin, classifies it with
 * the trained {@code naive} model, and prints the predicted class label.
 *
 * @param instances the data set supplying the attribute/class definitions
 * @throws Exception if the filter or the classifier fails
 */
public static void addNewInstance(Instances instances) throws Exception {
    Scanner scan = new Scanner(System.in);
    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    ArrayList<String> classVal = new ArrayList<String>();
    int nConclus = instances.attribute(instances.numAttributes() - 1).numValues();
    int numAttribut = instances.numAttributes();

    // Collect the possible class label strings (e.g. T and F).
    for (int i = 0; i < nConclus; i++) {
        classVal.add(instances.attribute(instances.numAttributes() - 1).value(i));
    }

    // Rebuild the attribute list: plain attributes plus the nominal class.
    // NOTE(review): `atts` is never read after this — dead code; confirm
    // before removing.
    for (int i = 0; i < numAttribut - 1; i++) {
        atts.add(new Attribute(instances.attribute(i).name()));
    }
    atts.add(new Attribute(instances.attribute(numAttribut - 1).name(), classVal));

    // Prompt for one numeric value per non-class attribute.
    double[] attValues = new double[numAttribut];
    System.out.print("Masukkan nilai : "); // prompt text ("Enter values: ")
    for (int i = 0; i < numAttribut - 1; i++) {
        attValues[i] = scan.nextDouble();
    }

    Discretize discretize = new Discretize();
    String s = scan.nextLine(); // consumes the rest of the input line; value itself unused

    Instance instance = new DenseInstance(1.0, attValues);
    instance.setDataset(instances);

    // NOTE(review): the filter's output is never retrieved — the raw
    // (undiscretized) instance is what gets classified below. Confirm intent.
    discretize.setInputFormat(instances);
    discretize.input(instance);

    int classify1 = (int) naive.classifyInstance(instance);
    System.out.print("Prediction Class : ");
    System.out.println(classVal.get(classify1));
}
From source file:org.tigr.microarray.mev.cluster.gui.impl.bn.PrepareArrayDataModule.java
License:Open Source License
/** * The <code>discretize</code> method is given a WEKA Instances object corresponding to the gene expression data * and returns a new WEKA Instances object with the given data discretized into a given number of equal-width bins * * @param data an <code>Instances</code> which is a WEKA Instances object corresponding to the gene expression data * @param numBins a <code>String</code> corresponding to the number of bins in which the data is to be discretized * @return an <code>Instances</code> a new WEKA Instances object with the given data discretized * into a given number of equal-width bins * @exception NullArgumentException if an error occurs if the data is null * @exception OutOfRangeException if an error occurs if the numBins is out of bounds (namely, negative or equal to zero) */// w w w.j ava2s. c o m public static Instances discretize(Instances data, String numBins) throws NullArgumentException, OutOfRangeException { if (data == null) { throw new NullArgumentException("Parameter data passed to discretize method was null!"); } if (Integer.parseInt(numBins) <= 0) { throw new OutOfRangeException( "numBins is out of range (should be strictly positive!\nnumBins=" + numBins); } try { String[] options = new String[2]; options[0] = "-B"; options[1] = numBins; Discretize discretize = new Discretize(); discretize.setOptions(options); discretize.setInputFormat(data); Instances newData = Filter.useFilter(data, discretize); return newData; } catch (Exception e) { System.out.println(e); e.printStackTrace(); } return null; }