List of usage examples for weka.filters.unsupervised.attribute Discretize setInputFormat
@Override public boolean setInputFormat(Instances instanceInfo) throws Exception
From source file:com.mycompany.id3classifier.ID3Shell.java
public static void main(String[] args) throws Exception { ConverterUtils.DataSource source = new ConverterUtils.DataSource("lensesData.csv"); Instances dataSet = source.getDataSet(); Discretize filter = new Discretize(); filter.setInputFormat(dataSet); dataSet = Filter.useFilter(dataSet, filter); Standardize standardize = new Standardize(); standardize.setInputFormat(dataSet); dataSet = Filter.useFilter(dataSet, standardize); dataSet.setClassIndex(dataSet.numAttributes() - 1); dataSet.randomize(new Random(9001)); //It's over 9000!! int folds = 10; //Perform crossvalidation Evaluation eval = new Evaluation(dataSet); for (int n = 0; n < folds; n++) { int trainingSize = (int) Math.round(dataSet.numInstances() * .7); int testSize = dataSet.numInstances() - trainingSize; Instances trainingData = dataSet.trainCV(folds, n); Instances testData = dataSet.testCV(folds, n); ID3Classifier classifier = new ID3Classifier(); // Id3 classifier = new Id3(); classifier.buildClassifier(trainingData); eval.evaluateModel(classifier, testData); }/*from ww w .j av a 2s.c o m*/ System.out.println(eval.toSummaryString("\nResults:\n", false)); }
From source file:demo.RunGUIProof.java
License:Open Source License
/** * @param args//from w ww.jav a2 s.c o m */ public static void main(String[] args) { JOptionPane.showMessageDialog(null, introductionMessage, "Chordalysis", JOptionPane.INFORMATION_MESSAGE); int result = JOptionPane.showOptionDialog(null, new JTextArea(agreeCitation), "Reference", JOptionPane.YES_NO_OPTION, JOptionPane.QUESTION_MESSAGE, null, null, null); if (result == JOptionPane.NO_OPTION || result == JOptionPane.CLOSED_OPTION) { JOptionPane.showMessageDialog(null, "Chordalysis will now stop, because you do not want to reference its source. ", "Chordalysis", JOptionPane.WARNING_MESSAGE); System.exit(0); } JFileChooser chooser = new JFileChooser(); FileNameExtensionFilter filter = new FileNameExtensionFilter("CSV file", "csv"); chooser.setFileFilter(filter); int returnVal = chooser.showOpenDialog(null); File csvFile = null; if (returnVal == JFileChooser.APPROVE_OPTION) { csvFile = chooser.getSelectedFile(); System.out.println("You chose to open: " + csvFile); } else { JOptionPane.showMessageDialog(null, noFileSelectedMessage, "Chordalysis", JOptionPane.ERROR_MESSAGE); return; } CSVLoader loader = new CSVLoader(); if (!csvFile.exists()) { JOptionPane.showMessageDialog(null, noFileMessage, "Chordalysis", JOptionPane.INFORMATION_MESSAGE); return; } double pValue = -1; while (pValue <= 0 || 1 <= pValue) { pValue = Double.valueOf(JOptionPane.showInputDialog("Desired p-value (between 0 and 1)", 0.05)); if (pValue <= 0 || 1 <= pValue) { JOptionPane.showMessageDialog(null, incorrectPValueMessage, "Chordalysis", JOptionPane.WARNING_MESSAGE); } } filter = new FileNameExtensionFilter("PNG or DOT or CSV file or DNE file", "png", "dot", "csv", "dne"); chooser = new JFileChooser(); chooser.setFileFilter(filter); chooser.setDialogTitle("Where to save the graph?"); chooser.setSelectedFile(new File(csvFile.getAbsolutePath() + ".png")); returnVal = chooser.showSaveDialog(null); File graphFile = null; if (returnVal == JFileChooser.APPROVE_OPTION) { graphFile = 
chooser.getSelectedFile(); System.out.println("You chose to save the graph to: " + graphFile.getAbsolutePath()); } else { JOptionPane.showMessageDialog(null, noFileSelectedMessage, "Chordalysis", JOptionPane.ERROR_MESSAGE); return; } try { loader.setFile(csvFile); returnVal = JOptionPane.showConfirmDialog(null, "Are all of your attribute nominal?", "Chordalysis", JOptionPane.YES_NO_OPTION); if (returnVal == JOptionPane.YES_OPTION) { loader.setNominalAttributes("first-last"); } Instances instances = loader.getDataSet(); String cols = ""; for (int i = 0; i < instances.numAttributes(); i++) { Attribute att = instances.attribute(i); if (!att.isNominal()) { cols += (i + 1) + ","; } } if (!cols.isEmpty()) { cols = cols.substring(0, cols.length() - 1); String message = "Some atributes are not nominal (number " + cols + "), please wait during discretization. "; JOptionPane.showMessageDialog(null, message, "Chordalysis", JOptionPane.INFORMATION_MESSAGE); Discretize discretizer = new Discretize(cols); discretizer.setUseEqualFrequency(true); discretizer.setBins(3); discretizer.setIgnoreClass(true); discretizer.setInputFormat(instances); instances = Filter.useFilter(instances, discretizer); JOptionPane.showMessageDialog(null, "Discretization is now finished.", "Chordalysis", JOptionPane.INFORMATION_MESSAGE); } String[] variablesNames = new String[instances.numAttributes()]; String[][] outcomes = new String[instances.numAttributes()][]; for (int i = 0; i < variablesNames.length; i++) { variablesNames[i] = instances.attribute(i).name(); outcomes[i] = new String[instances.attribute(i).numValues()]; for (int j = 0; j < outcomes[i].length; j++) { outcomes[i][j] = instances.attribute(i).value(j); } } ChordalysisModelling modeller = new ChordalysisModelling(pValue); modeller.buildModel(instances); DecomposableModel bestModel = modeller.getModel(); JOptionPane.showMessageDialog(null, new JTextArea("Chordalysis has now finished analysing your data. 
" + "\nIf you found something useful, please reference Chordalysis as" + "\n\t- F. Petitjean, G.I. Webb and A. Nicholson, Scaling log-linear analysis to high-dimensional data, ICDM 2013" + "\n\t- F. Petitjean and G.I. Webb, Scaling log-linear analysis to datasets with thousands of variables, SDM 2015" + "\n\nYou can find the output file at: '" + graphFile.getAbsolutePath() + "'"), "Citation", JOptionPane.INFORMATION_MESSAGE); System.out.println("The model selected is:"); System.out.println(bestModel.toString(variablesNames)); if (graphFile.getName().endsWith("dot")) { bestModel.exportDOT(graphFile, variablesNames); } else if (graphFile.getName().endsWith("png")) { ImageIO.write(bestModel.getImage(variablesNames), "png", graphFile); } else if (graphFile.getName().endsWith("dne")) { bestModel.exportBNNetica(graphFile, variablesNames, outcomes); bestModel.exportDOT(new File(graphFile.getAbsolutePath() + ".dot"), variablesNames); ImageIO.write(bestModel.getImage(variablesNames), "png", new File(graphFile.getAbsolutePath() + ".png")); bestModel.saveAssociations(variablesNames, new File(graphFile.getAbsolutePath() + ".csv")); } else { bestModel.saveAssociations(variablesNames, graphFile); } } catch (IOException e) { JOptionPane.showMessageDialog(null, "The file '" + csvFile.getAbsolutePath() + "'\ncannot be read properly.", "Error while reading file", JOptionPane.ERROR_MESSAGE); System.out.println("I/O error while loading csv file"); e.printStackTrace(); } catch (Exception e) { JOptionPane.showMessageDialog(null, "Error:" + e.getMessage(), "Chordalysis", JOptionPane.ERROR_MESSAGE); e.printStackTrace(); } }
From source file:id3classifier.Main.java
public static void main(String[] args) throws Exception { ConverterUtils.DataSource source = new ConverterUtils.DataSource(file); Instances dataSet = source.getDataSet(); // discretize the dataset Discretize filter = new Discretize(); filter.setInputFormat(dataSet); dataSet = Filter.useFilter(dataSet, filter); // standardize the dataset Standardize standardizedData = new Standardize(); standardizedData.setInputFormat(dataSet); dataSet = Filter.useFilter(dataSet, standardizedData); // randomize the dataset dataSet.setClassIndex(dataSet.numAttributes() - 1); dataSet.randomize(new Debug.Random()); // get the sizes of the training and testing sets and split int trainingSize = (int) Math.round(dataSet.numInstances() * .7); int testSize = dataSet.numInstances() - trainingSize; Instances training = new Instances(dataSet, 0, trainingSize); Instances test = new Instances(dataSet, trainingSize, testSize); // set up the ID3 classifier on the training data ID3Classifiers classifier = new ID3Classifiers(); classifier.buildClassifier(training); // set up the evaluation and test using the classifier and test set Evaluation eval = new Evaluation(dataSet); eval.evaluateModel(classifier, test); // outup and kill, important to exit here to stop javaFX System.out.println(eval.toSummaryString("\nResults\n======\n", false)); System.exit(0);/*from www .ja v a2 s.c o m*/ }
From source file:machinelearningq2.ExtendedNaiveBayes.java
/**
 * Discretizes the given instances with a default {@code Discretize} filter and
 * tallies, into the {@code binCount} field, how many attribute values fall in
 * each bin across all non-class attributes (the last attribute is skipped).
 *
 * @param instnc the instances to discretize
 * @return the discretized copy of the input
 * @throws Exception if the filter cannot be configured or applied
 */
public Instances discretize(Instances instnc) throws Exception {
    Discretize discretizer = new Discretize();
    discretizer.setInputFormat(instnc);
    Instances discretized = Filter.useFilter(instnc, discretizer);

    // One counter per bin; bin index is the discretized attribute value.
    binCount = new double[discretizer.getBins()];
    int lastAttribute = discretized.numAttributes() - 1; // class column excluded
    for (int row = 0; row < discretized.numInstances(); row++) {
        Instance current = discretized.instance(row);
        for (int col = 0; col < lastAttribute; col++) {
            binCount[(int) current.value(col)]++;
        }
    }
    return discretized;
}
From source file:NaiveBayes.Atribut.java
/**
 * Builds the value list for attribute {@code i} of the data set. Numeric
 * attributes are first discretized (on a copy) to determine how many discrete
 * values exist; nominal attributes use their declared values directly.
 *
 * @param ints the full data set
 * @param i index of the attribute this object describes
 * @param classindex index of the class attribute
 * @throws Exception if discretization fails
 */
public Atribut(Instances ints, int i, int classindex) throws Exception {
    if (ints.attribute(i).isNumeric()) {
        // Discretize a copy only to learn the number of bins; note that the
        // ORIGINAL (undiscretized) `ints` is what gets passed to each Nilai.
        // NOTE(review): presumably Nilai re-discretizes internally — confirm.
        Instances newData = new Instances(ints);
        Discretize f = new Discretize();
        f.setInputFormat(newData);
        newData = Filter.useFilter(newData, f);
        name = ints.attribute(i).name();
        listNilai = new ArrayList<>();
        // One Nilai per discretized bin of this attribute.
        for (int j = 0; j < newData.attribute(i).numValues(); j++) {
            listNilai.add(new Nilai(ints, i, j, classindex));
        }
    } else {
        // NOTE(review): only the nominal branch strips whitespace from the
        // attribute name; the numeric branch keeps it verbatim — confirm
        // this asymmetry is intentional.
        name = ints.attribute(i).name().replaceAll("\\s+", "");
        listNilai = new ArrayList<>();
        // One Nilai per declared nominal value.
        for (int j = 0; j < ints.attribute(i).numValues(); j++) {
            listNilai.add(new Nilai(ints, i, j, classindex));
        }
    }
}
From source file:NaiveBayes.Nilai.java
/**
 * Builds per-class counts for value {@code j} of attribute {@code i}.
 * For numeric attributes the data is discretized and [lower, upper) bounds
 * are derived from the filter's cut points; counts then tally instances of
 * each class whose raw value falls in the bin. For nominal attributes counts
 * tally exact (whitespace-insensitive) value matches per class. Counts start
 * at 1 (Laplace-style smoothing).
 *
 * @param inst the full data set
 * @param i index of the attribute this value belongs to
 * @param j index of the value/bin within attribute {@code i}
 * @param classindex index of the class attribute
 * @throws Exception if discretization fails
 */
public Nilai(Instances inst, int i, int j, int classindex) throws Exception {
    Instances newData = new Instances(inst);
    numClass = NaiveBayes.getNumEachClass(newData);
    lower = 0;
    upper = 0;
    kelas = new ArrayList<>();
    //if(newData.instance(i).isMissing(j)) newData.instance(i).setValue(i, "b");
    if (newData.attribute(i).isNumeric()) {
        // Discretize the copy to obtain the bin labels and cut points.
        Discretize f = new Discretize();
        f.setInputFormat(newData);
        newData = Filter.useFilter(newData, f);
        name = newData.attribute(i).value(j);
        if (f.getCutPoints(i) != null) {
            // Map bin j to its numeric interval [lower, upper).
            if (j == 0) {
                lower = Double.NEGATIVE_INFINITY;
                upper = f.getCutPoints(i)[j];
            } else {
                // NOTE(review): the last-bin test reads attribute(0), not
                // attribute(i) — looks like a possible copy/paste slip; confirm.
                if (j != newData.attribute(0).numValues() - 1) {
                    lower = f.getCutPoints(i)[j - 1];
                    upper = f.getCutPoints(i)[j];
                } else {
                    lower = f.getCutPoints(i)[j - 1];
                    upper = Double.POSITIVE_INFINITY;
                }
            }
        } else {
            // No cut points: the single bin spans the whole real line.
            lower = Double.NEGATIVE_INFINITY;
            upper = Double.POSITIVE_INFINITY;
        }
        for (int k = 0; k < inst.attribute(classindex).numValues(); k++) { // for each class name
            double cnt = 1; // smoothed count, starts at 1
            int countClass = 0;
            for (int l = 0; l < inst.numInstances(); l++) { // over all instances
                double val = inst.get(l).value(i);
                // NOTE(review): `<=` allows one extra match beyond the class
                // total numClass[k] — confirm whether `<` was intended.
                if (countClass <= numClass[k]) {
                    if (inst.attribute(classindex).value(k).equalsIgnoreCase(
                            inst.get(l).toString(classindex).replaceAll("'", ""))) { /* its class name */
                        // Same class, and the raw attribute value falls inside
                        // this bin's [lower, upper) interval.
                        if (val >= lower && val < upper) {
                            cnt += 1;
                        }
                        countClass++;
                    }
                } else
                    break;
            }
            kelas.add(new Kelas(newData.attribute(classindex).value(k), cnt));
        }
    } else {
        // Nominal attribute: compare labels with all whitespace removed.
        name = newData.attribute(i).value(j).replaceAll("\\s", "");
        for (int k = 0; k < inst.attribute(classindex).numValues(); k++) { // for each class name
            double cnt = 1; // smoothed count, starts at 1
            int countClass = 0;
            for (int l = 0; l < inst.numInstances(); l++) { // over all instances
                if (countClass <= numClass[k]) {
                    // Instance matches both the class label and this attribute
                    // value (whitespace-insensitive comparison on both sides).
                    if (inst.attribute(classindex).value(k).replaceAll("\\s", "")
                            .equalsIgnoreCase(inst.get(l).toString(classindex).replaceAll("\\s", "")) // class name
                            && inst.attribute(i).value(j).replaceAll("\\s", ""). // check attribute name
                                    equalsIgnoreCase(inst.get(l).toString(i).replaceAll("\\s", ""))) {
                        cnt += 1;
                        countClass++;
                    }
                } else
                    break;
            }
            kelas.add(new Kelas(newData.attribute(classindex).value(k).replaceAll("\\s+", ""), cnt));
        }
    }
}
From source file:NaiveBayesPckge.NaiveBayesMain.java
public static Instances useFilterDiscritize(Instances dataSet) throws Exception { //set options String[] optionsFilter = new String[4]; //choose the number of intervals, e.g 2: optionsFilter[0] = "-B"; optionsFilter[1] = "6"; //choose the range of attributes on which to apply the filter: optionsFilter[2] = "-R"; optionsFilter[3] = "first-last"; System.out.println("> Filtering dataset using Discretize\n"); //Apply Discretization Discretize discretize = new Discretize(); discretize.setOptions(optionsFilter); discretize.setInputFormat(dataSet); Instances newDataTemp = Filter.useFilter(dataSet, discretize); return newDataTemp; }
From source file:NaiveBayesPckge.NaiveBayesMain.java
public static void addNewInstance(Instances instances) throws Exception { Scanner scan = new Scanner(System.in); ArrayList<Attribute> atts = new ArrayList<Attribute>(); ArrayList<String> classVal = new ArrayList<String>(); int nConclus = instances.attribute(instances.numAttributes() - 1).numValues(); int numAttribut = instances.numAttributes(); //buat nambah kesimpulan. Misal T dan F for (int i = 0; i < nConclus; i++) { classVal.add(instances.attribute(instances.numAttributes() - 1).value(i)); }//from w w w . j a va2 s .c o m //buat nambahin attribut for (int i = 0; i < numAttribut - 1; i++) { atts.add(new Attribute(instances.attribute(i).name())); } atts.add(new Attribute(instances.attribute(numAttribut - 1).name(), classVal)); double[] attValues = new double[numAttribut]; System.out.print("Masukkan nilai : "); for (int i = 0; i < numAttribut - 1; i++) { attValues[i] = scan.nextDouble(); } Discretize discretize = new Discretize(); String s = scan.nextLine(); Instance instance = new DenseInstance(1.0, attValues); instance.setDataset(instances); discretize.setInputFormat(instances); discretize.input(instance); int classify1 = (int) naive.classifyInstance(instance); System.out.print("Prediction Class : "); System.out.println(classVal.get(classify1)); }
From source file:org.tigr.microarray.mev.cluster.gui.impl.bn.PrepareArrayDataModule.java
License:Open Source License
/** * The <code>discretize</code> method is given a WEKA Instances object corresponding to the gene expression data * and returns a new WEKA Instances object with the given data discretized into a given number of equal-width bins * * @param data an <code>Instances</code> which is a WEKA Instances object corresponding to the gene expression data * @param numBins a <code>String</code> corresponding to the number of bins in which the data is to be discretized * @return an <code>Instances</code> a new WEKA Instances object with the given data discretized * into a given number of equal-width bins * @exception NullArgumentException if an error occurs if the data is null * @exception OutOfRangeException if an error occurs if the numBins is out of bounds (namely, negative or equal to zero) *///from w w w .j a v a2 s . c o m public static Instances discretize(Instances data, String numBins) throws NullArgumentException, OutOfRangeException { if (data == null) { throw new NullArgumentException("Parameter data passed to discretize method was null!"); } if (Integer.parseInt(numBins) <= 0) { throw new OutOfRangeException( "numBins is out of range (should be strictly positive!\nnumBins=" + numBins); } try { String[] options = new String[2]; options[0] = "-B"; options[1] = numBins; Discretize discretize = new Discretize(); discretize.setOptions(options); discretize.setInputFormat(data); Instances newData = Filter.useFilter(data, discretize); return newData; } catch (Exception e) { System.out.println(e); e.printStackTrace(); } return null; }
From source file:org.uclab.mm.kcl.ddkat.datapreprocessor.DataDiscretizer.java
License:Apache License
/** * Method to discretize the input data using equal-width binning approach. * * @throws Exception the exception/*from w w w .j av a2 s .c o m*/ */ public void discretizeData() throws Exception { this.confirmationMessage = new ArrayList<String>(); Instances inputData, outputData; String inputFile = BASE_DIR + "OriginalDataSet.csv"; // load CSV file CSVLoader fileLoader = new CSVLoader(); fileLoader.setSource(new File(inputFile)); inputData = fileLoader.getDataSet(); Discretize discrete = new Discretize(); discrete.setInputFormat(inputData); outputData = Filter.useFilter(inputData, discrete); saveDiscretizedData(inputFile, outputData); }