Example usage for weka.filters.unsupervised.attribute Discretize setInputFormat

List of usage examples for weka.filters.unsupervised.attribute Discretize setInputFormat

Introduction

On this page you can find example usage for weka.filters.unsupervised.attribute Discretize setInputFormat.

Prototype

@Override
public boolean setInputFormat(Instances instanceInfo) throws Exception 

Source Link

Document

Sets the format of the input instances.

Usage

From source file:com.mycompany.id3classifier.ID3Shell.java

/**
 * Loads the lenses data set, discretizes and standardizes it, then
 * evaluates an ID3 classifier with 10-fold cross-validation and prints
 * the evaluation summary.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if loading, filtering, training, or evaluation fails
 */
public static void main(String[] args) throws Exception {
    ConverterUtils.DataSource source = new ConverterUtils.DataSource("lensesData.csv");
    Instances dataSet = source.getDataSet();

    // Discretize numeric attributes into nominal bins.
    Discretize filter = new Discretize();
    filter.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, filter);

    // Standardize attributes to zero mean and unit variance.
    Standardize standardize = new Standardize();
    standardize.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, standardize);

    // Last attribute is the class; shuffle with a fixed seed for reproducibility.
    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random(9001)); //It's over 9000!!

    int folds = 10;
    // Perform cross-validation, accumulating results across all folds.
    // (Removed unused trainingSize/testSize locals that were recomputed
    // every iteration but never read.)
    Evaluation eval = new Evaluation(dataSet);
    for (int n = 0; n < folds; n++) {
        Instances trainingData = dataSet.trainCV(folds, n);
        Instances testData = dataSet.testCV(folds, n);

        ID3Classifier classifier = new ID3Classifier();
        classifier.buildClassifier(trainingData);

        eval.evaluateModel(classifier, testData);
    }
    System.out.println(eval.toSummaryString("\nResults:\n", false));
}

From source file:demo.RunGUIProof.java

License:Open Source License

/**
 * Entry point for the Chordalysis GUI proof-of-concept: asks the user to
 * acknowledge the citation, pick an input CSV and an output graph file,
 * enter a p-value, discretizes any non-nominal attributes, runs the
 * Chordalysis model selection, and exports the selected model in the
 * format implied by the output file's extension.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    JOptionPane.showMessageDialog(null, introductionMessage, "Chordalysis", JOptionPane.INFORMATION_MESSAGE);

    // The user must agree to reference Chordalysis before continuing.
    int result = JOptionPane.showOptionDialog(null, new JTextArea(agreeCitation), "Reference",
            JOptionPane.YES_NO_OPTION, JOptionPane.QUESTION_MESSAGE, null, null, null);
    if (result == JOptionPane.NO_OPTION || result == JOptionPane.CLOSED_OPTION) {
        JOptionPane.showMessageDialog(null,
                "Chordalysis will now stop, because you do not want to reference its source. ", "Chordalysis",
                JOptionPane.WARNING_MESSAGE);
        System.exit(0);
    }

    // Let the user pick the input CSV file.
    JFileChooser chooser = new JFileChooser();
    FileNameExtensionFilter filter = new FileNameExtensionFilter("CSV file", "csv");
    chooser.setFileFilter(filter);
    int returnVal = chooser.showOpenDialog(null);
    File csvFile = null;
    if (returnVal == JFileChooser.APPROVE_OPTION) {
        csvFile = chooser.getSelectedFile();
        System.out.println("You chose to open: " + csvFile);
    } else {
        JOptionPane.showMessageDialog(null, noFileSelectedMessage, "Chordalysis", JOptionPane.ERROR_MESSAGE);
        return;
    }
    CSVLoader loader = new CSVLoader();
    if (!csvFile.exists()) {
        JOptionPane.showMessageDialog(null, noFileMessage, "Chordalysis", JOptionPane.INFORMATION_MESSAGE);
        return;
    }

    // Ask for a p-value until a valid one strictly inside (0, 1) is given.
    double pValue = -1;
    while (pValue <= 0 || 1 <= pValue) {
        // FIX: showInputDialog returns null when the dialog is cancelled or
        // closed; the previous code passed that null straight to
        // Double.valueOf, throwing a NullPointerException. Treat cancel as
        // "quit", and non-numeric input as invalid (re-prompt).
        String pValueInput = JOptionPane.showInputDialog("Desired p-value (between 0 and 1)", 0.05);
        if (pValueInput == null) {
            return; // user cancelled
        }
        try {
            pValue = Double.parseDouble(pValueInput);
        } catch (NumberFormatException nfe) {
            pValue = -1; // fall through to the warning below
        }
        if (pValue <= 0 || 1 <= pValue) {
            JOptionPane.showMessageDialog(null, incorrectPValueMessage, "Chordalysis",
                    JOptionPane.WARNING_MESSAGE);
        }
    }

    // Let the user pick where (and in which format) to save the graph.
    filter = new FileNameExtensionFilter("PNG or DOT or CSV file or DNE file", "png", "dot", "csv", "dne");
    chooser = new JFileChooser();
    chooser.setFileFilter(filter);
    chooser.setDialogTitle("Where to save the graph?");
    chooser.setSelectedFile(new File(csvFile.getAbsolutePath() + ".png"));
    returnVal = chooser.showSaveDialog(null);
    File graphFile = null;
    if (returnVal == JFileChooser.APPROVE_OPTION) {
        graphFile = chooser.getSelectedFile();
        System.out.println("You chose to save the graph to: " + graphFile.getAbsolutePath());
    } else {
        JOptionPane.showMessageDialog(null, noFileSelectedMessage, "Chordalysis", JOptionPane.ERROR_MESSAGE);
        return;
    }

    try {
        loader.setFile(csvFile);

        // FIX: corrected grammar in the user-facing prompt ("attribute" -> "attributes").
        returnVal = JOptionPane.showConfirmDialog(null, "Are all of your attributes nominal?", "Chordalysis",
                JOptionPane.YES_NO_OPTION);
        if (returnVal == JOptionPane.YES_OPTION) {
            loader.setNominalAttributes("first-last");
        }

        Instances instances = loader.getDataSet();

        // Collect the (1-based) indices of non-nominal attributes.
        String cols = "";
        for (int i = 0; i < instances.numAttributes(); i++) {
            Attribute att = instances.attribute(i);
            if (!att.isNominal()) {
                cols += (i + 1) + ",";
            }
        }
        if (!cols.isEmpty()) {
            cols = cols.substring(0, cols.length() - 1); // drop trailing comma
            // FIX: corrected spelling in the user-facing message ("atributes").
            String message = "Some attributes are not nominal (number " + cols
                    + "), please wait during discretization. ";
            JOptionPane.showMessageDialog(null, message, "Chordalysis", JOptionPane.INFORMATION_MESSAGE);
            // Discretize only those columns into 3 equal-frequency bins.
            Discretize discretizer = new Discretize(cols);
            discretizer.setUseEqualFrequency(true);
            discretizer.setBins(3);
            discretizer.setIgnoreClass(true);
            discretizer.setInputFormat(instances);
            instances = Filter.useFilter(instances, discretizer);
            JOptionPane.showMessageDialog(null, "Discretization is now finished.", "Chordalysis",
                    JOptionPane.INFORMATION_MESSAGE);
        }

        // Record attribute names and their possible values for export.
        String[] variablesNames = new String[instances.numAttributes()];
        String[][] outcomes = new String[instances.numAttributes()][];
        for (int i = 0; i < variablesNames.length; i++) {
            variablesNames[i] = instances.attribute(i).name();
            outcomes[i] = new String[instances.attribute(i).numValues()];
            for (int j = 0; j < outcomes[i].length; j++) {
                outcomes[i][j] = instances.attribute(i).value(j);
            }
        }

        // Run the actual Chordalysis model selection.
        ChordalysisModelling modeller = new ChordalysisModelling(pValue);
        modeller.buildModel(instances);
        DecomposableModel bestModel = modeller.getModel();
        JOptionPane.showMessageDialog(null, new JTextArea("Chordalysis has now finished analysing your data. "
                + "\nIf you found something useful, please reference Chordalysis as"
                + "\n\t- F. Petitjean, G.I. Webb and A. Nicholson, Scaling log-linear analysis to high-dimensional data, ICDM 2013"
                + "\n\t- F. Petitjean and G.I. Webb, Scaling log-linear analysis to datasets with thousands of variables, SDM 2015"
                + "\n\nYou can find the output file at: '" + graphFile.getAbsolutePath() + "'"), "Citation",
                JOptionPane.INFORMATION_MESSAGE);
        System.out.println("The model selected is:");
        System.out.println(bestModel.toString(variablesNames));
        // Export in the format implied by the chosen file extension.
        if (graphFile.getName().endsWith("dot")) {
            bestModel.exportDOT(graphFile, variablesNames);
        } else if (graphFile.getName().endsWith("png")) {
            ImageIO.write(bestModel.getImage(variablesNames), "png", graphFile);
        } else if (graphFile.getName().endsWith("dne")) {
            // Netica format: also export DOT, PNG, and the associations CSV.
            bestModel.exportBNNetica(graphFile, variablesNames, outcomes);
            bestModel.exportDOT(new File(graphFile.getAbsolutePath() + ".dot"), variablesNames);
            ImageIO.write(bestModel.getImage(variablesNames), "png",
                    new File(graphFile.getAbsolutePath() + ".png"));
            bestModel.saveAssociations(variablesNames, new File(graphFile.getAbsolutePath() + ".csv"));
        } else {
            bestModel.saveAssociations(variablesNames, graphFile);
        }

    } catch (IOException e) {
        JOptionPane.showMessageDialog(null,
                "The file '" + csvFile.getAbsolutePath() + "'\ncannot be read properly.",
                "Error while reading file", JOptionPane.ERROR_MESSAGE);
        System.out.println("I/O error while loading csv file");
        e.printStackTrace();
    } catch (Exception e) {
        JOptionPane.showMessageDialog(null, "Error:" + e.getMessage(), "Chordalysis",
                JOptionPane.ERROR_MESSAGE);
        e.printStackTrace();
    }

}

From source file:id3classifier.Main.java

/**
 * Loads the data set, applies discretization and standardization, does a
 * 70/30 train/test split, evaluates an ID3 classifier, prints the
 * summary, and exits explicitly (required to stop JavaFX).
 *
 * @param args command-line arguments (unused)
 * @throws Exception if loading, filtering, training, or evaluation fails
 */
public static void main(String[] args) throws Exception {

    ConverterUtils.DataSource dataSource = new ConverterUtils.DataSource(file);
    Instances data = dataSource.getDataSet();

    // Convert numeric attributes into nominal bins.
    Discretize discretizer = new Discretize();
    discretizer.setInputFormat(data);
    data = Filter.useFilter(data, discretizer);

    // Scale attributes to zero mean and unit variance.
    Standardize standardizer = new Standardize();
    standardizer.setInputFormat(data);
    data = Filter.useFilter(data, standardizer);

    // Last attribute is the class; shuffle before splitting.
    data.setClassIndex(data.numAttributes() - 1);
    data.randomize(new Debug.Random());

    // 70% of the instances for training, the remainder for testing.
    int trainCount = (int) Math.round(data.numInstances() * .7);
    int testCount = data.numInstances() - trainCount;
    Instances trainSet = new Instances(data, 0, trainCount);
    Instances testSet = new Instances(data, trainCount, testCount);

    // Train the ID3 classifier on the training partition.
    ID3Classifiers learner = new ID3Classifiers();
    learner.buildClassifier(trainSet);

    // Evaluate against the held-out test partition.
    Evaluation evaluation = new Evaluation(data);
    evaluation.evaluateModel(learner, testSet);

    System.out.println(evaluation.toSummaryString("\nResults\n======\n", false));
    System.exit(0); // exit here to stop JavaFX
}

From source file:machinelearningq2.ExtendedNaiveBayes.java

/**
 * Discretizes the given instances with WEKA's default Discretize filter,
 * tallies how many values land in each bin across every attribute except
 * the last (stored in the {@code binCount} field), and returns the
 * filtered data.
 *
 * @param instnc the instances to discretize
 * @return the discretized instances
 * @throws Exception if the filter cannot be applied
 */
public Instances discretize(Instances instnc) throws Exception {
    Discretize discretizer = new Discretize();
    discretizer.setInputFormat(instnc);
    Instances discretized = Filter.useFilter(instnc, discretizer);

    // One counter per bin, sized from the filter's configured bin count.
    binCount = new double[discretizer.getBins()];

    // Accumulate bin frequencies over all attributes except the last
    // (presumably the class attribute — TODO confirm).
    for (Instance row : discretized) {
        int lastAttribute = discretized.numAttributes() - 1;
        for (int attr = 0; attr < lastAttribute; attr++) {
            binCount[(int) row.value(attr)]++;
        }
    }
    return discretized;
}

From source file:NaiveBayes.Atribut.java

/**
 * Builds a wrapper for attribute {@code i} of {@code ints}. A numeric
 * attribute is first discretized (on a copy) so its binned values can be
 * enumerated; a nominal attribute is used directly, with whitespace
 * stripped from its name. One {@code Nilai} is created per value.
 *
 * @param ints       the data set
 * @param i          index of the attribute to wrap
 * @param classindex index of the class attribute
 * @throws Exception if discretization fails
 */
public Atribut(Instances ints, int i, int classindex) throws Exception {
    boolean numeric = ints.attribute(i).isNumeric();
    if (numeric) {
        // Discretize a copy to learn how many bins this attribute gets.
        Instances discretized = new Instances(ints);
        Discretize discretizer = new Discretize();
        discretizer.setInputFormat(discretized);
        discretized = Filter.useFilter(discretized, discretizer);
        name = ints.attribute(i).name();
        listNilai = new ArrayList<>();
        int valueCount = discretized.attribute(i).numValues();
        for (int v = 0; v < valueCount; v++) {
            // NOTE(review): Nilai is constructed from the ORIGINAL data,
            // not the discretized copy — it appears to re-discretize
            // internally; confirm against Nilai's constructor.
            listNilai.add(new Nilai(ints, i, v, classindex));
        }
    } else {
        name = ints.attribute(i).name().replaceAll("\\s+", "");
        listNilai = new ArrayList<>();
        int valueCount = ints.attribute(i).numValues();
        for (int v = 0; v < valueCount; v++) {
            listNilai.add(new Nilai(ints, i, v, classindex));
        }
    }

}

From source file:NaiveBayes.Nilai.java

/**
 * Represents one value (a discretization bin or a nominal value) of
 * attribute {@code i}, and counts — with a count starting at 1, i.e.
 * Laplace-style smoothing — how often that value co-occurs with each
 * class value. For numeric attributes the data is discretized first and
 * {@code lower}/{@code upper} record the bin's boundaries taken from the
 * filter's cut points.
 *
 * @param inst       the data set
 * @param i          index of the attribute
 * @param j          index of the value within the attribute
 * @param classindex index of the class attribute
 * @throws Exception if discretization fails
 */
public Nilai(Instances inst, int i, int j, int classindex) throws Exception {
    Instances newData = new Instances(inst);
    numClass = NaiveBayes.getNumEachClass(newData);
    lower = 0;
    upper = 0;
    kelas = new ArrayList<>();
    //if(newData.instance(i).isMissing(j)) newData.instance(i).setValue(i, "b");
    if (newData.attribute(i).isNumeric()) {
        // Discretize the copy so bin labels and cut points are available.
        Discretize f = new Discretize();
        f.setInputFormat(newData);
        newData = Filter.useFilter(newData, f);
        name = newData.attribute(i).value(j);
        // Derive the numeric interval [lower, upper) represented by bin j.
        if (f.getCutPoints(i) != null) {
            if (j == 0) {
                // First bin: open-ended on the left.
                lower = Double.NEGATIVE_INFINITY;
                upper = f.getCutPoints(i)[j];
            } else {
                // NOTE(review): this checks attribute(0)'s value count, not
                // attribute(i)'s — looks suspicious; confirm intent.
                if (j != newData.attribute(0).numValues() - 1) {
                    lower = f.getCutPoints(i)[j - 1];
                    upper = f.getCutPoints(i)[j];
                } else {
                    // Last bin: open-ended on the right.
                    lower = f.getCutPoints(i)[j - 1];
                    upper = Double.POSITIVE_INFINITY;
                }
            }
        } else {
            // No cut points: the whole real line maps to this single bin.
            lower = Double.NEGATIVE_INFINITY;
            upper = Double.POSITIVE_INFINITY;
        }
        for (int k = 0; k < inst.attribute(classindex).numValues(); k++) { // for each class value
            double cnt = 1; // smoothed count starts at 1
            int countClass = 0;
            for (int l = 0; l < inst.numInstances(); l++) { // over all instances
                double val = inst.get(l).value(i);
                if (countClass <= numClass[k]) {
                    if (inst.attribute(classindex).value(k).equalsIgnoreCase(
                            inst.get(l).toString(classindex).replaceAll("'", ""))) {/* class name matches */
                        if (val >= lower && val < upper) {// the raw value falls inside this bin
                            // same class AND the attribute value is within [lower, upper)
                            cnt += 1;
                        }
                        countClass++;
                    }
                } else
                    break;
            }
            kelas.add(new Kelas(newData.attribute(classindex).value(k), cnt));
        }
    } else {
        //System.out.println(newData.attribute(i).value(j).replaceAll("\\s+", ""));
        // Nominal value: use the whitespace-stripped value as the name.
        name = newData.attribute(i).value(j).replaceAll("\\s", "");
        //System.out.println(name);
        //System.out.println(name);
        for (int k = 0; k < inst.attribute(classindex).numValues(); k++) { // for each class value
            double cnt = 1; // smoothed count starts at 1
            int countClass = 0;
            for (int l = 0; l < inst.numInstances(); l++) { // over all instances
                if (countClass <= numClass[k]) {
                    //System.out.println("with whitespace "+inst.attribute(i).value(j)+"without "+inst.attribute(i).value(j).replaceAll("\\s", "")+"p");
                    //                        System.out.println(inst.get(l).toString(classindex));
                    //System.out.println(inst.attribute(classindex).value(k));
                    if (inst.attribute(classindex).value(k).replaceAll("\\s", "")
                            .equalsIgnoreCase(inst.get(l).toString(classindex).replaceAll("\\s", ""))// class name matches
                            && inst.attribute(i).value(j).replaceAll("\\s", "").// and the attribute value matches
                                    equalsIgnoreCase(inst.get(l).toString(i).replaceAll("\\s", ""))) {
                        // same class AND same attribute value:
                        // count the co-occurrence
                        cnt += 1;
                        countClass++;
                    }
                } else
                    break;
            }
            kelas.add(new Kelas(newData.attribute(classindex).value(k).replaceAll("\\s+", ""), cnt));
        }
    }
}

From source file:NaiveBayesPckge.NaiveBayesMain.java

/**
 * Applies WEKA's Discretize filter — 6 bins over all attributes
 * ("first-last") — to the given data set and returns the filtered copy.
 *
 * @param dataSet the instances to discretize
 * @return a new, discretized data set
 * @throws Exception if the options are invalid or filtering fails
 */
public static Instances useFilterDiscritize(Instances dataSet) throws Exception {
    // -B 6 : number of bins; -R first-last : attribute range to filter.
    String[] filterOptions = { "-B", "6", "-R", "first-last" };
    System.out.println("> Filtering dataset using Discretize\n");
    Discretize discretizeFilter = new Discretize();
    discretizeFilter.setOptions(filterOptions);
    discretizeFilter.setInputFormat(dataSet);
    return Filter.useFilter(dataSet, discretizeFilter);
}

From source file:NaiveBayesPckge.NaiveBayesMain.java

/**
 * Reads attribute values for a new instance from standard input,
 * classifies it with the static {@code naive} classifier, and prints the
 * predicted class label.
 *
 * @param instances the data set supplying the attribute/class structure
 * @throws Exception if the Discretize filter rejects the input format
 */
public static void addNewInstance(Instances instances) throws Exception {
    Scanner scan = new Scanner(System.in);
    // NOTE(review): `atts` is built below but never used afterwards.
    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    ArrayList<String> classVal = new ArrayList<String>();
    int nConclus = instances.attribute(instances.numAttributes() - 1).numValues();
    int numAttribut = instances.numAttributes();

    // Collect the possible class labels (e.g. T and F).
    for (int i = 0; i < nConclus; i++) {
        classVal.add(instances.attribute(instances.numAttributes() - 1).value(i));
    }

    // Rebuild the attribute list (all predictors plus the class attribute).
    for (int i = 0; i < numAttribut - 1; i++) {
        atts.add(new Attribute(instances.attribute(i).name()));
    }
    atts.add(new Attribute(instances.attribute(numAttribut - 1).name(), classVal));

    // Read one numeric value per predictor attribute from the console.
    double[] attValues = new double[numAttribut];
    System.out.print("Masukkan nilai : ");
    for (int i = 0; i < numAttribut - 1; i++) {
        attValues[i] = scan.nextDouble();
    }
    Discretize discretize = new Discretize();
    // Consumes the trailing newline left by nextDouble; `s` is unused.
    String s = scan.nextLine();

    Instance instance = new DenseInstance(1.0, attValues);

    instance.setDataset(instances);

    // NOTE(review): the instance is pushed into the filter but
    // discretize.output() is never called — the raw (undiscretized)
    // instance is what gets classified below. Confirm this is intended.
    discretize.setInputFormat(instances);
    discretize.input(instance);

    int classify1 = (int) naive.classifyInstance(instance);
    System.out.print("Prediction Class : ");
    System.out.println(classVal.get(classify1));
}

From source file:org.tigr.microarray.mev.cluster.gui.impl.bn.PrepareArrayDataModule.java

License:Open Source License

/**
 * The <code>discretize</code> method takes a WEKA Instances object
 * corresponding to the gene expression data and returns a new WEKA
 * Instances object with the data discretized into a given number of
 * equal-width bins.
 *
 * @param data an <code>Instances</code> — the WEKA data to discretize
 * @param numBins a <code>String</code> holding the number of bins
 * @return an <code>Instances</code> with the data discretized into
 * equal-width bins, or {@code null} if the WEKA filter itself fails
 * (the exception is logged, not rethrown)
 * @exception NullArgumentException if the data is null
 * @exception OutOfRangeException if numBins is negative or zero
 */
public static Instances discretize(Instances data, String numBins)
        throws NullArgumentException, OutOfRangeException {
    if (data == null) {
        throw new NullArgumentException("Parameter data passed to discretize method was null!");
    }
    if (Integer.parseInt(numBins) <= 0) {
        throw new OutOfRangeException(
                "numBins is out of range (should be strictly positive!\nnumBins=" + numBins);
    }
    try {
        // -B <numBins>: the number of equal-width bins for the filter.
        Discretize discretizeFilter = new Discretize();
        discretizeFilter.setOptions(new String[] { "-B", numBins });
        discretizeFilter.setInputFormat(data);
        return Filter.useFilter(data, discretizeFilter);
    } catch (Exception e) {
        // Preserve existing contract: log the failure and fall through
        // to return null rather than propagating the exception.
        System.out.println(e);
        e.printStackTrace();
    }
    return null;
}

From source file:org.uclab.mm.kcl.ddkat.datapreprocessor.DataDiscretizer.java

License:Apache License

/**
 * Method to discretize the input data using equal-width binning approach.
 *
 * @throws Exception the exception/*from  w w  w  .j  av a2 s .c  o m*/
 */
public void discretizeData() throws Exception {

    this.confirmationMessage = new ArrayList<String>();

    Instances inputData, outputData;
    String inputFile = BASE_DIR + "OriginalDataSet.csv";

    // load CSV file
    CSVLoader fileLoader = new CSVLoader();
    fileLoader.setSource(new File(inputFile));
    inputData = fileLoader.getDataSet();

    Discretize discrete = new Discretize();
    discrete.setInputFormat(inputData);
    outputData = Filter.useFilter(inputData, discrete);

    saveDiscretizedData(inputFile, outputData);

}