List of usage examples for the weka.filters.unsupervised.attribute.Discretize constructor:
public Discretize()
From source file:com.mycompany.id3classifier.ID3Shell.java
public static void main(String[] args) throws Exception { ConverterUtils.DataSource source = new ConverterUtils.DataSource("lensesData.csv"); Instances dataSet = source.getDataSet(); Discretize filter = new Discretize(); filter.setInputFormat(dataSet);/*from ww w . ja v a2s .com*/ dataSet = Filter.useFilter(dataSet, filter); Standardize standardize = new Standardize(); standardize.setInputFormat(dataSet); dataSet = Filter.useFilter(dataSet, standardize); dataSet.setClassIndex(dataSet.numAttributes() - 1); dataSet.randomize(new Random(9001)); //It's over 9000!! int folds = 10; //Perform crossvalidation Evaluation eval = new Evaluation(dataSet); for (int n = 0; n < folds; n++) { int trainingSize = (int) Math.round(dataSet.numInstances() * .7); int testSize = dataSet.numInstances() - trainingSize; Instances trainingData = dataSet.trainCV(folds, n); Instances testData = dataSet.testCV(folds, n); ID3Classifier classifier = new ID3Classifier(); // Id3 classifier = new Id3(); classifier.buildClassifier(trainingData); eval.evaluateModel(classifier, testData); } System.out.println(eval.toSummaryString("\nResults:\n", false)); }
From source file:id3classifier.Main.java
public static void main(String[] args) throws Exception { ConverterUtils.DataSource source = new ConverterUtils.DataSource(file); Instances dataSet = source.getDataSet(); // discretize the dataset Discretize filter = new Discretize(); filter.setInputFormat(dataSet);/*w w w. ja v a 2 s .c om*/ dataSet = Filter.useFilter(dataSet, filter); // standardize the dataset Standardize standardizedData = new Standardize(); standardizedData.setInputFormat(dataSet); dataSet = Filter.useFilter(dataSet, standardizedData); // randomize the dataset dataSet.setClassIndex(dataSet.numAttributes() - 1); dataSet.randomize(new Debug.Random()); // get the sizes of the training and testing sets and split int trainingSize = (int) Math.round(dataSet.numInstances() * .7); int testSize = dataSet.numInstances() - trainingSize; Instances training = new Instances(dataSet, 0, trainingSize); Instances test = new Instances(dataSet, trainingSize, testSize); // set up the ID3 classifier on the training data ID3Classifiers classifier = new ID3Classifiers(); classifier.buildClassifier(training); // set up the evaluation and test using the classifier and test set Evaluation eval = new Evaluation(dataSet); eval.evaluateModel(classifier, test); // outup and kill, important to exit here to stop javaFX System.out.println(eval.toSummaryString("\nResults\n======\n", false)); System.exit(0); }
From source file:imba.classifier.NBTubes.java
/**
 * Builds the Naive Bayes model: normalizes and/or discretizes the training
 * data, then tallies per-attribute-value class counts and converts them into
 * conditional probabilities.
 *
 * @param data the training instances (class index expected to be set)
 */
@Override
public void buildClassifier(Instances data) {
    dataClassifier = new ArrayList<>();
    infoClassifier = new ArrayList<>();
    validAttribute = new ArrayList<>();
    dataset = null;
    sumClass = null;
    dataSize = 0;
    header_Instances = data;

    Filter f;
    int i, j, k, l, m;
    int sumVal; // NOTE(review): declared but never used in this method
    int numAttr = data.numAttributes(); // this count includes the class attribute, so attributes + 1

    // Scan for any numeric attribute, skipping the class attribute.
    i = 0;
    while (i < numAttr && wasNumeric == false) {
        if (i == classIdx) {
            i++;
        }
        if (i != numAttr && data.attribute(i).isNumeric()) {
            wasNumeric = true;
        }
        i++;
    }

    Instance p;

    // Apply filters: first normalize when numeric attributes are present...
    if (wasNumeric) {
        f = new Normalize();
        try {
            f.setInputFormat(data);
            for (Instance i1 : data) {
                f.input(i1);
            }
            f.batchFinished();
        } catch (Exception ex) {
            Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
        }
        dataset = f.getOutputFormat();
        while ((p = f.output()) != null) {
            dataset.add(p);
        }
    }

    // ...then convert numeric attributes to nominal, either via
    // discretization or a direct numeric-to-nominal conversion,
    // depending on the configured `filter` name.
    if (filter.equals("Discretize")) {
        f = new Discretize();
    } else {
        f = new NumericToNominal();
    }
    try {
        if (wasNumeric) {
            // Feed the normalized copy through the second filter.
            f.setInputFormat(dataset);
            for (Instance i1 : dataset) {
                f.input(i1);
            }
        } else {
            f.setInputFormat(data);
            for (Instance i1 : data) {
                f.input(i1);
            }
        }
        f.batchFinished();
    } catch (Exception ex) {
        Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
    }
    dataset = null;
    dataset = f.getOutputFormat();
    while ((p = f.output()) != null) {
        dataset.add(p);
    }

    // Build the count data structures.
    classIdx = data.classIndex();
    dataSize = data.size();

    // Fill dataClassifier/infoClassifier with zeroed tables shaped
    // [attribute][attributeValue][classValue]. m appears to map attribute
    // index j to its slot once the class attribute is skipped — the i/j
    // bookkeeping here is subtle; TODO confirm against classIdx positions.
    i = 0;
    j = i;
    while (j < numAttr) {
        if (i == classIdx) {
            i++;
        } else {
            dataClassifier.add(new ArrayList<>());
            infoClassifier.add(new ArrayList<>());
            if (j < i) {
                m = j - 1;
            } else {
                m = j;
            }
            k = 0;
            while (k < dataset.attribute(j).numValues()) {
                dataClassifier.get(m).add(new ArrayList<>());
                infoClassifier.get(m).add(new ArrayList<>());
                l = 0;
                while (l < dataset.attribute(classIdx).numValues()) {
                    dataClassifier.get(m).get(k).add(0);
                    infoClassifier.get(m).get(k).add(0.0);
                    l++;
                }
                k++;
            }
        }
        i++;
        j++;
    }

    // Fill the count table from the filtered dataset; class frequencies
    // are counted once per instance (only when m == 0).
    sumClass = new int[data.numClasses()];
    i = 0;
    while (i < dataset.size()) {
        j = 0;
        k = j;
        while (k < dataset.numAttributes()) {
            if (j == classIdx) {
                j++;
            } else {
                if (k < j) {
                    m = k - 1;
                } else {
                    m = k;
                }
                dataClassifier.get(m).get((int) dataset.get(i).value(k)).set(
                        (int) dataset.get(i).value(classIdx),
                        dataClassifier.get(m).get((int) dataset.get(i).value(k))
                                .get((int) dataset.get(i).value(classIdx)) + 1);
                if (m == 0) {
                    sumClass[(int) dataset.get(i).value(classIdx)]++;
                }
            }
            k++;
            j++;
        }
        i++;
    }

    // Convert raw counts into per-class relative frequencies.
    i = 0;
    while (i < dataClassifier.size()) {
        j = 0;
        while (j < dataClassifier.get(i).size()) {
            k = 0;
            while (k < dataClassifier.get(i).get(j).size()) {
                infoClassifier.get(i).get(j).set(k,
                        (double) dataClassifier.get(i).get(j).get(k) / sumClass[k]);
                k++;
            }
            j++;
        }
        i++;
    }
}
From source file:machinelearningq2.ExtendedNaiveBayes.java
/**
 * Discretizes the given instances with Weka's unsupervised Discretize
 * filter and tallies, into the {@code binCount} field, how often each bin
 * index occurs across all non-class attributes.
 *
 * @param instnc the instances to discretize
 * @return the discretized instances
 * @throws Exception if the filter cannot be applied
 */
public Instances discretize(Instances instnc) throws Exception {
    Discretize discretizer = new Discretize();
    discretizer.setInputFormat(instnc);
    Instances discretized = Filter.useFilter(instnc, discretizer);

    // One counter slot per bin produced by the filter.
    binCount = new double[discretizer.getBins()];

    // Skip the last attribute, which is treated as the class.
    int attributeCount = discretized.numAttributes() - 1;
    for (int row = 0; row < discretized.numInstances(); row++) {
        Instance current = discretized.instance(row);
        for (int a = 0; a < attributeCount; a++) {
            binCount[(int) current.value(a)]++;
        }
    }
    return discretized;
}
From source file:milk.classifiers.MIBoost.java
License:Open Source License
/**
 * Builds the boosted multi-instance classifier.
 *
 * @param exps the training exemplars (bags) used for generating the
 *             boosted classifier
 * @exception Exception if the classifier could not be built successfully
 */
public void buildClassifier(Exemplars exps) throws Exception {
    Exemplars train = new Exemplars(exps);

    // --- Validate the training data and configuration. ---
    if (train.classAttribute().type() != Attribute.NOMINAL) {
        throw new Exception("Class attribute must be nominal.");
    }
    if (train.checkForStringAttributes()) {
        throw new Exception("Can't handle string attributes!");
    }
    m_ClassIndex = train.classIndex();
    m_IdIndex = train.idIndex();
    m_NumClasses = train.numClasses();
    m_NumIterations = m_MaxIterations;
    if (m_NumClasses > 2) {
        throw new Exception("Not yet prepared to deal with multiple classes!");
    }
    if (m_Classifier == null)
        throw new Exception("A base classifier has not been specified!");
    if (!(m_Classifier instanceof WeightedInstancesHandler))
        throw new Exception("Base classifier cannot handle weighted instances!");

    m_Models = Classifier.makeCopies(m_Classifier, getMaxIterations());
    if (m_Debug)
        System.err.println("Base classifier: " + m_Classifier.getClass().getName());

    m_Beta = new double[m_NumIterations];
    m_Attributes = new Instances(train.exemplar(0).getInstances(), 0);

    // --- Flatten bags into one weighted data set. Each bag gets weight
    // sumNi/N, spread evenly over its instances. ---
    double N = (double) train.numExemplars(), sumNi = 0;
    Instances data = new Instances(m_Attributes, 0); // Data to learn a model
    data.deleteAttributeAt(m_IdIndex); // ID attribute useless
    Instances dataset = new Instances(data, 0);

    // Initialize weights
    for (int i = 0; i < N; i++)
        sumNi += train.exemplar(i).getInstances().numInstances();
    for (int i = 0; i < N; i++) {
        Exemplar exi = train.exemplar(i);
        exi.setWeight(sumNi / N);
        Instances insts = exi.getInstances();
        double ni = (double) insts.numInstances();
        for (int j = 0; j < ni; j++) {
            Instance ins = new Instance(insts.instance(j)); // Copy
            ins.deleteAttributeAt(m_IdIndex);
            ins.setDataset(dataset);
            ins.setWeight(exi.weight() / ni);
            data.add(ins);
        }
    }

    // Assume the order of the instances is preserved by the Discretize filter.
    if (m_DiscretizeBin > 0) {
        m_Filter = new Discretize();
        m_Filter.setInputFormat(new Instances(data, 0));
        m_Filter.setBins(m_DiscretizeBin);
        data = Filter.useFilter(data, m_Filter);
    }

    // --- Main boosting loop. ---
    int dataIdx;
    iterations: for (int m = 0; m < m_MaxIterations; m++) {
        if (m_Debug)
            System.err.println("\nIteration " + m);

        // Build a model on the current instance weights.
        m_Models[m].buildClassifier(data);

        // Weighted instance-wise 0-1 error of each bag; dataIdx walks the
        // flattened data in the same order the bags were appended.
        double[] err = new double[(int) N], weights = new double[(int) N];
        boolean perfect = true, tooWrong = true;
        dataIdx = 0;
        for (int n = 0; n < N; n++) {
            Exemplar exn = train.exemplar(n);
            double nn = (double) exn.getInstances().numInstances();
            for (int p = 0; p < nn; p++) {
                Instance testIns = data.instance(dataIdx++);
                if ((int) m_Models[m].classifyInstance(testIns) != (int) exn.classValue())
                    err[n]++;
            }
            weights[n] = exn.weight();
            err[n] /= nn;
            if (err[n] > 0.5)
                perfect = false;
            if (err[n] < 0.5)
                tooWrong = false;
        }

        if (perfect || tooWrong) { // No or 100% classification error: cannot fit beta
            if (m == 0)
                m_Beta[m] = 1.0;
            else
                m_Beta[m] = 0;
            m_NumIterations = m + 1;
            if (m_Debug)
                System.err.println("No errors");
            break iterations;
        }

        // Numerically search for this model's weight c (= beta), starting
        // at 0 with unbounded search range (NaN bounds).
        double[] x = new double[1];
        x[0] = 0;
        double[][] b = new double[2][x.length];
        b[0][0] = Double.NaN;
        b[1][0] = Double.NaN;

        OptEng opt = new OptEng();
        opt.setWeights(weights);
        opt.setErrs(err);
        if (m_Debug)
            System.out.println("Start searching for c... ");
        x = opt.findArgmin(x, b);
        while (x == null) {
            // Optimizer ran out of iterations; resume from its last point.
            x = opt.getVarbValues();
            if (m_Debug)
                System.out.println("200 iterations finished, not enough!");
            x = opt.findArgmin(x, b);
        }
        if (m_Debug)
            System.out.println("Finished.");
        m_Beta[m] = x[0];

        if (m_Debug)
            System.err.println("c = " + m_Beta[m]);

        // Stop if error too small, or too big — then ignore this model.
        if (Double.isInfinite(m_Beta[m]) || Utils.smOrEq(m_Beta[m], 0)) {
            if (m == 0)
                m_Beta[m] = 1.0;
            else
                m_Beta[m] = 0;
            m_NumIterations = m + 1;
            if (m_Debug)
                System.err.println("Errors out of range!");
            break iterations;
        }

        // Update bag weights exponentially by error, then renormalize and
        // push the new weights down to the individual instances.
        dataIdx = 0;
        double totWeights = 0;
        for (int r = 0; r < N; r++) {
            Exemplar exr = train.exemplar(r);
            exr.setWeight(weights[r] * Math.exp(m_Beta[m] * (2.0 * err[r] - 1.0)));
            totWeights += exr.weight();
        }
        if (m_Debug)
            System.err.println("Total weights = " + totWeights);
        for (int r = 0; r < N; r++) {
            Exemplar exr = train.exemplar(r);
            double num = (double) exr.getInstances().numInstances();
            exr.setWeight(sumNi * exr.weight() / totWeights);
            for (int s = 0; s < num; s++) {
                Instance inss = data.instance(dataIdx);
                inss.setWeight(exr.weight() / num);
                if (Double.isNaN(inss.weight()))
                    throw new Exception("instance " + s + " in bag " + r + " has weight NaN!");
                dataIdx++;
            }
        }
    }
}
From source file:NaiveBayes.Atribut.java
/**
 * Builds one attribute model: the attribute's name plus a Nilai entry for
 * each of its (possibly discretized) values.
 *
 * @param ints       the training instances
 * @param i          index of the attribute to model
 * @param classindex index of the class attribute
 * @throws Exception if the Discretize filter fails
 */
public Atribut(Instances ints, int i, int classindex) throws Exception {
    if (ints.attribute(i).isNumeric()) {
        // Numeric attribute: discretize a copy to learn how many value
        // bins the attribute has; each Nilai re-derives its own interval
        // from the original (unfiltered) instances.
        Instances newData = new Instances(ints);
        Discretize f = new Discretize();
        f.setInputFormat(newData);
        newData = Filter.useFilter(newData, f);
        // NOTE(review): unlike the nominal branch below, the name is NOT
        // whitespace-stripped here — confirm whether that is intended.
        name = ints.attribute(i).name();
        listNilai = new ArrayList<>();
        for (int j = 0; j < newData.attribute(i).numValues(); j++) {
            listNilai.add(new Nilai(ints, i, j, classindex));
        }
    } else {
        // Nominal attribute: strip whitespace from the name and add one
        // Nilai per declared value.
        name = ints.attribute(i).name().replaceAll("\\s+", "");
        listNilai = new ArrayList<>();
        for (int j = 0; j < ints.attribute(i).numValues(); j++) {
            listNilai.add(new Nilai(ints, i, j, classindex));
        }
    }
}
From source file:NaiveBayes.Nilai.java
/**
 * Represents one value of an attribute together with per-class counts.
 * For a numeric attribute the value is a discretized interval
 * [lower, upper); for a nominal attribute it is the literal value string.
 *
 * @param inst       the training instances
 * @param i          index of the attribute this value belongs to
 * @param j          index of the value within the attribute
 * @param classindex index of the class attribute
 * @throws Exception if the Discretize filter fails
 */
public Nilai(Instances inst, int i, int j, int classindex) throws Exception {
    Instances newData = new Instances(inst);
    numClass = NaiveBayes.getNumEachClass(newData);
    lower = 0;
    upper = 0;
    kelas = new ArrayList<>();
    if (newData.attribute(i).isNumeric()) {
        // Numeric attribute: discretize, then derive the [lower, upper)
        // interval of bin j from the filter's cut points.
        Discretize f = new Discretize();
        f.setInputFormat(newData);
        newData = Filter.useFilter(newData, f);
        name = newData.attribute(i).value(j);
        if (f.getCutPoints(i) != null) {
            if (j == 0) {
                lower = Double.NEGATIVE_INFINITY;
                upper = f.getCutPoints(i)[j];
            } else {
                // NOTE(review): this reads attribute(0) rather than
                // attribute(i) — looks like a bug; confirm before fixing.
                if (j != newData.attribute(0).numValues() - 1) {
                    lower = f.getCutPoints(i)[j - 1];
                    upper = f.getCutPoints(i)[j];
                } else {
                    lower = f.getCutPoints(i)[j - 1];
                    upper = Double.POSITIVE_INFINITY;
                }
            }
        } else {
            // No cut points: a single bin covering the whole range.
            lower = Double.NEGATIVE_INFINITY;
            upper = Double.POSITIVE_INFINITY;
        }
        for (int k = 0; k < inst.attribute(classindex).numValues(); k++) { // for each class label
            double cnt = 1; // count starts at 1 (add-one smoothing, presumably — confirm)
            int countClass = 0;
            for (int l = 0; l < inst.numInstances(); l++) { // over all instances
                double val = inst.get(l).value(i);
                if (countClass <= numClass[k]) {
                    if (inst.attribute(classindex).value(k).equalsIgnoreCase(
                            inst.get(l).toString(classindex).replaceAll("'", ""))) { // class label matches
                        // Count instances of class k whose raw value falls
                        // in [lower, upper).
                        if (val >= lower && val < upper) {
                            cnt += 1;
                        }
                        countClass++;
                    }
                } else
                    break;
            }
            kelas.add(new Kelas(newData.attribute(classindex).value(k), cnt));
        }
    } else {
        // Nominal attribute: match the literal value, whitespace stripped.
        name = newData.attribute(i).value(j).replaceAll("\\s", "");
        for (int k = 0; k < inst.attribute(classindex).numValues(); k++) { // for each class label
            double cnt = 1; // count starts at 1 (add-one smoothing, presumably — confirm)
            int countClass = 0;
            for (int l = 0; l < inst.numInstances(); l++) { // over all instances
                if (countClass <= numClass[k]) {
                    if (inst.attribute(classindex).value(k).replaceAll("\\s", "")
                            .equalsIgnoreCase(inst.get(l).toString(classindex).replaceAll("\\s", "")) // class matches
                            && inst.attribute(i).value(j).replaceAll("\\s", "") // and attribute value matches
                                    .equalsIgnoreCase(inst.get(l).toString(i).replaceAll("\\s", ""))) {
                        cnt += 1;
                        countClass++;
                    }
                } else
                    break;
            }
            kelas.add(new Kelas(newData.attribute(classindex).value(k).replaceAll("\\s+", ""), cnt));
        }
    }
}
From source file:NaiveBayesPckge.NaiveBayesMain.java
public static Instances useFilterDiscritize(Instances dataSet) throws Exception { //set options String[] optionsFilter = new String[4]; //choose the number of intervals, e.g 2: optionsFilter[0] = "-B"; optionsFilter[1] = "6"; //choose the range of attributes on which to apply the filter: optionsFilter[2] = "-R"; optionsFilter[3] = "first-last"; System.out.println("> Filtering dataset using Discretize\n"); //Apply Discretization Discretize discretize = new Discretize(); discretize.setOptions(optionsFilter); discretize.setInputFormat(dataSet);/*w w w .j ava2 s .c o m*/ Instances newDataTemp = Filter.useFilter(dataSet, discretize); return newDataTemp; }
From source file:NaiveBayesPckge.NaiveBayesMain.java
/**
 * Reads one new instance's attribute values from stdin, classifies it with
 * the trained {@code naive} model, and prints the predicted class label.
 *
 * @param instances the data set supplying the attribute/class definitions
 * @throws Exception if the filter or the classifier fails
 */
public static void addNewInstance(Instances instances) throws Exception {
    Scanner scan = new Scanner(System.in);
    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    ArrayList<String> classVal = new ArrayList<String>();
    int nConclus = instances.attribute(instances.numAttributes() - 1).numValues();
    int numAttribut = instances.numAttributes();

    // Collect the possible class label strings (e.g. T and F).
    for (int i = 0; i < nConclus; i++) {
        classVal.add(instances.attribute(instances.numAttributes() - 1).value(i));
    }

    // Rebuild the attribute list: plain attributes plus the nominal class.
    // NOTE(review): `atts` is never read after this — dead code; confirm
    // before removing.
    for (int i = 0; i < numAttribut - 1; i++) {
        atts.add(new Attribute(instances.attribute(i).name()));
    }
    atts.add(new Attribute(instances.attribute(numAttribut - 1).name(), classVal));

    // Prompt for one numeric value per non-class attribute.
    double[] attValues = new double[numAttribut];
    System.out.print("Masukkan nilai : "); // prompt text ("Enter values: ")
    for (int i = 0; i < numAttribut - 1; i++) {
        attValues[i] = scan.nextDouble();
    }

    Discretize discretize = new Discretize();
    String s = scan.nextLine(); // consumes the rest of the input line; value itself unused

    Instance instance = new DenseInstance(1.0, attValues);
    instance.setDataset(instances);

    // NOTE(review): the filter's output is never retrieved — the raw
    // (undiscretized) instance is what gets classified below. Confirm intent.
    discretize.setInputFormat(instances);
    discretize.input(instance);

    int classify1 = (int) naive.classifyInstance(instance);
    System.out.print("Prediction Class : ");
    System.out.println(classVal.get(classify1));
}
From source file:org.tigr.microarray.mev.cluster.gui.impl.bn.PrepareArrayDataModule.java
License:Open Source License
/** * The <code>discretize</code> method is given a WEKA Instances object corresponding to the gene expression data * and returns a new WEKA Instances object with the given data discretized into a given number of equal-width bins * * @param data an <code>Instances</code> which is a WEKA Instances object corresponding to the gene expression data * @param numBins a <code>String</code> corresponding to the number of bins in which the data is to be discretized * @return an <code>Instances</code> a new WEKA Instances object with the given data discretized * into a given number of equal-width bins * @exception NullArgumentException if an error occurs if the data is null * @exception OutOfRangeException if an error occurs if the numBins is out of bounds (namely, negative or equal to zero) */// w w w.j ava2s. c o m public static Instances discretize(Instances data, String numBins) throws NullArgumentException, OutOfRangeException { if (data == null) { throw new NullArgumentException("Parameter data passed to discretize method was null!"); } if (Integer.parseInt(numBins) <= 0) { throw new OutOfRangeException( "numBins is out of range (should be strictly positive!\nnumBins=" + numBins); } try { String[] options = new String[2]; options[0] = "-B"; options[1] = numBins; Discretize discretize = new Discretize(); discretize.setOptions(options); discretize.setInputFormat(data); Instances newData = Filter.useFilter(data, discretize); return newData; } catch (Exception e) { System.out.println(e); e.printStackTrace(); } return null; }