Example usage for weka.filters.unsupervised.attribute Discretize Discretize

List of usage examples for weka.filters.unsupervised.attribute Discretize Discretize

Introduction

On this page you can find example usage for the weka.filters.unsupervised.attribute.Discretize constructor.

Prototype

public Discretize() 

Document

Constructor - initialises the filter
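
Every snippet below follows the same canonical pattern: construct the filter with this no-argument constructor, declare the input format with setInputFormat, then push the data through Filter.useFilter. Here is a minimal self-contained sketch of that pattern ("data.arff" is a placeholder file name, not taken from the examples below):

import weka.core.Instances;
import weka.core.converters.ConverterUtils;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Discretize;

public class DiscretizeBasics {
    public static void main(String[] args) throws Exception {
        // load a dataset ("data.arff" is a placeholder path)
        Instances data = new ConverterUtils.DataSource("data.arff").getDataSet();

        // the constructor documented above: initialises the filter with default options
        Discretize discretize = new Discretize();

        // the filter must see the input format before data is pushed through it
        discretize.setInputFormat(data);

        // numeric attributes are replaced by binned nominal attributes
        Instances discretized = Filter.useFilter(data, discretize);
        System.out.println(discretized.numAttributes() + " attributes after discretization");
    }
}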

Usage

From source file:com.mycompany.id3classifier.ID3Shell.java

public static void main(String[] args) throws Exception {
    ConverterUtils.DataSource source = new ConverterUtils.DataSource("lensesData.csv");
    Instances dataSet = source.getDataSet();

    Discretize filter = new Discretize();
    filter.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, filter);

    Standardize standardize = new Standardize();
    standardize.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, standardize);

    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random(9001)); //It's over 9000!!

    int folds = 10;
    //Perform crossvalidation
    Evaluation eval = new Evaluation(dataSet);
    for (int n = 0; n < folds; n++) {
        Instances trainingData = dataSet.trainCV(folds, n);
        Instances testData = dataSet.testCV(folds, n);

        ID3Classifier classifier = new ID3Classifier();
        // Id3 classifier = new Id3();
        classifier.buildClassifier(trainingData);

        eval.evaluateModel(classifier, testData);
    }
    System.out.println(eval.toSummaryString("\nResults:\n", false));
}

From source file:id3classifier.Main.java

public static void main(String[] args) throws Exception {

    ConverterUtils.DataSource source = new ConverterUtils.DataSource(file);
    Instances dataSet = source.getDataSet();

    // discretize the dataset
    Discretize filter = new Discretize();
    filter.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, filter);

    // standardize the dataset
    Standardize standardizedData = new Standardize();
    standardizedData.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, standardizedData);

    // randomize the dataset
    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Debug.Random());

    // get the sizes of the training and testing sets and split
    int trainingSize = (int) Math.round(dataSet.numInstances() * .7);
    int testSize = dataSet.numInstances() - trainingSize;
    Instances training = new Instances(dataSet, 0, trainingSize);
    Instances test = new Instances(dataSet, trainingSize, testSize);

    // set up the ID3 classifier on the training data
    ID3Classifiers classifier = new ID3Classifiers();
    classifier.buildClassifier(training);

    // set up the evaluation and test using the classifier and test set
    Evaluation eval = new Evaluation(dataSet);
    eval.evaluateModel(classifier, test);

    // output the results and exit; exiting here is important to stop JavaFX
    System.out.println(eval.toSummaryString("\nResults\n======\n", false));
    System.exit(0);
}

From source file:imba.classifier.NBTubes.java

@Override
public void buildClassifier(Instances data) {
    dataClassifier = new ArrayList<>();
    infoClassifier = new ArrayList<>();
    validAttribute = new ArrayList<>();
    dataset = null;
    sumClass = null;
    dataSize = 0;
    header_Instances = data;

    Filter f;
    int i, j, k, l, m;
    int sumVal;

    int numAttr = data.numAttributes(); // this count includes the class, i.e. attributes + 1

    i = 0;
    while (i < numAttr && wasNumeric == false) {
        if (i == classIdx) {
            i++;
        }

        if (i != numAttr && data.attribute(i).isNumeric()) {
            wasNumeric = true;
        }

        i++;
    }

    Instance p;

    // apply a filter
    if (wasNumeric) {
        f = new Normalize();
        //Filter f = new NumericToNominal();
        try {
            f.setInputFormat(data);

            for (Instance i1 : data) {
                f.input(i1);
            }

            f.batchFinished();
        } catch (Exception ex) {
            Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
        }

        dataset = f.getOutputFormat();

        while ((p = f.output()) != null) {
            dataset.add(p);
        }
    }

    //f = new NumericToNominal();
    if (filter.equals("Discretize")) {
        f = new Discretize();
    } else {
        f = new NumericToNominal();
    }

    try {
        if (wasNumeric) {
            f.setInputFormat(dataset);
            for (Instance i1 : dataset) {
                f.input(i1);
            }
        } else {
            f.setInputFormat(data);
            for (Instance i1 : data) {
                f.input(i1);
            }
        }

        f.batchFinished();
    } catch (Exception ex) {
        Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
    }

    dataset = null;
    dataset = f.getOutputFormat();

    while ((p = f.output()) != null) {
        dataset.add(p);
    }

    //building data structure
    classIdx = data.classIndex();

    dataSize = data.size();

    // fill the data and info classifiers with empty arrays
    i = 0;
    j = i;
    while (j < numAttr) {
        if (i == classIdx) {
            i++;
        } else {
            dataClassifier.add(new ArrayList<>());
            infoClassifier.add(new ArrayList<>());

            if (j < i) {
                m = j - 1;
            } else {
                m = j;
            }

            k = 0;
            while (k < dataset.attribute(j).numValues()) {
                dataClassifier.get(m).add(new ArrayList<>());
                infoClassifier.get(m).add(new ArrayList<>());

                l = 0;
                while (l < dataset.attribute(classIdx).numValues()) {
                    dataClassifier.get(m).get(k).add(0);
                    infoClassifier.get(m).get(k).add(0.0);

                    l++;
                }

                k++;
            }
        }

        i++;
        j++;
    }

    // fill the data classifier from the dataset
    sumClass = new int[data.numClasses()];

    i = 0;
    while (i < dataset.size()) {
        j = 0;
        k = j;
        while (k < dataset.numAttributes()) {
            if (j == classIdx) {
                j++;
            } else {
                if (k < j) {
                    m = k - 1;
                } else {
                    m = k;
                }

                dataClassifier.get(m).get((int) dataset.get(i).value(k)).set(
                        (int) dataset.get(i).value(classIdx),
                        dataClassifier.get(m).get((int) dataset.get(i).value(k))
                                .get((int) dataset.get(i).value(classIdx)) + 1);

                if (m == 0) {
                    sumClass[(int) dataset.get(i).value(classIdx)]++;
                }

            }

            k++;
            j++;
        }

        i++;
    }

    // convert the counts into per-class relative frequencies
    i = 0;
    while (i < dataClassifier.size()) {
        j = 0;
        while (j < dataClassifier.get(i).size()) {
            k = 0;
            while (k < dataClassifier.get(i).get(j).size()) {
                infoClassifier.get(i).get(j).set(k, (double) dataClassifier.get(i).get(j).get(k) / sumClass[k]);

                k++;
            }

            j++;
        }

        i++;
    }

    /*
    // check whether any value of each attribute
    // represents more than 80% of the data
    i = 0;
    while (i < dataClassifier.size()) {
        j = 0;
        while (j < dataClassifier.get(i).size()) {
            j++;
        }
        i++;
    }
    */
}

From source file:machinelearningq2.ExtendedNaiveBayes.java

public Instances discretize(Instances instnc) throws Exception {
    Discretize d = new Discretize();
    d.setInputFormat(instnc);
    Instances newData = Filter.useFilter(instnc, d);

    binCount = new double[d.getBins()];

    for (Instance line : newData) {
        for (int j = 0; j < newData.numAttributes() - 1; j++) {
            binCount[(int) line.value(j)]++;
        }
    }
    return newData;
}

From source file:milk.classifiers.MIBoost.java

License:Open Source License

/**
  * Builds the classifier.
  *
  * @param exps the training exemplars to be used for generating the
  * boosted classifier.
  * @exception Exception if the classifier could not be built successfully
  */
 public void buildClassifier(Exemplars exps) throws Exception {

     Exemplars train = new Exemplars(exps);

     if (train.classAttribute().type() != Attribute.NOMINAL) {
         throw new Exception("Class attribute must be nominal.");
     }
     if (train.checkForStringAttributes()) {
         throw new Exception("Can't handle string attributes!");
     }

     m_ClassIndex = train.classIndex();
     m_IdIndex = train.idIndex();
     m_NumClasses = train.numClasses();
     m_NumIterations = m_MaxIterations;

     if (m_NumClasses > 2) {
         throw new Exception("Not yet prepared to deal with multiple classes!");
     }

     if (m_Classifier == null)
         throw new Exception("A base classifier has not been specified!");
     if (!(m_Classifier instanceof WeightedInstancesHandler))
         throw new Exception("Base classifier cannot handle weighted instances!");

     m_Models = Classifier.makeCopies(m_Classifier, getMaxIterations());
     if (m_Debug)
         System.err.println("Base classifier: " + m_Classifier.getClass().getName());

     m_Beta = new double[m_NumIterations];
     m_Attributes = new Instances(train.exemplar(0).getInstances(), 0);

     double N = (double) train.numExemplars(), sumNi = 0;
     Instances data = new Instances(m_Attributes, 0);// Data to learn a model   
     data.deleteAttributeAt(m_IdIndex);// ID attribute useless   
     Instances dataset = new Instances(data, 0);

     // Initialize weights
     for (int i = 0; i < N; i++)
         sumNi += train.exemplar(i).getInstances().numInstances();

     for (int i = 0; i < N; i++) {
         Exemplar exi = train.exemplar(i);
         exi.setWeight(sumNi / N);
         Instances insts = exi.getInstances();
         double ni = (double) insts.numInstances();
         for (int j = 0; j < ni; j++) {
             Instance ins = new Instance(insts.instance(j));// Copy
             //insts.instance(j).setWeight(1.0);   

             ins.deleteAttributeAt(m_IdIndex);
             ins.setDataset(dataset);
             ins.setWeight(exi.weight() / ni);
             data.add(ins);
         }
     }

     // Assume the order of the instances is preserved by the Discretize filter
     if (m_DiscretizeBin > 0) {
         m_Filter = new Discretize();
         m_Filter.setInputFormat(new Instances(data, 0));
         m_Filter.setBins(m_DiscretizeBin);
         data = Filter.useFilter(data, m_Filter);
     }

     // Main algorithm
     int dataIdx;
     iterations: for (int m = 0; m < m_MaxIterations; m++) {
         if (m_Debug)
             System.err.println("\nIteration " + m);
         // Build a model
         m_Models[m].buildClassifier(data);

         // Prediction of each bag
         double[] err = new double[(int) N], weights = new double[(int) N];
         boolean perfect = true, tooWrong = true;
         dataIdx = 0;
         for (int n = 0; n < N; n++) {
             Exemplar exn = train.exemplar(n);
             // Prediction of each instance and the predicted class distribution
             // of the bag      
             double nn = (double) exn.getInstances().numInstances();
             for (int p = 0; p < nn; p++) {
                 Instance testIns = data.instance(dataIdx++);
                 if ((int) m_Models[m].classifyInstance(testIns) != (int) exn.classValue()) // Weighted instance-wise 0-1 errors
                     err[n]++;
             }
             weights[n] = exn.weight();
             err[n] /= nn;
             if (err[n] > 0.5)
                 perfect = false;
             if (err[n] < 0.5)
                 tooWrong = false;
         }

         if (perfect || tooWrong) { // No or 100% classification error, cannot find beta
             if (m == 0)
                 m_Beta[m] = 1.0;
             else
                 m_Beta[m] = 0;
             m_NumIterations = m + 1;
             if (m_Debug)
                 System.err.println("No errors");
             break iterations;
         }

         double[] x = new double[1];
         x[0] = 0;
         double[][] b = new double[2][x.length];
         b[0][0] = Double.NaN;
         b[1][0] = Double.NaN;

         OptEng opt = new OptEng();
         opt.setWeights(weights);
         opt.setErrs(err);
         //opt.setDebug(m_Debug);
         if (m_Debug)
             System.out.println("Start searching for c... ");
         x = opt.findArgmin(x, b);
         while (x == null) {
             x = opt.getVarbValues();
             if (m_Debug)
                 System.out.println("200 iterations finished, not enough!");
             x = opt.findArgmin(x, b);
         }
         if (m_Debug)
             System.out.println("Finished.");
         m_Beta[m] = x[0];

         if (m_Debug)
             System.err.println("c = " + m_Beta[m]);

         // Stop if error too small or error too big and ignore this model
         if (Double.isInfinite(m_Beta[m]) || Utils.smOrEq(m_Beta[m], 0)) {
             if (m == 0)
                 m_Beta[m] = 1.0;
             else
                 m_Beta[m] = 0;
             m_NumIterations = m + 1;
             if (m_Debug)
                 System.err.println("Errors out of range!");
             break iterations;
         }

         // Update weights of data and class label of wfData
         dataIdx = 0;
         double totWeights = 0;
         for (int r = 0; r < N; r++) {
             Exemplar exr = train.exemplar(r);
             exr.setWeight(weights[r] * Math.exp(m_Beta[m] * (2.0 * err[r] - 1.0)));
             totWeights += exr.weight();
         }

         if (m_Debug)
             System.err.println("Total weights = " + totWeights);

         for (int r = 0; r < N; r++) {
             Exemplar exr = train.exemplar(r);
             double num = (double) exr.getInstances().numInstances();
             exr.setWeight(sumNi * exr.weight() / totWeights);
             //if(m_Debug)
             //    System.err.print("\nExemplar "+r+"="+exr.weight()+": \t");
             for (int s = 0; s < num; s++) {
                 Instance inss = data.instance(dataIdx);
                 inss.setWeight(exr.weight() / num);
                 //    if(m_Debug)
                 //  System.err.print("instance "+s+"="+inss.weight()+
                 //          "|ew*iw*sumNi="+data.instance(dataIdx).weight()+"\t");
                 if (Double.isNaN(inss.weight()))
                     throw new Exception("instance " + s + " in bag " + r + " has weight NaN!");
                 dataIdx++;
             }
             //if(m_Debug)
             //    System.err.println();
         }
     }
 }

From source file:NaiveBayes.Atribut.java

public Atribut(Instances ints, int i, int classindex) throws Exception {
    if (ints.attribute(i).isNumeric()) {
        Instances newData = new Instances(ints);
        Discretize f = new Discretize();
        f.setInputFormat(newData);
        newData = Filter.useFilter(newData, f);
        name = ints.attribute(i).name();
        listNilai = new ArrayList<>();
        for (int j = 0; j < newData.attribute(i).numValues(); j++) {
            listNilai.add(new Nilai(ints, i, j, classindex));
        }
    } else {
        name = ints.attribute(i).name().replaceAll("\\s+", "");
        //            System.out.println(name);
        listNilai = new ArrayList<>();
        for (int j = 0; j < ints.attribute(i).numValues(); j++) {
            listNilai.add(new Nilai(ints, i, j, classindex));
        }
    }

}

From source file:NaiveBayes.Nilai.java

public Nilai(Instances inst, int i, int j, int classindex) throws Exception {
    Instances newData = new Instances(inst);
    numClass = NaiveBayes.getNumEachClass(newData);
    lower = 0;
    upper = 0;
    kelas = new ArrayList<>();
    //if(newData.instance(i).isMissing(j)) newData.instance(i).setValue(i, "b");
    if (newData.attribute(i).isNumeric()) {
        Discretize f = new Discretize();
        f.setInputFormat(newData);
        newData = Filter.useFilter(newData, f);
        name = newData.attribute(i).value(j);
        if (f.getCutPoints(i) != null) {
            if (j == 0) {
                lower = Double.NEGATIVE_INFINITY;
                upper = f.getCutPoints(i)[j];
            } else {
                if (j != newData.attribute(i).numValues() - 1) {
                    lower = f.getCutPoints(i)[j - 1];
                    upper = f.getCutPoints(i)[j];
                } else {
                    lower = f.getCutPoints(i)[j - 1];
                    upper = Double.POSITIVE_INFINITY;
                }
            }
        } else {
            lower = Double.NEGATIVE_INFINITY;
            upper = Double.POSITIVE_INFINITY;
        }
        for (int k = 0; k < inst.attribute(classindex).numValues(); k++) { // for each class value
            double cnt = 1;
            int countClass = 0;
            for (int l = 0; l < inst.numInstances(); l++) { // over all instances
                double val = inst.get(l).value(i);
                if (countClass <= numClass[k]) {
                    if (inst.attribute(classindex).value(k).equalsIgnoreCase(
                            inst.get(l).toString(classindex).replaceAll("'", ""))) { /* the class name matches */
                        if (val >= lower && val < upper) { // and the attribute value falls
                            // inside this bin [lower, upper): count it
                            cnt += 1;
                        }
                        countClass++;
                    }
                } else
                    break;
            }
            kelas.add(new Kelas(newData.attribute(classindex).value(k), cnt));
        }
    } else {
        //System.out.println(newData.attribute(i).value(j).replaceAll("\\s+", ""));
        name = newData.attribute(i).value(j).replaceAll("\\s", "");
        //System.out.println(name);
        //System.out.println(name);
        for (int k = 0; k < inst.attribute(classindex).numValues(); k++) { // for each class value
            double cnt = 1;
            int countClass = 0;
            for (int l = 0; l < inst.numInstances(); l++) { // over all instances
                if (countClass <= numClass[k]) {
                    //System.out.println("with whitespace "+inst.attribute(i).value(j)+"without "+inst.attribute(i).value(j).replaceAll("\\s", "")+"p");
                    //                        System.out.println(inst.get(l).toString(classindex));
                    //System.out.println(inst.attribute(classindex).value(k));
                    if (inst.attribute(classindex).value(k).replaceAll("\\s", "")
                            .equalsIgnoreCase(inst.get(l).toString(classindex).replaceAll("\\s", "")) // class name matches
                            && inst.attribute(i).value(j).replaceAll("\\s", ""). // and attribute value matches
                                    equalsIgnoreCase(inst.get(l).toString(i).replaceAll("\\s", ""))) {
                        // the instance has this attribute value and this class: count it
                        cnt += 1;
                        countClass++;
                    }
                } else
                    break;
            }
            kelas.add(new Kelas(newData.attribute(classindex).value(k).replaceAll("\\s+", ""), cnt));
        }
    }
}

From source file:NaiveBayesPckge.NaiveBayesMain.java

public static Instances useFilterDiscritize(Instances dataSet) throws Exception {
    //set options
    String[] optionsFilter = new String[4];
    //choose the number of bins, here 6:
    optionsFilter[0] = "-B";
    optionsFilter[1] = "6";
    //choose the range of attributes on which to apply the filter:
    optionsFilter[2] = "-R";
    optionsFilter[3] = "first-last";
    System.out.println("> Filtering dataset using Discretize\n");
    //Apply Discretization
    Discretize discretize = new Discretize();
    discretize.setOptions(optionsFilter);
    discretize.setInputFormat(dataSet);
    Instances newDataTemp = Filter.useFilter(dataSet, discretize);

    return newDataTemp;
}
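
The same configuration can also be set through the filter's setter methods instead of an option array. Below is a minimal equivalent sketch (the method name useFilterDiscretizeSetters is ours, not from the source, and the same imports as the file above are assumed); setBins corresponds to -B and setAttributeIndices to -R:

public static Instances useFilterDiscretizeSetters(Instances dataSet) throws Exception {
    Discretize discretize = new Discretize();
    discretize.setBins(6);                        // equivalent to "-B 6"
    discretize.setAttributeIndices("first-last"); // equivalent to "-R first-last"
    discretize.setInputFormat(dataSet);
    return Filter.useFilter(dataSet, discretize);
}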

From source file:NaiveBayesPckge.NaiveBayesMain.java

public static void addNewInstance(Instances instances) throws Exception {
    Scanner scan = new Scanner(System.in);
    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    ArrayList<String> classVal = new ArrayList<String>();
    int nConclus = instances.attribute(instances.numAttributes() - 1).numValues();
    int numAttribut = instances.numAttributes();

    // collect the class values, e.g. T and F
    for (int i = 0; i < nConclus; i++) {
        classVal.add(instances.attribute(instances.numAttributes() - 1).value(i));
    }

    // rebuild the attribute list
    for (int i = 0; i < numAttribut - 1; i++) {
        atts.add(new Attribute(instances.attribute(i).name()));
    }
    atts.add(new Attribute(instances.attribute(numAttribut - 1).name(), classVal));

    double[] attValues = new double[numAttribut];
    System.out.print("Masukkan nilai : ");
    for (int i = 0; i < numAttribut - 1; i++) {
        attValues[i] = scan.nextDouble();
    }
    scan.nextLine(); // consume the rest of the input line (the read value was unused)

    Instance instance = new DenseInstance(1.0, attValues);
    instance.setDataset(instances);

    // The original code never retrieved the filter's output and classified the
    // raw instance; assuming discretization was intended, determine the cut
    // points on the full dataset first, then pass the new instance through
    Discretize discretize = new Discretize();
    discretize.setInputFormat(instances);
    Filter.useFilter(instances, discretize);
    discretize.input(instance);
    instance = discretize.output();

    int classify1 = (int) naive.classifyInstance(instance);
    System.out.print("Prediction Class : ");
    System.out.println(classVal.get(classify1));
}

From source file:org.tigr.microarray.mev.cluster.gui.impl.bn.PrepareArrayDataModule.java

License:Open Source License

/**
 * The <code>discretize</code> method is given a WEKA Instances object corresponding to the gene expression data
 * and returns a new WEKA Instances object with the given data discretized into a given number of equal-width bins
 *
 * @param data an <code>Instances</code> which is a WEKA Instances object corresponding to the gene expression data
 * @param numBins a <code>String</code> corresponding to the number of bins in which the data is to be discretized
 * @return an <code>Instances</code> a new WEKA Instances object with the given data discretized 
 * into a given number of equal-width bins
 * @exception NullArgumentException if an error occurs if the data is null
 * @exception OutOfRangeException if an error occurs if the numBins is out of bounds (namely, negative or equal to zero)
 */
public static Instances discretize(Instances data, String numBins)
        throws NullArgumentException, OutOfRangeException {
    if (data == null) {
        throw new NullArgumentException("Parameter data passed to discretize method was null!");
    }
    if (Integer.parseInt(numBins) <= 0) {
        throw new OutOfRangeException(
                "numBins is out of range (should be strictly positive!\nnumBins=" + numBins);
    }
    try {
        String[] options = new String[2];
        options[0] = "-B";
        options[1] = numBins;
        Discretize discretize = new Discretize();
        discretize.setOptions(options);
        discretize.setInputFormat(data);
        Instances newData = Filter.useFilter(data, discretize);
        return newData;
    } catch (Exception e) {
        System.out.println(e);
        e.printStackTrace();
    }
    return null;
}
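
A hypothetical call site for the helper above (the file name expression.arff and the surrounding main method are assumptions for illustration, not part of the module; the usual weka.core and converter imports are assumed):

public static void main(String[] args) throws Exception {
    Instances expression = new ConverterUtils.DataSource("expression.arff").getDataSet();
    Instances binned = PrepareArrayDataModule.discretize(expression, "10"); // 10 equal-width bins
    if (binned == null) {
        System.err.println("Discretization failed; see the stack trace printed above.");
    }
}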