Example usage for weka.core Instances numInstances

List of usage examples for weka.core Instances numInstances

Introduction

In this page you can find the example usage for weka.core Instances numInstances.

Prototype


public int numInstances()

Source Link

Document

Returns the number of instances in the dataset.

Usage

From source file:id3.MyID3.java

/**
 * Membagi dataset menurut atribute value
 * @param data instance/*from   www  .  ja  va2  s. co  m*/
 * @param att atribut input
 * @return instance hasil split
 */
public Instances[] splitData(Instances data, Attribute att) {
    Instances[] instancesSplitBasedAttribute = new Instances[att.numValues()];
    for (int i = 0; i < att.numValues(); i++) {
        instancesSplitBasedAttribute[i] = new Instances(data, data.numInstances());
    }
    for (int i = 0; i < data.numInstances(); i++) {
        instancesSplitBasedAttribute[(int) data.instance(i).value(att)].add(data.instance(i));
    }
    return instancesSplitBasedAttribute;
}

From source file:id3classifier.ID3Classifiers.java

/**
 * Builds the decision tree from the given training data.
 *
 * <p>Copies every instance into a list, collects every attribute except the
 * class attribute into a second list, and stores the result of
 * {@code buildTree} in {@code tree}.
 *
 * @param instances the training data
 * @throws Exception declared by the overridden method
 */
@Override
public void buildClassifier(Instances instances) throws Exception {
    final int instanceCount = instances.numInstances();
    final int attributeCount = instances.numAttributes();

    // Pre-size both lists to the number of elements they may receive.
    List<Instance> rows = new ArrayList<>(instanceCount);
    List<Attribute> candidates = new ArrayList<>(attributeCount);

    for (int row = 0; row < instanceCount; row++) {
        rows.add(instances.instance(row));
    }

    // Every attribute except the class attribute is a candidate for the tree.
    for (int col = 0; col < attributeCount; col++) {
        if (col == instances.classIndex()) {
            continue;
        }
        candidates.add(instances.attribute(col));
    }

    tree = buildTree(rows, candidates);
}

From source file:id3classifier.Main.java

/**
 * Loads the dataset named by {@code file}, discretizes and standardizes it,
 * shuffles it, trains an {@code ID3Classifiers} on the first 70% and prints an
 * evaluation summary for the remaining instances.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if loading, filtering, training or evaluation fails
 */
public static void main(String[] args) throws Exception {

    Instances dataSet = new ConverterUtils.DataSource(file).getDataSet();

    // Step 1: discretize.
    Discretize discretizer = new Discretize();
    discretizer.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, discretizer);

    // Step 2: standardize.
    Standardize standardizer = new Standardize();
    standardizer.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, standardizer);

    // Step 3: the last attribute is the class; shuffle the instances.
    final int lastAttribute = dataSet.numAttributes() - 1;
    dataSet.setClassIndex(lastAttribute);
    dataSet.randomize(new Debug.Random());

    // Step 4: 70/30 split, training instances first, test instances after them.
    final int trainingSize = (int) Math.round(dataSet.numInstances() * 0.7);
    final int testSize = dataSet.numInstances() - trainingSize;
    Instances training = new Instances(dataSet, 0, trainingSize);
    Instances test = new Instances(dataSet, trainingSize, testSize);

    // Step 5: train on the training part.
    ID3Classifiers classifier = new ID3Classifiers();
    classifier.buildClassifier(training);

    // Step 6: evaluate on the test part.
    Evaluation eval = new Evaluation(dataSet);
    eval.evaluateModel(classifier, test);

    // Print the summary, then exit explicitly; the original author notes this
    // is needed to stop JavaFX.
    String summary = eval.toSummaryString("\nResults\n======\n", false);
    System.out.println(summary);
    System.exit(0);
}

From source file:id3j48.WekaAccess.java

/**
 * Evaluates a classifier using a percentage split of the data.
 *
 * <p>A copy of {@code data} is shuffled with a fixed seed (1). The first
 * {@code percentage} percent of it, rounded to the nearest instance, is used
 * to train the classifier and the remainder is used as the test set.
 *
 * @param data the full dataset; it is copied, not modified
 * @param classifier the classifier to train and evaluate
 * @param percentage the share of instances used for training, in percent
 * @return the evaluation produced by {@code testModel} for the trained classifier
 * @throws Exception if building or testing the classifier fails
 */
public static Evaluation percentageSplit(Instances data, Classifier classifier, int percentage)
        throws Exception {
    Instances tempdata = new Instances(data);
    tempdata.randomize(new Random(1));

    // Compute the share in floating point: the former int expression
    // (count * percentage / 100) truncated before Math.round ever saw a
    // fraction, and the int product could overflow for large datasets.
    int total = tempdata.numInstances();
    int trainSize = (int) Math.round((double) total * percentage / 100.0);
    int testSize = total - trainSize;
    Instances train = new Instances(tempdata, 0, trainSize);
    Instances test = new Instances(tempdata, trainSize, testSize);

    classifier.buildClassifier(train);
    return testModel(classifier, train, test);
}

From source file:id3j48.WekaAccess.java

/**
 * Classifies every instance read from the given ARFF file, prints each
 * instance with its predicted class, and writes the labelled dataset to a file
 * of the same name under {@code classifiedFolder}.
 *
 * @param filename the file passed to {@code readArff}; also the output file name
 * @param classifier the classifier used to predict the class values
 * @throws Exception if reading, classifying or writing fails
 */
public static void classify(String filename, Classifier classifier) throws Exception {
    Instances input = readArff(filename);
    // The last attribute is treated as the class.
    input.setClassIndex(input.numAttributes() - 1);

    int index = 0;
    while (index < input.numInstances()) {
        double classLabel = classifier.classifyInstance(input.instance(index));
        // Store the prediction on the instance so it is part of the written output.
        input.instance(index).setClassValue(classLabel);
        System.out.println("Instance: " + input.instance(index));
        System.out.println("Class: " + input.classAttribute().value((int) classLabel));
        index++;
    }

    String outputPath = classifiedFolder + File.separator + filename;
    try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputPath))) {
        writer.write(input.toString());
        writer.newLine();
        writer.flush();
    }
}

From source file:intensityclustering.IntensityClustering.java

/**
 * Draws the 2D Histogram Plot in the IntensityClustering. X-Axsis is
 * intensity value of chanel 2 image (where the stained nuclei are). Y-axis
 * are relative frequencies of present nuclei.
 *
 * @param tss The TMAspots whose nuclei are considered (both gold-standard
 * and estimated nuclei)./*w ww  .j a  va  2  s .c o m*/
 * @param doAlsoClustering If true, the TMApoints are also clustered
 * according to the histogram.
 */
void drawNucleiIntensities2D(List<TMAspot> tss, boolean doAlsoClustering) {
    // draw the plot
    Plot2DPanel plot;
    if (((java.awt.BorderLayout) (jPanel9.getLayout()))
            .getLayoutComponent(java.awt.BorderLayout.CENTER) != null) {
        plot = (Plot2DPanel) ((java.awt.BorderLayout) (jPanel9.getLayout()))
                .getLayoutComponent(java.awt.BorderLayout.CENTER);
        plot.removeAllPlots();
        plot.removeAllPlotables();
    } else {
        plot = new Plot2DPanel(PlotPanel.SOUTH);
        plot.setAxisLabels("Intensity", "Frequency");
        plot.plotCanvas.setBackground(jPanel9.getBackground());
        plot.plotLegend.setBackground(jPanel9.getBackground());
        plot.plotToolBar.setBackground(plot.plotCanvas.getBackground());
    }
    if (((java.awt.BorderLayout) (jPanel9.getLayout()))
            .getLayoutComponent(java.awt.BorderLayout.CENTER) == null) {
        jPanel9.add(plot, java.awt.BorderLayout.CENTER);
        jPanel15.setBackground(plot.plotCanvas.getBackground());
        jPanel15.setVisible(true);
        validate();
        pack();
    }

    if (tss.size() > 0) {
        try {
            this.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));

            List<Integer> intensities = new ArrayList<>();
            int intensity;
            int min = Integer.parseInt(jTextField1.getText());
            int max = Integer.parseInt(jTextField16.getText());
            for (TMAspot ts : tss) {
                //TODO: GET THE CHANNEL 2 Image
                //BufferedImage img = ts.getBufferedImage(TMAspot.SHOW_CHANNEL2_IMAGE, false);
                BufferedImage img = ts.getBufferedImage(false);
                // img can be null if color deconvolution has not been performed, yet.
                if (img != null) {
                    List<TMApoint> tps = ts.getPoints();
                    for (TMALabel tp : tps) {
                        intensity = TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false)
                                .getRed();
                        if (intensity >= min && intensity <= max) {
                            intensities.add(intensity);
                        }
                    }
                }
            }

            double[] intensities_array = new double[intensities.size()];

            for (int i = 0; i < intensities.size(); i++) {
                intensities_array[i] = intensities.get(i);
            }

            int nbins = jSlider7.getValue();
            if (intensities_array.length > 0) {
                plot.addHistogramPlot("TMA points", intensities_array, 0, 256, nbins);
            } //else {
              //  JOptionPane.showMessageDialog(this, "No TMA points have been found.", "No TMA points found.", JOptionPane.WARNING_MESSAGE);
              //}

            //// Cluster Points according to histograms
            if (doAlsoClustering) {
                // Find Clusters
                int n = getParam_nClusters();

                // Create ARFF Data
                FastVector atts;
                Instances data;
                int i;

                // 1. create arff data format
                atts = new FastVector(1);
                for (i = 0; i < 1; i++) {
                    atts.addElement(new Attribute(Integer.toString(i)));
                }

                // 2. create Instances object
                data = new Instances("TMA points", atts, tmarker.getNumberNuclei(tss));

                // 3. fill with data
                for (i = 0; i < intensities_array.length; i++) {
                    // add the instance
                    Instance inst = new Instance(1.0, new double[] { intensities_array[i] });
                    inst.setDataset(data);
                    data.add(inst);
                }

                // 4. set data class index (last attribute is the class)
                //data.setClassIndex(data.numAttributes() - 1); // not for weka 3.5.X
                if (tmarker.DEBUG > 4) {
                    java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO,
                            data.toString());
                }

                Clusterer clusterer = getClusterer();
                String[] options = getClustererOptions();

                if (tmarker.DEBUG > 3) {
                    if (options.length > 0) {
                        String info = "Clusterer should have options:\n";
                        for (String o : options) {
                            info += o + " ";
                        }
                        info += "\n";
                        java.util.logging.Logger.getLogger(getClass().getName())
                                .log(java.util.logging.Level.INFO, info);
                    }
                }

                clusterer.setOptions(options); // set the clusterer options
                clusterer.buildClusterer(data); // build the clusterer

                // order the clusters according to the brightness
                // The most bright cluster should be 0, then 1, then 2,...
                ArrayList<ArrayList<Double>> values = new ArrayList<>();
                for (i = 0; i < n; i++) {
                    values.add(new ArrayList<Double>());
                }
                int z;
                double value;
                for (i = 0; i < data.numInstances(); i++) {
                    z = clusterer.clusterInstance(data.instance(i));
                    value = data.instance(i).value(0);
                    values.get(z).add(value);
                }
                double[] means = new double[n];
                double[] stds = new double[n];
                for (i = 0; i < n; i++) {
                    means[i] = Misc.mean(values.get(i).toArray(new Double[values.get(i).size()]));
                    stds[i] = Misc.std(values.get(i).toArray(new Double[values.get(i).size()]));
                }
                int[] ordering = Misc.orderArray(means, true);

                for (i = 0; i < n; i++) {
                    int ind = Misc.IndexOf(ordering, i);
                    plot.addPlotable(new Line(getParam_ColorOfClassK(i),
                            new double[] { means[ind], plot.plotCanvas.base.roundXmin[1] },
                            new double[] { means[ind], plot.plotCanvas.base.roundXmax[1] }, 2 * stds[ind]));
                    plot.addPlot(Plot2DPanel.LINE, "Staining " + i, getParam_ColorOfClassK(i),
                            new double[][] { new double[] { means[ind], plot.plotCanvas.base.roundXmin[1] },
                                    new double[] { means[ind], plot.plotCanvas.base.roundXmax[1] } });
                }

                String clusterInfo = "";
                for (String o : clusterer.getOptions()) {
                    clusterInfo += o + " ";
                }
                clusterInfo += "\n\n";
                clusterInfo += clusterer.toString().trim();
                if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) {
                    try {
                        clusterInfo += ((HierarchicalClusterer) clusterer).graph();
                        HierarchyVisualizer a = new HierarchyVisualizer(
                                ((HierarchicalClusterer) clusterer).graph());
                        a.setSize(800, 600);
                        if (clusterVisualizer == null) {
                            clusterVisualizer = new JFrame("Hierarchical Clusterer Dendrogram");
                            clusterVisualizer.setIconImage(getIconImage());
                            clusterVisualizer.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE);
                            clusterVisualizer.setSize(800, 600);
                        }
                        Container contentPane = clusterVisualizer.getContentPane();
                        contentPane.removeAll();
                        contentPane.add(a);
                    } catch (Exception e) {
                        clusterVisualizer = null;
                    }
                }
                jTextArea1.setText(clusterInfo);

                if (tmarker.DEBUG > 3) {
                    String info = "Clusterer has options\n";
                    for (String o : clusterer.getOptions()) {
                        info += o + " ";
                    }
                    info += "\n";
                    info += clusterer.toString() + "\n";
                    // info += (clusterer).globalInfo() + "\n";
                    info += "\n";
                    info += clusterInfo + "\n";
                    java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO,
                            info);
                }

                // cluster all TMAspots and assign the corresponding class to them
                // Cluster the points
                List<List<Integer>> clustered_points = new ArrayList<>();
                for (i = 0; i < n; i++) {
                    clustered_points.add(new ArrayList<Integer>());
                }

                int k;
                for (TMAspot ts : tss) {
                    //TODO: GET THE CHANNEL 2 IMAGE
                    //BufferedImage img = ts.getBufferedImage(TMAspot.SHOW_CHANNEL2_IMAGE, false);
                    BufferedImage img = ts.getBufferedImage(false);
                    List<TMApoint> tps = ts.getPoints();
                    for (TMApoint tp : tps) {
                        intensity = TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false)
                                .getRed();

                        // add the instance
                        Instance inst = new Instance(1.0, new double[] { intensity });
                        inst.setDataset(data);
                        k = ordering[clusterer.clusterInstance(inst)];

                        // store the color for later visualization
                        clustered_points.get(k).add(intensity);

                        // set the staining of the TMApoint
                        switch (k) {
                        case 0:
                            tp.setStaining(TMALabel.STAINING_0);
                            break;
                        case 1:
                            tp.setStaining(TMALabel.STAINING_1);
                            break;
                        case 2:
                            tp.setStaining(TMALabel.STAINING_2);
                            break;
                        default:
                            tp.setStaining(TMALabel.STAINING_3);
                            break;
                        }
                    }
                    ts.dispStainingInfo();
                    if (manager.getVisibleTMAspot() == ts) {
                        manager.repaintVisibleTMAspot();
                    }
                }

                // Write the description
                String description = "Nuclei clustered with " + getParam_AutomaticClustererString();
                if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) {
                    description += " (" + getParam_HierarchicalClusteringMethod() + ")";
                }
                description += ", n=" + getParam_nClusters() + ", channel 2 intensity.";
                jLabel42.setText(description);
                jLabel41.setText(" ");

            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            this.setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR));
        }
    }
}

From source file:intensityclustering.IntensityClustering.java

/**
 * Clusters the TMApoints on given TMAspots according to their staining
 * intensity (color). All parameters (e.g. clusterer and parameters) are
 * selected by the user. Features are simple color features.
 *
 * @param tss The TMAspots of which all nuclei (gold-standard and estimated)
 * are clustered according to color./*w  w  w. ja va2  s  .c o  m*/
 */
private void clusterPointsAutomaticallyColorSpace(List<TMAspot> tss) {
    if (tss.size() > 0) {
        try {
            this.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));

            int n = getParam_nClusters();

            // Create ARFF Data
            FastVector atts;
            Instances data;
            int i;

            // 1. create arff data format
            atts = new FastVector(3);
            for (i = 0; i < 3; i++) {
                atts.addElement(new Attribute(Integer.toString(i)));
            }

            // 2. create Instances object
            data = new Instances("TMA points", atts, tmarker.getNumberNuclei(tss));

            // 3. fill with data
            BufferedImage img;
            Color c;
            float[] features = new float[3];
            String colorSpace = getParam_ColorSpace();
            for (TMAspot ts : tss) {
                img = ts.getBufferedImage();
                List<TMApoint> tps = ts.getPoints();
                for (TMApoint tp : tps) {
                    Color2Feature(TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false),
                            colorSpace, features);

                    // add the instance
                    Instance inst = new Instance(1.0, new double[] { features[0], features[1], features[2] });
                    inst.setDataset(data);
                    data.add(inst);
                }
            }

            // 4. set data class index (last attribute is the class)
            //data.setClassIndex(data.numAttributes() - 1); // not for weka 3.5.X
            if (tmarker.DEBUG > 4) {
                java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO,
                        data.toString());
            }

            Clusterer clusterer = getClusterer();
            String[] options = getClustererOptions();
            if (false && colorSpace.equalsIgnoreCase("hsb")) {
                String[] newoptions = new String[options.length + 2];
                System.arraycopy(options, 0, newoptions, 0, options.length);
                newoptions[options.length] = "-A";
                newoptions[options.length + 1] = "weka.core.MyHSBDistance";
                options = newoptions;
            }

            if (tmarker.DEBUG > 3) {
                if (options.length > 0) {
                    String info = "Clusterer should have options\n";
                    for (String o : options) {
                        info += o + " ";
                    }
                    info += "\n";
                    java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO,
                            info);
                }
            }

            clusterer.setOptions(options); // set the clusterer options
            clusterer.buildClusterer(data); // build the clusterer

            // order the clusters according to the brightness
            // The most bright cluster should be 0, then 1, then 2,...
            ArrayList<ArrayList<Double>> values = new ArrayList<>();
            for (i = 0; i < clusterer.numberOfClusters(); i++) {
                values.add(new ArrayList<Double>());
            }
            int z;
            double value;
            for (i = 0; i < data.numInstances(); i++) {
                z = clusterer.clusterInstance(data.instance(i));
                value = getParam_ColorSpace().equalsIgnoreCase("hsb") ? data.instance(i).value(2)
                        : Misc.RGBToGray(data.instance(i).value(0), data.instance(i).value(1),
                                data.instance(i).value(2));
                values.get(z).add(value);
            }
            double[] means = new double[clusterer.numberOfClusters()];
            for (i = 0; i < clusterer.numberOfClusters(); i++) {
                means[i] = Misc.mean(values.get(i).toArray(new Double[values.get(i).size()]));
            }
            int[] ordering = Misc.orderArray(means, !getParam_ColorSpace().equalsIgnoreCase("rtp"));

            String clusterInfo = "";
            for (String o : clusterer.getOptions()) {
                clusterInfo += o + " ";
            }
            clusterInfo += "\n\n";
            clusterInfo += clusterer.toString().trim();
            if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) {
                try {
                    clusterInfo += ((HierarchicalClusterer) clusterer).graph();
                    HierarchyVisualizer a = new HierarchyVisualizer(
                            ((HierarchicalClusterer) clusterer).graph());
                    a.setSize(800, 600);
                    if (clusterVisualizer == null) {
                        clusterVisualizer = new JFrame("Hierarchical Clusterer Dendrogram");
                        clusterVisualizer.setIconImage(getIconImage());
                        clusterVisualizer.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE);
                        clusterVisualizer.setSize(800, 600);
                    }
                    Container contentPane = clusterVisualizer.getContentPane();
                    contentPane.removeAll();
                    contentPane.add(a);
                } catch (Exception e) {
                    clusterVisualizer = null;
                }
            }
            jTextArea1.setText(clusterInfo);

            if (tmarker.DEBUG > 3) {
                String info = "Clusterer has options\n";
                for (String o : clusterer.getOptions()) {
                    info += o + " ";
                }
                info += "\n";
                info += clusterer.toString() + "\n";
                // info += (clusterer).globalInfo() + "\n";
                info += "\n";
                info += clusterInfo + "\n";
                java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO,
                        info);
            }

            // cluster all TMAspots and assign the corresponding class to them
            // Cluster the points
            List<List<Color>> clustered_points = new ArrayList<>();
            for (i = 0; i < clusterer.numberOfClusters(); i++) {
                clustered_points.add(new ArrayList<Color>());
            }

            int k;
            for (TMAspot ts : tss) {
                img = ts.getBufferedImage();
                List<TMApoint> tps = ts.getPoints();
                for (TMApoint tp : tps) {
                    c = TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false);
                    Color2Feature(c, colorSpace, features);

                    // add the instance
                    Instance inst = new Instance(1.0, new double[] { features[0], features[1], features[2] });
                    inst.setDataset(data);
                    k = ordering[clusterer.clusterInstance(inst)];

                    // store the color for later visualization
                    clustered_points.get(k).add(c);

                    // set the staining of the TMApoint
                    switch (k) {
                    case 0:
                        tp.setStaining(TMALabel.STAINING_0);
                        break;
                    case 1:
                        tp.setStaining(TMALabel.STAINING_1);
                        break;
                    case 2:
                        tp.setStaining(TMALabel.STAINING_2);
                        break;
                    default:
                        tp.setStaining(TMALabel.STAINING_3);
                        break;
                    }
                }
                ts.dispStainingInfo();
                if (manager.getVisibleTMAspot() == ts) {
                    manager.repaintVisibleTMAspot();
                }
            }

            // draw the points
            Plot3DPanel plot;
            if (((java.awt.BorderLayout) (jPanel2.getLayout()))
                    .getLayoutComponent(java.awt.BorderLayout.CENTER) != null) {
                plot = (Plot3DPanel) ((java.awt.BorderLayout) (jPanel2.getLayout()))
                        .getLayoutComponent(java.awt.BorderLayout.CENTER);
                plot.removeAllPlots();
            } else {
                plot = new Plot3DPanel();
                plot.plotCanvas.setBackground(jPanel2.getBackground());
                plot.addLegend(PlotPanel.SOUTH);
                plot.plotLegend.setBackground(jPanel2.getBackground());
            }
            if (colorSpace.equalsIgnoreCase("hsb")) {
                plot.setAxisLabels("Hue", "Saturation", "Brightness");
            } else if (colorSpace.equalsIgnoreCase("rtp")) {
                plot.setAxisLabels("R", "Theta", "Phi");
            } else {
                plot.setAxisLabels("Red", "Green", "Blue");
            }

            for (i = 0; i < clusterer.numberOfClusters(); i++) {
                double[] xs = new double[clustered_points.get(i).size()];
                double[] ys = new double[clustered_points.get(i).size()];
                double[] zs = new double[clustered_points.get(i).size()];
                for (int j = 0; j < clustered_points.get(i).size(); j++) {
                    Color2Feature(clustered_points.get(i).get(j), colorSpace, features);
                    xs[j] = features[0];
                    ys[j] = features[1];
                    zs[j] = features[2];
                }
                if (xs.length > 0) {
                    c = getParam_ColorOfClassK(i);
                    plot.addScatterPlot("Staining " + i, c, xs, ys, zs);
                }
            }

            // Write the description
            String description = "Nuclei clustered with " + getParam_AutomaticClustererString();
            if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) {
                description += " (" + getParam_HierarchicalClusteringMethod() + ")";
            }
            description += ", n=" + getParam_nClusters() + ", color space " + getParam_ColorSpace() + ".";
            jLabel41.setText(description);
            jLabel42.setText(" ");

            if (((java.awt.BorderLayout) (jPanel2.getLayout()))
                    .getLayoutComponent(java.awt.BorderLayout.CENTER) == null) {
                jPanel2.add(plot, java.awt.BorderLayout.CENTER);
                validate();
                pack();
            }
        } catch (Exception | OutOfMemoryError e) {
            java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.SEVERE, null,
                    e);
            JOptionPane.showMessageDialog(this,
                    "The clustering could not be performed.\n\n" + "A possible reasons is:\n"
                            + "- Not enough memory (too many points), \n\n"
                            + "You might want to try a different clustering method or less TMAspots.\n\n"
                            + "The error message is: \n" + e.getMessage(),
                    "Error at Nucleus clustering", JOptionPane.WARNING_MESSAGE);
        } finally {
            this.setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR));
        }
    }
}

From source file:iris.ID3.java

/**
 * Computes the entropy of the class distribution of the given instances.
 *
 * @param instances the instances whose class values are counted
 * @return the sum over all classes with at least one instance of
 *         {@code -p * log2(p)}, where {@code p} is that class's share of the instances
 */
public double calculateEntropy(Instances instances) {
    // One counter per class.
    double[] classCounts = new double[instances.numClasses()];
    final int total = instances.numInstances();

    // Tally how many instances fall into each class.
    for (int row = 0; row < total; row++) {
        int label = (int) instances.instance(row).classValue();
        classCounts[label] += 1;
    }

    double entropy = 0;
    for (double count : classCounts) {
        // Classes without instances contribute nothing (and would give log2(0)).
        if (count > 0) {
            double share = count / total;
            entropy -= share * Utils.log2(share);
        }
    }
    return entropy;
}

From source file:iris.ID3.java

/**
 * Computes the information gain obtained by splitting the instances on an attribute.
 *
 * @param instances the instances to split
 * @param att the attribute to split on
 * @return the entropy of {@code instances} minus the size-weighted entropies
 *         of the bins produced by {@code makeBins}
 */
public double infoGain(Instances instances, Attribute att) {
    // Start from the entropy of the unsplit set.
    double gain = calculateEntropy(instances);
    Instances[] bins = makeBins(instances, att);
    final double total = instances.numInstances();

    for (int v = 0; v < att.numValues(); v++) {
        // Weight each bin's entropy by its share of the instances.
        double weight = bins[v].numInstances() / total;
        gain -= weight * calculateEntropy(bins[v]);
    }
    return gain;
}

From source file:iris.ID3.java

/**
 * Distributes the instances into one bin per value of the given attribute.
 *
 * @param instances the instances to distribute
 * @param att the attribute whose value index selects the bin
 * @return one bin per attribute value, each compacted after filling
 */
private Instances[] makeBins(Instances instances, Attribute att) {
    final int binCount = att.numValues();
    Instances[] bins = new Instances[binCount];
    for (int b = 0; b < binCount; b++) {
        bins[b] = new Instances(instances, instances.numInstances());
    }

    // Walk the instances in order; the attribute value (as int) is the bin index.
    for (int row = 0; row < instances.numInstances(); row++) {
        Instance current = instances.instance(row);
        bins[(int) current.value(att)].add(current);
    }

    // Compact every bin once it is filled.
    for (Instances bin : bins) {
        bin.compactify();
    }
    return bins;
}