List of usage examples for weka.core Instances numClasses
public int numClasses()
From source file:newdtl.NewID3.java
/**
 * Computes the entropy of a dataset's class distribution.
 *
 * @param data the data for which entropy is to be computed
 * @return the entropy of the data class distribution
 */
private static double computeEntropy(Instances data) {
    // Tally how many instances carry each class label.
    double[] counts = new double[data.numClasses()];
    for (int i = 0; i < data.numInstances(); ++i) {
        counts[(int) data.instance(i).classValue()]++;
    }
    // entropy = -sum(p * log2(p)) over classes that actually occur.
    double total = data.numInstances();
    double result = 0;
    for (double count : counts) {
        if (count > 0) {
            double proportion = count / total;
            result -= proportion * log2(proportion);
        }
    }
    return result;
}
From source file:newdtl.NewJ48.java
/**
 * Creates a J48 tree by recursively splitting on the attribute with the
 * maximum Gain Ratio.
 *
 * Side effects: populates this node's {@code splitAttribute},
 * {@code splitThreshold}, {@code classDistributions}, {@code label},
 * {@code classAttribute}, {@code children} and {@code isLeaf} fields.
 * NOTE(review): missing values are imputed by mutating {@code data}
 * in place (see below) — confirm callers do not rely on the original data.
 *
 * @param data the training data
 * @exception Exception if tree failed to build
 */
private void makeTree(Instances data) throws Exception {
    // Check whether there are no instances in this node: make an empty leaf.
    if (data.numInstances() == 0) {
        splitAttribute = null;
        label = DOUBLE_MISSING_VALUE;
        classDistributions = new double[data.numClasses()];
        isLeaf = true;
    } else {
        // Find the attribute with the maximum Gain Ratio.
        // computeGainRatio returns {gainRatio, threshold} per attribute.
        double[] gainRatios = new double[data.numAttributes()];
        double[] thresholds = new double[data.numAttributes()];
        Enumeration attEnum = data.enumerateAttributes();
        while (attEnum.hasMoreElements()) {
            Attribute att = (Attribute) attEnum.nextElement();
            double[] result = computeGainRatio(data, att);
            gainRatios[att.index()] = result[0];
            thresholds[att.index()] = result[1];
        }
        splitAttribute = data.attribute(maxIndex(gainRatios));
        // Numeric attributes split on a threshold; nominal ones do not.
        if (splitAttribute.isNumeric()) {
            splitThreshold = thresholds[maxIndex(gainRatios)];
        } else {
            splitThreshold = Double.NaN;
        }
        // Record the class distribution of the data reaching this node.
        classDistributions = new double[data.numClasses()];
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = (Instance) data.instance(i);
            classDistributions[(int) inst.classValue()]++;
        }
        // Make a leaf if the best Gain Ratio is 0 (no informative split);
        // the leaf predicts the majority class.
        if (Double.compare(gainRatios[splitAttribute.index()], 0) == 0) {
            splitAttribute = null;
            label = maxIndex(classDistributions);
            classAttribute = data.classAttribute();
            isLeaf = true;
        } else {
            // Check whether the split attribute has missing values.
            if (isMissing(data, splitAttribute)) {
                // Find the mode (most frequent value index) of the attribute.
                int index = modusIndex(data, splitAttribute);
                // Replace missing values with the mode, mutating data in place.
                Enumeration dataEnum = data.enumerateInstances();
                while (dataEnum.hasMoreElements()) {
                    Instance inst = (Instance) dataEnum.nextElement();
                    if (inst.isMissing(splitAttribute)) {
                        inst.setValue(splitAttribute, splitAttribute.value(index));
                    }
                }
            }
            // Build child subtrees below this node: two children for a
            // numeric (binary threshold) split, one per value for nominal.
            Instances[] splitData;
            if (splitAttribute.isNumeric()) {
                splitData = splitData(data, splitAttribute, splitThreshold);
                children = new NewJ48[2];
                for (int j = 0; j < 2; j++) {
                    children[j] = new NewJ48();
                    children[j].makeTree(splitData[j]);
                }
            } else {
                splitData = splitData(data, splitAttribute);
                children = new NewJ48[splitAttribute.numValues()];
                for (int j = 0; j < splitAttribute.numValues(); j++) {
                    children[j] = new NewJ48();
                    children[j].makeTree(splitData[j]);
                }
            }
            isLeaf = false;
        }
    }
}
From source file:newdtl.NewJ48.java
/** * Computes the entropy of a dataset./*w w w . j a v a2s . c om*/ * * @param data the data for which entropy is to be computed * @return the entropy of the data class distribution * @throws Exception if computation fails */ private double computeEntropy(Instances data) { double[] labelCounts = new double[data.numClasses()]; for (int i = 0; i < data.numInstances(); ++i) { labelCounts[(int) data.instance(i).classValue()]++; } double entropy = 0; for (int i = 0; i < labelCounts.length; i++) { if (labelCounts[i] > 0) { double proportion = labelCounts[i] / data.numInstances(); entropy -= (proportion) * log2(proportion); } } return entropy; }
From source file:nl.detoren.ijc.neural.Voorspeller.java
License:Open Source License
/** * Lees trainingsdata in/*from w w w . ja v a 2 s . com*/ * * @param trainingfile * @return * @throws FileNotFoundException * @throws IOException */ private Instances readTrainingData(String trainingfile) throws FileNotFoundException, IOException { // Reading training arff file FileReader trainreader = new FileReader(trainingfile); Instances train = new Instances(trainreader); train.setClassIndex(train.numAttributes() - 1); logger.log(Level.INFO, "num attributes : " + train.numAttributes()); logger.log(Level.INFO, "num classes : " + train.numClasses()); logger.log(Level.INFO, "num data items : " + train.numInstances()); return train; }
From source file:org.knime.knip.suise.node.pixclassmodel.PixClassModelNodeModel.java
License:Open Source License
/**
 * {@inheritDoc}
 *
 * Builds a pixel-classification training set from the (labeling, image)
 * column pair of the input table, then trains the selected classifier on it.
 * Rows with missing cells, missing feature dimensions, or failing feature
 * computation are skipped with a warning rather than aborting execution.
 */
@Override
protected PortObject[] execute(PortObject[] inObjects, ExecutionContext exec) throws Exception {
    BufferedDataTable inTable = (BufferedDataTable) inObjects[0];
    int imgColIdx = getImgColumnIndex(inTable.getDataTableSpec());
    int labColIdx = getLabelingColumnIndex(inTable.getDataTableSpec());
    // retrieve all available labels
    RowIterator it = inTable.iterator();
    DataRow row;
    Set<String> labels = new HashSet<String>();
    Instances trainingSet = null;
    int rowCount = inTable.getRowCount();
    int i = 0; // counts rows reaching the feature computation (skipped rows are not counted)
    while (it.hasNext()) {
        row = it.next();
        // Skip rows where either the labeling or the image cell is missing.
        if (row.getCell(labColIdx).isMissing() || row.getCell(imgColIdx).isMissing()) {
            setWarningMessage("Errors occurred while execution! See console for details.");
            LOGGER.warn("Missing cell in row " + row.getKey() + ". Row skipped!");
            continue;
        }
        RandomAccessibleInterval<LabelingType<L>> lab = ((LabelingValue<L>) row.getCell(labColIdx))
                .getLabeling();
        ImgPlus<T> img = ((ImgPlusValue<T>) row.getCell(imgColIdx)).getImgPlus();
        // collect available labels (accumulated over all rows for the output spec)
        LabelRegions<L> regions = KNIPGateway.regions().regions(lab);
        labels.addAll(regions.getExistingLabels().stream().map(l -> l.toString()).collect(Collectors.toList()));
        // Resolve the selected feature dimension; skip the row if absent in this image.
        int[] tmp = m_featDimSelection.getSelectedDimIndices(img.numDimensions(), img);
        if (tmp.length == 0) {
            setWarningMessage("Errors occurred while execution! See console for details.");
            LOGGER.warn("Feature dimensions doesn't exist in image in row " + row.getKey() + ". Row skipped!");
            continue;
        }
        int featDim = tmp[0];
        int[] dimIndices = m_dimSelection.getSelectedDimIndices(img.numDimensions(), img);
        // Class labels present in this row's labeling.
        List<String> classLabels = new ArrayList<String>();
        for (L label : regions.getExistingLabels()) {
            classLabels.add(label.toString());
        }
        BuildTrainingData<L, T> btd = new BuildTrainingData<L, T>(classLabels, dimIndices, featDim,
                m_resampleRate.getDoubleValue(), m_balanceClassInstances.getBooleanValue());
        // Lazily create the training set from the first usable row; all
        // subsequent rows append into the same Instances object.
        if (trainingSet == null) {
            trainingSet = btd.bufferFactory().instantiate(lab, img);
        }
        exec.setProgress("Building training set for row " + row.getKey());
        try {
            btd.compute(lab, img, trainingSet);
        } catch (KNIPRuntimeException e) {
            // Best-effort: log and continue with the remaining rows.
            setWarningMessage("Errors occurred while execution! See console for details.");
            LOGGER.warn("Row " + row.getKey() + " skipped. " + e.getLocalizedMessage());
        }
        exec.checkCanceled();
        exec.setProgress((double) i / rowCount);
        i++;
    }
    // build classifier
    exec.setProgress("Build classifier ...");
    if (trainingSet == null) {
        throw new IllegalStateException(
                "No training set could be created due to the lack of training samples. Maybe wrong (i.e. non-existent) feature dimension selected!?");
    }
    // count instances per class for debugging purposes (classDistr is not used further)
    double[] classDistr = new double[trainingSet.numClasses()];
    for (Instance instance : trainingSet) {
        classDistr[(int) instance.classValue()]++;
    }
    Classifier classifier = m_classifierSelection.getClassifier();
    classifier.buildClassifier(trainingSet);
    return new PortObject[] { new WekaClassifierPortObject(classifier, trainingSet,
            new WekaClassifierPortObjectSpec(labels.toArray(new String[labels.size()]))) };
}
From source file:org.openml.webapplication.algorithm.InstancesHelper.java
License:Open Source License
public static double[] predictionToConfidences(Instances dataset, Instance prediction, int[] att_prediction_confidence, int att_prediction) throws Exception { double[] confidences = new double[dataset.numClasses()]; boolean nonNullValue = false; for (int i = 0; i < dataset.numClasses(); i++) { if (Utils.isMissingValue(prediction.value(att_prediction_confidence[i]))) { throw new Exception("Prediction file contains missing values for important attribute (" + prediction.attribute(att_prediction_confidence[i]).name() + "). "); }// www . java 2s. c o m confidences[i] = prediction.value(att_prediction_confidence[i]); if (confidences[i] > 0) { nonNullValue = true; } } if (nonNullValue == false) { confidences[(int) prediction.value(att_prediction)] = 1; } return confidences; }
From source file:org.openml.webapplication.fantail.dc.DCUntils.java
License:Open Source License
/**
 * Computes the entropy of the class distribution of a dataset.
 *
 * @param data the dataset whose class distribution is analyzed
 * @return the class entropy, -sum(p * log2(p)) over occurring classes
 */
public static double computeClassEntropy(Instances data) {
    final int total = data.numInstances();
    double[] frequency = new double[data.numClasses()];
    for (int idx = 0; idx < total; idx++) {
        frequency[(int) data.instance(idx).classValue()]++;
    }
    double entropy = 0;
    for (double count : frequency) {
        if (count > 0) {
            double p = count / total;
            entropy -= p * (Utils.log2(p));
        }
    }
    return entropy;
}
From source file:org.openml.webapplication.fantail.dc.DCUntils.java
License:Open Source License
private static double computeEntropy(Instances data) { double[] classCounts = new double[data.numClasses()]; Enumeration<?> instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); classCounts[(int) inst.classValue()]++; }//from w w w . j a va 2s .co m double entropy = 0; for (int j = 0; j < data.numClasses(); j++) { if (classCounts[j] > 0) { entropy -= classCounts[j] * Utils.log2(classCounts[j]); } } entropy /= (double) data.numInstances(); return entropy + Utils.log2(data.numInstances()); }
From source file:org.openml.webapplication.fantail.dc.statistical.ClassAtt.java
License:Open Source License
@Override public Map<String, Double> characterize(Instances instances) { int pCount = 0; int nCount = 0; int[] counts = new int[instances.numClasses()]; for (int i = 0; i < instances.numInstances(); i++) { Instance instance = instances.instance(i); counts[(int) instance.classValue()]++; }// ww w . j a v a 2 s . c om pCount = counts[weka.core.Utils.minIndex(counts)]; nCount = counts[weka.core.Utils.maxIndex(counts)]; Map<String, Double> qualities = new HashMap<String, Double>(); qualities.put(ids[0], instances.numClasses() * 1.0); qualities.put(ids[1], 1.0 * pCount / instances.numInstances()); qualities.put(ids[2], 1.0 * nCount / instances.numInstances()); return qualities; }
From source file:org.scripps.branch.classifier.ManualTree.java
License:Open Source License
/** * Backfits the given data into the tree. *//*ww w.jav a2s. c o m*/ public void backfitData(Instances data) throws Exception { // Compute initial class counts double[] classProbs = new double[data.numClasses()]; for (int i = 0; i < data.numInstances(); i++) { Instance inst = data.instance(i); classProbs[(int) inst.classValue()] += inst.weight(); } // Fit data into tree backfitData(data, classProbs); }