List of usage examples for weka.core Instances numClasses
public int numClasses()
From source file:newdtl.NewID3.java
/**
 * Computes the entropy of a dataset's class distribution.
 *
 * @param data the data for which entropy is to be computed
 * @return the entropy of the data class distribution
 */
private static double computeEntropy(Instances data) {
    // Tally how many instances carry each class label.
    double[] counts = new double[data.numClasses()];
    for (int i = 0; i < data.numInstances(); ++i) {
        counts[(int) data.instance(i).classValue()]++;
    }
    // entropy = -sum(p * log2(p)) over classes that actually occur.
    double total = data.numInstances();
    double result = 0;
    for (double count : counts) {
        if (count > 0) {
            double proportion = count / total;
            result -= proportion * log2(proportion);
        }
    }
    return result;
}
From source file:newdtl.NewJ48.java
/**
 * Creates a J48 tree by recursively splitting on the attribute with the
 * maximum Gain Ratio.
 *
 * Side effects: populates this node's {@code splitAttribute},
 * {@code splitThreshold}, {@code classDistributions}, {@code label},
 * {@code classAttribute}, {@code children} and {@code isLeaf} fields.
 * NOTE(review): missing values are imputed by mutating {@code data}
 * in place (see below) — confirm callers do not rely on the original data.
 *
 * @param data the training data
 * @exception Exception if tree failed to build
 */
private void makeTree(Instances data) throws Exception {
    // Check whether there are no instances in this node: make an empty leaf.
    if (data.numInstances() == 0) {
        splitAttribute = null;
        label = DOUBLE_MISSING_VALUE;
        classDistributions = new double[data.numClasses()];
        isLeaf = true;
    } else {
        // Find the attribute with the maximum Gain Ratio.
        // computeGainRatio returns {gainRatio, threshold} per attribute.
        double[] gainRatios = new double[data.numAttributes()];
        double[] thresholds = new double[data.numAttributes()];
        Enumeration attEnum = data.enumerateAttributes();
        while (attEnum.hasMoreElements()) {
            Attribute att = (Attribute) attEnum.nextElement();
            double[] result = computeGainRatio(data, att);
            gainRatios[att.index()] = result[0];
            thresholds[att.index()] = result[1];
        }
        splitAttribute = data.attribute(maxIndex(gainRatios));
        // Numeric attributes split on a threshold; nominal ones do not.
        if (splitAttribute.isNumeric()) {
            splitThreshold = thresholds[maxIndex(gainRatios)];
        } else {
            splitThreshold = Double.NaN;
        }
        // Record the class distribution of the data reaching this node.
        classDistributions = new double[data.numClasses()];
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = (Instance) data.instance(i);
            classDistributions[(int) inst.classValue()]++;
        }
        // Make a leaf if the best Gain Ratio is 0 (no informative split);
        // the leaf predicts the majority class.
        if (Double.compare(gainRatios[splitAttribute.index()], 0) == 0) {
            splitAttribute = null;
            label = maxIndex(classDistributions);
            classAttribute = data.classAttribute();
            isLeaf = true;
        } else {
            // Check whether the split attribute has missing values.
            if (isMissing(data, splitAttribute)) {
                // Find the mode (most frequent value index) of the attribute.
                int index = modusIndex(data, splitAttribute);
                // Replace missing values with the mode, mutating data in place.
                Enumeration dataEnum = data.enumerateInstances();
                while (dataEnum.hasMoreElements()) {
                    Instance inst = (Instance) dataEnum.nextElement();
                    if (inst.isMissing(splitAttribute)) {
                        inst.setValue(splitAttribute, splitAttribute.value(index));
                    }
                }
            }
            // Build child subtrees below this node: two children for a
            // numeric (binary threshold) split, one per value for nominal.
            Instances[] splitData;
            if (splitAttribute.isNumeric()) {
                splitData = splitData(data, splitAttribute, splitThreshold);
                children = new NewJ48[2];
                for (int j = 0; j < 2; j++) {
                    children[j] = new NewJ48();
                    children[j].makeTree(splitData[j]);
                }
            } else {
                splitData = splitData(data, splitAttribute);
                children = new NewJ48[splitAttribute.numValues()];
                for (int j = 0; j < splitAttribute.numValues(); j++) {
                    children[j] = new NewJ48();
                    children[j].makeTree(splitData[j]);
                }
            }
            isLeaf = false;
        }
    }
}
From source file:newdtl.NewJ48.java
/** * Computes the entropy of a dataset./*w w w . j a v a2s . c om*/ * * @param data the data for which entropy is to be computed * @return the entropy of the data class distribution * @throws Exception if computation fails */ private double computeEntropy(Instances data) { double[] labelCounts = new double[data.numClasses()]; for (int i = 0; i < data.numInstances(); ++i) { labelCounts[(int) data.instance(i).classValue()]++; } double entropy = 0; for (int i = 0; i < labelCounts.length; i++) { if (labelCounts[i] > 0) { double proportion = labelCounts[i] / data.numInstances(); entropy -= (proportion) * log2(proportion); } } return entropy; }
From source file:nl.detoren.ijc.neural.Voorspeller.java
License:Open Source License
/** * Lees trainingsdata in/*from w w w . ja v a 2 s . com*/ * * @param trainingfile * @return * @throws FileNotFoundException * @throws IOException */ private Instances readTrainingData(String trainingfile) throws FileNotFoundException, IOException { // Reading training arff file FileReader trainreader = new FileReader(trainingfile); Instances train = new Instances(trainreader); train.setClassIndex(train.numAttributes() - 1); logger.log(Level.INFO, "num attributes : " + train.numAttributes()); logger.log(Level.INFO, "num classes : " + train.numClasses()); logger.log(Level.INFO, "num data items : " + train.numInstances()); return train; }
From source file:org.knime.knip.suise.node.pixclassmodel.PixClassModelNodeModel.java
License:Open Source License
/**
 * {@inheritDoc}
 *
 * Builds a pixel-classification training set from the (labeling, image)
 * column pair of the input table, then trains the selected classifier on it.
 * Rows with missing cells, missing feature dimensions, or failing feature
 * computation are skipped with a warning rather than aborting execution.
 */
@Override
protected PortObject[] execute(PortObject[] inObjects, ExecutionContext exec) throws Exception {
    BufferedDataTable inTable = (BufferedDataTable) inObjects[0];
    int imgColIdx = getImgColumnIndex(inTable.getDataTableSpec());
    int labColIdx = getLabelingColumnIndex(inTable.getDataTableSpec());
    // retrieve all available labels
    RowIterator it = inTable.iterator();
    DataRow row;
    Set<String> labels = new HashSet<String>();
    Instances trainingSet = null;
    int rowCount = inTable.getRowCount();
    int i = 0; // counts rows reaching the feature computation (skipped rows are not counted)
    while (it.hasNext()) {
        row = it.next();
        // Skip rows where either the labeling or the image cell is missing.
        if (row.getCell(labColIdx).isMissing() || row.getCell(imgColIdx).isMissing()) {
            setWarningMessage("Errors occurred while execution! See console for details.");
            LOGGER.warn("Missing cell in row " + row.getKey() + ". Row skipped!");
            continue;
        }
        RandomAccessibleInterval<LabelingType<L>> lab = ((LabelingValue<L>) row.getCell(labColIdx))
                .getLabeling();
        ImgPlus<T> img = ((ImgPlusValue<T>) row.getCell(imgColIdx)).getImgPlus();
        // collect available labels (accumulated over all rows for the output spec)
        LabelRegions<L> regions = KNIPGateway.regions().regions(lab);
        labels.addAll(regions.getExistingLabels().stream().map(l -> l.toString()).collect(Collectors.toList()));
        // Resolve the selected feature dimension; skip the row if absent in this image.
        int[] tmp = m_featDimSelection.getSelectedDimIndices(img.numDimensions(), img);
        if (tmp.length == 0) {
            setWarningMessage("Errors occurred while execution! See console for details.");
            LOGGER.warn("Feature dimensions doesn't exist in image in row " + row.getKey() + ". Row skipped!");
            continue;
        }
        int featDim = tmp[0];
        int[] dimIndices = m_dimSelection.getSelectedDimIndices(img.numDimensions(), img);
        // Class labels present in this row's labeling.
        List<String> classLabels = new ArrayList<String>();
        for (L label : regions.getExistingLabels()) {
            classLabels.add(label.toString());
        }
        BuildTrainingData<L, T> btd = new BuildTrainingData<L, T>(classLabels, dimIndices, featDim,
                m_resampleRate.getDoubleValue(), m_balanceClassInstances.getBooleanValue());
        // Lazily create the training set from the first usable row; all
        // subsequent rows append into the same Instances object.
        if (trainingSet == null) {
            trainingSet = btd.bufferFactory().instantiate(lab, img);
        }
        exec.setProgress("Building training set for row " + row.getKey());
        try {
            btd.compute(lab, img, trainingSet);
        } catch (KNIPRuntimeException e) {
            // Best-effort: log and continue with the remaining rows.
            setWarningMessage("Errors occurred while execution! See console for details.");
            LOGGER.warn("Row " + row.getKey() + " skipped. " + e.getLocalizedMessage());
        }
        exec.checkCanceled();
        exec.setProgress((double) i / rowCount);
        i++;
    }
    // build classifier
    exec.setProgress("Build classifier ...");
    if (trainingSet == null) {
        throw new IllegalStateException(
                "No training set could be created due to the lack of training samples. Maybe wrong (i.e. non-existent) feature dimension selected!?");
    }
    // count instances per class for debugging purposes (classDistr is not used further)
    double[] classDistr = new double[trainingSet.numClasses()];
    for (Instance instance : trainingSet) {
        classDistr[(int) instance.classValue()]++;
    }
    Classifier classifier = m_classifierSelection.getClassifier();
    classifier.buildClassifier(trainingSet);
    return new PortObject[] { new WekaClassifierPortObject(classifier, trainingSet,
            new WekaClassifierPortObjectSpec(labels.toArray(new String[labels.size()]))) };
}
From source file:org.openml.webapplication.algorithm.InstancesHelper.java
License:Open Source License
public static double[] predictionToConfidences(Instances dataset, Instance prediction, int[] att_prediction_confidence, int att_prediction) throws Exception { double[] confidences = new double[dataset.numClasses()]; boolean nonNullValue = false; for (int i = 0; i < dataset.numClasses(); i++) { if (Utils.isMissingValue(prediction.value(att_prediction_confidence[i]))) { throw new Exception("Prediction file contains missing values for important attribute (" + prediction.attribute(att_prediction_confidence[i]).name() + "). "); }// www . java 2s. c o m confidences[i] = prediction.value(att_prediction_confidence[i]); if (confidences[i] > 0) { nonNullValue = true; } } if (nonNullValue == false) { confidences[(int) prediction.value(att_prediction)] = 1; } return confidences; }
From source file:org.openml.webapplication.fantail.dc.DCUntils.java
License:Open Source License
/**
 * Computes the entropy of the class distribution of a dataset.
 *
 * @param data the dataset whose class distribution is analyzed
 * @return the class entropy, -sum(p * log2(p)) over occurring classes
 */
public static double computeClassEntropy(Instances data) {
    final int total = data.numInstances();
    double[] frequency = new double[data.numClasses()];
    for (int idx = 0; idx < total; idx++) {
        frequency[(int) data.instance(idx).classValue()]++;
    }
    double entropy = 0;
    for (double count : frequency) {
        if (count > 0) {
            double p = count / total;
            entropy -= p * (Utils.log2(p));
        }
    }
    return entropy;
}
From source file:org.openml.webapplication.fantail.dc.DCUntils.java
License:Open Source License
private static double computeEntropy(Instances data) { double[] classCounts = new double[data.numClasses()]; Enumeration<?> instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); classCounts[(int) inst.classValue()]++; }//from w w w . j a va 2s .co m double entropy = 0; for (int j = 0; j < data.numClasses(); j++) { if (classCounts[j] > 0) { entropy -= classCounts[j] * Utils.log2(classCounts[j]); } } entropy /= (double) data.numInstances(); return entropy + Utils.log2(data.numInstances()); }
From source file:org.openml.webapplication.fantail.dc.statistical.ClassAtt.java
License:Open Source License
@Override public Map<String, Double> characterize(Instances instances) { int pCount = 0; int nCount = 0; int[] counts = new int[instances.numClasses()]; for (int i = 0; i < instances.numInstances(); i++) { Instance instance = instances.instance(i); counts[(int) instance.classValue()]++; }// ww w . j a v a 2 s . c om pCount = counts[weka.core.Utils.minIndex(counts)]; nCount = counts[weka.core.Utils.maxIndex(counts)]; Map<String, Double> qualities = new HashMap<String, Double>(); qualities.put(ids[0], instances.numClasses() * 1.0); qualities.put(ids[1], 1.0 * pCount / instances.numInstances()); qualities.put(ids[2], 1.0 * nCount / instances.numInstances()); return qualities; }
From source file:org.scripps.branch.classifier.ManualTree.java
License:Open Source License
/** * Backfits the given data into the tree. *//*ww w.jav a2s. c o m*/ public void backfitData(Instances data) throws Exception { // Compute initial class counts double[] classProbs = new double[data.numClasses()]; for (int i = 0; i < data.numInstances(); i++) { Instance inst = data.instance(i); classProbs[(int) inst.classValue()] += inst.weight(); } // Fit data into tree backfitData(data, classProbs); }