List of usage examples for weka.core.Instance.classValue()
public double classValue();
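For orientation before the examples: classValue() returns the class of an Instance as a double. For a nominal class attribute this is the zero-based index of the class label, which is why every example on this page casts the result to int before using it as an array index. Below is a minimal sketch of the pattern; the file name iris.arff is a placeholder for any ARFF dataset, and the class index must be set before classValue() may be called.

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ClassValueDemo {
    public static void main(String[] args) throws Exception {
        // hypothetical input file; any ARFF with a nominal class attribute works
        Instances data = DataSource.read("iris.arff");
        data.setClassIndex(data.numAttributes() - 1); // classValue() fails if this is unset

        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            int classIdx = (int) inst.classValue(); // index into the class attribute's values
            String label = inst.classAttribute().value(classIdx);
            System.out.println("Instance " + i + " -> class " + classIdx + " (" + label + ")");
        }
    }
}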
From source file: org.if4071.myann.TopologyModel.java

public void insertDataToOutputNodes(Instance inputData) {
    int classValue = (int) inputData.classValue();
    for (int i = 0; i < inputData.numClasses(); i++) {
        Node n = nodes.get(nodes.size() - layers.get(layers.size() - 1) + i);
        if (i == classValue) {
            n.setTarget(1);
        } else {
            n.setTarget(0);
        }
    }
}
From source file: org.knime.knip.suise.node.boundarymodel.contourdata.IRI.java
License: Open Source License

private double ruleScore(IntervalRule rule, Instances data) throws Exception {
    double posCount = 0;
    double negCount = 0;
    double posSumWeights = 0;
    for (Instance inst : data) {
        if (inst.weight() > 0) {
            double dist[] = rule.distributionForInstance(inst);
            if (dist[1] > dist[0]) {
                if (inst.classValue() == 1) {
                    posSumWeights += inst.weight();
                    posCount++;
                } else {
                    negCount++;
                }
            }
        }
    }
    double score = posSumWeights / (posCount + negCount + m_bias);
    return score;
}
From source file: org.knime.knip.suise.node.pixclassmodel.PixClassModelNodeModel.java
License: Open Source License

/**
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(PortObject[] inObjects, ExecutionContext exec) throws Exception {
    BufferedDataTable inTable = (BufferedDataTable) inObjects[0];
    int imgColIdx = getImgColumnIndex(inTable.getDataTableSpec());
    int labColIdx = getLabelingColumnIndex(inTable.getDataTableSpec());

    // retrieve all available labels
    RowIterator it = inTable.iterator();
    DataRow row;
    Set<String> labels = new HashSet<String>();
    Instances trainingSet = null;
    int rowCount = inTable.getRowCount();
    int i = 0;
    while (it.hasNext()) {
        row = it.next();
        if (row.getCell(labColIdx).isMissing() || row.getCell(imgColIdx).isMissing()) {
            setWarningMessage("Errors occurred while execution! See console for details.");
            LOGGER.warn("Missing cell in row " + row.getKey() + ". Row skipped!");
            continue;
        }
        RandomAccessibleInterval<LabelingType<L>> lab =
                ((LabelingValue<L>) row.getCell(labColIdx)).getLabeling();
        ImgPlus<T> img = ((ImgPlusValue<T>) row.getCell(imgColIdx)).getImgPlus();

        // collect available labels
        LabelRegions<L> regions = KNIPGateway.regions().regions(lab);
        labels.addAll(regions.getExistingLabels().stream().map(l -> l.toString())
                .collect(Collectors.toList()));

        int[] tmp = m_featDimSelection.getSelectedDimIndices(img.numDimensions(), img);
        if (tmp.length == 0) {
            setWarningMessage("Errors occurred while execution! See console for details.");
            LOGGER.warn("Feature dimensions doesn't exist in image in row " + row.getKey()
                    + ". Row skipped!");
            continue;
        }
        int featDim = tmp[0];
        int[] dimIndices = m_dimSelection.getSelectedDimIndices(img.numDimensions(), img);

        List<String> classLabels = new ArrayList<String>();
        for (L label : regions.getExistingLabels()) {
            classLabels.add(label.toString());
        }

        BuildTrainingData<L, T> btd = new BuildTrainingData<L, T>(classLabels, dimIndices, featDim,
                m_resampleRate.getDoubleValue(), m_balanceClassInstances.getBooleanValue());
        if (trainingSet == null) {
            trainingSet = btd.bufferFactory().instantiate(lab, img);
        }
        exec.setProgress("Building training set for row " + row.getKey());
        try {
            btd.compute(lab, img, trainingSet);
        } catch (KNIPRuntimeException e) {
            setWarningMessage("Errors occurred while execution! See console for details.");
            LOGGER.warn("Row " + row.getKey() + " skipped. " + e.getLocalizedMessage());
        }
        exec.checkCanceled();
        exec.setProgress((double) i / rowCount);
        i++;
    }

    // build classifier
    exec.setProgress("Build classifier ...");
    if (trainingSet == null) {
        throw new IllegalStateException(
                "No training set could be created due to the lack of training samples. Maybe wrong (i.e. non-existent) feature dimension selected!?");
    }

    // count instances per class for debugging purposes
    double[] classDistr = new double[trainingSet.numClasses()];
    for (Instance instance : trainingSet) {
        classDistr[(int) instance.classValue()]++;
    }

    Classifier classifier = m_classifierSelection.getClassifier();
    classifier.buildClassifier(trainingSet);
    return new PortObject[] { new WekaClassifierPortObject(classifier, trainingSet,
            new WekaClassifierPortObjectSpec(labels.toArray(new String[labels.size()]))) };
}
From source file: org.openml.webapplication.algorithm.InstancesHelper.java
License: Open Source License

@SuppressWarnings("unchecked")
public static void stratify(Instances dataset) {
    int numClasses = dataset.classAttribute().numValues();
    int numInstances = dataset.numInstances();
    double[] classRatios = classRatios(dataset);
    double[] currentRatios = new double[numClasses];
    int[] currentCounts = new int[numClasses];
    List<Instance>[] instancesSorted = new LinkedList[numClasses];
    for (int i = 0; i < numClasses; ++i) {
        instancesSorted[i] = new LinkedList<Instance>();
    }

    // first, sort all instances based on class into different lists
    for (int i = 0; i < numInstances; ++i) {
        Instance current = dataset.instance(i);
        instancesSorted[(int) current.classValue()].add(current);
    }

    // now empty the original dataset; all instances are stored in the per-class linked lists
    for (int i = 0; i < numInstances; i++) {
        dataset.delete(dataset.numInstances() - 1);
    }

    for (int i = 0; i < numInstances; ++i) {
        int idx = biggestDifference(classRatios, currentRatios);
        dataset.add(instancesSorted[idx].remove(0));
        currentCounts[idx]++;
        for (int j = 0; j < currentRatios.length; ++j) {
            currentRatios[j] = (currentCounts[j] * 1.0) / (i + 1);
        }
    }
}
From source file: org.openml.webapplication.fantail.dc.DCUntils.java
License: Open Source License

public static double computeClassEntropy(Instances data) {
    double[] classValueCounts = new double[data.numClasses()];
    for (int i = 0; i < data.numInstances(); i++) {
        Instance inst = data.instance(i);
        classValueCounts[(int) inst.classValue()]++;
    }
    double classEntropy = 0;
    for (int c = 0; c < data.numClasses(); c++) {
        if (classValueCounts[c] > 0) {
            double prob_c = classValueCounts[c] / data.numInstances();
            classEntropy += prob_c * (Utils.log2(prob_c));
        }
    }
    classEntropy = classEntropy * -1.0;
    return classEntropy;
}
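In other words, computeClassEntropy returns the Shannon entropy of the class distribution, H(C) = -Σ_c p_c · log2(p_c), where p_c is the fraction of instances whose classValue() equals c; classes that never occur contribute nothing to the sum.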
From source file: org.openml.webapplication.fantail.dc.DCUntils.java
License: Open Source License

private static double computeEntropy(Instances data) {
    double[] classCounts = new double[data.numClasses()];
    Enumeration<?> instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        classCounts[(int) inst.classValue()]++;
    }
    double entropy = 0;
    for (int j = 0; j < data.numClasses(); j++) {
        if (classCounts[j] > 0) {
            entropy -= classCounts[j] * Utils.log2(classCounts[j]);
        }
    }
    entropy /= (double) data.numInstances();
    return entropy + Utils.log2(data.numInstances());
}
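This method arrives at the same entropy through the identity -Σ_j (c_j/N) · log2(c_j/N) = log2(N) - (1/N) · Σ_j c_j · log2(c_j): it accumulates -Σ_j c_j · log2(c_j) over the raw class counts c_j, divides by the number of instances N, and adds log2(N) at the end.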
From source file: org.openml.webapplication.fantail.dc.statistical.ClassAtt.java
License: Open Source License

@Override
public Map<String, Double> characterize(Instances instances) {
    int pCount = 0;
    int nCount = 0;
    int[] counts = new int[instances.numClasses()];
    for (int i = 0; i < instances.numInstances(); i++) {
        Instance instance = instances.instance(i);
        counts[(int) instance.classValue()]++;
    }
    pCount = counts[weka.core.Utils.minIndex(counts)]; // size of the smallest class
    nCount = counts[weka.core.Utils.maxIndex(counts)]; // size of the largest class

    Map<String, Double> qualities = new HashMap<String, Double>();
    qualities.put(ids[0], instances.numClasses() * 1.0);
    qualities.put(ids[1], 1.0 * pCount / instances.numInstances());
    qualities.put(ids[2], 1.0 * nCount / instances.numInstances());
    return qualities;
}
From source file: org.packDataMining.SMOTE.java
License: Open Source License

/**
 * The procedure implementing the SMOTE algorithm. The output
 * instances are pushed onto the output queue for collection.
 *
 * @throws Exception if provided options cannot be executed
 *                   on input instances
 */
protected void doSMOTE() throws Exception {
    int minIndex = 0;
    int min = Integer.MAX_VALUE;
    if (m_DetectMinorityClass) {
        // find minority class
        int[] classCounts = getInputFormat().attributeStats(getInputFormat().classIndex()).nominalCounts;
        for (int i = 0; i < classCounts.length; i++) {
            if (classCounts[i] != 0 && classCounts[i] < min) {
                min = classCounts[i];
                minIndex = i;
            }
        }
    } else {
        String classVal = getClassValue();
        if (classVal.equalsIgnoreCase("first")) {
            minIndex = 1;
        } else if (classVal.equalsIgnoreCase("last")) {
            minIndex = getInputFormat().numClasses();
        } else {
            minIndex = Integer.parseInt(classVal);
        }
        if (minIndex > getInputFormat().numClasses()) {
            throw new Exception("value index must be <= the number of classes");
        }
        minIndex--; // make it an index
    }

    int nearestNeighbors;
    if (min <= getNearestNeighbors()) {
        nearestNeighbors = min - 1;
    } else {
        nearestNeighbors = getNearestNeighbors();
    }
    if (nearestNeighbors < 1) {
        throw new Exception("Cannot use 0 neighbors!");
    }

    // compose minority class dataset
    // also push all dataset instances
    Instances sample = getInputFormat().stringFreeStructure();
    Enumeration instanceEnum = getInputFormat().enumerateInstances();
    while (instanceEnum.hasMoreElements()) {
        Instance instance = (Instance) instanceEnum.nextElement();
        push((Instance) instance.copy());
        if ((int) instance.classValue() == minIndex) {
            sample.add(instance);
        }
    }

    // compute Value Distance Metric matrices for nominal features
    Map vdmMap = new HashMap();
    Enumeration attrEnum = getInputFormat().enumerateAttributes();
    while (attrEnum.hasMoreElements()) {
        Attribute attr = (Attribute) attrEnum.nextElement();
        if (!attr.equals(getInputFormat().classAttribute())) {
            if (attr.isNominal() || attr.isString()) {
                double[][] vdm = new double[attr.numValues()][attr.numValues()];
                vdmMap.put(attr, vdm);
                int[] featureValueCounts = new int[attr.numValues()];
                int[][] featureValueCountsByClass =
                        new int[getInputFormat().classAttribute().numValues()][attr.numValues()];
                instanceEnum = getInputFormat().enumerateInstances();
                while (instanceEnum.hasMoreElements()) {
                    Instance instance = (Instance) instanceEnum.nextElement();
                    int value = (int) instance.value(attr);
                    int classValue = (int) instance.classValue();
                    featureValueCounts[value]++;
                    featureValueCountsByClass[classValue][value]++;
                }
                for (int valueIndex1 = 0; valueIndex1 < attr.numValues(); valueIndex1++) {
                    for (int valueIndex2 = 0; valueIndex2 < attr.numValues(); valueIndex2++) {
                        double sum = 0;
                        for (int classValueIndex = 0; classValueIndex < getInputFormat().numClasses(); classValueIndex++) {
                            double c1i = (double) featureValueCountsByClass[classValueIndex][valueIndex1];
                            double c2i = (double) featureValueCountsByClass[classValueIndex][valueIndex2];
                            double c1 = (double) featureValueCounts[valueIndex1];
                            double c2 = (double) featureValueCounts[valueIndex2];
                            double term1 = c1i / c1;
                            double term2 = c2i / c2;
                            sum += Math.abs(term1 - term2);
                        }
                        vdm[valueIndex1][valueIndex2] = sum;
                    }
                }
            }
        }
    }

    // use this random source for all required randomness
    Random rand = new Random(getRandomSeed());

    // find the set of extra indices to use if the percentage is not evenly divisible by 100
    List extraIndices = new LinkedList();
    double percentageRemainder = (getPercentage() / 100) - Math.floor(getPercentage() / 100.0);
    int extraIndicesCount = (int) (percentageRemainder * sample.numInstances());
    if (extraIndicesCount >= 1) {
        for (int i = 0; i < sample.numInstances(); i++) {
            extraIndices.add(i);
        }
    }
    Collections.shuffle(extraIndices, rand);
    extraIndices = extraIndices.subList(0, extraIndicesCount);
    Set extraIndexSet = new HashSet(extraIndices);

    // the main loop to handle computing nearest neighbors and generating SMOTE
    // examples from each instance in the original minority class data
    Instance[] nnArray = new Instance[nearestNeighbors];
    for (int i = 0; i < sample.numInstances(); i++) {
        Instance instanceI = sample.instance(i);

        // find k nearest neighbors for each instance
        List distanceToInstance = new LinkedList();
        for (int j = 0; j < sample.numInstances(); j++) {
            Instance instanceJ = sample.instance(j);
            if (i != j) {
                double distance = 0;
                attrEnum = getInputFormat().enumerateAttributes();
                while (attrEnum.hasMoreElements()) {
                    Attribute attr = (Attribute) attrEnum.nextElement();
                    if (!attr.equals(getInputFormat().classAttribute())) {
                        double iVal = instanceI.value(attr);
                        double jVal = instanceJ.value(attr);
                        if (attr.isNumeric()) {
                            distance += Math.pow(iVal - jVal, 2);
                        } else {
                            distance += ((double[][]) vdmMap.get(attr))[(int) iVal][(int) jVal];
                        }
                    }
                }
                distance = Math.pow(distance, .5);
                distanceToInstance.add(new Object[] { distance, instanceJ });
            }
        }

        // sort the neighbors in ascending order of distance
        Collections.sort(distanceToInstance, new Comparator() {
            public int compare(Object o1, Object o2) {
                double distance1 = (Double) ((Object[]) o1)[0];
                double distance2 = (Double) ((Object[]) o2)[0];
                return Double.compare(distance1, distance2);
            }
        });

        // populate the actual nearest neighbor instance array
        Iterator entryIterator = distanceToInstance.iterator();
        int j = 0;
        while (entryIterator.hasNext() && j < nearestNeighbors) {
            nnArray[j] = (Instance) ((Object[]) entryIterator.next())[1];
            j++;
        }

        // create synthetic examples
        int n = (int) Math.floor(getPercentage() / 100);
        while (n > 0 || extraIndexSet.remove(i)) {
            double[] values = new double[sample.numAttributes()];
            int nn = rand.nextInt(nearestNeighbors);
            attrEnum = getInputFormat().enumerateAttributes();
            while (attrEnum.hasMoreElements()) {
                Attribute attr = (Attribute) attrEnum.nextElement();
                if (!attr.equals(getInputFormat().classAttribute())) {
                    if (attr.isNumeric()) {
                        double dif = nnArray[nn].value(attr) - instanceI.value(attr);
                        double gap = rand.nextDouble();
                        values[attr.index()] = (double) (instanceI.value(attr) + gap * dif);
                    } else if (attr.isDate()) {
                        double dif = nnArray[nn].value(attr) - instanceI.value(attr);
                        double gap = rand.nextDouble();
                        values[attr.index()] = (long) (instanceI.value(attr) + gap * dif);
                    } else {
                        int[] valueCounts = new int[attr.numValues()];
                        int iVal = (int) instanceI.value(attr);
                        valueCounts[iVal]++;
                        for (int nnEx = 0; nnEx < nearestNeighbors; nnEx++) {
                            int val = (int) nnArray[nnEx].value(attr);
                            valueCounts[val]++;
                        }
                        int maxIndex = 0;
                        int max = Integer.MIN_VALUE;
                        for (int index = 0; index < attr.numValues(); index++) {
                            if (valueCounts[index] > max) {
                                max = valueCounts[index];
                                maxIndex = index;
                            }
                        }
                        values[attr.index()] = maxIndex;
                    }
                }
            }
            values[sample.classIndex()] = minIndex;
            Instance synthetic = new Instance(1.0, values);
            push(synthetic);
            n--;
        }
    }
}
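As a usage sketch, assuming this class follows Weka's standard Filter contract (setInputFormat before Filter.useFilter) and exposes setters matching the getters used above (setPercentage, setNearestNeighbors are assumptions mirroring getPercentage and getNearestNeighbors; dataset.arff is a placeholder file), applying the filter might look like this:

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.Filter;
import org.packDataMining.SMOTE;

public class SmoteExample {
    public static void main(String[] args) throws Exception {
        // load a dataset; assume the class is the last attribute
        Instances data = DataSource.read("dataset.arff");
        data.setClassIndex(data.numAttributes() - 1);

        SMOTE smote = new SMOTE();
        smote.setPercentage(100.0);   // generate 100% extra minority-class instances (assumed setter)
        smote.setNearestNeighbors(5); // k used when picking neighbors (assumed setter)
        smote.setInputFormat(data);   // must be called before useFilter
        Instances balanced = Filter.useFilter(data, smote);

        System.out.println("Before: " + data.numInstances()
                + ", after: " + balanced.numInstances());
    }
}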
From source file: org.scripps.branch.classifier.ManualTree.java
License: Open Source License

/**
 * Generates the distribution of classes over the given attribute.
 *
 * @param instances instances to compute the distribution from
 * @param attIndex  index of the attribute to get the distribution of
 * @param distMap   HashMap to put the distribution data into
 * @return HashMap of class distribution data
 */
protected HashMap addDistributionData(Instances instances, int attIndex, HashMap distMap) throws Exception {
    Map<String, Comparable> temp = new HashMap<String, Comparable>();
    ArrayList<Object> distData = new ArrayList();
    boolean isNominal = false;
    instances.sort(attIndex);
    for (int i = 0; i < instances.numInstances(); i++) {
        Instance inst = instances.instance(i);
        if (!Double.isNaN(inst.value(attIndex))) {
            temp = new HashMap<String, Comparable>();
            if (inst.attribute(attIndex).isNominal()) {
                temp.put("value", inst.attribute(attIndex).value((int) inst.value(attIndex)));
                isNominal = true;
            } else {
                temp.put("value", inst.value(attIndex));
            }
            // note: "classprob" holds the class label for this instance, not a probability
            temp.put("classprob", inst.classAttribute().value((int) inst.classValue()));
            distData.add(temp);
        }
    }
    if (!distData.isEmpty()) {
        distMap.put("dataArray", distData);
        distMap.put("isNominal", isNominal);
        setDistributionData(distMap);
    }
    return distMap;
}
From source file: org.scripps.branch.classifier.ManualTree.java
License: Open Source License

/**
 * Backfits the given data into the tree.
 */
public void backfitData(Instances data) throws Exception {
    // Compute initial class counts
    double[] classProbs = new double[data.numClasses()];
    for (int i = 0; i < data.numInstances(); i++) {
        Instance inst = data.instance(i);
        classProbs[(int) inst.classValue()] += inst.weight();
    }

    // Fit data into tree
    backfitData(data, classProbs);
}