List of usage examples for weka.core Range getSelection
public int[] getSelection()
From source file:MultiClassClassifier.java
License:Open Source License
/** * Returns the distribution for an instance. * * @param inst the instance to get the distribution for * @return the distribution//from www. java 2s . c om * @throws Exception if the distribution can't be computed successfully */ public double[] distributionForInstance(Instance inst) throws Exception { if (m_Classifiers.length == 1) { return m_Classifiers[0].distributionForInstance(inst); } double[] probs = new double[inst.numClasses()]; if (m_Method == METHOD_1_AGAINST_1) { double[][] r = new double[inst.numClasses()][inst.numClasses()]; double[][] n = new double[inst.numClasses()][inst.numClasses()]; for (int i = 0; i < m_ClassFilters.length; i++) { if (m_Classifiers[i] != null) { Instance tempInst = (Instance) inst.copy(); tempInst.setDataset(m_TwoClassDataset); double[] current = m_Classifiers[i].distributionForInstance(tempInst); Range range = new Range(((RemoveWithValues) m_ClassFilters[i]).getNominalIndices()); range.setUpper(m_ClassAttribute.numValues()); int[] pair = range.getSelection(); if (m_pairwiseCoupling && inst.numClasses() > 2) { r[pair[0]][pair[1]] = current[0]; n[pair[0]][pair[1]] = m_SumOfWeights[i]; } else { if (current[0] > current[1]) { probs[pair[0]] += 1.0; } else { probs[pair[1]] += 1.0; } } } } if (m_pairwiseCoupling && inst.numClasses() > 2) { return pairwiseCoupling(n, r); } } else { // error correcting style methods for (int i = 0; i < m_ClassFilters.length; i++) { m_ClassFilters[i].input(inst); m_ClassFilters[i].batchFinished(); double[] current = m_Classifiers[i].distributionForInstance(m_ClassFilters[i].output()); //Calibrate the binary classifier scores for (int j = 0; j < m_ClassAttribute.numValues(); j++) { if (((MakeIndicator) m_ClassFilters[i]).getValueRange().isInRange(j)) { probs[j] += current[1]; } else { probs[j] += current[0]; } } } } if (Utils.gr(Utils.sum(probs), 0)) { Utils.normalize(probs); return probs; } else { return m_ZeroR.distributionForInstance(inst); } }
From source file:MultiClassClassifier.java
License:Open Source License
/** * Prints the classifiers./*ww w . jav a 2s. c o m*/ * * @return a string representation of the classifier */ public String toString() { if (m_Classifiers == null) { return "MultiClassClassifier: No model built yet."; } StringBuffer text = new StringBuffer(); text.append("MultiClassClassifier\n\n"); for (int i = 0; i < m_Classifiers.length; i++) { text.append("Classifier ").append(i + 1); if (m_Classifiers[i] != null) { if ((m_ClassFilters != null) && (m_ClassFilters[i] != null)) { if (m_ClassFilters[i] instanceof RemoveWithValues) { Range range = new Range(((RemoveWithValues) m_ClassFilters[i]).getNominalIndices()); range.setUpper(m_ClassAttribute.numValues()); int[] pair = range.getSelection(); text.append(", " + (pair[0] + 1) + " vs " + (pair[1] + 1)); } else if (m_ClassFilters[i] instanceof MakeIndicator) { text.append(", using indicator values: "); text.append(((MakeIndicator) m_ClassFilters[i]).getValueRange()); } } text.append('\n'); text.append(m_Classifiers[i].toString() + "\n\n"); } else { text.append(" Skipped (no training examples)\n"); } } return text.toString(); }
From source file:org.uclab.mm.kcl.ddkat.datapreprocessor.MissingValueHandler.java
License:Apache License
/** * Method to replace the identified missing values. * * @throws Exception the exception/*from www . j a v a 2s . c om*/ */ public void replaceMissingValues() throws Exception { this.confirmationMessage = new ArrayList<String>(); Instances outputData; String inputFile = BASE_DIR + "OriginalDataSet.csv"; // load CSV file CSVLoader fileLoader = new CSVLoader(); fileLoader.setSource(new File(inputFile)); outputData = fileLoader.getDataSet(); int numInstances = outputData.numInstances(); int numAttributes = outputData.numAttributes(); final int NON_NUMERIC = -1; int[] m_AttributeIndices = null; Range m_Attributes = new Range("first-last"); // attributes must be numeric m_Attributes.setUpper(outputData.numAttributes() - 1); m_AttributeIndices = m_Attributes.getSelection(); for (int i = 0; i < m_AttributeIndices.length; i++) { // ignore class if (m_AttributeIndices[i] == outputData.classIndex()) { m_AttributeIndices[i] = NON_NUMERIC; continue; } // not numeric -> ignore it if (!outputData.attribute(m_AttributeIndices[i]).isNumeric()) m_AttributeIndices[i] = NON_NUMERIC; } double sum; int missingCounter; double attributeMean; // identify the missing values for (int attributeIndex = 0; attributeIndex < numAttributes; attributeIndex++) { // non-numeric attribute? 
if (m_AttributeIndices[attributeIndex] == NON_NUMERIC) { continue; } double tempArr[] = outputData.attributeToDoubleArray(attributeIndex); sum = 0; missingCounter = 0; for (int i = 0; i < tempArr.length; i++) { sum = sum + tempArr[i]; if (tempArr[i] == 0) missingCounter++; } attributeMean = sum / (numInstances - missingCounter); for (int instanceIndex = 0; instanceIndex < numInstances; instanceIndex++) { // replace the missing values with attribute mean values if (outputData.instance(instanceIndex).value(attributeIndex) == 0) { outputData.instance(instanceIndex).setValue(attributeIndex, attributeMean); } } } outputData.deleteAttributeAt(outputData.numAttributes() - 1); outputData.deleteAttributeAt(outputData.numAttributes() - 1); saveFilledData(inputFile, outputData); }
From source file:org.uclab.mm.kcl.ddkat.datapreprocessor.OutlierHandler.java
License:Apache License
/** * Method to replace the detected outlier values. * * @throws Exception the exception// w ww. ja v a 2 s.c o m */ public void replaceOutliers() throws Exception { Instances inputData, outputData; String inputFile = BASE_DIR + "OriginalDataSet.csv"; // load CSV file CSVLoader fileLoader = new CSVLoader(); fileLoader.setSource(new File(inputFile)); inputData = fileLoader.getDataSet(); this.setInputFormat(inputData); outputData = Filter.useFilter(inputData, this); int numInstances = outputData.numInstances(); int numAttributes = outputData.numAttributes(); final int NON_NUMERIC = -1; double[] outlier_AttributeValues = null; double[] extreme_AttributeValues = null; int[] m_AttributeIndices = null; Range m_Attributes = new Range("first-last"); // attributes must be numeric m_Attributes.setUpper(outputData.numAttributes() - 1); m_AttributeIndices = m_Attributes.getSelection(); for (int i = 0; i < m_AttributeIndices.length; i++) { // ignore class if (m_AttributeIndices[i] == outputData.classIndex()) { m_AttributeIndices[i] = NON_NUMERIC; continue; } // not numeric -> ignore it if (!outputData.attribute(m_AttributeIndices[i]).isNumeric()) m_AttributeIndices[i] = NON_NUMERIC; } for (int instanceIndex = 0; instanceIndex < numInstances; instanceIndex++) { // access instance Instance tempInstance = outputData.instance(instanceIndex); for (int attributeIndex = 0; attributeIndex < numAttributes; attributeIndex++) { // non-numeric attribute? 
if (m_AttributeIndices[attributeIndex] == NON_NUMERIC) { continue; } // detect the outlier values using Interquartile approach if (this.isOutlier(tempInstance, m_AttributeIndices[attributeIndex])) { double outlierValue = tempInstance.value(attributeIndex); int outlierColumnIndex = attributeIndex; double sum = 0.0; outlier_AttributeValues = outputData.attributeToDoubleArray(outlierColumnIndex); for (int i = 0; i < outlier_AttributeValues.length; i++) { sum = sum + outlier_AttributeValues[i]; } sum = sum - outlierValue; double replacedValue = sum / (outlier_AttributeValues.length - 1); replacedValue = Math.round(replacedValue * 100D) / 100D; // replace the outliers with attribute mean values outputData.instance(instanceIndex).setValue(outlierColumnIndex, replacedValue); } // extreme value? if (this.isExtremeValue(tempInstance, m_AttributeIndices[attributeIndex])) { double extremeValue = tempInstance.value(attributeIndex); int extremeColumnIndex = attributeIndex; double sum = 0.0; extreme_AttributeValues = outputData.attributeToDoubleArray(extremeColumnIndex); for (int i = 0; i < extreme_AttributeValues.length; i++) { sum = sum + extreme_AttributeValues[i]; } sum = sum - extremeValue; double replacedValue = sum / (extreme_AttributeValues.length - 1); replacedValue = Math.round(replacedValue * 100D) / 100D; outputData.instance(instanceIndex).setValue(extremeColumnIndex, replacedValue); } } } outputData.deleteAttributeAt(outputData.numAttributes() - 1); outputData.deleteAttributeAt(outputData.numAttributes() - 1); saveConsistentData(inputFile, outputData); }