Example usage for weka.core Range getSelection

List of usage examples for weka.core Range getSelection

Introduction

On this page you can find example usages of weka.core Range getSelection.

Prototype


public int[] getSelection()

Source Link

Document

Gets an array containing all the selected values, in the order that they were selected (or ascending order if range inversion is on).

Usage

From source file:MultiClassClassifier.java

License:Open Source License

/**
 * Computes the class distribution for an instance.
 *
 * <p>When only one base classifier exists, its distribution is returned
 * unchanged. For the 1-against-1 method every non-null pairwise classifier
 * either casts a vote for one of its two classes or, when pairwise coupling
 * is enabled and the instance has more than two classes, fills one cell of
 * the matrices passed to {@code pairwiseCoupling}. For every other method
 * the binary outputs are accumulated per class value according to the value
 * range of the corresponding indicator filter. If the accumulated scores do
 * not sum to more than zero (as judged by {@code Utils.gr}), the
 * distribution of the fallback {@code m_ZeroR} model is returned.
 *
 * @param inst the instance to get the distribution for
 * @return the distribution
 * @throws Exception if the distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance inst) throws Exception {

    if (m_Classifiers.length == 1) {
        return m_Classifiers[0].distributionForInstance(inst);
    }

    final int numClasses = inst.numClasses();
    double[] votes = new double[numClasses];

    if (m_Method == METHOD_1_AGAINST_1) {
        final boolean coupled = m_pairwiseCoupling && numClasses > 2;
        double[][] r = new double[numClasses][numClasses];
        double[][] n = new double[numClasses][numClasses];

        for (int k = 0; k < m_ClassFilters.length; k++) {
            if (m_Classifiers[k] == null) {
                continue;
            }

            // classify a copy that is attached to the two-class header
            Instance twoClassInst = (Instance) inst.copy();
            twoClassInst.setDataset(m_TwoClassDataset);
            double[] binary = m_Classifiers[k].distributionForInstance(twoClassInst);

            // recover the pair of class values this classifier separates
            RemoveWithValues pairFilter = (RemoveWithValues) m_ClassFilters[k];
            Range classPair = new Range(pairFilter.getNominalIndices());
            classPair.setUpper(m_ClassAttribute.numValues());
            int[] pair = classPair.getSelection();

            if (coupled) {
                r[pair[0]][pair[1]] = binary[0];
                n[pair[0]][pair[1]] = m_SumOfWeights[k];
            } else {
                int winner = (binary[0] > binary[1]) ? pair[0] : pair[1];
                votes[winner] += 1.0;
            }
        }

        if (coupled) {
            return pairwiseCoupling(n, r);
        }
    } else {
        // error correcting style methods
        for (int k = 0; k < m_ClassFilters.length; k++) {
            m_ClassFilters[k].input(inst);
            m_ClassFilters[k].batchFinished();
            double[] binary = m_Classifiers[k].distributionForInstance(m_ClassFilters[k].output());

            for (int c = 0; c < m_ClassAttribute.numValues(); c++) {
                boolean inIndicatorRange = ((MakeIndicator) m_ClassFilters[k]).getValueRange().isInRange(c);
                votes[c] += inIndicatorRange ? binary[1] : binary[0];
            }
        }
    }

    if (!Utils.gr(Utils.sum(votes), 0)) {
        return m_ZeroR.distributionForInstance(inst);
    }
    Utils.normalize(votes);
    return votes;
}

From source file:MultiClassClassifier.java

License:Open Source License

/**
   * Prints the classifiers./*ww w .  jav a  2s. c o m*/
   * 
   * @return a string representation of the classifier
   */
public String toString() {

    if (m_Classifiers == null) {
        return "MultiClassClassifier: No model built yet.";
    }
    StringBuffer text = new StringBuffer();
    text.append("MultiClassClassifier\n\n");
    for (int i = 0; i < m_Classifiers.length; i++) {
        text.append("Classifier ").append(i + 1);
        if (m_Classifiers[i] != null) {
            if ((m_ClassFilters != null) && (m_ClassFilters[i] != null)) {
                if (m_ClassFilters[i] instanceof RemoveWithValues) {
                    Range range = new Range(((RemoveWithValues) m_ClassFilters[i]).getNominalIndices());
                    range.setUpper(m_ClassAttribute.numValues());
                    int[] pair = range.getSelection();
                    text.append(", " + (pair[0] + 1) + " vs " + (pair[1] + 1));
                } else if (m_ClassFilters[i] instanceof MakeIndicator) {
                    text.append(", using indicator values: ");
                    text.append(((MakeIndicator) m_ClassFilters[i]).getValueRange());
                }
            }
            text.append('\n');
            text.append(m_Classifiers[i].toString() + "\n\n");
        } else {
            text.append(" Skipped (no training examples)\n");
        }
    }

    return text.toString();
}

From source file:org.uclab.mm.kcl.ddkat.datapreprocessor.MissingValueHandler.java

License:Apache License

/**
 * Method to replace the identified missing values.
 *
 * @throws Exception the exception/*from  www .  j  a v  a 2s .  c  om*/
 */
public void replaceMissingValues() throws Exception {

    this.confirmationMessage = new ArrayList<String>();

    Instances outputData;

    String inputFile = BASE_DIR + "OriginalDataSet.csv";

    // load CSV file
    CSVLoader fileLoader = new CSVLoader();
    fileLoader.setSource(new File(inputFile));
    outputData = fileLoader.getDataSet();

    int numInstances = outputData.numInstances();
    int numAttributes = outputData.numAttributes();

    final int NON_NUMERIC = -1;
    int[] m_AttributeIndices = null;

    Range m_Attributes = new Range("first-last");

    // attributes must be numeric
    m_Attributes.setUpper(outputData.numAttributes() - 1);
    m_AttributeIndices = m_Attributes.getSelection();

    for (int i = 0; i < m_AttributeIndices.length; i++) {
        // ignore class
        if (m_AttributeIndices[i] == outputData.classIndex()) {
            m_AttributeIndices[i] = NON_NUMERIC;
            continue;
        }
        // not numeric -> ignore it
        if (!outputData.attribute(m_AttributeIndices[i]).isNumeric())
            m_AttributeIndices[i] = NON_NUMERIC;
    }

    double sum;
    int missingCounter;
    double attributeMean;

    // identify the missing values               
    for (int attributeIndex = 0; attributeIndex < numAttributes; attributeIndex++) {

        // non-numeric attribute?
        if (m_AttributeIndices[attributeIndex] == NON_NUMERIC) {
            continue;
        }

        double tempArr[] = outputData.attributeToDoubleArray(attributeIndex);
        sum = 0;
        missingCounter = 0;
        for (int i = 0; i < tempArr.length; i++) {
            sum = sum + tempArr[i];
            if (tempArr[i] == 0)
                missingCounter++;
        }

        attributeMean = sum / (numInstances - missingCounter);

        for (int instanceIndex = 0; instanceIndex < numInstances; instanceIndex++) {

            // replace the missing values with attribute mean values
            if (outputData.instance(instanceIndex).value(attributeIndex) == 0) {
                outputData.instance(instanceIndex).setValue(attributeIndex, attributeMean);
            }
        }
    }

    outputData.deleteAttributeAt(outputData.numAttributes() - 1);
    outputData.deleteAttributeAt(outputData.numAttributes() - 1);

    saveFilledData(inputFile, outputData);

}

From source file:org.uclab.mm.kcl.ddkat.datapreprocessor.OutlierHandler.java

License:Apache License

/**
 * Replaces detected outlier and extreme values with an attribute mean.
 *
 * <p>The data set is read from {@code BASE_DIR + "OriginalDataSet.csv"} and
 * passed through this object used as a filter. Every numeric, non-class
 * cell that {@code isOutlier} or {@code isExtremeValue} flags is overwritten
 * with the mean of the other values of its attribute, rounded to two
 * decimals. Finally the last two attributes are removed and the data is
 * handed to {@code saveConsistentData}.
 *
 * @throws Exception if loading, filtering or saving the data set fails
 */
public void replaceOutliers() throws Exception {

    final String inputFile = BASE_DIR + "OriginalDataSet.csv";

    // load CSV file
    CSVLoader loader = new CSVLoader();
    loader.setSource(new File(inputFile));
    Instances inputData = loader.getDataSet();
    this.setInputFormat(inputData);
    Instances outputData = Filter.useFilter(inputData, this);

    final int rowCount = outputData.numInstances();
    final int columnCount = outputData.numAttributes();
    final int NON_NUMERIC = -1;

    // start from every attribute and mask those that must be skipped
    Range allAttributes = new Range("first-last");
    allAttributes.setUpper(outputData.numAttributes() - 1);
    int[] candidateIndices = allAttributes.getSelection();

    for (int k = 0; k < candidateIndices.length; k++) {
        boolean isClass = candidateIndices[k] == outputData.classIndex();
        // the class attribute and non-numeric attributes are ignored
        if (isClass || !outputData.attribute(candidateIndices[k]).isNumeric()) {
            candidateIndices[k] = NON_NUMERIC;
        }
    }

    for (int row = 0; row < rowCount; row++) {
        Instance current = outputData.instance(row);

        for (int column = 0; column < columnCount; column++) {
            final int candidate = candidateIndices[column];
            if (candidate == NON_NUMERIC) {
                continue;
            }

            // outlier according to the filter's check?
            if (this.isOutlier(current, candidate)) {
                double replacement = meanOfOtherValues(outputData, column, current.value(column));
                outputData.instance(row).setValue(column, replacement);
            }

            // extreme value?
            if (this.isExtremeValue(current, candidate)) {
                double replacement = meanOfOtherValues(outputData, column, current.value(column));
                outputData.instance(row).setValue(column, replacement);
            }
        }
    }

    // presumably these are indicator attributes appended by the filter - TODO confirm
    outputData.deleteAttributeAt(outputData.numAttributes() - 1);
    outputData.deleteAttributeAt(outputData.numAttributes() - 1);

    saveConsistentData(inputFile, outputData);

}

/**
 * Averages one attribute over all instances except a single excluded value
 * and rounds the result to two decimals.
 *
 * @param data the data set to read the attribute from
 * @param attributeIndex the index of the attribute to average
 * @param excludedValue the value subtracted from the attribute's total
 * @return the rounded mean of the remaining values
 */
private static double meanOfOtherValues(Instances data, int attributeIndex, double excludedValue) {
    double[] columnValues = data.attributeToDoubleArray(attributeIndex);

    double total = 0.0;
    for (double v : columnValues) {
        total = total + v;
    }
    total = total - excludedValue;

    double mean = total / (columnValues.length - 1);
    return Math.round(mean * 100D) / 100D;
}