Example usage for weka.core Utils sort

List of usage examples for weka.core Utils sort

Introduction

In this page you can find the example usage for weka.core Utils sort.

Prototype

public staticint[] sort(double[] array) 

Source Link

Document

Sorts a given array of doubles in ascending order and returns an array of integers with the positions of the elements of the original array in the sorted array.

Usage

From source file:PrincipalComponents.java

License:Open Source License

private void buildAttributeConstructor(Instances data) throws Exception {
    m_eigenvalues = null;//w ww .ja  va2  s.  c om
    m_outputNumAtts = -1;
    m_attributeFilter = null;
    m_nominalToBinFilter = null;
    m_sumOfEigenValues = 0.0;
    m_trainInstances = new Instances(data);

    // make a copy of the training data so that we can get the class
    // column to append to the transformed data (if necessary)
    m_trainHeader = new Instances(m_trainInstances, 0);

    m_replaceMissingFilter = new ReplaceMissingValues();
    m_replaceMissingFilter.setInputFormat(m_trainInstances);
    m_trainInstances = Filter.useFilter(m_trainInstances, m_replaceMissingFilter);

    /*
       * if (m_normalize) { m_normalizeFilter = new Normalize();
     * m_normalizeFilter.setInputFormat(m_trainInstances); m_trainInstances
     * = Filter.useFilter(m_trainInstances, m_normalizeFilter); }
     */

    m_nominalToBinFilter = new NominalToBinary();
    m_nominalToBinFilter.setInputFormat(m_trainInstances);
    m_trainInstances = Filter.useFilter(m_trainInstances, m_nominalToBinFilter);

    // delete any attributes with only one distinct value or are all missing
    Vector<Integer> deleteCols = new Vector<Integer>();
    for (int i = 0; i < m_trainInstances.numAttributes(); i++) {
        if (m_trainInstances.numDistinctValues(i) <= 1) {
            deleteCols.addElement(new Integer(i));
        }
    }

    if (m_trainInstances.classIndex() >= 0) {
        // get rid of the class column
        m_hasClass = true;
        m_classIndex = m_trainInstances.classIndex();
        deleteCols.addElement(new Integer(m_classIndex));
    }

    // remove columns from the data if necessary
    if (deleteCols.size() > 0) {
        m_attributeFilter = new Remove();
        int[] todelete = new int[deleteCols.size()];
        for (int i = 0; i < deleteCols.size(); i++) {
            todelete[i] = (deleteCols.elementAt(i)).intValue();
        }
        m_attributeFilter.setAttributeIndicesArray(todelete);
        m_attributeFilter.setInvertSelection(false);
        m_attributeFilter.setInputFormat(m_trainInstances);
        m_trainInstances = Filter.useFilter(m_trainInstances, m_attributeFilter);
    }

    // can evaluator handle the processed data ? e.g., enough attributes?
    getCapabilities().testWithFail(m_trainInstances);

    m_numInstances = m_trainInstances.numInstances();
    m_numAttribs = m_trainInstances.numAttributes();

    fillCovariance();

    SymmDenseEVD evd = SymmDenseEVD.factorize(m_correlation);

    m_eigenvectors = Matrices.getArray(evd.getEigenvectors());
    m_eigenvalues = evd.getEigenvalues();

    /*
       * for (int i = 0; i < m_numAttribs; i++) { for (int j = 0; j <
     * m_numAttribs; j++) { System.err.println(v[i][j] + " "); }
     * System.err.println(d[i]); }
     */

    // any eigenvalues less than 0 are not worth anything --- change to 0
    for (int i = 0; i < m_eigenvalues.length; i++) {
        if (m_eigenvalues[i] < 0) {
            m_eigenvalues[i] = 0.0;
        }
    }
    m_sortedEigens = Utils.sort(m_eigenvalues);
    m_sumOfEigenValues = Utils.sum(m_eigenvalues);

    m_transformedFormat = setOutputFormat();
    if (m_transBackToOriginal) {
        m_originalSpaceFormat = setOutputFormatOriginal();

        // new ordered eigenvector matrix
        int numVectors = (m_transformedFormat.classIndex() < 0) ? m_transformedFormat.numAttributes()
                : m_transformedFormat.numAttributes() - 1;

        double[][] orderedVectors = new double[m_eigenvectors.length][numVectors + 1];

        // try converting back to the original space
        for (int i = m_numAttribs - 1; i > (m_numAttribs - numVectors - 1); i--) {
            for (int j = 0; j < m_numAttribs; j++) {
                orderedVectors[j][m_numAttribs - i] = m_eigenvectors[j][m_sortedEigens[i]];
            }
        }

        // transpose the matrix
        int nr = orderedVectors.length;
        int nc = orderedVectors[0].length;
        m_eTranspose = new double[nc][nr];
        for (int i = 0; i < nc; i++) {
            for (int j = 0; j < nr; j++) {
                m_eTranspose[i][j] = orderedVectors[j][i];
            }
        }
    }
}

From source file:PrincipalComponents.java

License:Open Source License

/**
 * Set the format for the transformed data
 *
 * @return a set of empty Instances (header only) in the new format
 * @throws Exception if the output format can't be set
 *//*  w w  w.  j  a v  a2  s .c  o m*/
private Instances setOutputFormat() throws Exception {
    if (m_eigenvalues == null) {
        return null;
    }

    double cumulative = 0.0;
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    for (int i = m_numAttribs - 1; i >= 0; i--) {
        StringBuffer attName = new StringBuffer();
        // build array of coefficients
        double[] coeff_mags = new double[m_numAttribs];
        for (int j = 0; j < m_numAttribs; j++) {
            coeff_mags[j] = -Math.abs(m_eigenvectors[j][m_sortedEigens[i]]);
        }
        int num_attrs = (m_maxAttrsInName > 0) ? Math.min(m_numAttribs, m_maxAttrsInName) : m_numAttribs;
        // this array contains the sorted indices of the coefficients
        int[] coeff_inds;
        if (m_numAttribs > 0) {
            // if m_maxAttrsInName > 0, sort coefficients by decreasing
            // magnitude
            coeff_inds = Utils.sort(coeff_mags);
        } else {
            // if m_maxAttrsInName <= 0, use all coeffs in original order
            coeff_inds = new int[m_numAttribs];
            for (int j = 0; j < m_numAttribs; j++) {
                coeff_inds[j] = j;
            }
        }
        // build final attName string
        for (int j = 0; j < num_attrs; j++) {
            double coeff_value = m_eigenvectors[coeff_inds[j]][m_sortedEigens[i]];
            if (j > 0 && coeff_value >= 0) {
                attName.append("+");
            }
            attName.append(
                    Utils.doubleToString(coeff_value, 5, 3) + m_trainInstances.attribute(coeff_inds[j]).name());
        }
        if (num_attrs < m_numAttribs) {
            attName.append("...");
        }

        attributes.add(new Attribute(attName.toString()));
        cumulative += m_eigenvalues[m_sortedEigens[i]];

        if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {
            break;
        }
    }

    if (m_hasClass) {
        attributes.add((Attribute) m_trainHeader.classAttribute().copy());
    }

    Instances outputFormat = new Instances(m_trainInstances.relationName() + "_principal components",
            attributes, 0);

    // set the class to be the last attribute if necessary
    if (m_hasClass) {
        outputFormat.setClassIndex(outputFormat.numAttributes() - 1);
    }

    m_outputNumAtts = outputFormat.numAttributes();
    return outputFormat;
}

From source file:REPTree.java

License:Open Source License

/**
 * Builds classifier./*from w w  w .  j  a v  a  2  s . co  m*/
 * 
 * @param data the data to train with
 * @throws Exception if building fails
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    Random random = new Random(m_Seed);

    m_zeroR = null;
    if (data.numAttributes() == 1) {
        m_zeroR = new ZeroR();
        m_zeroR.buildClassifier(data);
        return;
    }

    // Randomize and stratify
    data.randomize(random);
    if (data.classAttribute().isNominal()) {
        data.stratify(m_NumFolds);
    }

    // Split data into training and pruning set
    Instances train = null;
    Instances prune = null;
    if (!m_NoPruning) {
        train = data.trainCV(m_NumFolds, 0, random);
        prune = data.testCV(m_NumFolds, 0);
    } else {
        train = data;
    }

    // Create array of sorted indices and weights
    int[][][] sortedIndices = new int[1][train.numAttributes()][0];
    double[][][] weights = new double[1][train.numAttributes()][0];
    double[] vals = new double[train.numInstances()];
    for (int j = 0; j < train.numAttributes(); j++) {
        if (j != train.classIndex()) {
            weights[0][j] = new double[train.numInstances()];
            if (train.attribute(j).isNominal()) {

                // Handling nominal attributes. Putting indices of
                // instances with missing values at the end.
                sortedIndices[0][j] = new int[train.numInstances()];
                int count = 0;
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    if (!inst.isMissing(j)) {
                        sortedIndices[0][j][count] = i;
                        weights[0][j][count] = inst.weight();
                        count++;
                    }
                }
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    if (inst.isMissing(j)) {
                        sortedIndices[0][j][count] = i;
                        weights[0][j][count] = inst.weight();
                        count++;
                    }
                }
            } else {

                // Sorted indices are computed for numeric attributes
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    vals[i] = inst.value(j);
                }
                sortedIndices[0][j] = Utils.sort(vals);
                for (int i = 0; i < train.numInstances(); i++) {
                    weights[0][j][i] = train.instance(sortedIndices[0][j][i]).weight();
                }
            }
        }
    }

    // Compute initial class counts
    double[] classProbs = new double[train.numClasses()];
    double totalWeight = 0, totalSumSquared = 0;
    for (int i = 0; i < train.numInstances(); i++) {
        Instance inst = train.instance(i);
        if (data.classAttribute().isNominal()) {
            classProbs[(int) inst.classValue()] += inst.weight();
            totalWeight += inst.weight();
        } else {
            classProbs[0] += inst.classValue() * inst.weight();
            totalSumSquared += inst.classValue() * inst.classValue() * inst.weight();
            totalWeight += inst.weight();
        }
    }
    m_Tree = new Tree();
    double trainVariance = 0;
    if (data.classAttribute().isNumeric()) {
        trainVariance = m_Tree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight;
        classProbs[0] /= totalWeight;
    }

    // Build tree
    m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs, new Instances(train, 0), m_MinNum,
            m_MinVarianceProp * trainVariance, 0, m_MaxDepth);

    // Insert pruning data and perform reduced error pruning
    if (!m_NoPruning) {
        m_Tree.insertHoldOutSet(prune);
        m_Tree.reducedErrorPrune();
        m_Tree.backfitHoldOutSet();
    }
}

From source file:REPRandomTree.java

License:Open Source License

/**
 * Builds classifier./*from w w w .  j  ava2s.co  m*/
 * 
 * @param data the data to train with
 * @throws Exception if building fails
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    Random random = new Random(m_Seed);

    m_zeroR = null;
    if (data.numAttributes() == 1) {
        m_zeroR = new ZeroR();
        m_zeroR.buildClassifier(data);
        return;
    }

    // Randomize and stratify
    data.randomize(random);
    if (data.classAttribute().isNominal()) {
        data.stratify(m_NumFolds);
    }

    // Split data into training and pruning set
    Instances train = null;
    Instances prune = null;
    if (!m_NoPruning) {
        train = data.trainCV(m_NumFolds, 0, random);
        prune = data.testCV(m_NumFolds, 0);
    } else {
        train = data;
    }

    // Create array of sorted indices and weights
    int[][][] sortedIndices = new int[1][train.numAttributes()][0];
    double[][][] weights = new double[1][train.numAttributes()][0];
    double[] vals = new double[train.numInstances()];
    for (int j = 0; j < train.numAttributes(); j++) {
        if (j != train.classIndex()) {
            weights[0][j] = new double[train.numInstances()];
            if (train.attribute(j).isNominal()) {

                // Handling nominal attributes. Putting indices of
                // instances with missing values at the end.
                sortedIndices[0][j] = new int[train.numInstances()];
                int count = 0;
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    if (!inst.isMissing(j)) {
                        sortedIndices[0][j][count] = i;
                        weights[0][j][count] = inst.weight();
                        count++;
                    }
                }
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    if (inst.isMissing(j)) {
                        sortedIndices[0][j][count] = i;
                        weights[0][j][count] = inst.weight();
                        count++;
                    }
                }
            } else {

                // Sorted indices are computed for numeric attributes
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    vals[i] = inst.value(j);
                }
                sortedIndices[0][j] = Utils.sort(vals);
                for (int i = 0; i < train.numInstances(); i++) {
                    weights[0][j][i] = train.instance(sortedIndices[0][j][i]).weight();
                }
            }
        }
    }

    // Compute initial class counts
    double[] classProbs = new double[train.numClasses()];
    double totalWeight = 0, totalSumSquared = 0;
    for (int i = 0; i < train.numInstances(); i++) {
        Instance inst = train.instance(i);
        if (data.classAttribute().isNominal()) {
            classProbs[(int) inst.classValue()] += inst.weight();
            totalWeight += inst.weight();
        } else {
            classProbs[0] += inst.classValue() * inst.weight();
            totalSumSquared += inst.classValue() * inst.classValue() * inst.weight();
            totalWeight += inst.weight();
        }
    }
    m_Tree = new Tree();
    double trainVariance = 0;
    if (data.classAttribute().isNumeric()) {
        trainVariance = m_Tree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight;
        classProbs[0] /= totalWeight;
    }

    // Build tree
    m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs, new Instances(train, 0), m_MinNum,
            m_MinVarianceProp * trainVariance, 0, m_MaxDepth, m_FeatureFrac, random);

    // Insert pruning data and perform reduced error pruning
    if (!m_NoPruning) {
        m_Tree.insertHoldOutSet(prune);
        m_Tree.reducedErrorPrune();
        m_Tree.backfitHoldOutSet();
    }
}

From source file:PCADetector.java

License:Apache License

public int[] Sort(double[] eigenValues, double[][] eigenVectors) {
    int numDims = eigenValues.length;
    int numRows = eigenVectors.length;
    int[] order = Utils.sort(eigenValues);
    double[] sortedEigenValues = new double[numDims];
    double[][] sortedEigenVectors = new double[eigenVectors.length][numDims];
    for (int i = 0; i < numDims; i++) {
        sortedEigenValues[i] = eigenValues[order[numDims - 1 - i]];
        for (int j = 0; j < numRows; j++) {
            sortedEigenVectors[j][i] = eigenVectors[j][order[numDims - 1 - i]];
        }//from w ww . jav  a 2 s. c o  m
    }
    for (int i = 0; i < numDims; i++) {
        eigenValues[i] = sortedEigenValues[i];
        for (int j = 0; j < numRows; j++) {
            eigenVectors[j][i] = sortedEigenVectors[j][i];
        }
    }
    return order;
}

From source file:bme.mace.logicdomain.Evaluation.java

License:Open Source License

/**
 * Sets up the priors for numeric class attributes from the training class
 * values that have been seen so far./* w  w w. j  av a  2s  . com*/
 */
protected void setNumericPriorsFromBuffer() {

    double numPrecision = 0.01; // Default value
    if (m_NumTrainClassVals > 1) {
        double[] temp = new double[m_NumTrainClassVals];
        System.arraycopy(m_TrainClassVals, 0, temp, 0, m_NumTrainClassVals);
        int[] index = Utils.sort(temp);
        double lastVal = temp[index[0]];
        double deltaSum = 0;
        int distinct = 0;
        for (int i = 1; i < temp.length; i++) {
            double current = temp[index[i]];
            if (current != lastVal) {
                deltaSum += current - lastVal;
                lastVal = current;
                distinct++;
            }
        }
        if (distinct > 0) {
            numPrecision = deltaSum / distinct;
        }
    }
    m_PriorErrorEstimator = new KernelEstimator(numPrecision);
    m_ErrorEstimator = new KernelEstimator(numPrecision);
    m_ClassPriors[0] = m_ClassPriorsSum = 0;
    for (int i = 0; i < m_NumTrainClassVals; i++) {
        m_ClassPriors[0] += m_TrainClassVals[i] * m_TrainClassWeights[i];
        m_ClassPriorsSum += m_TrainClassWeights[i];
        m_PriorErrorEstimator.addValue(m_TrainClassVals[i], m_TrainClassWeights[i]);
    }
}

From source file:data.generation.target.utils.PrincipalComponents.java

License:Open Source License

private void buildAttributeConstructor(Instances data) throws Exception {
    m_eigenvalues = null;/*  ww w. j  a v a2s. c o  m*/
    m_outputNumAtts = -1;
    m_attributeFilter = null;
    m_nominalToBinFilter = null;
    m_sumOfEigenValues = 0.0;
    m_trainInstances = new Instances(data);

    // make a copy of the training data so that we can get the class
    // column to append to the transformed data (if necessary)
    m_trainHeader = new Instances(m_trainInstances, 0);

    m_replaceMissingFilter = new ReplaceMissingValues();
    m_replaceMissingFilter.setInputFormat(m_trainInstances);
    m_trainInstances = Filter.useFilter(m_trainInstances, m_replaceMissingFilter);

    /*if (m_normalize) {
      m_normalizeFilter = new Normalize();
      m_normalizeFilter.setInputFormat(m_trainInstances);
      m_trainInstances = Filter.useFilter(m_trainInstances, m_normalizeFilter);
    } */

    m_nominalToBinFilter = new NominalToBinary();
    m_nominalToBinFilter.setInputFormat(m_trainInstances);
    m_trainInstances = Filter.useFilter(m_trainInstances, m_nominalToBinFilter);

    // delete any attributes with only one distinct value or are all missing
    Vector deleteCols = new Vector();
    for (int i = 0; i < m_trainInstances.numAttributes(); i++) {
        if (m_trainInstances.numDistinctValues(i) <= 1) {
            deleteCols.addElement(new Integer(i));
        }
    }

    if (m_trainInstances.classIndex() >= 0) {
        // get rid of the class column
        m_hasClass = true;
        m_classIndex = m_trainInstances.classIndex();
        deleteCols.addElement(new Integer(m_classIndex));
    }

    // remove columns from the data if necessary
    if (deleteCols.size() > 0) {
        m_attributeFilter = new Remove();
        int[] todelete = new int[deleteCols.size()];
        for (int i = 0; i < deleteCols.size(); i++) {
            todelete[i] = ((Integer) (deleteCols.elementAt(i))).intValue();
        }
        m_attributeFilter.setAttributeIndicesArray(todelete);
        m_attributeFilter.setInvertSelection(false);
        m_attributeFilter.setInputFormat(m_trainInstances);
        m_trainInstances = Filter.useFilter(m_trainInstances, m_attributeFilter);
    }

    // can evaluator handle the processed data ? e.g., enough attributes?
    getCapabilities().testWithFail(m_trainInstances);

    m_numInstances = m_trainInstances.numInstances();
    m_numAttribs = m_trainInstances.numAttributes();

    //fillCorrelation();
    fillCovariance();

    double[] d = new double[m_numAttribs];
    double[][] v = new double[m_numAttribs][m_numAttribs];

    Matrix corr = new Matrix(m_correlation);
    corr.eigenvalueDecomposition(v, d);
    m_eigenvectors = (double[][]) v.clone();
    m_eigenvalues = (double[]) d.clone();

    /*for (int i = 0; i < m_numAttribs; i++) {
      for (int j = 0; j < m_numAttribs; j++) {
        System.err.println(v[i][j] + " ");
      }
      System.err.println(d[i]);
    } */

    // any eigenvalues less than 0 are not worth anything --- change to 0
    for (int i = 0; i < m_eigenvalues.length; i++) {
        if (m_eigenvalues[i] < 0) {
            m_eigenvalues[i] = 0.0;
        }
    }
    m_sortedEigens = Utils.sort(m_eigenvalues);
    m_sumOfEigenValues = Utils.sum(m_eigenvalues);

    m_transformedFormat = setOutputFormat();
    if (m_transBackToOriginal) {
        m_originalSpaceFormat = setOutputFormatOriginal();

        // new ordered eigenvector matrix
        int numVectors = (m_transformedFormat.classIndex() < 0) ? m_transformedFormat.numAttributes()
                : m_transformedFormat.numAttributes() - 1;

        double[][] orderedVectors = new double[m_eigenvectors.length][numVectors + 1];

        // try converting back to the original space
        for (int i = m_numAttribs - 1; i > (m_numAttribs - numVectors - 1); i--) {
            for (int j = 0; j < m_numAttribs; j++) {
                orderedVectors[j][m_numAttribs - i] = m_eigenvectors[j][m_sortedEigens[i]];
            }
        }

        // transpose the matrix
        int nr = orderedVectors.length;
        int nc = orderedVectors[0].length;
        m_eTranspose = new double[nc][nr];
        for (int i = 0; i < nc; i++) {
            for (int j = 0; j < nr; j++) {
                m_eTranspose[i][j] = orderedVectors[j][i];
            }
        }
    }
}

From source file:data.generation.target.utils.PrincipalComponents.java

License:Open Source License

/**
 * Set the format for the transformed data
 * @return a set of empty Instances (header only) in the new format
 * @throws Exception if the output format can't be set
 *///from  w  w  w.  j  av a2  s . c  om
private Instances setOutputFormat() throws Exception {
    if (m_eigenvalues == null) {
        return null;
    }

    double cumulative = 0.0;
    FastVector attributes = new FastVector();
    for (int i = m_numAttribs - 1; i >= 0; i--) {
        StringBuffer attName = new StringBuffer();
        // build array of coefficients
        double[] coeff_mags = new double[m_numAttribs];
        for (int j = 0; j < m_numAttribs; j++)
            coeff_mags[j] = -Math.abs(m_eigenvectors[j][m_sortedEigens[i]]);
        int num_attrs = (m_maxAttrsInName > 0) ? Math.min(m_numAttribs, m_maxAttrsInName) : m_numAttribs;
        // this array contains the sorted indices of the coefficients
        int[] coeff_inds;
        if (m_numAttribs > 0) {
            // if m_maxAttrsInName > 0, sort coefficients by decreasing magnitude
            coeff_inds = Utils.sort(coeff_mags);
        } else {
            // if  m_maxAttrsInName <= 0, use all coeffs in original order
            coeff_inds = new int[m_numAttribs];
            for (int j = 0; j < m_numAttribs; j++)
                coeff_inds[j] = j;
        }
        // build final attName string
        for (int j = 0; j < num_attrs; j++) {
            double coeff_value = m_eigenvectors[coeff_inds[j]][m_sortedEigens[i]];
            if (j > 0 && coeff_value >= 0)
                attName.append("+");
            attName.append(
                    Utils.doubleToString(coeff_value, 5, 3) + m_trainInstances.attribute(coeff_inds[j]).name());
        }
        if (num_attrs < m_numAttribs)
            attName.append("...");

        attributes.addElement(new Attribute(attName.toString()));
        cumulative += m_eigenvalues[m_sortedEigens[i]];

        if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {
            break;
        }
    }

    if (m_hasClass) {
        attributes.addElement(m_trainHeader.classAttribute().copy());
    }

    Instances outputFormat = new Instances(m_trainInstances.relationName() + "_principal components",
            attributes, 0);

    // set the class to be the last attribute if necessary
    if (m_hasClass) {
        outputFormat.setClassIndex(outputFormat.numAttributes() - 1);
    }

    m_outputNumAtts = outputFormat.numAttributes();
    return outputFormat;
}

From source file:de.uni_potsdam.hpi.bpt.promnicat.analysisModules.clustering.ProcessInstances.java

License:Open Source License

/**
 * Returns the number of distinct values of a given attribute. The value
 * 'missing' is not counted.//from   w  ww.  j ava2 s. co  m
 * 
 * @param attIndex
 *            the attribute (index starts with 0)
 * @return the number of distinct values of a given attribute
 */
// @ requires 0 <= attIndex;
// @ requires attIndex < numAttributes();
public/* @pure@ */int numDistinctValues(int attIndex) {

    if (attribute(attIndex).isNumeric()) {
        double[] attVals = attributeToDoubleArray(attIndex);
        int[] sorted = Utils.sort(attVals);
        double prev = 0;
        int counter = 0;
        for (int i = 0; i < sorted.length; i++) {
            ProcessInstance current = getInstance(sorted[i]);
            if (current.isMissing(attIndex)) {
                break;
            }
            if ((i == 0) || (current.value(attIndex) > prev)) {
                prev = current.value(attIndex);
                counter++;
            }
        }
        return counter;
    }

    //same for string values
    String[] strAttVals = attributeToStringArray(attIndex);
    int[] sorted = new int[strAttVals.length];
    for (int i = 0; i < strAttVals.length; i++) {
        sorted[i] = i;
    }
    String prev = "";
    int counter = 0;
    for (int i = 0; i < sorted.length; i++) {
        ProcessInstance current = getInstance(sorted[i]);
        if (current.isMissing(attIndex)) {
            break;
        }
        if ((i == 0) || (!current.stringValue(attIndex).equals(prev))) {
            prev = current.stringValue(attIndex);
            counter++;
        }
    }
    return counter;
}

From source file:faster_pca.faster_pca.java

License:Open Source License

/**
* Initializes the filter with the given input data.
* 
* @param instances the data to process//from  w  w w .  j av  a2s.c om
* @throws Exception in case the processing goes wrong
* @see #batchFinished()
*/
protected void setup(Instances instances) throws Exception {
    int i;
    int j;
    Vector<Integer> deleteCols;
    int[] todelete;
    double[][] v;
    Matrix corr;
    EigenvalueDecomposition eig;
    Matrix V;

    m_TrainInstances = new Instances(instances);

    // make a copy of the training data so that we can get the class
    // column to append to the transformed data (if necessary)
    m_TrainCopy = new Instances(m_TrainInstances, 0);

    /*m_ReplaceMissingFilter = new ReplaceMissingValues();
    m_ReplaceMissingFilter.setInputFormat(m_TrainInstances);
    m_TrainInstances = Filter.useFilter(m_TrainInstances,
      m_ReplaceMissingFilter);*/

    m_NominalToBinaryFilter = new NominalToBinary();
    m_NominalToBinaryFilter.setInputFormat(m_TrainInstances);
    m_TrainInstances = Filter.useFilter(m_TrainInstances, m_NominalToBinaryFilter);

    // delete any attributes with only one distinct value or are all missing
    deleteCols = new Vector<Integer>();
    /*for (i = 0; i < m_TrainInstances.numAttributes(); i++) {
      if (m_TrainInstances.numDistinctValues(i) <= 1) {
        deleteCols.addElement(i);
      }
    }*/

    if (m_TrainInstances.classIndex() >= 0) {
        // get rid of the class column
        m_HasClass = true;
        m_ClassIndex = m_TrainInstances.classIndex();
        deleteCols.addElement(new Integer(m_ClassIndex));
    }

    // remove columns from the data if necessary
    if (deleteCols.size() > 0) {
        m_AttributeFilter = new Remove();
        todelete = new int[deleteCols.size()];
        for (i = 0; i < deleteCols.size(); i++) {
            todelete[i] = (deleteCols.elementAt(i)).intValue();
        }
        m_AttributeFilter.setAttributeIndicesArray(todelete);
        m_AttributeFilter.setInvertSelection(false);
        m_AttributeFilter.setInputFormat(m_TrainInstances);
        m_TrainInstances = Filter.useFilter(m_TrainInstances, m_AttributeFilter);
    }

    // can evaluator handle the processed data ? e.g., enough attributes?
    getCapabilities().testWithFail(m_TrainInstances);

    m_NumInstances = m_TrainInstances.numInstances();
    m_NumAttribs = m_TrainInstances.numAttributes();

    // fillCorrelation();
    fillCovariance();

    // get eigen vectors/values
    corr = new Matrix(m_Correlation);
    eig = corr.eig();
    V = eig.getV();
    v = new double[m_NumAttribs][m_NumAttribs];
    for (i = 0; i < v.length; i++) {
        for (j = 0; j < v[0].length; j++) {
            v[i][j] = V.get(i, j);
        }
    }
    m_Eigenvectors = v.clone();
    m_Eigenvalues = eig.getRealEigenvalues().clone();

    // any eigenvalues less than 0 are not worth anything --- change to 0
    for (i = 0; i < m_Eigenvalues.length; i++) {
        if (m_Eigenvalues[i] < 0) {
            m_Eigenvalues[i] = 0.0;
        }
    }
    m_SortedEigens = Utils.sort(m_Eigenvalues);
    m_SumOfEigenValues = Utils.sum(m_Eigenvalues);

    m_TransformedFormat = determineOutputFormat(m_TrainInstances);
    setOutputFormat(m_TransformedFormat);

    m_TrainInstances = null;
}