Example usage for weka.core Instances attributeToDoubleArray

List of usage examples for weka.core Instances attributeToDoubleArray

Introduction

On this page you can find example usages of weka.core.Instances.attributeToDoubleArray.

Prototype


public double[] attributeToDoubleArray(int index)

Source Link

Document

Gets the value of all instances in this dataset for a particular attribute.

Usage

From source file:meka.classifiers.multilabel.PLST.java

License:Open Source License

/**
 * Transforms the labels of a dataset into another set of latent labels.
 * Typically a compression method is used, e.g., Boolean matrix decomposition
 * in the case of MLC-BMaD, or matrix multiplication based on SVD for PLST.
 *
 * <p>Besides building the result, this method stores the per-label shift in
 * {@code m_Shift}, the truncated projection matrix in {@code m_v} and the
 * header of the compressed labels in {@code m_PatternInstances}.
 *
 * @param D the instances to transform into new instances with transformed labels;
 *          they consist of features and original labels
 * @return the resulting instances, built by merging the transformed labels with
 *         the features; the class index is set to {@code getSize()}
 * @throws Exception propagated from the extraction, decomposition or merge steps
 */
@Override
public Instances transformLabels(Instances D) throws Exception {
    final Instances featurePart = this.extractPart(D, false);
    final Instances labelPart = this.extractPart(D, true);
    final int numLabels = labelPart.numAttributes();
    final int numRows = labelPart.numInstances();

    final Matrix codedLabels = MatrixUtils.instancesToMatrix(labelPart);

    // Preprocessing as in the original implementation: compute, per label, the
    // mean of its 1/-1 coded values and recode the matrix on the fly.
    final double[] labelMeans = new double[numLabels];
    for (int label = 0; label < numLabels; label++) {
        final double[] values = labelPart.attributeToDoubleArray(label);
        double signedSum = 0.0;
        for (int row = 0; row < values.length; row++) {
            if (values[row] != 1.0) {
                // The algorithm needs 1/-1 coding, so everything that is not
                // exactly 1 becomes -1 in the matrix.
                codedLabels.set(row, label, -1.0);
                signedSum -= 1.0;
            } else {
                signedSum += 1.0;
            }
        }
        labelMeans[label] = signedSum / values.length;
    }

    // Remember the shift (a single row holding the means) for prediction.
    this.m_Shift = new Matrix(new double[][] { labelMeans });

    // Same row of means repeated once per training instance.
    final double[][] repeatedMeans = new double[numRows][];
    for (int row = 0; row < numRows; row++) {
        repeatedMeans[row] = labelMeans;
    }
    final Matrix trainShift = new Matrix(repeatedMeans);

    final SingularValueDecomposition svd = new SingularValueDecomposition(codedLabels.minus(trainShift));

    // The paper uses U here, but the implementation by the authors uses V, so
    // V is used here too.
    m_v = svd.getV();

    // Keep only the first getSize() columns of V.
    final double[][] fullV = m_v.getArray();
    final double[][] truncatedV = new double[m_v.getRowDimension()][this.getSize()];
    for (int row = 0; row < truncatedV.length; row++) {
        System.arraycopy(fullV[row], 0, truncatedV[row], 0, truncatedV[row].length);
    }
    m_v = new Matrix(truncatedV);

    // Last step of the algorithm: project the labels with the truncated V.
    // NOTE(review): the projection is applied to the labels as stored in the
    // instances, not to the shifted 1/-1 matrix built above - confirm intended.
    final Matrix compressed = MatrixUtils.instancesToMatrix(labelPart).times(this.m_v);

    // One attribute per compressed column.
    final int numLatent = compressed.getColumnDimension();
    final ArrayList<Attribute> latentAttributes = new ArrayList<Attribute>();
    for (int k = 0; k < numLatent; k++) {
        latentAttributes.add(new Attribute("att" + k));
    }

    // Pattern instances are also used in prediction. Note: this is a regression
    // problem now, the labels are not binary any more.
    this.m_PatternInstances = new Instances("compressedlabels", latentAttributes,
            compressed.getRowDimension());

    // Convert the compressed matrix to Instances and merge it with the features.
    final Instances merged = Instances.mergeInstances(
            MatrixUtils.matrixToInstances(compressed, m_PatternInstances), featurePart);
    merged.setClassIndex(this.getSize());
    return merged;
}

From source file:meka.core.MatrixUtils.java

License:Open Source License

/**
 * Helper method that transforms an Instances object into a Matrix object.
 * Row {@code j}, column {@code i} of the result holds the value of attribute
 * {@code i} for instance {@code j}.
 *
 * @param inst The Instances to transform.
 * @return The resulting Matrix object.
 */
public static Matrix instancesToMatrix(Instances inst) {
    double[][] darr = new double[inst.numInstances()][inst.numAttributes()];
    for (int i = 0; i < inst.numAttributes(); i++) {
        // Fetch the column once per attribute instead of on every loop test and
        // every element read; each call builds the whole column again.
        double[] column = inst.attributeToDoubleArray(i);
        for (int j = 0; j < column.length; j++) {
            darr[j][i] = column[j];
        }
    }
    return new Matrix(darr);
}

From source file:ml.dataprocess.CorrelationAttributeEval.java

License:Open Source License

/**
 * Initializes a correlation attribute evaluator. Replaces missing
 * values with means/modes; deletes instances with missing class values.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been generated successfully
 */
@Override
public void buildEvaluator(Instances data) throws Exception {
    // Overview: fills m_correlations with one score per non-class attribute
    // (the class attribute's slot is never written here). When m_detailedOutput
    // is set, per-value scores of nominal attributes are appended to
    // m_detailedOutputBuff.

    // Work on a copy and drop instances without a class value.
    data = new Instances(data);
    data.deleteWithMissingClass();

    // Run the data through the ReplaceMissingValues filter before scoring.
    ReplaceMissingValues rmv = new ReplaceMissingValues();
    rmv.setInputFormat(data);
    data = Filter.useFilter(data, rmv);

    int numClasses = data.classAttribute().numValues();
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();
    m_correlations = new double[data.numAttributes()];
    /*
     * boolean hasNominals = false; boolean hasNumerics = false;
     */
    // Indices of the non-class attributes, split by type.
    List<Integer> numericIndexes = new ArrayList<Integer>();
    List<Integer> nominalIndexes = new ArrayList<Integer>();
    if (m_detailedOutput) {
        m_detailedOutputBuff = new StringBuffer();
    }

    // TODO for instance weights (folded into computing weighted correlations)
    // add another dimension just before the last [2] (0 for 0/1 binary vector
    // and
    // 1 for corresponding instance weights for the 1's)

    // nomAtts[a][v][n] is the binarized form of nominal attribute a: after the
    // fill loop below it is 1 when instance n has value v, otherwise 0.
    // Entries for non-nominal attributes and for the class stay null.
    double[][][] nomAtts = new double[data.numAttributes()][][];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (data.attribute(i).isNominal() && i != classIndex) {
            nomAtts[i] = new double[data.attribute(i).numValues()][data.numInstances()];
            Arrays.fill(nomAtts[i][0], 1.0); // set zero index for this att to all
                                             // 1's
            nominalIndexes.add(i);
        } else if (data.attribute(i).isNumeric() && i != classIndex) {
            numericIndexes.add(i);
        }
    }

    // do the nominal attributes
    if (nominalIndexes.size() > 0) {
        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            // Walk only the values this instance stores (index(j)/valueSparse(j)).
            for (int j = 0; j < current.numValues(); j++) {
                if (current.attribute(current.index(j)).isNominal() && current.index(j) != classIndex) {
                    // Will need to check for zero in case this isn't a sparse
                    // instance (unless we add 1 and subtract 1)
                    // Adding 1 to the observed value and subtracting 1 from value 0
                    // moves the pre-filled 1 to the right slot; when the observed
                    // value is itself 0 the two updates cancel out.
                    nomAtts[current.index(j)][(int) current.valueSparse(j)][i] += 1;
                    nomAtts[current.index(j)][0][i] -= 1;
                }
            }
        }
    }

    if (data.classAttribute().isNumeric()) {
        // Numeric class: correlate each attribute directly with the class column.
        double[] classVals = data.attributeToDoubleArray(classIndex);

        // do the numeric attributes
        for (Integer i : numericIndexes) {
            double[] numAttVals = data.attributeToDoubleArray(i);
            m_correlations[i] = Utils.correlation(numAttVals, classVals, numAttVals.length);

            // An exact 1.0 is double-checked: a constant attribute is scored 0.
            if (m_correlations[i] == 1.0) {
                // check for zero variance (useless numeric attribute)
                if (Utils.variance(numAttVals) == 0) {
                    m_correlations[i] = 0;
                }
            }
        }

        // do the nominal attributes
        if (nominalIndexes.size() > 0) {

            // now compute the correlations for the binarized nominal attributes
            for (Integer i : nominalIndexes) {
                // sum: total count over all values; sumCorr: |corr| per value
                // weighted by how many instances take that value.
                double sum = 0;
                double corr = 0;
                double sumCorr = 0;
                double sumForValue = 0;

                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }

                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    // Number of instances that take value j of attribute i.
                    sumForValue = Utils.sum(nomAtts[i][j]);
                    corr = Utils.correlation(nomAtts[i][j], classVals, classVals.length);

                    // useless attribute - all instances have the same value
                    if (sumForValue == numInstances || sumForValue == 0) {
                        corr = 0;
                    }
                    // Only the magnitude of the correlation is used.
                    if (corr < 0.0) {
                        corr = -corr;
                    }
                    sumCorr += sumForValue * corr;
                    sum += sumForValue;

                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(corr, 6));
                    }
                }
                // Frequency-weighted mean over the attribute's values (0 if empty).
                m_correlations[i] = (sum > 0) ? sumCorr / sum : 0;
            }
        }
    } else {
        // class is nominal
        // TODO extra dimension for storing instance weights too

        // binarizedClasses[k][n] is 1 when instance n belongs to class k, else 0.
        double[][] binarizedClasses = new double[data.classAttribute().numValues()][data.numInstances()];

        // this is equal to the number of instances for all inst weights = 1
        double[] classValCounts = new double[data.classAttribute().numValues()];

        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            binarizedClasses[(int) current.classValue()][i] = 1;
        }
        for (int i = 0; i < data.classAttribute().numValues(); i++) {
            classValCounts[i] = Utils.sum(binarizedClasses[i]);
        }

        // Total of the per-class counts; used as the weight normalizer below.
        double sumClass = Utils.sum(classValCounts);

        // do numeric attributes first
        if (numericIndexes.size() > 0) {
            for (Integer i : numericIndexes) {
                double[] numAttVals = data.attributeToDoubleArray(i);
                double corr = 0;
                double sumCorr = 0;

                // Correlate the attribute with each one-vs-rest class indicator
                // and average the magnitudes, weighted by class frequency.
                for (int j = 0; j < data.classAttribute().numValues(); j++) {
                    corr = Utils.correlation(numAttVals, binarizedClasses[j], numAttVals.length);
                    if (corr < 0.0) {
                        corr = -corr;
                    }

                    if (corr == 1.0) {
                        // check for zero variance (useless numeric attribute)
                        if (Utils.variance(numAttVals) == 0) {
                            corr = 0;
                        }
                    }

                    sumCorr += classValCounts[j] * corr;
                }
                m_correlations[i] = sumCorr / sumClass;
            }
        }

        if (nominalIndexes.size() > 0) {
            for (Integer i : nominalIndexes) {
                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }

                // sumForAtt: total count over the attribute's values;
                // corrForAtt: per-value average correlation weighted by that count.
                double sumForAtt = 0;
                double corrForAtt = 0;
                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    double sumForValue = Utils.sum(nomAtts[i][j]);
                    double corr = 0;
                    double sumCorr = 0;
                    double avgCorrForValue = 0;

                    sumForAtt += sumForValue;
                    for (int k = 0; k < numClasses; k++) {

                        // corr between value j and class k
                        corr = Utils.correlation(nomAtts[i][j], binarizedClasses[k],
                                binarizedClasses[k].length);

                        // useless attribute - all instances have the same value
                        if (sumForValue == numInstances || sumForValue == 0) {
                            corr = 0;
                        }
                        if (corr < 0.0) {
                            corr = -corr;
                        }
                        sumCorr += classValCounts[k] * corr;
                    }
                    // Class-frequency-weighted mean correlation for value j.
                    avgCorrForValue = sumCorr / sumClass;
                    corrForAtt += sumForValue * avgCorrForValue;

                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(avgCorrForValue, 6));
                    }
                }

                // the weighted average corr for att i as
                // a whole (weighted by value frequencies)
                m_correlations[i] = (sumForAtt > 0) ? corrForAtt / sumForAtt : 0;
            }
        }
    }

    // Terminate the detailed report with a newline if anything was written.
    if (m_detailedOutputBuff != null && m_detailedOutputBuff.length() > 0) {
        m_detailedOutputBuff.append("\n");
    }
}

From source file:mlda.attributes.AvgAbsoluteCorrelationBetweenNumericAttributes.java

License:Open Source License

/**
 * Calculate metric value/*from  www.  j a v a2  s  . c  om*/
 * 
 * @param mlData Multi-label dataset to which calculate the metric
 * @return Value of the metric
 */
public double calculate(MultiLabelInstances mlData) {
    Instances instances = mlData.getDataSet();

    int numInstances = mlData.getNumInstances();

    double res = 0.0;
    int count = 0;

    int[] featureIndices = mlData.getFeatureIndices();

    Vector<Integer> numericFeatureIndices = new Vector<>();
    for (int fIndex : featureIndices) {
        if (instances.attribute(fIndex).isNumeric()) {
            numericFeatureIndices.add(fIndex);
        }
    }

    if (numericFeatureIndices.size() <= 0) {
        return Double.NaN;
    }

    double[][] attributesToDoubleArray = new double[numericFeatureIndices.size()][numInstances];
    for (int fIndex : numericFeatureIndices) {
        attributesToDoubleArray[fIndex] = instances.attributeToDoubleArray(fIndex);
    }

    for (int fIndex1 : numericFeatureIndices) {
        for (int fIndex2 = fIndex1 + 1; fIndex2 < numericFeatureIndices.size(); fIndex2++) {
            count++;
            res += Utils.correlation(attributesToDoubleArray[fIndex1], attributesToDoubleArray[fIndex2],
                    numInstances);
        }
    }

    if (count > 0) {
        this.value = res / count;
    } else {
        this.value = Double.NaN;
    }

    //this.value = res/count;
    return value;
}

From source file:mlda.attributes.ProportionNumericAttributesWithOutliers.java

License:Open Source License

/**
 * Calculate metric value: the proportion of numeric feature attributes that
 * are considered to have outliers. An attribute counts as having outliers when
 * the variance of its sorted values, after dropping {@code alpha / 2} of the
 * instances at each end, is below 70% of its full variance. The result is
 * also stored in {@code value}.
 *
 * @param mlData Multi-label dataset to which calculate the metric
 * @return Value of the metric, or {@code Double.NaN} when there is no numeric
 *         feature attribute
 */
public double calculate(MultiLabelInstances mlData) {
    final Instances dataSet = mlData.getDataSet();
    final int total = mlData.getNumInstances();

    final double alpha = 0.05;
    final int trimPerSide = (int) (total * alpha / 2);

    // Scratch buffer for the values left after trimming; reused per attribute.
    final double[] middle = new double[total - (trimPerSide * 2)];

    int numericCount = 0;
    int outlierCount = 0;

    for (Attribute feature : mlData.getFeatureAttributes()) {
        if (!feature.isNumeric()) {
            continue;
        }
        numericCount++;

        final double fullVariance = dataSet.variance(feature);

        final double[] sorted = dataSet.attributeToDoubleArray(feature.index());
        Arrays.sort(sorted);

        // Keep the central part of the sorted values.
        System.arraycopy(sorted, trimPerSide, middle, 0, middle.length);

        final double trimmedVariance = Utils.variance(middle);
        if (trimmedVariance / fullVariance < 0.7) {
            outlierCount++;
        }
    }

    this.value = (numericCount > 0) ? ((double) outlierCount) / numericCount : Double.NaN;
    return value;
}

From source file:myJ48.MyJ48.java

/**
 * Replaces a numeric attribute by a nominal attribute with the two values
 * "low" and "high". A value is "low" when it is less than or equal to the
 * threshold and "high" otherwise; a missing (NaN) value stays missing. The
 * new attribute is named after the original one with the suffix "_nominal"
 * and takes its position.
 *
 * @param numericSet    dataset holding the numeric attribute; it is not modified
 * @param idx_attribute 0-based index of the numeric attribute to convert
 * @param threshold     largest value that is still labelled "low"
 * @return a copy of the dataset with the numeric attribute replaced
 * @throws Exception if the Remove filter cannot be created or applied
 */
public Instances NumericToNominalByThreshold(Instances numericSet, int idx_attribute, double threshold)
        throws Exception {
    Instances NominalizedSet = new Instances(numericSet);
    double[] values = numericSet.attributeToDoubleArray(idx_attribute);

    List<String> nominalValue = new ArrayList<String>();
    nominalValue.add("low");
    nominalValue.add("high");
    Attribute nominalAttrib = new Attribute(numericSet.attribute(idx_attribute).name() + "_nominal",
            nominalValue);

    // Insert the nominal attribute in front of the numeric one, which thereby
    // moves to position idx_attribute + 1.
    NominalizedSet.insertAttributeAt(nominalAttrib, idx_attribute);
    for (int i = 0; i < values.length; i++) {
        if (Double.isNaN(values[i])) {
            // NaN fails every comparison and would otherwise be labelled "high";
            // an unknown numeric value must stay unknown.
            NominalizedSet.instance(i).setMissing(idx_attribute);
        } else if (values[i] <= threshold) {
            NominalizedSet.instance(i).setValue(idx_attribute, "low");
        } else {
            NominalizedSet.instance(i).setValue(idx_attribute, "high");
        }
    }

    // Drop the original numeric attribute. The -R option takes 1-based indices,
    // so 0-based position idx_attribute + 1 is written as idx_attribute + 2.
    String[] options = { "-R", String.valueOf(idx_attribute + 2) };
    Filter remove = (Filter) Class.forName("weka.filters.unsupervised.attribute.Remove")
            .getDeclaredConstructor().newInstance();
    ((OptionHandler) remove).setOptions(options);
    remove.setInputFormat(NominalizedSet);
    NominalizedSet = Filter.useFilter(NominalizedSet, remove);

    return NominalizedSet;
}

From source file:org.opentox.jaqpot3.qsar.util.WekaInstancesProcess.java

License:Open Source License

/**
 * Evaluates a {@code StandardDeviation} statistic over all values of one
 * attribute of the dataset.
 *
 * @param dataInst       dataset to read the attribute values from
 * @param attributeIndex index of the attribute to evaluate
 * @return the result of {@code StandardDeviation.evaluate} for that attribute
 */
private static double normedValue(Instances dataInst, int attributeIndex) {
    return new StandardDeviation().evaluate(dataInst.attributeToDoubleArray(attributeIndex));
}

From source file:org.sr.recognition.paleo.paleoNN.PaleoTrainer.java

License:BSD License

/**
 * Converts ARFF instances into a format readable by LibSVM and outputs it
 * to file/*from w ww . j a v a 2 s  .  com*/
 * 
 * @param filename
 *            output file name
 * @param data
 *            ARFF instances (the data)
 * @throws IOException
 *             if output fails
 */
public static void libSVMToFile(String filename, Instances data) throws IOException {
    BufferedWriter writer = new BufferedWriter(new FileWriter(filename));
    for (int i = 0; i < data.numInstances(); i++) {
        Instance inst = data.instance(i);
        double label = inst.value(inst.numValues() - 1);
        writer.write(label + "\t");
        for (int a = 0; a < inst.numValues() - 1; a++) {
            double value = inst.value(a);
            if (Double.isInfinite(value) || Double.isNaN(value))
                value = mean(data.attributeToDoubleArray(a));
            writer.write((a + 1) + ":" + value);
            if (a < inst.numValues() - 2)
                writer.write(" ");
        }
        writer.newLine();
    }
}

From source file:org.uclab.mm.kcl.ddkat.datapreprocessor.MissingValueHandler.java

License:Apache License

/**
 * Method to replace the identified missing values.
 *
 * <p>Loads {@code BASE_DIR + "OriginalDataSet.csv"}, and for every numeric
 * attribute other than the class treats entries equal to 0 as missing and
 * replaces them with the mean of the attribute's observed (non-zero, non-NaN)
 * values. An attribute without any observed value is left untouched. Finally
 * the last two attributes are removed and the data is handed to
 * {@code saveFilledData}.
 *
 * @throws Exception the exception
 */
public void replaceMissingValues() throws Exception {

    this.confirmationMessage = new ArrayList<String>();

    String inputFile = BASE_DIR + "OriginalDataSet.csv";

    // load CSV file
    CSVLoader fileLoader = new CSVLoader();
    fileLoader.setSource(new File(inputFile));
    Instances outputData = fileLoader.getDataSet();

    int numInstances = outputData.numInstances();
    int numAttributes = outputData.numAttributes();
    int classIndex = outputData.classIndex();

    for (int attributeIndex = 0; attributeIndex < numAttributes; attributeIndex++) {

        // only numeric attributes other than the class are processed
        if (attributeIndex == classIndex || !outputData.attribute(attributeIndex).isNumeric()) {
            continue;
        }

        // mean over the observed values only: zeros mark missing entries, and a
        // NaN must not take part either or it would turn the whole mean into NaN
        double[] column = outputData.attributeToDoubleArray(attributeIndex);
        double sum = 0;
        int observedCounter = 0;
        for (int i = 0; i < column.length; i++) {
            if (column[i] != 0 && !Double.isNaN(column[i])) {
                sum = sum + column[i];
                observedCounter++;
            }
        }

        // nothing observed -> the mean is undefined (0 / 0); keep the column as is
        if (observedCounter == 0) {
            continue;
        }

        double attributeMean = sum / observedCounter;

        for (int instanceIndex = 0; instanceIndex < numInstances; instanceIndex++) {

            // replace the missing values with attribute mean values
            if (outputData.instance(instanceIndex).value(attributeIndex) == 0) {
                outputData.instance(instanceIndex).setValue(attributeIndex, attributeMean);
            }
        }
    }

    // drop the last two attributes before saving
    outputData.deleteAttributeAt(outputData.numAttributes() - 1);
    outputData.deleteAttributeAt(outputData.numAttributes() - 1);

    saveFilledData(inputFile, outputData);

}

From source file:prismcrossvalidation.Preview.java

/**
 * Writes the ARFF data to standard output and to the preview text area: for
 * every attribute, its name followed by the values of all instances.
 * The file path is taken from {@code MainWindow.pathChooseField}, with
 * backslashes converted to forward slashes.
 *
 * @throws IOException declared for the data loading step
 */
public static void showData() throws IOException {
    final String path = MainWindow.pathChooseField.getText().replace("\\", "/");
    final Instances data = DataLoad.loadData(path);
    data.setClassIndex(data.numAttributes() - 1);

    for (int attr = 0; attr < data.numAttributes(); attr++) {
        // Attribute name first ...
        final String label = data.attribute(attr).name() + ": ";
        System.out.print(label);
        previewTextArea.append("\n" + label);

        // ... then the values associated with that attribute.
        final String rendered = Arrays.toString(data.attributeToDoubleArray(attr));
        System.out.println(rendered);
        previewTextArea.append(rendered);
    }
}