Example usage for weka.core SparseInstance SparseInstance

List of usage examples for weka.core SparseInstance SparseInstance

Introduction

In this page you can find the example usage for weka.core SparseInstance SparseInstance.

Prototype

public SparseInstance(double weight, double[] attValues) 

Source Link

Document

Constructor that generates a sparse instance from the given parameters.

Usage

From source file:PrincipalComponents.java

License:Open Source License

/**
 * Convert a pc transformed instance back to the original space
 *
 * @param inst the instance to convert/*  www . ja  va2s .  c  o m*/
 * @return the processed instance
 * @throws Exception if something goes wrong
 */
private Instance convertInstanceToOriginal(Instance inst) throws Exception {
    double[] newVals = null;

    if (m_hasClass) {
        newVals = new double[m_numAttribs + 1];
    } else {
        newVals = new double[m_numAttribs];
    }

    if (m_hasClass) {
        // class is always appended as the last attribute
        newVals[m_numAttribs] = inst.value(inst.numAttributes() - 1);
    }

    for (int i = 0; i < m_eTranspose[0].length; i++) {
        double tempval = 0.0;
        for (int j = 1; j < m_eTranspose.length; j++) {
            tempval += (m_eTranspose[j][i] * inst.value(j - 1));
        }
        newVals[i] = tempval;
        if (!m_center) {
            newVals[i] *= m_stdDevs[i];
        }
        newVals[i] += m_means[i];
    }

    if (inst instanceof SparseInstance) {
        return new SparseInstance(inst.weight(), newVals);
    } else {
        return new DenseInstance(inst.weight(), newVals);
    }
}

From source file:PrincipalComponents.java

License:Open Source License

/**
 * Transform an instance in original (unnormalized) format. Convert back to
 * the original space if requested.
 *
 * @param instance an instance in the original (unnormalized) format
 * @return a transformed instance
 * @throws Exception if the instance can't be transformed
 */
@Override
public Instance convertInstance(Instance instance) throws Exception {

    if (m_eigenvalues == null) {
        throw new Exception("convertInstance: Principal components not " + "built yet");
    }

    double[] newVals = new double[m_outputNumAtts];
    // work on a copy so the caller's instance is never modified by the filters below
    Instance tempInst = (Instance) instance.copy();
    if (!instance.dataset().equalHeaders(m_trainHeader)) {
        throw new Exception("Can't convert instance: header's don't match: " + "PrincipalComponents\n"
                + instance.dataset().equalHeadersMsg(m_trainHeader));
    }

    // apply the same preprocessing chain the training data went through:
    // 1) replace missing values
    m_replaceMissingFilter.input(tempInst);
    m_replaceMissingFilter.batchFinished();
    tempInst = m_replaceMissingFilter.output();

    /*
     * if (m_normalize) { m_normalizeFilter.input(tempInst);
     * m_normalizeFilter.batchFinished(); tempInst =
     * m_normalizeFilter.output(); }
     */

    // 2) binarize nominal attributes
    m_nominalToBinFilter.input(tempInst);
    m_nominalToBinFilter.batchFinished();
    tempInst = m_nominalToBinFilter.output();

    // 3) optionally drop attributes that were filtered out during training
    if (m_attributeFilter != null) {
        m_attributeFilter.input(tempInst);
        m_attributeFilter.batchFinished();
        tempInst = m_attributeFilter.output();
    }

    // 4) standardize, or just center, depending on configuration
    if (!m_center) {
        m_standardizeFilter.input(tempInst);
        m_standardizeFilter.batchFinished();
        tempInst = m_standardizeFilter.output();
    } else {
        m_centerFilter.input(tempInst);
        m_centerFilter.batchFinished();
        tempInst = m_centerFilter.output();
    }

    if (m_hasClass) {
        // the class value is carried over unchanged into the last output slot
        newVals[m_outputNumAtts - 1] = instance.value(instance.classIndex());
    }

    // project onto the principal components, largest eigenvalue first
    // (m_sortedEigens is ascending, hence the reverse iteration)
    double cumulative = 0;
    int numAttAdded = 0;
    for (int i = m_numAttribs - 1; i >= 0; i--) {
        double tempval = 0.0;
        for (int j = 0; j < m_numAttribs; j++) {
            tempval += (m_eigenvectors[j][m_sortedEigens[i]] * tempInst.value(j));
        }
        newVals[m_numAttribs - i - 1] = tempval;
        cumulative += m_eigenvalues[m_sortedEigens[i]];
        // stop once enough variance is covered ...
        if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {
            break;
        }
        // ... or the component cap is exceeded
        // NOTE(review): the check runs before the increment, so slightly more
        // than m_maxNumAttr components can be emitted — confirm intended
        if (numAttAdded > m_maxNumAttr) {
            break;
        }
        numAttAdded++;
    }

    if (!m_transBackToOriginal) {
        // keep the same (sparse/dense) representation as the input
        if (instance instanceof SparseInstance) {
            return new SparseInstance(instance.weight(), newVals);
        } else {
            return new DenseInstance(instance.weight(), newVals);
        }
    } else {
        // map the PC coordinates back into the original attribute space
        if (instance instanceof SparseInstance) {
            return convertInstanceToOriginal(new SparseInstance(instance.weight(), newVals));
        } else {
            return convertInstanceToOriginal(new DenseInstance(instance.weight(), newVals));
        }
    }
}

From source file:MPCKMeans.java

License:Open Source License

/**
 * M-step of the KMeans clustering algorithm -- updates cluster centroids
 * from the current cluster assignments.
 *
 * @throws Exception if centroid computation fails
 */
protected void updateClusterCentroids() throws Exception {
    Instances[] tempI = new Instances[m_NumClusters];
    // tempCentroids/tempNewCentroids are only referenced by the
    // commented-out debugging code below
    Instances tempCentroids = m_ClusterCentroids;
    Instances tempNewCentroids = new Instances(m_Instances, m_NumClusters);
    m_ClusterCentroids = new Instances(m_Instances, m_NumClusters);

    // tempI[i] stores the cluster instances for cluster i
    for (int i = 0; i < m_NumClusters; i++) {
        tempI[i] = new Instances(m_Instances, 0);
    }
    for (int i = 0; i < m_Instances.numInstances(); i++) {
        tempI[m_ClusterAssignments[i]].add(m_Instances.instance(i));
    }

    // Calculates cluster centroids
    for (int i = 0; i < m_NumClusters; i++) {
        double[] values = new double[m_Instances.numAttributes()];
        Instance centroid = null;

        if (m_isSparseInstance) { // uses fast meanOrMode
            values = ClusterUtils.meanOrMode(tempI[i]);
            centroid = new SparseInstance(1.0, values);
        } else { // non-sparse, go through each attribute
            for (int j = 0; j < m_Instances.numAttributes(); j++) {
                values[j] = tempI[i].meanOrMode(j); // uses usual meanOrMode
            }
            centroid = new Instance(1.0, values);
        }

        //        // debugging:  compare  previous centroid w/current:
        //        double w = 0; 
        //        for (int j = 0; j < m_Instances.numAttributes(); j++)  w += values[j] * values[j];
        //        double w1 = 0; 
        //        for (int j = 0; j < m_Instances.numAttributes(); j++)  w1 += tempCentroids.instance(i).value(j) * tempCentroids.instance(i).value(j);

        //        System.out.println("\tOldCentroid=" + w1);
        //        System.out.println("\tNewCentroid=" + w); 
        //        double prevObj = 0, currObj = 0;
        //        for (int j = 0; j < tempI[i].numInstances(); j++) {
        //     Instance instance = tempI[i].instance(j);
        //     double prevPen = m_metrics[i].penalty(instance, tempCentroids.instance(i));
        //     double currPen = m_metrics[i].penalty(instance, centroid);
        //     prevObj += prevPen;
        //     currObj += currPen; 
        //     //System.out.println("\t\t" + j + " " + prevPen + " -> " + currPen + "\t" + prevObj + " -> " + currObj); 
        //        }
        //        // dump instances out if there is a problem.
        //        System.out.println("\t\t" + prevObj + " -> " + currObj); 
        //        if (currObj > prevObj) {

        //     PrintWriter out = new PrintWriter(new BufferedOutputStream(new FileOutputStream("/tmp/INST.arff")), true);
        //     out.println(new Instances(tempI[i], 0));
        //     out.println(centroid);
        //     out.println(tempCentroids.instance(i)); 
        //     for (int j = 0; j < tempI[i].numInstances(); j++) {
        //       out.println(tempI[i].instance(j));
        //     }
        //     out.close();
        //     System.out.println("  Updated cluster " + i + "("
        //              + tempI[i].numInstances());
        //     System.exit(0); 
        //        } 

        // if we are using a smoothing metric, smooth the centroids
        if (m_metric instanceof SmoothingMetric && ((SmoothingMetric) m_metric).getUseSmoothing()) {
            System.out.println("\tSmoothing...");
            SmoothingMetric smoothingMetric = (SmoothingMetric) m_metric;
            centroid = smoothingMetric.smoothInstance(centroid);
        }

        //   DEBUGGING:  replaced line under with block below
        m_ClusterCentroids.add(centroid);
        //        {
        //     tempNewCentroids.add(centroid);
        //     m_ClusterCentroids.delete(); 
        //     for (int j = 0; j <= i; j++) {
        //       m_ClusterCentroids.add(tempNewCentroids.instance(j));
        //     }
        //     for (int j = i+1; j < m_NumClusters; j++) {
        //       m_ClusterCentroids.add(tempCentroids.instance(j));
        //     } 
        //     double objBackup = m_Objective;
        //     System.out.println("  Updated cluster " + i + "("
        //              + tempI[i].numInstances() + "); obj=" +
        //              calculateObjectiveFunction(false));
        //     m_Objective = objBackup;
        //        }

        // in SPKMeans, cluster centroids need to be normalized
        if (m_metric.doesNormalizeData()) {
            // normalize the copy that was added to m_ClusterCentroids,
            // not the local 'centroid' variable
            m_metric.normalizeInstanceWeighted(m_ClusterCentroids.instance(i));
        }
    }

    if (m_metric instanceof SmoothingMetric && ((SmoothingMetric) m_metric).getUseSmoothing())
        updateSmoothingMetrics();

    for (int i = 0; i < m_NumClusters; i++)
        tempI[i] = null; // free memory
}

From source file:adams.data.conversion.AbstractMatchWekaInstanceAgainstHeader.java

License:Open Source License

/**
 * Matches the input instance against the header.
 *
 * @param input   the Instance to align to the header
 * @return      the aligned Instance/*from   w  w  w  . j a  v  a 2  s .c  o m*/
 */
protected Instance match(Instance input) {
    Instance result;
    double[] values;
    int i;

    values = new double[m_Dataset.numAttributes()];
    for (i = 0; i < m_Dataset.numAttributes(); i++) {
        values[i] = Utils.missingValue();
        switch (m_Dataset.attribute(i).type()) {
        case Attribute.NUMERIC:
        case Attribute.DATE:
            values[i] = input.value(i);
            break;
        case Attribute.NOMINAL:
            if (m_Dataset.attribute(i).indexOfValue(input.stringValue(i)) != -1)
                values[i] = m_Dataset.attribute(i).indexOfValue(input.stringValue(i));
            break;
        case Attribute.STRING:
            values[i] = m_Dataset.attribute(i).addStringValue(input.stringValue(i));
            break;
        case Attribute.RELATIONAL:
            values[i] = m_Dataset.attribute(i).addRelation(input.relationalValue(i));
            break;
        default:
            throw new IllegalStateException(
                    "Unhandled attribute type: " + Attribute.typeToString(m_Dataset.attribute(i).type()));
        }
    }

    if (input instanceof SparseInstance)
        result = new SparseInstance(input.weight(), values);
    else
        result = new DenseInstance(input.weight(), values);
    result.setDataset(m_Dataset);

    // fix class index, if necessary
    if ((input.classIndex() != m_Dataset.classIndex()) && (m_Dataset.classIndex() < 0))
        m_Dataset.setClassIndex(input.classIndex());

    return result;
}

From source file:adams.flow.transformer.WekaInstanceBuffer.java

License:Open Source License

/**
 * Executes the flow item: either buffers incoming Instance objects into an
 * Instances dataset, or sets up iteration over an incoming Instances payload.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instance[] insts;
    Instance inst;
    double[] values;
    int i;
    int n;
    boolean updated;

    result = null;

    if (m_Operation == Operation.INSTANCE_TO_INSTANCES) {
        // accept either a single Instance or an array of them as payload
        if (m_InputToken.getPayload() instanceof Instance) {
            insts = new Instance[] { (Instance) m_InputToken.getPayload() };
        } else {
            insts = (Instance[]) m_InputToken.getPayload();
        }

        for (n = 0; n < insts.length; n++) {
            inst = insts[n];

            // discard the buffer when the incoming header no longer matches
            if ((m_Buffer != null) && m_CheckHeader) {
                if (!m_Buffer.equalHeaders(inst.dataset())) {
                    getLogger().info("Header changed, resetting buffer");
                    m_Buffer = null;
                }
            }

            // buffer instance
            if (m_Buffer == null)
                m_Buffer = new Instances(inst.dataset(), 0);

            // we need to make sure that string and relational values are in our
            // buffer header and update the current Instance accordingly before
            // buffering it
            values = inst.toDoubleArray();
            updated = false;
            for (i = 0; i < values.length; i++) {
                if (inst.isMissing(i))
                    continue;
                if (inst.attribute(i).isString()) {
                    values[i] = m_Buffer.attribute(i).addStringValue(inst.stringValue(i));
                    updated = true;
                } else if (inst.attribute(i).isRelationValued()) {
                    values[i] = m_Buffer.attribute(i).addRelation(inst.relationalValue(i));
                    updated = true;
                }
            }

            if (updated) {
                // rebuild the instance with the remapped values, preserving its
                // concrete representation (sparse / binary-sparse / dense)
                if (inst instanceof SparseInstance) {
                    inst = new SparseInstance(inst.weight(), values);
                } else if (inst instanceof BinarySparseInstance) {
                    inst = new BinarySparseInstance(inst.weight(), values);
                } else {
                    if (!(inst instanceof DenseInstance)) {
                        getLogger().severe("Unhandled instance class (" + inst.getClass().getName() + "), "
                                + "defaulting to " + DenseInstance.class.getName());
                    }
                    inst = new DenseInstance(inst.weight(), values);
                }
            } else {
                // no remapping needed; a plain copy suffices
                inst = (Instance) inst.copy();
            }

            m_Buffer.add(inst);
        }

        // emit the buffer every m_Interval instances
        if (m_Buffer.numInstances() % m_Interval == 0) {
            m_OutputToken = new Token(m_Buffer);
            if (m_ClearBuffer)
                m_Buffer = null;
        }
    } else if (m_Operation == Operation.INSTANCES_TO_INSTANCE) {
        // switch to iterating over a full dataset supplied as payload
        m_Buffer = (Instances) m_InputToken.getPayload();
        m_Iterator = m_Buffer.iterator();
    } else {
        throw new IllegalStateException("Unhandled operation: " + m_Operation);
    }

    return result;
}

From source file:adams.flow.transformer.WekaInstancesAppend.java

License:Open Source License

/**
 * Executes the flow item: appends multiple compatible datasets (provided as
 * file names, File objects or Instances arrays) into a single dataset.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    String[] filesStr;
    File[] files;
    int i;
    int n;
    Instances[] inst;
    Instances full;
    String msg;
    StringBuilder relation;
    double[] values;

    result = null;

    // get filenames
    files = null;
    inst = null;
    if (m_InputToken.getPayload() instanceof String[]) {
        filesStr = (String[]) m_InputToken.getPayload();
        files = new File[filesStr.length];
        for (i = 0; i < filesStr.length; i++)
            files[i] = new PlaceholderFile(filesStr[i]);
    } else if (m_InputToken.getPayload() instanceof File[]) {
        files = (File[]) m_InputToken.getPayload();
    } else if (m_InputToken.getPayload() instanceof Instances[]) {
        inst = (Instances[]) m_InputToken.getPayload();
    } else {
        throw new IllegalStateException("Unhandled input type: " + m_InputToken.getPayload().getClass());
    }

    // load data?
    if (files != null) {
        inst = new Instances[files.length];
        for (i = 0; i < files.length; i++) {
            try {
                inst[i] = DataSource.read(files[i].getAbsolutePath());
            } catch (Exception e) {
                result = handleException("Failed to load dataset: " + files[i], e);
                break;
            }
        }
    }

    // test compatibility: every pair of datasets must have equal headers
    if (result == null) {
        for (i = 0; i < inst.length - 1; i++) {
            for (n = i + 1; n < inst.length; n++) {
                if ((msg = inst[i].equalHeadersMsg(inst[n])) != null) {
                    result = "Dataset #" + (i + 1) + " and #" + (n + 1) + " are not compatible:\n" + msg;
                    break;
                }
            }
            if (result != null)
                break;
        }
    }

    // append
    if (result == null) {
        full = new Instances(inst[0]);
        relation = new StringBuilder(inst[0].relationName());
        for (i = 1; i < inst.length; i++) {
            relation.append("+" + inst[i].relationName());
            for (Instance row : inst[i]) {
                values = row.toDoubleArray();
                // string/relational values must be re-registered in the target
                // header so their indices stay valid
                for (n = 0; n < values.length; n++) {
                    if (row.attribute(n).isString())
                        values[n] = full.attribute(n).addStringValue(row.stringValue(n));
                    else if (row.attribute(n).isRelationValued())
                        values[n] = full.attribute(n).addRelation(row.relationalValue(n));
                }
                // keep each row's sparse/dense representation
                if (row instanceof SparseInstance)
                    row = new SparseInstance(row.weight(), values);
                else
                    row = new DenseInstance(row.weight(), values);
                full.add(row);
            }
        }
        // combined relation name, e.g. "a+b+c"
        full.setRelationName(relation.toString());
        m_OutputToken = new Token(full);
    }

    return result;
}

From source file:cluster.ABC.ClusterUtils.java

License:Open Source License

/**
 * Finds the attribute-wise sum of two instances (handles sparse and
 * non-sparse, but not a mix of the two). The result's weight is the sum of
 * both input weights and its dataset is set to the supplied Instances.
 *
 * @param inst1        the first instance
 * @param inst2        the second instance
 * @param m_Instances  the dataset the summed instance is attached to
 * @return the summed instance
 * @throws Exception if the instances differ in attribute count, or one is
 *                   sparse while the other is not
 */
public static Instance sumInstances(Instance inst1, Instance inst2, Instances m_Instances) throws Exception {
    int numAttributes = inst1.numAttributes();
    if (inst2.numAttributes() != numAttributes) {
        throw new Exception("Error!! inst1 and inst2 should have same number of attributes.");
    }
    double weight1 = inst1.weight(), weight2 = inst2.weight();
    // Java zero-initializes a new double[], so no explicit clearing loop is needed
    double[] values = new double[numAttributes];

    if (inst1 instanceof SparseInstance && inst2 instanceof SparseInstance) {
        // only visit the explicitly stored entries of each sparse instance
        for (int i = 0; i < inst1.numValues(); i++) {
            values[inst1.index(i)] = inst1.valueSparse(i);
        }
        for (int i = 0; i < inst2.numValues(); i++) {
            values[inst2.index(i)] += inst2.valueSparse(i);
        }
        SparseInstance newInst = new SparseInstance(weight1 + weight2, values);
        newInst.setDataset(m_Instances);
        return newInst;
    } else if (!(inst1 instanceof SparseInstance) && !(inst2 instanceof SparseInstance)) {
        for (int i = 0; i < numAttributes; i++) {
            values[i] = inst1.value(i) + inst2.value(i);
        }
    } else {
        throw new Exception("Error!! inst1 and inst2 should be both of same type -- sparse or non-sparse");
    }
    Instance newInst = new Instance(weight1 + weight2, values);
    newInst.setDataset(m_Instances);
    return newInst;
}

From source file:com.rokittech.ml.server.utils.MLUtils.java

License:Open Source License

/** Builds a weight-1 {@link SparseInstance} from the test-attribute values of the given ML instance. */
public static Instance toTesInstance(MLInstance mlInstance, List<String> features) {
    final double[] attValues = toTestAttribute(mlInstance, features);
    return new SparseInstance(1, attValues);
}

From source file:com.rokittech.ml.server.utils.MLUtils.java

License:Open Source License

/** Builds a weight-1 {@link SparseInstance} from the attribute values of the given ML instance. */
public static Instance toInstance(MLInstance mlInstance, List<String> features) {
    final double[] attValues = toAttribute(mlInstance, features);
    return new SparseInstance(1, attValues);
}

From source file:data.generation.target.utils.PrincipalComponents.java

License:Open Source License

/**
 * Convert a pc transformed instance back to the original space.
 *
 * @param inst        the instance to convert
 * @return            the processed instance
 * @throws Exception  if something goes wrong
 */
private Instance convertInstanceToOriginal(Instance inst) throws Exception {
    // reserve one extra slot for the class value when present
    final int size = m_hasClass ? m_numAttribs + 1 : m_numAttribs;
    final double[] reconstructed = new double[size];

    if (m_hasClass) {
        // the class value is always stored in the last slot
        reconstructed[m_numAttribs] = inst.value(inst.numAttributes() - 1);
    }

    for (int a = 0; a < m_eTranspose[0].length; a++) {
        // project the PC coordinates back through the transposed eigenvector
        // matrix; row 0 is skipped, rows 1..n carry the components
        double sum = 0.0;
        for (int pc = 1; pc < m_eTranspose.length; pc++) {
            sum += m_eTranspose[pc][a] * inst.value(pc - 1);
        }
        if (!m_center) {
            // undo standardization before adding the mean back
            sum *= m_stdDevs[a];
        }
        reconstructed[a] = sum + m_means[a];
    }

    if (inst instanceof SparseInstance) {
        return new SparseInstance(inst.weight(), reconstructed);
    }
    return new Instance(inst.weight(), reconstructed);
}