Example usage for weka.core Instance numValues

List of usage examples for weka.core Instance numValues

Introduction

On this page you can find example usage for weka.core Instance.numValues().

Prototype

public int numValues();

Document

Returns the number of values present in a sparse representation.
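
Before the full excerpts below, here is a minimal, self-contained sketch of the usual iteration pattern: numValues() gives the number of stored values, index(i) maps the i-th stored value back to its attribute index, and valueSparse(i) returns the value itself. The sketch assumes the older Weka 3.x API used by the examples on this page (FastVector, the concrete Instance class); the attribute names a0-a2 are made up for illustration.

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.SparseInstance;

public class NumValuesDemo {
    public static void main(String[] args) {
        // Three numeric attributes with made-up names.
        FastVector attrs = new FastVector();
        attrs.addElement(new Attribute("a0"));
        attrs.addElement(new Attribute("a1"));
        attrs.addElement(new Attribute("a2"));
        Instances data = new Instances("demo", attrs, 0);

        // A sparse instance stores only its non-zero values (here: a single value).
        Instance inst = new SparseInstance(1.0, new double[] { 0.0, 2.5, 0.0 });
        inst.setDataset(data);

        // numValues() == number of stored values, not the number of attributes.
        for (int i = 0; i < inst.numValues(); i++) {
            int attIndex = inst.index(i);       // attribute index of the i-th stored value
            double value = inst.valueSparse(i); // the stored value itself
            System.out.println(data.attribute(attIndex).name() + " = " + value);
        }
    }
}

For a dense Instance, numValues() equals numAttributes(), index(i) is simply i, and valueSparse(i) equals value(i), so the same loop handles both representations.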

Usage

From source file:MPCKMeans.java

License:Open Source License

/** Actual KMeans function */
protected void runKMeans() throws Exception {
    boolean converged = false;
    m_Iterations = 0;
    m_numBlankIterations = 0;
    m_Objective = Double.POSITIVE_INFINITY;

    if (!m_isOfflineMetric) {
        if (m_useMultipleMetrics) {
            for (int i = 0; i < m_metrics.length; i++) {
                m_metrics[i].resetMetric();
                m_metricLearners[i].resetLearner();
            }
        } else {
            m_metric.resetMetric();
            m_metricLearner.resetLearner();
        }
        // initialize max CL penalties
        if (m_ConstraintsHash.size() > 0) {
            m_maxCLPenalties = calculateMaxCLPenalties();
        }
    }

    // initialize m_ClusterAssignments
    for (int i = 0; i < m_NumClusters; i++) {
        m_ClusterAssignments[i] = -1;
    }

    PrintStream fincoh = null;
    if (m_ConstraintIncoherenceFile != null) {
        fincoh = new PrintStream(new FileOutputStream(m_ConstraintIncoherenceFile));
    }

    while (!converged) {
        System.out.println("\n" + m_Iterations + ". Objective function: " + ((float) m_Objective));
        m_OldObjective = m_Objective;

        // E-step
        int numMovedPoints = findBestAssignments();

        m_numBlankIterations = (numMovedPoints == 0) ? m_numBlankIterations + 1 : 0;

        //      calculateObjectiveFunction(false);
        System.out.println((float) m_Objective + " - Objective function after point assignment(CALC)");
        System.out.println("\tvar=" + ((float) m_objVariance) + "\tC=" + ((float) m_objCannotLinks) + "\tM="
                + ((float) m_objMustLinks) + "\tLOG=" + ((float) m_objNormalizer) + "\tREG="
                + ((float) m_objRegularizer));

        // M-step
        updateClusterCentroids();

        //      calculateObjectiveFunction(false);
        System.out.println((float) m_Objective + " - Objective function after centroid estimation");
        System.out.println("\tvar=" + ((float) m_objVariance) + "\tC=" + ((float) m_objCannotLinks) + "\tM="
                + ((float) m_objMustLinks) + "\tLOG=" + ((float) m_objNormalizer) + "\tREG="
                + ((float) m_objRegularizer));

        if (m_Trainable == TRAINING_INTERNAL && !m_isOfflineMetric) {
            updateMetricWeights();
            if (m_verbose) {
                calculateObjectiveFunction(true);
                System.out.println((float) m_Objective + " - Objective function after metric update");
                System.out.println("\tvar=" + ((float) m_objVariance) + "\tC=" + ((float) m_objCannotLinks)
                        + "\tM=" + ((float) m_objMustLinks) + "\tLOG=" + ((float) m_objNormalizer) + "\tREG="
                        + ((float) m_objRegularizer));
            }

            if (m_ConstraintsHash.size() > 0) {
                m_maxCLPenalties = calculateMaxCLPenalties();
            }
        }

        if (fincoh != null) {
            printConstraintIncoherence(fincoh);
        }

        converged = convergenceCheck(m_OldObjective, m_Objective);
        m_Iterations++;
    }

    if (fincoh != null) {
        fincoh.close();
    }
    System.out.println("Converged!");
    System.err.print("Its\t" + m_Iterations + "\t");

    if (m_verbose) {
        System.out.println("Done clustering; top cluster features: ");
        for (int i = 0; i < m_NumClusters; i++) {
            System.out.println("Centroid " + i);
            TreeMap map = new TreeMap(Collections.reverseOrder());
            Instance centroid = m_ClusterCentroids.instance(i);
            for (int j = 0; j < centroid.numValues(); j++) {
                Attribute attr = centroid.attributeSparse(j);
                map.put(new Double(centroid.value(attr)), attr.name());
            }
            Iterator it = map.entrySet().iterator();
            for (int j = 0; j < 5 && it.hasNext(); j++) {
                Map.Entry entry = (Map.Entry) it.next();
                System.out.println("\t" + entry.getKey() + "\t" + entry.getValue());
            }
        }
    }
}

From source file:br.com.ufu.lsi.utils.DocumentFrequencyAttributeEval.java

License:Open Source License

/**
 * Initializes a document frequency attribute evaluator.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been generated successfully
 */
public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();

    int numAttributes = data.numAttributes();
    m_DFs = new int[numAttributes];
    Enumeration e = data.enumerateInstances();
    while (e.hasMoreElements()) {
        Instance instance = (Instance) e.nextElement();
        int numValues = instance.numValues();
        for (int valueIndex = 0; valueIndex < numValues; valueIndex++) {
            int attIndex = instance.index(valueIndex);
            if (attIndex != classIndex) {
                double value = instance.valueSparse(valueIndex);
                // missing values are treated as 0.
                if (m_missingAsZero) {
                    if (!Instance.isMissingValue(value) && value != 0.0) { // one could also use isMissingSparse(valueIndex), or, less efficiently, isMissing(attIndex)
                        m_DFs[attIndex]++;
                        //m_DFs[ attIndex ]+=value ;
                    }
                } else {
                    if (value != 0.0) {
                        m_DFs[attIndex]++;
                        //m_DFs[ attIndex ]+=value ;
                    }
                }
            }
        }
    }
}

From source file:cba.ItemSet.java

License:Open Source License

/**
 * Checks if an instance contains an item set.
 *
 * @param instance the instance to be tested
 * @return true if the given instance contains this item set
 */

public boolean containedBy(Instance instance) {

    if (instance instanceof weka.core.SparseInstance && m_treatZeroAsMissing) {
        int numInstVals = instance.numValues();
        int numItemSetVals = m_items.length;

        for (int p1 = 0, p2 = 0; p1 < numInstVals || p2 < numItemSetVals;) {
            int instIndex = Integer.MAX_VALUE;
            if (p1 < numInstVals) {
                instIndex = instance.index(p1);
            }
            int itemIndex = p2;

            if (m_items[itemIndex] > -1) {
                if (itemIndex != instIndex) {
                    return false;
                } else {
                    if (instance.isMissingSparse(p1)) {
                        return false;
                    }
                    if (m_items[itemIndex] != (int) instance.valueSparse(p1)) {
                        return false;
                    }
                }

                p1++;
                p2++;
            } else {
                if (itemIndex < instIndex) {
                    p2++;
                } else if (itemIndex == instIndex) {
                    p2++;
                    p1++;
                }
            }
        }
    } else {
        for (int i = 0; i < instance.numAttributes(); i++)
            if (m_items[i] > -1) {
                if (instance.isMissing(i) || (m_treatZeroAsMissing && (int) instance.value(i) == 0))
                    return false;
                if (m_items[i] != (int) instance.value(i))
                    return false;
            }
    }

    return true;
}

From source file:ChiSquare.ChiSquaredAttributeEval.java

License:Open Source License

/**
 * Initializes a chi-squared attribute evaluator.
 * Discretizes all attributes that are numeric.
 *
 * @param data set of instances serving as training data 
 * @throws Exception if the evaluator has not been 
 * generated successfully
 */
public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute chi-squared values
    m_ChiSquareds = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_ChiSquareds[i] = ContingencyTables.chiVal(ContingencyTables.reduceMatrix(counts[i]), false);
        }
    }
}

From source file:classifier.CustomStringToWordVector.java

License:Open Source License

/**
 * Signify that this batch of input to the filter is finished. If the filter
 * requires all instances prior to filtering, output() may now be called to
 * retrieve the filtered instances.
 * 
 * @return true if there are instances pending output.
 * @throws IllegalStateException
 *             if no input structure has been defined.
 */
public boolean batchFinished() throws Exception {

    if (getInputFormat() == null) {
        throw new IllegalStateException("No input instance format defined");
    }

    // We only need to do something in this method
    // if the first batch hasn't been processed. Otherwise
    // input() has already done all the work.
    if (!isFirstBatchDone()) {

        // Determine the dictionary from the first batch (training data)
        determineDictionary();

        // Convert all instances w/o normalization
        FastVector fv = new FastVector();
        int firstCopy = 0;
        for (int i = 0; i < m_NumInstances; i++) {
            firstCopy = convertInstancewoDocNorm(getInputFormat().instance(i), fv);
        }

        // Need to compute average document length if necessary
        if (m_filterType != FILTER_NONE) {
            m_AvgDocLength = 0;
            for (int i = 0; i < fv.size(); i++) {
                Instance inst = (Instance) fv.elementAt(i);
                double docLength = 0;
                for (int j = 0; j < inst.numValues(); j++) {
                    if (inst.index(j) >= firstCopy) {
                        docLength += inst.valueSparse(j) * inst.valueSparse(j);
                    }
                }
                m_AvgDocLength += Math.sqrt(docLength);
            }
            m_AvgDocLength /= m_NumInstances;
        }

        // Perform normalization if necessary.
        if (m_filterType == FILTER_NORMALIZE_ALL) {
            for (int i = 0; i < fv.size(); i++) {
                normalizeInstance((Instance) fv.elementAt(i), firstCopy);
            }
        }

        // Push all instances into the output queue
        for (int i = 0; i < fv.size(); i++) {
            push((Instance) fv.elementAt(i));
        }
    }

    // Flush the input
    flushInput();

    m_NewBatch = true;
    m_FirstBatchDone = true;
    return (numPendingOutput() != 0);
}

From source file:classifier.CustomStringToWordVector.java

License:Open Source License

/**
 * Normalizes given instance to average doc length (only the newly
 * constructed attributes).
 * 
 * @param inst
 *            the instance to normalize
 * @param firstCopy
 * @throws Exception
 *             if avg. doc length not set
 */
private void normalizeInstance(Instance inst, int firstCopy) throws Exception {

    double docLength = 0;

    if (m_AvgDocLength < 0) {
        throw new Exception("Average document length not set.");
    }

    // Compute length of document vector
    for (int j = 0; j < inst.numValues(); j++) {
        if (inst.index(j) >= firstCopy) {
            docLength += inst.valueSparse(j) * inst.valueSparse(j);
        }
    }
    docLength = Math.sqrt(docLength);

    // Normalize document vector
    for (int j = 0; j < inst.numValues(); j++) {
        if (inst.index(j) >= firstCopy) {
            double val = inst.valueSparse(j) * m_AvgDocLength / docLength;
            inst.setValueSparse(j, val);
            if (val == 0) {
                System.err.println("setting value " + inst.index(j) + " to zero.");
                j--;
            }
        }
    }
}

From source file:cluster.ABC.ClusterUtils.java

License:Open Source License

/** Normalizes the values of a SparseInstance in L2 norm
 *
 * @author Sugato Basu
 * @param inst SparseInstance to be normalized
 */

public static void normalizeSparseInstance(Instance inst) throws Exception {
    double norm = 0;
    int length = inst.numValues();

    if (!(inst instanceof SparseInstance)) {
        System.err.println("Not SparseInstance, using normalizeInstance function instead");
        normalizeInstance(inst);
        return;
    }

    for (int i = 0; i < length; i++) {
        if (inst.index(i) != inst.classIndex()) { // don't normalize the class index
            norm += inst.valueSparse(i) * inst.valueSparse(i);
        }
    }
    norm = Math.sqrt(norm);
    for (int i = 0; i < length; i++) { // don't normalize the class index
        if (inst.index(i) != inst.classIndex()) {
            inst.setValueSparse(i, inst.valueSparse(i) / norm);
        }
    }
}

From source file:cluster.ABC.ClusterUtils.java

License:Open Source License

/** This function divides every attribute value in an instance by
 *  the instance weight -- useful to find the mean of a cluster in
 *  Euclidean space
 *  @param inst Instance passed in for normalization (destructive update)
 */
public static void normalizeByWeight(Instance inst) {
    double weight = inst.weight();
    if (inst instanceof SparseInstance) {
        for (int i = 0; i < inst.numValues(); i++) {
            inst.setValueSparse(i, inst.valueSparse(i) / weight);
        }
    } else if (!(inst instanceof SparseInstance)) {
        for (int i = 0; i < inst.numAttributes(); i++) {
            inst.setValue(i, inst.value(i) / weight);
        }
    }
}

From source file:cluster.ABC.ClusterUtils.java

License:Open Source License

/** Finds the sum of two instances (handles sparse and non-sparse). */

public static Instance sumInstances(Instance inst1, Instance inst2, Instances m_Instances) throws Exception {
    int numAttributes = inst1.numAttributes();
    if (inst2.numAttributes() != numAttributes) {
        throw new Exception("Error!! inst1 and inst2 should have same number of attributes.");
    }
    double weight1 = inst1.weight(), weight2 = inst2.weight();
    double[] values = new double[numAttributes];

    for (int i = 0; i < numAttributes; i++) {
        values[i] = 0;
    }

    if (inst1 instanceof SparseInstance && inst2 instanceof SparseInstance) {
        for (int i = 0; i < inst1.numValues(); i++) {
            int indexOfIndex = inst1.index(i);
            values[indexOfIndex] = inst1.valueSparse(i);
        }
        for (int i = 0; i < inst2.numValues(); i++) {
            int indexOfIndex = inst2.index(i);
            values[indexOfIndex] += inst2.valueSparse(i);
        }
        SparseInstance newInst = new SparseInstance(weight1 + weight2, values);
        newInst.setDataset(m_Instances);
        return newInst;
    } else if (!(inst1 instanceof SparseInstance) && !(inst2 instanceof SparseInstance)) {
        for (int i = 0; i < numAttributes; i++) {
            values[i] = inst1.value(i) + inst2.value(i);
        }
    } else {
        throw new Exception("Error!! inst1 and inst2 should be both of same type -- sparse or non-sparse");
    }
    Instance newInst = new Instance(weight1 + weight2, values);
    newInst.setDataset(m_Instances);
    return newInst;
}
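
As a side note, sumInstances and the normalizeByWeight helper shown earlier in the same ClusterUtils class combine naturally into a weighted mean. The following is only a hypothetical sketch of such a combination, not code from the source file:

// Hypothetical helper (not part of ClusterUtils): weighted mean of a dataset,
// built from sumInstances() and normalizeByWeight() above.
public static Instance weightedMean(Instances data) throws Exception {
    Instance sum = (Instance) data.instance(0).copy();
    for (int i = 1; i < data.numInstances(); i++) {
        // sums all attribute values (including the class attribute, as sumInstances does)
        sum = ClusterUtils.sumInstances(sum, data.instance(i), data);
    }
    ClusterUtils.normalizeByWeight(sum); // divide every value by the accumulated weight
    return sum;
}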

From source file:cn.edu.xjtu.dbmine.StringToWordVector.java

License:Open Source License

/**
 * Normalizes given instance to average doc length (only the newly
 * constructed attributes).
 * 
 * @param inst
 *            the instance to normalize
 * @param firstCopy
 * @throws Exception
 *             if avg. doc length not set
 */

private void normalizeInstance(Instance inst, int firstCopy) throws Exception {

    double docLength = 0;

    if (m_AvgDocLength < 0) {
        throw new Exception("Average document length not set.");
    }

    // Compute length of document vector
    for (int j = 0; j < inst.numValues(); j++) {
        if (inst.index(j) >= firstCopy) {
            docLength += inst.valueSparse(j) * inst.valueSparse(j);
        }
    }
    docLength = Math.sqrt(docLength);

    // Normalize document vector
    for (int j = 0; j < inst.numValues(); j++) {
        if (inst.index(j) >= firstCopy) {
            double val = inst.valueSparse(j) * m_AvgDocLength / docLength;
            inst.setValueSparse(j, val);
            if (val == 0) {
                System.err.println("setting value " + inst.index(j) + " to zero.");
                j--;
            }
        }
    }
}