Example usage for weka.core Instance numValues

List of usage examples for weka.core Instance numValues

Introduction

On this page you can find example usage for weka.core Instance.numValues().

Prototype

public int numValues();

Document

Returns the number of values present in a sparse representation.
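
Before the full excerpts below, here is a minimal, self-contained sketch of the usual iteration pattern: numValues() gives the number of stored values, index(i) maps the i-th stored value back to its attribute index, and valueSparse(i) returns the value itself. The sketch assumes the older Weka 3.x API used by the examples on this page (FastVector, the concrete Instance class); the attribute names a0-a2 are made up for illustration.

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.SparseInstance;

public class NumValuesDemo {
    public static void main(String[] args) {
        // Three numeric attributes with made-up names.
        FastVector attrs = new FastVector();
        attrs.addElement(new Attribute("a0"));
        attrs.addElement(new Attribute("a1"));
        attrs.addElement(new Attribute("a2"));
        Instances data = new Instances("demo", attrs, 0);

        // A sparse instance stores only its non-zero values (here: a single value).
        Instance inst = new SparseInstance(1.0, new double[] { 0.0, 2.5, 0.0 });
        inst.setDataset(data);

        // numValues() == number of stored values, not the number of attributes.
        for (int i = 0; i < inst.numValues(); i++) {
            int attIndex = inst.index(i);       // attribute index of the i-th stored value
            double value = inst.valueSparse(i); // the stored value itself
            System.out.println(data.attribute(attIndex).name() + " = " + value);
        }
    }
}

For a dense Instance, numValues() equals numAttributes(), index(i) is simply i, and valueSparse(i) equals value(i), so the same loop handles both representations.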

Usage

From source file:MPCKMeans.java

License:Open Source License

/** Actual KMeans function */
protected void runKMeans() throws Exception {
    boolean converged = false;
    m_Iterations = 0;
    m_numBlankIterations = 0;
    m_Objective = Double.POSITIVE_INFINITY;

    if (!m_isOfflineMetric) {
        if (m_useMultipleMetrics) {
            for (int i = 0; i < m_metrics.length; i++) {
                m_metrics[i].resetMetric();
                m_metricLearners[i].resetLearner();
            }
        } else {
            m_metric.resetMetric();
            m_metricLearner.resetLearner();
        }
        // initialize max CL penalties
        if (m_ConstraintsHash.size() > 0) {
            m_maxCLPenalties = calculateMaxCLPenalties();
        }
    }

    // initialize m_ClusterAssignments
    for (int i = 0; i < m_NumClusters; i++) {
        m_ClusterAssignments[i] = -1;
    }

    PrintStream fincoh = null;
    if (m_ConstraintIncoherenceFile != null) {
        fincoh = new PrintStream(new FileOutputStream(m_ConstraintIncoherenceFile));
    }

    while (!converged) {
        System.out.println("\n" + m_Iterations + ". Objective function: " + ((float) m_Objective));
        m_OldObjective = m_Objective;

        // E-step
        int numMovedPoints = findBestAssignments();

        m_numBlankIterations = (numMovedPoints == 0) ? m_numBlankIterations + 1 : 0;

        //      calculateObjectiveFunction(false);
        System.out.println((float) m_Objective + " - Objective function after point assignment(CALC)");
        System.out.println("\tvar=" + ((float) m_objVariance) + "\tC=" + ((float) m_objCannotLinks) + "\tM="
                + ((float) m_objMustLinks) + "\tLOG=" + ((float) m_objNormalizer) + "\tREG="
                + ((float) m_objRegularizer));

        // M-step
        updateClusterCentroids();

        //      calculateObjectiveFunction(false);
        System.out.println((float) m_Objective + " - Objective function after centroid estimation");
        System.out.println("\tvar=" + ((float) m_objVariance) + "\tC=" + ((float) m_objCannotLinks) + "\tM="
                + ((float) m_objMustLinks) + "\tLOG=" + ((float) m_objNormalizer) + "\tREG="
                + ((float) m_objRegularizer));

        if (m_Trainable == TRAINING_INTERNAL && !m_isOfflineMetric) {
            updateMetricWeights();
            if (m_verbose) {
                calculateObjectiveFunction(true);
                System.out.println((float) m_Objective + " - Objective function after metric update");
                System.out.println("\tvar=" + ((float) m_objVariance) + "\tC=" + ((float) m_objCannotLinks)
                        + "\tM=" + ((float) m_objMustLinks) + "\tLOG=" + ((float) m_objNormalizer) + "\tREG="
                        + ((float) m_objRegularizer));
            }

            if (m_ConstraintsHash.size() > 0) {
                m_maxCLPenalties = calculateMaxCLPenalties();
            }
        }

        if (fincoh != null) {
            printConstraintIncoherence(fincoh);
        }

        converged = convergenceCheck(m_OldObjective, m_Objective);
        m_Iterations++;
    }

    if (fincoh != null) {
        fincoh.close();
    }
    System.out.println("Converged!");
    System.err.print("Its\t" + m_Iterations + "\t");

    if (m_verbose) {
        System.out.println("Done clustering; top cluster features: ");
        for (int i = 0; i < m_NumClusters; i++) {
            System.out.println("Centroid " + i);
            TreeMap map = new TreeMap(Collections.reverseOrder());
            Instance centroid = m_ClusterCentroids.instance(i);
            for (int j = 0; j < centroid.numValues(); j++) {
                Attribute attr = centroid.attributeSparse(j);
                map.put(new Double(centroid.value(attr)), attr.name());
            }
            Iterator it = map.entrySet().iterator();
            for (int j = 0; j < 5 && it.hasNext(); j++) {
                Map.Entry entry = (Map.Entry) it.next();
                System.out.println("\t" + entry.getKey() + "\t" + entry.getValue());
            }
        }
    }
}

From source file:br.com.ufu.lsi.utils.DocumentFrequencyAttributeEval.java

License:Open Source License

/**
 * Initializes a document frequency attribute evaluator.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been generated successfully
 */
public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();

    int numAttributes = data.numAttributes();
    m_DFs = new int[numAttributes];
    Enumeration e = data.enumerateInstances();
    while (e.hasMoreElements()) {
        Instance instance = (Instance) e.nextElement();
        int numValues = instance.numValues();
        for (int valueIndex = 0; valueIndex < numValues; valueIndex++) {
            int attIndex = instance.index(valueIndex);
            if (attIndex != classIndex) {
                double value = instance.valueSparse(valueIndex);
                // missing values are treated as 0.
                if (m_missingAsZero) {
                    if (!Instance.isMissingValue(value) && value != 0.0) { // one could also use isMissingSparse(valueIndex), or, less efficiently, isMissing(attIndex)
                        m_DFs[attIndex]++;
                        //m_DFs[ attIndex ]+=value ;
                    }
                } else {
                    if (value != 0.0) {
                        m_DFs[attIndex]++;
                        //m_DFs[ attIndex ]+=value ;
                    }
                }
            }
        }
    }
}

From source file:cba.ItemSet.java

License:Open Source License

/**
 * Checks if an instance contains an item set.
 *
 * @param instance the instance to be tested
 * @return true if the given instance contains this item set
 */

public boolean containedBy(Instance instance) {

    if (instance instanceof weka.core.SparseInstance && m_treatZeroAsMissing) {
        int numInstVals = instance.numValues();
        int numItemSetVals = m_items.length;

        for (int p1 = 0, p2 = 0; p1 < numInstVals || p2 < numItemSetVals;) {
            int instIndex = Integer.MAX_VALUE;
            if (p1 < numInstVals) {
                instIndex = instance.index(p1);
            }
            int itemIndex = p2;

            if (m_items[itemIndex] > -1) {
                if (itemIndex != instIndex) {
                    return false;
                } else {
                    if (instance.isMissingSparse(p1)) {
                        return false;
                    }
                    if (m_items[itemIndex] != (int) instance.valueSparse(p1)) {
                        return false;
                    }
                }

                p1++;
                p2++;
            } else {
                if (itemIndex < instIndex) {
                    p2++;
                } else if (itemIndex == instIndex) {
                    p2++;
                    p1++;
                }
            }
        }
    } else {
        for (int i = 0; i < instance.numAttributes(); i++)
            if (m_items[i] > -1) {
                if (instance.isMissing(i) || (m_treatZeroAsMissing && (int) instance.value(i) == 0))
                    return false;
                if (m_items[i] != (int) instance.value(i))
                    return false;
            }
    }

    return true;
}

From source file:ChiSquare.ChiSquaredAttributeEval.java

License:Open Source License

/**
 * Initializes a chi-squared attribute evaluator.
 * Discretizes all attributes that are numeric.
 *
 * @param data set of instances serving as training data 
 * @throws Exception if the evaluator has not been 
 * generated successfully
 */
public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute chi-squared values
    m_ChiSquareds = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_ChiSquareds[i] = ContingencyTables.chiVal(ContingencyTables.reduceMatrix(counts[i]), false);
        }
    }
}

From source file:classifier.CustomStringToWordVector.java

License:Open Source License

/**
 * Signify that this batch of input to the filter is finished. If the filter
 * requires all instances prior to filtering, output() may now be called to
 * retrieve the filtered instances.
 * 
 * @return true if there are instances pending output.
 * @throws IllegalStateException
 *             if no input structure has been defined.
 */
public boolean batchFinished() throws Exception {

    if (getInputFormat() == null) {
        throw new IllegalStateException("No input instance format defined");
    }

    // We only need to do something in this method
    // if the first batch hasn't been processed. Otherwise
    // input() has already done all the work.
    if (!isFirstBatchDone()) {

        // Determine the dictionary from the first batch (training data)
        determineDictionary();

        // Convert all instances w/o normalization
        FastVector fv = new FastVector();
        int firstCopy = 0;
        for (int i = 0; i < m_NumInstances; i++) {
            firstCopy = convertInstancewoDocNorm(getInputFormat().instance(i), fv);
        }

        // Need to compute average document length if necessary
        if (m_filterType != FILTER_NONE) {
            m_AvgDocLength = 0;
            for (int i = 0; i < fv.size(); i++) {
                Instance inst = (Instance) fv.elementAt(i);
                double docLength = 0;
                for (int j = 0; j < inst.numValues(); j++) {
                    if (inst.index(j) >= firstCopy) {
                        docLength += inst.valueSparse(j) * inst.valueSparse(j);
                    }
                }
                m_AvgDocLength += Math.sqrt(docLength);
            }
            m_AvgDocLength /= m_NumInstances;
        }

        // Perform normalization if necessary.
        if (m_filterType == FILTER_NORMALIZE_ALL) {
            for (int i = 0; i < fv.size(); i++) {
                normalizeInstance((Instance) fv.elementAt(i), firstCopy);
            }
        }

        // Push all instances into the output queue
        for (int i = 0; i < fv.size(); i++) {
            push((Instance) fv.elementAt(i));
        }
    }

    // Flush the input
    flushInput();

    m_NewBatch = true;
    m_FirstBatchDone = true;
    return (numPendingOutput() != 0);
}

From source file:classifier.CustomStringToWordVector.java

License:Open Source License

/**
 * Normalizes given instance to average doc length (only the newly
 * constructed attributes).
 * 
 * @param inst
 *            the instance to normalize
 * @param firstCopy
 * @throws Exception
 *             if avg. doc length not set
 */
private void normalizeInstance(Instance inst, int firstCopy) throws Exception {

    double docLength = 0;

    if (m_AvgDocLength < 0) {
        throw new Exception("Average document length not set.");
    }

    // Compute length of document vector
    for (int j = 0; j < inst.numValues(); j++) {
        if (inst.index(j) >= firstCopy) {
            docLength += inst.valueSparse(j) * inst.valueSparse(j);
        }
    }
    docLength = Math.sqrt(docLength);

    // Normalize document vector
    for (int j = 0; j < inst.numValues(); j++) {
        if (inst.index(j) >= firstCopy) {
            double val = inst.valueSparse(j) * m_AvgDocLength / docLength;
            inst.setValueSparse(j, val);
            if (val == 0) {
                System.err.println("setting value " + inst.index(j) + " to zero.");
                j--;
            }
        }
    }
}

From source file:cluster.ABC.ClusterUtils.java

License:Open Source License

/** Normalizes the values of a SparseInstance in L2 norm
 *
 * @author Sugato Basu
 * @param inst SparseInstance to be normalized
 */

public static void normalizeSparseInstance(Instance inst) throws Exception {
    double norm = 0;
    int length = inst.numValues();

    if (!(inst instanceof SparseInstance)) {
        System.err.println("Not SparseInstance, using normalizeInstance function instead");
        normalizeInstance(inst);
        return;
    }

    for (int i = 0; i < length; i++) {
        if (inst.index(i) != inst.classIndex()) { // don't normalize the class index
            norm += inst.valueSparse(i) * inst.valueSparse(i);
        }
    }
    norm = Math.sqrt(norm);
    for (int i = 0; i < length; i++) { // don't normalize the class index
        if (inst.index(i) != inst.classIndex()) {
            inst.setValueSparse(i, inst.valueSparse(i) / norm);
        }
    }
}

From source file:cluster.ABC.ClusterUtils.java

License:Open Source License

/** This function divides every attribute value in an instance by
 *  the instance weight -- useful to find the mean of a cluster in
 *  Euclidean space
 *  @param inst Instance passed in for normalization (destructive update)
 */
public static void normalizeByWeight(Instance inst) {
    double weight = inst.weight();
    if (inst instanceof SparseInstance) {
        for (int i = 0; i < inst.numValues(); i++) {
            inst.setValueSparse(i, inst.valueSparse(i) / weight);
        }
    } else if (!(inst instanceof SparseInstance)) {
        for (int i = 0; i < inst.numAttributes(); i++) {
            inst.setValue(i, inst.value(i) / weight);
        }
    }
}

From source file:cluster.ABC.ClusterUtils.java

License:Open Source License

/** Finds the sum of two instances (handles sparse and non-sparse). */

public static Instance sumInstances(Instance inst1, Instance inst2, Instances m_Instances) throws Exception {
    int numAttributes = inst1.numAttributes();
    if (inst2.numAttributes() != numAttributes) {
        throw new Exception("Error!! inst1 and inst2 should have same number of attributes.");
    }
    double weight1 = inst1.weight(), weight2 = inst2.weight();
    double[] values = new double[numAttributes];

    for (int i = 0; i < numAttributes; i++) {
        values[i] = 0;
    }

    if (inst1 instanceof SparseInstance && inst2 instanceof SparseInstance) {
        for (int i = 0; i < inst1.numValues(); i++) {
            int indexOfIndex = inst1.index(i);
            values[indexOfIndex] = inst1.valueSparse(i);
        }
        for (int i = 0; i < inst2.numValues(); i++) {
            int indexOfIndex = inst2.index(i);
            values[indexOfIndex] += inst2.valueSparse(i);
        }
        SparseInstance newInst = new SparseInstance(weight1 + weight2, values);
        newInst.setDataset(m_Instances);
        return newInst;
    } else if (!(inst1 instanceof SparseInstance) && !(inst2 instanceof SparseInstance)) {
        for (int i = 0; i < numAttributes; i++) {
            values[i] = inst1.value(i) + inst2.value(i);
        }
    } else {
        throw new Exception("Error!! inst1 and inst2 should be both of same type -- sparse or non-sparse");
    }
    Instance newInst = new Instance(weight1 + weight2, values);
    newInst.setDataset(m_Instances);
    return newInst;
}
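
As a side note, sumInstances and the normalizeByWeight helper shown earlier in the same ClusterUtils class combine naturally into a weighted mean. The following is only a hypothetical sketch of such a combination, not code from the source file:

// Hypothetical helper (not part of ClusterUtils): weighted mean of a dataset,
// built from sumInstances() and normalizeByWeight() above.
public static Instance weightedMean(Instances data) throws Exception {
    Instance sum = (Instance) data.instance(0).copy();
    for (int i = 1; i < data.numInstances(); i++) {
        // sums all attribute values (including the class attribute, as sumInstances does)
        sum = ClusterUtils.sumInstances(sum, data.instance(i), data);
    }
    ClusterUtils.normalizeByWeight(sum); // divide every value by the accumulated weight
    return sum;
}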

From source file:cn.edu.xjtu.dbmine.StringToWordVector.java

License:Open Source License

/**
 * Normalizes given instance to average doc length (only the newly
 * constructed attributes).
 * 
 * @param inst
 *            the instance to normalize
 * @param firstCopy
 * @throws Exception
 *             if avg. doc length not set
 */

private void normalizeInstance(Instance inst, int firstCopy) throws Exception {

    double docLength = 0;

    if (m_AvgDocLength < 0) {
        throw new Exception("Average document length not set.");
    }

    // Compute length of document vector
    for (int j = 0; j < inst.numValues(); j++) {
        if (inst.index(j) >= firstCopy) {
            docLength += inst.valueSparse(j) * inst.valueSparse(j);
        }
    }
    docLength = Math.sqrt(docLength);

    // Normalize document vector
    for (int j = 0; j < inst.numValues(); j++) {
        if (inst.index(j) >= firstCopy) {
            double val = inst.valueSparse(j) * m_AvgDocLength / docLength;
            inst.setValueSparse(j, val);
            if (val == 0) {
                System.err.println("setting value " + inst.index(j) + " to zero.");
                j--;
            }
        }
    }
}