List of usage examples for weka.core Instance numValues
public int numValues();
From source file:MPCKMeans.java
License:Open Source License
/**
 * Runs the actual K-Means loop: alternates an E-step (point assignment via
 * {@code findBestAssignments()}), an M-step ({@code updateClusterCentroids()})
 * and, when internal training is enabled, a metric-weight update, until
 * {@code convergenceCheck()} reports convergence. Objective-function
 * components (variance, cannot-link, must-link, normalizer, regularizer) are
 * printed after each phase.
 *
 * @throws Exception if metric learning, constraint processing, or the
 *                   incoherence-file output fails
 */
protected void runKMeans() throws Exception {
    boolean converged = false;
    m_Iterations = 0;
    m_numBlankIterations = 0;
    m_Objective = Double.POSITIVE_INFINITY;
    // Reset learned metrics unless the metric is fixed/offline.
    if (!m_isOfflineMetric) {
        if (m_useMultipleMetrics) {
            // One metric (and learner) per cluster.
            for (int i = 0; i < m_metrics.length; i++) {
                m_metrics[i].resetMetric();
                m_metricLearners[i].resetLearner();
            }
        } else {
            m_metric.resetMetric();
            m_metricLearner.resetLearner();
        }
        // initialize max cannot-link penalties (used to cap CL violation cost)
        if (m_ConstraintsHash.size() > 0) {
            m_maxCLPenalties = calculateMaxCLPenalties();
        }
    }
    // initialize m_ClusterAssignments: -1 marks "not yet assigned"
    for (int i = 0; i < m_NumClusters; i++) {
        m_ClusterAssignments[i] = -1;
    }
    PrintStream fincoh = null;
    if (m_ConstraintIncoherenceFile != null) {
        fincoh = new PrintStream(new FileOutputStream(m_ConstraintIncoherenceFile));
    }
    while (!converged) {
        System.out.println("\n" + m_Iterations + ". Objective function: " + ((float) m_Objective));
        m_OldObjective = m_Objective;
        // E-step: reassign points; count how many moved so blank (no-move)
        // iterations can be tracked in m_numBlankIterations.
        int numMovedPoints = findBestAssignments();
        m_numBlankIterations = (numMovedPoints == 0) ? m_numBlankIterations + 1 : 0;
        // calculateObjectiveFunction(false);
        System.out.println((float) m_Objective + " - Objective function after point assignment(CALC)");
        System.out.println("\tvar=" + ((float) m_objVariance) + "\tC=" + ((float) m_objCannotLinks)
                + "\tM=" + ((float) m_objMustLinks) + "\tLOG=" + ((float) m_objNormalizer)
                + "\tREG=" + ((float) m_objRegularizer));
        // M-step: recompute centroids from the new assignments.
        updateClusterCentroids();
        // calculateObjectiveFunction(false);
        System.out.println((float) m_Objective + " - Objective function after centroid estimation");
        System.out.println("\tvar=" + ((float) m_objVariance) + "\tC=" + ((float) m_objCannotLinks)
                + "\tM=" + ((float) m_objMustLinks) + "\tLOG=" + ((float) m_objNormalizer)
                + "\tREG=" + ((float) m_objRegularizer));
        // Optional metric-learning step (only when training internally with a
        // learnable metric).
        if (m_Trainable == TRAINING_INTERNAL && !m_isOfflineMetric) {
            updateMetricWeights();
            if (m_verbose) {
                calculateObjectiveFunction(true);
                System.out.println((float) m_Objective + " - Objective function after metric update");
                System.out.println("\tvar=" + ((float) m_objVariance) + "\tC=" + ((float) m_objCannotLinks)
                        + "\tM=" + ((float) m_objMustLinks) + "\tLOG=" + ((float) m_objNormalizer)
                        + "\tREG=" + ((float) m_objRegularizer));
            }
            // Metric changed, so the CL penalty caps must be recomputed.
            if (m_ConstraintsHash.size() > 0) {
                m_maxCLPenalties = calculateMaxCLPenalties();
            }
        }
        if (fincoh != null) {
            printConstraintIncoherence(fincoh);
        }
        converged = convergenceCheck(m_OldObjective, m_Objective);
        m_Iterations++;
    }
    if (fincoh != null) {
        fincoh.close();
    }
    System.out.println("Converged!");
    System.err.print("Its\t" + m_Iterations + "\t");
    if (m_verbose) {
        // Dump the highest-valued attributes of each centroid, sorted
        // descending by value via a reverse-ordered TreeMap.
        System.out.println("Done clustering; top cluster features: ");
        for (int i = 0; i < m_NumClusters; i++) {
            System.out.println("Centroid " + i);
            // NOTE(review): raw TreeMap keyed by attribute value — attributes
            // with identical values overwrite each other, so fewer than 5
            // features may be printed; also `new Double(...)` is deprecated.
            TreeMap map = new TreeMap(Collections.reverseOrder());
            Instance centroid = m_ClusterCentroids.instance(i);
            for (int j = 0; j < centroid.numValues(); j++) {
                Attribute attr = centroid.attributeSparse(j);
                map.put(new Double(centroid.value(attr)), attr.name());
            }
            Iterator it = map.entrySet().iterator();
            // Print at most the top 5 features.
            for (int j = 0; j < 5 && it.hasNext(); j++) {
                Map.Entry entry = (Map.Entry) it.next();
                System.out.println("\t" + entry.getKey() + "\t" + entry.getValue());
            }
        }
    }
}
From source file:br.com.ufu.lsi.utils.DocumentFrequencyAttributeEval.java
License:Open Source License
/** * Initializes an information gain attribute evaluator. Discretizes all attributes that are * numeric.//from w ww. j av a2 s .c o m * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been generated successfully */ public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); int classIndex = data.classIndex(); int numAttributes = data.numAttributes(); m_DFs = new int[numAttributes]; Enumeration e = data.enumerateInstances(); while (e.hasMoreElements()) { Instance instance = (Instance) e.nextElement(); int numValues = instance.numValues(); for (int valueIndex = 0; valueIndex < numValues; valueIndex++) { int attIndex = instance.index(valueIndex); if (attIndex != classIndex) { double value = instance.valueSparse(valueIndex); //missingvalues werden also 0 betrachtet. if (m_missingAsZero) { if (!Instance.isMissingValue(value) && value != 0.0) { //man knnte auch isMissingSparce(valueIndex) verwenden, oder ineffizienterweise isMissing(attIndex) m_DFs[attIndex]++; //m_DFs[ attIndex ]+=value ; } } else { if (value != 0.0) { m_DFs[attIndex]++; //m_DFs[ attIndex ]+=value ; } } } } } }
From source file:cba.ItemSet.java
License:Open Source License
/** * Checks if an instance contains an item set. * * @param instance the instance to be tested * @return true if the given instance contains this item set *///from ww w . j a va2 s . c o m public boolean containedBy(Instance instance) { if (instance instanceof weka.core.SparseInstance && m_treatZeroAsMissing) { int numInstVals = instance.numValues(); int numItemSetVals = m_items.length; for (int p1 = 0, p2 = 0; p1 < numInstVals || p2 < numItemSetVals;) { int instIndex = Integer.MAX_VALUE; if (p1 < numInstVals) { instIndex = instance.index(p1); } int itemIndex = p2; if (m_items[itemIndex] > -1) { if (itemIndex != instIndex) { return false; } else { if (instance.isMissingSparse(p1)) { return false; } if (m_items[itemIndex] != (int) instance.valueSparse(p1)) { return false; } } p1++; p2++; } else { if (itemIndex < instIndex) { p2++; } else if (itemIndex == instIndex) { p2++; p1++; } } } } else { for (int i = 0; i < instance.numAttributes(); i++) if (m_items[i] > -1) { if (instance.isMissing(i) || (m_treatZeroAsMissing && (int) instance.value(i) == 0)) return false; if (m_items[i] != (int) instance.value(i)) return false; } } return true; }
From source file:ChiSquare.ChiSquaredAttributeEval.java
License:Open Source License
/**
 * Initializes a chi-squared attribute evaluator.
 * Discretizes all attributes that are numeric.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been generated successfully
 */
public void buildEvaluator(Instances data) throws Exception {
    // can evaluator handle data?
    getCapabilities().testWithFail(data);
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();
    // Make every attribute nominal: either discretize or binarize numerics.
    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();
    // Reserve space and initialize counters. Layout per attribute k:
    // counts[k][value][class]; row index numValues holds the "attribute
    // missing" counts, column index numClasses the "class missing" counts.
    // Row 0 doubles as the count for value 0, which sparse instances omit.
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }
    // Initialize counters: total class-weight distribution, credited to
    // value 0 of every attribute; explicit sparse values are moved out of
    // row 0 in the counting pass below.
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }
    // Get counts: for each stored value, add weight to its (value, class)
    // cell and subtract it from the row-0 cell it was pre-credited to.
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        // value present, class missing
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        // value missing, class present
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        // both value and class missing
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    // both present
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }
    // distribute missing counts if required: spread each attribute's missing
    // row/column proportionally over the observed cells.
    if (m_missing_merge) {
        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();
                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }
                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];
                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }
                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }
                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }
                    // Make new contingency table (missing row/column dropped)
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }
    // Compute chi-squared values per attribute from its contingency table.
    m_ChiSquareds = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_ChiSquareds[i] = ContingencyTables.chiVal(ContingencyTables.reduceMatrix(counts[i]), false);
        }
    }
}
From source file:classifier.CustomStringToWordVector.java
License:Open Source License
/**
 * Signify that this batch of input to the filter is finished. If the filter
 * requires all instances prior to filtering, output() may now be called to
 * retrieve the filtered instances.
 *
 * On the first batch this builds the dictionary, converts all buffered
 * instances to word vectors, optionally computes the average document length
 * and normalizes, then pushes everything to the output queue.
 *
 * @return true if there are instances pending output.
 * @throws IllegalStateException if no input structure has been defined.
 */
public boolean batchFinished() throws Exception {
    if (getInputFormat() == null) {
        throw new IllegalStateException("No input instance format defined");
    }
    // We only need to do something in this method
    // if the first batch hasn't been processed. Otherwise
    // input() has already done all the work.
    if (!isFirstBatchDone()) {
        // Determine the dictionary from the first batch (training data)
        determineDictionary();
        // Convert all instances w/o normalization; converted instances
        // accumulate in fv. firstCopy is the index of the first generated
        // word attribute (attributes below it are copied through unchanged).
        FastVector fv = new FastVector();
        int firstCopy = 0;
        for (int i = 0; i < m_NumInstances; i++) {
            firstCopy = convertInstancewoDocNorm(getInputFormat().instance(i), fv);
        }
        // Need to compute average document length if necessary
        if (m_filterType != FILTER_NONE) {
            m_AvgDocLength = 0;
            for (int i = 0; i < fv.size(); i++) {
                Instance inst = (Instance) fv.elementAt(i);
                // L2 length over the word attributes only.
                double docLength = 0;
                for (int j = 0; j < inst.numValues(); j++) {
                    if (inst.index(j) >= firstCopy) {
                        docLength += inst.valueSparse(j) * inst.valueSparse(j);
                    }
                }
                m_AvgDocLength += Math.sqrt(docLength);
            }
            m_AvgDocLength /= m_NumInstances;
        }
        // Perform normalization if necessary.
        if (m_filterType == FILTER_NORMALIZE_ALL) {
            for (int i = 0; i < fv.size(); i++) {
                normalizeInstance((Instance) fv.elementAt(i), firstCopy);
            }
        }
        // Push all instances into the output queue
        for (int i = 0; i < fv.size(); i++) {
            push((Instance) fv.elementAt(i));
        }
    }
    // Flush the input
    flushInput();
    m_NewBatch = true;
    m_FirstBatchDone = true;
    return (numPendingOutput() != 0);
}
From source file:classifier.CustomStringToWordVector.java
License:Open Source License
/**
 * Normalizes given instance to average doc length (only the newly
 * constructed attributes).
 *
 * @param inst the instance to normalize (modified in place)
 * @param firstCopy index of the first generated word attribute; lower
 *                  indices are left untouched
 * @throws Exception if avg. doc length not set
 */
private void normalizeInstance(Instance inst, int firstCopy) throws Exception {
    double docLength = 0;
    if (m_AvgDocLength < 0) {
        throw new Exception("Average document length not set.");
    }
    // Compute length of document vector (L2 norm over word attributes only)
    for (int j = 0; j < inst.numValues(); j++) {
        if (inst.index(j) >= firstCopy) {
            docLength += inst.valueSparse(j) * inst.valueSparse(j);
        }
    }
    // NOTE(review): if all word values are zero, docLength is 0 and the
    // division below produces NaN/Infinity — confirm callers never pass an
    // all-zero document.
    docLength = Math.sqrt(docLength);
    // Normalize document vector
    for (int j = 0; j < inst.numValues(); j++) {
        if (inst.index(j) >= firstCopy) {
            double val = inst.valueSparse(j) * m_AvgDocLength / docLength;
            inst.setValueSparse(j, val);
            if (val == 0) {
                // Setting a sparse value to 0 deletes that entry and shifts
                // the following entries left; decrement j so the next loop
                // iteration revisits the same position instead of skipping one.
                System.err.println("setting value " + inst.index(j) + " to zero.");
                j--;
            }
        }
    }
}
From source file:cluster.ABC.ClusterUtils.java
License:Open Source License
/** Normalizes the values of a SparseInstance in L2 norm * * @author Sugato Basu/* www . ja va 2 s. c o m*/ * @param inst SparseInstance to be normalized */ public static void normalizeSparseInstance(Instance inst) throws Exception { double norm = 0; int length = inst.numValues(); if (!(inst instanceof SparseInstance)) { System.err.println("Not SparseInstance, using normalizeInstance function instead"); normalizeInstance(inst); } for (int i = 0; i < length; i++) { if (inst.index(i) != inst.classIndex()) { // don't normalize the class index norm += inst.valueSparse(i) * inst.valueSparse(i); } } norm = Math.sqrt(norm); for (int i = 0; i < length; i++) { // don't normalize the class index if (inst.index(i) != inst.classIndex()) { inst.setValueSparse(i, inst.valueSparse(i) / norm); } } }
From source file:cluster.ABC.ClusterUtils.java
License:Open Source License
/** This function divides every attribute value in an instance by * the instance weight -- useful to find the mean of a cluster in * Euclidean space /*from w ww . ja va2s . c o m*/ * @param inst Instance passed in for normalization (destructive update) */ public static void normalizeByWeight(Instance inst) { double weight = inst.weight(); if (inst instanceof SparseInstance) { for (int i = 0; i < inst.numValues(); i++) { inst.setValueSparse(i, inst.valueSparse(i) / weight); } } else if (!(inst instanceof SparseInstance)) { for (int i = 0; i < inst.numAttributes(); i++) { inst.setValue(i, inst.value(i) / weight); } } }
From source file:cluster.ABC.ClusterUtils.java
License:Open Source License
/** Finds sum of 2 instances (handles sparse and non-sparse) *///from ww w. java2s. c o m public static Instance sumInstances(Instance inst1, Instance inst2, Instances m_Instances) throws Exception { int numAttributes = inst1.numAttributes(); if (inst2.numAttributes() != numAttributes) { throw new Exception("Error!! inst1 and inst2 should have same number of attributes."); } double weight1 = inst1.weight(), weight2 = inst2.weight(); double[] values = new double[numAttributes]; for (int i = 0; i < numAttributes; i++) { values[i] = 0; } if (inst1 instanceof SparseInstance && inst2 instanceof SparseInstance) { for (int i = 0; i < inst1.numValues(); i++) { int indexOfIndex = inst1.index(i); values[indexOfIndex] = inst1.valueSparse(i); } for (int i = 0; i < inst2.numValues(); i++) { int indexOfIndex = inst2.index(i); values[indexOfIndex] += inst2.valueSparse(i); } SparseInstance newInst = new SparseInstance(weight1 + weight2, values); newInst.setDataset(m_Instances); return newInst; } else if (!(inst1 instanceof SparseInstance) && !(inst2 instanceof SparseInstance)) { for (int i = 0; i < numAttributes; i++) { values[i] = inst1.value(i) + inst2.value(i); } } else { throw new Exception("Error!! inst1 and inst2 should be both of same type -- sparse or non-sparse"); } Instance newInst = new Instance(weight1 + weight2, values); newInst.setDataset(m_Instances); return newInst; }
From source file:cn.edu.xjtu.dbmine.StringToWordVector.java
License:Open Source License
/**
 * Normalizes given instance to average doc length (only the newly
 * constructed attributes).
 *
 * @param inst the instance to normalize (modified in place)
 * @param firstCopy index of the first generated word attribute; lower
 *                  indices are left untouched
 * @throws Exception if avg. doc length not set
 */
private void normalizeInstance(Instance inst, int firstCopy) throws Exception {
    double docLength = 0;
    if (m_AvgDocLength < 0) {
        throw new Exception("Average document length not set.");
    }
    // Compute length of document vector (L2 norm over word attributes only)
    for (int j = 0; j < inst.numValues(); j++) {
        if (inst.index(j) >= firstCopy) {
            docLength += inst.valueSparse(j) * inst.valueSparse(j);
        }
    }
    // NOTE(review): if all word values are zero, docLength is 0 and the
    // division below produces NaN/Infinity — confirm callers never pass an
    // all-zero document.
    docLength = Math.sqrt(docLength);
    // Normalize document vector
    for (int j = 0; j < inst.numValues(); j++) {
        if (inst.index(j) >= firstCopy) {
            double val = inst.valueSparse(j) * m_AvgDocLength / docLength;
            inst.setValueSparse(j, val);
            if (val == 0) {
                // Setting a sparse value to 0 deletes that entry and shifts
                // the following entries left; decrement j so the next loop
                // iteration revisits the same position instead of skipping one.
                System.err.println("setting value " + inst.index(j) + " to zero.");
                j--;
            }
        }
    }
}