List of usage examples for weka.core Instances attributeToDoubleArray
public double[] attributeToDoubleArray(int index)
From source file:meka.classifiers.multilabel.PLST.java
License:Open Source License
/**
 * Transforms the labels into a smaller set of latent labels, using matrix
 * multiplication based on SVD (the PLST compression step).
 *
 * <p>Pipeline visible here: (1) recode the 0/1 label matrix to -1/+1,
 * (2) subtract the per-label column means (the "shift"), (3) take the SVD of
 * the shifted matrix, (4) keep the first {@code getSize()} columns of V as
 * the projection, (5) project the labels and merge the compressed columns
 * back with the features.
 *
 * @param D the instances to transform; consist of features and original labels.
 * @return instances consisting of features and transformed (compressed) labels.
 */
@Override
public Instances transformLabels(Instances D) throws Exception {
    Instances features = this.extractPart(D, false);
    Instances labels = this.extractPart(D, true);
    Matrix labelMatrix = MatrixUtils.instancesToMatrix(labels);

    // Preprocessing as in the original PLST implementation: compute the mean
    // of each label column under -1/+1 coding.
    double[] averages = new double[labels.numAttributes()];
    for (int i = 0; i < labels.numAttributes(); i++) {
        double[] column = labels.attributeToDoubleArray(i);
        double sum = 0.0;
        for (int j = 0; j < column.length; j++) {
            if (column[j] == 1.0) {
                sum += 1.0;
            } else {
                sum += -1;
                // The algorithm needs -1/+1 coding, so rewrite the 0 entries
                // of the label matrix in place here.
                labelMatrix.set(j, i, -1.0);
            }
        }
        averages[i] = sum / column.length;
    }

    // Remember the shift (column means) as a 1-row matrix for prediction time.
    double[][] shiftMatrix = new double[1][labels.numAttributes()];
    shiftMatrix[0] = averages;
    this.m_Shift = new Matrix(shiftMatrix);

    // Replicate the shift row once per training instance so it can be
    // subtracted from the whole label matrix.
    double[][] shiftTrainMatrix = new double[labels.numInstances()][labels.numAttributes()];
    for (int i = 0; i < labels.numInstances(); i++) {
        shiftTrainMatrix[i] = averages;
    }
    Matrix trainShift = new Matrix(shiftTrainMatrix);

    SingularValueDecomposition svd = new SingularValueDecomposition(labelMatrix.minus(trainShift));

    // The paper uses U here, but the implementation by the authors uses V,
    // so V is used here too.
    m_v = svd.getV();

    // Truncate V: keep only the first getSize() columns (the latent dimension).
    double[][] newArr = new double[m_v.getRowDimension()][this.getSize()];
    for (int i = 0; i < newArr.length; i++) {
        for (int j = 0; j < newArr[i].length; j++) {
            newArr[i][j] = m_v.getArray()[i][j];
        }
    }
    m_v = new Matrix(newArr);

    // Project the labels onto the latent space (last step of the algorithm).
    // NOTE(review): this projects the raw 0/1 label matrix, not the shifted
    // -1/+1 one — preserved as-is from the original; confirm against the paper.
    Matrix compressed = MatrixUtils.instancesToMatrix(labels).times(this.m_v);

    // Wrap the compressed columns as numeric attributes.
    ArrayList<Attribute> attinfos = new ArrayList<Attribute>();
    for (int i = 0; i < compressed.getColumnDimension(); i++) {
        Attribute att = new Attribute("att" + i);
        attinfos.add(att);
    }

    // Pattern instances (also used in prediction). Note: this is a regression
    // problem now; the latent labels are real-valued, not binary.
    this.m_PatternInstances = new Instances("compressedlabels", attinfos, compressed.getRowDimension());

    // Merge compressed labels with the features and mark the class range.
    Instances result = Instances.mergeInstances(MatrixUtils.matrixToInstances(compressed, m_PatternInstances),
            features);
    result.setClassIndex(this.getSize());
    return result;
}
From source file:meka.core.MatrixUtils.java
License:Open Source License
/**
 * Helper method that transforms an Instances object to a Matrix object.
 *
 * <p>The resulting matrix is laid out instances-by-attributes, i.e. cell
 * {@code [j][i]} holds the value of attribute {@code i} for instance {@code j}.
 *
 * @param inst The Instances to transform.
 * @return The resulting Matrix object.
 */
public static Matrix instancesToMatrix(Instances inst) {
    double[][] darr = new double[inst.numInstances()][inst.numAttributes()];
    for (int i = 0; i < inst.numAttributes(); i++) {
        // Hoisted out of the inner loop: the original called
        // attributeToDoubleArray(i) once per cell, allocating a fresh column
        // array for every single matrix entry.
        double[] column = inst.attributeToDoubleArray(i);
        for (int j = 0; j < column.length; j++) {
            darr[j][i] = column[j];
        }
    }
    return new Matrix(darr);
}
From source file:ml.dataprocess.CorrelationAttributeEval.java
License:Open Source License
/**
 * Initializes the correlation attribute evaluator. Replaces missing values
 * with means/modes and deletes instances with missing class values, then
 * computes one correlation score per attribute against the class.
 *
 * <p>Numeric attributes are correlated directly. Nominal attributes are
 * binarized per value (one indicator vector per value) and scored as a
 * frequency-weighted average of the per-value absolute correlations. A nominal
 * class is likewise binarized, and scores are averaged over class values
 * weighted by class frequency.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been generated successfully
 */
@Override
public void buildEvaluator(Instances data) throws Exception {
    // Work on a copy; drop rows with a missing class, impute the rest.
    data = new Instances(data);
    data.deleteWithMissingClass();
    ReplaceMissingValues rmv = new ReplaceMissingValues();
    rmv.setInputFormat(data);
    data = Filter.useFilter(data, rmv);
    int numClasses = data.classAttribute().numValues();
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();
    m_correlations = new double[data.numAttributes()];
    List<Integer> numericIndexes = new ArrayList<Integer>();
    List<Integer> nominalIndexes = new ArrayList<Integer>();
    if (m_detailedOutput) {
        m_detailedOutputBuff = new StringBuffer();
    }

    // TODO for instance weights (folded into computing weighted correlations)
    // add another dimension just before the last [2] (0 for 0/1 binary vector
    // and 1 for corresponding instance weights for the 1's)

    // nomAtts[att][value][instance]: 0/1 indicator vectors, one per nominal
    // attribute value. Value index 0 starts at all-1 and is decremented below,
    // so sparse instances (which omit value 0) are still counted correctly.
    double[][][] nomAtts = new double[data.numAttributes()][][];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (data.attribute(i).isNominal() && i != classIndex) {
            nomAtts[i] = new double[data.attribute(i).numValues()][data.numInstances()];
            Arrays.fill(nomAtts[i][0], 1.0); // set zero index for this att to all 1's
            nominalIndexes.add(i);
        } else if (data.attribute(i).isNumeric() && i != classIndex) {
            numericIndexes.add(i);
        }
    }

    // Fill the indicator vectors for the nominal attributes.
    if (nominalIndexes.size() > 0) {
        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            for (int j = 0; j < current.numValues(); j++) {
                if (current.attribute(current.index(j)).isNominal() && current.index(j) != classIndex) {
                    // Will need to check for zero in case this isn't a sparse
                    // instance (unless we add 1 and subtract 1)
                    nomAtts[current.index(j)][(int) current.valueSparse(j)][i] += 1;
                    nomAtts[current.index(j)][0][i] -= 1;
                }
            }
        }
    }

    if (data.classAttribute().isNumeric()) {
        double[] classVals = data.attributeToDoubleArray(classIndex);

        // Numeric attributes: plain Pearson correlation with the class.
        for (Integer i : numericIndexes) {
            double[] numAttVals = data.attributeToDoubleArray(i);
            m_correlations[i] = Utils.correlation(numAttVals, classVals, numAttVals.length);
            if (m_correlations[i] == 1.0) {
                // check for zero variance (useless numeric attribute)
                if (Utils.variance(numAttVals) == 0) {
                    m_correlations[i] = 0;
                }
            }
        }

        // Nominal attributes: weighted average of per-value |correlation|,
        // weighted by how often each value occurs.
        if (nominalIndexes.size() > 0) {
            for (Integer i : nominalIndexes) {
                double sum = 0;
                double corr = 0;
                double sumCorr = 0;
                double sumForValue = 0;
                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }
                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    sumForValue = Utils.sum(nomAtts[i][j]);
                    corr = Utils.correlation(nomAtts[i][j], classVals, classVals.length);
                    // useless attribute - all instances have the same value
                    if (sumForValue == numInstances || sumForValue == 0) {
                        corr = 0;
                    }
                    if (corr < 0.0) {
                        corr = -corr;
                    }
                    sumCorr += sumForValue * corr;
                    sum += sumForValue;
                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(corr, 6));
                    }
                }
                m_correlations[i] = (sum > 0) ? sumCorr / sum : 0;
            }
        }
    } else { // class is nominal
        // TODO extra dimension for storing instance weights too
        double[][] binarizedClasses = new double[data.classAttribute().numValues()][data.numInstances()];

        // this is equal to the number of instances for all inst weights = 1
        double[] classValCounts = new double[data.classAttribute().numValues()];

        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            binarizedClasses[(int) current.classValue()][i] = 1;
        }
        for (int i = 0; i < data.classAttribute().numValues(); i++) {
            classValCounts[i] = Utils.sum(binarizedClasses[i]);
        }
        double sumClass = Utils.sum(classValCounts);

        // Numeric attributes: class-frequency-weighted average of
        // |correlation| against each binarized class vector.
        if (numericIndexes.size() > 0) {
            for (Integer i : numericIndexes) {
                double[] numAttVals = data.attributeToDoubleArray(i);
                double corr = 0;
                double sumCorr = 0;
                for (int j = 0; j < data.classAttribute().numValues(); j++) {
                    corr = Utils.correlation(numAttVals, binarizedClasses[j], numAttVals.length);
                    if (corr < 0.0) {
                        corr = -corr;
                    }
                    if (corr == 1.0) {
                        // check for zero variance (useless numeric attribute)
                        if (Utils.variance(numAttVals) == 0) {
                            corr = 0;
                        }
                    }
                    sumCorr += classValCounts[j] * corr;
                }
                m_correlations[i] = sumCorr / sumClass;
            }
        }

        // Nominal attributes vs nominal class: per-value average correlation
        // over all classes, then weighted by value frequency across the att.
        if (nominalIndexes.size() > 0) {
            for (Integer i : nominalIndexes) {
                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }
                double sumForAtt = 0;
                double corrForAtt = 0;
                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    double sumForValue = Utils.sum(nomAtts[i][j]);
                    double corr = 0;
                    double sumCorr = 0;
                    double avgCorrForValue = 0;
                    sumForAtt += sumForValue;
                    for (int k = 0; k < numClasses; k++) {
                        // corr between value j and class k
                        corr = Utils.correlation(nomAtts[i][j], binarizedClasses[k], binarizedClasses[k].length);
                        // useless attribute - all instances have the same value
                        if (sumForValue == numInstances || sumForValue == 0) {
                            corr = 0;
                        }
                        if (corr < 0.0) {
                            corr = -corr;
                        }
                        sumCorr += classValCounts[k] * corr;
                    }
                    avgCorrForValue = sumCorr / sumClass;
                    corrForAtt += sumForValue * avgCorrForValue;
                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(avgCorrForValue, 6));
                    }
                }
                // the weighted average corr for att i as a whole
                // (weighted by value frequencies)
                m_correlations[i] = (sumForAtt > 0) ? corrForAtt / sumForAtt : 0;
            }
        }
    }
    if (m_detailedOutputBuff != null && m_detailedOutputBuff.length() > 0) {
        m_detailedOutputBuff.append("\n");
    }
}
From source file:mlda.attributes.AvgAbsoluteCorrelationBetweenNumericAttributes.java
License:Open Source License
/** * Calculate metric value/*from www. j a v a2 s . c om*/ * * @param mlData Multi-label dataset to which calculate the metric * @return Value of the metric */ public double calculate(MultiLabelInstances mlData) { Instances instances = mlData.getDataSet(); int numInstances = mlData.getNumInstances(); double res = 0.0; int count = 0; int[] featureIndices = mlData.getFeatureIndices(); Vector<Integer> numericFeatureIndices = new Vector<>(); for (int fIndex : featureIndices) { if (instances.attribute(fIndex).isNumeric()) { numericFeatureIndices.add(fIndex); } } if (numericFeatureIndices.size() <= 0) { return Double.NaN; } double[][] attributesToDoubleArray = new double[numericFeatureIndices.size()][numInstances]; for (int fIndex : numericFeatureIndices) { attributesToDoubleArray[fIndex] = instances.attributeToDoubleArray(fIndex); } for (int fIndex1 : numericFeatureIndices) { for (int fIndex2 = fIndex1 + 1; fIndex2 < numericFeatureIndices.size(); fIndex2++) { count++; res += Utils.correlation(attributesToDoubleArray[fIndex1], attributesToDoubleArray[fIndex2], numInstances); } } if (count > 0) { this.value = res / count; } else { this.value = Double.NaN; } //this.value = res/count; return value; }
From source file:mlda.attributes.ProportionNumericAttributesWithOutliers.java
License:Open Source License
/**
 * Calculates the metric: the proportion of numeric feature attributes whose
 * variance drops by more than 30% after trimming 5% of the extreme values
 * (2.5% from each tail), i.e. attributes flagged as containing outliers.
 *
 * @param mlData Multi-label dataset on which to calculate the metric
 * @return Value of the metric, or {@code Double.NaN} if the dataset has no
 *         numeric features
 */
public double calculate(MultiLabelInstances mlData) {
    Instances dataset = mlData.getDataSet();
    int instanceCount = mlData.getNumInstances();

    // Trim alpha/2 of the sorted values from each tail (5% total).
    double alpha = 0.05;
    int trimPerSide = (int) (instanceCount * alpha / 2);

    int numericCount = 0;
    int outlierCount = 0;
    Set<Attribute> features = mlData.getFeatureAttributes();

    // Scratch buffer reused for every attribute's trimmed value range.
    double[] trimmedValues = new double[instanceCount - (trimPerSide * 2)];

    for (Attribute feature : features) {
        if (!feature.isNumeric()) {
            continue;
        }
        numericCount++;

        double fullVariance = dataset.variance(feature);

        // Sort the column and keep only the central portion.
        double[] sortedValues = dataset.attributeToDoubleArray(feature.index());
        Arrays.sort(sortedValues);
        for (int pos = 0; pos < trimmedValues.length; pos++) {
            trimmedValues[pos] = sortedValues[pos + trimPerSide];
        }

        double trimmedVariance = Utils.variance(trimmedValues);

        // Attribute "has outliers" when trimming the tails removes more than
        // 30% of its variance.
        if (trimmedVariance / fullVariance < 0.7) {
            outlierCount++;
        }
    }

    this.value = (numericCount > 0) ? ((double) outlierCount) / numericCount : Double.NaN;
    return value;
}
From source file:myJ48.MyJ48.java
public Instances NumericToNominalByThreshold(Instances numericSet, int idx_attribute, double threshold) throws Exception { double[] values; Instances NominalizedSet = new Instances(numericSet); //System.out.println("number of instances: " + NominalizedSet.numInstances()); values = numericSet.attributeToDoubleArray(idx_attribute); List<String> nominalValue = new ArrayList<String>(); nominalValue.add("low"); nominalValue.add("high"); Attribute nominalAttrib = new Attribute(numericSet.attribute(idx_attribute).name() + "_nominal", nominalValue);/*from w w w .j a v a 2 s. co m*/ NominalizedSet.insertAttributeAt(nominalAttrib, idx_attribute); for (int i = 0; i < values.length; i++) { if (values[i] <= threshold) { NominalizedSet.instance(i).setValue(idx_attribute, "low"); } else { NominalizedSet.instance(i).setValue(idx_attribute, "high"); } } String[] options = { "-R", String.valueOf(idx_attribute + 2) }; Filter remove = (Filter) Class.forName("weka.filters.unsupervised.attribute.Remove").newInstance(); ((OptionHandler) remove).setOptions(options); remove.setInputFormat(NominalizedSet); NominalizedSet = Filter.useFilter(NominalizedSet, remove); return NominalizedSet; }
From source file:org.opentox.jaqpot3.qsar.util.WekaInstancesProcess.java
License:Open Source License
/**
 * Returns the standard deviation of a single attribute's column of values,
 * computed with Apache Commons Math's {@code StandardDeviation}.
 *
 * @param dataInst       the instances to read the column from
 * @param attributeIndex 0-based index of the attribute
 * @return the standard deviation of that attribute's values
 */
private static double normedValue(Instances dataInst, int attributeIndex) {
    StandardDeviation deviation = new StandardDeviation();
    return deviation.evaluate(dataInst.attributeToDoubleArray(attributeIndex));
}
From source file:org.sr.recognition.paleo.paleoNN.PaleoTrainer.java
License:BSD License
/** * Converts ARFF instances into a format readable by LibSVM and outputs it * to file/*from w ww . j a v a 2 s . com*/ * * @param filename * output file name * @param data * ARFF instances (the data) * @throws IOException * if output fails */ public static void libSVMToFile(String filename, Instances data) throws IOException { BufferedWriter writer = new BufferedWriter(new FileWriter(filename)); for (int i = 0; i < data.numInstances(); i++) { Instance inst = data.instance(i); double label = inst.value(inst.numValues() - 1); writer.write(label + "\t"); for (int a = 0; a < inst.numValues() - 1; a++) { double value = inst.value(a); if (Double.isInfinite(value) || Double.isNaN(value)) value = mean(data.attributeToDoubleArray(a)); writer.write((a + 1) + ":" + value); if (a < inst.numValues() - 2) writer.write(" "); } writer.newLine(); } }
From source file:org.uclab.mm.kcl.ddkat.datapreprocessor.MissingValueHandler.java
License:Apache License
/**
 * Replaces the identified missing values in the numeric attributes of
 * {@code OriginalDataSet.csv} with the attribute's mean, then saves the
 * filled data set.
 *
 * <p>NOTE(review): a cell counts as "missing" when its value is exactly 0,
 * so legitimate zero values are also overwritten with the mean — presumably
 * the upstream pipeline encodes missing entries as 0; TODO confirm.
 *
 * @throws Exception the exception
 */
public void replaceMissingValues() throws Exception {

    this.confirmationMessage = new ArrayList<String>();

    Instances outputData;
    String inputFile = BASE_DIR + "OriginalDataSet.csv";

    // load CSV file
    CSVLoader fileLoader = new CSVLoader();
    fileLoader.setSource(new File(inputFile));
    outputData = fileLoader.getDataSet();

    int numInstances = outputData.numInstances();
    int numAttributes = outputData.numAttributes();

    // Build a per-attribute index map; entries are overwritten with
    // NON_NUMERIC for attributes that must be skipped (class or non-numeric).
    final int NON_NUMERIC = -1;
    int[] m_AttributeIndices = null;
    Range m_Attributes = new Range("first-last");

    // attributes must be numeric
    m_Attributes.setUpper(outputData.numAttributes() - 1);
    m_AttributeIndices = m_Attributes.getSelection();
    for (int i = 0; i < m_AttributeIndices.length; i++) {
        // ignore class
        if (m_AttributeIndices[i] == outputData.classIndex()) {
            m_AttributeIndices[i] = NON_NUMERIC;
            continue;
        }
        // not numeric -> ignore it
        if (!outputData.attribute(m_AttributeIndices[i]).isNumeric())
            m_AttributeIndices[i] = NON_NUMERIC;
    }

    double sum;
    int missingCounter;
    double attributeMean;

    // identify the missing values
    for (int attributeIndex = 0; attributeIndex < numAttributes; attributeIndex++) {
        // non-numeric attribute?
        if (m_AttributeIndices[attributeIndex] == NON_NUMERIC) {
            continue;
        }

        double tempArr[] = outputData.attributeToDoubleArray(attributeIndex);
        sum = 0;
        missingCounter = 0;
        // Mean is taken over the non-zero ("present") cells only; zeros
        // contribute nothing to the sum anyway.
        for (int i = 0; i < tempArr.length; i++) {
            sum = sum + tempArr[i];
            if (tempArr[i] == 0)
                missingCounter++;
        }
        attributeMean = sum / (numInstances - missingCounter);

        for (int instanceIndex = 0; instanceIndex < numInstances; instanceIndex++) {
            // replace the missing values with attribute mean values
            if (outputData.instance(instanceIndex).value(attributeIndex) == 0) {
                outputData.instance(instanceIndex).setValue(attributeIndex, attributeMean);
            }
        }
    }

    // NOTE(review): drops the two trailing attributes before saving —
    // presumably helper columns added elsewhere in this pipeline; TODO confirm.
    outputData.deleteAttributeAt(outputData.numAttributes() - 1);
    outputData.deleteAttributeAt(outputData.numAttributes() - 1);

    saveFilledData(inputFile, outputData);
}
From source file:prismcrossvalidation.Preview.java
/** * method to write arff data into s.o.p. * @throws IOException //from w ww . j a v a2s.c o m */ public static void showData() throws IOException { String source = MainWindow.pathChooseField.getText(); Instances data = DataLoad.loadData(source.replace("\\", "/")); data.setClassIndex(data.numAttributes() - 1); String field = ""; for (int i = 0; i < data.numAttributes(); i++) { // Print the current attribute. System.out.print(data.attribute(i).name() + ": "); previewTextArea.append("\n" + data.attribute(i).name() + ": "); // Print the values associated with the current attribute. double[] values = data.attributeToDoubleArray(i); System.out.println(Arrays.toString(values)); previewTextArea.append(Arrays.toString(values)); } }