List of usage examples for weka.core Instance numAttributes
public int numAttributes();
From source file:kea.NumbersFilter.java
License:Open Source License
/** * Converts an instance. A phrase boundary is inserted where * a number is found./*from www. jav a 2 s.c o m*/ */ private void convertInstance(Instance instance) throws Exception { double[] instVals = new double[instance.numAttributes()]; for (int i = 0; i < instance.numAttributes(); i++) { if ((!instance.attribute(i).isString()) || instance.isMissing(i)) { instVals[i] = instance.value(i); } else { String str = instance.stringValue(i); StringBuffer resultStr = new StringBuffer(); StringTokenizer tok = new StringTokenizer(str, " \t\n", true); while (tok.hasMoreTokens()) { String token = tok.nextToken(); // Everything that doesn't contain at least // one letter is considered to be a number boolean isNumber = true; for (int j = 0; j < token.length(); j++) { if (Character.isLetter(token.charAt(j))) { isNumber = false; break; } } if (!isNumber) { resultStr.append(token); } else { if (token.equals(" ") || token.equals("\t") || token.equals("\n")) { resultStr.append(token); } else { resultStr.append(" \n "); } } } int index = getOutputFormat().attribute(i).addStringValue(resultStr.toString()); instVals[i] = (double) index; } } Instance inst = new Instance(instance.weight(), instVals); inst.setDataset(getOutputFormat()); push(inst); }
From source file:knn.KNNClassifier.java
double EuclideanDistance(Instance instanceLHS, Instance instanceRHS) { // set dist to 0 double distance = 0; // from index 0 to left and right side's number of attributes - 1... for (int i = 0; i < instanceLHS.numAttributes() - 1 && i < instanceRHS.numAttributes() - 1; i++) { // if left and right side's attributes are numeric, set the distance equal // to the value of left value - right value all squared if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) { distance += pow(instanceLHS.value(i) - instanceRHS.value(i), 2); } else {//from w w w .j a v a2 s. c o m // else add 5 to the distance unless left and right side's string converted // values are equal to one another, in which case set dist back to zero if (instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) { distance += 0; } distance += 5; } } return distance; }
From source file:knn.KNNClassifier.java
double ManhattenDistance(Instance instanceLHS, Instance instanceRHS) { // set dist to 0 double distance = 0; // from index 0 to number of attributes - 1 on both sides... for (int i = 0; i < instanceLHS.numAttributes() - 1 && i < instanceRHS.numAttributes() - 1; i++) { // if left and right side's attributes are numbers set distance equal // to absolute value of left's value - right's value if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) { distance += abs(instanceLHS.value(i) - instanceRHS.value(i)); } else {//from w ww .j av a2s. c om // else add 5 to distance unless left and right are equal, in which // case set the distance back to 0 if (instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) { distance = 0; } distance += 5; } } return distance; }
From source file:knnclassifier.KNNClassifier.java
double EuclideanDistance(Instance instanceLHS, Instance instanceRHS) { double distance = 0; for (int i = 0; i < instanceLHS.numAttributes() - 1 && i < instanceRHS.numAttributes() - 1; i++) { if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) { distance += pow(instanceLHS.value(i) - instanceRHS.value(i), 2); } else {//from w ww. j a v a2 s .c o m if (instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) { distance += 0; } distance += 5; } } return distance; }
From source file:knnclassifier.KNNClassifier.java
double ManhattenDistance(Instance instanceLHS, Instance instanceRHS) { double distance = 0; for (int i = 0; i < instanceLHS.numAttributes() - 1 && i < instanceRHS.numAttributes() - 1; i++) { if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) { distance += abs(instanceLHS.value(i) - instanceRHS.value(i)); } else {//ww w . j a v a 2 s . c o m if (instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) { distance = 0; } distance += 5; } } return distance; }
From source file:lineage.AAFClusterer.java
License:Open Source License
/** * K-Means Clustering//from w w w.ja va 2 s. c o m * @param data - matrix of observations (numObs x numFeatures) * @param k - number of clusters */ public Cluster[] kmeans(double[][] data, int numObs, int numFeatures, int k) { Instances ds = convertMatrixToWeka(data, numObs, numFeatures); // uses Euclidean distance by default SimpleKMeans clusterer = new SimpleKMeans(); try { clusterer.setPreserveInstancesOrder(true); clusterer.setNumClusters(k); clusterer.buildClusterer(ds); // cluster centers Instances centers = clusterer.getClusterCentroids(); Cluster[] clusters = new Cluster[centers.numInstances()]; for (int i = 0; i < centers.numInstances(); i++) { Instance inst = centers.instance(i); double[] mean = new double[inst.numAttributes()]; for (int j = 0; j < mean.length; j++) { mean[j] = inst.value(j); } clusters[i] = new Cluster(mean, i); } // cluster members int[] assignments = clusterer.getAssignments(); for (int i = 0; i < assignments.length; i++) { clusters[assignments[i]].addMember(i); } return clusters; } catch (Exception e) { e.printStackTrace(); System.exit(-1); return null; } }
From source file:lu.lippmann.cdb.common.gui.dataset.InstanceFormatter.java
License:Open Source License
public static String htmlFormat(final Instance inst, final boolean withHTMLHeader) { final StringBuilder sb = new StringBuilder(); if (withHTMLHeader) sb.append("<html><body>"); for (int i = 0; i < inst.numAttributes(); i++) { sb.append(StringEscapeUtils.escapeHtml(inst.attribute(i).name())).append(" = "); sb.append("<b>"); if (inst.attribute(i).isNominal() || inst.attribute(i).isString()) { sb.append(StringEscapeUtils.escapeHtml(inst.stringValue(i))); } else if (inst.attribute(i).isDate()) { final Calendar cal = Calendar.getInstance(); cal.setTimeInMillis((long) inst.value(i)); sb.append(FormatterUtil.DATE_FORMAT.format(cal.getTime())); } else if (inst.attribute(i).isNumeric()) { sb.append(inst.value(i));//from w ww .j a v a 2s . c o m } sb.append("</b>"); sb.append("<br/>"); } if (withHTMLHeader) sb.append("</body></html>"); return sb.toString(); }
From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java
License:Open Source License
/** * /*from w ww . j av a 2 s . co m*/ * @return */ public Instances getModifiedInstances() { //Copy attribute list (and change categorical by numerical) final ArrayList<Attribute> lAttrs = new ArrayList<Attribute>(); for (int i = 0; i < instances.numAttributes(); i++) { Attribute attr = instances.attribute(i); if (attr.isNumeric() || attr.index() == instances.classIndex()) { lAttrs.add(attr); } else { Attribute newAttr = new Attribute(attr.name()); lAttrs.add(newAttr); } } //Build new instance final Instances newInstances = new Instances("Shih instance", lAttrs, instances.numInstances()); newInstances.setClassIndex(instances.classIndex()); for (int i = 0; i < instances.numInstances(); i++) { final Instance instance = instances.instance(i); final Instance cpyInstance = (Instance) instance.copy(); for (int j = 0; j < instance.numAttributes(); j++) { Attribute attribute = instance.attribute(j); int k = 0; if (attribute.index() == instances.classIndex()) { //The class index is nominal cpyInstance.setValue(attribute, instance.stringValue(j)); } else if (!attribute.isNumeric()) { String elt = attribute.value((int) instance.value(j)); cpyInstance.setValue(attribute, F.get(new TupleSI(elt, j))); } else { if (maxNum[k] > 1) { cpyInstance.setValue(attribute, instance.value(j) / maxNum[k]); } k++; } } newInstances.add(cpyInstance); } if (ignoreClass && instances.classIndex() != -1) { newInstances.deleteAttributeAt(instances.classIndex()); } return newInstances; }
From source file:lu.lippmann.cdb.lab.beta.util.WekaUtil2.java
License:Open Source License
/** * Generate the centroid coordinates based * on it's members (objects assigned to the cluster of the centroid) and the distance * function being used./*from ww w . j a v a 2 s. com*/ * @return the centroid */ public static MixedCentroid computeMixedCentroid(final boolean preserveOrder, final NormalizableDistance distanceFunction, final Instances numericInstances, final Instances originalInstances, final int clusterIndex) { final int numInstances = numericInstances.numInstances(); final int numAttributes = numericInstances.numAttributes(); final Map<TupleSI, Integer> addedAttr = new HashMap<TupleSI, Integer>(); if (numInstances == 1) { Instance uniqueNumInstance = numericInstances.firstInstance(); Instance uniqueMixInstance = originalInstances.firstInstance(); double[] centroid = uniqueNumInstance.toDoubleArray(); for (int i = 0; i < uniqueMixInstance.numAttributes(); i++) { if (!uniqueMixInstance.attribute(i).isNumeric()) { final String catVal = uniqueMixInstance.attribute(i).value((int) uniqueMixInstance.value(i)); addedAttr.put(new TupleSI(catVal, i), 1); } } return new MixedCentroid(clusterIndex, centroid, addedAttr); } final double[] vals = new double[numAttributes]; //used only for Manhattan Distance Instances sortedMembers = null; int middle = 0; boolean dataIsEven = false; final boolean isManhattanDist = (distanceFunction instanceof ManhattanDistance); final boolean isEuclideanDist = (distanceFunction instanceof EuclideanDistance); if (isManhattanDist) { middle = (numInstances - 1) / 2; dataIsEven = ((numInstances % 2) == 0); if (preserveOrder) { sortedMembers = numericInstances; } else { sortedMembers = new Instances(numericInstances); } } for (int j = 0; j < numAttributes; j++) { //in case of Euclidian distance the centroid is the mean point //in case of Manhattan distance the centroid is the median point //in both cases, if the attribute is nominal, the centroid is the mode if (isEuclideanDist) { vals[j] = numericInstances.meanOrMode(j); for (int i = 0; i < numInstances; i++) { if (!originalInstances.attribute(j).isNumeric()) { final Instance instance = originalInstances.instance(i); final String catVal = instance.attribute(j).value((int) instance.value(j)); //Initialize map final TupleSI key = new TupleSI(catVal, j); if (!addedAttr.containsKey(key)) addedAttr.put(key, 0); addedAttr.put(key, addedAttr.get(key) + 1); } } } else if (isManhattanDist) { sortedMembers.kthSmallestValue(j, middle + 1); vals[j] = sortedMembers.instance(middle).value(j); if (dataIsEven) { sortedMembers.kthSmallestValue(j, middle + 2); vals[j] = (vals[j] + sortedMembers.instance(middle + 1).value(j)) / 2; } } else { throw new IllegalStateException("Not handled distance ..."); } } return new MixedCentroid(clusterIndex, vals, addedAttr); }
From source file:machinelearningcw.EnhancedLinearPerceptron.java
@Override public double classifyInstance(Instance instnc) throws Exception { double y = 0; //create a new instance so it doesnt change the orginal dataset Instance newInstance = new DenseInstance(instnc); if (setStandardiseAttributes) { standardizeAtrrbutes(newInstance); }//from ww w. j a v a2 s .c om for (int i = 0; i < newInstance.numAttributes() - 1; i++) { y += w[i] * (newInstance.value(i)); } return (y >= 0) ? 1 : 0; }