List of usage examples for weka.core Instances classIndex
public int classIndex()
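Before the examples, here is a minimal, self-contained sketch of how classIndex() behaves (assuming Weka 3.7+; the relation name, attribute names, and class ClassIndexDemo below are made up for illustration): it returns -1 until a class attribute has been assigned, after which it returns the zero-based index of that attribute.

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.Instances;

public class ClassIndexDemo {
    public static void main(String[] args) {
        // two numeric features plus a nominal "label" attribute (names are hypothetical)
        ArrayList<Attribute> attrs = new ArrayList<Attribute>();
        attrs.add(new Attribute("x1"));
        attrs.add(new Attribute("x2"));
        ArrayList<String> labels = new ArrayList<String>();
        labels.add("yes");
        labels.add("no");
        attrs.add(new Attribute("label", labels));

        Instances data = new Instances("demo", attrs, 0);
        System.out.println(data.classIndex());        // -1: no class attribute set yet

        // common idiom: treat the last attribute as the class
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println(data.classIndex());        // 2
    }
}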
From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java
License:Open Source License
public static Pair<ArrayList<double[]>, int[]> splitLabels(final Instances train) {
    assert (train.classAttribute() != null);
    final ArrayList<double[]> X = new ArrayList<double[]>();
    final int[] Y = new int[train.size()];
    for (int i = 0; i < train.size(); ++i) {
        final Instance inst = train.get(i);
        final double[] x = new double[train.numAttributes() - 1];
        int idx = 0;
        for (int j = 0; j < train.numAttributes(); ++j) {
            if (j == train.classIndex()) {
                Y[i] = (int) inst.classValue();
            } else {
                x[idx++] = inst.value(j);
            }
        }
        X.add(x);
    }
    return Pair.makePair(X, Y);
}
From source file:edu.oregonstate.eecs.mcplan.abstraction.WekaUtil.java
License:Open Source License
public static Instances allPairwiseProducts(final Instances single, final boolean reflexive,
        final boolean symmetric) {
    final int c = single.classIndex();
    System.out.println("Class attribute = " + c);

    final ArrayList<Attribute> pair_attributes = new ArrayList<Attribute>();
    for (int i = 0; i < single.numAttributes(); ++i) {
        if (i == c) {
            continue;
        }
        final Attribute ai = single.attribute(i);
        final int j0 = (symmetric ? 0 : i);
        for (int j = j0; j < single.numAttributes(); ++j) {
            if (j == c) {
                continue;
            }
            if (!reflexive && i == j) {
                continue;
            }
            final Attribute aj = single.attribute(j);
            final String name = ai.name() + "_x_" + aj.name();
            pair_attributes.add(new Attribute(name));
        }
    }

    String pair_name = single.relationName();
    pair_name += "_x";
    if (reflexive) {
        pair_name += "r";
    }
    if (symmetric) {
        pair_name += "s";
    }
    pair_name += "_";
    pair_name += single.relationName();

    final Instances result = new Instances(pair_name, pair_attributes, 0);
    for (final Instance inst : single) {
        final double[] xp = new double[pair_attributes.size()];
        int idx = 0;
        for (int i = 0; i < single.numAttributes(); ++i) {
            if (i == c) {
                continue;
            }
            final double xi = inst.value(i);
            final int j0 = (symmetric ? 0 : i);
            for (int j = j0; j < single.numAttributes(); ++j) {
                if (j == c) {
                    continue;
                }
                if (!reflexive && i == j) {
                    continue;
                }
                final double xj = inst.value(j);
                xp[idx++] = xi * xj;
            }
        }
        WekaUtil.addInstance(result, new DenseInstance(inst.weight(), xp));
    }
    return result;
}
From source file:edu.oregonstate.eecs.mcplan.ml.Memorizer.java
License:Open Source License
@Override
public void buildClassifier(final Instances data) throws Exception {
    Nclasses_ = data.numClasses();
    class_idx_ = data.classIndex();
    for (final Instance i : data) {
        final double[] x = i.toDoubleArray();
        final int c = (int) x[class_idx_];
        x[class_idx_] = 0;
        m_.put(new ArrayHolder(x), c);
    }
}
From source file:edu.oregonstate.eecs.mcplan.ml.WekaGlue.java
License:Open Source License
public static SequentialProjectionHashLearner createSequentialProjectionHashLearner(final RandomGenerator rng,
        final Instances labeled, final Instances unlabeled, final int K, final double eta, final double alpha) {
    assert (labeled.classIndex() >= 0);
    final int Nfeatures = labeled.numAttributes() - 1;

    final RealMatrix X = new Array2DRowRealMatrix(Nfeatures, labeled.size() + unlabeled.size());
    final RealMatrix XL = new Array2DRowRealMatrix(Nfeatures, labeled.size() * 2);
    final RealMatrix S = new Array2DRowRealMatrix(XL.getColumnDimension(), XL.getColumnDimension());

    for (int j = 0; j < labeled.size(); ++j) {
        final Instance inst = labeled.get(j);
        for (int i = 0; i < XL.getRowDimension(); ++i) {
            X.setEntry(i, j, inst.value(i));
            XL.setEntry(i, j, inst.value(i));
        }

        int sj = -1;
        Instance s = null;
        do {
            sj = rng.nextInt(labeled.size());
            s = labeled.get(sj);
        } while (s == inst || s.classValue() != inst.classValue());
        S.setEntry(j, sj, 1);

        int dj = -1;
        Instance d = null;
        do {
            dj = rng.nextInt(labeled.size());
            d = labeled.get(dj);
        } while (d == inst || d.classValue() == inst.classValue());
        S.setEntry(j, dj, -1);
    }

    for (int j = 0; j < unlabeled.size(); ++j) {
        final Instance inst = unlabeled.get(j);
        for (int i = 0; i < X.getRowDimension(); ++i) {
            X.setEntry(i, labeled.size() + j, inst.value(i));
        }
    }

    return new SequentialProjectionHashLearner(X, XL, S, K, eta, alpha);
}
From source file:edu.uga.cs.fluxbuster.classification.Classifier.java
License:Open Source License
/**
 * Executes the classifier.
 *
 * @param prepfeatures the prepared features in arff format
 * @param modelfile the path to the serialized model
 * @param clusters the clusters to classify
 * @return a map of the classified clusters, the keys are the classes
 *         and the values are lists of cluster id's belonging to those classes
 */
private Map<ClusterClass, List<StoredDomainCluster>> executeClassifier(String prepfeatures, String modelfile,
        List<StoredDomainCluster> clusters) {
    Map<ClusterClass, List<StoredDomainCluster>> retval = new HashMap<ClusterClass, List<StoredDomainCluster>>();
    try {
        DataSource source = new DataSource(new ByteArrayInputStream(prepfeatures.getBytes()));
        Instances data = source.getDataSet();
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }
        String[] options = weka.core.Utils.splitOptions("-p 0");
        J48 cls = (J48) weka.core.SerializationHelper.read(modelfile);
        cls.setOptions(options);
        for (int i = 0; i < data.numInstances(); i++) {
            double pred = cls.classifyInstance(data.instance(i));
            ClusterClass clusClass = ClusterClass
                    .valueOf(data.classAttribute().value((int) pred).toUpperCase());
            if (!retval.containsKey(clusClass)) {
                retval.put(clusClass, new ArrayList<StoredDomainCluster>());
            }
            retval.get(clusClass).add(clusters.get(i));
        }
    } catch (Exception e) {
        if (log.isErrorEnabled()) {
            log.error("Error executing classifier.", e);
        }
    }
    return retval;
}
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/**
 * Converts the instances in the given dataset to binary, setting the specified label to positive.
 * Note this method is destructive to data, directly modifying its contents.
 * @param data the multiclass dataset to be converted to binary.
 * @param positiveClassValue the class value to treat as positive.
 */
public static void convertMulticlassToBinary(Instances data, String positiveClassValue) {

    // ensure that data is nominal
    if (!data.classAttribute().isNominal())
        throw new IllegalArgumentException("Instances must have a nominal class.");

    // create the new class attribute
    FastVector newClasses = new FastVector(2);
    newClasses.addElement("Y");
    newClasses.addElement("N");
    Attribute newClassAttribute = new Attribute("class", newClasses);

    // alter the class attribute to be binary
    int newClassAttIdx = data.classIndex();
    data.insertAttributeAt(newClassAttribute, newClassAttIdx);
    int classAttIdx = data.classIndex();

    // set the instances classes to be binary, with the labels [Y,N] (indices 0 and 1 respectively)
    int numInstances = data.numInstances();
    for (int instIdx = 0; instIdx < numInstances; instIdx++) {
        Instance inst = data.instance(instIdx);
        if (inst.stringValue(classAttIdx).equals(positiveClassValue)) {
            inst.setValue(newClassAttIdx, 0); // set it to the first class, which will be Y
        } else {
            inst.setValue(newClassAttIdx, 1); // set it to the second class, which will be N
        }
    }

    // switch the class index to the new class and delete the old class
    data.setClassIndex(newClassAttIdx);
    data.deleteAttributeAt(classAttIdx);

    // alter the dataset name
    data.setRelationName(data.relationName() + "-" + positiveClassValue);
}
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/**
 * Converts a set of instances to svm-light format
 * @param data the weka instances
 * @return the weka instances in svm-light format
 */
public static String arffToSVMLight(Instances data, SVMLightLabelFormat labelFormat) {

    if (labelFormat == SVMLightLabelFormat.CLASSIFICATION && data.numClasses() != 2) {
        throw new IllegalArgumentException(
                "SVM-light classification label format requires that the data contain only two classes.");
    }

    String str = "";
    String endline = System.getProperty("line.separator");

    int numInstances = data.numInstances();
    int numAttributes = data.numAttributes();
    int classAttIdx = data.classIndex();

    for (int instIdx = 0; instIdx < numInstances; instIdx++) {

        Instance inst = data.instance(instIdx);

        // convert the instance label
        if (labelFormat == SVMLightLabelFormat.CLASSIFICATION) {
            str += (inst.classValue() == 0) ? "-1" : "1";
        } else {
            str += inst.classValue();
        }
        str += " ";

        // convert each feature
        for (int attIdx = 0; attIdx < numAttributes; attIdx++) {
            // skip the class attribute
            if (attIdx == classAttIdx)
                continue;
            str += (attIdx + 1) + ":" + inst.value(attIdx) + " ";
        }

        // append the instance info string
        str += "# " + instIdx;
        str += endline;
    }

    return str;
}
From source file:entity.DifficultyResamplingManager.java
License:Open Source License
/**
 * Returns the max dimensions of a subdataset for a given positive rate (total, p, n).
 * @param originalDataset
 * @param positiveExamplePercentProportion
 * @return
 */
public SubdatasetDimensions calculateSubdatasetDimensionsForProportion(Instances originalDataset,
        BigDecimal positiveExamplePercentProportion) {

    // size of subdataset, initialized to original size
    int total = originalDataset.numInstances();
    // number of positive instances
    int p = 0;
    // number of negative instances
    int n = 0;
    // current PR
    int pp = 0;

    // count positives
    for (int i = 0; i < total; i++) {
        if (originalDataset.instance(i).stringValue(originalDataset.classIndex()).equals(Settings.buggyLabel)) {
            p++;
        }
    }
    n = total - p;

    // finds actual PR
    pp = calculatePositivePercentCeil(p + n, p);

    if (verbose)
        System.out.println(
                "[DifficultyResamplingManager , calculateSubdatasetDimensionsForProportion] current: p=" + p
                        + " n=" + n + " pp = " + pp);

    // if current PR equals the desired one, return current dimensions
    if (pp == positiveExamplePercentProportion.intValue())
        return new SubdatasetDimensions(p, n);

    // if current PR is greater than the desired one,
    // decrements p until the ceiling of the current PR is no longer greater than the desired one
    if (pp > positiveExamplePercentProportion.intValue()) {
        while (pp > positiveExamplePercentProportion.intValue()) {
            p--;
            pp = calculatePositivePercentCeil(p + n, p);
            if (verbose)
                System.out.println("[DifficultyResamplingManager , calculateSubdatasetDimensionsForProportion] p="
                        + p + " n=" + n + " pp = " + pp);
        }
        // goes back if the previous PR was "nearer" to the desired one than the current one
        if (isPPPNearerThanPPToDesiredPercent(calculatePositivePercentCeil(p + 1 + n, p + 1), pp,
                positiveExamplePercentProportion.intValue())) {
            p++;
            pp = calculatePositivePercentCeil(p + n, p);
        }
    }

    // if current PR is less than the desired one,
    // decrements n until the ceiling of the current PR is no longer less than the desired one
    if (pp < positiveExamplePercentProportion.intValue()) {
        while (pp < positiveExamplePercentProportion.intValue()) {
            n--;
            pp = calculatePositivePercentCeil(p + n, p);
            if (verbose)
                System.out.println("[DifficultyResamplingManager , calculateSubdatasetDimensionsForProportion] p="
                        + p + " n=" + n + " pp = " + pp);
        }
        // goes back if the previous PR was "nearer" to the desired one than the current one
        if (isPPPNearerThanPPToDesiredPercent(calculatePositivePercentCeil(p + n + 1, p), pp,
                positiveExamplePercentProportion.intValue())) {
            n++;
            pp = calculatePositivePercentCeil(p + n, p);
        }
    }

    if (verbose)
        System.out.println("[DifficultyResamplingManager , calculateSubdatasetDimensionsForProportion] final p="
                + p + " n=" + n + " pp = " + pp);

    return new SubdatasetDimensions(p, n);
}
From source file:entity.DifficultyResamplingManager.java
License:Open Source License
/**
 * Called by generateResampledSubdataset.
 *
 * @param originalDataset
 * @param subdatasetDimensions
 * @return
 */
private Instances generateResampledSubdataset(Instances originalDataset,
        SubdatasetDimensions subdatasetDimensions) {

    // creates an empty dataset
    Instances resampledSubdataset = new Instances(originalDataset);
    resampledSubdataset.delete();

    // randomize dataset instances order
    originalDataset.randomize(RandomizationManager.randomGenerator);

    // calc number of positives to insert
    int positivesToInsert = subdatasetDimensions.getP();
    if (verbose)
        System.out.println("[DifficultyResamplingManager, generateResampledSubdataset] positivesToInsert = "
                + positivesToInsert);

    // calc number of negatives to insert
    int negativesToInsert = subdatasetDimensions.getN();

    // iterates over the original dataset instances
    for (int i = 0; i < originalDataset.numInstances(); i++) {
        // if instance is positive and more are needed in the new dataset, inserts into new dataset
        if ((positivesToInsert > 0) && (originalDataset.instance(i).stringValue(originalDataset.classIndex())
                .equals(Settings.buggyLabel))) {
            resampledSubdataset.add(originalDataset.instance(i));
            positivesToInsert--;
        }

        // if instance is negative and more are needed in the new dataset, inserts into new dataset
        if ((negativesToInsert > 0) && (originalDataset.instance(i).stringValue(originalDataset.classIndex())
                .equals(Settings.nonbuggyLabel))) {
            resampledSubdataset.add(originalDataset.instance(i));
            negativesToInsert--;
        }
    }

    if (verbose)
        System.out.println("[DifficultyResamplingManager, generateResampledSubdataset] resampling complete: "
                + this.printDatasetInfo(resampledSubdataset));

    return resampledSubdataset;
}
From source file:entity.DifficultyResamplingManager.java
License:Open Source License
/**
 * Prints the number of positive and negative instances and their respective percentages.
 * @param dataset
 * @return
 */
public String printDatasetInfo(Instances dataset) {

    int positives = 0;
    int negatives = 0;

    for (int i = 0; i < dataset.numInstances(); i++) {
        if (dataset.instance(i).stringValue(dataset.classIndex()).equals(Settings.buggyLabel)) {
            positives++;
        }
        if (dataset.instance(i).stringValue(dataset.classIndex()).equals(Settings.nonbuggyLabel)) {
            negatives++;
        }
    }

    double percent = ((double) positives / (double) dataset.numInstances()) * 100;
    return new String("total instances: " + dataset.numInstances() + ", p+n=" + (positives + negatives)
            + ", p: " + positives + ", n: " + negatives + ", %p : " + percent);
}