List of usage examples for the weka.core.DenseInstance constructor DenseInstance
public DenseInstance(int numAttributes)
From source file:CopiaSeg3.java
public static void main(String[] args) throws Exception { BufferedReader datafile = readDataFile("breast-cancer-wisconsin.arff"); Instances data = new Instances(datafile); data.setClassIndex(data.numAttributes() - 1); // Elije el nmero de particiones para la valicacin (4 = 75% Train, 25% Test) Instances[] split = split(data, 4);//from w ww . j a v a2 s. c om // Separa los conjuntos en los arrays trainning y testing Instances trainingSplits = split[0]; Instances testingSplits = split[1]; // Elegir un conjunto de clasificadores Classifier[] models = { new MultilayerPerceptron() //, new J48 //, ... }; FastVector fvWekaAttributes = new FastVector(9); // Ejecutar cada clasificador for (int j = 0; j < models.length; j++) { // Collect every group of predictions for current model in a FastVector FastVector predictions = new FastVector(); // For each training-testing split pair, train and test the classifier Evaluation validation = simpleClassify(models[j], trainingSplits, testingSplits); predictions.appendElements(validation.predictions()); // Uncomment to see the summary for each training-testing pair. System.out.println(models[j].toString()); // Calculate overall accuracy of current classifier on all splits double accuracy = calculateAccuracy(predictions); // // Print current classifier's name and accuracy in a complicated, but nice-looking way. System.out.println(models[j].getClass().getSimpleName() + " Accuracy: " + String.format("%.2f%%", accuracy) + "\n====================="); // // // Step 4: use the classifier // // For real world applications, the actual use of the classifier is the ultimate goal. Heres the simplest way to achieve that. 
Lets say weve built an instance (named iUse) as explained in step 2: // // Specify that the instance belong to the training set // // in order to inherit from the set description Instance iUse = new DenseInstance(9); iUse.setValue((Attribute) predictions.elementAt(0), 4); iUse.setValue((Attribute) predictions.elementAt(1), 8); iUse.setValue((Attribute) predictions.elementAt(2), 8); iUse.setValue((Attribute) predictions.elementAt(3), 5); iUse.setValue((Attribute) predictions.elementAt(4), 4); iUse.setValue((Attribute) predictions.elementAt(5), 5); iUse.setValue((Attribute) predictions.elementAt(6), 10); iUse.setValue((Attribute) predictions.elementAt(7), 4); iUse.setValue((Attribute) predictions.elementAt(8), 1); iUse.setDataset(trainingSplits); // // // Get the likelihood of each classes // fDistribution[0] is the probability of being positive? // fDistribution[1] is the probability of being negative? double[] fDistribution = models[j].distributionForInstance(iUse); System.out.println("Probabilidad positivo: " + fDistribution[0]); System.out.println("Probabilidad negativo: " + fDistribution[1]); } }
From source file:PredictMention.java
/**
 * Rebuilds {@code testData} so that it contains exactly one instance made from the
 * given text fields.
 *
 * <p>{@code testData} is created as a copy of {@code trainedData} and then emptied.
 * The three arguments are stored in attributes 0, 1 and 2 of a four-attribute
 * instance, and attribute 3 is marked as missing.
 *
 * @param title value stored in attribute 0
 * @param description value stored in attribute 1
 * @param keywords value stored in attribute 2
 */
protected void setTestData(String title, String description, String keywords) {
    testData = new Instances(trainedData);
    testData.clear();

    Instance row = new DenseInstance(4);
    // The instance must know its dataset before string values can be assigned.
    row.setDataset(testData);

    String[] fields = { title, description, keywords };
    for (int column = 0; column < fields.length; column++) {
        row.setValue(column, fields[column]);
    }
    row.setMissing(fields.length);

    testData.add(row);
}
From source file:CJWeka.java
License:Open Source License
/** Convert a sting of floats separated by spaces into an Instance *//*from w ww.j a v a 2 s . c om*/ private Instance floatstringToInst(String floatvalues, Instances ii, boolean hasClass) { String[] flostr = floatvalues.split(" "); int nvals = flostr.length; Instance i = new DenseInstance(nvals); int j; if (hasClass) nvals--; for (j = 0; j < nvals; j++) { if (!flostr[j].equals("")) { Float f = new Float(flostr[j]); i.setValue(j, f); } } i.setDataset(ii); if (hasClass) { Attribute clsAttrib = ii.classAttribute(); //clsAttrib.addStringValue(flostr[j]); i.setValue(clsAttrib, flostr[j]); } return i; }
From source file:PCADetector.java
License:Apache License
public Instances getInstances() { int numAtts = m_oriDataMatrix.size(); if (numAtts < 0) return null; ArrayList<Attribute> atts = new ArrayList<Attribute>(numAtts); for (int att = 0; att < numAtts; att++) { atts.add(new Attribute(Integer.toString(att), att)); }//from ww w . j a va 2 s .c om int numInstances = m_oriDataMatrix.get(0).size(); if (numInstances <= 0) return null; Instances dataset = new Instances("MetricInstances", atts, numInstances); for (int inst = 0; inst < numInstances; inst++) { Instance newInst = new DenseInstance(numAtts); for (int att = 0; att < numAtts; att++) { newInst.setValue(att, m_oriDataMatrix.get(att).get(inst)); } dataset.add(newInst); } return dataset; }
From source file:adams.flow.transformer.WekaInstancesMerge.java
License:Open Source License
/** * Merges the datasets based on the collected IDs. * * @param orig the original datasets/*www .j a v a 2 s. com*/ * @param inst the processed datasets to merge into one * @param ids the IDs for identifying the rows * @return the merged dataset */ protected Instances merge(Instances[] orig, Instances[] inst, HashSet ids) { Instances result; ArrayList<Attribute> atts; int i; int n; int m; int index; String relation; List sortedIDs; Attribute att; int[] indexStart; double value; double[] values; HashMap<Integer, Integer> hashmap; HashSet<Instance> hs; // create header if (isLoggingEnabled()) getLogger().info("Creating merged header..."); atts = new ArrayList<>(); relation = ""; indexStart = new int[inst.length]; for (i = 0; i < inst.length; i++) { indexStart[i] = atts.size(); for (n = 0; n < inst[i].numAttributes(); n++) atts.add((Attribute) inst[i].attribute(n).copy()); // assemble relation name if (i > 0) relation += "_"; relation += inst[i].relationName(); } result = new Instances(relation, atts, ids.size()); // fill with missing values if (isLoggingEnabled()) getLogger().info("Filling with missing values..."); for (i = 0; i < ids.size(); i++) { if (isStopped()) return null; // progress if (isLoggingEnabled() && ((i + 1) % 1000 == 0)) getLogger().info("" + (i + 1)); result.add(new DenseInstance(result.numAttributes())); } // sort IDs if (isLoggingEnabled()) getLogger().info("Sorting indices..."); sortedIDs = new ArrayList(ids); Collections.sort(sortedIDs); // generate rows hashmap = new HashMap<>(); for (i = 0; i < inst.length; i++) { if (isStopped()) return null; if (isLoggingEnabled()) getLogger().info("Adding file #" + (i + 1)); att = orig[i].attribute(m_UniqueID); for (n = 0; n < inst[i].numInstances(); n++) { // progress if (isLoggingEnabled() && ((n + 1) % 1000 == 0)) getLogger().info("" + (n + 1)); // determine index of row if (m_AttType == Attribute.NUMERIC) index = Collections.binarySearch(sortedIDs, inst[i].instance(n).value(att)); else index = 
Collections.binarySearch(sortedIDs, inst[i].instance(n).stringValue(att)); if (index < 0) throw new IllegalStateException( "Failed to determine index for row #" + (n + 1) + " of dataset #" + (i + 1) + "!"); if (!hashmap.containsKey(index)) hashmap.put(index, 0); hashmap.put(index, hashmap.get(index) + 1); // use internal representation for faster access values = result.instance(index).toDoubleArray(); // add attribute values for (m = 0; m < inst[i].numAttributes(); m++) { // missing value? if (inst[i].instance(n).isMissing(m)) continue; switch (inst[i].attribute(m).type()) { case Attribute.NUMERIC: case Attribute.DATE: case Attribute.NOMINAL: values[indexStart[i] + m] = inst[i].instance(n).value(m); break; case Attribute.STRING: value = result.attribute(indexStart[i] + m) .addStringValue(inst[i].instance(n).stringValue(m)); values[indexStart[i] + m] = value; break; case Attribute.RELATIONAL: value = result.attribute(indexStart[i] + m) .addRelation(inst[i].instance(n).relationalValue(m)); values[indexStart[i] + m] = value; break; default: throw new IllegalStateException("Unhandled attribute type: " + inst[i].attribute(m).type()); } } // update row result.set(index, new DenseInstance(1.0, values)); } } if (getRemove()) { hs = new HashSet<>(); for (Integer x : hashmap.keySet()) { if (hashmap.get(x) != inst.length) hs.add(result.get(x)); } result.removeAll(hs); } return result; }
From source file:adams.ml.data.InstancesView.java
License:Open Source License
/** * Appends a row to the spreadsheet./*from ww w .j av a2s .c o m*/ * * @return the created row */ @Override public DataRow addRow() { DenseInstance inst; inst = new DenseInstance(getColumnCount()); inst.setDataset(m_Data); m_Data.add(inst); return new InstanceView(this, inst); }
From source file:adams.ml.data.InstancesView.java
License:Open Source License
/** * Inserts a row at the specified location. * * @param index the index where to insert the row * @return the created row/* w w w .j ava 2 s. co m*/ */ @Override public DataRow insertRow(int index) { DenseInstance inst; inst = new DenseInstance(getColumnCount()); inst.setDataset(m_Data); m_Data.add(index, inst); return new InstanceView(this, inst); }
From source file:br.fapesp.myutils.MyUtils.java
License:Open Source License
/** * Generates a Gaussian data set with K clusters and m dimensions * //from w ww.j a v a2 s . c om * @param centers * K x m matrix * @param sigmas * K x m matrix * @param pointsPerCluster * number of points per cluster * @param seed * for the RNG * @param randomize * should the order of the instances be randomized? * @param supervised * should class label be present? if true, the class is the m+1 * attribute * * @return */ public static Instances genGaussianDataset(double[][] centers, double[][] sigmas, int pointsPerCluster, long seed, boolean randomize, boolean supervised) { Random r = new Random(seed); int K = centers.length; // number of clusters int m = centers[0].length; // number of dimensions FastVector atts = new FastVector(m); for (int i = 0; i < m; i++) atts.addElement(new Attribute("at" + i)); if (supervised) { FastVector cls = new FastVector(K); for (int i = 0; i < K; i++) cls.addElement("Gauss-" + i); atts.addElement(new Attribute("Class", cls)); } Instances data; if (supervised) data = new Instances(K + "-Gaussians-supervised", atts, K * pointsPerCluster); else data = new Instances(K + "-Gaussians", atts, K * pointsPerCluster); if (supervised) data.setClassIndex(m); Instance ith; for (int i = 0; i < K; i++) { for (int j = 0; j < pointsPerCluster; j++) { if (!supervised) ith = new DenseInstance(m); else ith = new DenseInstance(m + 1); ith.setDataset(data); for (int k = 0; k < m; k++) ith.setValue(k, centers[i][k] + (r.nextGaussian() * sigmas[i][k])); if (supervised) ith.setValue(m, "Gauss-" + i); data.add(ith); } } // run randomization filter if desired if (randomize) data.randomize(r); return data; }
From source file:br.puc_rio.ele.lvc.interimage.datamining.DataParser.java
License:Apache License
/**
 * Converts a bag of tuples into a dataset.
 *
 * <p>Each tuple is read as N feature values followed by one class name; the width is
 * taken from the first tuple. The features become attributes "att0".."att(N-1)" and the
 * distinct class names become the values of a nominal attribute "ClassNames", which is
 * placed last and set as the class attribute.
 *
 * @param objData the input, cast to a {@code DataBag}
 * @return the dataset, or {@code null} if the input could not be processed (the error
 *         message is written to standard error)
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public Instances parseData(Object objData) {
    try {
        Instances dataInstance;
        DataBag values = (DataBag) objData;
        int numAttributes = values.iterator().next().size(); // N_Features + 1 Class

        // Count the samples (tuples in the bag).
        int bagSize = 0;
        for (Iterator<Tuple> it = values.iterator(); it.hasNext();) {
            it.next();
            bagSize = bagSize + 1;
        }

        // Collect the class name of every sample, then reduce to the distinct names.
        String[] inputClass = new String[bagSize];
        int index = 0;
        for (Iterator<Tuple> it = values.iterator(); it.hasNext();) {
            Tuple tuple = it.next();
            inputClass[index] = DataType.toString(tuple.get(numAttributes - 1));
            index = index + 1;
        }
        HashSet classSet = new HashSet(Arrays.asList(inputClass));

        // Values of the nominal class attribute.
        FastVector classNames = new FastVector();
        for (Object className : classSet) {
            classNames.addElement(className);
        }

        // Header: N feature attributes, then the class attribute inserted at the end.
        FastVector atts = new FastVector();
        for (int i = 0; i < numAttributes - 1; i++) {
            atts.addElement(new Attribute("att" + i));
        }
        // The third argument is the initial capacity, i.e. the number of samples.
        dataInstance = new Instances("MyRelation", atts, bagSize);
        dataInstance.insertAttributeAt(new Attribute("ClassNames", classNames), numAttributes - 1);

        // Fill the dataset. tmpData is reused as a template for every sample; the class
        // value is set on the row held by the dataset, addressed by its position.
        Instance tmpData = new DenseInstance(numAttributes);
        index = 0;
        for (Iterator<Tuple> it = values.iterator(); it.hasNext();) {
            Tuple tuple = it.next();
            for (int i = 0; i < numAttributes - 1; i++) {
                tmpData.setValue((weka.core.Attribute) atts.elementAt(i), DataType.toDouble(tuple.get(i)));
            }
            dataInstance.add(tmpData);
            dataInstance.instance(index).setValue(numAttributes - 1,
                    DataType.toString(tuple.get(numAttributes - 1)));
            index = index + 1;
        }

        // Setting the class index
        dataInstance.setClassIndex(dataInstance.numAttributes() - 1);
        return dataInstance;
    } catch (Exception e) {
        System.err.println("Failed to process input; error - " + e.getMessage());
        return null;
    }
}
From source file:br.puc_rio.ele.lvc.interimage.datamining.DataParser.java
License:Apache License
/**
 * Converts comma-separated text into a dataset.
 *
 * <p>Each non-empty line is split on "," and read as N feature values followed by one
 * class name; the width is taken from the first non-empty line and every later line
 * must have the same width. The features become attributes "att0".."att(N-1)" and the
 * distinct class names become the values of a nominal attribute "ClassNames", which is
 * placed last and set as the class attribute. The reader is not closed.
 *
 * @param buff the reader supplying the lines
 * @return the dataset, or {@code null} if the input could not be processed (the error
 *         message is written to standard error)
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public Instances parseData(BufferedReader buff) {
    try {
        Instances dataInstance;
        int numAttributes = 0; // N_Features + 1 Class
        List<String> inputClass = new ArrayList<String>();
        List<String[]> dataset = new ArrayList<String[]>();

        // Read all samples, remembering each sample's class name.
        String line;
        while ((line = buff.readLine()) != null) {
            if (!line.isEmpty()) {
                String[] data = line.split(",");
                if (numAttributes == 0) {
                    numAttributes = data.length;
                }
                // The class column is fixed by the first line; a row of another width
                // would have its class (and features) read from the wrong columns.
                if (data.length != numAttributes) {
                    throw new IllegalArgumentException("Sample " + (dataset.size() + 1) + " has "
                            + data.length + " values, expected " + numAttributes);
                }
                inputClass.add(data[numAttributes - 1]);
                dataset.add(data);
            }
        }
        HashSet classSet = new HashSet(inputClass);

        // Values of the nominal class attribute.
        FastVector classNames = new FastVector();
        for (Object className : classSet) {
            classNames.addElement(className);
        }

        // Header: N feature attributes, then the class attribute inserted at the end.
        FastVector atts = new FastVector();
        for (int i = 0; i < numAttributes - 1; i++) {
            atts.addElement(new Attribute("att" + i));
        }
        // The third argument is the initial capacity, i.e. the number of samples.
        dataInstance = new Instances("MyRelation", atts, dataset.size());
        dataInstance.insertAttributeAt(new Attribute("ClassNames", classNames), numAttributes - 1);

        // Fill the dataset. tmpData is reused as a template for every sample; the class
        // value is set on the row held by the dataset, addressed by its position.
        Instance tmpData = new DenseInstance(numAttributes);
        for (int k = 0; k < dataset.size(); k++) {
            String[] row = dataset.get(k);
            for (int i = 0; i < numAttributes - 1; i++) {
                tmpData.setValue((weka.core.Attribute) atts.elementAt(i), DataType.toDouble(row[i]));
            }
            dataInstance.add(tmpData);
            dataInstance.instance(k).setValue(numAttributes - 1,
                    DataType.toString(row[numAttributes - 1]));
        }

        // Setting the class index
        dataInstance.setClassIndex(dataInstance.numAttributes() - 1);
        return dataInstance;
    } catch (Exception e) {
        System.err.println("Failed to process input; error - " + e.getMessage());
        return null;
    }
}