List of usage examples for weka.core.Instances.numInstances()
public int numInstances()
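Before the project examples, here is a minimal, self-contained sketch of the method's behavior, written against the same Weka 3.6-era API (FastVector, Instance) that the examples below use; the class, dataset, and attribute names are illustrative and not taken from any of the source files. numInstances() simply reports how many Instance objects the Instances container currently holds:

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;

public class NumInstancesDemo {
    public static void main(String[] args) {
        // Header with two numeric attributes (names are illustrative)
        FastVector attrs = new FastVector();
        Attribute x = new Attribute("x");
        Attribute y = new Attribute("y");
        attrs.addElement(x);
        attrs.addElement(y);

        // A new dataset starts empty
        Instances data = new Instances("demo", attrs, 0);
        System.out.println(data.numInstances()); // prints 0

        // Adding an instance increments the count reported by numInstances()
        Instance inst = new Instance(2);
        inst.setValue(x, 1.5);
        inst.setValue(y, 2.5);
        data.add(inst);
        System.out.println(data.numInstances()); // prints 1

        // The common idiom in the examples below: iterate over all instances
        for (int i = 0; i < data.numInstances(); i++) {
            System.out.println(data.instance(i));
        }
    }
}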
From source file: csav2.Weka_additive.java

public void classifyTestSet6(String input) throws Exception {
    String ids = "";
    ReaderWriter rw = new ReaderWriter();

    // Numeric feature attributes
    Attribute attr[] = new Attribute[50];
    attr[0] = new Attribute("Autosentiment");
    attr[1] = new Attribute("PositiveMatch");
    attr[2] = new Attribute("NegativeMatch");
    attr[3] = new Attribute("FW");
    attr[4] = new Attribute("JJ");
    attr[5] = new Attribute("RB");
    attr[6] = new Attribute("RB_JJ");
    attr[7] = new Attribute("amod");
    attr[8] = new Attribute("acomp");
    attr[9] = new Attribute("advmod");
    attr[10] = new Attribute("BLPos");
    attr[11] = new Attribute("BLNeg");
    attr[12] = new Attribute("VSPos");
    attr[13] = new Attribute("VSNeg");

    // Nominal class attribute
    FastVector classValue = new FastVector(3);
    classValue.addElement("p");
    classValue.addElement("n");
    classValue.addElement("o");
    attr[14] = new Attribute("answer", classValue);

    FastVector attrs = new FastVector();
    for (int i = 0; i < 15; i++) {
        attrs.addElement(attr[i]);
    }

    // Add instances parsed from the whitespace-separated input
    Instances dataset = new Instances("my_dataset", attrs, 0);
    StringTokenizer tokenizer = new StringTokenizer(input);
    while (tokenizer.hasMoreTokens()) {
        Instance example = new Instance(15);
        for (int j = 0; j < 15; j++) {
            String st = tokenizer.nextToken();
            System.out.println(j + " " + st);
            if (j == 0)
                example.setValue(attr[j], Float.parseFloat(st));
            else if (j == 14)
                example.setValue(attr[j], st);
            else
                example.setValue(attr[j], Integer.parseInt(st));
        }
        ids += tokenizer.nextToken() + "\t";
        dataset.add(example);
    }

    // Save dataset
    String file = "Classifier\\featurefile_additive_test6.arff";
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataset);
    saver.setFile(new File(file));
    saver.writeBatch();

    // Read dataset back and set the class attribute
    ArffLoader loader = new ArffLoader();
    loader.setFile(new File(file));
    dataset = loader.getDataSet();
    dataset.setClassIndex(14);

    // Read serialized classifier back
    String file1 = "Classifier\\classifier_asAndpolarwordsAndposAnddepAndblAndvs.model";
    InputStream is = new FileInputStream(file1);
    Classifier classifier;
    ObjectInputStream objectInputStream = new ObjectInputStream(is);
    classifier = (Classifier) objectInputStream.readObject();

    // Evaluate: copy every instance into the test set
    Instances test = new Instances(dataset, 0, dataset.numInstances());
    test.setClassIndex(14);
    Evaluation eval = new Evaluation(test);
    eval.evaluateModel(classifier, test);
    System.out.println(eval.toSummaryString());
    System.out.println("WEIGHTED F-MEASURE:" + eval.weightedFMeasure());
    System.out.println("WEIGHTED PRECISION:" + eval.weightedPrecision());
    System.out.println("WEIGHTED RECALL:" + eval.weightedRecall());

    // Output per-instance predictions alongside their ids
    String optest = "", val = "";
    StringTokenizer op = new StringTokenizer(ids);
    int count = 0;
    while (op.hasMoreTokens()) {
        double[] prediction = classifier.distributionForInstance(test.instance(count));
        count += 1;
        if (prediction[0] > prediction[1]) {
            if (prediction[0] > prediction[2]) {
                val = "p: " + Double.toString((double) Math.round(prediction[0] * 1000) / 1000);
            } else {
                val = "o: " + Double.toString((double) Math.round(prediction[2] * 1000) / 1000);
            }
        } else {
            if (prediction[1] > prediction[2]) {
                val = "n: " + Double.toString((double) Math.round(prediction[1] * 1000) / 1000);
            } else {
                val = "o: " + Double.toString((double) Math.round(prediction[2] * 1000) / 1000);
            }
        }
        optest += op.nextToken() + "\t" + val + "\n";
    }
    rw.writeToFile(optest, "Answers_additive_Test6", "txt");
}
From source file: cyber009.udal.functions.StatisticalAnalysis.java

/**
 * @param classifier
 * @param trainingDataSet
 * @param unLabelDataSets
 * @param unLabelSet
 * @param classTarget
 * @return the conditional entropy over the unlabeled pool
 */
public double conditionalEntropy(Classifier classifier, Instances trainingDataSet,
        Instances unLabelDataSets, Instance unLabelSet, double classTarget) {
    double cEnt = 0.0D;
    double entropy = 0.0D;
    unLabelSet.setClassValue(classTarget);
    // Temporarily append the candidate instance to the training set
    trainingDataSet.add(trainingDataSet.numInstances(), unLabelSet);
    AttributeStats classStats = trainingDataSet.attributeStats(trainingDataSet.classIndex());
    for (Instance set : unLabelDataSets) {
        if (instanceCMPWithoutClass(set, unLabelSet))
            continue;
        for (int i = 0; i < classStats.nominalCounts.length; i++) {
            double target = Double.parseDouble(
                    trainingDataSet.attribute(trainingDataSet.classIndex()).value(i));
            set.setClassValue(target);
            entropy = posteriorDistribution(classifier, trainingDataSet, set, classTarget);
            cEnt += -(entropy) * Math.log10(entropy);
            set.setClassMissing();
        }
    }
    // Remove the temporarily added instance again
    trainingDataSet.remove(trainingDataSet.numInstances() - 1);
    return cEnt;
}
From source file: data.generation.target.utils.PrincipalComponents.java
License: Open Source License

/**
 * Gets the transformed training data.
 *
 * @return the transformed training data
 * @throws Exception if transformed data can't be returned
 */
public Instances transformedData(Instances data) throws Exception {
    if (m_eigenvalues == null) {
        throw new Exception("Principal components hasn't been built yet");
    }
    Instances output = null;
    if (m_transBackToOriginal) {
        output = new Instances(m_originalSpaceFormat);
    } else {
        output = new Instances(m_transformedFormat);
    }
    for (int i = 0; i < data.numInstances(); i++) {
        Instance converted = convertInstance(data.instance(i));
        output.add(converted);
    }
    return output;
}
From source file: data.statistics.MILStatistics.java
License: Open Source License

/**
 * Calculates various MIML statistics, such as instancesPerBag and
 * attributesPerBag.
 *
 * @param dataSet A MIL dataset
 */
public void calculateStats(Instances dataSet) {
    numBags = dataSet.numInstances();
    attributesPerBag = dataSet.instance(0).relationalValue(1).numAttributes();
    minInstancesPerBag = Integer.MAX_VALUE;
    maxInstancesPerBag = Integer.MIN_VALUE;

    // Each pair <Integer, Integer> stores <numberOfInstances, numberOfBags>
    distributionBags = new HashMap<Integer, Integer>();
    for (int i = 0; i < numBags; i++) {
        int nInstances = dataSet.instance(i).relationalValue(1).numInstances();
        if (nInstances < minInstancesPerBag) {
            minInstancesPerBag = nInstances;
        }
        if (nInstances > maxInstancesPerBag) {
            maxInstancesPerBag = nInstances;
        }
        if (distributionBags.containsKey(nInstances)) {
            distributionBags.put(nInstances, distributionBags.get(nInstances) + 1);
        } else {
            distributionBags.put(nInstances, 1);
        }
    }
    avgInstancesPerBag = 0.0;
    for (Integer set : distributionBags.keySet()) {
        avgInstancesPerBag += set * distributionBags.get(set);
    }
    avgInstancesPerBag = avgInstancesPerBag / numBags;
}
From source file: data.statistics.MLStatistics.java
License: Open Source License

/**
 * Calculates Phi and Chi-square correlation matrices.
 *
 * @param dataSet A multi-label dataset.
 * @throws java.lang.Exception To be handled in an upper level.
 */
public void calculatePhiChi2(MultiLabelInstances dataSet) throws Exception {
    numLabels = dataSet.getNumLabels();

    // The indices of the label attributes
    int[] labelIndices = dataSet.getLabelIndices();
    phi = new double[numLabels][numLabels];
    chi2 = new double[numLabels][numLabels];

    // Keep only the label attributes
    Remove remove = new Remove();
    remove.setInvertSelection(true);
    remove.setAttributeIndicesArray(labelIndices);
    remove.setInputFormat(dataSet.getDataSet());
    Instances result = Filter.useFilter(dataSet.getDataSet(), remove);
    result.setClassIndex(result.numAttributes() - 1);

    for (int i = 0; i < numLabels; i++) {
        // 2x2 contingency counts of label i against every label l
        int a[] = new int[numLabels];
        int b[] = new int[numLabels];
        int c[] = new int[numLabels];
        int d[] = new int[numLabels];
        double e[] = new double[numLabels];
        double f[] = new double[numLabels];
        double g[] = new double[numLabels];
        double h[] = new double[numLabels];
        for (int j = 0; j < result.numInstances(); j++) {
            for (int l = 0; l < numLabels; l++) {
                if (result.instance(j).stringValue(i).equals("0")) {
                    if (result.instance(j).stringValue(l).equals("0")) {
                        a[l]++;
                    } else {
                        c[l]++;
                    }
                } else {
                    if (result.instance(j).stringValue(l).equals("0")) {
                        b[l]++;
                    } else {
                        d[l]++;
                    }
                }
            }
        }
        for (int l = 0; l < numLabels; l++) {
            e[l] = a[l] + b[l];
            f[l] = c[l] + d[l];
            g[l] = a[l] + c[l];
            h[l] = b[l] + d[l];
            double denominator = Math.sqrt(e[l] * f[l] * g[l] * h[l]);
            double numerator = a[l] * d[l] - b[l] * c[l];
            phi[i][l] = numerator / denominator;
            chi2[i][l] = phi[i][l] * phi[i][l] * (a[l] + b[l] + c[l] + d[l]);
        }
    }
}
From source file: dataHandlers.DataClusterHandler.java

private int seedAmount(Instances dataPoints) {
    return dataPoints.numInstances();
}
From source file: dataHandlers.DataClusterHandler.java

private int clusterCount(Instances dataPoints) {
    // Heuristic: use the square root of the number of data points as the cluster count
    int totalDataCount = dataPoints.numInstances();
    return (int) Math.sqrt(totalDataCount);
}
From source file: DataMiningLogHistoriKIRI.DecisionTree.java

public String[] id3(Instances arff) {
    // Despite the method name, this builds a J48 (C4.5) tree
    J48 tree = new J48();
    try {
        tree.buildClassifier(arff);
    } catch (Exception ex) {
        Logger.getLogger(Controller.class.getName()).log(Level.SEVERE, null, ex);
    }
    System.out.println(tree.toString());

    // Count training instances the tree classifies correctly
    int nilaiBenar = 0, resultInt;
    float result = 0;
    for (int i = 0; i < arff.numInstances(); i++) {
        try {
            result = (float) tree.classifyInstance(arff.instance(i));
            resultInt = Math.round(result);
            if (resultInt == Integer.parseInt(arff.instance(i).stringValue(6))) {
                nilaiBenar++;
            }
        } catch (Exception ex) {
            Logger.getLogger(Controller.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    System.out.println("nilai: " + nilaiBenar + " " + arff.numInstances());
    double confident = nilaiBenar * 1.0 / arff.numInstances() * 100;
    System.out.println("Confident = " + confident + "%");
    String[] result2 = new String[5];
    return result2;
}
From source file: DataMiningLogHistoriKIRIPercobaan2.DecisionTree.java

public double calculateConfiden(Instances arff) {
    // Compute the confidence: fraction of training instances classified correctly
    int nilaiBenar = 0, resultInt;
    float result = 0;
    for (int i = 0; i < arff.numInstances(); i++) {
        try {
            result = (float) tree.classifyInstance(arff.instance(i));
            resultInt = Math.round(result);
            if (resultInt == Integer.parseInt(arff.instance(i).stringValue(6))) {
                nilaiBenar++;
            }
        } catch (Exception ex) {
            Logger.getLogger(Controller.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    double confident = nilaiBenar * 1.0 / arff.numInstances() * 100;
    return confident;
}
From source file: de.fub.maps.project.detector.model.inference.processhandler.CrossValidationProcessHandler.java
License: Open Source License

@Override
protected void handle() {
    Collection<Attribute> attributeList = getInferenceModel().getAttributes();
    Instances trainingSet = new Instances("Classes", new ArrayList<Attribute>(attributeList), 9);
    trainingSet.setClassIndex(0);
    HashMap<String, HashSet<TrackSegment>> dataset = getInferenceModel().getInput().getTrainingsSet();
    for (Entry<String, HashSet<TrackSegment>> entry : dataset.entrySet()) {
        for (TrackSegment trackSegment : entry.getValue()) {
            Instance instance = getInstance(entry.getKey(), trackSegment);
            trainingSet.add(instance);
        }
    }
    assert trainingSet.numInstances() > 0 : "Training set is empty and has no instances"; // NOI18N
    evaluate(trainingSet);
}