List of usage examples for weka.core Instances size
@Override public int size()
From source file:sentinets.Prediction.java
License:Open Source License
/**
 * Updates the SGD classifier wrapped by the {@code FilteredClassifier} held in
 * {@code this.cls} and serializes the result to
 * {@code <outputDir>/<models dir>/updatedClassifier.model}.
 *
 * <p>Steps, in order: {@code setInstances(inputFile)} is called (presumably it
 * populates {@code this.unlabled} - confirm), the instances are run through the
 * classifier's filter, the classifier is evaluated on them, then it is updated
 * and evaluated fold by fold on a shuffled copy, and finally updated with every
 * instance and evaluated once more. After each evaluation one
 * {@code {fMeasure(0), fMeasure(1), weightedFMeasure()}} triple is appended to
 * {@code metrics} and the textual report is appended to the returned string and
 * echoed to standard output.
 *
 * <p>NOTE(review): a single {@code Evaluation} object is reused for every
 * evaluation, so the reported figures presumably accumulate across calls -
 * confirm this is intended.
 *
 * <p>NOTE(review): with exactly one instance the fold count (2) exceeds the
 * number of instances; {@code trainCV} is expected to reject that, which would
 * end the update in the catch block - confirm.
 *
 * @param inputFile path handed to {@code setInstances}
 * @param metrics   receives one F-measure triple per evaluation performed
 * @return the accumulated report; if an exception occurs it is printed and the
 *         report built so far is returned
 */
public String updateModel(String inputFile, ArrayList<Double[]> metrics) {
    StringBuilder output = new StringBuilder();
    this.setInstances(inputFile);
    FilteredClassifier fcls = (FilteredClassifier) this.cls;
    SGD sgd = (SGD) fcls.getClassifier();
    Filter filter = fcls.getFilter();
    try {
        Instances insAll = Filter.useFilter(this.unlabled, filter);
        if (insAll.size() > 0) {
            Random rand = new Random(10);
            // Fall back to 2 folds when there are fewer than 10 instances.
            int folds = 10 > insAll.size() ? 2 : 10;
            Instances randData = new Instances(insAll);
            randData.randomize(rand);
            if (randData.classAttribute().isNominal()) {
                randData.stratify(folds);
            }

            Evaluation eval = new Evaluation(randData);
            eval.evaluateModel(sgd, insAll);
            reportEvaluation(eval, "Initial Evaluation", "Initial Evaluation", output, metrics);

            System.out.println("Cross Validated Evaluation");
            output.append("\n====").append("Cross Validated Evaluation").append("====\n");
            for (int n = 0; n < folds; n++) {
                Instances train = randData.trainCV(folds, n);
                Instances test = randData.testCV(folds, n);
                updateWithAll(sgd, train);
                eval.evaluateModel(sgd, test);
                reportEvaluation(eval, "Cross Validated Evaluation fold: " + n,
                        "Cross Validated Evaluation fold (" + n + ")", output, metrics);
            }

            updateWithAll(sgd, insAll);
            eval.evaluateModel(sgd, insAll);
            reportEvaluation(eval, "Final Evaluation", "Final Evaluation", output, metrics);

            fcls.setClassifier(sgd);
            String modelFilePath = outputDir + "/" + Utils.getOutDir(Utils.OutDirIndex.MODELS)
                    + "/updatedClassifier.model";
            weka.core.SerializationHelper.write(modelFilePath, fcls);
            output.append("\n").append("Updated Model saved at: ").append(modelFilePath);
        } else {
            output.append("No new instances for training the model.");
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return output.toString();
}

/** Feeds every instance of {@code data}, in order, to {@code classifier.updateClassifier}. */
private static void updateWithAll(SGD classifier, Instances data) throws Exception {
    for (int i = 0; i < data.numInstances(); i++) {
        classifier.updateClassifier(data.instance(i));
    }
}

/**
 * Prints the summary and per-class details of {@code eval} under {@code consoleTitle},
 * appends the same text to {@code output} under {@code sectionTitle}, and records the
 * F-measure triple in {@code metrics}.
 */
private static void reportEvaluation(Evaluation eval, String consoleTitle, String sectionTitle,
        StringBuilder output, ArrayList<Double[]> metrics) throws Exception {
    System.out.println(consoleTitle);
    String summary = eval.toSummaryString();
    System.out.println(summary);
    String classDetails = eval.toClassDetailsString();
    System.out.println(classDetails);
    output.append("\n====").append(sectionTitle).append("====\n");
    output.append("\n").append(summary);
    output.append("\n").append(classDetails);
    metrics.add(new Double[] { eval.fMeasure(0), eval.fMeasure(1), eval.weightedFMeasure() });
}
From source file:src.BigDataClassifier.GenerateFolds.java
public void generateFolds(Instances trainDataset) throws Exception { //randomize data Random rand = new Random(1); //set folds/*from w w w.j a v a 2 s . c o m*/ int folds = 3; //create random dataset Instances randData = new Instances(trainDataset); randData.randomize(rand); Instances[] result = new Instances[folds * 2]; //cross-validate for (int n = 0; n < folds; n++) { trainDataset = randData.trainCV(folds, n); System.out.println("Train dataset size is = " + trainDataset.size()); Instances testDataset = randData.testCV(folds, n); System.out.println("Test dataset size is = " + testDataset.size()); result[n] = trainDataset; result[n + 1] = testDataset; trainDataset2 = trainDataset; testDataset2 = testDataset; } trainDatasetSize = trainDataset2.size(); testDatasetSize = testDataset2.size(); }
From source file:test.org.moa.opencl.IBk.java
License:Open Source License
/** * Calculates the class membership probabilities for the given test instance. * * @param instance the instance to be classified * @return predicted class probability distribution * @throws Exception if an error occurred during the prediction *///from www . j a v a 2 s .c o m public double[] distributionForInstance(Instance instance) throws Exception { if (m_Train.numInstances() == 0) { //throw new Exception("No training instances!"); return m_defaultModel.distributionForInstance(instance); } if ((m_WindowSize > 0) && (m_Train.numInstances() > m_WindowSize)) { m_kNNValid = false; boolean deletedInstance = false; while (m_Train.numInstances() > m_WindowSize) { m_Train.delete(0); } //rebuild datastructure KDTree currently can't delete if (deletedInstance == true) m_NNSearch.setInstances(m_Train); } // Select k by cross validation if (!m_kNNValid && (m_CrossValidate) && (m_kNNUpper >= 1)) { crossValidate(); } m_NNSearch.addInstanceInfo(instance); Instances neighbours = m_NNSearch.kNearestNeighbours(instance, m_kNN); double[] distances = m_NNSearch.getDistances(); System.out.print("distances weka "); for (int i = 0; i < distances.length; ++i) System.out.print(" " + distances[i]); System.out.println(); System.out.println("Neighbours"); for (int i = 0; i < neighbours.size(); ++i) System.out.println(neighbours.get(i)); double[] distribution = makeDistribution(neighbours, distances); return distribution; }
From source file:tr.gov.ulakbim.jDenetX.experiments.wrappers.EvalActiveBoostingID.java
License:Open Source License
protected void selfTrain(Instance testInst) { int maxInstances = this.maxInstancesOption.getValue(); int poolSizeRatio = poolSizeOption.getValue(); int poolLimit = maxInstances / poolSizeRatio; int poolCount = 0; VotedInstancePool vInstPool = SelfOzaBoostID.getVotedInstancePool(); noOfClassesInPool = vInstPool.getNoOfClasses(); System.out.println("No of instances in the pool: " + vInstPool.getSize()); System.out.println("No of classes in the pool: " + noOfClassesInPool); if (vInstPool.getSize() > 10) { ArrayList<Attribute> attrs = new ArrayList<Attribute>(); for (int i = 0; i < testInst.numAttributes(); i++) { attrs.add(testInst.attribute(i)); }//w w w . j a va 2 s.co m Instances instances = new Instances("instances", attrs, vInstPool.getSize()); Iterator instanceIt = vInstPool.iterator(); System.out.println("Size of pool: " + vInstPool.getSize()); while (instanceIt.hasNext() && poolCount < poolLimit) { VotedInstance vInstance = (VotedInstance) instanceIt.next(); ((Instances) instances).add(vInstance.getInstance()); poolCount++; } System.out.println("Size of instances: " + instances.size()); instances = clusterInstances(instances); InstanceStream activeStream = new CachedInstancesStream((Instances) instances); System.out.println("Selftraining have been started"); System.out.println("Number of self training instances: " + instances.numInstances()); long treeSize = vInstPool.getSize(); long limit = treeSize / SAMPLING_LIMIT; Instance inst = null; for (long j = 0; j < limit && activeStream.hasMoreInstances(); j++) { inst = activeStream.nextInstance(); if (inst.numAttributes() == attrs.size()) { model.trainOnInstance(inst); } } } }
From source file:tr.gov.ulakbim.jDenetX.experiments.wrappers.EvalActiveBoostingID.java
License:Open Source License
public static Instances clusterInstances(Instances data) { XMeans xmeans = new XMeans(); Remove filter = new Remove(); Instances dataClusterer = null;/*from ww w.j a v a2s. com*/ if (data == null) { throw new NullPointerException("Data is null at clusteredInstances method"); } //Get the attributes from the data for creating the sampled_data object ArrayList<Attribute> attrList = new ArrayList<Attribute>(); Enumeration attributes = data.enumerateAttributes(); while (attributes.hasMoreElements()) { attrList.add((Attribute) attributes.nextElement()); } Instances sampled_data = new Instances(data.relationName(), attrList, 0); data.setClassIndex(data.numAttributes() - 1); sampled_data.setClassIndex(data.numAttributes() - 1); filter.setAttributeIndices("" + (data.classIndex() + 1)); data.remove(0);//In Wavelet Stream of MOA always the first element comes without class try { filter.setInputFormat(data); dataClusterer = Filter.useFilter(data, filter); String[] options = new String[4]; options[0] = "-L"; // max. 
iterations options[1] = Integer.toString(noOfClassesInPool - 1); if (noOfClassesInPool > 2) { options[1] = Integer.toString(noOfClassesInPool - 1); xmeans.setMinNumClusters(noOfClassesInPool - 1); } else { options[1] = Integer.toString(noOfClassesInPool); xmeans.setMinNumClusters(noOfClassesInPool); } xmeans.setMaxNumClusters(data.numClasses() + 1); System.out.println("No of classes in the pool: " + noOfClassesInPool); xmeans.setUseKDTree(true); //xmeans.setOptions(options); xmeans.buildClusterer(dataClusterer); System.out.println("Xmeans\n:" + xmeans); } catch (Exception e) { e.printStackTrace(); } //System.out.println("Assignments\n: " + assignments); ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(xmeans); try { eval.evaluateClusterer(data); int classesToClustersMap[] = eval.getClassesToClusters(); //check the classes to cluster map int clusterNo = 0; for (int i = 0; i < data.size(); i++) { clusterNo = xmeans.clusterInstance(dataClusterer.get(i)); //Check if the class value of instance and class value of cluster matches if ((int) data.get(i).classValue() == classesToClustersMap[clusterNo]) { sampled_data.add(data.get(i)); } } } catch (Exception e) { e.printStackTrace(); } return ((Instances) sampled_data); }
From source file:utils.AttributePairsUtils.java
License:Open Source License
/** * Get pairs of attributes// ww w. ja v a2 s. c o m * * @param dataset Dataset * @return List of pairs */ public static ArrayList<AttributesPair> getAttributePairs(MultiLabelInstances dataset) { Instances instances = dataset.getDataSet(); //Return possible combinations among labels int possibleCombinations = getPossibleCombinations(dataset.getNumLabels()); int[] labelPairAppearances = new int[possibleCombinations]; int[] currentLabelValues; int[] labelIndices = dataset.getLabelIndices(); for (int i = 0; i < instances.size(); i++) { currentLabelValues = DataInfoUtils.getCurrentValueLabels(instances, i, labelIndices); labelPairAppearances = updateAttributePairs(labelPairAppearances, currentLabelValues); } return makeAttributePairs(labelPairAppearances, labelIndices, dataset); }
From source file:utils.DataInfoUtils.java
License:Open Source License
/** * Get label frequency given the index/* www . ja v a 2s. c om*/ * * @param dataset Dataset * @param labelIndex Label index * @return Frequency of label */ public static double getLabelFrequency(MultiLabelInstances dataset, int labelIndex) { double value = 0.0; Instances instances = dataset.getDataSet(); double isLabel; for (int i = 0; i < instances.size(); i++) { isLabel = instances.instance(i).value(labelIndex); if (isLabel == 1.0) { value++; } } return value / dataset.getNumInstances(); }
From source file:utils.MetricUtils.java
License:Open Source License
/**
 * Builds one {@code ImbalancedFeature} per label, carrying the label's
 * appearances, its intra-class imbalance ratio, the variance of its 0/1 counts
 * and its inter-class imbalance ratio.
 *
 * <p>For each label, {@code n1} is the number of instances whose value is 1.0
 * and {@code n0} the number of all others. The intra-class ratio is the larger
 * count divided by the smaller one, or 0 when either count is 0. The inter-class
 * ratio is the appearances of {@code labelsByFrequency[0]} divided by the
 * appearances of the current label, or {@code NaN} when the current label has
 * no appearances. The result follows the order of
 * {@code dataset.getLabelIndices()}.
 *
 * @param dataset Dataset
 * @param labelsByFrequency Labels; element 0 supplies the maximum appearance count
 * @return one ImbalancedFeature per label, in label-index order
 */
public static ImbalancedFeature[] getImbalancedDataByIRInterClass(MultiLabelInstances dataset,
        ImbalancedFeature[] labelsByFrequency) {
    int[] labelIndices = dataset.getLabelIndices();
    ImbalancedFeature[] imbalancedData = new ImbalancedFeature[labelIndices.length];
    Instances instances = dataset.getDataSet();

    // Divide by 2.0 so that an odd instance count is not truncated before the
    // value is used in the variance below.
    double mean = dataset.getNumInstances() / 2.0;

    for (int i = 0; i < labelIndices.length; i++) {
        Attribute currentAttribute = instances.attribute(labelIndices[i]);

        int n1 = 0;
        int n0 = 0;
        for (int j = 0; j < instances.size(); j++) {
            if (instances.instance(j).value(currentAttribute) == 1.0) {
                n1++;
            } else {
                n0++;
            }
        }

        double irIntraClass;
        if (n0 == 0 || n1 == 0) {
            irIntraClass = 0;
        } else if (n0 > n1) {
            irIntraClass = n0 / (n1 * 1.0);
        } else {
            irIntraClass = n1 / (n0 * 1.0);
        }

        double variance = (Math.pow((n0 - mean), 2) + Math.pow((n1 - mean), 2)) / 2;

        ImbalancedFeature currentLabel = getLabelByLabelname(currentAttribute.name(), labelsByFrequency);
        int maxAppearance = labelsByFrequency[0].getAppearances();

        double irInterClass;
        if (currentLabel.getAppearances() <= 0) {
            irInterClass = Double.NaN;
        } else {
            irInterClass = maxAppearance / (currentLabel.getAppearances() * 1.0);
        }

        imbalancedData[i] = new ImbalancedFeature(currentAttribute.name(), currentLabel.getAppearances(),
                irIntraClass, variance, irInterClass);
    }

    return imbalancedData;
}
From source file:utils.MetricUtils.java
License:Open Source License
/**
 * Builds one {@code ImbalancedFeature} per label, carrying the label's
 * imbalance ratio and the variance of its 0/1 counts.
 *
 * <p>For each label, {@code n1} is the number of instances whose value is 1.0
 * and {@code n0} the number of all others. The imbalance ratio is the larger
 * count divided by the smaller one, or 0 when either count is 0. The result
 * follows the order of {@code dataset.getLabelIndices()}.
 *
 * @param dataset Datasets
 * @return Labels as ImbalanceFeature array
 */
public static ImbalancedFeature[] getImbalancedData(MultiLabelInstances dataset) {
    int[] labelIndices = dataset.getLabelIndices();
    ImbalancedFeature[] imbalancedData = new ImbalancedFeature[labelIndices.length];
    Instances instances = dataset.getDataSet();

    // Divide by 2.0 so that an odd instance count is not truncated before the
    // value is used in the variance below.
    double mean = dataset.getNumInstances() / 2.0;

    for (int i = 0; i < labelIndices.length; i++) {
        Attribute current = instances.attribute(labelIndices[i]);

        int n1 = 0;
        int n0 = 0;
        for (int j = 0; j < instances.size(); j++) {
            if (instances.instance(j).value(current) == 1.0) {
                n1++;
            } else {
                n0++;
            }
        }

        double ir;
        if (n0 == 0 || n1 == 0) {
            ir = 0;
        } else if (n0 > n1) {
            ir = n0 / (n1 * 1.0);
        } else {
            ir = n1 / (n0 * 1.0);
        }

        double variance = (Math.pow((n0 - mean), 2) + Math.pow((n1 - mean), 2)) / 2;

        imbalancedData[i] = new ImbalancedFeature(current.name(), ir, variance);
    }

    return imbalancedData;
}
From source file:utils.MetricUtils.java
License:Open Source License
/** * Obtain labels ordered by number of appearances * /*from w w w. ja v a 2s .c om*/ * @param dataset Dataset * @return Labels as ImbalanceFeature objects */ public static ImbalancedFeature[] getImbalancedDataByAppearances(MultiLabelInstances dataset) { int[] labelIndices = dataset.getLabelIndices(); ImbalancedFeature[] imbalancedData = new ImbalancedFeature[labelIndices.length]; Instances instances = dataset.getDataSet(); int appearances = 0; double is; Attribute current; for (int i = 0; i < labelIndices.length; i++) { current = instances.attribute(labelIndices[i]); for (int j = 0; j < instances.size(); j++) { is = instances.instance(j).value(current); if (is == 1.0) { appearances++; } } imbalancedData[i] = new ImbalancedFeature(current.name(), appearances); appearances = 0; } return imbalancedData; }