List of usage examples for weka.core Instances size
@Override public int size()
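Instances.size() returns the number of instances in the dataset; in recent Weka releases (3.7 and later, where Instances extends java.util.AbstractList<Instance>) it is equivalent to numInstances(). A minimal, self-contained usage sketch (the ARFF path is a placeholder):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class InstancesSizeDemo {
    public static void main(String[] args) throws Exception {
        // Load a dataset; "iris.arff" is a placeholder path.
        Instances data = DataSource.read("iris.arff");
        // size() returns the number of instances, same value as numInstances().
        System.out.println("Dataset contains " + data.size() + " instances");
    }
}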
From source file:lu.lippmann.cdb.lab.mds.MDSViewBuilder.java
License:Open Source License
/** Builds the filtered XY series for the MDS scatter plot. */
private static void buildFilteredSeries(final MDSResult mdsResult, final XYPlot xyPlot,
        final String... attrNameToUseAsPointTitle) throws Exception {
    final CollapsedInstances distMdsRes = mdsResult.getCInstances();
    final Instances instances = distMdsRes.getInstances();
    final SimpleMatrix coordinates = mdsResult.getCoordinates();
    final Instances collapsedInstances = mdsResult.getCollapsedInstances();

    // Find the size of the largest cluster (used to scale point shapes)
    int maxSize = 0;
    if (distMdsRes.isCollapsed()) {
        final List<Instances> clusters = distMdsRes.getCentroidMap().getClusters();
        final int nbCentroids = clusters.size();
        maxSize = clusters.get(0).size();
        for (int i = 1; i < nbCentroids; i++) {
            final int currentSize = clusters.get(i).size();
            if (currentSize > maxSize) {
                maxSize = currentSize;
            }
        }
    }

    Attribute clsAttribute = null;
    int nbClass = 1;
    if (instances.classIndex() != -1) {
        clsAttribute = instances.classAttribute();
        nbClass = clsAttribute.numValues();
    }

    final XYSeriesCollection dataset = (XYSeriesCollection) xyPlot.getDataset();
    final int fMaxSize = maxSize;
    final List<XYSeries> lseries = new ArrayList<XYSeries>();

    // No class: add one dummy series
    if (nbClass <= 1) {
        lseries.add(new XYSeries("Serie #1", false));
    } else {
        // Some classes: add one series per class
        for (int i = 0; i < nbClass; i++) {
            lseries.add(new XYSeries(clsAttribute.value(i), false));
        }
    }
    dataset.removeAllSeries();

    /* Initialize filtered series */
    final List<Instances> filteredInstances = new ArrayList<Instances>();
    for (int i = 0; i < lseries.size(); i++) {
        filteredInstances.add(new Instances(collapsedInstances, 0));
    }
    final Map<Tuple<Integer, Integer>, Integer> correspondanceMap = new HashMap<Tuple<Integer, Integer>, Integer>();

    for (int i = 0; i < collapsedInstances.numInstances(); i++) {
        final Instance oInst = collapsedInstances.instance(i);
        int indexOfSerie = 0;
        if (oInst.classIndex() != -1) {
            if (distMdsRes.isCollapsed()) {
                indexOfSerie = getStrongestClass(i, distMdsRes);
            } else {
                indexOfSerie = (int) oInst.value(oInst.classAttribute());
            }
        }
        lseries.get(indexOfSerie).add(coordinates.get(i, 0), coordinates.get(i, 1));
        filteredInstances.get(indexOfSerie).add(oInst);
        if (distMdsRes.isCollapsed()) {
            // Remember which centroid each (series, item) pair came from
            correspondanceMap.put(new Tuple<Integer, Integer>(indexOfSerie,
                    filteredInstances.get(indexOfSerie).numInstances() - 1), i);
        }
    }

    final List<Paint> colors = new ArrayList<Paint>();
    for (final XYSeries series : lseries) {
        dataset.addSeries(series);
    }

    if (distMdsRes.isCollapsed()) {
        final XYLineAndShapeRenderer xyRenderer = new XYLineAndShapeRenderer(false, true) {
            private static final long serialVersionUID = -6019883886470934528L;

            @Override
            public void drawItem(Graphics2D g2, XYItemRendererState state,
                    java.awt.geom.Rectangle2D dataArea, PlotRenderingInfo info, XYPlot plot,
                    ValueAxis domainAxis, ValueAxis rangeAxis, XYDataset dataset, int series,
                    int item, CrosshairState crosshairState, int pass) {
                if (distMdsRes.isCollapsed()) {
                    final Integer centroidIndex = correspondanceMap
                            .get(new Tuple<Integer, Integer>(series, item));
                    final Instances cluster = distMdsRes.getCentroidMap().getClusters().get(centroidIndex);
                    int size = cluster.size();
                    // Scale the point size by the cluster size
                    final int shapeSize = (int) (MAX_POINT_SIZE * size / fMaxSize + 1);

                    final double x1 = plot.getDataset().getX(series, item).doubleValue();
                    final double y1 = plot.getDataset().getY(series, item).doubleValue();

                    Map<Object, Integer> mapRepartition = new HashMap<Object, Integer>();
                    mapRepartition.put("No class", size);
                    if (cluster.classIndex() != -1) {
                        mapRepartition = WekaDataStatsUtil.getClassRepartition(cluster);
                    }

                    final RectangleEdge xAxisLocation = plot.getDomainAxisEdge();
                    final RectangleEdge yAxisLocation = plot.getRangeAxisEdge();
                    final double fx = domainAxis.valueToJava2D(x1, dataArea, xAxisLocation);
                    final double fy = rangeAxis.valueToJava2D(y1, dataArea, yAxisLocation);

                    setSeriesShape(series,
                            new Ellipse2D.Double(-shapeSize / 2, -shapeSize / 2, shapeSize, shapeSize));
                    super.drawItem(g2, state, dataArea, info, plot, domainAxis, rangeAxis, dataset,
                            series, item, crosshairState, pass);

                    // Draw pie
                    if (ENABLE_PIE_SHART) {
                        createPieChart(g2, (int) (fx - shapeSize / 2), (int) (fy - shapeSize / 2),
                                shapeSize, mapRepartition, size, colors);
                    }
                } else {
                    super.drawItem(g2, state, dataArea, info, plot, domainAxis, rangeAxis, dataset,
                            series, item, crosshairState, pass);
                }
            }
        };
        xyPlot.setRenderer(xyRenderer);
    }

    final XYToolTipGenerator gen = new XYToolTipGenerator() {
        @Override
        public String generateToolTip(XYDataset dataset, int series, int item) {
            if (distMdsRes.isCollapsed()) {
                final StringBuilder res = new StringBuilder("<html>");
                final Integer centroidIndex = correspondanceMap
                        .get(new Tuple<Integer, Integer>(series, item));
                final Instance centroid = distMdsRes.getCentroidMap().getCentroids().get(centroidIndex);
                final Instances cluster = distMdsRes.getCentroidMap().getClusters().get(centroidIndex);
                // Set the same class index for the cluster as for the original instances
                //System.out.println("Cluster index = " + cluster.classIndex() + "/" + instances.classIndex());
                cluster.setClassIndex(instances.classIndex());
                Map<Object, Integer> mapRepartition = new HashMap<Object, Integer>();
                mapRepartition.put("No class", cluster.size());
                if (cluster.classIndex() != -1) {
                    mapRepartition = WekaDataStatsUtil.getClassRepartition(cluster);
                }
                res.append(InstanceFormatter.htmlFormat(centroid, false)).append("<br/>");
                for (final Map.Entry<Object, Integer> entry : mapRepartition.entrySet()) {
                    if (entry.getValue() != 0) {
                        res.append("Class :<b>'" + StringEscapeUtils.escapeHtml(entry.getKey().toString())
                                + "</b>' -> " + entry.getValue()).append("<br/>");
                    }
                }
                res.append("</html>");
                return res.toString();
            } else {
                //return InstanceFormatter.htmlFormat(filteredInstances.get(series).instance(item),true);
                return InstanceFormatter.shortHtmlFormat(filteredInstances.get(series).instance(item));
            }
        }
    };

    final Shape shape = new Ellipse2D.Float(0f, 0f, MAX_POINT_SIZE, MAX_POINT_SIZE);
    ((XYLineAndShapeRenderer) xyPlot.getRenderer()).setUseOutlinePaint(true);
    for (int p = 0; p < nbClass; p++) {
        xyPlot.getRenderer().setSeriesToolTipGenerator(p, gen);
        ((XYLineAndShapeRenderer) xyPlot.getRenderer()).setLegendShape(p, shape);
        xyPlot.getRenderer().setSeriesOutlinePaint(p, Color.BLACK);
    }
    for (int ii = 0; ii < nbClass; ii++) {
        colors.add(xyPlot.getRenderer().getItemPaint(ii, 0));
    }

    if (attrNameToUseAsPointTitle.length > 0) {
        final Attribute attrToUseAsPointTitle = instances.attribute(attrNameToUseAsPointTitle[0]);
        if (attrToUseAsPointTitle != null) {
            final XYItemLabelGenerator lg = new XYItemLabelGenerator() {
                @Override
                public String generateLabel(final XYDataset dataset, final int series, final int item) {
                    return filteredInstances.get(series).instance(item).stringValue(attrToUseAsPointTitle);
                }
            };
            xyPlot.getRenderer().setBaseItemLabelGenerator(lg);
            xyPlot.getRenderer().setBaseItemLabelsVisible(true);
        }
    }
}
From source file:machinelearningproject.Tree.java
public String getModeClass(Instances instances, int classIdx) {
    HashMap<String, Integer> classMap = new HashMap<>();
    int numInstances = instances.size();
    for (int i = 0; i < numInstances; i++) {
        Instance instance = instances.get(i);
        String key = instance.stringValue(classIdx);
        if (!classMap.containsKey(key)) {
            classMap.put(key, 1);
        } else {
            classMap.put(key, classMap.get(key) + 1);
        }
    }
    Iterator<String> keySetIterator = classMap.keySet().iterator();
    String modeClass = "";
    int count = 0;
    while (keySetIterator.hasNext()) {
        String key = keySetIterator.next();
        System.out.println("key: " + key + " value: " + classMap.get(key));
        if (count < classMap.get(key)) {
            modeClass = key;
            count = classMap.get(key);
        }
    }
    return modeClass;
}
From source file:machinelearningproject.Tree.java
public double calculateEntropy(Instances instances, int attrIdx) {
    HashMap<String, Integer> classMap = new HashMap<>();
    double entropy = 0.0;
    int numInstances = instances.size();
    for (int i = 0; i < numInstances; i++) {
        Instance instance = instances.get(i);
        String key = instance.stringValue(attrIdx);
        if (!classMap.containsKey(key)) {
            classMap.put(key, 1);
        } else {
            classMap.put(key, classMap.get(key) + 1);
        }
    }
    // reference source code: http://onoffswitch.net/building-decision-tree/
    for (String key : classMap.keySet()) {
        double prob = (double) classMap.get(key) / (double) numInstances;
        entropy -= prob * (Math.log(prob) / Math.log(2));
    }
    return entropy;
}
From source file:machinelearningproject.Tree.java
public double calculateInformationGain(Instances instances, int attrIdx, int classIdx) throws Exception {
    HashMap<String, Integer> attrCount = new HashMap<>();
    HashMap<String, Integer> attrClassCount = new HashMap<>();
    int numInstances = instances.size();
    for (int i = 0; i < numInstances; i++) {
        Instance instance = instances.get(i);
        String attrKey = instance.stringValue(attrIdx);
        if (!attrCount.containsKey(attrKey)) {
            attrCount.put(attrKey, 1);
        } else {
            attrCount.put(attrKey, attrCount.get(attrKey) + 1);
        }
        // Joint count of (attribute value, class value) pairs
        String attrClassKey = instance.stringValue(attrIdx) + "-" + instance.stringValue(classIdx);
        if (!attrClassCount.containsKey(attrClassKey)) {
            attrClassCount.put(attrClassKey, 1);
        } else {
            attrClassCount.put(attrClassKey, attrClassCount.get(attrClassKey) + 1);
        }
    }
    // Weighted average entropy of the class within each attribute value
    double attrEntropy = 0.0;
    for (String attrKey : attrCount.keySet()) {
        double bufferEntropy = 0.0;
        for (String key : attrClassCount.keySet()) {
            String attrValue = key.split("-")[0];
            if (attrKey.equals(attrValue)) {
                double prob = (double) attrClassCount.get(key) / (double) attrCount.get(attrKey);
                bufferEntropy -= prob * (Math.log(prob) / Math.log(2));
            }
        }
        attrEntropy += (attrCount.get(attrKey) / (double) numInstances) * bufferEntropy;
    }
    double classEntropy = calculateEntropy(instances, classIdx);
    return classEntropy - attrEntropy;
}
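For context, the two Tree methods above implement the standard formulas H(X) = -Σ p·log2(p) and IG(A) = H(class) - H(class | A). A hedged sketch of how they might be invoked (the driver class, ARFF path, and setup are illustrative, not from the source; it assumes all attributes are nominal, since stringValue(...) fails on numeric ones, and that the class attribute is last):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class TreeDemo {
    public static void main(String[] args) throws Exception {
        // Placeholder path; assumes the Tree class above is on the classpath.
        Instances data = DataSource.read("weather.nominal.arff");
        int classIdx = data.numAttributes() - 1;
        Tree tree = new Tree();
        System.out.println("Mode class: " + tree.getModeClass(data, classIdx));
        // Print the information gain of each non-class attribute.
        for (int a = 0; a < classIdx; a++) {
            System.out.printf("IG(%s) = %.4f%n", data.attribute(a).name(),
                    tree.calculateInformationGain(data, a, classIdx));
        }
    }
}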
From source file:machinelearningq2.BasicNaiveBayesV1.java
/**
 * This initial classifier will contain a two-dimensional array of counts.
 *
 * @param ins the training instances
 * @throws Exception
 */
@Override
public void buildClassifier(Instances ins) throws Exception {
    // assigns the class position of the instance
    ins.setClassIndex(ins.numAttributes() - 1);
    countData = ins.size();
    classValueCounts = new int[ins.numClasses()];
    System.out.println(ins);
    if (laplace) {
        laplaceCorrection(ins);
    }
    // store the values
    for (Instance line : ins) {
        double classValue = line.classValue();
        classValueCounts[(int) classValue]++;
        for (int i = 0; i < line.numAttributes() - 1; i++) {
            double attributeValue = line.value(i);
            DataFound d = new DataFound(attributeValue, classValue, i);
            int index = data.indexOf(d);
            if (index == -1) {
                // the (attribute value, class value) pair hasn't been seen yet
                data.add(d);
            } else {
                data.get(index).incrementCount();
            }
        }
    }
    System.out.println("");
    System.out.println(Arrays.toString(classValueCounts));
}
From source file:machinelearningq2.ExtendedNaiveBayes.java
/**
 * Build classifier will either build a Gaussian or a discrete classifier,
 * dependent on user input.
 *
 * @param ins the training instances
 * @throws Exception
 */
@Override
public void buildClassifier(Instances ins) throws Exception {
    if ("d".equals(gausianOrDiscretise)) {
        buildDiscreteClassifier(ins);
    } else {
        countData = ins.size();
        // assigns the class position of the instance
        ins.setClassIndex(ins.numAttributes() - 1);
        classValueCounts = new int[ins.numClasses()];
        attributeMeans = new double[ins.numClasses()][ins.numAttributes() - 1];
        attributeVariance = new double[ins.numClasses()][ins.numAttributes() - 1];
        // store the values
        for (Instance line : ins) {
            double classValue = line.classValue();
            classValueCounts[(int) classValue]++;
            for (int i = 0; i < line.numAttributes() - 1; i++) {
                double attributeValue = line.value(i);
                attributeMeans[(int) classValue][i] += attributeValue;
                DataFound d = new DataFound(attributeValue, classValue, i);
                int index = data.indexOf(d);
                if (index == -1) {
                    // the pair hasn't been seen yet
                    data.add(d);
                } else {
                    data.get(index).incrementCount();
                }
            }
        }
        System.out.println("Attribute Totals: " + Arrays.deepToString(attributeMeans));
        // compute the means
        for (int j = 0; j < classValueCounts.length; j++) {
            for (int i = 0; i < ins.numAttributes() - 1; i++) {
                attributeMeans[j][i] = attributeMeans[j][i] / classValueCounts[j];
            }
        }
        // calculate the variance
        for (int i = 0; i < data.size(); i++) {
            double cv = data.get(i).getClassValue();
            double atIn = data.get(i).getAttributeIndex();
            double squareDifference = Math
                    .pow(data.get(i).getAttributeValue() - attributeMeans[(int) cv][(int) atIn], 2);
            attributeVariance[(int) cv][(int) atIn] += squareDifference;
        }
        // sample variance, then square root: attributeVariance ends up
        // holding the standard deviation, not the variance
        for (int j = 0; j < classValueCounts.length; j++) {
            for (int i = 0; i < ins.numAttributes() - 1; i++) {
                attributeVariance[j][i] = attributeVariance[j][i] / (classValueCounts[j] - 1);
                attributeVariance[j][i] = Math.sqrt(attributeVariance[j][i]);
            }
        }
        System.out.println("Attribute Means: " + Arrays.deepToString(attributeMeans));
        System.out.println("Variance: " + Arrays.deepToString(attributeVariance));
    }
}
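The per-class means and standard deviations collected above are typically consumed at prediction time through the Gaussian density p(x | c) = exp(-(x - μ)² / (2σ²)) / (σ√(2π)). A minimal sketch of that step (the helper name gaussianDensity is illustrative, not from the source; note that attributeVariance above stores the standard deviation after the final Math.sqrt):

// Illustrative helper, not part of the source: evaluates the Gaussian
// probability density for one attribute value given a class's mean and
// standard deviation (sigma).
static double gaussianDensity(double x, double mean, double sigma) {
    double z = (x - mean) / sigma;
    return Math.exp(-0.5 * z * z) / (sigma * Math.sqrt(2 * Math.PI));
}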
From source file:machinelearningq2.ExtendedNaiveBayes.java
/**
 * The method buildDiscreteClassifier discretizes the data and then builds a
 * classifier.
 *
 * @param ins the training instances
 * @throws Exception
 */
public void buildDiscreteClassifier(Instances ins) throws Exception {
    ins = discretize(ins);
    // assigns the class position of the instance
    ins.setClassIndex(ins.numAttributes() - 1);
    countData = ins.size();
    classValueCounts = new int[ins.numClasses()];
    // store the values
    for (Instance line : ins) {
        double classValue = line.classValue();
        classValueCounts[(int) classValue]++;
        for (int i = 0; i < line.numAttributes() - 1; i++) {
            double attributeValue = line.value(i);
            DataFound d = new DataFound(attributeValue, classValue, i);
            int index = data.indexOf(d);
            if (index == -1) {
                // the pair hasn't been seen yet
                data.add(d);
            } else {
                data.get(index).incrementCount();
            }
        }
    }
}
From source file:machinelearning_cw.KNN.java
/**
 * A slower method for estimating the accuracy of using a value as k by
 * applying Leave-One-Out Cross-Validation (LOOCV).
 *
 * @param k value of k to be tested.
 * @param trainingData The data against which the classifier is to be tested.
 * @return Accuracy of the calling classifier using the given value of k.
 * @throws Exception
 */
private double estimateAccuracyByLOOCV(int k, Instances trainingData) throws Exception {
    ArrayList<Double> accuracies = new ArrayList<Double>();
    /* In a training set of size n, train the model on n-1 instances and test on 1 */
    int n = trainingData.size();
    for (int i = 0; i < n; i++) {
        Instances trainingSet = new Instances(trainingData);
        Instance testInstance = trainingSet.remove(i);
        BasicKNN classifier = new BasicKNN();
        classifier.setK(k);
        classifier.buildClassifier(trainingSet);
        /* Test the classifier on the held-out instance and measure accuracy */
        double accuracy = Helpers.findClassifierAccuracy(classifier, testInstance);
        accuracies.add(accuracy);
    }
    /* find average accuracy */
    double count = accuracies.size();
    double sum = 0;
    for (Double eachAccuracy : accuracies) {
        sum += eachAccuracy;
    }
    return sum / count;
}
From source file:machinelearning_cw.MachineLearning_CW.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    /* Initialize test datasets */
    ArrayList<Instances> trainData = new ArrayList<Instances>();
    ArrayList<Instances> testData = new ArrayList<Instances>();

    Instances train = WekaLoader.loadData("PitcherTrain.arff");
    Instances test = WekaLoader.loadData("PitcherTest.arff");
    trainData.add(train);
    testData.add(test);

    Instances bananaTrain = WekaLoader.loadData("banana-train.arff");
    Instances bananaTest = WekaLoader.loadData("banana-test.arff");
    trainData.add(bananaTrain);
    testData.add(bananaTest);

    Instances cloudTrain = WekaLoader.loadData("clouds-train.arff");
    Instances cloudTest = WekaLoader.loadData("clouds-test.arff");
    trainData.add(cloudTrain);
    testData.add(cloudTest);

    Instances concentricTrain = WekaLoader.loadData("concentric-train.arff");
    Instances concentricTest = WekaLoader.loadData("concentric-test.arff");
    trainData.add(concentricTrain);
    testData.add(concentricTest);

    // 3-dimensional data set
    Instances habermanTrain = WekaLoader.loadData("haberman-train.arff");
    Instances habermanTest = WekaLoader.loadData("haberman-test.arff");
    trainData.add(habermanTrain);
    testData.add(habermanTest);

    // >3-dimensional data sets
    Instances thyroidTrain = WekaLoader.loadData("thyroid-train.arff");
    Instances thyroidTest = WekaLoader.loadData("thyroid-test.arff");
    trainData.add(thyroidTrain);
    testData.add(thyroidTest);

    Instances heartTrain = WekaLoader.loadData("heart-train.arff");
    Instances heartTest = WekaLoader.loadData("heart-test.arff");
    trainData.add(heartTrain);
    testData.add(heartTest);

    Instances liverTrain = WekaLoader.loadData("liver-train.arff");
    Instances liverTest = WekaLoader.loadData("liver-test.arff");
    trainData.add(liverTrain);
    testData.add(liverTest);

    Instances pendigitisTrain = WekaLoader.loadData("pendigitis-train.arff");
    Instances pendigitisTest = WekaLoader.loadData("pendigitis-test.arff");
    trainData.add(pendigitisTrain);
    testData.add(pendigitisTest);

    Instances phonemeTrain = WekaLoader.loadData("phoneme-train.arff");
    Instances phonemeTest = WekaLoader.loadData("phoneme-test.arff");
    trainData.add(phonemeTrain);
    testData.add(phonemeTest);

    Instances yeastTrain = WekaLoader.loadData("yeast-train.arff");
    Instances yeastTest = WekaLoader.loadData("yeast-test.arff");
    trainData.add(yeastTrain);
    testData.add(yeastTest);

    /* Test to see that BasicKNN provides the same results obtained from
     * the hand exercise. */
    System.out.println("Test to see that BasicKNN provides the same"
            + " results obtained from the hand exercise:");
    System.out.println("(Ties are settled randomly)");
    BasicKNN basicKNN = new BasicKNN();
    basicKNN.buildClassifier(train);
    for (int i = 0; i < test.size(); i++) {
        Instance inst = test.get(i);
        System.out.println(i + 1 + ": " + basicKNN.classifyInstance(inst));
    }

    /* Initialize alternative classifiers */
    IBk wekaKNN = new IBk();
    NaiveBayes naiveBayes = new NaiveBayes();
    J48 decisionTree = new J48();
    SMO svm = new SMO();

    /* Tests for experiments 1, 2 & 3 */
    KNN myKNN = new KNN();
    myKNN.setUseStandardisedAttributes(true);
    myKNN.setAutoDetermineK(false);
    myKNN.setUseWeightedVoting(true);
    myKNN.buildClassifier(train);
    //myKNN.setUseAcceleratedNNSearch(true);
    System.out.println("\nAccuracy Experiments:");
    MachineLearning_CW.performClassifierAccuracyTests(myKNN, trainData, testData, 1);

    /* Timing tests */
    System.out.println("\n\nTiming Experiments:");
    MachineLearning_CW.performClassifierTimingTests(wekaKNN, trainData, testData);
}
From source file:machinelearning_cw.MachineLearning_CW.java
/**
 * Tests the accuracy of a classifier against a collection of datasets
 * by resampling.
 *
 * @param classifier The classifier to be tested
 * @param trainingDatasets A collection of Instances objects containing
 *                         the training data for different datasets.
 * @param testDatasets A collection of Instances objects containing
 *                     the test data for different datasets.
 * @param t The number of times the data should be sampled
 * @throws Exception
 */
public static void performClassifierAccuracyTests(Classifier classifier,
        ArrayList<Instances> trainingDatasets, ArrayList<Instances> testDatasets, int t)
        throws Exception {
    ArrayList<Double> accuracies = new ArrayList<Double>();
    Random randomGenerator = new Random();
    for (int i = 0; i < trainingDatasets.size(); i++) {
        Instances train = trainingDatasets.get(i);
        Instances test = testDatasets.get(i);
        /* Test by resampling. First, merge train and test data */
        for (int j = 0; j < t; j++) {
            Instances mergedDataSet = mergeDataSets(train, test);
            train.clear();
            test.clear();
            /* Randomly sample n instances from the merged dataset
             * (without replacement) to form the train set */
            int n = mergedDataSet.size() / 2;
            for (int k = 0; k < n; k++) {
                int indexToRemove = randomGenerator.nextInt(mergedDataSet.size());
                train.add(mergedDataSet.remove(indexToRemove));
            }
            /* Reserve the remaining data as test data. Always remove index 0:
             * removing at an advancing index k while the list shrinks would
             * skip every other instance. */
            while (!mergedDataSet.isEmpty()) {
                test.add(mergedDataSet.remove(0));
            }
            /* Train classifier (recalculates k) */
            classifier.buildClassifier(train);
            /* Measure and record the accuracy of the classifier on the test set */
            double accuracy = Helpers.findClassifierAccuracy(classifier, test);
            accuracies.add(accuracy);
        }
        double accuracyAverage = average(accuracies);
        System.out.println(accuracyAverage);
    }
}