List of usage examples for weka.core Instances enumerateInstances
public Enumeration<Instance> enumerateInstances()
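For orientation, here is a minimal sketch of the basic iteration pattern before the full examples below. The ARFF path and the choice of the last attribute as the class are illustrative assumptions, and the typed Enumeration assumes a recent (3.7+) Weka release:

import java.util.Enumeration;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class EnumerateInstancesExample {
    public static void main(String[] args) throws Exception {
        // "data.arff" is a placeholder path; the last attribute is assumed to be the class.
        Instances data = DataSource.read("data.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // Walk over every instance through the Enumeration view.
        Enumeration<Instance> en = data.enumerateInstances();
        while (en.hasMoreElements()) {
            Instance inst = en.nextElement();
            System.out.println(inst);
        }
    }
}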
From source file:net.sf.bddbddb.order.MyId3.java
License:LGPL
/**
 * Splits a dataset according to the values of a nominal attribute.
 *
 * @param data the data which is to be split
 * @param att the attribute to be used for splitting
 * @return the sets of instances produced by the split
 */
private Instances[] splitData(Instances data, Attribute att) {
    numI = 0;
    splitDataSize = new int[att.numValues()];
    Instances[] splitData = new Instances[att.numValues()];
    for (int j = 0; j < att.numValues(); j++) {
        splitData[j] = new Instances(data, data.numInstances());
    }
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        if (inst.isMissing(att)) {
            // Add to all children.
            for (int k = 0; k < att.numValues(); ++k) {
                splitData[k].add(inst);
            }
        } else {
            int k = (int) inst.value(att);
            splitData[k].add(inst);
            splitDataSize[k]++;
            numI++;
        }
    }
    return splitData;
}
From source file:net.sf.bddbddb.order.WekaInterface.java
License:LGPL
public static double cvError(int numFolds, Instances data0, String cClassName) {
    if (data0.numInstances() < numFolds)
        return Double.NaN; // more folds than elements
    if (numFolds == 0)
        return Double.NaN; // no folds
    if (data0.numInstances() == 0)
        return 0; // no instances
    Instances data = new Instances(data0);
    //data.randomize(new Random(System.currentTimeMillis()));
    data.stratify(numFolds);
    Assert._assert(data.classAttribute() != null);
    double[] estimates = new double[numFolds];
    for (int i = 0; i < numFolds; ++i) {
        Instances trainData = data.trainCV(numFolds, i);
        Assert._assert(trainData.classAttribute() != null);
        Assert._assert(trainData.numInstances() != 0, "Cannot train classifier on 0 instances.");
        Instances testData = data.testCV(numFolds, i);
        Assert._assert(testData.classAttribute() != null);
        Assert._assert(testData.numInstances() != 0, "Cannot test classifier on 0 instances.");
        int temp = FindBestDomainOrder.TRACE;
        FindBestDomainOrder.TRACE = 0;
        Classifier classifier = buildClassifier(cClassName, trainData);
        FindBestDomainOrder.TRACE = temp;
        int count = testData.numInstances();
        double loss = 0;
        double sum = 0;
        for (Enumeration e = testData.enumerateInstances(); e.hasMoreElements();) {
            Instance instance = (Instance) e.nextElement();
            Assert._assert(instance != null);
            Assert._assert(instance.classAttribute() != null
                    && instance.classAttribute() == trainData.classAttribute());
            try {
                double testClass = classifier.classifyInstance(instance);
                double weight = instance.weight();
                if (testClass != instance.classValue())
                    loss += weight;
                sum += weight;
            } catch (Exception ex) {
                FindBestDomainOrder.out.println("Exception while classifying: " + instance + "\n" + ex);
            }
        }
        estimates[i] = 1 - loss / sum;
    }
    double average = 0;
    for (int i = 0; i < numFolds; ++i)
        average += estimates[i];
    return average / numFolds;
}
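A hedged sketch of how this helper might be invoked. The ARFF path, class-index choice, and classifier class name are placeholders; buildClassifier is the project's own helper in WekaInterface:

import java.util.Enumeration;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import net.sf.bddbddb.order.WekaInterface;

public class CvErrorExample {
    public static void main(String[] args) throws Exception {
        // "data.arff" is a placeholder; the last attribute is assumed to be the class.
        Instances data = DataSource.read("data.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // Despite its name, cvError averages the per-fold accuracy (1 - weighted error).
        double accuracy = WekaInterface.cvError(10, data, "weka.classifiers.trees.J48");
        System.out.println("10-fold CV accuracy estimate: " + accuracy);
    }
}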
From source file:net.sf.mzmine.modules.peaklistmethods.dataanalysis.clustering.em.EMClusterer.java
License:Open Source License
@Override
public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) {
    List<Integer> clusters = new ArrayList<Integer>();
    String[] options = new String[2];
    EM clusterer = new EM();
    int numberOfIterations = parameters.getParameter(EMClustererParameters.numberOfIterations).getValue();
    options[0] = "-I";
    options[1] = String.valueOf(numberOfIterations);
    try {
        clusterer.setOptions(options);
        clusterer.buildClusterer(dataset);
        Enumeration<?> e = dataset.enumerateInstances();
        while (e.hasMoreElements()) {
            clusters.add(clusterer.clusterInstance((Instance) e.nextElement()));
        }
        ClusteringResult result = new ClusteringResult(clusters, null, clusterer.numberOfClusters(),
                parameters.getParameter(EMClustererParameters.visualization).getValue());
        return result;
    } catch (Exception ex) {
        logger.log(Level.SEVERE, null, ex);
        return null;
    }
}
From source file:net.sf.mzmine.modules.peaklistmethods.dataanalysis.clustering.farthestfirst.FarthestFirstClusterer.java
License:Open Source License
@Override
public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) {
    List<Integer> clusters = new ArrayList<Integer>();
    String[] options = new String[2];
    FarthestFirst clusterer = new FarthestFirst();
    int numberOfGroups = parameters.getParameter(FarthestFirstClustererParameters.numberOfGroups).getValue();
    options[0] = "-N";
    options[1] = String.valueOf(numberOfGroups);
    try {
        clusterer.setOptions(options);
        clusterer.buildClusterer(dataset);
        Enumeration<?> e = dataset.enumerateInstances();
        while (e.hasMoreElements()) {
            clusters.add(clusterer.clusterInstance((Instance) e.nextElement()));
        }
        ClusteringResult result = new ClusteringResult(clusters, null, clusterer.numberOfClusters(),
                parameters.getParameter(EMClustererParameters.visualization).getValue());
        return result;
    } catch (Exception ex) {
        logger.log(Level.SEVERE, null, ex);
        return null;
    }
}
From source file:net.sf.mzmine.modules.peaklistmethods.dataanalysis.clustering.simplekmeans.SimpleKMeansClusterer.java
License:Open Source License
@Override
public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) {
    List<Integer> clusters = new ArrayList<Integer>();
    String[] options = new String[2];
    SimpleKMeans clusterer = new SimpleKMeans();
    int numberOfGroups = parameters.getParameter(SimpleKMeansClustererParameters.numberOfGroups).getValue();
    options[0] = "-N";
    options[1] = String.valueOf(numberOfGroups);
    try {
        clusterer.setOptions(options);
        clusterer.buildClusterer(dataset);
        Enumeration<?> e = dataset.enumerateInstances();
        while (e.hasMoreElements()) {
            clusters.add(clusterer.clusterInstance((Instance) e.nextElement()));
        }
        ClusteringResult result = new ClusteringResult(clusters, null, clusterer.numberOfClusters(),
                parameters.getParameter(EMClustererParameters.visualization).getValue());
        return result;
    } catch (Exception ex) {
        logger.log(Level.SEVERE, null, ex);
        return null;
    }
}
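The three MZmine clusterer wrappers above follow the same pattern: configure the clusterer, build it on the dataset, then enumerate the instances and record each cluster assignment. As a side note, assuming a Weka release of 3.7 or later (where Instances implements Iterable<Instance>), the enumeration loop could equivalently be written as a for-each:

// Equivalent to the Enumeration loop above on Weka 3.7+ (an assumption about the Weka version in use).
for (Instance instance : dataset) {
    clusters.add(clusterer.clusterInstance(instance));
}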
From source file:newdtl.NewJ48.java
/**
 * Creates a J48 tree.
 *
 * @param data the training data
 * @exception Exception if tree failed to build
 */
private void makeTree(Instances data) throws Exception {
    // Check whether this node contains no instances
    if (data.numInstances() == 0) {
        splitAttribute = null;
        label = DOUBLE_MISSING_VALUE;
        classDistributions = new double[data.numClasses()];
        isLeaf = true;
    } else {
        // Find the attribute with the maximum gain ratio
        double[] gainRatios = new double[data.numAttributes()];
        double[] thresholds = new double[data.numAttributes()];
        Enumeration attEnum = data.enumerateAttributes();
        while (attEnum.hasMoreElements()) {
            Attribute att = (Attribute) attEnum.nextElement();
            double[] result = computeGainRatio(data, att);
            gainRatios[att.index()] = result[0];
            thresholds[att.index()] = result[1];
        }
        splitAttribute = data.attribute(maxIndex(gainRatios));
        if (splitAttribute.isNumeric()) {
            splitThreshold = thresholds[maxIndex(gainRatios)];
        } else {
            splitThreshold = Double.NaN;
        }
        classDistributions = new double[data.numClasses()];
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = (Instance) data.instance(i);
            classDistributions[(int) inst.classValue()]++;
        }

        // Make a leaf if the gain ratio is 0
        if (Double.compare(gainRatios[splitAttribute.index()], 0) == 0) {
            splitAttribute = null;
            label = maxIndex(classDistributions);
            classAttribute = data.classAttribute();
            isLeaf = true;
        } else {
            // Check for missing values on the split attribute
            if (isMissing(data, splitAttribute)) {
                // Find the most common value (mode)
                int index = modusIndex(data, splitAttribute);

                // Replace missing values with the mode
                Enumeration dataEnum = data.enumerateInstances();
                while (dataEnum.hasMoreElements()) {
                    Instance inst = (Instance) dataEnum.nextElement();
                    if (inst.isMissing(splitAttribute)) {
                        inst.setValue(splitAttribute, splitAttribute.value(index));
                    }
                }
            }

            // Build new subtrees below this node
            Instances[] splitData;
            if (splitAttribute.isNumeric()) {
                splitData = splitData(data, splitAttribute, splitThreshold);
                children = new NewJ48[2];
                for (int j = 0; j < 2; j++) {
                    children[j] = new NewJ48();
                    children[j].makeTree(splitData[j]);
                }
            } else {
                splitData = splitData(data, splitAttribute);
                children = new NewJ48[splitAttribute.numValues()];
                for (int j = 0; j < splitAttribute.numValues(); j++) {
                    children[j] = new NewJ48();
                    children[j].makeTree(splitData[j]);
                }
            }
            isLeaf = false;
        }
    }
}
From source file:newdtl.NewJ48.java
/**
 * Checks whether any instance in the data has a missing value for the given attribute.
 *
 * @param data the data to search
 * @param att the attribute to check
 * @return true if at least one instance has a missing value for the attribute
 */
private boolean isMissing(Instances data, Attribute att) {
    boolean isMissingValue = false;
    Enumeration dataEnum = data.enumerateInstances();
    while (dataEnum.hasMoreElements() && !isMissingValue) {
        Instance inst = (Instance) dataEnum.nextElement();
        if (inst.isMissing(att)) {
            isMissingValue = true;
        }
    }
    return isMissingValue;
}
From source file:newdtl.NewJ48.java
/**
 * Finds the index of the attribute's most common (modal) value.
 *
 * @param data the data to search
 * @param att the attribute whose values are counted
 * @return the index of the attribute's most common value
 */
private int modusIndex(Instances data, Attribute att) {
    // Count how often each value of the attribute occurs
    int[] modus = new int[att.numValues()];
    Enumeration dataEnumeration = data.enumerateInstances();
    while (dataEnumeration.hasMoreElements()) {
        Instance inst = (Instance) dataEnumeration.nextElement();
        if (!inst.isMissing(att)) {
            modus[(int) inst.value(att)]++;
        }
    }

    // Find the value with the highest count
    int indexMax = 0;
    for (int i = 1; i < modus.length; ++i) {
        if (modus[i] > modus[indexMax]) {
            indexMax = i;
        }
    }
    return indexMax;
}
From source file:org.montp2.m1decol.ter.clustering.XMeansClustering.java
License:Open Source License
public Clusterer computeClustering(String inPath, String outPath, Properties propertiesCluster) throws Exception {
    Instances inputInstances = WekaUtils.loadARFF(inPath);

    EuclideanDistance euclideanDistance = new EuclideanDistance();
    euclideanDistance.setAttributeIndices("first-last");
    euclideanDistance.setDontNormalize(false);
    euclideanDistance.setInvertSelection(false);

    XMeans xmeans = new XMeans();
    xmeans.setMaxIterations(500);
    xmeans.setSeed(10);
    xmeans.setMinNumClusters(5);
    xmeans.setMaxNumClusters(12);
    xmeans.setMaxKMeans(1000);
    xmeans.setMaxKMeansForChildren(1000);
    xmeans.setBinValue(1.0);
    xmeans.setCutOffFactor(0.5);
    xmeans.setDebugLevel(0);
    xmeans.setMaxIterations(1);
    xmeans.buildClusterer(inputInstances);

    Enumeration<Instance> e = inputInstances.enumerateInstances();
    while (e.hasMoreElements()) {
        Instance ins = e.nextElement();
        int cluster_num = xmeans.clusterInstance(ins);
        System.out.println(ins.toString());
        System.out.println(cluster_num);
    }

    WekaUtils.saveModel(xmeans, outPath);
    return xmeans;
}
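A hedged sketch of how this method might be invoked; the file paths are placeholders, and XMeansClustering, WekaUtils, and Clusterer come from the project code shown above:

// Illustrative call only; both paths are placeholders and the Properties object is unused by the method shown.
XMeansClustering clustering = new XMeansClustering();
Clusterer model = clustering.computeClustering("input.arff", "xmeans.model", new java.util.Properties());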
From source file:org.montp2.m1decol.ter.preprocessing.GlobalPreProcessing.java
License:Open Source License
public Map<Integer, Integer> getMapOfInstanceArffToIdUser(String dirPath, String inArff) throws Exception {
    Pattern pattern = Pattern.compile("^( \\p{Print}+ ) (_(\\p{Digit}+).txt)$", Pattern.COMMENTS);
    Map<Integer, String> arffIds = new HashMap<Integer, String>();
    for (File file : FileUtils.ls(dirPath)) {
        Matcher matcher = pattern.matcher(file.getAbsolutePath());
        matcher.matches();
        arffIds.put(Integer.parseInt(matcher.group(3)),
                InputStreamUtils.readInputStream(new BufferedInputStream(new FileInputStream(file))));
    }

    Instances instances = WekaUtils.loadARFF(inArff);
    Enumeration<Instance> en = instances.enumerateInstances();
    Map<Integer, Integer> arffTOIdUser = new HashMap<Integer, Integer>();
    int index = 0;
    while (en.hasMoreElements()) {
        String value = en.nextElement().toString();
        value = value.substring(1, value.length() - 1);
        String works[] = value.split("\\s");
        for (Map.Entry<Integer, String> arff : arffIds.entrySet()) {
            String wordArff[] = arff.getValue().split("\\s");
            boolean isEqual = true;
            if (wordArff.length == works.length) {
                for (int j = 0; j < wordArff.length; j++) {
                    if (!wordArff[j].equals(works[j])) {
                        isEqual = false;
                        break;
                    }
                }
                if (isEqual) {
                    arffTOIdUser.put(index, arff.getKey());
                    break;
                }
            }
        }
        index++;
    }
    return arffTOIdUser;
}