List of usage examples for weka.core Instances enumerateInstances
public Enumeration<Instance> enumerateInstances()
From source file:lascer.WekaClassifier.java
License:Open Source License
/** * Generates the classifier.//w w w . j av a2 s . co m * * @param data the data to be used. * * @exception Exception if the classifier can't built successfully. */ public void buildClassifier(Instances data) throws Exception { weka.coreExtended.Instances extendedInstances; weka.coreExtended.BasicInstance extInst; weka.coreExtended.BasicAttribute classAttribut; de.unistuttgart.commandline.Option formelnArtOption; de.unistuttgart.commandline.Option formelnKlasseOption; de.unistuttgart.commandline.Option loggingSwitch; Instance readInst; Beispieldaten invDatensatz; StringReader stringReader; Enumeration instEnum; Enumeration attribEnum; PraedErzParameter praedErzParameter = null; KonzErzParameter konzErzParameter = null; Pruning pruning; String formelArt; String formelKlasse; String optionWert; float posPruneAnt, negPruneAnt; int instNumber; boolean unbekannteWertBsp; Steuerung.parseArguments(parser); formelArt = Konstanten.WEKA_FORMEL_ART; formelnArtOption = parser.getOption("formelArt"); if (parser.isEnabled(formelnArtOption)) { optionWert = parser.getParameter(formelnArtOption); if (!optionWert.equals("dis") && !optionWert.equals("kon") && !optionWert.equals("beste")) { System.err.println("Wert der Option formelArt unzulssig"); System.err.println("Zulssig: " + formelnArtOption.toString()); throw (new RuntimeException("Wert von Option unzulssig.")); } formelArt = optionWert; } formelKlasse = Konstanten.WEKA_FORMEL_KLASSE; formelnKlasseOption = parser.getOption("formelKlasse"); if (parser.isEnabled(formelnKlasseOption)) { optionWert = parser.getParameter(formelnKlasseOption); if (!optionWert.equals("pos") && !optionWert.equals("neg") && !optionWert.equals("beste") && !optionWert.equals("beide")) { System.err.println("Wert der Option formelKlasse unzulssig"); System.err.println("Zulssig: " + formelnKlasseOption.toString()); throw (new RuntimeException("Wert von Option unzulssig.")); } formelKlasse = optionWert; } loggingSwitch = parser.getOption("logging"); if 
(debugMode || parser.isEnabled(loggingSwitch)) { Steuerung.setLogLevel(Konstanten.LOGGING_LEVEL); } // Ermittlung der Parameter. unbekannteWertBsp = Steuerung.unbekannteWertBeispiele(parser); posPruneAnt = Steuerung.posPruneAnteil(parser); negPruneAnt = Steuerung.negPruneAnteil(parser); praedErzParameter = Steuerung.praedErzParameter(parser); konzErzParameter = Steuerung.konzErzParameter(parser); // Einlesen der Daten und Erzeugung des Instanzen-Objekts. instNumber = data.numInstances(); stringReader = new StringReader(data.toString()); extendedInstances = new weka.coreExtended.Instances(stringReader, instNumber); instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { readInst = (Instance) instEnum.nextElement(); extInst = new weka.coreExtended.BasicInstance(readInst.weight(), readInst.toDoubleArray()); extendedInstances.addBasicInstance(extInst); } // Erzeugung der Datenstze. posDatensatz = ArffDateiEinlesen.beispieldaten(extendedInstances, unbekannteWertBsp); negDatensatz = posDatensatz.kopie(true); // Erzeugung der Liste der Attribute. attributListe = new LinkedList(); attribEnum = extendedInstances.enumerateBasicAttributes(); while (attribEnum.hasMoreElements()) { attributListe.add(attribEnum.nextElement()); } // Ermittlung der Werte der Klassifikation. classAttribut = extendedInstances.basicClassAttribute(); wekaClassTrue = classAttribut.indexOfValue("true"); wekaClassFalse = classAttribut.indexOfValue("false"); // Die Formel zur Klasse der positiven Beispiele erzeugen. if (formelKlasse.equals("pos") || formelKlasse.equals("beste") || formelKlasse.equals("beide")) { posFormel = generatedFormula(posDatensatz, praedErzParameter, konzErzParameter, formelArt); } // Die Formel zur Klasse der negativen Beispiele erzeugen. 
if (formelKlasse.equals("neg") || formelKlasse.equals("beste") || formelKlasse.equals("beide")) { negFormel = generatedFormula(negDatensatz, praedErzParameter, konzErzParameter, formelArt); } if (formelKlasse.equals("beste")) { // Die schlechtere Formel lschen. if (negFormel.istBesser(posFormel)) { posFormel = null; } else { negFormel = null; } } if ((posPruneAnt > 0) || (negPruneAnt > 0)) { pruning = new Pruning(); if (posFormel != null) { posDatensatz = pruning.reduzierteDaten(posDatensatz, posFormel, posPruneAnt, negPruneAnt); posFormel = generatedFormula(posDatensatz, praedErzParameter, konzErzParameter, formelArt); } if (negFormel != null) { negDatensatz = pruning.reduzierteDaten(negDatensatz, negFormel, negPruneAnt, posPruneAnt); negFormel = generatedFormula(negDatensatz, praedErzParameter, konzErzParameter, formelArt); } } }
From source file:machine_learing_clasifier.MyC45.java
/**
 * Searches for the split threshold of a numeric attribute that yields the
 * highest information gain.
 *
 * <p>The data set is sorted by {@code att} (in place). Every position where
 * the class value differs from that of the preceding instance produces one
 * candidate threshold: the midpoint between the attribute values of the two
 * adjacent instances. Instances whose value for {@code att} is missing are
 * skipped.
 *
 * @param i   the data set; it is re-ordered by this call
 * @param att the attribute to find a threshold for
 * @return the candidate with the largest information gain, or 0 if the data
 *         set is empty or no candidate has a gain greater than 0
 */
public double BestContinousAttribute(Instances i, Attribute att) {
    i.sort(att);

    double bestThreshold = 0;
    double bestGain = 0;

    boolean havePrevious = false;
    double previousClass = 0;
    double previousValue = 0;

    Enumeration instEnum = i.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        if (inst.isMissing(att)) {
            // A missing value would turn the midpoint into NaN.
            continue;
        }

        double currentClass = inst.classValue();
        double currentValue = inst.value(att);

        if (havePrevious && currentClass != previousClass) {
            // Midpoint between the two neighbouring instances.
            double threshold = previousValue + ((currentValue - previousValue) / 2);
            double gain = computeInformationGainContinous(i, att, threshold);
            if (bestGain < gain) {
                bestThreshold = threshold;
                bestGain = gain;
            }
        }

        // Track the immediately preceding instance on every step, not only
        // when the class changes, so the lower bound is always the neighbour.
        havePrevious = true;
        previousClass = currentClass;
        previousValue = currentValue;
    }
    return bestThreshold;
}
From source file:moa.classifiers.rules.GeRules.java
License:Open Source License
public static void main(String[] args) throws Exception { // TODO Auto-generated method stub //ArffFileStream arffFileStream = new ArffFileStream("resources/UCI_KDD/nominal/cmc.arff", -1); // read arff file WEKA way DataSource source = new DataSource("data/cmc.arff"); // stream generator RandomTreeGenerator treeGenerator = new RandomTreeGenerator(); treeGenerator.numClassesOption.setValue(5); treeGenerator.numNumericsOption.setValue(0); treeGenerator.prepareForUse();//w w w . j a v a 2 s .com // HoeffdingRules classifier GeRules gErules = new GeRules(); gErules.prepareForUse(); // load data into instances set Instances data = source.getDataSet(); // setting class attribute if the data format does not provide this information // For example, the XRFF format saves the class attribute information as well if (data.classIndex() == -1) data.setClassIndex(data.numAttributes() - 1); // Using Prism classifier //hoeffdingRules.learnRules(Collections.list(data.enumerateInstances())); for (Instance instance : Collections.list(data.enumerateInstances())) { gErules.trainOnInstanceImpl(instance); gErules.correctlyClassifies(instance); } Instance anInstance = Collections.list(data.enumerateInstances()).get(10); System.out.println(anInstance); for (Rule aRule : gErules.RulesCoveredInstance(anInstance)) { System.out.println(aRule.printRule()); } for (Rule aRule : gErules.rulesList) { System.out.println(aRule.printRule()); } }
From source file:model.clustering.Clustering.java
public String filledFile(Instances data, int numOfClusters, String remove) throws Exception { String mainData = data.toString(); int index = mainData.indexOf("@data"); String clusterData = mainData.substring(0, index + 6); Remove removeFilter = new Remove(); removeFilter.setAttributeIndices(remove); kMeansCLusterer = new SimpleKMeans(); kMeansCLusterer.setNumClusters(numOfClusters); FilteredClusterer filteredClusterer = new FilteredClusterer(); filteredClusterer.setClusterer(kMeansCLusterer); filteredClusterer.setFilter(removeFilter); filteredClusterer.buildClusterer(data); Enumeration<Instance> newData = data.enumerateInstances(); eval = new ClusterEvaluation(); eval.setClusterer(filteredClusterer); eval.evaluateClusterer(data);//from w w w . j a v a 2 s . c o m while (newData.hasMoreElements()) { Instance i = (Instance) newData.nextElement(); int kluster = filteredClusterer.clusterInstance(i); String instanceString = i.toString() + "," + kluster; clusterData = clusterData + instanceString + "\n"; } return clusterData; }
From source file:myclassifier.MyC45.java
/** * Method building ID3 tree.// w w w .ja v a 2 s . com * * @param data the training data * @exception Exception if decision tree can't be built successfully */ private void makeTree(Instances data) throws Exception { // Check if no instances have reached this node. if (data.numInstances() == 0) { m_Attribute = null; m_ClassValue = -1; //Instance.missingValue(); m_Distribution = new double[data.numClasses()]; return; } // Compute attribute with maximum information gain. double[] gainRatios = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); gainRatios[att.index()] = computeGainRatio(data, att); } m_Attribute = data.attribute(Utils.maxIndex(gainRatios)); // Make leaf if information gain is zero. // Otherwise create successors. if (Utils.eq(gainRatios[m_Attribute.index()], 0)) { m_Attribute = null; m_Distribution = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); m_Distribution[(int) inst.classValue()]++; } Utils.normalize(m_Distribution); m_ClassValue = Utils.maxIndex(m_Distribution); m_ClassAttribute = data.classAttribute(); } else { Instances[] splitData = splitData(data, m_Attribute); m_Successors = new MyC45[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new MyC45(); m_Successors[j].makeTree(splitData[j]); } } }
From source file:myclassifier.MyC45.java
private double computeEntropyFromData(Instances data) throws Exception { double[] classCounts = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); classCounts[(int) inst.classValue()]++; }/*from w ww . j a va 2 s . c o m*/ double entropy = 0; for (int j = 0; j < data.numClasses(); j++) { if (classCounts[j] > 0) entropy -= (double) (classCounts[j] / data.numInstances()) * Utils.log2((double) classCounts[j] / data.numInstances()); } //return entropy + Utils.log2(data.numInstances()); return entropy; }
From source file:myclassifier.MyC45.java
/**
 * Partitions a data set by the value each instance has for a nominal
 * attribute: the instance goes into the subset whose index equals its
 * attribute value cast to {@code int}.
 *
 * @param data the data which is to be split
 * @param att the attribute to be used for splitting
 * @return one subset per attribute value, in value-index order
 */
private Instances[] splitData(Instances data, Attribute att) {
    int valueCount = att.numValues();
    Instances[] partitions = new Instances[valueCount];
    for (int value = 0; value < valueCount; value++) {
        // Empty set with the header of data and room for all of its instances.
        partitions[value] = new Instances(data, data.numInstances());
    }

    for (Enumeration rows = data.enumerateInstances(); rows.hasMoreElements();) {
        Instance row = (Instance) rows.nextElement();
        int target = (int) row.value(att);
        partitions[target].add(row);
    }
    return partitions;
}
From source file:myclassifier.myC45Pack.ClassDistribution.java
/**
 * Creates a distribution over a single data set: every instance is added to
 * sub-dataset 0.
 *
 * @param dataSet the instances to add
 *
 * @exception Exception if something goes wrong
 */
public ClassDistribution(Instances dataSet) throws Exception {
    w_perClassPerSubdataset = new double[1][dataSet.numClasses()];
    w_perSubdataset = new double[1];
    w_perClass = new double[dataSet.numClasses()];
    totalWeights = 0;

    for (Enumeration rows = dataSet.enumerateInstances(); rows.hasMoreElements();) {
        addInstance(0, (Instance) rows.nextElement());
    }
}
From source file:myclassifier.myC45Pack.ClassDistribution.java
/**
 * Creates a distribution according to given instances and split model.
 *
 * <p>An instance for which the model reports a subset index is added to that
 * subset. When the model reports -1, the instance is added with the weights
 * the model supplies for it.
 *
 * @param source     the instances to distribute
 * @param modelToUse the split model that assigns instances to subsets
 *
 * @exception Exception if something goes wrong
 */
public ClassDistribution(Instances source, C45ClassifierSplitModel modelToUse) throws Exception {
    final int subsetCount = modelToUse.numSubsets();
    final int classCount = source.numClasses();

    w_perClassPerSubdataset = new double[subsetCount][classCount];
    w_perSubdataset = new double[subsetCount];
    w_perClass = new double[classCount];
    totalWeights = 0;

    for (Enumeration rows = source.enumerateInstances(); rows.hasMoreElements();) {
        Instance row = (Instance) rows.nextElement();
        int subset = modelToUse.getSubsetIndex(row);
        if (subset == -1) {
            addWeights(row, modelToUse.getWeights(row));
        } else {
            addInstance(subset, row);
        }
    }
}
From source file:myclassifier.myC45Pack.ClassDistribution.java
/** * Adds all instances with unknown values for given attribute, weighted * according to frequency of instances in each bag. * * @exception Exception if something goes wrong *///from ww w.j av a 2 s. c om public void addInstWithMissValue(Instances dataSet, int attIndex) throws Exception { double[] valueProbs; double weight, newWeight; int classIndex; Instance instance; valueProbs = new double[w_perSubdataset.length]; for (int i = 0; i < w_perSubdataset.length; i++) { if (totalWeights == 0) { valueProbs[i] = 1.0 / valueProbs.length; } else { valueProbs[i] = w_perSubdataset[i] / totalWeights; } } Enumeration E = dataSet.enumerateInstances(); while (E.hasMoreElements()) { instance = (Instance) E.nextElement(); if (instance.isMissing(attIndex)) { classIndex = (int) instance.classValue(); weight = instance.weight(); w_perClass[classIndex] = w_perClass[classIndex] + weight; totalWeights += weight; for (int i = 0; i < w_perSubdataset.length; i++) { newWeight = valueProbs[i] * weight; w_perClassPerSubdataset[i][classIndex] += newWeight; w_perSubdataset[i] += newWeight; } } } }