List of usage examples for weka.core Utils eq
public static boolean eq(double a, double b)
From source file:Bilbo.java
License:Open Source License
/**
 * Builds the bagged ensemble from labeled training data plus a pool of
 * unlabeled data (semi-supervised bagging over NewTree base classifiers).
 *
 * @param data the labeled training data used to generate the bagged classifier
 * @param p_unlabeledData unlabeled instances handed to each tree for transduction
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data, Instances p_unlabeledData) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(data);
    // Has user asked to represent copies using weights?
    if (getRepresentCopiesUsingWeights() && !(m_Classifier instanceof WeightedInstancesHandler)) {
        throw new IllegalArgumentException("Cannot represent copies using weights when "
                + "base learner in bagging does not implement " + "WeightedInstancesHandler.");
    }
    // Work on defensive copies so callers' Instances are never mutated.
    m_data = new Instances(data);
    m_unlabeledData = new Instances(p_unlabeledData);
    super.buildClassifier(m_data);
    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        // OOB estimation requires every bag to be a full-size bootstrap sample.
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }
    m_random = new Random(m_Seed);
    m_inBag = null;
    if (m_CalcOutOfBag)
        m_inBag = new boolean[m_Classifiers.length][];
    // Give each randomizable ensemble member its own derived seed.
    for (int j = 0; j < m_Classifiers.length; j++) {
        if (m_Classifier instanceof Randomizable) {
            ((Randomizable) m_Classifiers[j]).setSeed(m_random.nextInt());
        }
    }
    //Insert oracle loop here TODO
    buildClassifiers();
    // Post-training transduction pass over each tree.
    // NOTE(review): GetTransductedInstances appears to fill 'inst' with
    // transduced (pseudo-labeled) instances and DoInduction to retrain on
    // them — confirm against NewTree's implementation.
    Instances inst = new Instances(m_data);
    for (int i = 0; i < m_Classifiers.length; i++) {
        inst.clear();
        ((NewTree) m_Classifiers[i]).GetTransductedInstances(inst);
        ((NewTree) m_Classifiers[i]).DoInduction(inst);
        // Ehm, do something boyski
    }
    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = m_data.classAttribute().isNumeric();
        for (int i = 0; i < m_data.numInstances(); i++) {
            double vote;
            double[] votes;
            // Numeric class: single accumulator; nominal: one slot per class.
            if (numeric)
                votes = new double[1];
            else
                votes = new double[m_data.numClasses()];
            // determine predictions for instance, using only classifiers
            // whose bootstrap sample did NOT contain instance i
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                if (m_inBag[j][i])
                    continue;
                if (numeric) {
                    double pred = ((NewTree) m_Classifiers[j]).classifyInstance(m_data.instance(i));
                    if (!Utils.isMissingValue(pred)) {
                        votes[0] += pred;
                        voteCount++;
                    }
                } else {
                    voteCount++;
                    double[] newProbs = ((NewTree) m_Classifiers[j])
                            .distributionForInstance(m_data.instance(i));
                    // average the probability estimates
                    for (int k = 0; k < newProbs.length; k++) {
                        votes[k] += newProbs[k];
                    }
                }
            }
            // "vote"
            if (numeric) {
                if (voteCount == 0) {
                    vote = Utils.missingValue();
                } else {
                    vote = votes[0] / voteCount; // average
                }
            } else {
                if (Utils.eq(Utils.sum(votes), 0)) {
                    // no OOB classifier produced any probability mass
                    vote = Utils.missingValue();
                } else {
                    vote = Utils.maxIndex(votes); // predicted class
                    Utils.normalize(votes);
                }
            }
            // error for instance (skipped when no vote or class is missing)
            if (!Utils.isMissingValue(vote) && !m_data.instance(i).classIsMissing()) {
                outOfBagCount += m_data.instance(i).weight();
                if (numeric) {
                    // NOTE(review): relative (not absolute) error — divides by
                    // the true class value; zero class values would divide by 0.
                    errorSum += (StrictMath.abs(vote - m_data.instance(i).classValue())
                            * m_data.instance(i).weight()) / m_data.instance(i).classValue();
                } else {
                    if (vote != m_data.instance(i).classValue())
                        errorSum += m_data.instance(i).weight();
                }
            }
        }
        if (outOfBagCount > 0) {
            m_OutOfBagError = errorSum / outOfBagCount;
        }
    } else {
        m_OutOfBagError = 0;
    }
    // save memory
    m_data = null;
}
From source file:Bilbo.java
License:Open Source License
/**
 * Calculates the class membership probabilities for the given test instance
 * by averaging over all ensemble members.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution (for a numeric class,
 *         index 0 holds the averaged prediction, or missing if no tree
 *         produced a non-missing prediction)
 * @throws Exception if distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
    double[] sums = new double[instance.numClasses()], newProbs;
    // Count of non-missing numeric predictions; int — it is a counter,
    // not a measurement (was a double in the original).
    int numPreds = 0;
    // Hoisted: the class attribute's type cannot change mid-loop.
    boolean numeric = instance.classAttribute().isNumeric();
    for (int i = 0; i < m_NumIterations; i++) {
        if (numeric) {
            double pred = ((NewTree) m_Classifiers[i]).classifyInstance(instance);
            if (!Utils.isMissingValue(pred)) {
                sums[0] += pred;
                numPreds++;
            }
        } else {
            newProbs = ((NewTree) m_Classifiers[i]).distributionForInstance(instance);
            // accumulate probability estimates; normalized below
            for (int j = 0; j < newProbs.length; j++)
                sums[j] += newProbs[j];
        }
    }
    if (numeric) {
        if (numPreds == 0) {
            sums[0] = Utils.missingValue();
        } else {
            sums[0] /= numPreds; // average of the valid predictions
        }
        return sums;
    } else if (Utils.eq(Utils.sum(sums), 0)) {
        // no probability mass at all — return the zero vector unchanged
        return sums;
    } else {
        Utils.normalize(sums);
        return sums;
    }
}
From source file:BaggingImprove.java
/** * Bagging method./*w ww. j a v a 2 s . com*/ * * @param data the training data to be used for generating the bagged * classifier. * @throws Exception if the classifier could not be built successfully */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); //data.deleteWithMissingClass(); super.buildClassifier(data); if (m_CalcOutOfBag && (m_BagSizePercent != 100)) { throw new IllegalArgumentException( "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!"); } //+ System.out.println("Classifier length" + m_Classifiers.length); int bagSize = data.numInstances() * m_BagSizePercent / 100; //+ System.out.println("Bag Size " + bagSize); Random random = new Random(m_Seed); boolean[][] inBag = null; if (m_CalcOutOfBag) { inBag = new boolean[m_Classifiers.length][]; } //+ //inisialisasi nama penamaan model BufferedWriter writer = new BufferedWriter(new FileWriter("Bootstrap.txt")); for (int j = 0; j < m_Classifiers.length; j++) { Instances bagData = null; // create the in-bag dataset if (m_CalcOutOfBag) { inBag[j] = new boolean[data.numInstances()]; //System.out.println("Inbag1 " + inBag[0][1]); //bagData = resampleWithWeights(data, random, inBag[j]); bagData = data.resampleWithWeights(random, inBag[j]); //System.out.println("num after resample " + bagData.numInstances()); //+ // for (int k = 0; k < bagData.numInstances(); k++) { // System.out.println("Bag Data after resample [calc out bag]" + bagData.instance(k)); // } } else { //+ System.out.println("Not m_Calc out of bag"); System.out.println("Please configure code inside!"); bagData = data.resampleWithWeights(random); if (bagSize < data.numInstances()) { bagData.randomize(random); Instances newBagData = new Instances(bagData, 0, bagSize); bagData = newBagData; } } if (m_Classifier instanceof Randomizable) { //+ System.out.println("Randomizable"); 
((Randomizable) m_Classifiers[j]).setSeed(random.nextInt()); } //write bootstrap into file writer.write("Bootstrap " + j); writer.newLine(); writer.write(bagData.toString()); writer.newLine(); System.out.println("Berhasil menyimpan bootstrap ke file "); System.out.println("Bootstrap " + j + 1); // textarea.append("\nBootsrap " + (j + 1)); //System.out.println("num instance kedua kali "+bagData.numInstances()); for (int b = 1; b < bagData.numInstances(); b++) { System.out.println("" + bagData.instance(b)); // textarea.append("\n" + bagData.instance(b)); } // //+ // build the classifier m_Classifiers[j].buildClassifier(bagData); // //+ // // SerializationHelper serialization = new SerializationHelper(); // serialization.write("KnnData"+model+".model", m_Classifiers[j]); // System.out.println("Finish write into model"); // model++; } writer.flush(); writer.close(); // calc OOB error? if (getCalcOutOfBag()) { double outOfBagCount = 0.0; double errorSum = 0.0; boolean numeric = data.classAttribute().isNumeric(); for (int i = 0; i < data.numInstances(); i++) { double vote; double[] votes; if (numeric) { votes = new double[1]; } else { votes = new double[data.numClasses()]; } // determine predictions for instance int voteCount = 0; for (int j = 0; j < m_Classifiers.length; j++) { if (inBag[j][i]) { continue; } voteCount++; // double pred = m_Classifiers[j].classifyInstance(data.instance(i)); if (numeric) { // votes[0] += pred; votes[0] = m_Classifiers[j].classifyInstance(data.instance(i)); } else { // votes[(int) pred]++; double[] newProbs = m_Classifiers[j].distributionForInstance(data.instance(i)); //- // for(double a : newProbs) // { // System.out.println("Double new probs %.f "+a); // } // average the probability estimates for (int k = 0; k < newProbs.length; k++) { votes[k] += newProbs[k]; } } } System.out.println("Vote count %d" + voteCount); // "vote" if (numeric) { vote = votes[0]; if (voteCount > 0) { vote /= voteCount; // average } } else { if 
(Utils.eq(Utils.sum(votes), 0)) { } else { Utils.normalize(votes); } vote = Utils.maxIndex(votes); // predicted class //- System.out.println("Vote " + vote); } // error for instance outOfBagCount += data.instance(i).weight(); if (numeric) { errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight(); } else if (vote != data.instance(i).classValue()) { //+ System.out.println("Vote terakhir" + data.instance(i).classValue()); errorSum += data.instance(i).weight(); } } m_OutOfBagError = errorSum / outOfBagCount; } else { m_OutOfBagError = 0; } }
From source file:BaggingImprove.java
/** * Calculates the class membership probabilities for the given test * instance.//from w w w .ja v a 2s.c o m * * @param instance the instance to be classified * @return predicted class probability distribution * @throws Exception if distribution can't be computed successfully */ @Override public double[] distributionForInstance(Instance instance) throws Exception { double[] sums = new double[instance.numClasses()], newProbs; //- //System.out.println("\nDistribution For Instance\n"); for (int i = 0; i < m_NumIterations; i++) { if (instance.classAttribute().isNumeric() == true) { //System.out.println(m_Classifiers[i].classifyInstance(instance)); sums[0] += m_Classifiers[i].classifyInstance(instance); } else { //System.out.println(m_Classifiers[i].distributionForInstance(instance)); newProbs = m_Classifiers[i].distributionForInstance(instance); //- // for (int j = 0; j < newProbs.length; j++) { // sums[j] += newProbs[j]; // System.out.println("Sums "+sums[j]); // } //+ } } if (instance.classAttribute().isNumeric() == true) { sums[0] /= m_NumIterations; return sums; } else if (Utils.eq(Utils.sum(sums), 0)) { return sums; } else { Utils.normalize(sums); return sums; } }
From source file:MLKNNCS.java
License:Open Source License
/**
 * Computes the (Laplace-smoothed) prior and complement-prior probabilities
 * of each label over the training set, plus the per-label misclassification
 * cost used by the cost-sensitive prediction step.
 */
private void ComputePrior() {
    for (int i = 0; i < numLabels; i++) {
        // number of training instances carrying label i
        int temp_Ci = 0;
        for (int j = 0; j < train.numInstances(); j++) {
            double value = Double.parseDouble(
                    train.attribute(labelIndices[i]).value((int) train.instance(j).value(labelIndices[i])));
            if (Utils.eq(value, 1.0)) {
                temp_Ci++;
            }
        }
        // Laplace smoothing with parameter 'smooth'
        PriorProbabilities[i] = (smooth + temp_Ci) / (smooth * 2 + train.numInstances());
        PriorNProbabilities[i] = 1 - PriorProbabilities[i];
        // FIX: cast to double — the ratio was computed with integer
        // division, truncating (negCount / posCount) before the log and
        // collapsing many distinct label frequencies onto the same cost.
        Cost[i] = Math.log10((train.numInstances() - temp_Ci - 1) / (double) (temp_Ci + 1)) + 1;
    }
}
From source file:MLKNNCS.java
License:Open Source License
/** * Computing Cond and CondN Probabilities for each class of the training set * * @throws Exception Potential exception thrown. To be handled in an upper level. *///from w ww .j a va 2s . c o m private void ComputeCond() throws Exception { int[][] temp_Ci = new int[numLabels][numOfNeighbors + 1]; int[][] temp_NCi = new int[numLabels][numOfNeighbors + 1]; for (int i = 0; i < train.numInstances(); i++) { Instances knn = new Instances(lnn.kNearestNeighbours(train.instance(i), numOfNeighbors)); // now compute values of temp_Ci and temp_NCi for every class label for (int j = 0; j < numLabels; j++) { int aces = 0; // num of aces in Knn for j for (int k = 0; k < numOfNeighbors; k++) { double value = Double.parseDouble( train.attribute(labelIndices[j]).value((int) knn.instance(k).value(labelIndices[j]))); if (Utils.eq(value, 1.0)) { aces++; } } // raise the counter of temp_Ci[j][aces] and temp_NCi[j][aces] by 1 if (Utils.eq(Double.parseDouble( train.attribute(labelIndices[j]).value((int) train.instance(i).value(labelIndices[j]))), 1.0)) { temp_Ci[j][aces]++; } else { temp_NCi[j][aces]++; } } } // compute CondProbabilities[i][..] for labels based on temp_Ci[] for (int i = 0; i < numLabels; i++) { int temp1 = 0; int temp2 = 0; for (int j = 0; j < numOfNeighbors + 1; j++) { temp1 += temp_Ci[i][j]; temp2 += temp_NCi[i][j]; } for (int j = 0; j < numOfNeighbors + 1; j++) { CondProbabilities[i][j] = (smooth + temp_Ci[i][j]) / (smooth * (numOfNeighbors + 1) + temp1); CondNProbabilities[i][j] = (smooth + temp_NCi[i][j]) / (smooth * (numOfNeighbors + 1) + temp2); } } }
From source file:MLKNNCS.java
License:Open Source License
protected MultiLabelOutput makePredictionInternal(Instance instance) throws Exception { double[] confidences = new double[numLabels]; boolean[] predictions = new boolean[numLabels]; Instances knn = null;/*from w ww .j a v a 2s . c om*/ try { knn = new Instances(lnn.kNearestNeighbours(instance, numOfNeighbors)); } catch (Exception ex) { Logger.getLogger(MLKNNCS.class.getName()).log(Level.SEVERE, null, ex); } int trueCount = 0; for (int i = 0; i < numLabels; i++) { // compute sum of aces in KNN int aces = 0; // num of aces in Knn for i for (int k = 0; k < numOfNeighbors; k++) { double value = Double.parseDouble( train.attribute(labelIndices[i]).value((int) knn.instance(k).value(labelIndices[i]))); if (Utils.eq(value, 1.0)) { aces++; } } double Prob_in = PriorProbabilities[i] * CondProbabilities[i][aces]; double Prob_out = PriorNProbabilities[i] * CondNProbabilities[i][aces]; confidences[i] = Cost[i] * Prob_in / (Cost[i] * Prob_in + Prob_out); //confidences[i] = 6*Prob_in/(6*Prob_in + Prob_out); if (confidences[i] > 0.5) { predictions[i] = true; trueCount++; } else if (confidences[i] < 0.5) { predictions[i] = false; } else { Random rnd = new Random(); predictions[i] = (rnd.nextInt(2) == 1) ? 
true : false; } // ranking function } MultiLabelOutput mlo = new MultiLabelOutput(predictions, confidences); if (trueCount < 3) { double[] confidence = mlo.getConfidences(); double[] confidenceTop4 = new double[4]; int[] top4 = new int[4]; Arrays.fill(top4, 0); Arrays.fill(confidenceTop4, 0); for (int i = 0; i < confidence.length; i++) { if (confidence[i] > confidenceTop4[0]) { top4[3] = top4[2]; confidenceTop4[3] = confidenceTop4[2]; top4[2] = top4[1]; confidenceTop4[2] = confidenceTop4[1]; top4[1] = top4[0]; confidenceTop4[1] = confidenceTop4[0]; top4[0] = i; confidenceTop4[0] = confidence[i]; } else if (confidence[i] > confidenceTop4[1]) { top4[3] = top4[2]; confidenceTop4[3] = confidenceTop4[2]; top4[2] = top4[1]; confidenceTop4[2] = confidenceTop4[1]; top4[1] = i; confidenceTop4[1] = confidence[i]; } else if (confidence[i] > confidenceTop4[2]) { top4[3] = top4[2]; confidenceTop4[3] = confidenceTop4[2]; top4[2] = i; confidenceTop4[2] = confidence[i]; } else if (confidence[i] > confidenceTop4[3]) { top4[3] = i; confidenceTop4[3] = confidence[i]; } } for (int i = trueCount; i < 4; i++) { if ((confidence[top4[i]] > 0.25 && i == 3) || confidence[top4[i]] > 0.2 && i < 3) { predictions[top4[i]] = true; trueCount++; } } if (trueCount == 0) { predictions[top4[0]] = true; } mlo = new MultiLabelOutput(predictions, confidences); } return mlo; }
From source file:ID3Chi.java
License:Open Source License
/** * Method for building an ID3Chi tree./*from w w w .j av a2s . c om*/ * * @param data * the training data * @exception Exception * if decision tree can't be built successfully */ private void makeTree(Instances data) throws Exception { // Check if no instances have reached this node. /* if (data.numInstances() == 0) { m_Attribute = null; m_ClassValue = Instance.missingValue(); m_Distribution = new double[data.numClasses()]; return; } /**/ if (data.numInstances() == 0) { SetNullDistribution(data); } // Compute attribute with maximum information gain. double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); double entropyOfAllData = computeEntropy(data); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att, entropyOfAllData); } m_Attribute = data.attribute(Utils.maxIndex(infoGains)); double chiSquare = computeChiSquare(data, m_Attribute); int degreesOfFreedom = m_Attribute.numValues() - 1; ChiSquaredDistribution chi = new ChiSquaredDistribution(degreesOfFreedom); double threshold = chi.inverseCumulativeProbability(m_confidenceLevel); // Make leaf if information gain is zero. // Otherwise create successors. if (Utils.eq(infoGains[m_Attribute.index()], 0)) { MakeALeaf(data); } else { // Discard unknown values for selected attribute //data.deleteWithMissing(m_Attribute); Instances[] subset = splitData(data, m_Attribute); if (CheckIfCanApplyChiSquare(subset) && (chiSquare <= threshold)) { MakeALeaf(data); return; } m_Successors = new ID3Chi[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new ID3Chi(this.m_confidenceLevel); m_Successors[j].m_Ratio = (double) subset[j].numInstances() / (double) data.numInstances(); m_Successors[j].makeTree(subset[j]); } } }
From source file:adaptedClusteringAlgorithms.MyFarthestFirst.java
License:Open Source License
/** * Normalizes a given value of a numeric attribute. * * @param x the value to be normalized/*ww w . java 2 s. c o m*/ * @param i the attribute's index * @return the normalized value */ protected double norm(double x, int i) { if (Double.isNaN(m_Min[i]) || Utils.eq(m_Max[i], m_Min[i])) { return 0; } else { return (x - m_Min[i]) / (m_Max[i] - m_Min[i]); } }
From source file:cerebro.Id3.java
License:Open Source License
/** * Method for building an Id3 tree.//ww w . j av a 2s .c o m * * @param data the training data * @exception Exception if decision tree can't be built successfully */ private void makeTree(Instances data) throws Exception { // Check if no instances have reached this node. if (data.numInstances() == 0) { m_Attribute = null; m_ClassValue = Instance.missingValue(); m_Distribution = new double[data.numClasses()]; return; } // Compute attribute with maximum information gain. double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att); } m_Attribute = data.attribute(Utils.maxIndex(infoGains)); // Make leaf if information gain is zero. // Otherwise create successors. if (Utils.eq(infoGains[m_Attribute.index()], 0)) { m_Attribute = null; m_Distribution = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); m_Distribution[(int) inst.classValue()]++; } Utils.normalize(m_Distribution); m_ClassValue = Utils.maxIndex(m_Distribution); m_ClassAttribute = data.classAttribute(); } else { Instances[] splitData = splitData(data, m_Attribute); m_Successors = new Id3[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new Id3(); m_Successors[j].makeTree(splitData[j]); } } }