Example usage for weka.core Utils eq

List of usage examples for weka.core Utils eq

Introduction

In this page you can find the example usage for weka.core Utils eq.

Prototype

public static boolean eq(double a, double b) 

Source Link

Document

Tests if a is equal to b.

Usage

From source file:Bilbo.java

License:Open Source License

/**
 * Bagging method: builds the ensemble of base trees from the labeled
 * training data plus a pool of unlabeled instances, then (optionally)
 * estimates the out-of-bag error.
 *
 * @param data the training data to be used for generating the
 * bagged classifier.
 * @param p_unlabeledData unlabeled instances copied into m_unlabeledData
 * for use by the base trees
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data, Instances p_unlabeledData) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // Has user asked to represent copies using weights?
    if (getRepresentCopiesUsingWeights() && !(m_Classifier instanceof WeightedInstancesHandler)) {
        throw new IllegalArgumentException("Cannot represent copies using weights when "
                + "base learner in bagging does not implement " + "WeightedInstancesHandler.");
    }

    // get fresh Instances objects so the caller's datasets are not modified
    m_data = new Instances(data);
    m_unlabeledData = new Instances(p_unlabeledData);

    super.buildClassifier(m_data);

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }

    m_random = new Random(m_Seed);

    // per-classifier in-bag membership; the inner arrays are presumably
    // filled during buildClassifiers() — TODO confirm
    m_inBag = null;
    if (m_CalcOutOfBag)
        m_inBag = new boolean[m_Classifiers.length][];

    // give each randomizable base classifier its own seed
    for (int j = 0; j < m_Classifiers.length; j++) {
        if (m_Classifier instanceof Randomizable) {
            ((Randomizable) m_Classifiers[j]).setSeed(m_random.nextInt());
        }
    }
    //Insert oracle loop here TODO

    buildClassifiers();

    // post-process each tree: have it write its transducted instances into
    // a cleared working copy and re-run induction on them
    Instances inst = new Instances(m_data);
    for (int i = 0; i < m_Classifiers.length; i++) {
        inst.clear();
        ((NewTree) m_Classifiers[i]).GetTransductedInstances(inst);
        ((NewTree) m_Classifiers[i]).DoInduction(inst);
    }
    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = m_data.classAttribute().isNumeric();

        for (int i = 0; i < m_data.numInstances(); i++) {
            double vote;
            double[] votes;
            // one slot for a regression vote, one per class otherwise
            if (numeric)
                votes = new double[1];
            else
                votes = new double[m_data.numClasses()];

            // determine predictions for instance, using only classifiers
            // whose bag did NOT contain it
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                if (m_inBag[j][i])
                    continue;

                if (numeric) {
                    double pred = ((NewTree) m_Classifiers[j]).classifyInstance(m_data.instance(i));
                    if (!Utils.isMissingValue(pred)) {
                        votes[0] += pred;
                        voteCount++;
                    }
                } else {
                    voteCount++;
                    double[] newProbs = ((NewTree) m_Classifiers[j])
                            .distributionForInstance(m_data.instance(i));
                    // average the probability estimates
                    for (int k = 0; k < newProbs.length; k++) {
                        votes[k] += newProbs[k];
                    }
                }
            }

            // "vote": average for regression, argmax for classification;
            // missing when no out-of-bag classifier produced a prediction
            if (numeric) {
                if (voteCount == 0) {
                    vote = Utils.missingValue();
                } else {
                    vote = votes[0] / voteCount; // average
                }
            } else {
                if (Utils.eq(Utils.sum(votes), 0)) {
                    vote = Utils.missingValue();
                } else {
                    vote = Utils.maxIndex(votes); // predicted class
                    Utils.normalize(votes);
                }
            }

            // error for instance
            if (!Utils.isMissingValue(vote) && !m_data.instance(i).classIsMissing()) {
                outOfBagCount += m_data.instance(i).weight();
                if (numeric) {
                    // NOTE(review): this is a *relative* absolute error (it
                    // divides by the true class value), which yields
                    // Infinity/NaN when the class value is 0 — confirm intended
                    errorSum += (StrictMath.abs(vote - m_data.instance(i).classValue())
                            * m_data.instance(i).weight()) / m_data.instance(i).classValue();
                } else {
                    if (vote != m_data.instance(i).classValue())
                        errorSum += m_data.instance(i).weight();
                }
            }
        }

        if (outOfBagCount > 0) {
            m_OutOfBagError = errorSum / outOfBagCount;
        }
    } else {
        m_OutOfBagError = 0;
    }

    // save memory
    m_data = null;
}

From source file:Bilbo.java

License:Open Source License

/**
 * Calculates the class membership probabilities for the given test
 * instance by aggregating the votes of all base trees.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {

    final boolean numericClass = instance.classAttribute().isNumeric();
    double[] dist = new double[instance.numClasses()];
    double predCount = 0;

    for (int i = 0; i < m_NumIterations; i++) {
        if (numericClass) {
            // Regression: accumulate only non-missing predictions.
            double pred = ((NewTree) m_Classifiers[i]).classifyInstance(instance);
            if (!Utils.isMissingValue(pred)) {
                dist[0] += pred;
                predCount++;
            }
        } else {
            // Classification: sum the per-tree probability estimates.
            double[] treeProbs = ((NewTree) m_Classifiers[i]).distributionForInstance(instance);
            for (int j = 0; j < treeProbs.length; j++) {
                dist[j] += treeProbs[j];
            }
        }
    }

    if (numericClass) {
        // Average the predictions; missing when no tree contributed.
        dist[0] = (predCount == 0) ? Utils.missingValue() : dist[0] / predCount;
        return dist;
    }
    // Leave an all-zero vote vector untouched; otherwise normalize to
    // a proper probability distribution.
    if (!Utils.eq(Utils.sum(dist), 0)) {
        Utils.normalize(dist);
    }
    return dist;
}

From source file:BaggingImprove.java

/**
 * Bagging method./*w  ww.  j a v a 2 s  . com*/
 *
 * @param data the training data to be used for generating the bagged
 * classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    //data.deleteWithMissingClass();

    super.buildClassifier(data);

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }
    //+
    System.out.println("Classifier length" + m_Classifiers.length);

    int bagSize = data.numInstances() * m_BagSizePercent / 100;
    //+
    System.out.println("Bag Size " + bagSize);

    Random random = new Random(m_Seed);

    boolean[][] inBag = null;
    if (m_CalcOutOfBag) {
        inBag = new boolean[m_Classifiers.length][];
    }

    //+
    //inisialisasi nama penamaan model
    BufferedWriter writer = new BufferedWriter(new FileWriter("Bootstrap.txt"));

    for (int j = 0; j < m_Classifiers.length; j++) {

        Instances bagData = null;

        // create the in-bag dataset
        if (m_CalcOutOfBag) {
            inBag[j] = new boolean[data.numInstances()];

            //System.out.println("Inbag1 " + inBag[0][1]);
            //bagData = resampleWithWeights(data, random, inBag[j]);
            bagData = data.resampleWithWeights(random, inBag[j]);
            //System.out.println("num after resample " + bagData.numInstances());
            //+
            //                for (int k = 0; k < bagData.numInstances(); k++) {
            //                    System.out.println("Bag Data after resample [calc out bag]" + bagData.instance(k));
            //                }

        } else {
            //+
            System.out.println("Not m_Calc out of bag");
            System.out.println("Please configure code inside!");

            bagData = data.resampleWithWeights(random);
            if (bagSize < data.numInstances()) {
                bagData.randomize(random);
                Instances newBagData = new Instances(bagData, 0, bagSize);
                bagData = newBagData;
            }
        }

        if (m_Classifier instanceof Randomizable) {
            //+
            System.out.println("Randomizable");
            ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
        }

        //write bootstrap into file
        writer.write("Bootstrap " + j);
        writer.newLine();
        writer.write(bagData.toString());
        writer.newLine();

        System.out.println("Berhasil menyimpan bootstrap ke file ");

        System.out.println("Bootstrap " + j + 1);
        //            textarea.append("\nBootsrap " + (j + 1));
        //System.out.println("num instance kedua kali "+bagData.numInstances());

        for (int b = 1; b < bagData.numInstances(); b++) {
            System.out.println("" + bagData.instance(b));
            //                textarea.append("\n" + bagData.instance(b));
        }
        //            //+

        // build the classifier
        m_Classifiers[j].buildClassifier(bagData);
        //            //+
        //            
        //            SerializationHelper serialization = new SerializationHelper();
        //            serialization.write("KnnData"+model+".model", m_Classifiers[j]);
        //            System.out.println("Finish write into model");
        //            model++;
    }

    writer.flush();
    writer.close();
    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = data.classAttribute().isNumeric();

        for (int i = 0; i < data.numInstances(); i++) {
            double vote;
            double[] votes;
            if (numeric) {
                votes = new double[1];
            } else {
                votes = new double[data.numClasses()];
            }

            // determine predictions for instance
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                if (inBag[j][i]) {
                    continue;
                }
                voteCount++;
                // double pred = m_Classifiers[j].classifyInstance(data.instance(i));
                if (numeric) {
                    // votes[0] += pred;
                    votes[0] = m_Classifiers[j].classifyInstance(data.instance(i));
                } else {
                    // votes[(int) pred]++;
                    double[] newProbs = m_Classifiers[j].distributionForInstance(data.instance(i));
                    //-
                    //                        for(double a : newProbs)
                    //                        {
                    //                            System.out.println("Double new probs %.f "+a);
                    //                        }
                    // average the probability estimates
                    for (int k = 0; k < newProbs.length; k++) {
                        votes[k] += newProbs[k];
                    }

                }
            }
            System.out.println("Vote count %d" + voteCount);

            // "vote"
            if (numeric) {
                vote = votes[0];
                if (voteCount > 0) {
                    vote /= voteCount; // average
                }
            } else {
                if (Utils.eq(Utils.sum(votes), 0)) {
                } else {
                    Utils.normalize(votes);

                }
                vote = Utils.maxIndex(votes); // predicted class
                //-
                System.out.println("Vote " + vote);

            }

            // error for instance
            outOfBagCount += data.instance(i).weight();
            if (numeric) {
                errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight();
            } else if (vote != data.instance(i).classValue()) {
                //+
                System.out.println("Vote terakhir" + data.instance(i).classValue());
                errorSum += data.instance(i).weight();
            }
        }

        m_OutOfBagError = errorSum / outOfBagCount;
    } else {
        m_OutOfBagError = 0;
    }
}

From source file:BaggingImprove.java

/**
 * Calculates the class membership probabilities for the given test
 * instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {

    double[] sums = new double[instance.numClasses()], newProbs;

    for (int i = 0; i < m_NumIterations; i++) {
        if (instance.classAttribute().isNumeric() == true) {
            sums[0] += m_Classifiers[i].classifyInstance(instance);
        } else {
            newProbs = m_Classifiers[i].distributionForInstance(instance);
            // BUG FIX: this accumulation loop was commented out, so the
            // per-classifier estimates were fetched and discarded and the
            // method always returned an all-zero distribution for nominal
            // classes. Restore the sum over base classifiers.
            for (int j = 0; j < newProbs.length; j++) {
                sums[j] += newProbs[j];
            }
        }
    }
    if (instance.classAttribute().isNumeric() == true) {
        // average of the individual regression predictions
        sums[0] /= m_NumIterations;
        return sums;
    } else if (Utils.eq(Utils.sum(sums), 0)) {
        // no classifier voted: leave the zero vector as-is
        return sums;
    } else {
        Utils.normalize(sums);
        return sums;
    }
}

From source file:MLKNNCS.java

License:Open Source License

/**
 * Computing Prior and PriorN Probabilities for each class of the training
 * set, plus a per-label cost used to weight positive predictions.
 */
private void ComputePrior() {
    for (int i = 0; i < numLabels; i++) {
        // count training instances carrying label i
        int temp_Ci = 0;
        for (int j = 0; j < train.numInstances(); j++) {
            double value = Double.parseDouble(
                    train.attribute(labelIndices[i]).value((int) train.instance(j).value(labelIndices[i])));
            if (Utils.eq(value, 1.0)) {
                temp_Ci++;
            }
        }
        // Laplace-style smoothing with parameter `smooth`
        PriorProbabilities[i] = (smooth + temp_Ci) / (smooth * 2 + train.numInstances());
        PriorNProbabilities[i] = 1 - PriorProbabilities[i];
        // BUG FIX: both operands were int, so the negative/positive ratio was
        // truncated by integer division before the log was taken. Cast to
        // double so the cost reflects the true ratio.
        // NOTE(review): the argument is <= 0 when temp_Ci >= numInstances-1,
        // which makes log10 return -Infinity/NaN — confirm the data rules
        // that out.
        Cost[i] = Math.log10((double) (train.numInstances() - temp_Ci - 1) / (temp_Ci + 1)) + 1;
    }
}

From source file:MLKNNCS.java

License:Open Source License

/**
 * Computing Cond and CondN Probabilities for each class of the training
 * set: CondProbabilities[i][k] is the (smoothed) probability of seeing k
 * positive neighbors among the numOfNeighbors nearest neighbors given that
 * label i is present, and CondNProbabilities[i][k] the same given that it
 * is absent.
 *
 * @throws Exception Potential exception thrown. To be handled in an upper level.
 */
private void ComputeCond() throws Exception {
    int[][] withLabel = new int[numLabels][numOfNeighbors + 1];
    int[][] withoutLabel = new int[numLabels][numOfNeighbors + 1];

    for (int inst = 0; inst < train.numInstances(); inst++) {

        Instances neighbours = new Instances(lnn.kNearestNeighbours(train.instance(inst), numOfNeighbors));

        for (int label = 0; label < numLabels; label++) {

            // count how many of the nearest neighbours carry this label
            int positives = 0;
            for (int n = 0; n < numOfNeighbors; n++) {
                double neighbourValue = Double.parseDouble(train.attribute(labelIndices[label])
                        .value((int) neighbours.instance(n).value(labelIndices[label])));
                if (Utils.eq(neighbourValue, 1.0)) {
                    positives++;
                }
            }

            // bump the counter matching whether the instance itself has the label
            double ownValue = Double.parseDouble(train.attribute(labelIndices[label])
                    .value((int) train.instance(inst).value(labelIndices[label])));
            if (Utils.eq(ownValue, 1.0)) {
                withLabel[label][positives]++;
            } else {
                withoutLabel[label][positives]++;
            }
        }
    }

    // convert the raw counts into smoothed conditional probabilities
    for (int label = 0; label < numLabels; label++) {
        int totalWith = 0;
        int totalWithout = 0;
        for (int k = 0; k < numOfNeighbors + 1; k++) {
            totalWith += withLabel[label][k];
            totalWithout += withoutLabel[label][k];
        }
        for (int k = 0; k < numOfNeighbors + 1; k++) {
            CondProbabilities[label][k] = (smooth + withLabel[label][k])
                    / (smooth * (numOfNeighbors + 1) + totalWith);
            CondNProbabilities[label][k] = (smooth + withoutLabel[label][k])
                    / (smooth * (numOfNeighbors + 1) + totalWithout);
        }
    }
}

From source file:MLKNNCS.java

License:Open Source License

/**
 * Makes a multi-label prediction for the given instance: computes a
 * cost-weighted MLkNN posterior per label, then post-processes the result
 * so that when fewer than 3 labels fired, the highest-confidence labels
 * above a threshold are forced on (and at least one label is always set).
 *
 * @param instance the instance to predict labels for
 * @return predictions and confidences for every label
 * @throws Exception if prediction cannot be completed
 */
protected MultiLabelOutput makePredictionInternal(Instance instance) throws Exception {
    double[] confidences = new double[numLabels];
    boolean[] predictions = new boolean[numLabels];

    // nearest neighbours of the instance in the training set
    Instances knn = null;
    try {
        knn = new Instances(lnn.kNearestNeighbours(instance, numOfNeighbors));
    } catch (Exception ex) {
        Logger.getLogger(MLKNNCS.class.getName()).log(Level.SEVERE, null, ex);
    }
    int trueCount = 0;
    for (int i = 0; i < numLabels; i++) {
        // compute sum of aces in KNN (neighbours carrying label i)
        int aces = 0; // num of aces in Knn for i
        for (int k = 0; k < numOfNeighbors; k++) {
            double value = Double.parseDouble(
                    train.attribute(labelIndices[i]).value((int) knn.instance(k).value(labelIndices[i])));
            if (Utils.eq(value, 1.0)) {
                aces++;
            }
        }
        // Bayes combination of prior and neighbour-conditional probability,
        // with the positive side weighted by the per-label cost
        double Prob_in = PriorProbabilities[i] * CondProbabilities[i][aces];
        double Prob_out = PriorNProbabilities[i] * CondNProbabilities[i][aces];

        confidences[i] = Cost[i] * Prob_in / (Cost[i] * Prob_in + Prob_out);
        //confidences[i] = 6*Prob_in/(6*Prob_in + Prob_out);

        if (confidences[i] > 0.5) {
            predictions[i] = true;
            trueCount++;
        } else if (confidences[i] < 0.5) {
            predictions[i] = false;
        } else {
            // exact tie at 0.5: break it with a coin flip.
            // NOTE(review): unseeded Random makes predictions
            // non-deterministic across runs — confirm that is intended.
            Random rnd = new Random();
            predictions[i] = (rnd.nextInt(2) == 1) ? true : false;
        }
        // ranking function
    }

    MultiLabelOutput mlo = new MultiLabelOutput(predictions, confidences);

    if (trueCount < 3) {
        // fewer than 3 labels fired: track the 4 highest confidences via a
        // manual insertion into fixed-size top-4 arrays (index + value)
        double[] confidence = mlo.getConfidences();
        double[] confidenceTop4 = new double[4];
        int[] top4 = new int[4];

        Arrays.fill(top4, 0);
        Arrays.fill(confidenceTop4, 0);

        for (int i = 0; i < confidence.length; i++) {
            if (confidence[i] > confidenceTop4[0]) {
                // shift everything down one rank, insert at rank 0
                top4[3] = top4[2];
                confidenceTop4[3] = confidenceTop4[2];
                top4[2] = top4[1];
                confidenceTop4[2] = confidenceTop4[1];
                top4[1] = top4[0];
                confidenceTop4[1] = confidenceTop4[0];
                top4[0] = i;
                confidenceTop4[0] = confidence[i];
            } else if (confidence[i] > confidenceTop4[1]) {
                top4[3] = top4[2];
                confidenceTop4[3] = confidenceTop4[2];
                top4[2] = top4[1];
                confidenceTop4[2] = confidenceTop4[1];
                top4[1] = i;
                confidenceTop4[1] = confidence[i];
            } else if (confidence[i] > confidenceTop4[2]) {
                top4[3] = top4[2];
                confidenceTop4[3] = confidenceTop4[2];
                top4[2] = i;
                confidenceTop4[2] = confidence[i];
            } else if (confidence[i] > confidenceTop4[3]) {
                top4[3] = i;
                confidenceTop4[3] = confidence[i];
            }

        }
        // force on top-ranked labels that clear a threshold
        // (0.2 for ranks 0-2, 0.25 for rank 3)
        for (int i = trueCount; i < 4; i++) {
            if ((confidence[top4[i]] > 0.25 && i == 3) || confidence[top4[i]] > 0.2 && i < 3) {
                predictions[top4[i]] = true;
                trueCount++;
            }
        }
        // guarantee at least one positive label
        if (trueCount == 0) {
            predictions[top4[0]] = true;
        }
        mlo = new MultiLabelOutput(predictions, confidences);
    }
    return mlo;
}

From source file:ID3Chi.java

License:Open Source License

/**
 * Method for building an ID3Chi tree./*from w w w .j av  a2s  .  c om*/
 *
 * @param data
 *            the training data
 * @exception Exception
 *                if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {

    // Check if no instances have reached this node.
    /*
    if (data.numInstances() == 0) {
       m_Attribute = null;
       m_ClassValue = Instance.missingValue();
       m_Distribution = new double[data.numClasses()];
       return;
    }
    /**/
    if (data.numInstances() == 0) {
        SetNullDistribution(data);
    }

    // Compute attribute with maximum information gain.
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    double entropyOfAllData = computeEntropy(data);

    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        infoGains[att.index()] = computeInfoGain(data, att, entropyOfAllData);
    }
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));

    double chiSquare = computeChiSquare(data, m_Attribute);

    int degreesOfFreedom = m_Attribute.numValues() - 1;
    ChiSquaredDistribution chi = new ChiSquaredDistribution(degreesOfFreedom);
    double threshold = chi.inverseCumulativeProbability(m_confidenceLevel);

    // Make leaf if information gain is zero.
    // Otherwise create successors.
    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
        MakeALeaf(data);
    } else {
        // Discard unknown values for selected attribute
        //data.deleteWithMissing(m_Attribute);
        Instances[] subset = splitData(data, m_Attribute);

        if (CheckIfCanApplyChiSquare(subset) && (chiSquare <= threshold)) {
            MakeALeaf(data);
            return;
        }

        m_Successors = new ID3Chi[m_Attribute.numValues()];
        for (int j = 0; j < m_Attribute.numValues(); j++) {
            m_Successors[j] = new ID3Chi(this.m_confidenceLevel);
            m_Successors[j].m_Ratio = (double) subset[j].numInstances() / (double) data.numInstances();
            m_Successors[j].makeTree(subset[j]);
        }
    }
}

From source file:adaptedClusteringAlgorithms.MyFarthestFirst.java

License:Open Source License

/**
 * Normalizes a given value of a numeric attribute.
 *
 * @param x the value to be normalized/*ww  w  .  java  2 s. c o m*/
 * @param i the attribute's index
 * @return the normalized value
 */
protected double norm(double x, int i) {

    if (Double.isNaN(m_Min[i]) || Utils.eq(m_Max[i], m_Min[i])) {
        return 0;
    } else {
        return (x - m_Min[i]) / (m_Max[i] - m_Min[i]);
    }
}

From source file:cerebro.Id3.java

License:Open Source License

/**
 * Method for building an Id3 tree: selects the attribute with maximum
 * information gain, makes a leaf when no gain remains, and otherwise
 * recurses into the per-value subsets.
 *
 * @param data the training data
 * @exception Exception if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {

    // Check if no instances have reached this node: make an "empty" leaf
    // with a missing class value.
    if (data.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = Instance.missingValue();
        m_Distribution = new double[data.numClasses()];
        return;
    }

    // Compute attribute with maximum information gain.
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        infoGains[att.index()] = computeInfoGain(data, att);
    }
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));

    // Make leaf if information gain is zero.
    // Otherwise create successors.
    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
        // Leaf: store the normalized class distribution and the majority class.
        m_Attribute = null;
        m_Distribution = new double[data.numClasses()];
        Enumeration instEnum = data.enumerateInstances();
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            m_Distribution[(int) inst.classValue()]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    } else {
        // Internal node: split on the selected attribute and recurse.
        Instances[] splitData = splitData(data, m_Attribute);
        m_Successors = new Id3[m_Attribute.numValues()];
        for (int j = 0; j < m_Attribute.numValues(); j++) {
            m_Successors[j] = new Id3();
            m_Successors[j].makeTree(splitData[j]);
        }
    }
}