Example usage for weka.core Utils log2

List of usage examples for weka.core Utils log2

Introduction

In this page you can find the example usage for weka.core Utils log2.

Prototype

double log2

To view the source code for weka.core Utils log2.

Click Source Link

Document

The natural logarithm of 2.

Usage

From source file:ID3Chi.java

License:Open Source License

/**
 * Computes the entropy of a dataset./*from  w  w w .j  av  a  2s  . co  m*/
 * 
 * @param data
 *            the data for which entropy is to be computed
 * @return the entropy of the data's class distribution
 * @throws Exception
 *             if computation fails
 */
private double computeEntropy(Instances data) throws Exception {

    double[] classCounts = GetClassCounts(data);
    double entropy = 0;
    for (int j = 0; j < data.numClasses(); j++) {
        if (classCounts[j] > 0) {
            entropy -= classCounts[j] * Utils.log2(classCounts[j]);
        }
    }
    entropy /= (double) data.numInstances();
    return entropy + Utils.log2(data.numInstances());
}

From source file:ID3Chi.java

License:Open Source License

private double computeEntropyWithUnknowns(Instances data, Instances unknownData,
        double[] classCountsUnknownData, double ratio) throws Exception {

    double[] classCounts = GetClassCounts(data);
    double entropy = 0;
    for (int j = 0; j < data.numClasses(); j++) {
        double p = classCounts[j] + classCountsUnknownData[j] * ratio;
        if (p > 0) {
            entropy -= p * Utils.log2(p);
        }//  ww  w  .  j  a va2 s.  c  om
    }
    entropy /= (double) data.numInstances();
    return entropy + Utils.log2(data.numInstances());
}

From source file:ai.BalancedRandomForest.java

License:GNU General Public License

/**
 * Build Balanced Random Forest//from  w  w  w .  j  a  va  2s. c  o  m
 */
public void buildClassifier(final Instances data) throws Exception {
    // If number of features is 0 then set it to log2 of M (number of attributes)
    if (numFeatures < 1)
        numFeatures = (int) Utils.log2(data.numAttributes()) + 1;
    // Check maximum number of random features
    if (numFeatures >= data.numAttributes())
        numFeatures = data.numAttributes() - 1;

    // Initialize array of trees
    tree = new BalancedRandomTree[numTrees];

    // total number of instances
    final int numInstances = data.numInstances();
    // total number of classes
    final int numClasses = data.numClasses();

    final ArrayList<Integer>[] indexSample = new ArrayList[numClasses];
    for (int i = 0; i < numClasses; i++)
        indexSample[i] = new ArrayList<Integer>();

    //System.out.println("numClasses = " + numClasses);

    // fill indexSample with the indices of each class
    for (int i = 0; i < numInstances; i++) {
        //System.out.println("data.get("+i+").classValue() = " + data.get(i).classValue());
        indexSample[(int) data.get(i).classValue()].add(i);
    }

    final Random random = new Random(seed);

    // Executor service to run concurrent trees
    final ExecutorService exe = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());

    List<Future<BalancedRandomTree>> futures = new ArrayList<Future<BalancedRandomTree>>(numTrees);

    final boolean[][] inBag = new boolean[numTrees][numInstances];

    try {
        for (int i = 0; i < numTrees; i++) {
            final ArrayList<Integer> bagIndices = new ArrayList<Integer>();

            // Randomly select the indices in a balanced way
            for (int j = 0; j < numInstances; j++) {
                // Select first the class
                final int randomClass = random.nextInt(numClasses);
                // Select then a random sample of that class
                final int randomSample = random.nextInt(indexSample[randomClass].size());
                bagIndices.add(indexSample[randomClass].get(randomSample));
                inBag[i][indexSample[randomClass].get(randomSample)] = true;
            }

            // Create random tree
            final Splitter splitter = new Splitter(
                    new GiniFunction(numFeatures, data.getRandomNumberGenerator(random.nextInt())));

            futures.add(exe.submit(new Callable<BalancedRandomTree>() {
                public BalancedRandomTree call() {
                    return new BalancedRandomTree(data, bagIndices, splitter);
                }
            }));
        }

        // Grab all trained trees before proceeding
        for (int treeIdx = 0; treeIdx < numTrees; treeIdx++)
            tree[treeIdx] = futures.get(treeIdx).get();

        // Calculate out of bag error
        final boolean numeric = data.classAttribute().isNumeric();

        List<Future<Double>> votes = new ArrayList<Future<Double>>(data.numInstances());

        for (int i = 0; i < data.numInstances(); i++) {
            VotesCollector aCollector = new VotesCollector(tree, i, data, inBag);
            votes.add(exe.submit(aCollector));
        }

        double outOfBagCount = 0.0;
        double errorSum = 0.0;

        for (int i = 0; i < data.numInstances(); i++) {

            double vote = votes.get(i).get();

            // error for instance
            outOfBagCount += data.instance(i).weight();
            if (numeric) {
                errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight();
            } else {
                if (vote != data.instance(i).classValue())
                    errorSum += data.instance(i).weight();
            }

        }

        outOfBagError = errorSum / outOfBagCount;

    } catch (Exception ex) {
        ex.printStackTrace();
    } finally {
        exe.shutdownNow();
    }

}

From source file:bme.mace.logicdomain.Evaluation.java

License:Open Source License

/**
 * Calculate the entropy of the prior distribution
 * //from   w w w  .java 2 s . c  om
 * @return the entropy of the prior distribution
 * @throws Exception if the class is not nominal
 */
public final double priorEntropy() throws Exception {

    if (!m_ClassIsNominal) {
        throw new Exception("Can't compute entropy of class prior: " + "class numeric!");
    }

    if (m_NoPriors) {
        return Double.NaN;
    }

    double entropy = 0;
    for (int i = 0; i < m_NumClasses; i++) {
        entropy -= m_ClassPriors[i] / m_ClassPriorsSum * Utils.log2(m_ClassPriors[i] / m_ClassPriorsSum);
    }
    return entropy;
}

From source file:bme.mace.logicdomain.Evaluation.java

License:Open Source License

/**
 * Updates all the statistics about a classifiers performance for the current
 * test instance./*from w w  w  .ja  va2  s .com*/
 * 
 * @param predictedDistribution the probabilities assigned to each class
 * @param instance the instance to be classified
 * @throws Exception if the class of the instance is not set
 */
protected void updateStatsForClassifier(double[] predictedDistribution, Instance instance) throws Exception {

    int actualClass = (int) instance.classValue();

    if (!instance.classIsMissing()) {
        updateMargins(predictedDistribution, actualClass, instance.weight());

        // Determine the predicted class (doesn't detect multiple
        // classifications)
        int predictedClass = -1;
        double bestProb = 0.0;
        for (int i = 0; i < m_NumClasses; i++) {
            if (predictedDistribution[i] > bestProb) {
                predictedClass = i;
                bestProb = predictedDistribution[i];
            }
        }

        m_WithClass += instance.weight();

        // Determine misclassification cost
        if (m_CostMatrix != null) {
            if (predictedClass < 0) {
                // For missing predictions, we assume the worst possible cost.
                // This is pretty harsh.
                // Perhaps we could take the negative of the cost of a correct
                // prediction (-m_CostMatrix.getElement(actualClass,actualClass)),
                // although often this will be zero
                m_TotalCost += instance.weight() * m_CostMatrix.getMaxCost(actualClass, instance);
            } else {
                m_TotalCost += instance.weight()
                        * m_CostMatrix.getElement(actualClass, predictedClass, instance);
            }
        }

        // Update counts when no class was predicted
        if (predictedClass < 0) {
            m_Unclassified += instance.weight();
            return;
        }

        double predictedProb = Math.max(MIN_SF_PROB, predictedDistribution[actualClass]);
        double priorProb = Math.max(MIN_SF_PROB, m_ClassPriors[actualClass] / m_ClassPriorsSum);
        if (predictedProb >= priorProb) {
            m_SumKBInfo += (Utils.log2(predictedProb) - Utils.log2(priorProb)) * instance.weight();
        } else {
            m_SumKBInfo -= (Utils.log2(1.0 - predictedProb) - Utils.log2(1.0 - priorProb)) * instance.weight();
        }

        m_SumSchemeEntropy -= Utils.log2(predictedProb) * instance.weight();
        m_SumPriorEntropy -= Utils.log2(priorProb) * instance.weight();

        updateNumericScores(predictedDistribution, makeDistribution(instance.classValue()), instance.weight());

        // Update other stats
        m_ConfusionMatrix[actualClass][predictedClass] += instance.weight();
        if (predictedClass != actualClass) {
            m_Incorrect += instance.weight();
        } else {
            m_Correct += instance.weight();
        }
    } else {
        m_MissingClass += instance.weight();
    }
}

From source file:bme.mace.logicdomain.Evaluation.java

License:Open Source License

/**
 * Updates all the statistics about a predictors performance for the current
 * test instance.//w  w w.  j  av  a2s  .  c  om
 * 
 * @param predictedValue the numeric value the classifier predicts
 * @param instance the instance to be classified
 * @throws Exception if the class of the instance is not set
 */
protected void updateStatsForPredictor(double predictedValue, Instance instance) throws Exception {

    if (!instance.classIsMissing()) {

        // Update stats
        m_WithClass += instance.weight();
        if (Instance.isMissingValue(predictedValue)) {
            m_Unclassified += instance.weight();
            return;
        }
        m_SumClass += instance.weight() * instance.classValue();
        m_SumSqrClass += instance.weight() * instance.classValue() * instance.classValue();
        m_SumClassPredicted += instance.weight() * instance.classValue() * predictedValue;
        m_SumPredicted += instance.weight() * predictedValue;
        m_SumSqrPredicted += instance.weight() * predictedValue * predictedValue;

        if (m_ErrorEstimator == null) {
            setNumericPriorsFromBuffer();
        }
        double predictedProb = Math.max(m_ErrorEstimator.getProbability(predictedValue - instance.classValue()),
                MIN_SF_PROB);
        double priorProb = Math.max(m_PriorErrorEstimator.getProbability(instance.classValue()), MIN_SF_PROB);

        m_SumSchemeEntropy -= Utils.log2(predictedProb) * instance.weight();
        m_SumPriorEntropy -= Utils.log2(priorProb) * instance.weight();
        m_ErrorEstimator.addValue(predictedValue - instance.classValue(), instance.weight());

        updateNumericScores(makeDistribution(predictedValue), makeDistribution(instance.classValue()),
                instance.weight());

    } else {
        m_MissingClass += instance.weight();
    }
}

From source file:cerebro.Id3.java

License:Open Source License

/**
 * Computes the entropy of a dataset./*from  w  w  w  .j a  v  a  2 s  .  co  m*/
 *
 * @param data the data for which entropy is to be computed
 * @return the entropy of the data's class distribution
 * @throws Exception if computation fails
 */
private double computeEntropy(Instances data) throws Exception {

    double[] classCounts = new double[data.numClasses()];
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        classCounts[(int) inst.classValue()]++;
    }
    double entropy = 0;
    for (int j = 0; j < data.numClasses(); j++) {
        if (classCounts[j] > 0) {
            entropy -= classCounts[j] * Utils.log2(classCounts[j]);
        }
    }
    entropy /= (double) data.numInstances();
    return entropy + Utils.log2(data.numInstances());
}

From source file:com.tum.classifiertest.FastRandomForest.java

License:Open Source License

/**
 * Builds a classifier for a set of instances.
 *
 * @param data the instances to train the classifier with
 *
 * @throws Exception if something goes wrong
 *///from w  w  w. j a  v a2 s . co m
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return;
    } else {
        m_ZeroR = null;
    }

    /* Save header with attribute info. Can be accessed later by FastRfTrees
     * through their m_MotherForest field. */
    setM_Info(new Instances(data, 0));

    m_bagger = new FastRfBagging();

    // Set up the tree options which are held in the motherForest.
    m_KValue = m_numFeatures;
    if (m_KValue > data.numAttributes() - 1)
        m_KValue = data.numAttributes() - 1;
    if (m_KValue < 1)
        m_KValue = (int) Utils.log2(data.numAttributes()) + 1;

    FastRandomTree rTree = new FastRandomTree();
    rTree.m_MotherForest = this; // allows to retrieve KValue and MaxDepth
    // some temporary arrays which need to be separate for every tree, so
    // that the trees can be trained in parallel in different threads

    // set up the bagger and build the forest
    m_bagger.setClassifier(rTree);
    m_bagger.setSeed(m_randomSeed);
    m_bagger.setNumIterations(m_numTrees);
    m_bagger.setCalcOutOfBag(true);
    m_bagger.setComputeImportances(this.getComputeImportances());

    m_bagger.buildClassifier(data, m_NumThreads, this);

}

From source file:com.walmart.productgenome.matching.models.EMSRandomForest.java

License:Open Source License

/**
 * Builds a classifier for a set of instances.
 *
 * @param data the instances to train the classifier with
 * @throws Exception if something goes wrong
 *///from w w w .  j  a va2  s .  c o  m
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    m_bagger = new Bagging();
    RandomTree rTree = new RandomTree();

    // set up the random tree options
    m_KValue = m_numFeatures;
    if (m_KValue < 1)
        m_KValue = (int) Utils.log2(data.numAttributes()) + 1;
    rTree.setKValue(m_KValue);
    rTree.setMaxDepth(getMaxDepth());

    // set up the bagger and build the forest
    m_bagger.setClassifier(rTree);
    m_bagger.setSeed(m_randomSeed);
    m_bagger.setNumIterations(m_numTrees);
    m_bagger.setCalcOutOfBag(true);
    m_bagger.setNumExecutionSlots(m_numExecutionSlots);
    m_bagger.buildClassifier(data);
}

From source file:cotraining.copy.Evaluation_D.java

License:Open Source License

/**
 * Calculate the entropy of the prior distribution
 *
 * @return the entropy of the prior distribution
 * @throws Exception if the class is not nominal
 *//*ww  w.  j av a 2  s  .c o m*/
public final double priorEntropy() throws Exception {

    if (!m_ClassIsNominal) {
        throw new Exception("Can't compute entropy of class prior: " + "class numeric!");
    }

    if (m_NoPriors)
        return Double.NaN;

    double entropy = 0;
    for (int i = 0; i < m_NumClasses; i++) {
        entropy -= m_ClassPriors[i] / m_ClassPriorsSum * Utils.log2(m_ClassPriors[i] / m_ClassPriorsSum);
    }
    return entropy;
}