Example usage for weka.core Instances deleteWithMissingClass

Introduction

This page collects example usages of weka.core Instances deleteWithMissingClass, drawn from real-world source files.

Prototype

public void deleteWithMissingClass() 

Document

Removes all instances with a missing class value from the dataset.
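
A minimal, self-contained sketch of the call (the file path and class name are hypothetical; note that the class index must be set first, otherwise Weka throws an UnassignedClassException):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class DeleteMissingClassDemo {
    public static void main(String[] args) throws Exception {
        // "data.arff" is a placeholder path used for illustration.
        Instances data = DataSource.read("data.arff");
        // deleteWithMissingClass() requires the class attribute to be set.
        data.setClassIndex(data.numAttributes() - 1);

        int before = data.numInstances();
        data.deleteWithMissingClass();
        System.out.println("Removed " + (before - data.numInstances())
                + " instances with a missing class value.");
    }
}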

Usage

From source file:dewaweebtreeclassifier.veranda.VerandaTree.java

/**
 * Builds the classifier, first removing instances with a missing class value.
 *
 * @param data the training data
 */
@Override
public void buildClassifier(Instances data) {
    // remove all instances with a missing class value
    data.deleteWithMissingClass();

    buildTree(data);
}
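
Note that this first example calls deleteWithMissingClass() directly on the dataset passed in, so the caller's Instances object is modified. Most of the examples below copy first; a minimal sketch of that defensive pattern:

// Copy first so the caller's dataset is left untouched; the copy
// duplicates the instances but shares the attribute information.
Instances train = new Instances(data);
train.deleteWithMissingClass();
buildTree(train);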

From source file:gyc.OverBoostM1.java

License:Open Source License

/**
 * Boosting method.
 *
 * @param data the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */

public void buildClassifier(Instances data) throws Exception {

    super.buildClassifier(data);

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return;
    } else {
        m_ZeroR = null;
    }

    m_NumClasses = data.numClasses();
    if (m_NumClasses != 2)
        System.err.println("Can only build model for binary class data");

    /*
     * We do not use the method buildClassifierWithWeights because some
     * base learning algorithms may not be able to deal with weighted data.
     */
    buildClassifierUsingResampling(data);
}
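
The closing comment refers to weight-based resampling: rather than handing weighted instances to the base learner, the booster draws a bootstrap sample whose selection probabilities follow the current instance weights. A hedged sketch of that idea using the stock Instances API (the seed is arbitrary):

// Draw a sample distributed according to the instance weights, then
// train the base learner on the unweighted sample.
double[] weights = new double[data.numInstances()];
for (int i = 0; i < data.numInstances(); i++) {
    weights[i] = data.instance(i).weight();
}
Instances sample = data.resampleWithWeights(new java.util.Random(1), weights);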

From source file:gyc.SMOTEBagging.java

License:Open Source License

/**
 * Bagging method.
 *
 * @param data the training data to be used for generating the
 * bagged classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    super.buildClassifier(data);

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }

    int bagSize = data.numInstances() * m_BagSizePercent / 100;
    Random random = new Random(m_Seed);

    boolean[][] inBag = null;
    if (m_CalcOutOfBag)
        inBag = new boolean[m_Classifiers.length][];
    int b = 0;
    for (int j = 0; j < m_Classifiers.length; j++) {

        // determine the minority / majority class from the nominal class counts
        int[] classNum = data.attributeStats(data.classIndex()).nominalCounts;
        int minC, nMin = classNum[0];
        int majC, nMaj = classNum[1];
        if (nMin < nMaj) {
            minC = 0;
            majC = 1;
        } else {
            minC = 1;
            majC = 0;
            nMin = classNum[1];
            nMaj = classNum[0];
        }

        b = b + 10;
        Instances bagData = randomSampling(data, majC, minC, b, random);

        /* // create the in-bag dataset
        if (m_CalcOutOfBag) {
            inBag[j] = new boolean[data.numInstances()];
            bagData = resampleWithWeights(data, random, inBag[j]);
        } else {
            bagData = data.resampleWithWeights(random);
            if (bagSize < data.numInstances()) {
                bagData.randomize(random);
                Instances newBagData = new Instances(bagData, 0, bagSize);
                bagData = newBagData;
            }
        }

        if (m_Classifier instanceof Randomizable) {
            ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
        } */

        // build the classifier
        m_Classifiers[j].buildClassifier(bagData);
        //classNum=bagData.attributeStats(bagData.classIndex()).nominalCounts;
        //System.out.println("after:"+classNum[0]+"-"+classNum[1]);
    }

    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = data.classAttribute().isNumeric();

        for (int i = 0; i < data.numInstances(); i++) {
            double vote;
            double[] votes;
            if (numeric)
                votes = new double[1];
            else
                votes = new double[data.numClasses()];

            // determine predictions for instance
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                if (inBag[j][i])
                    continue;

                voteCount++;
                double pred = m_Classifiers[j].classifyInstance(data.instance(i));
                if (numeric)
                    votes[0] += pred;
                else
                    votes[(int) pred]++;
            }

            // "vote"
            if (numeric) {
                vote = votes[0];
                if (voteCount > 0) {
                    vote /= voteCount; // average
                }
            } else {
                vote = Utils.maxIndex(votes); // majority vote
            }

            // error for instance
            outOfBagCount += data.instance(i).weight();
            if (numeric) {
                errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight();
            } else {
                if (vote != data.instance(i).classValue())
                    errorSum += data.instance(i).weight();
            }
        }

        m_OutOfBagError = errorSum / outOfBagCount;
    } else {
        m_OutOfBagError = 0;
    }
}
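
Since the out-of-bag estimate above is only computed when the bag size is 100%, a caller would configure the ensemble accordingly. A hedged usage sketch (the setters follow Weka's standard Bagging conventions and are assumed to be inherited by this subclass):

SMOTEBagging ensemble = new SMOTEBagging();
ensemble.setCalcOutOfBag(true);  // requires a bag size of 100%
ensemble.setBagSizePercent(100);
ensemble.buildClassifier(data);  // copies data and removes missing classes internally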

From source file:hr.irb.fastRandomForest.FastRandomForest.java

License:Open Source License

/**
 * Builds a classifier for a set of instances.
 *
 * @param data the instances to train the classifier with
 *
 * @throws Exception if something goes wrong
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return;
    } else {
        m_ZeroR = null;
    }

    /* Save header with attribute info. Can be accessed later by FastRfTrees
     * through their m_MotherForest field. */
    m_Info = new Instances(data, 0);

    m_bagger = new FastRfBagging();

    // Set up the tree options which are held in the motherForest.
    m_KValue = m_numFeatures;
    if (m_KValue > data.numAttributes() - 1)
        m_KValue = data.numAttributes() - 1;
    if (m_KValue < 1)
        m_KValue = (int) Utils.log2(data.numAttributes()) + 1;

    FastRandomTree rTree = new FastRandomTree();
    rTree.m_MotherForest = this; // lets the tree retrieve KValue and MaxDepth
    // some temporary arrays which need to be separate for every tree, so
    // that the trees can be trained in parallel in different threads

    // set up the bagger and build the forest
    m_bagger.setClassifier(rTree);
    m_bagger.setSeed(m_randomSeed);
    m_bagger.setNumIterations(m_numTrees);
    m_bagger.setCalcOutOfBag(true);
    m_bagger.setComputeImportances(this.getComputeImportances());

    m_bagger.buildClassifier(data, m_NumThreads, this);

}
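
A hedged usage sketch for the forest above (the setter is inferred from the getComputeImportances() call in the method and is an assumption):

FastRandomForest forest = new FastRandomForest();
forest.setComputeImportances(true); // enables the importance bookkeeping above
forest.buildClassifier(data);       // copies data and removes missing classes internally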

From source file:hr.irb.fastRandomForest.NakedFastRandomForest.java

License:Open Source License

/**
 * Builds a classifier for a set of instances.
 *
 * Copy-pasted from FastRandomForest, except that it uses
 * NakedFastRandomTree as the mother classifier.
 * 
 * @param data
 *            the instances to train the classifier with
 * 
 * @throws Exception
 *             if something goes wrong
 */
@Override
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return;
    } else {
        m_ZeroR = null;
    }

    /*
     * Save header with attribute info. Can be accessed later by FastRfTrees
     * through their m_MotherForest field.
     */
    m_Info = new Instances(data, 0);

    m_bagger = new NakedFastRfBagging();

    // Set up the tree options which are held in the motherForest.
    m_KValue = m_numFeatures;
    if (m_KValue > data.numAttributes() - 1)
        m_KValue = data.numAttributes() - 1;
    if (m_KValue < 1)
        m_KValue = (int) Utils.log2(data.numAttributes()) + 1;

    // [jhostetler] This line is the only change from FastRandomForest.buildClassifier
    final FastRandomTree rTree = new NakedFastRandomTree();

    rTree.m_MotherForest = this; // lets the tree retrieve KValue and MaxDepth
    // some temporary arrays which need to be separate for every tree, so
    // that the trees can be trained in parallel in different threads

    // set up the bagger and build the forest
    m_bagger.setClassifier(rTree);
    m_bagger.setSeed(m_randomSeed);
    m_bagger.setNumIterations(m_numTrees);
    m_bagger.setCalcOutOfBag(true);
    m_bagger.setComputeImportances(this.getComputeImportances());

    ((NakedFastRfBagging) m_bagger).buildClassifier(data, m_NumThreads, this);

}

From source file:hr.irb.fastRandomForest.NakedFastRfBagging.java

License:Open Source License

/**
 * Bagging method. Produces DataCache objects with bootstrap samples of the
 * original data, and feeds them to the base classifier (which can only be a
 * FastRandomTree).
 * 
 * @param data
 *            The training set to be used for generating the bagged
 *            classifier.
 * @param numThreads
 *            The number of simultaneous threads to use for computation.
 *            Pass zero (0) for autodetection.
 * @param motherForest
 *            A reference to the FastRandomForest object that invoked this.
 * 
 * @throws Exception
 *             if the classifier could not be built successfully
 */
public void buildClassifier(Instances data, final int numThreads, final NakedFastRandomForest motherForest)
        throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    if (!(m_Classifier instanceof NakedFastRandomTree))
        throw new IllegalArgumentException(
                "The NakedFastRfBagging class accepts " + "only NakedFastRandomTree as its base classifier.");

    /*
     * We fill the m_Classifiers array by creating lots of trees with new()
     * because this is much faster than using serialization to deep-copy the
     * one tree in m_Classifier - this is what the
     * super.buildClassifier(data) normally does.
     */
    m_Classifiers = new Classifier[m_NumIterations];
    for (int i = 0; i < m_Classifiers.length; i++) {
        final NakedFastRandomTree curTree = new NakedFastRandomTree();
        // all parameters for training will be looked up in the motherForest
        // (maxDepth, k_Value)
        curTree.m_MotherForest = motherForest;
        // 0.99: reference to these arrays will get passed down all nodes so
        // the array can be re-used
        // 0.99: this array is of size two as now all splits are binary -
        // even categorical ones
        curTree.tempProps = new double[2];
        curTree.tempDists = new double[2][];
        curTree.tempDists[0] = new double[data.numClasses()];
        curTree.tempDists[1] = new double[data.numClasses()];
        curTree.tempDistsOther = new double[2][];
        curTree.tempDistsOther[0] = new double[data.numClasses()];
        curTree.tempDistsOther[1] = new double[data.numClasses()];
        m_Classifiers[i] = curTree;
    }

    // this was SLOW.. takes approx 1/2 time as training the forest
    // afterwards (!!!)
    // super.buildClassifier(data);

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }

    // sorting is performed inside this constructor
    final DataCache myData = new DataCache(data);

    final int bagSize = data.numInstances() * m_BagSizePercent / 100;
    final Random random = new Random(m_Seed);

    final boolean[][] inBag = new boolean[m_Classifiers.length][];

    // thread management
    final ExecutorService threadPool = Executors
            .newFixedThreadPool(numThreads > 0 ? numThreads : Runtime.getRuntime().availableProcessors());
    final List<Future<?>> futures = new ArrayList<Future<?>>(m_Classifiers.length);

    try {

        for (int treeIdx = 0; treeIdx < m_Classifiers.length; treeIdx++) {

            // create the in-bag dataset (and remember what's in the bag
            // for computing the out-of-bag error later)
            final DataCache bagData = myData.resample(bagSize, random);
            bagData.reusableRandomGenerator = bagData.getRandomNumberGenerator(random.nextInt());
            inBag[treeIdx] = bagData.inBag; // stored for the OOB error calculation

            // build the classifier
            if (m_Classifiers[treeIdx] instanceof NakedFastRandomTree) {

                final FastRandomTree aTree = (FastRandomTree) m_Classifiers[treeIdx];
                aTree.data = bagData;

                final Future<?> future = threadPool.submit(aTree);
                futures.add(future);

            } else {
                throw new IllegalArgumentException("The FastRfBagging class accepts "
                        + "only NakedFastRandomTree as its base classifier.");
            }

        }

        // make sure all trees have been trained before proceeding
        for (int treeIdx = 0; treeIdx < m_Classifiers.length; treeIdx++) {
            futures.get(treeIdx).get();

        }

        // [jhostetler] 'm_FeatureImportances' and 'computeOOBError()' are
        // private, so we'll just not compute them.

        // calc OOB error?
        // if( getCalcOutOfBag() || getComputeImportances() ) {
        // // m_OutOfBagError = computeOOBError(data, inBag, threadPool);
        // m_OutOfBagError = computeOOBError( myData, inBag, threadPool );
        // }
        // else {
        // m_OutOfBagError = 0;
        // }

        // // calc feature importances
        // m_FeatureImportances = null;
        // // m_FeatureNames = null;
        // if( getComputeImportances() ) {
        // m_FeatureImportances = new double[data.numAttributes()];
        // // /m_FeatureNames = new String[data.numAttributes()];
        // // Instances dataCopy = new Instances(data); //To scramble
        // // int[] permutation =
        // // FastRfUtils.randomPermutation(data.numInstances(), random);
        // for( int j = 0; j < data.numAttributes(); j++ ) {
        // if( j != data.classIndex() ) {
        // // double sError =
        // // computeOOBError(FastRfUtils.scramble(data, dataCopy,
        // // j, permutation), inBag, threadPool);
        // // double sError = computeOOBError(data, inBag,
        // // threadPool, j, 0);
        // final float[] unscrambled = myData.scrambleOneAttribute( j,
        // random );
        // final double sError = computeOOBError( myData, inBag,
        // threadPool );
        // myData.vals[j] = unscrambled; // restore the original
        // // state
        // m_FeatureImportances[j] = sError - m_OutOfBagError;
        // }
        // // m_FeatureNames[j] = data.attribute(j).name();
        // }
        // }

        threadPool.shutdown();

    } finally {
        threadPool.shutdownNow();
    }
}

From source file:id3.MyID3.java

/**
 * Builds the decision tree.
 * @param instances the training data
 * @throws Exception if the classifier cannot be built
 */
@Override
public void buildClassifier(Instances instances) throws Exception {
    // Check if classifier can handle the data
    getCapabilities().testWithFail(instances);

    // Remove instances with a missing class value
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    // Gather the list of attributes in the dataset
    ArrayList<Attribute> remainingAttributes = new ArrayList<>();
    Enumeration enumAttributes = instances.enumerateAttributes();
    while (enumAttributes.hasMoreElements()) {
        remainingAttributes.add((Attribute) enumAttributes.nextElement());
    }
    // Start build classifier ID3
    buildMyID3(instances, remainingAttributes);
}
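
On recent Weka releases enumerateAttributes() is typed as Enumeration<Attribute>, so the gathering loop can be collapsed; a sketch under that assumption:

// Equivalent one-liner on Weka versions with a typed enumeration.
ArrayList<Attribute> remainingAttributes =
        Collections.list(instances.enumerateAttributes());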

From source file:imba.classifier.FFNNTubes.java

@Override
public void buildClassifier(Instances data) throws Exception {
    getCapabilities().testWithFail(data);

    data.deleteWithMissingClass();

    nAttribute = data.numAttributes() - 1;
    nOutput = data.numClasses();
    nData = data.size();

    //set target data
    setTarget(data);

    //generate weight
    generateRandomWeight();

    // normalize the data and convert nominal attributes to binary
    Normalize norm = new Normalize();
    Filter filter = new NominalToBinary();

    norm.setInputFormat(data);

    Instances filteredData = Filter.useFilter(data, norm);

    try {
        filter.setInputFormat(filteredData);

        for (Instance i1 : filteredData) {
            filter.input(i1);
        }

        filter.batchFinished();
    } catch (Exception ex) {
        Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
    }

    int z = 0;
    double valMSE = 100.0;
    while ((z <= nEpoch) && (valMSE >= 0.00001)) {
        for (int j = 0; j < nData; j++) {
            feedForward(filteredData.get(j));

            if (nHidden == 0) {
                updateWeight(target[j]);
            } else {
                backPropagation(target[j]);
            }
        }

        countError(filteredData);
        valMSE = countMSE(filteredData);
        System.out.println("ACCURACY " + z + " : " + accuracy);
        System.out.println("MSE " + z + " : " + valMSE);
        z++;
    }
}
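
Note that the try block above pushes instances into the NominalToBinary filter one by one but never collects the filter's output, so training proceeds on the merely normalized data. A sketch of the more direct idiom (behavior-equivalent only if the output is actually used):

// Apply the filter in one call and keep the converted dataset.
Filter nominalToBinary = new NominalToBinary();
nominalToBinary.setInputFormat(filteredData);
Instances binaryData = Filter.useFilter(filteredData, nominalToBinary);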

From source file:iris.ID3.java

@Override
public void buildClassifier(Instances instance) throws Exception {
    instance = new Instances(instance);

    // Remove instances with a missing class value
    instance.deleteWithMissingClass();

    makeLikeAWhat(instance);
}

From source file:j48.C45PruneableClassifierTree.java

License:Open Source License

/**
 * Method for building a pruneable classifier tree.
 *
 * @param data the data for building the tree
 * @throws Exception if something goes wrong
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier tree handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    buildTree(data, m_subtreeRaising || !m_cleanup);
    collapse();
    if (m_pruneTheTree) {
        prune();
    }
    if (m_cleanup) {
        cleanup(new Instances(data, 0));
    }
}
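
All of these buildClassifier implementations expect the class index to be set on the incoming Instances before the call. A closing usage sketch that trains and evaluates such a classifier with Weka's stock Evaluation API (J48 stands in for any of the classifiers above; the file path is a placeholder):

import java.util.Random;
import weka.classifiers.Evaluation;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class TrainAndEvaluate {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("data.arff"); // placeholder path
        data.setClassIndex(data.numAttributes() - 1);

        // buildClassifier copies the data and calls deleteWithMissingClass
        // internally, so the original dataset stays intact.
        J48 tree = new J48();
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(tree, data, 10, new Random(1));
        System.out.println(eval.toSummaryString());
    }
}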