Example usage for weka.filters.unsupervised.instance.Resample.setSampleSizePercent

List of usage examples for weka.filters.unsupervised.instance.Resample.setSampleSizePercent

Introduction

On this page you can find example usages of weka.filters.unsupervised.instance.Resample.setSampleSizePercent.

Prototype

public void setSampleSizePercent(double newSampleSizePercent) 

Source Link

Document

Sets the size of the subsample, as a percentage of the original set.

Usage

From source file:fantail.algorithms.ARTForests.java

License:Open Source License

@Override
public void buildRanker(Instances metaData) throws Exception {

    Random r = new Random(m_RandomSeed);
    Instances workingData = new Instances(metaData);
    m_WeakRankers = new Ranker[m_T];

    for (int i = 0; i < m_T; i++) {
        Instances baggingSample = workingData.resampleWithWeights(r);

        if (m_BaggingPercentage < 100.0) {
            weka.filters.unsupervised.instance.Resample res = new weka.filters.unsupervised.instance.Resample();
            res.setSampleSizePercent(m_BaggingPercentage);
            res.setNoReplacement(false);
            res.setInputFormat(baggingSample);
            baggingSample = Filter.useFilter(baggingSample, res);
        }/*from  ww  w  .j  a v a 2 s . co  m*/

        BinaryART ranker = new BinaryART();
        ranker.setMiniLeaf(m_NumMinInstances);
        ranker.setK(m_K);
        ranker.setRandomSeed(i);
        ranker.setUseMedian(m_UseMedian);

        m_WeakRankers[i] = ranker;
        m_WeakRankers[i].buildRanker(baggingSample);
    }
}

From source file:hero.unstable.util.classification.wekaData.java

public void setData(String dataPath, double percentageClaseControl, int classIdx) {
    // Load data//w ww  . j  a  v  a  2 s. co m
    //Instances data = IO.csvToInstances(dataPath);
    ConverterUtils.DataSource source = null;
    try {
        source = new ConverterUtils.DataSource(dataPath);
        dataOriginal = source.getDataSet();
    } catch (Exception ex) {
        logger.info(ClusteringBinaryPD.class.getName());
        ex.printStackTrace();
    }

    // Set first column as CLASS
    dataOriginal.setClassIndex(classIdx);
    //logger.info("Data correctly loaded from " + dataPath);
    //logger.info("Data filtered: Class is the FIRST column");
    //logger.info("Number of attributes: " + data.numAttributes() );
    //logger.info("Number of instances: " + data.numInstances() );
    // Get TRAINING and TEST sets:
    Resample splitter = new Resample();
    try {
        splitter.setInvertSelection(false);
        splitter.setNoReplacement(true);
        splitter.setSampleSizePercent(percentageClaseControl);
        splitter.setInputFormat(dataOriginal);
        dataTraining = Filter.useFilter(dataOriginal, splitter);

        splitter = new Resample();
        splitter.setInvertSelection(true);
        splitter.setNoReplacement(true);
        splitter.setSampleSizePercent(percentageClaseControl);
        splitter.setInputFormat(dataOriginal);
        dataTest = Filter.useFilter(dataOriginal, splitter);
    } catch (Exception ex) {
        Logger.getLogger(ClusteringBinaryPD.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java

License:Open Source License

public Shih2010(Instances instances, boolean ignoreClass, boolean needsToResample, double theta) {
    try {/*  ww  w  .ja  v a 2 s  .c o  m*/
        this.instances = instances;
        this.theta = theta;
        this.resample = needsToResample;
        if (needsToResample) {
            final Resample rs = new Resample();
            if (this.instances.numInstances() > MAX_INSTANCES_TAKEN) {
                rs.setInputFormat(instances);
                rs.setSampleSizePercent(MAX_INSTANCES_TAKEN * 100.0 / this.instances.numInstances());
                this.instances = Filter.useFilter(instances, rs);
            }
        }

        //System.out.println("Size = " + this.instances.numInstances());

        this.mapDomain = new HashMap<Integer, Set<String>>();

        if (ignoreClass) {
            this.instances.setClassIndex(-1);
        }

        //Save index of nominal & categorial attributes
        //Build a map i-DOM -> Attribute index
        this.idxsC = new ArrayList<Integer>();
        this.idxsN = new ArrayList<Integer>();
        int nn = 0;

        for (int i = 0; i < instances.numAttributes(); i++) {
            if (!instances.attribute(i).isNumeric())
                mapDomain.put(i, new HashSet<String>());
        }

        //Create map index & domain
        this.mapIndex = new HashMap<Integer, Map<String, Integer>>();
        int mapIdx = 0;
        for (int i = 0; i < instances.numAttributes(); i++) {
            Attribute attribute = instances.attribute(i);
            if (!attribute.isNumeric()) {
                idxsC.add(i); //i-th attribute is nominal
                final Map<String, Integer> mapIndexAttribute = new HashMap<String, Integer>();
                mapIndex.put(i, mapIndexAttribute);
                Enumeration<?> en = attribute.enumerateValues();
                while (en.hasMoreElements()) {
                    String catVal = en.nextElement().toString();
                    boolean created = mapDomain.get(i).add(catVal);
                    if (created) {
                        mapIndexAttribute.put(catVal, mapIdx++);
                    }
                }
                nn += mapDomain.get(i).size(); //count total nominal values
            } else {
                idxsN.add(i);
            }
        }

        this.n = nn;
        this.base = new ArrayList<TupleSI>();
        this.noBase = new ArrayList<TupleSI>();
        this.M = new int[n][n];
        this.D = new double[n][n];
        this.F = new HashMap<TupleSI, Double>();
        this.computeBase();
        this.computeMatrixMDF();
    } catch (Exception e) {
        e.printStackTrace();
    }

}