Example usage for weka.filters.unsupervised.instance.Resample Resample()

List of usage examples for weka.filters.unsupervised.instance.Resample Resample()

Introduction

On this page you can find example usages of weka.filters.unsupervised.instance.Resample Resample().

Prototype

Resample

Source Link

Usage

From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java

License:Open Source License

/**
 * Applies sample training to reduce the input Instances to a specified size.
 * /*w ww . j  ava 2s.co m*/
 * @param classifier_sampling_threshold
 * @param newData1
 * @return
 * @throws Exception
 */
public static Instances sampleTraining(Double classifier_sampling_threshold, Instances newData1)
        throws Exception {
    String[] options;
    Instances newData;
    if (newData1.numInstances() > classifier_sampling_threshold) {
        double percentage = (double) 100 * ((double) classifier_sampling_threshold)
                / ((double) newData1.numInstances());

        Resample r = new Resample();

        options = new String[4];
        options[0] = "-C";
        options[1] = "last";
        options[2] = "-Z";
        options[3] = "" + percentage;

        r.setOptions(options);
        r.setInputFormat(newData1);
        newData = Filter.useFilter(newData1, r);

    } else {
        newData = newData1;
    }

    return newData;
}

From source file:hero.unstable.util.classification.wekaData.java

/**
 * Loads a dataset from {@code dataPath} and splits it into training and test sets.
 * <p>
 * The loaded data is stored in {@code dataOriginal} with its class attribute set to
 * {@code classIdx}. Two {@link Resample} filters (sampling without replacement, same
 * percentage) then produce {@code dataTraining} (normal selection) and {@code dataTest}
 * (inverted selection).
 * <p>
 * If the data cannot be loaded, the failure is logged and the method returns without
 * touching {@code dataOriginal}, {@code dataTraining} or {@code dataTest}. If the split
 * fails, the failure is logged and whichever of the two sets had not yet been assigned
 * keeps its previous value.
 *
 * @param dataPath location of the dataset, as understood by {@code ConverterUtils.DataSource}
 * @param percentageClaseControl sample size, in percent of the dataset, passed to both filters
 * @param classIdx index of the attribute to use as the class
 */
public void setData(String dataPath, double percentageClaseControl, int classIdx) {
    // Load data
    try {
        ConverterUtils.DataSource source = new ConverterUtils.DataSource(dataPath);
        dataOriginal = source.getDataSet();
    } catch (Exception ex) {
        // Without freshly loaded data there is nothing valid to split, so stop here
        // instead of continuing with a null or stale dataOriginal.
        Logger.getLogger(ClusteringBinaryPD.class.getName()).log(Level.SEVERE,
                "Could not load data from " + dataPath, ex);
        return;
    }

    // Use the requested column as the class attribute.
    dataOriginal.setClassIndex(classIdx);

    // Get TRAINING and TEST sets.
    // NOTE(review): both filters rely on Resample's default seed; presumably that makes the
    // inverted selection the complement of the training sample -- confirm.
    try {
        Resample trainingSplitter = new Resample();
        trainingSplitter.setInvertSelection(false);
        trainingSplitter.setNoReplacement(true);
        trainingSplitter.setSampleSizePercent(percentageClaseControl);
        trainingSplitter.setInputFormat(dataOriginal);
        dataTraining = Filter.useFilter(dataOriginal, trainingSplitter);

        Resample testSplitter = new Resample();
        testSplitter.setInvertSelection(true);
        testSplitter.setNoReplacement(true);
        testSplitter.setSampleSizePercent(percentageClaseControl);
        testSplitter.setInputFormat(dataOriginal);
        dataTest = Filter.useFilter(dataOriginal, testSplitter);
    } catch (Exception ex) {
        Logger.getLogger(ClusteringBinaryPD.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java

License:Open Source License

/**
 * Builds the per-attribute indexes and matrices for the given dataset.
 * <p>
 * Steps performed:
 * <ol>
 * <li>Optionally down-samples the data with a {@link Resample} filter when
 *     {@code needsToResample} is set and the dataset has more than
 *     {@code MAX_INSTANCES_TAKEN} instances.</li>
 * <li>Optionally clears the class index ({@code ignoreClass}).</li>
 * <li>Splits attribute indexes into non-numeric ({@code idxsC}) and numeric ({@code idxsN}),
 *     records each non-numeric attribute's value domain in {@code mapDomain}, and assigns
 *     every distinct value a running index in {@code mapIndex}.</li>
 * <li>Allocates {@code M}, {@code D} (both {@code n x n}, with {@code n} the total number
 *     of non-numeric values) and {@code F}, then calls {@code computeBase()} and
 *     {@code computeMatrixMDF()}.</li>
 * </ol>
 * Any exception raised along the way is printed and swallowed, so the object may be left
 * partially initialized.
 *
 * @param instances the dataset to analyse; its class index is modified when
 *        {@code ignoreClass} is set and no resampling took place (same object is kept)
 * @param ignoreClass when {@code true}, the class index of the working dataset is set to -1
 * @param needsToResample when {@code true}, datasets larger than {@code MAX_INSTANCES_TAKEN}
 *        are down-sampled first
 * @param theta value stored in the {@code theta} field
 */
public Shih2010(Instances instances, boolean ignoreClass, boolean needsToResample, double theta) {
    try {
        this.instances = instances;
        this.theta = theta;
        this.resample = needsToResample;

        if (needsToResample && this.instances.numInstances() > MAX_INSTANCES_TAKEN) {
            final Resample rs = new Resample();
            // Configure the filter first: setInputFormat should be the last call before
            // the filter is applied.
            rs.setSampleSizePercent(MAX_INSTANCES_TAKEN * 100.0 / this.instances.numInstances());
            rs.setInputFormat(instances);
            this.instances = Filter.useFilter(instances, rs);
        }

        if (ignoreClass) {
            this.instances.setClassIndex(-1);
        }

        this.mapDomain = new HashMap<Integer, Set<String>>();
        this.mapIndex = new HashMap<Integer, Map<String, Integer>>();
        this.idxsC = new ArrayList<Integer>();
        this.idxsN = new ArrayList<Integer>();

        int totalValues = 0; // number of distinct values over all non-numeric attributes
        int nextValueIndex = 0; // running index shared by all non-numeric attributes

        for (int i = 0; i < instances.numAttributes(); i++) {
            final Attribute attribute = instances.attribute(i);
            if (attribute.isNumeric()) {
                idxsN.add(i);
                continue;
            }

            // i-th attribute is non-numeric: record its domain and index its values.
            idxsC.add(i);
            final Set<String> domain = new HashSet<String>();
            mapDomain.put(i, domain);
            final Map<String, Integer> valueIndexes = new HashMap<String, Integer>();
            mapIndex.put(i, valueIndexes);

            final Enumeration<?> values = attribute.enumerateValues();
            while (values.hasMoreElements()) {
                final String catVal = values.nextElement().toString();
                // Only values seen for the first time get a new index.
                if (domain.add(catVal)) {
                    valueIndexes.put(catVal, nextValueIndex++);
                }
            }
            totalValues += domain.size();
        }

        this.n = totalValues;
        this.base = new ArrayList<TupleSI>();
        this.noBase = new ArrayList<TupleSI>();
        this.M = new int[n][n];
        this.D = new double[n][n];
        this.F = new HashMap<TupleSI, Double>();
        this.computeBase();
        this.computeMatrixMDF();
    } catch (Exception e) {
        // Kept from the original: failures are reported on stderr and not rethrown.
        e.printStackTrace();
    }

}