Example usage for weka.filters.unsupervised.instance Resample setInputFormat

List of usage examples for weka.filters.unsupervised.instance Resample setInputFormat

Introduction

On this page you can find example usages of weka.filters.unsupervised.instance Resample setInputFormat.

Prototype

@Override
public boolean setInputFormat(Instances instanceInfo) throws Exception 

Source Link

Document

Sets the format of the input instances.

Usage

From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java

License:Open Source License

/**
 * Down-samples a data set with a {@code Resample} filter when it holds more
 * instances than the given threshold.
 *
 * <p>The sample size handed to the filter is
 * {@code 100 * classifier_sampling_threshold / newData1.numInstances()} percent,
 * passed through the filter options {@code -C last -Z <percentage>}.
 *
 * @param classifier_sampling_threshold instance count above which the data is resampled
 * @param newData1 the data to (possibly) reduce
 * @return the filtered data when the threshold is exceeded, otherwise
 *         {@code newData1} itself
 * @throws Exception if configuring or applying the filter fails
 */
public static Instances sampleTraining(Double classifier_sampling_threshold, Instances newData1)
        throws Exception {
    final int total = newData1.numInstances();

    // Written as a negated '>' so that a NaN threshold still means "do not resample".
    if (!(total > classifier_sampling_threshold)) {
        return newData1;
    }

    final double percentage = 100.0 * classifier_sampling_threshold / total;
    final String[] filterOptions = { "-C", "last", "-Z", Double.toString(percentage) };

    final Resample sampler = new Resample();
    sampler.setOptions(filterOptions);
    sampler.setInputFormat(newData1);
    return Filter.useFilter(newData1, sampler);
}

From source file:fantail.algorithms.ARTForests.java

License:Open Source License

/**
 * Trains {@code m_T} {@code BinaryART} rankers, each on its own bagging sample
 * drawn from a copy of {@code metaData}.
 *
 * <p>Every sample is first drawn with {@code resampleWithWeights}; when
 * {@code m_BaggingPercentage} is below 100 it is additionally passed through a
 * with-replacement {@code Resample} filter set to that percentage.
 *
 * @param metaData the data the rankers are built from; it is copied, not modified here
 * @throws Exception if filtering or building a ranker fails
 */
@Override
public void buildRanker(Instances metaData) throws Exception {

    final Random rng = new Random(m_RandomSeed);
    final Instances source = new Instances(metaData);
    m_WeakRankers = new Ranker[m_T];

    for (int t = 0; t < m_T; t++) {
        Instances bag = source.resampleWithWeights(rng);

        final boolean shrinkBag = m_BaggingPercentage < 100.0;
        if (shrinkBag) {
            final weka.filters.unsupervised.instance.Resample sizeFilter =
                    new weka.filters.unsupervised.instance.Resample();
            sizeFilter.setSampleSizePercent(m_BaggingPercentage);
            sizeFilter.setNoReplacement(false);
            sizeFilter.setInputFormat(bag);
            bag = Filter.useFilter(bag, sizeFilter);
        }

        // Each weak ranker is seeded with its own index.
        final BinaryART weak = new BinaryART();
        weak.setMiniLeaf(m_NumMinInstances);
        weak.setK(m_K);
        weak.setRandomSeed(t);
        weak.setUseMedian(m_UseMedian);

        // Store the ranker before training it, as the slot is the one being built.
        m_WeakRankers[t] = weak;
        m_WeakRankers[t].buildRanker(bag);
    }
}

From source file:hero.unstable.util.classification.wekaData.java

/**
 * Loads the data set at {@code dataPath} into {@code dataOriginal}, sets its
 * class attribute, and derives {@code dataTraining} and {@code dataTest} from
 * it with two {@code Resample} filters (sampling without replacement).
 *
 * <p>The training filter uses the normal selection and the test filter the
 * inverted selection, both with the same sample size percentage, so the test
 * set is intended to hold the instances not chosen for training.
 *
 * <p>Failures are logged at {@code SEVERE} level and the method returns without
 * touching the remaining fields: a failed load leaves all three data sets as
 * they were, and a failed split leaves {@code dataTraining} and
 * {@code dataTest} as they were (they are only ever replaced together).
 *
 * @param dataPath location of the data set to load
 * @param percentageClaseControl sample size, in percent, passed to both filters
 * @param classIdx index of the attribute to use as the class
 */
public void setData(String dataPath, double percentageClaseControl, int classIdx) {
    // Load data. Abort on failure instead of carrying on with a null or stale data set.
    Instances loaded;
    try {
        ConverterUtils.DataSource source = new ConverterUtils.DataSource(dataPath);
        loaded = source.getDataSet();
    } catch (Exception ex) {
        Logger.getLogger(ClusteringBinaryPD.class.getName()).log(Level.SEVERE,
                "Could not load data from " + dataPath, ex);
        return;
    }
    if (loaded == null) {
        Logger.getLogger(ClusteringBinaryPD.class.getName()).log(Level.SEVERE,
                "No data set could be read from " + dataPath);
        return;
    }
    dataOriginal = loaded;

    // Use the requested column as the class attribute.
    dataOriginal.setClassIndex(classIdx);

    // Get TRAINING and TEST sets. Both are computed first and only then published,
    // so a failure in the second filter cannot leave a new training set paired
    // with an old test set.
    try {
        Resample splitter = new Resample();
        splitter.setInvertSelection(false);
        splitter.setNoReplacement(true);
        splitter.setSampleSizePercent(percentageClaseControl);
        splitter.setInputFormat(dataOriginal);
        Instances training = Filter.useFilter(dataOriginal, splitter);

        splitter = new Resample();
        splitter.setInvertSelection(true);
        splitter.setNoReplacement(true);
        splitter.setSampleSizePercent(percentageClaseControl);
        splitter.setInputFormat(dataOriginal);
        Instances test = Filter.useFilter(dataOriginal, splitter);

        dataTraining = training;
        dataTest = test;
    } catch (Exception ex) {
        Logger.getLogger(ClusteringBinaryPD.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java

License:Open Source License

/**
 * Prepares the internal structures for the given data and then runs
 * {@code computeBase()} and {@code computeMatrixMDF()}.
 *
 * <p>Steps performed:
 * <ol>
 *   <li>If {@code needsToResample} is set and the data holds more than
 *       {@code MAX_INSTANCES_TAKEN} instances, the stored data is replaced by a
 *       {@code Resample}d version sized to roughly {@code MAX_INSTANCES_TAKEN}
 *       instances.</li>
 *   <li>If {@code ignoreClass} is set, the class index of the stored data is
 *       set to -1. When no resampling took place this is the caller's own
 *       {@code Instances} object.</li>
 *   <li>Attributes are split into non-numeric ({@code idxsC}) and numeric
 *       ({@code idxsN}); for each non-numeric attribute its value domain is
 *       recorded in {@code mapDomain} and every value receives a running
 *       global index in {@code mapIndex}.</li>
 *   <li>{@code n} becomes the total number of recorded non-numeric values and
 *       sizes the {@code M} and {@code D} matrices.</li>
 * </ol>
 *
 * <p>Any exception is caught and printed; in that case the object may be left
 * only partially initialised.
 *
 * @param instances the data to analyse
 * @param ignoreClass whether to unset the class attribute
 * @param needsToResample whether large data sets should be down-sampled
 * @param theta value stored in the {@code theta} field
 */
public Shih2010(Instances instances, boolean ignoreClass, boolean needsToResample, double theta) {
    try {
        this.instances = instances;
        this.theta = theta;
        this.resample = needsToResample;
        if (needsToResample && this.instances.numInstances() > MAX_INSTANCES_TAKEN) {
            final Resample rs = new Resample();
            // Configure the filter BEFORE setInputFormat, as Weka's filter usage
            // guidelines require and as the other callers do.
            rs.setSampleSizePercent(MAX_INSTANCES_TAKEN * 100.0 / this.instances.numInstances());
            rs.setInputFormat(instances);
            this.instances = Filter.useFilter(instances, rs);
        }

        this.mapDomain = new HashMap<Integer, Set<String>>();

        if (ignoreClass) {
            this.instances.setClassIndex(-1);
        }

        // Save index of nominal & numeric attributes and, for every nominal
        // attribute, its domain and a value -> global index map.
        this.idxsC = new ArrayList<Integer>();
        this.idxsN = new ArrayList<Integer>();
        this.mapIndex = new HashMap<Integer, Map<String, Integer>>();
        int nn = 0;
        int mapIdx = 0;

        for (int i = 0; i < instances.numAttributes(); i++) {
            final Attribute attribute = instances.attribute(i);
            if (attribute.isNumeric()) {
                idxsN.add(i);
                continue;
            }

            idxsC.add(i); // i-th attribute is nominal
            final Set<String> domain = new HashSet<String>();
            mapDomain.put(i, domain);
            final Map<String, Integer> mapIndexAttribute = new HashMap<String, Integer>();
            mapIndex.put(i, mapIndexAttribute);

            final Enumeration<?> en = attribute.enumerateValues();
            while (en.hasMoreElements()) {
                final String catVal = en.nextElement().toString();
                // Only values seen for the first time consume a global index.
                if (domain.add(catVal)) {
                    mapIndexAttribute.put(catVal, mapIdx++);
                }
            }
            nn += domain.size(); // count total nominal values
        }

        this.n = nn;
        this.base = new ArrayList<TupleSI>();
        this.noBase = new ArrayList<TupleSI>();
        this.M = new int[n][n];
        this.D = new double[n][n];
        this.F = new HashMap<TupleSI, Double>();
        this.computeBase();
        this.computeMatrixMDF();
    } catch (Exception e) {
        // NOTE(review): the failure is only printed, so callers cannot tell that
        // construction did not complete; consider propagating it.
        e.printStackTrace();
    }

}