Example usage for weka.filters.supervised.instance Resample setInputFormat

List of usage examples for weka.filters.supervised.instance Resample setInputFormat

Introduction

On this page you can find example usages of the setInputFormat method of weka.filters.supervised.instance.Resample.

Prototype

@Override
public boolean setInputFormat(Instances instanceInfo) throws Exception 

Source Link

Document

Sets the format of the input instances.

Usage

From source file:de.ugoe.cs.cpdp.dataprocessing.Oversampling.java

License:Apache License

/**
 * Oversamples the instances with class value 1.0 ("positives") of the training data when
 * they are outnumbered by the instances counted under class index 0 ("negatives").
 *
 * <p>The training data is split into negatives and positives, the positives are passed
 * through a {@code Resample} filter whose sample size is {@code 100 * counts[0] / counts[1]}
 * percent, and {@code traindata} is then rebuilt in place from the negatives followed by the
 * resampled positives. Nothing happens if the positives are not the minority, or if there
 * are no positives at all to draw from.
 *
 * @param testdata  test data; not used by this method
 * @param traindata training data; modified in place
 * @throws RuntimeException wrapping any exception raised by the Weka filter
 */
@Override
public void apply(Instances testdata, Instances traindata) {

    final int[] counts = traindata.attributeStats(traindata.classIndex()).nominalCounts;
    // Without any positives there is nothing to oversample, and the ratio computed below
    // would divide by zero and hand an infinite percentage to the filter.
    if (counts[1] == 0 || counts[1] >= counts[0]) {
        return;
    }

    Instances negatives = new Instances(traindata);
    Instances positives = new Instances(traindata);

    // Walk backwards so that removing index i never shifts an index that is still to come.
    for (int i = traindata.size() - 1; i >= 0; i--) {
        if (Double.compare(1.0, negatives.get(i).classValue()) == 0) {
            negatives.remove(i);
        }
        if (Double.compare(0.0, positives.get(i).classValue()) == 0) {
            positives.remove(i);
        }
    }

    Resample resample = new Resample();
    resample.setSampleSizePercent((100.0 * counts[0]) / counts[1]);
    try {
        resample.setInputFormat(traindata);
        positives = Filter.useFilter(positives, resample);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    // Rebuild the training data in place: negatives first, then the resampled positives.
    traindata.clear();
    for (int i = 0; i < negatives.size(); i++) {
        traindata.add(negatives.get(i));
    }
    for (int i = 0; i < positives.size(); i++) {
        traindata.add(positives.get(i));
    }
}

From source file:de.ugoe.cs.cpdp.dataprocessing.Resampling.java

License:Apache License

/**
 * Replaces the training data, in place, with a resampled version of itself.
 *
 * <p>The sample is produced by a {@code Resample} filter configured with a sample size of
 * 100 percent and a bias-to-uniform-class value of 1.0.
 *
 * @param testdata  test data; not used by this method
 * @param traindata training data; cleared and refilled with the resampled instances
 * @throws RuntimeException wrapping any exception raised by the Weka filter
 */
@Override
public void apply(Instances testdata, Instances traindata) {
    Resample filter = new Resample();
    filter.setSampleSizePercent(100);
    filter.setBiasToUniformClass(1.0);

    Instances resampled;
    try {
        filter.setInputFormat(traindata);
        resampled = Filter.useFilter(traindata, filter);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    // swap the content of the caller's data set for the resampled instances
    traindata.clear();
    int position = 0;
    while (position < resampled.size()) {
        traindata.add(resampled.get(position));
        position++;
    }
}

From source file:de.ugoe.cs.cpdp.dataselection.LACE2.java

License:Apache License

/**
 * Runs the CLIFF / MORPH / leader-follower selection over the training data sets in random
 * order and collects the selected instances in a local data set.
 *
 * <p>The distance threshold is derived once, from the first data set processed: it is the
 * median distance between each instance of a sample (at most about 100 instances, drawn
 * without replacement) and its nearest unlike neighbor. An instance surviving CLIFF is
 * selected if it has no unlike neighbor among the already selected instances, or if its
 * distance to that neighbor exceeds the threshold; in the latter case it is morphed first.
 *
 * <p>NOTE(review): in the code visible here, the collected {@code selectedData} is neither
 * returned nor written back to {@code traindataSet} - confirm whether that is intended.
 *
 * @param testdata     test data; only used as the template for the selected data
 * @param traindataSet training data sets to select from
 * @throws RuntimeException wrapping any exception raised by the Weka filter
 */
@Override
public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
    // empty data set created from a copy of the test data
    Instances selectedData = new Instances(testdata);
    selectedData.clear();

    LinkedList<Instances> shuffledTraindata = new LinkedList<>(traindataSet);
    Collections.shuffle(shuffledTraindata);

    CLIFF cliff = new CLIFF();
    cliff.setParameter(Double.toString(percentage));
    MORPH morph = new MORPH();
    Median median = new Median();
    // Double.MIN_VALUE doubles as the "threshold not determined yet" marker
    double minDist = Double.MIN_VALUE;

    for (Instances traindata : shuffledTraindata) {
        Instances cliffedData = cliff.applyCLIFF(traindata);

        if (minDist == Double.MIN_VALUE) {
            // determine distance for leader-follower algorithm
            Instances sample;
            if (traindata.size() <= 100) {
                sample = new Instances(traindata);
            } else {
                // the percentage is chosen such that it corresponds to 100 instances
                Resample resample = new Resample();
                resample.setSampleSizePercent(100.0 / traindata.size() * 100.0);
                resample.setBiasToUniformClass(0.0);
                resample.setNoReplacement(true);
                try {
                    resample.setInputFormat(traindata);
                    sample = Filter.useFilter(traindata, resample);
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }

            double[] distances = new double[sample.size()];
            for (int j = 0; j < sample.size(); j++) {
                Instance unlikeNeighbor = morph.getNearestUnlikeNeighbor(sample.get(j), sample);
                double[] ownValues = WekaUtils.instanceValues(sample.get(j));
                double[] neighborValues = WekaUtils.instanceValues(unlikeNeighbor);
                distances[j] = MathArrays.distance(ownValues, neighborValues);
            }
            minDist = median.evaluate(distances);
        }

        for (int j = 0; j < cliffedData.size(); j++) {
            Instance unlikeNeighbor = morph.getNearestUnlikeNeighbor(cliffedData.get(j), selectedData);
            if (unlikeNeighbor == null) {
                // no unlike neighbor among the selected instances yet: always keep
                selectedData.add(cliffedData.get(j));
                continue;
            }

            double[] ownValues = WekaUtils.instanceValues(cliffedData.get(j));
            double[] neighborValues = WekaUtils.instanceValues(unlikeNeighbor);
            double distance = MathArrays.distance(ownValues, neighborValues);
            if (distance > minDist) {
                morph.morphInstance(cliffedData.get(j), cliffedData);
                selectedData.add(cliffedData.get(j));
            }
        }
    }
}

From source file:function.FilterResample.java

/**
 * Resamples a data set with Weka's supervised {@code Resample} filter, using a
 * bias-to-uniform-class value of 1.0, sampling with replacement and a sample size of
 * 100 percent.
 *
 * @param inst data set to resample
 * @return the resampled data set, or {@code null} if the filter raised an exception (the
 *         error is reported on standard output and the stack trace is printed)
 */
public static Instances filterResample(Instances inst) {
    Resample filter = new Resample();
    // Configure the filter completely before setInputFormat: Weka expects setInputFormat
    // to be the last call made before the filter is applied.
    filter.setBiasToUniformClass(1.0);
    filter.setNoReplacement(false);
    filter.setSampleSizePercent(100);

    Instances instResample = null;
    try {
        filter.setInputFormat(inst);
        instResample = Filter.useFilter(inst, filter);
    } catch (Exception e) {
        // best effort: report the failure and let the caller deal with the null result
        System.out.println("Error when resampling input data!");
        e.printStackTrace();
    }

    return instResample;
}

From source file:gyc.SMOTEBagging.java

License:Open Source License

/**
 * Builds one sampled data set: every instance of the majority class plus randomly drawn
 * instances of the remaining classes, optionally post-processed by Resample or SMOTE.
 *
 * <p>An instance counts as majority if its class label equals the label of {@code majC},
 * ignoring case; all other instances form the pool for random draws. The number of draws is
 * {@code size = nominalCounts[majC] * a / 100} (integer arithmetic), and draws are made with
 * replacement. After shuffling, the data set is passed through a {@code Resample} filter
 * with bias-to-uniform-class 1.0 if {@code size == 1}, or through {@code SMOTE} with a
 * percentage of {@code 100 * majCount / size - 100} if {@code size > 1}. If a filter fails,
 * the stack trace is printed and the unfiltered data set is returned.
 *
 * @param copia          data set to sample from
 * @param majC           index of the majority class label within the class attribute
 * @param minC           index of the minority class label; not used by this method
 * @param a              percentage of the majority-class count that determines how many
 *                       non-majority instances are drawn
 * @param simplingRandom random number generator used for drawing, shuffling and filter seeds
 * @return the sampled data set; it contains only the majority instances if {@code copia}
 *         has no other instances
 */
protected Instances randomSampling(Instances copia, int majC, int minC, int a, Random simplingRandom) {
    int[] majExamples = new int[copia.numInstances()];
    int[] minExamples = new int[copia.numInstances()];
    int majCount = 0, minCount = 0;
    // number of non-majority instances to draw: a percent of the majority-class count
    int size = copia.attributeStats(copia.classIndex()).nominalCounts[majC] * a / 100;
    // label of the majority class
    String majClassName = copia.attribute(copia.classIndex()).value(majC);

    // split the instance indexes into majority and non-majority
    for (int i = 0; i < copia.numInstances(); i++) {
        if (copia.instance(i).stringValue(copia.classIndex()).equalsIgnoreCase(majClassName)) {
            majExamples[majCount] = i;
            majCount++;
        } else {
            minExamples[minCount] = i;
            minCount++;
        }
    }

    // start from an empty data set with the same header and add all majority instances
    Instances myDataset = new Instances(copia, 0);
    for (int i = 0; i < majCount; i++) {
        myDataset.add(copia.instance(majExamples[i]));
    }
    if (minCount == 0)
        return myDataset;

    // draw the non-majority instances with replacement
    for (int i = 0; i < size; i++) {
        int r = simplingRandom.nextInt(minCount);
        myDataset.add(copia.instance(minExamples[r]));
    }
    myDataset.randomize(simplingRandom);

    if (size == 1) {
        try {
            // Configure the filter completely before setInputFormat: Weka expects
            // setInputFormat to be the last call made before the filter is applied.
            Resample filter = new Resample();
            filter.setBiasToUniformClass(1.0);
            filter.setRandomSeed(simplingRandom.nextInt());
            filter.setInputFormat(myDataset);
            myDataset = Filter.useFilter(myDataset, filter);
        } catch (Exception e) {
            // best effort: keep the unfiltered data set
            e.printStackTrace();
        }
    }
    if (size > 1) {
        try {
            SMOTE filter = new SMOTE();
            // percentage handed to SMOTE, derived from the majority count and the draw count
            double value = 100.0 * majCount / size - 100;
            filter.setPercentage(value);
            filter.setRandomSeed(simplingRandom.nextInt());
            filter.setInputFormat(myDataset); // filter capabilities are checked here
            myDataset = Filter.useFilter(myDataset, filter);
        } catch (Exception e) {
            // best effort: keep the unfiltered data set
            e.printStackTrace();
        }
    }

    return myDataset;
}

From source file:Helper.CustomFilter.java

/**
 * Resamples a data set with Weka's supervised {@code Resample} filter, using a
 * bias-to-uniform-class value of 1.0, sampling with replacement and a sample size of
 * 100 percent.
 *
 * @param structure data set to resample
 * @return the resampled data set, or {@code null} if the filter raised an exception (the
 *         stack trace is printed)
 */
public Instances resampling(Instances structure) {
    Resample filter = new Resample();
    // Configure the filter completely before setInputFormat: Weka expects setInputFormat
    // to be the last call made before the filter is applied.
    filter.setBiasToUniformClass(1.0);
    filter.setNoReplacement(false);
    filter.setSampleSizePercent(100);

    Instances filteredIns = null;
    try {
        filter.setInputFormat(structure);
        filteredIns = Filter.useFilter(structure, filter);
    } catch (Exception e) {
        // best effort: report the failure and let the caller deal with the null result
        e.printStackTrace();
    }
    return filteredIns;
}

From source file:id3j48.WekaAccess.java

/**
 * Resamples a data set with a {@code Resample} filter configured through the option string
 * {@code -B 0.0 -S 1 -Z 100.0}; the random seed is additionally set to 1 explicitly.
 *
 * @param data data set to resample
 * @return the data set produced by the filter
 * @throws Exception if the options cannot be applied or the filter fails
 */
public static Instances resampleData(Instances data) throws Exception {
    Resample filter = new Resample();
    String[] options = Utils.splitOptions("-B 0.0 -S 1 -Z 100.0");
    filter.setOptions(options);
    filter.setRandomSeed(1);
    filter.setInputFormat(data);
    return Filter.useFilter(data, filter);
}

From source file:meansagnes.MeansAgnes.java

/**
 * Replaces the {@code data} field with a resampled version of itself and prints the data
 * set before and after resampling to standard output.
 *
 * <p>Sampling is done with replacement by a {@code Resample} filter. Any exception is
 * logged at SEVERE level and otherwise ignored.
 *
 * @param b    bias-to-uniform-class value handed to the filter
 * @param z    sample size, in percent, handed to the filter
 * @param seed random seed handed to the filter
 */
public void resample(double b, double z, int seed) {
    try {
        System.out.println(data.toString() + "\n");
        Resample resampleFilter = new Resample();

        // Configure the filter completely before setInputFormat: Weka expects
        // setInputFormat to be the last call made before the filter is applied.
        resampleFilter.setNoReplacement(false);
        resampleFilter.setBiasToUniformClass(b); // Uniform distribution of class
        resampleFilter.setSampleSizePercent(z);
        resampleFilter.setRandomSeed(seed);
        resampleFilter.setInputFormat(data);

        data = Filter.useFilter(data, resampleFilter);

        System.out.println("HASIL RESAMPLE\n\n");
        System.out.println(data.toString() + "\n");
    } catch (Exception ex) {
        Logger.getLogger(MeansAgnes.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:myclassifier.wekaCode.java

/**
 * Resamples a data set with a {@code Resample} filter left at its default settings.
 *
 * @param data data set to resample
 * @return the data set produced by the filter
 * @throws Exception if the filter rejects the input format or fails while filtering
 */
public static Instances resampleData(Instances data) throws Exception {
    Resample defaultFilter = new Resample();
    defaultFilter.setInputFormat(data);
    return Filter.useFilter(data, defaultFilter);
}

From source file:org.openml.webapplication.generatefolds.GenerateFolds.java

License:Open Source License

/**
 * Generates the split rows for a bootstrap evaluation.
 *
 * <p>For every repeat, the data set is resampled by a {@code Resample} filter with the
 * options {@code -B 0.0 -Z 100.0 -S <repeat>}. One row is added per resampled instance
 * (created with {@code true} as first argument) and one row per instance of the full data
 * set (created with {@code false}). The row id is taken from the first attribute value of
 * each instance.
 *
 * @param name relation name of the resulting splits data set
 * @return the splits data set
 * @throws Exception if the filter cannot be configured or applied
 */
private Instances sample_splits_bootstrap(String name) throws Exception {
    Instances splits = new Instances(name, am.getArffHeader(), splits_size);

    for (int repeat = 0; repeat < evaluationMethod.getRepeats(); repeat++) {
        // the repeat number doubles as the random seed of the bootstrap sample
        String[] bootstrapOptions = { "-B", "0.0", "-Z", "100.0", "-S", String.valueOf(repeat) };
        Resample bootstrap = new Resample();
        bootstrap.setOptions(bootstrapOptions);
        bootstrap.setInputFormat(dataset);
        Instances bootstrapSample = Filter.useFilter(dataset, bootstrap);

        // rows for the instances of the bootstrap sample
        // (presumably the training rows - confirm against am.createInstance)
        for (int idx = 0; idx < bootstrapSample.numInstances(); idx++) {
            double firstValue = bootstrapSample.instance(idx).value(0);
            int rowid = (int) firstValue;
            splits.add(am.createInstance(true, rowid, repeat, 0));
        }

        // rows for every instance of the complete data set
        for (int idx = 0; idx < dataset.numInstances(); idx++) {
            double firstValue = dataset.instance(idx).value(0);
            int rowid = (int) firstValue;
            splits.add(am.createInstance(false, rowid, repeat, 0));
        }
    }

    return splits;
}