List of usage examples for the Resample() constructor of weka.filters.unsupervised.instance.Resample
Resample
From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java
License:Open Source License
/** * Applies sample training to reduce the input Instances to a specified size. * /*w ww . j ava 2s.co m*/ * @param classifier_sampling_threshold * @param newData1 * @return * @throws Exception */ public static Instances sampleTraining(Double classifier_sampling_threshold, Instances newData1) throws Exception { String[] options; Instances newData; if (newData1.numInstances() > classifier_sampling_threshold) { double percentage = (double) 100 * ((double) classifier_sampling_threshold) / ((double) newData1.numInstances()); Resample r = new Resample(); options = new String[4]; options[0] = "-C"; options[1] = "last"; options[2] = "-Z"; options[3] = "" + percentage; r.setOptions(options); r.setInputFormat(newData1); newData = Filter.useFilter(newData1, r); } else { newData = newData1; } return newData; }
From source file:hero.unstable.util.classification.wekaData.java
public void setData(String dataPath, double percentageClaseControl, int classIdx) { // Load data/*from w ww. j a va2 s . c om*/ //Instances data = IO.csvToInstances(dataPath); ConverterUtils.DataSource source = null; try { source = new ConverterUtils.DataSource(dataPath); dataOriginal = source.getDataSet(); } catch (Exception ex) { logger.info(ClusteringBinaryPD.class.getName()); ex.printStackTrace(); } // Set first column as CLASS dataOriginal.setClassIndex(classIdx); //logger.info("Data correctly loaded from " + dataPath); //logger.info("Data filtered: Class is the FIRST column"); //logger.info("Number of attributes: " + data.numAttributes() ); //logger.info("Number of instances: " + data.numInstances() ); // Get TRAINING and TEST sets: Resample splitter = new Resample(); try { splitter.setInvertSelection(false); splitter.setNoReplacement(true); splitter.setSampleSizePercent(percentageClaseControl); splitter.setInputFormat(dataOriginal); dataTraining = Filter.useFilter(dataOriginal, splitter); splitter = new Resample(); splitter.setInvertSelection(true); splitter.setNoReplacement(true); splitter.setSampleSizePercent(percentageClaseControl); splitter.setInputFormat(dataOriginal); dataTest = Filter.useFilter(dataOriginal, splitter); } catch (Exception ex) { Logger.getLogger(ClusteringBinaryPD.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java
License:Open Source License
public Shih2010(Instances instances, boolean ignoreClass, boolean needsToResample, double theta) { try {/*from w w w.j av a2 s . com*/ this.instances = instances; this.theta = theta; this.resample = needsToResample; if (needsToResample) { final Resample rs = new Resample(); if (this.instances.numInstances() > MAX_INSTANCES_TAKEN) { rs.setInputFormat(instances); rs.setSampleSizePercent(MAX_INSTANCES_TAKEN * 100.0 / this.instances.numInstances()); this.instances = Filter.useFilter(instances, rs); } } //System.out.println("Size = " + this.instances.numInstances()); this.mapDomain = new HashMap<Integer, Set<String>>(); if (ignoreClass) { this.instances.setClassIndex(-1); } //Save index of nominal & categorial attributes //Build a map i-DOM -> Attribute index this.idxsC = new ArrayList<Integer>(); this.idxsN = new ArrayList<Integer>(); int nn = 0; for (int i = 0; i < instances.numAttributes(); i++) { if (!instances.attribute(i).isNumeric()) mapDomain.put(i, new HashSet<String>()); } //Create map index & domain this.mapIndex = new HashMap<Integer, Map<String, Integer>>(); int mapIdx = 0; for (int i = 0; i < instances.numAttributes(); i++) { Attribute attribute = instances.attribute(i); if (!attribute.isNumeric()) { idxsC.add(i); //i-th attribute is nominal final Map<String, Integer> mapIndexAttribute = new HashMap<String, Integer>(); mapIndex.put(i, mapIndexAttribute); Enumeration<?> en = attribute.enumerateValues(); while (en.hasMoreElements()) { String catVal = en.nextElement().toString(); boolean created = mapDomain.get(i).add(catVal); if (created) { mapIndexAttribute.put(catVal, mapIdx++); } } nn += mapDomain.get(i).size(); //count total nominal values } else { idxsN.add(i); } } this.n = nn; this.base = new ArrayList<TupleSI>(); this.noBase = new ArrayList<TupleSI>(); this.M = new int[n][n]; this.D = new double[n][n]; this.F = new HashMap<TupleSI, Double>(); this.computeBase(); this.computeMatrixMDF(); } catch (Exception e) { e.printStackTrace(); } }