Example usage for weka.core Instances enumerateInstances

Introduction

On this page you can find example usages of weka.core.Instances.enumerateInstances().

Prototype

public Enumeration<Instance> enumerateInstances()

Source Link

Document

Returns an enumeration of all instances in the dataset.

Usage

From source file:net.sf.bddbddb.order.MyId3.java

License:LGPL

/**
 * Partitions a dataset into one subset per value of a nominal attribute.
 *
 * <p>An instance whose value for {@code att} is missing is added to every
 * subset and is not counted in {@code splitDataSize} or {@code numI}. Any
 * other instance is added only to the subset of its value and is counted
 * in both.
 *
 * @param data
 *            the data which is to be split
 * @param att
 *            the attribute to be used for splitting
 * @return the sets of instances produced by the split, indexed by attribute value
 */
private Instances[] splitData(Instances data, Attribute att) {
    numI = 0;
    final int valueCount = att.numValues();
    final int capacity = data.numInstances();
    splitDataSize = new int[valueCount];

    Instances[] subsets = new Instances[valueCount];
    for (int v = 0; v < valueCount; v++) {
        subsets[v] = new Instances(data, capacity);
    }

    for (Enumeration instEnum = data.enumerateInstances(); instEnum.hasMoreElements();) {
        Instance current = (Instance) instEnum.nextElement();
        if (!current.isMissing(att)) {
            int valueIndex = (int) current.value(att);
            subsets[valueIndex].add(current);
            splitDataSize[valueIndex]++;
            numI++;
            continue;
        }
        // Missing value: add to all children.
        for (int child = 0; child < valueCount; ++child) {
            subsets[child].add(current);
        }
    }
    return subsets;
}

From source file:net.sf.bddbddb.order.WekaInterface.java

License:LGPL

/**
 * Runs a stratified cross-validation of the classifier named by {@code cClassName}.
 *
 * <p>For every fold a classifier is built on the training split and applied to the test
 * split. The fold estimate is {@code 1 - loss / sum}, where {@code sum} is the total weight
 * of the test instances that could be classified and {@code loss} is the weight of those
 * whose predicted class differs from their actual class. The result is the mean of the
 * fold estimates.
 *
 * <p>Instances whose classification throws are reported on {@code FindBestDomainOrder.out}
 * and contribute to neither {@code loss} nor {@code sum}. If no weight is accumulated for a
 * fold, that fold's estimate (and therefore the result) is {@code NaN}.
 *
 * @param numFolds number of cross-validation folds
 * @param data0 the dataset; it is copied with {@code new Instances(data0)} before stratifying
 * @param cClassName name of the classifier, passed through to {@code buildClassifier}
 * @return the mean fold estimate; {@code Double.NaN} if there are fewer instances than folds
 *         or {@code numFolds} is 0
 */
public static double cvError(int numFolds, Instances data0, String cClassName) {
    if (data0.numInstances() < numFolds) {
        return Double.NaN; // more folds than elements
    }
    if (numFolds == 0) {
        return Double.NaN; // no folds
    }
    // Given the two checks above, this is only reached with a negative numFolds.
    if (data0.numInstances() == 0) {
        return 0; // no instances
    }

    // The copy is stratified without being shuffled first.
    Instances data = new Instances(data0);
    data.stratify(numFolds);
    Assert._assert(data.classAttribute() != null);

    double[] estimates = new double[numFolds];
    for (int i = 0; i < numFolds; ++i) {
        Instances trainData = data.trainCV(numFolds, i);
        Assert._assert(trainData.classAttribute() != null);
        Assert._assert(trainData.numInstances() != 0, "Cannot train classifier on 0 instances.");

        Instances testData = data.testCV(numFolds, i);
        Assert._assert(testData.classAttribute() != null);
        Assert._assert(testData.numInstances() != 0, "Cannot test classifier on 0 instances.");

        // Silence tracing while the classifier is built; restore it in finally so a failing
        // build cannot leave the global trace level stuck at 0.
        int savedTrace = FindBestDomainOrder.TRACE;
        FindBestDomainOrder.TRACE = 0;
        Classifier classifier;
        try {
            classifier = buildClassifier(cClassName, trainData);
        } finally {
            FindBestDomainOrder.TRACE = savedTrace;
        }

        double loss = 0;
        double sum = 0;
        for (Enumeration e = testData.enumerateInstances(); e.hasMoreElements();) {
            Instance instance = (Instance) e.nextElement();
            Assert._assert(instance != null);
            Assert._assert(instance.classAttribute() != null
                    && instance.classAttribute() == trainData.classAttribute());
            try {
                double testClass = classifier.classifyInstance(instance);
                double weight = instance.weight();
                if (testClass != instance.classValue()) {
                    loss += weight;
                }
                sum += weight;
            } catch (Exception ex) {
                // Best effort: report the failure and leave this instance out of the estimate.
                FindBestDomainOrder.out.println("Exception while classifying: " + instance + "\n" + ex);
            }
        }
        estimates[i] = 1 - loss / sum;
    }

    double average = 0;
    for (int i = 0; i < numFolds; ++i) {
        average += estimates[i];
    }
    return average / numFolds;
}

From source file:net.sf.mzmine.modules.peaklistmethods.dataanalysis.clustering.em.EMClusterer.java

License:Open Source License

/**
 * Clusters the dataset with Weka's EM clusterer.
 *
 * <p>The iteration count from the parameter set is passed to the clusterer as the
 * {@code -I} option. Any exception raised while configuring, building or applying the
 * clusterer is logged at SEVERE level and {@code null} is returned.
 *
 * @param dataset the instances to cluster
 * @param parameters the parameter set supplying the iteration count and visualization setting
 * @return the clustering result, or {@code null} if clustering failed
 */
@Override
public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) {

    int numberOfIterations = parameters.getParameter(EMClustererParameters.numberOfIterations).getValue();
    String[] emOptions = { "-I", String.valueOf(numberOfIterations) };
    EM em = new EM();
    List<Integer> assignments = new ArrayList<Integer>();

    try {
        em.setOptions(emOptions);
        em.buildClusterer(dataset);

        // One cluster index per instance, in dataset order.
        for (Enumeration<?> it = dataset.enumerateInstances(); it.hasMoreElements();) {
            Instance instance = (Instance) it.nextElement();
            assignments.add(em.clusterInstance(instance));
        }

        return new ClusteringResult(assignments, null, em.numberOfClusters(),
                parameters.getParameter(EMClustererParameters.visualization).getValue());
    } catch (Exception ex) {
        logger.log(Level.SEVERE, null, ex);
        return null;
    }
}

From source file:net.sf.mzmine.modules.peaklistmethods.dataanalysis.clustering.farthestfirst.FarthestFirstClusterer.java

License:Open Source License

/**
 * Clusters the dataset with Weka's FarthestFirst clusterer.
 *
 * <p>The requested number of groups is passed to the clusterer as the {@code -N} option.
 * Any exception raised while configuring, building or applying the clusterer is logged at
 * SEVERE level and {@code null} is returned.
 *
 * @param dataset the instances to cluster
 * @param parameters the parameter set supplying the group count and visualization setting
 * @return the clustering result, or {@code null} if clustering failed
 */
@Override
public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) {

    int numberOfGroups = parameters.getParameter(FarthestFirstClustererParameters.numberOfGroups).getValue();
    String[] clustererOptions = new String[] { "-N", String.valueOf(numberOfGroups) };
    FarthestFirst farthestFirst = new FarthestFirst();
    List<Integer> assignments = new ArrayList<Integer>();

    try {
        farthestFirst.setOptions(clustererOptions);
        farthestFirst.buildClusterer(dataset);

        Enumeration<?> remaining = dataset.enumerateInstances();
        while (remaining.hasMoreElements()) {
            Instance instance = (Instance) remaining.nextElement();
            int clusterIndex = farthestFirst.clusterInstance(instance);
            assignments.add(clusterIndex);
        }

        // NOTE(review): the visualization setting is read from EMClustererParameters rather
        // than FarthestFirstClustererParameters — confirm this is intentional.
        return new ClusteringResult(assignments, null, farthestFirst.numberOfClusters(),
                parameters.getParameter(EMClustererParameters.visualization).getValue());
    } catch (Exception ex) {
        logger.log(Level.SEVERE, null, ex);
        return null;
    }
}

From source file:net.sf.mzmine.modules.peaklistmethods.dataanalysis.clustering.simplekmeans.SimpleKMeansClusterer.java

License:Open Source License

/**
 * Clusters the dataset with Weka's SimpleKMeans clusterer.
 *
 * <p>The requested number of groups is passed to the clusterer as the {@code -N} option.
 * Any exception raised while configuring, building or applying the clusterer is logged at
 * SEVERE level and {@code null} is returned.
 *
 * @param dataset the instances to cluster
 * @param parameters the parameter set supplying the group count and visualization setting
 * @return the clustering result, or {@code null} if clustering failed
 */
@Override
public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) {

    int numberOfGroups = parameters.getParameter(SimpleKMeansClustererParameters.numberOfGroups).getValue();
    String[] kMeansOptions = { "-N", String.valueOf(numberOfGroups) };
    SimpleKMeans kMeans = new SimpleKMeans();
    List<Integer> assignments = new ArrayList<Integer>();

    try {
        kMeans.setOptions(kMeansOptions);
        kMeans.buildClusterer(dataset);

        // One cluster index per instance, in dataset order.
        for (Enumeration<?> it = dataset.enumerateInstances(); it.hasMoreElements();) {
            assignments.add(kMeans.clusterInstance((Instance) it.nextElement()));
        }

        // NOTE(review): the visualization setting is read from EMClustererParameters rather
        // than SimpleKMeansClustererParameters — confirm this is intentional.
        return new ClusteringResult(assignments, null, kMeans.numberOfClusters(),
                parameters.getParameter(EMClustererParameters.visualization).getValue());
    } catch (Exception ex) {
        logger.log(Level.SEVERE, null, ex);
        return null;
    }
}

From source file:newdtl.NewJ48.java

/**
 * Builds the J48 subtree rooted at this node.
 *
 * <p>An empty dataset produces a leaf with a missing label. Otherwise the split attribute
 * is the one selected by {@code maxIndex} over the per-attribute gain ratios. A gain ratio
 * of zero produces a leaf labelled with {@code maxIndex} of the class counts; any other
 * value splits the data and builds one child per partition.
 *
 * @param data the training data
 * @exception Exception if tree failed to build
 */
private void makeTree(Instances data) throws Exception {

    // No instances reach this node: make it a leaf with a missing label.
    if (data.numInstances() == 0) {
        splitAttribute = null;
        label = DOUBLE_MISSING_VALUE;
        classDistributions = new double[data.numClasses()];
        isLeaf = true;
        return;
    }

    // Find the maximum gain ratio (and the matching threshold) over all attributes.
    final int attributeCount = data.numAttributes();
    double[] gainRatios = new double[attributeCount];
    double[] thresholds = new double[attributeCount];
    for (Enumeration attEnum = data.enumerateAttributes(); attEnum.hasMoreElements();) {
        Attribute candidate = (Attribute) attEnum.nextElement();
        double[] ratioAndThreshold = computeGainRatio(data, candidate);
        gainRatios[candidate.index()] = ratioAndThreshold[0];
        thresholds[candidate.index()] = ratioAndThreshold[1];
    }

    splitAttribute = data.attribute(maxIndex(gainRatios));
    splitThreshold = splitAttribute.isNumeric() ? thresholds[maxIndex(gainRatios)] : Double.NaN;

    // Count how many instances fall into each class.
    classDistributions = new double[data.numClasses()];
    for (int i = 0, total = data.numInstances(); i < total; i++) {
        Instance inst = data.instance(i);
        classDistributions[(int) inst.classValue()]++;
    }

    // Make a leaf when the best gain ratio is 0.
    if (Double.compare(gainRatios[splitAttribute.index()], 0) == 0) {
        splitAttribute = null;
        label = maxIndex(classDistributions);
        classAttribute = data.classAttribute();
        isLeaf = true;
        return;
    }

    // Replace missing values of the split attribute with its most common value (the mode).
    // NOTE(review): modusIndex and Attribute.value(int) work on nominal value indices —
    // confirm a numeric split attribute can never have missing values here.
    if (isMissing(data, splitAttribute)) {
        int modeIndex = modusIndex(data, splitAttribute);
        for (Enumeration dataEnum = data.enumerateInstances(); dataEnum.hasMoreElements();) {
            Instance inst = (Instance) dataEnum.nextElement();
            if (inst.isMissing(splitAttribute)) {
                inst.setValue(splitAttribute, splitAttribute.value(modeIndex));
            }
        }
    }

    // Build a new subtree below this node for every partition: two for a numeric
    // (threshold) split, one per attribute value for a nominal split.
    final boolean numericSplit = splitAttribute.isNumeric();
    Instances[] partitions = numericSplit
            ? splitData(data, splitAttribute, splitThreshold)
            : splitData(data, splitAttribute);
    final int childCount = numericSplit ? 2 : splitAttribute.numValues();
    children = new NewJ48[childCount];
    for (int c = 0; c < childCount; c++) {
        children[c] = new NewJ48();
        children[c].makeTree(partitions[c]);
    }
    isLeaf = false;
}

From source file:newdtl.NewJ48.java

/**
 * Checks whether any instance in the data has a missing value for the attribute.
 *
 * @param data the data for searching
 * @param att the attribute for searching
 * @return {@code true} if at least one instance has a missing value for {@code att}
 */
private boolean isMissing(Instances data, Attribute att) {
    for (Enumeration dataEnum = data.enumerateInstances(); dataEnum.hasMoreElements();) {
        Instance inst = (Instance) dataEnum.nextElement();
        if (inst.isMissing(att)) {
            // Stop at the first missing value.
            return true;
        }
    }
    return false;
}

From source file:newdtl.NewJ48.java

/**
 * Finds the index of the most common value of an attribute.
 *
 * <p>Instances with a missing value for {@code att} are ignored. When several values are
 * equally frequent the lowest index wins.
 *
 * @param data the data for searching
 * @param att the attribute for searching
 * @return index of the attribute value that occurs most often
 */
private int modusIndex(Instances data, Attribute att) {
    // Count the occurrences of each attribute value.
    int[] counts = new int[att.numValues()];
    for (Enumeration dataEnumeration = data.enumerateInstances(); dataEnumeration.hasMoreElements();) {
        Instance inst = (Instance) dataEnumeration.nextElement();
        if (inst.isMissing(att)) {
            continue;
        }
        counts[(int) inst.value(att)]++;
    }

    // Pick the value with the largest count.
    int best = 0;
    for (int candidate = 1; candidate < counts.length; ++candidate) {
        if (counts[candidate] > counts[best]) {
            best = candidate;
        }
    }
    return best;
}

From source file:org.montp2.m1decol.ter.clustering.XMeansClustering.java

License:Open Source License

/**
 * Clusters the instances of an ARFF file with X-Means, prints every instance followed by
 * its cluster number to standard output, and saves the trained model.
 *
 * @param inPath path of the ARFF file to load
 * @param outPath path the trained model is saved to
 * @param propertiesCluster clustering properties; not read by this method
 * @return the trained X-Means clusterer
 * @throws Exception if any of the underlying loading, clustering or saving calls fails
 */
public Clusterer computeClustering(String inPath, String outPath, Properties propertiesCluster)
        throws Exception {
    Instances inputInstances = WekaUtils.loadARFF(inPath);

    // No distance function is set on the clusterer. An earlier version configured a
    // EuclideanDistance here but never passed it to XMeans, so it had no effect.
    XMeans xmeans = new XMeans();
    xmeans.setSeed(10);
    xmeans.setMinNumClusters(5);
    xmeans.setMaxNumClusters(12);
    xmeans.setMaxKMeans(1000);
    xmeans.setMaxKMeansForChildren(1000);
    xmeans.setBinValue(1.0);
    xmeans.setCutOffFactor(0.5);
    xmeans.setDebugLevel(0);
    // Max iterations used to be set to 500 and then overwritten with 1 before the build;
    // only the effective value is kept.
    xmeans.setMaxIterations(1);
    xmeans.buildClusterer(inputInstances);

    Enumeration<Instance> e = inputInstances.enumerateInstances();
    while (e.hasMoreElements()) {
        Instance ins = e.nextElement();
        int clusterNum = xmeans.clusterInstance(ins);
        System.out.println(ins.toString());
        System.out.println(clusterNum);
    }

    WekaUtils.saveModel(xmeans, outPath);

    return xmeans;
}

From source file:org.montp2.m1decol.ter.preprocessing.GlobalPreProcessing.java

License:Open Source License

/**
 * Maps the position of each ARFF instance to the user id of the text file whose content
 * matches it.
 *
 * <p>Every file in {@code dirPath} must be named {@code <anything>_<digits>.txt}; the digits
 * are the user id. An instance matches a file when the instance's string form (with its
 * first and last character removed) and the file content split on whitespace into the same
 * sequence of words. Instances without a matching file get no entry in the result.
 *
 * @param dirPath directory containing the per-user text files
 * @param inArff path of the ARFF file to load
 * @return map from instance index (0-based, in ARFF order) to user id
 * @throws IllegalStateException if a file name in {@code dirPath} does not follow the
 *         expected pattern
 * @throws Exception if reading the files or loading the ARFF file fails
 */
public Map<Integer, Integer> getMapOfInstanceArffToIdUser(String dirPath, String inArff) throws Exception {

    // Pattern.COMMENTS makes the spaces in the expression insignificant. The dot before
    // "txt" is escaped so that only a literal ".txt" extension is accepted.
    Pattern pattern = Pattern.compile("^( \\p{Print}+ ) (_(\\p{Digit}+)\\.txt)$", Pattern.COMMENTS);

    // Split each file's content once here instead of once per instance below.
    Map<Integer, String[]> wordsByUserId = new HashMap<Integer, String[]>();
    for (File file : FileUtils.ls(dirPath)) {
        Matcher matcher = pattern.matcher(file.getAbsolutePath());
        if (!matcher.matches()) {
            // Same exception type Matcher.group would raise, but naming the offending file.
            throw new IllegalStateException(
                    "File name does not match <name>_<id>.txt: " + file.getAbsolutePath());
        }
        int userId = Integer.parseInt(matcher.group(3));

        String content;
        BufferedInputStream in = new BufferedInputStream(new FileInputStream(file));
        try {
            content = InputStreamUtils.readInputStream(in);
        } finally {
            // Release the file handle even if reading fails.
            in.close();
        }
        wordsByUserId.put(userId, content.split("\\s"));
    }

    Instances instances = WekaUtils.loadARFF(inArff);
    Map<Integer, Integer> arffTOIdUser = new HashMap<Integer, Integer>();
    int index = 0;
    for (Enumeration<Instance> en = instances.enumerateInstances(); en.hasMoreElements(); index++) {
        String value = en.nextElement().toString();
        // Drop the first and last character (presumably surrounding quotes — confirm).
        value = value.substring(1, value.length() - 1);
        String[] words = value.split("\\s");

        for (Map.Entry<Integer, String[]> candidate : wordsByUserId.entrySet()) {
            String[] fileWords = candidate.getValue();
            if (fileWords.length != words.length) {
                continue;
            }
            boolean isEqual = true;
            for (int j = 0; j < fileWords.length; j++) {
                if (!fileWords[j].equals(words[j])) {
                    isEqual = false;
                    break;
                }
            }
            if (isEqual) {
                // The first matching file wins.
                arffTOIdUser.put(index, candidate.getKey());
                break;
            }
        }
    }

    return arffTOIdUser;
}