Example usage for weka.core Instances numInstances

List of usage examples for weka.core Instances numInstances

Introduction

In this page you can find the example usage for weka.core Instances numInstances.

Prototype


public int numInstances()

Source Link

Document

Returns the number of instances in the dataset.

Usage

From source file:gate.plugin.learningframework.engines.EngineWeka.java

@Override
public Object evaluateHoldout(InstanceList instances, double portion, int repeats, String parms) {
    // Parse the algorithm parameters:
    //   -s/-seed <int>  : random seed, default 0
    //   -S/-nostratify  : switch off stratification if we evaluate classification
    Parms opts = new Parms(parms, "s:seed:i", "S:nostratify:b");
    int seed = (int) opts.getValueOrElse("seed", 0);
    if (algorithm instanceof AlgorithmRegression) {
        throw new UnsupportedOperationException("Weka holdout eval for regression not supported yet.");
    } else {
        // Must be a classification algorithm then!
        weka.core.Instances all = new CorpusRepresentationWeka(corpusRepresentationMallet)
                .getRepresentationWeka();
        // BUG FIX: the default for a boolean option must be a Boolean, not the
        // Integer 0 — casting an Integer to boolean throws ClassCastException
        // whenever the -S/-nostratify option is absent from parms.
        boolean noStratify = (boolean) opts.getValueOrElse("nostratify", false);
        Random rand = new Random(seed);
        all.randomize(rand);
        // TODO: the noStratify flag is parsed but not applied yet — not sure
        // if/how we can do stratification for holdout evaluation.
        // TODO: there must be a better way to do the splitting too!
        // TODO: how to implement repeats?
        if (repeats != 1) {
            throw new GateRuntimeException("Only repeats == 1 supported yet");
        }
        // Split the shuffled data into a leading train part and a trailing test part.
        int trainSize = (int) Math.round(all.numInstances() * portion);
        int testSize = all.numInstances() - trainSize;
        Instances train = new Instances(all, 0, trainSize);
        Instances test = new Instances(all, trainSize, testSize);
        Classifier classifier = (Classifier) trainer;
        try {
            classifier.buildClassifier(train);
        } catch (Exception ex) {
            throw new GateRuntimeException("Error during training of Weka classifier", ex);
        }
        Evaluation eval = null;
        try {
            eval = new Evaluation(train);
        } catch (Exception ex) {
            throw new GateRuntimeException("Could not create Evaluation object", ex);
        }
        try {
            eval.evaluateModel(classifier, test);
        } catch (Exception ex) {
            throw new GateRuntimeException("Error evaluating the classifier", ex);
        }
        System.out.println("Evaluation result:\n" + eval);
        return eval;
    }
}

From source file:GClass.EvaluationInternal.java

License:Open Source License

/**
 * Evaluates the classifier on a given set of instances. Note that
 * the data must have exactly the same format (e.g. order of
 * attributes) as the data used to train the classifier! Otherwise
 * the results will generally be meaningless.
 *
 * @param classifier machine learning classifier
 * @param data set of test instances for evaluation
 * @exception Exception if model could not be evaluated
 * successfully/*from  ww  w.j a va2  s  .com*/
 */
public double[] evaluateModel(Classifier classifier, Instances data) throws Exception {

    double predictions[] = new double[data.numInstances()];

    for (int i = 0; i < data.numInstances(); i++) {
        predictions[i] = evaluateModelOnce((Classifier) classifier, data.instance(i));
    }
    return predictions;
}

From source file:GClass.EvaluationInternal.java

License:Open Source License

/**
 * Sets the class prior probabilities from the training data.
 *
 * @param train the training instances used to determine
 * the prior probabilities
 * @exception Exception if the class attribute of the instances is not
 * set
 */
public void setPriors(Instances train) throws Exception {

    if (m_ClassIsNominal) {
        // Nominal class: start every prior at 1 (Laplace-style), then
        // accumulate the weight of each non-missing class value.
        for (int c = 0; c < m_NumClasses; c++) {
            m_ClassPriors[c] = 1;
        }
        m_ClassPriorsSum = m_NumClasses;
        for (int idx = 0; idx < train.numInstances(); idx++) {
            Instance inst = train.instance(idx);
            if (!inst.classIsMissing()) {
                m_ClassPriors[(int) inst.classValue()] += inst.weight();
                m_ClassPriorsSum += inst.weight();
            }
        }
    } else {
        // Numeric class: reset the collected class values and estimators,
        // then record every non-missing class value with its weight.
        m_NumTrainClassVals = 0;
        m_TrainClassVals = null;
        m_TrainClassWeights = null;
        m_PriorErrorEstimator = null;
        m_ErrorEstimator = null;

        for (int idx = 0; idx < train.numInstances(); idx++) {
            Instance inst = train.instance(idx);
            if (!inst.classIsMissing()) {
                addNumericTrainClass(inst.classValue(), inst.weight());
            }
        }
    }
}

From source file:general.Util.java

/**
 * Shows learning statistics for a percentage-split evaluation: trains the
 * selected classifier on the first trainPercent% of the (shuffled) data,
 * prints per-instance predictions for the remainder, then prints an
 * evaluation summary.
 *
 * @param data           the full data set (the last attribute is used as the class)
 * @param trainPercent   percentage of the data used for training (0-100)
 * @param classifierName one of "naivebayes", "j48-prune", "j48-unprune", "id3"
 */
public static void PercentageSplit(Instances data, double trainPercent, String classifierName) {
    try {
        int trainSize = (int) Math.round(data.numInstances() * trainPercent / 100);
        int testSize = data.numInstances() - trainSize;

        // Fixed seed so repeated runs produce the same split.
        data.randomize(new Random(1));

        Instances train = new Instances(data, 0, trainSize);
        Instances test = new Instances(data, trainSize, testSize);
        train.setClassIndex(train.numAttributes() - 1);
        test.setClassIndex(test.numAttributes() - 1);

        switch (classifierName.toLowerCase()) {
        case "naivebayes":
            classifier = new NaiveBayes();
            break;
        case "j48-prune":
            classifier = new MyJ48(true, 0.25f);
            break;
        case "j48-unprune":
            classifier = new MyJ48(false, 0f);
            break;
        case "id3":
            classifier = new MyID3();
            break;
        default:
            // BUG FIX: previously fell through silently and reused whatever
            // classifier had been set by an earlier call (or NPE'd on the
            // first call) — fail fast on an unknown name instead.
            throw new IllegalArgumentException("Unknown classifier: " + classifierName);
        }
        classifier.buildClassifier(train);

        // Print each test instance with its actual and predicted labels.
        for (int i = 0; i < test.numInstances(); i++) {
            try {
                double pred = classifier.classifyInstance(test.instance(i));
                System.out.print("ID: " + test.instance(i));
                System.out
                        .print(", actual: " + test.classAttribute().value((int) test.instance(i).classValue()));
                System.out.println(", predicted: " + test.classAttribute().value((int) pred));
            } catch (Exception ex) {
                Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex);
            }
        }

        // Start evaluate model using instances test and print results
        try {
            Evaluation eval = new Evaluation(train);
            eval.evaluateModel(classifier, test);
            System.out.println(eval.toSummaryString("\nResults\n\n", false));
        } catch (Exception e) {
            e.printStackTrace();
        }

    } catch (Exception ex) {
        Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex);
    }

}

From source file:general.Util.java

/**
 * Classifies every instance of a test set with a pre-built model, printing
 * each instance together with its actual and predicted class labels.
 *
 * @param model the trained classifier to apply
 * @param test  the test instances (the last attribute is treated as the class)
 */
public static void doClassify(Classifier model, Instances test) {
    test.setClassIndex(test.numAttributes() - 1);
    final int total = test.numInstances();
    for (int idx = 0; idx < total; idx++) {
        Instance current = test.instance(idx);
        try {
            double predicted = model.classifyInstance(current);
            System.out.print("ID: " + current);
            System.out.print(", actual: " + test.classAttribute().value((int) current.classValue()));
            System.out.println(", predicted: " + test.classAttribute().value((int) predicted));
        } catch (Exception ex) {
            // A failure on one instance is logged and the loop continues.
            Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}

From source file:gr.auth.ee.lcs.AbstractLearningClassifierSystem.java

License:Open Source License

/**
 * Initializes the rule population by clustering the train set and producing rules based upon the clusters.
 * The train set is initially divided into as many partitions as there are distinct label combinations.
 *
 * @param file
 *            the .arff file holding the train set
 * @return the initial classifier set produced from the cluster centroids
 * @throws Exception
 *             if the file cannot be opened, filtering fails, or clustering fails
 * */
public ClassifierSet initializePopulation(final String file) throws Exception {

    // Fraction of a partition's size used as its number of k-means clusters.
    final double gamma = SettingsLoader.getNumericSetting("CLUSTER_GAMMA", .2);

    int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1);

    final Instances set = InstancesUtility.openInstance(file);

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(10); // fixed seed so clustering is reproducible
    kmeans.setPreserveInstancesOrder(true);

    /*
     * Table partitions will hold instances only with attributes.
     * On the contrary, table partitionsWithCLasses will hold only the labels.
     */
    Instances[] partitions = InstancesUtility.partitionInstances(this, file);
    Instances[] partitionsWithCLasses = InstancesUtility.partitionInstances(this, file);

    /*
     * Instead of having multiple positions for the same label combination, use only one.
     * This is the one that will be used to "cover" the centroids.
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Instance temp = partitionsWithCLasses[i].instance(0);
        partitionsWithCLasses[i].delete();
        partitionsWithCLasses[i].add(temp);
    }

    /*
     * Delete the labels from the partitions: build a 1-based, comma-separated
     * index string covering the trailing numberOfLabels attributes.
     */
    String attributesIndicesForDeletion = "";

    for (int k = set.numAttributes() - numberOfLabels + 1; k <= set.numAttributes(); k++) {
        if (k != set.numAttributes())
            attributesIndicesForDeletion += k + ",";
        else
            attributesIndicesForDeletion += k;
    }

    /* attributesIndicesForDeletion = "8,9,10,11,12,13,14" e.g. for 7 attributes and 7 labels.
     * Indices are 1-based because Remove.setAttributeIndices expects 1-based ranges.
     */
    for (int i = 0; i < partitions.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(attributesIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitions[i]);
        partitions[i] = Filter.useFilter(partitions[i], remove);
    }
    // partitions now contains only attributes

    /*
     * Delete the attributes from partitionsWithCLasses: index string covering
     * the leading (numAttributes - numberOfLabels) feature attributes.
     */
    String labelsIndicesForDeletion = "";

    for (int k = 1; k <= set.numAttributes() - numberOfLabels; k++) {
        if (k != set.numAttributes() - numberOfLabels)
            labelsIndicesForDeletion += k + ",";
        else
            labelsIndicesForDeletion += k;
    }

    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(labelsIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitionsWithCLasses[i]);
        partitionsWithCLasses[i] = Filter.useFilter(partitionsWithCLasses[i], remove);
    }
    // partitionsWithCLasses now contains only labels

    int populationSize = (int) SettingsLoader.getNumericSetting("populationSize", 1500);

    // The set used to store the rules from all the clusters; bounded by
    // populationSize with worst-fitness roulette-wheel deletion.
    ClassifierSet initialClassifiers = new ClassifierSet(new FixedSizeSetWorstFitnessDeletion(this,
            populationSize, new RouletteWheelSelector(AbstractUpdateStrategy.COMPARISON_MODE_DELETION, true)));

    for (int i = 0; i < partitions.length; i++) {

        try {

            // Number of clusters scales with the partition size (ceil(gamma * n)).
            kmeans.setNumClusters((int) Math.ceil(gamma * partitions[i].numInstances()));
            kmeans.buildClusterer(partitions[i]);
            int[] assignments = kmeans.getAssignments();

            Instances centroids = kmeans.getClusterCentroids();
            int numOfCentroidAttributes = centroids.numAttributes();

            /*
             * The centroids at this stage hold only attributes. To continue, we need to
             * provide them the labels — the ones we removed earlier. But first, open up
             * attribute positions for them.
             */
            for (int j = 0; j < numberOfLabels; j++) {
                Attribute label = new Attribute("label" + j);
                centroids.insertAttributeAt(label, numOfCentroidAttributes + j);
            }

            // Copy this partition's (single) label combination onto every centroid.
            for (int centroidInstances = 0; centroidInstances < centroids.numInstances(); centroidInstances++) {
                for (int labels = 0; labels < numberOfLabels; labels++) {
                    centroids.instance(centroidInstances).setValue(numOfCentroidAttributes + labels,
                            partitionsWithCLasses[i].instance(0).value(labels));
                }
            }

            double[][] centroidsArray = InstancesUtility.convertIntancesToDouble(centroids);

            // Turn each labelled centroid into an initial covering classifier.
            for (int j = 0; j < centroidsArray.length; j++) {
                final Classifier coveringClassifier = this.getClassifierTransformBridge()
                        .createRandomClusteringClassifier(centroidsArray[j]);

                coveringClassifier.setClassifierOrigin(Classifier.CLASSIFIER_ORIGIN_INIT);
                initialClassifiers.addClassifier(new Macroclassifier(coveringClassifier, 1), false);
            }
        } catch (Exception e) {
            // NOTE(review): a failed partition is logged and skipped; the
            // remaining partitions still contribute rules.
            e.printStackTrace();
        }
    }
    System.out.println(initialClassifiers);
    return initialClassifiers;
}

From source file:gr.auth.ee.lcs.AbstractLearningClassifierSystem.java

License:Open Source License

/**
 * Initializes the rule population by clustering the train set and producing rules based upon the clusters.
 * The train set is initially divided into as many partitions as there are distinct label combinations.
 *
 * @param trainset
 *             the train set as Instances
 * @return the initial classifier set produced from the cluster centroids
 * @throws Exception
 *             if filtering or clustering fails
 * */

public ClassifierSet initializePopulation(final Instances trainset) throws Exception {

    // Fraction of a partition's size used as its number of k-means clusters.
    final double gamma = SettingsLoader.getNumericSetting("CLUSTER_GAMMA", .2);

    int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1);

    final Instances set = trainset;

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(10); // fixed seed so clustering is reproducible
    kmeans.setPreserveInstancesOrder(true);

    /*
     * Table partitions will hold instances only with attributes.
     * On the contrary, table partitionsWithCLasses will hold only the labels.
     */
    Instances[] partitions = InstancesUtility.partitionInstances(this, trainset);
    Instances[] partitionsWithCLasses = InstancesUtility.partitionInstances(this, trainset);

    /*
    * Instead of having multiple positions for the same label combination, use only one.
    * This is the one that will be used to "cover" the centroids.
    */

    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Instance temp = partitionsWithCLasses[i].instance(0);
        partitionsWithCLasses[i].delete();
        partitionsWithCLasses[i].add(temp);
    }

    /*
    * Delete the labels from the partitions: build a 1-based, comma-separated
    * index string covering the trailing numberOfLabels attributes.
    */
    String attributesIndicesForDeletion = "";

    for (int k = set.numAttributes() - numberOfLabels + 1; k <= set.numAttributes(); k++) {
        if (k != set.numAttributes())
            attributesIndicesForDeletion += k + ",";
        else
            attributesIndicesForDeletion += k;
    }
    /* attributesIndicesForDeletion = "8,9,10,11,12,13,14" e.g. for 7 attributes and 7 labels.
    * Indices are 1-based because Remove.setAttributeIndices expects 1-based ranges.
    */
    for (int i = 0; i < partitions.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(attributesIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitions[i]);
        partitions[i] = Filter.useFilter(partitions[i], remove);
    }
    // partitions now contains only attributes

    /*
    * Delete the attributes from partitionsWithCLasses: index string covering
    * the leading (numAttributes - numberOfLabels) feature attributes.
    */
    String labelsIndicesForDeletion = "";

    for (int k = 1; k <= set.numAttributes() - numberOfLabels; k++) {
        if (k != set.numAttributes() - numberOfLabels)
            labelsIndicesForDeletion += k + ",";
        else
            labelsIndicesForDeletion += k;
    }
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(labelsIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitionsWithCLasses[i]);
        partitionsWithCLasses[i] = Filter.useFilter(partitionsWithCLasses[i], remove);
    }
    // partitionsWithCLasses now contains only labels

    int populationSize = (int) SettingsLoader.getNumericSetting("populationSize", 1500);

    // The set used to store the rules from all the clusters; bounded by
    // populationSize with worst-fitness roulette-wheel deletion.
    ClassifierSet initialClassifiers = new ClassifierSet(new FixedSizeSetWorstFitnessDeletion(this,
            populationSize, new RouletteWheelSelector(AbstractUpdateStrategy.COMPARISON_MODE_DELETION, true)));

    for (int i = 0; i < partitions.length; i++) {

        try {

            // Number of clusters scales with the partition size (ceil(gamma * n)).
            kmeans.setNumClusters((int) Math.ceil(gamma * partitions[i].numInstances()));
            kmeans.buildClusterer(partitions[i]);
            int[] assignments = kmeans.getAssignments();

            Instances centroids = kmeans.getClusterCentroids();

            int numOfCentroidAttributes = centroids.numAttributes();

            /*
             * The centroids at this stage hold only attributes. To continue, we need to
             * provide them the labels — the ones we removed earlier. But first, open up
             * attribute positions for them.
             */
            for (int j = 0; j < numberOfLabels; j++) {
                Attribute label = new Attribute("label" + j);
                centroids.insertAttributeAt(label, numOfCentroidAttributes + j);
            }

            // Copy this partition's (single) label combination onto every centroid.
            for (int centroidInstances = 0; centroidInstances < centroids.numInstances(); centroidInstances++) {
                for (int labels = 0; labels < numberOfLabels; labels++) {
                    centroids.instance(centroidInstances).setValue(numOfCentroidAttributes + labels,
                            partitionsWithCLasses[i].instance(0).value(labels));
                }
            }

            double[][] centroidsArray = InstancesUtility.convertIntancesToDouble(centroids);

            // Turn each labelled centroid into an initial covering classifier.
            for (int j = 0; j < centroidsArray.length; j++) {
                final Classifier coveringClassifier = this.getClassifierTransformBridge()
                        .createRandomCoveringClassifier(centroidsArray[j]);

                coveringClassifier.setClassifierOrigin(Classifier.CLASSIFIER_ORIGIN_INIT);
                initialClassifiers.addClassifier(new Macroclassifier(coveringClassifier, 1), false);
            }
        } catch (Exception e) {
            // NOTE(review): a failed partition is logged and skipped; the
            // remaining partitions still contribute rules.
            e.printStackTrace();
        }
    }
    return initialClassifiers;
}

From source file:gr.auth.ee.lcs.data.representations.complex.ComplexRepresentation.java

License:Open Source License

/**
 * Builds the per-attribute representation for the given instances.
 *
 * Iterates over the feature attributes only (the trailing numberOfLabels
 * attributes are the labels) and fills attributeList with a Nominal,
 * Boolean, or Interval representation depending on the attribute type,
 * then delegates the class/label part to createClassRepresentation.
 *
 * @param instances
 *            the instances to derive the representation from
 */

protected void buildRepresentationFromInstance(final Instances instances) {

    for (int i = 0; i < (instances.numAttributes() - numberOfLabels); i++) {

        final String attributeName = instances.attribute(i).name();

        if (instances.attribute(i).isNominal()) {

            // Collect the nominal attribute's value names in declaration order.
            String[] attributeNames = new String[instances.attribute(i).numValues()];

            final Enumeration<?> values = instances.attribute(i).enumerateValues();

            for (int j = 0; j < attributeNames.length; j++) {
                attributeNames[j] = (String) values.nextElement();
            }

            // Create boolean or generic nominal: two-valued attributes are
            // modelled as boolean, more than two values as generic nominal.
            if (attributeNames.length > 2)
                attributeList[i] = new ComplexRepresentation.NominalAttribute(this.chromosomeSize,
                        attributeName, attributeNames, attributeGeneralizationRate);
            else
                attributeList[i] = new ComplexRepresentation.BooleanAttribute(chromosomeSize, attributeName,
                        attributeGeneralizationRate);

        } else if (instances.attribute(i).isNumeric()) {
            // Numeric attribute: scan all samples for min/max to bound an interval.
            // NOTE(review): assumes the set is non-empty — instance(0) would fail
            // on an empty set; confirm callers guarantee at least one instance.
            float minValue, maxValue;
            minValue = (float) instances.instance(0).toDoubleArray()[i];
            maxValue = minValue;
            for (int sample = 0; sample < instances.numInstances(); sample++) {
                final float currentVal = (float) instances.instance(sample).toDoubleArray()[i];
                if (currentVal > maxValue)
                    maxValue = currentVal;
                if (currentVal < minValue)
                    minValue = currentVal;
            }
            attributeList[i] = new ComplexRepresentation.IntervalAttribute(this.chromosomeSize, attributeName,
                    minValue, maxValue, precision, attributeGeneralizationRate);
        }
        // Attributes that are neither nominal nor numeric (e.g. string/date)
        // leave attributeList[i] untouched.
    }

    createClassRepresentation(instances);
}

From source file:gr.auth.ee.lcs.utilities.InstancesUtility.java

License:Open Source License

/**
 * Converts an instance set to a raw 2-D double array.
 * (The "Intances" typo in the name is kept for API compatibility.)
 *
 * @param set
 *            the set containing the instances
 * @return a double[][] with one row per instance and one column per
 *         attribute value, or null when the set is null
 */
public static double[][] convertIntancesToDouble(final Instances set) {
    if (set == null)
        return null;

    final int rows = set.numInstances();
    final int cols = set.numAttributes();
    final double[][] result = new double[rows][cols];

    for (int row = 0; row < rows; row++) {
        for (int col = 0; col < cols; col++) {
            result[row][col] = set.instance(row).value(col);
        }
    }

    return result;
}

From source file:gr.auth.ee.lcs.utilities.InstancesUtility.java

License:Open Source License

/**
 * Returns the label cardinality of the specified set.
 * /* w w  w  . j ava2s.  com*/
 */
public static double getLabelCardinality(final Instances set) {
    if (set == null)
        return -1;

    int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1);
    double sumOfLabels = 0;

    for (int i = 0; i < set.numInstances(); i++) {
        for (int j = set.numAttributes() - numberOfLabels; j < set.numAttributes(); j++) {
            sumOfLabels += set.instance(i).value(j);
        }
    }

    if (set.numInstances() != 0) {

        return (double) (sumOfLabels / set.numInstances());
    }
    return 0;
}