Example usage for weka.core Instances size

List of usage examples for weka.core Instances size

Introduction

This page collects example usages of the size() method of weka.core.Instances.

Prototype


@Override
public int size()

Source Link

Document

Returns the number of instances in the dataset.

Usage

From source file:moa.tud.ke.patching.Patching.java

/**
 * Learns a specific subset classifier (of the same type as the base
 * classifier) for each subset, to improve accuracy on the regions that
 * performed badly before.
 *
 * <p>The returned vector is index-aligned with {@code subsets}: entry
 * {@code d} is the patch for subset {@code d}. An entry is {@code null} when
 * the subset holds fewer than 5 instances or when building its patch failed;
 * a {@code null} entry is meant to default to the base classifier.
 *
 * @param subsets the subsets (each an {@code Instances}) to learn patches for
 * @param basePerformance not read by this method
 * @return one entry per subset, in subset order; entries may be {@code null}
 */
private Vector createPatches(Vector subsets, Vector basePerformance) {
    Vector patches = new Vector();

    System.out.println("Creating patches: #" + subsets.size());
    for (int d = 0; d < subsets.size(); d++) {
        Classifier patch = null; // null will then default to base classifier
        try {
            Instances set = (Instances) subsets.get(d);
            if (set.size() >= 5) { // anything smaller is too small to do anything properly
                patch = getPatchClassifier();
                patch.buildClassifier(set);
            }
        } catch (Exception e) {
            // A failure on one subset must not drop the patches of the remaining
            // subsets, and a half-built classifier must not be handed out.
            patch = null;
            System.err.println("Error building patches:");
            System.err.println("subset " + d + ": " + e);
        }
        // Always append, so that position d keeps matching subset d.
        patches.add(patch);
    }

    return patches;
}

From source file:mulan.classifier.transformation.CalibratedLabelRanking.java

License:Open Source License

/**
 * Trains the calibration (virtual label) models and then one one-vs-one model
 * per unordered label pair.
 *
 * <p>For every pair only the training instances whose two label values differ
 * are kept. The pair data set is reduced to the feature attributes plus the
 * first label of the pair, which becomes the class attribute. After training,
 * the pair data set is passed through {@code delete()} and stored in
 * {@code metaDataTest}.
 *
 * @param trainingSet the multi-label training data
 * @throws Exception if building any of the underlying models or filters fails
 */
@Override
protected void buildInternal(MultiLabelInstances trainingSet) throws Exception {
    // Calibration stage: binary relevance models over the same base classifier
    final String calibrationMessage = "Building calibration label models";
    debug(calibrationMessage);
    System.out.println(calibrationMessage);
    virtualLabelModels = new BinaryRelevance(getBaseClassifier());
    virtualLabelModels.setDebug(getDebug());
    virtualLabelModels.build(trainingSet);

    // Pairwise stage: n * (n - 1) / 2 models, one per unordered label pair
    numModels = ((numLabels) * (numLabels - 1)) / 2;
    oneVsOneModels = AbstractClassifier.makeCopies(getBaseClassifier(), numModels);
    nodata = new boolean[numModels];
    metaDataTest = new Instances[numModels];

    Instances trainingData = trainingSet.getDataSet();

    int modelIndex = 0;
    for (int first = 0; first < numLabels - 1; first++) {
        Attribute firstAttr = trainingData.attribute(labelIndices[first]);
        for (int second = first + 1; second < numLabels; second++) {
            final String progress = "Building one-vs-one model " + (modelIndex + 1) + "/" + numModels;
            debug(progress);
            System.out.println(progress);
            Attribute secondAttr = trainingData.attribute(labelIndices[second]);

            // Start from an empty copy of the training header
            Instances pairData = new Instances(trainingData, 0);
            for (int row = 0; row < trainingData.numInstances(); row++) {
                Instance original = trainingData.instance(row);
                Instance copy = (original instanceof SparseInstance)
                        ? new SparseInstance(original)
                        : new DenseInstance(original);

                String firstValue = firstAttr.value((int) copy.value(labelIndices[first]));
                String secondValue = secondAttr.value((int) copy.value(labelIndices[second]));
                if (firstValue.equals(secondValue)) {
                    continue; // both labels agree: no preference, skip this example
                }
                copy.setValue(firstAttr, firstValue);
                pairData.add(copy);
            }

            // Keep the features followed by the first label only; that label becomes the class
            int numPredictors = trainingData.numAttributes() - numLabels;
            int[] keptIndices = new int[numPredictors + 1];
            System.arraycopy(featureIndices, 0, keptIndices, 0, numPredictors);
            keptIndices[numPredictors] = labelIndices[first];
            Reorder reorder = new Reorder();
            reorder.setAttributeIndicesArray(keptIndices);
            reorder.setInputFormat(pairData);
            pairData = Filter.useFilter(pairData, reorder);
            pairData.setClassIndex(numPredictors);

            // Train first-vs-second, or remember that this pair had no usable examples
            if (pairData.size() == 0) {
                nodata[modelIndex] = true;
            } else {
                oneVsOneModels[modelIndex].buildClassifier(pairData);
            }
            pairData.delete();
            metaDataTest[modelIndex] = pairData;
            modelIndex++;
        }
    }
}

From source file:mulan.classifier.transformation.Pairwise.java

License:Open Source License

/**
 * Trains one one-vs-one model per unordered label pair, using the data that
 * {@code PairwiseTransformation.transformInstances} produces for the pair.
 * Pairs whose transformed data set is empty are flagged in {@code nodata}
 * instead of being trained.
 *
 * @param train the multi-label training data
 * @throws Exception if copying the base classifier, transforming the data or
 *         building a model fails
 */
@Override
protected void buildInternal(MultiLabelInstances train) throws Exception {
    // n labels give n * (n - 1) / 2 unordered pairs
    numModels = ((numLabels) * (numLabels - 1)) / 2;
    oneVsOneModels = AbstractClassifier.makeCopies(getBaseClassifier(), numModels);
    nodata = new boolean[numModels];

    debug("preparing shell");
    pt = new PairwiseTransformation(train);

    int modelIndex = 0;
    for (int first = 0; first < numLabels - 1; first++) {
        for (int second = first + 1; second < numLabels; second++, modelIndex++) {
            debug("Building one-vs-one model " + (modelIndex + 1) + "/" + numModels);
            Instances pairData = pt.transformInstances(first, second);

            if (pairData.size() == 0) {
                // nothing to learn from for this pair
                nodata[modelIndex] = true;
                continue;
            }
            oneVsOneModels[modelIndex].buildClassifier(pairData);
        }
    }
}

From source file:mulan.classifier.transformation.TwoStageClassifierChainArchitecture.java

License:Open Source License

/**
 * Trains the calibration (virtual label) models on the given training set,
 * then trains one one-vs-one model per unordered label pair on the data set
 * returned by {@code GenerateChain(trainingSet)}.
 *
 * <p>{@code labelIndices} and {@code featureIndices} are replaced by those of
 * the chained data set. For every pair only the instances whose two label
 * values differ are kept. The pair data is reduced to the feature attributes
 * plus the first label of the pair (the class attribute). After training it is
 * passed through {@code delete()} and stored in {@code metaDataTest}.
 *
 * @param trainingSet the multi-label training data
 * @throws Exception if building any of the underlying models or filters fails
 */
@Override
protected void buildInternal(MultiLabelInstances trainingSet) throws Exception {
    // Calibration stage
    debug("Building calibration label models");
    virtualLabelModels = new BinaryRelevance(getBaseClassifier());
    virtualLabelModels.setDebug(getDebug());
    virtualLabelModels.build(trainingSet);

    // Chain stage: from here on, work with the chained view of the same data
    MultiLabelInstances chainedSet = GenerateChain(trainingSet);
    labelIndices = chainedSet.getLabelIndices();
    featureIndices = chainedSet.getFeatureIndices();

    // Pairwise stage: one model per unordered label pair
    numModels = ((numLabels) * (numLabels - 1)) / 2;
    oneVsOneModels = AbstractClassifier.makeCopies(getBaseClassifier(), numModels);
    nodata = new boolean[numModels];
    metaDataTest = new Instances[numModels];

    Instances trainingData = chainedSet.getDataSet();

    int modelIndex = 0;
    for (int first = 0; first < numLabels - 1; first++) {
        Attribute firstAttr = trainingData.attribute(labelIndices[first]);
        for (int second = first + 1; second < numLabels; second++) {
            debug("Building one-vs-one model " + (modelIndex + 1) + "/" + numModels);
            Attribute secondAttr = trainingData.attribute(labelIndices[second]);

            // Collect the examples that express a preference between the two labels
            Instances pairData = new Instances(trainingData, 0);
            final int rowCount = trainingData.numInstances();
            for (int row = 0; row < rowCount; row++) {
                Instance original = trainingData.instance(row);
                Instance copy;
                if (original instanceof SparseInstance) {
                    copy = new SparseInstance(original);
                } else {
                    copy = new DenseInstance(original);
                }

                int firstValueIndex = (int) copy.value(labelIndices[first]);
                String firstValue = firstAttr.value(firstValueIndex);
                int secondValueIndex = (int) copy.value(labelIndices[second]);
                String secondValue = secondAttr.value(secondValueIndex);

                boolean hasPreference = !firstValue.equals(secondValue);
                if (hasPreference) {
                    copy.setValue(firstAttr, firstValue);
                    pairData.add(copy);
                }
            }

            // Drop every label except the first one of the pair and move it to the end
            int numPredictors = trainingData.numAttributes() - numLabels;
            int[] keptIndices = new int[numPredictors + 1];
            System.arraycopy(featureIndices, 0, keptIndices, 0, numPredictors);
            keptIndices[numPredictors] = labelIndices[first];

            Reorder reorder = new Reorder();
            reorder.setAttributeIndicesArray(keptIndices);
            reorder.setInputFormat(pairData);
            pairData = Filter.useFilter(pairData, reorder);
            pairData.setClassIndex(numPredictors);

            // Train first-vs-second unless the pair has no usable examples
            if (pairData.size() == 0) {
                nodata[modelIndex] = true;
            } else {
                oneVsOneModels[modelIndex].buildClassifier(pairData);
            }
            pairData.delete();
            metaDataTest[modelIndex] = pairData;
            modelIndex++;
        }
    }
}

From source file:mulan.classifier.transformation.TwoStagePrunedClassifierChainArchitecture.java

License:Open Source License

/**
 * Trains the calibration (virtual label) models, obtains predictions for the
 * training set via {@code predictLabels}, and then trains one one-vs-one model
 * per unordered label pair.
 *
 * <p>For every pair a dedicated data set is produced by
 * {@code GenerateChain(trainingSet, label1, label2, predictions)}, and
 * {@code labelIndices} / {@code featureIndices} are replaced by those of that
 * data set. Only instances whose two label values differ are kept. The pair
 * data is reduced to the feature attributes plus the first label of the pair
 * (the class attribute). After training it is passed through {@code delete()}
 * and stored in {@code metaDataTest}.
 *
 * @param trainingSet the multi-label training data
 * @throws Exception if building any of the underlying models or filters fails
 */
@Override
protected void buildInternal(MultiLabelInstances trainingSet) throws Exception {
    // Calibration stage
    debug("Building calibration label models");
    virtualLabelModels = new BinaryRelevance(getBaseClassifier());
    virtualLabelModels.setDebug(getDebug());
    virtualLabelModels.build(trainingSet);

    // Pairwise stage: one model per unordered label pair
    numModels = ((numLabels) * (numLabels - 1)) / 2;
    oneVsOneModels = AbstractClassifier.makeCopies(getBaseClassifier(), numModels);
    nodata = new boolean[numModels];
    metaDataTest = new Instances[numModels];

    ArrayList<MultiLabelOutput> predictions = predictLabels(trainingSet);

    int modelIndex = 0;
    for (int first = 0; first < numLabels - 1; first++) {
        for (int second = first + 1; second < numLabels; second++) {
            // Each pair gets its own chained view of the training set
            MultiLabelInstances chainedSet = GenerateChain(trainingSet, first, second, predictions);
            Instances trainingData = chainedSet.getDataSet();
            labelIndices = chainedSet.getLabelIndices();
            featureIndices = chainedSet.getFeatureIndices();

            Attribute firstAttr = trainingData.attribute(labelIndices[first]);
            debug("Building one-vs-one model " + (modelIndex + 1) + "/" + numModels);
            Attribute secondAttr = trainingData.attribute(labelIndices[second]);

            // Collect the examples that express a preference between the two labels
            Instances pairData = new Instances(trainingData, 0);
            for (int row = 0; row < trainingData.numInstances(); row++) {
                Instance original = trainingData.instance(row);
                Instance copy = (original instanceof SparseInstance)
                        ? new SparseInstance(original)
                        : new DenseInstance(original);

                String firstValue = firstAttr.value((int) copy.value(labelIndices[first]));
                String secondValue = secondAttr.value((int) copy.value(labelIndices[second]));
                if (firstValue.equals(secondValue)) {
                    continue; // identical label values: no preference
                }
                copy.setValue(firstAttr, firstValue);
                pairData.add(copy);
            }

            // Drop every label except the first one of the pair and move it to the end
            int numPredictors = trainingData.numAttributes() - numLabels;
            int[] keptIndices = new int[numPredictors + 1];
            System.arraycopy(featureIndices, 0, keptIndices, 0, numPredictors);
            keptIndices[numPredictors] = labelIndices[first];

            Reorder reorder = new Reorder();
            reorder.setAttributeIndicesArray(keptIndices);
            reorder.setInputFormat(pairData);
            pairData = Filter.useFilter(pairData, reorder);
            pairData.setClassIndex(numPredictors);

            // Train first-vs-second unless the pair has no usable examples
            boolean empty = pairData.size() == 0;
            if (empty) {
                nodata[modelIndex] = true;
            } else {
                oneVsOneModels[modelIndex].buildClassifier(pairData);
            }
            pairData.delete();
            metaDataTest[modelIndex] = pairData;
            modelIndex++;
        }
    }
}

From source file:nlpmusic.StringClusterer.java

/**
 * Clusters the given strings with DBSCAN over their word-vector
 * representation.
 *
 * <p>The strings are loaded with {@code listLoad}, converted by a
 * {@code StringToWordVector} filter that keeps {@code to_keep} words, and
 * clustered with {@code threshold} as epsilon and {@code min_points} as the
 * minimum number of points.
 *
 * @param tem the strings to cluster
 * @return one list per cluster, at the position of its cluster number; each
 *         list holds the {@code toString()} form of the source instances
 *         assigned to that cluster, in input order. Instances for which the
 *         cluster lookup throws are left out of every list.
 * @throws Exception if loading, filtering or building the clusterer fails
 */
public ArrayList<ArrayList<String>> cluster(ArrayList<String> tem) throws Exception {
    Instances source = listLoad(tem);

    StringToWordVector vect = new StringToWordVector();
    vect.setWordsToKeep(to_keep);
    vect.setInputFormat(source);
    Instances datas = Filter.useFilter(source, vect);

    DBSCAN clusterer = new DBSCAN();
    clusterer.setEpsilon(threshold);
    clusterer.setMinPoints(min_points);

    clusterer.buildClusterer(datas);

    int numClusters = clusterer.numberOfClusters();
    ArrayList<ArrayList<String>> ret = new ArrayList<>();
    for (int i = 0; i < numClusters; i++) {
        ret.add(new ArrayList<String>());
    }

    // Ask for each instance's cluster once and drop it into the matching
    // bucket, instead of re-clustering every instance once per cluster.
    for (int j = 0; j < datas.size(); j++) {
        try {
            int assigned = clusterer.clusterInstance(datas.get(j));
            if (assigned >= 0 && assigned < numClusters) {
                ret.get(assigned).add(source.get(j).toString());
            }
        } catch (Exception ignored) {
            // Best effort: an instance whose cluster cannot be determined is
            // skipped (presumably DBSCAN noise points -- confirm against the
            // clusterer's documentation).
        }
    }
    return ret;
}

From source file:predictors.HelixIndexer.java

License:Open Source License

/**
 * Trains the Weka Classifer./*from  w  w w  .j a  va  2s. c  o m*/
 */
public void trainClassifier() {
    try {
        RandomForest classifier = new weka.classifiers.trees.RandomForest();
        Instances data = this.dataset;

        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }

        data.randomize(new Random(data.size()));

        String[] optClassifier = weka.core.Utils.splitOptions("-I 100 -K 9 -S 1 -num-slots 3");

        classifier.setOptions(optClassifier);
        classifier.setSeed(data.size());

        classifier.buildClassifier(data);

        this.classifier = classifier;
        this.isTrained = true;
    } catch (Exception e) {
        ErrorUtils.printError(HelixIndexer.class, "Training failed", e);
    }
}

From source file:predictors.HelixPredictor.java

License:Open Source License

/**
 * Trains the Weka classifier (a multilayer perceptron) on
 * {@code this.dataset}.
 *
 * <p>If no class index is set, the last attribute is used as the class. The
 * data set is shuffled with a {@code Random} seeded by its own size, and the
 * same value is used as the classifier seed. On success the trained model is
 * stored in {@code this.classifier} and {@code isTrained} is set. Any
 * exception is reported through {@code ErrorUtils.printError} and the fields
 * are left untouched.
 */
public void trainClassifier() {
    try {
        MultilayerPerceptron perceptron = new weka.classifiers.functions.MultilayerPerceptron();
        Instances data = this.dataset;

        // default to the last attribute as the class
        if (data.classIndex() == -1) {
            int lastAttribute = data.numAttributes() - 1;
            data.setClassIndex(lastAttribute);
        }

        // the data set size doubles as a reproducible seed
        data.randomize(new Random(data.size()));

        perceptron.setOptions(
                weka.core.Utils.splitOptions("-L 0.01 -M 0.8 -N 256 -V 20 -S 0 -E 5 -H 25 -B -I -D -C"));
        perceptron.setSeed(data.size());
        perceptron.buildClassifier(data);

        this.classifier = perceptron;
        this.isTrained = true;
    } catch (Exception e) {
        ErrorUtils.printError(HelixPredictor.class, "Training failed", e);
    }
}

From source file:predictors.TopologyPredictor.java

License:Open Source License

/**
 * Trains the Weka Classifer./*w  w  w  .  j  a v a2 s .  c  om*/
 */
public void trainClassifier() {
    try {
        RandomForest classifier = new weka.classifiers.trees.RandomForest();
        Instances data = this.dataset;

        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }

        data.randomize(new Random(data.size()));

        String[] optClassifier = weka.core.Utils.splitOptions("-I 100 -K 7 -S 1 -num-slots 1");

        classifier.setOptions(optClassifier);
        classifier.setSeed(data.size());

        classifier.buildClassifier(data);

        this.classifier = classifier;
        this.isTrained = true;
    } catch (Exception e) {
        ErrorUtils.printError(TopologyPredictor.class, "Training failed", e);
    }
}

From source file:sentinets.Prediction.java

License:Open Source License

/**
 * Prints a summary of the given tweet instances to standard output, followed
 * by one tab-separated "Tweet Details" line per instance. Each line holds the
 * user, mentions and hashtags values and the formatted entry of
 * {@code classDist} at the same position.
 *
 * @param tweetInstances the instances to report on; {@code classDist} is read
 *        at every instance position
 */
public void writeStats(Instances tweetInstances) {
    // Zero-based attribute positions, kept in the "1-based minus one" form
    final int userIndex = 11 - 1;
    final int mentionsIndex = 3 - 1;
    final int hashtagsIndex = 14 - 1;

    System.out.println("Stats Instances: \n" + tweetInstances.toSummaryString());
    for (int i = 0; i < tweetInstances.size(); i++) {
        String user = tweetInstances.get(i).stringValue(userIndex);
        String mentions = tweetInstances.get(i).stringValue(mentionsIndex);
        String hashtags = tweetInstances.get(i).stringValue(hashtagsIndex);
        System.out.println("Tweet Details:\t" + user + "\t" + mentions + "\t" + hashtags + "\t"
                + printDist(classDist.get(i)));
    }
}