Example usage for weka.core Instances size

List of usage examples for weka.core Instances size

Introduction

On this page you can find example usage for weka.core Instances size.

Prototype


@Override
public int size()

Source Link

Document

Returns the number of instances in the dataset.
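
In Weka 3.7 and later, Instances implements java.util.List<Instance>, so size() returns the same count as the older numInstances() accessor. A minimal sketch of calling it on a freshly loaded dataset (the file name iris.arff is a placeholder, not part of any example below):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SizeExample {
    public static void main(String[] args) throws Exception {
        // Load a dataset from an ARFF file; "iris.arff" is a placeholder path.
        Instances data = DataSource.read("iris.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // size() and numInstances() report the same number of rows.
        System.out.println("size():         " + data.size());
        System.out.println("numInstances(): " + data.numInstances());
    }
}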

Usage

From source file:sentinets.Prediction.java

License:Open Source License

public String updateModel(String inputFile, ArrayList<Double[]> metrics) {
    String output = "";
    this.setInstances(inputFile);
    FilteredClassifier fcls = (FilteredClassifier) this.cls;
    SGD cls = (SGD) fcls.getClassifier();
    Filter filter = fcls.getFilter();
    Instances insAll;
    try {
        insAll = Filter.useFilter(this.unlabled, filter);
        if (insAll.size() > 0) {
            Random rand = new Random(10);
            int folds = 10 > insAll.size() ? 2 : 10;
            Instances randData = new Instances(insAll);
            randData.randomize(rand);
            if (randData.classAttribute().isNominal()) {
                randData.stratify(folds);
            }
            Evaluation eval = new Evaluation(randData);
            eval.evaluateModel(cls, insAll);
            System.out.println("Initial Evaluation");
            System.out.println(eval.toSummaryString());
            System.out.println(eval.toClassDetailsString());
            metrics.add(new Double[] { eval.fMeasure(0), eval.fMeasure(1), eval.weightedFMeasure() });
            output += "\n====" + "Initial Evaluation" + "====\n";
            output += "\n" + eval.toSummaryString();
            output += "\n" + eval.toClassDetailsString();
            System.out.println("Cross Validated Evaluation");
            output += "\n====" + "Cross Validated Evaluation" + "====\n";
            for (int n = 0; n < folds; n++) {
                Instances train = randData.trainCV(folds, n);
                Instances test = randData.testCV(folds, n);

                for (int i = 0; i < train.numInstances(); i++) {
                    cls.updateClassifier(train.instance(i));
                }

                eval.evaluateModel(cls, test);
                System.out.println("Cross Validated Evaluation fold: " + n);
                output += "\n====" + "Cross Validated Evaluation fold (" + n + ")====\n";
                System.out.println(eval.toSummaryString());
                System.out.println(eval.toClassDetailsString());
                output += "\n" + eval.toSummaryString();
                output += "\n" + eval.toClassDetailsString();
                metrics.add(new Double[] { eval.fMeasure(0), eval.fMeasure(1), eval.weightedFMeasure() });
            }
            for (int i = 0; i < insAll.numInstances(); i++) {
                cls.updateClassifier(insAll.instance(i));
            }
            eval.evaluateModel(cls, insAll);
            System.out.println("Final Evaluation");
            System.out.println(eval.toSummaryString());
            System.out.println(eval.toClassDetailsString());
            output += "\n====" + "Final Evaluation" + "====\n";
            output += "\n" + eval.toSummaryString();
            output += "\n" + eval.toClassDetailsString();
            metrics.add(new Double[] { eval.fMeasure(0), eval.fMeasure(1), eval.weightedFMeasure() });
            fcls.setClassifier(cls);
            String modelFilePath = outputDir + "/" + Utils.getOutDir(Utils.OutDirIndex.MODELS)
                    + "/updatedClassifier.model";
            weka.core.SerializationHelper.write(modelFilePath, fcls);
            output += "\n" + "Updated Model saved at: " + modelFilePath;
        } else {
            output += "No new instances for training the model.";
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return output;
}
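
The example above checks insAll.size() > 0 before doing any incremental training, so an empty filtered batch is never used to update the SGD model. A reduced sketch of that guard-then-update pattern (the class name and the file new-batch.arff are placeholders, not taken from the original source):

import weka.classifiers.functions.SGD;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class IncrementalUpdate {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("new-batch.arff"); // placeholder path
        data.setClassIndex(data.numAttributes() - 1);

        if (data.size() > 0) { // only train when the batch actually has instances
            SGD cls = new SGD();
            cls.buildClassifier(data); // initial model on the batch
            for (int i = 0; i < data.numInstances(); i++) {
                cls.updateClassifier(data.instance(i)); // per-instance incremental updates
            }
            System.out.println("Updated model on " + data.size() + " instances.");
        } else {
            System.out.println("No new instances for training the model.");
        }
    }
}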

From source file:src.BigDataClassifier.GenerateFolds.java

public void generateFolds(Instances trainDataset) throws Exception {

    //randomize data
    Random rand = new Random(1);
    //set folds
    int folds = 3;
    //create random dataset
    Instances randData = new Instances(trainDataset);
    randData.randomize(rand);

    Instances[] result = new Instances[folds * 2];
    //cross-validate
    for (int n = 0; n < folds; n++) {
        trainDataset = randData.trainCV(folds, n);
        System.out.println("Train dataset size is = " + trainDataset.size());
        Instances testDataset = randData.testCV(folds, n);
        System.out.println("Test dataset size is = " + testDataset.size());
        // store each fold's train/test split without overwriting earlier folds
        result[2 * n] = trainDataset;
        result[2 * n + 1] = testDataset;
        trainDataset2 = trainDataset;
        testDataset2 = testDataset;
    }
    trainDatasetSize = trainDataset2.size();
    testDatasetSize = testDataset2.size();
}
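
In the cross-validation loop above, trainCV and testCV split the randomized copy so that every instance falls into exactly one of the two folds; the reported sizes therefore always add up to the size of the full dataset. A small check under that assumption (the class and method names are illustrative):

import java.util.Random;
import weka.core.Instances;

public class FoldSizeCheck {
    public static void checkFoldSizes(Instances data) {
        int folds = 3;
        Instances randData = new Instances(data);
        randData.randomize(new Random(1));
        for (int n = 0; n < folds; n++) {
            Instances train = randData.trainCV(folds, n);
            Instances test = randData.testCV(folds, n);
            // trainCV and testCV partition randData, so the two sizes sum to the total.
            System.out.println(train.size() + " + " + test.size() + " = " + randData.size());
        }
    }
}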

From source file:test.org.moa.opencl.IBk.java

License:Open Source License

/**
 * Calculates the class membership probabilities for the given test instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if an error occurred during the prediction
 */
public double[] distributionForInstance(Instance instance) throws Exception {

    if (m_Train.numInstances() == 0) {
        //throw new Exception("No training instances!");
        return m_defaultModel.distributionForInstance(instance);
    }
    if ((m_WindowSize > 0) && (m_Train.numInstances() > m_WindowSize)) {
        m_kNNValid = false;
        boolean deletedInstance = false;
        while (m_Train.numInstances() > m_WindowSize) {
            m_Train.delete(0);
            deletedInstance = true;
        }
        //rebuild datastructure KDTree currently can't delete
        if (deletedInstance == true)
            m_NNSearch.setInstances(m_Train);
    }

    // Select k by cross validation
    if (!m_kNNValid && (m_CrossValidate) && (m_kNNUpper >= 1)) {
        crossValidate();
    }

    m_NNSearch.addInstanceInfo(instance);

    Instances neighbours = m_NNSearch.kNearestNeighbours(instance, m_kNN);
    double[] distances = m_NNSearch.getDistances();

    System.out.print("distances weka ");
    for (int i = 0; i < distances.length; ++i)
        System.out.print(" " + distances[i]);
    System.out.println();
    System.out.println("Neighbours");
    for (int i = 0; i < neighbours.size(); ++i)
        System.out.println(neighbours.get(i));
    double[] distribution = makeDistribution(neighbours, distances);

    return distribution;
}

From source file:tr.gov.ulakbim.jDenetX.experiments.wrappers.EvalActiveBoostingID.java

License:Open Source License

protected void selfTrain(Instance testInst) {
    int maxInstances = this.maxInstancesOption.getValue();
    int poolSizeRatio = poolSizeOption.getValue();
    int poolLimit = maxInstances / poolSizeRatio;
    int poolCount = 0;
    VotedInstancePool vInstPool = SelfOzaBoostID.getVotedInstancePool();
    noOfClassesInPool = vInstPool.getNoOfClasses();

    System.out.println("No of instances in the pool: " + vInstPool.getSize());
    System.out.println("No of classes in the pool: " + noOfClassesInPool);

    if (vInstPool.getSize() > 10) {
        ArrayList<Attribute> attrs = new ArrayList<Attribute>();
        for (int i = 0; i < testInst.numAttributes(); i++) {
            attrs.add(testInst.attribute(i));
        }
        Instances instances = new Instances("instances", attrs, vInstPool.getSize());
        Iterator instanceIt = vInstPool.iterator();
        System.out.println("Size of pool: " + vInstPool.getSize());

        while (instanceIt.hasNext() && poolCount < poolLimit) {
            VotedInstance vInstance = (VotedInstance) instanceIt.next();
            ((Instances) instances).add(vInstance.getInstance());
            poolCount++;
        }

        System.out.println("Size of instances: " + instances.size());
        instances = clusterInstances(instances);
        InstanceStream activeStream = new CachedInstancesStream((Instances) instances);

        System.out.println("Selftraining have been started");
        System.out.println("Number of self training instances: " + instances.numInstances());

        long treeSize = vInstPool.getSize();
        long limit = treeSize / SAMPLING_LIMIT;
        Instance inst = null;

        for (long j = 0; j < limit && activeStream.hasMoreInstances(); j++) {
            inst = activeStream.nextInstance();
            if (inst.numAttributes() == attrs.size()) {
                model.trainOnInstance(inst);
            }
        }
    }

}

From source file:tr.gov.ulakbim.jDenetX.experiments.wrappers.EvalActiveBoostingID.java

License:Open Source License

public static Instances clusterInstances(Instances data) {
    XMeans xmeans = new XMeans();
    Remove filter = new Remove();
    Instances dataClusterer = null;
    if (data == null) {
        throw new NullPointerException("Data is null at clusteredInstances method");
    }
    //Get the attributes from the data for creating the sampled_data object

    ArrayList<Attribute> attrList = new ArrayList<Attribute>();
    Enumeration attributes = data.enumerateAttributes();
    while (attributes.hasMoreElements()) {
        attrList.add((Attribute) attributes.nextElement());
    }

    Instances sampled_data = new Instances(data.relationName(), attrList, 0);
    data.setClassIndex(data.numAttributes() - 1);
    sampled_data.setClassIndex(data.numAttributes() - 1);
    filter.setAttributeIndices("" + (data.classIndex() + 1));
    data.remove(0); // in the Wavelet stream of MOA, the first element always comes without a class

    try {
        filter.setInputFormat(data);
        dataClusterer = Filter.useFilter(data, filter);
        String[] options = new String[4];
        options[0] = "-L"; // max. iterations
        options[1] = Integer.toString(noOfClassesInPool - 1);
        if (noOfClassesInPool > 2) {
            options[1] = Integer.toString(noOfClassesInPool - 1);
            xmeans.setMinNumClusters(noOfClassesInPool - 1);
        } else {
            options[1] = Integer.toString(noOfClassesInPool);
            xmeans.setMinNumClusters(noOfClassesInPool);
        }
        xmeans.setMaxNumClusters(data.numClasses() + 1);
        System.out.println("No of classes in the pool: " + noOfClassesInPool);
        xmeans.setUseKDTree(true);
        //xmeans.setOptions(options);
        xmeans.buildClusterer(dataClusterer);
        System.out.println("Xmeans\n:" + xmeans);
    } catch (Exception e) {
        e.printStackTrace();
    }
    //System.out.println("Assignments\n: " + assignments);
    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(xmeans);
    try {
        eval.evaluateClusterer(data);
        int classesToClustersMap[] = eval.getClassesToClusters();
        //check the classes to cluster map
        int clusterNo = 0;
        for (int i = 0; i < data.size(); i++) {
            clusterNo = xmeans.clusterInstance(dataClusterer.get(i));
            //Check if the class value of instance and class value of cluster matches
            if ((int) data.get(i).classValue() == classesToClustersMap[clusterNo]) {
                sampled_data.add(data.get(i));
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ((Instances) sampled_data);
}

From source file:utils.AttributePairsUtils.java

License:Open Source License

/**
 * Get pairs of attributes
 * 
 * @param dataset Dataset
 * @return List of pairs
 */
public static ArrayList<AttributesPair> getAttributePairs(MultiLabelInstances dataset) {
    Instances instances = dataset.getDataSet();

    //Return possible combinations among labels
    int possibleCombinations = getPossibleCombinations(dataset.getNumLabels());

    int[] labelPairAppearances = new int[possibleCombinations];
    int[] currentLabelValues;
    int[] labelIndices = dataset.getLabelIndices();

    for (int i = 0; i < instances.size(); i++) {
        currentLabelValues = DataInfoUtils.getCurrentValueLabels(instances, i, labelIndices);
        labelPairAppearances = updateAttributePairs(labelPairAppearances, currentLabelValues);
    }

    return makeAttributePairs(labelPairAppearances, labelIndices, dataset);
}

From source file:utils.DataInfoUtils.java

License:Open Source License

/**
 * Get label frequency given the index
 * 
 * @param dataset Dataset
 * @param labelIndex Label index
 * @return Frequency of label
 */
public static double getLabelFrequency(MultiLabelInstances dataset, int labelIndex) {
    double value = 0.0;

    Instances instances = dataset.getDataSet();

    double isLabel;

    for (int i = 0; i < instances.size(); i++) {
        isLabel = instances.instance(i).value(labelIndex);
        if (isLabel == 1.0) {
            value++;
        }
    }

    return value / dataset.getNumInstances();
}
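
Because Instances is iterable (it implements List<Instance> in Weka 3.7+), the index-based loop in getLabelFrequency can also be written with an enhanced for loop. A hedged equivalent of the same count, assuming the Mulan MultiLabelInstances API used above (the class and method names are illustrative):

import mulan.data.MultiLabelInstances;
import weka.core.Instance;

public class LabelFrequency {
    public static double labelFrequency(MultiLabelInstances dataset, int labelIndex) {
        double count = 0.0;
        // Instances can be iterated directly instead of indexing with instance(i).
        for (Instance inst : dataset.getDataSet()) {
            if (inst.value(labelIndex) == 1.0) {
                count++;
            }
        }
        return count / dataset.getNumInstances();
    }
}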

From source file:utils.MetricUtils.java

License:Open Source License

/**
 * Obtain labels ordered by IR inter class
 *
 * @param dataset Dataset
 * @param labelsByFrequency Labels
 * @return Labels sorted by IR inter class
 */
public static ImbalancedFeature[] getImbalancedDataByIRInterClass(MultiLabelInstances dataset,
        ImbalancedFeature[] labelsByFrequency) {
    int[] labelIndices = dataset.getLabelIndices();

    ImbalancedFeature[] imbalancedData = new ImbalancedFeature[labelIndices.length];

    Instances instances = dataset.getDataSet();

    int n1 = 0, n0 = 0, maxAppearance;
    double is, IRIntraClass, variance, IRInterClass;
    double mean = dataset.getNumInstances() / 2.0;

    Attribute currentAttribute;
    ImbalancedFeature currentLabel;

    for (int i = 0; i < labelIndices.length; i++) {
        currentAttribute = instances.attribute(labelIndices[i]);

        for (int j = 0; j < instances.size(); j++) {
            is = instances.instance(j).value(currentAttribute);
            if (is == 1.0) {
                n1++;
            } else {
                n0++;
            }
        }

        try {
            if (n0 == 0 || n1 == 0) {
                IRIntraClass = 0;
            } else if (n0 > n1) {
                IRIntraClass = n0 / (n1 * 1.0);
            } else {
                IRIntraClass = n1 / (n0 * 1.0);
            }
        } catch (Exception e1) {
            e1.printStackTrace();
            IRIntraClass = 0;
        }

        variance = (Math.pow((n0 - mean), 2) + Math.pow((n1 - mean), 2)) / 2;

        currentLabel = getLabelByLabelname(currentAttribute.name(), labelsByFrequency);

        maxAppearance = labelsByFrequency[0].getAppearances();

        if (currentLabel.getAppearances() <= 0) {
            IRInterClass = Double.NaN;
        } else {
            IRInterClass = maxAppearance / (currentLabel.getAppearances() * 1.0);
        }

        imbalancedData[i] = new ImbalancedFeature(currentAttribute.name(), currentLabel.getAppearances(),
                IRIntraClass, variance, IRInterClass);

        n0 = 0;
        n1 = 0;
    }

    return imbalancedData;
}
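
As a concrete check of the formulas above: for a label that appears in 30 of 100 instances while the most frequent label appears in 60, n1 = 30 and n0 = 70, so IRIntraClass = 70 / 30 ≈ 2.33, IRInterClass = 60 / 30 = 2.0, and with mean = 50 the variance is ((70 - 50)^2 + (30 - 50)^2) / 2 = 400.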

From source file:utils.MetricUtils.java

License:Open Source License

/**
 * Obtain labels as ImbalancedFeature objects
 *
 * @param dataset Datasets
 * @return Labels as ImbalanceFeature array
 */
public static ImbalancedFeature[] getImbalancedData(MultiLabelInstances dataset) {
    int[] labelIndices = dataset.getLabelIndices();

    ImbalancedFeature[] imbalancedData = new ImbalancedFeature[labelIndices.length];

    Instances instances = dataset.getDataSet();

    int n1 = 0, n0 = 0;
    double is, IR, variance;
    double mean = dataset.getNumInstances() / 2.0;

    Attribute current;

    for (int i = 0; i < labelIndices.length; i++) {
        current = instances.attribute(labelIndices[i]);

        for (int j = 0; j < instances.size(); j++) {
            is = instances.instance(j).value(current);
            if (is == 1.0) {
                n1++;
            } else {
                n0++;
            }
        }
        try {
            if (n0 == 0 || n1 == 0) {
                IR = 0;
            } else if (n0 > n1) {
                IR = n0 / (n1 * 1.0);
            } else {
                IR = n1 / (n0 * 1.0);
            }
        } catch (Exception e1) {
            e1.printStackTrace();
            IR = 0;
        }

        variance = (Math.pow((n0 - mean), 2) + Math.pow((n1 - mean), 2)) / 2;

        imbalancedData[i] = new ImbalancedFeature(current.name(), IR, variance);

        n0 = 0;
        n1 = 0;
    }

    return imbalancedData;
}

From source file:utils.MetricUtils.java

License:Open Source License

/**
 * Obtain labels ordered by number of appearances
 *
 * @param dataset Dataset
 * @return Labels as ImbalanceFeature objects
 */
public static ImbalancedFeature[] getImbalancedDataByAppearances(MultiLabelInstances dataset) {
    int[] labelIndices = dataset.getLabelIndices();

    ImbalancedFeature[] imbalancedData = new ImbalancedFeature[labelIndices.length];

    Instances instances = dataset.getDataSet();

    int appearances = 0;
    double is;
    Attribute current;

    for (int i = 0; i < labelIndices.length; i++) {
        current = instances.attribute(labelIndices[i]);

        for (int j = 0; j < instances.size(); j++) {
            is = instances.instance(j).value(current);
            if (is == 1.0) {
                appearances++;
            }
        }
        imbalancedData[i] = new ImbalancedFeature(current.name(), appearances);
        appearances = 0;
    }

    return imbalancedData;
}