Example usage for weka.core Instances numInstances

Introduction

On this page you can find example usages of the numInstances() method of weka.core.Instances.

Prototype


public int numInstances()

Source Link

Document

Returns the number of instances in the dataset.

Usage

From source file:c4.pkg5crossv.Classifier.java

/**
 * Loads the data set at {@code ./src/data/irysy.arff}, shuffles it, splits it 60/40
 * into a training and a test partition, builds a J48 tree (options {@code -U -M 10})
 * on the training partition and prints the summary of a 10-fold cross-validation
 * run that is given the test partition.
 *
 * @throws FileNotFoundException declared for callers; see {@code DataLoad.loadData}
 * @throws IOException declared for callers; see {@code DataLoad.loadData}
 * @throws Exception if option parsing, training or evaluation fails
 */
public static void trainAndTest() throws FileNotFoundException, IOException, Exception {

    Instances dataset = DataLoad.loadData("./src/data/irysy.arff");
    // The last attribute is used as the class attribute.
    int classColumn = dataset.numAttributes() - 1;
    dataset.setClassIndex(classColumn);

    // Random shuffle of the rows (unseeded, so every run yields a different split).
    dataset.randomize(new Random());

    // 60% of the rows go to training, the remainder to testing.
    double trainPercent = 60.0;
    long roundedTrainCount = Math.round(dataset.numInstances() * trainPercent / 100);
    int trainCount = (int) roundedTrainCount;
    int testCount = dataset.numInstances() - trainCount;
    Instances trainPartition = new Instances(dataset, 0, trainCount);
    Instances testPartition = new Instances(dataset, trainCount, testCount);

    J48 decisionTree = new J48();
    decisionTree.setOptions(Utils.splitOptions("-U -M 10"));
    decisionTree.buildClassifier(trainPartition);

    // NOTE(review): the tree fitted above is handed to crossValidateModel together with
    // the test partition; confirm against the Weka Evaluation documentation that this
    // evaluates what is intended (a plain hold-out test may have been meant).
    Evaluation evaluation = new Evaluation(trainPartition);
    evaluation.crossValidateModel(decisionTree, testPartition, 10, new Random(1)); // 10 folds
    // Print the cross-validation results.
    String report = evaluation.toSummaryString("Wyniki:", false);
    System.out.println(report);
}

From source file:ca.uqac.florentinth.speakerauthentication.Learning.Learning.java

License:Apache License

/**
 * Trains the selected classifier on the given data set, optionally cross-validates it,
 * and serializes the trained model to {@code trainingModel}.
 *
 * <p>For {@code KNN} the neighbour count is {@code ceil(sqrt(number of instances))}.
 * If the data set declares no class attribute, the last attribute is used.
 * When cross-validation runs, the {@code kappa}, {@code fMeasure} and
 * {@code confusionMatrix} fields are updated from its result.
 *
 * @param classifier which classifier to build ({@code KNN} or {@code NB}); any other
 *        constant leaves the previously assigned {@code this.classifier} in place
 * @param trainingDataset reader over the training data, in the format accepted by the
 *        {@code Instances(Reader)} constructor
 * @param trainingModel stream the trained classifier is serialized to; it is closed
 *        once the write has been attempted, whether or not it succeeded
 * @param crossValidationFoldNumber number of folds; {@code null}, zero or a negative
 *        value skips cross-validation
 * @throws Exception if reading, training, evaluation or serialization fails
 */
public void trainClassifier(Classifier classifier, FileReader trainingDataset, FileOutputStream trainingModel,
        Integer crossValidationFoldNumber) throws Exception {
    Instances instances = new Instances(new BufferedReader(trainingDataset));

    switch (classifier) {
    case KNN:
        int k = (int) Math.ceil(Math.sqrt(instances.numInstances()));
        this.classifier = new IBk(k);
        break;
    case NB:
        this.classifier = new NaiveBayes();
        break;
    }

    if (instances.classIndex() == -1) {
        instances.setClassIndex(instances.numAttributes() - 1);
    }

    this.classifier.buildClassifier(instances);

    // Null-safe: a missing fold count means "no cross-validation" instead of an
    // unboxing NullPointerException after the model has already been trained.
    if (crossValidationFoldNumber != null && crossValidationFoldNumber > 0) {
        Evaluation evaluation = new Evaluation(instances);
        evaluation.crossValidateModel(this.classifier, instances, crossValidationFoldNumber, new Random(1));
        kappa = evaluation.kappa();
        fMeasure = evaluation.weightedFMeasure();
        confusionMatrix = evaluation.toMatrixString("Confusion matrix: ");
    }

    // try-with-resources closes the stream even when serialization fails.
    try (ObjectOutputStream outputStream = new ObjectOutputStream(trainingModel)) {
        outputStream.writeObject(this.classifier);
        outputStream.flush();
    }
}

From source file:ca.uqac.florentinth.speakerauthentication.Learning.Learning.java

License:Apache License

/**
 * Classifies the last instance of the testing data set with a previously serialized
 * model, but only when that instance's class value equals {@code username}.
 *
 * <p>If the data set declares no class attribute, the last attribute is used.
 *
 * @param username the class label the last instance must carry for a prediction to be made
 * @param trainingModel stream holding the serialized classifier; closed after reading
 * @param testingDataset reader over the testing data, in the format accepted by the
 *        {@code Instances(Reader)} constructor
 * @return a map with a single entry {@code username -> predicted label}, or an empty map
 *         when the data set is empty or the last instance is not labelled {@code username}
 * @throws Exception if deserialization, parsing or classification fails
 */
public Map<String, String> makePrediction(String username, FileInputStream trainingModel,
        FileReader testingDataset) throws Exception {
    Map<String, String> predictions = new HashMap<>();

    // NOTE(review): this is native Java deserialization; the model file must come from
    // a trusted source.
    weka.classifiers.Classifier classifier;
    // try-with-resources closes the stream even when readObject fails.
    try (ObjectInputStream inputStream = new ObjectInputStream(trainingModel)) {
        classifier = (weka.classifiers.Classifier) inputStream.readObject();
    }

    Instances instances = new Instances(new BufferedReader(testingDataset));

    if (instances.classIndex() == -1) {
        instances.setClassIndex(instances.numAttributes() - 1);
    }

    int last = instances.numInstances() - 1;
    if (last < 0) {
        // Empty data set: there is no instance to classify.
        return predictions;
    }

    if (instances.instance(last).stringValue(instances.classIndex()).equals(username)) {
        double label = classifier.classifyInstance(instances.instance(last));
        // Write the predicted index back so it can be read as its string label.
        instances.instance(last).setClassValue(label);
        predictions.put(username, instances.instance(last).stringValue(instances.classIndex()));
    }

    return predictions;
}

From source file:categorization.SpectralWEKA.java

License:Open Source License

/**
 * Generates a clusterer by means of the spectral clustering algorithm.
 *
 * <p>The pairwise similarities are taken from the precomputed {@code sim_matrix}
 * field: only entries {@code sim_matrix[i][j]} with {@code j >= i} are read and
 * mirrored into a symmetric matrix, which is then handed to {@code partition}.
 * The method stores a copy of the data in {@code m_Sequences}, the instance values
 * in {@code v}, the cluster index of every instance in {@code cluster}, and the
 * highest assigned cluster index plus one in {@code numOfClusters}.
 *
 * @param data set of instances serving as training data
 * @exception Exception if the clusterer has not been generated successfully
 */
public void buildClusterer(Instances data) throws java.lang.Exception {
    m_Sequences = new Instances(data);
    int n = data.numInstances();

    DoubleMatrix2D w;
    if (useSparseMatrix) {
        w = DoubleFactory2D.sparse.make(n, n);
    } else {
        w = DoubleFactory2D.dense.make(n, n);
    }

    // Keep the raw attribute values of every instance as a dense matrix.
    double[][] v1 = new double[n][];
    for (int i = 0; i < n; i++) {
        v1[i] = data.instance(i).toDoubleArray();
    }
    v = DoubleFactory2D.dense.make(v1);

    // Sets up the symmetric similarity matrix from the upper triangle of sim_matrix.
    for (int i = 0; i < n; i++) {
        for (int j = i; j < n; j++) {
            double sim = sim_matrix[i][j];
            w.set(i, j, sim);
            w.set(j, i, sim);
        }
    }

    // Partitions points
    int[][] p = partition(w, alpha_star);

    // Deploys results: p[i] lists the instance indices that belong to cluster i.
    cluster = new int[n];
    for (int i = 0; i < p.length; i++) {
        for (int j = 0; j < p[i].length; j++) {
            cluster[p[i][j]] = i;
        }
    }

    // The cluster count is derived from the highest cluster index actually assigned.
    this.numOfClusters = cluster[Utils.maxIndex(cluster)] + 1;
}

From source file:cba.Apriori.java

License:Open Source License

/**
 * Removes columns that are all missing from the data
 * @param instances the instances/*from w  w  w .  ja  v a 2s. c  om*/
 * @return a new set of instances with all missing columns removed
 * @throws Exception if something goes wrong
 */
protected Instances removeMissingColumns(Instances instances) throws Exception {

    int numInstances = instances.numInstances();
    StringBuffer deleteString = new StringBuffer();
    int removeCount = 0;
    boolean first = true;
    int maxCount = 0;

    for (int i = 0; i < instances.numAttributes(); i++) {
        AttributeStats as = instances.attributeStats(i);
        if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) {
            // see if we can decrease this by looking for the most frequent value
            int[] counts = as.nominalCounts;
            if (counts[Utils.maxIndex(counts)] > maxCount) {
                maxCount = counts[Utils.maxIndex(counts)];
            }
        }
        if (as.missingCount == numInstances) {
            if (first) {
                deleteString.append((i + 1));
                first = false;
            } else {
                deleteString.append("," + (i + 1));
            }
            removeCount++;
        }
    }
    if (m_verbose) {
        System.err.println("Removed : " + removeCount + " columns with all missing " + "values.");
    }
    if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) {
        m_upperBoundMinSupport = (double) maxCount / (double) numInstances;
        if (m_verbose) {
            System.err.println("Setting upper bound min support to : " + m_upperBoundMinSupport);
        }
    }

    if (deleteString.toString().length() > 0) {
        Remove af = new Remove();
        af.setAttributeIndices(deleteString.toString());
        af.setInvertSelection(false);
        af.setInputFormat(instances);
        Instances newInst = Filter.useFilter(instances, af);

        return newInst;
    }
    return instances;
}

From source file:cba.AprioriItemSet.java

License:Open Source License

/**
 * Builds one single-item item set for every attribute value declared in the header
 * of the given instances. The ordering of values in the header determines the
 * lexicographic order of the result.
 *
 * @param instances the set of instances whose header info is to be used
 * @param treatZeroAsMissing if true, the value at index 0 of every attribute is
 *        skipped and the flag is passed on to every generated item set
 * @return a set of item sets, each containing a single item
 * @exception Exception if a numeric attribute is encountered
 */
public static FastVector singletons(Instances instances, boolean treatZeroAsMissing) throws Exception {

    FastVector result = new FastVector();
    final int firstValue = treatZeroAsMissing ? 1 : 0;

    for (int attIndex = 0; attIndex < instances.numAttributes(); attIndex++) {
        if (instances.attribute(attIndex).isNumeric()) {
            throw new Exception("Can't handle numeric attributes!");
        }
        int valueIndex = firstValue;
        while (valueIndex < instances.attribute(attIndex).numValues()) {
            ItemSet singleton = new AprioriItemSet(instances.numInstances());
            singleton.setTreatZeroAsMissing(treatZeroAsMissing);

            // -1 marks "attribute not part of the item set"; only one slot is set.
            int[] items = new int[instances.numAttributes()];
            for (int slot = 0; slot < items.length; slot++) {
                items[slot] = -1;
            }
            items[attIndex] = valueIndex;
            singleton.m_items = items;

            result.addElement(singleton);
            valueIndex++;
        }
    }
    return result;
}

From source file:cba.ItemSet.java

License:Open Source License

/**
 * Builds one single-item item set for every attribute value declared in the header
 * of the given instances. The ordering of values in the header determines the
 * lexicographic order of the result.
 *
 * @param instances the set of instances whose header info is to be used
 * @return a set of item sets, each containing a single item
 * @exception Exception if a numeric attribute is encountered
 */
public static FastVector singletons(Instances instances) throws Exception {

    FastVector result = new FastVector();

    for (int attIndex = 0; attIndex < instances.numAttributes(); attIndex++) {
        if (instances.attribute(attIndex).isNumeric()) {
            throw new Exception("Can't handle numeric attributes!");
        }
        int valueIndex = 0;
        while (valueIndex < instances.attribute(attIndex).numValues()) {
            ItemSet singleton = new ItemSet(instances.numInstances());

            // -1 marks "attribute not part of the item set"; only one slot is set.
            int[] items = new int[instances.numAttributes()];
            for (int slot = 0; slot < items.length; slot++) {
                items[slot] = -1;
            }
            items[attIndex] = valueIndex;
            singleton.m_items = items;

            result.addElement(singleton);
            valueIndex++;
        }
    }
    return result;
}

From source file:cba.ItemSet.java

License:Open Source License

/**
 * Feeds every instance, in data set order, to the counter of every item set.
 *
 * @param itemSets the set of item sets which are to be updated
 * @param instances the instances to be used for updating the counters
 */
public static void upDateCounters(FastVector itemSets, Instances instances) {

    int row = 0;
    while (row < instances.numInstances()) {
        // A fresh enumeration per instance: each instance visits all item sets.
        for (Enumeration sets = itemSets.elements(); sets.hasMoreElements();) {
            ItemSet target = (ItemSet) sets.nextElement();
            target.upDateCounter(instances.instance(row));
        }
        row++;
    }
}

From source file:cba.RuleItem.java

License:Open Source License

/**
 * Constructs a new RuleItem if the support of the given rule is above the support threshold.
 * @param premise the premise/*from   w  ww.  ja  va  2 s.  c  o  m*/
 * @param consequence the consequence
 * @param instances the instances
 * @param genTime the time of generation of the current premise and consequence
 * @param minRuleCount the support threshold
 * @param m_midPoints the mid points of the intervals
 * @param m_priors the estimated priori probabilities (in a hashtable)
 * @return a RuleItem if its support is above the threshold, null otherwise
 */
public RuleItem generateRuleItem(ItemSet premise, ItemSet consequence, Instances instances, int genTime,
        int minRuleCount, double[] m_midPoints, Hashtable m_priors) {
    ItemSet rule = new ItemSet(instances.numInstances());
    rule.m_items = new int[(consequence.m_items).length];
    System.arraycopy(premise.m_items, 0, rule.m_items, 0, (premise.m_items).length);
    for (int k = 0; k < consequence.m_items.length; k++) {
        if (consequence.m_items[k] != -1)
            rule.m_items[k] = consequence.m_items[k];
    }
    for (int i = 0; i < instances.numInstances(); i++)
        rule.upDateCounter(instances.instance(i));
    int ruleSupport = rule.support();
    if (ruleSupport > minRuleCount) {
        RuleItem newRule = new RuleItem(premise, consequence, genTime, ruleSupport, m_midPoints, m_priors);
        return newRule;
    }
    return null;
}

From source file:cerebro.Id3.java

License:Open Source License

/**
 * Recursively builds the Id3 tree rooted at this node.
 *
 * <p>A node that receives no instances becomes a leaf with a missing class value.
 * Otherwise the attribute with the largest information gain is selected; if that
 * gain equals zero the node becomes a leaf holding the normalized class
 * distribution, else one successor is built per value of the selected attribute.
 *
 * @param data the training data
 * @exception Exception if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {

    // Nothing reached this node: empty leaf.
    if (data.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = Instance.missingValue();
        m_Distribution = new double[data.numClasses()];
        return;
    }

    // Score every enumerated attribute and pick the one with the largest gain.
    double[] gains = new double[data.numAttributes()];
    for (Enumeration candidates = data.enumerateAttributes(); candidates.hasMoreElements();) {
        Attribute candidate = (Attribute) candidates.nextElement();
        gains[candidate.index()] = computeInfoGain(data, candidate);
    }
    m_Attribute = data.attribute(Utils.maxIndex(gains));

    if (!Utils.eq(gains[m_Attribute.index()], 0)) {
        // Positive gain: split on the chosen attribute and recurse into each subset.
        Instances[] subsets = splitData(data, m_Attribute);
        m_Successors = new Id3[m_Attribute.numValues()];
        for (int branch = 0; branch < m_Attribute.numValues(); branch++) {
            m_Successors[branch] = new Id3();
            m_Successors[branch].makeTree(subsets[branch]);
        }
        return;
    }

    // Zero gain: turn this node into a leaf carrying the class distribution.
    m_Attribute = null;
    m_Distribution = new double[data.numClasses()];
    for (Enumeration rows = data.enumerateInstances(); rows.hasMoreElements();) {
        Instance row = (Instance) rows.nextElement();
        m_Distribution[(int) row.classValue()]++;
    }
    Utils.normalize(m_Distribution);
    m_ClassValue = Utils.maxIndex(m_Distribution);
    m_ClassAttribute = data.classAttribute();
}