Example usage for weka.core Instances numInstances

List of usage examples for weka.core Instances numInstances

Introduction

In this page you can find the example usage for weka.core Instances numInstances.

Prototype


public int numInstances()

Source Link

Document

Returns the number of instances in the dataset.

Usage

From source file:csav2.Weka_additive.java

/**
 * Builds a 15-attribute test dataset from the whitespace-separated token
 * stream in {@code input}, round-trips it through an ARFF file, evaluates a
 * previously serialized classifier on it, and writes per-instance
 * predictions (keyed by the trailing id token of each row) to a text file.
 *
 * Expected token layout per row: 14 feature values (index 0 parsed as
 * float, indices 1-13 as int), one nominal class label (p/n/o), then one
 * id token.
 *
 * @param input whitespace-separated feature/class/id tokens
 * @throws Exception if file I/O, token parsing, or evaluation fails
 */
public void classifyTestSet6(String input) throws Exception {
    StringBuilder ids = new StringBuilder();
    ReaderWriter rw = new ReaderWriter();

    // ATTRIBUTES: 14 numeric features + 1 nominal class = 15 total
    // (array sized exactly; the original over-allocated 50 slots).
    Attribute attr[] = new Attribute[15];
    String[] numericNames = { "Autosentiment", "PositiveMatch", "NegativeMatch", "FW", "JJ", "RB", "RB_JJ",
            "amod", "acomp", "advmod", "BLPos", "BLNeg", "VSPos", "VSNeg" };
    for (int i = 0; i < numericNames.length; i++) {
        attr[i] = new Attribute(numericNames[i]);
    }

    // Nominal class attribute with values p / n / o.
    FastVector classValue = new FastVector(3);
    classValue.addElement("p");
    classValue.addElement("n");
    classValue.addElement("o");
    attr[14] = new Attribute("answer", classValue);

    FastVector attrs = new FastVector();
    for (int i = 0; i < attr.length; i++) {
        attrs.addElement(attr[i]);
    }

    // Build the in-memory dataset from the token stream.
    Instances dataset = new Instances("my_dataset", attrs, 0);
    StringTokenizer tokenizer = new StringTokenizer(input);
    while (tokenizer.hasMoreTokens()) {
        Instance example = new Instance(15);
        for (int j = 0; j < 15; j++) {
            String st = tokenizer.nextToken();
            System.out.println(j + " " + st);
            if (j == 0)
                example.setValue(attr[j], Float.parseFloat(st));
            else if (j == 14)
                example.setValue(attr[j], st);
            else
                example.setValue(attr[j], Integer.parseInt(st));
        }
        // The 16th token of each row is the instance id, kept for output.
        ids.append(tokenizer.nextToken()).append("\t");
        dataset.add(example);
    }

    // Save dataset to ARFF, then re-load it (normalizes the header/format).
    String file = "Classifier\\featurefile_additive_test6.arff";
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataset);
    saver.setFile(new File(file));
    saver.writeBatch();

    ArffLoader loader = new ArffLoader();
    loader.setFile(new File(file));
    dataset = loader.getDataSet();
    dataset.setClassIndex(14);

    // Deserialize the trained classifier; try-with-resources closes the
    // stream (the original leaked both the FileInputStream and the
    // ObjectInputStream).
    String file1 = "Classifier\\classifier_asAndpolarwordsAndposAnddepAndblAndvs.model";
    Classifier classifier;
    try (ObjectInputStream objectInputStream = new ObjectInputStream(new FileInputStream(file1))) {
        classifier = (Classifier) objectInputStream.readObject();
    }

    // Evaluate on the full (re-loaded) dataset.
    Instances test = new Instances(dataset, 0, dataset.numInstances());
    test.setClassIndex(14);

    Evaluation eval = new Evaluation(test); //trainset
    eval.evaluateModel(classifier, test); //testset
    System.out.println(eval.toSummaryString());
    System.out.println("WEIGHTED F-MEASURE:" + eval.weightedFMeasure());
    System.out.println("WEIGHTED PRECISION:" + eval.weightedPrecision());
    System.out.println("WEIGHTED RECALL:" + eval.weightedRecall());

    // Emit "id \t label: probability" per instance, picking the argmax of
    // the three class probabilities (p / n / o), rounded to 3 decimals.
    StringBuilder optest = new StringBuilder();
    String val = "";
    StringTokenizer op = new StringTokenizer(ids.toString());
    int count = 0;
    while (op.hasMoreTokens()) {
        double[] prediction = classifier.distributionForInstance(test.instance(count));
        count += 1;
        if (prediction[0] > prediction[1]) {
            if (prediction[0] > prediction[2]) {
                val = "p: " + Double.toString((double) Math.round((prediction[0]) * 1000) / 1000);
            } else {
                val = "o: " + Double.toString((double) Math.round((prediction[2]) * 1000) / 1000);
            }
        } else {
            if (prediction[1] > prediction[2]) {
                val = "n: " + Double.toString((double) Math.round((prediction[1]) * 1000) / 1000);
            } else {
                val = "o: " + Double.toString((double) Math.round((prediction[2]) * 1000) / 1000);
            }
        }
        optest.append(op.nextToken()).append("\t").append(val).append("\n");
    }
    rw.writeToFile(optest.toString(), "Answers_additive_Test6", "txt");
}

From source file:cyber009.udal.functions.StatisticalAnalysis.java

/**
 * Estimates a conditional-entropy score for active learning: temporarily
 * labels {@code unLabelSet} with {@code classTarget}, appends it to the
 * training set, and sums {@code -p * log10(p)} of the classifier's
 * posterior over every other unlabeled instance and every nominal class
 * value.
 *
 * Side effects: {@code trainingDataSet} is mutated (candidate appended,
 * then removed again before returning); each visited unlabeled instance
 * has its class value set and cleared again.
 *
 * NOTE(review): if posteriorDistribution(...) can return exactly 0, the
 * term -0 * log10(0) evaluates to NaN and poisons the sum — confirm the
 * helper never returns 0, or guard here.
 *
 * @param classifier      classifier used to produce posterior probabilities
 * @param trainingDataSet labeled training data (temporarily extended)
 * @param unLabelDataSets pool of unlabeled instances to sum over
 * @param unLabelSet      candidate instance being scored
 * @param classTarget     class value hypothetically assigned to unLabelSet
 * @return the accumulated conditional-entropy score
 */
public double conditionalEntropy(Classifier classifier, Instances trainingDataSet, Instances unLabelDataSets,
        Instance unLabelSet, double classTarget) {
    double cEnt = 0.0D;
    double entropy = 0.0D;
    unLabelSet.setClassValue(classTarget);
    // Hypothetically append the candidate at the end of the training set;
    // it is removed again at the bottom of this method.
    trainingDataSet.add(trainingDataSet.numInstances(), unLabelSet);
    AttributeStats classStats = trainingDataSet.attributeStats(trainingDataSet.classIndex());
    for (Instance set : unLabelDataSets) {
        // Skip the candidate itself (equality ignores the class attribute).
        if (instanceCMPWithoutClass(set, unLabelSet) == true)
            continue;
        for (int i = 0; i < classStats.nominalCounts.length; i++) {
            // Nominal value label parsed as a numeric class value.
            double target = new Double(trainingDataSet.attribute(trainingDataSet.classIndex()).value(i));
            set.setClassValue(target);
            entropy = posteriorDistribution(classifier, trainingDataSet, set, classTarget);
            //System.out.println("entropy:"+entropy);
            cEnt += -(entropy) * Math.log10(entropy);
            set.setClassMissing(); // restore the instance to unlabeled
        }
    }
    // Undo the hypothetical addition (last instance is the candidate).
    trainingDataSet.remove(trainingDataSet.numInstances() - 1);
    return cEnt;
}

From source file:data.generation.target.utils.PrincipalComponents.java

License:Open Source License

/**
 * Gets the transformed training data./*  www.  ja v a  2 s .  co  m*/
 * @return the transformed training data
 * @throws Exception if transformed data can't be returned
 */
public Instances transformedData(Instances data) throws Exception {
    if (m_eigenvalues == null) {
        throw new Exception("Principal components hasn't been built yet");
    }

    Instances output = null;

    if (m_transBackToOriginal) {
        output = new Instances(m_originalSpaceFormat);
    } else {
        output = new Instances(m_transformedFormat);
    }
    for (int i = 0; i < data.numInstances(); i++) {
        Instance converted = convertInstance(data.instance(i));
        output.add(converted);
    }

    return output;
}

From source file:data.statistics.MILStatistics.java

License:Open Source License

/**
 * Calculates various MIML statistics, such as instancesPerBag and
 * attributesPerBag.
 *
 * @param dataSet
 *            A MIL dataset
 */
public void calculateStats(Instances dataSet) {
    numBags = dataSet.numInstances();
    attributesPerBag = dataSet.instance(0).relationalValue(1).numAttributes();
    minInstancesPerBag = Integer.MAX_VALUE;
    maxInstancesPerBag = Integer.MIN_VALUE;

    // Histogram: instances-per-bag -> number of bags with that count.
    distributionBags = new HashMap<Integer, Integer>();
    for (int bag = 0; bag < numBags; bag++) {
        int bagSize = dataSet.instance(bag).relationalValue(1).numInstances();
        minInstancesPerBag = Math.min(minInstancesPerBag, bagSize);
        maxInstancesPerBag = Math.max(maxInstancesPerBag, bagSize);
        Integer seen = distributionBags.get(bagSize);
        distributionBags.put(bagSize, seen == null ? 1 : seen + 1);
    }

    // Mean bag size = sum over the histogram of (size * count) / numBags.
    double weightedTotal = 0.0;
    for (Integer bagSize : distributionBags.keySet()) {
        weightedTotal += bagSize * distributionBags.get(bagSize);
    }
    avgInstancesPerBag = weightedTotal / numBags;
}

From source file:data.statistics.MLStatistics.java

License:Open Source License

/**
 * Calculates Phi and Chi-square correlation matrix.
 *
 * @param dataSet/*  ww  w. j av a2 s .  c o m*/
 *            A multi-label dataset.
 * @throws java.lang.Exception
 *             To be handled in an upper level.
 */
public void calculatePhiChi2(MultiLabelInstances dataSet) throws Exception {
    numLabels = dataSet.getNumLabels();

    // The indices of the label attributes
    int[] labelIndices;

    labelIndices = dataSet.getLabelIndices();
    numLabels = dataSet.getNumLabels();
    phi = new double[numLabels][numLabels];
    chi2 = new double[numLabels][numLabels];

    Remove remove = new Remove();
    remove.setInvertSelection(true);
    remove.setAttributeIndicesArray(labelIndices);
    remove.setInputFormat(dataSet.getDataSet());
    Instances result = Filter.useFilter(dataSet.getDataSet(), remove);
    result.setClassIndex(result.numAttributes() - 1);

    for (int i = 0; i < numLabels; i++) {
        int a[] = new int[numLabels];
        int b[] = new int[numLabels];
        int c[] = new int[numLabels];
        int d[] = new int[numLabels];
        double e[] = new double[numLabels];
        double f[] = new double[numLabels];
        double g[] = new double[numLabels];
        double h[] = new double[numLabels];
        for (int j = 0; j < result.numInstances(); j++) {
            for (int l = 0; l < numLabels; l++) {
                if (result.instance(j).stringValue(i).equals("0")) {
                    if (result.instance(j).stringValue(l).equals("0")) {
                        a[l]++;
                    } else {
                        c[l]++;
                    }
                } else {
                    if (result.instance(j).stringValue(l).equals("0")) {
                        b[l]++;
                    } else {
                        d[l]++;
                    }
                }
            }
        }
        for (int l = 0; l < numLabels; l++) {
            e[l] = a[l] + b[l];
            f[l] = c[l] + d[l];
            g[l] = a[l] + c[l];
            h[l] = b[l] + d[l];
            double mult = e[l] * f[l] * g[l] * h[l];
            double denominator = Math.sqrt(mult);
            double nominator = a[l] * d[l] - b[l] * c[l];
            phi[i][l] = nominator / denominator;
            chi2[i][l] = phi[i][l] * phi[i][l] * (a[l] + b[l] + c[l] + d[l]);
        }
    }
}

From source file:dataHandlers.DataClusterHandler.java

/**
 * Number of seeds to use: one per data point.
 *
 * @param dataPoints the dataset whose size determines the seed count
 * @return the instance count of {@code dataPoints}
 */
private int seedAmount(Instances dataPoints) {
    final int instanceCount = dataPoints.numInstances();
    return instanceCount;
}

From source file:dataHandlers.DataClusterHandler.java

/**
 * Number of clusters to form, via the rule of thumb k = floor(sqrt(n)).
 *
 * @param dataPoints the dataset to be clustered
 * @return the square root of the instance count, truncated to an int
 */
private int clusterCount(Instances dataPoints) {
    return (int) Math.sqrt(dataPoints.numInstances());
}

From source file:DataMiningLogHistoriKIRI.DecisionTree.java

/**
 * Trains a J48 decision tree on {@code arff}, prints the tree and its
 * training-set accuracy, and returns a 5-slot result array.
 *
 * NOTE(review): despite the name, this builds weka's J48 (C4.5), not ID3.
 * NOTE(review): the returned array is never populated — all 5 elements
 * are null; confirm callers expect a placeholder.
 * NOTE(review): exceptions are logged under Controller.class rather than
 * this class — looks like a copy-paste artifact; verify intent.
 *
 * @param arff training instances; string attribute 6 is parsed as the
 *             integer class label
 * @return a String[5] whose elements are currently all null
 */
public String[] id3(Instances arff) {
    J48 tree = new J48();
    try {
        tree.buildClassifier(arff);
    } catch (Exception ex) {
        Logger.getLogger(Controller.class.getName()).log(Level.SEVERE, null, ex);
    }
    System.out.println(tree.toString());

    // nilaiBenar ("correct count"): training instances whose rounded
    // prediction matches the integer label in attribute 6.
    int nilaiBenar = 0, resultInt;
    float result = 0;
    for (int i = 0; i < arff.numInstances(); i++) {
        try {
            result = (float) tree.classifyInstance(arff.instance(i));
            resultInt = Math.round(result);
            //System.out.println(dataAfterPreprocessing.get(i)[6] + " " + arff.instance(i).stringValue(6));
            if (resultInt == Integer.parseInt(arff.instance(i).stringValue(6))) {
                nilaiBenar++;
            }
        } catch (Exception ex) {
            Logger.getLogger(Controller.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    System.out.println("nilai: " + nilaiBenar + " " + arff.numInstances());
    // Training accuracy as a percentage (NaN when arff is empty).
    double confident = nilaiBenar * 1.0 / arff.numInstances() * 100;
    System.out.println("Confident = " + confident + "%");

    String[] result2 = new String[5];
    return result2;
}

From source file:DataMiningLogHistoriKIRIPercobaan2.DecisionTree.java

/**
 * Re-classifies every instance in {@code arff} with the classifier held in
 * the {@code tree} field and returns the percentage whose rounded
 * prediction matches the integer label stored in string attribute 6.
 *
 * @param arff instances to score
 * @return accuracy as a percentage (NaN when {@code arff} is empty)
 */
public double calculateConfiden(Instances arff) {
    int correct = 0;
    int total = arff.numInstances();
    for (int idx = 0; idx < total; idx++) {
        try {
            float predicted = (float) tree.classifyInstance(arff.instance(idx));
            int label = Integer.parseInt(arff.instance(idx).stringValue(6));
            if (Math.round(predicted) == label) {
                correct++;
            }
        } catch (Exception ex) {
            Logger.getLogger(Controller.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    return correct * 1.0 / total * 100;
}

From source file:de.fub.maps.project.detector.model.inference.processhandler.CrossValidationProcessHandler.java

License:Open Source License

@Override
protected void handle() {
    // Build one labeled instance per track segment of the training set;
    // the map key is the class label, the class attribute is at index 0.
    Collection<Attribute> attributeList = getInferenceModel().getAttributes();
    Instances trainingSet = new Instances("Classes", new ArrayList<Attribute>(attributeList), 9);
    trainingSet.setClassIndex(0);

    HashMap<String, HashSet<TrackSegment>> dataset = getInferenceModel().getInput().getTrainingsSet();
    for (Entry<String, HashSet<TrackSegment>> entry : dataset.entrySet()) {
        String className = entry.getKey();
        for (TrackSegment segment : entry.getValue()) {
            trainingSet.add(getInstance(className, segment));
        }
    }

    assert trainingSet.numInstances() > 0 : "Training set is empty and has no instances"; //NOI18N

    evaluate(trainingSet);
}