Example usage for weka.core Instances numInstances

List of usage examples for weka.core Instances numInstances

Introduction

In this page you can find the example usage for weka.core Instances numInstances.

Prototype


publicint numInstances() 

Source Link

Document

Returns the number of instances in the dataset.

Usage

From source file:fantail.core.Tools.java

License:Open Source License

public static int getNumberTargets(Instances data) throws Exception {
    if (data == null) {
        throw new Exception("data can't be null.");
    }//w ww .  j a v  a2 s  .co m
    if (data.numInstances() <= 0) {
        throw new Exception("data can't be empty.");
    }
    if (data.classIndex() < 0) {
        throw new Exception("class index is not set.");
    }
    Instance tempInst = data.instance(0);
    Instances targets = tempInst.relationalValue(data.classIndex());
    return targets.numAttributes();
}

From source file:fantail.core.WekaLRHelper.java

License:Open Source License

public static Instances covertArff2Xarff(Instances data) {

    Instances xarffData = null;/*w w  w  . j  a  va 2s  .c o  m*/

    try {
        String userDIR = System.getProperty("user.dir");
        String randFileName = Long.toString(System.nanoTime()).substring(10)
                + ".fantail.algorithms.LRT.temp.xarff";
        String path_separator = System.getProperty("file.separator");
        String xarffPath = userDIR + path_separator + randFileName;
        //System.out.println(m_xarffPath);
        int numObjects = Tools.getNumberTargets(data);

        StringBuilder sb = new StringBuilder();
        sb.append("@relation arff2xarff").append(System.getProperty("line.separator"));
        for (int i = 0; i < data.numAttributes() - 1; i++) {
            sb.append("@attribute ");
            sb.append(data.attribute(i).name());
            sb.append(" numeric").append(System.getProperty("line.separator"));
        }
        sb.append("@attribute L RANKING {");
        for (int i = 0; i < numObjects; i++) {
            String spr = ",";
            if (i == numObjects - 1) {
                spr = "";
            }
            String targetName = "T" + (i);
            sb.append(targetName).append(spr);
        }
        sb.append("}").append(System.getProperty("line.separator"));
        sb.append("@data ").append(System.getProperty("line.separator"));

        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            for (int j = 0; j < data.numAttributes() - 1; j++) {
                sb.append(inst.value(j)).append(",");
            }
            for (int x = 1; x <= numObjects; x++) {
                int rank = x;

                String[] names = Tools.getTargetNames(inst);
                String algo = getName(rank, Tools.getTargetVector(inst), names);

                System.out.println("\t algo: " + algo + ". rank: " + rank + ", Tools.getTargetVector(inst):"
                        + Arrays.toString(Tools.getTargetVector(inst)) + ", " + "names:"
                        + Arrays.toString(names));

                String sprr = ">";
                if (x == names.length) {
                    sprr = "";
                }
                sb.append(algo).append(sprr);
            }
            sb.append(System.getProperty("line.separator"));
        }

        File file = new File(xarffPath);
        Writer output = new BufferedWriter(new FileWriter(file));
        output.write(sb.toString());
        output.close();

        System.out.println(file.getAbsoluteFile());

        weka.core.converters.XArffLoader xarffLoader = new weka.core.converters.XArffLoader();
        xarffLoader.setSource(new File(xarffPath));
        xarffData = xarffLoader.getDataSet();
        //
        File tmpxarffFile = new File(xarffPath);
        if (tmpxarffFile.exists()) {
            tmpxarffFile.delete();
        }
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
    }

    return xarffData;
}

From source file:fantail.core.WekaLRHelper.java

License:Open Source License

public static Instances covertArff2Xarff2(DATA_TYPE data_type, Instances data) {

    Instances xarffData = null;/* w  ww. j  a v  a 2  s  .c om*/

    try {
        String userDIR = System.getProperty("user.dir");
        //String randFileName = Long.toString(System.nanoTime()).substring(10) + ".LRT.temp.xarff";
        String randFileName = UUID.randomUUID().toString() + ".LRT.temp.xarff";
        String path_separator = System.getProperty("file.separator");
        String xarffPath = userDIR + path_separator + randFileName;
        //System.out.println(m_xarffPath);
        int numObjects = Tools.getNumberTargets(data);

        StringBuilder sb = new StringBuilder();
        sb.append("@relation arff2xarff").append(System.getProperty("line.separator"));
        for (int i = 0; i < data.numAttributes() - 1; i++) {
            sb.append("@attribute ");
            sb.append(data.attribute(i).name());
            sb.append(" numeric").append(System.getProperty("line.separator"));
        }
        sb.append("@attribute L RANKING {");
        for (int i = 0; i < numObjects; i++) {
            String spr = ",";
            if (i == numObjects - 1) {
                spr = "";
            }
            String targetName = "T" + (i);
            sb.append(targetName).append(spr);
        }
        sb.append("}").append(System.getProperty("line.separator"));
        sb.append("@data ").append(System.getProperty("line.separator"));

        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);

            // determine a ranking of the class labels
            String ranking_result = determineRanking(Tools.getTargetObjects(inst));
            if (ranking_result == null)
                continue;
            //System.out.println("\t ranking_result:" + ranking_result);

            // looking at a>b>c, if the 'a' part consists of more than two partial relations, we need to split them.
            List<ArrayList<String>> label_collection = new ArrayList<ArrayList<String>>();

            // generate feature string
            String attr_set_str = "";
            for (int j = 0; j < data.numAttributes() - 1; j++) {
                attr_set_str += (inst.value(j) + ",");
            }

            // split label string via ">"
            String items[] = ranking_result.split(">");
            for (int j = 0; j < items.length; j++) {

                String labels[] = items[j].split("\\|");

                // if the first label has more than or equal to 2 partial relations, we split it.
                ArrayList<String> label_list = new ArrayList<String>();
                if (j == 0) {
                    if (labels.length >= 2) {
                        for (int k = 0; k < labels.length; k++) {
                            label_list.add(labels[k]);
                        }
                    } else {
                        label_list.add(items[j]);
                    }
                    label_collection.add(label_list);
                } else {
                    if (labels.length >= 3) {
                        for (int k = 0; k < labels.length; k++) {
                            label_list.add(labels[k]);
                        }

                    } else {
                        label_list.add(items[j]);
                    }
                    label_collection.add(label_list);
                }
            }

            List<String> prev_items_in_label_collection = new ArrayList<String>();
            for (int j = 0; j < label_collection.size(); j++) {
                List<String> items_in_label_collection = new ArrayList<String>();
                if (j == 0) {
                    for (int k = 0; k < label_collection.get(j).size(); k++) {
                        items_in_label_collection.add(label_collection.get(j).get(k));
                    }
                } else {
                    for (int k = 0; k < label_collection.get(j).size(); k++) {
                        for (int l = 0; l < prev_items_in_label_collection.size(); l++) {
                            items_in_label_collection.add(prev_items_in_label_collection.get(l) + ">"
                                    + label_collection.get(j).get(k));
                        }
                    }
                }
                prev_items_in_label_collection = items_in_label_collection;
            }

            for (int j = 0; j < prev_items_in_label_collection.size(); j++) {
                //System.out.println("\t\t line:" + prev_items_in_label_collection.get(j));
                sb.append(attr_set_str + prev_items_in_label_collection.get(j) + "\n");
            }

            InputStream is = new ByteArrayInputStream(sb.toString().getBytes());
            weka.core.converters.XArffLoader xarffLoader = new weka.core.converters.XArffLoader();
            xarffLoader.setSource(is);
            xarffData = xarffLoader.getDataSet();
        }
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
    }

    return xarffData;
}

From source file:fantail.examples.LabelRankingAlgorithmsMultiRunEvalExample01.java

License:Open Source License

public static void main(String[] args) throws Exception {

    String arffPath = "/Users/Quan/Dropbox/Ranking_Datasets/LabelRankingSemiSyntheticData/iris_dense.csv.arff";
    Instances data = Tools.loadFantailARFFInstances(arffPath);

    int numRuns = 30;
    int randSeed = 1;
    double trainsetRatio = 0.90;

    System.out.println(arffPath);
    System.out.println("Num of labels: " + Tools.getNumberTargets(data));
    System.out.println("Num of instances: " + data.numInstances());
    System.out.println("Num of attributes (incl. target att): " + data.numAttributes());
    System.out.println();//ww w. j  a va 2  s  .  c om
    System.out.println("Num of runs: " + numRuns);
    System.out.println("trainsetRatio: " + trainsetRatio);
    System.out.println();

    AbstractRanker ranker;
    MultiRunEvaluation eval;

    //
    String strFormat = "%-30s %-30s %-30s";
    System.out.println(String.format(strFormat, "<Algorithms>", "<Kendall's tau>", "<Spearman Correlation>"));
    //
    ranker = new AverageRanking();
    eval = new MultiRunEvaluation(data);
    eval.multiRunEvaluate(ranker, numRuns, trainsetRatio, randSeed);
    printResult(strFormat, ranker, eval);

    ranker = new RankingWithBinaryPCT();
    eval = new MultiRunEvaluation(data);
    eval.multiRunEvaluate(ranker, numRuns, trainsetRatio, randSeed);
    printResult(strFormat, ranker, eval);

    ranker = new RankingByPairwiseComparison();
    eval = new MultiRunEvaluation(data);
    eval.multiRunEvaluate(ranker, numRuns, trainsetRatio, randSeed);
    printResult(strFormat, ranker, eval);

    ranker = new RankingWithkNN();
    eval = new MultiRunEvaluation(data);
    eval.multiRunEvaluate(ranker, numRuns, trainsetRatio, randSeed);
    printResult(strFormat, ranker, eval);

    ranker = new BinaryART();
    ((BinaryART) ranker).setK(9999);
    ((BinaryART) ranker).setMiniLeaf(1);
    eval = new MultiRunEvaluation(data);
    eval.multiRunEvaluate(ranker, numRuns, trainsetRatio, randSeed);
    printResult(strFormat, ranker, eval);

    ranker = new ARTForests();
    ((ARTForests) ranker).setNumIterations(50);
    ((ARTForests) ranker).setK(0);
    ((ARTForests) ranker).setNumMinInstances(1);
    eval = new MultiRunEvaluation(data);
    eval.multiRunEvaluate(ranker, numRuns, trainsetRatio, randSeed);
    printResult(strFormat, ranker, eval);
    //
    ranker = new RankingViaRegression();
    eval = new MultiRunEvaluation(data);
    eval.multiRunEvaluate(ranker, numRuns, trainsetRatio, randSeed);
    printResult(strFormat, ranker, eval);
}

From source file:fantail.examples.LabelRankingSingleAlgoExample01.java

License:Open Source License

public static void main(String[] args) throws Exception {

    String arffPath = "/Users/Quan/Dropbox/Ranking_Datasets/LabelRankingSemiSyntheticData/glass_dense.csv.arff";
    Instances data = Tools.loadFantailARFFInstances(arffPath);

    int numRuns = 30;
    int randSeed = 1;
    double trainsetRatio = 0.50;

    System.out.println(arffPath);
    System.out.println("Num of labels: " + Tools.getNumberTargets(data));
    System.out.println("Num of instances: " + data.numInstances());
    System.out.println("Num of attributes (incl. target att): " + data.numAttributes());
    System.out.println();/* www.  j  a  va  2s  . co m*/

    AbstractRanker ranker;
    MultiRunEvaluation eval;

    String strFormat = "%-30s %-30s %-30s";
    System.out.println(String.format(strFormat, "<Algorithms>", "<Kendall>", "<SpearmanCC>"));

    ranker = new ARTForests();
    eval = new MultiRunEvaluation(data);
    eval.multiRunEvaluate(ranker, numRuns, trainsetRatio, randSeed);
    printResult(strFormat, ranker, eval);

}

From source file:farm_ads.Form.java

private void detailInstances(Instances i) {
    String s = new String();
    s += "==== Thng tin d liu ==== \n";
    s += "\n  S lng mu: " + i.numInstances();
    s += "\n  S thuc tnh: 54877";
    this.tResult.setText(s);
}

From source file:farm_ads.MyClassifier.java

public String ClassifyMultiInstances(Classifier c, Instances t) throws Exception {
    String format = "%4s %15s %15s\n";
    String format1 = "%15s %15s %15s\n";
    String s = new String();
    TP = FP = FN = TN = 0.0;/* ww w .  j a  va 2  s . co  m*/

    s += "S lng mu: " + Integer.toString(t.numInstances()) + "\n\n";
    s += "======= Kt qu d on qung co========\n";
    s += String.format(format, "STT", "Trc d on", "Sau d on");

    for (int i = 0; i < t.numInstances(); i++) {
        String[] classAds = { "Ph hp", "Khng ph hp" };
        double actValue = t.instance(i).classValue();

        Instance newInst = t.instance(i);

        double pred = c.classifyInstance(newInst);

        countPredicted(actValue, pred);

        s += String.format(format, Integer.toString(i + 1), classAds[(int) actValue], classAds[(int) pred]);
    }

    s += "\nCh thch -->  Ph hp: (+1) ,  Khng ph hp: (-1)\n";
    s += "\nS mu c phn lp ng: " + Integer.toString(getCorrect());
    s += "\nS mu c phn lp sai: " + Integer.toString(getInCorrect());

    s += "\n\n======= ?nh gi kt qu d on  ========\n";
    s += String.format(format1, "Prediction", "Recall", "F-measure");
    s += String.format(format1, getPrecision(), getRecall(), getFMeasure());

    return s;
}

From source file:feature.InfoGainEval.java

License:Open Source License

/**
 * Initializes an information gain attribute evaluator. Discretizes all
 * attributes that are numeric./*from  w w w  .  ja  v a 2 s . c  o  m*/
 *
 * @param data
 *            set of instances serving as training data
 * @throws Exception
 *             if the evaluator has not been generated successfully
 */
public double computeInfoGain(Instances data, int att) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute info gains
    m_InfoGains = new double[data.numAttributes()];
    m_InfoGains[att] = (ContingencyTables.entropyOverColumns(counts[att])
            - ContingencyTables.entropyConditionedOnRows(counts[att]));

    return m_InfoGains[att];
}

From source file:feature.InfoGainEval.java

License:Open Source License

public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {//from www .j a v  a 2s.  c o m
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute info gains
    m_InfoGains = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_InfoGains[i] = (ContingencyTables.entropyOverColumns(counts[i])
                    - ContingencyTables.entropyConditionedOnRows(counts[i]));
        }
    }
}

From source file:feature_construction.GeneticProgramming.java

License:LGPL

public static double[][] convertInstancesToInputFeaturesArray(String fileName) {
    // Create instances (file that contains the inputs to feed through the program)
    double[][] inputFeatures;

    try {//from  ww w  .  ja  v  a2 s .c  om
        //load CSV
        CSVLoader loaderInputs = new CSVLoader();
        loaderInputs.setSource(new File(fileName));
        Instances inputSet = loaderInputs.getDataSet();
        inputSet.setClassIndex(inputSet.numAttributes() - 1);

        inputFeatures = new double[inputSet.numInstances()][inputSet.numAttributes()];

        // Convert instances to double[][]
        for (int i = 0; i < inputSet.numInstances(); i++) {
            for (int j = 0; j < inputSet.numAttributes(); j++) {
                inputFeatures[i][j] = inputSet.get(i).value(j);
            }
        }

        return inputFeatures;
    } catch (Exception e) {
        e.printStackTrace();
    }

    return null;
}