Example usage for weka.core Instances numInstances

List of usage examples for weka.core Instances numInstances

Introduction

On this page you can find example usage for weka.core Instances numInstances.

Prototype


public int numInstances()

Document

Returns the number of instances in the dataset.
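
The example below is a minimal, self-contained sketch (not taken from the sources listed under Usage) of the most common pattern: numInstances() provides the loop bound for iterating over a dataset with instance(i). The class name NumInstancesDemo and the file name iris.arff are placeholders; substitute any ARFF file.

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class NumInstancesDemo {
    public static void main(String[] args) throws Exception {
        // Load a dataset; the path is illustrative only.
        Instances data = DataSource.read("iris.arff");
        data.setClassIndex(data.numAttributes() - 1);

        System.out.println("Number of instances: " + data.numInstances());

        // numInstances() bounds the loop; instance(i) retrieves each row.
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            System.out.println(inst);
        }
    }
}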

Usage

From source file: fantail.algorithms.BinaryART.java

License: Open Source License

private void makeTree(Instances data, java.util.Random r, int depth) throws Exception {
    if (m_K > data.numAttributes()) {
        m_K = data.numAttributes() - 1;
    }

    if (m_K < 1) {
        m_K = (int) weka.core.Utils.log2(data.numAttributes()) + 1;
    }

    int[] randAtts = new int[data.numAttributes() - 1];
    //TODO: handle class target att
    for (int i = 0; i < randAtts.length; i++) {
        randAtts[i] = i;
    }
    // Shuffle the attribute indices so the first m_K entries form a random candidate subset.
    for (int i = 0; i < randAtts.length; i++) {
        int randomPosition = r.nextInt(randAtts.length);
        int temp = randAtts[i];
        randAtts[i] = randAtts[randomPosition];
        randAtts[randomPosition] = temp;
    }

    int bestAttIndex = -1;

    AttScorePair[] attScorePair = new AttScorePair[m_K];

    //double currentR2 = estimateAvgDistanceSpearman(data);
    for (int i = 0; i < m_K; i++) {
        int attIndex = randAtts[i];

        double splitPoint = Double.NaN;

        if (!m_UseMedian) {
            splitPoint = data.meanOrMode(attIndex);
        } else {
            splitPoint = getMedian(data, attIndex);
        }

        double r2 = estimateR2(data, attIndex, splitPoint);
        attScorePair[i] = new AttScorePair(attIndex, r2);
    }

    Arrays.sort(attScorePair);

    bestAttIndex = attScorePair[0].index;
    double maxR2 = attScorePair[0].score;
    boolean stop1 = false;

    //        for (int kk = 0; kk < attScorePair.length; kk++) {
    //            System.out.println(attScorePair[kk].score);
    //        }
    //        if (true) {
    //            throw new Exception("stop");
    //        }
    if (attScorePair[0].score <= attScorePair[m_K - 1].score) {
        stop1 = true;
    }

    // Stop splitting when the node is small, the depth limit is reached, R2 is near perfect,
    // the candidate scores show no spread, or the best attribute has no variance.
    if (data.numInstances() <= m_MiniLeaf || (depth >= m_MaxDepth && m_MaxDepth != 0)
    //|| maxR2 <= 0.01 // removed 10/01/2013
            || maxR2 >= 0.95 || stop1 // 11/01/13 the paper version doesn't have this
            || data.variance(bestAttIndex) <= 0) {

        m_Attribute = null;
        m_Prototype = AbstractRanker.getAvgRanking(data);
        //m_Prototype = AbstractRanker.getCenterRanking(data, m_ApproxCenterMethod);
        return;
    }

    m_Attribute = data.attribute(bestAttIndex);
    if (!m_UseMedian) {
        m_SplitPoint = data.meanOrMode(bestAttIndex);
    } else {
        m_SplitPoint = getMedian(data, bestAttIndex);
    }
    Instances[] splitData = splitData(data, bestAttIndex, m_SplitPoint);

    m_Successors = new BinaryART[2];
    for (int j = 0; j < 2; j++) {
        m_Successors[j] = new BinaryART();
        m_Successors[j].setMiniLeaf(m_MiniLeaf);
        m_Successors[j].setK(m_K);
        m_Successors[j].setUseMedian(m_UseMedian);
        m_Successors[j].setNumObjects(m_NumObjects);

        m_Successors[j].makeTree(splitData[j], r, depth + 1);
    }
}

From source file: fantail.algorithms.RankingByPairwiseComparison.java

License: Open Source License

@Override
public void buildRanker(Instances data) throws Exception {
    m_Classifiers = new ArrayList<weka.classifiers.AbstractClassifier>();
    m_AlgoPairs = new ArrayList<String>();
    m_NumLabels = Tools.getNumberTargets(data);

    // build pairwise binary datasets, one per label pair
    for (int a = 0; a < m_NumLabels; a++) {
        for (int b = 0; b < m_NumLabels; b++) {

            String pairStr = a + "|" + b;
            if (!hasPair(m_AlgoPairs, pairStr) && a != b) {
                m_AlgoPairs.add(pairStr);

                Instances d = new Instances(data);
                d.setClassIndex(-1);
                d.deleteAttributeAt(d.numAttributes() - 1);

                weka.filters.unsupervised.attribute.Add add = new weka.filters.unsupervised.attribute.Add();
                add.setInputFormat(d);
                add.setOptions(weka.core.Utils
                        .splitOptions("-T NOM -N class -L " + ((int) a) + "," + ((int) b) + " -C last"));

                d = Filter.useFilter(d, add);
                d.setClassIndex(d.numAttributes() - 1);

                for (int i = 0; i < d.numInstances(); i++) {

                    Instance metaInst = (Instance) data.instance(i);
                    Instance inst = d.instance(i);

                    double[] rankVector = Tools.getTargetVector(metaInst);

                    double rank_a = rankVector[a];
                    double rank_b = rankVector[b];

                    if (rank_a < rank_b) {
                        inst.setClassValue(0.0);
                    } else {
                        inst.setClassValue(1.0);
                    }
                }

                //weka.classifiers.functions.SMO cls = new weka.classifiers.functions.SMO();
                //String ops = "weka.classifiers.functions.SMO -C 1.0 -L 0.001 -P 1.0E-12 -N 0 -V -1 -W 1 -K \"weka.classifiers.functions.supportVector.RBFKernel -C 250007 -G 0.01\"";
                //cls.setOptions(weka.core.Utils.splitOptions(ops));                   
                //cls.buildClassifier(d);
                //weka.classifiers.functions.Logistic cls = new weka.classifiers.functions.Logistic();
                //weka.classifiers.trees.J48 cls = new weka.classifiers.trees.J48();
                //weka.classifiers.rules.ZeroR cls = new weka.classifiers.rules.ZeroR();
                weka.classifiers.trees.DecisionStump cls = new weka.classifiers.trees.DecisionStump();
                cls.buildClassifier(d);
                m_Classifiers.add(cls);
                m_BaseClassifierName = cls.getClass().getSimpleName();
                m_Add = add;
            }
        }
    }
}

From source file: fantail.algorithms.RankingViaRegression.java

License: Open Source License

@Override
public void buildRanker(Instances data) throws Exception {

    Instances workingData = new Instances(data);
    //Instance instTemp = workingData.instance(0);

    //m_LastFeatureIndex = workingData.numAttributes() - 1;
    m_NumFeatures = workingData.numAttributes() - 1;
    m_NumTargets = Tools.getNumberTargets(data);
    m_Classifiers = new AbstractClassifier[m_NumTargets];

    for (int i = 0; i < m_NumTargets; i++) {
        weka.classifiers.functions.LinearRegression lr = new weka.classifiers.functions.LinearRegression();
        m_Classifiers[i] = AbstractClassifier.makeCopy(lr);
    }

    Instances[] trainingSets = new Instances[m_NumTargets];

    for (int t = 0; t < m_NumTargets; t++) {

        ArrayList<Attribute> attributes = new ArrayList<Attribute>();
        for (int i = 0; i < m_NumFeatures; i++) {
            attributes.add(new Attribute(workingData.attribute(i).name()));
        }

        String targetName = "att-" + (t + 1);
        attributes.add(new Attribute(targetName));

        trainingSets[t] = new Instances("data-" + targetName, attributes, 0);

        for (int j = 0; j < workingData.numInstances(); j++) {
            Instance metaInst = workingData.instance(j);
            double[] ranking = Tools.getTargetVector(metaInst);
            double[] values = new double[trainingSets[t].numAttributes()];

            for (int m = 0; m < (trainingSets[t].numAttributes() - 1); m++) {
                values[m] = metaInst.value(m);
            }
            values[values.length - 1] = ranking[t];
            trainingSets[t].add(new DenseInstance(1.0, values));
        }

        trainingSets[t].setClassIndex(trainingSets[t].numAttributes() - 1);
        m_Classifiers[t].buildClassifier(trainingSets[t]);
    }

    m_TempHeader = new Instances(trainingSets[0], 0);
}

From source file: fantail.algorithms.RankingWithBinaryPCT.java

License: Open Source License

private double computeVariance(Instances data) throws Exception {
    double[][] targets = new double[data.numInstances()][];
    for (int i = 0; i < data.numInstances(); i++) {
        targets[i] = Tools.getTargetVector(data.instance(i));
    }
    double sumVar = 0;
    for (int i = 0; i < m_NumTargetLabels; i++) {
        double[] target_i = new double[data.numInstances()];

        for (int j = 0; j < data.numInstances(); j++) {
            Instance metaInst = (Instance) data.instance(j);
            target_i[j] = targets[j][i] * metaInst.weight();
        }
        sumVar += weka.core.Utils.variance(target_i);
    }
    return sumVar / m_NumTargetLabels;
}

From source file: fantail.algorithms.RankingWithBinaryPCT.java

License: Open Source License

private double computeVarianceReduction(Instances data, int attIndex, double splitPoint) throws Exception {
    //double varianceaE = computeVariance(data); // doesn't make sense to compute this
    Instances[] P = splitData(data, attIndex, splitPoint);
    double variancePk = 0;
    for (int k = 0; k < P.length; k++) {
        variancePk += (1.0 * P[k].numInstances() / data.numInstances() * computeVariance(P[k]));
    }
    return -variancePk;
}

From source file: fantail.algorithms.RankingWithBinaryPCT.java

License: Open Source License

private Instances[] splitData(Instances data, int attIndex, double splitPoint) throws Exception {
    Instances[] subsets = new Instances[2];
    subsets[0] = new Instances(data, 0);
    subsets[1] = new Instances(data, 0);

    for (int i = 0; i < data.numInstances(); i++) {
        Instance inst = data.instance(i);
        if (inst.value(attIndex) <= splitPoint && (subsets[0].numInstances() <= 0.5 * data.numInstances())) {
            subsets[0].add(inst);
        } else {
            subsets[1].add(inst);
        }
    }
    // TODO: 
    if (subsets[1].numInstances() == 0) {
        subsets[1].add(subsets[0].instance(0));
    }
    if (subsets[0].numInstances() == 0) {
        subsets[0].add(subsets[1].instance(0));
    }
    return subsets;
}

From source file: fantail.algorithms.RankingWithBinaryPCT.java

License: Open Source License

private Instances[] splitData2(Instances data, int attIndex, double splitPoint) throws Exception {
    Instances[] subsets = new Instances[2];
    subsets[0] = new Instances(data, 0);
    subsets[1] = new Instances(data, 0);
    for (int i = 0; i < data.numInstances(); i++) {
        Instance inst = data.instance(i);
        if (inst.value(attIndex) <= splitPoint) {
            subsets[0].add(inst);
        } else {
            subsets[1].add(inst);
        }
    }
    // TODO: 
    if (subsets[1].numInstances() == 0) {
        subsets[1].add(subsets[0].instance(0));
    }
    if (subsets[0].numInstances() == 0) {
        subsets[0].add(subsets[1].instance(0));
    }
    return subsets;
}

From source file: fantail.algorithms.RankingWithBinaryPCT.java

License: Open Source License

private double getMedian(Instances data, int attIndex) throws Exception {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (int i = 0; i < data.numInstances(); i++) {
        Instance inst = (Instance) data.instance(i);
        stats.addValue(inst.value(attIndex));
    }
    double median = stats.getPercentile(50);
    return median;
}

From source file: fantail.algorithms.RankingWithBinaryPCT.java

License: Open Source License

private void makeTree(Instances data, Random r, int depth) throws Exception {

    if (data.numInstances() <= m_MiniLeaf || (depth >= m_MaxDepth && m_MaxDepth != 0)
            || computeVariance(data) <= m_MinVariancea) {
        //|| maxVarianceaReduction <= 0
        //|| data.variance(bestAttIndex) <= 0) { // || data.variance(bestAttIndex) <= 0 ) {   copied from ART, 

        m_Attribute = null;
        m_Prototype = AbstractRanker.getAvgRanking(data);
        return;
    }

    //
    if (m_K > data.numAttributes()) {
        m_K = data.numAttributes();
    }
    if (m_K < 1) {
        m_K = (int) weka.core.Utils.log2(data.numAttributes()) + 1;
    }

    // TODO:
    int[] attIndice = new int[data.numAttributes() - 1];
    for (int i = 0; i < attIndice.length; i++) {
        attIndice[i] = i;
    }
    for (int i = 0; i < attIndice.length; i++) {
        //int randomPosition = getRandomPosition(r, attIndice);
        int randomPosition = r.nextInt(attIndice.length);
        int temp = attIndice[i];
        attIndice[i] = attIndice[randomPosition];
        attIndice[randomPosition] = temp;
    }

    AttScorePair[] attScorePair = new AttScorePair[m_K];

    for (int i = 0; i < m_K; i++) {
        int attIndex = attIndice[i];

        double splitPoint = Double.NaN;
        if (!m_UseMedian) {
            splitPoint = data.meanOrMode(attIndex);
        } else {
            splitPoint = getMedian(data, attIndex);
        }
        double varianceReduction = computeVarianceReduction(data, attIndex, splitPoint);
        attScorePair[i] = new AttScorePair(attIndex, varianceReduction);

    }

    Arrays.sort(attScorePair);
    int randAttIndex = 0;
    int bestAttIndex = attScorePair[randAttIndex].index;

    double maxVarianceaReduction = attScorePair[randAttIndex].score;

    //        if (data.numInstances() <= 1 * m_MiniLeaf
    //                || (depth >= m_MaxDepth && m_MaxDepth != 0)
    //                || computeVariance(data) <= m_MinVariancea) {
    //                //|| maxVarianceaReduction <= 0
    //                //|| data.variance(bestAttIndex) <= 0) { // || data.variance(bestAttIndex) <= 0 ) {   copied from ART, 
    //
    //            m_Attribute = null;
    //            m_Prototype = AbstractRanker.getAvgRanking(data);
    //            return;
    //        }
    m_Attribute = data.attribute(bestAttIndex);

    if (!m_UseMedian) {
        m_SplitPoint = data.meanOrMode(bestAttIndex);
    } else {
        m_SplitPoint = getMedian(data, bestAttIndex);
    }

    //m_SplitPoint = data.meanOrMode(m_Attribute);
    Instances[] splitData = splitData(data, bestAttIndex, m_SplitPoint);

    //System.out.println(splitData[0].numInstances());
    //System.out.println(splitData[1].numInstances());
    //System.out.println();

    m_Successors = new RankingWithBinaryPCT[2];

    for (int j = 0; j < 2; j++) {
        m_Successors[j] = new RankingWithBinaryPCT();
        m_Successors[j].setMiniLeaf(m_MiniLeaf);
        m_Successors[j].setK(m_K);
        m_Successors[j].setUseMedian(m_UseMedian);
        m_Successors[j].setNumTargetLabels(m_NumTargetLabels);
        m_Successors[j].makeTree(splitData[j], r, depth + 1);
    }
}

From source file: fantail.core.MultiRunEvaluation.java

License: Open Source License

public void multiRunEvaluate(AbstractRanker ranker, int randSeed) throws Exception {
    //m_NumRuns = numRuns;
    m_Rand = new Random(randSeed); // use the supplied seed so runs are reproducible

    int nFold = 5;
    //
    m_ScoreKendall = new double[nFold];
    m_ScoreSpearmanCC = new double[nFold];
    m_ScorePrecision = new double[nFold];
    //
    //m_Data.randomize(m_Rand);

    for (int i = 0; i < nFold; i++) {
        Instances train = m_Data.trainCV(nFold, i);
        Instances test = m_Data.testCV(nFold, i);

        //System.out.println("train size:" + train.size() + ", test size:" + test.size());
        ranker.buildRanker(train);

        double localScoreKendall = 0;
        double localScoreSpearmanCC = 0;
        double localScorePrecision = 0;

        for (int m = 0; m < test.numInstances(); m++) {
            Instance inst = test.instance(m);
            double[] pred = ranker.recommendRanking(inst);
            double[] actual = Tools.getTargetVector(inst);

            //System.out.println("test instance:" + inst.toString());

            List<ReasonerComponent> pred_list = sortRanking(pred);
            List<ReasonerComponent> actual_list = sortRanking(actual);
            double precision = computePrecision(pred_list, actual_list);
            localScorePrecision += precision;

            //System.out.println("\tprecision:" + precision + " ==> pred:" + pred_list + ", actual:" + actual_list);
            //System.out.println("\t ==> pred:" + Arrays.toString(pred) + ", actual:" + Arrays.toString(actual));

            //localScoreKendall += EvalTools.computeKendallTau(actual, pred);
            //localScoreSpearmanCC += EvalTools.computeSpearmanCC(actual, pred);
        }

        localScoreKendall /= test.numInstances();
        localScoreSpearmanCC /= test.numInstances();
        localScorePrecision /= test.numInstances();

        m_ScoreKendall[i] += localScoreKendall;
        m_ScoreSpearmanCC[i] += localScoreSpearmanCC;
        m_ScorePrecision[i] += localScorePrecision;
    }
}