Example usage for weka.core Instances numAttributes

List of usage examples for weka.core Instances numAttributes

Introduction

In this page you can find the example usage for weka.core Instances numAttributes.

Prototype


publicint numAttributes() 

Source Link

Document

Returns the number of attributes.

Usage

From source file:facebookpostpuller.SpecificUserModel.java

public void convertToArff(File file) throws Exception {

    FastVector atts;/*from  w ww  .  j a  v a 2s. co  m*/
    FastVector attVals;
    Instances data;
    double[] vals;

    file = new File(file + ".arff");

    atts = new FastVector();
    atts.addElement(new Attribute(("name"), (FastVector) null)); // 5/27/2014
    atts.addElement(new Attribute(("message"), (FastVector) null));

    attVals = new FastVector();
    attVals.addElement("13-17");
    attVals.addElement("18-24");
    attVals.addElement("25-34");
    attVals.addElement("35-44");
    attVals.addElement("45-54");
    atts.addElement(new Attribute("age-group", attVals));

    data = new Instances("predict_age", atts, 0);

    Iterator it = posts.entrySet().iterator();

    while (it.hasNext()) {
        Map.Entry pairs = (Map.Entry) it.next();

        vals = new double[data.numAttributes()];
        User user = (User) pairs.getValue();
        String name = user.getName(); // 5/27/2014
        String message = ((Post) (pairs.getKey())).getMessage();

        Preprocess pre = new Preprocess();
        message = pre.emoticons(message);
        message = pre.emoji(message);
        message = pre.url(message);

        //StringFilter filter = new StringFilter(message);
        vals[0] = data.attribute(0).addStringValue(name); // 5/27/2014
        vals[1] = data.attribute(1).addStringValue(message);

        if (ageGroup.equals("13-17")) {
            vals[2] = attVals.indexOf("13-17");
        } else if (ageGroup.equals("18-24")) {
            vals[2] = attVals.indexOf("18-24");
        } else if (ageGroup.equals("25-34")) {
            vals[2] = attVals.indexOf("25-34");
        } else if (ageGroup.equals("35-44")) {
            vals[2] = attVals.indexOf("35-44");
        } else if (ageGroup.equals("45-54")) { // Modified 6/11/2014 
            vals[2] = attVals.indexOf("45-54");
        }

        data.add(new Instance(1.0, vals));

        it.remove();
    }

    ArffSaver saver = new ArffSaver();
    saver.setInstances(data);
    saver.setFile(file);
    saver.writeBatch();
}

From source file:fantail.algorithms.BinaryART.java

License:Open Source License

private void makeTree(Instances data, java.util.Random r, int depth) throws Exception {
    if (m_K > data.numAttributes()) {
        m_K = data.numAttributes() - 1;/*from   w  w w  . java 2  s. co  m*/
    }

    if (m_K < 1) {
        m_K = (int) weka.core.Utils.log2(data.numAttributes()) + 1;
    }

    int[] randAtts = new int[data.numAttributes() - 1];
    //TODO: handle class target att
    for (int i = 0; i < randAtts.length; i++) {
        randAtts[i] = i;
    }
    for (int i = 0; i < randAtts.length; i++) {
        int randomPosition = r.nextInt(randAtts.length);
        int temp = randAtts[i];
        randAtts[i] = randAtts[randomPosition];
        randAtts[randomPosition] = temp;
    }

    int bestAttIndex = -1;

    AttScorePair[] attScorePair = new AttScorePair[m_K];

    //double currentR2 = estimateAvgDistanceSpearman(data);
    for (int i = 0; i < m_K; i++) {
        int attIndex = randAtts[i];

        double splitPoint = Double.NaN;

        if (!m_UseMedian) {
            splitPoint = data.meanOrMode(attIndex);
        } else {
            splitPoint = getMedian(data, attIndex);
        }

        double r2 = estimateR2(data, attIndex, splitPoint);
        attScorePair[i] = new AttScorePair(attIndex, r2);
    }

    Arrays.sort(attScorePair);

    bestAttIndex = attScorePair[0].index;
    double maxR2 = attScorePair[0].score;
    boolean stop1 = false;

    //        for (int kk = 0; kk < attScorePair.length; kk++) {
    //            System.out.println(attScorePair[kk].score);
    //        }
    //        if (true) {
    //            throw new Exception("stop");
    //        }
    if (attScorePair[0].score <= attScorePair[m_K - 1].score) {
        stop1 = true;
    }

    if (data.numInstances() <= m_MiniLeaf || (depth >= m_MaxDepth && m_MaxDepth != 0)
    //|| maxR2 <= 0.01 // removed 10/01/2013
            || maxR2 >= 0.95 || stop1 // 11/01/13 the paper version doesn't have this
            || data.variance(bestAttIndex) <= 0) {

        m_Attribute = null;
        m_Prototype = AbstractRanker.getAvgRanking(data);
        //m_Prototype = AbstractRanker.getCenterRanking(data, m_ApproxCenterMethod);
        return;
    }

    m_Attribute = data.attribute(bestAttIndex);
    if (!m_UseMedian) {
        m_SplitPoint = data.meanOrMode(bestAttIndex);
    } else {
        m_SplitPoint = getMedian(data, bestAttIndex);
    }
    Instances[] splitData = splitData(data, bestAttIndex, m_SplitPoint);

    m_Successors = new BinaryART[2];
    for (int j = 0; j < 2; j++) {
        m_Successors[j] = new BinaryART();
        m_Successors[j].setMiniLeaf(m_MiniLeaf);
        m_Successors[j].setK(m_K);
        m_Successors[j].setUseMedian(m_UseMedian);
        m_Successors[j].setNumObjects(m_NumObjects);

        m_Successors[j].makeTree(splitData[j], r, depth + 1);
    }
}

From source file:fantail.algorithms.RankingByPairwiseComparison.java

License:Open Source License

@Override
public void buildRanker(Instances data) throws Exception {
    m_Classifiers = new ArrayList<weka.classifiers.AbstractClassifier>();
    m_AlgoPairs = new ArrayList<String>();
    m_NumLabels = Tools.getNumberTargets(data);

    // build pb datasets
    for (int a = 0; a < m_NumLabels; a++) {
        for (int b = 0; b < m_NumLabels; b++) {

            String pairStr = a + "|" + b;
            if (!hasPair(m_AlgoPairs, pairStr) && a != b) {
                m_AlgoPairs.add(pairStr);

                Instances d = new Instances(data);
                d.setClassIndex(-1);/*from w ww .j a v  a2  s .co m*/
                d.deleteAttributeAt(d.numAttributes() - 1);

                weka.filters.unsupervised.attribute.Add add = new weka.filters.unsupervised.attribute.Add();
                add.setInputFormat(d);
                add.setOptions(weka.core.Utils
                        .splitOptions("-T NOM -N class -L " + ((int) a) + "," + ((int) b) + " -C last"));

                d = Filter.useFilter(d, add);
                d.setClassIndex(d.numAttributes() - 1);

                for (int i = 0; i < d.numInstances(); i++) {

                    Instance metaInst = (Instance) data.instance(i);
                    Instance inst = d.instance(i);

                    double[] rankVector = Tools.getTargetVector(metaInst);

                    double rank_a = rankVector[a];
                    double rank_b = rankVector[b];

                    if (rank_a < rank_b) {
                        inst.setClassValue(0.0);
                    } else {
                        inst.setClassValue(1.0);
                    }
                }

                //weka.classifiers.functions.SMO cls = new weka.classifiers.functions.SMO();
                //String ops = "weka.classifiers.functions.SMO -C 1.0 -L 0.001 -P 1.0E-12 -N 0 -V -1 -W 1 -K \"weka.classifiers.functions.supportVector.RBFKernel -C 250007 -G 0.01\"";
                //cls.setOptions(weka.core.Utils.splitOptions(ops));                   
                //cls.buildClassifier(d);
                //weka.classifiers.functions.Logistic cls = new weka.classifiers.functions.Logistic();
                //weka.classifiers.trees.J48 cls = new weka.classifiers.trees.J48();
                //weka.classifiers.rules.ZeroR cls = new weka.classifiers.rules.ZeroR();
                weka.classifiers.trees.DecisionStump cls = new weka.classifiers.trees.DecisionStump();
                cls.buildClassifier(d);
                m_Classifiers.add(cls);
                m_BaseClassifierName = cls.getClass().getSimpleName();
                m_Add = add;
            }
        }
    }
}

From source file:fantail.algorithms.RankingByPairwiseComparison.java

License:Open Source License

@Override
public double[] recommendRanking(Instance testInst) throws Exception {
    Instances tempData = new Instances(testInst.dataset(), 0);
    tempData.add((Instance) testInst.copy());
    // remove the relation att
    tempData.setClassIndex(-1);//from   w  w  w . ja va  2 s  .c o  m
    tempData.deleteAttributeAt(tempData.numAttributes() - 1);
    tempData = Filter.useFilter(tempData, m_Add);
    tempData.setClassIndex(tempData.numAttributes() - 1);
    double predRanking[] = new double[m_NumLabels];
    for (int i = 0; i < predRanking.length; i++) {
        predRanking[i] = m_NumLabels - 1;
    }
    for (int i = 0; i < m_Classifiers.size(); i++) {
        double predIndex = m_Classifiers.get(i).classifyInstance(tempData.instance(0));
        String algoPair = m_AlgoPairs.get(i);
        String[] parts = algoPair.split("\\|");
        int trueIndex = Integer.parseInt(parts[(int) predIndex]);
        predRanking[trueIndex] -= 1;
    }
    predRanking = Tools.doubleArrayToRanking(predRanking);
    return predRanking;
}

From source file:fantail.algorithms.RankingByPairwiseComparison.java

License:Open Source License

public double[] recommendRanking2(Instance testInst) throws Exception {
    Instances tempData = new Instances(testInst.dataset(), 0);
    tempData.add((Instance) testInst.copy());
    // remove the relation att
    tempData.setClassIndex(-1);/* w  w  w.j  a v a 2s.com*/
    tempData.deleteAttributeAt(tempData.numAttributes() - 1);
    tempData = Filter.useFilter(tempData, m_Add);
    tempData.setClassIndex(tempData.numAttributes() - 1);
    double predRanking[] = new double[m_NumLabels];
    for (int i = 0; i < m_Classifiers.size(); i++) {
        double predIndex = m_Classifiers.get(i).classifyInstance(tempData.instance(0));
        double predProb = m_Classifiers.get(i).distributionForInstance(tempData.instance(0))[0];
        String algoPair = m_AlgoPairs.get(i);
        String[] parts = algoPair.split("\\|");
        int trueIndex = Integer.parseInt(parts[(int) predIndex]);
        predRanking[trueIndex] -= predProb;
    }
    return Tools.doubleArrayToRanking(predRanking);
}

From source file:fantail.algorithms.RankingViaRegression.java

License:Open Source License

@Override
public void buildRanker(Instances data) throws Exception {

    Instances workingData = new Instances(data);
    //Instance instTemp = workingData.instance(0);

    //m_LastFeatureIndex = workingData.numAttributes() - 1;
    m_NumFeatures = workingData.numAttributes() - 1;
    m_NumTargets = Tools.getNumberTargets(data);
    m_Classifiers = new AbstractClassifier[m_NumTargets];

    for (int i = 0; i < m_NumTargets; i++) {
        weka.classifiers.functions.LinearRegression lr = new weka.classifiers.functions.LinearRegression();
        m_Classifiers[i] = AbstractClassifier.makeCopy(lr);
    }//from  ww  w.  ja  v  a 2s.co  m

    Instances[] trainingSets = new Instances[m_NumTargets];

    for (int t = 0; t < m_NumTargets; t++) {

        ArrayList attributes = new ArrayList();
        for (int i = 0; i < m_NumFeatures; i++) {
            attributes.add(new Attribute(workingData.attribute(i).name()));
        }

        String targetName = "att-" + (t + 1);
        attributes.add(new Attribute(targetName));

        trainingSets[t] = new Instances("data-" + targetName, attributes, 0);

        for (int j = 0; j < workingData.numInstances(); j++) {
            Instance metaInst = workingData.instance(j);
            double[] ranking = Tools.getTargetVector(metaInst);
            double[] values = new double[trainingSets[t].numAttributes()];

            for (int m = 0; m < (trainingSets[t].numAttributes() - 1); m++) {
                values[m] = metaInst.value(m);
            }
            values[values.length - 1] = ranking[t];
            trainingSets[t].add(new DenseInstance(1.0, values));
        }

        trainingSets[t].setClassIndex(trainingSets[t].numAttributes() - 1);
        m_Classifiers[t].buildClassifier(trainingSets[t]);
    }

    m_TempHeader = new Instances(trainingSets[0], 0);
}

From source file:fantail.algorithms.RankingWithBinaryPCT.java

License:Open Source License

private void makeTree(Instances data, Random r, int depth) throws Exception {

    if (data.numInstances() <= m_MiniLeaf || (depth >= m_MaxDepth && m_MaxDepth != 0)
            || computeVariance(data) <= m_MinVariancea) {
        //|| maxVarianceaReduction <= 0
        //|| data.variance(bestAttIndex) <= 0) { // || data.variance(bestAttIndex) <= 0 ) {   copied from ART, 

        m_Attribute = null;//from  ww  w. java2 s. c o m
        m_Prototype = AbstractRanker.getAvgRanking(data);
        return;
    }

    //
    if (m_K > data.numAttributes()) {
        m_K = data.numAttributes();
    }
    if (m_K < 1) {
        m_K = (int) weka.core.Utils.log2(data.numAttributes()) + 1;
    }

    // TODO:
    int[] attIndice = new int[data.numAttributes() - 1];
    for (int i = 0; i < attIndice.length; i++) {
        attIndice[i] = i;
    }
    for (int i = 0; i < attIndice.length; i++) {
        //int randomPosition = getRandomPosition(r, attIndice);
        int randomPosition = r.nextInt(attIndice.length);
        int temp = attIndice[i];
        attIndice[i] = attIndice[randomPosition];
        attIndice[randomPosition] = temp;
    }

    AttScorePair[] attScorePair = new AttScorePair[m_K];

    for (int i = 0; i < m_K; i++) {
        int attIndex = attIndice[i];

        double splitPoint = Double.NaN;
        if (!m_UseMedian) {
            splitPoint = data.meanOrMode(attIndex);
        } else {
            splitPoint = getMedian(data, attIndex);
        }
        double varianceReduction = computeVarianceReduction(data, attIndex, splitPoint);
        attScorePair[i] = new AttScorePair(attIndex, varianceReduction);

    }

    Arrays.sort(attScorePair);
    int randAttIndex = 0;
    int bestAttIndex = attScorePair[randAttIndex].index;

    double maxVarianceaReduction = attScorePair[randAttIndex].score;

    //        if (data.numInstances() <= 1 * m_MiniLeaf
    //                || (depth >= m_MaxDepth && m_MaxDepth != 0)
    //                || computeVariance(data) <= m_MinVariancea) {
    //                //|| maxVarianceaReduction <= 0
    //                //|| data.variance(bestAttIndex) <= 0) { // || data.variance(bestAttIndex) <= 0 ) {   copied from ART, 
    //
    //            m_Attribute = null;
    //            m_Prototype = AbstractRanker.getAvgRanking(data);
    //            return;
    //        }
    m_Attribute = data.attribute(bestAttIndex);

    if (!m_UseMedian) {
        m_SplitPoint = data.meanOrMode(bestAttIndex);
    } else {
        m_SplitPoint = getMedian(data, bestAttIndex);
    }

    //m_SplitPoint = data.meanOrMode(m_Attribute);
    Instances[] splitData = splitData(data, bestAttIndex, m_SplitPoint);

    //System.out.println(splitData[0].numInstances());
    //System.out.println(splitData[1].numInstances());
    //System.out.println();

    m_Successors = new RankingWithBinaryPCT[2];

    for (int j = 0; j < 2; j++) {
        m_Successors[j] = new RankingWithBinaryPCT();
        m_Successors[j].setMiniLeaf(m_MiniLeaf);
        m_Successors[j].setK(m_K);
        m_Successors[j].setUseMedian(m_UseMedian);
        m_Successors[j].setNumTargetLabels(m_NumTargetLabels);
        m_Successors[j].makeTree(splitData[j], r, depth + 1);
    }
}

From source file:fantail.algorithms.RankingWithkNN.java

License:Open Source License

@Override
public void buildRanker(Instances metaData) throws Exception {

    Instances workingData = new Instances(metaData);
    workingData.setClassIndex(workingData.numAttributes() - 1);
    m_kNN = new IBkEnhanced();
    // EuclideanDistance, ChebyshevDistance, ManhattanDistance
    String ops = "-W 0 -A \"weka.core.neighboursearch.LinearNNSearch -A \\\"weka.core.EuclideanDistance -R first-last\\\"\"";
    m_kNN.setOptions(weka.core.Utils.splitOptions(ops));
    m_kNN.setKNN(m_K);/*from  w ww .j a  va2  s.c o  m*/
    m_kNN.buildClassifier(workingData);
    workingData.setClassIndex(-1);
}

From source file:fantail.core.Tools.java

License:Open Source License

public static int getNumberTargets(Instances data) throws Exception {
    if (data == null) {
        throw new Exception("data can't be null.");
    }/*from  w  ww  .  j a v a 2 s .c o m*/
    if (data.numInstances() <= 0) {
        throw new Exception("data can't be empty.");
    }
    if (data.classIndex() < 0) {
        throw new Exception("class index is not set.");
    }
    Instance tempInst = data.instance(0);
    Instances targets = tempInst.relationalValue(data.classIndex());
    return targets.numAttributes();
}

From source file:fantail.core.Tools.java

License:Open Source License

public static int getNumberTargets(Instance inst) throws Exception {
    if (inst == null) {
        throw new Exception("inst can't be null.");
    }/*from w w w .j  a v  a  2  s . c o  m*/
    Instances targets = inst.relationalValue(inst.classIndex());
    return targets.numAttributes();
}