List of usage examples for weka.core.Instances.numInstances()
public int numInstances()
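Before the full project examples below, a minimal standalone sketch of the method in isolation: it loads an ARFF file and uses numInstances() to report the dataset size and iterate over its rows. The class name and the file path "iris.arff" are placeholders, not taken from the source files listed below.

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class NumInstancesDemo {
    public static void main(String[] args) throws Exception {
        // Load a dataset; the path is a placeholder
        Instances data = DataSource.read("iris.arff");
        data.setClassIndex(data.numAttributes() - 1); // treat the last attribute as the class

        // numInstances() returns the number of instances (rows) currently in the dataset
        System.out.println("Number of instances: " + data.numInstances());

        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            System.out.println(i + ": " + inst);
        }
    }
}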
From source file:fantail.algorithms.BinaryART.java
License:Open Source License
private void makeTree(Instances data, java.util.Random r, int depth) throws Exception {
    if (m_K > data.numAttributes()) {
        m_K = data.numAttributes() - 1;
    }
    if (m_K < 1) {
        m_K = (int) weka.core.Utils.log2(data.numAttributes()) + 1;
    }
    int[] randAtts = new int[data.numAttributes() - 1]; // TODO: handle class target att
    for (int i = 0; i < randAtts.length; i++) {
        randAtts[i] = i;
    }
    // shuffle the candidate attribute indices
    for (int i = 0; i < randAtts.length; i++) {
        int randomPosition = r.nextInt(randAtts.length);
        int temp = randAtts[i];
        randAtts[i] = randAtts[randomPosition];
        randAtts[randomPosition] = temp;
    }
    int bestAttIndex = -1;
    AttScorePair[] attScorePair = new AttScorePair[m_K];
    //double currentR2 = estimateAvgDistanceSpearman(data);
    // score the first m_K shuffled attributes
    for (int i = 0; i < m_K; i++) {
        int attIndex = randAtts[i];
        double splitPoint = Double.NaN;
        if (!m_UseMedian) {
            splitPoint = data.meanOrMode(attIndex);
        } else {
            splitPoint = getMedian(data, attIndex);
        }
        double r2 = estimateR2(data, attIndex, splitPoint);
        attScorePair[i] = new AttScorePair(attIndex, r2);
    }
    Arrays.sort(attScorePair);
    bestAttIndex = attScorePair[0].index;
    double maxR2 = attScorePair[0].score;
    boolean stop1 = false;
    if (attScorePair[0].score <= attScorePair[m_K - 1].score) {
        stop1 = true;
    }
    // stopping criteria: make this node a leaf holding the average ranking
    if (data.numInstances() <= m_MiniLeaf
            || (depth >= m_MaxDepth && m_MaxDepth != 0)
            //|| maxR2 <= 0.01 // removed 10/01/2013
            || maxR2 >= 0.95
            || stop1 // 11/01/13 the paper version doesn't have this
            || data.variance(bestAttIndex) <= 0) {
        m_Attribute = null;
        m_Prototype = AbstractRanker.getAvgRanking(data);
        //m_Prototype = AbstractRanker.getCenterRanking(data, m_ApproxCenterMethod);
        return;
    }
    m_Attribute = data.attribute(bestAttIndex);
    if (!m_UseMedian) {
        m_SplitPoint = data.meanOrMode(bestAttIndex);
    } else {
        m_SplitPoint = getMedian(data, bestAttIndex);
    }
    Instances[] splitData = splitData(data, bestAttIndex, m_SplitPoint);
    // grow the two child trees recursively
    m_Successors = new BinaryART[2];
    for (int j = 0; j < 2; j++) {
        m_Successors[j] = new BinaryART();
        m_Successors[j].setMiniLeaf(m_MiniLeaf);
        m_Successors[j].setK(m_K);
        m_Successors[j].setUseMedian(m_UseMedian);
        m_Successors[j].setNumObjects(m_NumObjects);
        m_Successors[j].makeTree(splitData[j], r, depth + 1);
    }
}
From source file:fantail.algorithms.RankingByPairwiseComparison.java
License:Open Source License
@Override
public void buildRanker(Instances data) throws Exception {
    m_Classifiers = new ArrayList<weka.classifiers.AbstractClassifier>();
    m_AlgoPairs = new ArrayList<String>();
    m_NumLabels = Tools.getNumberTargets(data);
    // build one pairwise-binary dataset per label pair (a, b)
    for (int a = 0; a < m_NumLabels; a++) {
        for (int b = 0; b < m_NumLabels; b++) {
            String pairStr = a + "|" + b;
            if (!hasPair(m_AlgoPairs, pairStr) && a != b) {
                m_AlgoPairs.add(pairStr);
                Instances d = new Instances(data);
                d.setClassIndex(-1);
                d.deleteAttributeAt(d.numAttributes() - 1);
                weka.filters.unsupervised.attribute.Add add = new weka.filters.unsupervised.attribute.Add();
                add.setInputFormat(d);
                add.setOptions(weka.core.Utils
                        .splitOptions("-T NOM -N class -L " + ((int) a) + "," + ((int) b) + " -C last"));
                d = Filter.useFilter(d, add);
                d.setClassIndex(d.numAttributes() - 1);
                // label each instance by which of the two targets is ranked higher
                for (int i = 0; i < d.numInstances(); i++) {
                    Instance metaInst = (Instance) data.instance(i);
                    Instance inst = d.instance(i);
                    double[] rankVector = Tools.getTargetVector(metaInst);
                    double rank_a = rankVector[a];
                    double rank_b = rankVector[b];
                    if (rank_a < rank_b) {
                        inst.setClassValue(0.0);
                    } else {
                        inst.setClassValue(1.0);
                    }
                }
                //weka.classifiers.functions.SMO cls = new weka.classifiers.functions.SMO();
                //String ops = "weka.classifiers.functions.SMO -C 1.0 -L 0.001 -P 1.0E-12 -N 0 -V -1 -W 1 -K \"weka.classifiers.functions.supportVector.RBFKernel -C 250007 -G 0.01\"";
                //cls.setOptions(weka.core.Utils.splitOptions(ops));
                //cls.buildClassifier(d);
                //weka.classifiers.functions.Logistic cls = new weka.classifiers.functions.Logistic();
                //weka.classifiers.trees.J48 cls = new weka.classifiers.trees.J48();
                //weka.classifiers.rules.ZeroR cls = new weka.classifiers.rules.ZeroR();
                weka.classifiers.trees.DecisionStump cls = new weka.classifiers.trees.DecisionStump();
                cls.buildClassifier(d);
                m_Classifiers.add(cls);
                m_BaseClassifierName = cls.getClass().getSimpleName();
                m_Add = add;
            }
        }
    }
}
From source file:fantail.algorithms.RankingViaRegression.java
License:Open Source License
@Override
public void buildRanker(Instances data) throws Exception {
    Instances workingData = new Instances(data);
    //Instance instTemp = workingData.instance(0);
    //m_LastFeatureIndex = workingData.numAttributes() - 1;
    m_NumFeatures = workingData.numAttributes() - 1;
    m_NumTargets = Tools.getNumberTargets(data);
    m_Classifiers = new AbstractClassifier[m_NumTargets];
    for (int i = 0; i < m_NumTargets; i++) {
        weka.classifiers.functions.LinearRegression lr = new weka.classifiers.functions.LinearRegression();
        m_Classifiers[i] = AbstractClassifier.makeCopy(lr);
    }
    // build one regression dataset (and model) per target label
    Instances[] trainingSets = new Instances[m_NumTargets];
    for (int t = 0; t < m_NumTargets; t++) {
        ArrayList<Attribute> attributes = new ArrayList<Attribute>();
        for (int i = 0; i < m_NumFeatures; i++) {
            attributes.add(new Attribute(workingData.attribute(i).name()));
        }
        String targetName = "att-" + (t + 1);
        attributes.add(new Attribute(targetName));
        trainingSets[t] = new Instances("data-" + targetName, attributes, 0);
        for (int j = 0; j < workingData.numInstances(); j++) {
            Instance metaInst = workingData.instance(j);
            double[] ranking = Tools.getTargetVector(metaInst);
            double[] values = new double[trainingSets[t].numAttributes()];
            for (int m = 0; m < (trainingSets[t].numAttributes() - 1); m++) {
                values[m] = metaInst.value(m);
            }
            values[values.length - 1] = ranking[t];
            trainingSets[t].add(new DenseInstance(1.0, values));
        }
        trainingSets[t].setClassIndex(trainingSets[t].numAttributes() - 1);
        m_Classifiers[t].buildClassifier(trainingSets[t]);
    }
    m_TempHeader = new Instances(trainingSets[0], 0);
}
From source file:fantail.algorithms.RankingWithBinaryPCT.java
License:Open Source License
private double computeVariance(Instances data) throws Exception {
    double[][] targets = new double[data.numInstances()][];
    for (int i = 0; i < data.numInstances(); i++) {
        targets[i] = Tools.getTargetVector(data.instance(i));
    }
    double sumVar = 0;
    for (int i = 0; i < m_NumTargetLabels; i++) {
        double[] target_i = new double[data.numInstances()];
        for (int j = 0; j < data.numInstances(); j++) {
            Instance metaInst = (Instance) data.instance(j);
            target_i[j] = targets[j][i] * metaInst.weight();
        }
        sumVar += weka.core.Utils.variance(target_i);
    }
    return sumVar / m_NumTargetLabels;
}
From source file:fantail.algorithms.RankingWithBinaryPCT.java
License:Open Source License
private double computeVarianceReduction(Instances data, int attIndex, double splitPoint) throws Exception {
    //double varianceaE = computeVariance(data); // doesn't make sense to compute this
    Instances[] P = splitData(data, attIndex, splitPoint);
    double variancePk = 0;
    for (int k = 0; k < P.length; k++) {
        variancePk += (1.0 * P[k].numInstances() / data.numInstances() * computeVariance(P[k]));
    }
    return -variancePk;
}
From source file:fantail.algorithms.RankingWithBinaryPCT.java
License:Open Source License
private Instances[] splitData(Instances data, int attIndex, double splitPoint) throws Exception {
    Instances[] subsets = new Instances[2];
    subsets[0] = new Instances(data, 0);
    subsets[1] = new Instances(data, 0);
    for (int i = 0; i < data.numInstances(); i++) {
        Instance inst = data.instance(i);
        if (inst.value(attIndex) <= splitPoint
                && (subsets[0].numInstances() <= 0.5 * data.numInstances())) {
            subsets[0].add(inst);
        } else {
            subsets[1].add(inst);
        }
    }
    // TODO:
    if (subsets[1].numInstances() == 0) {
        subsets[1].add(subsets[0].instance(0));
    }
    if (subsets[0].numInstances() == 0) {
        subsets[0].add(subsets[1].instance(0));
    }
    return subsets;
}
From source file:fantail.algorithms.RankingWithBinaryPCT.java
License:Open Source License
private Instances[] splitData2(Instances data, int attIndex, double splitPoint) throws Exception {
    Instances[] subsets = new Instances[2];
    subsets[0] = new Instances(data, 0);
    subsets[1] = new Instances(data, 0);
    for (int i = 0; i < data.numInstances(); i++) {
        Instance inst = data.instance(i);
        if (inst.value(attIndex) <= splitPoint) {
            subsets[0].add(inst);
        } else {
            subsets[1].add(inst);
        }
    }
    // TODO:
    if (subsets[1].numInstances() == 0) {
        subsets[1].add(subsets[0].instance(0));
    }
    if (subsets[0].numInstances() == 0) {
        subsets[0].add(subsets[1].instance(0));
    }
    return subsets;
}
From source file:fantail.algorithms.RankingWithBinaryPCT.java
License:Open Source License
private double getMedian(Instances data, int attIndex) throws Exception {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (int i = 0; i < data.numInstances(); i++) {
        Instance inst = (Instance) data.instance(i);
        stats.addValue(inst.value(attIndex));
    }
    double median = stats.getPercentile(50);
    return median;
}
From source file:fantail.algorithms.RankingWithBinaryPCT.java
License:Open Source License
private void makeTree(Instances data, Random r, int depth) throws Exception {
    // stopping criteria: make this node a leaf holding the average ranking
    if (data.numInstances() <= m_MiniLeaf
            || (depth >= m_MaxDepth && m_MaxDepth != 0)
            || computeVariance(data) <= m_MinVariancea) {
            //|| maxVarianceaReduction <= 0
            //|| data.variance(bestAttIndex) <= 0) { // copied from ART
        m_Attribute = null;
        m_Prototype = AbstractRanker.getAvgRanking(data);
        return;
    }
    if (m_K > data.numAttributes()) {
        m_K = data.numAttributes();
    }
    if (m_K < 1) {
        m_K = (int) weka.core.Utils.log2(data.numAttributes()) + 1;
    }
    // TODO:
    int[] attIndice = new int[data.numAttributes() - 1];
    for (int i = 0; i < attIndice.length; i++) {
        attIndice[i] = i;
    }
    // shuffle the candidate attribute indices
    for (int i = 0; i < attIndice.length; i++) {
        //int randomPosition = getRandomPosition(r, attIndice);
        int randomPosition = r.nextInt(attIndice.length);
        int temp = attIndice[i];
        attIndice[i] = attIndice[randomPosition];
        attIndice[randomPosition] = temp;
    }
    // score the first m_K shuffled attributes by variance reduction
    AttScorePair[] attScorePair = new AttScorePair[m_K];
    for (int i = 0; i < m_K; i++) {
        int attIndex = attIndice[i];
        double splitPoint = Double.NaN;
        if (!m_UseMedian) {
            splitPoint = data.meanOrMode(attIndex);
        } else {
            splitPoint = getMedian(data, attIndex);
        }
        double varianceReduction = computeVarianceReduction(data, attIndex, splitPoint);
        attScorePair[i] = new AttScorePair(attIndex, varianceReduction);
    }
    Arrays.sort(attScorePair);
    int randAttIndex = 0;
    int bestAttIndex = attScorePair[randAttIndex].index;
    double maxVarianceaReduction = attScorePair[randAttIndex].score;
    m_Attribute = data.attribute(bestAttIndex);
    if (!m_UseMedian) {
        m_SplitPoint = data.meanOrMode(bestAttIndex);
    } else {
        m_SplitPoint = getMedian(data, bestAttIndex);
    }
    //m_SplitPoint = data.meanOrMode(m_Attribute);
    Instances[] splitData = splitData(data, bestAttIndex, m_SplitPoint);
    // grow the two child trees recursively
    m_Successors = new RankingWithBinaryPCT[2];
    for (int j = 0; j < 2; j++) {
        m_Successors[j] = new RankingWithBinaryPCT();
        m_Successors[j].setMiniLeaf(m_MiniLeaf);
        m_Successors[j].setK(m_K);
        m_Successors[j].setUseMedian(m_UseMedian);
        m_Successors[j].setNumTargetLabels(m_NumTargetLabels);
        m_Successors[j].makeTree(splitData[j], r, depth + 1);
    }
}
From source file:fantail.core.MultiRunEvaluation.java
License:Open Source License
public void multiRunEvaluate(AbstractRanker ranker, int randSeed) throws Exception {
    //m_NumRuns = numRuns;
    m_Rand = new Random();
    int nFold = 5;
    m_ScoreKendall = new double[nFold];
    m_ScoreSpearmanCC = new double[nFold];
    m_ScorePrecision = new double[nFold];
    //m_Data.randomize(m_Rand);
    for (int i = 0; i < nFold; i++) {
        Instances train = m_Data.trainCV(nFold, i);
        Instances test = m_Data.testCV(nFold, i);
        ranker.buildRanker(train);
        double localScoreKendall = 0;
        double localScoreSpearmanCC = 0;
        double localScorePrecision = 0;
        for (int m = 0; m < test.numInstances(); m++) {
            Instance inst = test.instance(m);
            double[] pred = ranker.recommendRanking(inst);
            double[] actual = Tools.getTargetVector(inst);
            List<ReasonerComponent> pred_list = sortRanking(pred);
            List<ReasonerComponent> actual_list = sortRanking(actual);
            double precision = computePrecision(pred_list, actual_list);
            localScorePrecision += precision;
            //localScoreKendall += EvalTools.computeKendallTau(actual, pred);
            //localScoreSpearmanCC += EvalTools.computeSpearmanCC(actual, pred);
        }
        // average the fold scores over the number of test instances
        localScoreKendall /= test.numInstances();
        localScoreSpearmanCC /= test.numInstances();
        localScorePrecision /= test.numInstances();
        m_ScoreKendall[i] += localScoreKendall;
        m_ScoreSpearmanCC[i] += localScoreSpearmanCC;
        m_ScorePrecision[i] += localScorePrecision;
    }
}