List of usage examples for weka.core Instances numAttributes
public int numAttributes()
From source file:facebookpostpuller.SpecificUserModel.java
public void convertToArff(File file) throws Exception { FastVector atts;/*from w ww . j a v a 2s. co m*/ FastVector attVals; Instances data; double[] vals; file = new File(file + ".arff"); atts = new FastVector(); atts.addElement(new Attribute(("name"), (FastVector) null)); // 5/27/2014 atts.addElement(new Attribute(("message"), (FastVector) null)); attVals = new FastVector(); attVals.addElement("13-17"); attVals.addElement("18-24"); attVals.addElement("25-34"); attVals.addElement("35-44"); attVals.addElement("45-54"); atts.addElement(new Attribute("age-group", attVals)); data = new Instances("predict_age", atts, 0); Iterator it = posts.entrySet().iterator(); while (it.hasNext()) { Map.Entry pairs = (Map.Entry) it.next(); vals = new double[data.numAttributes()]; User user = (User) pairs.getValue(); String name = user.getName(); // 5/27/2014 String message = ((Post) (pairs.getKey())).getMessage(); Preprocess pre = new Preprocess(); message = pre.emoticons(message); message = pre.emoji(message); message = pre.url(message); //StringFilter filter = new StringFilter(message); vals[0] = data.attribute(0).addStringValue(name); // 5/27/2014 vals[1] = data.attribute(1).addStringValue(message); if (ageGroup.equals("13-17")) { vals[2] = attVals.indexOf("13-17"); } else if (ageGroup.equals("18-24")) { vals[2] = attVals.indexOf("18-24"); } else if (ageGroup.equals("25-34")) { vals[2] = attVals.indexOf("25-34"); } else if (ageGroup.equals("35-44")) { vals[2] = attVals.indexOf("35-44"); } else if (ageGroup.equals("45-54")) { // Modified 6/11/2014 vals[2] = attVals.indexOf("45-54"); } data.add(new Instance(1.0, vals)); it.remove(); } ArffSaver saver = new ArffSaver(); saver.setInstances(data); saver.setFile(file); saver.writeBatch(); }
From source file:fantail.algorithms.BinaryART.java
License:Open Source License
/**
 * Grows the subtree rooted at this node from the given data.
 *
 * <p>Up to {@code m_K} attributes are drawn at random from indices
 * {@code 0 .. numAttributes() - 2} (the last attribute is never a split
 * candidate), each is scored with {@code estimateR2} at its mean/mode or
 * median split point, and the scored pairs are sorted with their natural
 * ordering. The first pair after sorting is taken as the best attribute.
 *
 * <p>The node becomes a leaf ({@code m_Attribute == null}, prototype set to
 * the average ranking of {@code data}) when there is no candidate attribute
 * or any stopping rule below holds; otherwise the data is split in two and
 * both successors are built recursively.
 *
 * <p>Side effect: {@code m_K} is rewritten so that it lies within the number
 * of candidate attributes. Previously it was compared against
 * {@code numAttributes()}, which allowed {@code m_K == numAttributes()} and
 * caused an out-of-bounds read of the candidate index array.
 *
 * @param data  instances reaching this node
 * @param r     random source used to shuffle the candidate attributes
 * @param depth depth of this node; successors are built with {@code depth + 1}
 * @throws Exception propagated from the scoring, splitting or ranking helpers
 */
private void makeTree(Instances data, java.util.Random r, int depth) throws Exception {
    // TODO: handle class target att -- for now the last attribute is simply excluded.
    final int numCandidates = data.numAttributes() - 1;
    if (numCandidates < 1) {
        // Nothing to split on: make this node a leaf instead of indexing an empty array.
        m_Attribute = null;
        m_Prototype = AbstractRanker.getAvgRanking(data);
        return;
    }

    if (m_K < 1) {
        m_K = (int) weka.core.Utils.log2(data.numAttributes()) + 1;
    }
    if (m_K > numCandidates) {
        m_K = numCandidates;
    }

    // Shuffle the candidate indices (kept as-is so the sequence of random draws is unchanged).
    int[] randAtts = new int[numCandidates];
    for (int i = 0; i < randAtts.length; i++) {
        randAtts[i] = i;
    }
    for (int i = 0; i < randAtts.length; i++) {
        int randomPosition = r.nextInt(randAtts.length);
        int temp = randAtts[i];
        randAtts[i] = randAtts[randomPosition];
        randAtts[randomPosition] = temp;
    }

    // Score the first m_K shuffled attributes.
    AttScorePair[] attScorePair = new AttScorePair[m_K];
    for (int i = 0; i < m_K; i++) {
        int attIndex = randAtts[i];
        double splitPoint = m_UseMedian ? getMedian(data, attIndex) : data.meanOrMode(attIndex);
        double r2 = estimateR2(data, attIndex, splitPoint);
        attScorePair[i] = new AttScorePair(attIndex, r2);
    }
    Arrays.sort(attScorePair);

    int bestAttIndex = attScorePair[0].index;
    double maxR2 = attScorePair[0].score;

    // Stop when the first and last sorted scores do not differ in the expected
    // direction (note: always true when m_K == 1).
    boolean stop1 = attScorePair[0].score <= attScorePair[m_K - 1].score;

    if (data.numInstances() <= m_MiniLeaf
            || (depth >= m_MaxDepth && m_MaxDepth != 0)
            || maxR2 >= 0.95
            || stop1 // 11/01/13 the paper version doesn't have this
            || data.variance(bestAttIndex) <= 0) {
        m_Attribute = null;
        m_Prototype = AbstractRanker.getAvgRanking(data);
        return;
    }

    m_Attribute = data.attribute(bestAttIndex);
    m_SplitPoint = m_UseMedian ? getMedian(data, bestAttIndex) : data.meanOrMode(bestAttIndex);

    Instances[] splitData = splitData(data, bestAttIndex, m_SplitPoint);

    // NOTE(review): m_MaxDepth is not passed on to the successors here -- confirm
    // whether they are meant to use their own default.
    m_Successors = new BinaryART[2];
    for (int j = 0; j < 2; j++) {
        m_Successors[j] = new BinaryART();
        m_Successors[j].setMiniLeaf(m_MiniLeaf);
        m_Successors[j].setK(m_K);
        m_Successors[j].setUseMedian(m_UseMedian);
        m_Successors[j].setNumObjects(m_NumObjects);
        m_Successors[j].makeTree(splitData[j], r, depth + 1);
    }
}
From source file:fantail.algorithms.RankingByPairwiseComparison.java
License:Open Source License
@Override public void buildRanker(Instances data) throws Exception { m_Classifiers = new ArrayList<weka.classifiers.AbstractClassifier>(); m_AlgoPairs = new ArrayList<String>(); m_NumLabels = Tools.getNumberTargets(data); // build pb datasets for (int a = 0; a < m_NumLabels; a++) { for (int b = 0; b < m_NumLabels; b++) { String pairStr = a + "|" + b; if (!hasPair(m_AlgoPairs, pairStr) && a != b) { m_AlgoPairs.add(pairStr); Instances d = new Instances(data); d.setClassIndex(-1);/*from w ww .j a v a2 s .co m*/ d.deleteAttributeAt(d.numAttributes() - 1); weka.filters.unsupervised.attribute.Add add = new weka.filters.unsupervised.attribute.Add(); add.setInputFormat(d); add.setOptions(weka.core.Utils .splitOptions("-T NOM -N class -L " + ((int) a) + "," + ((int) b) + " -C last")); d = Filter.useFilter(d, add); d.setClassIndex(d.numAttributes() - 1); for (int i = 0; i < d.numInstances(); i++) { Instance metaInst = (Instance) data.instance(i); Instance inst = d.instance(i); double[] rankVector = Tools.getTargetVector(metaInst); double rank_a = rankVector[a]; double rank_b = rankVector[b]; if (rank_a < rank_b) { inst.setClassValue(0.0); } else { inst.setClassValue(1.0); } } //weka.classifiers.functions.SMO cls = new weka.classifiers.functions.SMO(); //String ops = "weka.classifiers.functions.SMO -C 1.0 -L 0.001 -P 1.0E-12 -N 0 -V -1 -W 1 -K \"weka.classifiers.functions.supportVector.RBFKernel -C 250007 -G 0.01\""; //cls.setOptions(weka.core.Utils.splitOptions(ops)); //cls.buildClassifier(d); //weka.classifiers.functions.Logistic cls = new weka.classifiers.functions.Logistic(); //weka.classifiers.trees.J48 cls = new weka.classifiers.trees.J48(); //weka.classifiers.rules.ZeroR cls = new weka.classifiers.rules.ZeroR(); weka.classifiers.trees.DecisionStump cls = new weka.classifiers.trees.DecisionStump(); cls.buildClassifier(d); m_Classifiers.add(cls); m_BaseClassifierName = cls.getClass().getSimpleName(); m_Add = add; } } } }
From source file:fantail.algorithms.RankingByPairwiseComparison.java
License:Open Source License
@Override public double[] recommendRanking(Instance testInst) throws Exception { Instances tempData = new Instances(testInst.dataset(), 0); tempData.add((Instance) testInst.copy()); // remove the relation att tempData.setClassIndex(-1);//from w w w . ja va 2 s .c o m tempData.deleteAttributeAt(tempData.numAttributes() - 1); tempData = Filter.useFilter(tempData, m_Add); tempData.setClassIndex(tempData.numAttributes() - 1); double predRanking[] = new double[m_NumLabels]; for (int i = 0; i < predRanking.length; i++) { predRanking[i] = m_NumLabels - 1; } for (int i = 0; i < m_Classifiers.size(); i++) { double predIndex = m_Classifiers.get(i).classifyInstance(tempData.instance(0)); String algoPair = m_AlgoPairs.get(i); String[] parts = algoPair.split("\\|"); int trueIndex = Integer.parseInt(parts[(int) predIndex]); predRanking[trueIndex] -= 1; } predRanking = Tools.doubleArrayToRanking(predRanking); return predRanking; }
From source file:fantail.algorithms.RankingByPairwiseComparison.java
License:Open Source License
public double[] recommendRanking2(Instance testInst) throws Exception { Instances tempData = new Instances(testInst.dataset(), 0); tempData.add((Instance) testInst.copy()); // remove the relation att tempData.setClassIndex(-1);/* w w w.j a v a 2s.com*/ tempData.deleteAttributeAt(tempData.numAttributes() - 1); tempData = Filter.useFilter(tempData, m_Add); tempData.setClassIndex(tempData.numAttributes() - 1); double predRanking[] = new double[m_NumLabels]; for (int i = 0; i < m_Classifiers.size(); i++) { double predIndex = m_Classifiers.get(i).classifyInstance(tempData.instance(0)); double predProb = m_Classifiers.get(i).distributionForInstance(tempData.instance(0))[0]; String algoPair = m_AlgoPairs.get(i); String[] parts = algoPair.split("\\|"); int trueIndex = Integer.parseInt(parts[(int) predIndex]); predRanking[trueIndex] -= predProb; } return Tools.doubleArrayToRanking(predRanking); }
From source file:fantail.algorithms.RankingViaRegression.java
License:Open Source License
@Override public void buildRanker(Instances data) throws Exception { Instances workingData = new Instances(data); //Instance instTemp = workingData.instance(0); //m_LastFeatureIndex = workingData.numAttributes() - 1; m_NumFeatures = workingData.numAttributes() - 1; m_NumTargets = Tools.getNumberTargets(data); m_Classifiers = new AbstractClassifier[m_NumTargets]; for (int i = 0; i < m_NumTargets; i++) { weka.classifiers.functions.LinearRegression lr = new weka.classifiers.functions.LinearRegression(); m_Classifiers[i] = AbstractClassifier.makeCopy(lr); }//from ww w. ja v a 2s.co m Instances[] trainingSets = new Instances[m_NumTargets]; for (int t = 0; t < m_NumTargets; t++) { ArrayList attributes = new ArrayList(); for (int i = 0; i < m_NumFeatures; i++) { attributes.add(new Attribute(workingData.attribute(i).name())); } String targetName = "att-" + (t + 1); attributes.add(new Attribute(targetName)); trainingSets[t] = new Instances("data-" + targetName, attributes, 0); for (int j = 0; j < workingData.numInstances(); j++) { Instance metaInst = workingData.instance(j); double[] ranking = Tools.getTargetVector(metaInst); double[] values = new double[trainingSets[t].numAttributes()]; for (int m = 0; m < (trainingSets[t].numAttributes() - 1); m++) { values[m] = metaInst.value(m); } values[values.length - 1] = ranking[t]; trainingSets[t].add(new DenseInstance(1.0, values)); } trainingSets[t].setClassIndex(trainingSets[t].numAttributes() - 1); m_Classifiers[t].buildClassifier(trainingSets[t]); } m_TempHeader = new Instances(trainingSets[0], 0); }
From source file:fantail.algorithms.RankingWithBinaryPCT.java
License:Open Source License
private void makeTree(Instances data, Random r, int depth) throws Exception { if (data.numInstances() <= m_MiniLeaf || (depth >= m_MaxDepth && m_MaxDepth != 0) || computeVariance(data) <= m_MinVariancea) { //|| maxVarianceaReduction <= 0 //|| data.variance(bestAttIndex) <= 0) { // || data.variance(bestAttIndex) <= 0 ) { copied from ART, m_Attribute = null;//from ww w. java2 s. c o m m_Prototype = AbstractRanker.getAvgRanking(data); return; } // if (m_K > data.numAttributes()) { m_K = data.numAttributes(); } if (m_K < 1) { m_K = (int) weka.core.Utils.log2(data.numAttributes()) + 1; } // TODO: int[] attIndice = new int[data.numAttributes() - 1]; for (int i = 0; i < attIndice.length; i++) { attIndice[i] = i; } for (int i = 0; i < attIndice.length; i++) { //int randomPosition = getRandomPosition(r, attIndice); int randomPosition = r.nextInt(attIndice.length); int temp = attIndice[i]; attIndice[i] = attIndice[randomPosition]; attIndice[randomPosition] = temp; } AttScorePair[] attScorePair = new AttScorePair[m_K]; for (int i = 0; i < m_K; i++) { int attIndex = attIndice[i]; double splitPoint = Double.NaN; if (!m_UseMedian) { splitPoint = data.meanOrMode(attIndex); } else { splitPoint = getMedian(data, attIndex); } double varianceReduction = computeVarianceReduction(data, attIndex, splitPoint); attScorePair[i] = new AttScorePair(attIndex, varianceReduction); } Arrays.sort(attScorePair); int randAttIndex = 0; int bestAttIndex = attScorePair[randAttIndex].index; double maxVarianceaReduction = attScorePair[randAttIndex].score; // if (data.numInstances() <= 1 * m_MiniLeaf // || (depth >= m_MaxDepth && m_MaxDepth != 0) // || computeVariance(data) <= m_MinVariancea) { // //|| maxVarianceaReduction <= 0 // //|| data.variance(bestAttIndex) <= 0) { // || data.variance(bestAttIndex) <= 0 ) { copied from ART, // // m_Attribute = null; // m_Prototype = AbstractRanker.getAvgRanking(data); // return; // } m_Attribute = data.attribute(bestAttIndex); if (!m_UseMedian) { m_SplitPoint = 
data.meanOrMode(bestAttIndex); } else { m_SplitPoint = getMedian(data, bestAttIndex); } //m_SplitPoint = data.meanOrMode(m_Attribute); Instances[] splitData = splitData(data, bestAttIndex, m_SplitPoint); //System.out.println(splitData[0].numInstances()); //System.out.println(splitData[1].numInstances()); //System.out.println(); m_Successors = new RankingWithBinaryPCT[2]; for (int j = 0; j < 2; j++) { m_Successors[j] = new RankingWithBinaryPCT(); m_Successors[j].setMiniLeaf(m_MiniLeaf); m_Successors[j].setK(m_K); m_Successors[j].setUseMedian(m_UseMedian); m_Successors[j].setNumTargetLabels(m_NumTargetLabels); m_Successors[j].makeTree(splitData[j], r, depth + 1); } }
From source file:fantail.algorithms.RankingWithkNN.java
License:Open Source License
@Override public void buildRanker(Instances metaData) throws Exception { Instances workingData = new Instances(metaData); workingData.setClassIndex(workingData.numAttributes() - 1); m_kNN = new IBkEnhanced(); // EuclideanDistance, ChebyshevDistance, ManhattanDistance String ops = "-W 0 -A \"weka.core.neighboursearch.LinearNNSearch -A \\\"weka.core.EuclideanDistance -R first-last\\\"\""; m_kNN.setOptions(weka.core.Utils.splitOptions(ops)); m_kNN.setKNN(m_K);/*from w ww .j a va2 s.c o m*/ m_kNN.buildClassifier(workingData); workingData.setClassIndex(-1); }
From source file:fantail.core.Tools.java
License:Open Source License
/**
 * Returns the number of attributes of the relational value stored at the
 * class index of the first instance in {@code data}.
 *
 * @param data data set with at least one instance and a class index set
 * @return the attribute count of the first instance's relational class value
 * @throws Exception if {@code data} is null, empty, or has no class index set
 */
public static int getNumberTargets(Instances data) throws Exception {
    // Checks run in order; the first one that fails decides the message.
    String problem = null;
    if (data == null) {
        problem = "data can't be null.";
    } else if (data.numInstances() <= 0) {
        problem = "data can't be empty.";
    } else if (data.classIndex() < 0) {
        problem = "class index is not set.";
    }
    if (problem != null) {
        throw new Exception(problem);
    }

    Instance first = data.instance(0);
    return first.relationalValue(data.classIndex()).numAttributes();
}
From source file:fantail.core.Tools.java
License:Open Source License
/**
 * Returns the number of attributes of the relational value stored at the
 * class index of the given instance.
 *
 * <p>An unset (negative) class index is rejected with the same message as the
 * {@code Instances} overload of this method, rather than being passed on to
 * {@code relationalValue}.
 *
 * @param inst instance whose class attribute holds the relational value
 * @return the attribute count of the instance's relational class value
 * @throws Exception if {@code inst} is null or its class index is not set
 */
public static int getNumberTargets(Instance inst) throws Exception {
    if (inst == null) {
        throw new Exception("inst can't be null.");
    }
    int classIndex = inst.classIndex();
    if (classIndex < 0) {
        throw new Exception("class index is not set.");
    }
    Instances targets = inst.relationalValue(classIndex);
    return targets.numAttributes();
}