List of usage examples for weka.core Instances numInstances
public int numInstances()
From source file:fantail.core.Tools.java
License:Open Source License
public static int getNumberTargets(Instances data) throws Exception { if (data == null) { throw new Exception("data can't be null."); }//w ww . j a v a2 s .co m if (data.numInstances() <= 0) { throw new Exception("data can't be empty."); } if (data.classIndex() < 0) { throw new Exception("class index is not set."); } Instance tempInst = data.instance(0); Instances targets = tempInst.relationalValue(data.classIndex()); return targets.numAttributes(); }
From source file:fantail.core.WekaLRHelper.java
License:Open Source License
public static Instances covertArff2Xarff(Instances data) { Instances xarffData = null;/*w w w . j a va 2s .c o m*/ try { String userDIR = System.getProperty("user.dir"); String randFileName = Long.toString(System.nanoTime()).substring(10) + ".fantail.algorithms.LRT.temp.xarff"; String path_separator = System.getProperty("file.separator"); String xarffPath = userDIR + path_separator + randFileName; //System.out.println(m_xarffPath); int numObjects = Tools.getNumberTargets(data); StringBuilder sb = new StringBuilder(); sb.append("@relation arff2xarff").append(System.getProperty("line.separator")); for (int i = 0; i < data.numAttributes() - 1; i++) { sb.append("@attribute "); sb.append(data.attribute(i).name()); sb.append(" numeric").append(System.getProperty("line.separator")); } sb.append("@attribute L RANKING {"); for (int i = 0; i < numObjects; i++) { String spr = ","; if (i == numObjects - 1) { spr = ""; } String targetName = "T" + (i); sb.append(targetName).append(spr); } sb.append("}").append(System.getProperty("line.separator")); sb.append("@data ").append(System.getProperty("line.separator")); for (int i = 0; i < data.numInstances(); i++) { Instance inst = data.instance(i); for (int j = 0; j < data.numAttributes() - 1; j++) { sb.append(inst.value(j)).append(","); } for (int x = 1; x <= numObjects; x++) { int rank = x; String[] names = Tools.getTargetNames(inst); String algo = getName(rank, Tools.getTargetVector(inst), names); System.out.println("\t algo: " + algo + ". 
rank: " + rank + ", Tools.getTargetVector(inst):" + Arrays.toString(Tools.getTargetVector(inst)) + ", " + "names:" + Arrays.toString(names)); String sprr = ">"; if (x == names.length) { sprr = ""; } sb.append(algo).append(sprr); } sb.append(System.getProperty("line.separator")); } File file = new File(xarffPath); Writer output = new BufferedWriter(new FileWriter(file)); output.write(sb.toString()); output.close(); System.out.println(file.getAbsoluteFile()); weka.core.converters.XArffLoader xarffLoader = new weka.core.converters.XArffLoader(); xarffLoader.setSource(new File(xarffPath)); xarffData = xarffLoader.getDataSet(); // File tmpxarffFile = new File(xarffPath); if (tmpxarffFile.exists()) { tmpxarffFile.delete(); } } catch (Exception e) { e.printStackTrace(); System.exit(-1); } return xarffData; }
From source file:fantail.core.WekaLRHelper.java
License:Open Source License
public static Instances covertArff2Xarff2(DATA_TYPE data_type, Instances data) { Instances xarffData = null;/* w ww. j a v a 2 s .c om*/ try { String userDIR = System.getProperty("user.dir"); //String randFileName = Long.toString(System.nanoTime()).substring(10) + ".LRT.temp.xarff"; String randFileName = UUID.randomUUID().toString() + ".LRT.temp.xarff"; String path_separator = System.getProperty("file.separator"); String xarffPath = userDIR + path_separator + randFileName; //System.out.println(m_xarffPath); int numObjects = Tools.getNumberTargets(data); StringBuilder sb = new StringBuilder(); sb.append("@relation arff2xarff").append(System.getProperty("line.separator")); for (int i = 0; i < data.numAttributes() - 1; i++) { sb.append("@attribute "); sb.append(data.attribute(i).name()); sb.append(" numeric").append(System.getProperty("line.separator")); } sb.append("@attribute L RANKING {"); for (int i = 0; i < numObjects; i++) { String spr = ","; if (i == numObjects - 1) { spr = ""; } String targetName = "T" + (i); sb.append(targetName).append(spr); } sb.append("}").append(System.getProperty("line.separator")); sb.append("@data ").append(System.getProperty("line.separator")); for (int i = 0; i < data.numInstances(); i++) { Instance inst = data.instance(i); // determine a ranking of the class labels String ranking_result = determineRanking(Tools.getTargetObjects(inst)); if (ranking_result == null) continue; //System.out.println("\t ranking_result:" + ranking_result); // looking at a>b>c, if the 'a' part consists of more than two partial relations, we need to split them. 
List<ArrayList<String>> label_collection = new ArrayList<ArrayList<String>>(); // generate feature string String attr_set_str = ""; for (int j = 0; j < data.numAttributes() - 1; j++) { attr_set_str += (inst.value(j) + ","); } // split label string via ">" String items[] = ranking_result.split(">"); for (int j = 0; j < items.length; j++) { String labels[] = items[j].split("\\|"); // if the first label has more than or equal to 2 partial relations, we split it. ArrayList<String> label_list = new ArrayList<String>(); if (j == 0) { if (labels.length >= 2) { for (int k = 0; k < labels.length; k++) { label_list.add(labels[k]); } } else { label_list.add(items[j]); } label_collection.add(label_list); } else { if (labels.length >= 3) { for (int k = 0; k < labels.length; k++) { label_list.add(labels[k]); } } else { label_list.add(items[j]); } label_collection.add(label_list); } } List<String> prev_items_in_label_collection = new ArrayList<String>(); for (int j = 0; j < label_collection.size(); j++) { List<String> items_in_label_collection = new ArrayList<String>(); if (j == 0) { for (int k = 0; k < label_collection.get(j).size(); k++) { items_in_label_collection.add(label_collection.get(j).get(k)); } } else { for (int k = 0; k < label_collection.get(j).size(); k++) { for (int l = 0; l < prev_items_in_label_collection.size(); l++) { items_in_label_collection.add(prev_items_in_label_collection.get(l) + ">" + label_collection.get(j).get(k)); } } } prev_items_in_label_collection = items_in_label_collection; } for (int j = 0; j < prev_items_in_label_collection.size(); j++) { //System.out.println("\t\t line:" + prev_items_in_label_collection.get(j)); sb.append(attr_set_str + prev_items_in_label_collection.get(j) + "\n"); } InputStream is = new ByteArrayInputStream(sb.toString().getBytes()); weka.core.converters.XArffLoader xarffLoader = new weka.core.converters.XArffLoader(); xarffLoader.setSource(is); xarffData = xarffLoader.getDataSet(); } } catch (Exception e) { 
e.printStackTrace(); System.exit(-1); } return xarffData; }
From source file:fantail.examples.LabelRankingAlgorithmsMultiRunEvalExample01.java
License:Open Source License
public static void main(String[] args) throws Exception { String arffPath = "/Users/Quan/Dropbox/Ranking_Datasets/LabelRankingSemiSyntheticData/iris_dense.csv.arff"; Instances data = Tools.loadFantailARFFInstances(arffPath); int numRuns = 30; int randSeed = 1; double trainsetRatio = 0.90; System.out.println(arffPath); System.out.println("Num of labels: " + Tools.getNumberTargets(data)); System.out.println("Num of instances: " + data.numInstances()); System.out.println("Num of attributes (incl. target att): " + data.numAttributes()); System.out.println();//ww w. j a va 2 s . c om System.out.println("Num of runs: " + numRuns); System.out.println("trainsetRatio: " + trainsetRatio); System.out.println(); AbstractRanker ranker; MultiRunEvaluation eval; // String strFormat = "%-30s %-30s %-30s"; System.out.println(String.format(strFormat, "<Algorithms>", "<Kendall's tau>", "<Spearman Correlation>")); // ranker = new AverageRanking(); eval = new MultiRunEvaluation(data); eval.multiRunEvaluate(ranker, numRuns, trainsetRatio, randSeed); printResult(strFormat, ranker, eval); ranker = new RankingWithBinaryPCT(); eval = new MultiRunEvaluation(data); eval.multiRunEvaluate(ranker, numRuns, trainsetRatio, randSeed); printResult(strFormat, ranker, eval); ranker = new RankingByPairwiseComparison(); eval = new MultiRunEvaluation(data); eval.multiRunEvaluate(ranker, numRuns, trainsetRatio, randSeed); printResult(strFormat, ranker, eval); ranker = new RankingWithkNN(); eval = new MultiRunEvaluation(data); eval.multiRunEvaluate(ranker, numRuns, trainsetRatio, randSeed); printResult(strFormat, ranker, eval); ranker = new BinaryART(); ((BinaryART) ranker).setK(9999); ((BinaryART) ranker).setMiniLeaf(1); eval = new MultiRunEvaluation(data); eval.multiRunEvaluate(ranker, numRuns, trainsetRatio, randSeed); printResult(strFormat, ranker, eval); ranker = new ARTForests(); ((ARTForests) ranker).setNumIterations(50); ((ARTForests) ranker).setK(0); ((ARTForests) ranker).setNumMinInstances(1); 
eval = new MultiRunEvaluation(data); eval.multiRunEvaluate(ranker, numRuns, trainsetRatio, randSeed); printResult(strFormat, ranker, eval); // ranker = new RankingViaRegression(); eval = new MultiRunEvaluation(data); eval.multiRunEvaluate(ranker, numRuns, trainsetRatio, randSeed); printResult(strFormat, ranker, eval); }
From source file:fantail.examples.LabelRankingSingleAlgoExample01.java
License:Open Source License
/**
 * Multi-run evaluation of a single label-ranking algorithm (ARTForests).
 *
 * <p>Improvement: the data set path can now be supplied as the first
 * command-line argument (falls back to the original hard-coded location).
 *
 * @param args optional: args[0] = path to the Fantail ARFF file
 * @throws Exception if loading or evaluation fails
 */
public static void main(String[] args) throws Exception {
    // allow overriding the (user-specific) default path from the command line
    String arffPath = args.length > 0 ? args[0]
            : "/Users/Quan/Dropbox/Ranking_Datasets/LabelRankingSemiSyntheticData/glass_dense.csv.arff";
    Instances data = Tools.loadFantailARFFInstances(arffPath);
    int numRuns = 30;
    int randSeed = 1;
    double trainsetRatio = 0.50;

    System.out.println(arffPath);
    System.out.println("Num of labels: " + Tools.getNumberTargets(data));
    System.out.println("Num of instances: " + data.numInstances());
    System.out.println("Num of attributes (incl. target att): " + data.numAttributes());
    System.out.println();

    AbstractRanker ranker;
    MultiRunEvaluation eval;
    String strFormat = "%-30s %-30s %-30s";
    System.out.println(String.format(strFormat, "<Algorithms>", "<Kendall>", "<SpearmanCC>"));

    ranker = new ARTForests();
    eval = new MultiRunEvaluation(data);
    eval.multiRunEvaluate(ranker, numRuns, trainsetRatio, randSeed);
    printResult(strFormat, ranker, eval);
}
From source file:farm_ads.Form.java
/**
 * Shows a short summary of the given data set in the result text area.
 *
 * <p>NOTE(review): the attribute count is hard-coded as "54877" instead of
 * being read from {@code i.numAttributes()} — confirm this is intentional.
 *
 * @param i the data set to describe
 */
private void detailInstances(Instances i) {
    StringBuilder info = new StringBuilder();
    info.append("==== Thng tin d liu ==== \n");
    info.append("\n S lng mu: ").append(i.numInstances());
    info.append("\n S thuc tnh: 54877");
    this.tResult.setText(info.toString());
}
From source file:farm_ads.MyClassifier.java
/**
 * Classifies every instance in {@code t} with classifier {@code c}, updates
 * the TP/FP/FN/TN counters, and returns a formatted report (per-instance
 * actual vs. predicted class plus summary statistics).
 *
 * <p>Fixes: O(n^2) string concatenation in the loop replaced with a
 * StringBuilder, and the loop-invariant label array hoisted out of the loop.
 * All output text is unchanged byte-for-byte.
 *
 * @param c the trained classifier to apply
 * @param t the instances to classify
 * @return the report text
 * @throws Exception if classification fails
 */
public String ClassifyMultiInstances(Classifier c, Instances t) throws Exception {
    String format = "%4s %15s %15s\n";
    String format1 = "%15s %15s %15s\n";
    // reset the confusion-matrix counters before scoring
    TP = FP = FN = TN = 0.0;

    StringBuilder s = new StringBuilder();
    s.append("S lng mu: ").append(Integer.toString(t.numInstances())).append("\n\n");
    s.append("======= Kt qu d on qung co========\n");
    s.append(String.format(format, "STT", "Trc d on", "Sau d on"));

    // display names for the two classes; loop-invariant, so created once
    String[] classAds = { "Ph hp", "Khng ph hp" };
    for (int i = 0; i < t.numInstances(); i++) {
        double actValue = t.instance(i).classValue();
        Instance newInst = t.instance(i);
        double pred = c.classifyInstance(newInst);
        countPredicted(actValue, pred);
        s.append(String.format(format, Integer.toString(i + 1), classAds[(int) actValue], classAds[(int) pred]));
    }

    s.append("\nCh thch --> Ph hp: (+1) , Khng ph hp: (-1)\n");
    s.append("\nS mu c phn lp ng: ").append(Integer.toString(getCorrect()));
    s.append("\nS mu c phn lp sai: ").append(Integer.toString(getInCorrect()));
    s.append("\n\n======= ?nh gi kt qu d on ========\n");
    s.append(String.format(format1, "Prediction", "Recall", "F-measure"));
    s.append(String.format(format1, getPrecision(), getRecall(), getFMeasure()));
    return s.toString();
}
From source file:feature.InfoGainEval.java
License:Open Source License
/**
 * Computes the information gain of attribute {@code att} on the given data.
 * Numeric attributes are first discretized (or binarized when
 * {@code m_Binarize} is set).
 *
 * <p>Note: {@code m_InfoGains} is reallocated and only index {@code att} is
 * populated; the full contingency-count pass still runs over ALL attributes.
 *
 * @param data set of instances serving as training data
 * @param att  index of the attribute to evaluate
 * @return the information gain of attribute {@code att}
 * @throws Exception if the evaluator has not been generated successfully
 */
public double computeInfoGain(Instances data, int att) throws Exception {
    // can evaluator handle data?
    getCapabilities().testWithFail(data);
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();
    // make every non-class attribute nominal so counts can be tabulated
    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();
    // Reserve space and initialize counters.
    // counts[k][v][c] = weight of instances with value v of attribute k and
    // class c; row numValues and column numClasses hold missing-value mass.
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }
    // Initialize counters: class distribution assuming every attribute takes
    // its first value (value 0); sparse corrections below subtract from row 0.
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }
    // Get counts: iterate only over the (sparse) non-default values of each
    // instance, moving weight from row 0 to the actual value's row.
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        // attribute value present, class missing
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        // attribute value missing, class present
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        // both attribute value and class missing
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }
    // distribute missing counts if required: spread the missing-value mass
    // over the observed cells in proportion to the known marginals
    if (m_missing_merge) {
        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();
                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }
                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];
                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }
                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }
                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }
                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }
    // Compute info gains: H(class) - H(class | attribute) for attribute att
    m_InfoGains = new double[data.numAttributes()];
    m_InfoGains[att] = (ContingencyTables.entropyOverColumns(counts[att])
            - ContingencyTables.entropyConditionedOnRows(counts[att]));
    return m_InfoGains[att];
}
From source file:feature.InfoGainEval.java
License:Open Source License
/**
 * Initializes the information gain attribute evaluator: discretizes (or
 * binarizes) all numeric attributes, tabulates attribute/class contingency
 * counts, optionally redistributes missing-value mass, and fills
 * {@code m_InfoGains} with the information gain of every non-class attribute.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been generated successfully
 */
public void buildEvaluator(Instances data) throws Exception {
    // can evaluator handle data?
    getCapabilities().testWithFail(data);
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();
    // make every non-class attribute nominal so counts can be tabulated
    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();
    // Reserve space and initialize counters.
    // counts[k][v][c] = weight of instances with value v of attribute k and
    // class c; row numValues and column numClasses hold missing-value mass.
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }
    // Initialize counters: class distribution assuming every attribute takes
    // its first value (value 0); sparse corrections below subtract from row 0.
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }
    // Get counts: iterate only over the (sparse) non-default values of each
    // instance, moving weight from row 0 to the actual value's row.
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        // attribute value present, class missing
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        // attribute value missing, class present
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        // both attribute value and class missing
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }
    // distribute missing counts if required: spread the missing-value mass
    // over the observed cells in proportion to the known marginals
    if (m_missing_merge) {
        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();
                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }
                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];
                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }
                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }
                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }
                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }
    // Compute info gains: H(class) - H(class | attribute) per attribute
    m_InfoGains = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_InfoGains[i] = (ContingencyTables.entropyOverColumns(counts[i])
                    - ContingencyTables.entropyConditionedOnRows(counts[i]));
        }
    }
}
From source file:feature_construction.GeneticProgramming.java
License:LGPL
public static double[][] convertInstancesToInputFeaturesArray(String fileName) { // Create instances (file that contains the inputs to feed through the program) double[][] inputFeatures; try {//from ww w . ja v a2 s .c om //load CSV CSVLoader loaderInputs = new CSVLoader(); loaderInputs.setSource(new File(fileName)); Instances inputSet = loaderInputs.getDataSet(); inputSet.setClassIndex(inputSet.numAttributes() - 1); inputFeatures = new double[inputSet.numInstances()][inputSet.numAttributes()]; // Convert instances to double[][] for (int i = 0; i < inputSet.numInstances(); i++) { for (int j = 0; j < inputSet.numAttributes(); j++) { inputFeatures[i][j] = inputSet.get(i).value(j); } } return inputFeatures; } catch (Exception e) { e.printStackTrace(); } return null; }