List of usage examples for weka.core.Instances.setClassIndex
public void setClassIndex(int classIndex)
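All of the examples below follow the same basic pattern: load a dataset, then call setClassIndex to tell Weka which attribute is the class (target) attribute before training, classifying, or evaluating. For orientation, here is a minimal, self-contained sketch of that pattern; the class name SetClassIndexSketch and the file name "iris.arff" are placeholders, not part of any example below.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SetClassIndexSketch {
    public static void main(String[] args) throws Exception {
        // "iris.arff" is a placeholder path; DataSource also handles CSV and other formats
        Instances data = new DataSource("iris.arff").getDataSet();

        // Loaders leave the class attribute undefined (classIndex() == -1),
        // so set it explicitly; by convention the class is the last attribute
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }
        System.out.println("Class attribute: " + data.classAttribute().name());
    }
}

Note that the index passed to setClassIndex is zero-based, and a negative value marks the dataset as having no class attribute.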
From source file:id3j48.WekaAccess.java
public static Instances readCsv(String filename) throws Exception {
    CSVLoader csvLoader = new CSVLoader();
    csvLoader.setSource(new File(datasetFolder + File.separator + filename));
    Instances data = csvLoader.getDataSet();
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }
    return data;
}
From source file:id3j48.WekaAccess.java
public static void classify(String filename, Classifier classifier) throws Exception {
    Instances input = readArff(filename);
    input.setClassIndex(input.numAttributes() - 1);
    for (int i = 0; i < input.numInstances(); i++) {
        double classLabel = classifier.classifyInstance(input.instance(i));
        input.instance(i).setClassValue(classLabel);
        System.out.println("Instance: " + input.instance(i));
        System.out.println("Class: " + input.classAttribute().value((int) classLabel));
    }
    try (BufferedWriter writer = new BufferedWriter(
            new FileWriter(classifiedFolder + File.separator + filename))) {
        writer.write(input.toString());
        writer.newLine();
        writer.flush();
    }
}
From source file:irisdata.IrisData.java
/**
 * @param args the command line arguments
 * @throws java.lang.Exception
 */
public static void main(String[] args) throws Exception {
    String file = "/Users/paul/Desktop/BYU-Idaho/Spring2015/CS450/iris.csv";
    DataSource source = new DataSource(file);
    Instances data = source.getDataSet();
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }
    data.randomize(new Random(1));

    // set training set to 70%
    RemovePercentage remove = new RemovePercentage();
    remove.setPercentage(30);
    remove.setInputFormat(data);
    Instances trainingSet = Filter.useFilter(data, remove);

    // set the rest for the testing set
    remove.setInvertSelection(true);
    Instances testSet = Filter.useFilter(data, remove);

    // train classifier - kind of
    HardCodedClassifier classifier = new HardCodedClassifier();
    classifier.buildClassifier(trainingSet); // this does nothing right now

    // Evaluate classifier
    Evaluation eval = new Evaluation(trainingSet);
    eval.evaluateModel(classifier, testSet);
    //eval.crossValidateModel(classifier, data, 10, new Random(1));

    // Print some statistics
    System.out.println("Results: " + eval.toSummaryString());
}
From source file:irisdriver.IrisDriver.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    // As an example of arguments: sepallength=5.1 sepalwidth=3.5 petallength=1.4 petalwidth=0.2
    try {
        Hashtable<String, String> values = new Hashtable<String, String>();
        /*Iris irisModel = new Iris();
        for (int i = 0; i < args.length; i++) {
            String[] tokens = args[i].split("=");
            values.put(tokens[0], tokens[1]);
        }
        System.out.println("Classification: " + irisModel.classifySpecies(values));*/

        // Loading the model
        String pathModel = "";
        String pathTestSet = "";

        JFileChooser chooserModel = new JFileChooser();
        chooserModel.setCurrentDirectory(new java.io.File("."));
        chooserModel.setDialogTitle("Choose the model");
        chooserModel.setFileSelectionMode(JFileChooser.FILES_AND_DIRECTORIES);
        chooserModel.setAcceptAllFileFilterUsed(true);

        if (chooserModel.showOpenDialog(null) == JFileChooser.APPROVE_OPTION) {
            File filePathModel = chooserModel.getSelectedFile();
            pathModel = filePathModel.getPath();
            Iris irisModel = new Iris(pathModel);

            // Loading the test set
            JFileChooser chooserTestSet = new JFileChooser();
            chooserTestSet.setDialogTitle("Choose TEST SET");
            chooserTestSet.setFileSelectionMode(JFileChooser.FILES_AND_DIRECTORIES);
            chooserTestSet.setAcceptAllFileFilterUsed(true);

            // Loading the testing dataset
            if (chooserTestSet.showOpenDialog(null) == JFileChooser.APPROVE_OPTION) {
                File filePathTestSet = chooserTestSet.getSelectedFile();
                pathTestSet = filePathTestSet.getPath();

                // WRITING THE OUTPUT:
                BufferedWriter writer = new BufferedWriter(new FileWriter("D:\\output_file.txt"));

                // Transforming the data set into attribute-value pairs
                ConverterUtils.DataSource unlabeledSource = new ConverterUtils.DataSource(pathTestSet);
                Instances unlabeledData = unlabeledSource.getDataSet();
                if (unlabeledData.classIndex() == -1) {
                    unlabeledData.setClassIndex(unlabeledData.numAttributes() - 1);
                }

                for (int i = 0; i < unlabeledData.numInstances(); i++) {
                    Instance ins = unlabeledData.instance(i);
                    // ins.numAttributes() - 1 --> not to include the label
                    for (int j = 0; j < ins.numAttributes() - 1; j++) {
                        String attrib = ins.attribute(j).name();
                        double val = ins.value(ins.attribute(j));
                        values.put(attrib, String.valueOf(val));
                    }
                    String predictedLabel = irisModel.classifySpecies(values);
                    System.out.println("Classification: " + predictedLabel);
                    values.clear();

                    // Writing the results in a txt
                    writer.write("The label is: " + predictedLabel);
                    //writer.newLine();
                    //writer.write("The error rate of the prediction is : " + eval.errorRate());
                    //writer.newLine();
                }
                writer.flush();
                writer.close();
            }
        }
    } catch (Exception ex) {
        Logger.getLogger(IrisDriver.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:it.unisa.gitdm.evaluation.WekaEvaluator.java
public WekaEvaluator(String baseFolderPath, String projectName, Classifier classifier, String classifierName,
        String modelName) {
    // READ FILE
    /* OLD CODE
    try {
        BufferedReader reader = new BufferedReader(new FileReader(filePath));
        Instances data = new Instances(reader);
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println(data.size());

        // split instances into train and test
        Instances train = data;
        Instances test = null;

        // EVALUATION
        Evaluation eval = new Evaluation(train);
        //eval.evaluateModel(j48, test);

        // CROSS-VALIDATION
        eval.crossValidateModel(classifier, train, 10, new Random(1));
        System.out.println(eval.toSummaryString());
        System.out.println(eval.toMatrixString());
    } catch (Exception ex) {
        Logger.getLogger(WekaEvaluator.class.getName()).log(Level.SEVERE, null, ex);
    }
    OLD CODE */
    String filePath = baseFolderPath + projectName + "/predictors.csv";
    try {
        DataSource source = new DataSource(filePath);
        Instances instances = source.getDataSet();
        instances.setClassIndex(instances.numAttributes() - 1);
        System.out.println("Number of instances: " + instances.size());
        evaluateModel(baseFolderPath, projectName, classifier, instances, modelName, classifierName);
    } catch (Exception ex) {
        Logger.getLogger(WekaEvaluator.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:it.unisa.gitdm.evaluation.WekaEvaluator.java
private static void evaluateModel(String baseFolderPath, String projectName, Classifier pClassifier,
        Instances pInstances, String pModelName, String pClassifierName) throws Exception {
    // other options
    int folds = 10;

    // randomize data
    Random rand = new Random(42);
    Instances randData = new Instances(pInstances);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        randData.stratify(folds);
    }

    // perform cross-validation and add predictions
    Instances predictedData = null;
    Evaluation eval = new Evaluation(randData);
    int positiveValueIndexOfClassFeature = 0;
    for (int n = 0; n < folds; n++) {
        Instances train = randData.trainCV(folds, n);
        Instances test = randData.testCV(folds, n);
        // the above code is used by the StratifiedRemoveFolds filter, the
        // code below by the Explorer/Experimenter:
        // Instances train = randData.trainCV(folds, n, rand);

        // locate the class attribute ("isBuggy") by name
        int classFeatureIndex = 0;
        for (int i = 0; i < train.numAttributes(); i++) {
            if (train.attribute(i).name().equals("isBuggy")) {
                classFeatureIndex = i;
                break;
            }
        }

        // find the index of the positive class value ("TRUE")
        Attribute classFeature = train.attribute(classFeatureIndex);
        for (int i = 0; i < classFeature.numValues(); i++) {
            if (classFeature.value(i).equals("TRUE")) {
                positiveValueIndexOfClassFeature = i;
            }
        }

        train.setClassIndex(classFeatureIndex);
        test.setClassIndex(classFeatureIndex);

        // build and evaluate classifier
        pClassifier.buildClassifier(train);
        eval.evaluateModel(pClassifier, test);

        // add predictions
        // AddClassification filter = new AddClassification();
        // filter.setClassifier(pClassifier);
        // filter.setOutputClassification(true);
        // filter.setOutputDistribution(true);
        // filter.setOutputErrorFlag(true);
        // filter.setInputFormat(train);
        // Filter.useFilter(train, filter);
        // Instances pred = Filter.useFilter(test, filter);
        // if (predictedData == null)
        //     predictedData = new Instances(pred, 0);
        // for (int j = 0; j < pred.numInstances(); j++)
        //     predictedData.add(pred.instance(j));
    }

    double accuracy = (eval.numTruePositives(positiveValueIndexOfClassFeature)
            + eval.numTrueNegatives(positiveValueIndexOfClassFeature))
            / (eval.numTruePositives(positiveValueIndexOfClassFeature)
                    + eval.numFalsePositives(positiveValueIndexOfClassFeature)
                    + eval.numFalseNegatives(positiveValueIndexOfClassFeature)
                    + eval.numTrueNegatives(positiveValueIndexOfClassFeature));

    double fmeasure = 2 * ((eval.precision(positiveValueIndexOfClassFeature)
            * eval.recall(positiveValueIndexOfClassFeature))
            / (eval.precision(positiveValueIndexOfClassFeature)
                    + eval.recall(positiveValueIndexOfClassFeature)));

    File wekaOutput = new File(baseFolderPath + projectName + "/predictors.csv");
    PrintWriter pw1 = new PrintWriter(wekaOutput);
    pw1.write(accuracy + ";" + eval.precision(positiveValueIndexOfClassFeature) + ";"
            + eval.recall(positiveValueIndexOfClassFeature) + ";" + fmeasure + ";"
            + eval.areaUnderROC(positiveValueIndexOfClassFeature));

    System.out.println(projectName + ";" + pClassifierName + ";" + pModelName + ";"
            + eval.numTruePositives(positiveValueIndexOfClassFeature) + ";"
            + eval.numFalsePositives(positiveValueIndexOfClassFeature) + ";"
            + eval.numFalseNegatives(positiveValueIndexOfClassFeature) + ";"
            + eval.numTrueNegatives(positiveValueIndexOfClassFeature) + ";" + accuracy + ";"
            + eval.precision(positiveValueIndexOfClassFeature) + ";"
            + eval.recall(positiveValueIndexOfClassFeature) + ";" + fmeasure + ";"
            + eval.areaUnderROC(positiveValueIndexOfClassFeature) + "\n");
}
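A side note on the attribute-search loop in the example above: Weka can also resolve an attribute by name directly via Instances.attribute(String), which returns null when no attribute has that name. A minimal sketch of that alternative, wrapped in a hypothetical helper (the method name setClassByName is an illustration, not part of the source above):

import weka.core.Attribute;
import weka.core.Instances;

// Hypothetical helper: set a dataset's class to the attribute with the given name.
// Instances.attribute(String) returns null if no attribute matches.
static void setClassByName(Instances data, String attributeName) {
    Attribute att = data.attribute(attributeName);
    if (att != null) {
        data.setClassIndex(att.index());
    }
}

In the example above this would replace the index-search loop with setClassByName(train, "isBuggy") and setClassByName(test, "isBuggy").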
From source file:jjj.asap.sas.ensemble.impl.CrossValidatedEnsemble.java
License:Open Source License
@Override
public StrongLearner build(int essaySet, String ensembleName, List<WeakLearner> learners) {
    // can't handle empty case
    if (learners.isEmpty()) {
        return this.ensemble.build(essaySet, ensembleName, learners);
    }

    // create a dummy dataset.
    DatasetBuilder builder = new DatasetBuilder();
    builder.addVariable("id");
    builder.addNominalVariable("class", Contest.getRubrics(essaySet));
    Instances dummy = builder.getDataset("dummy");

    // add data
    Map<Double, Double> groundTruth = Contest.getGoldStandard(essaySet);
    for (double id : learners.get(0).getPreds().keySet()) {
        dummy.add(new DenseInstance(1.0, new double[] { id, groundTruth.get(id) }));
    }

    // stratify
    dummy.sort(0);
    dummy.randomize(new Random(1));
    dummy.setClassIndex(1);
    dummy.stratify(nFolds);

    // now evaluate each fold
    Map<Double, Double> preds = new HashMap<Double, Double>();
    for (int k = 0; k < nFolds; k++) {
        Instances train = dummy.trainCV(nFolds, k);
        Instances test = dummy.testCV(nFolds, k);

        List<WeakLearner> cvLearners = new ArrayList<WeakLearner>();
        for (WeakLearner learner : learners) {
            WeakLearner copy = learner.copyOf();
            for (int i = 0; i < test.numInstances(); i++) {
                copy.getPreds().remove(test.instance(i).value(0));
                copy.getProbs().remove(test.instance(i).value(0));
            }
            cvLearners.add(copy);
        }

        // train on fold
        StrongLearner cv = this.ensemble.build(essaySet, ensembleName, cvLearners);

        List<WeakLearner> testLearners = new ArrayList<WeakLearner>();
        for (WeakLearner learner : cv.getLearners()) {
            WeakLearner copy = learner.copyOf();
            copy.getPreds().clear();
            copy.getProbs().clear();
            WeakLearner source = find(copy.getName(), learners);
            for (int i = 0; i < test.numInstances(); i++) {
                double id = test.instance(i).value(0);
                copy.getPreds().put(id, source.getPreds().get(id));
                copy.getProbs().put(id, source.getProbs().get(id));
            }
            testLearners.add(copy);
        }
        preds.putAll(this.ensemble.classify(essaySet, ensembleName, testLearners, cv.getContext()));
    }

    // now prepare final result
    StrongLearner strong = this.ensemble.build(essaySet, ensembleName, learners);
    double trainingError = strong.getKappa();
    double cvError = Calc.kappa(essaySet, preds, groundTruth);
    // Job.log(essaySet + "-" + ensembleName, "XVAL: training error = " + trainingError + " cv error = " + cvError);
    strong.setKappa(cvError);
    return strong;
}
From source file:jmetal.test.survivalanalysis.GenerateSurvivalGraph.java
License:Open Source License
/**
 * Evaluates a solution
 * @param solution The solution to evaluate
 */
public void evaluate(Solution solution) {
    Binary variable;
    int counterSelectedFeatures;
    DataSource source;
    double testStatistic = Double.MAX_VALUE;
    double pValue = Double.MAX_VALUE;
    double ArithmeticHarmonicCutScore = Double.MAX_VALUE;
    //double statScore;
    REXP x;

    variable = ((Binary) solution.getDecisionVariables()[0]);
    counterSelectedFeatures = 0;

    try {
        // read the data file
        source = new DataSource(this.dataFileName);
        Instances data = source.getDataSet();
        //System.out.print("Data read successfully. ");
        //System.out.print("Number of attributes: " + data.numAttributes());
        //System.out.println(". Number of instances: " + data.numInstances());

        // save the attributes 'T' and 'Censor'
        attTime = data.attribute(data.numAttributes() - 2);
        attCensor = data.attribute(data.numAttributes() - 1);

        // First filter the attributes based on the chromosome
        Instances tmpData = this.filterByChromosome(data, solution);

        // Now filter out the attributes 'T' and 'Censor'
        Remove filter = new Remove();
        // remove the two last attributes: 'T' and 'Censor'
        filter.setAttributeIndices("" + (tmpData.numAttributes() - 1) + "," + tmpData.numAttributes());
        //System.out.println("After chromosome filtering no of attributes: " + tmpData.numAttributes());
        filter.setInputFormat(tmpData);
        Instances dataClusterer = Filter.useFilter(tmpData, filter);
        // filtering complete

        // List the selected features/attributes
        Enumeration<Attribute> attributeList = dataClusterer.enumerateAttributes();
        System.out.println("Selected attributes/features: ");
        while (attributeList.hasMoreElements()) {
            Attribute att = attributeList.nextElement();
            System.out.print(att.name() + ",");
        }
        System.out.println();

        /*
        // debug: write the filtered dataset
        ArffSaver saver = new ArffSaver();
        saver.setInstances(dataClusterer);
        saver.setFile(new File("filtered-data.arff"));
        saver.writeBatch();
        // end debug
        */

        // train hierarchical clusterer
        HierarchicalClusterer clusterer = new HierarchicalClusterer();
        clusterer.setOptions(new String[] { "-L", this.HC_LinkType });
        // Link type (Single, Complete, Average, Mean, Centroid, Ward, Adjusted complete, Neighbor Joining)
        // [SINGLE|COMPLETE|AVERAGE|MEAN|CENTROID|WARD|ADJCOMPLETE|NEIGHBOR_JOINING]
        //clusterer.setDebug(true);
        clusterer.setNumClusters(2);
        clusterer.setDistanceFunction(new EuclideanDistance());
        clusterer.setDistanceIsBranchLength(false); // ?? Should it be changed to false? (Noman)

        clusterer.buildClusterer(dataClusterer);

        double[][] distanceMatrix = clusterer.getDistanceMatrix();

        // Cluster evaluation:
        ClusterEvaluation eval = new ClusterEvaluation();
        eval.setClusterer(clusterer);

        if (this.testDataFileName != null) {
            DataSource testSource = new DataSource(this.testDataFileName);
            Instances tmpTestData = testSource.getDataSet();
            tmpTestData.setClassIndex(tmpTestData.numAttributes() - 1);

            // First filter the attributes based on the chromosome
            Instances testData = this.filterByChromosome(tmpTestData, solution);
            //String[] options = new String[2];
            //options[0] = "-t";
            //options[1] = "/some/where/somefile.arff";
            //System.out.println(eval.evaluateClusterer(testData, options));
            eval.evaluateClusterer(testData);
            System.out.println("\nCluster evaluation for this solution(" + this.testDataFileName + "): "
                    + eval.clusterResultsToString());
        }

        // First analyze using my library function
        // save the cluster assignments
        int[] clusterAssignment = new int[dataClusterer.numInstances()];
        int classOneCnt = 0;
        int classTwoCnt = 0;
        for (int i = 0; i < dataClusterer.numInstances(); ++i) {
            clusterAssignment[i] = clusterer.clusterInstance(dataClusterer.get(i));
            if (clusterAssignment[i] == 0) {
                ++classOneCnt;
            } else if (clusterAssignment[i] == 1) {
                ++classTwoCnt;
            }
            //System.out.println("Instance " + i + ": " + clusterAssignment[i]);
        }
        System.out.println("Class 1 cnt: " + classOneCnt + " Class 2 cnt: " + classTwoCnt);

        // create arrays with time (event occurrence time) and censor data for use with jstat LogRankTest
        double[] time1 = new double[classOneCnt];
        double[] censor1 = new double[classOneCnt];
        double[] time2 = new double[classTwoCnt];
        double[] censor2 = new double[classTwoCnt];

        for (int i = 0, cnt1 = 0, cnt2 = 0; i < dataClusterer.numInstances(); ++i) {
            if (clusterAssignment[i] == 0) {
                time1[cnt1] = data.get(i).value(attTime);
                censor1[cnt1++] = data.get(i).value(attCensor);
                //System.out.println("i: " + i + " T: " + time1[cnt1 - 1]);
            } else if (clusterAssignment[i] == 1) {
                time2[cnt2] = data.get(i).value(attTime);
                //System.out.println("i: " + i + " T: " + time2[cnt2 - 1]);
                censor2[cnt2++] = data.get(i).value(attCensor);
            }
        }

        //Instances[] classInstances = separateClassInstances(clusterAssignment, this.dataFileName, solution);
        //System.out.println("Class instances separated");

        // calculate log rank test and p values
        LogRankTest testclass1 = new LogRankTest(time1, time2, censor1, censor2);
        double[] scores = testclass1.logRank();
        testStatistic = scores[0];
        pValue = scores[2];
        ArithmeticHarmonicCutScore = this.getArithmeticHarmonicCutScore(distanceMatrix, clusterAssignment);
        //debug:
        System.out.println("Calculation by myLibrary:\n testStatistic: " + scores[0] + " pValue: " + scores[2]
                + " Arithmetic Harmonic Cut Score: " + ArithmeticHarmonicCutScore);
        //end debug

        //WilcoxonTest testclass1 = new WilcoxonTest(time1, censor1, time2, censor2);
        //testStatistic = testclass1.testStatistic;
        //pValue = testclass1.pValue;

        // Now analyze by calling R for the log rank test; parallelization not possible
        String strT = "time <- c(";
        String strC = "censor <- c(";
        String strG = "group <- c(";
        for (int i = 0; i < dataClusterer.numInstances() - 1; ++i) {
            strT = strT + (int) data.get(i).value(attTime) + ",";
            strG = strG + clusterer.clusterInstance(dataClusterer.get(i)) + ",";
            strC = strC + (int) data.get(i).value(attCensor) + ",";
        }
        int tmpi = dataClusterer.numInstances() - 1;
        strT = strT + (int) data.get(tmpi).value(attTime) + ")";
        strG = strG + clusterer.clusterInstance(dataClusterer.get(tmpi)) + ")";
        strC = strC + (int) data.get(tmpi).value(attCensor) + ")";

        this.re.eval(strT);
        this.re.eval(strC);
        this.re.eval(strG);
        //debug
        //System.out.println(strT);
        //System.out.println(strC);
        //System.out.println(strG);
        //end debug

        /** If you are calling surv_test from the coin library */
        /*
        re.eval("library(coin)");
        re.eval("grp <- factor(group)");
        re.eval("result <- surv_test(Surv(time,censor)~grp,distribution=\"exact\")");
        x = re.eval("statistic(result)");
        testStatistic = x.asDouble();
        //x = re.eval("pvalue(result)");
        //pValue = x.asDouble();
        //System.out.println("StatScore: " + statScore + "pValue: " + pValue);
        */

        /** If you are calling survdiff from the survival library (much faster) */
        re.eval("library(survival)");
        re.eval("res2 <- survdiff(Surv(time,censor)~group,rho=0)");
        x = re.eval("res2$chisq");
        testStatistic = x.asDouble();
        //System.out.println(x);
        x = re.eval("pchisq(res2$chisq, df=1, lower.tail = FALSE)");
        //x = re.eval("1.0 - pchisq(res2$chisq, df=1)");
        pValue = x.asDouble();
        //debug:
        //System.out.println("Calculation by R: StatScore: " + testStatistic + " pValue: " + pValue);
        //end debug
        System.out.println("Calculation by R:");
        System.out.println("StatScore: " + testStatistic + " pValue: " + pValue);

        re.eval("timestrata1.surv <- survfit( Surv(time, censor)~ strata(group), conf.type=\"log-log\")");
        re.eval("timestrata1.surv1 <- survfit( Surv(time, censor)~ 1, conf.type=\"none\")");
        String evalStr = "jpeg('SurvivalPlot-" + this.SolutionID + ".jpg')";
        re.eval(evalStr);
        re.eval("plot(timestrata1.surv, col=c(2,3), xlab=\"Time\", ylab=\"Survival Probability\")");
        re.eval("par(new=T)");
        re.eval("plot(timestrata1.surv1,col=1)");
        re.eval("legend(0.2, c(\"Group1\",\"Group2\",\"Whole\"))");
        re.eval("dev.off()");

        System.out.println("\nCluster Assignments:");
        for (int i = 0; i < dataClusterer.numInstances(); ++i) {
            System.out.println("Instance " + i + ": " + clusterAssignment[i]);
        }
    } catch (Exception e) {
        System.err.println("Can't open the data file.");
        e.printStackTrace();
        System.exit(1);
    }
}
From source file:jmetal.test.survivalanalysis.GenerateSurvivalGraphOld.java
License:Open Source License
/**
 * Evaluates a solution - actually generates the survival graph
 * @param solution The solution to evaluate
 */
public void evaluate(Solution solution) {
    Binary variable;
    int counterSelectedFeatures;
    DataSource source;
    double testStatistic = Double.MAX_VALUE;
    double pValue = Double.MAX_VALUE;
    //double statScore;
    REXP x;

    variable = ((Binary) solution.getDecisionVariables()[0]);
    counterSelectedFeatures = 0;
    System.out.println("\nSolution ID " + this.SolutionID);

    try {
        // read the data file
        source = new DataSource(this.dataFileName);
        Instances data = source.getDataSet();
        //System.out.print("Data read successfully. ");
        //System.out.print("Number of attributes: " + data.numAttributes());
        //System.out.println(". Number of instances: " + data.numInstances());

        // save the attributes 'T' and 'Censor'
        attTime = data.attribute(data.numAttributes() - 2);
        attCensor = data.attribute(data.numAttributes() - 1);

        // First filter the attributes based on the chromosome
        Instances tmpData = this.filterByChromosome(data, solution);

        // Now filter out the attributes 'T' and 'Censor'
        Remove filter = new Remove();
        // remove the two last attributes: 'T' and 'Censor'
        filter.setAttributeIndices("" + (tmpData.numAttributes() - 1) + "," + tmpData.numAttributes());
        //System.out.println("After chromosome filtering no of attributes: " + tmpData.numAttributes());
        filter.setInputFormat(tmpData);
        Instances dataClusterer = Filter.useFilter(tmpData, filter);

        Enumeration<Attribute> attributeList = dataClusterer.enumerateAttributes();
        System.out.println("Selected attributes: ");
        while (attributeList.hasMoreElements()) {
            Attribute att = attributeList.nextElement();
            System.out.print(att.name() + ",");
        }
        System.out.println();
        // filtering complete

        /*
        // Debug: write the filtered dataset
        ArffSaver saver = new ArffSaver();
        saver.setInstances(dataClusterer);
        saver.setFile(new File("filtered-data.arff"));
        saver.writeBatch();
        */

        // train hierarchical clusterer
        HierarchicalClusterer clusterer = new HierarchicalClusterer();
        clusterer.setOptions(new String[] { "-L", "COMPLETE" }); // complete linkage clustering
        //clusterer.setDebug(true);
        clusterer.setNumClusters(2);
        clusterer.setDistanceFunction(new EuclideanDistance());
        //clusterer.setDistanceFunction(new ChebyshevDistance());
        clusterer.setDistanceIsBranchLength(false);

        clusterer.buildClusterer(dataClusterer);

        // Cluster evaluation:
        ClusterEvaluation eval = new ClusterEvaluation();
        eval.setClusterer(clusterer);

        if (this.testDataFileName != null) {
            DataSource testSource = new DataSource(this.testDataFileName);
            Instances tmpTestData = testSource.getDataSet();
            tmpTestData.setClassIndex(tmpTestData.numAttributes() - 1);

            // First filter the attributes based on the chromosome
            Instances testData = this.filterByChromosome(tmpTestData, solution);
            //String[] options = new String[2];
            //options[0] = "-t";
            //options[1] = "/some/where/somefile.arff";
            //System.out.println(eval.evaluateClusterer(testData, options));
            eval.evaluateClusterer(testData);
            System.out.println("\nCluster evaluation for this solution: " + eval.clusterResultsToString());
        }

        // Print the cluster assignments:
        // save the cluster assignments
        //if (printClusterAssignment == true) {
        int[] clusterAssignment = new int[dataClusterer.numInstances()];
        int classOneCnt = 0;
        int classTwoCnt = 0;
        for (int i = 0; i < dataClusterer.numInstances(); ++i) {
            clusterAssignment[i] = clusterer.clusterInstance(dataClusterer.get(i));
            if (clusterAssignment[i] == 0) {
                ++classOneCnt;
            } else if (clusterAssignment[i] == 1) {
                ++classTwoCnt;
            }
            //System.out.println("Instance " + i + ": " + clusterAssignment[i]);
        }
        System.out.println("Class 1 cnt: " + classOneCnt + " Class 2 cnt: " + classTwoCnt);
        //}

        /*
        // create arrays with time (event occurrence time) and censor data for use with jstat LogRankTest
        double[] time1 = new double[classOneCnt];
        double[] censor1 = new double[classOneCnt];
        double[] time2 = new double[classTwoCnt];
        double[] censor2 = new double[classTwoCnt];
        //data = source.getDataSet();
        for (int i = 0, cnt1 = 0, cnt2 = 0; i < dataClusterer.numInstances(); ++i) {
            clusterAssignment[i] = clusterer.clusterInstance(dataClusterer.get(i));
            if (clusterAssignment[i] == 0) {
                time1[cnt1] = data.get(i).value(attTime);
                censor1[cnt1++] = 1;
                //System.out.println("i: " + i + " T: " + time1[cnt1 - 1]);
            } else if (clusterAssignment[i] == 1) {
                time2[cnt2] = data.get(i).value(attTime);
                //System.out.println("i: " + i + " T: " + time2[cnt2 - 1]);
                censor2[cnt2++] = 1;
            }
            //System.out.println("Instance " + i + ": " + clusterAssignment[i]);
        }

        //Instances[] classInstances = separateClassInstances(clusterAssignment, this.dataFileName, solution);
        //System.out.println("Class instances separated");

        // calculate log rank test and p values
        //LogRankTest testclass1 = new LogRankTest(time1, censor1, time2, censor2);
        //testStatistic = testclass1.testStatistic;
        //pValue = testclass1.pValue;
        WilcoxonTest testclass1 = new WilcoxonTest(time1, censor1, time2, censor2);
        testStatistic = testclass1.testStatistic;
        pValue = testclass1.pValue;
        */

        String strT = "time1 <- c(";
        String strC = "censor1 <- c(";
        String strG = "group1 <- c(";
        for (int i = 0; i < dataClusterer.numInstances() - 1; ++i) {
            strT = strT + (int) data.get(i).value(attTime) + ",";
            strG = strG + clusterer.clusterInstance(dataClusterer.get(i)) + ",";
            strC = strC + (int) data.get(i).value(attCensor) + ",";
        }
        int tmpi = dataClusterer.numInstances() - 1;
        strT = strT + (int) data.get(tmpi).value(attTime) + ")";
        strG = strG + clusterer.clusterInstance(dataClusterer.get(tmpi)) + ")";
        strC = strC + (int) data.get(tmpi).value(attCensor) + ")";

        this.re.eval(strT);
        this.re.eval(strC);
        this.re.eval(strG);

        // for MyLogRankTest
        double[] time1 = new double[classOneCnt];
        double[] time2 = new double[classTwoCnt];
        double[] censor1 = new double[classOneCnt];
        double[] censor2 = new double[classTwoCnt];
        int i1 = 0, i2 = 0;
        for (int i = 0; i < dataClusterer.numInstances(); ++i) {
            strT = strT + (int) data.get(i).value(attTime) + ",";
            strG = strG + clusterer.clusterInstance(dataClusterer.get(i)) + ",";
            strC = strC + (int) data.get(i).value(attCensor) + ",";
            if (clusterer.clusterInstance(dataClusterer.get(i)) == 0) {
                time1[i1] = data.get(i).value(attTime);
                censor1[i1] = data.get(i).value(attCensor);
                ++i1;
            } else {
                time2[i2] = data.get(i).value(attTime);
                censor2[i2] = data.get(i).value(attCensor);
                ++i2;
            }
        }

        /** If you are calling surv_test from the coin library */
        /*
        re.eval("library(coin)");
        re.eval("grp <- factor(group)");
        re.eval("result <- surv_test(Surv(time,censor)~grp,distribution=\"exact\")");
        x = re.eval("statistic(result)");
        testStatistic = x.asDouble();
        //x = re.eval("pvalue(result)");
        //pValue = x.asDouble();
        //System.out.println("StatScore: " + statScore + "pValue: " + pValue);
        */

        /** If you are calling survdiff from the survival library (much faster) */
        re.eval("library(survival)");
        re.eval("res21 <- survdiff(Surv(time1,censor1)~group1,rho=0)");
        x = re.eval("res21$chisq");
        testStatistic = x.asDouble();
        //System.out.println(x);
        x = re.eval("pchisq(res21$chisq, df=1, lower.tail = FALSE)");
        //x = re.eval("1.0 - pchisq(res21$chisq, df=1)");
        pValue = x.asDouble();
        System.out.println("Results from R:");
        System.out.println("StatScore: " + testStatistic + " pValue: " + pValue);

        re.eval("timestrata1.surv <- survfit( Surv(time1, censor1)~ strata(group1), conf.type=\"log-log\")");
        re.eval("timestrata1.surv1 <- survfit( Surv(time1, censor1)~ 1, conf.type=\"none\")");
        String evalStr = "jpeg('SurvivalPlot-" + this.SolutionID + ".jpg')";
        re.eval(evalStr);
        re.eval("plot(timestrata1.surv, col=c(2,3), xlab=\"Time\", ylab=\"Survival Probability\")");
        re.eval("par(new=T)");
        re.eval("plot(timestrata1.surv1,col=1)");
        re.eval("legend(0.2, c(\"Group1\",\"Group2\",\"Whole\"))");
        re.eval("dev.off()");

        System.out.println("Results from my code: ");
        LogRankTest lrt = new LogRankTest(time1, time2, censor1, censor2);
        double[] results = lrt.logRank();
        System.out.println("Statistics: " + results[0] + " variance: " + results[1] + " pValue: " + results[2]);
    } catch (Exception e) {
        System.err.println("Can't open the data file.");
        e.printStackTrace();
        System.exit(1);
    }

    /**********
     * The current implementation considers two objectives:
     * 1. p-value to be minimized / statistical score to be maximized
     * 2. number of features to be maximized/minimized
     */
}
From source file:jwebminer2.FeatureValueFileSaver.java
/**
 * Save the given text to the given location in the given format or
 * save the stored feature values, depending on the chosen_file_extension.
 * A progress bar is displayed (although not incremented).
 *
 * @param chosen_file_extension The file extension (corresponding to one
 *                              of the extensions published by the
 *                              getFileFormatExtension method) to use when
 *                              saving data_to_save, and the corresponding
 *                              file format.
 * @param data_to_save          The HTML code displayed on-screen. May be
 *                              null for non-HTML saving.
 * @param save_location         The file to save data_to_save to.
 * @throws Exception            Throws an Exception if the file cannot be
 *                              saved.
 */
public void saveContents(String chosen_file_extension, String data_to_save, File save_location)
        throws Exception {
    // Prepare the progress bar
    SimpleProgressBarDialog progress_bar = new SimpleProgressBarDialog(1, results_panel);

    // Write the whole contents of data_to_save verbatim as an HTML file
    // if an HTML file is to be saved
    if (chosen_file_extension.equals("HTML")) {
        DataOutputStream writer = mckay.utilities.staticlibraries.FileMethods
                .getDataOutputStream(save_location);
        writer.writeBytes(data_to_save);
        writer.close();
    }

    // Only save the table of final feature values itself if a non-HTML
    // file format is to be saved
    else {
        // Access information to store
        double[][] feature_table = results_panel.feature_values;
        String[] column_labels = results_panel.column_labels;
        String[] row_labels = results_panel.row_labels;
        String[] orig_column_labels = column_labels;
        if (AnalysisProcessor.lastfm_enabled && AnalysisProcessor.is_cross_tabulation
                && (AnalysisProcessor.yahoo_application_id != null
                        || AnalysisProcessor.google_license_key != null)) {
            String[] column_labels_lastfm_websearch = new String[2 * column_labels.length];
            for (int i = 0; i < column_labels.length; i++) {
                column_labels_lastfm_websearch[i] = column_labels[i] + "_WS";
                column_labels_lastfm_websearch[i + column_labels.length] = column_labels[i] + "_LastFM";
            }
            column_labels = column_labels_lastfm_websearch;
        } else {
            column_labels = orig_column_labels;
        }

        // Save as tab delimited text file
        if (chosen_file_extension.equals("TXT")) {
            // Calculate the table to save
            String[][] results_table = new String[row_labels.length + 1][column_labels.length + 1];
            results_table[0][0] = "";
            for (int i = 0; i < results_table.length; i++) {
                for (int j = 0; j < results_table[i].length; j++) {
                    if (i == 0) {
                        if (j != 0)
                            results_table[i][j] = column_labels[j - 1];
                    } else {
                        if (j == 0)
                            results_table[i][j] = row_labels[i - 1];
                        else
                            results_table[i][j] = String.valueOf(feature_table[i - 1][j - 1]);
                    }
                }
            }

            // Save the table
            DataOutputStream writer = mckay.utilities.staticlibraries.FileMethods
                    .getDataOutputStream(save_location);
            for (int i = 0; i < results_table.length; i++) {
                for (int j = 0; j < results_table[i].length; j++) {
                    // Write the table entry
                    writer.writeBytes(results_table[i][j]);

                    // Add a tab or a line break
                    if (j == results_table[i].length - 1)
                        writer.writeBytes("\n");
                    else
                        writer.writeBytes("\t");
                }
            }

            // Close the writing stream
            writer.close();
        }

        // Save as ACE XML file
        else if (chosen_file_extension.equals("ACE XML")) {
            // Set the name of the dataset to the name of the file
            // that is to be saved
            String data_set_name = mckay.utilities.staticlibraries.StringMethods
                    .removeExtension(save_location.getName());

            // Prepare feature definitions and store feature names to
            // put in DataSets
            FeatureDefinition[] feature_definitions = new FeatureDefinition[column_labels.length];
            String[] feature_names = new String[column_labels.length];
            for (int feat = 0; feat < feature_definitions.length; feat++) {
                feature_definitions[feat] = new FeatureDefinition(column_labels[feat], "", false, 1);
                feature_names[feat] = column_labels[feat];
            }

            // Prepare the DataSets to write
            DataSet[] data_sets = new DataSet[row_labels.length];
            for (int instance = 0; instance < data_sets.length; instance++) {
                // Instantiate the DataSet
                data_sets[instance] = new DataSet();

                // Store the instance names
                data_sets[instance].identifier = row_labels[instance];

                // Store the names of the features
                data_sets[instance].feature_names = feature_names;

                // Store the features for this DataSet as well as the
                // feature names
                double[][] these_feature_values = new double[feature_table[instance].length][1];
                for (int feat = 0; feat < these_feature_values.length; feat++)
                    these_feature_values[feat][0] = feature_table[instance][feat];
                data_sets[instance].feature_values = these_feature_values;

                // Validate, order and compact the DataSet
                data_sets[instance].orderAndCompactFeatures(feature_definitions, true);
            }

            // Save the feature values
            DataSet.saveDataSets(data_sets, feature_definitions, save_location,
                    "Features extracted with jWebMiner 2.0");
        }

        // Save as Weka ARFF file
        else if (chosen_file_extension.equals("Weka ARFF")) {
            // Set the name of the dataset to the name of the file
            // that is to be saved
            String data_set_name = mckay.utilities.staticlibraries.StringMethods
                    .removeExtension(save_location.getName());

            // Set the Attributes (feature names and class names)
            FastVector attributes_vector = new FastVector(column_labels.length + 1); // extra 1 is for class name
            for (int feat = 0; feat < column_labels.length; feat++)
                attributes_vector.addElement(new Attribute(column_labels[feat]));
            FastVector class_names_vector = new FastVector(column_labels.length);
            for (int cat = 0; cat < orig_column_labels.length; cat++)
                class_names_vector.addElement(orig_column_labels[cat]);
            attributes_vector.addElement(new Attribute("Class", class_names_vector));

            // Store attributes in an Instances object
            Instances instances = new Instances(data_set_name, attributes_vector, row_labels.length);
            instances.setClassIndex(instances.numAttributes() - 1);

            // Store the feature values and model classifications
            for (int inst = 0; inst < row_labels.length; inst++) {
                // Initialize an instance
                Instance this_instance = new Instance(instances.numAttributes());
                this_instance.setDataset(instances);
                int current_attribute = 0;

                // Set feature values for the instance
                for (int feat = 0; feat < column_labels.length; feat++)
                    this_instance.setValue(feat, feature_table[inst][feat]);

                // Set the class value for the instance
                // this_instance.setClassValue("a");
                instances.setRelationName("jWebMiner2");

                // Add this instance to instances
                instances.add(this_instance);
            }

            // Prepare the buffer to save to and add comments indicating
            // the names of the rows
            DataOutputStream writer = mckay.utilities.staticlibraries.FileMethods
                    .getDataOutputStream(save_location);
            writer.writeBytes("% INSTANCES (DATA ROWS) BELOW CORRESPOND TO:\n%\n");
            for (int inst = 0; inst < row_labels.length; inst++)
                writer.writeBytes("% " + (inst + 1) + ") " + row_labels[inst] + "\n");
            writer.writeBytes("%\n");

            // Save the ARFF file
            ArffSaver arff_saver = new ArffSaver();
            arff_saver.setInstances(instances);
            arff_saver.setFile(save_location);
            arff_saver.setDestination(writer);
            try {
                arff_saver.writeBatch();
            } catch (Exception e) {
                throw new Exception(
                        "File only partially saved.\n\nTry resaving the file with a .arff extension.");
            }

            // Close the writer
            writer.close();
        }
    }

    // Terminate the progress bar
    progress_bar.done();
}