List of usage examples for weka.core.Instances.toString()

@Override
public String toString()

Returns the dataset as a string in ARFF format, so printing or writing an Instances object yields a complete, loadable ARFF document (header plus data section).
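Before the real-world examples below, a minimal sketch of the core pattern, assuming Weka 3.7+ on the classpath (class and attribute names here are illustrative, not taken from any of the projects listed):

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class InstancesToStringDemo {
    public static void main(String[] args) {
        // Define two numeric attributes and create an empty dataset.
        ArrayList<Attribute> atts = new ArrayList<>();
        atts.add(new Attribute("x"));
        atts.add(new Attribute("y"));
        Instances data = new Instances("demo", atts, 2);

        // Add two rows (weight 1.0 each).
        data.add(new DenseInstance(1.0, new double[] { 1.0, 2.0 }));
        data.add(new DenseInstance(1.0, new double[] { 3.0, 4.0 }));

        // toString() renders the full ARFF document: @relation, @attribute, @data.
        System.out.println(data.toString());
    }
}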
From source file:edu.teco.context.recognition.WekaManager.java
License:Apache License
private void storeArffFile(Instances dataSet) {
    // see http://weka.wikispaces.com/Save+Instances+to+an+ARFF+File
    // better performance with ArffSaver, but no comments supported?
    if (DataLogger.isExternalStorageAvailable()) {
        // ArffSaver saver = new ArffSaver();
        // saver.setInstances(dataSet);
        try {
            String arffDirectory = FrameworkConfiguration.getInstance().getArffDirectory();
            File arffDir = new File(Environment.getExternalStorageDirectory() + arffDirectory);
            arffDir.mkdirs();
            String arffFileName = "Recognition_" + new Date().getTime() + ".arff";
            mArffFile = new File(arffDir, arffFileName);
            if (isLogDirectlyToFile) {
                mWriter = new BufferedWriter(new FileWriter(mArffFile, true), 8192);
                // add the metadata comments
                for (String metaDataLine : metaData) {
                    mWriter.write("% " + metaDataLine);
                    mWriter.newLine();
                }
                mWriter.write(dataSet.toString());
                mWriter.flush();
            } else {
                BufferedWriter writer = new BufferedWriter(new FileWriter(mArffFile));
                // add the metadata comments
                for (String metaDataLine : metaData) {
                    writer.write("% " + metaDataLine);
                    writer.newLine();
                }
                writer.write(dataSet.toString());
                writer.flush();
                writer.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
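The commented-out ArffSaver lines above hint at the stream-based alternative the Weka wiki recommends. A minimal sketch of that route, assuming a plain file path rather than Android external storage; note that the "%" metadata comments the method writes by hand are exactly what ArffSaver cannot emit, which is why the method falls back to toString():

import java.io.File;
import weka.core.Instances;
import weka.core.converters.ArffSaver;

// Hypothetical helper illustrating the ArffSaver route; the path is an assumption.
void storeWithArffSaver(Instances dataSet) throws Exception {
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataSet);                      // dataset to serialize
    saver.setFile(new File("/tmp/recognition.arff")); // destination file
    saver.writeBatch();                               // writes header and data in one pass
}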
From source file:es.bsc.autonomic.powermodeller.tools.classifiers.WekaWrapper.java
License:Apache License
public static DataSet processDataSet(DataSet ds, VariableParser parser) {
    String independent = ds.getIndependent();
    if (independent == null)
        throw new WekaWrapperException("Independent variable is not set in dataset.");

    HashMap<String, String> expression_list = parser.getNewMetrics();
    Instances data = convertDataSetToInstances(ds);

    try {
        // Apply filters for all the new variables
        for (Map.Entry<String, String> entry : expression_list.entrySet()) {
            String key = entry.getKey();
            String value = entry.getValue();
            logger.debug("Generating new variable " + key + " as " + value);
            AddExpression add_filter = new AddExpression();
            add_filter.setName(key);
            add_filter.setExpression(value);
            add_filter.setInputFormat(data);
            data = useFilter(data, add_filter);
        }
    } catch (Exception e) {
        logger.error("Error while processing new variables", e);
        throw new WekaWrapperException("Error while processing new variables");
    }

    // Iterate over all the columns and keep only the ones contained in the variables list
    List<String> variables = parser.getColumns();

    // Append the independent variable to the list of variables to keep
    variables.add(independent);

    // Remove unneeded attributes
    try {
        // It's important to iterate from last to first: when an attribute
        // is removed, the attributes after it shift down by one position.
        for (int i = data.numAttributes() - 1; i >= 0; i--) {
            String name = data.attribute(i).name();
            if (!variables.contains(name)) {
                logger.trace("Deleting unnecessary attribute " + name);
                data.deleteAttributeAt(i);
            }
        }
        data.toString(); // the result is discarded, so this call has no effect
    } catch (Exception e) {
        logger.error("Error while removing unneeded variables", e);
        throw new WekaWrapperException("Error while removing unneeded variables");
    }

    // Convert the Instances to CSV and return the new DataSet
    String new_path = CoreConfiguration.getNewCSVFileName();
    try {
        CSVSaver saver = new CSVSaver();
        saver.setInstances(data);
        saver.setFile(new File(new_path));
        saver.writeBatch();
    } catch (Exception e) {
        logger.error("Error while saving dataset to CSV", e);
        throw new WekaWrapperException("Error while saving dataset to CSV");
    }
    DataSet ret = new DataSet(new_path);
    ret.setIndependent(independent);
    return ret;
}
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
public static void main(String[] args) {
    // Create a test dataset
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute("message", (ArrayList<String>) null));
    attributes.add(new Attribute("id"));
    {
        ArrayList<String> classValues = new ArrayList<String>();
        classValues.add("0");
        classValues.add("1");
        attributes.add(new Attribute("class", classValues));
    }
    Instances instances = new Instances("test", attributes, 0);
    instances.setClassIndex(2);

    String[] messages = new String[] { "No emoticons here", "I have a smiley :)",
            "Two smileys and a frownie :) :) :(",
            "Several emoticons :( :-( :) :-) ;-) 8-) :-/ :-P" };

    for (int i = 0; i < messages.length; i++) {
        Instance instance = new DenseInstance(instances.numAttributes());
        instance.setValue(instances.attribute(0), messages[i]);
        instance.setValue(instances.attribute(1), i);
        instance.setValue(instances.attribute(2), Integer.toString(i % 2));
        instances.add(instance);
    }

    System.out.println("Before filter:");
    for (int i = 0; i < instances.size(); i++) {
        System.out.println(instances.instance(i).toString());
    }

    try {
        String dictionaryName = "emoticons.txt";
        StringToDictionaryVector filter = new StringToDictionaryVector();
        List<String> termList = StringToDictionaryVector.readDictionaryFile(new File(dictionaryName));
        filter.setTermList(termList);
        filter.setMinTermFreq(1);
        filter.setTFTransform(true);
        filter.setIDFTransform(true);
        filter.setNormalizeDocLength(new SelectedTag(FILTER_NORMALIZE_TEST_ONLY, TAGS_FILTER));
        filter.setOutputWordCounts(true);
        filter.setStringAttribute("message");
        filter.setInputFormat(instances);

        Instances trans1 = Filter.useFilter(instances, filter);
        Instances trans2 = Filter.useFilter(instances, filter);

        System.out.println("\nFirst application:");
        System.out.println(trans1.toString());
        System.out.println("\nSecond application:");
        System.out.println(trans2.toString());
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:id3j48.WekaAccess.java
public static void classify(String filename, Classifier classifier) throws Exception {
    Instances input = readArff(filename);
    input.setClassIndex(input.numAttributes() - 1);

    for (int i = 0; i < input.numInstances(); i++) {
        double classLabel = classifier.classifyInstance(input.instance(i));
        input.instance(i).setClassValue(classLabel);
        System.out.println("Instance: " + input.instance(i));
        System.out.println("Class: " + input.classAttribute().value((int) classLabel));
    }

    try (BufferedWriter writer = new BufferedWriter(
            new FileWriter(classifiedFolder + File.separator + filename))) {
        writer.write(input.toString());
        writer.newLine();
        writer.flush();
    }
}
From source file:id3j48.WekaAccess.java
public static void main(String[] args) {
    initializePath();
    try {
        cin = new Scanner(System.in);
        Instances data = null, tempdata;
        Classifier NBclassifier, ID3classifier, j48classifier;
        Evaluation NBeval, ID3eval, j48eval;

        System.out.println("Enter filename below");
        String filename = cin.nextLine();
        System.out.println("Loading " + filename + "...");

        String extension = "";
        String name = "";
        int i = filename.lastIndexOf('.');
        if (i > 0) {
            extension = filename.substring(i + 1);
            name = filename.substring(0, i);
        }

        if (extension.equalsIgnoreCase("arff")) {
            try {
                data = readArff(filename);
            } catch (Exception ex) {
                Logger.getLogger(WekaAccess.class.getName()).log(Level.SEVERE, null, ex);
            }
        } else if (extension.equalsIgnoreCase("csv")) {
            try {
                data = readCsv(filename);
            } catch (Exception ex) {
                Logger.getLogger(WekaAccess.class.getName()).log(Level.SEVERE, null, ex);
            }
        } else {
            System.out.println("Invalid extension");
            System.exit(0);
        }
        System.out.println(data.toString());

        System.out.println("Resample data? (y for yes) ");
        String resample = cin.nextLine();
        if (resample.equalsIgnoreCase("y")) {
            try {
                tempdata = resampleData(data);
                System.out.println("-- Resampled data --");
                System.out.println(tempdata.toString());
            } catch (Exception ex) {
                Logger.getLogger(WekaAccess.class.getName()).log(Level.SEVERE, null, ex);
            }
        }

        tempdata = removeAttribute(data, data.numAttributes());
        System.out.println("-- Remove Attribute --");
        System.out.println(tempdata.toString());

        NBclassifier = buildClassifier(data, new NaiveBayes());
        System.out.println("-- Naive Bayes Classifier --");
        System.out.println(NBclassifier.toString());
        ID3classifier = buildClassifier(data, new Id3());
        System.out.println("-- ID3 Classifier --");
        System.out.println(ID3classifier.toString());
        j48classifier = buildClassifier(data, new J48());
        System.out.println("-- J48 Classifier --");
        System.out.println(j48classifier.toString());

        Instances test = null;
        if (extension.equalsIgnoreCase("arff"))
            test = readArff("test." + filename);
        else if (extension.equalsIgnoreCase("csv"))
            test = readCsv("test." + filename);

        NBeval = testModel(NBclassifier, data, test);
        System.out.println(NBeval.toSummaryString("-- Training set evaluation results with Naive Bayes --\n", false));
        ID3eval = testModel(ID3classifier, data, test);
        System.out.println(ID3eval.toSummaryString("-- Training set evaluation results with ID3 --\n", false));
        j48eval = testModel(j48classifier, data, test);
        System.out.println(j48eval.toSummaryString("-- Training set evaluation results with J48 --\n", false));

        NBeval = tenFoldCrossValidation(data, NBclassifier);
        System.out.println(NBeval.toSummaryString("-- 10-fold cross validation results with Naive Bayes --\n", false));
        ID3eval = tenFoldCrossValidation(data, ID3classifier);
        System.out.println(ID3eval.toSummaryString("-- 10-fold cross validation results with ID3 --\n", false));
        j48eval = tenFoldCrossValidation(data, j48classifier);
        System.out.println(j48eval.toSummaryString("-- 10-fold cross validation results with J48 --\n", false));

        NBeval = percentageSplit(data, NBclassifier, 66);
        System.out.println(NBeval.toSummaryString("-- 66% split validation results with Naive Bayes --\n", false));
        ID3eval = percentageSplit(data, ID3classifier, 66);
        System.out.println(ID3eval.toSummaryString("-- 66% split validation results with ID3 --\n", false));
        j48eval = percentageSplit(data, j48classifier, 66);
        System.out.println(j48eval.toSummaryString("-- 66% split validation results with J48 --\n", false));

        System.out.println("-- Save Naive Bayes Model --");
        saveModel("nb." + name + ".model", NBclassifier);
        System.out.println("-- Save ID3 Model --");
        saveModel("id3." + name + ".model", ID3classifier);
        System.out.println("-- Save J48 Model --");
        saveModel("j48." + name + ".model", j48classifier);

        System.out.println("-- Load Naive Bayes Model --");
        System.out.println(loadModel("nb." + name + ".model").toString());
        System.out.println("-- Load ID3 Model --");
        System.out.println(loadModel("id3." + name + ".model").toString());
        System.out.println("-- Load J48 Model --");
        System.out.println(loadModel("j48." + name + ".model").toString());

        System.out.println("-- Classify Naive Bayes Model --");
        classify("classify." + filename, NBclassifier);
        System.out.println("-- Classify ID3 Model --");
        classify("classify." + filename, ID3classifier);
        System.out.println("-- Classify J48 Model --");
        classify("classify." + filename, j48classifier);
    } catch (Exception ex) {
        Logger.getLogger(WekaAccess.class.getName()).log(Level.SEVERE, null, ex);
    }
}
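The helpers testModel, tenFoldCrossValidation and percentageSplit are project-local and not shown here. As a hedged sketch only, a tenFoldCrossValidation of this shape could be built directly on Weka's Evaluation class; the method name matches the call above, but the body and the fixed seed are assumptions:

import java.util.Random;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.core.Instances;

// Sketch of a 10-fold cross-validation helper using Weka's built-in support.
static Evaluation tenFoldCrossValidation(Instances data, Classifier classifier) throws Exception {
    Evaluation eval = new Evaluation(data);
    eval.crossValidateModel(classifier, data, 10, new Random(1)); // 10 folds, fixed seed
    return eval;
}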
From source file:intensityclustering.IntensityClustering.java
/**
 * Draws the 2D histogram plot in the IntensityClustering. The x-axis is the
 * intensity value of the channel 2 image (where the stained nuclei are); the
 * y-axis shows the relative frequencies of the present nuclei.
 *
 * @param tss The TMAspots whose nuclei are considered (both gold-standard
 * and estimated nuclei).
 * @param doAlsoClustering If true, the TMApoints are also clustered
 * according to the histogram.
 */
void drawNucleiIntensities2D(List<TMAspot> tss, boolean doAlsoClustering) {
    // draw the plot
    Plot2DPanel plot;
    if (((java.awt.BorderLayout) (jPanel9.getLayout()))
            .getLayoutComponent(java.awt.BorderLayout.CENTER) != null) {
        plot = (Plot2DPanel) ((java.awt.BorderLayout) (jPanel9.getLayout()))
                .getLayoutComponent(java.awt.BorderLayout.CENTER);
        plot.removeAllPlots();
        plot.removeAllPlotables();
    } else {
        plot = new Plot2DPanel(PlotPanel.SOUTH);
        plot.setAxisLabels("Intensity", "Frequency");
        plot.plotCanvas.setBackground(jPanel9.getBackground());
        plot.plotLegend.setBackground(jPanel9.getBackground());
        plot.plotToolBar.setBackground(plot.plotCanvas.getBackground());
    }
    if (((java.awt.BorderLayout) (jPanel9.getLayout()))
            .getLayoutComponent(java.awt.BorderLayout.CENTER) == null) {
        jPanel9.add(plot, java.awt.BorderLayout.CENTER);
        jPanel15.setBackground(plot.plotCanvas.getBackground());
        jPanel15.setVisible(true);
        validate();
        pack();
    }

    if (tss.size() > 0) {
        try {
            this.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
            List<Integer> intensities = new ArrayList<>();
            int intensity;
            int min = Integer.parseInt(jTextField1.getText());
            int max = Integer.parseInt(jTextField16.getText());
            for (TMAspot ts : tss) {
                //TODO: GET THE CHANNEL 2 IMAGE
                //BufferedImage img = ts.getBufferedImage(TMAspot.SHOW_CHANNEL2_IMAGE, false);
                BufferedImage img = ts.getBufferedImage(false);
                // img can be null if color deconvolution has not been performed yet.
                if (img != null) {
                    List<TMApoint> tps = ts.getPoints();
                    for (TMALabel tp : tps) {
                        intensity = TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false)
                                .getRed();
                        if (intensity >= min && intensity <= max) {
                            intensities.add(intensity);
                        }
                    }
                }
            }
            double[] intensities_array = new double[intensities.size()];
            for (int i = 0; i < intensities.size(); i++) {
                intensities_array[i] = intensities.get(i);
            }
            int nbins = jSlider7.getValue();
            if (intensities_array.length > 0) {
                plot.addHistogramPlot("TMA points", intensities_array, 0, 256, nbins);
            }
            //else {
            //    JOptionPane.showMessageDialog(this, "No TMA points have been found.", "No TMA points found.", JOptionPane.WARNING_MESSAGE);
            //}

            //// Cluster points according to the histogram
            if (doAlsoClustering) {
                // Find clusters
                int n = getParam_nClusters();

                // Create ARFF data
                FastVector atts;
                Instances data;
                int i;

                // 1. create arff data format
                atts = new FastVector(1);
                for (i = 0; i < 1; i++) {
                    atts.addElement(new Attribute(Integer.toString(i)));
                }

                // 2. create Instances object
                data = new Instances("TMA points", atts, tmarker.getNumberNuclei(tss));

                // 3. fill with data
                for (i = 0; i < intensities_array.length; i++) {
                    // add the instance
                    Instance inst = new Instance(1.0, new double[] { intensities_array[i] });
                    inst.setDataset(data);
                    data.add(inst);
                }

                // 4. set data class index (last attribute is the class)
                //data.setClassIndex(data.numAttributes() - 1); // not for weka 3.5.X

                if (tmarker.DEBUG > 4) {
                    java.util.logging.Logger.getLogger(getClass().getName())
                            .log(java.util.logging.Level.INFO, data.toString());
                }

                Clusterer clusterer = getClusterer();
                String[] options = getClustererOptions();
                if (tmarker.DEBUG > 3) {
                    if (options.length > 0) {
                        String info = "Clusterer should have options:\n";
                        for (String o : options) {
                            info += o + " ";
                        }
                        info += "\n";
                        java.util.logging.Logger.getLogger(getClass().getName())
                                .log(java.util.logging.Level.INFO, info);
                    }
                }
                clusterer.setOptions(options); // set the clusterer options
                clusterer.buildClusterer(data); // build the clusterer

                // Order the clusters according to brightness:
                // the brightest cluster should be 0, then 1, then 2, ...
                ArrayList<ArrayList<Double>> values = new ArrayList<>();
                for (i = 0; i < n; i++) {
                    values.add(new ArrayList<Double>());
                }
                int z;
                double value;
                for (i = 0; i < data.numInstances(); i++) {
                    z = clusterer.clusterInstance(data.instance(i));
                    value = data.instance(i).value(0);
                    values.get(z).add(value);
                }
                double[] means = new double[n];
                double[] stds = new double[n];
                for (i = 0; i < n; i++) {
                    means[i] = Misc.mean(values.get(i).toArray(new Double[values.get(i).size()]));
                    stds[i] = Misc.std(values.get(i).toArray(new Double[values.get(i).size()]));
                }
                int[] ordering = Misc.orderArray(means, true);
                for (i = 0; i < n; i++) {
                    int ind = Misc.IndexOf(ordering, i);
                    plot.addPlotable(new Line(getParam_ColorOfClassK(i),
                            new double[] { means[ind], plot.plotCanvas.base.roundXmin[1] },
                            new double[] { means[ind], plot.plotCanvas.base.roundXmax[1] }, 2 * stds[ind]));
                    plot.addPlot(Plot2DPanel.LINE, "Staining " + i, getParam_ColorOfClassK(i),
                            new double[][] {
                                    new double[] { means[ind], plot.plotCanvas.base.roundXmin[1] },
                                    new double[] { means[ind], plot.plotCanvas.base.roundXmax[1] } });
                }

                String clusterInfo = "";
                for (String o : clusterer.getOptions()) {
                    clusterInfo += o + " ";
                }
                clusterInfo += "\n\n";
                clusterInfo += clusterer.toString().trim();
                if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) {
                    try {
                        clusterInfo += ((HierarchicalClusterer) clusterer).graph();
                        HierarchyVisualizer a = new HierarchyVisualizer(
                                ((HierarchicalClusterer) clusterer).graph());
                        a.setSize(800, 600);
                        if (clusterVisualizer == null) {
                            clusterVisualizer = new JFrame("Hierarchical Clusterer Dendrogram");
                            clusterVisualizer.setIconImage(getIconImage());
                            clusterVisualizer.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE);
                            clusterVisualizer.setSize(800, 600);
                        }
                        Container contentPane = clusterVisualizer.getContentPane();
                        contentPane.removeAll();
                        contentPane.add(a);
                    } catch (Exception e) {
                        clusterVisualizer = null;
                    }
                }
                jTextArea1.setText(clusterInfo);
                if (tmarker.DEBUG > 3) {
                    String info = "Clusterer has options\n";
                    for (String o : clusterer.getOptions()) {
                        info += o + " ";
                    }
                    info += "\n";
                    info += clusterer.toString() + "\n";
                    // info += (clusterer).globalInfo() + "\n";
                    info += "\n";
                    info += clusterInfo + "\n";
                    java.util.logging.Logger.getLogger(getClass().getName())
                            .log(java.util.logging.Level.INFO, info);
                }

                // Cluster all TMAspots and assign the corresponding class to them
                List<List<Integer>> clustered_points = new ArrayList<>();
                for (i = 0; i < n; i++) {
                    clustered_points.add(new ArrayList<Integer>());
                }
                int k;
                for (TMAspot ts : tss) {
                    //TODO: GET THE CHANNEL 2 IMAGE
                    //BufferedImage img = ts.getBufferedImage(TMAspot.SHOW_CHANNEL2_IMAGE, false);
                    BufferedImage img = ts.getBufferedImage(false);
                    List<TMApoint> tps = ts.getPoints();
                    for (TMApoint tp : tps) {
                        intensity = TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false)
                                .getRed();
                        // add the instance
                        Instance inst = new Instance(1.0, new double[] { intensity });
                        inst.setDataset(data);
                        k = ordering[clusterer.clusterInstance(inst)];
                        // store the color for later visualization
                        clustered_points.get(k).add(intensity);
                        // set the staining of the TMApoint
                        switch (k) {
                        case 0:
                            tp.setStaining(TMALabel.STAINING_0);
                            break;
                        case 1:
                            tp.setStaining(TMALabel.STAINING_1);
                            break;
                        case 2:
                            tp.setStaining(TMALabel.STAINING_2);
                            break;
                        default:
                            tp.setStaining(TMALabel.STAINING_3);
                            break;
                        }
                    }
                    ts.dispStainingInfo();
                    if (manager.getVisibleTMAspot() == ts) {
                        manager.repaintVisibleTMAspot();
                    }
                }

                // Write the description
                String description = "Nuclei clustered with " + getParam_AutomaticClustererString();
                if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) {
                    description += " (" + getParam_HierarchicalClusteringMethod() + ")";
                }
                description += ", n=" + getParam_nClusters() + ", channel 2 intensity.";
                jLabel42.setText(description);
                jLabel41.setText(" ");
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            this.setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR));
        }
    }
}
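Both IntensityClustering examples target the old Weka 3.4/3.5-era API (FastVector and the concrete Instance class), as the "// not for weka 3.5.X" comment suggests. Under recent Weka releases (3.7+), where FastVector is deprecated and Instance is an interface implemented by DenseInstance, the same in-memory dataset construction would look roughly like the sketch below; it reuses intensities_array from the method above and is an assumption about the port, not code from the project:

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

// Build the one-attribute dataset of intensity values, Weka 3.7+ style.
ArrayList<Attribute> atts = new ArrayList<>();
atts.add(new Attribute("0"));
Instances data = new Instances("TMA points", atts, intensities_array.length);
for (double v : intensities_array) {
    Instance inst = new DenseInstance(1.0, new double[] { v });
    inst.setDataset(data);
    data.add(inst);
}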
From source file:intensityclustering.IntensityClustering.java
/**
 * Clusters the TMApoints on the given TMAspots according to their staining
 * intensity (color). All parameters (e.g. clusterer and its parameters) are
 * selected by the user. Features are simple color features.
 *
 * @param tss The TMAspots of which all nuclei (gold-standard and estimated)
 * are clustered according to color.
 */
private void clusterPointsAutomaticallyColorSpace(List<TMAspot> tss) {
    if (tss.size() > 0) {
        try {
            this.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
            int n = getParam_nClusters();

            // Create ARFF data
            FastVector atts;
            Instances data;
            int i;

            // 1. create arff data format
            atts = new FastVector(3);
            for (i = 0; i < 3; i++) {
                atts.addElement(new Attribute(Integer.toString(i)));
            }

            // 2. create Instances object
            data = new Instances("TMA points", atts, tmarker.getNumberNuclei(tss));

            // 3. fill with data
            BufferedImage img;
            Color c;
            float[] features = new float[3];
            String colorSpace = getParam_ColorSpace();
            for (TMAspot ts : tss) {
                img = ts.getBufferedImage();
                List<TMApoint> tps = ts.getPoints();
                for (TMApoint tp : tps) {
                    Color2Feature(TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false),
                            colorSpace, features);
                    // add the instance
                    Instance inst = new Instance(1.0,
                            new double[] { features[0], features[1], features[2] });
                    inst.setDataset(data);
                    data.add(inst);
                }
            }

            // 4. set data class index (last attribute is the class)
            //data.setClassIndex(data.numAttributes() - 1); // not for weka 3.5.X

            if (tmarker.DEBUG > 4) {
                java.util.logging.Logger.getLogger(getClass().getName())
                        .log(java.util.logging.Level.INFO, data.toString());
            }

            Clusterer clusterer = getClusterer();
            String[] options = getClustererOptions();
            // disabled branch: would plug a custom HSB distance function into the options
            if (false && colorSpace.equalsIgnoreCase("hsb")) {
                String[] newoptions = new String[options.length + 2];
                System.arraycopy(options, 0, newoptions, 0, options.length);
                newoptions[options.length] = "-A";
                newoptions[options.length + 1] = "weka.core.MyHSBDistance";
                options = newoptions;
            }
            if (tmarker.DEBUG > 3) {
                if (options.length > 0) {
                    String info = "Clusterer should have options\n";
                    for (String o : options) {
                        info += o + " ";
                    }
                    info += "\n";
                    java.util.logging.Logger.getLogger(getClass().getName())
                            .log(java.util.logging.Level.INFO, info);
                }
            }
            clusterer.setOptions(options); // set the clusterer options
            clusterer.buildClusterer(data); // build the clusterer

            // Order the clusters according to brightness:
            // the brightest cluster should be 0, then 1, then 2, ...
            ArrayList<ArrayList<Double>> values = new ArrayList<>();
            for (i = 0; i < clusterer.numberOfClusters(); i++) {
                values.add(new ArrayList<Double>());
            }
            int z;
            double value;
            for (i = 0; i < data.numInstances(); i++) {
                z = clusterer.clusterInstance(data.instance(i));
                value = getParam_ColorSpace().equalsIgnoreCase("hsb") ? data.instance(i).value(2)
                        : Misc.RGBToGray(data.instance(i).value(0), data.instance(i).value(1),
                                data.instance(i).value(2));
                values.get(z).add(value);
            }
            double[] means = new double[clusterer.numberOfClusters()];
            for (i = 0; i < clusterer.numberOfClusters(); i++) {
                means[i] = Misc.mean(values.get(i).toArray(new Double[values.get(i).size()]));
            }
            int[] ordering = Misc.orderArray(means, !getParam_ColorSpace().equalsIgnoreCase("rtp"));

            String clusterInfo = "";
            for (String o : clusterer.getOptions()) {
                clusterInfo += o + " ";
            }
            clusterInfo += "\n\n";
            clusterInfo += clusterer.toString().trim();
            if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) {
                try {
                    clusterInfo += ((HierarchicalClusterer) clusterer).graph();
                    HierarchyVisualizer a = new HierarchyVisualizer(
                            ((HierarchicalClusterer) clusterer).graph());
                    a.setSize(800, 600);
                    if (clusterVisualizer == null) {
                        clusterVisualizer = new JFrame("Hierarchical Clusterer Dendrogram");
                        clusterVisualizer.setIconImage(getIconImage());
                        clusterVisualizer.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE);
                        clusterVisualizer.setSize(800, 600);
                    }
                    Container contentPane = clusterVisualizer.getContentPane();
                    contentPane.removeAll();
                    contentPane.add(a);
                } catch (Exception e) {
                    clusterVisualizer = null;
                }
            }
            jTextArea1.setText(clusterInfo);
            if (tmarker.DEBUG > 3) {
                String info = "Clusterer has options\n";
                for (String o : clusterer.getOptions()) {
                    info += o + " ";
                }
                info += "\n";
                info += clusterer.toString() + "\n";
                // info += (clusterer).globalInfo() + "\n";
                info += "\n";
                info += clusterInfo + "\n";
                java.util.logging.Logger.getLogger(getClass().getName())
                        .log(java.util.logging.Level.INFO, info);
            }

            // Cluster all TMAspots and assign the corresponding class to them
            List<List<Color>> clustered_points = new ArrayList<>();
            for (i = 0; i < clusterer.numberOfClusters(); i++) {
                clustered_points.add(new ArrayList<Color>());
            }
            int k;
            for (TMAspot ts : tss) {
                img = ts.getBufferedImage();
                List<TMApoint> tps = ts.getPoints();
                for (TMApoint tp : tps) {
                    c = TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false);
                    Color2Feature(c, colorSpace, features);
                    // add the instance
                    Instance inst = new Instance(1.0,
                            new double[] { features[0], features[1], features[2] });
                    inst.setDataset(data);
                    k = ordering[clusterer.clusterInstance(inst)];
                    // store the color for later visualization
                    clustered_points.get(k).add(c);
                    // set the staining of the TMApoint
                    switch (k) {
                    case 0:
                        tp.setStaining(TMALabel.STAINING_0);
                        break;
                    case 1:
                        tp.setStaining(TMALabel.STAINING_1);
                        break;
                    case 2:
                        tp.setStaining(TMALabel.STAINING_2);
                        break;
                    default:
                        tp.setStaining(TMALabel.STAINING_3);
                        break;
                    }
                }
                ts.dispStainingInfo();
                if (manager.getVisibleTMAspot() == ts) {
                    manager.repaintVisibleTMAspot();
                }
            }

            // draw the points
            Plot3DPanel plot;
            if (((java.awt.BorderLayout) (jPanel2.getLayout()))
                    .getLayoutComponent(java.awt.BorderLayout.CENTER) != null) {
                plot = (Plot3DPanel) ((java.awt.BorderLayout) (jPanel2.getLayout()))
                        .getLayoutComponent(java.awt.BorderLayout.CENTER);
                plot.removeAllPlots();
            } else {
                plot = new Plot3DPanel();
                plot.plotCanvas.setBackground(jPanel2.getBackground());
                plot.addLegend(PlotPanel.SOUTH);
                plot.plotLegend.setBackground(jPanel2.getBackground());
            }
            if (colorSpace.equalsIgnoreCase("hsb")) {
                plot.setAxisLabels("Hue", "Saturation", "Brightness");
            } else if (colorSpace.equalsIgnoreCase("rtp")) {
                plot.setAxisLabels("R", "Theta", "Phi");
            } else {
                plot.setAxisLabels("Red", "Green", "Blue");
            }
            for (i = 0; i < clusterer.numberOfClusters(); i++) {
                double[] xs = new double[clustered_points.get(i).size()];
                double[] ys = new double[clustered_points.get(i).size()];
                double[] zs = new double[clustered_points.get(i).size()];
                for (int j = 0; j < clustered_points.get(i).size(); j++) {
                    Color2Feature(clustered_points.get(i).get(j), colorSpace, features);
                    xs[j] = features[0];
                    ys[j] = features[1];
                    zs[j] = features[2];
                }
                if (xs.length > 0) {
                    c = getParam_ColorOfClassK(i);
                    plot.addScatterPlot("Staining " + i, c, xs, ys, zs);
                }
            }

            // Write the description
            String description = "Nuclei clustered with " + getParam_AutomaticClustererString();
            if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) {
                description += " (" + getParam_HierarchicalClusteringMethod() + ")";
            }
            description += ", n=" + getParam_nClusters() + ", color space " + getParam_ColorSpace() + ".";
            jLabel41.setText(description);
            jLabel42.setText(" ");

            if (((java.awt.BorderLayout) (jPanel2.getLayout()))
                    .getLayoutComponent(java.awt.BorderLayout.CENTER) == null) {
                jPanel2.add(plot, java.awt.BorderLayout.CENTER);
                validate();
                pack();
            }
        } catch (Exception | OutOfMemoryError e) {
            java.util.logging.Logger.getLogger(getClass().getName())
                    .log(java.util.logging.Level.SEVERE, null, e);
            JOptionPane.showMessageDialog(this,
                    "The clustering could not be performed.\n\n" + "A possible reason is:\n"
                            + "- Not enough memory (too many points). \n\n"
                            + "You might want to try a different clustering method or fewer TMAspots.\n\n"
                            + "The error message is: \n" + e.getMessage(),
                    "Error at Nucleus clustering", JOptionPane.WARNING_MESSAGE);
        } finally {
            this.setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR));
        }
    }
}
From source file:lascer.WekaClassifier.java
License:Open Source License
/**
 * Generates the classifier.
 *
 * @param data the data to be used.
 *
 * @exception Exception if the classifier can't be built successfully.
 */
public void buildClassifier(Instances data) throws Exception {
    weka.coreExtended.Instances extendedInstances;
    weka.coreExtended.BasicInstance extInst;
    weka.coreExtended.BasicAttribute classAttribut;
    de.unistuttgart.commandline.Option formelnArtOption;
    de.unistuttgart.commandline.Option formelnKlasseOption;
    de.unistuttgart.commandline.Option loggingSwitch;
    Instance readInst;
    Beispieldaten invDatensatz;
    StringReader stringReader;
    Enumeration instEnum;
    Enumeration attribEnum;
    PraedErzParameter praedErzParameter = null;
    KonzErzParameter konzErzParameter = null;
    Pruning pruning;
    String formelArt;
    String formelKlasse;
    String optionWert;
    float posPruneAnt, negPruneAnt;
    int instNumber;
    boolean unbekannteWertBsp;

    Steuerung.parseArguments(parser);

    formelArt = Konstanten.WEKA_FORMEL_ART;
    formelnArtOption = parser.getOption("formelArt");
    if (parser.isEnabled(formelnArtOption)) {
        optionWert = parser.getParameter(formelnArtOption);
        if (!optionWert.equals("dis") && !optionWert.equals("kon") && !optionWert.equals("beste")) {
            System.err.println("Wert der Option formelArt unzulässig");
            System.err.println("Zulässig: " + formelnArtOption.toString());
            throw (new RuntimeException("Wert von Option unzulässig."));
        }
        formelArt = optionWert;
    }

    formelKlasse = Konstanten.WEKA_FORMEL_KLASSE;
    formelnKlasseOption = parser.getOption("formelKlasse");
    if (parser.isEnabled(formelnKlasseOption)) {
        optionWert = parser.getParameter(formelnKlasseOption);
        if (!optionWert.equals("pos") && !optionWert.equals("neg") && !optionWert.equals("beste")
                && !optionWert.equals("beide")) {
            System.err.println("Wert der Option formelKlasse unzulässig");
            System.err.println("Zulässig: " + formelnKlasseOption.toString());
            throw (new RuntimeException("Wert von Option unzulässig."));
        }
        formelKlasse = optionWert;
    }

    loggingSwitch = parser.getOption("logging");
    if (debugMode || parser.isEnabled(loggingSwitch)) {
        Steuerung.setLogLevel(Konstanten.LOGGING_LEVEL);
    }

    // Determine the parameters.
    unbekannteWertBsp = Steuerung.unbekannteWertBeispiele(parser);
    posPruneAnt = Steuerung.posPruneAnteil(parser);
    negPruneAnt = Steuerung.negPruneAnteil(parser);
    praedErzParameter = Steuerung.praedErzParameter(parser);
    konzErzParameter = Steuerung.konzErzParameter(parser);

    // Read the data and create the extended Instances object.
    instNumber = data.numInstances();
    stringReader = new StringReader(data.toString());
    extendedInstances = new weka.coreExtended.Instances(stringReader, instNumber);
    instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        readInst = (Instance) instEnum.nextElement();
        extInst = new weka.coreExtended.BasicInstance(readInst.weight(), readInst.toDoubleArray());
        extendedInstances.addBasicInstance(extInst);
    }

    // Create the datasets.
    posDatensatz = ArffDateiEinlesen.beispieldaten(extendedInstances, unbekannteWertBsp);
    negDatensatz = posDatensatz.kopie(true);

    // Create the list of attributes.
    attributListe = new LinkedList();
    attribEnum = extendedInstances.enumerateBasicAttributes();
    while (attribEnum.hasMoreElements()) {
        attributListe.add(attribEnum.nextElement());
    }

    // Determine the values of the classification.
    classAttribut = extendedInstances.basicClassAttribute();
    wekaClassTrue = classAttribut.indexOfValue("true");
    wekaClassFalse = classAttribut.indexOfValue("false");

    // Generate the formula for the class of the positive examples.
    if (formelKlasse.equals("pos") || formelKlasse.equals("beste") || formelKlasse.equals("beide")) {
        posFormel = generatedFormula(posDatensatz, praedErzParameter, konzErzParameter, formelArt);
    }

    // Generate the formula for the class of the negative examples.
    if (formelKlasse.equals("neg") || formelKlasse.equals("beste") || formelKlasse.equals("beide")) {
        negFormel = generatedFormula(negDatensatz, praedErzParameter, konzErzParameter, formelArt);
    }

    if (formelKlasse.equals("beste")) {
        // Discard the worse formula.
        if (negFormel.istBesser(posFormel)) {
            posFormel = null;
        } else {
            negFormel = null;
        }
    }

    if ((posPruneAnt > 0) || (negPruneAnt > 0)) {
        pruning = new Pruning();
        if (posFormel != null) {
            posDatensatz = pruning.reduzierteDaten(posDatensatz, posFormel, posPruneAnt, negPruneAnt);
            posFormel = generatedFormula(posDatensatz, praedErzParameter, konzErzParameter, formelArt);
        }
        if (negFormel != null) {
            negDatensatz = pruning.reduzierteDaten(negDatensatz, negFormel, negPruneAnt, posPruneAnt);
            negFormel = generatedFormula(negDatensatz, praedErzParameter, konzErzParameter, formelArt);
        }
    }
}
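The notable toString() usage in this example is the round trip: data.toString() emits ARFF text, which the extended Instances constructor then re-parses from a StringReader. The same trick works with stock Weka, since weka.core.Instances also offers a Reader constructor; a one-line sketch:

// Copy a dataset by serializing it to ARFF and parsing it back (stock Weka).
Instances copy = new Instances(new java.io.StringReader(data.toString()));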
From source file:lector.Analizador.java
public static void clasificador() {
    BufferedReader reader1;
    BufferedReader reader2;
    try {
        reader1 = new BufferedReader(new FileReader("/Users/danieltapia/Google Drive/EPN/MAESTRIA/MSW128 BI/"
                + "proyecto/compartida/DataSetAnalisisSentimientos.arff"));
        reader2 = new BufferedReader(new FileReader("/Users/danieltapia/Google Drive/EPN/MAESTRIA/MSW128 BI/"
                + "proyecto/compartida/DataSetAnalisisSentimientos_inc.arff"));

        Instances train = new Instances(reader1);
        train.setClassIndex(train.numAttributes() - 1);
        System.out.println(train.classIndex() + " " + train.numAttributes());

        Instances test = new Instances(reader2);
        test.setClassIndex(train.numAttributes() - 1);
        System.out.println(test.classIndex() + " " + test.numAttributes());

        NaiveBayes model = new NaiveBayes();
        model.buildClassifier(train);

        // classify
        Instances labeled = new Instances(test);
        for (int i = 0; i < test.numInstances(); i++) {
            double clsLabel = model.classifyInstance(test.instance(i));
            labeled.instance(i).setClassValue(clsLabel);
        }

        // https://youtu.be/JY_x5zKTfyo?list=PLJbE6j2EG1pZnBhOg3_Rb63WLCprtyJag
        Evaluation eval_train = new Evaluation(test);
        eval_train.evaluateModel(model, test);
        reader1.close();
        reader2.close();
        //System.out.println(eval_train.toSummaryString("\nResults\n======\n", false));

        String[] options = new String[4];
        options[0] = "-t"; // name of training file
        options[1] = "/Users/danieltapia/Google Drive/EPN/MAESTRIA/MSW128 BI/proyecto/"
                + "compartida/DataSetAnalisisSentimientos.arff";
        options[2] = "-T"; // name of test file
        options[3] = "/Users/danieltapia/Google Drive/EPN/MAESTRIA/MSW128 BI/proyecto/"
                + "compartida/DataSetAnalisisSentimientos_inc.arff";
        System.out.println(Evaluation.evaluateModel(model, options));

        // print classification results to file
        try (BufferedWriter writer = new BufferedWriter(
                new FileWriter("/Users/danieltapia/Google Drive/EPN/MAESTRIA/MSW128 BI/"
                        + "proyecto/compartida/DataSetAnalisisSentimientos_labeled.arff"))) {
            writer.write(labeled.toString());
        }
    } catch (Exception e) {
        e.printStackTrace(); // report errors instead of swallowing them
    }
}
From source file:mao.datamining.DataSetPair.java
private void doItOnce4All() {
    if (didIt)
        return;
    didIt = true;
    try {
        // step 0: load the training data and locate the class attribute
        Instances orangeDataSet = ConverterUtils.DataSource.read(trainSourceFileName);
        orangeDataSet.setClassIndex(orangeDataSet.numAttributes() - 1);
        Attribute classAttr = orangeDataSet.attribute(orangeDataSet.numAttributes() - 1);
        MainLogger.log(Level.INFO, "Class Attribute: {0}", classAttr.toString());

        // step 0-1: remove all columns which have more than 50% missing values
        Instances newData = orangeDataSet;
        RemoveUselessColumnsByMissingValues removeMissingValuesColumns = new RemoveUselessColumnsByMissingValues();
        removeMissingValuesColumns.setM_maxMissingPercentage(50);
        removeMissingValuesColumns.setManualDeleteColumns(columns2Delete);
        removeMissingValuesColumns.setInputFormat(newData);
        newData = Filter.useFilter(newData, removeMissingValuesColumns);
        Main.logging("== New Data After Removing all Columns having >50% missing values: ===\n"
                + newData.toSummaryString());
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(Main.OrangeProcessedDSHome + "/afterRemoveMissingColumns1.arff")))) {
            writer.write(newData.toString());
        }

        // step 0-2: delete those instances (rows) with more than half missing values,
        // scanning the ARFF file written above line by line
        BufferedReader reader70 = new BufferedReader(new InputStreamReader(
                new FileInputStream(Main.OrangeProcessedDSHome + "/afterRemoveMissingColumns1.arff")));
        BufferedWriter writerAfterDeleteRows = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(Main.OrangeProcessedDSHome + "/afterRemoveRows2.arff")));
        int columnNum = newData.numAttributes();
        int totalInstanceNum = newData.numInstances(), deleteM1Num = 0, delete1Num = 0;
        String line = null;
        int missingColumnNum = 0;
        while ((line = reader70.readLine()) != null) {
            missingColumnNum = 0;
            for (int i = 0; i < line.length(); i++) {
                if (line.charAt(i) == '?')
                    missingColumnNum++;
            }
            if (missingColumnNum * 100 / columnNum < 50) {
                writerAfterDeleteRows.write(line);
                writerAfterDeleteRows.newLine();
            } else {
                System.out.println("Delete Row: [" + line + "]");
                if (line.endsWith("-1")) {
                    deleteM1Num++;
                } else {
                    delete1Num++;
                }
            }
        }
        System.out.println("Total: " + totalInstanceNum + ", delete class -1: " + deleteM1Num
                + ", delete class 1: " + delete1Num);
        reader70.close();
        writerAfterDeleteRows.close();

        // create sample files
        createSampleDataSets();
    } catch (Exception e) {
        Main.logging(null, e);
    }
}