Example usage for weka.core Instances toString

List of usage examples for weka.core Instances toString

Introduction

On this page you can find usage examples for the weka.core.Instances.toString() method.

Prototype

@Override
public String toString() 

Document

Returns the dataset as a string in ARFF format.
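
For orientation, here is a minimal sketch of what the method produces, assuming Weka 3.7 or later (where DenseInstance and the ArrayList-based Instances constructor are available); the class name ToStringDemo is illustrative:

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class ToStringDemo {
    public static void main(String[] args) {
        // Build a tiny dataset with a single numeric attribute.
        ArrayList<Attribute> attributes = new ArrayList<Attribute>();
        attributes.add(new Attribute("value"));
        Instances data = new Instances("demo", attributes, 2);
        data.add(new DenseInstance(1.0, new double[] { 1.5 }));
        data.add(new DenseInstance(1.0, new double[] { 2.5 }));

        // toString() renders the whole dataset as an ARFF document:
        // @relation demo
        //
        // @attribute value numeric
        //
        // @data
        // 1.5
        // 2.5
        System.out.println(data.toString());
    }
}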

Usage

From source file:edu.teco.context.recognition.WekaManager.java

License:Apache License

private void storeArffFile(Instances dataSet) {

    // see http://weka.wikispaces.com/Save+Instances+to+an+ARFF+File
    // better performance with ArffSaver but no comments supported?

    if (DataLogger.isExternalStorageAvailable()) {
        // ArffSaver saver = new ArffSaver();
        // saver.setInstances(dataSet);
        try {

            String arffDirectory = FrameworkConfiguration.getInstance().getArffDirectory();
            File arffDir = new File(Environment.getExternalStorageDirectory() + arffDirectory);
            arffDir.mkdirs();

            String arffFileName = "Recognition_" + new Date().getTime() + ".arff";

            mArffFile = new File(arffDir, arffFileName);

            if (isLogDirectlyToFile) {
                mWriter = new BufferedWriter(new FileWriter(mArffFile, true), 8192);

                // add the metadata comments
                for (String metaDataLine : metaData) {
                    mWriter.write("% " + metaDataLine);
                    mWriter.newLine();
                }
                mWriter.write(dataSet.toString());
                mWriter.flush();

            } else {
                BufferedWriter writer = new BufferedWriter(new FileWriter(mArffFile));

                // add the metadata comments
                for (String metaDataLine : metaData) {
                    writer.write("% " + metaDataLine);
                    writer.newLine();
                }

                writer.write(dataSet.toString());
                writer.flush();
                writer.close();
            }

        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
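
The commented-out ArffSaver lines above hint at the alternative the comment mentions: Weka's dedicated ARFF writer performs better for large datasets but offers no hook for the % metadata comments this method writes by hand. A minimal sketch of that alternative, assuming the standard weka.core.converters.ArffSaver API (the save helper and target file are illustrative):

import java.io.File;
import java.io.IOException;

import weka.core.Instances;
import weka.core.converters.ArffSaver;

public class ArffSaverSketch {
    static void save(Instances dataSet, File target) throws IOException {
        ArffSaver saver = new ArffSaver();
        saver.setInstances(dataSet); // dataset to serialize
        saver.setFile(target);       // e.g. new File(arffDir, arffFileName)
        saver.writeBatch();          // writes the header and all instances
    }
}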

From source file:es.bsc.autonomic.powermodeller.tools.classifiers.WekaWrapper.java

License:Apache License

public static DataSet processDataSet(DataSet ds, VariableParser parser) {

    String independent = ds.getIndependent();

    if (independent == null)
        throw new WekaWrapperException("Independent variable is not set in dataset.");

    HashMap<String, String> expression_list = parser.getNewMetrics();
    Instances data = convertDataSetToInstances(ds);

    try {
        // Apply filters for all the new variables
        for (Map.Entry<String, String> entry : expression_list.entrySet()) {
            String key = entry.getKey();
            String value = entry.getValue();
            logger.debug("Generating new variable " + key + " as " + value);

            AddExpression add_filter = new AddExpression();
            add_filter.setName(key);
            add_filter.setExpression(value);
            add_filter.setInputFormat(data);

            data = useFilter(data, add_filter);

        }

    } catch (Exception e) {
        logger.error("Error while processing new variables", e);
        throw new WekaWrapperException("Error while processing new variables");
    }

    // Iterate over all the columns and keep only the ones contained in variables list
    List<String> variables = parser.getColumns();

    // Append independent variable to the list of variables to keep
    variables.add(independent);

    // Remove unneeded attributes
    try {

        // It's important to iterate from last to first, because when we remove
        // an attribute, the remaining ones shift down by one position.
        for (int i = data.numAttributes() - 1; i >= 0; i--) {
            String name = data.attribute(i).name();
            if (!variables.contains(name)) {
                logger.trace("Deleting unnecessary attribute " + name);
                data.deleteAttributeAt(i);
            }
        }

        // Render the dataset once; the result is discarded, so this only
        // verifies that the reduced dataset can still be serialized.
        data.toString();
    } catch (Exception e) {
        logger.error("Error while removing unneeded variables", e);
        throw new WekaWrapperException("Error while removing unneeded variables");
    }

    // Convert Instances in csv and return the new DataSet
    String new_path = CoreConfiguration.getNewCSVFileName();
    try {
        CSVSaver saver = new CSVSaver();
        saver.setInstances(data);
        saver.setFile(new File(new_path));
        saver.writeBatch();
    } catch (Exception e) {
        logger.error("Error while removing unneeded variables", e);
        throw new WekaWrapperException("Error while removing unneeded variables");
    }

    DataSet ret = new DataSet(new_path);
    ret.setIndependent(independent);
    return ret;
}

From source file:etc.aloe.filters.StringToDictionaryVector.java

License:Open Source License

public static void main(String[] args) {

    //Create a test dataset
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute("message", (ArrayList<String>) null));
    attributes.add(new Attribute("id"));
    {
        ArrayList<String> classValues = new ArrayList<String>();
        classValues.add("0");
        classValues.add("1");
        attributes.add(new Attribute("class", classValues));
    }

    Instances instances = new Instances("test", attributes, 0);
    instances.setClassIndex(2);

    String[] messages = new String[] { "No emoticons here", "I have a smiley :)",
            "Two smileys and a frownie :) :) :(", "Several emoticons :( :-( :) :-) ;-) 8-) :-/ :-P" };

    for (int i = 0; i < messages.length; i++) {
        Instance instance = new DenseInstance(instances.numAttributes());
        instance.setValue(instances.attribute(0), messages[i]);
        instance.setValue(instances.attribute(1), i);
        instance.setValue(instances.attribute(2), Integer.toString(i % 2));
        instances.add(instance);
    }

    System.out.println("Before filter:");
    for (int i = 0; i < instances.size(); i++) {
        System.out.println(instances.instance(i).toString());
    }

    try {
        String dictionaryName = "emoticons.txt";
        StringToDictionaryVector filter = new StringToDictionaryVector();
        List<String> termList = StringToDictionaryVector.readDictionaryFile(new File(dictionaryName));
        filter.setTermList(termList);
        filter.setMinTermFreq(1);
        filter.setTFTransform(true);
        filter.setIDFTransform(true);
        filter.setNormalizeDocLength(new SelectedTag(FILTER_NORMALIZE_TEST_ONLY, TAGS_FILTER));
        filter.setOutputWordCounts(true);
        filter.setStringAttribute("message");

        filter.setInputFormat(instances);
        Instances trans1 = Filter.useFilter(instances, filter);
        Instances trans2 = Filter.useFilter(instances, filter);

        System.out.println("\nFirst application:");
        System.out.println(trans1.toString());

        System.out.println("\nSecond application:");
        System.out.println(trans2.toString());

    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:id3j48.WekaAccess.java

public static void classify(String filename, Classifier classifier) throws Exception {
    Instances input = readArff(filename);
    input.setClassIndex(input.numAttributes() - 1);
    for (int i = 0; i < input.numInstances(); i++) {
        double classLabel = classifier.classifyInstance(input.instance(i));
        input.instance(i).setClassValue(classLabel);
        System.out.println("Instance: " + input.instance(i));
        System.out.println("Class: " + input.classAttribute().value((int) classLabel));
    }

    try (BufferedWriter writer = new BufferedWriter(
            new FileWriter(classifiedFolder + File.separator + filename))) {
        writer.write(input.toString());
        writer.newLine();
        writer.flush();
    }
}

From source file:id3j48.WekaAccess.java

public static void main(String[] args) {
    initializePath();
    try {
        cin = new Scanner(System.in);
        Instances data = null, tempdata;
        Classifier NBclassifier, ID3classifier, j48classifier;
        Evaluation NBeval, ID3eval, j48eval;
        System.out.println("Enter filename below");
        String filename = cin.nextLine();
        System.out.println("Loading " + filename + "...");
        String extension = "";
        String name = "";
        int i = filename.lastIndexOf('.');
        if (i > 0) {
            extension = filename.substring(i + 1);
            name = filename.substring(0, i);
        }
        if (extension.equalsIgnoreCase("arff")) {
            try {
                data = readArff(filename);
            } catch (Exception ex) {
                Logger.getLogger(WekaAccess.class.getName()).log(Level.SEVERE, null, ex);
            }
        } else if (extension.equalsIgnoreCase("csv")) {
            try {
                data = readCsv(filename);
            } catch (Exception ex) {
                Logger.getLogger(WekaAccess.class.getName()).log(Level.SEVERE, null, ex);
            }
        } else {
            System.out.println("Invalid extension");
            System.exit(0);
        }
        System.out.println(data.toString());
        System.out.println("Resample data? (y for yes) ");
        String resample = cin.nextLine();
        if (resample.equalsIgnoreCase("y")) {
            try {
                tempdata = resampleData(data);
                System.out.println("-- Resampled data --");
                System.out.println(tempdata.toString());
            } catch (Exception ex) {
                Logger.getLogger(WekaAccess.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        tempdata = removeAttribute(data, data.numAttributes());
        System.out.println("-- Remove Attribute --");
        System.out.println(tempdata.toString());
        NBclassifier = buildClassifier(data, new NaiveBayes());
        System.out.println("-- Naive Bayes Classifier --");
        System.out.println(NBclassifier.toString());
        ID3classifier = buildClassifier(data, new Id3());
        System.out.println("-- ID3 Classifier --");
        System.out.println(ID3classifier.toString());
        j48classifier = buildClassifier(data, new J48());
        System.out.println("-- J48 Classifier --");
        System.out.println(j48classifier.toString());
        Instances test = null;
        if (extension.equalsIgnoreCase("arff"))
            test = readArff("test." + filename);
        else if (extension.equalsIgnoreCase("csv"))
            test = readCsv("test." + filename);
        NBeval = testModel(NBclassifier, data, test);
        System.out.println(
                NBeval.toSummaryString("-- Training set evaluation results with Naive Bayes --\n", false));
        ID3eval = testModel(ID3classifier, data, test);
        System.out.println(ID3eval.toSummaryString("-- Training set evaluation results with ID3 --\n", false));
        j48eval = testModel(j48classifier, data, test);
        System.out.println(j48eval.toSummaryString("-- Training set evaluation results with J48 --\n", false));
        NBeval = tenFoldCrossValidation(data, NBclassifier);
        System.out.println(
                NBeval.toSummaryString("-- 10-fold cross validation results with Naive Bayes --\n", false));
        ID3eval = tenFoldCrossValidation(data, ID3classifier);
        System.out.println(ID3eval.toSummaryString("-- 10-fold cross validation results with ID3 --\n", false));
        j48eval = tenFoldCrossValidation(data, j48classifier);
        System.out.println(j48eval.toSummaryString("-- 10-fold cross validation results with J48 --\n", false));
        NBeval = percentageSplit(data, NBclassifier, 66);
        System.out.println(
                NBeval.toSummaryString("-- 66% split validation results with Naive Bayes --\n", false));
        ID3eval = percentageSplit(data, ID3classifier, 66);
        System.out.println(ID3eval.toSummaryString("-- 66% split validation results with ID3 --\n", false));
        j48eval = percentageSplit(data, j48classifier, 66);
        System.out.println(j48eval.toSummaryString("-- 66% split validation results with J48 --\n", false));
        System.out.println("-- Save Naive Bayes Model --");
        saveModel("nb." + name + ".model", NBclassifier);
        System.out.println("-- Save Naive Bayes Model --");
        saveModel("id3." + name + ".model", ID3classifier);
        System.out.println("-- Save Naive Bayes Model --");
        saveModel("j48." + name + ".model", j48classifier);
        System.out.println("-- Save Naive Bayes Model --");
        saveModel("nb." + name + ".model", NBclassifier);
        System.out.println("-- Save ID3 Model --");
        saveModel("id3." + name + ".model", ID3classifier);
        System.out.println("-- Save J48 Model --");
        saveModel("j48." + name + ".model", j48classifier);
        System.out.println("-- Load Naive Bayes Model --");
        System.out.println(loadModel("nb." + name + ".model").toString());
        System.out.println("-- Load ID3 Model --");
        System.out.println(loadModel("id3." + name + ".model").toString());
        System.out.println("-- Load J48 Model --");
        System.out.println(loadModel("j48." + name + ".model").toString());
        System.out.println("-- Classify Naive Bayes Model --");
        classify("classify." + filename, NBclassifier);
        System.out.println("-- Classify ID3 Model --");
        classify("classify." + filename, ID3classifier);
        System.out.println("-- Classify J48 Model --");
        classify("classify." + filename, j48classifier);
    } catch (Exception ex) {
        Logger.getLogger(WekaAccess.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:intensityclustering.IntensityClustering.java

/**
 * Draws the 2D histogram plot in the IntensityClustering. The X-axis is the
 * intensity value of the channel 2 image (where the stained nuclei are). The
 * Y-axis shows the relative frequencies of the present nuclei.
 *
 * @param tss The TMAspots whose nuclei are considered (both gold-standard
 * and estimated nuclei).
 * @param doAlsoClustering If true, the TMApoints are also clustered
 * according to the histogram.
 */
void drawNucleiIntensities2D(List<TMAspot> tss, boolean doAlsoClustering) {
    // draw the plot
    Plot2DPanel plot;
    if (((java.awt.BorderLayout) (jPanel9.getLayout()))
            .getLayoutComponent(java.awt.BorderLayout.CENTER) != null) {
        plot = (Plot2DPanel) ((java.awt.BorderLayout) (jPanel9.getLayout()))
                .getLayoutComponent(java.awt.BorderLayout.CENTER);
        plot.removeAllPlots();
        plot.removeAllPlotables();
    } else {
        plot = new Plot2DPanel(PlotPanel.SOUTH);
        plot.setAxisLabels("Intensity", "Frequency");
        plot.plotCanvas.setBackground(jPanel9.getBackground());
        plot.plotLegend.setBackground(jPanel9.getBackground());
        plot.plotToolBar.setBackground(plot.plotCanvas.getBackground());
    }
    if (((java.awt.BorderLayout) (jPanel9.getLayout()))
            .getLayoutComponent(java.awt.BorderLayout.CENTER) == null) {
        jPanel9.add(plot, java.awt.BorderLayout.CENTER);
        jPanel15.setBackground(plot.plotCanvas.getBackground());
        jPanel15.setVisible(true);
        validate();
        pack();
    }

    if (tss.size() > 0) {
        try {
            this.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));

            List<Integer> intensities = new ArrayList<>();
            int intensity;
            int min = Integer.parseInt(jTextField1.getText());
            int max = Integer.parseInt(jTextField16.getText());
            for (TMAspot ts : tss) {
                //TODO: GET THE CHANNEL 2 Image
                //BufferedImage img = ts.getBufferedImage(TMAspot.SHOW_CHANNEL2_IMAGE, false);
                BufferedImage img = ts.getBufferedImage(false);
                // img can be null if color deconvolution has not been performed, yet.
                if (img != null) {
                    List<TMApoint> tps = ts.getPoints();
                    for (TMALabel tp : tps) {
                        intensity = TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false)
                                .getRed();
                        if (intensity >= min && intensity <= max) {
                            intensities.add(intensity);
                        }
                    }
                }
            }

            double[] intensities_array = new double[intensities.size()];

            for (int i = 0; i < intensities.size(); i++) {
                intensities_array[i] = intensities.get(i);
            }

            int nbins = jSlider7.getValue();
            if (intensities_array.length > 0) {
                plot.addHistogramPlot("TMA points", intensities_array, 0, 256, nbins);
            } //else {
              //  JOptionPane.showMessageDialog(this, "No TMA points have been found.", "No TMA points found.", JOptionPane.WARNING_MESSAGE);
              //}

            //// Cluster Points according to histograms
            if (doAlsoClustering) {
                // Find Clusters
                int n = getParam_nClusters();

                // Create ARFF Data
                FastVector atts;
                Instances data;
                int i;

                // 1. create arff data format
                atts = new FastVector(1);
                for (i = 0; i < 1; i++) {
                    atts.addElement(new Attribute(Integer.toString(i)));
                }

                // 2. create Instances object
                data = new Instances("TMA points", atts, tmarker.getNumberNuclei(tss));

                // 3. fill with data
                for (i = 0; i < intensities_array.length; i++) {
                    // add the instance
                    Instance inst = new Instance(1.0, new double[] { intensities_array[i] });
                    inst.setDataset(data);
                    data.add(inst);
                }

                // 4. set data class index (last attribute is the class)
                //data.setClassIndex(data.numAttributes() - 1); // not for weka 3.5.X
                if (tmarker.DEBUG > 4) {
                    java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO,
                            data.toString());
                }

                Clusterer clusterer = getClusterer();
                String[] options = getClustererOptions();

                if (tmarker.DEBUG > 3) {
                    if (options.length > 0) {
                        String info = "Clusterer should have options:\n";
                        for (String o : options) {
                            info += o + " ";
                        }
                        info += "\n";
                        java.util.logging.Logger.getLogger(getClass().getName())
                                .log(java.util.logging.Level.INFO, info);
                    }
                }

                clusterer.setOptions(options); // set the clusterer options
                clusterer.buildClusterer(data); // build the clusterer

                // order the clusters according to the brightness
                // The most bright cluster should be 0, then 1, then 2,...
                ArrayList<ArrayList<Double>> values = new ArrayList<>();
                for (i = 0; i < n; i++) {
                    values.add(new ArrayList<Double>());
                }
                int z;
                double value;
                for (i = 0; i < data.numInstances(); i++) {
                    z = clusterer.clusterInstance(data.instance(i));
                    value = data.instance(i).value(0);
                    values.get(z).add(value);
                }
                double[] means = new double[n];
                double[] stds = new double[n];
                for (i = 0; i < n; i++) {
                    means[i] = Misc.mean(values.get(i).toArray(new Double[values.get(i).size()]));
                    stds[i] = Misc.std(values.get(i).toArray(new Double[values.get(i).size()]));
                }
                int[] ordering = Misc.orderArray(means, true);

                for (i = 0; i < n; i++) {
                    int ind = Misc.IndexOf(ordering, i);
                    plot.addPlotable(new Line(getParam_ColorOfClassK(i),
                            new double[] { means[ind], plot.plotCanvas.base.roundXmin[1] },
                            new double[] { means[ind], plot.plotCanvas.base.roundXmax[1] }, 2 * stds[ind]));
                    plot.addPlot(Plot2DPanel.LINE, "Staining " + i, getParam_ColorOfClassK(i),
                            new double[][] { new double[] { means[ind], plot.plotCanvas.base.roundXmin[1] },
                                    new double[] { means[ind], plot.plotCanvas.base.roundXmax[1] } });
                }

                String clusterInfo = "";
                for (String o : clusterer.getOptions()) {
                    clusterInfo += o + " ";
                }
                clusterInfo += "\n\n";
                clusterInfo += clusterer.toString().trim();
                if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) {
                    try {
                        clusterInfo += ((HierarchicalClusterer) clusterer).graph();
                        HierarchyVisualizer a = new HierarchyVisualizer(
                                ((HierarchicalClusterer) clusterer).graph());
                        a.setSize(800, 600);
                        if (clusterVisualizer == null) {
                            clusterVisualizer = new JFrame("Hierarchical Clusterer Dendrogram");
                            clusterVisualizer.setIconImage(getIconImage());
                            clusterVisualizer.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE);
                            clusterVisualizer.setSize(800, 600);
                        }
                        Container contentPane = clusterVisualizer.getContentPane();
                        contentPane.removeAll();
                        contentPane.add(a);
                    } catch (Exception e) {
                        clusterVisualizer = null;
                    }
                }
                jTextArea1.setText(clusterInfo);

                if (tmarker.DEBUG > 3) {
                    String info = "Clusterer has options\n";
                    for (String o : clusterer.getOptions()) {
                        info += o + " ";
                    }
                    info += "\n";
                    info += clusterer.toString() + "\n";
                    // info += (clusterer).globalInfo() + "\n";
                    info += "\n";
                    info += clusterInfo + "\n";
                    java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO,
                            info);
                }

                // cluster all TMAspots and assign the corresponding class to them
                // Cluster the points
                List<List<Integer>> clustered_points = new ArrayList<>();
                for (i = 0; i < n; i++) {
                    clustered_points.add(new ArrayList<Integer>());
                }

                int k;
                for (TMAspot ts : tss) {
                    //TODO: GET THE CHANNEL 2 IMAGE
                    //BufferedImage img = ts.getBufferedImage(TMAspot.SHOW_CHANNEL2_IMAGE, false);
                    BufferedImage img = ts.getBufferedImage(false);
                    List<TMApoint> tps = ts.getPoints();
                    for (TMApoint tp : tps) {
                        intensity = TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false)
                                .getRed();

                        // add the instance
                        Instance inst = new Instance(1.0, new double[] { intensity });
                        inst.setDataset(data);
                        k = ordering[clusterer.clusterInstance(inst)];

                        // store the color for later visualization
                        clustered_points.get(k).add(intensity);

                        // set the staining of the TMApoint
                        switch (k) {
                        case 0:
                            tp.setStaining(TMALabel.STAINING_0);
                            break;
                        case 1:
                            tp.setStaining(TMALabel.STAINING_1);
                            break;
                        case 2:
                            tp.setStaining(TMALabel.STAINING_2);
                            break;
                        default:
                            tp.setStaining(TMALabel.STAINING_3);
                            break;
                        }
                    }
                    ts.dispStainingInfo();
                    if (manager.getVisibleTMAspot() == ts) {
                        manager.repaintVisibleTMAspot();
                    }
                }

                // Write the description
                String description = "Nuclei clustered with " + getParam_AutomaticClustererString();
                if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) {
                    description += " (" + getParam_HierarchicalClusteringMethod() + ")";
                }
                description += ", n=" + getParam_nClusters() + ", channel 2 intensity.";
                jLabel42.setText(description);
                jLabel41.setText(" ");

            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            this.setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR));
        }
    }
}

From source file:intensityclustering.IntensityClustering.java

/**
 * Clusters the TMApoints on given TMAspots according to their staining
 * intensity (color). All parameters (e.g. clusterer and parameters) are
 * selected by the user. Features are simple color features.
 *
 * @param tss The TMAspots of which all nuclei (gold-standard and estimated)
 * are clustered according to color.
 */
private void clusterPointsAutomaticallyColorSpace(List<TMAspot> tss) {
    if (tss.size() > 0) {
        try {
            this.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));

            int n = getParam_nClusters();

            // Create ARFF Data
            FastVector atts;
            Instances data;
            int i;

            // 1. create arff data format
            atts = new FastVector(3);
            for (i = 0; i < 3; i++) {
                atts.addElement(new Attribute(Integer.toString(i)));
            }

            // 2. create Instances object
            data = new Instances("TMA points", atts, tmarker.getNumberNuclei(tss));

            // 3. fill with data
            BufferedImage img;
            Color c;
            float[] features = new float[3];
            String colorSpace = getParam_ColorSpace();
            for (TMAspot ts : tss) {
                img = ts.getBufferedImage();
                List<TMApoint> tps = ts.getPoints();
                for (TMApoint tp : tps) {
                    Color2Feature(TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false),
                            colorSpace, features);

                    // add the instance
                    Instance inst = new Instance(1.0, new double[] { features[0], features[1], features[2] });
                    inst.setDataset(data);
                    data.add(inst);
                }
            }

            // 4. set data class index (last attribute is the class)
            //data.setClassIndex(data.numAttributes() - 1); // not for weka 3.5.X
            if (tmarker.DEBUG > 4) {
                java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO,
                        data.toString());
            }

            Clusterer clusterer = getClusterer();
            String[] options = getClustererOptions();
            if (false && colorSpace.equalsIgnoreCase("hsb")) {
                String[] newoptions = new String[options.length + 2];
                System.arraycopy(options, 0, newoptions, 0, options.length);
                newoptions[options.length] = "-A";
                newoptions[options.length + 1] = "weka.core.MyHSBDistance";
                options = newoptions;
            }

            if (tmarker.DEBUG > 3) {
                if (options.length > 0) {
                    String info = "Clusterer should have options\n";
                    for (String o : options) {
                        info += o + " ";
                    }
                    info += "\n";
                    java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO,
                            info);
                }
            }

            clusterer.setOptions(options); // set the clusterer options
            clusterer.buildClusterer(data); // build the clusterer

            // order the clusters according to the brightness
            // The most bright cluster should be 0, then 1, then 2,...
            ArrayList<ArrayList<Double>> values = new ArrayList<>();
            for (i = 0; i < clusterer.numberOfClusters(); i++) {
                values.add(new ArrayList<Double>());
            }
            int z;
            double value;
            for (i = 0; i < data.numInstances(); i++) {
                z = clusterer.clusterInstance(data.instance(i));
                value = getParam_ColorSpace().equalsIgnoreCase("hsb") ? data.instance(i).value(2)
                        : Misc.RGBToGray(data.instance(i).value(0), data.instance(i).value(1),
                                data.instance(i).value(2));
                values.get(z).add(value);
            }
            double[] means = new double[clusterer.numberOfClusters()];
            for (i = 0; i < clusterer.numberOfClusters(); i++) {
                means[i] = Misc.mean(values.get(i).toArray(new Double[values.get(i).size()]));
            }
            int[] ordering = Misc.orderArray(means, !getParam_ColorSpace().equalsIgnoreCase("rtp"));

            String clusterInfo = "";
            for (String o : clusterer.getOptions()) {
                clusterInfo += o + " ";
            }
            clusterInfo += "\n\n";
            clusterInfo += clusterer.toString().trim();
            if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) {
                try {
                    clusterInfo += ((HierarchicalClusterer) clusterer).graph();
                    HierarchyVisualizer a = new HierarchyVisualizer(
                            ((HierarchicalClusterer) clusterer).graph());
                    a.setSize(800, 600);
                    if (clusterVisualizer == null) {
                        clusterVisualizer = new JFrame("Hierarchical Clusterer Dendrogram");
                        clusterVisualizer.setIconImage(getIconImage());
                        clusterVisualizer.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE);
                        clusterVisualizer.setSize(800, 600);
                    }
                    Container contentPane = clusterVisualizer.getContentPane();
                    contentPane.removeAll();
                    contentPane.add(a);
                } catch (Exception e) {
                    clusterVisualizer = null;
                }
            }
            jTextArea1.setText(clusterInfo);

            if (tmarker.DEBUG > 3) {
                String info = "Clusterer has options\n";
                for (String o : clusterer.getOptions()) {
                    info += o + " ";
                }
                info += "\n";
                info += clusterer.toString() + "\n";
                // info += (clusterer).globalInfo() + "\n";
                info += "\n";
                info += clusterInfo + "\n";
                java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO,
                        info);
            }

            // cluster all TMAspots and assign the corresponding class to them
            // Cluster the points
            List<List<Color>> clustered_points = new ArrayList<>();
            for (i = 0; i < clusterer.numberOfClusters(); i++) {
                clustered_points.add(new ArrayList<Color>());
            }

            int k;
            for (TMAspot ts : tss) {
                img = ts.getBufferedImage();
                List<TMApoint> tps = ts.getPoints();
                for (TMApoint tp : tps) {
                    c = TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false);
                    Color2Feature(c, colorSpace, features);

                    // add the instance
                    Instance inst = new Instance(1.0, new double[] { features[0], features[1], features[2] });
                    inst.setDataset(data);
                    k = ordering[clusterer.clusterInstance(inst)];

                    // store the color for later visualization
                    clustered_points.get(k).add(c);

                    // set the staining of the TMApoint
                    switch (k) {
                    case 0:
                        tp.setStaining(TMALabel.STAINING_0);
                        break;
                    case 1:
                        tp.setStaining(TMALabel.STAINING_1);
                        break;
                    case 2:
                        tp.setStaining(TMALabel.STAINING_2);
                        break;
                    default:
                        tp.setStaining(TMALabel.STAINING_3);
                        break;
                    }
                }
                ts.dispStainingInfo();
                if (manager.getVisibleTMAspot() == ts) {
                    manager.repaintVisibleTMAspot();
                }
            }

            // draw the points
            Plot3DPanel plot;
            if (((java.awt.BorderLayout) (jPanel2.getLayout()))
                    .getLayoutComponent(java.awt.BorderLayout.CENTER) != null) {
                plot = (Plot3DPanel) ((java.awt.BorderLayout) (jPanel2.getLayout()))
                        .getLayoutComponent(java.awt.BorderLayout.CENTER);
                plot.removeAllPlots();
            } else {
                plot = new Plot3DPanel();
                plot.plotCanvas.setBackground(jPanel2.getBackground());
                plot.addLegend(PlotPanel.SOUTH);
                plot.plotLegend.setBackground(jPanel2.getBackground());
            }
            if (colorSpace.equalsIgnoreCase("hsb")) {
                plot.setAxisLabels("Hue", "Saturation", "Brightness");
            } else if (colorSpace.equalsIgnoreCase("rtp")) {
                plot.setAxisLabels("R", "Theta", "Phi");
            } else {
                plot.setAxisLabels("Red", "Green", "Blue");
            }

            for (i = 0; i < clusterer.numberOfClusters(); i++) {
                double[] xs = new double[clustered_points.get(i).size()];
                double[] ys = new double[clustered_points.get(i).size()];
                double[] zs = new double[clustered_points.get(i).size()];
                for (int j = 0; j < clustered_points.get(i).size(); j++) {
                    Color2Feature(clustered_points.get(i).get(j), colorSpace, features);
                    xs[j] = features[0];
                    ys[j] = features[1];
                    zs[j] = features[2];
                }
                if (xs.length > 0) {
                    c = getParam_ColorOfClassK(i);
                    plot.addScatterPlot("Staining " + i, c, xs, ys, zs);
                }
            }

            // Write the description
            String description = "Nuclei clustered with " + getParam_AutomaticClustererString();
            if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) {
                description += " (" + getParam_HierarchicalClusteringMethod() + ")";
            }
            description += ", n=" + getParam_nClusters() + ", color space " + getParam_ColorSpace() + ".";
            jLabel41.setText(description);
            jLabel42.setText(" ");

            if (((java.awt.BorderLayout) (jPanel2.getLayout()))
                    .getLayoutComponent(java.awt.BorderLayout.CENTER) == null) {
                jPanel2.add(plot, java.awt.BorderLayout.CENTER);
                validate();
                pack();
            }
        } catch (Exception | OutOfMemoryError e) {
            java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.SEVERE, null,
                    e);
            JOptionPane.showMessageDialog(this,
                    "The clustering could not be performed.\n\n" + "A possible reasons is:\n"
                            + "- Not enough memory (too many points), \n\n"
                            + "You might want to try a different clustering method or less TMAspots.\n\n"
                            + "The error message is: \n" + e.getMessage(),
                    "Error at Nucleus clustering", JOptionPane.WARNING_MESSAGE);
        } finally {
            this.setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR));
        }
    }
}

From source file:lascer.WekaClassifier.java

License:Open Source License

/**
 * Generates the classifier.
 *
 * @param data  the data to be used.
 *
 * @exception Exception  if the classifier can't be built successfully.
 */
public void buildClassifier(Instances data) throws Exception {
    weka.coreExtended.Instances extendedInstances;
    weka.coreExtended.BasicInstance extInst;
    weka.coreExtended.BasicAttribute classAttribut;
    de.unistuttgart.commandline.Option formelnArtOption;
    de.unistuttgart.commandline.Option formelnKlasseOption;
    de.unistuttgart.commandline.Option loggingSwitch;
    Instance readInst;
    Beispieldaten invDatensatz;
    StringReader stringReader;
    Enumeration instEnum;
    Enumeration attribEnum;
    PraedErzParameter praedErzParameter = null;
    KonzErzParameter konzErzParameter = null;
    Pruning pruning;
    String formelArt;
    String formelKlasse;
    String optionWert;
    float posPruneAnt, negPruneAnt;
    int instNumber;
    boolean unbekannteWertBsp;

    Steuerung.parseArguments(parser);

    formelArt = Konstanten.WEKA_FORMEL_ART;
    formelnArtOption = parser.getOption("formelArt");
    if (parser.isEnabled(formelnArtOption)) {
        optionWert = parser.getParameter(formelnArtOption);
        if (!optionWert.equals("dis") && !optionWert.equals("kon") && !optionWert.equals("beste")) {

            System.err.println("Wert der Option formelArt unzulssig");
            System.err.println("Zulssig: " + formelnArtOption.toString());
            throw (new RuntimeException("Wert von Option unzulssig."));
        }
        formelArt = optionWert;
    }

    formelKlasse = Konstanten.WEKA_FORMEL_KLASSE;
    formelnKlasseOption = parser.getOption("formelKlasse");
    if (parser.isEnabled(formelnKlasseOption)) {
        optionWert = parser.getParameter(formelnKlasseOption);
        if (!optionWert.equals("pos") && !optionWert.equals("neg") && !optionWert.equals("beste")
                && !optionWert.equals("beide")) {

            System.err.println("Wert der Option formelKlasse unzulssig");
            System.err.println("Zulssig: " + formelnKlasseOption.toString());
            throw (new RuntimeException("Wert von Option unzulssig."));
        }
        formelKlasse = optionWert;
    }

    loggingSwitch = parser.getOption("logging");
    if (debugMode || parser.isEnabled(loggingSwitch)) {
        Steuerung.setLogLevel(Konstanten.LOGGING_LEVEL);
    }

    // Determine the parameters.
    unbekannteWertBsp = Steuerung.unbekannteWertBeispiele(parser);
    posPruneAnt = Steuerung.posPruneAnteil(parser);
    negPruneAnt = Steuerung.negPruneAnteil(parser);
    praedErzParameter = Steuerung.praedErzParameter(parser);
    konzErzParameter = Steuerung.konzErzParameter(parser);

    // Read the data and create the Instances object.
    instNumber = data.numInstances();
    stringReader = new StringReader(data.toString());
    extendedInstances = new weka.coreExtended.Instances(stringReader, instNumber);
    instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        readInst = (Instance) instEnum.nextElement();
        extInst = new weka.coreExtended.BasicInstance(readInst.weight(), readInst.toDoubleArray());
        extendedInstances.addBasicInstance(extInst);
    }

    // Create the data sets.
    posDatensatz = ArffDateiEinlesen.beispieldaten(extendedInstances, unbekannteWertBsp);
    negDatensatz = posDatensatz.kopie(true);

    // Create the list of attributes.
    attributListe = new LinkedList();
    attribEnum = extendedInstances.enumerateBasicAttributes();
    while (attribEnum.hasMoreElements()) {
        attributListe.add(attribEnum.nextElement());
    }

    // Determine the values of the classification.
    classAttribut = extendedInstances.basicClassAttribute();
    wekaClassTrue = classAttribut.indexOfValue("true");
    wekaClassFalse = classAttribut.indexOfValue("false");

    // Generate the formula for the class of the positive examples.
    if (formelKlasse.equals("pos") || formelKlasse.equals("beste") || formelKlasse.equals("beide")) {

        posFormel = generatedFormula(posDatensatz, praedErzParameter, konzErzParameter, formelArt);
    }

    // Generate the formula for the class of the negative examples.
    if (formelKlasse.equals("neg") || formelKlasse.equals("beste") || formelKlasse.equals("beide")) {

        negFormel = generatedFormula(negDatensatz, praedErzParameter, konzErzParameter, formelArt);
    }

    if (formelKlasse.equals("beste")) {
        // Delete the worse formula.
        if (negFormel.istBesser(posFormel)) {
            posFormel = null;
        } else {
            negFormel = null;
        }
    }

    if ((posPruneAnt > 0) || (negPruneAnt > 0)) {
        pruning = new Pruning();

        if (posFormel != null) {
            posDatensatz = pruning.reduzierteDaten(posDatensatz, posFormel, posPruneAnt, negPruneAnt);
            posFormel = generatedFormula(posDatensatz, praedErzParameter, konzErzParameter, formelArt);
        }

        if (negFormel != null) {
            negDatensatz = pruning.reduzierteDaten(negDatensatz, negFormel, negPruneAnt, posPruneAnt);
            negFormel = generatedFormula(negDatensatz, praedErzParameter, konzErzParameter, formelArt);
        }
    }
}
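
buildClassifier above exploits a useful property of toString(): its output is a complete ARFF document that Weka can parse back. A minimal round-trip sketch using the stock weka.core.Instances(Reader) constructor (the roundTrip helper name is illustrative):

import java.io.IOException;
import java.io.StringReader;

import weka.core.Instances;

public class ArffRoundTrip {
    // Serialize a dataset to ARFF text, then parse it back into a fresh copy.
    static Instances roundTrip(Instances data) throws IOException {
        Instances copy = new Instances(new StringReader(data.toString()));
        // The class index is not part of the ARFF format, so restore it manually.
        if (data.classIndex() >= 0) {
            copy.setClassIndex(data.classIndex());
        }
        return copy;
    }
}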

From source file:lector.Analizador.java

public static void clasificador() {

    BufferedReader reader1;
    BufferedReader reader2;
    try {
        reader1 = new BufferedReader(new FileReader("/Users/danieltapia/Google Drive/EPN/MAESTRIA/MSW128 BI/"
                + "proyecto/compartida/DataSetAnalisisSentimientos.arff"));

        reader2 = new BufferedReader(new FileReader("/Users/danieltapia/Google Drive/EPN/MAESTRIA/MSW128 BI/"
                + "proyecto/compartida/DataSetAnalisisSentimientos_inc.arff"));
        Instances train = new Instances(reader1);
        train.setClassIndex(train.numAttributes() - 1);
        System.out.println(train.classIndex() + " " + train.numAttributes());

        Instances test = new Instances(reader2);
        test.setClassIndex(test.numAttributes() - 1);
        System.out.println(test.classIndex() + " " + test.numAttributes());

        NaiveBayes model = new NaiveBayes();
        model.buildClassifier(train);

        //classify
        Instances labeled = new Instances(test);

        for (int i = 0; i < test.numInstances(); i++) {
            double clsLabel = model.classifyInstance(test.instance(i));
            labeled.instance(i).setClassValue(clsLabel);
        }

        // https://youtu.be/JY_x5zKTfyo?list=PLJbE6j2EG1pZnBhOg3_Rb63WLCprtyJag
        Evaluation eval_train = new Evaluation(test);
        eval_train.evaluateModel(model, test);

        reader1.close();
        reader2.close();

        //System.out.println(eval_train.toSummaryString("\nResults\n======\n", false));
        String[] options = new String[4];
        options[0] = "-t"; //name of training file
        options[1] = "/Users/danieltapia/Google Drive/EPN/MAESTRIA/MSW128 BI/proyecto/"
                + "compartida/DataSetAnalisisSentimientos.arff";
        options[2] = "-T";
        options[3] = "/Users/danieltapia/Google Drive/EPN/MAESTRIA/MSW128 BI/proyecto/"
                + "compartida/DataSetAnalisisSentimientos_inc.arff";
        System.out.println(Evaluation.evaluateModel(model, options));

        try ( // print classification results to file
                BufferedWriter writer = new BufferedWriter(
                        new FileWriter("/Users/danieltapia/Google Drive/EPN/MAESTRIA/MSW128 BI/"
                                + "proyecto/compartida/DataSetAnalisisSentimientos_labeled.arff"))) {
            writer.write(labeled.toString());
        }

    } catch (Exception e) {
        // Don't swallow errors silently; at least report them.
        e.printStackTrace();
    }
}

From source file:mao.datamining.DataSetPair.java

private void doItOnce4All() {
    if (didIt)
        return;
    didIt = true;
    try {
        //step 0, remove all those empty columns, which has more than 50% missing values
        Instances orangeDataSet = ConverterUtils.DataSource.read(trainSourceFileName);
        orangeDataSet.setClassIndex(orangeDataSet.numAttributes() - 1);
        Attribute classAttr = orangeDataSet.attribute(orangeDataSet.numAttributes() - 1);
        MainLogger.log(Level.INFO, "Class Attribute: {0}", classAttr.toString());

        //step 0-1, to remove all columns which has more than half missing values
        Instances newData = orangeDataSet;
        RemoveUselessColumnsByMissingValues removeMissingValuesColumns = new RemoveUselessColumnsByMissingValues();
        removeMissingValuesColumns.setM_maxMissingPercentage(50);
        removeMissingValuesColumns.setManualDeleteColumns(columns2Delete);
        removeMissingValuesColumns.setInputFormat(newData);
        newData = Filter.useFilter(newData, removeMissingValuesColumns);
        Main.logging("== New Data After Removing all Columns having >50% missing values: ===\n"
                + newData.toSummaryString());
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(Main.OrangeProcessedDSHome + "/afterRemoveMissingColumns1.arff")))) {
            writer.write(newData.toString());
        }

        //step 0-2 to transform those numeric columns to Nominal
        //to delete those instances with more than half missing values
        BufferedReader reader70 = new BufferedReader(new InputStreamReader(
                new FileInputStream(Main.OrangeProcessedDSHome + "/afterRemoveMissingColumns1.arff")));
        BufferedWriter writerAfterDeleteRows = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(Main.OrangeProcessedDSHome + "/afterRemoveRows2.arff")));
        int columnNum = newData.numAttributes();
        int totalInstanceNum = newData.numInstances(), deleteM1Num = 0, delete1Num = 0;
        String line = null;
        int missingColumnNum = 0;
        while ((line = reader70.readLine()) != null) {
            missingColumnNum = 0;
            for (int i = 0; i < line.length(); i++) {
                if (line.charAt(i) == '?')
                    missingColumnNum++;
            }
            if (missingColumnNum * 100 / columnNum < 50) {
                writerAfterDeleteRows.write(line);
                writerAfterDeleteRows.newLine();
            } else {
                System.out.println("Delete Row: [" + line + "]");
                if (line.endsWith("-1")) {
                    deleteM1Num++;
                } else {
                    delete1Num++;
                }
            }
        }
        System.out.println("Total: " + totalInstanceNum + ", delete class -1: " + deleteM1Num
                + ", delete class 1:  " + delete1Num);
        reader70.close();
        writerAfterDeleteRows.close();

        //create sample files:
        createSampleDataSets();

    } catch (Exception e) {
        Main.logging(null, e);
    }
}