List of usage examples for the setDataset method of weka.core.Instance
/**
 * Signature of the {@code setDataset} method that the examples below demonstrate.
 * In those examples it is called on a freshly built instance, passing the
 * {@code Instances} object that describes its attributes, before the instance is
 * classified, clustered or added to that data set.
 */
public void setDataset(Instances instances);
From source file:gr.ntua.sentimentanalysis.VectorModelSentimentAnalysis.java
License:Open Source License
@WebMethod(operationName = "getTweetSentiment") public String getTextSentiment(String document) { Instance instance = vmcl.getInstance(-1, REP_MODEL, document); instance.setDataset(instances); int response = -1; try {//w ww . ja v a 2 s .c o m response = (int) classifier.classifyInstance(instance); } catch (Exception e) { e.printStackTrace(); } if (response == 0) { return "negative"; } else if (response == 1) { return "positive"; } else { return "unknown"; } }
From source file:GroupProject.DMChartUI.java
/** * Action for the generate button/*from w w w. jav a 2 s . c om*/ * It reads the user input from the table and the selected options and performs * a classifiecation of the user input * the user can choose linear regression, naive bayes classifier, or j48 trees to classify * */ private void generateButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_generateButtonActionPerformed // TODO add your handling code here: // TODO add your handling code here: //File file = new File("studentTemp.csv"); CSVtoArff converter = new CSVtoArff(); Instances students = null; Instances students2 = null; try { converter.convert("studentTemp.csv", "studentTemp.arff"); } catch (IOException ex) { Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex); } try { students = new Instances(new BufferedReader(new FileReader("studentTemp.arff"))); students2 = new Instances(new BufferedReader(new FileReader("studentTemp.arff"))); } catch (IOException ex) { Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex); } //get column to predict values for //int target=students.numAttributes()-1; int target = dataSelector.getSelectedIndex() + 1; System.out.printf("this is the target: %d\n", target); //set target students.setClassIndex(target); students2.setClassIndex(target); //case on which radio button is selected //Linear Regressions if (LRB.isSelected()) { LinearRegression model = null; if (Lmodel != null) { model = Lmodel; } else { buildLinearModel(); model = Lmodel; } System.out.println("im doing linear regression"); equationDisplayArea.setText(model.toString()); System.out.println("im going to get the instance"); Instance prediction2 = getInstance(true); Remove remove = new Remove(); int[] toremove = { 0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 16, 17 }; remove.setAttributeIndicesArray(toremove); try { remove.setInputFormat(students); } catch (Exception ex) { Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex); } Instances 
instNew = null; try { instNew = Filter.useFilter(students, remove); } catch (Exception ex) { Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex); } prediction2.setDataset(instNew); System.err.print("i got the instance"); double result = 0; try { result = model.classifyInstance(prediction2); } catch (Exception ex) { Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex); } System.out.printf("the result : %f \n ", result); predictValue.setText(Double.toString(result)); System.out.println("I'm done with Linear Regression"); } //Naive Bayes else if (NBB.isSelected()) { Classifier cModel = null; if (NBmodel != null) { cModel = NBmodel; } else { buildNBClassifier(); cModel = NBmodel; } System.out.println("im doing NB"); //build test Evaluation eTest = null; try { eTest = new Evaluation(students); } catch (Exception ex) { Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex); } System.out.println("Using NB"); try { eTest.evaluateModel(cModel, students); } catch (Exception ex) { Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex); } //display the test results to console String strSummary = eTest.toSummaryString(); System.out.println(strSummary); //build instance to predict System.out.println("im going to get the instance"); Instance prediction2 = getInstance(false); prediction2.setDataset(students); System.err.print("i got the instance"); //replace with loop stating the class names //fit text based on name of categories double pred = 0; try { pred = cModel.classifyInstance(prediction2); prediction2.setClassValue(pred); } catch (Exception ex) { Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex); } //get the predicted value and set predictValue to it predictValue.setText(prediction2.classAttribute().value((int) pred)); System.out.println("I'm done with Naive Bayes"); double[] fDistribution2 = null; try { fDistribution2 = cModel.distributionForInstance(prediction2); } catch 
(Exception ex) { Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex); } double max = 0; int maxindex = 0; max = fDistribution2[0]; for (int i = 0; i < fDistribution2.length; i++) { if (fDistribution2[i] > max) { maxindex = i; max = fDistribution2[i]; } System.out.println("the value at " + i + " : " + fDistribution2[i]); System.out.println("the label at " + i + prediction2.classAttribute().value(i)); } prediction2.setClassValue(maxindex); predictValue.setText(prediction2.classAttribute().value(maxindex)); } //J48 Tree else if (JB.isSelected()) { System.out.println("im doing j48 "); Classifier jModel = null; if (Jmodel != null) { jModel = Jmodel; } else { buildJClassifier(); jModel = Jmodel; } //test model Evaluation eTest2 = null; try { eTest2 = new Evaluation(students); } catch (Exception ex) { Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex); } System.out.println("Using J48 test"); try { eTest2.evaluateModel(jModel, students); } catch (Exception ex) { Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex); } String strSummary2 = eTest2.toSummaryString(); System.out.println(strSummary2); System.out.println("im going to get the instance"); Instance prediction2 = getInstance(false); prediction2.setDataset(students); System.err.print("i got the instance\n"); double pred = 0; try { pred = jModel.classifyInstance(prediction2); prediction2.setClassValue(pred); System.out.println("i did a prediction"); } catch (Exception ex) { Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex); } //get the predicted value and set predictValue to it System.out.println("this was pred:" + pred); predictValue.setText(prediction2.classAttribute().value((int) pred)); System.out.println("I'm done with J48"); //replace with loop stating the class names //fit text based on name of categories double[] fDistribution2 = null; try { fDistribution2 = jModel.distributionForInstance(prediction2); } catch (Exception ex) { 
Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex); } double max = 0; int maxindex = 0; max = fDistribution2[0]; for (int i = 0; i < fDistribution2.length; i++) { if (fDistribution2[i] > max) { maxindex = i; max = fDistribution2[i]; } System.out.println("the value at " + i + " : " + fDistribution2[i]); System.out.println("the label at " + i + " " + prediction2.classAttribute().value(i)); } prediction2.setClassValue(maxindex); predictValue.setText(prediction2.classAttribute().value(maxindex)); } }
From source file:ia02classificacao.IA02Classificacao.java
/** * @param args the command line arguments *///from w w w . j a v a 2 s. c om public static void main(String[] args) throws Exception { // abre o banco de dados arff e mostra a quantidade de instancias (linhas) DataSource arquivo = new DataSource("data/zoo.arff"); Instances dados = arquivo.getDataSet(); System.out.println("Instancias lidas: " + dados.numInstances()); // FILTER: remove o atributo nome do animal da classificao String[] parametros = new String[] { "-R", "1" }; Remove filtro = new Remove(); filtro.setOptions(parametros); filtro.setInputFormat(dados); dados = Filter.useFilter(dados, filtro); AttributeSelection selAtributo = new AttributeSelection(); InfoGainAttributeEval avaliador = new InfoGainAttributeEval(); Ranker busca = new Ranker(); selAtributo.setEvaluator(avaliador); selAtributo.setSearch(busca); selAtributo.SelectAttributes(dados); int[] indices = selAtributo.selectedAttributes(); System.out.println("Selected attributes: " + Utils.arrayToString(indices)); // Usa o algoritimo J48 e mostra a classificao dos dados em forma textual String[] opcoes = new String[1]; opcoes[0] = "-U"; J48 arvore = new J48(); arvore.setOptions(opcoes); arvore.buildClassifier(dados); System.out.println(arvore); // Usa o algoritimo J48 e mostra a classificao de dados em forma grafica /* TreeVisualizer tv = new TreeVisualizer(null, arvore.graph(), new PlaceNode2()); JFrame frame = new javax.swing.JFrame("?rvore de Conhecimento"); frame.setSize(800,500); frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); frame.getContentPane().add(tv); frame.setVisible(true); tv.fitToScreen(); */ /* * Classificao de novos dados */ System.out.println("\n\nCLASSIFICAO DE NOVOS DADOS"); // criar atributos double[] vals = new double[dados.numAttributes()]; vals[0] = 1.0; // hair vals[1] = 0.0; // feathers vals[2] = 0.0; // eggs vals[3] = 1.0; // milk vals[4] = 1.0; // airborne vals[5] = 0.0; // aquatic vals[6] = 0.0; // predator vals[7] = 1.0; // toothed vals[8] = 1.0; // backbone 
vals[9] = 1.0; // breathes vals[10] = 0.0; // venomous vals[11] = 0.0; // fins vals[12] = 4.0; // legs vals[13] = 1.0; // tail vals[14] = 1.0; // domestic vals[15] = 1.0; // catsize // Criar uma instncia baseada nestes atributos Instance meuUnicornio = new DenseInstance(1.0, vals); // Adicionar a instncia nos dados meuUnicornio.setDataset(dados); // Classificar esta nova instncia double label = arvore.classifyInstance(meuUnicornio); // Imprimir o resultado da classificao System.out.println("Novo Animal: Unicrnio"); System.out.println("classificacao: " + dados.classAttribute().value((int) label)); /* * Avaliao e predio de erros de mtrica */ System.out.println("\n\nAVALIAO E PREDIO DE ERROS DE MTRICA"); Classifier cl = new J48(); Evaluation eval_roc = new Evaluation(dados); eval_roc.crossValidateModel(cl, dados, 10, new Random(1), new Object[] {}); System.out.println(eval_roc.toSummaryString()); /* * Matriz de confuso */ System.out.println("\n\nMATRIZ DE CONFUSO"); double[][] confusionMatrix = eval_roc.confusionMatrix(); System.out.println(eval_roc.toMatrixString()); }
From source file:ia03classificador.jFrClassificador.java
public void doClassificate() throws Exception { // Quando clicado, a variavel recebe 1, quando no clicado recebe 0 v00 = ((btn00.isSelected()) ? ((double) 1) : ((double) 0)); v01 = ((btn01.isSelected()) ? ((double) 1) : ((double) 0)); v02 = ((btn02.isSelected()) ? ((double) 1) : ((double) 0)); v03 = ((btn03.isSelected()) ? ((double) 1) : ((double) 0)); v04 = ((btn04.isSelected()) ? ((double) 1) : ((double) 0)); v05 = ((btn05.isSelected()) ? ((double) 1) : ((double) 0)); v06 = ((btn06.isSelected()) ? ((double) 1) : ((double) 0)); v07 = ((btn07.isSelected()) ? ((double) 1) : ((double) 0)); v08 = ((btn08.isSelected()) ? ((double) 1) : ((double) 0)); v09 = ((btn09.isSelected()) ? ((double) 1) : ((double) 0)); v10 = ((btn10.isSelected()) ? ((double) 1) : ((double) 0)); v11 = ((btn11.isSelected()) ? ((double) 1) : ((double) 0)); v13 = ((btn13.isSelected()) ? ((double) 1) : ((double) 0)); v14 = ((btn14.isSelected()) ? ((double) 1) : ((double) 0)); v15 = ((btn15.isSelected()) ? ((double) 1) : ((double) 0)); legs = txtLegs.getText();/*from w ww. java 2 s . c o m*/ legs = ((legs == null || legs.trim().isEmpty() ? 
"2" : legs)); name = txtName.getText(); // abre o banco de dados arff e guarda os registros no objeto dados ConverterUtils.DataSource arquivo = new ConverterUtils.DataSource("data/zoo.arff"); Instances dados = arquivo.getDataSet(); // FILTER: remove o atributo nome do animal da classificao String[] parametros = new String[] { "-R", "1" }; Remove filtro = new Remove(); filtro.setOptions(parametros); filtro.setInputFormat(dados); dados = Filter.useFilter(dados, filtro); AttributeSelection selAtributo = new AttributeSelection(); InfoGainAttributeEval avaliador = new InfoGainAttributeEval(); Ranker busca = new Ranker(); selAtributo.setEvaluator(avaliador); selAtributo.setSearch(busca); selAtributo.SelectAttributes(dados); int[] indices = selAtributo.selectedAttributes(); //System.out.println("Selected attributes: " + Utils.arrayToString(indices)); // Usa o algoritimo J48 para montar a arvore de dados String[] opcoes = new String[1]; opcoes[0] = "-U"; J48 arvore = new J48(); arvore.setOptions(opcoes); arvore.buildClassifier(dados); // cria o novo elemento para comparao double[] vals = new double[dados.numAttributes()]; vals[0] = v00; // hair vals[1] = v01; // feathers vals[2] = v02; // eggs vals[3] = v03; // milk vals[4] = v04; // airborne vals[5] = v05; // aquatic vals[6] = v06; // predator vals[7] = v07; // toothed vals[8] = v08; // backbone vals[9] = v09; // breathes vals[10] = v10; // venomous vals[11] = v11; // fins vals[12] = Double.parseDouble(legs); // legs vals[13] = v13; // tail vals[14] = v14; // domestic vals[15] = v15; // catsize // Criar uma instncia baseada nestes atributos Instance newAnimal = new DenseInstance(1.0, vals); // Adicionar a instncia nos dados newAnimal.setDataset(dados); // Classificar esta nova instncia double label = arvore.classifyInstance(newAnimal); // Imprimir o resultado da classificao lblClassification.setText(dados.classAttribute().value((int) label)); }
From source file:intensityclustering.IntensityClustering.java
/** * Draws the 2D Histogram Plot in the IntensityClustering. X-Axsis is * intensity value of chanel 2 image (where the stained nuclei are). Y-axis * are relative frequencies of present nuclei. * * @param tss The TMAspots whose nuclei are considered (both gold-standard * and estimated nuclei)./*from w w w . j a va 2s . c om*/ * @param doAlsoClustering If true, the TMApoints are also clustered * according to the histogram. */ void drawNucleiIntensities2D(List<TMAspot> tss, boolean doAlsoClustering) { // draw the plot Plot2DPanel plot; if (((java.awt.BorderLayout) (jPanel9.getLayout())) .getLayoutComponent(java.awt.BorderLayout.CENTER) != null) { plot = (Plot2DPanel) ((java.awt.BorderLayout) (jPanel9.getLayout())) .getLayoutComponent(java.awt.BorderLayout.CENTER); plot.removeAllPlots(); plot.removeAllPlotables(); } else { plot = new Plot2DPanel(PlotPanel.SOUTH); plot.setAxisLabels("Intensity", "Frequency"); plot.plotCanvas.setBackground(jPanel9.getBackground()); plot.plotLegend.setBackground(jPanel9.getBackground()); plot.plotToolBar.setBackground(plot.plotCanvas.getBackground()); } if (((java.awt.BorderLayout) (jPanel9.getLayout())) .getLayoutComponent(java.awt.BorderLayout.CENTER) == null) { jPanel9.add(plot, java.awt.BorderLayout.CENTER); jPanel15.setBackground(plot.plotCanvas.getBackground()); jPanel15.setVisible(true); validate(); pack(); } if (tss.size() > 0) { try { this.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR)); List<Integer> intensities = new ArrayList<>(); int intensity; int min = Integer.parseInt(jTextField1.getText()); int max = Integer.parseInt(jTextField16.getText()); for (TMAspot ts : tss) { //TODO: GET THE CHANNEL 2 Image //BufferedImage img = ts.getBufferedImage(TMAspot.SHOW_CHANNEL2_IMAGE, false); BufferedImage img = ts.getBufferedImage(false); // img can be null if color deconvolution has not been performed, yet. 
if (img != null) { List<TMApoint> tps = ts.getPoints(); for (TMALabel tp : tps) { intensity = TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false) .getRed(); if (intensity >= min && intensity <= max) { intensities.add(intensity); } } } } double[] intensities_array = new double[intensities.size()]; for (int i = 0; i < intensities.size(); i++) { intensities_array[i] = intensities.get(i); } int nbins = jSlider7.getValue(); if (intensities_array.length > 0) { plot.addHistogramPlot("TMA points", intensities_array, 0, 256, nbins); } //else { // JOptionPane.showMessageDialog(this, "No TMA points have been found.", "No TMA points found.", JOptionPane.WARNING_MESSAGE); //} //// Cluster Points according to histograms if (doAlsoClustering) { // Find Clusters int n = getParam_nClusters(); // Create ARFF Data FastVector atts; Instances data; int i; // 1. create arff data format atts = new FastVector(1); for (i = 0; i < 1; i++) { atts.addElement(new Attribute(Integer.toString(i))); } // 2. create Instances object data = new Instances("TMA points", atts, tmarker.getNumberNuclei(tss)); // 3. fill with data for (i = 0; i < intensities_array.length; i++) { // add the instance Instance inst = new Instance(1.0, new double[] { intensities_array[i] }); inst.setDataset(data); data.add(inst); } // 4. 
set data class index (last attribute is the class) //data.setClassIndex(data.numAttributes() - 1); // not for weka 3.5.X if (tmarker.DEBUG > 4) { java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO, data.toString()); } Clusterer clusterer = getClusterer(); String[] options = getClustererOptions(); if (tmarker.DEBUG > 3) { if (options.length > 0) { String info = "Clusterer should have options:\n"; for (String o : options) { info += o + " "; } info += "\n"; java.util.logging.Logger.getLogger(getClass().getName()) .log(java.util.logging.Level.INFO, info); } } clusterer.setOptions(options); // set the clusterer options clusterer.buildClusterer(data); // build the clusterer // order the clusters according to the brightness // The most bright cluster should be 0, then 1, then 2,... ArrayList<ArrayList<Double>> values = new ArrayList<>(); for (i = 0; i < n; i++) { values.add(new ArrayList<Double>()); } int z; double value; for (i = 0; i < data.numInstances(); i++) { z = clusterer.clusterInstance(data.instance(i)); value = data.instance(i).value(0); values.get(z).add(value); } double[] means = new double[n]; double[] stds = new double[n]; for (i = 0; i < n; i++) { means[i] = Misc.mean(values.get(i).toArray(new Double[values.get(i).size()])); stds[i] = Misc.std(values.get(i).toArray(new Double[values.get(i).size()])); } int[] ordering = Misc.orderArray(means, true); for (i = 0; i < n; i++) { int ind = Misc.IndexOf(ordering, i); plot.addPlotable(new Line(getParam_ColorOfClassK(i), new double[] { means[ind], plot.plotCanvas.base.roundXmin[1] }, new double[] { means[ind], plot.plotCanvas.base.roundXmax[1] }, 2 * stds[ind])); plot.addPlot(Plot2DPanel.LINE, "Staining " + i, getParam_ColorOfClassK(i), new double[][] { new double[] { means[ind], plot.plotCanvas.base.roundXmin[1] }, new double[] { means[ind], plot.plotCanvas.base.roundXmax[1] } }); } String clusterInfo = ""; for (String o : clusterer.getOptions()) { clusterInfo += o + " "; } 
clusterInfo += "\n\n"; clusterInfo += clusterer.toString().trim(); if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) { try { clusterInfo += ((HierarchicalClusterer) clusterer).graph(); HierarchyVisualizer a = new HierarchyVisualizer( ((HierarchicalClusterer) clusterer).graph()); a.setSize(800, 600); if (clusterVisualizer == null) { clusterVisualizer = new JFrame("Hierarchical Clusterer Dendrogram"); clusterVisualizer.setIconImage(getIconImage()); clusterVisualizer.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE); clusterVisualizer.setSize(800, 600); } Container contentPane = clusterVisualizer.getContentPane(); contentPane.removeAll(); contentPane.add(a); } catch (Exception e) { clusterVisualizer = null; } } jTextArea1.setText(clusterInfo); if (tmarker.DEBUG > 3) { String info = "Clusterer has options\n"; for (String o : clusterer.getOptions()) { info += o + " "; } info += "\n"; info += clusterer.toString() + "\n"; // info += (clusterer).globalInfo() + "\n"; info += "\n"; info += clusterInfo + "\n"; java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO, info); } // cluster all TMAspots and assign the corresponding class to them // Cluster the points List<List<Integer>> clustered_points = new ArrayList<>(); for (i = 0; i < n; i++) { clustered_points.add(new ArrayList<Integer>()); } int k; for (TMAspot ts : tss) { //TODO: GET THE CHANNEL 2 IMAGE //BufferedImage img = ts.getBufferedImage(TMAspot.SHOW_CHANNEL2_IMAGE, false); BufferedImage img = ts.getBufferedImage(false); List<TMApoint> tps = ts.getPoints(); for (TMApoint tp : tps) { intensity = TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false) .getRed(); // add the instance Instance inst = new Instance(1.0, new double[] { intensity }); inst.setDataset(data); k = ordering[clusterer.clusterInstance(inst)]; // store the color for later visualization clustered_points.get(k).add(intensity); // set the staining of the TMApoint switch (k) { 
case 0: tp.setStaining(TMALabel.STAINING_0); break; case 1: tp.setStaining(TMALabel.STAINING_1); break; case 2: tp.setStaining(TMALabel.STAINING_2); break; default: tp.setStaining(TMALabel.STAINING_3); break; } } ts.dispStainingInfo(); if (manager.getVisibleTMAspot() == ts) { manager.repaintVisibleTMAspot(); } } // Write the description String description = "Nuclei clustered with " + getParam_AutomaticClustererString(); if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) { description += " (" + getParam_HierarchicalClusteringMethod() + ")"; } description += ", n=" + getParam_nClusters() + ", channel 2 intensity."; jLabel42.setText(description); jLabel41.setText(" "); } } catch (Exception e) { e.printStackTrace(); } finally { this.setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR)); } } }
From source file:intensityclustering.IntensityClustering.java
/** * Clusters the TMApoints on given TMAspots according to their staining * intensity (color). All parameters (e.g. clusterer and parameters) are * selected by the user. Features are simple color features. * * @param tss The TMAspots of which all nuclei (gold-standard and estimated) * are clustered according to color./* w w w . j ava 2 s.c om*/ */ private void clusterPointsAutomaticallyColorSpace(List<TMAspot> tss) { if (tss.size() > 0) { try { this.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR)); int n = getParam_nClusters(); // Create ARFF Data FastVector atts; Instances data; int i; // 1. create arff data format atts = new FastVector(3); for (i = 0; i < 3; i++) { atts.addElement(new Attribute(Integer.toString(i))); } // 2. create Instances object data = new Instances("TMA points", atts, tmarker.getNumberNuclei(tss)); // 3. fill with data BufferedImage img; Color c; float[] features = new float[3]; String colorSpace = getParam_ColorSpace(); for (TMAspot ts : tss) { img = ts.getBufferedImage(); List<TMApoint> tps = ts.getPoints(); for (TMApoint tp : tps) { Color2Feature(TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false), colorSpace, features); // add the instance Instance inst = new Instance(1.0, new double[] { features[0], features[1], features[2] }); inst.setDataset(data); data.add(inst); } } // 4. 
set data class index (last attribute is the class) //data.setClassIndex(data.numAttributes() - 1); // not for weka 3.5.X if (tmarker.DEBUG > 4) { java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO, data.toString()); } Clusterer clusterer = getClusterer(); String[] options = getClustererOptions(); if (false && colorSpace.equalsIgnoreCase("hsb")) { String[] newoptions = new String[options.length + 2]; System.arraycopy(options, 0, newoptions, 0, options.length); newoptions[options.length] = "-A"; newoptions[options.length + 1] = "weka.core.MyHSBDistance"; options = newoptions; } if (tmarker.DEBUG > 3) { if (options.length > 0) { String info = "Clusterer should have options\n"; for (String o : options) { info += o + " "; } info += "\n"; java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO, info); } } clusterer.setOptions(options); // set the clusterer options clusterer.buildClusterer(data); // build the clusterer // order the clusters according to the brightness // The most bright cluster should be 0, then 1, then 2,... ArrayList<ArrayList<Double>> values = new ArrayList<>(); for (i = 0; i < clusterer.numberOfClusters(); i++) { values.add(new ArrayList<Double>()); } int z; double value; for (i = 0; i < data.numInstances(); i++) { z = clusterer.clusterInstance(data.instance(i)); value = getParam_ColorSpace().equalsIgnoreCase("hsb") ? 
data.instance(i).value(2) : Misc.RGBToGray(data.instance(i).value(0), data.instance(i).value(1), data.instance(i).value(2)); values.get(z).add(value); } double[] means = new double[clusterer.numberOfClusters()]; for (i = 0; i < clusterer.numberOfClusters(); i++) { means[i] = Misc.mean(values.get(i).toArray(new Double[values.get(i).size()])); } int[] ordering = Misc.orderArray(means, !getParam_ColorSpace().equalsIgnoreCase("rtp")); String clusterInfo = ""; for (String o : clusterer.getOptions()) { clusterInfo += o + " "; } clusterInfo += "\n\n"; clusterInfo += clusterer.toString().trim(); if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) { try { clusterInfo += ((HierarchicalClusterer) clusterer).graph(); HierarchyVisualizer a = new HierarchyVisualizer( ((HierarchicalClusterer) clusterer).graph()); a.setSize(800, 600); if (clusterVisualizer == null) { clusterVisualizer = new JFrame("Hierarchical Clusterer Dendrogram"); clusterVisualizer.setIconImage(getIconImage()); clusterVisualizer.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE); clusterVisualizer.setSize(800, 600); } Container contentPane = clusterVisualizer.getContentPane(); contentPane.removeAll(); contentPane.add(a); } catch (Exception e) { clusterVisualizer = null; } } jTextArea1.setText(clusterInfo); if (tmarker.DEBUG > 3) { String info = "Clusterer has options\n"; for (String o : clusterer.getOptions()) { info += o + " "; } info += "\n"; info += clusterer.toString() + "\n"; // info += (clusterer).globalInfo() + "\n"; info += "\n"; info += clusterInfo + "\n"; java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.INFO, info); } // cluster all TMAspots and assign the corresponding class to them // Cluster the points List<List<Color>> clustered_points = new ArrayList<>(); for (i = 0; i < clusterer.numberOfClusters(); i++) { clustered_points.add(new ArrayList<Color>()); } int k; for (TMAspot ts : tss) { img = ts.getBufferedImage(); List<TMApoint> tps = 
ts.getPoints(); for (TMApoint tp : tps) { c = TMAspot.getAverageColorAtPoint(img, tp.x, tp.y, ts.getParam_r(), false); Color2Feature(c, colorSpace, features); // add the instance Instance inst = new Instance(1.0, new double[] { features[0], features[1], features[2] }); inst.setDataset(data); k = ordering[clusterer.clusterInstance(inst)]; // store the color for later visualization clustered_points.get(k).add(c); // set the staining of the TMApoint switch (k) { case 0: tp.setStaining(TMALabel.STAINING_0); break; case 1: tp.setStaining(TMALabel.STAINING_1); break; case 2: tp.setStaining(TMALabel.STAINING_2); break; default: tp.setStaining(TMALabel.STAINING_3); break; } } ts.dispStainingInfo(); if (manager.getVisibleTMAspot() == ts) { manager.repaintVisibleTMAspot(); } } // draw the points Plot3DPanel plot; if (((java.awt.BorderLayout) (jPanel2.getLayout())) .getLayoutComponent(java.awt.BorderLayout.CENTER) != null) { plot = (Plot3DPanel) ((java.awt.BorderLayout) (jPanel2.getLayout())) .getLayoutComponent(java.awt.BorderLayout.CENTER); plot.removeAllPlots(); } else { plot = new Plot3DPanel(); plot.plotCanvas.setBackground(jPanel2.getBackground()); plot.addLegend(PlotPanel.SOUTH); plot.plotLegend.setBackground(jPanel2.getBackground()); } if (colorSpace.equalsIgnoreCase("hsb")) { plot.setAxisLabels("Hue", "Saturation", "Brightness"); } else if (colorSpace.equalsIgnoreCase("rtp")) { plot.setAxisLabels("R", "Theta", "Phi"); } else { plot.setAxisLabels("Red", "Green", "Blue"); } for (i = 0; i < clusterer.numberOfClusters(); i++) { double[] xs = new double[clustered_points.get(i).size()]; double[] ys = new double[clustered_points.get(i).size()]; double[] zs = new double[clustered_points.get(i).size()]; for (int j = 0; j < clustered_points.get(i).size(); j++) { Color2Feature(clustered_points.get(i).get(j), colorSpace, features); xs[j] = features[0]; ys[j] = features[1]; zs[j] = features[2]; } if (xs.length > 0) { c = getParam_ColorOfClassK(i); plot.addScatterPlot("Staining " 
+ i, c, xs, ys, zs); } } // Write the description String description = "Nuclei clustered with " + getParam_AutomaticClustererString(); if (getParam_AutomaticClustererString().equalsIgnoreCase("Hierarchical")) { description += " (" + getParam_HierarchicalClusteringMethod() + ")"; } description += ", n=" + getParam_nClusters() + ", color space " + getParam_ColorSpace() + "."; jLabel41.setText(description); jLabel42.setText(" "); if (((java.awt.BorderLayout) (jPanel2.getLayout())) .getLayoutComponent(java.awt.BorderLayout.CENTER) == null) { jPanel2.add(plot, java.awt.BorderLayout.CENTER); validate(); pack(); } } catch (Exception | OutOfMemoryError e) { java.util.logging.Logger.getLogger(getClass().getName()).log(java.util.logging.Level.SEVERE, null, e); JOptionPane.showMessageDialog(this, "The clustering could not be performed.\n\n" + "A possible reasons is:\n" + "- Not enough memory (too many points), \n\n" + "You might want to try a different clustering method or less TMAspots.\n\n" + "The error message is: \n" + e.getMessage(), "Error at Nucleus clustering", JOptionPane.WARNING_MESSAGE); } finally { this.setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR)); } } }
From source file:jwebminer2.FeatureValueFileSaver.java
/** * Save the given text to the given location in the given format or * save the stored feature values, depending on the chosen_file_extension. * A progress bar is displayed (although not incremented). * * @param chosen_file_extension The file extension (corresponding to one * of the extensions published by the * getFileFormatExtension method) to use when * saving data_to_save, and the corresponding * file format. * @param data_to_save The HTML code displayed on-screen. May be * null for non-HTML saving. * @param save_location The file to save data_to_save to. * @throws Exception Throws an Exception if the file cannot be * saved./*from ww w.ja va2 s . c o m*/ */ public void saveContents(String chosen_file_extension, String data_to_save, File save_location) throws Exception { // Prepare the progress bar SimpleProgressBarDialog progress_bar = new SimpleProgressBarDialog(1, results_panel); // Write the whole contents of data_to_save verbatim as an HTML file // if an HTML file is to be saved if (chosen_file_extension.equals("HTML")) { DataOutputStream writer = mckay.utilities.staticlibraries.FileMethods .getDataOutputStream(save_location); writer.writeBytes(data_to_save); writer.close(); } // Only save the table of final feature values itself if a non-HTML // file format is to be saved else { // Access information to store double[][] feature_table = results_panel.feature_values; String[] column_labels = results_panel.column_labels; String[] row_labels = results_panel.row_labels; String[] orig_column_labels = column_labels; if (AnalysisProcessor.lastfm_enabled && AnalysisProcessor.is_cross_tabulation && (AnalysisProcessor.yahoo_application_id != null || AnalysisProcessor.google_license_key != null)) { String[] column_labels_lastfm_websearch = new String[2 * column_labels.length]; for (int i = 0; i < column_labels.length; i++) { column_labels_lastfm_websearch[i] = column_labels[i] + "_WS"; column_labels_lastfm_websearch[i + column_labels.length] = column_labels[i] + 
"_LastFM"; } column_labels = column_labels_lastfm_websearch; } else { column_labels = orig_column_labels; } // Save as tab delimited text file if (chosen_file_extension.equals("TXT")) { // Calculate the table to save String[][] results_table = new String[row_labels.length + 1][column_labels.length + 1]; results_table[0][0] = ""; for (int i = 0; i < results_table.length; i++) { for (int j = 0; j < results_table[i].length; j++) { if (i == 0) { if (j != 0) results_table[i][j] = column_labels[j - 1]; } else { if (j == 0) results_table[i][j] = row_labels[i - 1]; else results_table[i][j] = String.valueOf(feature_table[i - 1][j - 1]); } } } // Save the table DataOutputStream writer = mckay.utilities.staticlibraries.FileMethods .getDataOutputStream(save_location); for (int i = 0; i < results_table.length; i++) { for (int j = 0; j < results_table[i].length; j++) { // Write the table entry writer.writeBytes(results_table[i][j]); // Add a tab or a line break if (j == results_table[i].length - 1) writer.writeBytes("\n"); else writer.writeBytes("\t"); } } // Close the writing stream writer.close(); } // Save as ACE XML file else if (chosen_file_extension.equals("ACE XML")) { // Set the name of the dataset to the name of the file // that is tob be saved String data_set_name = mckay.utilities.staticlibraries.StringMethods .removeExtension(save_location.getName()); // Prepare feature definitions and store feature names to // put in DataSets FeatureDefinition[] feature_definitions = new FeatureDefinition[column_labels.length]; String[] feature_names = new String[column_labels.length]; for (int feat = 0; feat < feature_definitions.length; feat++) { feature_definitions[feat] = new FeatureDefinition(column_labels[feat], "", false, 1); feature_names[feat] = column_labels[feat]; } // Prepare the the DataSets to write DataSet[] data_sets = new DataSet[row_labels.length]; for (int instance = 0; instance < data_sets.length; instance++) { // Instantiate the DataSet data_sets[instance] = new 
DataSet(); // Store the instance names data_sets[instance].identifier = row_labels[instance]; // Store the names of the features data_sets[instance].feature_names = feature_names; // Store the features for this DataSet as well as the // feature names double[][] these_feature_values = new double[feature_table[instance].length][1]; for (int feat = 0; feat < these_feature_values.length; feat++) these_feature_values[feat][0] = feature_table[instance][feat]; data_sets[instance].feature_values = these_feature_values; // Validate, order and compact the DataSet data_sets[instance].orderAndCompactFeatures(feature_definitions, true); } // Save the feature values DataSet.saveDataSets(data_sets, feature_definitions, save_location, "Features extracted with jWebMiner 2.0"); } // Save as Weka ARFF file else if (chosen_file_extension.equals("Weka ARFF")) { // Set the name of the dataset to the name of the file // that is to be saved String data_set_name = mckay.utilities.staticlibraries.StringMethods .removeExtension(save_location.getName()); // Set the Attributes (feature names and class names) FastVector attributes_vector = new FastVector(column_labels.length + 1); // extra 1 is for class name for (int feat = 0; feat < column_labels.length; feat++) attributes_vector.addElement(new Attribute(column_labels[feat])); FastVector class_names_vector = new FastVector(column_labels.length); for (int cat = 0; cat < orig_column_labels.length; cat++) class_names_vector.addElement(orig_column_labels[cat]); attributes_vector.addElement(new Attribute("Class", class_names_vector)); // Store attributes in an Instances object Instances instances = new Instances(data_set_name, attributes_vector, row_labels.length); instances.setClassIndex(instances.numAttributes() - 1); // Store the feature values and model classifications for (int inst = 0; inst < row_labels.length; inst++) { // Initialize an instance Instance this_instance = new Instance(instances.numAttributes()); 
this_instance.setDataset(instances); int current_attribute = 0; // Set feature values for the instance for (int feat = 0; feat < column_labels.length; feat++) this_instance.setValue(feat, feature_table[inst][feat]); // Set the class value for the instance // this_instance.setClassValue("a"); instances.setRelationName("jWebMiner2"); // Add this instance to instances instances.add(this_instance); } // Prepare the buffer to save to and add comments indicating // the names of the rows DataOutputStream writer = mckay.utilities.staticlibraries.FileMethods .getDataOutputStream(save_location); writer.writeBytes("% INSTANCES (DATA ROWS) BELOW CORRESPOND TO:\n%\n"); for (int inst = 0; inst < row_labels.length; inst++) writer.writeBytes("% " + (inst + 1) + ") " + row_labels[inst] + "\n"); writer.writeBytes("%\n"); // Save the ARFF file ArffSaver arff_saver = new ArffSaver(); arff_saver.setInstances(instances); arff_saver.setFile(save_location); arff_saver.setDestination(writer); try { arff_saver.writeBatch(); } catch (Exception e) { throw new Exception( "File only partially saved.\n\nTry resaving the file with a .arff extension."); } // Close the writer writer.close(); } } // Terminate the progress bar progress_bar.done(); }
From source file:kea.KEAFilter.java
License:Open Source License
/** * Converts an instance./*w w w . j a v a 2s. c o m*/ */ private FastVector convertInstance(Instance instance, boolean training) throws Exception { FastVector vector = new FastVector(); if (m_Debug) { System.err.println("-- Converting instance"); } // Get the key phrases for the document HashMap hashKeyphrases = null; HashMap hashKeysEval = null; if (!instance.isMissing(m_KeyphrasesAtt)) { String keyphrases = instance.stringValue(m_KeyphrasesAtt); hashKeyphrases = getGivenKeyphrases(keyphrases, false); hashKeysEval = getGivenKeyphrases(keyphrases, true); } // Get the phrases for the document HashMap hash = new HashMap(); int length = getPhrases(hash, instance.stringValue(m_DocumentAtt)); // Compute number of extra attributes int numFeatures = 5; if (m_Debug) { if (m_KFused) { numFeatures = numFeatures + 1; } } // Set indices of key attributes int phraseAttIndex = m_DocumentAtt; int tfidfAttIndex = m_DocumentAtt + 2; int distAttIndex = m_DocumentAtt + 3; int probsAttIndex = m_DocumentAtt + numFeatures - 1; // Go through the phrases and convert them into instances Iterator it = hash.keySet().iterator(); while (it.hasNext()) { String phrase = (String) it.next(); FastVector phraseInfo = (FastVector) hash.get(phrase); double[] vals = featVals(phrase, phraseInfo, training, hashKeysEval, hashKeyphrases, length); Instance inst = new Instance(instance.weight(), vals); inst.setDataset(m_ClassifierData); // Get probability of phrase being key phrase double[] probs = m_Classifier.distributionForInstance(inst); double prob = probs[1]; // Compute attribute values for final instance double[] newInst = new double[instance.numAttributes() + numFeatures]; int pos = 0; for (int i = 0; i < instance.numAttributes(); i++) { if (i == m_DocumentAtt) { // Add phrase int index = outputFormatPeek().attribute(pos).addStringValue(phrase); newInst[pos++] = index; // Add original version index = outputFormatPeek().attribute(pos).addStringValue((String) phraseInfo.elementAt(2)); newInst[pos++] 
= index; // Add TFxIDF newInst[pos++] = inst.value(m_TfidfIndex); // Add distance newInst[pos++] = inst.value(m_FirstOccurIndex); // Add other features if (m_Debug) { if (m_KFused) { newInst[pos++] = inst.value(m_KeyFreqIndex); } } // Add probability probsAttIndex = pos; newInst[pos++] = prob; // Set rank to missing (computed below) newInst[pos++] = Instance.missingValue(); } else if (i == m_KeyphrasesAtt) { newInst[pos++] = inst.classValue(); } else { newInst[pos++] = instance.value(i); } } Instance ins = new Instance(instance.weight(), newInst); ins.setDataset(outputFormatPeek()); vector.addElement(ins); } // Add dummy instances for keyphrases that don't occur // in the document if (hashKeysEval != null) { Iterator phrases = hashKeysEval.keySet().iterator(); while (phrases.hasNext()) { String phrase = (String) phrases.next(); double[] newInst = new double[instance.numAttributes() + numFeatures]; int pos = 0; for (int i = 0; i < instance.numAttributes(); i++) { if (i == m_DocumentAtt) { // Add phrase int index = outputFormatPeek().attribute(pos).addStringValue(phrase); newInst[pos++] = (double) index; // Add original version index = outputFormatPeek().attribute(pos).addStringValue((String) hashKeysEval.get(phrase)); newInst[pos++] = (double) index; // Add TFxIDF newInst[pos++] = Instance.missingValue(); // Add distance newInst[pos++] = Instance.missingValue(); // Add other features if (m_Debug) { if (m_KFused) { newInst[pos++] = Instance.missingValue(); } } // Add probability and rank newInst[pos++] = -Double.MAX_VALUE; newInst[pos++] = Instance.missingValue(); } else if (i == m_KeyphrasesAtt) { newInst[pos++] = 1; // Keyphrase } else { newInst[pos++] = instance.value(i); } } Instance inst = new Instance(instance.weight(), newInst); inst.setDataset(outputFormatPeek()); vector.addElement(inst); } } // Sort phrases according to their distance (stable sort) double[] vals = new double[vector.size()]; for (int i = 0; i < vals.length; i++) { vals[i] = ((Instance) 
vector.elementAt(i)).value(distAttIndex); } FastVector newVector = new FastVector(vector.size()); int[] sortedIndices = Utils.stableSort(vals); for (int i = 0; i < vals.length; i++) { newVector.addElement(vector.elementAt(sortedIndices[i])); } vector = newVector; // Sort phrases according to their tfxidf value (stable sort) for (int i = 0; i < vals.length; i++) { vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex); } newVector = new FastVector(vector.size()); sortedIndices = Utils.stableSort(vals); for (int i = 0; i < vals.length; i++) { newVector.addElement(vector.elementAt(sortedIndices[i])); } vector = newVector; // Sort phrases according to their probability (stable sort) for (int i = 0; i < vals.length; i++) { vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex); } newVector = new FastVector(vector.size()); sortedIndices = Utils.stableSort(vals); for (int i = 0; i < vals.length; i++) { newVector.addElement(vector.elementAt(sortedIndices[i])); } vector = newVector; // Compute rank of phrases. Check for subphrases that are ranked // lower than superphrases and assign probability -1 and set the // rank to Integer.MAX_VALUE int rank = 1; for (int i = 0; i < vals.length; i++) { Instance currentInstance = (Instance) vector.elementAt(i); // Short cut: if phrase very unlikely make rank very low and continue if (Utils.grOrEq(vals[i], 1.0)) { currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE); continue; } // Otherwise look for super phrase starting with first phrase // in list that has same probability, TFxIDF value, and distance as // current phrase. We do this to catch all superphrases // that have same probability, TFxIDF value and distance as current phrase. 
int startInd = i; while (startInd < vals.length) { Instance inst = (Instance) vector.elementAt(startInd); if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex)) || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex)) || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) { break; } startInd++; } String val = currentInstance.stringValue(phraseAttIndex); boolean foundSuperphrase = false; for (int j = startInd - 1; j >= 0; j--) { if (j != i) { Instance candidate = (Instance) vector.elementAt(j); String potSuperphrase = candidate.stringValue(phraseAttIndex); if (val.length() <= potSuperphrase.length()) { if (KEAFilter.contains(val, potSuperphrase)) { foundSuperphrase = true; break; } } } } if (foundSuperphrase) { currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE); } else { currentInstance.setValue(probsAttIndex + 1, rank++); } } return vector; }
From source file:kea.KEAPhraseFilter.java
License:Open Source License
/** * Converts an instance by removing all non-alphanumeric characters * from its string attribute values./* www . j a va 2s . co m*/ */ private void convertInstance(Instance instance) throws Exception { double[] instVals = new double[instance.numAttributes()]; for (int i = 0; i < instance.numAttributes(); i++) { if (!instance.attribute(i).isString() || instance.isMissing(i)) { instVals[i] = instance.value(i); } else { if (!m_SelectCols.isInRange(i)) { int index = getOutputFormat().attribute(i).addStringValue(instance.stringValue(i)); instVals[i] = (double) index; continue; } String str = instance.stringValue(i); StringBuffer resultStr = new StringBuffer(); int j = 0; boolean phraseStart = true; boolean seenNewLine = false; boolean haveSeenHyphen = false; boolean haveSeenSlash = false; while (j < str.length()) { boolean isWord = false; boolean potNumber = false; int startj = j; while (j < str.length()) { char ch = str.charAt(j); if (Character.isLetterOrDigit(ch)) { potNumber = true; if (Character.isLetter(ch)) { isWord = true; } j++; } else if ((!m_DisallowInternalPeriods && (ch == '.')) || (ch == '@') || (ch == '_') || (ch == '&') || (ch == '/') || (ch == '-')) { if ((j > 0) && (j + 1 < str.length()) && Character.isLetterOrDigit(str.charAt(j - 1)) && Character.isLetterOrDigit(str.charAt(j + 1))) { j++; } else { break; } } else if (ch == '\'') { if ((j > 0) && Character.isLetterOrDigit(str.charAt(j - 1))) { j++; } else { break; } } else { break; } } if (isWord == true) { if (!phraseStart) { if (haveSeenHyphen) { resultStr.append('-'); } else if (haveSeenSlash) { resultStr.append('/'); } else { resultStr.append(' '); } } resultStr.append(str.substring(startj, j)); if (j == str.length()) { break; } phraseStart = false; seenNewLine = false; haveSeenHyphen = false; haveSeenSlash = false; if (Character.isWhitespace(str.charAt(j))) { if (str.charAt(j) == '\n') { seenNewLine = true; } } else if (str.charAt(j) == '-') { haveSeenHyphen = true; } else if (str.charAt(j) == 
'/') { haveSeenSlash = true; } else { phraseStart = true; resultStr.append('\n'); } j++; } else if (j == str.length()) { break; } else if (str.charAt(j) == '\n') { if (seenNewLine) { if (phraseStart == false) { resultStr.append('\n'); phraseStart = true; } } else if (potNumber) { if (phraseStart == false) { phraseStart = true; resultStr.append('\n'); } } seenNewLine = true; j++; } else if (Character.isWhitespace(str.charAt(j))) { if (potNumber) { if (phraseStart == false) { phraseStart = true; resultStr.append('\n'); } } j++; } else { if (phraseStart == false) { resultStr.append('\n'); phraseStart = true; } j++; } } int index = getOutputFormat().attribute(i).addStringValue(resultStr.toString()); instVals[i] = (double) index; } } Instance inst = new Instance(instance.weight(), instVals); inst.setDataset(getOutputFormat()); push(inst); }
From source file:kea.NumbersFilter.java
License:Open Source License
/** * Converts an instance. A phrase boundary is inserted where * a number is found./*from ww w. j a va 2 s. c om*/ */ private void convertInstance(Instance instance) throws Exception { double[] instVals = new double[instance.numAttributes()]; for (int i = 0; i < instance.numAttributes(); i++) { if ((!instance.attribute(i).isString()) || instance.isMissing(i)) { instVals[i] = instance.value(i); } else { String str = instance.stringValue(i); StringBuffer resultStr = new StringBuffer(); StringTokenizer tok = new StringTokenizer(str, " \t\n", true); while (tok.hasMoreTokens()) { String token = tok.nextToken(); // Everything that doesn't contain at least // one letter is considered to be a number boolean isNumber = true; for (int j = 0; j < token.length(); j++) { if (Character.isLetter(token.charAt(j))) { isNumber = false; break; } } if (!isNumber) { resultStr.append(token); } else { if (token.equals(" ") || token.equals("\t") || token.equals("\n")) { resultStr.append(token); } else { resultStr.append(" \n "); } } } int index = getOutputFormat().attribute(i).addStringValue(resultStr.toString()); instVals[i] = (double) index; } } Instance inst = new Instance(instance.weight(), instVals); inst.setDataset(getOutputFormat()); push(inst); }