List of usage examples for weka.core Instances numInstances
public int numInstances()
From source file:adams.gui.visualization.instances.instancestable.AbstractPlotRow.java
License:Open Source License
/**
 * Validates the arguments before a row plot is attempted.
 *
 * @param table the table the request originates from
 * @param data the instances backing the table
 * @param actRow the 0-based row index in the instances
 * @param selRow the selected row in the table (not inspected by this check)
 * @return null if every check passed, otherwise the error message
 */
protected String check(InstancesTable table, Instances data, int actRow, int selRow) {
    String error = null;

    if (table == null) {
        error = "No source table available!";
    } else if (data == null) {
        error = "No instances available!";
    } else if (actRow < 0) {
        error = "Negative row index!";
    } else {
        int rowCount = data.numInstances();
        if (actRow >= rowCount) {
            // the message reports the row 1-based
            error = "Row index too large: " + (actRow + 1) + " > " + rowCount;
        }
    }

    return error;
}
From source file:adams.gui.visualization.instances.instancestable.AbstractProcessCell.java
License:Open Source License
/** * Hook method for checks before attempting processing. * * @param table the source table/* www. j a v a 2s .co m*/ * @param data the instances to use as basis * @param actRow the row in the instances * @param selRow the selected row in the table * @param column the column in the instances * @return null if passed, otherwise error message */ protected String check(InstancesTable table, Instances data, int actRow, int selRow, int column) { if (table == null) return "No source table available!"; if (data == null) return "No instances available!"; if (actRow < 0) return "Negative row index!"; if (actRow >= data.numInstances()) return "Row index too large: " + (actRow + 1) + " > " + data.numInstances(); if (column < 0) return "Negative column index!"; if (column >= data.numAttributes()) return "Column index too large: " + (column + 1) + " > " + data.numAttributes(); return null; }
From source file:adams.gui.visualization.instances.instancestable.JFreeChart.java
License:Open Source License
/**
 * Allows the user to generate a plot from either a row or a column.
 * <p>
 * Steps: optionally ask for a sample size, let the user configure the
 * JFreeChartPlot sink in a modal dialog, collect the numeric values, copy them
 * into a two-column spreadsheet ("x" = running number, "y" = value) and run a
 * small flow in a background SwingWorker that displays the plot.
 *
 * @param table the table that triggered the plot; also stores the last plot setup
 * @param data the instances to use
 * @param isColumn whether to plot a column (true) or a row (false)
 * @param index the index of the row/column
 */
protected void plot(final InstancesTable table, final Instances data, final boolean isColumn, int index) {
    final List<Double> list;
    List<Double> tmp;
    GenericObjectEditorDialog setup;
    int i;
    final String title;
    SwingWorker worker;
    adams.flow.sink.JFreeChartPlot last;
    int numPoints;
    String newPoints;
    int col;
    int row;
    Object value;
    final SpreadSheet sheet;
    Row srow;
    boolean sorted;
    boolean asc;

    // Upper bound of points: one per instance (column plot) or one per attribute (row plot).
    // From here on numPoints == -1 means "no sampling, plot everything".
    numPoints = isColumn ? data.numInstances() : data.numAttributes();
    if (numPoints > MAX_POINTS) {
        newPoints = GUIHelper.showInputDialog(null,
                "More than " + MAX_POINTS + " data points to plot - enter sample size:", "" + numPoints);
        // dialog cancelled or non-integer input: silently abort
        if (newPoints == null)
            return;
        if (!Utils.isInteger(newPoints))
            return;
        // entering the full size is treated as "no sampling"
        // NOTE(review): a negative entry also fails the "numPoints > -1" test below
        // and therefore plots everything - confirm that this is intended
        if (Integer.parseInt(newPoints) != numPoints)
            numPoints = Integer.parseInt(newPoints);
        else
            numPoints = -1;
    } else {
        numPoints = -1;
    }

    // let user customize plot
    // the dialog is parented to the enclosing dialog if there is one, otherwise to the frame
    if (GUIHelper.getParentDialog(table) != null)
        setup = new GenericObjectEditorDialog(GUIHelper.getParentDialog(table), ModalityType.DOCUMENT_MODAL);
    else
        setup = new GenericObjectEditorDialog(GUIHelper.getParentFrame(table), true);
    setup.setDefaultCloseOperation(GenericObjectEditorDialog.DISPOSE_ON_CLOSE);
    setup.getGOEEditor().setClassType(Actor.class);
    setup.getGOEEditor().setCanChangeClassInDialog(false);
    // start from the setup stored in the table for this plugin, or a default sink
    last = (adams.flow.sink.JFreeChartPlot) table.getLastSetup(getClass(), true, !isColumn);
    if (last == null)
        last = new adams.flow.sink.JFreeChartPlot();
    setup.setCurrent(last);
    setup.setLocationRelativeTo(GUIHelper.getParentComponent(table));
    setup.setVisible(true);
    if (setup.getResult() != GenericObjectEditorDialog.APPROVE_OPTION)
        return;
    // store the approved setup; the background worker below fetches it again from the table
    last = (adams.flow.sink.JFreeChartPlot) setup.getCurrent();
    table.addLastSetup(getClass(), true, !isColumn, last);

    // get data from instances
    tmp = new ArrayList<>();
    sorted = false;
    asc = table.isAscending();
    if (isColumn) {
        // values are read from the table (not from data), in the table's current row order;
        // the table column is the attribute index shifted by one
        // (presumably because of a leading index column - confirm against InstancesTable)
        col = index + 1;
        sorted = (table.getSortColumn() == col);
        for (i = 0; i < table.getRowCount(); i++) {
            value = table.getValueAt(i, col);
            // cells that are null or do not parse as a double are skipped
            if ((value != null) && (Utils.isDouble(value.toString())))
                tmp.add(Utils.toDouble(value.toString()));
        }
    } else {
        // row plot: all numeric, non-missing attribute values of the row
        row = index;
        for (i = 0; i < data.numAttributes(); i++) {
            if (data.attribute(i).isNumeric() && !data.instance(row).isMissing(i))
                tmp.add(data.instance(row).value(i));
        }
    }

    if (numPoints > -1) {
        // sample: shuffle with a fixed seed and keep the first numPoints values
        numPoints = Math.min(numPoints, tmp.size());
        Collections.shuffle(tmp, new Random(1));
        list = tmp.subList(0, numPoints);
        // the shuffle destroyed the order, so re-establish the table's sort order for the sample
        if (sorted) {
            Collections.sort(list);
            if (!asc)
                Collections.reverse(list);
        }
    } else {
        list = tmp;
    }

    // create new spreadsheet
    sheet = new DefaultSpreadSheet();
    sheet.getHeaderRow().addCell("x").setContentAsString(isColumn ? "Row" : "Column");
    sheet.getHeaderRow().addCell("y")
            .setContentAsString(isColumn ? data.attribute(index).name() : ("Row " + (index + 1)));
    for (i = 0; i < list.size(); i++) {
        srow = sheet.addRow();
        // x is the 1-based running number of the value
        srow.addCell("x").setContent((double) i + 1.0);
        srow.addCell("y").setContent(list.get(i));
    }

    // generate plot
    if (isColumn)
        title = "Column " + (index + 1) + "/" + data.attribute(index).name();
    else
        title = "Row " + (index + 1);
    // the chart title is the name of the second ("y") spreadsheet column
    last.getChart().setTitle(sheet.getColumnName(1));
    worker = new SwingWorker() {
        @Override
        protected Object doInBackground() throws Exception {
            // flow: StorageValue("values") -> JFreeChartPlot
            Flow flow = new Flow();
            flow.setDefaultCloseOperation(BaseFrame.DISPOSE_ON_CLOSE);
            StorageValue sv = new StorageValue();
            sv.setStorageName(new StorageName("values"));
            flow.add(sv);
            // work on a shallow copy of the setup stored in the table
            Object last = table.getLastSetup(JFreeChart.this.getClass(), true, !isColumn);
            adams.flow.sink.JFreeChartPlot plot = (adams.flow.sink.JFreeChartPlot) ((adams.flow.sink.JFreeChartPlot) last)
                    .shallowCopy();
            plot.setShortTitle(true);
            plot.setName(title);
            plot.setX(-2);
            plot.setY(-2);
            flow.add(plot);
            flow.setUp();
            // the spreadsheet is placed in storage after setUp() and before execute()
            flow.getStorage().put(new StorageName("values"), sheet);
            flow.execute();
            flow.wrapUp();
            return null;
        }
    };
    worker.execute();
}
From source file:adams.gui.visualization.instances.instancestable.SimplePlot.java
License:Open Source License
/**
 * Allows the user to generate a plot from either a row or a column.
 * <p>
 * Steps: optionally ask for a sample size, let the user configure the
 * SimplePlot sink in a modal dialog, collect the numeric values and run a
 * small flow in a background SwingWorker that displays them as a sequence plot.
 *
 * @param table the table that triggered the plot; also stores the last plot setup
 * @param data the instances to use
 * @param isColumn whether to plot a column (true) or a row (false)
 * @param index the index of the row/column
 */
protected void plot(final InstancesTable table, final Instances data, final boolean isColumn, int index) {
    final List<Double> list;
    List<Double> tmp;
    GenericObjectEditorDialog setup;
    int i;
    final String title;
    SwingWorker worker;
    adams.flow.sink.SimplePlot last;
    int numPoints;
    String newPoints;
    int col;
    int row;
    Object value;
    boolean sorted;
    boolean asc;

    // Upper bound of points: one per instance (column plot) or one per attribute (row plot).
    // From here on numPoints == -1 means "no sampling, plot everything".
    numPoints = isColumn ? data.numInstances() : data.numAttributes();
    if (numPoints > MAX_POINTS) {
        newPoints = GUIHelper.showInputDialog(null,
                "More than " + MAX_POINTS + " data points to plot - enter sample size:", "" + numPoints);
        // dialog cancelled or non-integer input: silently abort
        if (newPoints == null)
            return;
        if (!Utils.isInteger(newPoints))
            return;
        // entering the full size is treated as "no sampling"
        // NOTE(review): a negative entry also fails the "numPoints > -1" test below
        // and therefore plots everything - confirm that this is intended
        if (Integer.parseInt(newPoints) != numPoints)
            numPoints = Integer.parseInt(newPoints);
        else
            numPoints = -1;
    } else {
        numPoints = -1;
    }

    // let user customize plot
    // the dialog is parented to the enclosing dialog if there is one, otherwise to the frame
    if (GUIHelper.getParentDialog(table) != null)
        setup = new GenericObjectEditorDialog(GUIHelper.getParentDialog(table), ModalityType.DOCUMENT_MODAL);
    else
        setup = new GenericObjectEditorDialog(GUIHelper.getParentFrame(table), true);
    setup.setDefaultCloseOperation(GenericObjectEditorDialog.DISPOSE_ON_CLOSE);
    setup.getGOEEditor().setClassType(Actor.class);
    setup.getGOEEditor().setCanChangeClassInDialog(false);
    // start from the setup stored in the table for this plugin, or a default sink
    // (tool tips off, click action showing the data)
    last = (adams.flow.sink.SimplePlot) table.getLastSetup(getClass(), true, !isColumn);
    if (last == null) {
        last = new adams.flow.sink.SimplePlot();
        last.setNoToolTips(true);
        last.setMouseClickAction(new ViewDataClickAction());
    }
    setup.setCurrent(last);
    setup.setLocationRelativeTo(GUIHelper.getParentComponent(table));
    setup.setVisible(true);
    if (setup.getResult() != GenericObjectEditorDialog.APPROVE_OPTION)
        return;
    // store the approved setup; the background worker below fetches it again from the table
    last = (adams.flow.sink.SimplePlot) setup.getCurrent();
    table.addLastSetup(getClass(), true, !isColumn, last);

    // get data from instances
    tmp = new ArrayList<>();
    sorted = false;
    asc = table.isAscending();
    if (isColumn) {
        // values are read from the table (not from data), in the table's current row order;
        // the table column is the attribute index shifted by one
        // (presumably because of a leading index column - confirm against InstancesTable)
        col = index + 1;
        sorted = (table.getSortColumn() == col);
        for (i = 0; i < table.getRowCount(); i++) {
            value = table.getValueAt(i, col);
            // cells that are null or do not parse as a double are skipped
            if ((value != null) && (Utils.isDouble(value.toString())))
                tmp.add(Utils.toDouble(value.toString()));
        }
    } else {
        // row plot: all numeric, non-missing attribute values of the row
        row = index;
        for (i = 0; i < data.numAttributes(); i++) {
            if (data.attribute(i).isNumeric() && !data.instance(row).isMissing(i))
                tmp.add(data.instance(row).value(i));
        }
    }

    if (numPoints > -1) {
        // sample: shuffle with a fixed seed and keep the first numPoints values
        numPoints = Math.min(numPoints, tmp.size());
        Collections.shuffle(tmp, new Random(1));
        list = tmp.subList(0, numPoints);
        // the shuffle destroyed the order, so re-establish the table's sort order for the sample
        if (sorted) {
            Collections.sort(list);
            if (!asc)
                Collections.reverse(list);
        }
    } else {
        list = tmp;
    }

    // generate plot
    if (isColumn)
        title = "Column " + (index + 1) + "/" + data.attribute(index).name();
    else
        title = "Row " + (index + 1);

    worker = new SwingWorker() {
        @Override
        protected Object doInBackground() throws Exception {
            // flow: StorageValue("values") -> ArrayToSequence -> MakePlotContainer -> SimplePlot
            Flow flow = new Flow();
            flow.setDefaultCloseOperation(BaseFrame.DISPOSE_ON_CLOSE);
            StorageValue sv = new StorageValue();
            sv.setStorageName(new StorageName("values"));
            flow.add(sv);
            ArrayToSequence a2s = new ArrayToSequence();
            flow.add(a2s);
            MakePlotContainer mpc = new MakePlotContainer();
            mpc.setPlotName(title);
            flow.add(mpc);
            // work on a shallow copy of the setup stored in the table
            Object last = table.getLastSetup(SimplePlot.this.getClass(), true, !isColumn);
            adams.flow.sink.SimplePlot plot = (adams.flow.sink.SimplePlot) ((adams.flow.sink.SimplePlot) last)
                    .shallowCopy();
            plot.setShortTitle(true);
            plot.setShowSidePanel(false);
            plot.setName(title);
            plot.setX(-2);
            plot.setY(-2);
            flow.add(plot);
            flow.setUp();
            // the values are placed in storage as a Double[] after setUp() and before execute()
            flow.getStorage().put(new StorageName("values"), list.toArray(new Double[list.size()]));
            flow.execute();
            flow.wrapUp();
            return null;
        }
    };
    worker.execute();
}
From source file:adams.opt.optimise.genetic.fitnessfunctions.AttributeSelection.java
License:Open Source License
/** * Callback for best measure so far//from w w w . ja v a 2 s .c o m */ @Override public void newBest(double val, OptData opd) { int cnt = 0; int[] weights = getWeights(opd); Instances newInstances = new Instances(getInstances()); for (int i = 0; i < getInstances().numInstances(); i++) { Instance in = newInstances.instance(i); cnt = 0; for (int a = 0; a < getInstances().numAttributes(); a++) { if (a == getInstances().classIndex()) continue; if (weights[cnt++] == 0) { in.setValue(a, 0); } else { in.setValue(a, in.value(a)); } } } try { File file = new File(getOutputDirectory().getAbsolutePath() + File.separator + Double.toString(getMeasure().adjust(val)) + ".arff"); file.createNewFile(); Writer writer = new BufferedWriter(new FileWriter(file)); Instances header = new Instances(newInstances, 0); // remove filter setup Remove remove = new Remove(); remove.setAttributeIndices(getRemoveAsString(weights)); remove.setInvertSelection(true); header.setRelationName(OptionUtils.getCommandLine(remove)); writer.write(header.toString()); writer.write("\n"); for (int i = 0; i < newInstances.numInstances(); i++) { writer.write(newInstances.instance(i).toString()); writer.write("\n"); } writer.flush(); writer.close(); } catch (Exception e) { e.printStackTrace(); } }
From source file:adaptedClusteringAlgorithms.MyFarthestFirst.java
License:Open Source License
/** * Generates a clusterer. Has to initialize all fields of the clusterer * that are not being set via options.//w w w .j a v a 2s . com * * @param data set of instances serving as training data * @throws Exception if the clusterer has not been * generated successfully */ public void buildClusterer(Instances data) throws Exception { if (!SESAME.SESAME_GUI) MyFirstClusterer.weka_gui = true; // can clusterer handle the data? getCapabilities().testWithFail(data); //long start = System.currentTimeMillis(); m_ReplaceMissingFilter = new ReplaceMissingValues(); // Missing values replacement is not required so this modification is made /*m_ReplaceMissingFilter.setInputFormat(data); m_instances = Filter.useFilter(data, m_ReplaceMissingFilter);*/ Instances m_instances = new Instances(data); // To use semantic measurers through DistanceFunction interface m_DistanceFunction.setInstances(m_instances); initMinMax(m_instances); m_ClusterCentroids = new Instances(m_instances, m_NumClusters); int n = m_instances.numInstances(); Random r = new Random(getSeed()); boolean[] selected = new boolean[n]; double[] minDistance = new double[n]; for (int i = 0; i < n; i++) minDistance[i] = Double.MAX_VALUE; int firstI = r.nextInt(n); m_ClusterCentroids.add(m_instances.instance(firstI)); selected[firstI] = true; updateMinDistance(minDistance, selected, m_instances, m_instances.instance(firstI)); if (m_NumClusters > n) m_NumClusters = n; for (int i = 1; i < m_NumClusters; i++) { int nextI = farthestAway(minDistance, selected); m_ClusterCentroids.add(m_instances.instance(nextI)); selected[nextI] = true; updateMinDistance(minDistance, selected, m_instances, m_instances.instance(nextI)); } m_instances = new Instances(m_instances, 0); //long end = System.currentTimeMillis(); //System.out.println("Clustering Time = " + (end-start)); // Save memory!! m_DistanceFunction.clean(); if (!SESAME.SESAME_GUI) MyFirstClusterer.weka_gui = true; }
From source file:adaptedClusteringAlgorithms.MyFarthestFirst.java
License:Open Source License
protected void initMinMax(Instances data) { m_Min = new double[data.numAttributes()]; m_Max = new double[data.numAttributes()]; for (int i = 0; i < data.numAttributes(); i++) { m_Min[i] = m_Max[i] = Double.NaN; }//from w ww. j a va 2 s . c o m for (int i = 0; i < data.numInstances(); i++) { updateMinMax(data.instance(i)); } }
From source file:adaptedClusteringAlgorithms.MySimpleKMeans.java
License:Open Source License
/**
 * Generates a clusterer. Has to initialize all fields of the clusterer that
 * are not being set via options.
 * <p>
 * Outline: copy the data (class index unset, missing values optionally
 * replaced), compute statistics over the full dataset, pick distinct initial
 * centroids at random, then alternate between assigning instances to clusters
 * and recomputing the centroids until no assignment changes or the maximum
 * number of iterations is reached. Empty clusters are dropped along the way,
 * so {@code m_NumClusters} may end up smaller than requested.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {
    if (!SESAME.SESAME_GUI)
        MyFirstClusterer.weka_gui = true;
    // can clusterer handle the data?
    getCapabilities().testWithFail(data);

    m_Iterations = 0;

    m_ReplaceMissingFilter = new ReplaceMissingValues();
    // work on a copy with the class index unset
    Instances instances = new Instances(data);

    instances.setClassIndex(-1);
    if (!m_dontReplaceMissing) {
        m_ReplaceMissingFilter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
    }

    // statistics over the full dataset
    m_FullMissingCounts = new int[instances.numAttributes()];
    if (m_displayStdDevs) {
        m_FullStdDevs = new double[instances.numAttributes()];
    }
    m_FullNominalCounts = new int[instances.numAttributes()][0];

    // updateClusterInfo == false: only the coordinates are computed, no per-cluster field is touched
    m_FullMeansOrMediansOrModes = moveCentroid(0, instances, false);
    for (int i = 0; i < instances.numAttributes(); i++) {
        m_FullMissingCounts[i] = instances.attributeStats(i).missingCount;
        if (instances.attribute(i).isNumeric()) {
            if (m_displayStdDevs) {
                m_FullStdDevs[i] = Math.sqrt(instances.variance(i));
            }
            if (m_FullMissingCounts[i] == instances.numInstances()) {
                m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean
            }
        } else {
            m_FullNominalCounts[i] = instances.attributeStats(i).nominalCounts;
            if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) {
                m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common value
            }
        }
    }

    m_ClusterCentroids = new Instances(instances, m_NumClusters);
    int[] clusterAssignments = new int[instances.numInstances()];

    // expose the assignment array (same reference, filled in below) if requested
    if (m_PreserveOrder) {
        m_Assignments = clusterAssignments;
    }

    m_DistanceFunction.setInstances(instances);

    Random RandomO = new Random(getSeed());
    int instIndex;
    // used as a set: keys of the instances already chosen as centroid, values are always null
    HashMap initC = new HashMap();
    DecisionTableHashKey hk = null;

    // the selection loop below swaps instances around; with m_PreserveOrder
    // that happens on a copy so that the order of `instances` stays untouched
    Instances initInstances = null;
    if (m_PreserveOrder) {
        initInstances = new Instances(instances);
    } else {
        initInstances = instances;
    }

    // initial centroids: draw a random index from [0, j], accept the instance
    // unless an instance with an equal hash key was already chosen, then swap
    // it to position j so that it is outside the range of the next draw
    for (int j = initInstances.numInstances() - 1; j >= 0; j--) {
        instIndex = RandomO.nextInt(j + 1);
        hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(), true);
        if (!initC.containsKey(hk)) {
            m_ClusterCentroids.add(initInstances.instance(instIndex));
            initC.put(hk, null);
        }
        initInstances.swap(j, instIndex);

        if (m_ClusterCentroids.numInstances() == m_NumClusters) {
            break;
        }
    }

    // fewer distinct instances than requested clusters: shrink the cluster count
    m_NumClusters = m_ClusterCentroids.numInstances();

    // removing reference
    initInstances = null;

    int i;
    boolean converged = false;
    int emptyClusterCount;
    // tempI[c] collects the members of cluster c in the current iteration
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    m_ClusterMissingCounts = new int[m_NumClusters][instances.numAttributes()];
    while (!converged) {
        emptyClusterCount = 0;
        m_Iterations++;
        converged = true;
        // assignment step: any changed assignment means another iteration is needed
        for (i = 0; i < instances.numInstances(); i++) {
            Instance toCluster = instances.instance(i);
            int newC = clusterProcessedInstance(toCluster, true);
            if (newC != clusterAssignments[i]) {
                converged = false;
            }
            clusterAssignments[i] = newC;
        }

        // update centroids
        // m_ClusterCentroids is rebuilt from scratch; moveCentroid(..., true) appends to it
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        for (i = 0; i < m_NumClusters; i++) {
            if (tempI[i].numInstances() == 0) {
                // empty cluster
                emptyClusterCount++;
            } else {
                moveCentroid(i, tempI[i], true);
            }
        }

        // iteration limit reached: stop regardless of changed assignments
        if (m_Iterations == m_MaxIterations) {
            converged = true;
        }

        if (emptyClusterCount > 0) {
            m_NumClusters -= emptyClusterCount;
            if (converged) {
                // last iteration: compact tempI and the nominal counts so that
                // only the non-empty clusters remain, in their original order
                Instances[] t = new Instances[m_NumClusters];
                int index = 0;
                for (int k = 0; k < tempI.length; k++) {
                    if (tempI[k].numInstances() > 0) {
                        t[index] = tempI[k];

                        for (i = 0; i < tempI[k].numAttributes(); i++) {
                            m_ClusterNominalCounts[index][i] = m_ClusterNominalCounts[k][i];
                        }
                        index++;
                    }
                }
                tempI = t;
            } else {
                // another iteration follows: it refills tempI from scratch
                tempI = new Instances[m_NumClusters];
            }
        }

        // reset the per-cluster statistics for the next iteration
        if (!converged) {
            m_squaredErrors = new double[m_NumClusters];
            m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
        }
    }

    // final per-cluster statistics
    if (m_displayStdDevs) {
        m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    }
    m_ClusterSizes = new int[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        if (m_displayStdDevs) {
            // standard deviation for numeric attributes, missing value for all others
            double[] vals2 = new double[instances.numAttributes()];
            for (int j = 0; j < instances.numAttributes(); j++) {
                if (instances.attribute(j).isNumeric()) {
                    vals2[j] = Math.sqrt(tempI[i].variance(j));
                } else {
                    vals2[j] = Instance.missingValue();
                }
            }
            m_ClusterStdDevs.add(new Instance(1.0, vals2));
        }
        m_ClusterSizes[i] = tempI[i].numInstances();
    }

    // Save memory!!
    m_DistanceFunction.clean();

    if (!SESAME.SESAME_GUI)
        MyFirstClusterer.weka_gui = true;
}
From source file:adaptedClusteringAlgorithms.MySimpleKMeans.java
License:Open Source License
/** * Move the centroid to it's new coordinates. Generate the centroid * coordinates based on it's members (objects assigned to the cluster of the * centroid) and the distance function being used. * /*from ww w . j a v a2s. c o m*/ * @param centroidIndex index of the centroid which the coordinates will be * computed * @param members the objects that are assigned to the cluster of this * centroid * @param updateClusterInfo if the method is supposed to update the m_Cluster * arrays * @return the centroid coordinates */ protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo) { double[] vals = new double[members.numAttributes()]; for (int j = 0; j < members.numAttributes(); j++) { // The centroid is the mean point. If the attribute is nominal, the centroid is the mode if (m_DistanceFunction instanceof ChEBIInd || m_DistanceFunction instanceof ChEBIDir || m_DistanceFunction instanceof GOInd || m_DistanceFunction instanceof GODir || m_DistanceFunction instanceof GOChEBIInd || m_DistanceFunction instanceof GOChEBIDir || m_DistanceFunction instanceof CalculusInd || m_DistanceFunction instanceof CalculusDir || members.attribute(j).isNominal()) { vals[j] = members.meanOrMode(j); } if (updateClusterInfo) { m_ClusterMissingCounts[centroidIndex][j] = members.attributeStats(j).missingCount; m_ClusterNominalCounts[centroidIndex][j] = members.attributeStats(j).nominalCounts; if (members.attribute(j).isNominal()) { if (m_ClusterMissingCounts[centroidIndex][j] > m_ClusterNominalCounts[centroidIndex][j][Utils .maxIndex(m_ClusterNominalCounts[centroidIndex][j])]) { vals[j] = Instance.missingValue(); // mark mode as missing } } else { if (m_ClusterMissingCounts[centroidIndex][j] == members.numInstances()) { vals[j] = Instance.missingValue(); // mark mean as missing } } } } if (updateClusterInfo) { m_ClusterCentroids.add(new Instance(1.0, vals)); } return vals; }
From source file:agnes.MyAgnes.java
/**
 * Sets up the distance measure for the data, puts every instance into a
 * cluster of its own (recording the instance's index in {@code instanceID})
 * and hands these singleton clusters to {@code addNewClusterHierarchy}.
 *
 * @param data the instances to cluster
 */
public void buildClusterer(Instances data) {
    distanceCounter = new EuclideanDistance(data);

    ArrayList<ArrayList<Instance>> currentClusters = new ArrayList<>();
    for (int idx = 0; idx < data.numInstances(); idx++) {
        Instance member = data.instance(idx);

        ArrayList<Instance> singleton = new ArrayList<>();
        currentClusters.add(singleton);
        singleton.add(member);

        instanceID.put(member, idx);
    }

    addNewClusterHierarchy(currentClusters);
}