List of usage examples for weka.core Instances numInstances
public int numInstances()
From source file:adams.flow.sink.WekaThresholdCurve.java
License:Open Source License
/** * Creates a new panel for the token./*from ww w . j a v a 2s .c o m*/ * * @param token the token to display in a new panel, can be null * @return the generated panel */ public AbstractDisplayPanel createDisplayPanel(Token token) { AbstractDisplayPanel result; String name; if (token != null) name = "Threshold curve (" + getEvaluation(token).getHeader().relationName() + ")"; else name = "Threshold curve"; result = new AbstractComponentDisplayPanel(name) { private static final long serialVersionUID = -7362768698548152899L; protected ThresholdVisualizePanel m_VisualizePanel; @Override protected void initGUI() { super.initGUI(); setLayout(new BorderLayout()); m_VisualizePanel = new ThresholdVisualizePanel(); add(m_VisualizePanel, BorderLayout.CENTER); } @Override public void display(Token token) { try { Evaluation eval = getEvaluation(token); m_ClassLabelRange.setMax(eval.getHeader().classAttribute().numValues()); int[] indices = m_ClassLabelRange.getIntIndices(); for (int index : indices) { ThresholdCurve curve = new ThresholdCurve(); Instances data = curve.getCurve(eval.predictions(), index); PlotData2D plot = new PlotData2D(data); plot.setPlotName(eval.getHeader().classAttribute().value(index)); plot.m_displayAllPoints = true; boolean[] connectPoints = new boolean[data.numInstances()]; for (int cp = 1; cp < connectPoints.length; cp++) connectPoints[cp] = true; plot.setConnectPoints(connectPoints); m_VisualizePanel.addPlot(plot); if (data.attribute(m_AttributeX.toDisplay()) != null) m_VisualizePanel.setXIndex(data.attribute(m_AttributeX.toDisplay()).index()); if (data.attribute(m_AttributeY.toDisplay()) != null) m_VisualizePanel.setYIndex(data.attribute(m_AttributeY.toDisplay()).index()); } } catch (Exception e) { getLogger().log(Level.SEVERE, "Failed to display token: " + token, e); } } @Override public JComponent supplyComponent() { return m_VisualizePanel; } @Override public void clearPanel() { m_VisualizePanel.removeAllPlots(); } public void cleanUp() { 
m_VisualizePanel.removeAllPlots(); } }; if (token != null) result.display(token); return result; }
From source file:adams.flow.transformer.WekaCrossValidationClustererEvaluator.java
License:Open Source License
/** * Executes the flow item.//w ww . ja v a2 s .c o m * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Instances data; weka.clusterers.Clusterer cls; int folds; MakeDensityBasedClusterer make; double log; result = null; try { // evaluate classifier cls = getClustererInstance(); if (cls == null) throw new IllegalStateException("Clusterer '" + getClusterer() + "' not found!"); data = (Instances) m_InputToken.getPayload(); folds = m_Folds; if (folds == -1) folds = data.numInstances(); if (!(cls instanceof DensityBasedClusterer)) { make = new MakeDensityBasedClusterer(); make.setClusterer(cls); cls = make; } log = ClusterEvaluation.crossValidateModel((DensityBasedClusterer) cls, data, folds, new Random(m_Seed)); m_OutputToken = new Token(new WekaClusterEvaluationContainer(log)); } catch (Exception e) { m_OutputToken = null; result = handleException("Failed to cross-validate clusterer: ", e); } if (m_OutputToken != null) updateProvenance(m_OutputToken); return result; }
From source file:adams.flow.transformer.WekaExtractArray.java
License:Open Source License
/** * Executes the flow item./*from w w w . j av a 2s. co m*/ * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Double[] array; Instances inst; SpreadSheet sheet; int i; int index; Cell cell; result = null; array = null; if (m_InputToken.getPayload() instanceof Instances) { inst = (Instances) m_InputToken.getPayload(); if (m_Type == ExtractionType.COLUMN) m_Index.setMax(inst.numAttributes()); else m_Index.setMax(inst.numInstances()); index = m_Index.getIntIndex(); if (index == -1) result = "Invalid index: " + m_Index + " (max=" + m_Index.getMax() + ")"; else if ((m_Type == ExtractionType.COLUMN) && !inst.attribute(index).isNumeric()) result = "Column " + m_Index + " is not numeric!"; if (result == null) { if (m_Type == ExtractionType.COLUMN) { array = new Double[inst.numInstances()]; for (i = 0; i < array.length; i++) array[i] = inst.instance(i).value(index); } else { array = new Double[inst.numAttributes()]; for (i = 0; i < array.length; i++) array[i] = inst.instance(index).value(i); } } } else { sheet = (SpreadSheet) m_InputToken.getPayload(); if (m_Type == ExtractionType.COLUMN) m_Index.setMax(sheet.getColumnCount()); else m_Index.setMax(sheet.getRowCount()); index = m_Index.getIntIndex(); if (index == -1) result = "Invalid index: " + m_Index + " (max=" + m_Index.getMax() + ")"; else if ((m_Type == ExtractionType.COLUMN) && !sheet.isNumeric(index, true)) result = "Column " + m_Index + " is not numeric!"; if (result == null) { if (m_Type == ExtractionType.COLUMN) { array = new Double[sheet.getRowCount()]; for (i = 0; i < array.length; i++) { cell = sheet.getCell(i, index); if ((cell != null) && !cell.isMissing()) array[i] = cell.toDouble(); } } else { array = new Double[sheet.getColumnCount()]; for (i = 0; i < array.length; i++) { cell = sheet.getCell(index, i); if ((cell != null) && !cell.isMissing()) array[i] = cell.toDouble(); } } } } if (array != null) m_OutputToken = new Token(array); 
return result; }
From source file:adams.flow.transformer.WekaFilter.java
License:Open Source License
/** * Executes the flow item.//from w w w.j a v a 2 s .com * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; weka.core.Instances data; weka.core.Instances filteredData; weka.core.Instance inst; adams.data.instance.Instance instA; weka.core.Instance filteredInst; String relation; result = null; data = null; inst = null; if (m_InputToken.hasPayload(weka.core.Instance.class)) inst = m_InputToken.getPayload(weka.core.Instance.class); else if (m_InputToken.hasPayload(adams.data.instance.Instance.class)) inst = m_InputToken.getPayload(adams.data.instance.Instance.class).toInstance(); else if (m_InputToken.hasPayload(weka.core.Instances.class)) data = m_InputToken.getPayload(weka.core.Instances.class); else result = m_InputToken.unhandledData(); if (result == null) { try { // initialize filter? if (!m_Initialized || !m_InitializeOnce) { if (data == null) { data = new weka.core.Instances(inst.dataset(), 0); data.add(inst); } initActualFilter(data); } synchronized (m_ActualFilter) { if (!m_FlowContextUpdated) { m_FlowContextUpdated = true; if (m_ActualFilter instanceof FlowContextHandler) ((FlowContextHandler) m_ActualFilter).setFlowContext(this); } // filter data filteredData = null; filteredInst = null; if (data != null) { relation = data.relationName(); filteredData = weka.filters.Filter.useFilter(data, m_ActualFilter); if (m_KeepRelationName) { filteredData.setRelationName(relation); if (isLoggingEnabled()) getLogger().info("Setting relation name: " + relation); } m_Initialized = true; } else { relation = inst.dataset().relationName(); m_ActualFilter.input(inst); m_ActualFilter.batchFinished(); filteredInst = m_ActualFilter.output(); if (m_KeepRelationName) { filteredInst.dataset().setRelationName(relation); if (isLoggingEnabled()) getLogger().info("Setting relation name: " + relation); } } } // build output token if (inst != null) { if (filteredInst != null) { if (m_InputToken.getPayload() 
instanceof weka.core.Instance) { m_OutputToken = new Token(filteredInst); } else { instA = new adams.data.instance.Instance(); instA.set(filteredInst); m_OutputToken = createToken(m_InputToken.getPayload(), instA); } } else if ((filteredData != null) && (filteredData.numInstances() > 0)) { m_OutputToken = createToken(m_InputToken.getPayload(), filteredData.instance(0)); } } else { m_OutputToken = createToken(m_InputToken.getPayload(), filteredData); } } catch (Exception e) { result = handleException("Failed to filter data: ", e); } } if (m_OutputToken != null) updateProvenance(m_OutputToken); return result; }
From source file:adams.flow.transformer.WekaGetInstancesValue.java
License:Open Source License
/**
 * Looks up a single cell of the incoming dataset and forwards its value:
 * "?" for a missing value, the numeric value for numeric attributes and
 * the string representation for date, nominal, string and relational ones.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
  final Instances inst = (Instances) m_InputToken.getPayload();

  m_Column.setData(inst);
  m_Row.setMax(inst.numInstances());
  final int col = m_Column.getIntIndex();
  final int row = m_Row.getIntIndex();

  // the row is validated before the column
  if (row == -1) {
    return "Failed to retrieve row: " + m_Row.getIndex();
  }
  if (col == -1) {
    return "Failed to retrieve column: " + m_Column.getIndex();
  }

  String result = null;
  try {
    if (inst.instance(row).isMissing(col)) {
      m_OutputToken = new Token("?");
    } else {
      switch (inst.attribute(col).type()) {
        case Attribute.NUMERIC:
          m_OutputToken = new Token(inst.instance(row).value(col));
          break;

        case Attribute.DATE:
        case Attribute.NOMINAL:
        case Attribute.STRING:
        case Attribute.RELATIONAL:
          m_OutputToken = new Token(inst.instance(row).stringValue(col));
          break;

        default:
          result = "Unhandled attribute type: " + inst.attribute(col).type();
      }
    }
  } catch (Exception e) {
    result = handleException("Failed to obtain value from dataset:", e);
  }

  return result;
}
From source file:adams.flow.transformer.WekaInstancesHistogramRanges.java
License:Open Source License
/**
 * Computes the histogram for the configured rows/columns of the incoming
 * dataset and queues the column names of the resulting spreadsheet.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
  String result = null;
  SpreadSheet sheet = null;

  m_Queue.clear();

  try {
    final Instances data = (Instances) m_InputToken.getPayload();

    final ArrayHistogram histogram = new ArrayHistogram();
    histogram.setBinCalculation(m_BinCalculation);
    histogram.setNumBins(m_NumBins);
    histogram.setBinWidth(m_BinWidth);
    histogram.setNormalize(m_Normalize);
    histogram.setUseFixedMinMax(m_UseFixedMinMax);
    histogram.setManualMin(m_ManualMin);
    histogram.setManualMax(m_ManualMax);
    histogram.setDisplayRanges(true);
    histogram.setNumDecimals(m_NumDecimals);

    for (int loc = 0; loc < m_Locations.length; loc++) {
      switch (m_DataType) {
        case ROW_BY_INDEX: {
          Index rowIndex = new Index(m_Locations[loc].stringValue());
          rowIndex.setMax(data.numInstances());
          histogram.add(StatUtils.toNumberArray(data.instance(rowIndex.getIntIndex()).toDoubleArray()));
          break;
        }

        case COLUMN_BY_INDEX: {
          WekaAttributeIndex colIndex = new WekaAttributeIndex(m_Locations[loc].stringValue());
          colIndex.setData(data);
          histogram.add(StatUtils.toNumberArray(data.attributeToDoubleArray(colIndex.getIntIndex())));
          break;
        }

        case COLUMN_BY_REGEXP: {
          // only the first attribute whose name matches the expression is used
          for (int col = 0; col < data.numAttributes(); col++) {
            if (!data.attribute(col).name().matches(m_Locations[loc].stringValue())) {
              continue;
            }
            histogram.add(StatUtils.toNumberArray(data.attributeToDoubleArray(col)));
            break;
          }
          break;
        }

        default:
          throw new IllegalStateException("Unhandled data type: " + m_DataType);
      }
    }

    sheet = histogram.calculate().toSpreadSheet();
  } catch (Exception e) {
    result = handleException("Error generating the ranges: ", e);
    sheet = null;
  }

  if (sheet != null) {
    for (int col = 0; col < sheet.getColumnCount(); col++) {
      m_Queue.add(sheet.getColumnName(col));
    }
  }

  return result;
}
From source file:adams.flow.transformer.WekaInstancesInfo.java
License:Open Source License
/** * Executes the flow item./* w w w . ja v a 2 s. c o m*/ * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Instances inst; int index; int labelIndex; double[] dist; Enumeration enm; int i; result = null; if (m_InputToken.getPayload() instanceof Instance) inst = ((Instance) m_InputToken.getPayload()).dataset(); else inst = (Instances) m_InputToken.getPayload(); m_AttributeIndex.setData(inst); index = m_AttributeIndex.getIntIndex(); m_Queue.clear(); switch (m_Type) { case FULL: m_Queue.add(inst.toSummaryString()); break; case FULL_ATTRIBUTE: m_Queue.add(getAttributeStats(inst, index)); break; case FULL_CLASS: if (inst.classIndex() > -1) m_Queue.add(getAttributeStats(inst, inst.classIndex())); break; case HEADER: m_Queue.add(new Instances(inst, 0).toString()); break; case RELATION_NAME: m_Queue.add(inst.relationName()); break; case ATTRIBUTE_NAME: if (index != -1) m_Queue.add(inst.attribute(index).name()); break; case ATTRIBUTE_NAMES: for (i = 0; i < inst.numAttributes(); i++) m_Queue.add(inst.attribute(i).name()); break; case LABELS: if (index != -1) { enm = inst.attribute(index).enumerateValues(); while (enm.hasMoreElements()) m_Queue.add(enm.nextElement()); } break; case CLASS_LABELS: if (inst.classIndex() > -1) { enm = inst.classAttribute().enumerateValues(); while (enm.hasMoreElements()) m_Queue.add(enm.nextElement()); } break; case LABEL_COUNT: if (index > -1) { m_LabelIndex.setData(inst.attribute(index)); labelIndex = m_LabelIndex.getIntIndex(); m_Queue.add(inst.attributeStats(index).nominalCounts[labelIndex]); } break; case LABEL_COUNTS: if (index > -1) m_Queue.add(StatUtils.toNumberArray(inst.attributeStats(index).nominalCounts)); break; case LABEL_DISTRIBUTION: if (index > -1) { dist = new double[inst.attributeStats(index).nominalCounts.length]; for (i = 0; i < dist.length; i++) dist[i] = inst.attributeStats(index).nominalCounts[i]; Utils.normalize(dist); 
m_Queue.add(StatUtils.toNumberArray(dist)); } break; case CLASS_LABEL_COUNT: if (inst.classIndex() > -1) { m_LabelIndex.setData(inst.classAttribute()); labelIndex = m_LabelIndex.getIntIndex(); m_Queue.add(inst.attributeStats(inst.classIndex()).nominalCounts[labelIndex]); } break; case CLASS_LABEL_COUNTS: if (inst.classIndex() > -1) m_Queue.add(StatUtils.toNumberArray(inst.attributeStats(inst.classIndex()).nominalCounts)); break; case CLASS_LABEL_DISTRIBUTION: if (inst.classIndex() > -1) { dist = new double[inst.attributeStats(inst.classIndex()).nominalCounts.length]; for (i = 0; i < dist.length; i++) dist[i] = inst.attributeStats(inst.classIndex()).nominalCounts[i]; Utils.normalize(dist); m_Queue.add(StatUtils.toNumberArray(dist)); } break; case NUM_ATTRIBUTES: m_Queue.add(inst.numAttributes()); break; case NUM_INSTANCES: m_Queue.add(inst.numInstances()); break; case NUM_CLASS_LABELS: if ((inst.classIndex() != -1) && inst.classAttribute().isNominal()) m_Queue.add(inst.classAttribute().numValues()); break; case NUM_LABELS: if ((index != -1) && inst.attribute(index).isNominal()) m_Queue.add(inst.attribute(index).numValues()); break; case NUM_DISTINCT_VALUES: if (index != -1) m_Queue.add(inst.attributeStats(index).distinctCount); break; case NUM_UNIQUE_VALUES: if (index != -1) m_Queue.add(inst.attributeStats(index).uniqueCount); break; case NUM_MISSING_VALUES: if (index != -1) m_Queue.add(inst.attributeStats(index).missingCount); break; case MIN: if ((index != -1) && inst.attribute(index).isNumeric()) m_Queue.add(inst.attributeStats(index).numericStats.min); break; case MAX: if ((index != -1) && inst.attribute(index).isNumeric()) m_Queue.add(inst.attributeStats(index).numericStats.max); break; case MEAN: if ((index != -1) && inst.attribute(index).isNumeric()) m_Queue.add(inst.attributeStats(index).numericStats.mean); break; case STDEV: if ((index != -1) && inst.attribute(index).isNumeric()) m_Queue.add(inst.attributeStats(index).numericStats.stdDev); break; case 
ATTRIBUTE_TYPE: if (index != -1) m_Queue.add(Attribute.typeToString(inst.attribute(index))); break; case CLASS_TYPE: if (inst.classIndex() != -1) m_Queue.add(Attribute.typeToString(inst.classAttribute())); break; default: result = "Unhandled info type: " + m_Type; } return result; }
From source file:adams.flow.transformer.WekaInstancesMerge.java
License:Open Source License
/** * Prefixes the attributes./*from ww w . j a v a 2 s . c om*/ * * @param index the index of the dataset * @param inst the data to process * @return the processed data */ protected Instances prefixAttributes(Instances inst, int index) { Instances result; String prefix; ArrayList<Attribute> atts; int i; prefix = createPrefix(inst, index); // header atts = new ArrayList<>(); for (i = 0; i < inst.numAttributes(); i++) atts.add(inst.attribute(i).copy(prefix + inst.attribute(i).name())); // data result = new Instances(inst.relationName(), atts, inst.numInstances()); result.setClassIndex(inst.classIndex()); for (i = 0; i < inst.numInstances(); i++) result.add((Instance) inst.instance(i).copy()); return result; }
From source file:adams.flow.transformer.WekaInstancesMerge.java
License:Open Source License
/** * Updates the IDs in the hashset with the ones stored in the ID attribute * of the provided dataset./*from www .j a v a2 s . c o m*/ * * @param instIndex the dataset index * @param inst the dataset to obtain the IDs from * @param ids the hashset to store the IDs in */ protected void updateIDs(int instIndex, Instances inst, HashSet ids) { Attribute att; int i; boolean numeric; HashSet current; Object id; att = inst.attribute(m_UniqueID); if (att == null) throw new IllegalStateException("Attribute '" + m_UniqueID + "' not found in relation '" + inst.relationName() + "' (#" + (instIndex + 1) + ")!"); // determine/check type if (m_AttType == -1) { if ((att.type() == Attribute.NUMERIC) || (att.type() == Attribute.STRING)) m_AttType = att.type(); else throw new IllegalStateException("Attribute '" + m_UniqueID + "' must be either NUMERIC or STRING (#" + (instIndex + 1) + ")!"); } else { if (m_AttType != att.type()) throw new IllegalStateException("Attribute '" + m_UniqueID + "' must have same attribute type in all the datasets (#" + (instIndex + 1) + ")!"); } // get IDs numeric = m_AttType == Attribute.NUMERIC; current = new HashSet(); for (i = 0; i < inst.numInstances(); i++) { if (numeric) id = inst.instance(i).value(att); else id = inst.instance(i).stringValue(att); if (m_Strict && current.contains(id)) throw new IllegalStateException( "ID '" + id + "' is not unique in dataset #" + (instIndex + 1) + "!"); current.add(id); } ids.addAll(current); }
From source file:adams.flow.transformer.WekaInstancesStatistic.java
License:Open Source License
/** * Executes the flow item./* w w w . ja v a 2s . com*/ * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; SpreadSheet sheet; Instances data; int i; int n; Index index; AbstractArrayStatistic stat; result = null; try { sheet = null; data = (Instances) m_InputToken.getPayload(); stat = m_Statistic.shallowCopy(true); for (i = 0; i < m_Locations.length; i++) { switch (m_DataType) { case ROW_BY_INDEX: index = new Index(m_Locations[i].stringValue()); index.setMax(data.numInstances()); stat.add(StatUtils.toNumberArray(data.instance(index.getIntIndex()).toDoubleArray())); break; case COLUMN_BY_INDEX: index = new WekaAttributeIndex(m_Locations[i].stringValue()); ((WekaAttributeIndex) index).setData(data); stat.add(StatUtils.toNumberArray(data.attributeToDoubleArray(index.getIntIndex()))); break; case COLUMN_BY_REGEXP: for (n = 0; n < data.numAttributes(); n++) { if (data.attribute(n).name().matches(m_Locations[i].stringValue())) { stat.add(StatUtils.toNumberArray(data.attributeToDoubleArray(n))); break; } } break; default: throw new IllegalStateException("Unhandled data type: " + m_DataType); } } sheet = stat.calculate().toSpreadSheet(); } catch (Exception e) { result = handleException("Error generating the statistic: ", e); sheet = null; } if (sheet != null) m_OutputToken = new Token(sheet); return result; }