List of usage examples for the weka.core.Instances constructor
public Instances(Instances dataset, int capacity)
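This constructor copies the structure of dataset (relation name, attributes, class index) into a new, empty set of instances and pre-allocates room for capacity rows; no data rows are copied. Passing 0 gives the "header-only copy" idiom that runs through all of the examples below. A minimal sketch of that idiom (the file name iris.arff and the class name HeaderCopyDemo are placeholders, not taken from any example on this page):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class HeaderCopyDemo {
    public static void main(String[] args) throws Exception {
        // Load any ARFF file; the path is a placeholder for this sketch.
        Instances data = DataSource.read("iris.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // Capacity 0 yields a header-only copy: same attributes,
        // relation name and class index, but no data rows.
        Instances header = new Instances(data, 0);
        System.out.println(header.numInstances());      // 0
        System.out.println(header.equalHeaders(data));  // true
    }
}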
From source file:activeSegmentation.learning.WekaClassifier.java
License:Open Source License
/**
 * Evaluates the classifier using the test dataset and stores the evaluation.
 *
 * @param instances The instances to test
 * @return The predictions, or null if the evaluation failed
 */
@Override
public double[] testModel(IDataSet instances) {
    try {
        // test the current classifier with the test set
        Evaluation evaluator = new Evaluation(new Instances(instances.getDataset(), 0));
        double[] predict = evaluator.evaluateModel(classifier, instances.getDataset());
        System.out.println(evaluator.toSummaryString());
        return predict;
    } catch (Exception e) {
        Logger.getLogger(WekaClassifier.class.getName()).log(Level.SEVERE, null, e);
    }
    return null;
}
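Here the header-only copy seeds the Evaluation with the dataset structure (notably the class attribute and its labels); the actual test rows are only handed over later in evaluateModel(), so no second reference to the full data is kept.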
From source file:adams.data.conversion.AbstractMatchWekaInstanceAgainstHeader.java
License:Open Source License
/**
 * Performs the actual conversion.
 *
 * @return the converted data
 * @throws Exception if something goes wrong with the conversion
 */
protected Object doConvert() throws Exception {
    Instance input;
    Instance result;
    String error;

    // get header (a header-only copy of the reference dataset);
    // the null check belongs on the obtained header, since the
    // constructor itself never returns null
    if (m_Dataset == null) {
        Instances header = getDatasetHeader();
        if (header == null)
            throw new IllegalStateException("Failed to obtain header!");
        m_Dataset = new Instances(header, 0);
    }

    input = (Instance) m_Input;

    // check compatibility
    error = isCompatible(input);
    if (error != null)
        throw new IllegalArgumentException("Input is not compatible: " + error);

    // convert (if necessary)
    result = match(input);

    return result;
}
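The header-only copy built from getDatasetHeader() is cached in m_Dataset, so the header lookup happens once and every subsequent conversion reuses the same reference header for the compatibility check.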
From source file:adams.data.instance.Instance.java
License:Open Source License
/**
 * Clears the container and adds the data from the weka.core.Instance
 * (internal values). Uses only the attributes specified in the range.
 *
 * @param inst the instance to use
 * @param index the row index in the original dataset, use -1 to ignore
 * @param additional the indices of the additional attribute values to
 *          store in the report
 * @param range the range of attributes to limit the instance to
 * @param attTypes the attribute types to restrict to, null or an empty
 *          set means no restriction
 * @see Attribute
 */
public void set(weka.core.Instance inst, int index, int[] additional, Range range,
        HashSet<Integer> attTypes) {
    ArrayList<InstancePoint> list;
    int i;
    Attribute att;
    String fieldStr;

    clear();

    // keep reference to header
    m_DatasetHeader = new Instances(inst.dataset(), 0);

    range.setMax(inst.numAttributes());
    list = new ArrayList<InstancePoint>();
    for (i = 0; i < inst.numAttributes(); i++) {
        if (i == inst.classIndex())
            continue;
        if (!range.isInRange(i))
            continue;
        if ((attTypes != null) && (!attTypes.contains(inst.attribute(i).type())))
            continue;
        list.add(new InstancePoint(i, inst.value(i)));
    }
    addAll(list);

    // create artificial report
    m_Report.addParameter(REPORT_DATASET, m_DatasetHeader.relationName());
    att = m_DatasetHeader.attribute(ArffUtils.getDBIDName());
    if (att != null) {
        m_Report.addParameter(REPORT_DB_ID, new Double(inst.value(att)));
        m_Report.setDatabaseID((int) inst.value(att));
    }
    att = m_DatasetHeader.attribute(ArffUtils.getIDName());
    if (att != null)
        m_Report.addParameter(REPORT_ID, new Double(inst.value(att)));

    // class
    if (inst.classIndex() > -1) {
        if (inst.classAttribute().isNumeric()) {
            if (inst.classIsMissing()) {
                m_Report.addField(new Field(REPORT_CLASS, DataType.STRING));
                m_Report.addParameter(REPORT_CLASS, "?");
            } else {
                m_Report.addField(new Field(REPORT_CLASS, DataType.NUMERIC));
                m_Report.addParameter(REPORT_CLASS, Double.toString(inst.classValue()));
            }
        } else {
            m_Report.addField(new Field(REPORT_CLASS, DataType.STRING));
            if (inst.classIsMissing())
                m_Report.addParameter(REPORT_CLASS, "?");
            else
                m_Report.addParameter(REPORT_CLASS, inst.stringValue(inst.classIndex()));
        }
    }

    // row
    if (index != -1) {
        m_Report.addField(new Field(REPORT_ROW, DataType.NUMERIC));
        m_Report.addParameter(REPORT_ROW, new Double(index + 1));
    }

    // additional attributes
    for (i = 0; i < additional.length; i++) {
        att = inst.attribute(additional[i]);
        fieldStr = REPORT_ADDITIONAL_PREFIX + (additional[i] + 1) + "-" + att.name();
        if (att.isNumeric()) {
            m_Report.addField(new Field(fieldStr, DataType.NUMERIC));
            m_Report.addParameter(fieldStr, inst.value(additional[i]));
        } else {
            m_Report.addField(new Field(fieldStr, DataType.STRING));
            m_Report.addParameter(fieldStr, inst.stringValue(additional[i]));
        }
    }

    // display ID (hashcode of string representation of Instance)
    if (getID().length() == 0)
        setID("" + inst.toString().hashCode());
}
From source file:adams.data.instancesanalysis.pls.OPLS.java
License:Open Source License
/**
 * Determines the output format based on the input format and returns this.
 *
 * @param input the input format to base the output format on
 * @return the output format
 * @throws Exception in case the determination goes wrong
 */
@Override
public Instances determineOutputFormat(Instances input) throws Exception {
    m_OutputFormat = new Instances(input, 0);
    return m_OutputFormat;
}
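Returning new Instances(input, 0) is the usual way for a filter to describe its output format: a dataset with the right structure but no rows, which here also doubles as the cached m_OutputFormat.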
From source file:adams.flow.transformer.WekaDatasetsMerge.java
License:Open Source License
/**
 * Creates an Instances dataset, containing a copy of the single instance
 * provided.
 *
 * @param instance The instance to create a dataset for.
 * @return The created dataset.
 */
protected Instances datasetForSingleInstance(Instance instance) {
    // Create an empty dataset with the instance's original header,
    // pre-sized for a single row
    Instances dataset = new Instances(instance.dataset(), 1);

    // Add a copy of the provided instance
    dataset.add((Instance) instance.copy());

    return dataset;
}
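The capacity argument (1 here) merely pre-sizes the dataset's internal list for the single row about to be added; it is a hint, not a limit, and the dataset could still grow past it.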
From source file:adams.flow.transformer.WekaInstanceBuffer.java
License:Open Source License
/**
 * Executes the flow item.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instance[] insts;
    Instance inst;
    double[] values;
    int i;
    int n;
    boolean updated;

    result = null;

    if (m_Operation == Operation.INSTANCE_TO_INSTANCES) {
        if (m_InputToken.getPayload() instanceof Instance)
            insts = new Instance[]{(Instance) m_InputToken.getPayload()};
        else
            insts = (Instance[]) m_InputToken.getPayload();

        for (n = 0; n < insts.length; n++) {
            inst = insts[n];

            if ((m_Buffer != null) && m_CheckHeader) {
                if (!m_Buffer.equalHeaders(inst.dataset())) {
                    getLogger().info("Header changed, resetting buffer");
                    m_Buffer = null;
                }
            }

            // buffer instance
            if (m_Buffer == null)
                m_Buffer = new Instances(inst.dataset(), 0);

            // we need to make sure that string and relational values are in our
            // buffer header and update the current Instance accordingly before
            // buffering it
            values = inst.toDoubleArray();
            updated = false;
            for (i = 0; i < values.length; i++) {
                if (inst.isMissing(i))
                    continue;
                if (inst.attribute(i).isString()) {
                    values[i] = m_Buffer.attribute(i).addStringValue(inst.stringValue(i));
                    updated = true;
                } else if (inst.attribute(i).isRelationValued()) {
                    values[i] = m_Buffer.attribute(i).addRelation(inst.relationalValue(i));
                    updated = true;
                }
            }

            if (updated) {
                if (inst instanceof SparseInstance) {
                    inst = new SparseInstance(inst.weight(), values);
                } else if (inst instanceof BinarySparseInstance) {
                    inst = new BinarySparseInstance(inst.weight(), values);
                } else {
                    if (!(inst instanceof DenseInstance)) {
                        getLogger().severe("Unhandled instance class (" + inst.getClass().getName() + "), "
                                + "defaulting to " + DenseInstance.class.getName());
                    }
                    inst = new DenseInstance(inst.weight(), values);
                }
            } else {
                inst = (Instance) inst.copy();
            }

            m_Buffer.add(inst);
        }

        if (m_Buffer.numInstances() % m_Interval == 0) {
            m_OutputToken = new Token(m_Buffer);
            if (m_ClearBuffer)
                m_Buffer = null;
        }
    } else if (m_Operation == Operation.INSTANCES_TO_INSTANCE) {
        m_Buffer = (Instances) m_InputToken.getPayload();
        m_Iterator = m_Buffer.iterator();
    } else {
        throw new IllegalStateException("Unhandled operation: " + m_Operation);
    }

    return result;
}
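The second pass over the values is needed because string and relational values are stored inside the Attribute objects of a specific header: before an incoming row can be buffered, its string/relational values must be registered with the buffer's own attributes (addStringValue()/addRelation()) and the instance rebuilt around the returned indices.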
From source file:adams.flow.transformer.WekaInstanceDumper.java
License:Open Source License
/**
 * Turns the dataset header into the appropriate format.
 *
 * @param header the header to convert
 * @return the generated output
 */
protected String createHeader(Instances header) {
    StringBuilder result;
    int i;

    result = new StringBuilder();

    switch (m_OutputFormat) {
        case ARFF:
            result.append(new Instances(header, 0).toString());
            break;

        case CSV:
            for (i = 0; i < header.numAttributes(); i++) {
                if (i > 0)
                    result.append(",");
                result.append(Utils.quote(header.attribute(i).name()));
            }
            break;

        case TAB:
            for (i = 0; i < header.numAttributes(); i++) {
                if (i > 0)
                    result.append("\t");
                result.append(Utils.quote(header.attribute(i).name()));
            }
            break;

        default:
            throw new IllegalStateException("Unhandled output format: " + m_OutputFormat);
    }

    return result.toString();
}
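In the ARFF branch, calling toString() on a zero-capacity copy prints the @relation and @attribute declarations followed by an empty @data section, which is exactly the file header the dumper needs to emit.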
From source file:adams.flow.transformer.WekaInstancesInfo.java
License:Open Source License
/**
 * Executes the flow item.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instances inst;
    int index;
    int labelIndex;
    double[] dist;
    Enumeration enm;
    int i;

    result = null;

    if (m_InputToken.getPayload() instanceof Instance)
        inst = ((Instance) m_InputToken.getPayload()).dataset();
    else
        inst = (Instances) m_InputToken.getPayload();
    m_AttributeIndex.setData(inst);
    index = m_AttributeIndex.getIntIndex();

    m_Queue.clear();

    switch (m_Type) {
        case FULL:
            m_Queue.add(inst.toSummaryString());
            break;

        case FULL_ATTRIBUTE:
            m_Queue.add(getAttributeStats(inst, index));
            break;

        case FULL_CLASS:
            if (inst.classIndex() > -1)
                m_Queue.add(getAttributeStats(inst, inst.classIndex()));
            break;

        case HEADER:
            m_Queue.add(new Instances(inst, 0).toString());
            break;

        case RELATION_NAME:
            m_Queue.add(inst.relationName());
            break;

        case ATTRIBUTE_NAME:
            if (index != -1)
                m_Queue.add(inst.attribute(index).name());
            break;

        case ATTRIBUTE_NAMES:
            for (i = 0; i < inst.numAttributes(); i++)
                m_Queue.add(inst.attribute(i).name());
            break;

        case LABELS:
            if (index != -1) {
                enm = inst.attribute(index).enumerateValues();
                while (enm.hasMoreElements())
                    m_Queue.add(enm.nextElement());
            }
            break;

        case CLASS_LABELS:
            if (inst.classIndex() > -1) {
                enm = inst.classAttribute().enumerateValues();
                while (enm.hasMoreElements())
                    m_Queue.add(enm.nextElement());
            }
            break;

        case LABEL_COUNT:
            if (index > -1) {
                m_LabelIndex.setData(inst.attribute(index));
                labelIndex = m_LabelIndex.getIntIndex();
                m_Queue.add(inst.attributeStats(index).nominalCounts[labelIndex]);
            }
            break;

        case LABEL_COUNTS:
            if (index > -1)
                m_Queue.add(StatUtils.toNumberArray(inst.attributeStats(index).nominalCounts));
            break;

        case LABEL_DISTRIBUTION:
            if (index > -1) {
                dist = new double[inst.attributeStats(index).nominalCounts.length];
                for (i = 0; i < dist.length; i++)
                    dist[i] = inst.attributeStats(index).nominalCounts[i];
                Utils.normalize(dist);
                m_Queue.add(StatUtils.toNumberArray(dist));
            }
            break;

        case CLASS_LABEL_COUNT:
            if (inst.classIndex() > -1) {
                m_LabelIndex.setData(inst.classAttribute());
                labelIndex = m_LabelIndex.getIntIndex();
                m_Queue.add(inst.attributeStats(inst.classIndex()).nominalCounts[labelIndex]);
            }
            break;

        case CLASS_LABEL_COUNTS:
            if (inst.classIndex() > -1)
                m_Queue.add(StatUtils.toNumberArray(inst.attributeStats(inst.classIndex()).nominalCounts));
            break;

        case CLASS_LABEL_DISTRIBUTION:
            if (inst.classIndex() > -1) {
                dist = new double[inst.attributeStats(inst.classIndex()).nominalCounts.length];
                for (i = 0; i < dist.length; i++)
                    dist[i] = inst.attributeStats(inst.classIndex()).nominalCounts[i];
                Utils.normalize(dist);
                m_Queue.add(StatUtils.toNumberArray(dist));
            }
            break;

        case NUM_ATTRIBUTES:
            m_Queue.add(inst.numAttributes());
            break;

        case NUM_INSTANCES:
            m_Queue.add(inst.numInstances());
            break;

        case NUM_CLASS_LABELS:
            if ((inst.classIndex() != -1) && inst.classAttribute().isNominal())
                m_Queue.add(inst.classAttribute().numValues());
            break;

        case NUM_LABELS:
            if ((index != -1) && inst.attribute(index).isNominal())
                m_Queue.add(inst.attribute(index).numValues());
            break;

        case NUM_DISTINCT_VALUES:
            if (index != -1)
                m_Queue.add(inst.attributeStats(index).distinctCount);
            break;

        case NUM_UNIQUE_VALUES:
            if (index != -1)
                m_Queue.add(inst.attributeStats(index).uniqueCount);
            break;

        case NUM_MISSING_VALUES:
            if (index != -1)
                m_Queue.add(inst.attributeStats(index).missingCount);
            break;

        case MIN:
            if ((index != -1) && inst.attribute(index).isNumeric())
                m_Queue.add(inst.attributeStats(index).numericStats.min);
            break;

        case MAX:
            if ((index != -1) && inst.attribute(index).isNumeric())
                m_Queue.add(inst.attributeStats(index).numericStats.max);
            break;

        case MEAN:
            if ((index != -1) && inst.attribute(index).isNumeric())
                m_Queue.add(inst.attributeStats(index).numericStats.mean);
            break;

        case STDEV:
            if ((index != -1) && inst.attribute(index).isNumeric())
                m_Queue.add(inst.attributeStats(index).numericStats.stdDev);
            break;

        case ATTRIBUTE_TYPE:
            if (index != -1)
                m_Queue.add(Attribute.typeToString(inst.attribute(index)));
            break;

        case CLASS_TYPE:
            if (inst.classIndex() != -1)
                m_Queue.add(Attribute.typeToString(inst.classAttribute()));
            break;

        default:
            result = "Unhandled info type: " + m_Type;
    }

    return result;
}
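The HEADER info type relies on the same trick as the ARFF header above: stringifying a zero-capacity copy yields the dataset's structure without any of its (possibly large) data rows.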
From source file:adams.flow.transformer.WekaInstancesMerge.java
License:Open Source License
/**
 * Executes the flow item.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    String[] filesStr;
    File[] files;
    int i;
    Instances output;
    Instances[] orig;
    Instances[] inst;
    Instance[] rows;
    HashSet ids;
    int max;
    TIntList uniqueList;
    Remove remove;

    result = null;

    // get filenames
    files = null;
    orig = null;
    if (m_InputToken.getPayload() instanceof String[]) {
        filesStr = (String[]) m_InputToken.getPayload();
        files = new File[filesStr.length];
        for (i = 0; i < filesStr.length; i++)
            files[i] = new PlaceholderFile(filesStr[i]);
    } else if (m_InputToken.getPayload() instanceof File[]) {
        files = (File[]) m_InputToken.getPayload();
    } else if (m_InputToken.getPayload() instanceof Instance[]) {
        rows = (Instance[]) m_InputToken.getPayload();
        orig = new Instances[rows.length];
        for (i = 0; i < rows.length; i++) {
            orig[i] = new Instances(rows[i].dataset(), 1);
            orig[i].add((Instance) rows[i].copy());
        }
    } else if (m_InputToken.getPayload() instanceof Instances[]) {
        orig = (Instances[]) m_InputToken.getPayload();
    } else {
        throw new IllegalStateException("Unhandled input type: " + m_InputToken.getPayload().getClass());
    }

    try {
        output = null;

        // simple merge
        if (m_UniqueID.length() == 0) {
            if (files != null) {
                inst = new Instances[1];
                for (i = 0; i < files.length; i++) {
                    if (isStopped())
                        break;
                    inst[0] = DataSource.read(files[i].getAbsolutePath());
                    inst[0] = prepareData(inst[0], i);
                    if (i == 0) {
                        output = inst[0];
                    } else {
                        if (isLoggingEnabled())
                            getLogger().info("Merging with file #" + (i + 1) + ": " + files[i]);
                        output = Instances.mergeInstances(output, inst[0]);
                    }
                }
            } else if (orig != null) {
                inst = new Instances[1];
                for (i = 0; i < orig.length; i++) {
                    if (isStopped())
                        break;
                    inst[0] = prepareData(orig[i], i);
                    if (i == 0) {
                        output = inst[0];
                    } else {
                        if (isLoggingEnabled())
                            getLogger().info("Merging with dataset #" + (i + 1) + ": " + orig[i].relationName());
                        output = Instances.mergeInstances(output, inst[0]);
                    }
                }
            }
        }
        // merge based on row IDs
        else {
            m_AttType = -1;
            max = 0;
            m_UniqueIDAtts = new ArrayList<>();
            if (files != null) {
                orig = new Instances[files.length];
                for (i = 0; i < files.length; i++) {
                    if (isStopped())
                        break;
                    if (isLoggingEnabled())
                        getLogger().info("Loading file #" + (i + 1) + ": " + files[i]);
                    orig[i] = DataSource.read(files[i].getAbsolutePath());
                    max = Math.max(max, orig[i].numInstances());
                }
            } else if (orig != null) {
                for (i = 0; i < orig.length; i++)
                    max = Math.max(max, orig[i].numInstances());
            }

            inst = new Instances[orig.length];
            ids = new HashSet(max);
            for (i = 0; i < orig.length; i++) {
                if (isStopped())
                    break;
                if (isLoggingEnabled())
                    getLogger().info("Updating IDs #" + (i + 1));
                updateIDs(i, orig[i], ids);
                if (isLoggingEnabled())
                    getLogger().info("Preparing dataset #" + (i + 1));
                inst[i] = prepareData(orig[i], i);
            }
            output = merge(orig, inst, ids);

            // remove unnecessary unique ID attributes
            if (m_KeepOnlySingleUniqueID) {
                uniqueList = new TIntArrayList();
                for (String att : m_UniqueIDAtts)
                    uniqueList.add(output.attribute(att).index());
                if (uniqueList.size() > 0) {
                    if (isLoggingEnabled())
                        getLogger().info("Removing duplicate unique ID attributes: " + m_UniqueIDAtts);
                    remove = new Remove();
                    remove.setAttributeIndicesArray(uniqueList.toArray());
                    remove.setInputFormat(output);
                    output = Filter.useFilter(output, remove);
                }
            }
        }

        if (!isStopped()) {
            m_OutputToken = new Token(output);
            updateProvenance(m_OutputToken);
        }
    } catch (Exception e) {
        result = handleException("Failed to merge: ", e);
    }

    return result;
}
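Note the familiar single-row idiom at the top: when the payload is an Instance[], each row is wrapped in its own capacity-1 copy of its header (plus a copy of the row itself), so the rest of the merge logic can treat every input uniformly as an Instances object.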
From source file:adams.flow.transformer.WekaPrimeForecaster.java
License:Open Source License
/**
 * Executes the flow item.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instances data;
    Instance inst;
    AbstractForecaster cls;

    result = null;

    try {
        cls = getForecasterInstance();
        if (cls == null)
            result = "Failed to obtain forecaster!";

        if (result == null) {
            if ((m_InputToken != null) && (m_InputToken.getPayload() instanceof Instances)) {
                data = (Instances) m_InputToken.getPayload();
                cls.primeForecaster(data);
                m_OutputToken = new Token(new WekaModelContainer(cls, new Instances(data, 0), data));
            } else if ((m_InputToken != null) && (m_InputToken.getPayload() instanceof Instance)) {
                inst = (Instance) m_InputToken.getPayload();
                data = inst.dataset();
                if (cls instanceof IncrementallyPrimeable) {
                    ((IncrementallyPrimeable) cls).primeForecasterIncremental(inst);
                    m_OutputToken = new Token(new WekaModelContainer(cls, new Instances(data, 0), data));
                } else {
                    result = m_Forecaster.getValue() + " (= " + cls.getClass().getName()
                            + ") does not implement " + IncrementallyPrimeable.class.getName()
                            + "! Cannot prime incrementally!";
                }
            }
        }
    } catch (Exception e) {
        m_OutputToken = null;
        result = handleException("Failed to process data:", e);
    }

    if (m_OutputToken != null)
        updateProvenance(m_OutputToken);

    return result;
}
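In both branches the WekaModelContainer is handed a zero-capacity copy of the priming data alongside the full dataset, presumably so downstream actors have a cheap structural reference (the container's header slot) that stays valid independently of the full data entry.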