List of usage examples for weka.core Attribute isDate
public final boolean isDate()
From source file:adams.flow.transformer.WekaInstancesInfo.java
License:Open Source License
/** * Generates attributes statistics.// w w w . j av a2s .c o m * * @param data the dataset to use * @param index the 0-based index of the attribute */ protected SpreadSheet getAttributeStats(Instances data, int index) { SpreadSheet result; Attribute att; AttributeStats stats; Row row; int i; result = new DefaultSpreadSheet(); result.setName("Attribute statistics - #" + (index + 1) + " " + data.attribute(index).name()); // header row = result.getHeaderRow(); row.addCell("S").setContent("Statistic"); row.addCell("V").setContent("Value"); // data att = data.attribute(index); if (att.isNominal()) { stats = data.attributeStats(index); addStatistic(result, "Total", stats.totalCount); addStatistic(result, "Missing", stats.missingCount); addStatistic(result, "Unique", stats.uniqueCount); addStatistic(result, "Distinct", stats.distinctCount); addStatistic(result, "Integer-like", stats.intCount); addStatistic(result, "Float-like", stats.realCount); for (i = 0; i < stats.nominalCounts.length; i++) addStatistic(result, "Label-" + (i + 1) + "-" + att.value(i), stats.nominalCounts[i]); for (i = 0; i < stats.nominalWeights.length; i++) addStatistic(result, "Weight-" + (i + 1) + "-" + att.value(i), stats.nominalWeights[i]); } else if (att.isDate()) { if (m_DateFormat == null) m_DateFormat = DateUtils.getTimestampFormatter(); stats = data.attributeStats(index); addStatistic(result, "Count", stats.numericStats.count); addStatistic(result, "Min", formatDate(stats.numericStats.min)); addStatistic(result, "Max", formatDate(stats.numericStats.max)); addStatistic(result, "Mean", formatDate(stats.numericStats.mean)); addStatistic(result, "StdDev (in days)", stats.numericStats.stdDev / 1000 / 60 / 60 / 24); } else if (att.isNumeric()) { stats = data.attributeStats(index); addStatistic(result, "Count", stats.numericStats.count); addStatistic(result, "Min", stats.numericStats.min); addStatistic(result, "Max", stats.numericStats.max); addStatistic(result, "Mean", stats.numericStats.mean); 
addStatistic(result, "StdDev", stats.numericStats.stdDev); addStatistic(result, "Sum", stats.numericStats.sum); addStatistic(result, "Sum^2", stats.numericStats.sumSq); } return result; }
From source file:com.deafgoat.ml.prognosticator.AppClassifier.java
License:Apache License
/** * Returns the Weka type of the given attribute */// w ww .j a v a2 s . c om public String getAttributeType(Attribute attribute) { if (attribute.isDate()) { return "date"; } else if (attribute.isNominal()) { return "nominal"; } else if (attribute.isNumeric()) { return "numeric"; } else { return "string"; } }
From source file:core.DatabaseSaverEx.java
License:Open Source License
/** * Writes the structure (header information) to a database by creating a new table. * // w w w . j a v a 2 s . c om * @throws Exception if something goes wrong */ private void writeStructure() throws Exception { StringBuffer query = new StringBuffer(); Instances structure = getInstances(); query.append("CREATE TABLE "); if (m_tabName || m_tableName.equals("")) m_tableName = m_DataBaseConnection.maskKeyword(structure.relationName()); if (m_DataBaseConnection.getUpperCase()) { m_tableName = m_tableName.toUpperCase(); m_createInt = m_createInt.toUpperCase(); m_createDouble = m_createDouble.toUpperCase(); m_createText = m_createText.toUpperCase(); m_createDate = m_createDate.toUpperCase(); } m_tableName = m_tableName.replaceAll("[^\\w]", "_"); m_tableName = m_DataBaseConnection.maskKeyword(m_tableName); query.append(m_tableName); if (structure.numAttributes() == 0) throw new Exception("Instances have no attribute."); query.append(" ( "); if (m_id) { if (m_DataBaseConnection.getUpperCase()) m_idColumn = m_idColumn.toUpperCase(); query.append(m_DataBaseConnection.maskKeyword(m_idColumn)); query.append(" "); query.append(m_createInt); query.append(" PRIMARY KEY,"); } for (int i = 0; i < structure.numAttributes(); i++) { Attribute att = structure.attribute(i); String attName = att.name(); attName = attName.replaceAll("[^\\w]", "_"); attName = m_DataBaseConnection.maskKeyword(attName); if (m_DataBaseConnection.getUpperCase()) query.append(attName.toUpperCase()); else query.append(attName); if (att.isDate()) query.append(" " + m_createDate); else { if (att.isNumeric()) query.append(" " + m_createDouble); else query.append(" " + m_createText); } if (i != structure.numAttributes() - 1) query.append(", "); } query.append(" )"); //System.out.println(query.toString()); m_DataBaseConnection.update(query.toString()); m_DataBaseConnection.close(); if (!m_DataBaseConnection.tableExists(m_tableName)) { throw new IOException("Table cannot be built."); } }
From source file:lu.lippmann.cdb.common.gui.ts.TimeSeriesChartUtil.java
License:Open Source License
/**
 * Builds a scroll pane that stacks one chart panel per attribute of the
 * data set. Nominal attributes are rendered with
 * {@code buildChartPanelForNominalAttribute}; numeric attributes that are
 * not dates are rendered from a data set filtered down to that attribute
 * plus the date attribute. All other attributes are skipped.
 *
 * @param dataSet the data set whose attributes are charted
 * @param dateIdx the index of the date attribute
 * @return a scroll pane (vertical bar always shown, horizontal bar never)
 *         wrapping the chart panels
 */
public static JScrollPane buildPanelWithChartForEachAttribute(final Instances dataSet, final int dateIdx) {
    final JXPanel chartsPanel = new JXPanel();
    chartsPanel.setLayout(new GridBagLayout());
    chartsPanel.setScrollableHeightHint(ScrollableSizeHint.VERTICAL_STRETCH);

    final GridBagConstraints constraints = new GridBagConstraints();
    constraints.gridx = 0;
    constraints.gridy = 0;
    constraints.weightx = 1;
    constraints.weighty = 1;
    constraints.fill = GridBagConstraints.BOTH;
    constraints.insets = new Insets(10, 10, 10, 10);

    final int attributeCount = dataSet.numAttributes();
    for (int pos = 0; pos < attributeCount; pos++) {
        final Attribute current = dataSet.attribute(pos);
        System.out.println("Build chart panel for '" + current.name() + "' time serie ...");

        if (current.isNominal()) {
            chartsPanel.add(buildChartPanelForNominalAttribute(dataSet, current, dateIdx), constraints);
            constraints.gridy++;
            continue;
        }
        if (!current.isNumeric() || current.isDate()) {
            // neither nominal nor a plain numeric attribute: no chart
            continue;
        }

        try {
            final int[] keptIndices = new int[] { current.index(), dateIdx };
            final Instances filteredDs = WekaDataProcessingUtil.buildFilteredByAttributesDataSet(dataSet,
                    keptIndices);
            chartsPanel.add(buildChartPanelForAllAttributes(filteredDs, false, 1, null), constraints);
            // the grid row only advances when the chart was actually added
            constraints.gridy++;
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    return new JScrollPane(chartsPanel, JScrollPane.VERTICAL_SCROLLBAR_ALWAYS,
            JScrollPane.HORIZONTAL_SCROLLBAR_NEVER);
}
From source file:org.opentox.toxotis.factory.DatasetFactory.java
License:Open Source License
/** * Create a {@link DataEntry data entry} from a single instance. * @param instance/*from w w w . j a v a 2s .c o m*/ * @return * A Data Entry that corresponds to the provided instance. * @throws ToxOtisException */ public DataEntry createDataEntry(Instance instance) throws ToxOtisException { Enumeration attributes = instance.enumerateAttributes(); DataEntry de = new DataEntry(); try { while (attributes.hasMoreElements()) { Attribute attribute = (Attribute) attributes.nextElement(); if (attribute.name().equals(Dataset.COMPOUND_URI) || attribute.name().equals("URI")) { de.setConformer(new Compound(new VRI(instance.stringValue(attribute)))); } else { FeatureValue fv = new FeatureValue(); Feature feature = new Feature(new VRI(attribute.name())); LiteralValue value = null; if (attribute.isNumeric()) { value = new LiteralValue<Double>(instance.value(attribute), XSDDatatype.XSDdouble); feature.getOntologicalClasses().add(OTClasses.numericFeature()); } else if (attribute.isString() || attribute.isDate()) { value = new LiteralValue<String>(instance.stringValue(attribute), XSDDatatype.XSDstring); feature.getOntologicalClasses().add(OTClasses.stringFeature()); } else if (attribute.isNominal()) { value = new LiteralValue<String>(instance.stringValue(attribute), XSDDatatype.XSDstring); Enumeration nominalValues = attribute.enumerateValues(); feature.getOntologicalClasses().add(OTClasses.nominalFeature()); while (nominalValues.hasMoreElements()) { String nomValue = (String) nominalValues.nextElement(); feature.getAdmissibleValues() .add(new LiteralValue<String>(nomValue, XSDDatatype.XSDstring)); } } fv.setFeature(feature); fv.setValue(value); de.addFeatureValue(fv); } } } catch (URISyntaxException ex) { throw new ToxOtisException(ex); } return de; }
From source file:org.packDataMining.SMOTE.java
License:Open Source License
/** * The procedure implementing the SMOTE algorithm. The output * instances are pushed onto the output queue for collection. * // ww w . j a v a 2 s . c o m * @throws Exception if provided options cannot be executed * on input instances */ protected void doSMOTE() throws Exception { int minIndex = 0; int min = Integer.MAX_VALUE; if (m_DetectMinorityClass) { // find minority class int[] classCounts = getInputFormat().attributeStats(getInputFormat().classIndex()).nominalCounts; for (int i = 0; i < classCounts.length; i++) { if (classCounts[i] != 0 && classCounts[i] < min) { min = classCounts[i]; minIndex = i; } } } else { String classVal = getClassValue(); if (classVal.equalsIgnoreCase("first")) { minIndex = 1; } else if (classVal.equalsIgnoreCase("last")) { minIndex = getInputFormat().numClasses(); } else { minIndex = Integer.parseInt(classVal); } if (minIndex > getInputFormat().numClasses()) { throw new Exception("value index must be <= the number of classes"); } minIndex--; // make it an index } int nearestNeighbors; if (min <= getNearestNeighbors()) { nearestNeighbors = min - 1; } else { nearestNeighbors = getNearestNeighbors(); } if (nearestNeighbors < 1) throw new Exception("Cannot use 0 neighbors!"); // compose minority class dataset // also push all dataset instances Instances sample = getInputFormat().stringFreeStructure(); Enumeration instanceEnum = getInputFormat().enumerateInstances(); while (instanceEnum.hasMoreElements()) { Instance instance = (Instance) instanceEnum.nextElement(); push((Instance) instance.copy()); if ((int) instance.classValue() == minIndex) { sample.add(instance); } } // compute Value Distance Metric matrices for nominal features Map vdmMap = new HashMap(); Enumeration attrEnum = getInputFormat().enumerateAttributes(); while (attrEnum.hasMoreElements()) { Attribute attr = (Attribute) attrEnum.nextElement(); if (!attr.equals(getInputFormat().classAttribute())) { if (attr.isNominal() || attr.isString()) { double[][] vdm = new 
double[attr.numValues()][attr.numValues()]; vdmMap.put(attr, vdm); int[] featureValueCounts = new int[attr.numValues()]; int[][] featureValueCountsByClass = new int[getInputFormat().classAttribute().numValues()][attr .numValues()]; instanceEnum = getInputFormat().enumerateInstances(); while (instanceEnum.hasMoreElements()) { Instance instance = (Instance) instanceEnum.nextElement(); int value = (int) instance.value(attr); int classValue = (int) instance.classValue(); featureValueCounts[value]++; featureValueCountsByClass[classValue][value]++; } for (int valueIndex1 = 0; valueIndex1 < attr.numValues(); valueIndex1++) { for (int valueIndex2 = 0; valueIndex2 < attr.numValues(); valueIndex2++) { double sum = 0; for (int classValueIndex = 0; classValueIndex < getInputFormat() .numClasses(); classValueIndex++) { double c1i = (double) featureValueCountsByClass[classValueIndex][valueIndex1]; double c2i = (double) featureValueCountsByClass[classValueIndex][valueIndex2]; double c1 = (double) featureValueCounts[valueIndex1]; double c2 = (double) featureValueCounts[valueIndex2]; double term1 = c1i / c1; double term2 = c2i / c2; sum += Math.abs(term1 - term2); } vdm[valueIndex1][valueIndex2] = sum; } } } } } // use this random source for all required randomness Random rand = new Random(getRandomSeed()); // find the set of extra indices to use if the percentage is not evenly divisible by 100 List extraIndices = new LinkedList(); double percentageRemainder = (getPercentage() / 100) - Math.floor(getPercentage() / 100.0); int extraIndicesCount = (int) (percentageRemainder * sample.numInstances()); if (extraIndicesCount >= 1) { for (int i = 0; i < sample.numInstances(); i++) { extraIndices.add(i); } } Collections.shuffle(extraIndices, rand); extraIndices = extraIndices.subList(0, extraIndicesCount); Set extraIndexSet = new HashSet(extraIndices); // the main loop to handle computing nearest neighbors and generating SMOTE // examples from each instance in the original minority class 
data Instance[] nnArray = new Instance[nearestNeighbors]; for (int i = 0; i < sample.numInstances(); i++) { Instance instanceI = sample.instance(i); // find k nearest neighbors for each instance List distanceToInstance = new LinkedList(); for (int j = 0; j < sample.numInstances(); j++) { Instance instanceJ = sample.instance(j); if (i != j) { double distance = 0; attrEnum = getInputFormat().enumerateAttributes(); while (attrEnum.hasMoreElements()) { Attribute attr = (Attribute) attrEnum.nextElement(); if (!attr.equals(getInputFormat().classAttribute())) { double iVal = instanceI.value(attr); double jVal = instanceJ.value(attr); if (attr.isNumeric()) { distance += Math.pow(iVal - jVal, 2); } else { distance += ((double[][]) vdmMap.get(attr))[(int) iVal][(int) jVal]; } } } distance = Math.pow(distance, .5); distanceToInstance.add(new Object[] { distance, instanceJ }); } } // sort the neighbors according to distance Collections.sort(distanceToInstance, new Comparator() { public int compare(Object o1, Object o2) { double distance1 = (Double) ((Object[]) o1)[0]; double distance2 = (Double) ((Object[]) o2)[0]; return (int) Math.ceil(distance1 - distance2); } }); // populate the actual nearest neighbor instance array Iterator entryIterator = distanceToInstance.iterator(); int j = 0; while (entryIterator.hasNext() && j < nearestNeighbors) { nnArray[j] = (Instance) ((Object[]) entryIterator.next())[1]; j++; } // create synthetic examples int n = (int) Math.floor(getPercentage() / 100); while (n > 0 || extraIndexSet.remove(i)) { double[] values = new double[sample.numAttributes()]; int nn = rand.nextInt(nearestNeighbors); attrEnum = getInputFormat().enumerateAttributes(); while (attrEnum.hasMoreElements()) { Attribute attr = (Attribute) attrEnum.nextElement(); if (!attr.equals(getInputFormat().classAttribute())) { if (attr.isNumeric()) { double dif = nnArray[nn].value(attr) - instanceI.value(attr); double gap = rand.nextDouble(); values[attr.index()] = (double) 
(instanceI.value(attr) + gap * dif); } else if (attr.isDate()) { double dif = nnArray[nn].value(attr) - instanceI.value(attr); double gap = rand.nextDouble(); values[attr.index()] = (long) (instanceI.value(attr) + gap * dif); } else { int[] valueCounts = new int[attr.numValues()]; int iVal = (int) instanceI.value(attr); valueCounts[iVal]++; for (int nnEx = 0; nnEx < nearestNeighbors; nnEx++) { int val = (int) nnArray[nnEx].value(attr); valueCounts[val]++; } int maxIndex = 0; int max = Integer.MIN_VALUE; for (int index = 0; index < attr.numValues(); index++) { if (valueCounts[index] > max) { max = valueCounts[index]; maxIndex = index; } } values[attr.index()] = maxIndex; } } } values[sample.classIndex()] = minIndex; Instance synthetic = new Instance(1.0, values); push(synthetic); n--; } } }