List of usage examples for weka.core.Attribute.isNumeric()
public final boolean isNumeric()
From source file:adams.data.instance.Instance.java
License:Open Source License
/** * Clears the container and adds the data from the weka.core.Instance * (internal values). Uses only the attributes specified in the range. * * @param inst the instance to use//from www . j a va2 s .c o m * @param index the row index in the original dataset, use -1 to ignore * @param additional the indices of the additional attribute values to * store in the report * @param range the range of attributes to limit the instance to * @param attTypes whether to restrict to attributes types, null or zero-length array means no restriction * @see Attribute */ public void set(weka.core.Instance inst, int index, int[] additional, Range range, HashSet<Integer> attTypes) { ArrayList<InstancePoint> list; int i; Attribute att; String fieldStr; clear(); // keep reference to header m_DatasetHeader = new Instances(inst.dataset(), 0); range.setMax(inst.numAttributes()); list = new ArrayList<InstancePoint>(); for (i = 0; i < inst.numAttributes(); i++) { if (i == inst.classIndex()) continue; if (!range.isInRange(i)) continue; if ((attTypes != null) && (!attTypes.contains(inst.attribute(i).type()))) continue; list.add(new InstancePoint(i, inst.value(i))); } addAll(list); // create artificial report m_Report.addParameter(REPORT_DATASET, m_DatasetHeader.relationName()); att = m_DatasetHeader.attribute(ArffUtils.getDBIDName()); if (att != null) { m_Report.addParameter(REPORT_DB_ID, new Double(inst.value(att))); m_Report.setDatabaseID((int) inst.value(att)); } att = m_DatasetHeader.attribute(ArffUtils.getIDName()); if (att != null) m_Report.addParameter(REPORT_ID, new Double(inst.value(att))); // class if (inst.classIndex() > -1) { if (inst.classAttribute().isNumeric()) { m_Report.addField(new Field(REPORT_CLASS, DataType.NUMERIC)); if (inst.classIsMissing()) { m_Report.addField(new Field(REPORT_CLASS, DataType.STRING)); m_Report.addParameter(REPORT_CLASS, "?"); } else { m_Report.addField(new Field(REPORT_CLASS, DataType.NUMERIC)); m_Report.addParameter(REPORT_CLASS, 
Double.toString(inst.classValue())); } } else { m_Report.addField(new Field(REPORT_CLASS, DataType.STRING)); if (inst.classIsMissing()) m_Report.addParameter(REPORT_CLASS, "?"); else m_Report.addParameter(REPORT_CLASS, inst.stringValue(inst.classIndex())); } } // row if (index != -1) { m_Report.addField(new Field(REPORT_ROW, DataType.NUMERIC)); m_Report.addParameter(REPORT_ROW, new Double(index + 1)); } // additional attributes for (i = 0; i < additional.length; i++) { att = inst.attribute(additional[i]); fieldStr = REPORT_ADDITIONAL_PREFIX + (additional[i] + 1) + "-" + att.name(); if (att.isNumeric()) { m_Report.addField(new Field(fieldStr, DataType.NUMERIC)); m_Report.addParameter(fieldStr, inst.value(additional[i])); } else { m_Report.addField(new Field(fieldStr, DataType.STRING)); m_Report.addParameter(fieldStr, inst.stringValue(additional[i])); } } // display ID (hashcode of string representation of Instance) if (getID().length() == 0) setID("" + inst.toString().hashCode()); }
From source file:adams.flow.transformer.WekaInstancesInfo.java
License:Open Source License
/** * Generates attributes statistics.//from ww w. jav a2s. com * * @param data the dataset to use * @param index the 0-based index of the attribute */ protected SpreadSheet getAttributeStats(Instances data, int index) { SpreadSheet result; Attribute att; AttributeStats stats; Row row; int i; result = new DefaultSpreadSheet(); result.setName("Attribute statistics - #" + (index + 1) + " " + data.attribute(index).name()); // header row = result.getHeaderRow(); row.addCell("S").setContent("Statistic"); row.addCell("V").setContent("Value"); // data att = data.attribute(index); if (att.isNominal()) { stats = data.attributeStats(index); addStatistic(result, "Total", stats.totalCount); addStatistic(result, "Missing", stats.missingCount); addStatistic(result, "Unique", stats.uniqueCount); addStatistic(result, "Distinct", stats.distinctCount); addStatistic(result, "Integer-like", stats.intCount); addStatistic(result, "Float-like", stats.realCount); for (i = 0; i < stats.nominalCounts.length; i++) addStatistic(result, "Label-" + (i + 1) + "-" + att.value(i), stats.nominalCounts[i]); for (i = 0; i < stats.nominalWeights.length; i++) addStatistic(result, "Weight-" + (i + 1) + "-" + att.value(i), stats.nominalWeights[i]); } else if (att.isDate()) { if (m_DateFormat == null) m_DateFormat = DateUtils.getTimestampFormatter(); stats = data.attributeStats(index); addStatistic(result, "Count", stats.numericStats.count); addStatistic(result, "Min", formatDate(stats.numericStats.min)); addStatistic(result, "Max", formatDate(stats.numericStats.max)); addStatistic(result, "Mean", formatDate(stats.numericStats.mean)); addStatistic(result, "StdDev (in days)", stats.numericStats.stdDev / 1000 / 60 / 60 / 24); } else if (att.isNumeric()) { stats = data.attributeStats(index); addStatistic(result, "Count", stats.numericStats.count); addStatistic(result, "Min", stats.numericStats.min); addStatistic(result, "Max", stats.numericStats.max); addStatistic(result, "Mean", stats.numericStats.mean); 
addStatistic(result, "StdDev", stats.numericStats.stdDev); addStatistic(result, "Sum", stats.numericStats.sum); addStatistic(result, "Sum^2", stats.numericStats.sumSq); } return result; }
From source file:app.RunApp.java
License:Open Source License
/** * Generates TableModel for attributes/*from w w w . j a v a2 s . c o m*/ * * @param jtable Table * @param dataset Multi-label dataset * @return Generated TableModel */ private TableModel attributesTableModel(JTable jtable, MultiLabelInstances dataset) { DefaultTableModel tableModel = new DefaultTableModel() { @Override public boolean isCellEditable(int row, int column) { //This causes all cells to be not editable return false; } }; tableModel.addColumn("Attribute"); Object[] row = new Object[1]; Instances instances = dataset.getDataSet(); int numLabels = dataset.getNumLabels(); int numAttributes = instances.numAttributes() - numLabels; Attribute att; for (int i = 0; i < numAttributes; i++) { att = instances.attribute(i); if (att.isNumeric()) { row[0] = att.name(); tableModel.addRow(row); } } jtable.setModel(tableModel); return jtable.getModel(); }
From source file:cn.ict.zyq.bestConf.bestConf.sampler.LHSSampler.java
License:Open Source License
/** * Assumptions:(1)Numberic is continuous and has lower/upper bounds; (2) Nominals have domains permutable * /* w w w .ja v a 2 s . c o m*/ * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain */ private static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) { int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set ArrayList<Integer>[] setWithMaxMinDist = null; //generate L sets of sampleSetSize points for (int i = 0; i < L; i++) { ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size()); //compute the minimum distance minDist between any sample pair for each set crntMinDist = minDistForSet(setPerm); //select the set with the maximum minDist if (crntMinDist > maxMinDist) { setWithMaxMinDist = setPerm; maxMinDist = crntMinDist; } } //generate and output the set with the maximum minDist as the result //first, divide the domain of each attribute into sampleSetSize equal subdomain double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds Iterator<Attribute> itr = atts.iterator(); Attribute crntAttr; double pace; for (int i = 0; i < bounds.length; i++) { crntAttr = itr.next(); if (crntAttr.isNumeric()) { bounds[i][0] = crntAttr.getLowerNumericBound(); bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound(); pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound()) / sampleSetSize; for (int j = 1; j < sampleSetSize; j++) { bounds[i][j] = bounds[i][j - 1] + pace; } } else {//crntAttr.isNominal() if (crntAttr.numValues() >= sampleSetSize) { //randomly select among the set for (int j = 0; j <= sampleSetSize; j++) bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values } else { //first round-robin int lastPart = 
sampleSetSize % crntAttr.numValues(); for (int j = 0; j < sampleSetSize - lastPart; j++) bounds[i][j] = j % crntAttr.numValues(); //then randomly select for (int j = sampleSetSize - lastPart; j <= sampleSetSize; j++) bounds[i][j] = uniRand.nextInt(crntAttr.numValues()); } } //nominal attribute } //get all subdomains //second, generate the set according to setWithMaxMinDist Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize); for (int i = 0; i < sampleSetSize; i++) { double[] vals = new double[atts.size()]; for (int j = 0; j < vals.length; j++) { if (atts.get(j).isNumeric()) { vals[j] = useMid ? (bounds[j][setWithMaxMinDist[j].get(i)] + bounds[j][setWithMaxMinDist[j].get(i) + 1]) / 2 : bounds[j][setWithMaxMinDist[j].get(i)] + ((bounds[j][setWithMaxMinDist[j].get(i) + 1] - bounds[j][setWithMaxMinDist[j].get(i)]) * uniRand.nextDouble()); } else {//isNominal() vals[j] = bounds[j][setWithMaxMinDist[j].get(i)]; } } data.add(new DenseInstance(1.0, vals)); } //third, return the generated points return data; }
From source file:cn.ict.zyq.bestConf.util.LHSInitializer.java
License:Open Source License
/** * Assumptions:(1)Numberic is continuous and has lower/upper bounds; (2) Nominals have domains permutable * /*from w w w. ja va2 s.c o m*/ * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain */ public static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) { int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set ArrayList<Integer>[] setWithMaxMinDist = null; //generate L sets of sampleSetSize points for (int i = 0; i < L; i++) { ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size()); //compute the minimum distance minDist between any sample pair for each set crntMinDist = minDistForSet(setPerm); //select the set with the maximum minDist if (crntMinDist > maxMinDist) { setWithMaxMinDist = setPerm; maxMinDist = crntMinDist; } } //generate and output the set with the maximum minDist as the result //first, divide the domain of each attribute into sampleSetSize equal subdomain double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds Iterator<Attribute> itr = atts.iterator(); Attribute crntAttr; double pace; for (int i = 0; i < bounds.length; i++) { crntAttr = itr.next(); if (crntAttr.isNumeric()) { bounds[i][0] = crntAttr.getLowerNumericBound(); bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound(); pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound()) / sampleSetSize; for (int j = 1; j < sampleSetSize; j++) { bounds[i][j] = bounds[i][j - 1] + pace; } } else {//crntAttr.isNominal() if (crntAttr.numValues() >= sampleSetSize) { //randomly select among the set for (int j = 0; j <= sampleSetSize; j++) bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values } else { //first round-robin int lastPart = 
sampleSetSize % crntAttr.numValues(); for (int j = 0; j < sampleSetSize - lastPart; j++) bounds[i][j] = j % crntAttr.numValues(); //then randomly select for (int j = sampleSetSize - lastPart; j <= sampleSetSize; j++) bounds[i][j] = uniRand.nextInt(crntAttr.numValues()); } } //nominal attribute } //get all subdomains //second, generate the set according to setWithMaxMinDist Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize); for (int i = 0; i < sampleSetSize; i++) { double[] vals = new double[atts.size()]; for (int j = 0; j < vals.length; j++) { if (atts.get(j).isNumeric()) { vals[j] = useMid ? (bounds[j][setWithMaxMinDist[j].get(i)] + bounds[j][setWithMaxMinDist[j].get(i) + 1]) / 2 : bounds[j][setWithMaxMinDist[j].get(i)] + ((bounds[j][setWithMaxMinDist[j].get(i) + 1] - bounds[j][setWithMaxMinDist[j].get(i)]) * uniRand.nextDouble()); } else {//isNominal() vals[j] = bounds[j][setWithMaxMinDist[j].get(i)]; } } data.add(new DenseInstance(1.0, vals)); } //third, return the generated points return data; }
From source file:com.deafgoat.ml.prognosticator.AppClassifier.java
License:Apache License
/**
 * Maps a Weka attribute to the name of its type.
 * <p>
 * The checks run in the order date, nominal, numeric; anything else is
 * reported as "string". Date is tested before numeric, so a date attribute
 * is always reported as "date".
 *
 * @param attribute the attribute to inspect
 * @return one of "date", "nominal", "numeric" or "string"
 */
public String getAttributeType(Attribute attribute) {
  if (attribute.isDate()) {
    return "date";
  }
  if (attribute.isNominal()) {
    return "nominal";
  }
  if (attribute.isNumeric()) {
    return "numeric";
  }
  return "string";
}
From source file:core.DatabaseSaverEx.java
License:Open Source License
/** * Writes the structure (header information) to a database by creating a new table. * // ww w . ja v a2 s. c om * @throws Exception if something goes wrong */ private void writeStructure() throws Exception { StringBuffer query = new StringBuffer(); Instances structure = getInstances(); query.append("CREATE TABLE "); if (m_tabName || m_tableName.equals("")) m_tableName = m_DataBaseConnection.maskKeyword(structure.relationName()); if (m_DataBaseConnection.getUpperCase()) { m_tableName = m_tableName.toUpperCase(); m_createInt = m_createInt.toUpperCase(); m_createDouble = m_createDouble.toUpperCase(); m_createText = m_createText.toUpperCase(); m_createDate = m_createDate.toUpperCase(); } m_tableName = m_tableName.replaceAll("[^\\w]", "_"); m_tableName = m_DataBaseConnection.maskKeyword(m_tableName); query.append(m_tableName); if (structure.numAttributes() == 0) throw new Exception("Instances have no attribute."); query.append(" ( "); if (m_id) { if (m_DataBaseConnection.getUpperCase()) m_idColumn = m_idColumn.toUpperCase(); query.append(m_DataBaseConnection.maskKeyword(m_idColumn)); query.append(" "); query.append(m_createInt); query.append(" PRIMARY KEY,"); } for (int i = 0; i < structure.numAttributes(); i++) { Attribute att = structure.attribute(i); String attName = att.name(); attName = attName.replaceAll("[^\\w]", "_"); attName = m_DataBaseConnection.maskKeyword(attName); if (m_DataBaseConnection.getUpperCase()) query.append(attName.toUpperCase()); else query.append(attName); if (att.isDate()) query.append(" " + m_createDate); else { if (att.isNumeric()) query.append(" " + m_createDouble); else query.append(" " + m_createText); } if (i != structure.numAttributes() - 1) query.append(", "); } query.append(" )"); //System.out.println(query.toString()); m_DataBaseConnection.update(query.toString()); m_DataBaseConnection.close(); if (!m_DataBaseConnection.tableExists(m_tableName)) { throw new IOException("Table cannot be built."); } }
From source file:decisiontree.MyC45.java
/** * Method for building an C45 tree.//from w w w .j a v a 2 s .c om * * @param instances the training data * @exception Exception if decision tree can't be built successfully */ private void makeTree(Instances instances) throws Exception { // Check if no instances have reached this node. if (instances.numInstances() == 0) { m_Attribute = null; m_ClassValue = Instance.missingValue(); m_Distribution = new double[instances.numClasses()]; return; } // Compute attribute with maximum gain ratio. double[] gainRatios = new double[instances.numAttributes()]; Enumeration attrEnum = instances.enumerateAttributes(); while (attrEnum.hasMoreElements()) { Attribute attr = (Attribute) attrEnum.nextElement(); if (attr.isNominal()) { gainRatios[attr.index()] = computeGainRatio(instances, attr); } else if (attr.isNumeric()) { gainRatios[attr.index()] = computeGainRatio(instances, attr, computeThreshold(instances, attr)); } } m_Attribute = instances.attribute(Utils.maxIndex(gainRatios)); // Make leaf if gain ratio is zero. // Otherwise create successors. if (Utils.eq(gainRatios[m_Attribute.index()], 0)) { m_Attribute = null; m_Distribution = new double[instances.numClasses()]; Enumeration instEnum = instances.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); m_Distribution[(int) inst.classValue()]++; } Utils.normalize(m_Distribution); m_ClassValue = Utils.maxIndex(m_Distribution); m_ClassAttribute = instances.classAttribute(); } else { Instances[] splitData = null; int child = 0; if (m_Attribute.isNominal()) { child = m_Attribute.numValues(); splitData = splitData(instances, m_Attribute); } else if (m_Attribute.isNumeric()) { child = 2; splitData = splitData(instances, m_Attribute, computeThreshold(instances, m_Attribute)); } m_Successors = new MyC45[child]; for (int j = 0; j < child; j++) { m_Successors[j] = new MyC45(); m_Successors[j].makeTree(splitData[j]); } } }
From source file:decisiontree.MyC45.java
/**
 * Replaces missing values in every non-class attribute: nominal attributes
 * get the index of their most frequent label (the first one on ties),
 * numeric attributes get their mean. Attributes of other types are left
 * untouched.
 * <p>
 * NOTE: the values are replaced directly in the given dataset; the
 * returned object is the very same dataset, not a copy.
 *
 * @param data the dataset to process (modified in place)
 * @return the given dataset with its missing values replaced
 * @throws Exception declared for callers; nothing in this method throws it explicitly
 */
private Instances handleMissingValues(Instances data) throws Exception {
  Instances newData = data;

  for (Enumeration attributes = newData.enumerateAttributes(); attributes.hasMoreElements();) {
    Attribute attr = (Attribute) attributes.nextElement();
    int attrIndex = attr.index();
    AttributeStats attrStats = newData.attributeStats(attrIndex);

    // determine the value that substitutes missing entries of this attribute
    double replacement;
    if (attr.isNominal()) {
      int mostFrequent = 0;
      for (int label = 0; label < attr.numValues(); label++) {
        if (attrStats.nominalCounts[label] > attrStats.nominalCounts[mostFrequent]) {
          mostFrequent = label;
        }
      }
      replacement = mostFrequent;
    } else if (attr.isNumeric()) {
      replacement = attrStats.numericStats.mean;
    } else {
      continue;
    }

    for (int row = 0; row < newData.numInstances(); row++) {
      if (newData.instance(row).isMissing(attrIndex)) {
        newData.instance(row).setValue(attrIndex, replacement);
      }
    }
  }

  return newData;
}
From source file:edu.illinois.cs.cogcomp.lbjava.learn.WekaWrapper.java
License:Open Source License
/**
 * Makes a decision about a single example and returns it as a feature
 * vector.
 * <p>
 * For a nominal or string class attribute the label with the highest
 * distribution value is returned as a discrete feature (an empty vector if
 * the label is unknown to the lexicon); for a numeric class attribute the
 * first distribution value is returned as a real feature. Calling this on
 * an untrained classifier, or with any other class attribute type, prints
 * an error with a stack trace and terminates the JVM with exit code 1.
 *
 * @param exampleFeatures The example's array of feature indices.
 * @param exampleValues The example's array of feature values.
 * @return A feature vector with a single feature containing the prediction for this example.
 **/
public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) {
  if (!trained) {
    System.err.println(
        "WekaWrapper: Error - Cannot make a classification with an " + "untrained classifier.");
    new Exception().printStackTrace();
    System.exit(1);
  }

  // the first entry of attributeInfo is assumed to be the class attribute
  Attribute classAtt = (Attribute) attributeInfo.elementAt(0);

  if (classAtt.isNominal() || classAtt.isString()) {
    double[] scores = getDistribution(exampleFeatures, exampleValues);

    // arg-max over the distribution, the first maximum wins
    int winner = 0;
    for (int candidate = 1; candidate < scores.length; ++candidate) {
      if (scores[candidate] > scores[winner]) {
        winner = candidate;
      }
    }

    Feature label = labelLexicon.lookupKey(winner);
    if (label == null) {
      return new FeatureVector();
    }

    String value = label.getStringValue();
    return new FeatureVector(
        new DiscretePrimitiveStringFeature(
            containingPackage, name, "", value, valueIndexOf(value),
            (short) allowableValues().length));
  }

  if (classAtt.isNumeric()) {
    return new FeatureVector(
        new RealPrimitiveStringFeature(
            containingPackage, name, "", getDistribution(exampleFeatures, exampleValues)[0]));
  }

  System.err.println("WekaWrapper: Error - illegal class type.");
  new Exception().printStackTrace();
  System.exit(1);

  // only needed to satisfy the compiler, System.exit does not return normally
  return new FeatureVector();
}