Example usage for weka.core Attribute isNumeric

List of usage examples for weka.core Attribute isNumeric

Introduction

In this page you can find the example usage for weka.core Attribute isNumeric.

Prototype


public final boolean isNumeric()

Source Link

Document

Tests if the attribute is numeric.

Usage

From source file:adams.data.instance.Instance.java

License:Open Source License

/**
 * Clears the container and adds the data from the weka.core.Instance
 * (internal values). Uses only the attributes specified in the range.
 * The class attribute is never added as a data point; its value is stored
 * in the report instead.
 *
 * @param inst   the instance to use
 * @param index   the row index in the original dataset, use -1 to ignore
 * @param additional   the indices of the additional attribute values to
 *          store in the report
 * @param range   the range of attributes to limit the instance to; its
 *          maximum gets set to the number of attributes of the instance
 * @param attTypes   whether to restrict to attributes types, null or empty set means no restriction
 * @see      Attribute
 */
public void set(weka.core.Instance inst, int index, int[] additional, Range range, HashSet<Integer> attTypes) {
    ArrayList<InstancePoint> list;
    int i;
    Attribute att;
    String fieldStr;
    boolean restrictTypes;

    clear();

    // keep reference to header
    m_DatasetHeader = new Instances(inst.dataset(), 0);

    // an empty set must behave like null (= no restriction), as documented;
    // otherwise every attribute would get filtered out
    restrictTypes = (attTypes != null) && !attTypes.isEmpty();

    range.setMax(inst.numAttributes());
    list = new ArrayList<InstancePoint>();
    for (i = 0; i < inst.numAttributes(); i++) {
        if (i == inst.classIndex())
            continue;
        if (!range.isInRange(i))
            continue;
        if (restrictTypes && !attTypes.contains(inst.attribute(i).type()))
            continue;
        list.add(new InstancePoint(i, inst.value(i)));
    }

    addAll(list);

    // create artificial report
    m_Report.addParameter(REPORT_DATASET, m_DatasetHeader.relationName());
    att = m_DatasetHeader.attribute(ArffUtils.getDBIDName());
    if (att != null) {
        m_Report.addParameter(REPORT_DB_ID, Double.valueOf(inst.value(att)));
        m_Report.setDatabaseID((int) inst.value(att));
    }
    att = m_DatasetHeader.attribute(ArffUtils.getIDName());
    if (att != null)
        m_Report.addParameter(REPORT_ID, Double.valueOf(inst.value(att)));
    // class: the field gets registered exactly once, with the type that
    // matches the value that is stored ("?" for a missing class value)
    if (inst.classIndex() > -1) {
        if (inst.classAttribute().isNumeric()) {
            if (inst.classIsMissing()) {
                m_Report.addField(new Field(REPORT_CLASS, DataType.STRING));
                m_Report.addParameter(REPORT_CLASS, "?");
            } else {
                m_Report.addField(new Field(REPORT_CLASS, DataType.NUMERIC));
                m_Report.addParameter(REPORT_CLASS, Double.toString(inst.classValue()));
            }
        } else {
            m_Report.addField(new Field(REPORT_CLASS, DataType.STRING));
            if (inst.classIsMissing())
                m_Report.addParameter(REPORT_CLASS, "?");
            else
                m_Report.addParameter(REPORT_CLASS, inst.stringValue(inst.classIndex()));
        }
    }
    // row (stored 1-based)
    if (index != -1) {
        m_Report.addField(new Field(REPORT_ROW, DataType.NUMERIC));
        m_Report.addParameter(REPORT_ROW, Double.valueOf(index + 1));
    }
    // additional attributes, field name: <prefix><1-based index>-<attribute name>
    for (i = 0; i < additional.length; i++) {
        att = inst.attribute(additional[i]);
        fieldStr = REPORT_ADDITIONAL_PREFIX + (additional[i] + 1) + "-" + att.name();
        if (att.isNumeric()) {
            m_Report.addField(new Field(fieldStr, DataType.NUMERIC));
            m_Report.addParameter(fieldStr, inst.value(additional[i]));
        } else {
            m_Report.addField(new Field(fieldStr, DataType.STRING));
            m_Report.addParameter(fieldStr, inst.stringValue(additional[i]));
        }
    }

    // display ID (hashcode of string representation of Instance)
    if (getID().length() == 0)
        setID("" + inst.toString().hashCode());
}

From source file:adams.flow.transformer.WekaInstancesInfo.java

License:Open Source License

/**
 * Generates attributes statistics.
 * <p>
 * Statistics are only collected for nominal, date and numeric attributes;
 * for any other attribute type the spreadsheet contains just the header.
 *
 * @param data   the dataset to use
 * @param index   the 0-based index of the attribute
 * @return   the spreadsheet with the columns "Statistic" and "Value"
 */
protected SpreadSheet getAttributeStats(Instances data, int index) {
    final Attribute attribute = data.attribute(index);

    final SpreadSheet sheet = new DefaultSpreadSheet();
    sheet.setName("Attribute statistics - #" + (index + 1) + " " + attribute.name());

    // header
    final Row header = sheet.getHeaderRow();
    header.addCell("S").setContent("Statistic");
    header.addCell("V").setContent("Value");

    // data
    if (attribute.isNominal()) {
        final AttributeStats nominal = data.attributeStats(index);
        addStatistic(sheet, "Total", nominal.totalCount);
        addStatistic(sheet, "Missing", nominal.missingCount);
        addStatistic(sheet, "Unique", nominal.uniqueCount);
        addStatistic(sheet, "Distinct", nominal.distinctCount);
        addStatistic(sheet, "Integer-like", nominal.intCount);
        addStatistic(sheet, "Float-like", nominal.realCount);
        // per-label counts first, then per-label weights (labels are numbered 1-based)
        for (int label = 0; label < nominal.nominalCounts.length; label++) {
            addStatistic(sheet, "Label-" + (label + 1) + "-" + attribute.value(label), nominal.nominalCounts[label]);
        }
        for (int label = 0; label < nominal.nominalWeights.length; label++) {
            addStatistic(sheet, "Weight-" + (label + 1) + "-" + attribute.value(label), nominal.nominalWeights[label]);
        }
    } else if (attribute.isDate()) {
        // the date check has to come before the numeric one
        if (m_DateFormat == null) {
            m_DateFormat = DateUtils.getTimestampFormatter();
        }
        final AttributeStats dates = data.attributeStats(index);
        addStatistic(sheet, "Count", dates.numericStats.count);
        addStatistic(sheet, "Min", formatDate(dates.numericStats.min));
        addStatistic(sheet, "Max", formatDate(dates.numericStats.max));
        addStatistic(sheet, "Mean", formatDate(dates.numericStats.mean));
        // msec -> sec -> min -> hours -> days
        addStatistic(sheet, "StdDev (in days)", dates.numericStats.stdDev / 1000 / 60 / 60 / 24);
    } else if (attribute.isNumeric()) {
        final AttributeStats numeric = data.attributeStats(index);
        addStatistic(sheet, "Count", numeric.numericStats.count);
        addStatistic(sheet, "Min", numeric.numericStats.min);
        addStatistic(sheet, "Max", numeric.numericStats.max);
        addStatistic(sheet, "Mean", numeric.numericStats.mean);
        addStatistic(sheet, "StdDev", numeric.numericStats.stdDev);
        addStatistic(sheet, "Sum", numeric.numericStats.sum);
        addStatistic(sheet, "Sum^2", numeric.numericStats.sumSq);
    }

    return sheet;
}

From source file:app.RunApp.java

License:Open Source License

/**
 * Generates TableModel for attributes/*from   w  w  w . j  a v  a2  s  . c o m*/
 * 
 * @param jtable Table
 * @param dataset Multi-label dataset
 * @return Generated TableModel
 */
private TableModel attributesTableModel(JTable jtable, MultiLabelInstances dataset) {
    DefaultTableModel tableModel = new DefaultTableModel() {
        @Override
        public boolean isCellEditable(int row, int column) {
            //This causes all cells to be not editable
            return false;
        }
    };

    tableModel.addColumn("Attribute");

    Object[] row = new Object[1];

    Instances instances = dataset.getDataSet();

    int numLabels = dataset.getNumLabels();

    int numAttributes = instances.numAttributes() - numLabels;

    Attribute att;
    for (int i = 0; i < numAttributes; i++) {
        att = instances.attribute(i);
        if (att.isNumeric()) {
            row[0] = att.name();
            tableModel.addRow(row);
        }
    }

    jtable.setModel(tableModel);

    return jtable.getModel();
}

From source file:cn.ict.zyq.bestConf.bestConf.sampler.LHSSampler.java

License:Open Source License

/**
 * Assumptions:(1)Numberic is continuous and has lower/upper bounds; (2) Nominals have domains permutable
 * /*  w  w  w  .ja v a 2 s . c o m*/
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 */
private static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) {

    int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
    double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
    ArrayList<Integer>[] setWithMaxMinDist = null;
    //generate L sets of sampleSetSize points
    for (int i = 0; i < L; i++) {
        ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
        //compute the minimum distance minDist between any sample pair for each set
        crntMinDist = minDistForSet(setPerm);
        //select the set with the maximum minDist
        if (crntMinDist > maxMinDist) {
            setWithMaxMinDist = setPerm;
            maxMinDist = crntMinDist;
        }
    }

    //generate and output the set with the maximum minDist as the result

    //first, divide the domain of each attribute into sampleSetSize equal subdomain
    double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds
    Iterator<Attribute> itr = atts.iterator();
    Attribute crntAttr;
    double pace;
    for (int i = 0; i < bounds.length; i++) {
        crntAttr = itr.next();

        if (crntAttr.isNumeric()) {
            bounds[i][0] = crntAttr.getLowerNumericBound();
            bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
            pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound()) / sampleSetSize;
            for (int j = 1; j < sampleSetSize; j++) {
                bounds[i][j] = bounds[i][j - 1] + pace;
            }
        } else {//crntAttr.isNominal()
            if (crntAttr.numValues() >= sampleSetSize) {
                //randomly select among the set
                for (int j = 0; j <= sampleSetSize; j++)
                    bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values
            } else {
                //first round-robin
                int lastPart = sampleSetSize % crntAttr.numValues();
                for (int j = 0; j < sampleSetSize - lastPart; j++)
                    bounds[i][j] = j % crntAttr.numValues();
                //then randomly select
                for (int j = sampleSetSize - lastPart; j <= sampleSetSize; j++)
                    bounds[i][j] = uniRand.nextInt(crntAttr.numValues());
            }
        } //nominal attribute
    } //get all subdomains

    //second, generate the set according to setWithMaxMinDist
    Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
    for (int i = 0; i < sampleSetSize; i++) {
        double[] vals = new double[atts.size()];
        for (int j = 0; j < vals.length; j++) {
            if (atts.get(j).isNumeric()) {
                vals[j] = useMid
                        ? (bounds[j][setWithMaxMinDist[j].get(i)] + bounds[j][setWithMaxMinDist[j].get(i) + 1])
                                / 2
                        : bounds[j][setWithMaxMinDist[j].get(i)] + ((bounds[j][setWithMaxMinDist[j].get(i) + 1]
                                - bounds[j][setWithMaxMinDist[j].get(i)]) * uniRand.nextDouble());
            } else {//isNominal()
                vals[j] = bounds[j][setWithMaxMinDist[j].get(i)];
            }
        }
        data.add(new DenseInstance(1.0, vals));
    }

    //third, return the generated points
    return data;
}

From source file:cn.ict.zyq.bestConf.util.LHSInitializer.java

License:Open Source License

/**
 * Assumptions:(1)Numberic is continuous and has lower/upper bounds; (2) Nominals have domains permutable
 * /*from  w  w w.  ja va2 s.c  o  m*/
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 */
public static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) {

    int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
    double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
    ArrayList<Integer>[] setWithMaxMinDist = null;
    //generate L sets of sampleSetSize points
    for (int i = 0; i < L; i++) {
        ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
        //compute the minimum distance minDist between any sample pair for each set
        crntMinDist = minDistForSet(setPerm);
        //select the set with the maximum minDist
        if (crntMinDist > maxMinDist) {
            setWithMaxMinDist = setPerm;
            maxMinDist = crntMinDist;
        }
    }

    //generate and output the set with the maximum minDist as the result

    //first, divide the domain of each attribute into sampleSetSize equal subdomain
    double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds
    Iterator<Attribute> itr = atts.iterator();
    Attribute crntAttr;
    double pace;
    for (int i = 0; i < bounds.length; i++) {
        crntAttr = itr.next();

        if (crntAttr.isNumeric()) {
            bounds[i][0] = crntAttr.getLowerNumericBound();
            bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
            pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound()) / sampleSetSize;
            for (int j = 1; j < sampleSetSize; j++) {
                bounds[i][j] = bounds[i][j - 1] + pace;
            }
        } else {//crntAttr.isNominal()
            if (crntAttr.numValues() >= sampleSetSize) {
                //randomly select among the set
                for (int j = 0; j <= sampleSetSize; j++)
                    bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values
            } else {
                //first round-robin
                int lastPart = sampleSetSize % crntAttr.numValues();
                for (int j = 0; j < sampleSetSize - lastPart; j++)
                    bounds[i][j] = j % crntAttr.numValues();
                //then randomly select
                for (int j = sampleSetSize - lastPart; j <= sampleSetSize; j++)
                    bounds[i][j] = uniRand.nextInt(crntAttr.numValues());
            }
        } //nominal attribute
    } //get all subdomains

    //second, generate the set according to setWithMaxMinDist
    Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
    for (int i = 0; i < sampleSetSize; i++) {
        double[] vals = new double[atts.size()];
        for (int j = 0; j < vals.length; j++) {
            if (atts.get(j).isNumeric()) {
                vals[j] = useMid
                        ? (bounds[j][setWithMaxMinDist[j].get(i)] + bounds[j][setWithMaxMinDist[j].get(i) + 1])
                                / 2
                        : bounds[j][setWithMaxMinDist[j].get(i)] + ((bounds[j][setWithMaxMinDist[j].get(i) + 1]
                                - bounds[j][setWithMaxMinDist[j].get(i)]) * uniRand.nextDouble());
            } else {//isNominal()
                vals[j] = bounds[j][setWithMaxMinDist[j].get(i)];
            }
        }
        data.add(new DenseInstance(1.0, vals));
    }

    //third, return the generated points
    return data;
}

From source file:com.deafgoat.ml.prognosticator.AppClassifier.java

License:Apache License

/**
 * Returns the Weka type of the given attribute.
 * <p>
 * The date check comes first, then nominal, then numeric; every attribute
 * that matches none of them is reported as "string".
 *
 * @param attribute the attribute to inspect
 * @return one of "date", "nominal", "numeric" or "string"
 */
public String getAttributeType(Attribute attribute) {
    if (attribute.isDate()) {
        return "date";
    }
    if (attribute.isNominal()) {
        return "nominal";
    }
    if (attribute.isNumeric()) {
        return "numeric";
    }
    // fallback for all remaining attribute types
    return "string";
}

From source file:core.DatabaseSaverEx.java

License:Open Source License

/**
 * Writes the structure (header information) to a database by creating a new table.
 * <p>
 * The table name is derived from the relation name if requested (or if no
 * name is set); table and column names get every non-word character
 * replaced with an underscore and are passed through the connection's
 * keyword masking. Date attributes use the date column type, other numeric
 * attributes the double type and everything else the text type.
 *
 * @throws Exception if something goes wrong, e.g. the structure has no
 *         attributes or the table does not exist after the statement was run
 */
private void writeStructure() throws Exception {

    final Instances header = getInstances();
    final StringBuilder sql = new StringBuilder("CREATE TABLE ");

    // determine the table name
    if (m_tabName || m_tableName.equals("")) {
        m_tableName = m_DataBaseConnection.maskKeyword(header.relationName());
    }
    if (m_DataBaseConnection.getUpperCase()) {
        m_tableName = m_tableName.toUpperCase();
        m_createInt = m_createInt.toUpperCase();
        m_createDouble = m_createDouble.toUpperCase();
        m_createText = m_createText.toUpperCase();
        m_createDate = m_createDate.toUpperCase();
    }
    m_tableName = m_DataBaseConnection.maskKeyword(m_tableName.replaceAll("[^\\w]", "_"));
    sql.append(m_tableName);

    final int attCount = header.numAttributes();
    if (attCount == 0) {
        throw new Exception("Instances have no attribute.");
    }

    sql.append(" ( ");

    // optional primary key column
    if (m_id) {
        if (m_DataBaseConnection.getUpperCase()) {
            m_idColumn = m_idColumn.toUpperCase();
        }
        sql.append(m_DataBaseConnection.maskKeyword(m_idColumn))
           .append(" ")
           .append(m_createInt)
           .append(" PRIMARY KEY,");
    }

    // one column per attribute
    for (int col = 0; col < attCount; col++) {
        final Attribute attribute = header.attribute(col);
        final String column = m_DataBaseConnection.maskKeyword(attribute.name().replaceAll("[^\\w]", "_"));
        sql.append(m_DataBaseConnection.getUpperCase() ? column.toUpperCase() : column);

        // date has to be tested before numeric
        final String sqlType;
        if (attribute.isDate()) {
            sqlType = m_createDate;
        } else if (attribute.isNumeric()) {
            sqlType = m_createDouble;
        } else {
            sqlType = m_createText;
        }
        sql.append(" ").append(sqlType);

        if (col != attCount - 1) {
            sql.append(", ");
        }
    }
    sql.append(" )");

    m_DataBaseConnection.update(sql.toString());
    m_DataBaseConnection.close();
    if (!m_DataBaseConnection.tableExists(m_tableName)) {
        throw new IOException("Table cannot be built.");
    }
}

From source file:decisiontree.MyC45.java

/**
 * Method for building an C45 tree.
 * <p>
 * Picks the attribute with the highest gain ratio (numeric attributes are
 * evaluated at the threshold returned by computeThreshold). If that gain
 * ratio is zero, this node becomes a leaf storing the normalized class
 * distribution; otherwise the data is split and one successor is built
 * recursively per partition.
 *
 * @param instances the training data
 * @exception Exception if decision tree can't be built successfully
 */
private void makeTree(Instances instances) throws Exception {

    // No instances have reached this node: empty leaf with a missing class value.
    if (instances.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = Instance.missingValue();
        m_Distribution = new double[instances.numClasses()];
        return;
    }

    // Gain ratio per attribute; attributes that are neither nominal nor numeric keep 0.
    double[] gainRatios = new double[instances.numAttributes()];
    for (Enumeration attributes = instances.enumerateAttributes(); attributes.hasMoreElements();) {
        Attribute candidate = (Attribute) attributes.nextElement();
        if (candidate.isNominal()) {
            gainRatios[candidate.index()] = computeGainRatio(instances, candidate);
        } else if (candidate.isNumeric()) {
            double threshold = computeThreshold(instances, candidate);
            gainRatios[candidate.index()] = computeGainRatio(instances, candidate, threshold);
        }
    }
    m_Attribute = instances.attribute(Utils.maxIndex(gainRatios));

    // Zero gain ratio: make this node a leaf.
    if (Utils.eq(gainRatios[m_Attribute.index()], 0)) {
        m_Attribute = null;
        m_Distribution = new double[instances.numClasses()];
        for (Enumeration rows = instances.enumerateInstances(); rows.hasMoreElements();) {
            Instance row = (Instance) rows.nextElement();
            m_Distribution[(int) row.classValue()]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = instances.classAttribute();
        return;
    }

    // Otherwise split: one branch per nominal value, or two branches around the numeric threshold.
    Instances[] partitions = null;
    int branches = 0;
    if (m_Attribute.isNominal()) {
        branches = m_Attribute.numValues();
        partitions = splitData(instances, m_Attribute);
    } else if (m_Attribute.isNumeric()) {
        branches = 2;
        partitions = splitData(instances, m_Attribute, computeThreshold(instances, m_Attribute));
    }

    m_Successors = new MyC45[branches];
    for (int b = 0; b < branches; b++) {
        MyC45 subtree = new MyC45();
        m_Successors[b] = subtree;
        subtree.makeTree(partitions[b]);
    }
}

From source file:decisiontree.MyC45.java

/**
 * Replaces missing attribute values: nominal attributes get the index of
 * the most frequent label, numeric attributes get the mean.
 * <p>
 * Note that no copy is made; the given dataset itself is modified and
 * returned.
 *
 * @param data the dataset to process (modified in place)
 * @return the same dataset object, with the missing values replaced
 * @throws Exception declared by the signature; nothing in this method throws it explicitly
 */
private Instances handleMissingValues(Instances data) throws Exception {
    Instances dataset = data;
    Enumeration attributes = dataset.enumerateAttributes();
    while (attributes.hasMoreElements()) {
        Attribute current = (Attribute) attributes.nextElement();
        int column = current.index();
        AttributeStats stats = dataset.attributeStats(column);

        if (current.isNominal()) {
            // index of the label with the highest count (first one wins on ties)
            int mostFrequent = 0;
            for (int label = 0; label < current.numValues(); label++) {
                if (stats.nominalCounts[label] > stats.nominalCounts[mostFrequent]) {
                    mostFrequent = label;
                }
            }

            for (int row = 0; row < dataset.numInstances(); row++) {
                if (dataset.instance(row).isMissing(column)) {
                    dataset.instance(row).setValue(column, mostFrequent);
                }
            }
        } else if (current.isNumeric()) {
            double average = stats.numericStats.mean;
            for (int row = 0; row < dataset.numInstances(); row++) {
                if (dataset.instance(row).isMissing(column)) {
                    dataset.instance(row).setValue(column, average);
                }
            }
        }
    }

    return dataset;
}

From source file:edu.illinois.cs.cogcomp.lbjava.learn.WekaWrapper.java

License:Open Source License

/**
 * This method makes one or more decisions about a single object, returning those decisions as
 * Features in a vector.
 * <p>
 * Calling it on an untrained classifier, or with a class attribute that is neither
 * nominal, string nor numeric, prints an error with a stack trace and terminates the JVM.
 *
 * @param exampleFeatures The example's array of feature indices.
 * @param exampleValues The example's array of feature values.
 * @return A feature vector with a single feature containing the prediction for this example;
 *         an empty vector if the predicted label is not in the label lexicon.
 **/
public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) {
    if (!trained) {
        System.err.println(
                "WekaWrapper: Error - Cannot make a classification with an " + "untrained classifier.");
        new Exception().printStackTrace();
        System.exit(1);
    }

    // The first Attribute in the attributeInfo vector is assumed to be the class attribute;
    // its type decides which kind of feature gets produced.
    final Attribute classAttribute = (Attribute) attributeInfo.elementAt(0);

    // discrete prediction: pick the label with the highest score (first one wins on ties)
    if (classAttribute.isNominal() || classAttribute.isString()) {
        final double[] scores = getDistribution(exampleFeatures, exampleValues);
        int winner = 0;
        for (int candidate = 1; candidate < scores.length; ++candidate) {
            if (scores[candidate] > scores[winner]) {
                winner = candidate;
            }
        }

        final Feature label = labelLexicon.lookupKey(winner);
        if (label == null) {
            return new FeatureVector();
        }

        final String value = label.getStringValue();
        return new FeatureVector(new DiscretePrimitiveStringFeature(containingPackage, name, "", value,
                valueIndexOf(value), (short) allowableValues().length));
    }

    // real-valued prediction: the first element of the distribution is the predicted value
    if (classAttribute.isNumeric()) {
        final double prediction = getDistribution(exampleFeatures, exampleValues)[0];
        return new FeatureVector(new RealPrimitiveStringFeature(containingPackage, name, "", prediction));
    }

    System.err.println("WekaWrapper: Error - illegal class type.");
    new Exception().printStackTrace();
    System.exit(1);

    // only here to satisfy the compiler, System.exit does not return
    return new FeatureVector();
}