Example usage for the weka.core.Attribute constructor Attribute(String)

List of usage examples for the weka.core.Attribute constructor Attribute(String)

Introduction

On this page you can find example usages of the weka.core.Attribute constructor Attribute(String attributeName).

Prototype



public Attribute(String attributeName) 

Source Link

Document

Constructor for a numeric attribute.

Usage

From source file:DocClassifier.java

/**
 * Builds the attribute list for the given documents: one numeric attribute
 * per distinct term plus a trailing nominal class attribute.
 *
 * <p>Each file is read line by line, every line is split on single spaces and
 * each word is analysed with {@code zemberek.kelimeCozumle}; the content of the
 * first analysis result's {@code kok()} (presumably the word root - confirm
 * against the Zemberek API) is used as the term.
 *
 * <p>Side effects: {@code idfMap} first accumulates, per term, the number of
 * documents containing it and is then overwritten, for every term that
 * survives {@code PreProcesser.filterTermSet}, with
 * {@code log2(files.length / df)}; {@code classValues} is replaced by the set
 * of class labels, taken from the lower-cased first three characters of each
 * file name (or the whole name if it is shorter).
 *
 * @param files the documents to scan
 * @return the attribute vector (term attributes followed by the class
 *         attribute), or {@code null} if any file could not be read
 */
private FastVector createTerms(File[] files) {
    try {
        Set<String> termSet = new HashSet<String>();
        for (File file : files) {
            Set<String> docTermSet = new HashSet<String>();
            BufferedReader reader = new BufferedReader(new FileReader(file));
            try {
                // readLine() == null is the reliable end-of-stream signal;
                // ready() only says whether a read would block.
                String line;
                while ((line = reader.readLine()) != null) {
                    for (String word : line.split(" ")) {
                        Kelime[] kelimeler = this.zemberek.kelimeCozumle(word);
                        if (kelimeler.length > 0) {
                            String kok = kelimeler[0].kok().icerik();
                            docTermSet.add(kok);
                            termSet.add(kok);
                        }
                    }
                }
            } finally {
                // always release the file handle, even if analysis fails
                reader.close();
            }
            // DF for a doc: each term counts once per document
            for (String t : docTermSet) {
                Double freq = this.idfMap.get(t);
                this.idfMap.put(t, ((freq != null) ? (freq + 1) : 1));
            }
        }
        // Remove some words like ve,veya,de,da,in from set
        termSet = PreProcesser.filterTermSet(termSet);
        // IDF calculation: replace the document frequency by log2(N / df)
        for (String t : termSet) {
            Double df = this.idfMap.get(t);
            if (df != null) {
                this.idfMap.put(t, Math.log(files.length / df) / Math.log(2));
            } else {
                this.idfMap.put(t, 0.0);
            }
        }
        // Attribute creation: one numeric attribute per term
        FastVector terms = new FastVector();
        for (String term : termSet) {
            terms.addElement(new Attribute(term));
        }
        // Class values are derived from the file-name prefix
        Set<String> classSet = new HashSet<String>();
        for (File file : files) {
            String name = file.getName();
            // guard against names shorter than the 3-character prefix
            classSet.add(name.substring(0, Math.min(3, name.length())).toLowerCase());
        }
        this.classValues = new FastVector();
        for (String category : classSet) {
            this.classValues.addElement(category);
        }
        terms.addElement(new Attribute(CLASS_ATTR_NAME, classValues));
        return terms;
    } catch (IOException ex) {
        // FileNotFoundException is an IOException, so one handler covers both
        Logger.getLogger(DocClassifier.class.getName()).log(Level.SEVERE, null, ex);
    }
    return null;
}

From source file:PrincipalComponents.java

License:Open Source License

/**
 * Set up the header for the PC->original space dataset
 *
 * @return the output format/*  w w w  .  ja v  a2  s  .  com*/
 * @throws Exception if something goes wrong
 */
private Instances setOutputFormatOriginal() throws Exception {
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();

    for (int i = 0; i < m_numAttribs; i++) {
        String att = m_trainInstances.attribute(i).name();
        attributes.add(new Attribute(att));
    }

    if (m_hasClass) {
        attributes.add((Attribute) m_trainHeader.classAttribute().copy());
    }

    Instances outputFormat = new Instances(m_trainHeader.relationName() + "->PC->original space", attributes,
            0);

    // set the class to be the last attribute if necessary
    if (m_hasClass) {
        outputFormat.setClassIndex(outputFormat.numAttributes() - 1);
    }

    return outputFormat;
}

From source file:PrincipalComponents.java

License:Open Source License

/**
 * Set the format for the transformed data.
 *
 * <p>One numeric attribute is created per retained component, walking
 * {@code m_sortedEigens} from its last index downwards and stopping as soon as
 * the accumulated eigenvalues reach the {@code m_coverVariance} fraction of
 * {@code m_sumOfEigenValues}. Each attribute is named after the linear
 * combination it represents (coefficient followed by the original attribute
 * name). When {@code m_maxAttrsInName > 0} only that many terms are listed,
 * largest coefficient magnitude first, followed by "..." if terms were
 * dropped; otherwise all terms are listed in original attribute order.
 *
 * <p>Also stores the resulting attribute count in {@code m_outputNumAtts}.
 *
 * @return a set of empty Instances (header only) in the new format, or
 *         {@code null} if no eigenvalues have been computed yet
 * @throws Exception if the output format can't be set
 */
private Instances setOutputFormat() throws Exception {
    if (m_eigenvalues == null) {
        return null;
    }

    double cumulative = 0.0;
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    for (int i = m_numAttribs - 1; i >= 0; i--) {
        StringBuilder attName = new StringBuilder();
        int num_attrs = (m_maxAttrsInName > 0) ? Math.min(m_numAttribs, m_maxAttrsInName) : m_numAttribs;
        // this array contains the indices of the coefficients in output order
        int[] coeff_inds;
        // The original tested m_numAttribs here, which is always positive
        // inside this loop and made the else branch unreachable.
        if (m_maxAttrsInName > 0) {
            // sort coefficients by decreasing magnitude: magnitudes are
            // negated so that the ascending sort yields the largest first
            double[] coeff_mags = new double[m_numAttribs];
            for (int j = 0; j < m_numAttribs; j++) {
                coeff_mags[j] = -Math.abs(m_eigenvectors[j][m_sortedEigens[i]]);
            }
            coeff_inds = Utils.sort(coeff_mags);
        } else {
            // use all coeffs in original order
            coeff_inds = new int[m_numAttribs];
            for (int j = 0; j < m_numAttribs; j++) {
                coeff_inds[j] = j;
            }
        }
        // build final attName string
        for (int j = 0; j < num_attrs; j++) {
            double coeff_value = m_eigenvectors[coeff_inds[j]][m_sortedEigens[i]];
            // negative coefficients already carry their own sign
            if (j > 0 && coeff_value >= 0) {
                attName.append("+");
            }
            attName.append(
                    Utils.doubleToString(coeff_value, 5, 3) + m_trainInstances.attribute(coeff_inds[j]).name());
        }
        if (num_attrs < m_numAttribs) {
            attName.append("...");
        }

        attributes.add(new Attribute(attName.toString()));
        cumulative += m_eigenvalues[m_sortedEigens[i]];

        // enough variance covered: no further components are needed
        if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {
            break;
        }
    }

    if (m_hasClass) {
        attributes.add((Attribute) m_trainHeader.classAttribute().copy());
    }

    Instances outputFormat = new Instances(m_trainInstances.relationName() + "_principal components",
            attributes, 0);

    // set the class to be the last attribute if necessary
    if (m_hasClass) {
        outputFormat.setClassIndex(outputFormat.numAttributes() - 1);
    }

    m_outputNumAtts = outputFormat.numAttributes();
    return outputFormat;
}

From source file:ArrayLoader.java

License:Open Source License

/**
 * Return the full data set. If the structure hasn't yet been determined
 * by a call to getStructure then method should do so before processing
 * the rest of the data set./* www. j  av  a 2s  .c o m*/
 *
 * @return the structure of the data set as an empty set of Instances
 * @exception IOException if there is no source or parsing fails
 */
public Instances getDataSet() throws IOException {
    if (m_data == null) {
        throw new IOException("No source has been specified");
    }

    if (m_structure == null) {
        getStructure();
    }

    m_cumulativeStructure = new FastVector(m_structure.numAttributes());
    for (int i = 0; i < m_structure.numAttributes(); i++) {
        m_cumulativeStructure.addElement(new Hashtable());
    }

    m_cumulativeInstances = new FastVector();
    FastVector current;

    for (int i = 0; i < m_data.length; i++) {
        current = getInstance(m_data[i]);

        m_cumulativeInstances.addElement(current);
    }

    FastVector atts = new FastVector(m_structure.numAttributes());
    for (int i = 0; i < m_structure.numAttributes(); i++) {
        String attname = m_structure.attribute(i).name();
        Hashtable tempHash = ((Hashtable) m_cumulativeStructure.elementAt(i));
        if (tempHash.size() == 0) {
            atts.addElement(new Attribute(attname));
        } else {
            if (m_StringAttributes.isInRange(i)) {
                atts.addElement(new Attribute(attname, (FastVector) null));
            } else {
                FastVector values = new FastVector(tempHash.size());
                // add dummy objects in order to make the FastVector's size == capacity
                for (int z = 0; z < tempHash.size(); z++) {
                    values.addElement("dummy");
                }
                Enumeration e = tempHash.keys();
                while (e.hasMoreElements()) {
                    Object ob = e.nextElement();
                    //     if (ob instanceof Double) {
                    int index = ((Integer) tempHash.get(ob)).intValue();
                    String s = ob.toString();
                    if (s.startsWith("'") || s.startsWith("\""))
                        s = s.substring(1, s.length() - 1);
                    values.setElementAt(new String(s), index);
                    //     }
                }
                atts.addElement(new Attribute(attname, values));
            }
        }
    }

    // make the instances
    String relationName;
    relationName = "ArrayData";
    Instances dataSet = new Instances(relationName, atts, m_cumulativeInstances.size());

    for (int i = 0; i < m_cumulativeInstances.size(); i++) {
        current = ((FastVector) m_cumulativeInstances.elementAt(i));
        double[] vals = new double[dataSet.numAttributes()];
        for (int j = 0; j < current.size(); j++) {
            Object cval = current.elementAt(j);
            if (cval instanceof String) {
                if (((String) cval).compareTo(m_MissingValue) == 0) {
                    vals[j] = Instance.missingValue();
                } else {
                    if (dataSet.attribute(j).isString()) {
                        vals[j] = dataSet.attribute(j).addStringValue((String) cval);
                    } else if (dataSet.attribute(j).isNominal()) {
                        // find correct index
                        Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j);
                        int index = ((Integer) lookup.get(cval)).intValue();
                        vals[j] = index;
                    } else {
                        throw new IllegalStateException("Wrong attribute type at position " + (i + 1) + "!!!");
                    }
                }
            } else if (dataSet.attribute(j).isNominal()) {
                // find correct index
                Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j);
                int index = ((Integer) lookup.get(cval)).intValue();
                vals[j] = index;
            } else if (dataSet.attribute(j).isString()) {
                vals[j] = dataSet.attribute(j).addStringValue("" + cval);
            } else {
                vals[j] = ((Double) cval).doubleValue();
            }
        }
        dataSet.add(new Instance(1.0, vals));
    }
    m_structure = new Instances(dataSet, 0);
    m_cumulativeStructure = null; // conserve memory

    return dataSet;
}

From source file:ArrayLoader.java

License:Open Source License

/**
 * Builds the provisional header from the given column titles.
 * Every column is declared as a numeric attribute at this stage; reading the
 * full data set with getDataSet establishes the true structure.
 *
 * @param column the column titles, one attribute is created per entry
 * @throws IOException declared for callers; not raised by the visible code
 */
private void readHeader(String[] column) throws IOException {
    FastVector numericAtts = new FastVector();

    // the supplied titles become the attribute names, in the same order
    for (String title : column) {
        numericAtts.addElement(new Attribute(title));
    }

    m_structure = new Instances("DataArray", numericAtts, 0);
}

From source file:CJWeka.java

License:Open Source License

/**
 * Appends one instance, given as a space-separated string of values, to the
 * data set {@code ii}.
 *
 * <p>On the first call (while {@code my_attributes} is empty) the header is
 * created: one numeric attribute per value except the last, named "0", "1",
 * ..., followed by a nominal attribute "class" with the labels "0" and "1".
 * The class index is set to the position of the last value on every call.
 *
 * @param args the value string; must be a {@code String}
 * @return the number of instances in {@code ii} after the insertion, as a
 *         {@code String}
 * @throws RuntimeException if {@code args} is not a {@code String}
 * @throws Exception if the conversion of the string fails
 */
public Object addInstance(Object args) throws Exception {
    if (!(args instanceof String)) {
        throw new RuntimeException("Invalid type for execute");
    }

    String floatstring = (String) args;
    int nvalues = floatstring.split(" ").length;
    int classPosition = nvalues - 1;

    // lazily build the header from the shape of the first string received
    if (my_attributes.isEmpty()) {
        for (int idx = 0; idx < classPosition; idx++) {
            my_attributes.add(new Attribute(Integer.toString(idx)));
        }

        classvals.add("0");
        classvals.add("1");
        my_attributes.add(new Attribute("class", classvals));

        ii = new Instances("my_instances", my_attributes, 0);
    }

    ii.setClassIndex(classPosition);

    Instance parsed = this.floatstringToInst(floatstring, ii, true);
    ii.add(parsed);

    // report the current size of the data set
    return String.valueOf(ii.numInstances());
}

From source file:aaa.util.test.CreateArff.java

License:Open Source License

/**
 * Generates the Instances object and outputs it in ARFF format to stdout.
 *
 * @param args   ignored/*from   w  w  w .  ja v a 2  s . c o m*/
 * @throws Exception   if generation of instances fails
 */
public static void main(String[] args) throws Exception {
    ArrayList<Attribute> atts;
    ArrayList<Attribute> attsRel;
    ArrayList<String> attVals;
    ArrayList<String> attValsRel;
    Instances data;
    Instances dataRel;
    double[] vals;
    double[] valsRel;
    int i;

    // 1. set up attributes
    atts = new ArrayList<Attribute>();
    // - numeric
    atts.add(new Attribute("att1"));
    // - nominal
    attVals = new ArrayList<String>();
    for (i = 0; i < 5; i++)
        attVals.add("val" + (i + 1));
    atts.add(new Attribute("att2", attVals));
    // - string
    atts.add(new Attribute("att3", (ArrayList<String>) null));
    // - date
    atts.add(new Attribute("att4", "yyyy-MM-dd"));
    // - relational
    attsRel = new ArrayList<Attribute>();
    // -- numeric
    attsRel.add(new Attribute("att5.1"));
    // -- nominal
    attValsRel = new ArrayList<String>();
    for (i = 0; i < 5; i++)
        attValsRel.add("val5." + (i + 1));
    attsRel.add(new Attribute("att5.2", attValsRel));
    dataRel = new Instances("att5", attsRel, 0);
    atts.add(new Attribute("att5", dataRel, 0));

    // 2. create Instances object
    data = new Instances("MyRelation", atts, 0);

    // 3. fill with data
    // first instance
    vals = new double[data.numAttributes()];
    // - numeric
    vals[0] = Math.PI;
    // - nominal
    vals[1] = attVals.indexOf("val3");
    // - string
    vals[2] = data.attribute(2).addStringValue("This is a string!");
    // - date
    vals[3] = data.attribute(3).parseDate("2001-11-09");
    // - relational
    dataRel = new Instances(data.attribute(4).relation(), 0);
    // -- first instance
    valsRel = new double[2];
    valsRel[0] = Math.PI + 1;
    valsRel[1] = attValsRel.indexOf("val5.3");
    dataRel.add(new DenseInstance(1.0, valsRel));
    // -- second instance
    valsRel = new double[2];
    valsRel[0] = Math.PI + 2;
    valsRel[1] = attValsRel.indexOf("val5.2");
    dataRel.add(new DenseInstance(1.0, valsRel));
    vals[4] = data.attribute(4).addRelation(dataRel);
    // add
    data.add(new DenseInstance(1.0, vals));

    // second instance
    vals = new double[data.numAttributes()]; // important: needs NEW array!
    // - numeric
    vals[0] = Math.E;
    // - nominal
    vals[1] = attVals.indexOf("val1");
    // - string
    vals[2] = data.attribute(2).addStringValue("And another one!");
    // - date
    vals[3] = data.attribute(3).parseDate("2000-12-01");
    // - relational
    dataRel = new Instances(data.attribute(4).relation(), 0);
    // -- first instance
    valsRel = new double[2];
    valsRel[0] = Math.E + 1;
    valsRel[1] = attValsRel.indexOf("val5.4");
    dataRel.add(new DenseInstance(1.0, valsRel));
    // -- second instance
    valsRel = new double[2];
    valsRel[0] = Math.E + 2;
    valsRel[1] = attValsRel.indexOf("val5.1");
    dataRel.add(new DenseInstance(1.0, valsRel));
    vals[4] = data.attribute(4).addRelation(dataRel);
    // add
    data.add(new DenseInstance(1.0, vals));

    // 4. output data
    System.out.println(data);
}

From source file:activeSegmentation.feature.FeatureExtraction.java

License:Open Source License

/**
 * Creates one numeric attribute per feature known to {@code filterManager},
 * named after the feature's label. Feature indices are queried from 1 up to
 * and including {@code filterManager.getNumOfFeatures()}.
 *
 * @return the list of feature attributes, in feature-index order
 */
private ArrayList<Attribute> createFeatureHeader() {
    ArrayList<Attribute> header = new ArrayList<Attribute>();
    int featureIndex = 1;
    while (featureIndex <= filterManager.getNumOfFeatures()) {
        header.add(new Attribute(filterManager.getLabel(featureIndex)));
        featureIndex++;
    }

    return header;
}

From source file:adams.data.conversion.ReportToWekaInstance.java

License:Open Source License

/**
 * Performs the actual conversion./*from   w w w .  j a va2  s . co m*/
 *
 * @return      the converted data
 * @throws Exception   if something goes wrong with the conversion
 */
protected Object doConvert() throws Exception {
    Report report;
    Instance result;
    ArrayList atts;
    ArrayList attValues;
    int i;
    double[] values;

    report = (Report) m_Input;

    // generate header
    if (m_Header == null) {
        atts = new ArrayList();
        for (i = 0; i < m_Fields.length; i++) {
            switch (m_Fields[i].getDataType()) {
            case NUMERIC:
                atts.add(new Attribute(m_Fields[i].getName()));
                break;
            case BOOLEAN:
                attValues = new ArrayList();
                attValues.add("false");
                attValues.add("true");
                atts.add(new Attribute(m_Fields[i].getName(), attValues));
                break;
            default:
                atts.add(new Attribute(m_Fields[i].getName(), (List) null));
                break;
            }
        }
        m_Header = new Instances(getClass().getName(), atts, 0);
    }

    // generate instance
    values = new double[m_Header.numAttributes()];
    for (i = 0; i < m_Fields.length; i++) {
        if (report.hasValue(m_Fields[i])) {
            switch (m_Fields[i].getDataType()) {
            case NUMERIC:
                values[i] = report.getDoubleValue(m_Fields[i]);
                break;
            case BOOLEAN:
                if (report.getBooleanValue(m_Fields[i]))
                    values[i] = 1;
                else
                    values[i] = 0;
                break;
            default:
                values[i] = m_Header.attribute(i).addStringValue("" + report.getValue(m_Fields[i]));
                break;
            }
        } else {
            values[i] = weka.core.Utils.missingValue();
        }
    }
    result = new DenseInstance(1.0, values);
    result.setDataset(m_Header);

    return result;
}

From source file:adams.data.conversion.SpreadSheetToWekaInstances.java

License:Open Source License

/**
 * Performs the actual conversion of the input spreadsheet into Weka
 * instances.
 *
 * <p>Attribute types are chosen per column from the sheet's content types,
 * with LONG folded into DOUBLE: a column containing only DOUBLE becomes
 * numeric, only DATE becomes a date attribute using
 * {@code Constants.TIMESTAMP_FORMAT}, only TIME becomes a date attribute using
 * {@code Constants.TIME_FORMAT}. Any other column becomes nominal with its
 * sorted distinct values as labels, unless there are more than
 * {@code m_MaxLabels} distinct values or {@code m_MaxLabels} is below 1, in
 * which case it becomes a string attribute. If the sheet is a
 * {@code Dataset} with class attributes, the first one is used as class index.
 *
 * @return      the converted data
 * @throws Exception   if something goes wrong with the conversion
 */
@Override
protected Object doConvert() throws Exception {
    SpreadSheet sheet = (SpreadSheet) m_Input;

    // create header
    ArrayList<Attribute> atts = new ArrayList<>();
    for (int col = 0; col < sheet.getColumnCount(); col++) {
        boolean added = false;
        Collection<ContentType> types = sheet.getContentTypes(col);
        // treat integers as doubles: LONG never survives on its own
        if (types.contains(ContentType.DOUBLE))
            types.remove(ContentType.LONG);
        if (types.contains(ContentType.LONG)) {
            types.add(ContentType.DOUBLE);
            types.remove(ContentType.LONG);
        }

        // a single remaining type may map directly onto a Weka type
        if (types.size() == 1) {
            ContentType type = (ContentType) types.toArray()[0];
            if (type == ContentType.DOUBLE) {
                atts.add(new Attribute(sheet.getHeaderRow().getCell(col).getContent()));
                added = true;
            } else if (type == ContentType.DATE) {
                atts.add(new Attribute(sheet.getHeaderRow().getCell(col).getContent(),
                        Constants.TIMESTAMP_FORMAT));
                added = true;
            } else if (type == ContentType.TIME) {
                atts.add(new Attribute(sheet.getHeaderRow().getCell(col).getContent(), Constants.TIME_FORMAT));
                added = true;
            }
        }

        if (added)
            continue;

        // fall back to nominal/string: gather the distinct non-missing values
        HashSet<String> unique = new HashSet<>();
        for (int r = 0; r < sheet.getRowCount(); r++) {
            Cell cell = sheet.getRow(r).getCell(col);
            if ((cell != null) && !cell.isMissing())
                unique.add(cell.getContent());
        }
        if ((unique.size() > m_MaxLabels) || (m_MaxLabels < 1)) {
            // too many labels (or labels disabled): string attribute
            atts.add(new Attribute(sheet.getHeaderRow().getCell(col).getContent(), (FastVector) null));
        } else {
            ArrayList<String> labels = new ArrayList<>(unique);
            Collections.sort(labels);
            atts.add(new Attribute(sheet.getHeaderRow().getCell(col).getContent(), labels));
        }
    }
    Instances result = new Instances(Environment.getInstance().getProject(), atts, sheet.getRowCount());
    if (sheet.hasName())
        result.setRelationName(sheet.getName());

    // add data
    for (int r = 0; r < sheet.getRowCount(); r++) {
        Row row = sheet.getRow(r);
        double[] values = new double[result.numAttributes()];
        for (int col = 0; col < result.numAttributes(); col++) {
            Cell cell = row.getCell(col);
            // missing unless the cell provides a value
            values[col] = weka.core.Utils.missingValue();
            if ((cell == null) || cell.isMissing())
                continue;

            Attribute att = result.attribute(col);
            if (att.type() == Attribute.DATE) {
                if (cell.isTime())
                    values[col] = cell.toTime().getTime();
                else
                    values[col] = cell.toDate().getTime();
            } else if (att.isNumeric()) {
                values[col] = Utils.toDouble(cell.getContent());
            } else if (att.isString()) {
                values[col] = att.addStringValue(cell.getContent());
            } else {
                // nominal: store the index of the label
                values[col] = att.indexOfValue(cell.getContent());
            }
        }
        result.add(new DenseInstance(1.0, values));
    }

    // carry over the first class attribute, if the sheet defines any
    if (sheet instanceof Dataset) {
        int[] classIndices = ((Dataset) sheet).getClassAttributeIndices();
        if (classIndices.length > 0)
            result.setClassIndex(classIndices[0]);
    }

    return result;
}