weka.filters.unsupervised.attribute.AddValues.java Source code

Java tutorial

Introduction

Here is the source code for weka.filters.unsupervised.attribute.AddValues.java

Source

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * AddValues.java
 * Copyright (C) 2006-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.filters.unsupervised.attribute;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;

import weka.core.*;
import weka.core.Capabilities.Capability;
import weka.filters.Filter;
import weka.filters.StreamableFilter;
import weka.filters.UnsupervisedFilter;

/**
 * <!-- globalinfo-start --> Adds the labels from the given list to an attribute
 * if they are missing. The labels can also be sorted in an ascending manner. If
 * no labels are provided then only the (optional) sorting applies.
 * <p/>
 * <!-- globalinfo-end -->
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 * -C &lt;col&gt;
 *  Sets the attribute index
 *  (default last).
 * </pre>
 * 
 * <pre>
 * -L &lt;label1,label2,...&gt;
 *  Comma-separated list of labels to add.
 *  (default: none)
 * </pre>
 * 
 * <pre>
 * -S
 *  Turns on the sorting of the labels.
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * Based on code from AddValues.
 * <p/>
 * The filter replaces the chosen nominal attribute in the header with a new
 * attribute holding the merged (and optionally sorted) label list, and rewrites
 * the label index of every incoming instance so that it points at the same
 * label in the new list.
 * 
 * @author FracPete (fracpete at waikato dot ac dot nz)
 * @version $Revision$
 * @see AddValues
 */
public class AddValues extends Filter implements UnsupervisedFilter, StreamableFilter, OptionHandler,
        WeightedAttributesHandler, WeightedInstancesHandler {

    /** for serialization */
    private static final long serialVersionUID = -8100622241742393656L;

    /** The attribute's index setting. */
    protected SingleIndex m_AttIndex = new SingleIndex("last");

    /** The values to add. */
    protected ArrayList<String> m_Labels = new ArrayList<String>();

    /** Whether to sort the values. */
    protected boolean m_Sort = false;

    /**
     * Maps each label index of the original attribute to the index of the same
     * label in the output attribute; filled in by
     * {@link #setInputFormat(Instances)}.
     */
    protected int[] m_SortedIndices;

    /**
     * Returns a string describing this filter
     * 
     * @return a description of the filter suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String globalInfo() {
        return "Adds the labels from the given list to an attribute if they are "
                + "missing. The labels can also be sorted in an ascending manner. "
                + "If no labels are provided then only the (optional) sorting applies.";
    }

    /**
     * Returns an enumeration describing the available options.
     * 
     * @return an enumeration of all the available options.
     */
    @Override
    public Enumeration<Option> listOptions() {
        Vector<Option> result = new Vector<Option>();

        result.addElement(new Option("\tSets the attribute index\n" + "\t(default last).", "C", 1, "-C <col>"));

        result.addElement(new Option("\tComma-separated list of labels to add.\n" + "\t(default: none)", "L", 1,
                "-L <label1,label2,...>"));

        result.addElement(new Option("\tTurns on the sorting of the labels.", "S", 0, "-S"));

        return result.elements();
    }

    /**
     * Parses a given list of options.
     * <p/>
     * 
     * <!-- options-start --> Valid options are:
     * <p/>
     * 
     * <pre>
     * -C &lt;col&gt;
     *  Sets the attribute index
     *  (default last).
     * </pre>
     * 
     * <pre>
     * -L &lt;label1,label2,...&gt;
     *  Comma-separated list of labels to add.
     *  (default: none)
     * </pre>
     * 
     * <pre>
     * -S
     *  Turns on the sorting of the labels.
     * </pre>
     * 
     * <!-- options-end -->
     * 
     * @param options the list of options as an array of strings
     * @throws Exception if an option is not supported
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        String attIndex = Utils.getOption('C', options);
        setAttributeIndex(attIndex.length() != 0 ? attIndex : "last");

        // an absent -L yields the empty string, which clears the label list
        setLabels(Utils.getOption('L', options));

        setSort(Utils.getFlag('S', options));

        // re-derive the output format if the filter was already initialised
        if (getInputFormat() != null) {
            setInputFormat(getInputFormat());
        }

        Utils.checkForRemainingOptions(options);
    }

    /**
     * Gets the current settings of the filter.
     * 
     * @return an array of strings suitable for passing to setOptions
     */
    @Override
    public String[] getOptions() {
        ArrayList<String> result = new ArrayList<String>();

        result.add("-C");
        result.add("" + getAttributeIndex());

        result.add("-L");
        result.add("" + getLabels());

        if (getSort()) {
            result.add("-S");
        }

        return result.toArray(new String[result.size()]);
    }

    /**
     * Returns the Capabilities of this filter.
     * 
     * @return the capabilities of this object
     * @see Capabilities
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.disableAll();

        // attributes
        result.enableAllAttributes();
        result.enable(Capability.MISSING_VALUES);

        // class
        result.enableAllClasses();
        result.enable(Capability.MISSING_CLASS_VALUES);
        result.enable(Capability.NO_CLASS);

        return result;
    }

    /**
     * Sets the format of the input instances.
     * <p/>
     * Builds the output header by replacing the selected nominal attribute with
     * one that carries the existing labels followed by any configured labels
     * that are not present yet (sorted if sorting is enabled), and records for
     * every original label index its position in the new label list.
     * 
     * @param instanceInfo an Instances object containing the input instance
     *          structure (any instances contained in the object are ignored -
     *          only the structure is required).
     * @return true if the outputFormat may be collected immediately
     * @throws Exception if the input format can't be set successfully
     */
    @Override
    public boolean setInputFormat(Instances instanceInfo) throws Exception {
        super.setInputFormat(instanceInfo);

        m_AttIndex.setUpper(instanceInfo.numAttributes() - 1);
        int attIndex = m_AttIndex.getIndex();
        Attribute att = instanceInfo.attribute(attIndex);
        if (!att.isNominal()) {
            throw new UnsupportedAttributeTypeException("Chosen attribute not nominal.");
        }

        // merge labels: existing labels keep their order, new ones are appended
        ArrayList<String> allLabels = new ArrayList<String>();
        for (int i = 0; i < att.numValues(); i++) {
            allLabels.add(att.value(i));
        }
        for (String label : m_Labels) {
            if (!allLabels.contains(label)) {
                allLabels.add(label);
            }
        }

        if (getSort()) {
            Collections.sort(allLabels);
        }

        // old label index -> new label index; labels are read the same way as
        // in the merge step above so that the lookup always succeeds
        m_SortedIndices = new int[att.numValues()];
        for (int i = 0; i < m_SortedIndices.length; i++) {
            m_SortedIndices[i] = allLabels.indexOf(att.value(i));
        }

        // generate new header
        Attribute attNew = new Attribute(att.name(), allLabels);
        attNew.setWeight(att.weight());

        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        for (int i = 0; i < instanceInfo.numAttributes(); i++) {
            atts.add(i == attIndex ? attNew : instanceInfo.attribute(i));
        }

        Instances instNew = new Instances(instanceInfo.relationName(), atts, 0);
        instNew.setClassIndex(instanceInfo.classIndex());

        // set new format
        setOutputFormat(instNew);

        return true;
    }

    /**
     * Input an instance for filtering. The instance is processed and made
     * available for output immediately.
     * <p/>
     * The input instance itself is left untouched; a new dense instance with
     * the remapped label index is created and queued.
     * 
     * @param instance the input instance
     * @return true if the filtered instance may now be collected with output().
     * @throws IllegalStateException if no input format has been set.
     */
    @Override
    public boolean input(Instance instance) {
        if (getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }

        if (m_NewBatch) {
            resetQueue();
            m_NewBatch = false;
        }

        // generate new Instance with the label index translated to the new header
        int attIndex = m_AttIndex.getIndex();
        double[] values = instance.toDoubleArray();
        if (!instance.isMissing(attIndex)) {
            values[attIndex] = m_SortedIndices[(int) values[attIndex]];
        }
        Instance newInstance = new DenseInstance(instance.weight(), values);

        // copy string values etc. from input to output; this has to operate on
        // the NEW instance, otherwise the caller's instance gets modified and
        // the queued instance keeps references into the input dataset
        copyValues(newInstance, false, instance.dataset(), outputFormatPeek());

        push(newInstance); // No need to copy instance

        return true;
    }

    /**
     * Returns the tip text for this property
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String attributeIndexTipText() {
        return "Sets which attribute to process. This "
                + "attribute must be nominal (\"first\" and \"last\" are valid values)";
    }

    /**
     * Get the index of the attribute used.
     * 
     * @return the index of the attribute
     */
    public String getAttributeIndex() {
        return m_AttIndex.getSingleIndex();
    }

    /**
     * Sets index of the attribute used.
     * 
     * @param attIndex the index of the attribute
     */
    public void setAttributeIndex(String attIndex) {
        m_AttIndex.setSingleIndex(attIndex);
    }

    /**
     * Returns the tip text for this property
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String labelsTipText() {
        return "Comma-separated list of labels to add.";
    }

    /**
     * Get the comma-separated list of labels that are added.
     * <p/>
     * NOTE(review): each label is passed through {@code Utils.quote}, whereas
     * {@link #setLabels(String)} only recognises double quotes - confirm that
     * labels containing special characters survive a get/set round trip.
     * 
     * @return the list of labels
     */
    public String getLabels() {
        StringBuilder result = new StringBuilder();

        for (int i = 0; i < m_Labels.size(); i++) {
            if (i > 0) {
                result.append(',');
            }
            result.append(Utils.quote(m_Labels.get(i)));
        }

        return result.toString();
    }

    /**
     * Sets the comma-separated list of labels.
     * <p/>
     * Commas separate labels unless they appear between double quotes; the
     * double quotes themselves are not part of the label. Empty labels are
     * skipped. A {@code null} value clears the list.
     * 
     * @param value the list
     */
    public void setLabels(String value) {
        m_Labels.clear();
        if (value == null) {
            return;
        }

        StringBuilder label = new StringBuilder();
        boolean quoted = false;

        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            boolean complete;

            if (c == '"') {
                // a closing quote terminates the current label
                quoted = !quoted;
                complete = !quoted;
            } else if ((c == ',') && !quoted) {
                complete = true;
            } else {
                label.append(c);
                complete = false;
            }

            if (complete) {
                if (label.length() != 0) {
                    m_Labels.add(label.toString());
                }
                label.setLength(0);
            }
        }

        // whatever is still pending at the end of the string forms the last
        // label, even if the input ended on an unbalanced quote
        if (label.length() != 0) {
            m_Labels.add(label.toString());
        }
    }

    /**
     * Returns the tip text for this property
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String sortTipText() {
        return "Whether to sort the labels alphabetically.";
    }

    /**
     * Gets whether the labels are sorted or not.
     * 
     * @return true if the labels are sorted
     */
    public boolean getSort() {
        return m_Sort;
    }

    /**
     * Sets whether the labels are sorted.
     * 
     * @param value if true the labels are sorted
     */
    public void setSort(boolean value) {
        m_Sort = value;
    }

    /**
     * Returns the revision string.
     * 
     * @return the revision
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision$");
    }

    /**
     * Main method for testing and running this class.
     * 
     * @param args should contain arguments to the filter: use -h for help
     */
    public static void main(String[] args) {
        runFilter(new AddValues(), args);
    }
}