weka.filters.unsupervised.attribute.InterquartileRange.java Source code

Java tutorial

Introduction

Here is the source code for weka.filters.unsupervised.attribute.InterquartileRange.java

Source

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * InterquartileRange.java
 * Copyright (C) 2006-2012 University of Waikato, Hamilton, New Zealand
 */

package weka.filters.unsupervised.attribute;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;

import weka.core.*;
import weka.core.Capabilities.Capability;
import weka.filters.SimpleBatchFilter;

/**
 * <!-- globalinfo-start --> A filter for detecting outliers and extreme values
 * based on interquartile ranges. The filter skips the class attribute.<br/>
 * <br/>
 * Outliers:<br/>
 * Q3 + OF*IQR &lt; x &lt;= Q3 + EVF*IQR<br/>
 * or<br/>
 * Q1 - EVF*IQR &lt;= x &lt; Q1 - OF*IQR<br/>
 * <br/>
 * Extreme values:<br/>
 * x &gt; Q3 + EVF*IQR<br/>
 * or<br/>
 * x &lt; Q1 - EVF*IQR<br/>
 * <br/>
 * Key:<br/>
 * Q1 = 25% quartile<br/>
 * Q3 = 75% quartile<br/>
 * IQR = Interquartile Range, difference between Q1 and Q3<br/>
 * OF = Outlier Factor<br/>
 * EVF = Extreme Value Factor
 * <p/>
 * <!-- globalinfo-end -->
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 * -D
 *  Turns on output of debugging information.
 * </pre>
 * 
 * <pre>
 * -R &lt;col1,col2-col4,...&gt;
 *  Specifies list of columns to base outlier/extreme value detection
 *  on. If an instance is considered in at least one of those
 *  attributes an outlier/extreme value, it is tagged accordingly.
 *  'first' and 'last' are valid indexes.
 *  (default none)
 * </pre>
 * 
 * <pre>
 * -O &lt;num&gt;
 *  The factor for outlier detection.
 *  (default: 3)
 * </pre>
 * 
 * <pre>
 * -E &lt;num&gt;
 *  The factor for extreme values detection.
 *  (default: 2*Outlier Factor)
 * </pre>
 * 
 * <pre>
 * -E-as-O
 *  Tags extreme values also as outliers.
 *  (default: off)
 * </pre>
 * 
 * <pre>
 * -P
 *  Generates Outlier/ExtremeValue pair for each numeric attribute in
 *  the range, not just a single indicator pair for all the attributes.
 *  (default: off)
 * </pre>
 * 
 * <pre>
 * -M
 *  Generates an additional attribute 'Offset' per Outlier/ExtremeValue
 *  pair that contains the multiplier that the value is off the median.
 *     value = median + 'multiplier' * IQR
 * Note: implicitly sets '-P'. (default: off)
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * Thanks to Dale for a few brainstorming sessions.
 * 
 * @author Dale Fletcher (dale at cs dot waikato dot ac dot nz)
 * @author fracpete (fracpete at waikato dot ac dot nz)
 * @version $Revision$
 */
public class InterquartileRange extends SimpleBatchFilter implements WeightedAttributesHandler {

    /** Serialization version identifier. */
    private static final long serialVersionUID = -227879653639723030L;

    /**
     * Marker written into {@link #m_AttributeIndices} for selected attributes
     * that are skipped: the class attribute and every non-numeric attribute.
     */
    public final static int NON_NUMERIC = -1;

    /**
     * Selects which of the per-attribute arrays computed by the filter is
     * returned by {@link #getValues(ValueType)}.
     */
    public enum ValueType {
        UPPER_EXTREME_VALUES, UPPER_OUTLIER_VALUES, LOWER_OUTLIER_VALUES, LOWER_EXTREME_VALUES, MEDIAN, IQR
    };

    /** The attribute range to work on; defaults to all attributes. */
    protected Range m_Attributes = new Range("first-last");

    /**
     * The attribute indices resolved from {@link #m_Attributes} when the output
     * format is determined; skipped entries hold {@link #NON_NUMERIC}.
     */
    protected int[] m_AttributeIndices = null;

    /** The factor (OF) for detecting outliers. */
    protected double m_OutlierFactor = 3;

    /** The factor (EVF) for detecting extreme values, by default 2*m_OutlierFactor. */
    protected double m_ExtremeValuesFactor = 2 * m_OutlierFactor;

    /** Whether extreme values are also tagged as outliers. */
    protected boolean m_ExtremeValuesAsOutliers = false;

    // The threshold/statistics arrays below stay null until computeThresholds()
    // runs; they then hold one entry per position in m_AttributeIndices.

    /** The upper extreme value threshold (= Q3 + EVF*IQR). */
    protected double[] m_UpperExtremeValue = null;

    /** The upper outlier threshold (= Q3 + OF*IQR). */
    protected double[] m_UpperOutlier = null;

    /** The lower outlier threshold (= Q1 - OF*IQR). */
    protected double[] m_LowerOutlier = null;

    /** The interquartile range (= Q3 - Q1). */
    protected double[] m_IQR = null;

    /** The median. */
    protected double[] m_Median = null;

    /** The lower extreme value threshold (= Q1 - EVF*IQR). */
    protected double[] m_LowerExtremeValue = null;

    /**
     * Whether to generate Outlier/ExtremeValue attributes for each attribute
     * instead of a single pair covering all attributes.
     */
    protected boolean m_DetectionPerAttribute = false;

    /**
     * The position of the outlier indicator attribute(s) in the output format.
     * Holds a single entry when one pair is generated for all attributes,
     * otherwise one entry per position in {@link #m_AttributeIndices}. The
     * ExtremeValue indicator (and the optional Offset attribute) are located
     * directly after the outlier indicator.
     */
    protected int[] m_OutlierAttributePosition = null;

    /**
     * Whether to add another attribute called "Offset", that lists the
     * 'multiplier' by which the outlier/extreme value is away from the median,
     * i.e., value = median + 'multiplier' * IQR <br/>
     * Enabling this automatically enables m_DetectionPerAttribute!
     */
    protected boolean m_OutputOffsetMultiplier = false;

    /**
     * Provides the human-readable description of this filter, including the
     * formulas that define outliers and extreme values.
     * 
     * @return the description text, suitable for displaying in the
     *         explorer/experimenter gui
     */
    @Override
    public String globalInfo() {
        final String description = "A filter for detecting outliers and extreme values based on "
                + "interquartile ranges. The filter skips the class attribute.\n\n"
                + "Outliers:\n"
                + "  Q3 + OF*IQR < x <= Q3 + EVF*IQR\n"
                + "  or\n"
                + "  Q1 - EVF*IQR <= x < Q1 - OF*IQR\n"
                + "\n"
                + "Extreme values:\n"
                + "  x > Q3 + EVF*IQR\n"
                + "  or\n"
                + "  x < Q1 - EVF*IQR\n"
                + "\n"
                + "Key:\n"
                + "  Q1  = 25% quartile\n"
                + "  Q3  = 75% quartile\n"
                + "  IQR = Interquartile Range, difference between Q1 and Q3\n"
                + "  OF  = Outlier Factor\n"
                + "  EVF = Extreme Value Factor";
        return description;
    }

    /**
     * Lists the command-line options understood by this filter, followed by
     * the options of the superclass.
     * 
     * @return an enumeration of all the available options.
     */
    @Override
    public Enumeration<Option> listOptions() {
        Vector<Option> options = new Vector<Option>();

        // -R: attribute range
        String rangeHelp = "\tSpecifies list of columns to base outlier/extreme value detection\n"
                + "\ton. If an instance is considered in at least one of those\n"
                + "\tattributes an outlier/extreme value, it is tagged accordingly.\n"
                + " 'first' and 'last' are valid indexes.\n" + "\t(default none)";
        options.add(new Option(rangeHelp, "R", 1, "-R <col1,col2-col4,...>"));

        // -O: outlier factor
        String outlierHelp = "\tThe factor for outlier detection.\n" + "\t(default: 3)";
        options.add(new Option(outlierHelp, "O", 1, "-O <num>"));

        // -E: extreme values factor
        String extremeHelp = "\tThe factor for extreme values detection.\n" + "\t(default: 2*Outlier Factor)";
        options.add(new Option(extremeHelp, "E", 1, "-E <num>"));

        // -E-as-O: extreme values count as outliers, too
        String extremeAsOutlierHelp = "\tTags extreme values also as outliers.\n" + "\t(default: off)";
        options.add(new Option(extremeAsOutlierHelp, "E-as-O", 0, "-E-as-O"));

        // -P: one indicator pair per attribute
        String perAttributeHelp = "\tGenerates Outlier/ExtremeValue pair for each numeric attribute in\n"
                + "\tthe range, not just a single indicator pair for all the attributes.\n" + "\t(default: off)";
        options.add(new Option(perAttributeHelp, "P", 0, "-P"));

        // -M: additional offset attribute
        String offsetHelp = "\tGenerates an additional attribute 'Offset' per Outlier/ExtremeValue\n"
                + "\tpair that contains the multiplier that the value is off the median.\n"
                + "\t   value = median + 'multiplier' * IQR\n" + "Note: implicitely sets '-P'."
                + "\t(default: off)";
        options.add(new Option(offsetHelp, "M", 0, "-M"));

        // inherited options come last
        options.addAll(Collections.list(super.listOptions()));

        return options.elements();
    }

    /**
     * Parses a list of options for this object.
     * <p/>
     * 
     * <!-- options-start --> Valid options are:
     * <p/>
     * 
     * <pre>
     * -D
     *  Turns on output of debugging information.
     * </pre>
     * 
     * <pre>
     * -R &lt;col1,col2-col4,...&gt;
     *  Specifies list of columns to base outlier/extreme value detection
     *  on. If an instance is considered in at least one of those
     *  attributes an outlier/extreme value, it is tagged accordingly.
     *  'first' and 'last' are valid indexes.
     *  (default none)
     * </pre>
     * 
     * <pre>
     * -O &lt;num&gt;
     *  The factor for outlier detection.
     *  (default: 3)
     * </pre>
     * 
     * <pre>
     * -E &lt;num&gt;
     *  The factor for extreme values detection.
     *  (default: 2*Outlier Factor)
     * </pre>
     * 
     * <pre>
     * -E-as-O
     *  Tags extreme values also as outliers.
     *  (default: off)
     * </pre>
     * 
     * <pre>
     * -P
     *  Generates Outlier/ExtremeValue pair for each numeric attribute in
     *  the range, not just a single indicator pair for all the attributes.
     *  (default: off)
     * </pre>
     * 
     * <pre>
     * -M
     *  Generates an additional attribute 'Offset' per Outlier/ExtremeValue
     *  pair that contains the multiplier that the value is off the median.
     *     value = median + 'multiplier' * IQR
     * Note: implicitly sets '-P'. (default: off)
     * </pre>
     * 
     * <!-- options-end -->
     * 
     * <p>
     * The outlier factor and the extreme values factor are validated against
     * each other by their setters. Both requested values are therefore parsed
     * first and then applied in an order that keeps
     * {@code outlier factor < extreme values factor} true at every step, so a
     * consistent pair is never rejected merely because it lies above the
     * currently configured extreme values factor.
     * </p>
     * 
     * @param options the list of options as an array of strings
     * @throws Exception if an option is not supported
     */
    @Override
    public void setOptions(String[] options) throws Exception {

        String tmpStr = Utils.getOption("R", options);
        if (tmpStr.length() != 0) {
            setAttributeIndices(tmpStr);
        } else {
            setAttributeIndices("first-last");
        }

        // parse both factors before touching the current configuration
        tmpStr = Utils.getOption("O", options);
        double outlier = (tmpStr.length() != 0) ? Double.parseDouble(tmpStr) : 3;

        tmpStr = Utils.getOption("E", options);
        boolean extremeGiven = (tmpStr.length() != 0);
        double extreme = extremeGiven ? Double.parseDouble(tmpStr) : 2 * outlier;

        if ((outlier < extreme) && (outlier >= getExtremeValuesFactor())) {
            // The requested pair is consistent but the new outlier factor is not
            // below the *current* extreme values factor: raise the extreme values
            // factor first, otherwise setOutlierFactor() would reject the value.
            setExtremeValuesFactor(extreme);
            setOutlierFactor(outlier);
        } else {
            // Either the outlier factor fits below the current extreme values
            // factor, or the requested pair is inconsistent; in the latter case
            // the setters report the problem as before.
            setOutlierFactor(outlier);
            setExtremeValuesFactor(extremeGiven ? extreme : 2 * getOutlierFactor());
        }

        setExtremeValuesAsOutliers(Utils.getFlag("E-as-O", options));

        // -P must be applied before -M: setDetectionPerAttribute(false) clears
        // the offset flag, whereas setOutputOffsetMultiplier(true) enables -P
        setDetectionPerAttribute(Utils.getFlag("P", options));

        setOutputOffsetMultiplier(Utils.getFlag("M", options));

        super.setOptions(options);

        Utils.checkForRemainingOptions(options);
    }

    /**
     * Assembles the current configuration of the filter as a command-line
     * option array, with the superclass options appended at the end.
     * 
     * @return an array of strings suitable for passing to setOptions
     */
    @Override
    public String[] getOptions() {
        ArrayList<String> opts = new ArrayList<String>();

        // an empty range is reported as the default range
        String range = getAttributeIndices();
        opts.add("-R");
        opts.add(range.equals("") ? "first-last" : range);

        opts.add("-O");
        opts.add(String.valueOf(getOutlierFactor()));

        opts.add("-E");
        opts.add(String.valueOf(getExtremeValuesFactor()));

        // flags are only listed when switched on
        if (getExtremeValuesAsOutliers()) {
            opts.add("-E-as-O");
        }
        if (getDetectionPerAttribute()) {
            opts.add("-P");
        }
        if (getOutputOffsetMultiplier()) {
            opts.add("-M");
        }

        for (String inherited : super.getOptions()) {
            opts.add(inherited);
        }

        return opts.toArray(new String[opts.size()]);
    }

    /**
     * Provides the tip text for the attributeIndices property.
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String attributeIndicesTipText() {
        final String tip = "Specify range of attributes to act on; "
                + " this is a comma separated list of attribute indices, with"
                + " \"first\" and \"last\" valid values; specify an inclusive"
                + " range with \"-\", eg: \"first-3,5,6-10,last\".";
        return tip;
    }

    /**
     * Reports the attribute range that is currently selected.
     * 
     * @return a string containing a comma separated list of ranges
     */
    public String getAttributeIndices() {
        final String ranges = m_Attributes.getRanges();
        return ranges;
    }

    /**
     * Chooses the attributes used for the interquartile calculations and the
     * outlier/extreme value detection. Among the selected attributes only the
     * numeric ones are actually used.
     * 
     * @param value a string representing the list of attributes. Since the string
     *          will typically come from a user, attributes are indexed from 1. <br>
     *          eg: first-3,5,6-last
     * @throws IllegalArgumentException if an invalid range list is supplied
     */
    public void setAttributeIndices(String value) {
        this.m_Attributes.setRanges(value);
    }

    /**
     * Chooses the attributes used for the interquartile calculations and the
     * outlier/extreme value detection from an index array. The array is
     * converted into a range list and handed to
     * {@link #setAttributeIndices(String)}.
     * 
     * @param value an array containing indexes of attributes to work on. Since
     *          the array will typically come from a program, attributes are
     *          indexed from 0.
     * @throws IllegalArgumentException if an invalid set of ranges is supplied
     */
    public void setAttributeIndicesArray(int[] value) {
        String rangeList = Range.indicesToRangeList(value);
        setAttributeIndices(rangeList);
    }

    /**
     * Provides the tip text for the outlierFactor property.
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String outlierFactorTipText() {
        final String tip = "The factor for determining the thresholds for outliers.";
        return tip;
    }

    /**
     * Changes the factor used for determining the outlier thresholds. A value
     * that is not smaller than the current extreme values factor is not
     * applied; a message is printed to stderr instead.
     * 
     * @param value the factor.
     */
    public void setOutlierFactor(double value) {
        boolean notBelowExtreme = value >= getExtremeValuesFactor();
        if (!notBelowExtreme) {
            m_OutlierFactor = value;
            return;
        }
        System.err.println("OutlierFactor must be smaller than ExtremeValueFactor");
    }

    /**
     * Reports the factor used for determining the outlier thresholds.
     * 
     * @return the factor.
     */
    public double getOutlierFactor() {
        return this.m_OutlierFactor;
    }

    /**
     * Provides the tip text for the extremeValuesFactor property.
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String extremeValuesFactorTipText() {
        final String tip = "The factor for determining the thresholds for extreme values.";
        return tip;
    }

    /**
     * Changes the factor used for determining the extreme value thresholds. A
     * value that is not greater than the current outlier factor is not applied;
     * a message is printed to stderr instead.
     * 
     * @param value the factor.
     */
    public void setExtremeValuesFactor(double value) {
        boolean notAboveOutlier = value <= getOutlierFactor();
        if (!notAboveOutlier) {
            m_ExtremeValuesFactor = value;
            return;
        }
        System.err.println("ExtremeValuesFactor must be greater than OutlierFactor!");
    }

    /**
     * Reports the factor used for determining the extreme value thresholds.
     * 
     * @return the factor.
     */
    public double getExtremeValuesFactor() {
        return this.m_ExtremeValuesFactor;
    }

    /**
     * Provides the tip text for the extremeValuesAsOutliers property.
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String extremeValuesAsOutliersTipText() {
        final String tip = "Whether to tag extreme values also as outliers.";
        return tip;
    }

    /**
     * Switches the additional tagging of extreme values as outliers on or off.
     * 
     * @param value whether or not to tag extreme values also as outliers.
     */
    public void setExtremeValuesAsOutliers(boolean value) {
        this.m_ExtremeValuesAsOutliers = value;
    }

    /**
     * Reports whether extreme values are tagged as outliers as well.
     * 
     * @return true if extreme values are also tagged as outliers.
     */
    public boolean getExtremeValuesAsOutliers() {
        return this.m_ExtremeValuesAsOutliers;
    }

    /**
     * Provides the tip text for the detectionPerAttribute property.
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String detectionPerAttributeTipText() {
        final String tip = "Generates Outlier/ExtremeValue attribute pair for each numeric "
                + "attribute, not just a single pair for all numeric attributes together.";
        return tip;
    }

    /**
     * Chooses between one Outlier/ExtremeValue attribute pair per numeric
     * attribute ("true") and a single pair for all numeric attributes together
     * ("false"). Switching per-attribute detection off also switches the output
     * of the offset multiplier off.
     * 
     * @param value whether or not to generate indicator attribute pairs for each
     *          numeric attribute.
     */
    public void setDetectionPerAttribute(boolean value) {
        m_DetectionPerAttribute = value;
        if (value) {
            return;
        }
        // the offset attribute only exists in per-attribute mode
        m_OutputOffsetMultiplier = false;
    }

    /**
     * Reports whether one Outlier/ExtremeValue attribute pair is generated per
     * numeric attribute ("true") or a single pair for all numeric attributes
     * together ("false").
     * 
     * @return true if indicator attribute pairs are generated for each numeric
     *         attribute.
     */
    public boolean getDetectionPerAttribute() {
        return this.m_DetectionPerAttribute;
    }

    /**
     * Provides the tip text for the outputOffsetMultiplier property.
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String outputOffsetMultiplierTipText() {
        final String tip = "Generates an additional attribute 'Offset' that contains the "
                + "multiplier the value is off the median: "
                + "value = median + 'multiplier' * IQR";
        return tip;
    }

    /**
     * Switches the generation of the additional "Offset" attribute on or off.
     * The attribute is generated per Outlier/ExtremeValue attribute pair and
     * lists the multiplier the value is off the median:
     * value = median + 'multiplier' * IQR. Switching it on also switches
     * per-attribute detection on.
     * 
     * @param value whether or not to generate the additional attribute.
     */
    public void setOutputOffsetMultiplier(boolean value) {
        m_OutputOffsetMultiplier = value;
        if (!value) {
            return;
        }
        // offsets are only produced in per-attribute mode
        m_DetectionPerAttribute = true;
    }

    /**
     * Reports whether the additional "Offset" attribute is generated per
     * Outlier/ExtremeValue attribute pair; it lists the multiplier the value is
     * off the median: value = median + 'multiplier' * IQR.
     * 
     * @return true if the additional attribute is generated.
     */
    public boolean getOutputOffsetMultiplier() {
        return this.m_OutputOffsetMultiplier;
    }

    /**
     * Describes what kind of data this filter accepts: starting from the
     * superclass capabilities, everything is disabled and then all attribute
     * types, all class types, missing class values and datasets without a
     * class are enabled.
     * 
     * @return the capabilities of this object
     * @see Capabilities
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities caps = super.getCapabilities();

        // start from a clean slate
        caps.disableAll();

        // any attribute type may be present
        caps.enableAllAttributes();

        // any class type, missing class values or no class at all
        caps.enableAllClasses();
        caps.enable(Capability.MISSING_CLASS_VALUES);
        caps.enable(Capability.NO_CLASS);

        return caps;
    }

    /**
     * Builds the output header from the input header. The selected attribute
     * range is resolved against the input format, the class attribute and all
     * non-numeric attributes are marked as {@link #NON_NUMERIC}, and the
     * indicator attributes are appended after the original attributes: either
     * one Outlier/ExtremeValue pair in total, or one pair (plus an optional
     * numeric Offset attribute) per remaining selected attribute. The positions
     * of the outlier indicators are recorded in m_OutlierAttributePosition.
     * 
     * @param inputFormat the input format to base the output format on
     * @return the output format
     * @throws Exception in case the determination goes wrong
     * @see #hasImmediateOutputFormat()
     * @see #batchFinished()
     */
    @Override
    protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
        // resolve the range against the actual number of attributes
        m_Attributes.setUpper(inputFormat.numAttributes() - 1);
        m_AttributeIndices = m_Attributes.getSelection();

        // the class attribute and non-numeric attributes are not analysed
        for (int sel = 0; sel < m_AttributeIndices.length; sel++) {
            int attIndex = m_AttributeIndices[sel];
            if ((attIndex == inputFormat.classIndex()) || !inputFormat.attribute(attIndex).isNumeric()) {
                m_AttributeIndices[sel] = NON_NUMERIC;
            }
        }

        // the original attributes come first
        ArrayList<Attribute> header = new ArrayList<Attribute>();
        for (int a = 0; a < inputFormat.numAttributes(); a++) {
            header.add(inputFormat.attribute(a));
        }

        if (getDetectionPerAttribute()) {
            m_OutlierAttributePosition = new int[m_AttributeIndices.length];

            for (int sel = 0; sel < m_AttributeIndices.length; sel++) {
                if (m_AttributeIndices[sel] == NON_NUMERIC) {
                    continue;
                }

                Attribute source = inputFormat.attribute(m_AttributeIndices[sel]);
                m_OutlierAttributePosition[sel] = header.size();

                // the indicators inherit the weight of the attribute they describe
                Attribute outlierFlag = new Attribute(source.name() + "_Outlier", newNoYesLabels());
                outlierFlag.setWeight(source.weight());
                header.add(outlierFlag);

                Attribute extremeFlag = new Attribute(source.name() + "_ExtremeValue", newNoYesLabels());
                extremeFlag.setWeight(source.weight());
                header.add(extremeFlag);

                if (getOutputOffsetMultiplier()) {
                    Attribute offset = new Attribute(source.name() + "_Offset");
                    offset.setWeight(source.weight());
                    header.add(offset);
                }
            }
        } else {
            // a single indicator pair for all attributes together
            m_OutlierAttributePosition = new int[] { header.size() };
            header.add(new Attribute("Outlier", newNoYesLabels()));
            header.add(new Attribute("ExtremeValue", newNoYesLabels()));
        }

        // generate header
        Instances result = new Instances(inputFormat.relationName(), header, 0);
        result.setClassIndex(inputFormat.classIndex());

        return result;
    }

    /**
     * Creates a fresh label list for a no/yes indicator attribute.
     * 
     * @return a new list containing "no" and "yes", in that order
     */
    private static ArrayList<String> newNoYesLabels() {
        ArrayList<String> labels = new ArrayList<String>();
        labels.add("no");
        labels.add("yes");
        return labels;
    }

    /**
     * Computes, for every selected numeric attribute, the median, the
     * interquartile range and the thresholds for outliers and extreme values.
     * <p>
     * The quartiles are the medians of the lower and the upper half of the
     * sorted values; for an odd number of values the overall median belongs to
     * neither half. Medians over an even number of values are the mean of the
     * two middle values. With a single value all three quartiles equal that
     * value; without any values all results for the attribute are NaN.
     * </p>
     * 
     * @param instances the data to work on
     */
    protected void computeThresholds(Instances instances) {
        int numSelected = m_AttributeIndices.length;

        m_UpperExtremeValue = new double[numSelected];
        m_UpperOutlier = new double[numSelected];
        m_LowerOutlier = new double[numSelected];
        m_LowerExtremeValue = new double[numSelected];
        m_Median = new double[numSelected];
        m_IQR = new double[numSelected];

        for (int i = 0; i < numSelected; i++) {
            // non-numeric attribute?
            if (m_AttributeIndices[i] == NON_NUMERIC) {
                continue;
            }

            // sort attribute data; sortedIndices[k] is the position in values of
            // the k-th value in sorted order
            // NOTE(review): the values are used exactly as returned by
            // attributeToDoubleArray(); how missing values are encoded and where
            // Utils.sort() places them is not visible here - confirm whether they
            // should take part in the quartile computation.
            double[] values = instances.attributeToDoubleArray(m_AttributeIndices[i]);
            int[] sortedIndices = Utils.sort(values);
            int n = sortedIndices.length;

            double q1;
            double q2;
            double q3;

            if (n == 0) {
                // nothing to base the statistics on; NaN thresholds never match
                q1 = Double.NaN;
                q2 = Double.NaN;
                q3 = Double.NaN;
            } else {
                int half = n / 2; // size of the lower/upper half
                int quarter = half / 2;

                // median: middle value, or mean of the two middle values
                if (n % 2 == 1) {
                    q2 = values[sortedIndices[half]];
                } else {
                    q2 = (values[sortedIndices[half - 1]] + values[sortedIndices[half]]) / 2;
                }

                if (half == 0) {
                    // single value: both halves are empty
                    q1 = q2;
                    q3 = q2;
                } else if (half % 2 == 1) {
                    // odd-sized halves: the middle value of each half
                    q1 = values[sortedIndices[quarter]];
                    q3 = values[sortedIndices[n - quarter - 1]];
                } else {
                    // even-sized halves: mean of the two middle values of each
                    // half; the index pairs mirror each other around the centre
                    q1 = (values[sortedIndices[quarter - 1]] + values[sortedIndices[quarter]]) / 2;
                    q3 = (values[sortedIndices[n - quarter - 1]] + values[sortedIndices[n - quarter]]) / 2;
                }
            }

            // determine thresholds and other values
            m_Median[i] = q2;
            m_IQR[i] = q3 - q1;
            m_UpperExtremeValue[i] = q3 + getExtremeValuesFactor() * m_IQR[i];
            m_UpperOutlier[i] = q3 + getOutlierFactor() * m_IQR[i];
            m_LowerOutlier[i] = q1 - getOutlierFactor() * m_IQR[i];
            m_LowerExtremeValue[i] = q1 - getExtremeValuesFactor() * m_IQR[i];
        }
    }

    /**
     * Gives access to one of the per-attribute arrays determined by the filter
     * (thresholds, median or IQR). The internal array itself is returned.
     * 
     * @param type the type of values to return
     * @return the values
     * @throws IllegalArgumentException if the type is not handled
     */
    public double[] getValues(ValueType type) {
        double[] selected;

        switch (type) {
        case UPPER_EXTREME_VALUES:
            selected = m_UpperExtremeValue;
            break;
        case UPPER_OUTLIER_VALUES:
            selected = m_UpperOutlier;
            break;
        case LOWER_OUTLIER_VALUES:
            selected = m_LowerOutlier;
            break;
        case LOWER_EXTREME_VALUES:
            selected = m_LowerExtremeValue;
            break;
        case MEDIAN:
            selected = m_Median;
            break;
        case IQR:
            selected = m_IQR;
            break;
        default:
            throw new IllegalArgumentException("Unhandled value type: " + type);
        }

        return selected;
    }

    /**
     * Tests whether the instance's value for one selected attribute lies in an
     * outlier band, i.e. above the upper outlier threshold up to and including
     * the upper extreme value threshold, or from the lower extreme value
     * threshold (inclusive) up to, but excluding, the lower outlier threshold.
     * 
     * @param inst the instance to test
     * @param index the position in m_AttributeIndices (and the threshold
     *          arrays) of the attribute to test
     * @return true if the instance is an outlier
     */
    protected boolean isOutlier(Instance inst, int index) {
        double v = inst.value(m_AttributeIndices[index]);

        // upper band
        if ((m_UpperOutlier[index] < v) && (v <= m_UpperExtremeValue[index])) {
            return true;
        }

        // lower band
        return (m_LowerExtremeValue[index] <= v) && (v < m_LowerOutlier[index]);
    }

    /**
     * Tests whether the instance is an outlier in at least one of the selected
     * numeric attributes.
     * 
     * @param inst the instance to test
     * @return true if the instance is an outlier
     */
    protected boolean isOutlier(Instance inst) {
        for (int pos = 0; pos < m_AttributeIndices.length; pos++) {
            // skipped attributes cannot make the instance an outlier
            if (m_AttributeIndices[pos] == NON_NUMERIC) {
                continue;
            }
            if (isOutlier(inst, pos)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Tests whether the instance's value for one selected attribute lies beyond
     * the upper or the lower extreme value threshold.
     * 
     * @param inst the instance to test
     * @param index the position in m_AttributeIndices (and the threshold
     *          arrays) of the attribute to test
     * @return true if the instance is an extreme value
     */
    protected boolean isExtremeValue(Instance inst, int index) {
        double v = inst.value(m_AttributeIndices[index]);

        if (v > m_UpperExtremeValue[index]) {
            return true;
        }

        return v < m_LowerExtremeValue[index];
    }

    /**
     * Tests whether the instance has an extreme value in at least one of the
     * selected numeric attributes.
     * 
     * @param inst the instance to test
     * @return true if the instance is an extreme value
     */
    protected boolean isExtremeValue(Instance inst) {
        for (int pos = 0; pos < m_AttributeIndices.length; pos++) {
            // skipped attributes cannot make the instance an extreme value
            if (m_AttributeIndices[pos] == NON_NUMERIC) {
                continue;
            }
            if (isExtremeValue(inst, pos)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Computes how many IQRs the instance's value is away from the median of
     * the given attribute, i.e. (value - median) / IQR.
     * 
     * @param inst the instance to test
     * @param index the position in m_AttributeIndices (and the median/IQR
     *          arrays) of the attribute
     * @return the multiplier
     */
    protected double calculateMultiplier(Instance inst, int index) {
        double distanceFromMedian = inst.value(m_AttributeIndices[index]) - m_Median[index];
        return distanceFromMedian / m_IQR[index];
    }

    /**
     * Tags every instance of the given data with the outlier/extreme value
     * indicators (and, if enabled, the offset multipliers) and returns the
     * tagged data in the output format. The thresholds are computed from the
     * given data as long as the first batch has not been finished; afterwards
     * the previously computed thresholds are reused.
     * 
     * @param instances the data to process
     * @return the modified data
     * @throws Exception in case the processing goes wrong
     * @see #batchFinished()
     */
    @Override
    protected Instances process(Instances instances) throws Exception {
        if (!isFirstBatchDone()) {
            computeThresholds(instances);
        }

        Instances result = getOutputFormat();
        int numAttOld = instances.numAttributes();
        int numAttNew = result.numAttributes();

        for (int n = 0; n < instances.numInstances(); n++) {
            Instance instOld = instances.instance(n);

            // original values first; the appended indicator slots start at 0,
            // which is the index of the "no" label
            double[] values = new double[numAttNew];
            System.arraycopy(instOld.toDoubleArray(), 0, values, 0, numAttOld);

            // per attribute?
            if (!getDetectionPerAttribute()) {
                // outlier?
                if (isOutlier(instOld)) {
                    values[m_OutlierAttributePosition[0]] = 1;
                }
                // extreme value?
                if (isExtremeValue(instOld)) {
                    values[m_OutlierAttributePosition[0] + 1] = 1;
                    // tag extreme values also as outliers?
                    if (getExtremeValuesAsOutliers()) {
                        values[m_OutlierAttributePosition[0]] = 1;
                    }
                }
            } else {
                for (int i = 0; i < m_AttributeIndices.length; i++) {
                    // non-numeric attribute?
                    if (m_AttributeIndices[i] == NON_NUMERIC) {
                        continue;
                    }

                    // The helpers expect the position i within m_AttributeIndices
                    // (they look up the attribute index and the thresholds
                    // themselves), not the attribute index stored at that position.

                    // outlier?
                    if (isOutlier(instOld, i)) {
                        values[m_OutlierAttributePosition[i]] = 1;
                    }
                    // extreme value?
                    if (isExtremeValue(instOld, i)) {
                        values[m_OutlierAttributePosition[i] + 1] = 1;
                        // tag extreme values also as outliers?
                        if (getExtremeValuesAsOutliers()) {
                            values[m_OutlierAttributePosition[i]] = 1;
                        }
                    }
                    // add multiplier?
                    if (getOutputOffsetMultiplier()) {
                        values[m_OutlierAttributePosition[i] + 2] = calculateMultiplier(instOld, i);
                    }
                }
            }

            // generate new instance
            // NOTE(review): the new instance gets weight 1.0 rather than
            // instOld.weight() - confirm that dropping the weight is intended.
            Instance instNew = new DenseInstance(1.0, values);
            instNew.setDataset(result);

            // copy possible strings, relational values...
            copyValues(instNew, false, instOld.dataset(), outputFormatPeek());

            // add to output
            result.add(instNew);
        }

        return result;
    }

    /**
     * Provides the revision string of this class, extracted from the
     * version-control keyword.
     * 
     * @return the revision
     */
    @Override
    public String getRevision() {
        final String revision = RevisionUtils.extract("$Revision$");
        return revision;
    }

    /**
     * Command-line entry point: runs a new instance of this filter with the
     * given arguments.
     * 
     * @param args should contain arguments to the filter: use -h for help
     */
    public static void main(String[] args) {
        InterquartileRange filter = new InterquartileRange();
        runFilter(filter, args);
    }
}