zhaop.textmining.proj.MultiFilter.java Source code

Java tutorial

Introduction

Here is the source code for zhaop.textmining.proj.MultiFilter.java

Source

/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * MultiFilter.java
 * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
 *
 */

package zhaop.textmining.proj;

import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.filters.*;
import java.util.Enumeration;
import java.util.Vector;

/**
 * <!-- globalinfo-start --> Applies several filters successively. In case all
 * supplied filters are StreamableFilters, it will act as a streamable one, too.
 * <p/>
 * <!-- globalinfo-end -->
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 * -D
 *  Turns on output of debugging information.
 * </pre>
 * 
 * <pre>
 * -F &lt;classname [options]&gt;
 *  A filter to apply (can be specified multiple times).
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * @author FracPete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 1.7 $
 * @see weka.filters.StreamableFilter
 */
public class MultiFilter extends SimpleStreamFilter {

    /** for serialization */
    private static final long serialVersionUID = -6293720886005713120L;

    /** The filters, applied in array order; starts out as a single AllFilter. */
    protected Filter m_Filters[] = { new AllFilter() };

    /** caches the streamable state (only meaningful while m_StreamableChecked is true) */
    protected boolean m_Streamable = false;

    /** whether the streamable state has been computed since the last reset */
    protected boolean m_StreamableChecked = false;

    /**
     * Returns a string describing this filter
     * 
     * @return a description of the filter suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String globalInfo() {
        return "Applies several filters successively. In case all supplied filters "
                + "are StreamableFilters, it will act as a streamable one, too.";
    }

    /**
     * Returns an enumeration describing the available options: everything the
     * superclass lists, followed by the -F option of this filter.
     * 
     * @return an enumeration of all the available options.
     */
    public Enumeration listOptions() {
        Vector result = new Vector();

        Enumeration inherited = super.listOptions();
        while (inherited.hasMoreElements()) {
            result.addElement(inherited.nextElement());
        }

        result.addElement(new Option("\tA filter to apply (can be specified multiple times).", "F", 1,
                "-F <classname [options]>"));

        return result.elements();
    }

    /**
     * Parses a list of options for this object.
     * <p/>
     * 
     * <!-- options-start --> Valid options are:
     * <p/>
     * 
     * <pre>
     * -D
     *  Turns on output of debugging information.
     * </pre>
     * 
     * <pre>
     * -F &lt;classname [options]&gt;
     *  A filter to apply (can be specified multiple times).
     * </pre>
     * 
     * <!-- options-end -->
     * 
     * If no -F option is given, a single AllFilter is used.
     * 
     * @param options
     *          the list of options as an array of strings
     * @throws Exception
     *           if an option is not supported, or if a -F value contains no
     *           class name (IllegalArgumentException)
     */
    public void setOptions(String[] options) throws Exception {
        super.setOptions(options);

        Vector filters = new Vector();
        String spec;
        while ((spec = Utils.getOption("F", options)).length() != 0) {
            String[] filterOptions = Utils.splitOptions(spec);
            // a specification that splits into nothing has no class name at
            // index 0; report it instead of failing on the array access
            if (filterOptions.length == 0) {
                throw new IllegalArgumentException("Invalid filter specification: '" + spec + "'");
            }

            String classname = filterOptions[0];
            // blank out the class name so only the remaining entries are
            // handed over as the filter's own options
            filterOptions[0] = "";
            filters.add(Utils.forName(Filter.class, classname, filterOptions));
        }

        // at least one filter
        if (filters.size() == 0) {
            filters.add(new AllFilter());
        }

        setFilters((Filter[]) filters.toArray(new Filter[filters.size()]));
    }

    /**
     * Gets the current settings of the filter: the superclass options followed
     * by one "-F &lt;spec&gt;" pair per enclosed filter.
     * 
     * @return an array of strings suitable for passing to setOptions
     */
    public String[] getOptions() {
        Vector result = new Vector();

        String[] inherited = super.getOptions();
        for (int i = 0; i < inherited.length; i++) {
            result.add(inherited[i]);
        }

        Filter[] filters = getFilters();
        for (int i = 0; i < filters.length; i++) {
            result.add("-F");
            result.add(getFilterSpec(filters[i]));
        }

        return (String[]) result.toArray(new String[result.size()]);
    }

    /**
     * Returns the Capabilities of this filter: those of the first enclosed
     * filter, or the superclass capabilities if there are no filters.
     * 
     * @return the capabilities of this object
     * @see Capabilities
     */
    public Capabilities getCapabilities() {
        Filter[] filters = getFilters();
        if (filters.length == 0) {
            return super.getCapabilities();
        }
        return filters[0].getCapabilities();
    }

    /**
     * Resets the filter through the superclass and additionally invalidates the
     * cached streamable state, so that it is recomputed by the next call of
     * isStreamableFilter().
     * 
     * @see #isStreamableFilter()
     */
    protected void reset() {
        super.reset();
        m_StreamableChecked = false;
    }

    /**
     * Sets the list of possible filters to choose from. Also resets the state of
     * the filter (this reset doesn't affect the options).
     * 
     * @param filters
     *          an array of filters with all options set; null is treated as an
     *          empty array so that later length checks cannot fail
     * @see #reset()
     */
    public void setFilters(Filter[] filters) {
        m_Filters = (filters == null) ? new Filter[0] : filters;
        reset();
    }

    /**
     * Gets the list of possible filters to choose from.
     * 
     * @return the array of Filters (the internal array, not a copy)
     */
    public Filter[] getFilters() {
        return m_Filters;
    }

    /**
     * Returns the tip text for this property
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String filtersTipText() {
        return "The base filters to be used.";
    }

    /**
     * Gets a single filter from the set of available filters.
     * 
     * @param index
     *          the index of the filter wanted
     * @return the Filter
     */
    public Filter getFilter(int index) {
        return m_Filters[index];
    }

    /**
     * returns the filter classname and the options as one string
     * 
     * @param filter
     *          the filter to get the specs for
     * @return the classname plus options, or an empty string if the filter is
     *         null
     */
    protected String getFilterSpec(Filter filter) {
        if (filter == null) {
            return "";
        }

        String spec = filter.getClass().getName();
        if (filter instanceof OptionHandler) {
            spec += " " + Utils.joinOptions(((OptionHandler) filter).getOptions());
        }
        return spec;
    }

    /**
     * tests whether all the enclosed filters are streamable. The result is
     * computed once and cached until the next reset().
     * 
     * @return true if all the enclosed filters are streamable
     */
    public boolean isStreamableFilter() {
        if (!m_StreamableChecked) {
            m_Streamable = true;
            m_StreamableChecked = true;

            Filter[] filters = getFilters();
            for (int i = 0; i < filters.length && m_Streamable; i++) {
                Filter current = filters[i];
                // nested MultiFilters are asked about their own contents first;
                // NOTE(review): presumably because a MultiFilter would otherwise
                // pass the StreamableFilter test regardless of what it wraps -
                // confirm against SimpleStreamFilter
                if (current instanceof MultiFilter) {
                    m_Streamable = ((MultiFilter) current).isStreamableFilter();
                } else {
                    m_Streamable = (current instanceof StreamableFilter);
                }
            }

            if (getDebug()) {
                System.out.println("Streamable: " + m_Streamable);
            }
        }

        return m_Streamable;
    }

    /**
     * Returns true if the output format is immediately available after the input
     * format has been set and not only after all the data has been seen (see
     * batchFinished()). For this filter that is the case exactly when all
     * enclosed filters are streamable.
     * 
     * @return true if the output format is immediately available
     * @see #isStreamableFilter()
     * @see #batchFinished()
     * @see #setInputFormat(Instances)
     */
    protected boolean hasImmediateOutputFormat() {
        return isStreamableFilter();
    }

    /**
     * Determines the output format by chaining the formats through all enclosed
     * filters: each filter's output format becomes the next filter's input
     * format. Before the first batch is done, the input format of every filter
     * is (re)set on the way.
     * <p/>
     * Note that the chain starts from getInputFormat(); the inputFormat argument
     * itself is not consulted.
     * 
     * @param inputFormat
     *          the input format to base the output format on
     * @return the output format of the last filter (or the input format if
     *         there are no filters)
     * @throws Exception
     *           in case the determination goes wrong
     * @see #hasImmediateOutputFormat()
     * @see #batchFinished()
     */
    protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
        Instances format = getInputFormat();

        Filter[] filters = getFilters();
        for (int i = 0; i < filters.length; i++) {
            if (!isFirstBatchDone()) {
                filters[i].setInputFormat(format);
            }
            format = filters[i].getOutputFormat();
        }

        return format;
    }

    /**
     * processes a copy of the given instance by passing it through all enclosed
     * filters in order and returns the final result.
     * <p/>
     * If a filter reports via input() that it has nothing to collect, the chain
     * stops there and null is returned, instead of handing a missing instance on
     * to the following filters.
     * 
     * @param instance
     *          the instance to process
     * @return the modified data, or null if one of the filters did not make an
     *         output instance available
     * @throws Exception
     *           in case the processing goes wrong
     */
    protected Instance process(Instance instance) throws Exception {
        Instance result = (Instance) instance.copy();

        Filter[] filters = getFilters();
        for (int i = 0; i < filters.length; i++) {
            if (!filters[i].input(result)) {
                // nothing to collect from this filter: the remaining filters
                // must not be fed a non-existent instance
                return null;
            }
            result = filters[i].output();
        }

        return result;
    }

    /**
     * Processes the given data by running it through all enclosed filters in
     * order with Filter.useFilter and returns the final dataset. Before the
     * first batch is done, each filter gets its input format set from the data
     * it is about to receive. Progress is reported on stdout only in debug mode.
     * 
     * @param instances
     *          the data to process
     * @return the modified data
     * @throws Exception
     *           in case the processing goes wrong
     * @see #batchFinished()
     * @see #process(Instance)
     */
    protected Instances process(Instances instances) throws Exception {
        Instances result = instances;

        Filter[] filters = getFilters();
        for (int i = 0; i < filters.length; i++) {
            Filter current = filters[i];

            // diagnostic output belongs to debug mode only, like the
            // "Streamable" message in isStreamableFilter()
            if (getDebug()) {
                System.out.println("trying to do filter: " + current.getClass().getSimpleName());
            }

            if (!isFirstBatchDone()) {
                current.setInputFormat(result);
            }

            result = Filter.useFilter(result, current);
        }

        return result;
    }

    /**
     * Returns the revision string.
     * 
     * @return the revision
     */
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 1.7 $");
    }

    /**
     * Main method for executing this class.
     * 
     * @param args
     *          should contain arguments for the filter: use -h for help
     */
    public static void main(String[] args) {
        runFilter(new MultiFilter(), args);
    }
}