mulan.classifier.transformation.PrunedSets.java Source code

Java tutorial

Introduction

Here is the source code for mulan.classifier.transformation.PrunedSets.java

Source

/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    PrunedSets.java
 *    Copyright (C) 2009-2010 Aristotle University of Thessaloniki, Thessaloniki, Greece
 */
package mulan.classifier.transformation;

import java.util.ArrayList;

import java.util.logging.Level;
import java.util.logging.Logger;
import mulan.data.LabelSet;
import mulan.data.DataUtils;
import weka.classifiers.Classifier;
import weka.core.Instance;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;

/**
 * Class that implements the PrunedSets algorithm <p>
 *
 * @author Grigorios Tsoumakas 
 * @version June 4, 2010
 */
public class PrunedSets extends LabelsetPruning {

    /** strategies for processing infrequent labelsets*/
    public enum Strategy {

        /**
         * Strategy A: rank subsets firstly by the number of labels they
         * contain and secondly by the times they occur, then keep top b ranked
         */
        A,
        /**
         * Strategy B: keep all subsets of size greater than b
         */
        B;
    };

    /** strategy for processing infrequent labelsets */
    private Strategy strategy;
    /** parameter of strategy for processing infrequent labelsets*/
    private int b;

    /**
     * Constructor that initializes learner with base algorithm, parameter p
     * and strategy for processing infrequent labelsets
     *
     * @param classifier base single-label classification algorithm
     * @param aP number of instances required for a labelset to be included.
     * @param aStrategy strategy for processing infrequent labelsets
     * @param aB parameter of the strategy for processing infrequent labelsets
     */
    public PrunedSets(Classifier classifier, int aP, Strategy aStrategy, int aB) {
        super(classifier, aP);
        b = aB;
        strategy = aStrategy;
        setConfidenceCalculationMethod(2);
        setMakePredictionsBasedOnConfidences(false);
    }

    /**
     * Returns an instance of a TechnicalInformation object, containing
     * detailed information about the technical background of this class,
     * e.g., paper reference or book this class is based on.
     *
     * @return the technical information about this class
     */
    @Override
    public TechnicalInformation getTechnicalInformation() {
        TechnicalInformation result = new TechnicalInformation(Type.INPROCEEDINGS);
        result.setValue(Field.AUTHOR, "Read, Jesse and Pfahringer, Bernhard and Holmes, Geoff");
        result.setValue(Field.TITLE, "Multi-Label Classification using Ensembles of Pruned Sets");
        result.setValue(Field.PAGES, "995-1000");
        result.setValue(Field.BOOKTITLE, "Proc. 8th IEEE International Conference on Data Mining (ICDM 2008)");
        result.setValue(Field.YEAR, "2008");
        return result;
    }

    @Override
    ArrayList<Instance> processRejected(LabelSet ls) {
        ArrayList<LabelSet> subsets = null;
        ArrayList<Instance> instances = null;
        ArrayList<Instance> newInstances = null;
        switch (strategy) {
        case A:
            // split LabelSet into smaller ones
            //debug System.out.println("original:" + ls.toString());
            subsets = null;
            try {
                subsets = ls.getSubsets();
            } catch (Exception ex) {
                Logger.getLogger(PrunedSets.class.getName()).log(Level.SEVERE, null, ex);
            }
            //System.out.println("subsets: " + subsets.toString());
            ArrayList<LabelSet> sortedSubsets = new ArrayList<LabelSet>();
            for (LabelSet l : subsets) {
                //System.out.println(l.toString());
                // check if it exists in the training set
                if (!ListInstancePerLabel.containsKey(l)) {
                    continue;
                }
                // check if it occurs more than p times 
                if (ListInstancePerLabel.get(l).size() <= p) {
                    continue;
                }
                //
                boolean added = false;
                for (int i = 0; i < sortedSubsets.size(); i++) {
                    LabelSet l2 = sortedSubsets.get(i);
                    if (l.size() > l2.size()) {
                        sortedSubsets.add(i, l);
                        //System.out.println("adding " + l.toString());
                        added = true;
                        break;
                    }
                    if (l.size() == l2.size()
                            && ListInstancePerLabel.get(l).size() > ListInstancePerLabel.get(l2).size()) {
                        sortedSubsets.add(i, l);
                        //System.out.println("adding " + l.toString());
                        added = true;
                        break;
                    }
                }
                if (added == false) {
                    //System.out.println("adding " + l.toString());
                    sortedSubsets.add(l);
                }
                //System.out.println("sorted: " + sortedSubsets.toString());
            }
            // take the top b
            newInstances = new ArrayList<Instance>();
            instances = ListInstancePerLabel.get(ls);
            for (Instance tempInstance : instances) {
                int counter = 0;
                for (LabelSet l : sortedSubsets) {
                    double[] temp = tempInstance.toDoubleArray();
                    double[] tempLabels = l.toDoubleArray();
                    for (int i = 0; i < numLabels; i++) {
                        if (format.attribute(labelIndices[i]).value(0).equals("0"))
                            temp[labelIndices[i]] = tempLabels[i];
                        else
                            temp[labelIndices[i]] = 1 - tempLabels[i];
                    }
                    Instance newInstance = DataUtils.createInstance(tempInstance, 1, temp);
                    newInstances.add(newInstance);
                    counter++;
                    if (counter == b) {
                        break;
                    }
                }
            }
            return newInstances;
        case B:
            // split LabelSet into smaller ones
            //debug 
            System.out.println("original:" + ls.toString());
            subsets = null;
            try {
                subsets = ls.getSubsets();
            } catch (Exception ex) {
                Logger.getLogger(PrunedSets.class.getName()).log(Level.SEVERE, null, ex);
            }
            ArrayList<LabelSet> subsetsForInsertion = new ArrayList<LabelSet>();
            for (LabelSet l : subsets) {
                // check if it exists in the training set
                if (!ListInstancePerLabel.containsKey(l)) {
                    continue;
                }
                // check if it occurs more than p times
                if (ListInstancePerLabel.get(l).size() <= p) {
                    continue;
                }
                // check if it has more than b elements
                if (l.size() <= b) {
                    continue;
                }
                subsetsForInsertion.add(l);
            }

            // insert subsetsForInsertion with corresponding instances
            // from the original labelset
            instances = ListInstancePerLabel.get(ls);
            newInstances = new ArrayList<Instance>();
            for (Instance tempInstance : instances) {
                for (LabelSet l : subsetsForInsertion) {
                    double[] temp = tempInstance.toDoubleArray();
                    double[] tempLabels = l.toDoubleArray();
                    for (int i = 0; i < numLabels; i++) {
                        if (format.attribute(labelIndices[i]).value(0).equals("0"))
                            temp[labelIndices[i]] = tempLabels[i];
                        else
                            temp[labelIndices[i]] = 1 - tempLabels[i];
                    }
                    Instance newInstance = DataUtils.createInstance(tempInstance, 1, temp);
                    newInstances.add(newInstance);
                }
            }
            System.out.println("finished");
            return newInstances;
        default:
            return null;
        }
    }
}