mulan.classifier.meta.HOMER.java Source code

Java tutorial

Introduction

Here is the source code for mulan.classifier.meta.HOMER.java

Source

/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    HOMER.java
 *    Copyright (C) 2009-2010 Aristotle University of Thessaloniki, Thessaloniki, Greece
 */
package mulan.classifier.meta;

import java.util.Set;
import mulan.classifier.MultiLabelLearner;
import mulan.classifier.MultiLabelOutput;
import mulan.data.MultiLabelInstances;
import mulan.data.DataUtils;
import mulan.classifier.meta.HierarchyBuilder.Method;
import mulan.data.LabelsMetaData;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.*;

/**
 * <!-- globalinfo-start -->
 *
 * <pre>
 * Class implementing the HOMER algorithm
 * </pre>
 *
 * For more information:
 *
 * <pre>
 * G. Tsoumakas, I. Katakis, I. Vlahavas, "Effective and Efficient Multilabel
 * Classification in Domains with Large Number of Labels", Proc. ECML/PKDD 2008
 * Workshop on Mining Multidimensional Data (MMD'08), Antwerp, Belgium, 2008.
 * </pre>
 *
 * <!-- globalinfo-end -->
 *
 * <!-- technical-bibtex-start --> BibTeX:
 *
 * <!-- technical-bibtex-end -->
 *
 * <p>Training proceeds in three steps: a {@link HierarchyBuilder} derives a
 * label hierarchy from the training set, the training set is converted into a
 * hierarchical multi-label dataset, and an {@link HMC} learner wrapping the
 * base learner is trained on that dataset. Predictions are delegated to the
 * {@link HMC} learner and then cut down to the first {@code numLabels}
 * entries.</p>
 *
 * @author Grigorios Tsoumakas
 * @version 0.01
 */
public class HOMER extends MultiLabelMetaLearner {

    /** Value handed to the {@link HierarchyBuilder} as its first constructor argument. */
    private final int numClusters;
    /** Hierarchical learner trained in {@link #buildInternal}; {@code null} until then. */
    private HMC hmc;
    /** Builder used to derive the label hierarchy; created in {@link #buildInternal}. */
    private HierarchyBuilder hb;
    /** Empty copy (zero instances) of the hierarchical dataset, used as the dataset of transformed instances. */
    private Instances header;
    /** Hierarchy construction method handed to the {@link HierarchyBuilder}. */
    private Method method;
    /** Not referenced anywhere in this class; retained so the set of fields is unchanged. */
    private MultiLabelInstances m;
    /** Number of labels in the built hierarchy that are not labels of the training set. */
    private int numMetaLabels;

    /**
     * Creates a new HOMER learner.
     *
     * @param mll      the base multi-label learner, passed to the superclass
     * @param clusters the number of clusters given to the {@link HierarchyBuilder}
     * @param method   the method given to the {@link HierarchyBuilder}
     */
    public HOMER(MultiLabelLearner mll, int clusters, Method method) {
        super(mll);
        this.method = method;
        numClusters = clusters;
    }

    /**
     * Builds the label hierarchy, the hierarchical dataset and the underlying
     * {@link HMC} model, and records how many meta-labels the hierarchy added.
     *
     * @param trainingSet the multi-label training data
     * @throws Exception if any of the delegated build steps fails
     */
    @Override
    protected void buildInternal(MultiLabelInstances trainingSet) throws Exception {
        // Progress is reported through debug() only, so nothing is written to
        // standard output unless debugging has been switched on.
        debug("Learning the hierarchy of models");
        hb = new HierarchyBuilder(numClusters, method);
        LabelsMetaData labelHierarchy = hb.buildLabelHierarchy(trainingSet);

        debug("Constructing the hierarchical multilabel dataset");
        MultiLabelInstances meta = HierarchyBuilder.createHierarchicalDataset(trainingSet, labelHierarchy);
        // Keep only the structure of the hierarchical dataset (capacity 0).
        header = new Instances(meta.getDataSet(), 0);

        debug("Training the hierarchical classifier");
        hmc = new HMC(baseLearner);
        hmc.setDebug(getDebug());
        hmc.build(meta);

        // Count the labels of the hierarchy that are not labels of the training
        // set. This is done without modifying the set returned by
        // getLabelNames(), so it does not depend on that set being a mutable
        // private copy.
        Set<String> leafLabels = trainingSet.getLabelsMetaData().getLabelNames();
        int metaLabelCount = 0;
        for (String labelName : labelHierarchy.getLabelNames()) {
            if (!leafLabels.contains(labelName)) {
                metaLabelCount++;
            }
        }
        numMetaLabels = metaLabelCount;
    }

    /**
     * Predicts the labels of an instance by delegating to the trained
     * {@link HMC} learner.
     *
     * @param instance the instance to classify
     * @return an output holding the first {@code numLabels} entries of the
     *         bipartition and of the confidences produced by the {@link HMC}
     *         learner
     * @throws Exception if the delegated prediction fails
     */
    @Override
    protected MultiLabelOutput makePredictionInternal(Instance instance) throws Exception {
        // Work on a copy and append one attribute per meta-label, so the copy
        // has numMetaLabels more attributes than the incoming instance before
        // it is attached to the hierarchical header.
        Instance transformed = DataUtils.createInstance(instance, instance.weight(), instance.toDoubleArray());
        for (int i = 0; i < numMetaLabels; i++) {
            transformed.insertAttributeAt(transformed.numAttributes());
        }

        transformed.setDataset(header);
        MultiLabelOutput mlo = hmc.makePrediction(transformed);

        // Keep only the first numLabels entries of each array.
        // NOTE(review): presumably the original labels precede the meta-labels
        // in the HMC output - confirm against HMC/HierarchyBuilder.
        boolean[] oldBipartition = mlo.getBipartition();
        boolean[] newBipartition = new boolean[numLabels];
        System.arraycopy(oldBipartition, 0, newBipartition, 0, numLabels);

        double[] oldConfidences = mlo.getConfidences();
        double[] newConfidences = new double[numLabels];
        System.arraycopy(oldConfidences, 0, newConfidences, 0, numLabels);

        return new MultiLabelOutput(newBipartition, newConfidences);
    }

    /**
     * Returns the bibliographic reference of the paper describing the algorithm.
     *
     * @return the technical information about this class
     */
    @Override
    public TechnicalInformation getTechnicalInformation() {
        TechnicalInformation result = new TechnicalInformation(Type.INPROCEEDINGS);
        result.setValue(Field.AUTHOR, "Grigorios Tsoumakas and Ioannis Katakis and Ioannis Vlahavas");
        result.setValue(Field.TITLE,
                "Effective and Efficient Multilabel Classification in Domains with Large Number of Labels");
        result.setValue(Field.BOOKTITLE, "Proc. ECML/PKDD 2008 Workshop on Mining Multidimensional Data (MMD'08)");
        result.setValue(Field.LOCATION, "Antwerp, Belgium");
        result.setValue(Field.YEAR, "2008");
        return result;
    }

    // Temporary accessors added for complexity measures. Each one delegates to
    // the HMC learner, which is only assigned in buildInternal, so they must
    // not be called before the learner has been built.

    /**
     * Delegates to {@code HMC.getNoNodes()}.
     *
     * @return the value reported by the underlying {@link HMC} learner
     */
    public long getNoNodes() {
        return hmc.getNoNodes();
    }

    /**
     * Delegates to {@code HMC.getNoClassifierEvals()}.
     *
     * @return the value reported by the underlying {@link HMC} learner
     */
    public long getNoClassifierEvals() {
        return hmc.getNoClassifierEvals();
    }

    /**
     * Delegates to {@code HMC.getTotalUsedTrainInsts()}.
     *
     * @return the value reported by the underlying {@link HMC} learner
     */
    public long getTotalUsedTrainInsts() {
        return hmc.getTotalUsedTrainInsts();
    }
}