Example usage for weka.filters.unsupervised.attribute Remove batchFinished

List of usage examples for weka.filters.unsupervised.attribute Remove batchFinished

Introduction

In this page you can find the example usage for weka.filters.unsupervised.attribute Remove batchFinished.

Prototype

public boolean batchFinished() throws Exception 

Source Link

Document

Signify that this batch of input to the filter is finished.

Usage

From source file:core.ClusterEvaluationEX.java

License:Open Source License

/**
 * Evaluates a clusterer with the options given in an array of
 * strings. It takes the string indicated by "-t" as training file, the
 * string indicated by "-T" as test file.
 * If the test file is missing, a stratified ten-fold
 * cross-validation is performed (distribution clusterers only).
 * Using "-x" you can change the number of
 * folds to be used, and using "-s" the random seed.
 * If the "-p" option is present it outputs the classification for
 * each test instance. If you provide the name of an object file using
 * "-l", a clusterer will be loaded from the given file. If you provide the
 * name of an object file using "-d", the clusterer built from the
 * training data will be saved to the given file.
 *
 * @param clusterer machine learning clusterer
 * @param options the array of string containing the options
 * @throws Exception if model could not be evaluated successfully
 * @return a string describing the results 
 *//*  ww  w .  java 2 s  .  com*/
public static String evaluateClusterer(Clusterer clusterer, String[] options) throws Exception {

    int seed = 1, folds = 10;
    boolean doXval = false;
    Instances train = null;
    Random random;
    String trainFileName, testFileName, seedString, foldsString;
    String objectInputFileName, objectOutputFileName, attributeRangeString;
    String graphFileName;
    String[] savedOptions = null;
    boolean printClusterAssignments = false;
    Range attributesToOutput = null;
    StringBuffer text = new StringBuffer();
    int theClass = -1; // class based evaluation of clustering
    boolean updateable = (clusterer instanceof UpdateableClusterer);
    DataSource source = null;
    Instance inst;

    if (Utils.getFlag('h', options) || Utils.getFlag("help", options)) {

        // global info requested as well?
        boolean globalInfo = Utils.getFlag("synopsis", options) || Utils.getFlag("info", options);

        throw new Exception("Help requested." + makeOptionString(clusterer, globalInfo));
    }

    try {
        // Get basic options (options the same for all clusterers
        //printClusterAssignments = Utils.getFlag('p', options);
        objectInputFileName = Utils.getOption('l', options);
        objectOutputFileName = Utils.getOption('d', options);
        trainFileName = Utils.getOption('t', options);
        testFileName = Utils.getOption('T', options);
        graphFileName = Utils.getOption('g', options);

        // Check -p option
        try {
            attributeRangeString = Utils.getOption('p', options);
        } catch (Exception e) {
            throw new Exception(e.getMessage() + "\nNOTE: the -p option has changed. "
                    + "It now expects a parameter specifying a range of attributes "
                    + "to list with the predictions. Use '-p 0' for none.");
        }
        if (attributeRangeString.length() != 0) {
            printClusterAssignments = true;
            if (!attributeRangeString.equals("0"))
                attributesToOutput = new Range(attributeRangeString);
        }

        if (trainFileName.length() == 0) {
            if (objectInputFileName.length() == 0) {
                throw new Exception("No training file and no object " + "input file given.");
            }

            if (testFileName.length() == 0) {
                throw new Exception("No training file and no test file given.");
            }
        } else {
            if ((objectInputFileName.length() != 0) && (printClusterAssignments == false)) {
                throw new Exception("Can't use both train and model file " + "unless -p specified.");
            }
        }

        seedString = Utils.getOption('s', options);

        if (seedString.length() != 0) {
            seed = Integer.parseInt(seedString);
        }

        foldsString = Utils.getOption('x', options);

        if (foldsString.length() != 0) {
            folds = Integer.parseInt(foldsString);
            doXval = true;
        }
    } catch (Exception e) {
        throw new Exception('\n' + e.getMessage() + makeOptionString(clusterer, false));
    }

    try {
        if (trainFileName.length() != 0) {
            source = new DataSource(trainFileName);
            train = source.getStructure();

            String classString = Utils.getOption('c', options);
            if (classString.length() != 0) {
                if (classString.compareTo("last") == 0)
                    theClass = train.numAttributes();
                else if (classString.compareTo("first") == 0)
                    theClass = 1;
                else
                    theClass = Integer.parseInt(classString);

                if (theClass != -1) {
                    if (doXval || testFileName.length() != 0)
                        throw new Exception("Can only do class based evaluation on the " + "training data");

                    if (objectInputFileName.length() != 0)
                        throw new Exception("Can't load a clusterer and do class based " + "evaluation");

                    if (objectOutputFileName.length() != 0)
                        throw new Exception("Can't do class based evaluation and save clusterer");
                }
            } else {
                // if the dataset defines a class attribute, use it
                if (train.classIndex() != -1) {
                    theClass = train.classIndex() + 1;
                    System.err
                            .println("Note: using class attribute from dataset, i.e., attribute #" + theClass);
                }
            }

            if (theClass != -1) {
                if (theClass < 1 || theClass > train.numAttributes())
                    throw new Exception("Class is out of range!");

                if (!train.attribute(theClass - 1).isNominal())
                    throw new Exception("Class must be nominal!");

                train.setClassIndex(theClass - 1);
            }
        }
    } catch (Exception e) {
        throw new Exception("ClusterEvaluation: " + e.getMessage() + '.');
    }

    // Save options
    if (options != null) {
        savedOptions = new String[options.length];
        System.arraycopy(options, 0, savedOptions, 0, options.length);
    }

    if (objectInputFileName.length() != 0)
        Utils.checkForRemainingOptions(options);

    // Set options for clusterer
    if (clusterer instanceof OptionHandler)
        ((OptionHandler) clusterer).setOptions(options);

    Utils.checkForRemainingOptions(options);

    Instances trainHeader = train;
    if (objectInputFileName.length() != 0) {
        // Load the clusterer from file
        //      clusterer = (Clusterer) SerializationHelper.read(objectInputFileName);
        java.io.ObjectInputStream ois = new java.io.ObjectInputStream(
                new java.io.BufferedInputStream(new java.io.FileInputStream(objectInputFileName)));
        clusterer = (Clusterer) ois.readObject();
        // try and get the training header
        try {
            trainHeader = (Instances) ois.readObject();
        } catch (Exception ex) {
            // don't moan if we cant
        }
    } else {
        // Build the clusterer if no object file provided
        if (theClass == -1) {
            if (updateable) {
                clusterer.buildClusterer(source.getStructure());
                while (source.hasMoreElements(train)) {
                    inst = source.nextElement(train);
                    ((UpdateableClusterer) clusterer).updateClusterer(inst);
                }
                ((UpdateableClusterer) clusterer).updateFinished();
            } else {
                clusterer.buildClusterer(source.getDataSet());
            }
        } else {
            Remove removeClass = new Remove();
            removeClass.setAttributeIndices("" + theClass);
            removeClass.setInvertSelection(false);
            removeClass.setInputFormat(train);
            if (updateable) {
                Instances clusterTrain = Filter.useFilter(train, removeClass);
                clusterer.buildClusterer(clusterTrain);
                trainHeader = clusterTrain;
                while (source.hasMoreElements(train)) {
                    inst = source.nextElement(train);
                    removeClass.input(inst);
                    removeClass.batchFinished();
                    Instance clusterTrainInst = removeClass.output();
                    ((UpdateableClusterer) clusterer).updateClusterer(clusterTrainInst);
                }
                ((UpdateableClusterer) clusterer).updateFinished();
            } else {
                Instances clusterTrain = Filter.useFilter(source.getDataSet(), removeClass);
                clusterer.buildClusterer(clusterTrain);
                trainHeader = clusterTrain;
            }
            ClusterEvaluationEX ce = new ClusterEvaluationEX();
            ce.setClusterer(clusterer);
            ce.evaluateClusterer(train, trainFileName);

            return "\n\n=== Clustering stats for training data ===\n\n" + ce.clusterResultsToString();
        }
    }

    /* Output cluster predictions only (for the test data if specified,
       otherwise for the training data */
    if (printClusterAssignments) {
        return printClusterings(clusterer, trainFileName, testFileName, attributesToOutput);
    }

    text.append(clusterer.toString());
    text.append(
            "\n\n=== Clustering stats for training data ===\n\n" + printClusterStats(clusterer, trainFileName));

    if (testFileName.length() != 0) {
        // check header compatibility
        DataSource test = new DataSource(testFileName);
        Instances testStructure = test.getStructure();
        if (!trainHeader.equalHeaders(testStructure)) {
            throw new Exception("Training and testing data are not compatible\n");
        }

        text.append("\n\n=== Clustering stats for testing data ===\n\n"
                + printClusterStats(clusterer, testFileName));
    }

    if ((clusterer instanceof DensityBasedClusterer) && (doXval == true) && (testFileName.length() == 0)
            && (objectInputFileName.length() == 0)) {
        // cross validate the log likelihood on the training data
        random = new Random(seed);
        random.setSeed(seed);
        train = source.getDataSet();
        train.randomize(random);
        text.append(crossValidateModel(clusterer.getClass().getName(), train, folds, savedOptions, random));
    }

    // Save the clusterer if an object output file is provided
    if (objectOutputFileName.length() != 0) {
        //SerializationHelper.write(objectOutputFileName, clusterer);
        saveClusterer(objectOutputFileName, clusterer, trainHeader);
    }

    // If classifier is drawable output string describing graph
    if ((clusterer instanceof Drawable) && (graphFileName.length() != 0)) {
        BufferedWriter writer = new BufferedWriter(new FileWriter(graphFileName));
        writer.write(((Drawable) clusterer).graph());
        writer.newLine();
        writer.flush();
        writer.close();
    }

    return text.toString();
}

From source file:mulan.transformations.regression.ChainTransformation.java

License:Open Source License

/**
 * Transforms a single Instance in the same way as
 * {@link #transformInstance(Instance, int[], int)} transforms an Instances object.
 * /*from w w w  .  j  a va 2s .c o  m*/
 * @param instance the input instance
 * @param chain a chain (permutation) of the indices of the target attributes
 * @param numTargetsToKeep the number of target attributes from the beginning of the chain that
 *            should be kept, 1&lt;=numTargetsToKeep&lt;=numOfTargets
 * @return the transformed Instance object. The input object is not modified.
 * @throws Exception Potential exception thrown. To be handled in an upper level.
 */
public static Instance transformInstance(Instance instance, int[] chain, int numTargetsToKeep)
        throws Exception {
    int numOfTargets = chain.length;
    // Indices of attributes to remove
    int[] indicesToRemove = new int[numOfTargets - numTargetsToKeep];
    for (int i = 0; i < numOfTargets - numTargetsToKeep; i++) {
        indicesToRemove[i] = chain[numTargetsToKeep + i];
    }
    Remove remove = new Remove();
    remove.setAttributeIndicesArray(indicesToRemove);
    remove.setInputFormat(instance.dataset());
    remove.input(instance);
    remove.batchFinished();
    Instance transformed = remove.output();

    return transformed;
}