Example usage for weka.filters.unsupervised.instance SparseToNonSparse SparseToNonSparse

List of usage examples for weka.filters.unsupervised.instance SparseToNonSparse SparseToNonSparse

Introduction

On this page you can find the example usage for weka.filters.unsupervised.instance SparseToNonSparse SparseToNonSparse.

Prototype

SparseToNonSparse

Source Link

Usage

From source file:etc.aloe.oilspill2010.FeatureGenerationImpl.java

License:Open Source License

/**
 * Converts the instances held by {@code examples} to dense (non-sparse) form
 * in place and returns the initialized filter.
 *
 * @param examples the example set whose instances are replaced by a dense copy
 * @return the {@link SparseToNonSparse} filter, already initialized on the data
 * @throws Exception if the filter cannot be initialized or applied
 */
protected Filter getSparseToNonsparseFilter(ExampleSet examples) throws Exception {
    SparseToNonSparse sparseFilter = new SparseToNonSparse();
    sparseFilter.setInputFormat(examples.getInstances());

    Instances dense = Filter.useFilter(examples.getInstances(), sparseFilter);
    examples.setInstances(dense);

    return sparseFilter;
}

From source file:meka.classifiers.multilabel.Maniac.java

License:Open Source License

/**
 * Transforms the label part of {@code D} into a compressed representation
 * produced by a deep autoencoder, then merges the compressed labels with the
 * original features into a single dataset.
 *
 * @param D the multi-label dataset whose labels are to be compressed
 * @return a dataset containing the compressed labels followed by the
 *         original features, with the class index set after the labels
 * @throws Exception if filtering, autoencoder training, or evaluation fails
 */
@Override
public Instances transformLabels(Instances D) throws Exception {
    // crazy scala-specific stuff that is necessary to access
    // "static" methods from java (MODULE$ is the Scala singleton instance)
    org.kramerlab.autoencoder.package$ autoencoderStatics = org.kramerlab.autoencoder.package$.MODULE$;

    org.kramerlab.autoencoder.wekacompatibility.package$ wekaStatics = org.kramerlab.autoencoder.wekacompatibility.package$.MODULE$;

    org.kramerlab.autoencoder.experiments.package$ experimentsStatics = org.kramerlab.autoencoder.experiments.package$.MODULE$;

    // 1-based index of the best autoencoder found during optimization;
    // -1 means no optimization was performed
    int topiter = -1;

    // the optimization is a bit special, since we learn a stream
    // of autoencoders, no need to start from scratch, we just add layers
    if (this.isOptimizeAE()) {
        // hold-out split: fold 1 of a 3-fold cross-validation
        Instances train = D.trainCV(3, 1);
        Instances test = D.testCV(3, 1);
        Instances labels = this.extractPart(train, true);

        // first convert the arff into non sparse form
        SparseToNonSparse spfilter = new SparseToNonSparse();
        spfilter.setInputFormat(labels);
        Instances aeData = Filter.useFilter(labels, spfilter);

        // now convert it into a format suitable for the autoencoder
        Mat data = wekaStatics.instancesToMat(aeData);

        Iterable<Autoencoder> autoencoders = autoencoderStatics.deepAutoencoderStream_java(
                autoencoderStatics.Sigmoid(), // type of neurons.
                // Sigmoid is ok
                this.getNumberAutoencoders(), // number of autoencoders = (max hidden layers + 1) /
                // 2
                this.getCompression(), // compression from k-th layer to (k+1)-th layer
                data, // training data 
                true, // true = L2 Error, false = CrossEntropy
                autoencoderStatics.HintonsMiraculousStrategy(), true, autoencoderStatics.NoObservers());

        // test each autoencoder, select the best classifier
        double bestAccuracy = Double.NEGATIVE_INFINITY;
        int iteratorcount = 0;
        topiter = 0;
        for (Autoencoder a : autoencoders) {
            iteratorcount++;

            // build a candidate with the same settings but a fixed autoencoder
            // so that evaluateModel does not recurse into optimization
            Maniac candidate = new Maniac();
            candidate.setOptimizeAE(false);
            candidate.setNumberAutoencoders(this.getNumberAutoencoders());
            candidate.setCompression(this.getCompression());
            candidate.setClassifier(this.getClassifier());

            candidate.setAE(a);

            Result res = Evaluation.evaluateModel(candidate, train, test);
            double curac = (Double) res.getValue("Accuracy");

            // keep the index of the autoencoder with the best hold-out accuracy
            if (bestAccuracy < curac) {
                bestAccuracy = curac;
                topiter = iteratorcount;
            }
        }
    }
    Instances features = this.extractPart(D, false);
    Instances labels = this.extractPart(D, true);

    // first convert the arff into non sparse form
    SparseToNonSparse spfilter = new SparseToNonSparse();
    spfilter.setInputFormat(labels);
    Instances aeData = Filter.useFilter(labels, spfilter);

    // now convert it into a format suitable for the autoencoder
    Mat data = wekaStatics.instancesToMat(aeData);

    if (this.getAE() == null) {
        // retrain the stream on the full label data and stop either at the
        // best iteration found above or at the configured maximum
        Iterable<Autoencoder> autoencoders = autoencoderStatics.deepAutoencoderStream_java(
                autoencoderStatics.Sigmoid(), // type of neurons.
                // Sigmoid is ok
                this.getNumberAutoencoders(), // number of autoencoders = (max hidden layers + 1) /
                // 2
                this.getCompression(), // compression from k-th layer to (k+1)-th layer
                data, // training data 
                true, // true = L2 Error, false = CrossEntropy
                autoencoderStatics.HintonsMiraculousStrategy(), true, autoencoderStatics.NoObservers());
        int itercount = 0;
        for (Autoencoder a : autoencoders) {
            itercount++;
            if (topiter > 0 && itercount == topiter || itercount == this.getNumberAutoencoders()) {
                this.setAE(a);
                break;
            }
        }
    }

    Mat compressed = this.getAE().compress(data);
    Instances compressedLabels = wekaStatics.matToInstances(compressed);

    // remember the labels to use for the prediction step,
    this.compressedTemplateInst = new Instances(compressedLabels);

    Instances result = Instances.mergeInstances(compressedLabels, features);

    result.setClassIndex(compressedLabels.numAttributes());

    return result;
}

From source file:mulan.classifier.clus.ClusWrapperClassification.java

License:Open Source License

/**
 * Takes a dataset as a MultiLabelInstances object and writes an arff file that is compliant with CLUS.
 *
 * @param mlDataset the dataset as a MultiLabelInstances object
 * @param fileName the name of the generated arff file
 * @throws Exception Potential exception thrown. To be handled in an upper level.
 */
public static void makeClusCompliant(MultiLabelInstances mlDataset, String fileName) throws Exception {
    // any changes are applied to a copy of the original dataset
    Instances dataset = new Instances(mlDataset.getDataSet());

    // expand sparse representation to non-sparse before writing
    SparseToNonSparse stns = new SparseToNonSparse(); // new instance of filter
    stns.setInputFormat(dataset); // inform filter about dataset **AFTER** setting options
    Instances nonSparseDataset = Filter.useFilter(dataset, stns); // apply filter

    // try-with-resources: guarantees the writer is closed (and buffered output
    // flushed) even if a write throws — the original leaked it on exception
    try (BufferedWriter out = new BufferedWriter(new FileWriter(new File(fileName)))) {
        String header = new Instances(nonSparseDataset, 0).toString();
        // preprocess the header:
        // remove ; characters and truncate long attribute names
        String[] headerLines = header.split("\n");
        for (int i = 0; i < headerLines.length; i++) {
            if (headerLines[i].startsWith("@attribute")) {
                headerLines[i] = headerLines[i].replaceAll(";", "SEMI_COLON");
                String originalAttributeName = headerLines[i].split(" ")[1];
                String newAttributeName = originalAttributeName;
                if (originalAttributeName.length() > 30) {
                    newAttributeName = originalAttributeName.substring(0, 30) + "..";
                }
                out.write(headerLines[i].replace(originalAttributeName, newAttributeName) + "\n");
            } else {
                out.write(headerLines[i] + "\n");
            }
        }
        // write the data section, flushing periodically as the original did
        for (int i = 0; i < nonSparseDataset.numInstances(); i++) {
            if (i % 100 == 0) {
                out.flush();
            }
            out.write(nonSparseDataset.instance(i) + "\n");
        }
    }
}

From source file:org.kramerlab.mlcbmad.classifier.MLCBMaD.java

License:Open Source License

/**
 * Builds the classifier: decomposes the label matrix of the training set
 * via Boolean matrix decomposition and trains a binary-relevance model on
 * the decomposed (compressed) labels merged with the original features.
 *
 * @param trainingSet the multi-label training data
 * @throws Exception if filtering, decomposition, or base training fails
 */
protected void buildInternal(MultiLabelInstances trainingSet) throws Exception {

    // This step is necessary as there are problems with the 
    // attribute indexes in WEKA when merging instances
    Instances train = this.copyInstances(trainingSet.getDataSet());

    debug("Learning model...");
    debug("Parameter Setting k = " + k + " and t = " + t + " ...");

    // remove the features, so we make a matrix decomposition only of
    // the labels

    Remove rem0 = new Remove();
    int[] features0 = trainingSet.getFeatureIndices();
    rem0.setAttributeIndicesArray(features0);
    rem0.setInputFormat(train);
    train = Filter.useFilter(train, rem0);

    Instances decompData;

    // lets do the decomposition

    // first save the arff in non sparse form
    // (the decomposition below operates on the dense representation)

    SparseToNonSparse spfilter = new SparseToNonSparse();
    spfilter.setInputFormat(train);
    Instances out = Filter.useFilter(train, spfilter);

    // decompose the label matrix into two factors: res._1 holds the
    // compressed labels per instance, res._2 the reconstruction matrix
    BooleanMatrixDecomposition bmd = BooleanMatrixDecomposition.BEST_CONFIGURED(this.t);
    Tuple<Instances, Instances> res = bmd.decompose(out, this.k);

    decompData = res._1;
    uppermatrix = res._2;

    // get indices

    decomp = decompData;

    int[] features = trainingSet.getFeatureIndices();

    // the decomposed labels will sit AFTER the features in the merged
    // dataset, so their indices start at features.length
    int[] decompindices = new int[decompData.numAttributes()];

    int countf = 0;
    for (int i = features.length; i < (decompData.numAttributes() + features.length); i++) {
        decompindices[countf] = i;
        countf++;
    }
    labelsdecomp = decompindices;

    // get features from training set
    // (invert the selection so Remove KEEPS only the feature attributes)

    Instances copied = this.copyInstances(trainingSet.getDataSet());

    Remove rem = new Remove();

    rem.setAttributeIndicesArray(features);
    rem.setInvertSelection(true);
    rem.setInputFormat(copied);

    Instances onlyFeatures = Filter.useFilter(copied, rem);

    // merge features with matrix decomposition

    if (onlyFeatures.numInstances() != decompData.numInstances()) {
        //sthg went wrong when decomposing
        throw new Exception("Problem when decomposing");
    }

    featuresAndDecomp = Instances.mergeInstances(onlyFeatures, this.copyInstances(decompData));

    Instances trainset = featuresAndDecomp;

    // declare each decomposed label as a root label node for mulan
    LabelsMetaDataImpl trainlmd = new LabelsMetaDataImpl();
    for (int lab : labelsdecomp) {
        LabelNode lni = new LabelNodeImpl(trainset.attribute(lab).name());
        trainlmd.addRootNode(lni);
    }

    MultiLabelInstances trainMulti = new MultiLabelInstances(trainset, trainlmd);

    // build br for decomposed label prediction

    basebr = new BinaryRelevance(baseClassifier);

    basebr.build(trainMulti);

    debug("Model trained... all done.");

}