List of usage examples for weka.filters.unsupervised.instance.SparseToNonSparse
From source file:etc.aloe.oilspill2010.FeatureGenerationImpl.java
License:Open Source License
/**
 * Converts the given example set's instances from sparse to non-sparse
 * representation, replacing the set's backing instances with the converted
 * copy.
 *
 * @param examples the example set whose instances are converted in place
 * @return the configured {@link SparseToNonSparse} filter that was applied
 * @throws Exception if the filter cannot be initialized or applied
 */
protected Filter getSparseToNonsparseFilter(ExampleSet examples) throws Exception {
    SparseToNonSparse sparseConverter = new SparseToNonSparse();
    // The filter must learn the input format before it can be applied.
    sparseConverter.setInputFormat(examples.getInstances());
    Instances dense = Filter.useFilter(examples.getInstances(), sparseConverter);
    examples.setInstances(dense);
    return sparseConverter;
}
From source file:meka.classifiers.multilabel.Maniac.java
License:Open Source License
/**
 * Transforms the label part of the dataset {@code D} into a compressed label
 * space using a (deep) autoencoder, then merges the compressed labels with the
 * original features.
 *
 * When {@link #isOptimizeAE()} is set, a stream of progressively deeper
 * autoencoders is evaluated on an internal 3-fold train/test split and the
 * best-performing depth ({@code topiter}) is remembered; the final autoencoder
 * is then trained on the full label data up to that depth.
 *
 * @param D the multi-label dataset (features + labels)
 * @return the merged dataset of compressed labels followed by features, with
 *         the class index set to the number of compressed label attributes
 * @throws Exception if filtering, evaluation, or autoencoder training fails
 */
@Override
public Instances transformLabels(Instances D) throws Exception {
    // crazy scala-specific stuff that is necessary to access
    // "static" methods from java: Scala singletons are exposed as
    // package$.MODULE$ instances.
    org.kramerlab.autoencoder.package$ autoencoderStatics = org.kramerlab.autoencoder.package$.MODULE$;
    org.kramerlab.autoencoder.wekacompatibility.package$ wekaStatics = org.kramerlab.autoencoder.wekacompatibility.package$.MODULE$;
    org.kramerlab.autoencoder.experiments.package$ experimentsStatics = org.kramerlab.autoencoder.experiments.package$.MODULE$;

    // Best autoencoder depth found during optimization; -1 means "not optimized".
    int topiter = -1;

    // the optimization is a bit special, since we learn a stream
    // of autoencoders, no need to start from scratch, we just add layers
    if (this.isOptimizeAE()) {
        // Internal 3-fold split: fold 1 held out for testing.
        Instances train = D.trainCV(3, 1);
        Instances test = D.testCV(3, 1);
        Instances labels = this.extractPart(train, true);

        // first convert the arff into non sparse form
        SparseToNonSparse spfilter = new SparseToNonSparse();
        spfilter.setInputFormat(labels);
        Instances aeData = Filter.useFilter(labels, spfilter);

        // now convert it into a format suitable for the autoencoder
        Mat data = wekaStatics.instancesToMat(aeData);

        Iterable<Autoencoder> autoencoders = autoencoderStatics.deepAutoencoderStream_java(
                autoencoderStatics.Sigmoid(), // type of neurons. Sigmoid is ok
                this.getNumberAutoencoders(), // number of autoencoders = (max hidden layers + 1) / 2
                this.getCompression(), // compression from k-th layer to (k+1)-th layer
                data, // training data
                true, // true = L2 Error, false = CrossEntropy
                autoencoderStatics.HintonsMiraculousStrategy(), true, autoencoderStatics.NoObservers());

        // test each autoencoder, select the best classifier
        double bestAccuracy = Double.NEGATIVE_INFINITY;
        int iteratorcount = 0;
        topiter = 0;
        for (Autoencoder a : autoencoders) {
            iteratorcount++;
            // Candidate classifier using this autoencoder, with optimization
            // disabled to avoid recursing into this branch.
            Maniac candidate = new Maniac();
            candidate.setOptimizeAE(false);
            candidate.setNumberAutoencoders(this.getNumberAutoencoders());
            candidate.setCompression(this.getCompression());
            candidate.setClassifier(this.getClassifier());
            candidate.setAE(a);

            Result res = Evaluation.evaluateModel(candidate, train, test);
            double curac = (Double) res.getValue("Accuracy");
            // Keep the depth with the best accuracy seen so far.
            if (bestAccuracy < curac) {
                bestAccuracy = curac;
                topiter = iteratorcount;
            }
        }
    }

    Instances features = this.extractPart(D, false);
    Instances labels = this.extractPart(D, true);

    // first convert the arff into non sparse form
    SparseToNonSparse spfilter = new SparseToNonSparse();
    spfilter.setInputFormat(labels);
    Instances aeData = Filter.useFilter(labels, spfilter);

    // now convert it into a format suitable for the autoencoder
    Mat data = wekaStatics.instancesToMat(aeData);

    // Train the final autoencoder on the full label data unless one was
    // already provided externally via setAE().
    if (this.getAE() == null) {
        Iterable<Autoencoder> autoencoders = autoencoderStatics.deepAutoencoderStream_java(
                autoencoderStatics.Sigmoid(), // type of neurons. Sigmoid is ok
                this.getNumberAutoencoders(), // number of autoencoders = (max hidden layers + 1) / 2
                this.getCompression(), // compression from k-th layer to (k+1)-th layer
                data, // training data
                true, // true = L2 Error, false = CrossEntropy
                autoencoderStatics.HintonsMiraculousStrategy(), true, autoencoderStatics.NoObservers());
        int itercount = 0;
        for (Autoencoder a : autoencoders) {
            itercount++;
            // Stop at the optimized depth (if found) or at the configured maximum.
            if (topiter > 0 && itercount == topiter || itercount == this.getNumberAutoencoders()) {
                this.setAE(a);
                break;
            }
        }
    }

    Mat compressed = this.getAE().compress(data);
    Instances compressedLabels = wekaStatics.matToInstances(compressed);

    // remember the labels to use for the prediction step
    this.compressedTemplateInst = new Instances(compressedLabels);

    Instances result = Instances.mergeInstances(compressedLabels, features);
    result.setClassIndex(compressedLabels.numAttributes());
    return result;
}
From source file:mulan.classifier.clus.ClusWrapperClassification.java
License:Open Source License
/** * Takes a dataset as a MultiLabelInstances object and writes an arff file that is compliant with CLUS. * // w ww .j a v a2 s . c o m * @param mlDataset the dataset as a MultiLabelInstances object * @param fileName the name of the generated arff file * @throws Exception Potential exception thrown. To be handled in an upper level. */ public static void makeClusCompliant(MultiLabelInstances mlDataset, String fileName) throws Exception { BufferedWriter out = new BufferedWriter(new FileWriter(new File(fileName))); // the file will be written in the datasetPath directory // Instances dataset = mlDataset.getDataSet(); // any changes are applied to a copy of the original dataset Instances dataset = new Instances(mlDataset.getDataSet()); SparseToNonSparse stns = new SparseToNonSparse(); // new instance of filter stns.setInputFormat(dataset); // inform filter about dataset **AFTER** setting options Instances nonSparseDataset = Filter.useFilter(dataset, stns); // apply filter String header = new Instances(nonSparseDataset, 0).toString(); // preprocess the header // remove ; characters and truncate long attribute names String[] headerLines = header.split("\n"); for (int i = 0; i < headerLines.length; i++) { if (headerLines[i].startsWith("@attribute")) { headerLines[i] = headerLines[i].replaceAll(";", "SEMI_COLON"); String originalAttributeName = headerLines[i].split(" ")[1]; String newAttributeName = originalAttributeName; if (originalAttributeName.length() > 30) { newAttributeName = originalAttributeName.substring(0, 30) + ".."; } out.write(headerLines[i].replace(originalAttributeName, newAttributeName) + "\n"); } else { out.write(headerLines[i] + "\n"); } } for (int i = 0; i < nonSparseDataset.numInstances(); i++) { if (i % 100 == 0) { out.flush(); } out.write(nonSparseDataset.instance(i) + "\n"); } out.close(); }
From source file:org.kramerlab.mlcbmad.classifier.MLCBMaD.java
License:Open Source License
/**
 * Builds the classifier.
 *
 * Decomposes the boolean label matrix of the training set into two factor
 * matrices (MLC-BMaD), then trains a Binary Relevance classifier on the
 * original features merged with the decomposed (compressed) labels.
 *
 * @param trainingSet the multi-label training data
 * @throws Exception if filtering fails or the decomposition produces a row
 *                   count that does not match the feature instances
 */
protected void buildInternal(MultiLabelInstances trainingSet) throws Exception {
    // This step is necessary as there are problems with the
    // attribute indexes in WEKA when merging instances
    Instances train = this.copyInstances(trainingSet.getDataSet());
    debug("Learning model...");
    debug("Parameter Setting k = " + k + " and t = " + t + " ...");

    // remove the features, so we make a matrix decomposition only of
    // the labels
    Remove rem0 = new Remove();
    int[] features0 = trainingSet.getFeatureIndices();
    rem0.setAttributeIndicesArray(features0);
    rem0.setInputFormat(train);
    train = Filter.useFilter(train, rem0);

    Instances decompData;
    // lets do the decomposition
    // first save the arff in non sparse form
    SparseToNonSparse spfilter = new SparseToNonSparse();
    spfilter.setInputFormat(train);
    Instances out = Filter.useFilter(train, spfilter);

    // Boolean matrix decomposition of the label matrix: res._1 holds the
    // compressed labels per instance, res._2 the reconstruction matrix.
    BooleanMatrixDecomposition bmd = BooleanMatrixDecomposition.BEST_CONFIGURED(this.t);
    Tuple<Instances, Instances> res = bmd.decompose(out, this.k);
    decompData = res._1;
    uppermatrix = res._2;

    // get indices: the decomposed labels will be appended after the
    // feature attributes, so their indices start at features.length.
    decomp = decompData;
    int[] features = trainingSet.getFeatureIndices();
    int[] decompindices = new int[decompData.numAttributes()];
    int countf = 0;
    for (int i = features.length; i < (decompData.numAttributes() + features.length); i++) {
        decompindices[countf] = i;
        countf++;
    }
    labelsdecomp = decompindices;

    // get features from training set (invert the selection so only the
    // feature attributes remain)
    Instances copied = this.copyInstances(trainingSet.getDataSet());
    Remove rem = new Remove();
    rem.setAttributeIndicesArray(features);
    rem.setInvertSelection(true);
    rem.setInputFormat(copied);
    Instances onlyFeatures = Filter.useFilter(copied, rem);

    // merge features with matrix decomposition
    if (onlyFeatures.numInstances() != decompData.numInstances()) {
        // sthg went wrong when decomposing
        throw new Exception("Problem when decomposing");
    }
    featuresAndDecomp = Instances.mergeInstances(onlyFeatures,
            this.copyInstances(decompData));
    Instances trainset = featuresAndDecomp;

    // Build a flat label hierarchy over the decomposed label attributes so
    // the merged data can be wrapped as MultiLabelInstances.
    LabelsMetaDataImpl trainlmd = new LabelsMetaDataImpl();
    for (int lab : labelsdecomp) {
        LabelNode lni = new LabelNodeImpl(trainset.attribute(lab).name());
        trainlmd.addRootNode(lni);
    }
    MultiLabelInstances trainMulti = new MultiLabelInstances(trainset, trainlmd);

    // build br for decomposed label prediction
    basebr = new BinaryRelevance(baseClassifier);
    basebr.build(trainMulti);
    debug("Model trained... all done.");
}