List of usage examples for weka.core.Instances.mergeInstances
public static Instances mergeInstances(Instances first, Instances second)
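Before the real-world examples below, a minimal self-contained sketch of the call itself (class, relation and attribute names are invented for illustration): mergeInstances joins two datasets column-wise, so both arguments must contain the same number of instances (otherwise an IllegalArgumentException is thrown), and the result carries the attributes of the first dataset followed by those of the second.

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class MergeInstancesSketch {
  public static void main(String[] args) {
    // First dataset: two numeric attributes, two rows.
    ArrayList<Attribute> attsA = new ArrayList<>();
    attsA.add(new Attribute("x1"));
    attsA.add(new Attribute("x2"));
    Instances first = new Instances("first", attsA, 2);
    first.add(new DenseInstance(1.0, new double[] { 1.0, 2.0 }));
    first.add(new DenseInstance(1.0, new double[] { 3.0, 4.0 }));

    // Second dataset: one attribute, and the same number of rows.
    ArrayList<Attribute> attsB = new ArrayList<>();
    attsB.add(new Attribute("y"));
    Instances second = new Instances("second", attsB, 2);
    second.add(new DenseInstance(1.0, new double[] { 5.0 }));
    second.add(new DenseInstance(1.0, new double[] { 6.0 }));

    // Column-wise merge: the result has attributes x1, x2, y and two rows.
    Instances merged = Instances.mergeInstances(first, second);
    System.out.println(merged);
  }
}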
From source file:adams.flow.transformer.WekaInstancesMerge.java
License:Open Source License
/**
 * Executes the flow item.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
  String result;
  String[] filesStr;
  File[] files;
  int i;
  Instances output;
  Instances[] orig;
  Instances[] inst;
  Instance[] rows;
  HashSet ids;
  int max;
  TIntList uniqueList;
  Remove remove;

  result = null;

  // get filenames
  files = null;
  orig = null;
  if (m_InputToken.getPayload() instanceof String[]) {
    filesStr = (String[]) m_InputToken.getPayload();
    files = new File[filesStr.length];
    for (i = 0; i < filesStr.length; i++)
      files[i] = new PlaceholderFile(filesStr[i]);
  } else if (m_InputToken.getPayload() instanceof File[]) {
    files = (File[]) m_InputToken.getPayload();
  } else if (m_InputToken.getPayload() instanceof Instance[]) {
    rows = (Instance[]) m_InputToken.getPayload();
    orig = new Instances[rows.length];
    for (i = 0; i < rows.length; i++) {
      orig[i] = new Instances(rows[i].dataset(), 1);
      orig[i].add((Instance) rows[i].copy());
    }
  } else if (m_InputToken.getPayload() instanceof Instances[]) {
    orig = (Instances[]) m_InputToken.getPayload();
  } else {
    throw new IllegalStateException("Unhandled input type: " + m_InputToken.getPayload().getClass());
  }

  try {
    output = null;
    // simple merge
    if (m_UniqueID.length() == 0) {
      if (files != null) {
        inst = new Instances[1];
        for (i = 0; i < files.length; i++) {
          if (isStopped())
            break;
          inst[0] = DataSource.read(files[i].getAbsolutePath());
          inst[0] = prepareData(inst[0], i);
          if (i == 0) {
            output = inst[0];
          } else {
            if (isLoggingEnabled())
              getLogger().info("Merging with file #" + (i + 1) + ": " + files[i]);
            output = Instances.mergeInstances(output, inst[0]);
          }
        }
      } else if (orig != null) {
        inst = new Instances[1];
        for (i = 0; i < orig.length; i++) {
          if (isStopped())
            break;
          inst[0] = prepareData(orig[i], i);
          if (i == 0) {
            output = inst[0];
          } else {
            if (isLoggingEnabled())
              getLogger().info("Merging with dataset #" + (i + 1) + ": " + orig[i].relationName());
            output = Instances.mergeInstances(output, inst[0]);
          }
        }
      }
    }
    // merge based on row IDs
    else {
      m_AttType = -1;
      max = 0;
      m_UniqueIDAtts = new ArrayList<>();
      if (files != null) {
        orig = new Instances[files.length];
        for (i = 0; i < files.length; i++) {
          if (isStopped())
            break;
          if (isLoggingEnabled())
            getLogger().info("Loading file #" + (i + 1) + ": " + files[i]);
          orig[i] = DataSource.read(files[i].getAbsolutePath());
          max = Math.max(max, orig[i].numInstances());
        }
      } else if (orig != null) {
        for (i = 0; i < orig.length; i++)
          max = Math.max(max, orig[i].numInstances());
      }
      inst = new Instances[orig.length];
      ids = new HashSet(max);
      for (i = 0; i < orig.length; i++) {
        if (isStopped())
          break;
        if (isLoggingEnabled())
          getLogger().info("Updating IDs #" + (i + 1));
        updateIDs(i, orig[i], ids);
        if (isLoggingEnabled())
          getLogger().info("Preparing dataset #" + (i + 1));
        inst[i] = prepareData(orig[i], i);
      }
      output = merge(orig, inst, ids);
      // remove unnecessary unique ID attributes
      if (m_KeepOnlySingleUniqueID) {
        uniqueList = new TIntArrayList();
        for (String att : m_UniqueIDAtts)
          uniqueList.add(output.attribute(att).index());
        if (uniqueList.size() > 0) {
          if (isLoggingEnabled())
            getLogger().info("Removing duplicate unique ID attributes: " + m_UniqueIDAtts);
          remove = new Remove();
          remove.setAttributeIndicesArray(uniqueList.toArray());
          remove.setInputFormat(output);
          output = Filter.useFilter(output, remove);
        }
      }
    }

    if (!isStopped()) {
      m_OutputToken = new Token(output);
      updateProvenance(m_OutputToken);
    }
  } catch (Exception e) {
    result = handleException("Failed to merge: ", e);
  }

  return result;
}
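The simple-merge branch above folds an array of datasets into one wide dataset by repeated pairwise calls. Below is a minimal runnable sketch of that fold with the flow control, logging and prepareData calls stripped out; it assumes attribute names have already been made unique across the parts, which the example's prepareData step presumably handles.

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class MergeFoldSketch {
  // Folds an array of equally-sized datasets into one wide dataset,
  // mirroring the simple-merge branch above.
  static Instances mergeAll(Instances[] datasets) {
    Instances output = null;
    for (int i = 0; i < datasets.length; i++) {
      output = (i == 0) ? datasets[i] : Instances.mergeInstances(output, datasets[i]);
    }
    return output;
  }

  public static void main(String[] args) {
    Instances[] parts = new Instances[3];
    for (int p = 0; p < parts.length; p++) {
      ArrayList<Attribute> atts = new ArrayList<>();
      atts.add(new Attribute("att_" + p)); // distinct names across parts
      parts[p] = new Instances("part" + p, atts, 1);
      parts[p].add(new DenseInstance(1.0, new double[] { p }));
    }
    System.out.println(mergeAll(parts)); // one row, three attributes
  }
}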
From source file:adams.flow.transformer.WekaPredictionsToInstances.java
License:Open Source License
/**
 * Executes the flow item.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
  String result;
  Evaluation eval;
  int i;
  int n;
  int indexErr;
  int indexProb;
  int indexDist;
  int indexWeight;
  boolean nominal;
  Instances header;
  ArrayList<Attribute> atts;
  ArrayList<String> values;
  ArrayList<Prediction> predictions;
  Prediction pred;
  double[] vals;
  Instances data;
  Instances testData;
  int[] indices;

  result = null;

  if (m_InputToken.getPayload() instanceof WekaEvaluationContainer) {
    eval = (Evaluation) ((WekaEvaluationContainer) m_InputToken.getPayload())
        .getValue(WekaEvaluationContainer.VALUE_EVALUATION);
    indices = (int[]) ((WekaEvaluationContainer) m_InputToken.getPayload())
        .getValue(WekaEvaluationContainer.VALUE_ORIGINALINDICES);
    testData = (Instances) ((WekaEvaluationContainer) m_InputToken.getPayload())
        .getValue(WekaEvaluationContainer.VALUE_TESTDATA);
  } else {
    eval = (Evaluation) m_InputToken.getPayload();
    indices = null;
    testData = null;
  }
  header = eval.getHeader();
  nominal = header.classAttribute().isNominal();
  predictions = eval.predictions();

  if (predictions != null) {
    // create header
    atts = new ArrayList<>();
    // actual
    if (nominal && m_AddLabelIndex) {
      values = new ArrayList<>();
      for (i = 0; i < header.classAttribute().numValues(); i++)
        values.add((i + 1) + ":" + header.classAttribute().value(i));
      atts.add(new Attribute(m_MeasuresPrefix + "Actual", values));
    } else {
      atts.add(header.classAttribute().copy(m_MeasuresPrefix + "Actual"));
    }
    // predicted
    if (nominal && m_AddLabelIndex) {
      values = new ArrayList<>();
      for (i = 0; i < header.classAttribute().numValues(); i++)
        values.add((i + 1) + ":" + header.classAttribute().value(i));
      atts.add(new Attribute(m_MeasuresPrefix + "Predicted", values));
    } else {
      atts.add(header.classAttribute().copy(m_MeasuresPrefix + "Predicted"));
    }
    // error
    indexErr = -1;
    if (m_ShowError) {
      indexErr = atts.size();
      if (nominal) {
        values = new ArrayList<>();
        values.add("n");
        values.add("y");
        atts.add(new Attribute(m_MeasuresPrefix + "Error", values));
      } else {
        atts.add(new Attribute(m_MeasuresPrefix + "Error"));
      }
    }
    // probability
    indexProb = -1;
    if (m_ShowProbability && nominal) {
      indexProb = atts.size();
      atts.add(new Attribute(m_MeasuresPrefix + "Probability"));
    }
    // distribution
    indexDist = -1;
    if (m_ShowDistribution && nominal) {
      indexDist = atts.size();
      for (n = 0; n < header.classAttribute().numValues(); n++)
        atts.add(new Attribute(m_MeasuresPrefix + "Distribution (" + header.classAttribute().value(n) + ")"));
    }
    // weight
    indexWeight = -1;
    if (m_ShowWeight) {
      indexWeight = atts.size();
      atts.add(new Attribute(m_MeasuresPrefix + "Weight"));
    }

    data = new Instances("Predictions", atts, predictions.size());
    data.setClassIndex(1); // predicted

    // add data
    if ((indices != null) && m_UseOriginalIndices)
      predictions = CrossValidationHelper.alignPredictions(predictions, indices);
    for (i = 0; i < predictions.size(); i++) {
      pred = predictions.get(i);
      vals = new double[data.numAttributes()];
      // actual
      vals[0] = pred.actual();
      // predicted
      vals[1] = pred.predicted();
      // error
      if (m_ShowError) {
        if (nominal) {
          vals[indexErr] = ((pred.actual() != pred.predicted()) ? 1.0 : 0.0);
        } else {
          if (m_UseAbsoluteError)
            vals[indexErr] = Math.abs(pred.actual() - pred.predicted());
          else
            vals[indexErr] = pred.actual() - pred.predicted();
        }
      }
      // probability
      if (m_ShowProbability && nominal) {
        vals[indexProb] = StatUtils.max(((NominalPrediction) pred).distribution());
      }
      // distribution
      if (m_ShowDistribution && nominal) {
        for (n = 0; n < header.classAttribute().numValues(); n++)
          vals[indexDist + n] = ((NominalPrediction) pred).distribution()[n];
      }
      // weight
      if (m_ShowWeight) {
        vals[indexWeight] = pred.weight();
      }
      // add row
      data.add(new DenseInstance(1.0, vals));
    }

    // add test data?
    if ((testData != null) && !m_TestAttributes.isEmpty()) {
      testData = filterTestData(testData);
      if (testData != null)
        data = Instances.mergeInstances(data, testData);
    }

    // generate output token
    m_OutputToken = new Token(data);
  } else {
    getLogger().severe("No predictions available from Evaluation object!");
  }

  return result;
}
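A precondition worth spelling out: Instances.mergeInstances throws an IllegalArgumentException when the two datasets differ in length, so the merge at the end only works because filterTestData yields exactly one row per prediction, and because the predictions were aligned to the original instance order beforehand via CrossValidationHelper.alignPredictions.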
From source file:meka.classifiers.multilabel.Maniac.java
License:Open Source License
@Override
public Instance transformInstance(Instance x) throws Exception {
  Instances tmpInst = new Instances(x.dataset());
  tmpInst.delete();
  tmpInst.add(x);
  Instances features = this.extractPart(tmpInst, false);

  Instances pseudoLabels = new Instances(this.compressedTemplateInst);
  Instance tmpin = pseudoLabels.instance(0);
  pseudoLabels.delete();
  pseudoLabels.add(tmpin);
  for (int i = 0; i < pseudoLabels.classIndex(); i++) {
    pseudoLabels.instance(0).setMissing(i);
  }

  Instances newDataSet = Instances.mergeInstances(pseudoLabels, features);
  newDataSet.setClassIndex(pseudoLabels.numAttributes());
  return newDataSet.instance(0);
}
From source file:meka.classifiers.multilabel.Maniac.java
License:Open Source License
@Override
public Instances transformLabels(Instances D) throws Exception {
  // crazy scala-specific stuff that is necessary to access
  // "static" methods from java
  org.kramerlab.autoencoder.package$ autoencoderStatics = org.kramerlab.autoencoder.package$.MODULE$;
  org.kramerlab.autoencoder.wekacompatibility.package$ wekaStatics = org.kramerlab.autoencoder.wekacompatibility.package$.MODULE$;
  org.kramerlab.autoencoder.experiments.package$ experimentsStatics = org.kramerlab.autoencoder.experiments.package$.MODULE$;

  int topiter = -1;

  // the optimization is a bit special, since we learn a stream
  // of autoencoders, no need to start from scratch, we just add layers
  if (this.isOptimizeAE()) {
    Instances train = D.trainCV(3, 1);
    Instances test = D.testCV(3, 1);
    Instances labels = this.extractPart(train, true);

    // first convert the arff into non sparse form
    SparseToNonSparse spfilter = new SparseToNonSparse();
    spfilter.setInputFormat(labels);
    Instances aeData = Filter.useFilter(labels, spfilter);

    // now convert it into a format suitable for the autoencoder
    Mat data = wekaStatics.instancesToMat(aeData);

    Iterable<Autoencoder> autoencoders = autoencoderStatics.deepAutoencoderStream_java(
        autoencoderStatics.Sigmoid(), // type of neurons. Sigmoid is ok
        this.getNumberAutoencoders(), // number of autoencoders = (max hidden layers + 1) / 2
        this.getCompression(), // compression from k-th layer to (k+1)-th layer
        data, // training data
        true, // true = L2 Error, false = CrossEntropy
        autoencoderStatics.HintonsMiraculousStrategy(), true, autoencoderStatics.NoObservers());

    // test each autoencoder, select the best classifier
    double bestAccuracy = Double.NEGATIVE_INFINITY;
    int iteratorcount = 0;
    topiter = 0;
    for (Autoencoder a : autoencoders) {
      iteratorcount++;

      Maniac candidate = new Maniac();
      candidate.setOptimizeAE(false);
      candidate.setNumberAutoencoders(this.getNumberAutoencoders());
      candidate.setCompression(this.getCompression());
      candidate.setClassifier(this.getClassifier());
      candidate.setAE(a);

      Result res = Evaluation.evaluateModel(candidate, train, test);
      double curac = (Double) res.getValue("Accuracy");

      if (bestAccuracy < curac) {
        bestAccuracy = curac;
        topiter = iteratorcount;
      }
    }
  }

  Instances features = this.extractPart(D, false);
  Instances labels = this.extractPart(D, true);

  // first convert the arff into non sparse form
  SparseToNonSparse spfilter = new SparseToNonSparse();
  spfilter.setInputFormat(labels);
  Instances aeData = Filter.useFilter(labels, spfilter);

  // now convert it into a format suitable for the autoencoder
  Mat data = wekaStatics.instancesToMat(aeData);

  if (this.getAE() == null) {
    Iterable<Autoencoder> autoencoders = autoencoderStatics.deepAutoencoderStream_java(
        autoencoderStatics.Sigmoid(), // type of neurons. Sigmoid is ok
        this.getNumberAutoencoders(), // number of autoencoders = (max hidden layers + 1) / 2
        this.getCompression(), // compression from k-th layer to (k+1)-th layer
        data, // training data
        true, // true = L2 Error, false = CrossEntropy
        autoencoderStatics.HintonsMiraculousStrategy(), true, autoencoderStatics.NoObservers());
    int itercount = 0;
    for (Autoencoder a : autoencoders) {
      itercount++;
      if (topiter > 0 && itercount == topiter || itercount == this.getNumberAutoencoders()) {
        this.setAE(a);
        break;
      }
    }
  }

  Mat compressed = this.getAE().compress(data);
  Instances compressedLabels = wekaStatics.matToInstances(compressed);

  // remember the labels to use for the prediction step
  this.compressedTemplateInst = new Instances(compressedLabels);

  Instances result = Instances.mergeInstances(compressedLabels, features);
  result.setClassIndex(compressedLabels.numAttributes());
  return result;
}
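A convention worth noting, because it repeats in every MEKA example in this list: the compressed labels are passed to mergeInstances as the first argument and the features as the second, and setClassIndex is then set to the number of label attributes, i.e. to the boundary between the two blocks. Downstream code can therefore treat the leading attributes of the merged dataset as the (pseudo-)label block.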
From source file:meka.classifiers.multilabel.MLCBMaD.java
License:Open Source License
@Override
public Instance transformInstance(Instance x) throws Exception {
  Instances tmpInst = new Instances(x.dataset());
  tmpInst.delete();
  tmpInst.add(x);
  Instances features = this.extractPart(tmpInst, false);

  Instances pseudoLabels = new Instances(this.compressedMatrix);
  Instance tmpin = pseudoLabels.instance(0);
  pseudoLabels.delete();
  pseudoLabels.add(tmpin);
  for (int i = 0; i < pseudoLabels.classIndex(); i++) {
    pseudoLabels.instance(0).setMissing(i);
  }

  Instances newDataSet = Instances.mergeInstances(pseudoLabels, features);
  newDataSet.setClassIndex(this.size);
  return newDataSet.instance(0);
}
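Maniac (above) and MLC-BMaD share the same prediction-time idiom: wrap the incoming instance in a one-row dataset, take a one-row template of the compressed labels with every value set missing, and merge the two. A runnable condensation of just that idiom, with invented attribute names (the real classes take the label template from compressedTemplateInst or compressedMatrix):

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;
import weka.core.Utils;

public class TransformInstanceIdiom {
  public static void main(String[] args) {
    // One-row dataset holding the features of the instance being predicted.
    ArrayList<Attribute> featAtts = new ArrayList<>();
    featAtts.add(new Attribute("f1"));
    featAtts.add(new Attribute("f2"));
    Instances features = new Instances("features", featAtts, 1);
    features.add(new DenseInstance(1.0, new double[] { 0.3, 0.7 }));

    // One-row template for the compressed labels, all values set missing.
    ArrayList<Attribute> labelAtts = new ArrayList<>();
    labelAtts.add(new Attribute("z1"));
    labelAtts.add(new Attribute("z2"));
    Instances pseudoLabels = new Instances("pseudoLabels", labelAtts, 1);
    pseudoLabels.add(new DenseInstance(1.0,
        new double[] { Utils.missingValue(), Utils.missingValue() }));

    // Merge: labels first, features second; class index at the boundary.
    Instances merged = Instances.mergeInstances(pseudoLabels, features);
    merged.setClassIndex(pseudoLabels.numAttributes());
    System.out.println(merged.instance(0));
  }
}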
From source file:meka.classifiers.multilabel.MLCBMaD.java
License:Open Source License
@Override
public Instances transformLabels(Instances D) throws Exception {
  Instances features = this.extractPart(D, false);
  Instances labels = this.extractPart(D, true);

  BooleanMatrixDecomposition bmd = BooleanMatrixDecomposition.BEST_CONFIGURED(this.threshold);
  Tuple<Instances, Instances> res = bmd.decompose(labels, this.size);
  this.compressedMatrix = res._1;
  this.uppermatrix = res._2;

  Instances result = Instances.mergeInstances(compressedMatrix, features);
  result.setClassIndex(this.getSize());
  return result;
}
From source file:meka.classifiers.multilabel.PLST.java
License:Open Source License
/**
 * The method to transform the labels into another set of latent labels,
 * typically a compression method is used, e.g., Boolean matrix decomposition
 * in the case of MLC-BMaD, or matrix multiplication based on SVD for PLST.
 *
 * @param D the instances to transform into new instances with transformed labels. The
 *          Instances consist of features and original labels.
 * @return The resulting instances. Instances consist of features and transformed labels.
 */
@Override
public Instances transformLabels(Instances D) throws Exception {
  Instances features = this.extractPart(D, false);
  Instances labels = this.extractPart(D, true);

  Matrix labelMatrix = MatrixUtils.instancesToMatrix(labels);

  // first, lets do the preprocessing as in the original implementation
  double[] averages = new double[labels.numAttributes()];
  for (int i = 0; i < labels.numAttributes(); i++) {
    double[] column = labels.attributeToDoubleArray(i);
    double sum = 0.0;
    for (int j = 0; j < column.length; j++) {
      if (column[j] == 1.0) {
        sum += 1.0;
      } else {
        sum += -1;
        // The algorithm needs 1/-1 coding, so let's
        // change the matrix here
        labelMatrix.set(j, i, -1.0);
      }
    }
    averages[i] = sum / column.length;
  }

  double[][] shiftMatrix = new double[1][labels.numAttributes()];
  shiftMatrix[0] = averages;
  // remember shift for prediction
  this.m_Shift = new Matrix(shiftMatrix);

  double[][] shiftTrainMatrix = new double[labels.numInstances()][labels.numAttributes()];
  for (int i = 0; i < labels.numInstances(); i++) {
    shiftTrainMatrix[i] = averages;
  }
  Matrix trainShift = new Matrix(shiftTrainMatrix);

  SingularValueDecomposition svd = new SingularValueDecomposition(labelMatrix.minus(trainShift));

  // The paper uses U here, but the implementation by the authors uses V, so
  // we used V here too.
  m_v = svd.getV();

  // remove columns so only size are left
  double[][] newArr = new double[m_v.getRowDimension()][this.getSize()];
  for (int i = 0; i < newArr.length; i++) {
    for (int j = 0; j < newArr[i].length; j++) {
      newArr[i][j] = m_v.getArray()[i][j];
    }
  }
  m_v = new Matrix(newArr);

  // now the multiplication (last step of the algorithm)
  Matrix compressed = MatrixUtils.instancesToMatrix(labels).times(this.m_v);

  // and transform it to Instances
  ArrayList<Attribute> attinfos = new ArrayList<Attribute>();
  for (int i = 0; i < compressed.getColumnDimension(); i++) {
    Attribute att = new Attribute("att" + i);
    attinfos.add(att);
  }

  // create pattern instances (also used in prediction) note: this is a regression
  // problem now, labels are not binary
  this.m_PatternInstances = new Instances("compressedlabels", attinfos, compressed.getRowDimension());

  // fill result Instances
  Instances result = Instances.mergeInstances(MatrixUtils.matrixToInstances(compressed, m_PatternInstances),
      features);
  result.setClassIndex(this.getSize());
  return result;
}
From source file:meka.classifiers.multilabel.PLST.java
License:Open Source License
/**
 * Transforms the instance in the prediction process before given to the internal multi-label
 * or multi-target classifier. The instance is passed having the original set of labels, these
 * must be replaced with the transformed labels (attributes) so that the internal classifier
 * can predict them.
 *
 * @param x The instance to transform. Consists of features and labels.
 * @return The transformed instance. Consists of features and transformed labels.
 */
@Override
public Instance transformInstance(Instance x) throws Exception {
  Instances tmpInst = new Instances(x.dataset());
  tmpInst.delete();
  tmpInst.add(x);
  Instances features = this.extractPart(tmpInst, false);

  Instances labels = new Instances(this.m_PatternInstances);
  labels.add(new DenseInstance(labels.numAttributes()));

  Instances result = Instances.mergeInstances(labels, features);
  result.setClassIndex(labels.numAttributes());
  return result.instance(0);
}
From source file:org.kramerlab.mlcbmad.classifier.MLCBMaD.java
License:Open Source License
/**
 * Builds the classifier.
 *
 * @param trainingSet
 * @throws Exception
 */
protected void buildInternal(MultiLabelInstances trainingSet) throws Exception {
  // This step is necessary as there are problems with the
  // attribute indexes in WEKA when merging instances
  Instances train = this.copyInstances(trainingSet.getDataSet());

  debug("Learning model...");
  debug("Parameter Setting k = " + k + " and t = " + t + " ...");

  // remove the features, so we make a matrix decomposition only of
  // the labels
  Remove rem0 = new Remove();
  int[] features0 = trainingSet.getFeatureIndices();
  rem0.setAttributeIndicesArray(features0);
  rem0.setInputFormat(train);
  train = Filter.useFilter(train, rem0);

  Instances decompData;

  // lets do the decomposition
  // first save the arff in non sparse form
  SparseToNonSparse spfilter = new SparseToNonSparse();
  spfilter.setInputFormat(train);
  Instances out = Filter.useFilter(train, spfilter);

  BooleanMatrixDecomposition bmd = BooleanMatrixDecomposition.BEST_CONFIGURED(this.t);
  Tuple<Instances, Instances> res = bmd.decompose(out, this.k);
  decompData = res._1;
  uppermatrix = res._2;

  // get indices
  decomp = decompData;
  int[] features = trainingSet.getFeatureIndices();
  int[] decompindices = new int[decompData.numAttributes()];
  int countf = 0;
  for (int i = features.length; i < (decompData.numAttributes() + features.length); i++) {
    decompindices[countf] = i;
    countf++;
  }
  labelsdecomp = decompindices;

  // get features from training set
  Instances copied = this.copyInstances(trainingSet.getDataSet());
  Remove rem = new Remove();
  rem.setAttributeIndicesArray(features);
  rem.setInvertSelection(true);
  rem.setInputFormat(copied);
  Instances onlyFeatures = Filter.useFilter(copied, rem);

  // merge features with matrix decomposition
  if (onlyFeatures.numInstances() != decompData.numInstances()) {
    // sthg went wrong when decomposing
    throw new Exception("Problem when decomposing");
  }
  featuresAndDecomp = Instances.mergeInstances(onlyFeatures, this.copyInstances(decompData));
  Instances trainset = featuresAndDecomp;

  LabelsMetaDataImpl trainlmd = new LabelsMetaDataImpl();
  for (int lab : labelsdecomp) {
    LabelNode lni = new LabelNodeImpl(trainset.attribute(lab).name());
    trainlmd.addRootNode(lni);
  }
  MultiLabelInstances trainMulti = new MultiLabelInstances(trainset, trainlmd);

  // build br for decomposed label prediction
  basebr = new BinaryRelevance(baseClassifier);
  basebr.build(trainMulti);
  debug("Model trained... all done.");
}
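Unlike most examples here, this one checks the equal-row-count precondition itself and throws a domain-specific exception ("Problem when decomposing") rather than letting mergeInstances fail with a bare IllegalArgumentException. The copyInstances calls on both merge arguments appear to be the author's workaround for the attribute-index problems mentioned in the opening comment.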
From source file:org.opentox.jaqpot3.qsar.predictor.FastRbfNnPredictor.java
License:Open Source License
@Override
public Instances predict(Instances inputSet) throws JaqpotException {
  FastRbfNnModel actualModel = (FastRbfNnModel) model.getActualModel().getSerializableActualModel();
  Instances orderedDataset = null;
  try {
    orderedDataset = InstancesUtil.sortForPMMLModel(model.getIndependentFeatures(), trFieldsAttrIndex,
        inputSet, -1);
  } catch (JaqpotException ex) {
    logger.error(null, ex);
  }
  Instances predictions = new Instances(orderedDataset);

  Add attributeAdder = new Add();
  attributeAdder.setAttributeIndex("last");
  attributeAdder.setAttributeName(model.getPredictedFeatures().iterator().next().getUri().toString());
  try {
    attributeAdder.setInputFormat(predictions);
    predictions = Filter.useFilter(predictions, attributeAdder);
    predictions.setClass(
        predictions.attribute(model.getPredictedFeatures().iterator().next().getUri().toString()));
  } catch (Exception ex) {
    String message = "Exception while trying to add prediction feature to Instances";
    logger.debug(message, ex);
    throw new JaqpotException(message, ex);
  }

  Instances nodes = actualModel.getNodes();
  double[] sigma = actualModel.getSigma();
  double[] coeffs = actualModel.getLrCoefficients();
  double sum;
  for (int i = 0; i < orderedDataset.numInstances(); i++) {
    sum = 0;
    for (int j = 0; j < nodes.numInstances(); j++) {
      sum += rbf(sigma[j], orderedDataset.instance(i), nodes.instance(j)) * coeffs[j];
    }
    predictions.instance(i).setClassValue(sum);
  }

  List<Integer> trFieldsIndex = WekaInstancesProcess.getTransformationFieldsAttrIndex(predictions, pmmlObject);
  predictions = WekaInstancesProcess.removeInstancesAttributes(predictions, trFieldsIndex);

  Instances resultSet = Instances.mergeInstances(justCompounds, predictions);
  return resultSet;
}
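The final merge attaches the compound-identifier columns (justCompounds) back onto the computed predictions; this relies on justCompounds holding one row per input compound, in the same order as the sorted dataset the predictions were computed from, since mergeInstances pairs rows purely by position.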