List of usage examples for weka.core.Instance.setDataset(Instances)
public void setDataset(Instances instances);
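The examples below all rely on the same basic contract: an Instance built from raw double values carries no attribute metadata of its own, so it must be attached to an Instances header with setDataset() before any call that needs that metadata (classAttribute(), stringValue(), a classifier's distributionForInstance(), ...). A minimal, self-contained sketch of that contract follows; the attribute names and header are invented for illustration, not taken from any of the projects below:

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class SetDatasetSketch {
    public static void main(String[] args) {
        // Build a header: two numeric attributes and a nominal class.
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("width"));
        atts.add(new Attribute("height"));
        ArrayList<String> labels = new ArrayList<String>();
        labels.add("yes");
        labels.add("no");
        atts.add(new Attribute("class", labels));

        Instances header = new Instances("example", atts, 0);
        header.setClassIndex(header.numAttributes() - 1);

        // A freshly created instance has no dataset, so calls such as
        // classAttribute() or stringValue() would fail at this point.
        Instance inst = new DenseInstance(1.0, new double[] { 2.0, 3.0, 0.0 });

        // Attaching the header gives the instance access to attribute metadata.
        inst.setDataset(header);
        System.out.println(inst.classAttribute().name()); // prints "class"
    }
}

Note that setDataset() only sets a reference: it does not add the instance to the dataset and performs no compatibility checking, so the caller is responsible for making sure the values line up with the header.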
From source file:en_deep.mlprocess.manipulation.featmodif.ReplaceMissing.java
License:Open Source License
/**
 * Convert a single instance over if the class is nominal. The converted
 * instance is added to the end of the output queue.
 *
 * @param instance the instance to convert
 */
private void convertInstance(Instance instance) {

    // create a copy of the input instance
    Instance inst = null;
    if (instance instanceof SparseInstance) {
        inst = new SparseInstance(instance.weight(), instance.toDoubleArray());
    } else {
        inst = new DenseInstance(instance.weight(), instance.toDoubleArray());
    }

    // copy the string values from this instance as well (only the existing ones)
    inst.setDataset(getOutputFormat());
    copyValues(inst, false, instance.dataset(), getOutputFormat()); // beware of weird behavior of this function (see source)!!
    inst.setDataset(getOutputFormat());

    // find the missing values to be filled + the double values for the new "missing" label and store it
    double[] vals = instance.toDoubleArray();
    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
        Attribute att = instance.attribute(j);
        if (m_Columns.isInRange(j) && instance.isMissing(j)) {
            // find the "missing" value in the output nominal attribute
            if (att.isNominal()) {
                vals[j] = inst.dataset().attribute(j).indexOfValue(m_ReplVal);
            }
            // add a string value for the new "missing" label
            else if (att.isString()) {
                vals[j] = inst.dataset().attribute(j).addStringValue(m_ReplVal);
            }
        }
    }

    // fill in the missing values found
    inst.replaceMissingValues(vals);

    push(inst);
}
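A note on the repeated setDataset() calls bracketing copyValues() above (the source comment itself warns about that function's behavior): for string attributes, an instance stores only an index into its header's string table, so moving an instance to a different header requires re-registering the string values there, which is what the filter's copyValues() does. Below is a standalone sketch of that underlying mechanic; it assumes nothing from ReplaceMissing, and the headers and values are invented:

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class StringCopySketch {
    public static void main(String[] args) {
        // Two structurally identical headers, each with one string attribute
        // (a null value list makes the attribute a string attribute).
        ArrayList<Attribute> atts1 = new ArrayList<Attribute>();
        atts1.add(new Attribute("text", (ArrayList<String>) null));
        Instances oldHeader = new Instances("old", atts1, 0);

        ArrayList<Attribute> atts2 = new ArrayList<Attribute>();
        atts2.add(new Attribute("text", (ArrayList<String>) null));
        Instances newHeader = new Instances("new", atts2, 0);

        // The instance stores only an index into oldHeader's string table.
        Instance inst = new DenseInstance(1.0, new double[1]);
        inst.setDataset(oldHeader);
        inst.setValue(0, "hello");

        // After re-attaching to newHeader, that index is meaningless until the
        // string is added to newHeader's table; inside a filter this is the
        // job copyValues() performs.
        double idx = newHeader.attribute(0).addStringValue("hello");
        inst.setValue(0, idx);
        inst.setDataset(newHeader);
        System.out.println(inst.stringValue(0)); // prints "hello"
    }
}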
From source file:en_deep.mlprocess.manipulation.SetAwareNominalToBinary.java
License:Open Source License
/**
 * Convert a single instance over if the class is nominal. The converted
 * instance is added to the end of the output queue.
 *
 * @param instance the instance to convert
 */
private void convertInstance(Instance instance) {

    double[] vals = new double[outputFormatPeek().numAttributes()];
    int attSoFar = 0;

    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
        Attribute att = getInputFormat().attribute(j);
        if (!att.isNominal() || (j == getInputFormat().classIndex()) || !m_Columns.isInRange(j)) {
            vals[attSoFar] = instance.value(j);
            attSoFar++;
        } else {
            if ((att.numValues() <= 2) && (!m_TransformAll)) {
                vals[attSoFar] = instance.value(j);
                attSoFar++;
            } else {
                attSoFar += setConvertedAttribute(att, instance.value(j), vals, attSoFar);
            }
        }
    }

    Instance inst = null;
    if (instance instanceof SparseInstance) {
        inst = new SparseInstance(instance.weight(), vals);
    } else {
        inst = new DenseInstance(instance.weight(), vals);
    }
    inst.setDataset(getOutputFormat());
    copyValues(inst, false, instance.dataset(), getOutputFormat());
    inst.setDataset(getOutputFormat());
    push(inst);
}
From source file:es.jarias.FMC.ClassCompoundTransformation.java
License:Open Source License
/**
 * @param instance
 * @param labelIndices
 * @return transformed instance
 * @throws Exception
 */
public Instance transformInstance(Instance instance, int[] labelIndices) throws Exception {
    Instance transformedInstance = RemoveAllLabels.transformInstance(instance, labelIndices);
    // detach from the old header before changing the instance's structure
    transformedInstance.setDataset(null);
    transformedInstance.insertAttributeAt(transformedInstance.numAttributes());
    transformedInstance.setDataset(transformedFormat);
    return transformedInstance;
}
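The setDataset(null) before insertAttributeAt() above is the usual detach-modify-reattach pattern: Weka refuses structural edits on an instance while it has access to a dataset (insertAttributeAt() throws a RuntimeException in that case), since they would desynchronize the instance and its header. A self-contained sketch of the same pattern with invented headers:

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class DetachReattachSketch {
    public static void main(String[] args) {
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("a"));
        Instances narrowHeader = new Instances("narrow", atts, 0);

        ArrayList<Attribute> wideAtts = new ArrayList<Attribute>();
        wideAtts.add(new Attribute("a"));
        wideAtts.add(new Attribute("b"));
        Instances wideHeader = new Instances("wide", wideAtts, 0);

        Instance inst = new DenseInstance(1.0, new double[] { 42.0 });
        inst.setDataset(narrowHeader);

        // Detach before changing the instance's structure...
        inst.setDataset(null);
        inst.insertAttributeAt(inst.numAttributes()); // new attribute, value missing
        // ...then attach the header that matches the new structure.
        inst.setDataset(wideHeader);

        System.out.println(inst); // prints "42,?"
    }
}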
From source file:es.ubu.XRayDetector.modelo.ventana.VentanaAbstracta.java
License:Open Source License
/**
 * Method that creates an instance which can be classified by Weka.
 * @return Instance with the features values
 */
protected Instance crearInstancia() {

    double newVals[] = new double[407];
    int count = 0;

    if (ftStandard != null) {
        for (int i = 0; i < ftStandard.getVectorResultados().length; i++) {
            newVals[count] = ftStandard.getVectorResultados()[i];
            count++;
        }
    }
    if (ftStandardSaliency != null) {
        for (int i = 0; i < ftStandardSaliency.getVectorResultados().length; i++) {
            newVals[count] = ftStandardSaliency.getVectorResultados()[i];
            count++;
        }
    }
    if (meanVector != null) {
        for (int i = 0; i < meanVector.length; i++) {
            newVals[count] = meanVector[i];
            count++;
        }
    }
    if (rangeVector != null) {
        for (int i = 0; i < rangeVector.length; i++) {
            newVals[count] = rangeVector[i];
            count++;
        }
    }
    if (meanVectorSaliency != null) {
        for (int i = 0; i < meanVectorSaliency.length; i++) {
            newVals[count] = meanVectorSaliency[i];
            count++;
        }
    }
    if (rangeVectorSaliency != null) {
        for (int i = 0; i < rangeVectorSaliency.length; i++) {
            newVals[count] = rangeVectorSaliency[i];
            count++;
        }
    }
    if (ftLbp != null) {
        for (int i = 0; i < ftLbp.getVectorResultados().length; i++) {
            newVals[count] = ftLbp.getVectorResultados()[i];
            count++;
        }
    }
    if (ftLbpSaliency != null) {
        for (int i = 0; i < ftLbpSaliency.getVectorResultados().length; i++) {
            newVals[count] = ftLbpSaliency.getVectorResultados()[i];
            count++;
        }
    }

    // newVals is the vector of doubles that holds the feature data (means, ranges, etc.)
    Instance instance = new DenseInstance(1, newVals);

    List<String> feat;
    if (prop.getTipoCaracteristicas() == 0) { // all features
        feat = null;
    } else { // best features
        feat = obtainFeatures();
    }
    instance.setDataset(getHeader(feat));

    return instance;
}
From source file:eyetracker.ServerCommunicator.java
public Instance getInput() {
    // Initialize all the attributes.
    int totalAttribute = MLPProcessor.inst.firstInstance().numAttributes();
    Instance instance = new SparseInstance(totalAttribute);
    instance.setDataset(MLPProcessor.inst);

    String[] attributes = unifiedData.split(",");
    //String[] attributes = examData.split(",");
    for (int i = 0; i < totalAttribute - 1; i++) {
        instance.setValue(i, Double.valueOf(attributes[i]));
    }
    return instance;
}
From source file:fantail.algorithms.RankingViaRegression.java
License:Open Source License
@Override
public double[] recommendRanking(Instance metaInst) throws Exception {

    double[] values = new double[m_NumFeatures + 1];
    for (int i = 0; i < values.length - 1; i++) {
        values[i] = metaInst.value(i);
    }
    values[values.length - 1] = 0;

    Instance inst = new DenseInstance(1.0, values);
    inst.setDataset(m_TempHeader);

    double[] preds = new double[m_NumTargets];
    for (int t = 0; t < m_NumTargets; t++) {
        double pred = m_Classifiers[t].classifyInstance(inst);
        if (pred <= 0) {
            pred = 0;
        }
        if (pred >= m_NumTargets) {
            pred = m_NumTargets;
        }
        preds[t] = pred;
    }
    return Tools.doubleArrayToRanking(preds);
}
From source file:faster_pca.faster_pca.java
License:Open Source License
/**
 * Signify that this batch of input to the filter is finished.
 *
 * @return true if there are instances pending output
 * @throws NullPointerException if no input structure has been defined
 * @throws Exception if there was a problem finishing the batch
 */
@Override
public boolean batchFinished() throws Exception {

    int i;
    Instances insts;
    Instance inst;

    if (getInputFormat() == null) {
        throw new NullPointerException("No input instance format defined");
    }

    insts = getInputFormat();

    if (!isFirstBatchDone()) {
        setup(insts);
    }

    Iterator<Instance> iter = insts.iterator();
    while (iter.hasNext()) {
        inst = convertInstance(iter.next());
        inst.setDataset(getOutputFormat());
        push(inst);
    }

    flushInput();
    m_NewBatch = true;
    m_FirstBatchDone = true;

    return (numPendingOutput() != 0);
}
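For context, batchFinished() is the tail end of Weka's Filter contract; callers rarely invoke it directly and instead go through Filter.useFilter(), which feeds each instance through input(), calls batchFinished(), and drains the queue that push() filled. A sketch of that calling side; the input file name and the filter's no-argument construction are assumptions, not taken from the project:

import faster_pca.faster_pca;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.Filter;

public class FilterUsageSketch {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("data.arff"); // hypothetical input file
        faster_pca pca = new faster_pca();             // configuration assumed elsewhere
        pca.setInputFormat(data);                      // defines the output format
        // useFilter pushes every instance through the filter, then calls
        // batchFinished() and collects everything push() queued.
        Instances transformed = Filter.useFilter(data, pca);
        System.out.println(transformed.numInstances() + " instances transformed");
    }
}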
From source file:fcul.viegas.ml.learners.NetworkStreamLearningClassifierMapFunction.java
public InstanceStreamDTO map(InstanceStreamDTO instance) throws Exception {
    weka.core.Instance inst = instance.getInstance();
    inst.setDataset(this.coreInstances);
    inst.setClassValue(inst.classValue());
    inst = classifier.constructMappedInstance(inst);

    HoeffdingTree tree = (HoeffdingTree) classifier.getClassifier();
    double[] classe = tree.distributionForInstance(inst);

    instance.setInstance(null);

    //System.out.println("\t classe[0]: " + classe[0] + " classe[1]: " + classe[1]);
    if (classe[0] > classe[1]) {
        instance.setAssignedClassValueFromLearner(0.0d);
    } else {
        instance.setAssignedClassValueFromLearner(1.0d);
    }

    return instance;
}
From source file:filters.MauiFilter.java
License:Open Source License
/**
 * Converts an instance.
 */
private FastVector convertInstance(Instance instance, boolean training) throws Exception {

    FastVector vector = new FastVector();

    String fileName = instance.stringValue(fileNameAtt);

    if (debugMode) {
        System.err.println("-- Converting instance for document " + fileName);
    }

    // Get the key phrases for the document
    HashMap<String, Counter> hashKeyphrases = null;

    if (!instance.isMissing(keyphrasesAtt)) {
        String keyphrases = instance.stringValue(keyphrasesAtt);
        hashKeyphrases = getGivenKeyphrases(keyphrases);
    }

    // Get the document text
    String documentText = instance.stringValue(documentAtt);

    // Compute the candidate topics
    HashMap<String, Candidate> candidateList;
    if (allCandidates != null && allCandidates.containsKey(instance)) {
        candidateList = allCandidates.get(instance);
    } else {
        candidateList = getCandidates(documentText);
    }

    if (debugMode) {
        System.err.println(candidateList.size() + " candidates ");
    }

    // Set indices for key attributes
    int tfidfAttIndex = documentAtt + 2;
    int distAttIndex = documentAtt + 3;
    int probsAttIndex = documentAtt + numFeatures;

    int countPos = 0;
    int countNeg = 0;

    // Go through the phrases and convert them into instances
    for (Candidate candidate : candidateList.values()) {

        if (candidate.getFrequency() < minOccurFrequency) {
            continue;
        }

        String name = candidate.getName();
        String orig = candidate.getBestFullForm();
        if (!vocabularyName.equals("none")) {
            orig = candidate.getTitle();
        }

        double[] vals = computeFeatureValues(candidate, training, hashKeyphrases, candidateList);

        Instance inst = new Instance(instance.weight(), vals);

        inst.setDataset(classifierData);

        // Get probability of a phrase being key phrase
        double[] probs = classifier.distributionForInstance(inst);

        double prob = probs[0];
        if (nominalClassValue) {
            prob = probs[1];
        }

        // Compute attribute values for final instance
        double[] newInst = new double[instance.numAttributes() + numFeatures + 2];

        int pos = 0;
        for (int i = 1; i < instance.numAttributes(); i++) {

            if (i == documentAtt) {

                // output of values for a given phrase:

                // Add phrase
                int index = outputFormatPeek().attribute(pos).addStringValue(name);
                newInst[pos++] = index;

                // Add original version
                if (orig != null) {
                    index = outputFormatPeek().attribute(pos).addStringValue(orig);
                } else {
                    index = outputFormatPeek().attribute(pos).addStringValue(name);
                }
                newInst[pos++] = index;

                // Add features
                newInst[pos++] = inst.value(tfIndex);
                newInst[pos++] = inst.value(idfIndex);
                newInst[pos++] = inst.value(tfidfIndex);
                newInst[pos++] = inst.value(firstOccurIndex);
                newInst[pos++] = inst.value(lastOccurIndex);
                newInst[pos++] = inst.value(spreadOccurIndex);
                newInst[pos++] = inst.value(domainKeyphIndex);
                newInst[pos++] = inst.value(lengthIndex);
                newInst[pos++] = inst.value(generalityIndex);
                newInst[pos++] = inst.value(nodeDegreeIndex);
                newInst[pos++] = inst.value(semRelIndex);
                newInst[pos++] = inst.value(wikipKeyphrIndex);
                newInst[pos++] = inst.value(invWikipFreqIndex);
                newInst[pos++] = inst.value(totalWikipKeyphrIndex);

                // Add probability
                probsAttIndex = pos;
                newInst[pos++] = prob;

                // Set rank to missing (computed below)
                newInst[pos++] = Instance.missingValue();

            } else if (i == keyphrasesAtt) {
                newInst[pos++] = inst.classValue();
            } else {
                newInst[pos++] = instance.value(i);
            }
        }

        Instance ins = new Instance(instance.weight(), newInst);
        ins.setDataset(outputFormatPeek());
        vector.addElement(ins);

        if (inst.classValue() == 0) {
            countNeg++;
        } else {
            countPos++;
        }
    }

    if (debugMode) {
        System.err.println(countPos + " positive; " + countNeg + " negative instances");
    }

    // Sort phrases according to their distance (stable sort)
    double[] vals = new double[vector.size()];
    for (int i = 0; i < vals.length; i++) {
        vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex);
    }
    FastVector newVector = new FastVector(vector.size());
    int[] sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their tfxidf value (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their probability (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Compute rank of phrases. Check for subphrases that are ranked
    // lower than superphrases and assign probability -1 and set the
    // rank to Integer.MAX_VALUE
    int rank = 1;
    for (int i = 0; i < vals.length; i++) {
        Instance currentInstance = (Instance) vector.elementAt(i);

        // Short cut: if phrase very unlikely make rank very low and continue
        if (Utils.grOrEq(vals[i], 1.0)) {
            currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
            continue;
        }

        // Otherwise look for super phrase starting with first phrase
        // in list that has same probability, TFxIDF value, and distance as
        // current phrase. We do this to catch all superphrases
        // that have same probability, TFxIDF value and distance as current
        // phrase.
        int startInd = i;
        while (startInd < vals.length) {
            Instance inst = (Instance) vector.elementAt(startInd);
            if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex))
                    || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex))
                    || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) {
                break;
            }
            startInd++;
        }
        currentInstance.setValue(probsAttIndex + 1, rank++);
    }

    return vector;
}
From source file:fk.stardust.localizer.machinelearn.WekaFaultLocalizer.java
License:Open Source License
@Override
public Ranking<T> localize(final ISpectra<T> spectra) {

    // == 1. Create Weka training instance
    final List<INode<T>> nodes = new ArrayList<>(spectra.getNodes());

    // nominal true/false values
    final List<String> tf = new ArrayList<String>();
    tf.add("t");
    tf.add("f");

    // create an attribute for each component
    final Map<INode<T>, Attribute> attributeMap = new HashMap<INode<T>, Attribute>();
    final ArrayList<Attribute> attributeList = new ArrayList<Attribute>(); // NOCS: Weka needs ArrayList..
    for (final INode<T> node : nodes) {
        final Attribute attribute = new Attribute(node.toString(), tf);
        attributeList.add(attribute);
        attributeMap.put(node, attribute);
    }

    // create class attribute (trace success)
    final Attribute successAttribute = new Attribute("success", tf);
    attributeList.add(successAttribute);

    // create weka training instance
    final Instances trainingSet = new Instances("TraceInfoInstances", attributeList, 1);
    trainingSet.setClassIndex(attributeList.size() - 1);

    // == 2. add traces to training set

    // add an instance for each trace
    for (final ITrace<T> trace : spectra.getTraces()) {
        final Instance instance = new DenseInstance(nodes.size() + 1);
        instance.setDataset(trainingSet);
        for (final INode<T> node : nodes) {
            instance.setValue(attributeMap.get(node), trace.isInvolved(node) ? "t" : "f");
        }
        instance.setValue(successAttribute, trace.isSuccessful() ? "t" : "f");
        trainingSet.add(instance);
    }

    // == 3. use prediction to localize faults

    // build classifier
    try {
        final Classifier classifier = this.buildClassifier(this.classifierName, this.classifierOptions, trainingSet);
        final Ranking<T> ranking = new Ranking<>();

        System.out.println("begin classifying");
        int classified = 0;

        final Instance instance = new DenseInstance(nodes.size() + 1);
        instance.setDataset(trainingSet);
        for (final INode<T> node : nodes) {
            instance.setValue(attributeMap.get(node), "f");
        }
        instance.setValue(successAttribute, "f");

        for (final INode<T> node : nodes) {
            classified++;
            if (classified % 1000 == 0) {
                System.out.println(String.format("Classified %d nodes.", classified));
            }

            // contain only the current node in the network
            instance.setValue(attributeMap.get(node), "t");

            // predict with which probability this setup leads to a failing network
            final double[] distribution = classifier.distributionForInstance(instance);
            ranking.rank(node, distribution[1]);

            // reset involvement for node
            instance.setValue(attributeMap.get(node), "f");
        }
        return ranking;
    } catch (final Exception e) { // NOCS: Weka throws only raw exceptions
        throw new RuntimeException(e);
    }
}