Example usage for weka.core Instance setDataset

List of usage examples for weka.core Instance setDataset

Introduction

On this page you can find example usages of weka.core Instance.setDataset.

Prototype

public void setDataset(Instances instances);

Source Link

Document

Sets the reference to the dataset.

Usage

From source file:en_deep.mlprocess.manipulation.featmodif.ReplaceMissing.java

License:Open Source License

/**
 * Convert a single instance over if the class is nominal. The converted
 * instance is added to the end of the output queue.
 *
 * @param instance the instance to convert
 */
private void convertInstance(Instance instance) {

    // Duplicate the incoming instance, preserving its sparse/dense representation.
    final double[] rawValues = instance.toDoubleArray();
    final Instance copy = (instance instanceof SparseInstance)
            ? new SparseInstance(instance.weight(), rawValues)
            : new DenseInstance(instance.weight(), rawValues);

    // Attach the output header and transfer the existing string values.
    copy.setDataset(getOutputFormat());
    copyValues(copy, false, instance.dataset(), getOutputFormat()); // beware of weird behavior of this function (see source)!!
    copy.setDataset(getOutputFormat());

    // Compute the replacement value for every missing entry in the filtered columns.
    final double[] filled = instance.toDoubleArray();

    for (int idx = 0; idx < getInputFormat().numAttributes(); idx++) {

        if (!m_Columns.isInRange(idx) || !instance.isMissing(idx)) {
            continue;
        }

        final Attribute att = instance.attribute(idx);

        if (att.isNominal()) {
            // Look up the dedicated "missing" label in the output nominal attribute.
            filled[idx] = copy.dataset().attribute(idx).indexOfValue(m_ReplVal);
        } else if (att.isString()) {
            // Register the "missing" label as a new string value.
            filled[idx] = copy.dataset().attribute(idx).addStringValue(m_ReplVal);
        }
    }

    // Write the replacements into the copy and queue it for output.
    copy.replaceMissingValues(filled);
    push(copy);
}

From source file:en_deep.mlprocess.manipulation.SetAwareNominalToBinary.java

License:Open Source License

/**
 * Convert a single instance over if the class is nominal. The converted
 * instance is added to the end of the output queue.
 *
 * @param instance the instance to convert
 */
private void convertInstance(Instance instance) {

    // Output value array sized for the binarized format.
    final double[] outVals = new double[outputFormatPeek().numAttributes()];
    int outPos = 0;

    for (int attIdx = 0; attIdx < getInputFormat().numAttributes(); attIdx++) {
        final Attribute att = getInputFormat().attribute(attIdx);

        // An attribute is copied unchanged when it is not a convertible nominal:
        // non-nominal, the class attribute, outside the selected columns, or
        // (unless m_TransformAll) binary already.
        final boolean passThrough = !att.isNominal()
                || (attIdx == getInputFormat().classIndex())
                || !m_Columns.isInRange(attIdx)
                || ((att.numValues() <= 2) && (!m_TransformAll));

        if (passThrough) {
            outVals[outPos++] = instance.value(attIdx);
        } else {
            // Expand the nominal value into its binary encoding.
            outPos += setConvertedAttribute(att, instance.value(attIdx), outVals, outPos);
        }
    }

    // Preserve the sparse/dense representation of the input.
    final Instance converted = (instance instanceof SparseInstance)
            ? new SparseInstance(instance.weight(), outVals)
            : new DenseInstance(instance.weight(), outVals);

    converted.setDataset(getOutputFormat());
    copyValues(converted, false, instance.dataset(), getOutputFormat());
    converted.setDataset(getOutputFormat());
    push(converted);
}

From source file:es.jarias.FMC.ClassCompoundTransformation.java

License:Open Source License

/**
 * Transforms a multi-label instance into a single-label one by removing all
 * label attributes and appending a single compound class attribute.
 *
 * @param instance the instance to transform
 * @param labelIndices indices of the label attributes to remove
 * @return transformed instance
 * @throws Exception if the transformation fails
 */
public Instance transformInstance(Instance instance, int[] labelIndices) throws Exception {
    // Strip all label attributes from the instance.
    final Instance result = RemoveAllLabels.transformInstance(instance, labelIndices);
    // Detach from any dataset so the attribute insertion below is permitted.
    result.setDataset(null);
    // Append a slot for the compound class value, then bind the target header.
    result.insertAttributeAt(result.numAttributes());
    result.setDataset(transformedFormat);
    return result;
}

From source file:es.ubu.XRayDetector.modelo.ventana.VentanaAbstracta.java

License:Open Source License

/**
 * Method that creates an instance which can be classified by Weka.
 * @return Instance with the features values
 */
/**
 * Creates a Weka instance from the extracted feature vectors.
 *
 * Each available feature source is appended in a fixed order (standard,
 * standard-saliency, mean, range, mean-saliency, range-saliency, LBP,
 * LBP-saliency); null sources are skipped.
 *
 * @return Instance with the feature values, bound to the matching header
 */
protected Instance crearInstancia() {
    // 407 = total number of features expected by the header.
    double[] newVals = new double[407];
    int count = 0;

    // Append every available feature vector; null sources contribute nothing.
    count = appendFeatures(newVals, count,
            ftStandard == null ? null : ftStandard.getVectorResultados());
    count = appendFeatures(newVals, count,
            ftStandardSaliency == null ? null : ftStandardSaliency.getVectorResultados());
    count = appendFeatures(newVals, count, meanVector);
    count = appendFeatures(newVals, count, rangeVector);
    count = appendFeatures(newVals, count, meanVectorSaliency);
    count = appendFeatures(newVals, count, rangeVectorSaliency);
    count = appendFeatures(newVals, count,
            ftLbp == null ? null : ftLbp.getVectorResultados());
    count = appendFeatures(newVals, count,
            ftLbpSaliency == null ? null : ftLbpSaliency.getVectorResultados());

    // newVals now holds the collected feature data (means, ranges, etc.).
    Instance instance = new DenseInstance(1, newVals);

    // Select the attribute header: 0 means "all features", otherwise the best subset.
    List<String> feat = (prop.getTipoCaracteristicas() == 0) ? null : obtainFeatures();
    instance.setDataset(getHeader(feat));
    return instance;
}

/**
 * Copies {@code src} into {@code dest} starting at {@code offset}.
 *
 * @param dest destination array
 * @param offset first index in {@code dest} to write
 * @param src source values; may be null (nothing is copied)
 * @return the offset after the copied values
 */
private static int appendFeatures(double[] dest, int offset, double[] src) {
    if (src == null) {
        return offset;
    }
    System.arraycopy(src, 0, dest, offset, src.length);
    return offset + src.length;
}

From source file:eyetracker.ServerCommunicator.java

public Instance getInput() {
    // Build a sparse instance whose size matches the training header.
    final int numAtts = MLPProcessor.inst.firstInstance().numAttributes();
    final Instance sample = new SparseInstance(numAtts);
    sample.setDataset(MLPProcessor.inst);

    // Fill every attribute except the last (the class) from the
    // comma-separated input string.
    final String[] tokens = unifiedData.split(",");
    //final String[] tokens = examData.split(",");
    for (int attIdx = 0; attIdx < numAtts - 1; attIdx++) {
        sample.setValue(attIdx, Double.valueOf(tokens[attIdx]));
    }
    return sample;
}

From source file:fantail.algorithms.RankingViaRegression.java

License:Open Source License

@Override
public double[] recommendRanking(Instance metaInst) throws Exception {

    // Copy the features and add one dummy slot for the target attribute
    // expected by the temporary header.
    final double[] featVals = new double[m_NumFeatures + 1];
    for (int f = 0; f < featVals.length - 1; f++) {
        featVals[f] = metaInst.value(f);
    }
    featVals[featVals.length - 1] = 0;

    final Instance wrapped = new DenseInstance(1.0, featVals);
    wrapped.setDataset(m_TempHeader);

    // One regression prediction per target, clamped to [0, m_NumTargets].
    final double[] preds = new double[m_NumTargets];

    for (int target = 0; target < m_NumTargets; target++) {
        final double raw = m_Classifiers[target].classifyInstance(wrapped);
        preds[target] = Math.min(Math.max(raw, 0), m_NumTargets);
    }

    return Tools.doubleArrayToRanking(preds);
}

From source file:faster_pca.faster_pca.java

License:Open Source License

/**
 * Signify that this batch of input to the filter is finished.
 *
 * @return true if there are instances pending output
 * @throws NullPointerException if no input structure has been defined
 * @throws Exception if there was a problem finishing the batch.
 */
@Override
public boolean batchFinished() throws Exception {

    final Instances input = getInputFormat();
    if (input == null) {
        throw new NullPointerException("No input instance format defined");
    }

    // Derive the principal components from the first batch only.
    if (!isFirstBatchDone()) {
        setup(input);
    }

    // Project every buffered instance and push it to the output queue.
    for (final Instance raw : input) {
        final Instance projected = convertInstance(raw);
        projected.setDataset(getOutputFormat());
        push(projected);
    }

    flushInput();
    m_NewBatch = true;
    m_FirstBatchDone = true;

    return numPendingOutput() != 0;
}

From source file:fcul.viegas.ml.learners.NetworkStreamLearningClassifierMapFunction.java

public InstanceStreamDTO map(InstanceStreamDTO instance) throws Exception {

    // Bind the raw instance to the shared header so attribute/class metadata resolves.
    weka.core.Instance inst = instance.getInstance();
    inst.setDataset(this.coreInstances);
    // NOTE(review): setting the class to its own value looks like a no-op — confirm intent.
    inst.setClassValue(inst.classValue());
    inst = classifier.constructMappedInstance(inst);

    HoeffdingTree tree = (HoeffdingTree) classifier.getClassifier();
    double[] classe = tree.distributionForInstance(inst);
    // Drop the heavyweight Weka instance from the DTO before returning it.
    instance.setInstance(null);
    //System.out.println("\t classe[0]: " + classe[0] + " classe[1]: " + classe[1]);
    // Assign whichever of the two classes has the larger predicted probability.
    if (classe[0] > classe[1]) {
        instance.setAssignedClassValueFromLearner(0.0d);
    } else {
        instance.setAssignedClassValueFromLearner(1.0d);
    }
    return instance;
}

From source file:filters.MauiFilter.java

License:Open Source License

/**
 * Converts a document instance into one output instance per candidate
 * keyphrase: each candidate is scored by the classifier, and the resulting
 * instances are stably sorted by distance, TFxIDF and probability before a
 * rank is assigned.
 */
private FastVector convertInstance(Instance instance, boolean training) throws Exception {

    FastVector vector = new FastVector();

    String fileName = instance.stringValue(fileNameAtt);

    if (debugMode) {
        System.err.println("-- Converting instance for document " + fileName);
    }

    // Get the manually assigned key phrases for the document, if any
    HashMap<String, Counter> hashKeyphrases = null;

    if (!instance.isMissing(keyphrasesAtt)) {
        String keyphrases = instance.stringValue(keyphrasesAtt);
        hashKeyphrases = getGivenKeyphrases(keyphrases);
    }

    // Get the document text
    String documentText = instance.stringValue(documentAtt);

    // Compute the candidate topics, reusing a cached candidate list when available
    HashMap<String, Candidate> candidateList;
    if (allCandidates != null && allCandidates.containsKey(instance)) {
        candidateList = allCandidates.get(instance);
    } else {
        candidateList = getCandidates(documentText);
    }
    if (debugMode) {
        System.err.println(candidateList.size() + " candidates ");
    }

    // Set indices for key attributes in the output format
    int tfidfAttIndex = documentAtt + 2;
    int distAttIndex = documentAtt + 3;
    // NOTE(review): probsAttIndex is overwritten inside the candidate loop below;
    // this initial value only applies if the loop body never reaches that point.
    int probsAttIndex = documentAtt + numFeatures;

    int countPos = 0;
    int countNeg = 0;

    // Go through the phrases and convert them into instances
    for (Candidate candidate : candidateList.values()) {

        // Skip candidates that occur too rarely
        if (candidate.getFrequency() < minOccurFrequency) {
            continue;
        }

        String name = candidate.getName();
        String orig = candidate.getBestFullForm();
        if (!vocabularyName.equals("none")) {
            orig = candidate.getTitle();
        }

        double[] vals = computeFeatureValues(candidate, training, hashKeyphrases, candidateList);

        // Pre-3.6 Weka API: Instance is a concrete class here
        Instance inst = new Instance(instance.weight(), vals);

        inst.setDataset(classifierData);

        // Get probability of a phrase being key phrase
        double[] probs = classifier.distributionForInstance(inst);

        // With a nominal class the positive-class probability sits at index 1
        double prob = probs[0];
        if (nominalClassValue) {
            prob = probs[1];
        }

        // Compute attribute values for final instance
        double[] newInst = new double[instance.numAttributes() + numFeatures + 2];

        int pos = 0;
        // NOTE(review): loop starts at i = 1, skipping attribute 0 — confirm intent.
        for (int i = 1; i < instance.numAttributes(); i++) {

            if (i == documentAtt) {

                // output of values for a given phrase:

                // Add phrase
                int index = outputFormatPeek().attribute(pos).addStringValue(name);
                newInst[pos++] = index;

                // Add original version (best full form, or vocabulary title)
                if (orig != null) {
                    index = outputFormatPeek().attribute(pos).addStringValue(orig);
                } else {
                    index = outputFormatPeek().attribute(pos).addStringValue(name);
                }

                newInst[pos++] = index;

                // Add features
                newInst[pos++] = inst.value(tfIndex);
                newInst[pos++] = inst.value(idfIndex);
                newInst[pos++] = inst.value(tfidfIndex);
                newInst[pos++] = inst.value(firstOccurIndex);
                newInst[pos++] = inst.value(lastOccurIndex);
                newInst[pos++] = inst.value(spreadOccurIndex);
                newInst[pos++] = inst.value(domainKeyphIndex);
                newInst[pos++] = inst.value(lengthIndex);
                newInst[pos++] = inst.value(generalityIndex);
                newInst[pos++] = inst.value(nodeDegreeIndex);
                newInst[pos++] = inst.value(semRelIndex);
                newInst[pos++] = inst.value(wikipKeyphrIndex);
                newInst[pos++] = inst.value(invWikipFreqIndex);
                newInst[pos++] = inst.value(totalWikipKeyphrIndex);

                // Add probability
                probsAttIndex = pos;
                newInst[pos++] = prob;

                // Set rank to missing (computed below)
                newInst[pos++] = Instance.missingValue();

            } else if (i == keyphrasesAtt) {
                newInst[pos++] = inst.classValue();
            } else {
                // Pass every other attribute value through unchanged
                newInst[pos++] = instance.value(i);
            }
        }

        Instance ins = new Instance(instance.weight(), newInst);
        ins.setDataset(outputFormatPeek());
        vector.addElement(ins);

        // Track the class balance for the debug summary below
        if (inst.classValue() == 0) {
            countNeg++;
        } else {
            countPos++;
        }
    }
    if (debugMode) {
        System.err.println(countPos + " positive; " + countNeg + " negative instances");
    }

    // Sort phrases according to their distance (stable sort)
    double[] vals = new double[vector.size()];
    for (int i = 0; i < vals.length; i++) {
        vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex);
    }
    FastVector newVector = new FastVector(vector.size());
    int[] sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their tfxidf value (stable sort, descending via negation)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their probability (stable sort, descending via 1 - p)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Compute rank of phrases. Check for subphrases that are ranked
    // lower than superphrases and assign probability -1 and set the
    // rank to Integer.MAX_VALUE
    int rank = 1;
    for (int i = 0; i < vals.length; i++) {
        Instance currentInstance = (Instance) vector.elementAt(i);
        // Short cut: if phrase very unlikely make rank very low and
        // continue
        if (Utils.grOrEq(vals[i], 1.0)) {
            currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
            continue;
        }

        // Otherwise look for super phrase starting with first phrase
        // in list that has same probability, TFxIDF value, and distance as
        // current phrase. We do this to catch all superphrases
        // that have same probability, TFxIDF value and distance as current
        // phrase.
        int startInd = i;
        while (startInd < vals.length) {
            Instance inst = (Instance) vector.elementAt(startInd);
            if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex))
                    || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex))
                    || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) {
                break;
            }
            startInd++;
        }
        currentInstance.setValue(probsAttIndex + 1, rank++);

    }

    return vector;
}

From source file:fk.stardust.localizer.machinelearn.WekaFaultLocalizer.java

License:Open Source License

@Override
public Ranking<T> localize(final ISpectra<T> spectra) {

    // == 1. Build the Weka training header

    final List<INode<T>> components = new ArrayList<>(spectra.getNodes());

    // Nominal domain shared by all attributes: involved ("t") or not ("f").
    final List<String> tfValues = new ArrayList<String>();
    tfValues.add("t");
    tfValues.add("f");

    // One attribute per program component.
    final Map<INode<T>, Attribute> nodeAttributes = new HashMap<INode<T>, Attribute>();
    final ArrayList<Attribute> header = new ArrayList<Attribute>(); // NOCS: Weka needs ArrayList..
    for (final INode<T> component : components) {
        final Attribute attr = new Attribute(component.toString(), tfValues);
        header.add(attr);
        nodeAttributes.put(component, attr);
    }

    // Class attribute: whether the trace succeeded.
    final Attribute successAttribute = new Attribute("success", tfValues);
    header.add(successAttribute);

    final Instances trainingSet = new Instances("TraceInfoInstances", header, 1);
    trainingSet.setClassIndex(header.size() - 1);

    // == 2. Add one training instance per execution trace

    for (final ITrace<T> trace : spectra.getTraces()) {
        final Instance row = new DenseInstance(components.size() + 1);
        row.setDataset(trainingSet);
        for (final INode<T> component : components) {
            row.setValue(nodeAttributes.get(component), trace.isInvolved(component) ? "t" : "f");
        }
        row.setValue(successAttribute, trace.isSuccessful() ? "t" : "f");
        trainingSet.add(row);
    }

    // == 3. Rank components by predicted failure probability

    try {
        final Classifier classifier = this.buildClassifier(this.classifierName, this.classifierOptions,
                trainingSet);
        final Ranking<T> ranking = new Ranking<>();

        System.out.println("begin classifying");
        int classified = 0;

        // Query instance: start with no component involved and a failing outcome.
        final Instance query = new DenseInstance(components.size() + 1);
        query.setDataset(trainingSet);
        for (final INode<T> component : components) {
            query.setValue(nodeAttributes.get(component), "f");
        }
        query.setValue(successAttribute, "f");

        for (final INode<T> component : components) {
            classified++;
            if (classified % 1000 == 0) {
                System.out.println(String.format("Classified %d nodes.", classified));
            }

            // Mark only the current component as involved ...
            query.setValue(nodeAttributes.get(component), "t");

            // ... and rank it by the predicted probability of a failing run.
            final double[] distribution = classifier.distributionForInstance(query);
            ranking.rank(component, distribution[1]);

            // Reset involvement before moving to the next component.
            query.setValue(nodeAttributes.get(component), "f");
        }
        return ranking;
    } catch (final Exception e) { // NOCS: Weka throws only raw exceptions
        throw new RuntimeException(e);
    }
}