Example usage for weka.core Instance setDataset

List of usage examples for weka.core Instance setDataset

Introduction

On this page you can find example usages of weka.core Instance.setDataset.

Prototype

public void setDataset(Instances instances);

Source Link

Document

Sets the reference to the dataset.

Usage

From source file:en_deep.mlprocess.manipulation.featmodif.ReplaceMissing.java

License:Open Source License

/**
 * Convert a single instance over if the class is nominal. The converted
 * instance is added to the end of the output queue.
 *
 * @param instance the instance to convert
 */
private void convertInstance(Instance instance) {

    // Duplicate the incoming instance, preserving its sparse/dense representation.
    final double[] rawValues = instance.toDoubleArray();
    final Instance copy = (instance instanceof SparseInstance)
            ? new SparseInstance(instance.weight(), rawValues)
            : new DenseInstance(instance.weight(), rawValues);

    // Attach the output header and transfer the existing string values.
    copy.setDataset(getOutputFormat());
    copyValues(copy, false, instance.dataset(), getOutputFormat()); // beware of weird behavior of this function (see source)!!
    copy.setDataset(getOutputFormat());

    // Compute the replacement value for every missing entry in the filtered columns.
    final double[] filled = instance.toDoubleArray();

    for (int idx = 0; idx < getInputFormat().numAttributes(); idx++) {

        if (!m_Columns.isInRange(idx) || !instance.isMissing(idx)) {
            continue;
        }

        final Attribute att = instance.attribute(idx);

        if (att.isNominal()) {
            // Look up the dedicated "missing" label in the output nominal attribute.
            filled[idx] = copy.dataset().attribute(idx).indexOfValue(m_ReplVal);
        } else if (att.isString()) {
            // Register the "missing" label as a new string value.
            filled[idx] = copy.dataset().attribute(idx).addStringValue(m_ReplVal);
        }
    }

    // Write the replacements into the copy and queue it for output.
    copy.replaceMissingValues(filled);
    push(copy);
}

From source file:en_deep.mlprocess.manipulation.SetAwareNominalToBinary.java

License:Open Source License

/**
 * Convert a single instance over if the class is nominal. The converted
 * instance is added to the end of the output queue.
 *
 * @param instance the instance to convert
 */
private void convertInstance(Instance instance) {

    // Output value array sized for the binarized format.
    final double[] outVals = new double[outputFormatPeek().numAttributes()];
    int outPos = 0;

    for (int attIdx = 0; attIdx < getInputFormat().numAttributes(); attIdx++) {
        final Attribute att = getInputFormat().attribute(attIdx);

        // An attribute is copied unchanged when it is not a convertible nominal:
        // non-nominal, the class attribute, outside the selected columns, or
        // (unless m_TransformAll) binary already.
        final boolean passThrough = !att.isNominal()
                || (attIdx == getInputFormat().classIndex())
                || !m_Columns.isInRange(attIdx)
                || ((att.numValues() <= 2) && (!m_TransformAll));

        if (passThrough) {
            outVals[outPos++] = instance.value(attIdx);
        } else {
            // Expand the nominal value into its binary encoding.
            outPos += setConvertedAttribute(att, instance.value(attIdx), outVals, outPos);
        }
    }

    // Preserve the sparse/dense representation of the input.
    final Instance converted = (instance instanceof SparseInstance)
            ? new SparseInstance(instance.weight(), outVals)
            : new DenseInstance(instance.weight(), outVals);

    converted.setDataset(getOutputFormat());
    copyValues(converted, false, instance.dataset(), getOutputFormat());
    converted.setDataset(getOutputFormat());
    push(converted);
}

From source file:es.jarias.FMC.ClassCompoundTransformation.java

License:Open Source License

/**
 * Transforms a multi-label instance into a single-label one by removing all
 * label attributes and appending a single compound class attribute.
 *
 * @param instance the instance to transform
 * @param labelIndices indices of the label attributes to remove
 * @return transformed instance
 * @throws Exception if the transformation fails
 */
public Instance transformInstance(Instance instance, int[] labelIndices) throws Exception {
    // Strip all label attributes from the instance.
    final Instance result = RemoveAllLabels.transformInstance(instance, labelIndices);
    // Detach from any dataset so the attribute insertion below is permitted.
    result.setDataset(null);
    // Append a slot for the compound class value, then bind the target header.
    result.insertAttributeAt(result.numAttributes());
    result.setDataset(transformedFormat);
    return result;
}

From source file:es.ubu.XRayDetector.modelo.ventana.VentanaAbstracta.java

License:Open Source License

/**
 * Method that creates an instance which can be classified by Weka.
 * @return Instance with the features values
 */
/**
 * Creates a Weka instance from the extracted feature vectors.
 *
 * Each available feature source is appended in a fixed order (standard,
 * standard-saliency, mean, range, mean-saliency, range-saliency, LBP,
 * LBP-saliency); null sources are skipped.
 *
 * @return Instance with the feature values, bound to the matching header
 */
protected Instance crearInstancia() {
    // 407 = total number of features expected by the header.
    double[] newVals = new double[407];
    int count = 0;

    // Append every available feature vector; null sources contribute nothing.
    count = appendFeatures(newVals, count,
            ftStandard == null ? null : ftStandard.getVectorResultados());
    count = appendFeatures(newVals, count,
            ftStandardSaliency == null ? null : ftStandardSaliency.getVectorResultados());
    count = appendFeatures(newVals, count, meanVector);
    count = appendFeatures(newVals, count, rangeVector);
    count = appendFeatures(newVals, count, meanVectorSaliency);
    count = appendFeatures(newVals, count, rangeVectorSaliency);
    count = appendFeatures(newVals, count,
            ftLbp == null ? null : ftLbp.getVectorResultados());
    count = appendFeatures(newVals, count,
            ftLbpSaliency == null ? null : ftLbpSaliency.getVectorResultados());

    // newVals now holds the collected feature data (means, ranges, etc.).
    Instance instance = new DenseInstance(1, newVals);

    // Select the attribute header: 0 means "all features", otherwise the best subset.
    List<String> feat = (prop.getTipoCaracteristicas() == 0) ? null : obtainFeatures();
    instance.setDataset(getHeader(feat));
    return instance;
}

/**
 * Copies {@code src} into {@code dest} starting at {@code offset}.
 *
 * @param dest destination array
 * @param offset first index in {@code dest} to write
 * @param src source values; may be null (nothing is copied)
 * @return the offset after the copied values
 */
private static int appendFeatures(double[] dest, int offset, double[] src) {
    if (src == null) {
        return offset;
    }
    System.arraycopy(src, 0, dest, offset, src.length);
    return offset + src.length;
}

From source file:eyetracker.ServerCommunicator.java

public Instance getInput() {
    // Build a sparse instance whose size matches the training header.
    final int numAtts = MLPProcessor.inst.firstInstance().numAttributes();
    final Instance sample = new SparseInstance(numAtts);
    sample.setDataset(MLPProcessor.inst);

    // Fill every attribute except the last (the class) from the
    // comma-separated input string.
    final String[] tokens = unifiedData.split(",");
    //final String[] tokens = examData.split(",");
    for (int attIdx = 0; attIdx < numAtts - 1; attIdx++) {
        sample.setValue(attIdx, Double.valueOf(tokens[attIdx]));
    }
    return sample;
}

From source file:fantail.algorithms.RankingViaRegression.java

License:Open Source License

@Override
public double[] recommendRanking(Instance metaInst) throws Exception {

    // Copy the features and add one dummy slot for the target attribute
    // expected by the temporary header.
    final double[] featVals = new double[m_NumFeatures + 1];
    for (int f = 0; f < featVals.length - 1; f++) {
        featVals[f] = metaInst.value(f);
    }
    featVals[featVals.length - 1] = 0;

    final Instance wrapped = new DenseInstance(1.0, featVals);
    wrapped.setDataset(m_TempHeader);

    // One regression prediction per target, clamped to [0, m_NumTargets].
    final double[] preds = new double[m_NumTargets];

    for (int target = 0; target < m_NumTargets; target++) {
        final double raw = m_Classifiers[target].classifyInstance(wrapped);
        preds[target] = Math.min(Math.max(raw, 0), m_NumTargets);
    }

    return Tools.doubleArrayToRanking(preds);
}

From source file:faster_pca.faster_pca.java

License:Open Source License

/**
 * Signify that this batch of input to the filter is finished.
 *
 * @return true if there are instances pending output
 * @throws NullPointerException if no input structure has been defined
 * @throws Exception if there was a problem finishing the batch.
 */
@Override
public boolean batchFinished() throws Exception {

    final Instances input = getInputFormat();
    if (input == null) {
        throw new NullPointerException("No input instance format defined");
    }

    // Derive the principal components from the first batch only.
    if (!isFirstBatchDone()) {
        setup(input);
    }

    // Project every buffered instance and push it to the output queue.
    for (final Instance raw : input) {
        final Instance projected = convertInstance(raw);
        projected.setDataset(getOutputFormat());
        push(projected);
    }

    flushInput();
    m_NewBatch = true;
    m_FirstBatchDone = true;

    return numPendingOutput() != 0;
}

From source file:fcul.viegas.ml.learners.NetworkStreamLearningClassifierMapFunction.java

public InstanceStreamDTO map(InstanceStreamDTO instance) throws Exception {

    // Bind the raw instance to the shared header so attribute/class metadata resolves.
    weka.core.Instance inst = instance.getInstance();
    inst.setDataset(this.coreInstances);
    // NOTE(review): setting the class to its own value looks like a no-op — confirm intent.
    inst.setClassValue(inst.classValue());
    inst = classifier.constructMappedInstance(inst);

    HoeffdingTree tree = (HoeffdingTree) classifier.getClassifier();
    double[] classe = tree.distributionForInstance(inst);
    // Drop the heavyweight Weka instance from the DTO before returning it.
    instance.setInstance(null);
    //System.out.println("\t classe[0]: " + classe[0] + " classe[1]: " + classe[1]);
    // Assign whichever of the two classes has the larger predicted probability.
    if (classe[0] > classe[1]) {
        instance.setAssignedClassValueFromLearner(0.0d);
    } else {
        instance.setAssignedClassValueFromLearner(1.0d);
    }
    return instance;
}

From source file:filters.MauiFilter.java

License:Open Source License

/**
 * Converts a document instance into one output instance per candidate
 * keyphrase: each candidate is scored by the classifier, and the resulting
 * instances are stably sorted by distance, TFxIDF and probability before a
 * rank is assigned.
 */
private FastVector convertInstance(Instance instance, boolean training) throws Exception {

    FastVector vector = new FastVector();

    String fileName = instance.stringValue(fileNameAtt);

    if (debugMode) {
        System.err.println("-- Converting instance for document " + fileName);
    }

    // Get the manually assigned key phrases for the document, if any
    HashMap<String, Counter> hashKeyphrases = null;

    if (!instance.isMissing(keyphrasesAtt)) {
        String keyphrases = instance.stringValue(keyphrasesAtt);
        hashKeyphrases = getGivenKeyphrases(keyphrases);
    }

    // Get the document text
    String documentText = instance.stringValue(documentAtt);

    // Compute the candidate topics, reusing a cached candidate list when available
    HashMap<String, Candidate> candidateList;
    if (allCandidates != null && allCandidates.containsKey(instance)) {
        candidateList = allCandidates.get(instance);
    } else {
        candidateList = getCandidates(documentText);
    }
    if (debugMode) {
        System.err.println(candidateList.size() + " candidates ");
    }

    // Set indices for key attributes in the output format
    int tfidfAttIndex = documentAtt + 2;
    int distAttIndex = documentAtt + 3;
    // NOTE(review): probsAttIndex is overwritten inside the candidate loop below;
    // this initial value only applies if the loop body never reaches that point.
    int probsAttIndex = documentAtt + numFeatures;

    int countPos = 0;
    int countNeg = 0;

    // Go through the phrases and convert them into instances
    for (Candidate candidate : candidateList.values()) {

        // Skip candidates that occur too rarely
        if (candidate.getFrequency() < minOccurFrequency) {
            continue;
        }

        String name = candidate.getName();
        String orig = candidate.getBestFullForm();
        if (!vocabularyName.equals("none")) {
            orig = candidate.getTitle();
        }

        double[] vals = computeFeatureValues(candidate, training, hashKeyphrases, candidateList);

        // Pre-3.6 Weka API: Instance is a concrete class here
        Instance inst = new Instance(instance.weight(), vals);

        inst.setDataset(classifierData);

        // Get probability of a phrase being key phrase
        double[] probs = classifier.distributionForInstance(inst);

        // With a nominal class the positive-class probability sits at index 1
        double prob = probs[0];
        if (nominalClassValue) {
            prob = probs[1];
        }

        // Compute attribute values for final instance
        double[] newInst = new double[instance.numAttributes() + numFeatures + 2];

        int pos = 0;
        // NOTE(review): loop starts at i = 1, skipping attribute 0 — confirm intent.
        for (int i = 1; i < instance.numAttributes(); i++) {

            if (i == documentAtt) {

                // output of values for a given phrase:

                // Add phrase
                int index = outputFormatPeek().attribute(pos).addStringValue(name);
                newInst[pos++] = index;

                // Add original version (best full form, or vocabulary title)
                if (orig != null) {
                    index = outputFormatPeek().attribute(pos).addStringValue(orig);
                } else {
                    index = outputFormatPeek().attribute(pos).addStringValue(name);
                }

                newInst[pos++] = index;

                // Add features
                newInst[pos++] = inst.value(tfIndex);
                newInst[pos++] = inst.value(idfIndex);
                newInst[pos++] = inst.value(tfidfIndex);
                newInst[pos++] = inst.value(firstOccurIndex);
                newInst[pos++] = inst.value(lastOccurIndex);
                newInst[pos++] = inst.value(spreadOccurIndex);
                newInst[pos++] = inst.value(domainKeyphIndex);
                newInst[pos++] = inst.value(lengthIndex);
                newInst[pos++] = inst.value(generalityIndex);
                newInst[pos++] = inst.value(nodeDegreeIndex);
                newInst[pos++] = inst.value(semRelIndex);
                newInst[pos++] = inst.value(wikipKeyphrIndex);
                newInst[pos++] = inst.value(invWikipFreqIndex);
                newInst[pos++] = inst.value(totalWikipKeyphrIndex);

                // Add probability
                probsAttIndex = pos;
                newInst[pos++] = prob;

                // Set rank to missing (computed below)
                newInst[pos++] = Instance.missingValue();

            } else if (i == keyphrasesAtt) {
                newInst[pos++] = inst.classValue();
            } else {
                // Pass every other attribute value through unchanged
                newInst[pos++] = instance.value(i);
            }
        }

        Instance ins = new Instance(instance.weight(), newInst);
        ins.setDataset(outputFormatPeek());
        vector.addElement(ins);

        // Track the class balance for the debug summary below
        if (inst.classValue() == 0) {
            countNeg++;
        } else {
            countPos++;
        }
    }
    if (debugMode) {
        System.err.println(countPos + " positive; " + countNeg + " negative instances");
    }

    // Sort phrases according to their distance (stable sort)
    double[] vals = new double[vector.size()];
    for (int i = 0; i < vals.length; i++) {
        vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex);
    }
    FastVector newVector = new FastVector(vector.size());
    int[] sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their tfxidf value (stable sort, descending via negation)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their probability (stable sort, descending via 1 - p)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Compute rank of phrases. Check for subphrases that are ranked
    // lower than superphrases and assign probability -1 and set the
    // rank to Integer.MAX_VALUE
    int rank = 1;
    for (int i = 0; i < vals.length; i++) {
        Instance currentInstance = (Instance) vector.elementAt(i);
        // Short cut: if phrase very unlikely make rank very low and
        // continue
        if (Utils.grOrEq(vals[i], 1.0)) {
            currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
            continue;
        }

        // Otherwise look for super phrase starting with first phrase
        // in list that has same probability, TFxIDF value, and distance as
        // current phrase. We do this to catch all superphrases
        // that have same probability, TFxIDF value and distance as current
        // phrase.
        int startInd = i;
        while (startInd < vals.length) {
            Instance inst = (Instance) vector.elementAt(startInd);
            if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex))
                    || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex))
                    || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) {
                break;
            }
            startInd++;
        }
        currentInstance.setValue(probsAttIndex + 1, rank++);

    }

    return vector;
}

From source file:fk.stardust.localizer.machinelearn.WekaFaultLocalizer.java

License:Open Source License

@Override
public Ranking<T> localize(final ISpectra<T> spectra) {

    // == 1. Build the Weka training header

    final List<INode<T>> components = new ArrayList<>(spectra.getNodes());

    // Nominal domain shared by all attributes: involved ("t") or not ("f").
    final List<String> tfValues = new ArrayList<String>();
    tfValues.add("t");
    tfValues.add("f");

    // One attribute per program component.
    final Map<INode<T>, Attribute> nodeAttributes = new HashMap<INode<T>, Attribute>();
    final ArrayList<Attribute> header = new ArrayList<Attribute>(); // NOCS: Weka needs ArrayList..
    for (final INode<T> component : components) {
        final Attribute attr = new Attribute(component.toString(), tfValues);
        header.add(attr);
        nodeAttributes.put(component, attr);
    }

    // Class attribute: whether the trace succeeded.
    final Attribute successAttribute = new Attribute("success", tfValues);
    header.add(successAttribute);

    final Instances trainingSet = new Instances("TraceInfoInstances", header, 1);
    trainingSet.setClassIndex(header.size() - 1);

    // == 2. Add one training instance per execution trace

    for (final ITrace<T> trace : spectra.getTraces()) {
        final Instance row = new DenseInstance(components.size() + 1);
        row.setDataset(trainingSet);
        for (final INode<T> component : components) {
            row.setValue(nodeAttributes.get(component), trace.isInvolved(component) ? "t" : "f");
        }
        row.setValue(successAttribute, trace.isSuccessful() ? "t" : "f");
        trainingSet.add(row);
    }

    // == 3. Rank components by predicted failure probability

    try {
        final Classifier classifier = this.buildClassifier(this.classifierName, this.classifierOptions,
                trainingSet);
        final Ranking<T> ranking = new Ranking<>();

        System.out.println("begin classifying");
        int classified = 0;

        // Query instance: start with no component involved and a failing outcome.
        final Instance query = new DenseInstance(components.size() + 1);
        query.setDataset(trainingSet);
        for (final INode<T> component : components) {
            query.setValue(nodeAttributes.get(component), "f");
        }
        query.setValue(successAttribute, "f");

        for (final INode<T> component : components) {
            classified++;
            if (classified % 1000 == 0) {
                System.out.println(String.format("Classified %d nodes.", classified));
            }

            // Mark only the current component as involved ...
            query.setValue(nodeAttributes.get(component), "t");

            // ... and rank it by the predicted probability of a failing run.
            final double[] distribution = classifier.distributionForInstance(query);
            ranking.rank(component, distribution[1]);

            // Reset involvement before moving to the next component.
            query.setValue(nodeAttributes.get(component), "f");
        }
        return ranking;
    } catch (final Exception e) { // NOCS: Weka throws only raw exceptions
        throw new RuntimeException(e);
    }
}