Example usage for weka.core Instance setDataset

List of usage examples for weka.core Instance setDataset

Introduction

On this page you can find example usages of weka.core Instance setDataset.

Prototype

public void setDataset(Instances instances);

Source Link

Document

Sets the reference to the dataset.

Usage

From source file:moa.classifiers.novelClass.AbstractNovelClassClassifier.java

License:Apache License

/**
 * Returns a copy of {@code x} whose dataset reference is replaced by the
 * result of {@code augmentInstances(x.dataset())}. The argument itself is
 * only read (copied), its dataset reference is not reassigned here.
 *
 * @param x instance to copy and re-home
 * @return the copy, attached to the augmented dataset
 */
final public Instance augmentInstance(Instance x) {
    Instance augmented = (Instance) x.copy();
    Instances augmentedHeader = augmentInstances(x.dataset());
    augmented.setDataset(augmentedHeader);
    return augmented;
}

From source file:moa.clusterer.FeS2.java

License:Apache License

/**
 * /*from w ww.  j  a v  a 2 s .  c  o m*/
 * @param c cluster that is being compared against
 * @param x real data instance
 * @return DenseInstance made to work with the outlier-detecting perceptron
 */
private Instance makePerceptronInstance(Riffle c, Instance x) {
    Instance pseudoPoint = new DenseInstance(this.outlierPerceptronTrainingSet.numAttributes());
    pseudoPoint.setDataset(outlierPerceptronTrainingSet);
    double p = c.getInclusionProbability(x);
    double r = (c.getRadius() != 0) ? c.getRadius() : 1;
    //double w = c.getWeight();
    double N = Math.min(c.size(), 1.0 / (this.learningRateAlphaOption.getValue() + 1e-9));
    double d = c.getCenterDistance(x);
    double logP = (p == 0) ? 0 : Math.log(p);
    double logDR = (r == 0 || (d / r) == 0) ? 0 : Math.log(d / r);
    pseudoPoint.setValue(0, logP);
    pseudoPoint.setValue(1, logDR);
    pseudoPoint.setValue(2, logDR * logP);
    pseudoPoint.setValue(3,
            logP - Math.log(1.0 / Math.pow(2.0 * N, this.universalCluster.getHeader().numAttributes())));
    pseudoPoint.setClassValue(0);
    pseudoPoint.setWeight(0.0);
    return pseudoPoint;
}

From source file:moa.clusterer.outliers.Sieve.java

License:Apache License

/**
 * Builds the pseudo-instance that is handed to the outlier-detecting perceptron.
 *
 * Values written (by attribute index):
 * <ul>
 *   <li>0: log(p), or 0 when p is 0</li>
 *   <li>1: log(d / r), or 0 when the ratio is 0</li>
 *   <li>2: product of the two values above</li>
 *   <li>3: log(p) - log(1 / (2N)^k), where k is the attribute count of the
 *       universal cluster's header</li>
 * </ul>
 * The class value is set to 0 and the weight to 0.0.
 *
 * @param c cluster that is being compared against
 * @param x real data instance
 * @return DenseInstance made to work with the outlier-detecting perceptron
 */
private Instance makePerceptronInstance(Riffle c, Instance x) {
    Instance pseudoPoint = new DenseInstance(this.outlierPerceptronTrainingSet.numAttributes());
    pseudoPoint.setDataset(outlierPerceptronTrainingSet);
    double p = c.getInclusionProbability(x);
    // A zero radius would make d / r meaningless, so fall back to 1.
    double r = (c.getRadius() != 0) ? c.getRadius() : 1;
    // Effective sample count, capped by the configured cache size.
    double N = Math.min(c.size(), this.cacheSizeOption.getValue());
    double d = c.getCenterDistance(x);
    double logP = (p == 0) ? 0 : Math.log(p);
    double ratio = d / r;
    double logDR = (r == 0 || ratio == 0) ? 0 : Math.log(ratio);
    int dims = this.universalCluster.getHeader().numAttributes();
    // -log(1 / (2N)^dims) == dims * log(2N). Computing it in log space avoids
    // Math.pow overflowing to +Infinity for high-dimensional data, which would
    // otherwise turn this feature into +Infinity. The dims == 0 guard keeps the
    // pow(x, 0) == 1 behaviour (contribution of exactly 0).
    double logVolume = (dims == 0) ? 0 : dims * Math.log(2.0 * N);
    pseudoPoint.setValue(0, logP);
    pseudoPoint.setValue(1, logDR);
    pseudoPoint.setValue(2, logDR * logP);
    pseudoPoint.setValue(3, logP + logVolume);
    pseudoPoint.setClassValue(0);
    pseudoPoint.setWeight(0.0);
    return pseudoPoint;
}

From source file:moa.clusterers.AmidstClusteringAlgorithm.java

License:Apache License

/**
 * {@inheritDoc}
 */
@Override
public Clustering getClusteringResult() {
    Instances header = getDataset(attributes_.getNumberOfAttributes(), getNumClusters());
    Instances labelled = new Instances(header);

    // First call only: configure and initialise the learner before feeding it data.
    if (bnModel_ == null) {
        parameterLearningAlgorithm_.setDAG(dag);
        ((SVB) parameterLearningAlgorithm_).setWindowsSize(timeWindowOption.getValue());
        parameterLearningAlgorithm_.initLearning();
    }
    // The current batch is folded into the model on every call.
    parameterLearningAlgorithm_.updateModel(batch_);

    bnModel_ = parameterLearningAlgorithm_.getLearntBayesianNetwork();
    predictions_.setModel(bnModel_);

    for (DataInstance observed : batch_) {
        this.predictions_.setEvidence(observed);
        this.predictions_.runInference();
        Multinomial clusterPosterior = this.predictions_.getPosterior(clusterVar_);
        double[] posterior = clusterPosterior.getProbabilities();

        // Index of the most probable cluster; on ties the later index wins.
        int mostLikely = IntStream.rangeClosed(0, getNumClusters() - 1)
                .reduce((best, candidate) -> {
                    if (posterior[best] > posterior[candidate]) {
                        return best;
                    }
                    return candidate;
                })
                .getAsInt();

        Instance labelledInstance = new DenseInstance(1.0, observed.toArray());
        // Append one attribute slot, then use it for the assigned cluster as class value.
        labelledInstance.insertAttributeAt(attributes_.getNumberOfAttributes());
        labelledInstance.setDataset(header);
        labelledInstance.setClassValue(mostLikely);
        labelled.add(labelledInstance);
    }

    clustering = new Clustering(labelled);
    return clustering;
}

From source file:moa.core.utils.Converter.java

License:Open Source License

/**
 * Produces a reshaped copy of {@code original}: the first {@code m_L}
 * attributes are removed, a single attribute is inserted at position 0,
 * and the result is attached to {@code m_InstancesTemplate}.
 *
 * @param original instance to convert; it is copied, not modified
 * @return the converted copy
 */
public Instance formatInstance(Instance original) {
    Instance converted = (Instance) original.copy();
    // Detach from any dataset before the attribute edits below.
    converted.setDataset(null);

    // Drop the leading m_L (class) attributes one at a time from the front.
    int removed = 0;
    while (removed < m_L) {
        converted.deleteAttributeAt(0);
        removed++;
    }

    // Re-introduce a single attribute at the beginning.
    converted.insertAttributeAt(0);

    // Hopefully attaching the template dataset configures that attribute properly.
    converted.setDataset(m_InstancesTemplate);
    return converted;
}

From source file:moa.gui.BatchCmd.java

License:Apache License

/**
 * Feeds the stream into the clusterer one instance at a time and, every
 * {@code decayHorizon} instances, evaluates the current clustering against
 * the ground truth with every configured measure. Stops after
 * {@code totalInstances} instances or when the stream has no more instances.
 */
public void run() {
    ArrayList<DataPoint> window = new ArrayList<DataPoint>();
    final int horizon = stream.getDecayHorizon();
    final double decayRate = -Math.log(stream.getDecayThreshold()) / horizon;

    int timestamp = 0;
    int untilEvaluation = horizon;

    while (timestamp < totalInstances && stream.hasMoreInstances()) {
        timestamp++;
        untilEvaluation--;

        Instance incoming = stream.nextInstance();
        DataPoint point = new DataPoint(incoming, timestamp);
        window.add(point);

        Instance trainingInstance = new DenseInstance(point);
        if (clusterer instanceof ClusterGenerator) {
            trainingInstance.setDataset(point.dataset());
        } else {
            trainingInstance.deleteAttributeAt(point.classIndex());
        }
        clusterer.trainOnInstanceImpl(trainingInstance);

        // Evaluation only happens once a full horizon of points is buffered.
        if (untilEvaluation > 0) {
            continue;
        }

        for (DataPoint buffered : window) {
            buffered.updateWeight(timestamp, decayRate);
        }

        // Ground truth: the buffered points, or the generator's micro clustering if requested.
        Clustering groundTruth = new Clustering(window);
        if (useMicroGT && stream instanceof RandomRBFGeneratorEvents) {
            groundTruth = ((RandomRBFGeneratorEvents) stream).getMicroClustering();
        }

        Clustering found = clusterer.getClusteringResult();
        if (clusterer.implementsMicroClusterer()) {
            if (clusterer instanceof ClusterGenerator && stream instanceof RandomRBFGeneratorEvents) {
                ((ClusterGenerator) clusterer)
                        .setSourceClustering(((RandomRBFGeneratorEvents) stream).getMicroClustering());
            }
            Clustering micro = clusterer.getMicroClusteringResult();
            if (clusterer.evaluateMicroClusteringOption.isSet()) {
                found = micro;
            } else if (found == null && micro != null) {
                // No macro clustering available: derive one from the micro clusters.
                found = moa.clusterers.KMeans.gaussianMeans(groundTruth, micro);
            }
        }

        // A failing measure is reported and does not stop the remaining ones.
        for (int m = 0; m < measures.length; m++) {
            try {
                measures[m].evaluateClusteringPerformance(found, groundTruth, window);
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }

        window.clear();
        untilEvaluation = horizon;
    }
}

From source file:moa.streams.clustering.FileStream.java

License:Apache License

/**
 * Reads one instance from the file reader, strips the attributes listed in
 * {@code removeAttributes}, optionally normalizes the remaining values and
 * stores the result in {@code lastInstanceRead}.
 *
 * @return {@code true} if an instance was read; {@code false} if nothing
 *         could be read, in which case the reader is closed and cleared
 * @throws RuntimeException wrapping any {@link IOException} raised while reading
 */
protected boolean readNextInstanceFromFile() {
    try {
        if (!this.instances.readInstance(this.fileReader)) {
            // Nothing was read: release the reader if it is still open.
            if (this.fileReader != null) {
                this.fileReader.close();
                this.fileReader = null;
            }
            return false;
        }

        Instance rawInstance = this.instances.instance(0);

        // Detach from the dataset so attributes can be deleted.
        rawInstance.setDataset(null);
        // Walk the removal list back to front.
        int pos = removeAttributes.length;
        while (--pos >= 0) {
            rawInstance.deleteAttributeAt(removeAttributes[pos]);
        }
        // Attach the dataset that matches the reduced attribute set.
        rawInstance.setDataset(filteredDataset);

        if (normalizeOption.isSet() && valuesMinMaxDiff != null) {
            for (int att = 0; att < rawInstance.numAttributes(); att++) {
                // Entry [2] is the divisor and [0] the offset used below
                // (presumably max-min and min, going by the field name).
                double spread = valuesMinMaxDiff.get(att)[2];
                boolean alreadyNormalized = spread == 1;
                boolean notNormalizable = spread == 0;
                // The class label is never normalized.
                if (alreadyNormalized || notNormalizable || att == rawInstance.classIndex()) {
                    continue;
                }
                double raw = rawInstance.value(att);
                double offset = valuesMinMaxDiff.get(att)[0];
                rawInstance.setValue(att, (raw - offset) / spread);
            }
        }

        this.lastInstanceRead = rawInstance;
        this.instances.delete(); // keep instances clean
        this.numInstancesRead++;
        return true;
    } catch (IOException ioe) {
        throw new RuntimeException("ArffFileStream failed to read instance from stream.", ioe);
    }
}

From source file:moa.streams.clustering.RandomRBFGeneratorEvents.java

License:Apache License

/**
 * Generates the next instance of the stream: either a sample from one of
 * the kernels (chosen by {@code chooseWeightedElement()}) or a noise point,
 * depending on a random draw compared against {@code noiseLevelOption}.
 *
 * @return the generated instance, attached to the stream header, with its
 *         class value set to the kernel generator's id, or to
 *         {@code numClusterOption.getValue()} for noise
 */
public Instance nextInstance() {
    numGeneratedInstances++;
    eventScheduler();

    // One extra slot at the end makes room for the class label.
    double[] withClassSlot = new double[numAttsOption.getValue() + 1];
    double[] sampled = null;
    int clusterChoice = -1;

    boolean drawFromCluster = instanceRandom.nextDouble() > noiseLevelOption.getValue();
    if (!drawFromCluster) {
        // random noise point
        sampled = getNoisePoint();
    } else {
        clusterChoice = chooseWeightedElement();
        sampled = kernels.get(clusterChoice).generator.sample(instanceRandom).toDoubleArray();
    }

    if (Double.isNaN(sampled[0])) {
        System.out.println("Instance corrupted:" + numGeneratedInstances);
    }
    System.arraycopy(sampled, 0, withClassSlot, 0, sampled.length);

    Instance inst = new DenseInstance(1.0, withClassSlot);
    inst.setDataset(getHeader());
    if (clusterChoice != -1) {
        inst.setClassValue(kernels.get(clusterChoice).generator.getId());
        // Do we need micro cluster representation if have overlapping clusters?
        kernels.get(clusterChoice).addInstance(inst);
    } else {
        // 2013/06/02 (Yunsu Kim)
        // Noise instance has the last class value instead of "-1"
        // Preventing ArrayIndexOutOfBoundsException in WriteStreamToARFFFile
        inst.setClassValue(numClusterOption.getValue());
    }

    return inst;
}

From source file:moa.streams.ConceptDriftRealStream.java

License:Open Source License

/**
 * Produces the next instance by advancing either the input stream or the
 * drift stream (chosen at random with a sigmoid probability centred on
 * {@code positionOption} and scaled by {@code widthOption}) and then
 * concatenating the non-class values of the current input and drift
 * instances, followed by the class value of the stream that was advanced.
 *
 * NOTE(review): both {@code inputInstance} and {@code driftInstance} are
 * read on every call, so they are presumably initialised before the first
 * call elsewhere in the class - confirm.
 *
 * @return the combined instance, attached to this stream's header
 */
@Override
public Instance nextInstance() {
    numberInstanceStream++;
    double exponent = -4.0 * (double) (numberInstanceStream - this.positionOption.getValue())
            / (double) this.widthOption.getValue();
    double probabilityDrift = 1.0 / (1.0 + Math.exp(exponent));

    double numclass = 0.0;
    boolean advanceInput = this.random.nextDouble() > probabilityDrift;
    if (advanceInput) {
        if (!this.inputStream.hasMoreInstances()) {
            this.inputStream.restart();
        }
        this.inputInstance = this.inputStream.nextInstance();
        numclass = this.inputInstance.classValue();
    } else {
        if (!this.driftStream.hasMoreInstances()) {
            this.driftStream.restart();
        }
        this.driftInstance = this.driftStream.nextInstance();
        numclass = this.driftInstance.classValue();
    }

    // All attributes of both instances except one each, plus a single class slot.
    double[] newVals = new double[this.inputInstance.numAttributes() + this.driftInstance.numAttributes() - 1];
    int cursor = 0;
    for (int j = 0; j < this.inputInstance.numAttributes() - 1; j++) {
        newVals[cursor++] = this.inputInstance.value(j);
    }
    for (int j = 0; j < this.driftInstance.numAttributes() - 1; j++) {
        newVals[cursor++] = this.driftInstance.value(j);
    }
    newVals[cursor] = numclass;

    Instance inst = new DenseInstance(1.0, newVals);
    inst.setDataset(this.getHeader());
    inst.setClassValue(numclass);
    return inst;
}

From source file:moa.streams.generators.AgrawalGenerator.java

License:Open Source License

/**
 * Generates the next instance: draws random attribute values, classifies
 * them with the function selected by {@code functionOption}, optionally
 * repeats the draw until the class alternates (when
 * {@code balanceClassesOption} is set), optionally perturbs the numeric
 * values, and packs everything into a {@code DenseInstance}.
 *
 * @return the generated instance, attached to the stream header
 */
@Override
public Instance nextInstance() {
    double salary = 0;
    double commission = 0;
    double hvalue = 0;
    double loan = 0;
    int age = 0;
    int elevel = 0;
    int car = 0;
    int zipcode = 0;
    int hyears = 0;
    int group = 0;

    boolean accepted;
    do {
        // generate attributes
        salary = 20000.0 + 130000.0 * this.instanceRandom.nextDouble();
        if (salary >= 75000.0) {
            commission = 0;
        } else {
            commission = 10000.0 + 65000.0 * this.instanceRandom.nextDouble();
        }
        // true to c implementation:
        // if (instanceRandom.nextDouble() < 0.5 && salary < 75000.0)
        // commission = 10000.0 + 65000.0 * instanceRandom.nextDouble();
        age = 20 + this.instanceRandom.nextInt(61);
        elevel = this.instanceRandom.nextInt(5);
        car = this.instanceRandom.nextInt(20);
        zipcode = this.instanceRandom.nextInt(9);
        hvalue = (9.0 - zipcode) * 100000.0 * (0.5 + this.instanceRandom.nextDouble());
        hyears = 1 + this.instanceRandom.nextInt(30);
        loan = this.instanceRandom.nextDouble() * 500000.0;

        // determine class
        group = classificationFunctions[this.functionOption.getValue() - 1].determineClass(salary, commission,
                age, elevel, car, zipcode, hvalue, hyears, loan);

        if (!this.balanceClassesOption.isSet()) {
            accepted = true;
        } else if ((this.nextClassShouldBeZero && (group == 0))
                || (!this.nextClassShouldBeZero && (group == 1))) {
            // balance the classes: accept and ask for the other class next time
            accepted = true;
            this.nextClassShouldBeZero = !this.nextClassShouldBeZero;
        } else {
            // wrong class for balancing, keep searching
            accepted = false;
        }
    } while (!accepted);

    // perturb values
    if (this.peturbFractionOption.getValue() > 0.0) {
        salary = perturbValue(salary, 20000, 150000);
        if (commission > 0) {
            commission = perturbValue(commission, 10000, 75000);
        }
        age = (int) Math.round(perturbValue(age, 20, 80));
        // NOTE(review): if the last argument is an upper clamp, 135000 is far below
        // the largest hvalue generated above ((9 - 0) * 100000 * 1.5); left as is - confirm intent.
        hvalue = perturbValue(hvalue, (9.0 - zipcode) * 100000.0, 0, 135000);
        hyears = (int) Math.round(perturbValue(hyears, 1, 30));
        loan = perturbValue(loan, 0, 500000);
    }

    // construct instance: values are stored in this fixed attribute order
    InstancesHeader header = getHeader();
    Instance inst = new DenseInstance(header.numAttributes());
    double[] attributeValues = { salary, commission, age, elevel, car, zipcode, hvalue, hyears, loan };
    for (int idx = 0; idx < attributeValues.length; idx++) {
        inst.setValue(idx, attributeValues[idx]);
    }
    inst.setDataset(header);
    inst.setClassValue(group);
    return inst;
}