List of usage examples for weka.core.Instances.add(Instance)

@Override
public boolean add(Instance instance)
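Before the project-specific examples, here is a minimal self-contained sketch of the basic pattern. It is written against the Weka 3.7+ API (DenseInstance instead of the older concrete Instance class); the attribute names, class labels, and dataset name are invented for illustration:

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class AddExample {
    public static void main(String[] args) throws Exception {
        // Two numeric attributes plus a nominal class attribute
        ArrayList<Attribute> attrs = new ArrayList<>();
        attrs.add(new Attribute("x"));
        attrs.add(new Attribute("y"));
        ArrayList<String> classValues = new ArrayList<>();
        classValues.add("pos");
        classValues.add("neg");
        attrs.add(new Attribute("class", classValues));

        // Empty dataset with an initial capacity of 10 rows
        Instances data = new Instances("demo", attrs, 10);
        data.setClassIndex(data.numAttributes() - 1);

        // add(Instance) appends a shallow copy of the instance and returns true
        Instance row = new DenseInstance(data.numAttributes());
        row.setDataset(data); // required before setting a nominal value by its label
        row.setValue(0, 1.5);
        row.setValue(1, -0.25);
        row.setValue(2, "pos");
        data.add(row);

        System.out.println(data);
    }
}

As the examples below show, the pattern is always the same: build the attribute structure, create the Instances container, then add rows one at a time.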
From source file: org.conqat.engine.commons.machine_learning.DataSetCreator.java
License: Apache License
/**
 * Creates a Weka instance for the given classification object and the given
 * label and adds it to the given data set.
 */
private Instance createInstance(T classificationObject, LABEL label, Instances dataSet) {
    Instance instance = instanceCreator.createWekaInstance(classificationObject, label);
    dataSet.add(instance);
    instance.setDataset(dataSet);
    return instance;
}
From source file: org.goai.classification.impl.WekaClassifier.java
License: Apache License
/**
 * Converts a map to a Weka data set.
 *
 * @param itemClassMap Map<double[], String>
 * @return Instances Weka data set
 */
public Instances convertItemClassMapToInstances(Map<double[], String> itemClassMap) {
    if (itemClassMap.isEmpty()) {
        throw new RuntimeException("Map should have at least one element!");
    }

    // Use the first row as a template for mapping attributes from the sample
    Map.Entry<double[], String> row = itemClassMap.entrySet().iterator().next();

    // Number of attributes, not counting the class attribute
    int numOfAttr = row.getKey().length;

    // Collect the possible class values
    fillClassValues(itemClassMap);

    // Sample size
    int capacity = itemClassMap.entrySet().size();

    // Create an empty Instances data set and set the class attribute index
    Instances newDataSet = createEmptyInstancesDataSet(numOfAttr, capacity);
    newDataSet.setClassIndex(numOfAttr);

    // Iterate over the sample rows
    for (Map.Entry<double[], String> entry : itemClassMap.entrySet()) {
        // Attribute values and the class label of this row
        double[] el = entry.getKey();
        String klasa = entry.getValue();

        // Row values including the class attribute value
        double[] rowValues = new double[numOfAttr + 1];

        // Copy the regular attribute values
        for (int i = 0; i < numOfAttr; i++) {
            rowValues[i] = el[i];
        }

        // Numeric encoding of the class attribute value
        rowValues[numOfAttr] = classVals.get(klasa);

        // Build the row as a DenseInstance with weight 1 and all attribute values
        Instance dataRow = new DenseInstance(1, rowValues);
        dataRow.setDataset(newDataSet);
        newDataSet.add(dataRow);
    }
    return newDataSet;
}
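A possible call site for the converter above, shown purely as an illustration; the feature vectors and labels are invented, and it assumes WekaClassifier is instantiable with a no-arg constructor:

// Hypothetical usage; LinkedHashMap preserves insertion order of the sample rows.
Map<double[], String> sample = new LinkedHashMap<>();
sample.put(new double[] { 5.1, 3.5 }, "pos");
sample.put(new double[] { 6.2, 2.9 }, "neg");

WekaClassifier classifier = new WekaClassifier();
Instances dataSet = classifier.convertItemClassMapToInstances(sample);
System.out.println(dataSet.numInstances()); // prints 2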
From source file: org.hypknowsys.wumprep.WUMprepWrapper.java
License: Open Source License
/**
 * Creates a dummy dataset from the input format, sends it to the script and
 * reads the script output's ARFF information that in turn is used to set
 * <code>this</code>' output format.
 *
 * This mechanism allows a WUMprep script to alter the recordset layout as
 * long as this change is documented by the output ARFF header. For example,
 * the <tt>dnsLookup.pl</tt> script changes the <code>host_ip</code> field
 * to <code>host_dns</code> when performing IP lookups.
 *
 * @param instanceInfo
 *          The input format.
 * @return Object containing the output instance structure.
 */
public Instances getScriptOutputFormat(Instances instanceInfo) {
    Instances outputFormat = instanceInfo;
    Instances testData = new Instances(instanceInfo);
    Instance testInstance = new Instance(testData.numAttributes());

    testData.delete();
    testInstance.setDataset(testData);

    // Initialize the testInstance's attribute values
    for (int i = 0; i < testInstance.numAttributes(); i++) {
        String aName = testInstance.attribute(i).name();
        if (aName.equals("host_ip"))
            testInstance.setValue(i, "127.0.0.1");
        else if (aName.equals("ts_day"))
            testInstance.setValue(i, "01");
        else if (aName.equals("ts_month"))
            testInstance.setValue(i, "Jan");
        else if (aName.equals("ts_year"))
            testInstance.setValue(i, "2005");
        else if (aName.equals("ts_hour"))
            testInstance.setValue(i, "11");
        else if (aName.equals("ts_minutes"))
            testInstance.setValue(i, "55");
        else if (aName.equals("ts_seconds"))
            testInstance.setValue(i, "00");
        else if (aName.equals("tz"))
            testInstance.setValue(i, "+0200");
        else
            testInstance.setValue(i, aName + "-dummy");
    }

    testData.add(testInstance);

    WUMprepWrapper testWrapper = new WUMprepWrapper(m_scriptName, m_args);
    testWrapper.start();
    testWrapper.push(testData.toString());
    testWrapper.push((Instance) null);

    /** Helper class for reading stderr output from the WUMprep script */
    class ErrorReader extends Thread implements Serializable {
        private static final long serialVersionUID = -488779846603045891L;
        PipedReader m_input = null;

        /**
         * @param input The script's wrapper's stderr pipe reader
         */
        ErrorReader(PipedReader input) {
            m_input = input;
            this.start();
        }

        public void run() {
            try {
                while (m_input.read() >= 0)
                    ;
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }
    }

    // read and discard the script's stderr output
    new ErrorReader(testWrapper.getErrorPipe());

    try {
        // read the output format from the script's stdout pipe
        outputFormat = new org.hypknowsys.wumprep4weka.core.Instances(testWrapper.getOutputPipe());
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

    return outputFormat;
}
From source file: org.iobserve.analysis.behavior.filter.ClusterMerger.java
License: Apache License
@Override
protected void execute(final Map<Integer, List<Pair<Instance, Double>>> clustering) throws Exception {
    // Simply pick the first instance of every cluster and look up its
    // attributes to build a new Instances object.
    Instance instance = clustering.entrySet().iterator().next().getValue().get(0).getElement1();
    final FastVector attributes = new FastVector();
    for (int j = 0; j < instance.numAttributes(); j++) {
        attributes.addElement(instance.attribute(j));
    }
    final Instances result = new Instances("Clustering Result", attributes, clustering.size());
    for (final List<Pair<Instance, Double>> entry : clustering.values()) {
        if (!entry.isEmpty()) {
            instance = entry.get(0).getElement1();
            result.add(instance);
        }
    }
    if (ClusterMerger.LOGGER.isDebugEnabled()) {
        this.printInstances(result);
    }
    this.outputPort.send(result);
}
From source file: org.iobserve.analysis.behavior.karlsruhe.AbstractClustering.java
License: Apache License
/**
 * Transforms the user sessions (given as counts of their called operation
 * signatures) into Weka instances that can be used for clustering.
 *
 * @param countModel
 *            contains the user sessions in form of counts of called operation signatures
 * @param listOfDistinctOperationSignatures
 *            contains the extracted distinct operation signatures of the input
 *            entryCallSequenceModel
 * @return the Weka instances that hold the data that is used for the clustering
 */
protected Instances createInstances(final List<UserSessionAsCountsOfCalls> countModel,
        final List<String> listOfDistinctOperationSignatures) {

    final int numberOfDistinctOperationSignatures = listOfDistinctOperationSignatures.size();
    final FastVector fvWekaAttributes = new FastVector(numberOfDistinctOperationSignatures);
    for (int i = 0; i < numberOfDistinctOperationSignatures; i++) {
        final String attributeName = "Attribute" + i;
        final Attribute attribute = new Attribute(attributeName);
        fvWekaAttributes.addElement(attribute);
    }

    final Instances clusterSet = new Instances("CallCounts", fvWekaAttributes, countModel.size());

    for (final UserSessionAsCountsOfCalls userSession : countModel) {
        int indexOfAttribute = 0;
        final Instance instance = new Instance(numberOfDistinctOperationSignatures);
        for (int row = 0; row < listOfDistinctOperationSignatures.size(); row++) {
            instance.setValue((Attribute) fvWekaAttributes.elementAt(indexOfAttribute),
                    userSession.getAbsoluteCountOfCalls()[row]);
            indexOfAttribute++;
        }
        clusterSet.add(instance);
    }
    return clusterSet;
}
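Note that this method is written against the pre-3.7 Weka API (FastVector and the concrete Instance class, both deprecated in later releases). Purely as a sketch, and not part of the iObserve sources, the same construction against the Weka 3.7+ API would look roughly like this:

// Weka 3.7+ equivalent (sketch): ArrayList<Attribute> replaces FastVector,
// DenseInstance replaces the concrete Instance class.
final ArrayList<Attribute> attributes = new ArrayList<>();
for (int i = 0; i < numberOfDistinctOperationSignatures; i++) {
    attributes.add(new Attribute("Attribute" + i));
}
final Instances clusterSet = new Instances("CallCounts", attributes, countModel.size());
for (final UserSessionAsCountsOfCalls userSession : countModel) {
    final Instance instance = new DenseInstance(numberOfDistinctOperationSignatures);
    for (int row = 0; row < numberOfDistinctOperationSignatures; row++) {
        instance.setValue(attributes.get(row), userSession.getAbsoluteCountOfCalls()[row]);
    }
    clusterSet.add(instance);
}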
From source file: org.iobserve.analysis.behavior.models.data.BehaviorModelTable.java
License: Apache License
/**
 * Creates an Instances object for clustering.
 *
 * @return instances
 */
public Instances toInstances() {
    final FastVector fastVector = new FastVector();

    // add transitions
    for (int i = 0; i < this.signatures.size(); i++) {
        for (int j = 0; j < this.signatures.size(); j++) {
            if (this.transitions[i][j] > AbstractBehaviorModelTable.TRANSITION_THRESHOLD) {
                final Attribute attribute = new Attribute(
                        AbstractBehaviorModelTable.EDGE_INDICATOR + this.inverseSignatures[i]
                                + AbstractBehaviorModelTable.EDGE_DIVIDER + this.inverseSignatures[j]);
                fastVector.addElement(attribute);
            }
        }
    }

    // add call information
    this.signatures.values().stream()
            .forEach(pair -> Arrays.stream(pair.getSecond())
                    .forEach(callInformation -> fastVector.addElement(new Attribute(
                            AbstractBehaviorModelTable.INFORMATION_INDICATOR
                                    + this.inverseSignatures[pair.getFirst()]
                                    + AbstractBehaviorModelTable.INFORMATION_DIVIDER
                                    + callInformation.getSignature()))));

    // TODO name
    final Instances instances = new Instances("Test", fastVector, 0);
    final Instance instance = this.toInstance();
    instances.add(instance);

    return instances;
}
From source file: org.isep.simizer.example.policy.utils.IterativeSimpleKMeans.java
License: Open Source License
/**
 * Generates a clusterer. Has to initialize all fields of the clusterer that
 * are not being set via options.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
public void buildClusterer(Instances data) throws Exception {
    // can the clusterer handle the data?
    getCapabilities().testWithFail(data);

    m_Iterations = 0;

    m_ReplaceMissingFilter = new ReplaceMissingValues();
    Instances instances = new Instances(data);

    instances.setClassIndex(-1);
    if (!m_dontReplaceMissing) {
        m_ReplaceMissingFilter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
    }

    m_FullMissingCounts = new int[instances.numAttributes()];
    if (m_displayStdDevs) {
        m_FullStdDevs = new double[instances.numAttributes()];
    }
    m_FullNominalCounts = new int[instances.numAttributes()][0];

    m_FullMeansOrMediansOrModes = moveCentroid(0, instances, false);
    for (int i = 0; i < instances.numAttributes(); i++) {
        m_FullMissingCounts[i] = instances.attributeStats(i).missingCount;
        if (instances.attribute(i).isNumeric()) {
            if (m_displayStdDevs) {
                m_FullStdDevs[i] = Math.sqrt(instances.variance(i));
            }
            if (m_FullMissingCounts[i] == instances.numInstances()) {
                m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean
            }
        } else {
            m_FullNominalCounts[i] = instances.attributeStats(i).nominalCounts;
            if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) {
                m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common value
            }
        }
    }

    // Modified to account for already set centroids
    if (m_ClusterCentroids == null) {
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
    }

    int[] clusterAssignments = new int[instances.numInstances()];
    if (m_PreserveOrder) {
        m_Assignments = clusterAssignments;
    }

    m_DistanceFunction.setInstances(instances);

    Random RandomO = new Random(getSeed());
    int instIndex;
    HashMap initC = new HashMap();
    DecisionTableHashKey hk = null;

    Instances initInstances = null;
    if (m_PreserveOrder) {
        initInstances = new Instances(instances);
    } else {
        initInstances = instances;
    }

    // Modified to account for already set centroids
    if (m_ClusterCentroids.numInstances() > 0) {
        initC = this.centersMap;
        for (int i = 0; i < m_NumClusters; i++) {
            initInstances.add(m_ClusterCentroids.instance(i));
        }
    } else {
        // Walk backwards from the end of the data set, swapping each picked
        // center with the last unprocessed instance.
        for (int j = initInstances.numInstances() - 1; j >= 0; j--) {
            instIndex = RandomO.nextInt(j + 1);
            hk = new DecisionTableHashKey(initInstances.instance(instIndex),
                    initInstances.numAttributes(), true);
            if (!initC.containsKey(hk)) {
                m_ClusterCentroids.add(initInstances.instance(instIndex));
                initC.put(hk, null);
            }
            initInstances.swap(j, instIndex);
            if (m_ClusterCentroids.numInstances() == m_NumClusters) {
                break;
            }
        }
    }

    m_NumClusters = m_ClusterCentroids.numInstances();

    // remove the reference
    initInstances = null;

    int i;
    boolean converged = false;
    int emptyClusterCount;
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    m_ClusterMissingCounts = new int[m_NumClusters][instances.numAttributes()];

    while (!converged) {
        emptyClusterCount = 0;
        m_Iterations++;
        converged = true;
        for (i = 0; i < instances.numInstances(); i++) {
            Instance toCluster = instances.instance(i);
            int newC = clusterProcessedInstance(toCluster, true);
            if (newC != clusterAssignments[i]) {
                converged = false;
            }
            clusterAssignments[i] = newC;
        }

        // update centroids
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        for (i = 0; i < m_NumClusters; i++) {
            if (tempI[i].numInstances() == 0) {
                // empty cluster
                emptyClusterCount++;
            } else {
                moveCentroid(i, tempI[i], true);
            }
        }

        if (m_Iterations == m_MaxIterations) {
            converged = true;
        }

        if (emptyClusterCount > 0) {
            m_NumClusters -= emptyClusterCount;
            if (converged) {
                Instances[] t = new Instances[m_NumClusters];
                int index = 0;
                for (int k = 0; k < tempI.length; k++) {
                    if (tempI[k].numInstances() > 0) {
                        t[index++] = tempI[k];
                    }
                }
                tempI = t;
            } else {
                tempI = new Instances[m_NumClusters];
            }
        }

        if (!converged) {
            m_squaredErrors = new double[m_NumClusters];
            m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
        }
    }

    if (m_displayStdDevs) {
        m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    }
    m_ClusterSizes = new int[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        if (m_displayStdDevs) {
            double[] vals2 = new double[instances.numAttributes()];
            for (int j = 0; j < instances.numAttributes(); j++) {
                if (instances.attribute(j).isNumeric()) {
                    vals2[j] = Math.sqrt(tempI[i].variance(j));
                } else {
                    vals2[j] = Instance.missingValue();
                }
            }
            m_ClusterStdDevs.add(new Instance(1.0, vals2));
        }
        m_ClusterSizes[i] = tempI[i].numInstances();
    }
}
From source file: org.knime.knip.suise.node.boundarymodel.contourdata.ContourDataFromClusterSelection.java
License: Open Source License
/**
 * {@inheritDoc}
 */
@Override
protected void extractContourData(int[] translations, int[] permutation) {
    SimpleKMeans clusterer = new SimpleKMeans();
    try {
        clusterer.setNumClusters(m_numClusters);

        // cluster the data
        ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
        for (int a = 0; a < contourDataGrid().numFeatures(); a++) {
            attInfo.add(new Attribute("att" + a));
        }
        Instances data = new Instances("dataset", attInfo, contourDataGrid().numVectors());
        for (double[] vec : contourDataGrid()) {
            data.add(new DenseInstance(1.0, vec));
        }
        clusterer.buildClusterer(data);

        // create clustered images p(C|x)
        Img[] imgs = new Img[m_numClusters];
        int[] dims = new int[] { contourDataGrid().width(), contourDataGrid().totalLength() };
        Cursor<FloatType>[] cursors = new Cursor[m_numClusters];
        for (int i = 0; i < imgs.length; i++) {
            imgs[i] = new ArrayImgFactory<FloatType>().create(dims, new FloatType());
            cursors[i] = imgs[i].localizingCursor();
        }

        int cluster;
        for (Instance instance : data) {
            for (int i = 0; i < cursors.length; i++) {
                cursors[i].fwd();
            }
            cluster = clusterer.clusterInstance(instance);
            cursors[cluster].get().set(1.0f);
        }

        // greedily select the best cluster combination, starting with all
        // clusters together and then removing the one whose removal
        // maximises the score of the remaining clusters
        Img<FloatType> res = imgs[0].factory().create(imgs[0], new FloatType());
        Cursor<FloatType> resC = res.cursor();
        while (resC.hasNext()) {
            resC.fwd();
            resC.get().set(1.0f);
        }
        Img<FloatType> tmp = res.factory().create(res, new FloatType());

        // TODO: normalize img
        double score = 0;
        double bestScore = -Double.MAX_VALUE;
        double globalBestScore = -Double.MAX_VALUE;
        int bestCluster = 0;
        for (int i = 0; i < m_numClusters; i++) {
            for (int j = 0; j < m_numClusters; j++) {
                if (imgs[j] != null) {
                    substract(res, imgs[j], tmp);
                    score = calcScore(tmp, m_bias);
                    if (score > bestScore) {
                        bestScore = score;
                        bestCluster = j;
                    }
                }
            }
            substract(res, imgs[bestCluster], res);
            imgs[bestCluster] = null;
            if (bestScore < globalBestScore) {
                break;
            }
            globalBestScore = bestScore;
            bestScore = -Double.MAX_VALUE;
        }

        // calculate the translations (mean positions)
        resC = res.localizingCursor();
        double meanPos = 0;
        double num = 0;
        int index = 0;
        while (resC.hasNext()) {
            resC.fwd();
            meanPos += resC.get().get() * resC.getDoublePosition(0);
            num += resC.get().get();
            index++;
            if ((index % res.dimension(0)) == 0) {
                if (num > 0) {
                    translations[(int) ((index - 1) / res.dimension(0))] =
                            (int) Math.round(meanPos / num) - CENTER_COL;
                } else {
                    translations[(int) ((index - 1) / res.dimension(0))] = 0;
                }
                meanPos = 0;
                num = 0;
            }
        }
    } catch (Exception e) {
        // TODO Auto-generated catch block
    }
}
From source file: org.knime.knip.suise.node.boundarymodel.contourdata.IRI.java
License: Open Source License
/**
 * {@inheritDoc}
 */
@Override
public void buildClassifier(Instances miData) throws Exception {
    // can the classifier handle the data?
    getCapabilities().testWithFail(miData);

    final Instances tmpMiData = new Instances(miData);
    final Instances flatData = toSingleInstanceDataset(miData, null);

    int numPosBags = 0;
    for (Instance bag : miData) {
        if (bag.value(2) == 1) {
            numPosBags++;
        }
    }

    int remainingNumPosBags = numPosBags;
    Future<Pair<IntervalRule, Double>>[] futures = new Future[m_numThreads];
    ExecutorService pool = Executors.newFixedThreadPool(m_numThreads);

    while (remainingNumPosBags / (double) numPosBags > 1 - m_coverRate) {
        final int numIterations = ((int) (m_sampleRate * remainingNumPosBags)) / m_numThreads + 1;
        for (int t = 0; t < m_numThreads; t++) {
            futures[t] = pool.submit(new Callable<Pair<IntervalRule, Double>>() {
                @Override
                public Pair<IntervalRule, Double> call() throws Exception {
                    return createRule(flatData, tmpMiData, numIterations);
                }
            });
        }

        // select the best rule from the threads
        double score = -Double.MAX_VALUE;
        IntervalRule rule = null;
        for (int f = 0; f < futures.length; f++) {
            if (futures[f].get().getB() > score) {
                score = futures[f].get().getB();
                rule = futures[f].get().getA();
            }
        }
        m_rules.add(rule);

        // only keep the bags whose instances are not covered by this rule
        Instances tmp = new Instances(tmpMiData);
        tmpMiData.clear();
        boolean covered;
        remainingNumPosBags = 0;
        for (Instance bag : tmp) {
            covered = false;
            for (Instance inst : bag.relationalValue(1)) {
                double[] distr = rule.distributionForInstance(inst);
                if (distr[1] > distr[0]) {
                    covered = true;
                    break;
                }
            }
            if (!covered) {
                tmpMiData.add(bag);
                if (bag.value(2) == 1) {
                    remainingNumPosBags++;
                }
            }
        }
        flatData.clear();
        toSingleInstanceDataset(tmpMiData, flatData);
    }
    pool.shutdown();
}
From source file: org.knime.knip.suise.node.boundarymodel.contourdata.IRI.java
License: Open Source License
private Instances toSingleInstanceDataset(Instances miData, Instances flatData) throws Exception {
    MultiInstanceToPropositional convertToProp = new MultiInstanceToPropositional();
    convertToProp.setInputFormat(miData);
    for (int i = 0; i < miData.numInstances(); i++) {
        convertToProp.input(miData.instance(i));
    }
    convertToProp.batchFinished();

    if (flatData == null) {
        flatData = convertToProp.getOutputFormat();
        flatData.deleteAttributeAt(0); // remove the bag index attribute
    }

    Instance processed;
    while ((processed = convertToProp.output()) != null) {
        processed.setDataset(null);
        processed.deleteAttributeAt(0); // remove the bag index attribute
        flatData.add(processed);
    }

    // remove class attribute
    // flatData.setClassIndex(-1);
    // flatData.deleteAttributeAt(flatData.numAttributes() - 1);

    // set weights
    int instanceIdx = 0;
    for (Instance bag : miData) {
        for (Instance instance : bag.relationalValue(1)) {
            flatData.get(instanceIdx).setWeight(instance.weight());
            instanceIdx++;
        }
    }
    return flatData;
}