Example usage for weka.core Instances setClassIndex

List of usage examples for weka.core Instances setClassIndex

Introduction

In this page you can find the example usage for weka.core Instances setClassIndex.

Prototype

public void setClassIndex(int classIndex) 

Source Link

Document

Sets the class index of the set.

Usage

From source file:de.uniheidelberg.cl.swp.mlprocess.AblationTesting.java

License:Apache License

/**
 * Creates an Instance object for the specified List of Features.
 * <br>/*from  w  w w  .j a  v a  2 s . co m*/
 * Extracts the Instance objects from a source file and suppresses all features but the ones 
 * specified.
 * 
 * @param fileName File to the training results in ARFF format.
 * @param features List of {@link AbstractFeatureExtractor}s which are currently being tested.
 * @return Instances object consisting of the desired attribute structure.
 * @throws Exception If the ARFF file couldn't be read, an exception is thrown.
 */
public Instances createInstances(String fileName, List<AbstractFeatureExtractor> features) throws Exception {
    final Instances train = new Instances(new BufferedReader(new FileReader(fileName)));
    ArrayList<Attribute> newAttributes = new ArrayList<Attribute>();

    for (int i = 0; i < train.numAttributes(); i++) {
        for (AbstractFeatureExtractor feature : features) {
            if (train.attribute(i).name().equals(feature.getName())) {
                newAttributes.add(train.attribute(i));

                continue;
            }
        }
    }

    /* 
     * add the last two features (ACR-System + correct/false predictions) as those 
     * are no features gathered by a FeatureExtractor.
     */
    newAttributes.add(train.attribute(train.numAttributes() - 2));
    newAttributes.add(train.attribute(train.numAttributes() - 1));
    Instances trainCopy = copyInstances(train, newAttributes);
    trainCopy.setClassIndex(trainCopy.numAttributes() - 1);

    return trainCopy;
}

From source file:de.uniheidelberg.cl.swp.mlprocess.WEKARunner.java

License:Apache License

/**
 * Evaluates our classifier with a test set.
 * <br>/*w w  w.  j  a  va 2 s. c  om*/
 * Not used yet.
 *
 * @param testArff ARFF file to evaluate against.
 * @throws If the evaluation couldn't be initialized.
 */
public void buildEvaluation(String testArff) throws Exception {
    Instances evalIns = new Instances(new BufferedReader(new FileReader(testArff)));
    evalIns.setClassIndex(evalIns.numAttributes() - 1);
    evaluation = new Evaluation(train);
}

From source file:de.unimannheim.dws.algorithms.CustomSimpleKMedian.java

License:Open Source License

/**
 * Generates a clusterer. Has to initialize all fields of the clusterer that
 * are not being set via options./*from w w  w  . j a v a 2  s  .c  o  m*/
 * 
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {

    // can clusterer handle the data?
    getCapabilities().testWithFail(data);

    m_Iterations = 0;

    m_ReplaceMissingFilter = new ReplaceMissingValues();
    Instances instances = new Instances(data);

    instances.setClassIndex(-1);
    if (!m_dontReplaceMissing) {
        m_ReplaceMissingFilter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
    }

    m_FullMissingCounts = new int[instances.numAttributes()];
    if (m_displayStdDevs) {
        m_FullStdDevs = new double[instances.numAttributes()];
    }
    m_FullNominalCounts = new int[instances.numAttributes()][0];

    m_FullMeansOrMediansOrModes = moveCentroid(0, instances, false);
    for (int i = 0; i < instances.numAttributes(); i++) {
        m_FullMissingCounts[i] = instances.attributeStats(i).missingCount;
        if (instances.attribute(i).isNumeric()) {
            if (m_displayStdDevs) {
                m_FullStdDevs[i] = Math.sqrt(instances.variance(i));
            }
            if (m_FullMissingCounts[i] == instances.numInstances()) {
                m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean
            }
        } else {
            m_FullNominalCounts[i] = instances.attributeStats(i).nominalCounts;
            if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) {
                m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common
                                                     // value
            }
        }
    }

    m_ClusterCentroids = new Instances(instances, m_NumClusters);
    int[] clusterAssignments = new int[instances.numInstances()];

    if (m_PreserveOrder) {
        m_Assignments = clusterAssignments;
    }

    m_DistanceFunction.setInstances(instances);

    Random RandomO = new Random(getSeed());
    int instIndex;
    HashMap initC = new HashMap();
    DecisionTableHashKey hk = null;

    Instances initInstances = null;
    if (m_PreserveOrder) {
        initInstances = new Instances(instances);
    } else {
        initInstances = instances;
    }

    for (int j = initInstances.numInstances() - 1; j >= 0; j--) {
        instIndex = RandomO.nextInt(j + 1);
        hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(), true);
        if (!initC.containsKey(hk)) {
            m_ClusterCentroids.add(initInstances.instance(instIndex));
            initC.put(hk, null);
        }
        initInstances.swap(j, instIndex);

        if (m_ClusterCentroids.numInstances() == m_NumClusters) {
            break;
        }
    }

    m_NumClusters = m_ClusterCentroids.numInstances();

    // removing reference
    initInstances = null;

    int i;
    boolean converged = false;
    int emptyClusterCount;
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    m_ClusterMissingCounts = new int[m_NumClusters][instances.numAttributes()];
    while (!converged) {
        emptyClusterCount = 0;
        m_Iterations++;
        converged = true;
        for (i = 0; i < instances.numInstances(); i++) {
            Instance toCluster = instances.instance(i);
            int newC = clusterProcessedInstance(toCluster, true);
            if (newC != clusterAssignments[i]) {
                converged = false;
            }
            clusterAssignments[i] = newC;
        }

        // update centroids
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        for (i = 0; i < m_NumClusters; i++) {
            if (tempI[i].numInstances() == 0) {
                // empty cluster
                emptyClusterCount++;
            } else {
                moveCentroid(i, tempI[i], true);
            }
        }

        if (m_Iterations == m_MaxIterations) {
            converged = true;
        }

        if (emptyClusterCount > 0) {
            m_NumClusters -= emptyClusterCount;
            if (converged) {
                Instances[] t = new Instances[m_NumClusters];
                int index = 0;
                for (int k = 0; k < tempI.length; k++) {
                    if (tempI[k].numInstances() > 0) {
                        t[index] = tempI[k];

                        for (i = 0; i < tempI[k].numAttributes(); i++) {
                            m_ClusterNominalCounts[index][i] = m_ClusterNominalCounts[k][i];
                        }
                        index++;
                    }
                }
                tempI = t;
            } else {
                tempI = new Instances[m_NumClusters];
            }
        }

        if (!converged) {
            m_squaredErrors = new double[m_NumClusters];
            m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
        }
    }

    if (m_displayStdDevs) {
        m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    }
    m_ClusterSizes = new int[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        if (m_displayStdDevs) {
            double[] vals2 = new double[instances.numAttributes()];
            for (int j = 0; j < instances.numAttributes(); j++) {
                if (instances.attribute(j).isNumeric()) {
                    vals2[j] = Math.sqrt(tempI[i].variance(j));
                } else {
                    vals2[j] = Instance.missingValue();
                }
            }
            m_ClusterStdDevs.add(new Instance(1.0, vals2));
        }
        m_ClusterSizes[i] = tempI[i].numInstances();
    }

    // Save memory!!
    m_DistanceFunction.clean();
}

From source file:de.uni_koeln.phil_fak.iv.tm.p4.classification.WekaAdapter.java

License:Open Source License

private Instances initTraininSet(Set<Document> trainingData) {
    // Weka models the attribute structure as a vector of Attribute objects:
    // one slot per feature plus one for the class attribute.
    FastVector attributeStructure = new FastVector(vectorSize + 1);
    // The class attribute is nominal (not numeric), so every possible class
    // label must be enumerated up front.
    FastVector classLabels = new FastVector(this.classes.size());
    for (String label : classes) {
        classLabels.addElement(label);
    }
    // Position 0 of the overall structure holds the class attribute.
    attributeStructure.addElement(new Attribute("Ressort", classLabels));
    // Every remaining position is a numeric feature (a TF-IDF value) whose
    // attribute name is simply its index.
    for (int i = 0; i < vectorSize; i++) {
        attributeStructure.addElement(new Attribute(i + ""));
    }
    // Container that will hold the training examples in the structure above.
    Instances result = new Instances("InstanceStructure", attributeStructure, vectorSize + 1);
    // Tell Weka where in each feature vector the class is located.
    result.setClassIndex(0);
    return result;
}

From source file:de.uni_koeln.spinfo.classification.zoneAnalysis.classifier.WekaClassifier.java

License:Open Source License

private Instances initTrainingSet(List<ClassifyUnit> trainingData) {

    int vectorSize = trainingData.get(0).getFeatureVector().length;
    // Collect the distinct class IDs occurring in the training data
    // (TreeSet keeps them sorted).
    Set<Integer> classIDs = new TreeSet<Integer>();
    for (ClassifyUnit unit : trainingData) {
        ZoneClassifyUnit zoneUnit = (ZoneClassifyUnit) unit;
        classIDs.add(zoneUnit.getActualClassID());
    }
    // One attribute per numeric feature (here: TF-IDF values) plus one class attribute.
    ArrayList<Attribute> attributeStructure = new ArrayList<Attribute>(vectorSize + 1);
    // The class attribute is nominal (a string attribute in Weka terms), so
    // all possible attribute values must be listed explicitly.
    ArrayList<String> classLabels = new ArrayList<String>();
    for (Integer id : classIDs) {
        classLabels.add(id + "");
    }
    // Position 0 of the structure vector holds the class attribute.
    attributeStructure.add(new Attribute("topic", classLabels));
    // Every further position is a numeric feature whose attribute name is
    // simply its index position.
    for (int i = 0; i < vectorSize; i++) {
        attributeStructure.add(new Attribute(i + ""));
    }
    // Container that will hold the training examples in the structure above.
    Instances result = new Instances("InstanceStructure", attributeStructure, vectorSize + 1);
    // Tell Weka where in each feature vector the class is located.
    result.setClassIndex(0);
    return result;
}

From source file:de.upb.timok.oneclassclassifier.WekaSvmClassifier.java

License:Open Source License

@Override
public void train(List<double[]> trainingSamples) {
    Instances data = DatasetTransformationUtils.trainingSetToInstances(trainingSamples);
    try {
        // Optionally run the configured preprocessing filter first.
        if (filter != null) {
            filter.setInputFormat(data);
            data = Filter.useFilter(data, filter);
        }
        // Set the class attribute only if the data format does not already
        // provide this information (the XRFF format, for example, stores the
        // class attribute itself).
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }
        wekaSvm.buildClassifier(data);
    } catch (final Exception e) {
        logger.error("Unexpected exception", e);
    }
}

From source file:de.upb.timok.utils.DatasetTransformationUtils.java

License:Open Source License

public static Instances trainingSetToInstances(List<double[]> trainingSet) {
    final double[] sample = trainingSet.get(0);
    // One numeric attribute per feature dimension, named by its index.
    final ArrayList<Attribute> attributes = new ArrayList<>(sample.length + 1);
    for (int i = 0; i < sample.length; i++) {
        attributes.add(new Attribute(Integer.toString(i)));
    }

    // Single nominal class value "normal" — every training sample carries it
    // (one-class classification setting).
    final ArrayList<String> classValues = new ArrayList<>();
    classValues.add("normal");
    final Attribute classAttribute = new Attribute("class", classValues);

    // The class attribute is the last entry of the feature vector.
    attributes.add(classAttribute);
    final Instances result = new Instances("trainingSet", attributes, trainingSet.size());
    result.setClass(classAttribute);
    result.setClassIndex(attributes.size() - 1);

    // Copy each sample, appending class value 0 ("normal") as the last slot.
    for (final double[] sampleValues : trainingSet) {
        final double[] withClass = Arrays.copyOf(sampleValues, sampleValues.length + 1);
        withClass[withClass.length - 1] = 0;
        final Instance wekaInstance = new DenseInstance(1, withClass);
        wekaInstance.setDataset(result);
        result.add(wekaInstance);
    }
    return result;
}

From source file:de.upb.timok.utils.DatasetTransformationUtils.java

License:Open Source License

/**
 * Converts a list of raw feature vectors into a Weka {@link Instances} test set.
 * The attribute structure is one numeric attribute per feature dimension plus a
 * nominal class attribute ("normal") at the last position.
 *
 * @param testSet list of feature vectors; all vectors are expected to have the
 *                same length as the first one
 * @return the Weka representation of the test set
 */
public static Instances testSetToInstances(List<double[]> testSet) {
    if (testSet.size() == 0) {
        logger.warn("TestSet has size 0");
        // NOTE(review): an empty list still falls through to get(0) below and
        // fails with IndexOutOfBoundsException — confirm whether callers rely
        // on that before turning this warning into an early return.
    }
    final double[] sample = testSet.get(0);
    // Presize for all feature attributes plus the class attribute appended
    // below (the original under-allocated by one).
    final ArrayList<Attribute> fvWekaAttributes = new ArrayList<>(sample.length + 1);
    for (int i = 0; i < sample.length; i++) {
        fvWekaAttributes.add(new Attribute(Integer.toString(i)));
    }
    final ArrayList<String> classStrings = new ArrayList<>();
    classStrings.add("normal");
    final Attribute classAttribute = new Attribute("class", classStrings);
    fvWekaAttributes.add(classAttribute);

    // Declare the feature vector
    final Instances result = new Instances("testSet", fvWekaAttributes, testSet.size());
    result.setClassIndex(fvWekaAttributes.size() - 1);
    for (final double[] instance : testSet) {
        // NOTE(review): 'instance' has one fewer value than the data set has
        // attributes (no class slot is appended here, unlike
        // trainingSetToInstances) — presumably intentional for unlabeled test
        // data, but verify against how the classifier consumes this.
        final Instance wekaInstance = new DenseInstance(1, instance);
        wekaInstance.setDataset(result);
        result.add(wekaInstance);
    }
    return result;
}

From source file:DiversifyQuery.DivTopK.java

/**
 * Load a set of Instances from an ARFF.
 *
 * @param fileName the file name of the ARFF
 * @return a set of Instances from the ARFF, or {@code null} if loading failed
 */
public static Instances loadData(String fileName) {
    Instances data = null;
    // try-with-resources closes the reader on both success and failure
    // (the original leaked the FileReader).
    try (FileReader r = new FileReader(fileName)) {
        data = new Instances(r);

        // Convention: the class attribute is the last attribute in the ARFF.
        data.setClassIndex(data.numAttributes() - 1);
    } catch (Exception e) {
        // Best-effort contract preserved: report the error and return null.
        System.out.println(" Error =" + e + " in method loadData");
        e.printStackTrace();
    }
    return data;
}

From source file:DiversifyQuery.DivTopK.java

/**
 * Sets the format of the filtered instances that are output. I.e. will
 * include k attributes each shapelet distance and a class value
 *
 * @param inputFormat the format of the input data
 * @return a new Instances object in the desired output format
 * @throws Exception if all required parameters of the filter are not
 * initialised correctly/*  w  ww .  j av  a 2  s .c o  m*/
 */
protected Instances determineOutputFormat(Instances inputFormat, ArrayList<LegacyShapelet> shapelets)
        throws Exception {

    //Set up instances size and format.
    //int length = this.numShapelets;
    int length = shapelets.size();
    FastVector atts = new FastVector();
    String name;
    for (int i = 0; i < length; i++) {
        name = "Shapelet_" + i;
        atts.addElement(new Attribute(name));
    }

    if (inputFormat.classIndex() >= 0) { //Classification set, set class
        //Get the class values as a fast vector
        Attribute target = inputFormat.attribute(inputFormat.classIndex());

        FastVector vals = new FastVector(target.numValues());
        for (int i = 0; i < target.numValues(); i++) {
            vals.addElement(target.value(i));
        }
        atts.addElement(new Attribute(inputFormat.attribute(inputFormat.classIndex()).name(), vals));
    }
    Instances result = new Instances("Shapelets" + inputFormat.relationName(), atts,
            inputFormat.numInstances());
    if (inputFormat.classIndex() >= 0) {
        result.setClassIndex(result.numAttributes() - 1);
    }
    return result;
}