Example usage for weka.core Instance classValue

Introduction

In this page you can find the example usage for weka.core Instance classValue.

Prototype

public double classValue();

Source Link

Document

Returns an instance's class value as a floating-point number.

Usage

From source file:org.scripps.branch.classifier.ManualTree.java

License:Open Source License

/**
 * Builds classifier./*from w w  w  . j a  v  a 2s.c  om*/
 * 
 * @param data
 *            the data to train with
 * @throws Exception
 *             if something goes wrong or the data doesn't fit
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    // Make sure K value is in range
    if (m_KValue > data.numAttributes() - 1)
        m_KValue = data.numAttributes() - 1;
    if (m_KValue < 1)
        m_KValue = (int) Utils.log2(data.numAttributes()) + 1;

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return;
    } else {
        m_ZeroR = null;
    }

    // Figure out appropriate datasets
    Instances train = null;
    Instances backfit = null;
    Random rand = data.getRandomNumberGenerator(m_randomSeed);
    if (m_NumFolds <= 0) {
        train = data;
    } else {
        data.randomize(rand);
        data.stratify(m_NumFolds);
        train = data.trainCV(m_NumFolds, 1, rand);
        backfit = data.testCV(m_NumFolds, 1);
    }

    //Set Default Instances for selection.
    setRequiredInst(data);

    // Create the attribute indices window
    int[] attIndicesWindow = new int[data.numAttributes() - 1];
    int j = 0;
    for (int i = 0; i < attIndicesWindow.length; i++) {
        if (j == data.classIndex())
            j++; // do not include the class
        attIndicesWindow[i] = j++;
    }

    // Compute initial class counts
    double[] classProbs = new double[train.numClasses()];
    for (int i = 0; i < train.numInstances(); i++) {
        Instance inst = train.instance(i);
        classProbs[(int) inst.classValue()] += inst.weight();
    }

    Instances requiredInstances = getRequiredInst();
    // Build tree
    if (jsontree != null) {
        buildTree(train, classProbs, new Instances(data, 0), m_Debug, 0, jsontree, 0, m_distributionData,
                requiredInstances, listOfFc, cSetList, ccSer, d);
    } else {
        System.out.println("No json tree specified, failing to process tree");
    }
    setRequiredInst(requiredInstances);
    // Backfit if required
    if (backfit != null) {
        backfitData(backfit);
    }
}

From source file:org.scripps.branch.classifier.ManualTree.java

License:Open Source License

/**
 * Computes class distribution for an attribute.
 * /*  w  w w. jav a 2s  .  c  om*/
 * @param props
 * @param dists
 * @param att
 *            the attribute index
 * @param data
 *            the data to work with
 * @throws Exception
 *             if something goes wrong
 */
protected HashMap<String, Double> distribution(double[][] props, double[][][] dists, int att, Instances data,
        double givenSplitPoint, HashMap<String, Classifier> custom_classifiers) throws Exception {

    HashMap<String, Double> mp = new HashMap<String, Double>();
    double splitPoint = givenSplitPoint;
    double origSplitPoint = 0;
    Attribute attribute = null;
    double[][] dist = null;
    int indexOfFirstMissingValue = -1;
    String CustomClassifierId = null;
    CustomSet cSet = null;
    if (att >= data.numAttributes() && att < data.numAttributes() + custom_classifiers.size()) {
        CustomClassifierId = getKeyinMap(custom_classifiers, att, data);
    } else if (att >= data.numAttributes() + custom_classifiers.size()) {
        cSet = getReqCustomSet(att - (data.numAttributes() - 1 + custom_classifiers.size()), cSetList);
    } else {
        attribute = data.attribute(att);
    }
    if (CustomClassifierId == null && cSet == null) {
        if (attribute.isNominal()) {
            // For nominal attributes
            dist = new double[attribute.numValues()][data.numClasses()];
            for (int i = 0; i < data.numInstances(); i++) {
                Instance inst = data.instance(i);
                if (inst.isMissing(att)) {

                    // Skip missing values at this stage
                    if (indexOfFirstMissingValue < 0) {
                        indexOfFirstMissingValue = i;
                    }
                    continue;
                }
                dist[(int) inst.value(att)][(int) inst.classValue()] += inst.weight();
            }
        } else {

            // For numeric attributes
            double[][] currDist = new double[2][data.numClasses()];
            dist = new double[2][data.numClasses()];

            // Sort data
            data.sort(att);

            // Move all instances into second subset
            for (int j = 0; j < data.numInstances(); j++) {
                Instance inst = data.instance(j);
                if (inst.isMissing(att)) {

                    // Can stop as soon as we hit a missing value
                    indexOfFirstMissingValue = j;
                    break;
                }
                currDist[1][(int) inst.classValue()] += inst.weight();
            }

            // Value before splitting
            double priorVal = priorVal(currDist);

            // Save initial distribution
            for (int j = 0; j < currDist.length; j++) {
                System.arraycopy(currDist[j], 0, dist[j], 0, dist[j].length);
            }

            if (Double.isNaN(splitPoint)) {
                // Try all possible split points
                double currSplit = data.instance(0).value(att);
                double currVal, bestVal = -Double.MAX_VALUE;
                for (int i = 0; i < data.numInstances(); i++) {
                    Instance inst = data.instance(i);
                    if (inst.isMissing(att)) {

                        // Can stop as soon as we hit a missing value
                        break;
                    }

                    // Can we place a sensible split point here?
                    if (inst.value(att) > currSplit) {

                        // Compute gain for split point
                        currVal = gain(currDist, priorVal);

                        // Is the current split point the best point so far?
                        if (currVal > bestVal) {

                            // Store value of current point
                            bestVal = currVal;

                            // Save split point
                            splitPoint = (inst.value(att) + currSplit) / 2.0;
                            origSplitPoint = splitPoint;

                            // Save distribution
                            for (int j = 0; j < currDist.length; j++) {
                                System.arraycopy(currDist[j], 0, dist[j], 0, dist[j].length);
                            }
                        }
                    }
                    currSplit = inst.value(att);

                    // Shift over the weight
                    currDist[0][(int) inst.classValue()] += inst.weight();
                    currDist[1][(int) inst.classValue()] -= inst.weight();
                }
            } else {
                double currSplit = data.instance(0).value(att);
                double currVal, bestVal = -Double.MAX_VALUE;
                // Split data set using given split point.
                for (int i = 0; i < data.numInstances(); i++) {
                    Instance inst = data.instance(i);
                    if (inst.isMissing(att)) {
                        // Can stop as soon as we hit a missing value
                        break;
                    }
                    if (inst.value(att) > currSplit) {
                        // Compute gain for split point
                        currVal = gain(currDist, priorVal);
                        // Is the current split point the best point so far?
                        if (currVal > bestVal) {
                            // Store value of current point
                            bestVal = currVal;
                            // Save computed split point
                            origSplitPoint = (inst.value(att) + currSplit) / 2.0;
                        }
                    }
                    currSplit = inst.value(att);
                    // Shift over the weight
                    currDist[0][(int) inst.classValue()] += inst.weight();
                    currDist[1][(int) inst.classValue()] -= inst.weight();
                    if (inst.value(att) <= splitPoint) {
                        // Save distribution since split point is specified
                        for (int j = 0; j < currDist.length; j++) {
                            System.arraycopy(currDist[j], 0, dist[j], 0, dist[j].length);
                        }
                    }
                }
            }
        }
    } else if (CustomClassifierId != null) {
        Classifier fc = custom_classifiers.get(CustomClassifierId);
        dist = new double[data.numClasses()][data.numClasses()];
        Instance inst;
        for (int i = 0; i < data.numInstances(); i++) {
            inst = data.instance(i);
            double predictedClass = fc.classifyInstance(inst);
            if (predictedClass != Instance.missingValue()) {
                dist[(int) predictedClass][(int) inst.classValue()] += inst.weight();
            }
        }
    } else if (cSet != null) {
        dist = new double[data.numClasses()][data.numClasses()];
        JsonNode vertices = mapper.readTree(cSet.getConstraints());
        ArrayList<double[]> attrVertices = generateVerticesList(vertices);
        List<Attribute> aList = generateAttributeList(cSet, data, d);
        double[] testPoint = new double[2];
        int ctr = 0;
        for (int k = 0; k < data.numInstances(); k++) {
            testPoint = new double[2];
            ctr = 0;
            for (Attribute a : aList) {
                if (!data.instance(k).isMissing(a)) {
                    testPoint[ctr] = data.instance(k).value(a);
                    ctr++;
                }
            }
            int check = checkPointInPolygon(attrVertices, testPoint);
            dist[check][(int) data.instance(k).classValue()] += data.instance(k).weight();
        }
    }

    // Compute weights for subsetsCustomClassifierIndex
    props[att] = new double[dist.length];
    for (int k = 0; k < props[att].length; k++) {
        props[att][k] = Utils.sum(dist[k]);
    }
    if (Utils.eq(Utils.sum(props[att]), 0)) {
        for (int k = 0; k < props[att].length; k++) {
            props[att][k] = 1.0 / props[att].length;
        }
    } else {
        Utils.normalize(props[att]);
    }

    // Any instances with missing values ?
    if (indexOfFirstMissingValue > -1) {

        // Distribute weights for instances with missing values
        for (int i = indexOfFirstMissingValue; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            if (attribute.isNominal()) {

                // Need to check if attribute value is missing
                if (inst.isMissing(att)) {
                    for (int j = 0; j < dist.length; j++) {
                        dist[j][(int) inst.classValue()] += props[att][j] * inst.weight();
                    }
                }
            } else {

                // Can be sure that value is missing, so no test required
                for (int j = 0; j < dist.length; j++) {
                    dist[j][(int) inst.classValue()] += props[att][j] * inst.weight();
                }
            }
        }
    }

    // Return distribution and split point
    dists[att] = dist;
    mp.put("split_point", splitPoint);
    mp.put("orig_split_point", origSplitPoint);
    return mp;
}

From source file:org.wkwk.classifier.MyC45.java

public double computeEntropy(Instances data) {
    // Hitung kemunculan kelas
    double[] classCounts = new double[data.numClasses()];
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        classCounts[(int) inst.classValue()]++;
    }/*from   www  .  ja va 2s .c o  m*/

    // Hitung entropy
    double entropy = 0;
    for (int i = 0; i < data.numClasses(); i++) {
        if (classCounts[i] > 0) {
            entropy -= classCounts[i] / data.numInstances() * Utils.log2(classCounts[i] / data.numInstances());
        }
    }
    return entropy;
}

From source file:org.wkwk.classifier.MyC45.java

public double bestThreshold(Instances data, Attribute attr) {
    data.sort(attr);/*from ww w . j  a v  a  2s.com*/

    double m_ig = 0;
    double bestThr = 0;
    double classTemp = data.get(0).classValue();
    double valueTemp = data.get(0).value(attr);

    Enumeration instEnum = data.enumerateInstances();
    double dt;
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        if (classTemp != inst.classValue()) {
            classTemp = inst.classValue();
            dt = valueTemp;
            valueTemp = inst.value(attr);
            double threshold = dt + ((valueTemp - dt) / 2);
            double igTemp = computeInfoGainCont(data, attr, threshold);
            if (m_ig < igTemp) {
                m_ig = igTemp;
                bestThr = threshold;
            }
        }
    }
    return bestThr;
}

From source file:preprocess.StringToWordVector.java

License:Open Source License

/**
 * determines the dictionary./* w w  w  . ja  v a2s.c o m*/
 */
private void determineDictionary() {
    // initialize stopwords
    Stopwords stopwords = new Stopwords();
    if (getUseStoplist()) {
        try {
            if (getStopwords().exists() && !getStopwords().isDirectory())
                stopwords.read(getStopwords());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // Operate on a per-class basis if class attribute is set
    int classInd = getInputFormat().classIndex();
    int values = 1;
    if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
        values = getInputFormat().attribute(classInd).numValues();
    }

    //TreeMap dictionaryArr [] = new TreeMap[values];
    TreeMap[] dictionaryArr = new TreeMap[values];
    for (int i = 0; i < values; i++) {
        dictionaryArr[i] = new TreeMap();
    }

    // Make sure we know which fields to convert
    determineSelectedRange();

    // Tokenize all training text into an orderedMap of "words".
    long pruneRate = Math.round((m_PeriodicPruningRate / 100.0) * getInputFormat().numInstances());
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance instance = getInputFormat().instance(i);
        int vInd = 0;
        if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
            vInd = (int) instance.classValue();
        }

        // Iterate through all relevant string attributes of the current instance
        Hashtable h = new Hashtable();
        for (int j = 0; j < instance.numAttributes(); j++) {
            if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {

                // Get tokenizer
                m_Tokenizer.tokenize(instance.stringValue(j));

                // Iterate through tokens, perform stemming, and remove stopwords
                // (if required)
                while (m_Tokenizer.hasMoreElements()) {
                    String word = ((String) m_Tokenizer.nextElement()).intern();

                    if (this.m_lowerCaseTokens == true)
                        word = word.toLowerCase();

                    word = m_Stemmer.stem(word);

                    if (this.m_useStoplist == true)
                        if (stopwords.is(word))
                            continue;

                    if (!(h.contains(word)))
                        h.put(word, new Integer(0));

                    Count count = (Count) dictionaryArr[vInd].get(word);
                    if (count == null) {
                        dictionaryArr[vInd].put(word, new Count(1));
                    } else {
                        count.count++;
                    }
                }
            }
        }

        //updating the docCount for the words that have occurred in this
        //instance(document).
        Enumeration e = h.keys();
        while (e.hasMoreElements()) {
            String word = (String) e.nextElement();
            Count c = (Count) dictionaryArr[vInd].get(word);
            if (c != null) {
                c.docCount++;
            } else
                System.err.println(
                        "Warning: A word should definitely be in the " + "dictionary.Please check the code");
        }

        if (pruneRate > 0) {
            if (i % pruneRate == 0 && i > 0) {
                for (int z = 0; z < values; z++) {
                    Vector d = new Vector(1000);
                    Iterator it = dictionaryArr[z].keySet().iterator();
                    while (it.hasNext()) {
                        String word = (String) it.next();
                        Count count = (Count) dictionaryArr[z].get(word);
                        if (count.count <= 1) {
                            d.add(word);
                        }
                    }
                    Iterator iter = d.iterator();
                    while (iter.hasNext()) {
                        String word = (String) iter.next();
                        dictionaryArr[z].remove(word);
                    }
                }
            }
        }
    }

    // Figure out the minimum required word frequency
    int totalsize = 0;
    int prune[] = new int[values];
    for (int z = 0; z < values; z++) {
        totalsize += dictionaryArr[z].size();

        int array[] = new int[dictionaryArr[z].size()];
        int pos = 0;
        Iterator it = dictionaryArr[z].keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            Count count = (Count) dictionaryArr[z].get(word);
            array[pos] = count.count;
            pos++;
        }

        // sort the array
        sortArray(array);
        if (array.length < m_WordsToKeep) {
            // if there aren't enough words, set the threshold to
            // minFreq
            prune[z] = m_minTermFreq;
        } else {
            // otherwise set it to be at least minFreq
            prune[z] = Math.max(m_minTermFreq, array[array.length - m_WordsToKeep]);
        }
    }

    // Convert the dictionary into an attribute index
    // and create one attribute per word
    FastVector attributes = new FastVector(totalsize + getInputFormat().numAttributes());

    // Add the non-converted attributes 
    int classIndex = -1;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().classIndex() == i) {
                classIndex = attributes.size();
            }
            attributes.addElement(getInputFormat().attribute(i).copy());
        }
    }

    // Add the word vector attributes (eliminating duplicates
    // that occur in multiple classes)
    TreeMap newDictionary = new TreeMap();
    int index = attributes.size();
    for (int z = 0; z < values; z++) {
        Iterator it = dictionaryArr[z].keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            Count count = (Count) dictionaryArr[z].get(word);
            if (count.count >= prune[z]) {
                if (newDictionary.get(word) == null) {
                    newDictionary.put(word, new Integer(index++));
                    attributes.addElement(new Attribute(m_Prefix + word));
                }
            }
        }
    }

    // Compute document frequencies
    m_DocsCounts = new int[attributes.size()];
    Iterator it = newDictionary.keySet().iterator();
    while (it.hasNext()) {
        String word = (String) it.next();
        int idx = ((Integer) newDictionary.get(word)).intValue();
        int docsCount = 0;
        for (int j = 0; j < values; j++) {
            Count c = (Count) dictionaryArr[j].get(word);
            if (c != null)
                docsCount += c.docCount;
        }
        m_DocsCounts[idx] = docsCount;
    }

    // Trim vector and set instance variables
    attributes.trimToSize();
    m_Dictionary = newDictionary;
    m_NumInstances = getInputFormat().numInstances();

    // Set the filter's output format
    Instances outputFormat = new Instances(getInputFormat().relationName(), attributes, 0);
    outputFormat.setClassIndex(classIndex);
    setOutputFormat(outputFormat);
}

From source file:put.semantic.fcanew.ml.WekaClassifier.java

@Override
public void loadExamples(File f) throws IOException {
    ArffLoader l = new ArffLoader();
    l.setFile(f);// www. ja  va2s . c  o  m
    Instances structure = l.getStructure();
    Instance i;
    while ((i = l.getNextInstance(structure)) != null) {
        if (!instances.checkInstance(i)) {
            i = convert(i, structure, instances);
        } else {
            i.setDataset(instances);
        }
        if (instances.checkInstance(i)) {
            if (i.classValue() == 0) {
                i.setWeight(getRejectedWeight());
            }
            instances.add(i);
        } else {
            System.err.println("Ignoring incompatible instance");
        }
    }
    updateModel();
    tableModel.fireTableDataChanged();
}

From source file:resample.OverSubsample.java

License:Open Source License

/**
 * Creates a subsample of the current set of input instances. The output
 * instances are pushed onto the output queue for collection.
 *///from   w ww.  ja va2 s .c o m
private void createSubsample() {

    int classI = getInputFormat().classIndex();
    // Sort according to class attribute.
    getInputFormat().sort(classI);
    // Determine where each class starts in the sorted dataset
    int[] classIndices = getClassIndices();

    // Get the existing class distribution
    int[] counts = new int[getInputFormat().numClasses()];
    double[] weights = new double[getInputFormat().numClasses()];
    int max = -1;
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance current = getInputFormat().instance(i);
        if (current.classIsMissing() == false) {
            counts[(int) current.classValue()]++;
            weights[(int) current.classValue()] += current.weight();
        }
    }

    // Convert from total weight to average weight
    for (int i = 0; i < counts.length; i++) {
        if (counts[i] > 0) {
            weights[i] = weights[i] / counts[i];
        }
        /*
        System.err.println("Class:" + i + " " + getInputFormat().classAttribute().value(i)
                 + " Count:" + counts[i]
                 + " Total:" + weights[i] * counts[i]
                 + " Avg:" + weights[i]);
         */
    }

    // find the class with the minimum number of instances
    int maxIndex = -1;
    for (int i = 0; i < counts.length; i++) {
        if ((max < 0) && (counts[i] > 0)) {
            max = counts[i];
            maxIndex = i;
        } else if ((counts[i] > max) && (counts[i] > 0)) {
            max = counts[i];
            maxIndex = i;
        }
    }

    if (max < 0) {
        System.err.println("SpreadSubsample: *warning* none of the classes have any values in them.");
        return;
    }

    // determine the new distribution 
    int[] new_counts = new int[getInputFormat().numClasses()];
    for (int i = 0; i < counts.length; i++) {
        new_counts[i] = (int) Math.abs(Math.max(counts[i], max * m_DistributionSpread));
        if (i == maxIndex) {
            if (m_DistributionSpread > 0 && m_DistributionSpread < 1.0) {
                // don't undersample the majority class!
                new_counts[i] = counts[i];
            }
        }
        if (m_DistributionSpread == 0) {
            new_counts[i] = counts[i];
        }

        if (m_MaxCount > 0) {
            new_counts[i] = Math.min(new_counts[i], m_MaxCount);
        }
    }

    // Sample with replacement
    Random random = new Random(m_RandomSeed);
    //Hashtable t = new Hashtable();
    for (int j = 0; j < new_counts.length; j++) {
        double newWeight = 1.0;
        if (m_AdjustWeights && (new_counts[j] > 0)) {
            newWeight = weights[j] * counts[j] / new_counts[j];
            /*
            System.err.println("Class:" + j + " " + getInputFormat().classAttribute().value(j) 
                   + " Count:" + counts[j]
                   + " Total:" + weights[j] * counts[j]
                   + " Avg:" + weights[j]
                   + " NewCount:" + new_counts[j]
                   + " NewAvg:" + newWeight);
             */
        }
        int index = -1;
        for (int k = 0; k < new_counts[j]; k++) {
            //boolean ok = false;
            //do {
            index = classIndices[j] + (Math.abs(random.nextInt()) % (classIndices[j + 1] - classIndices[j]));
            // Have we used this instance before?
            //if (t.get("" + index) == null) {
            // if not, add it to the hashtable and use it
            //t.put("" + index, "");
            //ok = true;
            if (index >= 0) {
                Instance newInst = (Instance) getInputFormat().instance(index).copy();
                if (m_AdjustWeights) {
                    newInst.setWeight(newWeight);
                }
                push(newInst);
            }
            //}
            //} while (!ok);
        }
    }
}

From source file:resample.OverSubsample.java

License:Open Source License

/**
 * Creates an index containing the position where each class starts in 
 * the getInputFormat(). m_InputFormat must be sorted on the class attribute.
 * /*from   ww w .ja v  a2s .c  o m*/
 * @return the positions
 */
private int[] getClassIndices() {

    // Create an index of where each class value starts
    int[] classIndices = new int[getInputFormat().numClasses() + 1];
    int currentClass = 0;
    classIndices[currentClass] = 0;
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance current = getInputFormat().instance(i);
        if (current.classIsMissing()) {
            for (int j = currentClass + 1; j < classIndices.length; j++) {
                classIndices[j] = i;
            }
            break;
        } else if (current.classValue() != currentClass) {
            for (int j = currentClass + 1; j <= current.classValue(); j++) {
                classIndices[j] = i;
            }
            currentClass = (int) current.classValue();
        }
    }
    if (currentClass <= getInputFormat().numClasses()) {
        for (int j = currentClass + 1; j < classIndices.length; j++) {
            classIndices[j] = getInputFormat().numInstances();
        }
    }
    return classIndices;
}

From source file:sg.edu.nus.comp.nlp.ims.classifiers.CMultiClassesSVM.java

License:Open Source License

@Override
public void buildClassifier(Instances p_Instances) throws Exception {
    Instances newInsts = null;/*w  ww .  java 2 s.c  om*/
    if (this.m_Classifier == null) {
        throw new IllegalStateException("No base classifier has been set!");
    }

    this.m_ZeroR = new ZeroR();
    this.m_ZeroR.buildClassifier(p_Instances);

    this.m_ClassAttribute = p_Instances.classAttribute();
    this.getOutputFormat(p_Instances);
    int numClassifiers = p_Instances.numClasses();
    switch (numClassifiers) {
    case 1:
        this.m_Classifiers = null;
        break;
    case 2:
        this.m_Classifiers = Classifier.makeCopies(this.m_Classifier, 1);
        newInsts = new Instances(this.m_OutputFormat, 0);
        for (int i = 0; i < p_Instances.numInstances(); i++) {
            Instance inst = this.filterInstance(p_Instances.instance(i));
            inst.setDataset(newInsts);
            newInsts.add(inst);
        }
        this.m_Classifiers[0].buildClassifier(newInsts);
        break;
    default:
        this.m_Classifiers = Classifier.makeCopies(this.m_Classifier, numClassifiers);
        Hashtable<String, ArrayList<Double>> id2Classes = null;
        if (this.m_IndexOfID >= 0) {
            id2Classes = new Hashtable<String, ArrayList<Double>>();
            for (int i = 0; i < p_Instances.numInstances(); i++) {
                Instance inst = p_Instances.instance(i);
                String id = inst.stringValue(this.m_IndexOfID);
                if (!id2Classes.containsKey(id)) {
                    id2Classes.put(id, new ArrayList<Double>());
                }
                id2Classes.get(id).add(inst.classValue());
            }
        }
        for (int classIdx = 0; classIdx < this.m_Classifiers.length; classIdx++) {
            newInsts = this.genInstances(p_Instances, classIdx, id2Classes);
            this.m_Classifiers[classIdx].buildClassifier(newInsts);
        }
    }
}

From source file:smo2.SMO.java

License:Open Source License

/**
 * Method for building the classifier. Implements a one-against-one wrapper
 * for multi-class problems.//from w w w  .j a  v a2  s . co m
 *
 * @param insts
 *            the set of training instances
 * @exception Exception
 *                if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {

    if (!m_checksTurnedOff) {
        if (insts.checkForStringAttributes()) {
            throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");
        }
        if (insts.classAttribute().isNumeric()) {
            throw new UnsupportedClassTypeException(
                    "mySMO can't handle a numeric class! Use" + "SMOreg for performing regression.");
        }
        insts = new Instances(insts);
        insts.deleteWithMissingClass();
        if (insts.numInstances() == 0) {
            throw new Exception("No training instances without a missing class!");
        }

        /*
         * Removes all the instances with weight equal to 0. MUST be done
         * since condition (8) of Keerthi's paper is made with the assertion
         * Ci > 0 (See equation (3a).
         */
        Instances data = new Instances(insts, insts.numInstances());
        for (int i = 0; i < insts.numInstances(); i++) {
            if (insts.instance(i).weight() > 0)
                data.add(insts.instance(i));
        }
        if (data.numInstances() == 0) {
            throw new Exception("No training instances left after removing "
                    + "instance with either a weight null or a missing class!");
        }
        insts = data;

    }

    m_onlyNumeric = true;
    if (!m_checksTurnedOff) {
        for (int i = 0; i < insts.numAttributes(); i++) {
            if (i != insts.classIndex()) {
                if (!insts.attribute(i).isNumeric()) {
                    m_onlyNumeric = false;
                    break;
                }
            }
        }
    }

    if (!m_checksTurnedOff) {
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    } else {
        m_Missing = null;
    }

    if (!m_onlyNumeric) {
        m_NominalToBinary = new NominalToBinary();
        m_NominalToBinary.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_NominalToBinary);
    } else {
        m_NominalToBinary = null;
    }

    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else {
        m_Filter = null;
    }

    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();

    // Generate subsets representing each class
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        subsets[(int) inst.classValue()].add(inst);
    }
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i].compactify();
    }

    // Build the binary classifiers
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarymySMO[insts.numClasses()][insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        for (int j = i + 1; j < insts.numClasses(); j++) {
            m_classifiers[i][j] = new BinarymySMO();
            Instances data = new Instances(insts, insts.numInstances());
            for (int k = 0; k < subsets[i].numInstances(); k++) {
                data.add(subsets[i].instance(k));
            }
            for (int k = 0; k < subsets[j].numInstances(); k++) {
                data.add(subsets[j].instance(k));
            }
            data.compactify();
            data.randomize(rand);
            m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}