Example usage for weka.core Instance classValue

List of usage examples for weka.core Instance classValue

Introduction

In this page you can find the example usage for weka.core Instance classValue.

Prototype

public double classValue();

Source Link

Document

Returns an instance's class value as a floating-point number.

Usage

From source file:org.scripps.branch.classifier.ManualTree.java

License:Open Source License

/**
 * Builds classifier./*from w w  w  . j a  v  a 2s.c  om*/
 * 
 * @param data
 *            the data to train with
 * @throws Exception
 *             if something goes wrong or the data doesn't fit
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    // Make sure K value is in range
    if (m_KValue > data.numAttributes() - 1)
        m_KValue = data.numAttributes() - 1;
    if (m_KValue < 1)
        m_KValue = (int) Utils.log2(data.numAttributes()) + 1;

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return;
    } else {
        m_ZeroR = null;
    }

    // Figure out appropriate datasets
    Instances train = null;
    Instances backfit = null;
    Random rand = data.getRandomNumberGenerator(m_randomSeed);
    if (m_NumFolds <= 0) {
        train = data;
    } else {
        data.randomize(rand);
        data.stratify(m_NumFolds);
        train = data.trainCV(m_NumFolds, 1, rand);
        backfit = data.testCV(m_NumFolds, 1);
    }

    //Set Default Instances for selection.
    setRequiredInst(data);

    // Create the attribute indices window
    int[] attIndicesWindow = new int[data.numAttributes() - 1];
    int j = 0;
    for (int i = 0; i < attIndicesWindow.length; i++) {
        if (j == data.classIndex())
            j++; // do not include the class
        attIndicesWindow[i] = j++;
    }

    // Compute initial class counts
    double[] classProbs = new double[train.numClasses()];
    for (int i = 0; i < train.numInstances(); i++) {
        Instance inst = train.instance(i);
        classProbs[(int) inst.classValue()] += inst.weight();
    }

    Instances requiredInstances = getRequiredInst();
    // Build tree
    if (jsontree != null) {
        buildTree(train, classProbs, new Instances(data, 0), m_Debug, 0, jsontree, 0, m_distributionData,
                requiredInstances, listOfFc, cSetList, ccSer, d);
    } else {
        System.out.println("No json tree specified, failing to process tree");
    }
    setRequiredInst(requiredInstances);
    // Backfit if required
    if (backfit != null) {
        backfitData(backfit);
    }
}

From source file:org.scripps.branch.classifier.ManualTree.java

License:Open Source License

/**
 * Computes class distribution for an attribute.
 * /*  w  w w. jav a 2s  .  c  om*/
 * @param props
 * @param dists
 * @param att
 *            the attribute index
 * @param data
 *            the data to work with
 * @throws Exception
 *             if something goes wrong
 */
protected HashMap<String, Double> distribution(double[][] props, double[][][] dists, int att, Instances data,
        double givenSplitPoint, HashMap<String, Classifier> custom_classifiers) throws Exception {

    HashMap<String, Double> mp = new HashMap<String, Double>();
    double splitPoint = givenSplitPoint;
    double origSplitPoint = 0;
    Attribute attribute = null;
    double[][] dist = null;
    int indexOfFirstMissingValue = -1;
    String CustomClassifierId = null;
    CustomSet cSet = null;
    if (att >= data.numAttributes() && att < data.numAttributes() + custom_classifiers.size()) {
        CustomClassifierId = getKeyinMap(custom_classifiers, att, data);
    } else if (att >= data.numAttributes() + custom_classifiers.size()) {
        cSet = getReqCustomSet(att - (data.numAttributes() - 1 + custom_classifiers.size()), cSetList);
    } else {
        attribute = data.attribute(att);
    }
    if (CustomClassifierId == null && cSet == null) {
        if (attribute.isNominal()) {
            // For nominal attributes
            dist = new double[attribute.numValues()][data.numClasses()];
            for (int i = 0; i < data.numInstances(); i++) {
                Instance inst = data.instance(i);
                if (inst.isMissing(att)) {

                    // Skip missing values at this stage
                    if (indexOfFirstMissingValue < 0) {
                        indexOfFirstMissingValue = i;
                    }
                    continue;
                }
                dist[(int) inst.value(att)][(int) inst.classValue()] += inst.weight();
            }
        } else {

            // For numeric attributes
            double[][] currDist = new double[2][data.numClasses()];
            dist = new double[2][data.numClasses()];

            // Sort data
            data.sort(att);

            // Move all instances into second subset
            for (int j = 0; j < data.numInstances(); j++) {
                Instance inst = data.instance(j);
                if (inst.isMissing(att)) {

                    // Can stop as soon as we hit a missing value
                    indexOfFirstMissingValue = j;
                    break;
                }
                currDist[1][(int) inst.classValue()] += inst.weight();
            }

            // Value before splitting
            double priorVal = priorVal(currDist);

            // Save initial distribution
            for (int j = 0; j < currDist.length; j++) {
                System.arraycopy(currDist[j], 0, dist[j], 0, dist[j].length);
            }

            if (Double.isNaN(splitPoint)) {
                // Try all possible split points
                double currSplit = data.instance(0).value(att);
                double currVal, bestVal = -Double.MAX_VALUE;
                for (int i = 0; i < data.numInstances(); i++) {
                    Instance inst = data.instance(i);
                    if (inst.isMissing(att)) {

                        // Can stop as soon as we hit a missing value
                        break;
                    }

                    // Can we place a sensible split point here?
                    if (inst.value(att) > currSplit) {

                        // Compute gain for split point
                        currVal = gain(currDist, priorVal);

                        // Is the current split point the best point so far?
                        if (currVal > bestVal) {

                            // Store value of current point
                            bestVal = currVal;

                            // Save split point
                            splitPoint = (inst.value(att) + currSplit) / 2.0;
                            origSplitPoint = splitPoint;

                            // Save distribution
                            for (int j = 0; j < currDist.length; j++) {
                                System.arraycopy(currDist[j], 0, dist[j], 0, dist[j].length);
                            }
                        }
                    }
                    currSplit = inst.value(att);

                    // Shift over the weight
                    currDist[0][(int) inst.classValue()] += inst.weight();
                    currDist[1][(int) inst.classValue()] -= inst.weight();
                }
            } else {
                double currSplit = data.instance(0).value(att);
                double currVal, bestVal = -Double.MAX_VALUE;
                // Split data set using given split point.
                for (int i = 0; i < data.numInstances(); i++) {
                    Instance inst = data.instance(i);
                    if (inst.isMissing(att)) {
                        // Can stop as soon as we hit a missing value
                        break;
                    }
                    if (inst.value(att) > currSplit) {
                        // Compute gain for split point
                        currVal = gain(currDist, priorVal);
                        // Is the current split point the best point so far?
                        if (currVal > bestVal) {
                            // Store value of current point
                            bestVal = currVal;
                            // Save computed split point
                            origSplitPoint = (inst.value(att) + currSplit) / 2.0;
                        }
                    }
                    currSplit = inst.value(att);
                    // Shift over the weight
                    currDist[0][(int) inst.classValue()] += inst.weight();
                    currDist[1][(int) inst.classValue()] -= inst.weight();
                    if (inst.value(att) <= splitPoint) {
                        // Save distribution since split point is specified
                        for (int j = 0; j < currDist.length; j++) {
                            System.arraycopy(currDist[j], 0, dist[j], 0, dist[j].length);
                        }
                    }
                }
            }
        }
    } else if (CustomClassifierId != null) {
        Classifier fc = custom_classifiers.get(CustomClassifierId);
        dist = new double[data.numClasses()][data.numClasses()];
        Instance inst;
        for (int i = 0; i < data.numInstances(); i++) {
            inst = data.instance(i);
            double predictedClass = fc.classifyInstance(inst);
            if (predictedClass != Instance.missingValue()) {
                dist[(int) predictedClass][(int) inst.classValue()] += inst.weight();
            }
        }
    } else if (cSet != null) {
        dist = new double[data.numClasses()][data.numClasses()];
        JsonNode vertices = mapper.readTree(cSet.getConstraints());
        ArrayList<double[]> attrVertices = generateVerticesList(vertices);
        List<Attribute> aList = generateAttributeList(cSet, data, d);
        double[] testPoint = new double[2];
        int ctr = 0;
        for (int k = 0; k < data.numInstances(); k++) {
            testPoint = new double[2];
            ctr = 0;
            for (Attribute a : aList) {
                if (!data.instance(k).isMissing(a)) {
                    testPoint[ctr] = data.instance(k).value(a);
                    ctr++;
                }
            }
            int check = checkPointInPolygon(attrVertices, testPoint);
            dist[check][(int) data.instance(k).classValue()] += data.instance(k).weight();
        }
    }

    // Compute weights for subsetsCustomClassifierIndex
    props[att] = new double[dist.length];
    for (int k = 0; k < props[att].length; k++) {
        props[att][k] = Utils.sum(dist[k]);
    }
    if (Utils.eq(Utils.sum(props[att]), 0)) {
        for (int k = 0; k < props[att].length; k++) {
            props[att][k] = 1.0 / props[att].length;
        }
    } else {
        Utils.normalize(props[att]);
    }

    // Any instances with missing values ?
    if (indexOfFirstMissingValue > -1) {

        // Distribute weights for instances with missing values
        for (int i = indexOfFirstMissingValue; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            if (attribute.isNominal()) {

                // Need to check if attribute value is missing
                if (inst.isMissing(att)) {
                    for (int j = 0; j < dist.length; j++) {
                        dist[j][(int) inst.classValue()] += props[att][j] * inst.weight();
                    }
                }
            } else {

                // Can be sure that value is missing, so no test required
                for (int j = 0; j < dist.length; j++) {
                    dist[j][(int) inst.classValue()] += props[att][j] * inst.weight();
                }
            }
        }
    }

    // Return distribution and split point
    dists[att] = dist;
    mp.put("split_point", splitPoint);
    mp.put("orig_split_point", origSplitPoint);
    return mp;
}

From source file:org.wkwk.classifier.MyC45.java

public double computeEntropy(Instances data) {
    // Hitung kemunculan kelas
    double[] classCounts = new double[data.numClasses()];
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        classCounts[(int) inst.classValue()]++;
    }/*from   www  .  ja va 2s .c o  m*/

    // Hitung entropy
    double entropy = 0;
    for (int i = 0; i < data.numClasses(); i++) {
        if (classCounts[i] > 0) {
            entropy -= classCounts[i] / data.numInstances() * Utils.log2(classCounts[i] / data.numInstances());
        }
    }
    return entropy;
}

From source file:org.wkwk.classifier.MyC45.java

public double bestThreshold(Instances data, Attribute attr) {
    data.sort(attr);/*from ww w . j  a v  a  2s.com*/

    double m_ig = 0;
    double bestThr = 0;
    double classTemp = data.get(0).classValue();
    double valueTemp = data.get(0).value(attr);

    Enumeration instEnum = data.enumerateInstances();
    double dt;
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        if (classTemp != inst.classValue()) {
            classTemp = inst.classValue();
            dt = valueTemp;
            valueTemp = inst.value(attr);
            double threshold = dt + ((valueTemp - dt) / 2);
            double igTemp = computeInfoGainCont(data, attr, threshold);
            if (m_ig < igTemp) {
                m_ig = igTemp;
                bestThr = threshold;
            }
        }
    }
    return bestThr;
}

From source file:preprocess.StringToWordVector.java

License:Open Source License

/**
 * determines the dictionary./* w w  w  . ja  v a2s.c o m*/
 */
private void determineDictionary() {
    // initialize stopwords
    Stopwords stopwords = new Stopwords();
    if (getUseStoplist()) {
        try {
            if (getStopwords().exists() && !getStopwords().isDirectory())
                stopwords.read(getStopwords());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // Operate on a per-class basis if class attribute is set
    int classInd = getInputFormat().classIndex();
    int values = 1;
    if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
        values = getInputFormat().attribute(classInd).numValues();
    }

    //TreeMap dictionaryArr [] = new TreeMap[values];
    TreeMap[] dictionaryArr = new TreeMap[values];
    for (int i = 0; i < values; i++) {
        dictionaryArr[i] = new TreeMap();
    }

    // Make sure we know which fields to convert
    determineSelectedRange();

    // Tokenize all training text into an orderedMap of "words".
    long pruneRate = Math.round((m_PeriodicPruningRate / 100.0) * getInputFormat().numInstances());
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance instance = getInputFormat().instance(i);
        int vInd = 0;
        if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
            vInd = (int) instance.classValue();
        }

        // Iterate through all relevant string attributes of the current instance
        Hashtable h = new Hashtable();
        for (int j = 0; j < instance.numAttributes(); j++) {
            if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {

                // Get tokenizer
                m_Tokenizer.tokenize(instance.stringValue(j));

                // Iterate through tokens, perform stemming, and remove stopwords
                // (if required)
                while (m_Tokenizer.hasMoreElements()) {
                    String word = ((String) m_Tokenizer.nextElement()).intern();

                    if (this.m_lowerCaseTokens == true)
                        word = word.toLowerCase();

                    word = m_Stemmer.stem(word);

                    if (this.m_useStoplist == true)
                        if (stopwords.is(word))
                            continue;

                    if (!(h.contains(word)))
                        h.put(word, new Integer(0));

                    Count count = (Count) dictionaryArr[vInd].get(word);
                    if (count == null) {
                        dictionaryArr[vInd].put(word, new Count(1));
                    } else {
                        count.count++;
                    }
                }
            }
        }

        //updating the docCount for the words that have occurred in this
        //instance(document).
        Enumeration e = h.keys();
        while (e.hasMoreElements()) {
            String word = (String) e.nextElement();
            Count c = (Count) dictionaryArr[vInd].get(word);
            if (c != null) {
                c.docCount++;
            } else
                System.err.println(
                        "Warning: A word should definitely be in the " + "dictionary.Please check the code");
        }

        if (pruneRate > 0) {
            if (i % pruneRate == 0 && i > 0) {
                for (int z = 0; z < values; z++) {
                    Vector d = new Vector(1000);
                    Iterator it = dictionaryArr[z].keySet().iterator();
                    while (it.hasNext()) {
                        String word = (String) it.next();
                        Count count = (Count) dictionaryArr[z].get(word);
                        if (count.count <= 1) {
                            d.add(word);
                        }
                    }
                    Iterator iter = d.iterator();
                    while (iter.hasNext()) {
                        String word = (String) iter.next();
                        dictionaryArr[z].remove(word);
                    }
                }
            }
        }
    }

    // Figure out the minimum required word frequency
    int totalsize = 0;
    int prune[] = new int[values];
    for (int z = 0; z < values; z++) {
        totalsize += dictionaryArr[z].size();

        int array[] = new int[dictionaryArr[z].size()];
        int pos = 0;
        Iterator it = dictionaryArr[z].keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            Count count = (Count) dictionaryArr[z].get(word);
            array[pos] = count.count;
            pos++;
        }

        // sort the array
        sortArray(array);
        if (array.length < m_WordsToKeep) {
            // if there aren't enough words, set the threshold to
            // minFreq
            prune[z] = m_minTermFreq;
        } else {
            // otherwise set it to be at least minFreq
            prune[z] = Math.max(m_minTermFreq, array[array.length - m_WordsToKeep]);
        }
    }

    // Convert the dictionary into an attribute index
    // and create one attribute per word
    FastVector attributes = new FastVector(totalsize + getInputFormat().numAttributes());

    // Add the non-converted attributes 
    int classIndex = -1;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().classIndex() == i) {
                classIndex = attributes.size();
            }
            attributes.addElement(getInputFormat().attribute(i).copy());
        }
    }

    // Add the word vector attributes (eliminating duplicates
    // that occur in multiple classes)
    TreeMap newDictionary = new TreeMap();
    int index = attributes.size();
    for (int z = 0; z < values; z++) {
        Iterator it = dictionaryArr[z].keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            Count count = (Count) dictionaryArr[z].get(word);
            if (count.count >= prune[z]) {
                if (newDictionary.get(word) == null) {
                    newDictionary.put(word, new Integer(index++));
                    attributes.addElement(new Attribute(m_Prefix + word));
                }
            }
        }
    }

    // Compute document frequencies
    m_DocsCounts = new int[attributes.size()];
    Iterator it = newDictionary.keySet().iterator();
    while (it.hasNext()) {
        String word = (String) it.next();
        int idx = ((Integer) newDictionary.get(word)).intValue();
        int docsCount = 0;
        for (int j = 0; j < values; j++) {
            Count c = (Count) dictionaryArr[j].get(word);
            if (c != null)
                docsCount += c.docCount;
        }
        m_DocsCounts[idx] = docsCount;
    }

    // Trim vector and set instance variables
    attributes.trimToSize();
    m_Dictionary = newDictionary;
    m_NumInstances = getInputFormat().numInstances();

    // Set the filter's output format
    Instances outputFormat = new Instances(getInputFormat().relationName(), attributes, 0);
    outputFormat.setClassIndex(classIndex);
    setOutputFormat(outputFormat);
}

From source file:put.semantic.fcanew.ml.WekaClassifier.java

@Override
public void loadExamples(File f) throws IOException {
    ArffLoader l = new ArffLoader();
    l.setFile(f);// www. ja  va2s . c  o  m
    Instances structure = l.getStructure();
    Instance i;
    while ((i = l.getNextInstance(structure)) != null) {
        if (!instances.checkInstance(i)) {
            i = convert(i, structure, instances);
        } else {
            i.setDataset(instances);
        }
        if (instances.checkInstance(i)) {
            if (i.classValue() == 0) {
                i.setWeight(getRejectedWeight());
            }
            instances.add(i);
        } else {
            System.err.println("Ignoring incompatible instance");
        }
    }
    updateModel();
    tableModel.fireTableDataChanged();
}

From source file:resample.OverSubsample.java

License:Open Source License

/**
 * Creates a subsample of the current set of input instances. The output
 * instances are pushed onto the output queue for collection.
 *///from   w ww.  ja va2 s .c o m
private void createSubsample() {

    int classI = getInputFormat().classIndex();
    // Sort according to class attribute.
    getInputFormat().sort(classI);
    // Determine where each class starts in the sorted dataset
    int[] classIndices = getClassIndices();

    // Get the existing class distribution
    int[] counts = new int[getInputFormat().numClasses()];
    double[] weights = new double[getInputFormat().numClasses()];
    int max = -1;
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance current = getInputFormat().instance(i);
        if (current.classIsMissing() == false) {
            counts[(int) current.classValue()]++;
            weights[(int) current.classValue()] += current.weight();
        }
    }

    // Convert from total weight to average weight
    for (int i = 0; i < counts.length; i++) {
        if (counts[i] > 0) {
            weights[i] = weights[i] / counts[i];
        }
        /*
        System.err.println("Class:" + i + " " + getInputFormat().classAttribute().value(i)
                 + " Count:" + counts[i]
                 + " Total:" + weights[i] * counts[i]
                 + " Avg:" + weights[i]);
         */
    }

    // find the class with the minimum number of instances
    int maxIndex = -1;
    for (int i = 0; i < counts.length; i++) {
        if ((max < 0) && (counts[i] > 0)) {
            max = counts[i];
            maxIndex = i;
        } else if ((counts[i] > max) && (counts[i] > 0)) {
            max = counts[i];
            maxIndex = i;
        }
    }

    if (max < 0) {
        System.err.println("SpreadSubsample: *warning* none of the classes have any values in them.");
        return;
    }

    // determine the new distribution 
    int[] new_counts = new int[getInputFormat().numClasses()];
    for (int i = 0; i < counts.length; i++) {
        new_counts[i] = (int) Math.abs(Math.max(counts[i], max * m_DistributionSpread));
        if (i == maxIndex) {
            if (m_DistributionSpread > 0 && m_DistributionSpread < 1.0) {
                // don't undersample the majority class!
                new_counts[i] = counts[i];
            }
        }
        if (m_DistributionSpread == 0) {
            new_counts[i] = counts[i];
        }

        if (m_MaxCount > 0) {
            new_counts[i] = Math.min(new_counts[i], m_MaxCount);
        }
    }

    // Sample with replacement
    Random random = new Random(m_RandomSeed);
    //Hashtable t = new Hashtable();
    for (int j = 0; j < new_counts.length; j++) {
        double newWeight = 1.0;
        if (m_AdjustWeights && (new_counts[j] > 0)) {
            newWeight = weights[j] * counts[j] / new_counts[j];
            /*
            System.err.println("Class:" + j + " " + getInputFormat().classAttribute().value(j) 
                   + " Count:" + counts[j]
                   + " Total:" + weights[j] * counts[j]
                   + " Avg:" + weights[j]
                   + " NewCount:" + new_counts[j]
                   + " NewAvg:" + newWeight);
             */
        }
        int index = -1;
        for (int k = 0; k < new_counts[j]; k++) {
            //boolean ok = false;
            //do {
            index = classIndices[j] + (Math.abs(random.nextInt()) % (classIndices[j + 1] - classIndices[j]));
            // Have we used this instance before?
            //if (t.get("" + index) == null) {
            // if not, add it to the hashtable and use it
            //t.put("" + index, "");
            //ok = true;
            if (index >= 0) {
                Instance newInst = (Instance) getInputFormat().instance(index).copy();
                if (m_AdjustWeights) {
                    newInst.setWeight(newWeight);
                }
                push(newInst);
            }
            //}
            //} while (!ok);
        }
    }
}

From source file:resample.OverSubsample.java

License:Open Source License

/**
 * Creates an index containing the position where each class starts in 
 * the getInputFormat(). m_InputFormat must be sorted on the class attribute.
 * /*from   ww w .ja v  a2s .c  o m*/
 * @return the positions
 */
private int[] getClassIndices() {

    // Create an index of where each class value starts
    int[] classIndices = new int[getInputFormat().numClasses() + 1];
    int currentClass = 0;
    classIndices[currentClass] = 0;
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance current = getInputFormat().instance(i);
        if (current.classIsMissing()) {
            for (int j = currentClass + 1; j < classIndices.length; j++) {
                classIndices[j] = i;
            }
            break;
        } else if (current.classValue() != currentClass) {
            for (int j = currentClass + 1; j <= current.classValue(); j++) {
                classIndices[j] = i;
            }
            currentClass = (int) current.classValue();
        }
    }
    if (currentClass <= getInputFormat().numClasses()) {
        for (int j = currentClass + 1; j < classIndices.length; j++) {
            classIndices[j] = getInputFormat().numInstances();
        }
    }
    return classIndices;
}

From source file:sg.edu.nus.comp.nlp.ims.classifiers.CMultiClassesSVM.java

License:Open Source License

@Override
public void buildClassifier(Instances p_Instances) throws Exception {
    Instances newInsts = null;/*w  ww .  java 2 s.c  om*/
    if (this.m_Classifier == null) {
        throw new IllegalStateException("No base classifier has been set!");
    }

    this.m_ZeroR = new ZeroR();
    this.m_ZeroR.buildClassifier(p_Instances);

    this.m_ClassAttribute = p_Instances.classAttribute();
    this.getOutputFormat(p_Instances);
    int numClassifiers = p_Instances.numClasses();
    switch (numClassifiers) {
    case 1:
        this.m_Classifiers = null;
        break;
    case 2:
        this.m_Classifiers = Classifier.makeCopies(this.m_Classifier, 1);
        newInsts = new Instances(this.m_OutputFormat, 0);
        for (int i = 0; i < p_Instances.numInstances(); i++) {
            Instance inst = this.filterInstance(p_Instances.instance(i));
            inst.setDataset(newInsts);
            newInsts.add(inst);
        }
        this.m_Classifiers[0].buildClassifier(newInsts);
        break;
    default:
        this.m_Classifiers = Classifier.makeCopies(this.m_Classifier, numClassifiers);
        Hashtable<String, ArrayList<Double>> id2Classes = null;
        if (this.m_IndexOfID >= 0) {
            id2Classes = new Hashtable<String, ArrayList<Double>>();
            for (int i = 0; i < p_Instances.numInstances(); i++) {
                Instance inst = p_Instances.instance(i);
                String id = inst.stringValue(this.m_IndexOfID);
                if (!id2Classes.containsKey(id)) {
                    id2Classes.put(id, new ArrayList<Double>());
                }
                id2Classes.get(id).add(inst.classValue());
            }
        }
        for (int classIdx = 0; classIdx < this.m_Classifiers.length; classIdx++) {
            newInsts = this.genInstances(p_Instances, classIdx, id2Classes);
            this.m_Classifiers[classIdx].buildClassifier(newInsts);
        }
    }
}

From source file:smo2.SMO.java

License:Open Source License

/**
 * Method for building the classifier. Implements a one-against-one wrapper
 * for multi-class problems.//from w w w  .j a  v a2  s . co m
 *
 * @param insts
 *            the set of training instances
 * @exception Exception
 *                if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {

    if (!m_checksTurnedOff) {
        if (insts.checkForStringAttributes()) {
            throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");
        }
        if (insts.classAttribute().isNumeric()) {
            throw new UnsupportedClassTypeException(
                    "mySMO can't handle a numeric class! Use" + "SMOreg for performing regression.");
        }
        insts = new Instances(insts);
        insts.deleteWithMissingClass();
        if (insts.numInstances() == 0) {
            throw new Exception("No training instances without a missing class!");
        }

        /*
         * Removes all the instances with weight equal to 0. MUST be done
         * since condition (8) of Keerthi's paper is made with the assertion
         * Ci > 0 (See equation (3a).
         */
        Instances data = new Instances(insts, insts.numInstances());
        for (int i = 0; i < insts.numInstances(); i++) {
            if (insts.instance(i).weight() > 0)
                data.add(insts.instance(i));
        }
        if (data.numInstances() == 0) {
            throw new Exception("No training instances left after removing "
                    + "instance with either a weight null or a missing class!");
        }
        insts = data;

    }

    m_onlyNumeric = true;
    if (!m_checksTurnedOff) {
        for (int i = 0; i < insts.numAttributes(); i++) {
            if (i != insts.classIndex()) {
                if (!insts.attribute(i).isNumeric()) {
                    m_onlyNumeric = false;
                    break;
                }
            }
        }
    }

    if (!m_checksTurnedOff) {
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    } else {
        m_Missing = null;
    }

    if (!m_onlyNumeric) {
        m_NominalToBinary = new NominalToBinary();
        m_NominalToBinary.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_NominalToBinary);
    } else {
        m_NominalToBinary = null;
    }

    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else {
        m_Filter = null;
    }

    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();

    // Generate subsets representing each class
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        subsets[(int) inst.classValue()].add(inst);
    }
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i].compactify();
    }

    // Build the binary classifiers
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarymySMO[insts.numClasses()][insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        for (int j = i + 1; j < insts.numClasses(); j++) {
            m_classifiers[i][j] = new BinarymySMO();
            Instances data = new Instances(insts, insts.numInstances());
            for (int k = 0; k < subsets[i].numInstances(); k++) {
                data.add(subsets[i].instance(k));
            }
            for (int k = 0; k < subsets[j].numInstances(); k++) {
                data.add(subsets[j].instance(k));
            }
            data.compactify();
            data.randomize(rand);
            m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}