Example usage for weka.core Instances Instances

Introduction

On this page you can find example usage for the weka.core.Instances(Instances) copy constructor.

Prototype

public Instances(Instances dataset) 

Document

Constructor copying all instances and references to the header information from the given set of instances.
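
Before the usage examples, a minimal sketch of the copy constructor in isolation (assumes Weka 3.7 or later, where DenseInstance replaces the older Instance class that some examples below use alongside FastVector; the dataset and attribute names are hypothetical):

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class CopyConstructorDemo {
    public static void main(String[] args) {
        // Build a tiny two-attribute dataset.
        ArrayList<Attribute> attrs = new ArrayList<Attribute>();
        attrs.add(new Attribute("x"));
        attrs.add(new Attribute("y"));
        Instances original = new Instances("demo", attrs, 2);
        original.add(new DenseInstance(1.0, new double[] { 1.0, 2.0 }));

        // The constructor documented above: copies the list of instances
        // and references the header information of the given dataset.
        Instances copy = new Instances(original);
        copy.delete(0); // destructive edits affect only the copy

        System.out.println(original.numInstances()); // prints 1
        System.out.println(copy.numInstances()); // prints 0
    }
}

This is why several buildClassifier implementations below start with insts = new Instances(insts): destructive calls such as deleteWithMissingClass() then leave the caller's dataset intact.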

Usage

From source file:MLKNNCS.java

License:Open Source License

/**
 * Computing Cond and CondN Probabilities for each class of the training set
 *
 * @throws Exception potential exception thrown, to be handled at an upper level
 */
private void ComputeCond() throws Exception {
    int[][] temp_Ci = new int[numLabels][numOfNeighbors + 1];
    int[][] temp_NCi = new int[numLabels][numOfNeighbors + 1];

    for (int i = 0; i < train.numInstances(); i++) {

        Instances knn = new Instances(lnn.kNearestNeighbours(train.instance(i), numOfNeighbors));

        // now compute values of temp_Ci and temp_NCi for every class label
        for (int j = 0; j < numLabels; j++) {

            int aces = 0; // num of aces in Knn for j
            for (int k = 0; k < numOfNeighbors; k++) {
                double value = Double.parseDouble(
                        train.attribute(labelIndices[j]).value((int) knn.instance(k).value(labelIndices[j])));
                if (Utils.eq(value, 1.0)) {
                    aces++;
                }
            }
            // increment temp_Ci[j][aces] or temp_NCi[j][aces], depending on instance i's true value for label j
            if (Utils.eq(Double.parseDouble(
                    train.attribute(labelIndices[j]).value((int) train.instance(i).value(labelIndices[j]))),
                    1.0)) {
                temp_Ci[j][aces]++;
            } else {
                temp_NCi[j][aces]++;
            }
        }
    }

    // compute CondProbabilities[i][..] for labels based on temp_Ci[]
    for (int i = 0; i < numLabels; i++) {
        int temp1 = 0;
        int temp2 = 0;
        for (int j = 0; j < numOfNeighbors + 1; j++) {
            temp1 += temp_Ci[i][j];
            temp2 += temp_NCi[i][j];
        }
        for (int j = 0; j < numOfNeighbors + 1; j++) {
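            // Laplace-smoothed estimates: the 'smooth' prior keeps both probabilities nonzero even for counts of zero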
            CondProbabilities[i][j] = (smooth + temp_Ci[i][j]) / (smooth * (numOfNeighbors + 1) + temp1);
            CondNProbabilities[i][j] = (smooth + temp_NCi[i][j]) / (smooth * (numOfNeighbors + 1) + temp2);
        }
    }
}

From source file:MLKNNCS.java

License:Open Source License

protected MultiLabelOutput makePredictionInternal(Instance instance) throws Exception {
    double[] confidences = new double[numLabels];
    boolean[] predictions = new boolean[numLabels];

    Instances knn = null;
    try {
        knn = new Instances(lnn.kNearestNeighbours(instance, numOfNeighbors));
    } catch (Exception ex) {
        Logger.getLogger(MLKNNCS.class.getName()).log(Level.SEVERE, null, ex);
    }
    int trueCount = 0;
    for (int i = 0; i < numLabels; i++) {
        // compute sum of aces in KNN
        int aces = 0; // num of aces in Knn for i
        for (int k = 0; k < numOfNeighbors; k++) {
            double value = Double.parseDouble(
                    train.attribute(labelIndices[i]).value((int) knn.instance(k).value(labelIndices[i])));
            if (Utils.eq(value, 1.0)) {
                aces++;
            }
        }
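        // Bayes rule numerators: class prior times the likelihood of observing 'aces' positive neighbours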
        double Prob_in = PriorProbabilities[i] * CondProbabilities[i][aces];
        double Prob_out = PriorNProbabilities[i] * CondNProbabilities[i][aces];

        confidences[i] = Cost[i] * Prob_in / (Cost[i] * Prob_in + Prob_out);
        //confidences[i] = 6*Prob_in/(6*Prob_in + Prob_out);

        if (confidences[i] > 0.5) {
            predictions[i] = true;
            trueCount++;
        } else if (confidences[i] < 0.5) {
            predictions[i] = false;
        } else {
            Random rnd = new Random();
            predictions[i] = rnd.nextInt(2) == 1; // break the tie with a coin flip
        }
        // ranking function
    }

    MultiLabelOutput mlo = new MultiLabelOutput(predictions, confidences);

    if (trueCount < 3) {
        double[] confidence = mlo.getConfidences();
        double[] confidenceTop4 = new double[4];
        int[] top4 = new int[4];

        Arrays.fill(top4, 0);
        Arrays.fill(confidenceTop4, 0);

        for (int i = 0; i < confidence.length; i++) {
            if (confidence[i] > confidenceTop4[0]) {
                top4[3] = top4[2];
                confidenceTop4[3] = confidenceTop4[2];
                top4[2] = top4[1];
                confidenceTop4[2] = confidenceTop4[1];
                top4[1] = top4[0];
                confidenceTop4[1] = confidenceTop4[0];
                top4[0] = i;
                confidenceTop4[0] = confidence[i];
            } else if (confidence[i] > confidenceTop4[1]) {
                top4[3] = top4[2];
                confidenceTop4[3] = confidenceTop4[2];
                top4[2] = top4[1];
                confidenceTop4[2] = confidenceTop4[1];
                top4[1] = i;
                confidenceTop4[1] = confidence[i];
            } else if (confidence[i] > confidenceTop4[2]) {
                top4[3] = top4[2];
                confidenceTop4[3] = confidenceTop4[2];
                top4[2] = i;
                confidenceTop4[2] = confidence[i];
            } else if (confidence[i] > confidenceTop4[3]) {
                top4[3] = i;
                confidenceTop4[3] = confidence[i];
            }

        }
        for (int i = trueCount; i < 4; i++) {
            if ((confidence[top4[i]] > 0.25 && i == 3) || (confidence[top4[i]] > 0.2 && i < 3)) {
                predictions[top4[i]] = true;
                trueCount++;
            }
        }
        if (trueCount == 0) {
            predictions[top4[0]] = true;
        }
        mlo = new MultiLabelOutput(predictions, confidences);
    }
    return mlo;
}

From source file:CJWeka.java

License:Open Source License

public void buildClassifier(Instances i) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(i);

    // remove instances with missing class
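    // (copy first, via the constructor documented above, so the caller's dataset is not mutated)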
    i = new Instances(i);
    i.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (i.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(i);
        return;
    } else {
        m_ZeroR = null;
    }

    m_instances = null;
    m_currentInstance = null;

    m_outputs = new NeuralEnd[0];
    m_inputs = new NeuralEnd[0];
    m_numAttributes = 0;
    m_numClasses = 0;
    m_neuralNodes = new NeuralConnection[0];

    m_nextId = 0;
    m_instances = new Instances(i);
    m_random = new Random(m_randomSeed);

    if (m_useNomToBin) {
        m_nominalToBinaryFilter = new NominalToBinary();
        m_nominalToBinaryFilter.setInputFormat(m_instances);
        m_instances = Filter.useFilter(m_instances, m_nominalToBinaryFilter);
    }
    m_numAttributes = m_instances.numAttributes() - 1;
    m_numClasses = m_instances.numClasses();

    setClassType(m_instances);

    setupInputs();
    setupOutputs();
    setupHiddenLayer();

    // For silly situations in which the network gets accepted before training
    // commences
    if (m_numeric) {
        setEndsToLinear();
    }

    //connections done.
    double right = 0;
    double tempRate;
    double totalWeight = 0;

    m_hiddenValues = new double[m_hiddenLayers];
    resetNetwork();
    saveValues();
    for (int noa = 1; noa < m_numEpochs + 1; noa++) {
        //            System.out.println(noa);
        resetNetwork();
        totalWeight = 0;
        right = 0;
        for (int nob = 0; nob < m_instances.numInstances(); nob++) {
            m_currentInstance = m_instances.instance(nob);
            if (!m_currentInstance.classIsMissing()) {
                totalWeight += m_currentInstance.weight();

                // this is where the network updating (and training) occurs for
                // the training set
                resetNetwork();
                calculateOutputs();
                tempRate = m_learningRate * m_currentInstance.weight();
                if (m_decay) {
                    tempRate /= noa;
                }

                right += (calculateErrors() / m_instances.numClasses()) * m_currentInstance.weight();
                updateNetworkWeights(tempRate, m_momentum);
                saveValues();
            }
        }
        right /= totalWeight;
        if (Double.isInfinite(right) || Double.isNaN(right)) {
            m_instances = null;
            throw new Exception("Network cannot train. Try restarting with a" + " smaller learning rate.");
        }
        //            System.out.println(noa+ ": " +right);
    }
    resetNetwork();
    if (m_resetAfterTraining) {
        // at this point it saves Double.NaN
        saveValues();
    }

}

From source file:MultiClassClassifier.java

License:Open Source License

/**
 * Builds the classifiers.
 *
 * @param insts the training data.
 * @throws Exception if a classifier can't be built
 */
public void buildClassifier(Instances insts) throws Exception {

    Instances newInsts;

    // can classifier handle the data?
    getCapabilities().testWithFail(insts);

    // remove instances with missing class
    insts = new Instances(insts);
    insts.deleteWithMissingClass();

    if (m_Classifier == null) {
        throw new Exception("No base classifier has been set!");
    }
    m_ZeroR = new ZeroR();
    m_ZeroR.buildClassifier(insts);

    m_TwoClassDataset = null;

    int numClassifiers = insts.numClasses();
    if (numClassifiers <= 2) {

        m_Classifiers = Classifier.makeCopies(m_Classifier, 1);
        m_Classifiers[0].buildClassifier(insts);

        m_ClassFilters = null;

    } else if (m_Method == METHOD_1_AGAINST_1) {
        // generate fastvector of pairs
        FastVector pairs = new FastVector();
        for (int i = 0; i < insts.numClasses(); i++) {
            for (int j = 0; j < insts.numClasses(); j++) {
                if (j <= i)
                    continue;
                int[] pair = new int[2];
                pair[0] = i;
                pair[1] = j;
                pairs.addElement(pair);
            }
        }

        numClassifiers = pairs.size();
        m_Classifiers = Classifier.makeCopies(m_Classifier, numClassifiers);
        m_ClassFilters = new Filter[numClassifiers];
        m_SumOfWeights = new double[numClassifiers];

        // generate the classifiers
        for (int i = 0; i < numClassifiers; i++) {
            RemoveWithValues classFilter = new RemoveWithValues();
            classFilter.setAttributeIndex("" + (insts.classIndex() + 1));
            classFilter.setModifyHeader(true);
            classFilter.setInvertSelection(true);
            classFilter.setNominalIndicesArr((int[]) pairs.elementAt(i));
            Instances tempInstances = new Instances(insts, 0);
            tempInstances.setClassIndex(-1);
            classFilter.setInputFormat(tempInstances);
            newInsts = Filter.useFilter(insts, classFilter);
            if (newInsts.numInstances() > 0) {
                newInsts.setClassIndex(insts.classIndex());
                m_Classifiers[i].buildClassifier(newInsts);
                m_ClassFilters[i] = classFilter;
                m_SumOfWeights[i] = newInsts.sumOfWeights();
            } else {
                m_Classifiers[i] = null;
                m_ClassFilters[i] = null;
            }
        }

        // construct a two-class header version of the dataset
        m_TwoClassDataset = new Instances(insts, 0);
        int classIndex = m_TwoClassDataset.classIndex();
        m_TwoClassDataset.setClassIndex(-1);
        m_TwoClassDataset.deleteAttributeAt(classIndex);
        FastVector classLabels = new FastVector();
        classLabels.addElement("class0");
        classLabels.addElement("class1");
        m_TwoClassDataset.insertAttributeAt(new Attribute("class", classLabels), classIndex);
        m_TwoClassDataset.setClassIndex(classIndex);

    } else {
        // use error correcting code style methods
        Code code = null;
        switch (m_Method) {
        case METHOD_ERROR_EXHAUSTIVE:
            code = new ExhaustiveCode(numClassifiers);
            break;
        case METHOD_ERROR_RANDOM:
            code = new RandomCode(numClassifiers, (int) (numClassifiers * m_RandomWidthFactor), insts);
            break;
        case METHOD_1_AGAINST_ALL:
            code = new StandardCode(numClassifiers);
            break;
        default:
            throw new Exception("Unrecognized correction code type");
        }
        numClassifiers = code.size();
        m_Classifiers = Classifier.makeCopies(m_Classifier, numClassifiers);
        m_ClassFilters = new MakeIndicator[numClassifiers];
        for (int i = 0; i < m_Classifiers.length; i++) {
            m_ClassFilters[i] = new MakeIndicator();
            MakeIndicator classFilter = (MakeIndicator) m_ClassFilters[i];
            classFilter.setAttributeIndex("" + (insts.classIndex() + 1));
            classFilter.setValueIndices(code.getIndices(i));
            classFilter.setNumeric(false);
            classFilter.setInputFormat(insts);
            newInsts = Filter.useFilter(insts, m_ClassFilters[i]);
            m_Classifiers[i].buildClassifier(newInsts);
        }
    }
    m_ClassAttribute = insts.classAttribute();
}

From source file:task2.java

/**
 * Processes requests for both HTTP <code>GET</code> and <code>POST</code>
 * methods.
 *
 * @param request servlet request
 * @param response servlet response
 * @throws ServletException if a servlet-specific error occurs
 * @throws IOException if an I/O error occurs
 */
protected void processRequest(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    response.setContentType("text/html;charset=UTF-8");
    try (PrintWriter out = response.getWriter()) {
        /* TODO output your page here. You may use following sample code. */
        out.println("<!DOCTYPE html>");
        out.println("<html>");
        out.println("<head>");
        out.println("<title>Servlet selection</title>");
        out.println("</head>");
        out.println("<body>");
        CSVLoader loader = new CSVLoader();
        loader.setSource(new File("C:/Users//Raguvinoth/Desktop/5339.csv"));
        Instances data = loader.getDataSet();

        //Save ARFF
        ArffSaver saver = new ArffSaver();
        saver.setInstances(data);
        saver.setFile(new File("\"C:/Users/Raguvinoth/Desktop/5339_converted.arff"));
        saver.writeBatch();

        BufferedReader reader = new BufferedReader(
                new FileReader("C://Users//Raguvinoth//Desktop//weka1//5339_nominal.arff"));
        Instances data1 = new Instances(reader);

        if (data1.classIndex() == -1)
            data1.setClassIndex(data1.numAttributes() - 14);
        // 1. meta-classifier
        // useClassifier(data);

        // 2. AttributeSelector
        try {
            AttributeSelection attsel = new AttributeSelection();
            GreedyStepwise search = new GreedyStepwise();
            CfsSubsetEval eval = new CfsSubsetEval();
            attsel.setEvaluator(eval);
            attsel.setSearch(search);
            attsel.SelectAttributes(data);
            int[] indices = attsel.selectedAttributes();

            System.out.println("selected attribute indices:\n" + Utils.arrayToString(indices));
            System.out.println("\n 4. Linear-Regression on above selected attributes");
            long time1 = System.currentTimeMillis();
            long sec1 = time1 / 1000;
            BufferedReader reader1 = new BufferedReader(
                    new FileReader("C://Users//Raguvinoth//Desktop//weka1//5339_linear2.arff"));
            Instances data2 = new Instances(reader1);
            data2.setClassIndex(0);
            LinearRegression lr = new LinearRegression();
            lr.buildClassifier(data2);

            System.out.println(lr.toString());
            long time2 = System.currentTimeMillis();
            long sec2 = time2 / 1000;
            long timeTaken = sec2 - sec1;
            System.out.println("Total time taken for building the model: " + timeTaken + " seconds");

            for (int i = 0; i < 5; i++) {
                out.println("<p>" + "selected attribute indices:\n" + Utils.arrayToString(indices[i]) + "</p>");
            }
            out.println("<p>" + "\n 4. Linear-Regression on above selected attributes" + "</p>");
            out.println("<p>" + lr.toString() + "</p>");
            out.println("<p>" + "Total time taken for building the model: " + timeTaken + " seconds" + "</p>");
            out.println("</body>");
            out.println("</html>");
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
}

From source file:classifyfromimage1.java

private void jButton1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButton1ActionPerformed
    selectWindow(this.name3);
    this.name3 = IJ.getImage().getTitle();
    this.name4 = this.name3.replaceFirst("[.][^.]+$", "");
    RoiManager rm = RoiManager.getInstance();
    IJ.run("Duplicate...", this.name4);
    IJ.run("Set Measurements...", "area perimeter fit shape limit scientific redirect=None decimal=5");
    selectWindow(this.name3);
    IJ.run("Subtract Background...", "rolling=1.5");
    IJ.run("Enhance Contrast...", "saturated=25 equalize");
    IJ.run("Subtract Background...", "rolling=1.5");
    IJ.run("Convolve...",
            "text1=[-1 -3 -4 -3 -1\n-3 0 6 0 -3\n-4 6 50 6 -4\n-3 0 6 0 -3\n-1 -3 -4 -3 -1\n] normalize");
    IJ.run("8-bit", "");
    IJ.run("Restore Selection", "");
    IJ.run("Make Binary", "");
    Prefs.blackBackground = false;
    IJ.run("Convert to Mask", "");
    IJ.run("Restore Selection", "");
    this.valor1 = this.interval3.getText();
    this.valor2 = this.interval4.getText();
    this.text = "size=" + this.valor1 + "-" + this.valor2
            + " pixel show=Outlines display include summarize add";
    IJ.saveAs("tif", this.name3 + "_processed");
    String dest_filename1, dest_filename2, full;
    selectWindow("Results");
    //dest_filename1 = this.name2 + "_complete.txt";
    dest_filename2 = this.name3 + "_complete.csv";
    //IJ.saveAs("Results", prova + File.separator + dest_filename1);
    IJ.run("Input/Output...", "jpeg=85 gif=-1 file=.csv copy_row save_column save_row");
    //IJ.saveAs("Results", dir + File.separator + dest_filename2);
    IJ.saveAs("Results", this.name3 + "_complete.csv");
    IJ.run("Restore Selection");
    IJ.run("Clear Results");

    try {
        CSVLoader loader = new CSVLoader();
        loader.setSource(new File(this.name3 + "_complete.csv"));
        Instances data = loader.getDataSet();
        System.out.println(data);

        // save ARFF
        String arffile = this.name3 + ".arff";
        System.out.println(arffile);
        ArffSaver saver = new ArffSaver();
        saver.setInstances(data);
        saver.setFile(new File(arffile));
        saver.writeBatch();
    } catch (IOException ex) {
        Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex);
    }

    Instances data;
    try {
        data = new Instances(new BufferedReader(new FileReader(this.name3 + ".arff")));
        Instances newData = null;
        Add filter;
        newData = new Instances(data);
        filter = new Add();
        filter.setAttributeIndex("last");
        filter.setNominalLabels(txtlabel.getText());
        filter.setAttributeName(txtpath2.getText());
        filter.setInputFormat(newData);
        newData = Filter.useFilter(newData, filter);
        System.out.print(newData);
        Vector vec = new Vector();
        newData.setClassIndex(newData.numAttributes() - 1);

        // NOTE: equalHeaders() is called on the same dataset here, so this check always passes
        if (!newData.equalHeaders(newData)) {
            throw new IllegalArgumentException("Train and test are not compatible!");
        }

        Classifier cls = (Classifier) weka.core.SerializationHelper.read(txtpath.getText());
        System.out.println("PROVANT MODEL.classifyInstance");
        for (int i = 0; i < newData.numInstances(); i++) {
            double pred = cls.classifyInstance(newData.instance(i));
            double[] dist = cls.distributionForInstance(newData.instance(i));
            System.out.print((i + 1) + " - ");
            System.out.print(newData.classAttribute().value((int) pred) + " - ");
            //txtarea2.setText(Utils.arrayToString(dist));

            System.out.println(Utils.arrayToString(dist));

            vec.add(newData.classAttribute().value((int) pred));
            //txtarea2.append(Utils.arrayToString(dist));
            classif.add(newData.classAttribute().value((int) pred));
        }

        classif.removeAll(Arrays.asList("", null));
        System.out.println(classif);
        String vecstring = "";

        for (Object s : classif) {
            vecstring += s + ",";
            System.out.println("Hola " + vecstring);
        }
        Map<String, Integer> seussCount = new HashMap<String, Integer>();
        for (String t : classif) {
            Integer i = seussCount.get(t);
            if (i == null) {
                i = 0;
            }
            seussCount.put(t, i + 1);
        }
        String s = vecstring;
        int counter = 0;
        for (int i = 0; i < s.length(); i++) {
            if (s.charAt(i) == '$') {
                counter++;
            }
        }
        System.out.println(seussCount);
        System.out.println("hola " + counter++);
        IJ.showMessage("Your file:" + this.name3 + "arff" + "\n is composed by" + seussCount);
        txtpath2.setText("Your file:" + this.name3 + "arff" + "\n is composed by" + seussCount);
        A_MachineLearning nf2 = new A_MachineLearning();
        A_MachineLearning.txtresult2.append(this.txtpath2.getText());
        nf2.setVisible(true);

    } catch (Exception ex) {
        Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex);
    }

    IJ.run("Close All", "");

    if (WindowManager.getFrame("Results") != null) {
        IJ.selectWindow("Results");
        IJ.run("Close");
    }
    if (WindowManager.getFrame("Summary") != null) {
        IJ.selectWindow("Summary");
        IJ.run("Close");
    }
    if (WindowManager.getFrame("Results") != null) {
        IJ.selectWindow("Results");
        IJ.run("Close");
    }
    if (WindowManager.getFrame("ROI Manager") != null) {
        IJ.selectWindow("ROI Manager");
        IJ.run("Close");
    }

    setVisible(false);
    dispose(); // TODO add your handling code here:
}

From source file:WrapperSubset.java

License:Open Source License

/**
 * Evaluates a subset of attributes
 *
 * @param subset a bitset representing the attribute subset to be evaluated
 * @return the error rate
 * @throws Exception if the subset could not be evaluated
 */
@Override
public double evaluateSubset(BitSet subset) throws Exception {

    //        if (subset.isEmpty())
    //            return 0.0;

    double evalMetric = 0;
    double[] repError = new double[5];
    int numAttributes = 0;
    int i, j;
    Random Rnd = new Random(m_seed);
    Remove delTransform = new Remove();
    delTransform.setInvertSelection(true);
    // copy the instances
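    // (again the copy constructor: the filtering below must leave m_trainInstances untouched)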
    Instances trainCopy = new Instances(m_trainInstances);

    // count attributes set in the BitSet
    for (i = 0; i < m_numAttribs; i++) {
        if (subset.get(i)) {
            numAttributes++;
        }
    }

    // set up an array of attribute indexes for the filter (+1 for the class)
    int[] featArray = new int[numAttributes + 1];

    for (i = 0, j = 0; i < m_numAttribs; i++) {
        if (subset.get(i)) {
            featArray[j++] = i;
        }
    }

    featArray[j] = m_classIndex;
    delTransform.setAttributeIndicesArray(featArray);
    delTransform.setInputFormat(trainCopy);
    trainCopy = Filter.useFilter(trainCopy, delTransform);

    // max of 5 repetitions of cross validation
    for (i = 0; i < 5; i++) {
        m_Evaluation = new Evaluation(trainCopy);
        m_Evaluation.crossValidateModel(m_BaseClassifier, trainCopy, m_folds, Rnd);

        switch (m_evaluationMeasure) {
        case EVAL_DEFAULT:
            repError[i] = m_Evaluation.errorRate();
            //                     if (m_trainInstances.classAttribute().isNominal()) {
            //                     repError[i] = 1.0 - repError[i];
            //                     }
            break;
        case EVAL_ACCURACY:
            repError[i] = m_Evaluation.errorRate();
            //                     if (m_trainInstances.classAttribute().isNominal()) {
            //                     repError[i] = 1.0 - repError[i];
            //                     }
            break;
        case EVAL_RMSE:
            repError[i] = m_Evaluation.rootMeanSquaredError();
            break;
        case EVAL_MAE:
            repError[i] = m_Evaluation.meanAbsoluteError();
            break;
        case EVAL_FMEASURE:
            if (m_IRClassVal < 0) {
                repError[i] = m_Evaluation.weightedFMeasure();
            } else {
                repError[i] = m_Evaluation.fMeasure(m_IRClassVal);
            }
            break;
        case EVAL_AUC:
            if (m_IRClassVal < 0) {
                repError[i] = m_Evaluation.weightedAreaUnderROC();
            } else {
                repError[i] = m_Evaluation.areaUnderROC(m_IRClassVal);
            }
            break;
        case EVAL_AUPRC:
            if (m_IRClassVal < 0) {
                repError[i] = m_Evaluation.weightedAreaUnderPRC();
            } else {
                repError[i] = m_Evaluation.areaUnderPRC(m_IRClassVal);
            }
            break;
        case EVAL_NEW:
            repError[i] = (1.0 - m_Evaluation.errorRate()) + m_IRfactor * m_Evaluation.weightedFMeasure();
            break;
        }

        // check on the standard deviation
        if (!repeat(repError, i + 1)) {
            i++;
            break;
        }
    }

    for (j = 0; j < i; j++) {
        evalMetric += repError[j];
    }

    evalMetric /= i;
    m_Evaluation = null;

    switch (m_evaluationMeasure) {
    case EVAL_DEFAULT:
    case EVAL_ACCURACY:
    case EVAL_RMSE:
    case EVAL_MAE:
        if (m_trainInstances.classAttribute().isNominal()
                && (m_evaluationMeasure == EVAL_DEFAULT || m_evaluationMeasure == EVAL_ACCURACY)) {
            evalMetric = 1 - evalMetric;
        } else {
            evalMetric = -evalMetric; // maximize
        }

        break;
    }

    return evalMetric;
}

From source file:SMO.java

License:Open Source License

/**
 * Method for building the classifier. Implements a one-against-one
 * wrapper for multi-class problems.
 *
 * @param insts the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {

    if (!m_checksTurnedOff) {
        // can classifier handle the data?
        getCapabilities().testWithFail(insts);

        // remove instances with missing class
        insts = new Instances(insts);
        insts.deleteWithMissingClass();

        /* Removes all the instances with weight equal to 0.
         MUST be done since condition (8) of Keerthi's paper
         is made with the assertion Ci > 0 (see equation (3a)). */
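        // Instances(Instances, int) is a related overload: an empty set sharing the header, with the given initial capacity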
        Instances data = new Instances(insts, insts.numInstances());
        for (int i = 0; i < insts.numInstances(); i++) {
            if (insts.instance(i).weight() > 0)
                data.add(insts.instance(i));
        }
        if (data.numInstances() == 0) {
            throw new Exception("No training instances left after removing " + "instances with weight 0!");
        }
        insts = data;
    }

    if (!m_checksTurnedOff) {
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    } else {
        m_Missing = null;
    }

    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
        boolean onlyNumeric = true;
        if (!m_checksTurnedOff) {
            for (int i = 0; i < insts.numAttributes(); i++) {
                if (i != insts.classIndex()) {
                    if (!insts.attribute(i).isNumeric()) {
                        onlyNumeric = false;
                        break;
                    }
                }
            }
        }

        if (!onlyNumeric) {
            m_NominalToBinary = new NominalToBinary();
            m_NominalToBinary.setInputFormat(insts);
            insts = Filter.useFilter(insts, m_NominalToBinary);
        } else {
            m_NominalToBinary = null;
        }
    } else {
        m_NominalToBinary = null;
    }

    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else {
        m_Filter = null;
    }

    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();
    m_KernelIsLinear = (m_kernel instanceof PolyKernel) && (((PolyKernel) m_kernel).getExponent() == 1.0);

    // Generate subsets representing each class
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        subsets[(int) inst.classValue()].add(inst);
    }
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i].compactify();
    }

    // Build the binary classifiers
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarySMO[insts.numClasses()][insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        for (int j = i + 1; j < insts.numClasses(); j++) {
            m_classifiers[i][j] = new BinarySMO();
            m_classifiers[i][j].setKernel(Kernel.makeCopy(getKernel()));
            Instances data = new Instances(insts, insts.numInstances());
            for (int k = 0; k < subsets[i].numInstances(); k++) {
                data.add(subsets[i].instance(k));
            }
            for (int k = 0; k < subsets[j].numInstances(); k++) {
                data.add(subsets[j].instance(k));
            }
            data.compactify();
            data.randomize(rand);
            m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}

From source file:ID3Chi.java

License:Open Source License

/**
 * Builds ID3Chi decision tree classifier.
 *
 * @param data
 *            the training data
 * @exception Exception
 *                if classifier can't be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    makeTree(data);
}

From source file:MultiLabelKNN.java

License:Open Source License

protected void buildInternal(MultiLabelInstances trainSet) throws Exception {
    if (trainSet.getNumInstances() < numOfNeighbors) {
        throw new IllegalArgumentException(
                "The number of training instances is less than the number of requested nearest neighbours");
    }
    train = new Instances(trainSet.getDataSet());

    // label attributes don't influence distance estimation
    String labelIndicesString = "";
    for (int i = 0; i < numLabels - 1; i++) {
        labelIndicesString += (labelIndices[i] + 1) + ",";
    }
    labelIndicesString += (labelIndices[numLabels - 1] + 1);
    dfunc.setAttributeIndices(labelIndicesString);
    dfunc.setInvertSelection(true);
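    // with invertSelection(true) the listed label attributes are excluded, so only feature attributes contribute to the distance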

    lnn = new LinearNNSearch();
    lnn.setDistanceFunction(dfunc);
    lnn.setInstances(train);
    lnn.setMeasurePerformance(false);
}