Example usage for weka.core Instances classIndex

List of usage examples for weka.core Instances classIndex

Introduction

On this page you can find example usage for the weka.core Instances classIndex method.

Prototype


public int classIndex()

Document

Returns the class attribute's index.
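
For orientation, here is a minimal sketch (not taken from the examples below) of the typical pattern around classIndex(): it returns -1 until a class attribute has been assigned, after which it can be used to address the class column. The file path "data.arff" is a placeholder assumption.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ClassIndexDemo {
    public static void main(String[] args) throws Exception {
        // "data.arff" is a placeholder path for this sketch
        Instances data = DataSource.read("data.arff");

        // classIndex() returns -1 while no class attribute has been set
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1); // treat the last attribute as the class
        }

        // the index can then be used to look up the class attribute and its values
        System.out.println("Class attribute: " + data.attribute(data.classIndex()).name());
        double[] classValues = data.attributeToDoubleArray(data.classIndex());
        System.out.println("Number of class values: " + classValues.length);
    }
}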

Usage

From source file:cezeri.evaluater.FactoryEvaluation.java

public static Evaluation performCrossValidate(Classifier model, Instances datax, int folds, boolean show_text,
        boolean show_plot, TFigureAttribute attr) {
    Random rand = new Random(1);
    Instances randData = new Instances(datax);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        randData.stratify(folds);
    }
    Evaluation eval = null;
    try {
        // perform cross-validation
        eval = new Evaluation(randData);
        //            double[] simulated = new double[0];
        //            double[] observed = new double[0];
        //            double[] sim = new double[0];
        //            double[] obs = new double[0];
        for (int n = 0; n < folds; n++) {
            Instances train = randData.trainCV(folds, n, rand);
            Instances validation = randData.testCV(folds, n);
            // build and evaluate classifier
            Classifier clsCopy = Classifier.makeCopy(model);
            clsCopy.buildClassifier(train);

            //                sim = eval.evaluateModel(clsCopy, validation);
            //                obs = validation.attributeToDoubleArray(validation.classIndex());
            //                if (show_plot) {
            //                    double[][] d = new double[2][sim.length];
            //                    d[0] = obs;
            //                    d[1] = sim;
            //                    CMatrix f1 = CMatrix.getInstance(d);
            //                    f1.transpose().plot(attr);
            //                }
            //                if (show_text) {
            //                    // output evaluation
            //                    System.out.println();
            //                    System.out.println("=== Setup for each Cross Validation fold===");
            //                    System.out.println("Classifier: " + model.getClass().getName() + " " + Utils.joinOptions(model.getOptions()));
            //                    System.out.println("Dataset: " + randData.relationName());
            //                    System.out.println("Folds: " + folds);
            //                    System.out.println("Seed: " + 1);
            //                    System.out.println();
            //                    System.out.println(eval.toSummaryString("=== " + folds + "-fold Cross-validation ===", false));
            //                }
            simulated = FactoryUtils.concatenate(simulated, eval.evaluateModel(clsCopy, validation));
            observed = FactoryUtils.concatenate(observed,
                    validation.attributeToDoubleArray(validation.classIndex()));
            //                simulated = FactoryUtils.mean(simulated,eval.evaluateModel(clsCopy, validation));
            //                observed = FactoryUtils.mean(observed,validation.attributeToDoubleArray(validation.classIndex()));
        }

        if (show_plot) {
            double[][] d = new double[2][simulated.length];
            d[0] = observed;
            d[1] = simulated;
            CMatrix f1 = CMatrix.getInstance(d);
            attr.figureCaption = "overall performance";
            f1.transpose().plot(attr);
        }
        if (show_text) {
            // output evaluation
            System.out.println();
            System.out.println("=== Setup for Overall Cross Validation===");
            System.out.println(
                    "Classifier: " + model.getClass().getName() + " " + Utils.joinOptions(model.getOptions()));
            System.out.println("Dataset: " + randData.relationName());
            System.out.println("Folds: " + folds);
            System.out.println("Seed: " + 1);
            System.out.println();
            System.out.println(eval.toSummaryString("=== " + folds + "-fold Cross-validation ===", false));
        }
    } catch (Exception ex) {
        Logger.getLogger(FactoryEvaluation.class.getName()).log(Level.SEVERE, null, ex);
    }
    return eval;
}

From source file:cezeri.evaluater.FactoryEvaluation.java

public static Evaluation performCrossValidateTestAlso(Classifier model, Instances datax, Instances test,
        boolean show_text, boolean show_plot) {
    TFigureAttribute attr = new TFigureAttribute();
    Random rand = new Random(1);
    Instances randData = new Instances(datax);
    randData.randomize(rand);

    Evaluation eval = null;
    int folds = randData.numInstances();
    try {
        eval = new Evaluation(randData);
        for (int n = 0; n < folds; n++) {
            //                randData.randomize(rand);
            //                Instances train = randData;                
            Instances train = randData.trainCV(folds, n);
            //                Instances train = randData.trainCV(folds, n, rand);
            Classifier clsCopy = Classifier.makeCopy(model);
            clsCopy.buildClassifier(train);
            Instances validation = randData.testCV(folds, n);
            //                Instances validation = test.testCV(test.numInstances(), n%test.numInstances());
            //                CMatrix.fromInstances(train).showDataGrid();
            //                CMatrix.fromInstances(validation).showDataGrid();

            simulated = FactoryUtils.concatenate(simulated, eval.evaluateModel(clsCopy, validation));
            observed = FactoryUtils.concatenate(observed,
                    validation.attributeToDoubleArray(validation.classIndex()));
        }

        if (show_plot) {
            double[][] d = new double[2][simulated.length];
            d[0] = observed;
            d[1] = simulated;
            CMatrix f1 = CMatrix.getInstance(d);
            attr.figureCaption = "overall performance";
            f1.transpose().plot(attr);
        }
        if (show_text) {
            // output evaluation
            System.out.println();
            System.out.println("=== Setup for Overall Cross Validation===");
            System.out.println(
                    "Classifier: " + model.getClass().getName() + " " + Utils.joinOptions(model.getOptions()));
            System.out.println("Dataset: " + randData.relationName());
            System.out.println("Folds: " + folds);
            System.out.println("Seed: " + 1);
            System.out.println();
            System.out.println(eval.toSummaryString("=== " + folds + "-fold Cross-validation ===", false));
        }
    } catch (Exception ex) {
        Logger.getLogger(FactoryEvaluation.class.getName()).log(Level.SEVERE, null, ex);
    }
    return eval;
}

From source file:cezeri.evaluater.FactoryEvaluation.java

private static Evaluation doTest(boolean isTrained, Classifier model, Instances train, Instances test,
        boolean show_text, boolean show_plot, TFigureAttribute attr) {
    Instances data = new Instances(train);
    Random rand = new Random(1);
    data.randomize(rand);
    Evaluation eval = null;
    try {
        //            double[] simulated = null;
        eval = new Evaluation(train);
        if (isTrained) {
            simulated = eval.evaluateModel(model, test);
        } else {
            Classifier clsCopy = Classifier.makeCopy(model);
            clsCopy.buildClassifier(train);
            simulated = eval.evaluateModel(clsCopy, test);
        }
        if (show_plot) {
            observed = test.attributeToDoubleArray(test.classIndex());
            double[][] d = new double[2][simulated.length];
            d[0] = observed;
            d[1] = simulated;
            CMatrix f1 = CMatrix.getInstance(d);
            String[] items = { "Observed", "Simulated" };
            attr.items = items;
            attr.figureCaption = model.getClass().getCanonicalName();
            f1.transpose().plot(attr);
            //                if (attr.axis[0].isEmpty() && attr.axis[1].isEmpty()) {
            //                    f1.transpose().plot(attr);
            //                } else {
            //                    f1.transpose().plot(model.getClass().getCanonicalName(), attr.items, attr.axis);
            //                }
        }
        if (show_text) {
            System.out.println();
            System.out.println("=== Setup for Test ===");
            System.out.println(
                    "Classifier: " + model.getClass().getName() + " " + Utils.joinOptions(model.getOptions()));
            System.out.println("Dataset: " + test.relationName());
            System.out.println();
            System.out.println(eval.toSummaryString("=== Test Results ===", false));
        }
    } catch (Exception ex) {
        Logger.getLogger(FactoryEvaluation.class.getName()).log(Level.SEVERE, null, ex);
    }
    return eval;
}

From source file:cezeri.feature.selection.FeatureSelectionRanker.java

private static TFeatureRank[] correlation(Instances data, int type) {
    TFeatureRank[] ret = new TFeatureRank[data.numAttributes() - 1];
    String[] attributeNames = FactoryInstance.getAttributeList(data);
    double[] out = data.attributeToDoubleArray(data.classIndex());
    for (int i = 0; i < data.numAttributes() - 1; i++) {
        TFeatureRank obj = new TFeatureRank();
        obj.featureName = attributeNames[i];
        obj.index = i + "";
        if (type == TCorelation.ARE) {
            obj.value = Math.abs(FactoryStatistic.ARE(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.CRCF) {
            obj.value = Math.abs(FactoryStatistic.CRCF(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.IOA) {
            obj.value = Math.abs(FactoryStatistic.IOA(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.KENDALL) {
            obj.value = Math.abs(FactoryStatistic.KENDALL(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.MAE) {
            obj.value = Math.abs(FactoryStatistic.MAE(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.MPE) {
            obj.value = Math.abs(FactoryStatistic.MPE(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.MSE) {
            obj.value = Math.abs(FactoryStatistic.MSE(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.NSEC) {
            obj.value = Math.abs(FactoryStatistic.NSEC(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.PEARSON) {
            obj.value = Math.abs(FactoryStatistic.PEARSON(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.R) {
            obj.value = Math.abs(FactoryStatistic.R(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.R2) {
            obj.value = Math.abs(FactoryStatistic.R2(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.RAE) {
            obj.value = Math.abs(FactoryStatistic.RAE(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.RELATIVE_NSEC) {
            obj.value = Math.abs(FactoryStatistic.RELATIVE_NSEC(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.RMSE) {
            obj.value = Math.abs(FactoryStatistic.RMSE(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.RRSE) {
            obj.value = Math.abs(FactoryStatistic.RRSE(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.SPEARMAN) {
            obj.value = Math.abs(FactoryStatistic.SPEARMAN(data.attributeToDoubleArray(i), out));
        }
        //            if (type==FactoryCorrelation.KENDALL) {
        //                obj.value=Math.abs(FactoryCorrelation.rankKendallTauBeta(data.attributeToDoubleArray(i), out));
        //            }
        //            if (type==FactoryCorrelation.PEARSON) {
        //                obj.value=Math.abs(FactoryCorrelation.pearson(data.attributeToDoubleArray(i), out));
        //            }
        //            if (type==FactoryCorrelation.SPEARMAN) {
        //                obj.value=Math.abs(FactoryCorrelation.spearman(data.attributeToDoubleArray(i), out));
        //            }            
        ret[i] = obj;
    }
    ArrayList<TFeatureRank> lst = toArrayList(ret);
    Collections.sort(lst, new CustomComparatorForFeatureRank());
    ret = toArray(lst);
    return ret;
}

From source file:cezeri.utils.FactoryInstance.java

public static String[] getOriginalClasses(Instances data) {
    Attribute att = data.attribute(data.classIndex());
    String[] ret = new String[data.numClasses()];
    Enumeration enu = att.enumerateValues();
    int q = 0;
    while (enu.hasMoreElements()) {
        ret[q++] = (String) enu.nextElement();
    }
    return ret;
}

From source file:cezeri.utils.FactoryInstance.java

public static Instances getSubsetData(Instances data, String[] attList) {
    Instances temp = new Instances(data);
    for (int i = 0; i < data.numAttributes(); i++) {
        if (!temp.attribute(0).equals(temp.classAttribute())) {
            temp.deleteAttributeAt(0);
        }
    }
    double[][] m = new double[attList.length + 1][data.numInstances()];
    for (int i = 0; i < attList.length; i++) {
        int n = attList.length - 1 - i;
        String str = attList[n];
        Attribute t = data.attribute(str);
        double[] d = data.attributeToDoubleArray(t.index());
        m[n] = d;
        temp.insertAttributeAt(t, 0);
    }
    m[attList.length] = data.attributeToDoubleArray(data.classIndex());
    m = CMatrix.getInstance(m).transpose().get2DArrayDouble();

    FastVector att = new FastVector();
    for (int i = 0; i < temp.numAttributes(); i++) {
        att.addElement(temp.attribute(i));
    }
    Instances ret = new Instances(temp.relationName(), att, m.length);
    for (int i = 0; i < m.length; i++) {
        Instance ins = new Instance(m[0].length);
        for (int j = 0; j < m[0].length; j++) {
            ins.setValue(j, m[i][j]);
        }
        ret.add(ins);
    }
    ret.setClassIndex(temp.classIndex());

    return ret;
}

From source file:CGLSMethod.LinearRegression.java

License:Open Source License

/**
 * Builds a regression model for the given data.
 *
 * @param data the training data to be used for generating the
 * linear regression function
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

    // Preprocess instances
    if (!m_checksTurnedOff) {
        m_TransformFilter = new NominalToBinary();
        m_TransformFilter.setInputFormat(data);
        data = Filter.useFilter(data, m_TransformFilter);
        m_MissingFilter = new ReplaceMissingValues();
        m_MissingFilter.setInputFormat(data);
        data = Filter.useFilter(data, m_MissingFilter);
        data.deleteWithMissingClass();
    } else {
        m_TransformFilter = null;
        m_MissingFilter = null;
    }

    m_ClassIndex = data.classIndex();
    m_TransformedData = data;

    // Turn all attributes on for a start
    m_SelectedAttributes = new boolean[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != m_ClassIndex) {
            m_SelectedAttributes[i] = true;
        }
    }
    m_Coefficients = null;

    // Compute means and standard deviations
    m_Means = new double[data.numAttributes()];
    m_StdDevs = new double[data.numAttributes()];
    for (int j = 0; j < data.numAttributes(); j++) {
        if (j != data.classIndex()) {
            m_Means[j] = data.meanOrMode(j);
            m_StdDevs[j] = Math.sqrt(data.variance(j));
            if (m_StdDevs[j] == 0) {
                m_SelectedAttributes[j] = false;
            }
        }
    }

    m_ClassStdDev = Math.sqrt(data.variance(m_TransformedData.classIndex()));
    m_ClassMean = data.meanOrMode(m_TransformedData.classIndex());

    // Perform the regression
    findBestModel();

    // Save memory
    m_TransformedData = new Instances(data, 0);
}

From source file:ChiSquare.ChiSquaredAttributeEval.java

License:Open Source License

/**
 * Initializes a chi-squared attribute evaluator.
 * Discretizes all attributes that are numeric.
 *
 * @param data set of instances serving as training data 
 * @throws Exception if the evaluator has not been 
 * generated successfully
 */
public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute chi-squared values
    m_ChiSquareds = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_ChiSquareds[i] = ContingencyTables.chiVal(ContingencyTables.reduceMatrix(counts[i]), false);
        }
    }
}

From source file:classifier.SellerClassifier.java

private Instances loadData(String dataset) throws Exception {
    DataSource data = new DataSource(dataset);
    Instances instances = data.getDataSet();
    if (instances.classIndex() == -1) {
        instances.setClassIndex(instances.numAttributes() - 1);
    }

    return instances;
}

From source file:classifier.SentenceBasedTextDirectoryLoader.java

License:Open Source License

/**
 * Return the full data set. If the structure hasn't yet been determined by
 * a call to getStructure then method should do so before processing the
 * rest of the data set.
 * 
 * @return the structure of the data set as an empty set of Instances
 * @throws IOException
 *             if there is no source or parsing fails
 */
@Override
public Instances getDataSet() throws IOException {
    if (getDirectory() == null)
        throw new IOException("No directory/source has been specified");

    String directoryPath = getDirectory().getAbsolutePath();
    ArrayList<String> classes = new ArrayList<String>();
    ArrayList<String> filenames = new ArrayList<String>();
    Enumeration enm = getStructure().classAttribute().enumerateValues();
    while (enm.hasMoreElements())
        classes.add((String) enm.nextElement());

    Instances data = getStructure();
    int fileCount = 0;
    // each class is actually the filename - this is preserved around weka,
        // so it's useful for tracking associations later and using as an "index"
    //

    for (int k = 0; k < classes.size(); k++) {
        String subdirPath = (String) classes.get(k);
        File subdir = new File(directoryPath + File.separator + subdirPath);
        String[] files = subdir.list();
        for (int j = 0; j < files.length; j++) {

            try {
                fileCount++;
                if (getDebug())
                    System.err.println("processing " + fileCount + " : " + files[j]);

                File txt = new File(directoryPath + File.separator + subdirPath + File.separator + files[j]);
                filenames.add(files[j]);
                BufferedInputStream is;
                is = new BufferedInputStream(new FileInputStream(txt));
                StringBuffer txtStr = new StringBuffer();
                int c;
                while ((c = is.read()) != -1) {
                    txtStr.append((char) c);
                }

                // Here is my extension to Text Directory Loader.
                String regexSentenceSplit = "(\\n)";
                String rawtext = txtStr.toString();
                rawtext = rawtext.toLowerCase();
                rawtext = rawtext.trim(); // trim() returns a new String, so assign it back

                // split the sentences
                String[] sentences = rawtext.split(regexSentenceSplit);
                for (String sentence : sentences) {
                    double[] newInst = null;
                    if (m_OutputFilename)
                        newInst = new double[3];
                    else
                        newInst = new double[2];

                    newInst[0] = (double) data.attribute(0).addStringValue(sentence + "\n");
                    if (m_OutputFilename)
                        newInst[1] = (double) data.attribute(1)
                                .addStringValue(subdirPath + File.separator + files[j]);
                    newInst[data.classIndex()] = (double) k;
                    data.add(new DenseInstance(1.0, newInst));
                    // }
                }

                writeFilenames(directoryPath, filenames);

            } catch (Exception e) {
                System.err.println("failed to convert file: " + directoryPath + File.separator + files[j]);
            }
        }
    }

    // this.m_structure.setClassIndex(-1);
    return data;
}