Example usage for weka.core.Instances Instances(Instances, int)

List of usage examples for the weka.core.Instances constructor Instances(Instances, int).

Introduction

On this page you can find example usage for the weka.core.Instances constructor Instances(Instances, int).

Prototype

public Instances(Instances dataset, int capacity) 

Document

Constructor creating an empty set of instances.
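
For reference, here is a minimal self-contained sketch of the constructor in isolation (the file name "iris.arff" is only a placeholder for any ARFF dataset available on disk):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class InstancesCapacityExample {

    public static void main(String[] args) throws Exception {
        // Load a dataset; "iris.arff" stands in for any ARFF file.
        Instances data = DataSource.read("iris.arff");

        // Empty set of instances sharing data's header (attributes,
        // relation name, class index), with room pre-allocated for
        // ten rows; rows still have to be added explicitly.
        Instances subset = new Instances(data, 10);
        subset.add(data.instance(0));

        // Capacity 0 is the common idiom for keeping only the header.
        Instances header = new Instances(data, 0);

        System.out.println(subset.numInstances()); // prints 1
        System.out.println(header.numInstances()); // prints 0
    }
}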

Usage

From source file: adams.ml.model.clustering.WekaClusteringModel.java

License: Open Source License

/**
 * Initializes the model.
 *
 * @param model   the built Weka clusterer
 * @param data   the training data
 * @param inst   the Weka training data
 */
public WekaClusteringModel(weka.clusterers.Clusterer model, Dataset data, Instances inst) {
    m_Model = model;
    m_DatasetInfo = new DatasetInfo(data);
    m_InstancesHeader = new Instances(inst, 0);
}

From source file: adams.ml.model.regression.WekaRegressionModel.java

License: Open Source License

/**
 * Initializes the model.
 *
 * @param model   the built Weka classifier
 * @param data   the training data
 * @param inst   the Weka training data
 */
public WekaRegressionModel(Classifier model, Dataset data, Instances inst) {
    m_Model = model;
    m_DatasetInfo = new DatasetInfo(data);
    m_InstancesHeader = new Instances(inst, 0);
}
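
In both model classes, new Instances(inst, 0) copies only the header of the training data (attribute definitions, relation name, class index), not the rows; this is the standard Weka idiom for keeping a memory-cheap record of the dataset format.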

From source file: ann.ANN.java

public void percentageSplit(Classifier model, double percent, Instances data) {
    try {
        int trainSize = (int) Math.round(data.numInstances() * percent / 100);
        int testSize = data.numInstances() - trainSize;
        Instances train = new Instances(data, trainSize);
        Instances test = new Instances(data, testSize);

        for (int i = 0; i < trainSize; i++) {
            train.add(data.instance(i));
        }
        for (int i = trainSize; i < data.numInstances(); i++) {
            test.add(data.instance(i));
        }

        Evaluation eval = new Evaluation(train);
        eval.evaluateModel(model, test);
        System.out.println("================================");
        System.out.println("========Percentage  Split=======");
        System.out.println("================================");
        System.out.println(eval.toSummaryString("\n=== Summary ===\n", false));
        System.out.println(eval.toClassDetailsString("=== Detailed Accuracy By Class ===\n"));
        System.out.println(eval.toMatrixString("=== Confusion Matrix ===\n"));
    } catch (Exception ex) {
        System.out.println("File tidak berhasil di-load");
    }
}
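
Note that the capacity passed to the constructor (trainSize and testSize above) only pre-allocates space: train and test both start out empty and are filled row by row with add().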

From source file: br.com.ufu.lsi.rebfnetwork.RBFModel.java

License: Open Source License

/**
 * Builds the RBF network regressor based on the given dataset.
 */
public void buildClassifier(Instances data) throws Exception {

    // Set up the initial arrays
    m_data = initializeClassifier(data);

    if (m_ZeroR != null) {
        return;
    }

    // Initialise thread pool
    m_Pool = Executors.newFixedThreadPool(m_poolSize);

    // Apply optimization class to train the network
    Optimization opt = null;
    if (!m_useCGD) {
        opt = new OptEng();
    } else {
        opt = new OptEngCGD();
    }
    opt.setDebug(m_Debug);

    // No constraints
    double[][] b = new double[2][m_RBFParameters.length];
    for (int i = 0; i < 2; i++) {
        for (int j = 0; j < m_RBFParameters.length; j++) {
            b[i][j] = Double.NaN;
        }
    }

    m_RBFParameters = opt.findArgmin(m_RBFParameters, b);
    while (m_RBFParameters == null) {
        m_RBFParameters = opt.getVarbValues();
        if (m_Debug) {
            System.out.println("200 iterations finished, not enough!");
        }
        m_RBFParameters = opt.findArgmin(m_RBFParameters, b);
    }
    if (m_Debug) {
        System.out.println("SE (normalized space) after optimization: " + opt.getMinFunction());
    }

    m_data = new Instances(m_data, 0); // Save memory

    // Shut down thread pool
    m_Pool.shutdown();
}

From source file: cerebro.Id3.java

License: Open Source License

/**
 * Splits a dataset according to the values of a nominal attribute.
 *
 * @param data the data which is to be split
 * @param att the attribute to be used for splitting
 * @return the sets of instances produced by the split
 */
private Instances[] splitData(Instances data, Attribute att) {

    Instances[] splitData = new Instances[att.numValues()];
    for (int j = 0; j < att.numValues(); j++) {
        splitData[j] = new Instances(data, data.numInstances());
    }
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        splitData[(int) inst.value(att)].add(inst);
    }
    for (int i = 0; i < splitData.length; i++) {
        splitData[i].compactify();
    }
    return splitData;
}
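
This shows a typical pairing for the capacity constructor: each subset is created with the largest capacity it could possibly need, data.numInstances(), and compactify() then trims the unused space once the actual sizes are known.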

From source file: CGLSMethod.LinearRegression.java

License: Open Source License

/**
 * Builds a regression model for the given data.
 *
 * @param data the training data to be used for generating the
 * linear regression function
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

    // Preprocess instances
    if (!m_checksTurnedOff) {
        m_TransformFilter = new NominalToBinary();
        m_TransformFilter.setInputFormat(data);
        data = Filter.useFilter(data, m_TransformFilter);
        m_MissingFilter = new ReplaceMissingValues();
        m_MissingFilter.setInputFormat(data);
        data = Filter.useFilter(data, m_MissingFilter);
        data.deleteWithMissingClass();
    } else {
        m_TransformFilter = null;
        m_MissingFilter = null;
    }

    m_ClassIndex = data.classIndex();
    m_TransformedData = data;

    // Turn all attributes on for a start
    m_SelectedAttributes = new boolean[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != m_ClassIndex) {
            m_SelectedAttributes[i] = true;
        }
    }
    m_Coefficients = null;

    // Compute means and standard deviations
    m_Means = new double[data.numAttributes()];
    m_StdDevs = new double[data.numAttributes()];
    for (int j = 0; j < data.numAttributes(); j++) {
        if (j != data.classIndex()) {
            m_Means[j] = data.meanOrMode(j);
            m_StdDevs[j] = Math.sqrt(data.variance(j));
            if (m_StdDevs[j] == 0) {
                m_SelectedAttributes[j] = false;
            }
        }
    }

    m_ClassStdDev = Math.sqrt(data.variance(m_TransformedData.classIndex()));
    m_ClassMean = data.meanOrMode(m_TransformedData.classIndex());

    // Perform the regression
    findBestModel();

    // Save memory
    m_TransformedData = new Instances(data, 0);
}

From source file: CGLSMethod.LinearRegression.java

License: Open Source License

/**
 * Performs a greedy search for the best regression model using
 * Akaike's criterion.
 *
 * @throws Exception if regression can't be done
 */
private void findBestModel() throws Exception {

    // For the weighted case we still use numInstances in
    // the calculation of the Akaike criterion. 
    int numInstances = m_TransformedData.numInstances();

    if (b_Debug) {
        System.out.println((new Instances(m_TransformedData, 0)).toString());
    }

    // Perform a regression for the full model, and remove colinear attributes
    do {
        m_Coefficients = doRegression(m_SelectedAttributes);
    } while (m_EliminateColinearAttributes && deselectColinearAttributes(m_SelectedAttributes, m_Coefficients));

    // Figure out current number of attributes + 1. (We treat this model
    // as the full model for the Akaike-based methods.)
    int numAttributes = 1;
    for (int i = 0; i < m_SelectedAttributes.length; i++) {
        if (m_SelectedAttributes[i]) {
            numAttributes++;
        }
    }

    double fullMSE = calculateSE(m_SelectedAttributes, m_Coefficients);
    double akaike = (numInstances - numAttributes) + 2 * numAttributes;
    if (b_Debug) {
        System.out.println("Initial Akaike value: " + akaike);
    }

    boolean improved;
    int currentNumAttributes = numAttributes;
    switch (m_AttributeSelection) {

    case SELECTION_GREEDY:

        // Greedy attribute removal
        do {
            boolean[] currentSelected = (boolean[]) m_SelectedAttributes.clone();
            improved = false;
            currentNumAttributes--;

            for (int i = 0; i < m_SelectedAttributes.length; i++) {
                if (currentSelected[i]) {

                    // Calculate the akaike rating without this attribute
                    currentSelected[i] = false;
                    double[] currentCoeffs = doRegression(currentSelected);
                    double currentMSE = calculateSE(currentSelected, currentCoeffs);
                    double currentAkaike = currentMSE / fullMSE * (numInstances - numAttributes)
                            + 2 * currentNumAttributes;
                    if (b_Debug) {
                        System.out.println("(akaike: " + currentAkaike);
                    }

                    // If it is better than the current best
                    if (currentAkaike < akaike) {
                        if (b_Debug) {
                            System.err.println(
                                    "Removing attribute " + (i + 1) + " improved Akaike: " + currentAkaike);
                        }
                        improved = true;
                        akaike = currentAkaike;
                        System.arraycopy(currentSelected, 0, m_SelectedAttributes, 0,
                                m_SelectedAttributes.length);
                        m_Coefficients = currentCoeffs;
                    }
                    currentSelected[i] = true;
                }
            }
        } while (improved);
        break;

    case SELECTION_M5:

        // Step through the attributes removing the one with the smallest 
        // standardised coefficient until no improvement in Akaike
        do {
            improved = false;
            currentNumAttributes--;

            // Find attribute with smallest SC
            double minSC = 0;
            int minAttr = -1, coeff = 0;
            for (int i = 0; i < m_SelectedAttributes.length; i++) {
                if (m_SelectedAttributes[i]) {
                    double SC = Math.abs(m_Coefficients[coeff] * m_StdDevs[i] / m_ClassStdDev);
                    if ((coeff == 0) || (SC < minSC)) {
                        minSC = SC;
                        minAttr = i;
                    }
                    coeff++;
                }
            }

            // See whether removing it improves the Akaike score
            if (minAttr >= 0) {
                m_SelectedAttributes[minAttr] = false;
                double[] currentCoeffs = doRegression(m_SelectedAttributes);
                double currentMSE = calculateSE(m_SelectedAttributes, currentCoeffs);
                double currentAkaike = currentMSE / fullMSE * (numInstances - numAttributes)
                        + 2 * currentNumAttributes;
                if (b_Debug) {
                    System.out.println("(akaike: " + currentAkaike);
                }

                // If it is better than the current best
                if (currentAkaike < akaike) {
                    if (b_Debug) {
                        System.err.println(
                                "Removing attribute " + (minAttr + 1) + " improved Akaike: " + currentAkaike);
                    }
                    improved = true;
                    akaike = currentAkaike;
                    m_Coefficients = currentCoeffs;
                } else {
                    m_SelectedAttributes[minAttr] = true;
                }
            }
        } while (improved);
        break;

    case SELECTION_NONE:
        break;
    }
}

From source file: Classifier.supervised.LinearRegression.java

License: Open Source License

/**
 * Performs a greedy search for the best regression model using
 * Akaike's criterion.
 *
 * @throws Exception if regression can't be done
 */
protected void findBestModel() throws Exception {

    // For the weighted case we still use numInstances in
    // the calculation of the Akaike criterion. 
    int numInstances = m_TransformedData.numInstances();

    if (m_Debug) {
        System.out.println((new Instances(m_TransformedData, 0)).toString());
    }

    // Perform a regression for the full model, and remove colinear attributes
    do {
        m_Coefficients = doRegression(m_SelectedAttributes);
    } while (m_EliminateColinearAttributes && deselectColinearAttributes(m_SelectedAttributes, m_Coefficients));

    // Figure out current number of attributes + 1. (We treat this model
    // as the full model for the Akaike-based methods.)
    int numAttributes = 1;
    for (int i = 0; i < m_SelectedAttributes.length; i++) {
        if (m_SelectedAttributes[i]) {
            numAttributes++;
        }
    }

    double fullMSE = calculateSE(m_SelectedAttributes, m_Coefficients);
    double akaike = (numInstances - numAttributes) + 2 * numAttributes;
    if (m_Debug) {
        System.out.println("Initial Akaike value: " + akaike);
    }

    boolean improved;
    int currentNumAttributes = numAttributes;
    switch (m_AttributeSelection) {

    case SELECTION_GREEDY:

        // Greedy attribute removal
        do {
            boolean[] currentSelected = (boolean[]) m_SelectedAttributes.clone();
            improved = false;
            currentNumAttributes--;

            for (int i = 0; i < m_SelectedAttributes.length; i++) {
                if (currentSelected[i]) {

                    // Calculate the akaike rating without this attribute
                    currentSelected[i] = false;
                    double[] currentCoeffs = doRegression(currentSelected);
                    double currentMSE = calculateSE(currentSelected, currentCoeffs);
                    double currentAkaike = currentMSE / fullMSE * (numInstances - numAttributes)
                            + 2 * currentNumAttributes;
                    if (m_Debug) {
                        System.out.println("(akaike: " + currentAkaike);
                    }

                    // If it is better than the current best
                    if (currentAkaike < akaike) {
                        if (m_Debug) {
                            System.err.println(
                                    "Removing attribute " + (i + 1) + " improved Akaike: " + currentAkaike);
                        }
                        improved = true;
                        akaike = currentAkaike;
                        System.arraycopy(currentSelected, 0, m_SelectedAttributes, 0,
                                m_SelectedAttributes.length);
                        m_Coefficients = currentCoeffs;
                    }
                    currentSelected[i] = true;
                }
            }
        } while (improved);
        break;

    case SELECTION_M5:

        // Step through the attributes removing the one with the smallest 
        // standardised coefficient until no improvement in Akaike
        do {
            improved = false;
            currentNumAttributes--;

            // Find attribute with smallest SC
            double minSC = 0;
            int minAttr = -1, coeff = 0;
            for (int i = 0; i < m_SelectedAttributes.length; i++) {
                if (m_SelectedAttributes[i]) {
                    double SC = Math.abs(m_Coefficients[coeff] * m_StdDevs[i] / m_ClassStdDev);
                    if ((coeff == 0) || (SC < minSC)) {
                        minSC = SC;
                        minAttr = i;
                    }
                    coeff++;
                }
            }

            // See whether removing it improves the Akaike score
            if (minAttr >= 0) {
                m_SelectedAttributes[minAttr] = false;
                double[] currentCoeffs = doRegression(m_SelectedAttributes);
                double currentMSE = calculateSE(m_SelectedAttributes, currentCoeffs);
                double currentAkaike = currentMSE / fullMSE * (numInstances - numAttributes)
                        + 2 * currentNumAttributes;
                if (m_Debug) {
                    System.out.println("(akaike: " + currentAkaike);
                }

                // If it is better than the current best
                if (currentAkaike < akaike) {
                    if (m_Debug) {
                        System.err.println(
                                "Removing attribute " + (minAttr + 1) + " improved Akaike: " + currentAkaike);
                    }
                    improved = true;
                    akaike = currentAkaike;
                    m_Coefficients = currentCoeffs;
                } else {
                    m_SelectedAttributes[minAttr] = true;
                }
            }
        } while (improved);
        break;

    case SELECTION_NONE:
        break;
    }
}

From source file: classifiers.ComplexClassifier.java

/**
 * Draws a bootstrap sample (with replacement) from the given instances to
 * form the model set (Modelmenge); the instances that were not drawn become
 * the validation set (validierungsmenge).
 */
@Override
public void bootstrapvalidierungsmenge(Instances inst) {
    if (inst.numAttributes() != 0) {
        int[] hilf = new int[inst.numInstances()];

        for (int i = 0; i < inst.numInstances(); i++) {
            int a = ((int) (Math.random() * inst.numInstances()));

            hilf[i] = a;
        }

        Modelsindexen = EliminiereDopelt(hilf);
        Modelmenge = new Instances(inst, Modelsindexen.length);
        for (int i = 0; i < Modelsindexen.length; i++) {

            Modelmenge.add(new Instance(inst.instance(Modelsindexen[i])));
        }

        validierungsindexen = new int[inst.numInstances() - Modelsindexen.length];
        validierungsmenge = new Instances(Modelmenge, validierungsindexen.length);

        for (int i = 0, j = 0; i < inst.numInstances() && j < validierungsindexen.length; i++) {
            if (!(HasSet(Modelsindexen, i))) {
                validierungsindexen[j] = i;
                validierungsmenge.add(inst.instance(validierungsindexen[j]));
                j++; // only advance j once an instance has actually been added
            }
        }

    }
}

From source file: classifiers.ComplexClassifier.java

/**
 * Draws a bootstrap sample (with replacement) from the model set
 * (Modelmenge) to form the training data (traindaten); the instances that
 * were not drawn become the test data (testdaten).
 */
@Override
public void Bootstrap(Instances inst) {
    if (Modelmenge.numAttributes() != 0) {
        int[] hilf = new int[Modelmenge.numInstances()];

        for (int i = 0; i < Modelmenge.numInstances(); i++) {
            int a = ((int) (Math.random() * Modelmenge.numInstances()));

            hilf[i] = a;
        }

        trainingsetindexen = EliminiereDopelt(hilf);
        traindaten = new Instances(Modelmenge, trainingsetindexen.length);
        for (int i = 0; i < trainingsetindexen.length; i++) {

            traindaten.add(new Instance(Modelmenge.instance(trainingsetindexen[i]))); // the indices refer to Modelmenge, not inst
        }

        testsetindexen = new int[Modelsindexen.length - trainingsetindexen.length];
        testdaten = new Instances(traindaten, testsetindexen.length);

        for (int i = 0, j = 0; i < Modelmenge.numInstances() && j < testsetindexen.length; i++) {
            if (!(HasSet(trainingsetindexen, i))) {
                testsetindexen[j] = i;
                testdaten.add(Modelmenge.instance(testsetindexen[j]));
                j++; // only advance j once an instance has actually been added
            }
        }

    }

}