Example usage for weka.core Instances numAttributes

List of usage examples for weka.core Instances numAttributes

Introduction

On this page you can find example usage for weka.core Instances numAttributes.

Prototype


public int numAttributes() 

Document

Returns the number of attributes.
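
As a quick illustration (not taken from the examples below), here is a minimal sketch that loads a dataset and calls numAttributes(); the file path and class name are placeholders, and it assumes a Weka version that provides ConverterUtils.DataSource.read.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class NumAttributesExample {
    public static void main(String[] args) throws Exception {
        // Load a dataset (placeholder path).
        Instances data = DataSource.read("./data/example.arff");

        // numAttributes() counts every attribute, including the class attribute.
        System.out.println("Number of attributes: " + data.numAttributes());

        // Common idiom in the examples below: use the last attribute as the class.
        data.setClassIndex(data.numAttributes() - 1);

        // Iterate over all attributes by index.
        for (int i = 0; i < data.numAttributes(); i++) {
            System.out.println(i + ": " + data.attribute(i).name());
        }
    }
}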

Usage

From source file:c4.pkg5crossv.Classifier.java

public static void C45() throws FileNotFoundException, IOException, Exception {
    Instances data = DataLoad.loadData("./src/data/irysy.arff");

    //Set the class (decision) attribute to the last attribute
    data.setClassIndex(data.numAttributes() - 1);

    //OPTIONS:
    //-U -> build the tree without pruning (unpruned leaves)
    //-C <confidence factor> -> set the confidence factor for the leaves (default 0.25)
    //-M -> set the minimum number of instances in a leaf below which the leaf is not split (default 2)

    //Set the options
    String[] options = Utils.splitOptions("-U -M 10");

    J48 tree = new J48();
    tree.setOptions(options); //Apply the options
    tree.buildClassifier(data); // Build the classifier (tree)

    System.out.println(tree.toString()); //Print the tree as text

    System.out.println("TRAIN&TEST");
    trainAndTest();
}

From source file:c4.pkg5crossv.Classifier.java

public static void trainAndTest() throws FileNotFoundException, IOException, Exception {

    Instances data = DataLoad.loadData("./src/data/irysy.arff");
    data.setClassIndex(data.numAttributes() - 1);

    //Random split of the data
    data.randomize(new Random());
    double percent = 60.0;
    int trainSize = (int) Math.round(data.numInstances() * percent / 100);
    int testSize = data.numInstances() - trainSize;
    Instances trainData = new Instances(data, 0, trainSize);
    Instances testData = new Instances(data, trainSize, testSize);

    String[] options = Utils.splitOptions("-U -M 10");
    J48 tree = new J48();
    tree.setOptions(options);
    tree.buildClassifier(trainData);

    Evaluation eval2 = new Evaluation(trainData);
    eval2.crossValidateModel(tree, testData, 10, new Random(1)); // 10-fold cross-validation
    System.out.println(eval2.toSummaryString("Results:", false)); //Print the cross-validation results
}

From source file:c4.pkg5crossv.Preview.java

/**
 * Writes the ARFF data to standard output and to the preview text area.
 * @throws IOException
 */

public static void showData() throws IOException {

    String source = MainWindow.browsedFileLabel.getText();
    Instances data = DataLoad.loadData(source.replace("\\", "/"));
    data.setClassIndex(data.numAttributes() - 1);
    String field = "";
    for (int i = 0; i < data.numAttributes(); i++) {
        // Print the current attribute.
        System.out.print(data.attribute(i).name() + ": ");
        previewTextArea.append("\n" + data.attribute(i).name() + ": ");
        // Print the values associated with the current attribute.
        double[] values = data.attributeToDoubleArray(i);

        System.out.println(Arrays.toString(values));
        previewTextArea.append(Arrays.toString(values));
    }

}

From source file:ca.uqac.florentinth.speakerauthentication.Learning.Learning.java

License:Apache License

public void trainClassifier(Classifier classifier, FileReader trainingDataset, FileOutputStream trainingModel,
        Integer crossValidationFoldNumber) throws Exception {
    Instances instances = new Instances(new BufferedReader(trainingDataset));

    switch (classifier) {
    case KNN:
        int K = (int) Math.ceil(Math.sqrt(instances.numInstances()));
        this.classifier = new IBk(K);
        break;
    case NB:
        this.classifier = new NaiveBayes();
    }

    if (instances.classIndex() == -1) {
        instances.setClassIndex(instances.numAttributes() - 1);
    }

    this.classifier.buildClassifier(instances);

    if (crossValidationFoldNumber > 0) {
        Evaluation evaluation = new Evaluation(instances);
        evaluation.crossValidateModel(this.classifier, instances, crossValidationFoldNumber, new Random(1));
        kappa = evaluation.kappa();
        fMeasure = evaluation.weightedFMeasure();
        confusionMatrix = evaluation.toMatrixString("Confusion matrix: ");
    }

    ObjectOutputStream outputStream = new ObjectOutputStream(trainingModel);
    outputStream.writeObject(this.classifier);
    outputStream.flush();
    outputStream.close();
}

From source file:ca.uqac.florentinth.speakerauthentication.Learning.Learning.java

License:Apache License

public Map<String, String> makePrediction(String username, FileInputStream trainingModel,
        FileReader testingDataset) throws Exception {
    Map<String, String> predictions = new HashMap<>();

    ObjectInputStream inputStream = new ObjectInputStream(trainingModel);
    weka.classifiers.Classifier classifier = (weka.classifiers.Classifier) inputStream.readObject();
    inputStream.close();

    Instances instances = new Instances(new BufferedReader(testingDataset));

    if (instances.classIndex() == -1) {
        instances.setClassIndex(instances.numAttributes() - 1);
    }

    int last = instances.numInstances() - 1;

    if (instances.instance(last).stringValue(instances.classIndex()).equals(username)) {
        double label = classifier.classifyInstance(instances.instance(last));
        instances.instance(last).setClassValue(label);
        predictions.put(username, instances.instance(last).stringValue(instances.classIndex()));
    }

    return predictions;
}

From source file:categorization.SpectralWEKA.java

License:Open Source License

/**
 * Generates a clusterer by means of the spectral clustering algorithm.
 *
 * @param data set of instances serving as training data
 * @exception Exception if the clusterer has not been generated successfully
 */
public void buildClusterer(Instances data) throws java.lang.Exception {
    m_Sequences = new Instances(data);
    int n = data.numInstances();
    int k = data.numAttributes();
    DoubleMatrix2D w;
    if (useSparseMatrix)
        w = DoubleFactory2D.sparse.make(n, n);
    else
        w = DoubleFactory2D.dense.make(n, n);
    double[][] v1 = new double[n][];
    for (int i = 0; i < n; i++)
        v1[i] = data.instance(i).toDoubleArray();
    v = DoubleFactory2D.dense.make(v1);
    double sigma_sq = sigma * sigma;
    //Sets up similarity matrix
    for (int i = 0; i < n; i++)
        for (int j = i; j < n; j++) {
            /*double dist = distnorm2(v.viewRow(i), v.viewRow(j));
            if((r == -1) || (dist < r)) {
              double sim = Math.exp(- (dist * dist) / (2 * sigma_sq));
              w.set(i, j, sim);
              w.set(j, i, sim);
            }*/
            /* String [] key = {data.instance(i).stringValue(0), data.instance(j).stringValue(0)};
             System.out.println(key[0]);
             System.out.println(key[1]);
             System.out.println(simScoreMap.containsKey(key));
             Double simValue = simScoreMap.get(key);*/

            double sim = sim_matrix[i][j];
            w.set(i, j, sim);
            w.set(j, i, sim);
        }

    //Partitions points
    int[][] p = partition(w, alpha_star);

    //Deploys results
    numOfClusters = p.length;
    cluster = new int[n];
    for (int i = 0; i < p.length; i++)
        for (int j = 0; j < p[i].length; j++)
            cluster[p[i][j]] = i;

    //System.out.println("Final partition:");
    // UtilsJS.printMatrix(p);
    // System.out.println("Cluster:\n");
    // UtilsJS.printArray(cluster);
    this.numOfClusters = cluster[Utils.maxIndex(cluster)] + 1;
    //  System.out.println("Num clusters:\t"+this.numOfClusters);
}

From source file:cba.Apriori.java

License:Open Source License

/**
 * Removes columns that are all missing from the data
 * @param instances the instances
 * @return a new set of instances with all missing columns removed
 * @throws Exception if something goes wrong
 */
protected Instances removeMissingColumns(Instances instances) throws Exception {

    int numInstances = instances.numInstances();
    StringBuffer deleteString = new StringBuffer();
    int removeCount = 0;
    boolean first = true;
    int maxCount = 0;

    for (int i = 0; i < instances.numAttributes(); i++) {
        AttributeStats as = instances.attributeStats(i);
        if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) {
            // see if we can decrease this by looking for the most frequent value
            int[] counts = as.nominalCounts;
            if (counts[Utils.maxIndex(counts)] > maxCount) {
                maxCount = counts[Utils.maxIndex(counts)];
            }
        }
        if (as.missingCount == numInstances) {
            if (first) {
                deleteString.append((i + 1));
                first = false;
            } else {
                deleteString.append("," + (i + 1));
            }
            removeCount++;
        }
    }
    if (m_verbose) {
        System.err.println("Removed : " + removeCount + " columns with all missing " + "values.");
    }
    if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) {
        m_upperBoundMinSupport = (double) maxCount / (double) numInstances;
        if (m_verbose) {
            System.err.println("Setting upper bound min support to : " + m_upperBoundMinSupport);
        }
    }

    if (deleteString.toString().length() > 0) {
        Remove af = new Remove();
        af.setAttributeIndices(deleteString.toString());
        af.setInvertSelection(false);
        af.setInputFormat(instances);
        Instances newInst = Filter.useFilter(instances, af);

        return newInst;
    }
    return instances;
}

From source file:cba.Apriori.java

License:Open Source License

/**
 * Method that generates all large itemsets with a minimum support, and from
 * these all association rules with a minimum confidence.
 *
 * @param instances the instances to be used for generating the associations
 * @throws Exception if rules can't be built successfully
 */
public void buildAssociations(Instances instances) throws Exception {

    double[] confidences, supports;
    int[] indices;
    FastVector[] sortedRuleSet;
    int necSupport = 0;

    instances = new Instances(instances);

    if (m_removeMissingCols) {
        instances = removeMissingColumns(instances);
    }
    if (m_car && m_metricType != CONFIDENCE)
        throw new Exception("For CAR-Mining metric type has to be confidence!");

    // only set class index if CAR is requested
    if (m_car) {
        if (m_classIndex == -1) {
            instances.setClassIndex(instances.numAttributes() - 1);
        } else if (m_classIndex <= instances.numAttributes() && m_classIndex > 0) {
            instances.setClassIndex(m_classIndex - 1);
        } else {
            throw new Exception("Invalid class index.");
        }
    }

    // can associator handle the data?
    getCapabilities().testWithFail(instances);

    m_cycles = 0;
    if (m_car) {
        //m_instances does not contain the class attribute
        m_instances = LabeledItemSet.divide(instances, false);

        //m_onlyClass contains only the class attribute
        m_onlyClass = LabeledItemSet.divide(instances, true);
    } else
        m_instances = instances;

    if (m_car && m_numRules == Integer.MAX_VALUE) {
        // Set desired minimum support
        m_minSupport = m_lowerBoundMinSupport;
    } else {
        // Decrease minimum support until desired number of rules found.
        m_minSupport = m_upperBoundMinSupport - m_delta;
        m_minSupport = (m_minSupport < m_lowerBoundMinSupport) ? m_lowerBoundMinSupport : m_minSupport;
    }

    do {

        // Reserve space for variables
        m_Ls = new FastVector();
        m_hashtables = new FastVector();
        m_allTheRules = new FastVector[6];
        m_allTheRules[0] = new FastVector();
        m_allTheRules[1] = new FastVector();
        m_allTheRules[2] = new FastVector();
        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
            m_allTheRules[3] = new FastVector();
            m_allTheRules[4] = new FastVector();
            m_allTheRules[5] = new FastVector();
        }
        sortedRuleSet = new FastVector[6];
        sortedRuleSet[0] = new FastVector();
        sortedRuleSet[1] = new FastVector();
        sortedRuleSet[2] = new FastVector();
        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
            sortedRuleSet[3] = new FastVector();
            sortedRuleSet[4] = new FastVector();
            sortedRuleSet[5] = new FastVector();
        }
        if (!m_car) {
            // Find large itemsets and rules
            findLargeItemSets();
            if (m_significanceLevel != -1 || m_metricType != CONFIDENCE)
                findRulesBruteForce();
            else
                findRulesQuickly();
        } else {
            findLargeCarItemSets();
            findCarRulesQuickly();
        }

        // Sort rules according to their support
        /* supports = new double[m_allTheRules[2].size()];
         for (int i = 0; i < m_allTheRules[2].size(); i++) 
        supports[i] = (double)((AprioriItemSet)m_allTheRules[1].elementAt(i)).support();
         indices = Utils.stableSort(supports);
         for (int i = 0; i < m_allTheRules[2].size(); i++) {
        sortedRuleSet[0].addElement(m_allTheRules[0].elementAt(indices[i]));
        sortedRuleSet[1].addElement(m_allTheRules[1].elementAt(indices[i]));
        sortedRuleSet[2].addElement(m_allTheRules[2].elementAt(indices[i]));
        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
        sortedRuleSet[3].addElement(m_allTheRules[3].elementAt(indices[i]));
        sortedRuleSet[4].addElement(m_allTheRules[4].elementAt(indices[i]));
        sortedRuleSet[5].addElement(m_allTheRules[5].elementAt(indices[i]));
        }
         }*/

        int j = m_allTheRules[2].size() - 1;
        supports = new double[m_allTheRules[2].size()];
        for (int i = 0; i < (j + 1); i++)
            supports[j - i] = ((double) ((ItemSet) m_allTheRules[1].elementAt(j - i)).support()) * (-1);
        indices = Utils.stableSort(supports);
        for (int i = 0; i < (j + 1); i++) {
            sortedRuleSet[0].addElement(m_allTheRules[0].elementAt(indices[j - i]));
            sortedRuleSet[1].addElement(m_allTheRules[1].elementAt(indices[j - i]));
            sortedRuleSet[2].addElement(m_allTheRules[2].elementAt(indices[j - i]));
            if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
                sortedRuleSet[3].addElement(m_allTheRules[3].elementAt(indices[j - i]));
                sortedRuleSet[4].addElement(m_allTheRules[4].elementAt(indices[j - i]));
                sortedRuleSet[5].addElement(m_allTheRules[5].elementAt(indices[j - i]));
            }
        }

        // Sort rules according to their confidence
        m_allTheRules[0].removeAllElements();
        m_allTheRules[1].removeAllElements();
        m_allTheRules[2].removeAllElements();
        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
            m_allTheRules[3].removeAllElements();
            m_allTheRules[4].removeAllElements();
            m_allTheRules[5].removeAllElements();
        }
        confidences = new double[sortedRuleSet[2].size()];
        int sortType = 2 + m_metricType;

        for (int i = 0; i < sortedRuleSet[2].size(); i++)
            confidences[i] = ((Double) sortedRuleSet[sortType].elementAt(i)).doubleValue();
        indices = Utils.stableSort(confidences);
        for (int i = sortedRuleSet[0].size() - 1; (i >= (sortedRuleSet[0].size() - m_numRules))
                && (i >= 0); i--) {
            m_allTheRules[0].addElement(sortedRuleSet[0].elementAt(indices[i]));
            m_allTheRules[1].addElement(sortedRuleSet[1].elementAt(indices[i]));
            m_allTheRules[2].addElement(sortedRuleSet[2].elementAt(indices[i]));
            if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
                m_allTheRules[3].addElement(sortedRuleSet[3].elementAt(indices[i]));
                m_allTheRules[4].addElement(sortedRuleSet[4].elementAt(indices[i]));
                m_allTheRules[5].addElement(sortedRuleSet[5].elementAt(indices[i]));
            }
        }

        if (m_verbose) {
            if (m_Ls.size() > 1) {
                System.out.println(toString());
            }
        }
        if (m_minSupport == m_lowerBoundMinSupport || m_minSupport - m_delta > m_lowerBoundMinSupport)
            m_minSupport -= m_delta;
        else
            m_minSupport = m_lowerBoundMinSupport;

        necSupport = Math.round((float) ((m_minSupport * (double) m_instances.numInstances()) + 0.5));

        m_cycles++;
    } while ((m_allTheRules[0].size() < m_numRules) && (Utils.grOrEq(m_minSupport, m_lowerBoundMinSupport))
    /*        (necSupport >= lowerBoundNumInstancesSupport)*/
    /*        (Utils.grOrEq(m_minSupport, m_lowerBoundMinSupport)) */ && (necSupport >= 1));
    m_minSupport += m_delta;
}

From source file:cba.AprioriItemSet.java

License:Open Source License

/**
 * Converts the header info of the given set of instances into a set 
 * of item sets (singletons). The ordering of values in the header file 
 * determines the lexicographic order.
 *
 * @param instances the set of instances whose header info is to be used
 * @return a set of item sets, each containing a single item
 * @exception Exception if singletons can't be generated successfully
 */
public static FastVector singletons(Instances instances, boolean treatZeroAsMissing) throws Exception {

    FastVector setOfItemSets = new FastVector();
    ItemSet current;

    for (int i = 0; i < instances.numAttributes(); i++) {
        if (instances.attribute(i).isNumeric())
            throw new Exception("Can't handle numeric attributes!");
        int j = (treatZeroAsMissing) ? 1 : 0;
        for (; j < instances.attribute(i).numValues(); j++) {
            current = new AprioriItemSet(instances.numInstances());
            current.setTreatZeroAsMissing(treatZeroAsMissing);
            current.m_items = new int[instances.numAttributes()];
            for (int k = 0; k < instances.numAttributes(); k++)
                current.m_items[k] = -1;
            current.m_items[i] = j;
            setOfItemSets.addElement(current);
        }
    }
    return setOfItemSets;
}

From source file:cba.ItemSet.java

License:Open Source License

/**
 * Converts the header info of the given set of instances into a set 
 * of item sets (singletons). The ordering of values in the header file 
 * determines the lexicographic order.
 *
 * @param instances the set of instances whose header info is to be used
 * @return a set of item sets, each containing a single item
 * @exception Exception if singletons can't be generated successfully
 */
public static FastVector singletons(Instances instances) throws Exception {

    FastVector setOfItemSets = new FastVector();
    ItemSet current;

    for (int i = 0; i < instances.numAttributes(); i++) {
        if (instances.attribute(i).isNumeric())
            throw new Exception("Can't handle numeric attributes!");
        for (int j = 0; j < instances.attribute(i).numValues(); j++) {
            current = new ItemSet(instances.numInstances());
            current.m_items = new int[instances.numAttributes()];
            for (int k = 0; k < instances.numAttributes(); k++)
                current.m_items[k] = -1;
            current.m_items[i] = j;

            setOfItemSets.addElement(current);
        }
    }
    return setOfItemSets;
}