Example usage for weka.core Instances enumerateInstances

List of usage examples for weka.core Instances enumerateInstances

Introduction

On this page you can find example usage for weka.core Instances enumerateInstances.

Prototype

public Enumeration<Instance> enumerateInstances() 

Document

Returns an enumeration of all instances in the dataset.
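
A minimal, self-contained sketch of a typical call is shown below. It assumes Weka 3.7 or later (where enumerateInstances() returns a generic Enumeration<Instance>) and uses a placeholder ARFF path (data/iris.arff); it simply prints every instance in the dataset.

import java.util.Enumeration;

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class EnumerateInstancesExample {
    public static void main(String[] args) throws Exception {
        // Load a dataset (the path is a placeholder; substitute your own ARFF file).
        DataSource source = new DataSource("data/iris.arff");
        Instances data = source.getDataSet();

        // Iterate over every instance in the dataset.
        Enumeration<Instance> instEnum = data.enumerateInstances();
        while (instEnum.hasMoreElements()) {
            Instance inst = instEnum.nextElement();
            System.out.println(inst);
        }
    }
}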

Usage

From source file: lascer.WekaClassifier.java

License: Open Source License

/**
 * Generates the classifier.
 *
 * @param data  the data to be used.
 *
 * @exception Exception  if the classifier can't be built successfully.
 */
public void buildClassifier(Instances data) throws Exception {
    weka.coreExtended.Instances extendedInstances;
    weka.coreExtended.BasicInstance extInst;
    weka.coreExtended.BasicAttribute classAttribut;
    de.unistuttgart.commandline.Option formelnArtOption;
    de.unistuttgart.commandline.Option formelnKlasseOption;
    de.unistuttgart.commandline.Option loggingSwitch;
    Instance readInst;
    Beispieldaten invDatensatz;
    StringReader stringReader;
    Enumeration instEnum;
    Enumeration attribEnum;
    PraedErzParameter praedErzParameter = null;
    KonzErzParameter konzErzParameter = null;
    Pruning pruning;
    String formelArt;
    String formelKlasse;
    String optionWert;
    float posPruneAnt, negPruneAnt;
    int instNumber;
    boolean unbekannteWertBsp;

    Steuerung.parseArguments(parser);

    formelArt = Konstanten.WEKA_FORMEL_ART;
    formelnArtOption = parser.getOption("formelArt");
    if (parser.isEnabled(formelnArtOption)) {
        optionWert = parser.getParameter(formelnArtOption);
        if (!optionWert.equals("dis") && !optionWert.equals("kon") && !optionWert.equals("beste")) {

            System.err.println("Wert der Option formelArt unzulässig");
            System.err.println("Zulässig: " + formelnArtOption.toString());
            throw (new RuntimeException("Wert von Option unzulässig."));
        }
        formelArt = optionWert;
    }

    formelKlasse = Konstanten.WEKA_FORMEL_KLASSE;
    formelnKlasseOption = parser.getOption("formelKlasse");
    if (parser.isEnabled(formelnKlasseOption)) {
        optionWert = parser.getParameter(formelnKlasseOption);
        if (!optionWert.equals("pos") && !optionWert.equals("neg") && !optionWert.equals("beste")
                && !optionWert.equals("beide")) {

            System.err.println("Wert der Option formelKlasse unzulässig");
            System.err.println("Zulässig: " + formelnKlasseOption.toString());
            throw (new RuntimeException("Wert von Option unzulässig."));
        }
        formelKlasse = optionWert;
    }

    loggingSwitch = parser.getOption("logging");
    if (debugMode || parser.isEnabled(loggingSwitch)) {
        Steuerung.setLogLevel(Konstanten.LOGGING_LEVEL);
    }

    // Determine the parameters.
    unbekannteWertBsp = Steuerung.unbekannteWertBeispiele(parser);
    posPruneAnt = Steuerung.posPruneAnteil(parser);
    negPruneAnt = Steuerung.negPruneAnteil(parser);
    praedErzParameter = Steuerung.praedErzParameter(parser);
    konzErzParameter = Steuerung.konzErzParameter(parser);

    // Read the data and create the instances object.
    instNumber = data.numInstances();
    stringReader = new StringReader(data.toString());
    extendedInstances = new weka.coreExtended.Instances(stringReader, instNumber);
    instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        readInst = (Instance) instEnum.nextElement();
        extInst = new weka.coreExtended.BasicInstance(readInst.weight(), readInst.toDoubleArray());
        extendedInstances.addBasicInstance(extInst);
    }

    // Create the datasets.
    posDatensatz = ArffDateiEinlesen.beispieldaten(extendedInstances, unbekannteWertBsp);
    negDatensatz = posDatensatz.kopie(true);

    // Create the list of attributes.
    attributListe = new LinkedList();
    attribEnum = extendedInstances.enumerateBasicAttributes();
    while (attribEnum.hasMoreElements()) {
        attributListe.add(attribEnum.nextElement());
    }

    // Determine the classification values.
    classAttribut = extendedInstances.basicClassAttribute();
    wekaClassTrue = classAttribut.indexOfValue("true");
    wekaClassFalse = classAttribut.indexOfValue("false");

    // Generate the formula for the class of positive examples.
    if (formelKlasse.equals("pos") || formelKlasse.equals("beste") || formelKlasse.equals("beide")) {

        posFormel = generatedFormula(posDatensatz, praedErzParameter, konzErzParameter, formelArt);
    }

    // Generate the formula for the class of negative examples.
    if (formelKlasse.equals("neg") || formelKlasse.equals("beste") || formelKlasse.equals("beide")) {

        negFormel = generatedFormula(negDatensatz, praedErzParameter, konzErzParameter, formelArt);
    }

    if (formelKlasse.equals("beste")) {
        // Discard the worse formula.
        if (negFormel.istBesser(posFormel)) {
            posFormel = null;
        } else {
            negFormel = null;
        }
    }

    if ((posPruneAnt > 0) || (negPruneAnt > 0)) {
        pruning = new Pruning();

        if (posFormel != null) {
            posDatensatz = pruning.reduzierteDaten(posDatensatz, posFormel, posPruneAnt, negPruneAnt);
            posFormel = generatedFormula(posDatensatz, praedErzParameter, konzErzParameter, formelArt);
        }

        if (negFormel != null) {
            negDatensatz = pruning.reduzierteDaten(negDatensatz, negFormel, negPruneAnt, posPruneAnt);
            negFormel = generatedFormula(negDatensatz, praedErzParameter, konzErzParameter, formelArt);
        }
    }
}

From source file: machine_learing_clasifier.MyC45.java

public double BestContinousAttribute(Instances i, Attribute att) {

    i.sort(att);
    Enumeration enumForMissingAttr = i.enumerateInstances();
    double temp = i.get(0).classValue();
    double igtemp = 0;
    double bestthreshold = 0;
    double a;
    double b = i.get(0).value(att);
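    // Walk the sorted instances: each time the class value changes, the midpoint
    // between the current attribute value and the value recorded at the previous
    // change becomes a candidate threshold; keep the one with the highest gain.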
    while (enumForMissingAttr.hasMoreElements()) {
        Instance inst = (Instance) enumForMissingAttr.nextElement();
        if (temp != inst.classValue()) {
            temp = inst.classValue();
            a = b;
            b = inst.value(att);
            double threshold = a + ((b - a) / 2);
            double igtemp2 = computeInformationGainContinous(i, att, threshold);
            if (igtemp < igtemp2) {
                bestthreshold = threshold;
                igtemp = igtemp2;
            }

        }

    }
    return bestthreshold;
}

From source file: moa.classifiers.rules.GeRules.java

License: Open Source License

public static void main(String[] args) throws Exception {
    // TODO Auto-generated method stub
    //ArffFileStream arffFileStream = new ArffFileStream("resources/UCI_KDD/nominal/cmc.arff", -1);

    // read arff file WEKA way
    DataSource source = new DataSource("data/cmc.arff");

    // stream generator
    RandomTreeGenerator treeGenerator = new RandomTreeGenerator();
    treeGenerator.numClassesOption.setValue(5);
    treeGenerator.numNumericsOption.setValue(0);
    treeGenerator.prepareForUse();

    // HoeffdingRules classifier
    GeRules gErules = new GeRules();
    gErules.prepareForUse();

    // load data into instances set
    Instances data = source.getDataSet();

    // setting class attribute if the data format does not provide this information
    // For example, the XRFF format saves the class attribute information as well
    if (data.classIndex() == -1)
        data.setClassIndex(data.numAttributes() - 1);

    // Using Prism classifier
    //hoeffdingRules.learnRules(Collections.list(data.enumerateInstances()));
    for (Instance instance : Collections.list(data.enumerateInstances())) {
        gErules.trainOnInstanceImpl(instance);

        gErules.correctlyClassifies(instance);
    }

    Instance anInstance = Collections.list(data.enumerateInstances()).get(10);
    System.out.println(anInstance);
    for (Rule aRule : gErules.RulesCoveredInstance(anInstance)) {

        System.out.println(aRule.printRule());
    }

    for (Rule aRule : gErules.rulesList) {
        System.out.println(aRule.printRule());
    }

}

From source file: model.clustering.Clustering.java

public String filledFile(Instances data, int numOfClusters, String remove) throws Exception {

    String mainData = data.toString();
    int index = mainData.indexOf("@data");
    String clusterData = mainData.substring(0, index + 6);

    Remove removeFilter = new Remove();
    removeFilter.setAttributeIndices(remove);

    kMeansCLusterer = new SimpleKMeans();
    kMeansCLusterer.setNumClusters(numOfClusters);

    FilteredClusterer filteredClusterer = new FilteredClusterer();
    filteredClusterer.setClusterer(kMeansCLusterer);
    filteredClusterer.setFilter(removeFilter);
    filteredClusterer.buildClusterer(data);

    Enumeration<Instance> newData = data.enumerateInstances();

    eval = new ClusterEvaluation();
    eval.setClusterer(filteredClusterer);
    eval.evaluateClusterer(data);

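    // Assign each instance to a cluster and append the instance, followed by
    // its cluster index, as a new line of the ARFF-style output.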
    while (newData.hasMoreElements()) {

        Instance i = (Instance) newData.nextElement();
        int kluster = filteredClusterer.clusterInstance(i);
        String instanceString = i.toString() + "," + kluster;
        clusterData = clusterData + instanceString + "\n";

    }
    return clusterData;
}

From source file: myclassifier.MyC45.java

/**
 * Method for building the ID3 tree.
 *
 * @param data the training data
 * @exception Exception if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {

    // Check if no instances have reached this node.
    if (data.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = -1; //Instance.missingValue();
        m_Distribution = new double[data.numClasses()];
        return;
    }

    // Compute attribute with maximum information gain.
    double[] gainRatios = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        gainRatios[att.index()] = computeGainRatio(data, att);
    }
    m_Attribute = data.attribute(Utils.maxIndex(gainRatios));

    // Make leaf if information gain is zero. 
    // Otherwise create successors.

    if (Utils.eq(gainRatios[m_Attribute.index()], 0)) {
        m_Attribute = null;
        m_Distribution = new double[data.numClasses()];
        Enumeration instEnum = data.enumerateInstances();
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            m_Distribution[(int) inst.classValue()]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    } else {
        Instances[] splitData = splitData(data, m_Attribute);
        m_Successors = new MyC45[m_Attribute.numValues()];
        for (int j = 0; j < m_Attribute.numValues(); j++) {
            m_Successors[j] = new MyC45();
            m_Successors[j].makeTree(splitData[j]);
        }
    }
}

From source file: myclassifier.MyC45.java

private double computeEntropyFromData(Instances data) throws Exception {
    double[] classCounts = new double[data.numClasses()];
    Enumeration instEnum = data.enumerateInstances();

    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        classCounts[(int) inst.classValue()]++;
    }

    double entropy = 0;

    for (int j = 0; j < data.numClasses(); j++) {
        if (classCounts[j] > 0)
            entropy -= (double) (classCounts[j] / data.numInstances())
                    * Utils.log2((double) classCounts[j] / data.numInstances());
    }

    //return entropy + Utils.log2(data.numInstances());
    return entropy;
}

From source file: myclassifier.MyC45.java

/**
 * Splits a dataset according to the values of a nominal attribute.
 *
 * @param data the data which is to be split
 * @param att the attribute to be used for splitting
 * @return the sets of instances produced by the split
 */
private Instances[] splitData(Instances data, Attribute att) {
    Instances[] splitData = new Instances[att.numValues()];

    for (int j = 0; j < att.numValues(); j++)
        splitData[j] = new Instances(data, data.numInstances());

    Enumeration instEnum = data.enumerateInstances();

    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        splitData[(int) inst.value(att)].add(inst);
    }
    return splitData;
}

From source file: myclassifier.myC45Pack.ClassDistribution.java

/**
 * Constructor that creates a distribution from a single dataset.
 * @param dataSet the dataset to build the distribution from
 * 
 * @exception Exception if something goes wrong
 */
public ClassDistribution(Instances dataSet) throws Exception {
    w_perClassPerSubdataset = new double[1][dataSet.numClasses()];
    w_perSubdataset = new double[1];
    w_perClass = new double[dataSet.numClasses()];
    totalWeights = 0;

    Enumeration E = dataSet.enumerateInstances();
    while (E.hasMoreElements()) {
        Instance inst = (Instance) E.nextElement();
        addInstance(0, inst);
    }
}

From source file: myclassifier.myC45Pack.ClassDistribution.java

/**
 * Creates a distribution according to the given instances and
 * split model.
 *
 * @exception Exception if something goes wrong
 */

public ClassDistribution(Instances source, C45ClassifierSplitModel modelToUse) throws Exception {

    int index;
    Instance instance;
    double[] weights;

    w_perClassPerSubdataset = new double[modelToUse.numSubsets()][0];
    w_perSubdataset = new double[modelToUse.numSubsets()];
    totalWeights = 0;
    w_perClass = new double[source.numClasses()];
    for (int i = 0; i < modelToUse.numSubsets(); i++) {
        w_perClassPerSubdataset[i] = new double[source.numClasses()];
    }
    Enumeration E = source.enumerateInstances();
    while (E.hasMoreElements()) {
        instance = (Instance) E.nextElement();
        index = modelToUse.getSubsetIndex(instance);
        if (index != -1) {
            addInstance(index, instance);
        } else {
            weights = modelToUse.getWeights(instance);
            addWeights(instance, weights);
        }
    }
}

From source file: myclassifier.myC45Pack.ClassDistribution.java

/**
 * Adds all instances with unknown values for the given attribute, weighted
 * according to the frequency of instances in each bag.
 *
 * @exception Exception if something goes wrong
 */
public void addInstWithMissValue(Instances dataSet, int attIndex) throws Exception {

    double[] valueProbs;
    double weight, newWeight;
    int classIndex;
    Instance instance;

    valueProbs = new double[w_perSubdataset.length];
    for (int i = 0; i < w_perSubdataset.length; i++) {
        if (totalWeights == 0) {
            valueProbs[i] = 1.0 / valueProbs.length;
        } else {
            valueProbs[i] = w_perSubdataset[i] / totalWeights;
        }
    }

    Enumeration E = dataSet.enumerateInstances();
    while (E.hasMoreElements()) {
        instance = (Instance) E.nextElement();
        if (instance.isMissing(attIndex)) {
            classIndex = (int) instance.classValue();
            weight = instance.weight();
            w_perClass[classIndex] = w_perClass[classIndex] + weight;
            totalWeights += weight;
            for (int i = 0; i < w_perSubdataset.length; i++) {
                newWeight = valueProbs[i] * weight;
                w_perClassPerSubdataset[i][classIndex] += newWeight;
                w_perSubdataset[i] += newWeight;
            }
        }
    }
}