Example usage for weka.core Instances classIndex

List of usage examples for weka.core Instances classIndex

Introduction

On this page you can find example usages of weka.core Instances classIndex.

Prototype


public int classIndex()

Source Link

Document

Returns the class attribute's index.

Usage

From source file:id3classifier.ID3Classifiers.java

/**
 * Builds the decision tree from the given training data.
 *
 * Every instance, and every attribute except the class attribute, is copied
 * into a plain list; both lists are handed to {@code buildTree} and the
 * result is stored in {@code tree}.
 *
 * @param instances the training data
 * @throws Exception as declared by the overridden method signature
 */
@Override
public void buildClassifier(Instances instances) throws Exception {
    final int instanceCount = instances.numInstances();
    final int attributeCount = instances.numAttributes();
    final int classPosition = instances.classIndex();

    // Collect all training instances in their original order.
    List<Instance> rows = new ArrayList<>(instanceCount);
    for (int row = 0; row < instanceCount; row++) {
        rows.add(instances.instance(row));
    }

    // Collect the candidate split attributes: everything but the class.
    List<Attribute> candidates = new ArrayList<>(attributeCount);
    for (int column = 0; column < attributeCount; column++) {
        if (column == classPosition) {
            continue;
        }
        candidates.add(instances.attribute(column));
    }

    tree = buildTree(rows, candidates);
}

From source file:id3j48.WekaAccess.java

/**
 * Loads the data set stored in {@code filename} inside {@code datasetFolder}
 * through {@code ConverterUtils.DataSource}.
 *
 * If the loaded data has no class index (-1), the last attribute is made
 * the class attribute.
 *
 * @param filename name of the file, relative to {@code datasetFolder}
 * @return the loaded instances with a class index set
 * @throws Exception as declared by the signature
 */
public static Instances readArff(String filename) throws Exception {
    String path = datasetFolder + File.separator + filename;
    Instances loaded = new ConverterUtils.DataSource(path).getDataSet();

    boolean classUnset = loaded.classIndex() == -1;
    if (classUnset) {
        int lastAttribute = loaded.numAttributes() - 1;
        loaded.setClassIndex(lastAttribute);
    }
    return loaded;
}

From source file:id3j48.WekaAccess.java

/**
 * Loads the CSV file {@code filename} inside {@code datasetFolder} through a
 * {@code CSVLoader}.
 *
 * If the loaded data has no class index (-1), the last attribute is made
 * the class attribute.
 *
 * @param filename name of the CSV file, relative to {@code datasetFolder}
 * @return the loaded instances with a class index set
 * @throws Exception as declared by the signature
 */
public static Instances readCsv(String filename) throws Exception {
    CSVLoader loader = new CSVLoader();
    File csvFile = new File(datasetFolder + File.separator + filename);
    loader.setSource(csvFile);

    Instances loaded = loader.getDataSet();
    boolean classUnset = loaded.classIndex() == -1;
    if (classUnset) {
        int lastAttribute = loaded.numAttributes() - 1;
        loaded.setClassIndex(lastAttribute);
    }
    return loaded;
}

From source file:imba.classifier.NBTubes.java

/**
 * Builds the frequency tables of this classifier from the training data.
 *
 * Steps, in order:
 * 1. reset the classifier's fields and remember {@code data} as header;
 * 2. scan the attributes for a numeric one and, if found, pass the data
 *    through a {@code Normalize} filter;
 * 3. pass the (possibly normalised) data through {@code Discretize} or
 *    {@code NumericToNominal}, depending on the {@code filter} field, and
 *    store the result in {@code dataset};
 * 4. allocate {@code dataClassifier} / {@code infoClassifier} as
 *    [attribute][attribute value][class value] tables, skipping the class
 *    attribute;
 * 5. count value/class co-occurrences into {@code dataClassifier} and class
 *    totals into {@code sumClass};
 * 6. fill {@code infoClassifier} with count / class total.
 *
 * Filter exceptions are logged at SEVERE level and not rethrown.
 *
 * NOTE(review): {@code classIdx} is read in step 2 but only assigned from
 * {@code data.classIndex()} before step 4, so the scan uses whatever value
 * the field held on entry - confirm this is intended.
 * NOTE(review): {@code wasNumeric} is never reset in this method; a value
 * left over from an earlier call would skip the scan - confirm.
 *
 * @param data the training instances
 */
@Override
public void buildClassifier(Instances data) {
    dataClassifier = new ArrayList<>();
    infoClassifier = new ArrayList<>();
    validAttribute = new ArrayList<>();
    dataset = null;
    sumClass = null;
    dataSize = 0;
    header_Instances = data;

    Filter f;
    int i, j, k, l, m;
    int sumVal;

    int numAttr = data.numAttributes(); // this count includes the class attribute, i.e. feature attributes + 1

    // Look for the first numeric attribute, stepping over index classIdx.
    i = 0;
    while (i < numAttr && wasNumeric == false) {
        if (i == classIdx) {
            i++;
        }

        // i may have been pushed to numAttr by the skip above, hence the bound check
        if (i != numAttr && data.attribute(i).isNumeric()) {
            wasNumeric = true;
        }

        i++;
    }

    Instance p;

    // apply the normalisation filter when a numeric attribute was found
    if (wasNumeric) {
        f = new Normalize();
        //Filter f = new NumericToNominal();
        try {
            f.setInputFormat(data);

            for (Instance i1 : data) {
                f.input(i1);
            }

            f.batchFinished();
        } catch (Exception ex) {
            Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
        }

        dataset = f.getOutputFormat();

        // drain the filter's output queue into dataset
        while ((p = f.output()) != null) {
            dataset.add(p);
        }
    }

    //f = new NumericToNominal();
    // choose the filter that turns numeric attributes into nominal ones
    if (filter.equals("Discretize")) {
        f = new Discretize();
    } else {
        f = new NumericToNominal();
    }

    try {
        // feed the normalised data if it exists, otherwise the raw data
        if (wasNumeric) {
            f.setInputFormat(dataset);
            for (Instance i1 : dataset) {
                f.input(i1);
            }
        } else {
            f.setInputFormat(data);
            for (Instance i1 : data) {
                f.input(i1);
            }
        }

        f.batchFinished();
    } catch (Exception ex) {
        Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
    }

    dataset = null;
    dataset = f.getOutputFormat();

    // drain the filter's output queue into dataset
    while ((p = f.output()) != null) {
        dataset.add(p);
    }

    //building data structure
    classIdx = data.classIndex();

    dataSize = data.size();

    // fill the data and info classifiers with empty (zeroed) tables
    // j is the attribute index in dataset; i equals j until the class
    // attribute is reached and runs ahead of it afterwards, so "j < i"
    // means the class attribute has been passed and the table index m
    // is one less than j.
    i = 0;
    j = i;
    while (j < numAttr) {
        if (i == classIdx) {
            // class attribute: no table is allocated for it
            i++;
        } else {
            dataClassifier.add(new ArrayList<>());
            infoClassifier.add(new ArrayList<>());

            if (j < i) {
                m = j - 1;
            } else {
                m = j;
            }

            // one row per value of attribute j
            k = 0;
            while (k < dataset.attribute(j).numValues()) {
                dataClassifier.get(m).add(new ArrayList<>());
                infoClassifier.get(m).add(new ArrayList<>());

                // one cell per class value, initialised to zero
                l = 0;
                while (l < dataset.attribute(classIdx).numValues()) {
                    dataClassifier.get(m).get(k).add(0);
                    infoClassifier.get(m).get(k).add(0.0);

                    l++;
                }

                k++;
            }
        }

        i++;
        j++;
    }

    // fill the data classifier with counts taken from dataset
    sumClass = new int[data.numClasses()];

    i = 0;
    while (i < dataset.size()) {
        // k is the attribute index; j plays the same "runs ahead after the
        // class attribute" role that i played in the allocation loop above
        j = 0;
        k = j;
        while (k < dataset.numAttributes()) {
            if (j == classIdx) {
                j++;
            } else {
                if (k < j) {
                    m = k - 1;
                } else {
                    m = k;
                }

                // increment the count for (attribute m, value of attribute k, class value)
                dataClassifier.get(m).get((int) dataset.get(i).value(k)).set(
                        (int) dataset.get(i).value(classIdx),
                        dataClassifier.get(m).get((int) dataset.get(i).value(k))
                                .get((int) dataset.get(i).value(classIdx)) + 1);

                // count each instance's class once, while handling table 0
                if (m == 0) {
                    sumClass[(int) dataset.get(i).value(classIdx)]++;
                }

            }

            k++;
            j++;
        }

        i++;
    }

    // convert the counts into double values: count / total of class k
    i = 0;
    while (i < dataClassifier.size()) {
        j = 0;
        while (j < dataClassifier.get(i).size()) {
            k = 0;
            while (k < dataClassifier.get(i).get(j).size()) {
                infoClassifier.get(i).get(j).set(k, (double) dataClassifier.get(i).get(j).get(k) / sumClass[k]);

                k++;
            }

            j++;
        }

        i++;
    }

    /*
    // check whether any value of each attribute
    // represents more than 80% of the data
    i = 0;
    while (i < dataClassifier.size()) {
    j = 0;
    while (j < dataClassifier.get(i).size()) {


        j++;
    }

    i++;
    }
    */
}

From source file:irisdata.IrisData.java

/**
 * Loads the iris data, splits it into a training and a test set, runs the
 * {@code HardCodedClassifier} on it and prints an evaluation summary.
 *
 * @param args the command line arguments (not read)
 * @throws java.lang.Exception as declared by the signature
 */
public static void main(String[] args) throws Exception {

    final String csvPath = "/Users/paul/Desktop/BYU-Idaho/Spring2015/CS450/iris.csv";

    Instances iris = new DataSource(csvPath).getDataSet();

    // Fall back to the last attribute when the source did not name a class.
    if (iris.classIndex() == -1) {
        int lastAttribute = iris.numAttributes() - 1;
        iris.setClassIndex(lastAttribute);
    }

    // Fixed seed so that the shuffle is repeatable.
    iris.randomize(new Random(1));

    // First pass: the filter is configured with a percentage of 30.
    RemovePercentage splitter = new RemovePercentage();
    splitter.setPercentage(30);
    splitter.setInputFormat(iris);
    Instances trainingSet = Filter.useFilter(iris, splitter);

    // Second pass: same filter with the selection inverted, for the test set.
    splitter.setInvertSelection(true);
    Instances testSet = Filter.useFilter(iris, splitter);

    HardCodedClassifier model = new HardCodedClassifier();
    model.buildClassifier(trainingSet);

    Evaluation evaluation = new Evaluation(trainingSet);
    evaluation.evaluateModel(model, testSet);

    String summary = evaluation.toSummaryString();
    System.out.println("Results: " + summary);

}

From source file:irisdriver.IrisDriver.java

/**
 * Interactive driver: asks the user for a model file and a test-set file,
 * classifies every instance of the test set with the {@code Iris} model and
 * writes the predicted labels to {@code D:\output_file.txt} (and to stdout).
 *
 * Nothing happens if either file dialog is cancelled. Any exception is
 * logged at SEVERE level; the output writer is closed in every case.
 *
 * @param args the command line arguments (not read)
 */
public static void main(String[] args) {
    try {
        // Ask for the serialized model.
        JFileChooser chooserModel = new JFileChooser();
        chooserModel.setCurrentDirectory(new java.io.File("."));
        chooserModel.setDialogTitle("Choose the model");
        chooserModel.setFileSelectionMode(JFileChooser.FILES_AND_DIRECTORIES);
        chooserModel.setAcceptAllFileFilterUsed(true);
        if (chooserModel.showOpenDialog(null) != JFileChooser.APPROVE_OPTION) {
            return;
        }
        String pathModel = chooserModel.getSelectedFile().getPath();
        Iris irisModel = new Iris(pathModel);

        // Ask for the test data set.
        JFileChooser chooserTestSet = new JFileChooser();
        chooserTestSet.setDialogTitle("Choose TEST SET");
        chooserTestSet.setFileSelectionMode(JFileChooser.FILES_AND_DIRECTORIES);
        chooserTestSet.setAcceptAllFileFilterUsed(true);
        if (chooserTestSet.showOpenDialog(null) != JFileChooser.APPROVE_OPTION) {
            return;
        }
        String pathTestSet = chooserTestSet.getSelectedFile().getPath();

        // try-with-resources: the writer is closed (and thereby flushed) even
        // when loading or classifying throws.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter("D:\\output_file.txt"))) {

            ConverterUtils.DataSource unlabeledSource = new ConverterUtils.DataSource(pathTestSet);
            Instances unlabeledData = unlabeledSource.getDataSet();
            if (unlabeledData.classIndex() == -1) {
                unlabeledData.setClassIndex(unlabeledData.numAttributes() - 1);
            }
            int classIndex = unlabeledData.classIndex();

            // Attribute-name -> value pairs of the instance being classified.
            Hashtable<String, String> values = new Hashtable<String, String>();

            for (int i = 0; i < unlabeledData.numInstances(); i++) {
                Instance ins = unlabeledData.instance(i);

                for (int j = 0; j < ins.numAttributes(); j++) {
                    // Leave out the label itself, wherever the class attribute sits.
                    if (j == classIndex) {
                        continue;
                    }
                    String attrib = ins.attribute(j).name();
                    double val = ins.value(ins.attribute(j));
                    values.put(attrib, String.valueOf(val));
                }

                String predictedLabel = irisModel.classifySpecies(values);
                System.out.println("Classification: " + predictedLabel);
                values.clear();

                // NOTE(review): no line separator is written between labels,
                // as in the original - confirm the single-line output is wanted.
                writer.write("The label is: " + predictedLabel);
            }
        }

    } catch (Exception ex) {
        Logger.getLogger(IrisDriver.class.getName()).log(Level.SEVERE, null, ex);
    }

}

From source file:j48.C45PruneableClassifierTreeG.java

License:Open Source License

/**
 * finds new nodes that improve accuracy and grafts them onto the tree
 *
 * Outline of the visible code: the instances of {@code fulldata} are split
 * into those at the leaf ({@code l}, weights from {@code iindex[0]}) and
 * those in the atbop ({@code n}, weights from {@code iindex[1]}); candidate
 * tests are collected per attribute into the list {@code t}, sorted,
 * stripped of leading entries that predict the leaf's own class, built
 * against {@code l} and finally handed to the parent node.
 *
 * Side effects visible here: {@code iindex} is compacted in place while the
 * instances are sorted into {@code l} and {@code n}; a failure to construct
 * a {@code GraftSplit} terminates the JVM via {@code System.exit(1)}.
 *
 * @param fulldata the instances in whole trainset
 * @param iindex records num tests each instance has failed up to this node
 * @param limits the upper/lower limits for numeric attributes
 * @param parent the node immediately before the current one
 * @param pLaplace laplace for leaf, calculated by parent (in case leaf empty)
 * @param pLeafClass class of leaf, determined by parent (in case leaf empty)
 * @throws Exception as declared by the signature
 */
private void findGraft(Instances fulldata, double[][] iindex, double[][] limits, ClassifierTree parent,
        double pLaplace, int pLeafClass) throws Exception {

    // get the class for this leaf
    int leafClass = (m_isEmpty) ? pLeafClass : localModel().distribution().maxClass();

    // get the laplace value for this leaf
    double leafLaplace = (m_isEmpty) ? pLaplace : laplaceLeaf(leafClass);

    // sort the instances into those at the leaf, those in atbop, and discarded
    Instances l = new Instances(fulldata, fulldata.numInstances());
    Instances n = new Instances(fulldata, fulldata.numInstances());
    int lcount = 0;
    int acount = 0;
    for (int x = 0; x < fulldata.numInstances(); x++) {
        // discarded: no positive entry in either row of iindex
        if (iindex[0][x] <= 0 && iindex[1][x] <= 0)
            continue;
        // NOTE(review): this test is "!= 0" while the atbop test below is "> 0" - confirm
        if (iindex[0][x] != 0) {
            l.add(fulldata.instance(x));
            l.instance(lcount).setWeight(iindex[0][x]);
            // move instance's weight in iindex to same index as in l
            iindex[0][lcount++] = iindex[0][x];
        }
        if (iindex[1][x] > 0) {
            n.add(fulldata.instance(x));
            n.instance(acount).setWeight(iindex[1][x]);
            // move instance's weight in iindex to same index as in n
            iindex[1][acount++] = iindex[1][x];
        }
    }

    // quick pre-check: is there any class other than the leaf's class whose
    // laplace estimate over the atbop could beat the leaf's laplace?
    boolean graftPossible = false;
    double[] classDist = new double[n.numClasses()];
    for (int x = 0; x < n.numInstances(); x++) {
        if (iindex[1][x] > 0 && !n.instance(x).classIsMissing())
            classDist[(int) n.instance(x).classValue()] += iindex[1][x];
    }

    for (int cVal = 0; cVal < n.numClasses(); cVal++) {
        // both numerator and denominator use this class's weight only
        double theLaplace = (classDist[cVal] + 1.0) / (classDist[cVal] + 2.0);
        if (cVal != leafClass && (theLaplace > leafLaplace)
                && (biprob(classDist[cVal], classDist[cVal], leafLaplace) > m_BiProbCrit)) {
            graftPossible = true;
            break;
        }
    }

    if (!graftPossible) {
        return;
    }

    // 1. Initialize to {} a set of tuples t containing potential tests
    ArrayList t = new ArrayList();

    // go through each attribute
    for (int a = 0; a < n.numAttributes(); a++) {
        if (a == n.classIndex())
            continue; // skip the class

        // sort instances in atbop by $a
        int[] sorted = sortByAttribute(n, a);

        // 2. For each continuous attribute $a:
        if (n.attribute(a).isNumeric()) {

            // find min and max values for this attribute at the leaf
            boolean prohibited = false;
            double minLeaf = Double.POSITIVE_INFINITY;
            double maxLeaf = Double.NEGATIVE_INFINITY;
            for (int i = 0; i < l.numInstances(); i++) {
                // a leaf-class instance with a missing value rules this attribute out
                if (l.instance(i).isMissing(a)) {
                    if (l.instance(i).classValue() == leafClass) {
                        prohibited = true;
                        break;
                    }
                }
                double value = l.instance(i).value(a);
                if (!m_relabel || l.instance(i).classValue() == leafClass) {
                    if (value < minLeaf)
                        minLeaf = value;
                    if (value > maxLeaf)
                        maxLeaf = value;
                }
            }
            if (prohibited) {
                continue;
            }

            // (a) find values of
            //    $n: instances in atbop (already have that, actually)
            //    $v: a value for $a that exists for a case in the atbop, where
            //       $v is < the min value for $a for a case at the leaf which
            //       has the class $c, and $v is > the lowerlimit of $a at
            //       the leaf.
            //       (note: error in original paper stated that $v must be
            //       smaller OR EQUAL TO the min value).
            //    $k: $k is a class
            //  that maximize L' = Laplace({$x: $x contained in cases($n)
            //    & value($a,$x) <= $v & value($a,$x) > lowerlim($l,$a)}, $k).
            // (minBestClass is assigned below but not read in this method)
            double minBestClass = Double.NaN;
            double minBestLaplace = leafLaplace;
            double minBestVal = Double.NaN;
            double minBestPos = Double.NaN;
            double minBestTotal = Double.NaN;
            double[][] minBestCounts = null;
            double[][] counts = new double[2][n.numClasses()];
            for (int x = 0; x < n.numInstances(); x++) {
                if (n.instance(sorted[x]).isMissing(a))
                    break; // missing are sorted to end: no more valid vals

                double theval = n.instance(sorted[x]).value(a);
                if (m_Debug)
                    System.out.println("\t " + theval);

                if (theval <= limits[a][0]) {
                    if (m_Debug)
                        System.out.println("\t  <= lowerlim: continuing...");
                    continue;
                }
                // note: error in paper would have this read "theVal > minLeaf"
                if (theval >= minLeaf) {
                    if (m_Debug)
                        System.out.println("\t  >= minLeaf; breaking...");
                    break;
                }
                counts[0][(int) n.instance(sorted[x]).classValue()] += iindex[1][sorted[x]];

                // absorb all following instances that share the same value
                if (x != n.numInstances() - 1) {
                    int z = x + 1;
                    while (z < n.numInstances() && n.instance(sorted[z]).value(a) == theval) {
                        z++;
                        x++;
                        counts[0][(int) n.instance(sorted[x]).classValue()] += iindex[1][sorted[x]];
                    }
                }

                // work out the best laplace/class (for <= theval)
                double total = Utils.sum(counts[0]);
                for (int c = 0; c < n.numClasses(); c++) {
                    double temp = (counts[0][c] + 1.0) / (total + 2.0);
                    if (temp > minBestLaplace) {
                        minBestPos = counts[0][c];
                        minBestTotal = total;
                        minBestLaplace = temp;
                        minBestClass = c;
                        minBestCounts = copyCounts(counts);

                        // cut point: midway to the next value, or the value itself at the end
                        minBestVal = (x == n.numInstances() - 1) ? theval
                                : ((theval + n.instance(sorted[x + 1]).value(a)) / 2.0);
                    }
                }
            }

            // (b) add to t tuple <n,a,v,k,L',"<=">
            if (!Double.isNaN(minBestVal) && biprob(minBestPos, minBestTotal, leafLaplace) > m_BiProbCrit) {
                GraftSplit gsplit = null;
                try {
                    gsplit = new GraftSplit(a, minBestVal, 0, leafClass, minBestCounts);
                } catch (Exception e) {
                    System.err.println("graftsplit error: " + e.getMessage());
                    System.exit(1);
                }
                t.add(gsplit);
            }
            // free space
            minBestCounts = null;

            // (c) find values of
            //    n: instances in atbop (already have that, actually)
            //    $v: a value for $a that exists for a case in the atbop, where
            //       $v is > the max value for $a for a case at the leaf which
            //       has the class $c, and $v is <= the upperlimit of $a at
            //       the leaf.
            //    k: k is a class
            //   that maximize L' = Laplace({x: x contained in cases(n)
            //       & value(a,x) > v & value(a,x) <= upperlim(l,a)}, k).
            // (maxBestClass is assigned below but not read in this method)
            double maxBestClass = -1;
            double maxBestLaplace = leafLaplace;
            double maxBestVal = Double.NaN;
            double maxBestPos = Double.NaN;
            double maxBestTotal = Double.NaN;
            double[][] maxBestCounts = null;
            for (int c = 0; c < n.numClasses(); c++) { // zero the counts
                counts[0][c] = 0;
                counts[1][c] = 0; // shouldn't need to do this ...
            }

            // check smallest val for a in atbop is < upper limit
            if (n.numInstances() >= 1 && n.instance(sorted[0]).value(a) < limits[a][1]) {
                // walk the sorted instances from the largest value downwards
                for (int x = n.numInstances() - 1; x >= 0; x--) {
                    if (n.instance(sorted[x]).isMissing(a))
                        continue;

                    double theval = n.instance(sorted[x]).value(a);
                    if (m_Debug)
                        System.out.println("\t " + theval);

                    if (theval > limits[a][1]) {
                        if (m_Debug)
                            System.out.println("\t  >= upperlim; continuing...");
                        continue;
                    }
                    if (theval <= maxLeaf) {
                        if (m_Debug)
                            System.out.println("\t  < maxLeaf; breaking...");
                        break;
                    }

                    // increment counts
                    counts[1][(int) n.instance(sorted[x]).classValue()] += iindex[1][sorted[x]];

                    // absorb all preceding instances that share the same value
                    if (x != 0 && !n.instance(sorted[x - 1]).isMissing(a)) {
                        int z = x - 1;
                        while (z >= 0 && n.instance(sorted[z]).value(a) == theval) {
                            z--;
                            x--;
                            counts[1][(int) n.instance(sorted[x]).classValue()] += iindex[1][sorted[x]];
                        }
                    }

                    // work out best laplace for > theval
                    double total = Utils.sum(counts[1]);
                    for (int c = 0; c < n.numClasses(); c++) {
                        double temp = (counts[1][c] + 1.0) / (total + 2.0);
                        if (temp > maxBestLaplace) {
                            maxBestPos = counts[1][c];
                            maxBestTotal = total;
                            maxBestLaplace = temp;
                            maxBestClass = c;
                            maxBestCounts = copyCounts(counts);
                            // cut point: midway to the previous value, or the value itself at the start
                            maxBestVal = (x == 0) ? theval
                                    : ((theval + n.instance(sorted[x - 1]).value(a)) / 2.0);
                        }
                    }
                }

                // (d) add to t tuple <n,a,v,k,L',">">
                if (!Double.isNaN(maxBestVal) && biprob(maxBestPos, maxBestTotal, leafLaplace) > m_BiProbCrit) {
                    GraftSplit gsplit = null;
                    try {
                        gsplit = new GraftSplit(a, maxBestVal, 1, leafClass, maxBestCounts);
                    } catch (Exception e) {
                        System.err.println("graftsplit error:" + e.getMessage());
                        System.exit(1);
                    }
                    t.add(gsplit);
                }
            }
        } else { // must be a nominal attribute

            // 3. for each discrete attribute a for which there is no
            //    test at an ancestor of l

            // skip if this attribute has already been used
            if (limits[a][1] == 1) {
                continue;
            }

            // prohibit[v] is set when a leaf instance (of the leaf class, if
            // relabelling) has value v or a missing value for this attribute
            boolean[] prohibit = new boolean[l.attribute(a).numValues()];
            for (int aval = 0; aval < n.attribute(a).numValues(); aval++) {
                for (int x = 0; x < l.numInstances(); x++) {
                    if ((l.instance(x).isMissing(a) || l.instance(x).value(a) == aval)
                            && (!m_relabel || (l.instance(x).classValue() == leafClass))) {
                        prohibit[aval] = true;
                        break;
                    }
                }
            }

            // (a) find values of
            //       $n: instances in atbop (already have that, actually)
            //       $v: $v is a value for $a
            //       $k: $k is a class
            //     that maximize L' = Laplace({$x: $x contained in cases($n)
            //           & value($a,$x) = $v}, $k).
            double bestVal = Double.NaN;
            double bestClass = Double.NaN;
            double bestLaplace = leafLaplace;
            double[][] bestCounts = null;
            double[][] counts = new double[2][n.numClasses()];

            for (int x = 0; x < n.numInstances(); x++) {
                if (n.instance(sorted[x]).isMissing(a))
                    continue;

                // zero the counts
                for (int c = 0; c < n.numClasses(); c++)
                    counts[0][c] = 0;

                double theval = n.instance(sorted[x]).value(a);
                counts[0][(int) n.instance(sorted[x]).classValue()] += iindex[1][sorted[x]];

                // absorb all following instances that share the same value
                if (x != n.numInstances() - 1) {
                    int z = x + 1;
                    while (z < n.numInstances() && n.instance(sorted[z]).value(a) == theval) {
                        z++;
                        x++;
                        counts[0][(int) n.instance(sorted[x]).classValue()] += iindex[1][sorted[x]];
                    }
                }

                if (!prohibit[(int) theval]) {
                    // work out best laplace for = theval
                    double total = Utils.sum(counts[0]);
                    bestLaplace = leafLaplace;
                    bestClass = Double.NaN;
                    for (int c = 0; c < n.numClasses(); c++) {
                        double temp = (counts[0][c] + 1.0) / (total + 2.0);
                        if (temp > bestLaplace && biprob(counts[0][c], total, leafLaplace) > m_BiProbCrit) {
                            bestLaplace = temp;
                            bestClass = c;
                            bestVal = theval;
                            bestCounts = copyCounts(counts);
                        }
                    }
                    // add to graft list
                    if (!Double.isNaN(bestClass)) {
                        GraftSplit gsplit = null;
                        try {
                            gsplit = new GraftSplit(a, bestVal, 2, leafClass, bestCounts);
                        } catch (Exception e) {
                            System.err.println("graftsplit error: " + e.getMessage());
                            System.exit(1);
                        }
                        t.add(gsplit);
                    }
                }
            }
            // (b) add to t tuple <n,a,v,k,L',"=">
            // done this already
        }
    }

    // 4. remove from t all tuples <n,a,v,c,L,x> such that L <=
    //    Laplace(cases(l),c) or prob(x,n,Laplace(cases(l),c) <= 0.05
    //      -- checked this constraint prior to adding a tuple --

    // *** step six done before step five for efficiency ***
    // 6. for each <n,a,v,k,L,x> in t ordered on L from highest to lowest
    // order the tuples from highest to lowest laplace
    // (this actually orders lowest to highest)
    Collections.sort(t);

    // 5. remove from t all tuples <n,a,v,c,L,x> such that there is
    //    no tuple <n',a',v',k',L',x'> such that k' != c & L' < L.
    // (implemented as: drop entries from the front of the sorted list until
    //  the first one whose class differs from leafClass)
    for (int x = 0; x < t.size(); x++) {
        GraftSplit gs = (GraftSplit) t.get(x);
        if (gs.maxClassForSubsetOfInterest() != leafClass) {
            break; // reached a graft with class != leafClass, so stop deleting
        } else {
            t.remove(x);
            x--;
        }
    }

    // if no potential grafts were found, do nothing and return
    if (t.size() < 1) {
        return;
    }

    // create the distributions for each graft
    // (iterates from the end of t to the front; build errors are only printed)
    for (int x = t.size() - 1; x >= 0; x--) {
        GraftSplit gs = (GraftSplit) t.get(x);
        try {
            gs.buildClassifier(l);
            gs.deleteGraftedCases(l); // so they don't go down the other branch
        } catch (Exception e) {
            System.err.println("graftsplit build error: " + e.getMessage());
        }
    }

    // add this stuff to the tree
    ((C45PruneableClassifierTreeG) parent).setDescendents(t, this);
}

From source file:j48.GraftSplit.java

License:Open Source License

/**
 * Renders this GraftSplit as
 * {@code <attribute name><test><split value> (<laplace>) --> <class value>}.
 *
 * The test operator is chosen from {@code m_testType} (0: "<=", 1: ">",
 * 2: "=", anything else: "!="). For a nominal attribute the split value is
 * printed as the attribute's value label, otherwise as a number.
 *
 * @param data instances used to look up attribute names and value labels
 * @return a string showing this GraftSplit's information
 */
public String toString(Instances data) {

    final String operator = (m_testType == 0) ? " <= "
            : (m_testType == 1) ? " > "
            : (m_testType == 2) ? " = "
            : " != ";

    final String operand = data.attribute(m_attIndex).isNominal()
            ? data.attribute(m_attIndex).value((int) m_splitPoint)
            : Double.toString(m_splitPoint);

    StringBuilder text = new StringBuilder();
    text.append(data.attribute(m_attIndex).name());
    text.append(operator).append(operand);
    text.append(" (").append(Double.toString(m_laplace)).append(") --> ");
    text.append(data.attribute(data.classIndex()).value(m_maxClass));
    return text.toString();
}

From source file:kea.KEAFilter.java

License:Open Source License

/**
 * Sets the format of the input instances and wires up the internal filter
 * chain: a {@code KEAPhraseFilter} on the document attribute feeds a
 * {@code NumbersFilter}, whose output format becomes this filter's input
 * format.
 *
 * @param instanceInfo an Instances object containing the input
 * instance structure (any instances contained in the object are
 * ignored - only the structure is required).
 * @return always false, i.e. the output format cannot be collected immediately
 * @throws Exception if a class index is set, or if the keyphrase or
 * document attribute is not a string attribute
 */
public boolean setInputFormat(Instances instanceInfo) throws Exception {

    final boolean classIndexSet = instanceInfo.classIndex() >= 0;
    if (classIndexSet) {
        throw new Exception("Don't know what do to if class index set!");
    }

    final boolean bothAreStrings = instanceInfo.attribute(m_KeyphrasesAtt).isString()
            && instanceInfo.attribute(m_DocumentAtt).isString();
    if (!bothAreStrings) {
        throw new Exception("Keyphrase attribute and document attribute " + "need to be string attributes.");
    }

    // Stage 1: phrase filter, restricted to the document attribute.
    m_PunctFilter = new KEAPhraseFilter();
    m_PunctFilter.setAttributeIndicesArray(new int[] { m_DocumentAtt });
    m_PunctFilter.setInputFormat(instanceInfo);
    m_PunctFilter.setDisallowInternalPeriods(getDisallowInternalPeriods());

    // Stage 2: numbers filter, fed with the output format of stage 1.
    m_NumbersFilter = new NumbersFilter();
    m_NumbersFilter.setInputFormat(m_PunctFilter.getOutputFormat());

    super.setInputFormat(m_NumbersFilter.getOutputFormat());
    return false;
}

From source file:learn.Classification.Chinese.TextDirectoryLoader.java

License:Open Source License

/**
 * Return the full data set. If the structure hasn't yet been determined by
 * a call to getStructure then method should do so before processing the
 * rest of the data set.
 *
 * Every value of the class attribute names a sub-directory of the
 * configured directory; each file in such a sub-directory is read as UTF-8
 * text and becomes one instance whose class is that sub-directory. Line
 * terminators are normalised to {@code '\n'} and the text always ends with
 * one. A file that cannot be read or converted is reported on
 * {@code System.err} and skipped.
 *
 * @return the structure of the data set filled with one instance per
 *         successfully read file
 * @throws IOException
 *             if there is no source or a class sub-directory cannot be
 *             listed
 */
public Instances getDataSet() throws IOException {
    if (getDirectory() == null) {
        throw new IOException("No directory/source has been specified");
    }

    String directoryPath = getDirectory().getAbsolutePath();
    ArrayList<String> classes = new ArrayList<String>();
    Enumeration enm = getStructure().classAttribute().enumerateValues();
    while (enm.hasMoreElements()) {
        classes.add((String) enm.nextElement());
    }

    Instances data = getStructure();
    int fileCount = 0;
    for (int k = 0; k < classes.size(); k++) {
        String subdirPath = classes.get(k);
        File subdir = new File(directoryPath + File.separator + subdirPath);
        String[] files = subdir.list();
        // File.list() returns null when the path is not a directory or an
        // I/O error occurs; fail with a clear message instead of an NPE.
        if (files == null) {
            throw new IOException("Cannot list directory: " + subdir.getAbsolutePath());
        }
        for (int j = 0; j < files.length; j++) {
            String filePath = directoryPath + File.separator + subdirPath + File.separator + files[j];
            try {
                fileCount++;
                if (getDebug()) {
                    System.err.println("processing " + fileCount + " : " + subdirPath + " : " + files[j]);
                }

                // Slots: text, optional file name, class (position from classIndex()).
                double[] newInst = new double[m_OutputFilename ? 3 : 2];

                StringBuilder txtStr = new StringBuilder();
                // try-with-resources closes the reader (and the underlying
                // stream) on success and on failure alike.
                try (BufferedReader br = new BufferedReader(
                        new InputStreamReader(new FileInputStream(new File(filePath)), "UTF-8"))) {
                    String line;
                    while ((line = br.readLine()) != null) {
                        txtStr.append(line).append('\n');
                    }
                }

                newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString());
                if (m_OutputFilename) {
                    newInst[1] = (double) data.attribute(1)
                            .addStringValue(subdirPath + File.separator + files[j]);
                }
                newInst[data.classIndex()] = (double) k;
                data.add(new DenseInstance(1.0, newInst));
            } catch (Exception e) {
                // Deliberately best-effort: report the file and the cause, then carry on.
                System.err.println("failed to convert file: " + filePath + " (" + e + ")");
            }
        }
    }

    return data;
}