List of usage examples for weka.core Instances classIndex
public int classIndex()
From source file:lu.lippmann.cdb.lab.mds.UniversalMDS.java
License:Open Source License
/**
 * Builds a scatter-plot panel visualizing an MDS projection of the dataset.
 * <p>
 * One XY series is created per class value (or a single series when the
 * dataset has no class attribute), each instance is plotted at its projected
 * coordinates, and tooltips show an HTML rendering of the original instance.
 * <p>
 * NOTE(review): reads the field {@code coordinates[i][0..1]} which is declared
 * elsewhere in this class — assumes it holds one 2-D projected point per
 * instance of {@code ds}, in the same order; confirm against the MDS computation.
 *
 * @param ds   the dataset whose instances have been projected
 * @param type the MDS variant used (only used for the chart title)
 * @return a panel containing the configured chart
 * @throws Exception if chart construction fails
 */
public JXPanel buildMDSViewFromDataSet(Instances ds, MDSTypeEnum type) throws Exception {
    final XYSeriesCollection dataset = new XYSeriesCollection();
    final JFreeChart chart = ChartFactory.createScatterPlot("", // title
            "X", "Y", // axis labels
            dataset, // dataset
            PlotOrientation.VERTICAL, true, // legend? yes
            true, // tooltips? yes
            false // URLs? no
    );
    final XYPlot xyPlot = (XYPlot) chart.getPlot();
    chart.setTitle(type.name() + " MDS");

    // Number of series = number of class values (1 when there is no class).
    Attribute clsAttribute = null;
    int nbClass = 1;
    if (ds.classIndex() != -1) {
        clsAttribute = ds.classAttribute();
        nbClass = clsAttribute.numValues();
    }

    // autoSort=false keeps points in insertion order, matching filteredInstances.
    final List<XYSeries> lseries = new ArrayList<XYSeries>();
    if (nbClass <= 1) {
        lseries.add(new XYSeries("Serie #1", false));
    } else {
        for (int i = 0; i < nbClass; i++) {
            lseries.add(new XYSeries(clsAttribute.value(i), false));
        }
    }
    dataset.removeAllSeries();

    // One (initially empty) Instances container per series; item k of series s
    // corresponds to instance k of filteredInstances.get(s) — the tooltip
    // generator below relies on this parallel ordering.
    final List<Instances> filteredInstances = new ArrayList<Instances>();
    for (int i = 0; i < lseries.size(); i++) {
        filteredInstances.add(new Instances(ds, 0));
    }
    for (int i = 0; i < ds.numInstances(); i++) {
        final Instance oInst = ds.instance(i);
        int indexOfSerie = 0;
        if (oInst.classIndex() != -1) {
            indexOfSerie = (int) oInst.value(oInst.classAttribute());
        }
        lseries.get(indexOfSerie).add(coordinates[i][0], coordinates[i][1]);
        filteredInstances.get(indexOfSerie).add(oInst);
    }

    // NOTE(review): 'colors' is populated below but never read afterwards in
    // this method — appears to be dead local state; verify before removing.
    final List<Paint> colors = new ArrayList<Paint>();
    for (final XYSeries series : lseries) {
        dataset.addSeries(series);
    }

    // Tooltip = HTML dump of the original instance behind each plotted point.
    final XYToolTipGenerator gen = new XYToolTipGenerator() {
        @Override
        public String generateToolTip(XYDataset dataset, int series, int item) {
            return InstanceFormatter.htmlFormat(filteredInstances.get(series).instance(item), true);
        }
    };

    final Shape shape = new Ellipse2D.Float(0f, 0f, 5f, 5f);
    ((XYLineAndShapeRenderer) xyPlot.getRenderer()).setUseOutlinePaint(true);
    for (int p = 0; p < nbClass; p++) {
        xyPlot.getRenderer().setSeriesToolTipGenerator(p, gen);
        ((XYLineAndShapeRenderer) xyPlot.getRenderer()).setLegendShape(p, shape);
        xyPlot.getRenderer().setSeriesOutlinePaint(p, Color.BLACK);
    }
    for (int ii = 0; ii < nbClass; ii++) {
        colors.add(xyPlot.getRenderer().getItemPaint(ii, 0));
    }

    final ChartPanel chartPanel = new ChartPanel(chart);
    chartPanel.setMouseWheelEnabled(true);
    chartPanel.setPreferredSize(new Dimension(1200, 900));
    chartPanel.setBorder(new TitledBorder("MDS Projection"));
    chartPanel.setBackground(Color.WHITE);

    final JXPanel allPanel = new JXPanel();
    allPanel.setLayout(new BorderLayout());
    allPanel.add(chartPanel, BorderLayout.CENTER);
    return allPanel;
}
From source file:LVCoref.WekaWrapper.java
License:Open Source License
public static void main(String[] args) { try {//from w w w.j a v a2 s . c o m List<Document> docs = new LinkedList<Document>(); Document d = new Document(); d.readCONLL("data/pipeline/interview_16.lvsem.conll"); d.addAnnotationMMAX("data/interview_16_coref_level.xml"); d.useGoldMentions(); docs.add(d); d = new Document(); d.readCONLL("data/pipeline/interview_23.lvsem.conll"); d.addAnnotationMMAX("data/interview_23_coref_level.xml"); d.useGoldMentions(); docs.add(d); d = new Document(); d.readCONLL("data/pipeline/interview_27.lvsem.conll"); d.addAnnotationMMAX("data/interview_27_coref_level.xml"); d.useGoldMentions(); docs.add(d); d = new Document(); d.readCONLL("data/pipeline/interview_38.lvsem.conll"); d.addAnnotationMMAX("data/interview_38_coref_level.xml"); d.useGoldMentions(); docs.add(d); Instances train = toArff2(docs); train.setClassIndex(train.numAttributes() - 1); String[] options = { "-U" };//, "-C", "0.5"}; Classifier cls = new J48(); cls.setOptions(options); cls.buildClassifier(train); docs = new LinkedList<Document>(); d = new Document(); d.readCONLL("data/pipeline/interview_43.lvsem.conll"); d.addAnnotationMMAX("data/interview_43_coref_level.xml"); d.useGoldMentions(); docs.add(d); d = new Document(); d.readCONLL("data/pipeline/interview_46.lvsem.conll"); d.addAnnotationMMAX("data/interview_46_coref_level.xml"); d.useGoldMentions(); docs.add(d); Evaluation eval = new Evaluation(train); Instances data = toArff2(docs); data.setClassIndex(data.numAttributes() - 1); for (int i = 0; i < data.numInstances(); i++) { double clsLabel = cls.classifyInstance(data.instance(i)); //System.out.println(clsLabel); data.instance(i).setClassValue(clsLabel); System.out.println(data.instance(i).toString(data.classIndex())); } // eval.crossValidateModel(cls, train, 10, new Random(1)); // // generate curve // ThresholdCurve tc = new ThresholdCurve(); // //int classIndex = test.numAttributes()-1; // Instances result = tc.getCurve(eval.predictions());//, classIndex); // // // 
plot curve // ThresholdVisualizePanel vmc = new ThresholdVisualizePanel(); // vmc.setROCString("(Area under ROC = " + // weka.core.Utils.doubleToString(tc.getROCArea(result), 4) + ")"); // vmc.setName(result.relationName()); // PlotData2D tempd = new PlotData2D(result); // tempd.setPlotName(result.relationName()); // tempd.addInstanceNumberAttribute(); // // specify which points are connected // boolean[] cp = new boolean[result.numInstances()]; // for (int n = 1; n < cp.length; n++) // cp[n] = true; // tempd.setConnectPoints(cp); // // add plot // vmc.addPlot(tempd); // // // display curve // String plotName = vmc.getName(); // final javax.swing.JFrame jf = // new javax.swing.JFrame("Weka Classifier Visualize: "+plotName); // jf.setSize(500,400); // jf.getContentPane().setLayout(new BorderLayout()); // jf.getContentPane().add(vmc, BorderLayout.CENTER); // jf.addWindowListener(new java.awt.event.WindowAdapter() { // public void windowClosing(java.awt.event.WindowEvent e) { // jf.dispose(); // } // }); // jf.setVisible(true); // Instances test = toArff2(docs); // test.setClassIndex(test.numAttributes()-1); // // // Evaluation evals = new Evaluation(train); // // evals.evaluateModel(cls, test); // System.out.println(evals.toSummaryString("\nResults\n======\n", false)); // System.out.println(evals.toMatrixString()); // System.out.println(evals.toClassDetailsString()); // // System.out.println(cls); // //System.out.println(toArff2(docs)); } catch (Exception ex) { Logger.getLogger(WekaWrapper.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:machinelearningproject.RFTree.java
@Override public Tree buildTree(Instances instances) throws Exception { Tree tree = new Tree(); ArrayList<String> availableAttributes = new ArrayList(); int largestInfoGainAttrIdx = -1; double largestInfoGainAttrValue = 0.0; //choose random fraction int numAttr = instances.numAttributes(); int k = (int) round(sqrt(numAttr)); ArrayList<Integer> randomIdx = randomFraction(numAttr); for (int idx = 0; idx < k; idx++) { if (idx != instances.classIndex()) { availableAttributes.add(instances.attribute(idx).name()); }/*w w w .j ava 2 s.com*/ } if (instances.numInstances() == 0) { return null; } else if (calculateClassEntropy(instances) == 0.0) { // all examples have the sama classification tree.attributeName = instances.get(0).stringValue(instances.classIndex()); } else if (availableAttributes.isEmpty()) { // mode classification tree.attributeName = getModeClass(instances, instances.classIndex()); } else { for (int idx = 0; idx < instances.numAttributes(); idx++) { if (idx != instances.classIndex()) { double attrInfoGain = calculateInformationGain(instances, idx, instances.classIndex()); if (largestInfoGainAttrValue < attrInfoGain) { largestInfoGainAttrIdx = idx; largestInfoGainAttrValue = attrInfoGain; } } } if (largestInfoGainAttrIdx != -1) { tree.attributeName = instances.attribute(largestInfoGainAttrIdx).name(); ArrayList<String> attrValues = new ArrayList(); for (int i = 0; i < instances.numInstances(); i++) { Instance instance = instances.get(i); String attrValue = instance.stringValue(largestInfoGainAttrIdx); if (attrValues.isEmpty() || !attrValues.contains(attrValue)) { attrValues.add(attrValue); } } for (String attrValue : attrValues) { Node node = new Node(attrValue); Instances copyInstances = new Instances(instances); copyInstances.setClassIndex(instances.classIndex()); int i = 0; while (i < copyInstances.numInstances()) { Instance instance = copyInstances.get(i); // reducing examples if (!instance.stringValue(largestInfoGainAttrIdx).equals(attrValue)) { 
copyInstances.delete(i); i--; } i++; } copyInstances.deleteAttributeAt(largestInfoGainAttrIdx); node.subTree = buildTree(copyInstances); tree.nodes.add(node); } } } return tree; }
From source file:machinelearningproject.Tree.java
public Tree buildTree(Instances instances) throws Exception { Tree tree = new Tree(); ArrayList<String> availableAttributes = new ArrayList(); int largestInfoGainAttrIdx = -1; double largestInfoGainAttrValue = 0.0; for (int idx = 0; idx < instances.numAttributes(); idx++) { if (idx != instances.classIndex()) { availableAttributes.add(instances.attribute(idx).name()); }/*from w ww . j a v a2s.c o m*/ } if (instances.numInstances() == 0) { return null; } else if (calculateClassEntropy(instances) == 0.0) { // all examples have the sama classification tree.attributeName = instances.get(0).stringValue(instances.classIndex()); } else if (availableAttributes.isEmpty()) { // mode classification tree.attributeName = getModeClass(instances, instances.classIndex()); } else { for (int idx = 0; idx < instances.numAttributes(); idx++) { if (idx != instances.classIndex()) { double attrInfoGain = calculateInformationGain(instances, idx, instances.classIndex()); if (largestInfoGainAttrValue < attrInfoGain) { largestInfoGainAttrIdx = idx; largestInfoGainAttrValue = attrInfoGain; } } } if (largestInfoGainAttrIdx != -1) { tree.attributeName = instances.attribute(largestInfoGainAttrIdx).name(); ArrayList<String> attrValues = new ArrayList(); for (int i = 0; i < instances.numInstances(); i++) { Instance instance = instances.get(i); String attrValue = instance.stringValue(largestInfoGainAttrIdx); if (attrValues.isEmpty() || !attrValues.contains(attrValue)) { attrValues.add(attrValue); } } for (String attrValue : attrValues) { Node node = new Node(attrValue); Instances copyInstances = new Instances(instances); copyInstances.setClassIndex(instances.classIndex()); int i = 0; while (i < copyInstances.numInstances()) { Instance instance = copyInstances.get(i); // reducing examples if (!instance.stringValue(largestInfoGainAttrIdx).equals(attrValue)) { copyInstances.delete(i); i--; } i++; } copyInstances.deleteAttributeAt(largestInfoGainAttrIdx); node.subTree = buildTree(copyInstances); 
tree.nodes.add(node); } } } return tree; }
From source file:machine_learing_clasifier.MyC45.java
/**
 * Recursively grows a C4.5-style subtree on the given data.
 * <p>
 * Computes the information gain of every non-class attribute (using the best
 * threshold for numeric attributes), splits on the best one, and recurses;
 * when no attribute has positive gain the node becomes a leaf holding the
 * normalized class distribution.
 * <p>
 * FIX: removed the leftover debug {@code System.out.println} statements
 * ("huhu", "nominal", ...) that polluted stdout on every node, and merged the
 * duplicated nominal/numeric successor-building loops.
 *
 * @param data the (sub)set of training examples for this node
 * @throws Exception propagated from gain computation / child training
 */
public void makeTree(Instances data) throws Exception {
    if (data.numInstances() == 0) {
        return;
    }

    // Information gain per attribute; the class attribute's slot stays 0.0,
    // so it can never win unless every gain is zero (handled as a leaf below).
    double[] infoGains = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        Attribute att = data.attribute(i);
        if (data.classIndex() != att.index()) {
            if (att.isNominal()) {
                infoGains[att.index()] = computeInformationGain(data, att);
            } else {
                infoGains[att.index()] = computeInformationGainContinous(data, att,
                        BestContinousAttribute(data, att));
            }
        }
    }

    m_Attribute = data.attribute(Utils.maxIndex(infoGains));
    if (m_Attribute.isNumeric()) {
        // Remember the winning split threshold for the numeric attribute.
        numericAttThreshold = BestContinousAttribute(data, m_Attribute);
    }

    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
        // No informative split: leaf with the (normalized) class distribution.
        m_Attribute = null;
        m_Distribution = new double[data.numClasses()];
        for (int i = 0; i < data.numInstances(); i++) {
            int inst = (int) data.instance(i).value(data.classAttribute());
            m_Distribution[inst]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    } else {
        // Nominal attribute: one branch per value; numeric: two branches
        // (below / above the threshold).
        Instances[] splitData = m_Attribute.isNominal()
                ? splitData(data, m_Attribute)
                : splitDataContinous(data, m_Attribute, numericAttThreshold);
        int branches = m_Attribute.isNominal() ? m_Attribute.numValues() : 2;
        m_Successors = new MyC45[branches];
        for (int j = 0; j < branches; j++) {
            m_Successors[j] = new MyC45(head, this);
            m_Successors[j].buildClassifier(splitData[j]);
        }
    }
}
From source file:machine_learing_clasifier.MyID3.java
public void makeTree(Instances data) throws Exception { if (data.numInstances() == 0) { return;//w ww.ja va 2 s. co m } double[] infoGains = new double[data.numAttributes()]; for (int i = 0; i < data.numAttributes(); i++) { Attribute att = data.attribute(i); if (data.classIndex() != att.index()) { infoGains[att.index()] = computeInformationGain(data, att); } } m_Attribute = data.attribute(Utils.maxIndex(infoGains)); //System.out.println("huhu = " + m_Attribute.toString()); if (Utils.eq(infoGains[m_Attribute.index()], 0)) { m_Attribute = null; m_Distribution = new double[data.numClasses()]; for (int i = 0; i < data.numInstances(); i++) { int inst = (int) data.instance(i).value(data.classAttribute()); m_Distribution[inst]++; } Utils.normalize(m_Distribution); m_ClassValue = Utils.maxIndex(m_Distribution); m_ClassAttribute = data.classAttribute(); } else { Instances[] splitData = splitData(data, m_Attribute); m_Successors = new MyID3[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new MyID3(); m_Successors[j].buildClassifier(splitData[j]); } } }
From source file:mao.datamining.RemoveUselessColumnsByMissingValues.java
License:Open Source License
/** * Signify that this batch of input to the filter is finished. * * @return true if there are instances pending output * @throws Exception if no input format defined *//*from w w w. j av a 2s . c o m*/ public boolean batchFinished() throws Exception { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_removeFilter == null) { // establish attributes to remove from first batch Instances toFilter = getInputFormat(); int[] attsToDelete = new int[toFilter.numAttributes()]; int numToDelete = 0; for (int i = 0; i < toFilter.numAttributes(); i++) { if (i == toFilter.classIndex()) continue; // skip class AttributeStats stats = toFilter.attributeStats(i); //remove those attributes who has high ratio of missing values if ((stats.missingCount * 100) / stats.totalCount > m_maxMissingPercentage) { // System.out.println("stats.missingPercentage: " + (stats.missingCount*100)/stats.totalCount+"%"); attsToDelete[numToDelete++] = i; } //remove those columns defined in the list by manual check if (this.column2DeleteSet.contains(toFilter.attribute(i).name())) { attsToDelete[numToDelete++] = i; } } int[] finalAttsToDelete = new int[numToDelete]; System.arraycopy(attsToDelete, 0, finalAttsToDelete, 0, numToDelete); m_removeFilter = new Remove(); m_removeFilter.setAttributeIndicesArray(finalAttsToDelete); m_removeFilter.setInvertSelection(false); m_removeFilter.setInputFormat(toFilter); for (int i = 0; i < toFilter.numInstances(); i++) { m_removeFilter.input(toFilter.instance(i)); } m_removeFilter.batchFinished(); Instance processed; Instances outputDataset = m_removeFilter.getOutputFormat(); // restore old relation name to hide attribute filter stamp outputDataset.setRelationName(toFilter.relationName()); setOutputFormat(outputDataset); while ((processed = m_removeFilter.output()) != null) { processed.setDataset(outputDataset); push(processed); } } flushInput(); m_NewBatch = true; return (numPendingOutput() != 0); }
From source file:meka.classifiers.multilabel.AbstractMultiLabelClassifier.java
License:Open Source License
/** * TestCapabilities./* w w w. j a v a2 s . com*/ * Make sure the training data is suitable. * @param D the data */ public void testCapabilities(Instances D) throws Exception { // get the classifier's capabilities, enable all class attributes and do the usual test Capabilities cap = getCapabilities(); cap.enableAllClasses(); //getCapabilities().testWithFail(D); // get the capabilities again, test class attributes individually int L = D.classIndex(); for (int j = 0; j < L; j++) { Attribute c = D.attribute(j); cap.testWithFail(c, true); } }
From source file:meka.classifiers.multilabel.BCC.java
License:Open Source License
/**
 * Builds the Bayesian Classifier Chain: measures pairwise label dependence,
 * extracts a maximum spanning tree over the labels, roots it at the seed
 * node to obtain a directed tree, and trains one classifier per label
 * conditioned on its tree parents.
 *
 * @param D the training data (multi-label layout: first classIndex()
 *          attributes are the labels)
 * @throws Exception if capability testing or node training fails
 */
@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);
    m_R = new Random(getSeed());
    int L = D.classIndex();      // number of labels
    int d = D.numAttributes() - L; // number of (non-label) features
    /*
     * Measure [un]conditional label dependencies (frequencies).
     */
    if (getDebug())
        System.out.println("Get unconditional dependencies ...");
    double CD[][] = null;
    if (m_DependencyType.equals("L")) {
        // Newer option: conditional dependence via the LEAD method.
        if (getDebug())
            System.out.println("The 'LEAD' method for finding conditional dependence.");
        CD = StatUtils.LEAD(D, getClassifier(), m_R);
    } else {
        // Default option: marginal dependence from label frequencies.
        if (getDebug())
            System.out.println("The Frequency method for finding marginal dependence.");
        CD = StatUtils.margDepMatrix(D, m_DependencyType);
    }
    if (getDebug())
        System.out.println(MatrixUtils.toString(CD));
    /*
     * Make a fully connected graph; each edge weight is the dependence
     * measured between the pair of labels.
     */
    CD = MatrixUtils.multiply(CD, -1); // because we want a *maximum* spanning tree
    if (getDebug())
        System.out.println("Make a graph ...");
    EdgeWeightedGraph G = new EdgeWeightedGraph((int) L);
    for (int i = 0; i < L; i++) {
        for (int j = i + 1; j < L; j++) {
            Edge e = new Edge(i, j, CD[i][j]);
            G.addEdge(e);
        }
    }
    /*
     * Run an off-the-shelf MST algorithm to get a MST.
     */
    if (getDebug())
        System.out.println("Get an MST ...");
    KruskalMST mst = new KruskalMST(G);
    /*
     * Define graph connections (symmetric adjacency matrix) from the MST.
     */
    int paM[][] = new int[L][L];
    for (Edge e : mst.edges()) {
        int j = e.either();
        int k = e.other(j);
        paM[j][k] = 1;
        paM[k][j] = 1;
    }
    if (getDebug())
        System.out.println(MatrixUtils.toString(paM));
    /*
     * Turn the undirected tree into a rooted tree with the m_Seed-th node
     * as root; 'visted' records each node's depth (-1 = unvisited).
     */
    int root = getSeed();
    if (getDebug())
        System.out.println("Make a Tree from Root " + root);
    int paL[][] = new int[L][0];
    int visted[] = new int[L];
    Arrays.fill(visted, -1);
    visted[root] = 0;
    treeify(root, paM, paL, visted);
    if (getDebug()) {
        for (int i = 0; i < L; i++) {
            System.out.println("pa_" + i + " = " + Arrays.toString(paL[i]));
        }
    }
    // Chain order = labels sorted by depth, so parents are trained first.
    m_Chain = Utils.sort(visted);
    if (getDebug())
        System.out.println("sequence: " + Arrays.toString(m_Chain));
    /*
     * Build a classifier 'tree' based on the rooted tree: one CNode per
     * label, conditioned on all features plus its parent labels.
     */
    if (getDebug())
        System.out.println("Build Classifier Tree ...");
    nodes = new CNode[L];
    for (int j : m_Chain) {
        if (getDebug())
            System.out.println("\t node h_" + j + " : P(y_" + j + " | x_[1:" + d + "], y_" + Arrays.toString(paL[j]) + ")");
        nodes[j] = new CNode(j, null, paL[j]);
        nodes[j].build(D, m_Classifier);
    }
    if (getDebug())
        System.out.println(" * DONE * ");
    /*
     * Notes: paL[j] = new int[]{} would reduce this to BR;
     * paL[j] = MLUtils.gen_indices(j) would reduce it to CC.
     */
}
From source file:meka.classifiers.multilabel.BPNN.java
License:Open Source License
@Override public void buildClassifier(Instances D) throws Exception { testCapabilities(D);/*from www . j a va 2s . c om*/ double X_[][] = MLUtils.getXfromD(D); double Y_[][] = MLUtils.getYfromD(D); r = new Random(m_Seed); if (this.W == null) { if (getDebug()) System.out.println("initialize weights ..."); int h[] = new int[] { m_H }; // TODO: parameterize this int d = X_[0].length; int L = D.classIndex(); initWeights(d, L, h); } // else ... probably pre-initialized, continue ... else if (getDebug()) System.out.println("weights already preset, continue ..."); train(X_, Y_, m_E); }