List of usage examples for weka.core Instances classIndex
public int classIndex()
From source file:lu.lippmann.cdb.lab.mds.UniversalMDS.java
License:Open Source License
/**
 * Builds a scatter-plot panel visualizing an MDS projection of the dataset.
 * <p>
 * One XY series is created per class value (or a single series when the
 * dataset has no class attribute), each instance is plotted at its projected
 * coordinates, and tooltips show an HTML rendering of the original instance.
 * <p>
 * NOTE(review): reads the field {@code coordinates[i][0..1]} which is declared
 * elsewhere in this class — assumes it holds one 2-D projected point per
 * instance of {@code ds}, in the same order; confirm against the MDS computation.
 *
 * @param ds   the dataset whose instances have been projected
 * @param type the MDS variant used (only used for the chart title)
 * @return a panel containing the configured chart
 * @throws Exception if chart construction fails
 */
public JXPanel buildMDSViewFromDataSet(Instances ds, MDSTypeEnum type) throws Exception {
    final XYSeriesCollection dataset = new XYSeriesCollection();
    final JFreeChart chart = ChartFactory.createScatterPlot("", // title
            "X", "Y", // axis labels
            dataset, // dataset
            PlotOrientation.VERTICAL, true, // legend? yes
            true, // tooltips? yes
            false // URLs? no
    );
    final XYPlot xyPlot = (XYPlot) chart.getPlot();
    chart.setTitle(type.name() + " MDS");

    // Number of series = number of class values (1 when there is no class).
    Attribute clsAttribute = null;
    int nbClass = 1;
    if (ds.classIndex() != -1) {
        clsAttribute = ds.classAttribute();
        nbClass = clsAttribute.numValues();
    }

    // autoSort=false keeps points in insertion order, matching filteredInstances.
    final List<XYSeries> lseries = new ArrayList<XYSeries>();
    if (nbClass <= 1) {
        lseries.add(new XYSeries("Serie #1", false));
    } else {
        for (int i = 0; i < nbClass; i++) {
            lseries.add(new XYSeries(clsAttribute.value(i), false));
        }
    }
    dataset.removeAllSeries();

    // One (initially empty) Instances container per series; item k of series s
    // corresponds to instance k of filteredInstances.get(s) — the tooltip
    // generator below relies on this parallel ordering.
    final List<Instances> filteredInstances = new ArrayList<Instances>();
    for (int i = 0; i < lseries.size(); i++) {
        filteredInstances.add(new Instances(ds, 0));
    }
    for (int i = 0; i < ds.numInstances(); i++) {
        final Instance oInst = ds.instance(i);
        int indexOfSerie = 0;
        if (oInst.classIndex() != -1) {
            indexOfSerie = (int) oInst.value(oInst.classAttribute());
        }
        lseries.get(indexOfSerie).add(coordinates[i][0], coordinates[i][1]);
        filteredInstances.get(indexOfSerie).add(oInst);
    }

    // NOTE(review): 'colors' is populated below but never read afterwards in
    // this method — appears to be dead local state; verify before removing.
    final List<Paint> colors = new ArrayList<Paint>();
    for (final XYSeries series : lseries) {
        dataset.addSeries(series);
    }

    // Tooltip = HTML dump of the original instance behind each plotted point.
    final XYToolTipGenerator gen = new XYToolTipGenerator() {
        @Override
        public String generateToolTip(XYDataset dataset, int series, int item) {
            return InstanceFormatter.htmlFormat(filteredInstances.get(series).instance(item), true);
        }
    };

    final Shape shape = new Ellipse2D.Float(0f, 0f, 5f, 5f);
    ((XYLineAndShapeRenderer) xyPlot.getRenderer()).setUseOutlinePaint(true);
    for (int p = 0; p < nbClass; p++) {
        xyPlot.getRenderer().setSeriesToolTipGenerator(p, gen);
        ((XYLineAndShapeRenderer) xyPlot.getRenderer()).setLegendShape(p, shape);
        xyPlot.getRenderer().setSeriesOutlinePaint(p, Color.BLACK);
    }
    for (int ii = 0; ii < nbClass; ii++) {
        colors.add(xyPlot.getRenderer().getItemPaint(ii, 0));
    }

    final ChartPanel chartPanel = new ChartPanel(chart);
    chartPanel.setMouseWheelEnabled(true);
    chartPanel.setPreferredSize(new Dimension(1200, 900));
    chartPanel.setBorder(new TitledBorder("MDS Projection"));
    chartPanel.setBackground(Color.WHITE);

    final JXPanel allPanel = new JXPanel();
    allPanel.setLayout(new BorderLayout());
    allPanel.add(chartPanel, BorderLayout.CENTER);
    return allPanel;
}
From source file:LVCoref.WekaWrapper.java
License:Open Source License
public static void main(String[] args) { try {//from w w w.j a v a2 s . c o m List<Document> docs = new LinkedList<Document>(); Document d = new Document(); d.readCONLL("data/pipeline/interview_16.lvsem.conll"); d.addAnnotationMMAX("data/interview_16_coref_level.xml"); d.useGoldMentions(); docs.add(d); d = new Document(); d.readCONLL("data/pipeline/interview_23.lvsem.conll"); d.addAnnotationMMAX("data/interview_23_coref_level.xml"); d.useGoldMentions(); docs.add(d); d = new Document(); d.readCONLL("data/pipeline/interview_27.lvsem.conll"); d.addAnnotationMMAX("data/interview_27_coref_level.xml"); d.useGoldMentions(); docs.add(d); d = new Document(); d.readCONLL("data/pipeline/interview_38.lvsem.conll"); d.addAnnotationMMAX("data/interview_38_coref_level.xml"); d.useGoldMentions(); docs.add(d); Instances train = toArff2(docs); train.setClassIndex(train.numAttributes() - 1); String[] options = { "-U" };//, "-C", "0.5"}; Classifier cls = new J48(); cls.setOptions(options); cls.buildClassifier(train); docs = new LinkedList<Document>(); d = new Document(); d.readCONLL("data/pipeline/interview_43.lvsem.conll"); d.addAnnotationMMAX("data/interview_43_coref_level.xml"); d.useGoldMentions(); docs.add(d); d = new Document(); d.readCONLL("data/pipeline/interview_46.lvsem.conll"); d.addAnnotationMMAX("data/interview_46_coref_level.xml"); d.useGoldMentions(); docs.add(d); Evaluation eval = new Evaluation(train); Instances data = toArff2(docs); data.setClassIndex(data.numAttributes() - 1); for (int i = 0; i < data.numInstances(); i++) { double clsLabel = cls.classifyInstance(data.instance(i)); //System.out.println(clsLabel); data.instance(i).setClassValue(clsLabel); System.out.println(data.instance(i).toString(data.classIndex())); } // eval.crossValidateModel(cls, train, 10, new Random(1)); // // generate curve // ThresholdCurve tc = new ThresholdCurve(); // //int classIndex = test.numAttributes()-1; // Instances result = tc.getCurve(eval.predictions());//, classIndex); // // // 
plot curve // ThresholdVisualizePanel vmc = new ThresholdVisualizePanel(); // vmc.setROCString("(Area under ROC = " + // weka.core.Utils.doubleToString(tc.getROCArea(result), 4) + ")"); // vmc.setName(result.relationName()); // PlotData2D tempd = new PlotData2D(result); // tempd.setPlotName(result.relationName()); // tempd.addInstanceNumberAttribute(); // // specify which points are connected // boolean[] cp = new boolean[result.numInstances()]; // for (int n = 1; n < cp.length; n++) // cp[n] = true; // tempd.setConnectPoints(cp); // // add plot // vmc.addPlot(tempd); // // // display curve // String plotName = vmc.getName(); // final javax.swing.JFrame jf = // new javax.swing.JFrame("Weka Classifier Visualize: "+plotName); // jf.setSize(500,400); // jf.getContentPane().setLayout(new BorderLayout()); // jf.getContentPane().add(vmc, BorderLayout.CENTER); // jf.addWindowListener(new java.awt.event.WindowAdapter() { // public void windowClosing(java.awt.event.WindowEvent e) { // jf.dispose(); // } // }); // jf.setVisible(true); // Instances test = toArff2(docs); // test.setClassIndex(test.numAttributes()-1); // // // Evaluation evals = new Evaluation(train); // // evals.evaluateModel(cls, test); // System.out.println(evals.toSummaryString("\nResults\n======\n", false)); // System.out.println(evals.toMatrixString()); // System.out.println(evals.toClassDetailsString()); // // System.out.println(cls); // //System.out.println(toArff2(docs)); } catch (Exception ex) { Logger.getLogger(WekaWrapper.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:machinelearningproject.RFTree.java
@Override public Tree buildTree(Instances instances) throws Exception { Tree tree = new Tree(); ArrayList<String> availableAttributes = new ArrayList(); int largestInfoGainAttrIdx = -1; double largestInfoGainAttrValue = 0.0; //choose random fraction int numAttr = instances.numAttributes(); int k = (int) round(sqrt(numAttr)); ArrayList<Integer> randomIdx = randomFraction(numAttr); for (int idx = 0; idx < k; idx++) { if (idx != instances.classIndex()) { availableAttributes.add(instances.attribute(idx).name()); }/*w w w .j ava 2 s.com*/ } if (instances.numInstances() == 0) { return null; } else if (calculateClassEntropy(instances) == 0.0) { // all examples have the sama classification tree.attributeName = instances.get(0).stringValue(instances.classIndex()); } else if (availableAttributes.isEmpty()) { // mode classification tree.attributeName = getModeClass(instances, instances.classIndex()); } else { for (int idx = 0; idx < instances.numAttributes(); idx++) { if (idx != instances.classIndex()) { double attrInfoGain = calculateInformationGain(instances, idx, instances.classIndex()); if (largestInfoGainAttrValue < attrInfoGain) { largestInfoGainAttrIdx = idx; largestInfoGainAttrValue = attrInfoGain; } } } if (largestInfoGainAttrIdx != -1) { tree.attributeName = instances.attribute(largestInfoGainAttrIdx).name(); ArrayList<String> attrValues = new ArrayList(); for (int i = 0; i < instances.numInstances(); i++) { Instance instance = instances.get(i); String attrValue = instance.stringValue(largestInfoGainAttrIdx); if (attrValues.isEmpty() || !attrValues.contains(attrValue)) { attrValues.add(attrValue); } } for (String attrValue : attrValues) { Node node = new Node(attrValue); Instances copyInstances = new Instances(instances); copyInstances.setClassIndex(instances.classIndex()); int i = 0; while (i < copyInstances.numInstances()) { Instance instance = copyInstances.get(i); // reducing examples if (!instance.stringValue(largestInfoGainAttrIdx).equals(attrValue)) { 
copyInstances.delete(i); i--; } i++; } copyInstances.deleteAttributeAt(largestInfoGainAttrIdx); node.subTree = buildTree(copyInstances); tree.nodes.add(node); } } } return tree; }
From source file:machinelearningproject.Tree.java
public Tree buildTree(Instances instances) throws Exception { Tree tree = new Tree(); ArrayList<String> availableAttributes = new ArrayList(); int largestInfoGainAttrIdx = -1; double largestInfoGainAttrValue = 0.0; for (int idx = 0; idx < instances.numAttributes(); idx++) { if (idx != instances.classIndex()) { availableAttributes.add(instances.attribute(idx).name()); }/*from w ww . j a v a2s.c o m*/ } if (instances.numInstances() == 0) { return null; } else if (calculateClassEntropy(instances) == 0.0) { // all examples have the sama classification tree.attributeName = instances.get(0).stringValue(instances.classIndex()); } else if (availableAttributes.isEmpty()) { // mode classification tree.attributeName = getModeClass(instances, instances.classIndex()); } else { for (int idx = 0; idx < instances.numAttributes(); idx++) { if (idx != instances.classIndex()) { double attrInfoGain = calculateInformationGain(instances, idx, instances.classIndex()); if (largestInfoGainAttrValue < attrInfoGain) { largestInfoGainAttrIdx = idx; largestInfoGainAttrValue = attrInfoGain; } } } if (largestInfoGainAttrIdx != -1) { tree.attributeName = instances.attribute(largestInfoGainAttrIdx).name(); ArrayList<String> attrValues = new ArrayList(); for (int i = 0; i < instances.numInstances(); i++) { Instance instance = instances.get(i); String attrValue = instance.stringValue(largestInfoGainAttrIdx); if (attrValues.isEmpty() || !attrValues.contains(attrValue)) { attrValues.add(attrValue); } } for (String attrValue : attrValues) { Node node = new Node(attrValue); Instances copyInstances = new Instances(instances); copyInstances.setClassIndex(instances.classIndex()); int i = 0; while (i < copyInstances.numInstances()) { Instance instance = copyInstances.get(i); // reducing examples if (!instance.stringValue(largestInfoGainAttrIdx).equals(attrValue)) { copyInstances.delete(i); i--; } i++; } copyInstances.deleteAttributeAt(largestInfoGainAttrIdx); node.subTree = buildTree(copyInstances); 
tree.nodes.add(node); } } } return tree; }
From source file:machine_learing_clasifier.MyC45.java
/**
 * Recursively grows a C4.5-style subtree on the given data.
 * <p>
 * Computes the information gain of every non-class attribute (using the best
 * threshold for numeric attributes), splits on the best one, and recurses;
 * when no attribute has positive gain the node becomes a leaf holding the
 * normalized class distribution.
 * <p>
 * FIX: removed the leftover debug {@code System.out.println} statements
 * ("huhu", "nominal", ...) that polluted stdout on every node, and merged the
 * duplicated nominal/numeric successor-building loops.
 *
 * @param data the (sub)set of training examples for this node
 * @throws Exception propagated from gain computation / child training
 */
public void makeTree(Instances data) throws Exception {
    if (data.numInstances() == 0) {
        return;
    }

    // Information gain per attribute; the class attribute's slot stays 0.0,
    // so it can never win unless every gain is zero (handled as a leaf below).
    double[] infoGains = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        Attribute att = data.attribute(i);
        if (data.classIndex() != att.index()) {
            if (att.isNominal()) {
                infoGains[att.index()] = computeInformationGain(data, att);
            } else {
                infoGains[att.index()] = computeInformationGainContinous(data, att,
                        BestContinousAttribute(data, att));
            }
        }
    }

    m_Attribute = data.attribute(Utils.maxIndex(infoGains));
    if (m_Attribute.isNumeric()) {
        // Remember the winning split threshold for the numeric attribute.
        numericAttThreshold = BestContinousAttribute(data, m_Attribute);
    }

    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
        // No informative split: leaf with the (normalized) class distribution.
        m_Attribute = null;
        m_Distribution = new double[data.numClasses()];
        for (int i = 0; i < data.numInstances(); i++) {
            int inst = (int) data.instance(i).value(data.classAttribute());
            m_Distribution[inst]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    } else {
        // Nominal attribute: one branch per value; numeric: two branches
        // (below / above the threshold).
        Instances[] splitData = m_Attribute.isNominal()
                ? splitData(data, m_Attribute)
                : splitDataContinous(data, m_Attribute, numericAttThreshold);
        int branches = m_Attribute.isNominal() ? m_Attribute.numValues() : 2;
        m_Successors = new MyC45[branches];
        for (int j = 0; j < branches; j++) {
            m_Successors[j] = new MyC45(head, this);
            m_Successors[j].buildClassifier(splitData[j]);
        }
    }
}
From source file:machine_learing_clasifier.MyID3.java
public void makeTree(Instances data) throws Exception { if (data.numInstances() == 0) { return;//w ww.ja va 2 s. co m } double[] infoGains = new double[data.numAttributes()]; for (int i = 0; i < data.numAttributes(); i++) { Attribute att = data.attribute(i); if (data.classIndex() != att.index()) { infoGains[att.index()] = computeInformationGain(data, att); } } m_Attribute = data.attribute(Utils.maxIndex(infoGains)); //System.out.println("huhu = " + m_Attribute.toString()); if (Utils.eq(infoGains[m_Attribute.index()], 0)) { m_Attribute = null; m_Distribution = new double[data.numClasses()]; for (int i = 0; i < data.numInstances(); i++) { int inst = (int) data.instance(i).value(data.classAttribute()); m_Distribution[inst]++; } Utils.normalize(m_Distribution); m_ClassValue = Utils.maxIndex(m_Distribution); m_ClassAttribute = data.classAttribute(); } else { Instances[] splitData = splitData(data, m_Attribute); m_Successors = new MyID3[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new MyID3(); m_Successors[j].buildClassifier(splitData[j]); } } }
From source file:mao.datamining.RemoveUselessColumnsByMissingValues.java
License:Open Source License
/** * Signify that this batch of input to the filter is finished. * * @return true if there are instances pending output * @throws Exception if no input format defined *//*from w w w. j av a 2s . c o m*/ public boolean batchFinished() throws Exception { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_removeFilter == null) { // establish attributes to remove from first batch Instances toFilter = getInputFormat(); int[] attsToDelete = new int[toFilter.numAttributes()]; int numToDelete = 0; for (int i = 0; i < toFilter.numAttributes(); i++) { if (i == toFilter.classIndex()) continue; // skip class AttributeStats stats = toFilter.attributeStats(i); //remove those attributes who has high ratio of missing values if ((stats.missingCount * 100) / stats.totalCount > m_maxMissingPercentage) { // System.out.println("stats.missingPercentage: " + (stats.missingCount*100)/stats.totalCount+"%"); attsToDelete[numToDelete++] = i; } //remove those columns defined in the list by manual check if (this.column2DeleteSet.contains(toFilter.attribute(i).name())) { attsToDelete[numToDelete++] = i; } } int[] finalAttsToDelete = new int[numToDelete]; System.arraycopy(attsToDelete, 0, finalAttsToDelete, 0, numToDelete); m_removeFilter = new Remove(); m_removeFilter.setAttributeIndicesArray(finalAttsToDelete); m_removeFilter.setInvertSelection(false); m_removeFilter.setInputFormat(toFilter); for (int i = 0; i < toFilter.numInstances(); i++) { m_removeFilter.input(toFilter.instance(i)); } m_removeFilter.batchFinished(); Instance processed; Instances outputDataset = m_removeFilter.getOutputFormat(); // restore old relation name to hide attribute filter stamp outputDataset.setRelationName(toFilter.relationName()); setOutputFormat(outputDataset); while ((processed = m_removeFilter.output()) != null) { processed.setDataset(outputDataset); push(processed); } } flushInput(); m_NewBatch = true; return (numPendingOutput() != 0); }
From source file:meka.classifiers.multilabel.AbstractMultiLabelClassifier.java
License:Open Source License
/** * TestCapabilities./* w w w. j a v a2 s . com*/ * Make sure the training data is suitable. * @param D the data */ public void testCapabilities(Instances D) throws Exception { // get the classifier's capabilities, enable all class attributes and do the usual test Capabilities cap = getCapabilities(); cap.enableAllClasses(); //getCapabilities().testWithFail(D); // get the capabilities again, test class attributes individually int L = D.classIndex(); for (int j = 0; j < L; j++) { Attribute c = D.attribute(j); cap.testWithFail(c, true); } }
From source file:meka.classifiers.multilabel.BCC.java
License:Open Source License
/**
 * Builds the Bayesian Classifier Chain: measures pairwise label dependence,
 * extracts a maximum spanning tree over the labels, roots it at the seed
 * node to obtain a directed tree, and trains one classifier per label
 * conditioned on its tree parents.
 *
 * @param D the training data (multi-label layout: first classIndex()
 *          attributes are the labels)
 * @throws Exception if capability testing or node training fails
 */
@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);
    m_R = new Random(getSeed());
    int L = D.classIndex();      // number of labels
    int d = D.numAttributes() - L; // number of (non-label) features
    /*
     * Measure [un]conditional label dependencies (frequencies).
     */
    if (getDebug())
        System.out.println("Get unconditional dependencies ...");
    double CD[][] = null;
    if (m_DependencyType.equals("L")) {
        // Newer option: conditional dependence via the LEAD method.
        if (getDebug())
            System.out.println("The 'LEAD' method for finding conditional dependence.");
        CD = StatUtils.LEAD(D, getClassifier(), m_R);
    } else {
        // Default option: marginal dependence from label frequencies.
        if (getDebug())
            System.out.println("The Frequency method for finding marginal dependence.");
        CD = StatUtils.margDepMatrix(D, m_DependencyType);
    }
    if (getDebug())
        System.out.println(MatrixUtils.toString(CD));
    /*
     * Make a fully connected graph; each edge weight is the dependence
     * measured between the pair of labels.
     */
    CD = MatrixUtils.multiply(CD, -1); // because we want a *maximum* spanning tree
    if (getDebug())
        System.out.println("Make a graph ...");
    EdgeWeightedGraph G = new EdgeWeightedGraph((int) L);
    for (int i = 0; i < L; i++) {
        for (int j = i + 1; j < L; j++) {
            Edge e = new Edge(i, j, CD[i][j]);
            G.addEdge(e);
        }
    }
    /*
     * Run an off-the-shelf MST algorithm to get a MST.
     */
    if (getDebug())
        System.out.println("Get an MST ...");
    KruskalMST mst = new KruskalMST(G);
    /*
     * Define graph connections (symmetric adjacency matrix) from the MST.
     */
    int paM[][] = new int[L][L];
    for (Edge e : mst.edges()) {
        int j = e.either();
        int k = e.other(j);
        paM[j][k] = 1;
        paM[k][j] = 1;
    }
    if (getDebug())
        System.out.println(MatrixUtils.toString(paM));
    /*
     * Turn the undirected tree into a rooted tree with the m_Seed-th node
     * as root; 'visted' records each node's depth (-1 = unvisited).
     */
    int root = getSeed();
    if (getDebug())
        System.out.println("Make a Tree from Root " + root);
    int paL[][] = new int[L][0];
    int visted[] = new int[L];
    Arrays.fill(visted, -1);
    visted[root] = 0;
    treeify(root, paM, paL, visted);
    if (getDebug()) {
        for (int i = 0; i < L; i++) {
            System.out.println("pa_" + i + " = " + Arrays.toString(paL[i]));
        }
    }
    // Chain order = labels sorted by depth, so parents are trained first.
    m_Chain = Utils.sort(visted);
    if (getDebug())
        System.out.println("sequence: " + Arrays.toString(m_Chain));
    /*
     * Build a classifier 'tree' based on the rooted tree: one CNode per
     * label, conditioned on all features plus its parent labels.
     */
    if (getDebug())
        System.out.println("Build Classifier Tree ...");
    nodes = new CNode[L];
    for (int j : m_Chain) {
        if (getDebug())
            System.out.println("\t node h_" + j + " : P(y_" + j + " | x_[1:" + d + "], y_" + Arrays.toString(paL[j]) + ")");
        nodes[j] = new CNode(j, null, paL[j]);
        nodes[j].build(D, m_Classifier);
    }
    if (getDebug())
        System.out.println(" * DONE * ");
    /*
     * Notes: paL[j] = new int[]{} would reduce this to BR;
     * paL[j] = MLUtils.gen_indices(j) would reduce it to CC.
     */
}
From source file:meka.classifiers.multilabel.BPNN.java
License:Open Source License
@Override public void buildClassifier(Instances D) throws Exception { testCapabilities(D);/*from www . j a va 2s . c om*/ double X_[][] = MLUtils.getXfromD(D); double Y_[][] = MLUtils.getYfromD(D); r = new Random(m_Seed); if (this.W == null) { if (getDebug()) System.out.println("initialize weights ..."); int h[] = new int[] { m_H }; // TODO: parameterize this int d = X_[0].length; int L = D.classIndex(); initWeights(d, L, h); } // else ... probably pre-initialized, continue ... else if (getDebug()) System.out.println("weights already preset, continue ..."); train(X_, Y_, m_E); }