List of usage examples for weka.core Instances deleteAttributeAt
public void deleteAttributeAt(int position)
From source file:lu.lippmann.cdb.datasetview.tabs.TableTabView.java
License:Open Source License
/** * Constructor./* w ww.j av a2 s . c om*/ */ public TableTabView(final EventPublisher eventPublisher) { super(); this.instanceTable = new JXTable(); this.instanceTable.setModel(new InstanceTableModel()); this.instanceTable.setEditable(true); this.instanceTable.setShowHorizontalLines(false); this.instanceTable.setShowVerticalLines(false); this.instanceTable.setVisibleRowCount(5); this.instanceTable.setAutoResizeMode(JTable.AUTO_RESIZE_OFF); //Render of numbers this.instanceTable.setDefaultRenderer(Number.class, new TableTabCellRenderer()); this.instanceTable.addMouseListener(new MouseAdapter() { public void mouseReleased(final MouseEvent e) { if (e.isPopupTrigger()) { final InstanceTableModel instanceTableModel = (InstanceTableModel) instanceTable.getModel(); final Instances dataSet = instanceTableModel.getDataSet(); final int row = instanceTable.rowAtPoint(e.getPoint()); final int column = instanceTable.columnAtPoint(e.getPoint()); final int modelColumn = instanceTable.convertColumnIndexToModel(column); final int modelRow = instanceTable.convertRowIndexToModel(row); final JPopupMenu jPopupMenu = new JPopupMenu("feur"); if (modelColumn > 0 && dataSet.classIndex() != modelColumn - 1) { final JMenuItem removeColumnMenuItem = new JMenuItem( "Remove this column ('" + instanceTableModel.getColumnName(modelColumn) + "')"); removeColumnMenuItem.addActionListener(new ActionListener() { @Override public void actionPerformed(final ActionEvent e) { final Instances newdataSet = new Instances(dataSet); newdataSet.deleteAttributeAt(modelColumn - 1); pushDataChange(new DataChange(newdataSet, TabView.DataChangeTypeEnum.Deletion)); } }); jPopupMenu.add(removeColumnMenuItem); } if (modelColumn > 0 && dataSet.attribute(modelColumn - 1).isNumeric() && !dataSet.attribute(modelColumn - 1).isDate()) { final JMenuItem discrColumnMenuItem = new JMenuItem( "Discretize this column ('" + instanceTableModel.getColumnName(modelColumn) + "')"); discrColumnMenuItem.addActionListener(new 
ActionListener() { @Override public void actionPerformed(final ActionEvent e) { try { final Instances newdataSet = WekaDataProcessingUtil .buildDiscretizedDataSetUnsupervisedForOne(dataSet, modelColumn - 1); pushDataChange(new DataChange(newdataSet, TabView.DataChangeTypeEnum.Update)); } catch (Exception e1) { eventPublisher.publish(new ErrorOccuredEvent("Error during discretization of '" + instanceTableModel.getColumnName(modelColumn) + "'", e1)); } } }); jPopupMenu.add(discrColumnMenuItem); for (final int c : new int[] { 5, 10, 20, 40, 80 }) { final JMenuItem discrColumnMenuItemN = new JMenuItem("Discretize this column ('" + instanceTableModel.getColumnName(modelColumn) + "') bins=" + c); discrColumnMenuItemN.addActionListener(new ActionListener() { @Override public void actionPerformed(final ActionEvent e) { try { final Instances newdataSet = WekaDataProcessingUtil .buildDiscretizedDataSetUnsupervised(dataSet, modelColumn - 1, c); pushDataChange( new DataChange(newdataSet, TabView.DataChangeTypeEnum.Update)); } catch (Exception e1) { eventPublisher .publish(new ErrorOccuredEvent("Error during discretization of '" + instanceTableModel.getColumnName(modelColumn) + "'", e1)); } } }); jPopupMenu.add(discrColumnMenuItemN); } } if (column > 0 && dataSet.attribute(column - 1) .isNumeric()/*WekaDataStatsUtil.isInteger(dataSet,column-1)*/) { final JMenuItem nominalizeColumnMenuItem = new JMenuItem( "Nominalize this column ('" + instanceTableModel.getColumnName(column) + "')"); nominalizeColumnMenuItem.addActionListener(new ActionListener() { @Override public void actionPerformed(final ActionEvent e) { try { final Instances newdataSet = WekaDataProcessingUtil .buildNominalizedDataSet(dataSet, new int[] { modelColumn - 1 }); pushDataChange(new DataChange(newdataSet, TabView.DataChangeTypeEnum.Update)); } catch (Exception e1) { eventPublisher.publish(new ErrorOccuredEvent("Error during nominalization of '" + instanceTableModel.getColumnName(modelColumn) + "'", e1)); } } }); 
jPopupMenu.add(nominalizeColumnMenuItem); } if (column > 0 && (dataSet.attribute(column - 1).isNominal() || dataSet.attribute(column - 1).isString())) { final JMenuItem numColumnMenuItem = new JMenuItem( "Numerize this column ('" + instanceTableModel.getColumnName(column) + "')"); numColumnMenuItem.addActionListener(new ActionListener() { @Override public void actionPerformed(final ActionEvent e) { try { final Instances newdataSet = WekaDataProcessingUtil .buildDataSetWithNumerizedStringAttribute(dataSet, column - 1); pushDataChange(new DataChange(newdataSet, TabView.DataChangeTypeEnum.Update)); } catch (Exception e1) { eventPublisher.publish(new ErrorOccuredEvent("Error during numerization of '" + instanceTableModel.getColumnName(column) + "'", e1)); } } }); jPopupMenu.add(numColumnMenuItem); } final JMenuItem removeRowMenuItem = new JMenuItem( "Remove this row (id='" + instanceTableModel.getValueAt(row, 0) + "')"); removeRowMenuItem.addActionListener(new ActionListener() { @Override public void actionPerformed(final ActionEvent e) { final Instances newdataSet = new Instances(dataSet); newdataSet.remove(modelRow); instanceTableModel.removeRow(modelRow); pushDataChange(new DataChange(newdataSet, TabView.DataChangeTypeEnum.Deletion)); } }); jPopupMenu.add(removeRowMenuItem); final JMenuItem selectKNNMenuItem = new JMenuItem("Select nearest neighbours of this row (id='" + instanceTableModel.getValueAt(modelRow, 0) + "')"); selectKNNMenuItem.addActionListener(new ActionListener() { @Override public void actionPerformed(final ActionEvent e) { try { final Instances knnResult = WekaMachineLearningUtil .computeNearestNeighbours(dataSet, instanceTableModel.getRow(modelRow), 10); pushDataChange(new DataChange(knnResult, TabView.DataChangeTypeEnum.Selection)); } catch (Exception e1) { eventPublisher.publish(new ErrorOccuredEvent( "Error when selecting nearest neighbours of this row!", e1)); } } }); jPopupMenu.add(selectKNNMenuItem); jPopupMenu.show(instanceTable, e.getX(), 
e.getY()); } } }); this.instanceTable.packAll(); final int tableWidth = (int) this.instanceTable.getPreferredSize().getWidth() + 30; this.scrollPane = new JScrollPane(this.instanceTable); this.scrollPane.setPreferredSize(new Dimension(Math.min(tableWidth, 500), 500)); this.scrollPane.setHorizontalScrollBarPolicy(ScrollPaneConstants.HORIZONTAL_SCROLLBAR_AS_NEEDED); }
From source file:lu.lippmann.cdb.datasetview.tasks.RemoveFirstAttrTask.java
License:Open Source License
/** * {@inheritDoc}/*from w w w . j av a 2 s .c om*/ */ @Override Instances process0(final Instances dataSet) throws Exception { final Instances newds = new Instances(dataSet); newds.deleteAttributeAt(0); return newds; }
From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFiller.java
License:Open Source License
private Instances fillAllGaps(final Instances ds) throws Exception { Instances newds = new Instances(ds); final int firstDateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(newds); final String datename = newds.attribute(firstDateIdx).name(); if (firstDateIdx == -1) { throw new Exception("No date attribute in this dataset!"); }//from w w w . ja v a2 s . c o m /* add a 'fake numerical' time field */ newds.insertAttributeAt(new Attribute(datename + "_fake"), newds.numAttributes()); for (int i = 0; i < newds.numInstances(); i++) { newds.instance(i).setValue(newds.numAttributes() - 1, newds.instance(i).value(firstDateIdx)); } /* remove the 'true' time field */ newds.deleteAttributeAt(firstDateIdx); /* process the dataset */ newds = fillGaps0(newds); /* re-add the 'true' time field according to the 'fake numerical' time field */ final String df = ds.attribute(firstDateIdx).getDateFormat(); newds.insertAttributeAt(new Attribute(datename + "_new", df), newds.numAttributes()); for (int i = 0; i < newds.numInstances(); i++) { newds.instance(i).setValue(newds.numAttributes() - 1, newds.instance(i).value(newds.numAttributes() - 2)); } /* delete the 'fake numerical' time field */ newds.deleteAttributeAt(newds.numAttributes() - 2); newds.sort(newds.numAttributes() - 1); return newds; }
From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFiller.java
License:Open Source License
/**
 * Fills the gaps of a copy of the given dataset, using discretized time attributes.
 *
 * Steps: add discretized time attributes, add a numeric "fake" copy of the first date
 * attribute, remove every date attribute, binarize nominal attributes, rename attributes whose
 * names contain {@code =}, {@code <}, {@code >} or {@code .}, run {@code fillGaps0}, rebuild a
 * date attribute named {@code <name>_new}, sort on it, then drop the discretized-time
 * attributes (names starting with {@code t_}) and the fake attribute.
 *
 * @param ds the dataset to process (a copy is modified, not {@code ds} itself)
 * @return the processed copy
 * @throws Exception if the dataset has no date attribute, or if a processing step fails
 */
private Instances fillAllGapsWithDiscretizedTime(final Instances ds) throws Exception {
    int firstDateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(ds);
    // Validate BEFORE using the index: attribute(-1) would fail before this check is reached.
    if (firstDateIdx == -1) {
        throw new Exception("No date attribute in this dataset!");
    }
    final String datename = ds.attribute(firstDateIdx).name();

    Instances newds = new Instances(ds);

    /* add discretized time */
    newds = WekaTimeSeriesUtil.buildDataSetWithDiscretizedTime(newds);

    /* add fake numerical time */
    newds.insertAttributeAt(new Attribute(datename + "_fake"), newds.numAttributes());
    for (int i = 0; i < newds.numInstances(); i++) {
        // NOTE(review): assumes the date attribute kept its index after discretized time was added.
        newds.instance(i).setValue(newds.numAttributes() - 1, newds.instance(i).value(firstDateIdx));
    }

    /* remove 'true' date */
    while (firstDateIdx != -1) {
        newds.deleteAttributeAt(firstDateIdx);
        firstDateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(newds);
    }

    /* transform nominal as binaries */
    for (int iidx : WekaDataStatsUtil.getNominalAttributesIndexes(newds)) {
        newds = WekaDataProcessingUtil.buildDataSetWithNominalAsBinary(newds, iidx);
    }

    /* rename attributes whose names can cause issues in tree evaluation */
    for (int k = 0; k < newds.numAttributes(); k++) {
        String atn = newds.attribute(k).name();
        // Each problematic character is replaced by a random number in [0, 1000).
        if (atn.contains("=")) {
            atn = atn.replaceAll("=", (int) (Math.random() * 1000) + "");
        }
        if (atn.contains("<")) {
            atn = atn.replaceAll("<", (int) (Math.random() * 1000) + "");
        }
        if (atn.contains(">")) {
            atn = atn.replaceAll(">", (int) (Math.random() * 1000) + "");
        }
        if (atn.contains(".")) {
            // replace(), not replaceAll(): '.' would otherwise be a regex wildcard.
            atn = atn.replace(".", (int) (Math.random() * 1000) + "");
        }
        newds = WekaDataProcessingUtil.renameAttribute(newds, k, atn);
    }

    /* replace missing values */
    newds = fillGaps0(newds);

    /* reconstruct date according to discretized time */
    final String df = ds.attribute(WekaDataStatsUtil.getFirstDateAttributeIdx(ds)).getDateFormat();
    newds.insertAttributeAt(new Attribute(datename + "_new", df), newds.numAttributes());
    final int newfirstDateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(newds);
    for (int i = 0; i < newds.numInstances(); i++) {
        final Instance inst = newds.instance(i);
        inst.setValue(newfirstDateIdx, newds.instance(i).value(newds.numAttributes() - 2));
    }

    /* sort by date ! */
    newds.sort(newfirstDateIdx);

    /* remove discretized time */
    final Set<String> toRemove = new HashSet<String>();
    for (int i = 0; i < newds.numAttributes(); i++) {
        if (newds.attribute(i).name().startsWith("t_")) {
            toRemove.add(newds.attribute(i).name());
        }
    }
    // Names are resolved to indices at deletion time because indices shift after each deletion.
    for (final String tr : toRemove) {
        newds.deleteAttributeAt(newds.attribute(tr).index());
    }

    /* delete the fake attribute time */
    newds.deleteAttributeAt(newds.numAttributes() - 2);

    return newds;
}
From source file:lu.lippmann.cdb.ext.hydviga.ui.GapsUIUtil.java
License:Open Source License
public static ChartPanel buildGapChartPanelWithCorrection(final Instances pdataSet, final int dateIdx, final Attribute attr, final int gapsize, final int position, final GapFiller gapFiller, final java.util.Collection<String> attrs) throws Exception { final Instances dataSetWithTheGap = new Instances(pdataSet); for (int i = position; i < position + gapsize; i++) dataSetWithTheGap.instance(i).setMissing(attr); int[] arr = new int[] { attr.index(), dateIdx }; for (final String sss : attrs) { arr = ArraysUtil.concat(arr, new int[] { dataSetWithTheGap.attribute(sss).index() }); }/*from www.ja va 2 s .co m*/ Instances filteredDsWithTheGap = WekaDataProcessingUtil.buildFilteredByAttributesDataSet(dataSetWithTheGap, arr); filteredDsWithTheGap = WekaDataProcessingUtil.buildFilteredDataSet(filteredDsWithTheGap, 0, filteredDsWithTheGap.numAttributes() - 1, Math.max(0, position - GapsUtil.VALUES_BEFORE_AND_AFTER_RATIO * gapsize), Math.min(position + gapsize + GapsUtil.VALUES_BEFORE_AND_AFTER_RATIO * gapsize, filteredDsWithTheGap.numInstances() - 1)); final Instances completedds = gapFiller.fillGaps(filteredDsWithTheGap); final Instances diff = WekaTimeSeriesUtil.buildDiff(filteredDsWithTheGap, completedds); Instances filteredDsWithoutTheGap = WekaDataProcessingUtil.buildFilteredByAttributesDataSet(pdataSet, arr); filteredDsWithoutTheGap = WekaDataProcessingUtil.buildFilteredDataSet(filteredDsWithoutTheGap, 0, filteredDsWithoutTheGap.numAttributes() - 1, Math.max(0, position - GapsUtil.VALUES_BEFORE_AND_AFTER_RATIO * gapsize), Math.min(position + gapsize + GapsUtil.VALUES_BEFORE_AND_AFTER_RATIO * gapsize, filteredDsWithoutTheGap.numInstances() - 1)); diff.insertAttributeAt(new Attribute(attr.name() + "_orig"), diff.numAttributes()); for (int i = 0; i < filteredDsWithoutTheGap.numInstances(); i++) { diff.instance(i).setValue(diff.numAttributes() - 1, filteredDsWithoutTheGap.instance(i).value(filteredDsWithoutTheGap.attribute(attr.name()))); } 
//System.out.println(attr.name()+"\n"+diff.toSummaryString()); final java.util.List<String> toRemove = new java.util.ArrayList<String>(); for (int j = 0; j < diff.numAttributes(); j++) { final String consideredAttrName = diff.attribute(j).name(); if (!consideredAttrName.contains("timestamp") && !consideredAttrName.contains(attr.name())) toRemove.add(consideredAttrName); } diff.setClassIndex(-1); for (final String ssss : toRemove) diff.deleteAttributeAt(diff.attribute(ssss).index()); //System.out.println(attr.name()+"\n"+diff.toSummaryString()); final ChartPanel cp = TimeSeriesChartUtil.buildChartPanelForAllAttributes(diff, false, WekaDataStatsUtil.getFirstDateAttributeIdx(diff), null); final XYPlot xyp = (XYPlot) cp.getChart().getPlot(); xyp.getDomainAxis().setLabel(""); xyp.getRangeAxis().setLabel(""); final Marker gapBeginMarker = new ValueMarker( dataSetWithTheGap.instance(Math.max(0, position - 1)).value(dateIdx)); gapBeginMarker.setPaint(Color.RED); gapBeginMarker.setLabel("Gap begin"); gapBeginMarker.setLabelAnchor(RectangleAnchor.TOP_LEFT); gapBeginMarker.setLabelTextAnchor(TextAnchor.TOP_RIGHT); cp.getChart().getXYPlot().addDomainMarker(gapBeginMarker); final Marker gapEndMarker = new ValueMarker(dataSetWithTheGap .instance(Math.min(dataSetWithTheGap.numInstances() - 1, position + gapsize)).value(dateIdx)); gapEndMarker.setPaint(Color.RED); gapEndMarker.setLabel("Gap end"); gapEndMarker.setLabelAnchor(RectangleAnchor.TOP_RIGHT); gapEndMarker.setLabelTextAnchor(TextAnchor.TOP_LEFT); cp.getChart().getXYPlot().addDomainMarker(gapEndMarker); addExportPopupMenu(diff, cp); return cp; }
From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java
License:Open Source License
/** * /*from ww w .j a v a 2 s .c o m*/ * @return */ public Instances getModifiedInstances() { //Copy attribute list (and change categorical by numerical) final ArrayList<Attribute> lAttrs = new ArrayList<Attribute>(); for (int i = 0; i < instances.numAttributes(); i++) { Attribute attr = instances.attribute(i); if (attr.isNumeric() || attr.index() == instances.classIndex()) { lAttrs.add(attr); } else { Attribute newAttr = new Attribute(attr.name()); lAttrs.add(newAttr); } } //Build new instance final Instances newInstances = new Instances("Shih instance", lAttrs, instances.numInstances()); newInstances.setClassIndex(instances.classIndex()); for (int i = 0; i < instances.numInstances(); i++) { final Instance instance = instances.instance(i); final Instance cpyInstance = (Instance) instance.copy(); for (int j = 0; j < instance.numAttributes(); j++) { Attribute attribute = instance.attribute(j); int k = 0; if (attribute.index() == instances.classIndex()) { //The class index is nominal cpyInstance.setValue(attribute, instance.stringValue(j)); } else if (!attribute.isNumeric()) { String elt = attribute.value((int) instance.value(j)); cpyInstance.setValue(attribute, F.get(new TupleSI(elt, j))); } else { if (maxNum[k] > 1) { cpyInstance.setValue(attribute, instance.value(j) / maxNum[k]); } k++; } } newInstances.add(cpyInstance); } if (ignoreClass && instances.classIndex() != -1) { newInstances.deleteAttributeAt(instances.classIndex()); } return newInstances; }
From source file:machinelearningproject.RFTree.java
@Override public Tree buildTree(Instances instances) throws Exception { Tree tree = new Tree(); ArrayList<String> availableAttributes = new ArrayList(); int largestInfoGainAttrIdx = -1; double largestInfoGainAttrValue = 0.0; //choose random fraction int numAttr = instances.numAttributes(); int k = (int) round(sqrt(numAttr)); ArrayList<Integer> randomIdx = randomFraction(numAttr); for (int idx = 0; idx < k; idx++) { if (idx != instances.classIndex()) { availableAttributes.add(instances.attribute(idx).name()); }/*w w w . j av a2s . c o m*/ } if (instances.numInstances() == 0) { return null; } else if (calculateClassEntropy(instances) == 0.0) { // all examples have the sama classification tree.attributeName = instances.get(0).stringValue(instances.classIndex()); } else if (availableAttributes.isEmpty()) { // mode classification tree.attributeName = getModeClass(instances, instances.classIndex()); } else { for (int idx = 0; idx < instances.numAttributes(); idx++) { if (idx != instances.classIndex()) { double attrInfoGain = calculateInformationGain(instances, idx, instances.classIndex()); if (largestInfoGainAttrValue < attrInfoGain) { largestInfoGainAttrIdx = idx; largestInfoGainAttrValue = attrInfoGain; } } } if (largestInfoGainAttrIdx != -1) { tree.attributeName = instances.attribute(largestInfoGainAttrIdx).name(); ArrayList<String> attrValues = new ArrayList(); for (int i = 0; i < instances.numInstances(); i++) { Instance instance = instances.get(i); String attrValue = instance.stringValue(largestInfoGainAttrIdx); if (attrValues.isEmpty() || !attrValues.contains(attrValue)) { attrValues.add(attrValue); } } for (String attrValue : attrValues) { Node node = new Node(attrValue); Instances copyInstances = new Instances(instances); copyInstances.setClassIndex(instances.classIndex()); int i = 0; while (i < copyInstances.numInstances()) { Instance instance = copyInstances.get(i); // reducing examples if (!instance.stringValue(largestInfoGainAttrIdx).equals(attrValue)) { 
copyInstances.delete(i); i--; } i++; } copyInstances.deleteAttributeAt(largestInfoGainAttrIdx); node.subTree = buildTree(copyInstances); tree.nodes.add(node); } } } return tree; }
From source file:machinelearningproject.Tree.java
public Tree buildTree(Instances instances) throws Exception { Tree tree = new Tree(); ArrayList<String> availableAttributes = new ArrayList(); int largestInfoGainAttrIdx = -1; double largestInfoGainAttrValue = 0.0; for (int idx = 0; idx < instances.numAttributes(); idx++) { if (idx != instances.classIndex()) { availableAttributes.add(instances.attribute(idx).name()); }/*from ww w . j a v a 2s. c o m*/ } if (instances.numInstances() == 0) { return null; } else if (calculateClassEntropy(instances) == 0.0) { // all examples have the sama classification tree.attributeName = instances.get(0).stringValue(instances.classIndex()); } else if (availableAttributes.isEmpty()) { // mode classification tree.attributeName = getModeClass(instances, instances.classIndex()); } else { for (int idx = 0; idx < instances.numAttributes(); idx++) { if (idx != instances.classIndex()) { double attrInfoGain = calculateInformationGain(instances, idx, instances.classIndex()); if (largestInfoGainAttrValue < attrInfoGain) { largestInfoGainAttrIdx = idx; largestInfoGainAttrValue = attrInfoGain; } } } if (largestInfoGainAttrIdx != -1) { tree.attributeName = instances.attribute(largestInfoGainAttrIdx).name(); ArrayList<String> attrValues = new ArrayList(); for (int i = 0; i < instances.numInstances(); i++) { Instance instance = instances.get(i); String attrValue = instance.stringValue(largestInfoGainAttrIdx); if (attrValues.isEmpty() || !attrValues.contains(attrValue)) { attrValues.add(attrValue); } } for (String attrValue : attrValues) { Node node = new Node(attrValue); Instances copyInstances = new Instances(instances); copyInstances.setClassIndex(instances.classIndex()); int i = 0; while (i < copyInstances.numInstances()) { Instance instance = copyInstances.get(i); // reducing examples if (!instance.stringValue(largestInfoGainAttrIdx).equals(attrValue)) { copyInstances.delete(i); i--; } i++; } copyInstances.deleteAttributeAt(largestInfoGainAttrIdx); node.subTree = buildTree(copyInstances); 
tree.nodes.add(node); } } } return tree; }
From source file:meka.core.CCUtils.java
License:Open Source License
/** * LinkTransform - prepare 'D' for training at a node 'j' of the chain, by excluding 'exl'. * @param D dataset//from w ww.ja v a 2 s. c o m * @param j index of the label of this node * @param exl indices of labels which are NOT parents of j * @return the transformed dataset (which can be used as a template) */ public static Instances linkTransform(Instances D, int j, int exl[]) { Instances D_j = new Instances(D); D_j.setClassIndex(-1); // delete all the attributes (and track where our index ends up) int ndx = j; for (int i = exl.length - 1; i >= 0; i--) { D_j.deleteAttributeAt(exl[i]); if (exl[i] < ndx) ndx--; } D_j.setClassIndex(ndx); return D_j; }
From source file:meka.core.MLUtils.java
License:Open Source License
/** * Delete attributes from a dataset 'D' indexed by 'indicesToRemove[]'. * @param D dataset/*w w w .j a va2 s . c o m*/ * @param indicesToRemove array of attribute indices * @return the modified dataset */ public static final Instances deleteAttributesAt(Instances D, int indicesToRemove[]) {//, boolean keep) { Arrays.sort(indicesToRemove); for (int j = indicesToRemove.length - 1; j >= 0; j--) { D.deleteAttributeAt(indicesToRemove[j]); } return D; }