Example usage for weka.core Instances numInstances

List of usage examples for weka.core Instances numInstances

Introduction

In this page you can find the example usage for weka.core Instances numInstances.

Prototype


public int numInstances()

Source Link

Document

Returns the number of instances in the dataset.

Usage

From source file:lu.lippmann.cdb.datasetview.tabs.UnsupervisedFeatureEvaluationTabView.java

License:Open Source License

/**
 * Builds a derived dataset with one row per attribute of {@code dataSet}.
 * <p>
 * Each row holds, in order: the source attribute's name, the source attribute's
 * values across all instances of {@code dataSet} (one {@code <i>_eval} column per
 * source instance), and a nominal label taken from {@code possibleValues}. The
 * label column is set as the class attribute of the result.
 *
 * @param dataSet             the source dataset
 * @param possibleValues      the labels of the trailing nominal attribute
 * @param valueForEachFeature for each source attribute index, the index into
 *                            {@code possibleValues} of its label
 * @return the newly built dataset
 * @throws Exception if building the derived dataset fails
 */
private static Instances buildDerivatedDataset(final Instances dataSet, final List<String> possibleValues,
        final List<Integer> valueForEachFeature) throws Exception {
    final int rowCount = dataSet.numInstances();
    final int labelColumn = rowCount + 1;

    // Header: description column, one "<i>_eval" column per source instance, label column.
    final ArrayList<Attribute> columns = new ArrayList<Attribute>(rowCount + 2);
    columns.add(new Attribute(FEATUREDESC_ATTRNAME, (java.util.List<String>) null));
    int col = 0;
    while (col < rowCount) {
        columns.add(new Attribute(col + "_eval"));
        col++;
    }
    columns.add(new Attribute("__", possibleValues));

    final Instances derived = new Instances("unsupervisedFeaturesEval", columns, 0);
    final int featureCount = dataSet.numAttributes();

    // First pass: one row per source attribute, padded with a 0.0 placeholder on each side.
    for (int feature = 0; feature < featureCount; feature++) {
        final double[] padded = ArraysUtil.concat(new double[] { 0.0d },
                ArraysUtil.concat(dataSet.attributeToDoubleArray(feature), new double[] { 0.0d }));
        derived.add(new DenseInstance(1.0d, padded));
    }

    // Second pass: overwrite the two placeholders with the attribute name and its label.
    for (int feature = 0; feature < featureCount; feature++) {
        final String featureName = dataSet.attribute(feature).name();
        final String label = possibleValues.get(valueForEachFeature.get(feature));
        derived.instance(feature).setValue(0, featureName);
        derived.instance(feature).setValue(labelColumn, label);
    }

    derived.setClassIndex(labelColumn);
    return derived;
}

From source file:lu.lippmann.cdb.datasetview.tabs.WeightedMapOfDecisionTreesTabView.java

License:Open Source License

/**
 * {@inheritDoc}
 * <p>
 * Rebuilds the multi-panel shown in {@code panel}: the dataset is split on the
 * values of the currently selected nominal attribute, one decision tree is built
 * per split, and each tree is rendered as a graph sized with the split's share of
 * the dataset. The slider, combo box and check box listeners are re-created on
 * every call so that they capture the current {@code dataSet}.
 */
@Override
public void update0(final Instances dataSet) throws Exception {
    // Remove the previously displayed multi-panel, if there is one.
    if (this.mp != null)
        this.panel.remove(this.mp);

    // Unregister the previous slider listener before installing a fresh one below.
    if (this.cl != null)
        this.slider.removeChangeListener(cl);

    this.cl = new ChangeListener() {
        @Override
        public void stateChanged(final ChangeEvent e) {
            // React only once the user has released the slider.
            if (!slider.getValueIsAdjusting()) {
                // The slider value is divided by 100 before being handed to the factory.
                dtFactory = new J48DecisionTreeFactory(slider.getValue() / 100d, false);
                update(dataSet);
            }
        }
    };
    this.slider.addChangeListener(cl);

    // Use 95% of the current panel size for the graphs.
    final double frameWidth = this.panel.getSize().getWidth() * 0.95d;
    final double frameHeight = this.panel.getSize().getHeight() * 0.95d;

    // Graph component -> integer percentage of the dataset it represents.
    final ListOrderedMap<JComponent, Integer> mapPanels = new ListOrderedMap<JComponent, Integer>();

    // Resolve the attribute to split on: the combo selection when it names an attribute
    // of this dataset, otherwise the class attribute.
    final String oldSelected;
    if (this.attrSelectionCombo.getSelectedItem() == null) {
        oldSelected = dataSet.classAttribute().name();
    } else {
        final Attribute oldAttr = dataSet.attribute(this.attrSelectionCombo.getSelectedItem().toString());
        if (oldAttr != null) {
            oldSelected = oldAttr.name();
        } else {
            oldSelected = dataSet.classAttribute().name();
        }
    }
    final int idx = dataSet.attribute(oldSelected).index();
    final Set<Object> presentValues = WekaDataStatsUtil.getNominalRepartition(dataSet, idx).keySet();
    for (final Object o : presentValues) {
        // Build one tree on the subset of the dataset filtered on this value.
        final Instances part = WekaDataProcessingUtil.filterDataSetOnNominalValue(dataSet, idx, o.toString());
        final DecisionTree dti = dtFactory.buildDecisionTree(part);

        // Share of the dataset covered by this subset, as an integer percentage (integer division).
        final int ratio = 100 * part.numInstances() / dataSet.numInstances();
        final GraphView myGraph = DecisionTreeToGraphViewHelper.buildGraphView(dti, eventPublisher,
                commandDispatcher);
        myGraph.hideSharedLabel();
        myGraph.addMetaInfo("size=" + dti.getSize(), "");
        myGraph.addMetaInfo("depth=" + dti.getDepth(), "");
        myGraph.addMetaInfo("err=" + FormatterUtil.DECIMAL_FORMAT.format(100d * dti.getErrorRate()) + "%", "");

        // "Edit" button that imports this subset's tree into the editor.
        final JButton openInEditorButton = new JButton("Edit");
        openInEditorButton.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                GraphUtil.importDecisionTreeInEditor(dtFactory, part, applicationContext, eventPublisher,
                        commandDispatcher);
            }
        });
        myGraph.addMetaInfoComponent(openInEditorButton);

        // The available width is reduced by 10 per displayed graph, the height by 10.
        myGraph.fitGraphToSubPanel(frameWidth - 10 * presentValues.size(), frameHeight - 10, ratio);
        mapPanels.put((JComponent) myGraph, ratio);

    }
    this.mp = new MultiPanel(mapPanels, (int) frameWidth, (int) frameHeight,
            this.withWeightCheckBox.isSelected());

    this.panel.add(this.mp, BorderLayout.CENTER);

    // The same listener instance is registered on both widgets (see the end of this
    // method), so it is the one removed from each of them here.
    if (this.attrSelectionCombo.getActionListeners().length > 0) {
        this.attrSelectionCombo.removeActionListener(attrSelectionComboListener);
    }
    if (this.withWeightCheckBox.getActionListeners().length > 0) {
        this.withWeightCheckBox.removeActionListener(attrSelectionComboListener);
    }

    // Repopulate the combo while no listener is attached, then restore the selection.
    this.attrSelectionCombo.removeAllItems();
    for (final Attribute attr : WekaDataStatsUtil.getNominalAttributesList(dataSet)) {
        this.attrSelectionCombo.addItem(attr.name());
    }
    this.attrSelectionCombo.setSelectedItem(oldSelected);

    // A change of either widget triggers a refresh with the current dataset.
    this.attrSelectionComboListener = new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            update(dataSet);
        }
    };
    this.attrSelectionCombo.addActionListener(attrSelectionComboListener);
    this.withWeightCheckBox.addActionListener(attrSelectionComboListener);

}

From source file:lu.lippmann.cdb.datasetview.tasks.SetAttributeAsTimestampTask.java

License:Open Source License

/**
 * {@inheritDoc}
 * <p>
 * Asks the user to pick one of the numeric attributes and returns a copy of the
 * dataset with an extra trailing {@code date} attribute (format
 * {@code dd-MM-yyyy HH:mm}) filled from the picked attribute's values. When the
 * dialog yields no selection, the input dataset is returned as is.
 */
@Override
Instances process0(final Instances dataSet) throws Exception {
    final Object[] candidates = WekaDataStatsUtil.getNumericAttributesNames(dataSet).toArray();
    final String chosen = (String) JOptionPane.showInputDialog(null, "Select an attribute:\n",
            "Attribute selection", JOptionPane.PLAIN_MESSAGE, null, candidates, "");

    // Nothing selected: hand the input back untouched.
    if (chosen == null) {
        return dataSet;
    }

    final Instances copy = new Instances(dataSet);
    copy.insertAttributeAt(new Attribute("date", "dd-MM-yyyy HH:mm"), copy.numAttributes());

    final int sourceIdx = copy.attribute(chosen).index();
    final int dateIdx = copy.numAttributes() - 1;
    final int rows = copy.numInstances();

    // Copy the raw numeric value of the chosen attribute into the new date column.
    for (int row = 0; row < rows; row++) {
        copy.instance(row).setValue(dateIdx, copy.instance(row).value(sourceIdx));
    }
    return copy;
}

From source file:lu.lippmann.cdb.ext.hydviga.cbr.GapFillingKnowledgeDB.java

License:Open Source License

/**
 * Appends the described case as an extra CSV row to a copy of the knowledge
 * database, loads the result as a dataset, and returns the 10 nearest neighbours
 * of that case with the case itself inserted at position 0. The summary of the
 * returned dataset is printed on standard output.
 *
 * @return the current case followed by its nearest neighbours
 * @throws Exception if loading the data or computing the neighbours fails
 */
public static Instances findSimilarCases(final String attrname, final double x, final double y, final int year,
        final String season, final int gapSize, final int gapPosition, final boolean isDuringRising,
        final boolean hasDownstream, final boolean hasUpstream, final String flow) throws Exception {
    /* fields of the current case, in the column order of the CSV row */
    final Object[] caseFields = {
            attrname, x, y,
            gapSize, gapPosition,
            season, year,
            isDuringRising, flow,
            hasDownstream, hasUpstream,
            "?", "?", "?", "?", "?", "?",
            0, // MAE
            0, // RMSE
            0, // RSR
            0, // PBIAS
            1, // NS
            1, // IOA
            true // BEST SOLUTION
    };

    /* build the current case as one comma-separated line appended to the database text */
    final StringBuilder csv = new StringBuilder(DATABASE_AS_STRINGBUILDER);
    for (int f = 0; f < caseFields.length; f++) {
        if (f > 0) {
            csv.append(",");
        }
        csv.append(caseFields[f]);
    }
    csv.append("\n");

    final Instances withCase = WekaDataAccessUtil.loadInstancesFromCSVString(csv.toString(), false);

    /* the appended line is the last instance of the loaded dataset */
    final int lastIdx = withCase.numInstances() - 1;
    final Instance currentCase = withCase.instance(lastIdx);

    /* compute NN for the current case */
    final Instances neighbours = WekaMachineLearningUtil.computeNearestNeighbours(withCase, currentCase, 10,
            "2,3,4,6,7,8,9,10,23");
    neighbours.add(0, currentCase);

    System.out.println(neighbours.toSummaryString());

    return neighbours;
}

From source file:lu.lippmann.cdb.ext.hydviga.cbr.GapFillingKnowledgeDBAnalyzer.java

License:Open Source License

/**
 * Counts the distinct (serieName, gapSize, gapPosition) combinations found in
 * the given dataset.
 *
 * @param newkdb the dataset to scan
 * @return the number of distinct combinations
 */
private static int getCountOfFictiveGaps(final Instances newkdb) {
    final Set<String> distinctKeys = new HashSet<String>();
    final int rows = newkdb.numInstances();
    int row = 0;
    while (row < rows) {
        // Key layout: <serieName>-<gapSize>-<gapPosition>
        final StringBuilder key = new StringBuilder();
        key.append(newkdb.instance(row).stringValue(newkdb.attribute("serieName").index()));
        key.append("-");
        key.append(newkdb.instance(row).value(newkdb.attribute("gapSize").index()));
        key.append("-");
        key.append(newkdb.instance(row).value(newkdb.attribute("gapPosition").index()));
        distinctKeys.add(key.toString());
        row++;
    }
    return distinctKeys.size();
}

From source file:lu.lippmann.cdb.ext.hydviga.cbr.GapFillingKnowledgeDBAnalyzer.java

License:Open Source License

/**
 * Main method.
 * <p>
 * Loads the knowledge database, keeps the cases where neither {@code useDownstream}
 * nor {@code useUpstream} is set, extracts those whose {@code NashSutcliffe} value is
 * above 0.5, prints how many fictive gaps they cover (count and percentage), and
 * saves them into {@code ./withGoodNashSutcliffe.arff}.
 *
 * @param args command line arguments
 */
public static void main(final String[] args) {
    try {
        HydroRunner.init(false);

        Instances newkdb = new Instances(GapFillingKnowledgeDB.getKnowledgeDB());

        System.out.println("Considered fictive gaps -> " + getCountOfFictiveGaps(newkdb));

        System.out.println(newkdb.toSummaryString());

        newkdb = WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,
                newkdb.attribute("useDownstream").index(), "false");
        newkdb = WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,
                newkdb.attribute("useUpstream").index(), "false");

        // Keep only the cases with a Nash-Sutcliffe value above 0.5.
        Instances withGoodNashSutcliffe = new Instances(newkdb, 0);
        for (int i = 0; i < newkdb.numInstances(); i++) {
            if (newkdb.instance(i).value(newkdb.attribute("NashSutcliffe").index()) > 0.5d) {
                withGoodNashSutcliffe.add(new DenseInstance(1d, newkdb.instance(i).toDoubleArray()));
            }
        }

        System.out.println(withGoodNashSutcliffe.numInstances() + " / " + newkdb.numInstances());

        // Count each set once; the ratio is scaled by 100 because it is printed with a '%' suffix.
        final int goodGaps = getCountOfFictiveGaps(withGoodNashSutcliffe);
        final int allGaps = getCountOfFictiveGaps(newkdb);
        final double perc = 100d * goodGaps / allGaps;
        System.out.println("Fictive gaps that are infilled with a good Nash-Sutcliffe -> "
                + goodGaps + " (" + perc + "%)");

        WekaDataAccessUtil.saveInstancesIntoARFFFile(withGoodNashSutcliffe,
                new File("./withGoodNashSutcliffe.arff"));
    } catch (final Exception e) {
        e.printStackTrace();
    }
}

From source file:lu.lippmann.cdb.ext.hydviga.data.StationsDataProvider.java

License:Open Source License

/**
 * Builds a non-zoomable scatter-plot panel of the dataset, drawn over the
 * {@code shapeImage} background.
 * <p>
 * Without a class attribute all instances go into a single series; otherwise one
 * series is created per present class value, in sorted order. Tooltips show the
 * HTML-formatted instance behind each point, and points whose attribute 3 equals
 * {@code SELECTED_STATUS} are labelled with attribute 0 minus its last 4 characters.
 *
 * @param dataSet    the dataset to plot
 * @param xidx       index of the attribute used for the x axis
 * @param yidx       index of the attribute used for the y axis
 * @param withLegend whether the chart includes a legend
 * @return the chart panel
 */
private ChartPanel buildMapPanel(final Instances dataSet, final int xidx, final int yidx,
        final boolean withLegend) {
    final XYSeriesCollection data = new XYSeriesCollection();
    // Series index -> instances plotted in that series, in plotting order (class case only).
    final Map<Integer, java.util.List<Instance>> filteredInstances = new HashMap<Integer, java.util.List<Instance>>();
    final int classIndex = dataSet.classIndex();
    if (classIndex < 0) {
        // No class attribute: a single series whose item index equals the instance index.
        final XYSeries series = new XYSeries("Serie", false);
        for (int i = 0; i < dataSet.numInstances(); i++) {
            series.add(dataSet.instance(i).value(xidx), dataSet.instance(i).value(yidx));
        }
        data.addSeries(series);
    } else {
        final Set<String> pvs = new TreeSet<String>(
                WekaDataStatsUtil.getPresentValuesForNominalAttribute(dataSet, classIndex));
        int p = 0;
        for (final String pv : pvs) {
            final XYSeries series = new XYSeries(pv, false);
            for (int i = 0; i < dataSet.numInstances(); i++) {
                if (dataSet.instance(i).stringValue(classIndex).equals(pv)) {
                    if (!filteredInstances.containsKey(p)) {
                        filteredInstances.put(p, new ArrayList<Instance>());
                    }
                    filteredInstances.get(p).add(dataSet.instance(i));

                    series.add(dataSet.instance(i).value(xidx), dataSet.instance(i).value(yidx));
                }
            }
            data.addSeries(series);

            p++;
        }

    }

    final JFreeChart chart = ChartFactory.createScatterPlot(null, // chart title
            dataSet.attribute(xidx).name(), // x axis label
            dataSet.attribute(yidx).name(), // y axis label
            data, // data
            PlotOrientation.VERTICAL, withLegend, // include legend
            true, // tooltips
            false // urls
    );

    final XYPlot xyPlot = (XYPlot) chart.getPlot();
    xyPlot.setBackgroundImage(shapeImage);

    final XYItemRenderer renderer = xyPlot.getRenderer();
    final XYToolTipGenerator gen = new XYToolTipGenerator() {
        @Override
        public String generateToolTip(XYDataset dataset, int series, int item) {
            if (classIndex < 0) {
                return InstanceFormatter.htmlFormat(dataSet.instance(item), true);
            } else {
                return InstanceFormatter.htmlFormat(filteredInstances.get(series).get(item), true);
            }
        }
    };

    xyPlot.getRangeAxis().setVisible(false);
    xyPlot.getDomainAxis().setVisible(false);

    // Fixed axis bounds, presumably matching the extent of the background image -- TODO confirm.
    xyPlot.getRangeAxis().setLowerBound(60000);
    xyPlot.getRangeAxis().setUpperBound(135000);
    xyPlot.getDomainAxis().setLowerBound(45000);
    xyPlot.getDomainAxis().setUpperBound(110000);

    xyPlot.setDomainGridlinesVisible(false);
    xyPlot.setRangeGridlinesVisible(false);

    xyPlot.setBackgroundPaint(Color.white);

    int nbSeries;
    if (classIndex < 0) {
        nbSeries = 1;
    } else {
        nbSeries = filteredInstances.keySet().size();
    }

    for (int i = 0; i < nbSeries; i++) {
        renderer.setSeriesToolTipGenerator(i, gen);
    }

    final XYItemLabelGenerator lg = new XYItemLabelGenerator() {
        @Override
        public String generateLabel(final XYDataset ds, final int series, final int item) {
            // Resolve the instance the same way the tooltip generator does: without a class
            // attribute filteredInstances is empty, so looking the series up there would
            // dereference null.
            final Instance iii;
            if (classIndex < 0) {
                iii = dataSet.instance(item);
            } else {
                iii = filteredInstances.get(series).get(item);
            }
            if (iii.stringValue(3).equals(SELECTED_STATUS)) {
                final String label = iii.stringValue(0);
                // NOTE(review): assumes the label is at least 4 characters long -- confirm.
                return label.substring(0, label.length() - 4);
            } else
                return null;
        }
    };
    xyPlot.getRenderer().setBaseItemLabelGenerator(lg);
    xyPlot.getRenderer().setBaseItemLabelsVisible(true);
    xyPlot.getRenderer().setBaseItemLabelFont(new Font("Tahoma", Font.PLAIN, 12));

    xyPlot.getRenderer().setSeriesPaint(1, Color.BLUE);
    xyPlot.getRenderer().setSeriesPaint(0, new Color(210, 210, 210));
    xyPlot.getRenderer().setSeriesPaint(2, Color.DARK_GRAY);

    final ChartPanel cp = new ChartPanel(chart);
    cp.setDomainZoomable(false);
    cp.setRangeZoomable(false);

    return cp;
}

From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFiller.java

License:Open Source License

/**
 * Fills the gaps of the dataset through {@code fillGaps0}, temporarily replacing
 * the first date attribute with a plain numeric copy of it.
 * <p>
 * The result carries the date in a trailing attribute named
 * {@code <original date name>_new} and is sorted on that attribute.
 *
 * @param ds the dataset to process (it is copied, not modified)
 * @return the processed copy
 * @throws Exception if the dataset has no date attribute, or if processing fails
 */
private Instances fillAllGaps(final Instances ds) throws Exception {
    Instances newds = new Instances(ds);

    final int firstDateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(newds);
    // Check for a missing date attribute BEFORE using the index, so that the caller
    // gets this message instead of a failure from looking up attribute -1.
    if (firstDateIdx == -1) {
        throw new Exception("No date attribute in this dataset!");
    }
    final String datename = newds.attribute(firstDateIdx).name();

    /* add a 'fake numerical' time field */
    newds.insertAttributeAt(new Attribute(datename + "_fake"), newds.numAttributes());
    for (int i = 0; i < newds.numInstances(); i++) {
        newds.instance(i).setValue(newds.numAttributes() - 1, newds.instance(i).value(firstDateIdx));
    }

    /* remove the 'true' time field */
    newds.deleteAttributeAt(firstDateIdx);

    /* process the dataset */
    newds = fillGaps0(newds);

    /* re-add the 'true' time field according to the 'fake numerical' time field */
    final String df = ds.attribute(firstDateIdx).getDateFormat();
    newds.insertAttributeAt(new Attribute(datename + "_new", df), newds.numAttributes());
    for (int i = 0; i < newds.numInstances(); i++) {
        // The new date is the last attribute; the fake numeric time sits just before it.
        newds.instance(i).setValue(newds.numAttributes() - 1,
                newds.instance(i).value(newds.numAttributes() - 2));
    }

    /* delete the 'fake numerical' time field */
    newds.deleteAttributeAt(newds.numAttributes() - 2);

    newds.sort(newds.numAttributes() - 1);

    return newds;
}

From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFiller.java

License:Open Source License

/**
 * Fills the gaps of the dataset through {@code fillGaps0} after enriching it with
 * discretized-time attributes.
 * <p>
 * Steps: add the discretized time, add a numeric copy of the date, remove every
 * date attribute, turn nominal attributes into binaries, rename attributes whose
 * name contains {@code =}, {@code <}, {@code >} or {@code .}, fill the gaps, rebuild
 * the date as {@code <original date name>_new}, sort on it, then drop the
 * {@code t_*} attributes and the numeric date copy.
 *
 * @param ds the dataset to process (it is copied, not modified)
 * @return the processed copy
 * @throws Exception if the dataset has no date attribute, or if processing fails
 */
private Instances fillAllGapsWithDiscretizedTime(final Instances ds) throws Exception {
    int firstDateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(ds);
    // Check for a missing date attribute BEFORE using the index, so that the caller
    // gets this message instead of a failure from looking up attribute -1.
    if (firstDateIdx == -1) {
        throw new Exception("No date attribute in this dataset!");
    }
    final String datename = ds.attribute(firstDateIdx).name();

    Instances newds = new Instances(ds);

    /* add discretized time */
    newds = WekaTimeSeriesUtil.buildDataSetWithDiscretizedTime(newds);

    /* add fake numerical time */
    newds.insertAttributeAt(new Attribute(datename + "_fake"), newds.numAttributes());
    for (int i = 0; i < newds.numInstances(); i++) {
        newds.instance(i).setValue(newds.numAttributes() - 1, newds.instance(i).value(firstDateIdx));
    }

    /* remove 'true' date */
    while (firstDateIdx != -1) {
        newds.deleteAttributeAt(firstDateIdx);
        firstDateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(newds);
    }

    /* transform nominal as binaries */
    for (int iidx : WekaDataStatsUtil.getNominalAttributesIndexes(newds)) {
        newds = WekaDataProcessingUtil.buildDataSetWithNominalAsBinary(newds, iidx);
    }

    /* rename attributes for which the name can occur issues in tree evaluation */
    for (int k = 0; k < newds.numAttributes(); k++) {
        String atn = newds.attribute(k).name();
        // Each offending character is replaced by a random integer in [0, 1000).
        if (atn.contains("="))
            atn = atn.replaceAll("=", (int) (Math.random() * 1000) + "");
        if (atn.contains("<"))
            atn = atn.replaceAll("<", (int) (Math.random() * 1000) + "");
        if (atn.contains(">"))
            atn = atn.replaceAll(">", (int) (Math.random() * 1000) + "");
        if (atn.contains("."))
            atn = atn.replace(".", (int) (Math.random() * 1000) + "");
        newds = WekaDataProcessingUtil.renameAttribute(newds, k, atn);
    }

    /* replace missing values */
    newds = fillGaps0(newds);

    /* reconstruct date according to discretized time */
    final String df = ds.attribute(WekaDataStatsUtil.getFirstDateAttributeIdx(ds)).getDateFormat();
    newds.insertAttributeAt(new Attribute(datename + "_new", df), newds.numAttributes());
    final int newfirstDateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(newds);
    for (int i = 0; i < newds.numInstances(); i++) {
        final Instance inst = newds.instance(i);
        // The fake numeric time is the attribute just before the newly appended date.
        inst.setValue(newfirstDateIdx, newds.instance(i).value(newds.numAttributes() - 2));
    }

    /* sort by date ! */
    newds.sort(newfirstDateIdx);

    /* remove discretized time */
    // Names are collected first because deleting an attribute shifts the indexes.
    final Set<String> toRemove = new HashSet<String>();
    for (int i = 0; i < newds.numAttributes(); i++) {
        if (newds.attribute(i).name().startsWith("t_"))
            toRemove.add(newds.attribute(i).name());
    }
    for (final String tr : toRemove)
        newds.deleteAttributeAt(newds.attribute(tr).index());

    /* delete the fake attribute time */
    newds.deleteAttributeAt(newds.numAttributes() - 2);

    return newds;
}