Example usage for weka.core Instances add

List of usage examples for weka.core.Instances.add

Introduction

On this page you can find example usages of weka.core.Instances.add.

Prototype

@Override
public boolean add(Instance instance) 

Source Link

Document

Adds one instance to the end of the set.

Usage

From source file:lu.lippmann.cdb.ext.hydviga.cbr.GapFillingKnowledgeDBAnalyzer.java

License:Open Source License

/**
 * Main method./*www  . j a v  a 2  s .  c o m*/
 * @param args command line arguments
 */
public static void main(final String[] args) {
    try {
        HydroRunner.init(false);

        Instances newkdb = new Instances(GapFillingKnowledgeDB.getKnowledgeDB());

        System.out.println("Considered fictive gaps -> " + getCountOfFictiveGaps(newkdb));

        System.out.println(newkdb.toSummaryString());

        newkdb = WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,
                newkdb.attribute("useDownstream").index(), "false");
        newkdb = WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,
                newkdb.attribute("useUpstream").index(), "false");
        //newkdb=WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,newkdb.attribute("useNearest").index(),"false");
        //newkdb=WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,newkdb.attribute("useMostSimilar").index(),"false");

        //System.out.println(newkdb.toSummaryString());

        Instances withGoodNashSutcliffe = new Instances(newkdb, 0);
        for (int i = 0; i < newkdb.numInstances(); i++) {
            if (newkdb.instance(i).value(newkdb.attribute("NashSutcliffe").index()) > 0.5d) {
                withGoodNashSutcliffe.add(new DenseInstance(1d, newkdb.instance(i).toDoubleArray()));
            }
        }

        System.out.println(withGoodNashSutcliffe.numInstances() + " / " + newkdb.numInstances());

        final double perc = (double) getCountOfFictiveGaps(withGoodNashSutcliffe)
                / getCountOfFictiveGaps(newkdb);
        System.out.println("Fictive gaps that are infilled with a good Nash-Sutcliffe -> "
                + getCountOfFictiveGaps(withGoodNashSutcliffe) + " (" + perc + "%)");

        WekaDataAccessUtil.saveInstancesIntoARFFFile(withGoodNashSutcliffe,
                new File("./withGoodNashSutcliffe.arff"));
    } catch (final Exception e) {
        e.printStackTrace();
    }
}

From source file:lu.lippmann.cdb.ext.hydviga.data.StationsDataProvider.java

License:Open Source License

/**
 * Builds the data set shown on the map: one row per distinct coordinate pair found in
 * {@code coordinatesMap}, with the attributes "name", "x", "y" and "status".
 * "status" is set as the class attribute.
 *
 * @param sel    keys of {@code coordinatesMap} whose coordinates get SELECTED_STATUS
 * @param usable keys of {@code coordinatesMap} whose coordinates get USABLE_STATUS
 *               (unless they are also selected)
 * @return the newly built data set
 */
private Instances getDataSetForMap(final Collection<String> sel, final Collection<String> usable) {
    final Instances mapData = new Instances("ds", new ArrayList<Attribute>(), 0);

    // Attributes are appended one by one, each at the current end of the header.
    final Attribute nameAttr = new Attribute("name", new ArrayList<String>(this.coordinatesMap.keySet()));
    mapData.insertAttributeAt(nameAttr, mapData.numAttributes());
    mapData.insertAttributeAt(new Attribute("x"), mapData.numAttributes());
    mapData.insertAttributeAt(new Attribute("y"), mapData.numAttributes());
    final Attribute statusAttr = new Attribute("status",
            Arrays.asList(SELECTED_STATUS, USABLE_STATUS, NOT_USABLE_STATUS));
    mapData.insertAttributeAt(statusAttr, mapData.numAttributes());
    mapData.setClassIndex(mapData.numAttributes() - 1);

    // Coordinates are compared through their "x-y" string form.
    final Set<String> selectedKeys = new HashSet<String>();
    for (final String selectedName : sel) {
        final double[] xy = coordinatesMap.get(selectedName);
        selectedKeys.add(xy[0] + "-" + xy[1]);
    }
    final Set<String> usableKeys = new HashSet<String>();
    for (final String usableName : usable) {
        final double[] xy = coordinatesMap.get(usableName);
        usableKeys.add(xy[0] + "-" + xy[1]);
    }

    // Only the first entry seen for a given coordinate pair produces a row.
    final Set<String> seenKeys = new HashSet<String>();
    for (final Map.Entry<String, double[]> station : this.coordinatesMap.entrySet()) {
        final double[] xy = station.getValue();
        final String key = xy[0] + "-" + xy[1];
        if (!seenKeys.contains(key)) {
            final String status;
            if (selectedKeys.contains(key)) {
                status = SELECTED_STATUS;
            } else if (usableKeys.contains(key)) {
                status = USABLE_STATUS;
            } else {
                status = NOT_USABLE_STATUS;
            }

            final Instance row = new DenseInstance(1.0d, new double[] { 0d, 0d, 0d, 0d });
            // The data set must be attached before nominal values can be set by label.
            row.setDataset(mapData);
            row.setValue(0, station.getKey());
            row.setValue(1, xy[0]);
            row.setValue(2, xy[1]);
            row.setValue(3, status);
            mapData.add(row);
            seenKeys.add(key);
        }
    }

    return mapData;
}

From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFillerClassifier.java

License:Open Source License

/**
 * {@inheritDoc}/*from  w  w w.  j  av a 2  s .co  m*/
 */
@Override
Instances fillGaps0(final Instances ds) throws Exception {
    final Instances newds = WekaDataProcessingUtil.buildDataSetWithoutConstantAttributes(ds);

    final int attrWithMissingIdx = WekaDataStatsUtil.getFirstAttributeWithMissingValue(newds);
    if (attrWithMissingIdx == -1)
        throw new IllegalStateException();

    final Instances trainingSet = new Instances(newds, 0);
    for (int i = 0; i < newds.numInstances(); i++) {
        if (!newds.instance(i).hasMissingValue())
            trainingSet.add(newds.instance(i));
    }
    //System.out.println(trainingSet);      
    trainingSet.setClassIndex(attrWithMissingIdx);

    //System.out.println("Training (size="+trainingSet.numInstances()+") ...");      
    this.classifier.buildClassifier(trainingSet);
    //System.out.println("... trained!");

    newds.setClassIndex(attrWithMissingIdx);
    for (int i = 0; i < newds.numInstances(); i++) {
        if (newds.instance(i).isMissing(attrWithMissingIdx)) {
            final Instance newrecord = new DenseInstance(newds.instance(i));
            newrecord.setDataset(newds);
            final double newval = this.classifier.classifyInstance(newrecord);
            newds.instance(i).setValue(attrWithMissingIdx, newval);
        }
    }

    //System.out.println("initial -> "+ds.toSummaryString());
    //System.out.println("corrected -> "+newds.toSummaryString());

    this.model = this.classifier.toString();

    return newds;
}

From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFillerRegressions.java

License:Open Source License

/**
 * {@inheritDoc}
 *
 * <p>Fits a {@code Regression} on the rows without any missing value, targeting the
 * first attribute that has a missing value, then replaces each missing value of that
 * attribute with the value computed from the regression coefficients.
 */
@Override
Instances fillGaps0(final Instances ds) throws Exception {
    final Instances cleaned = WekaDataProcessingUtil.buildDataSetWithoutConstantAttributes(ds);
    final int rowCount = cleaned.numInstances();

    final int targetIdx = WekaDataStatsUtil.getFirstAttributeWithMissingValue(cleaned);
    if (targetIdx == -1) {
        throw new IllegalStateException();
    }

    // Training data: every row that has no missing value at all.
    final Instances completeRows = new Instances(cleaned, 0);
    for (int r = 0; r < rowCount; r++) {
        if (!cleaned.instance(r).hasMissingValue()) {
            completeRows.add(cleaned.instance(r));
        }
    }

    final Regression regression = new Regression(completeRows, targetIdx);
    final double[] coefficients = regression.getCoe();

    final int attrCount = completeRows.numAttributes();
    for (int r = 0; r < rowCount; r++) {
        if (!cleaned.instance(r).isMissing(targetIdx)) {
            continue;
        }
        // Starting value is coefficients[0] (presumably the intercept -- confirm against Regression).
        double estimate = coefficients[0];
        // NOTE(review): the loop starts at attribute 1, so attribute 0 never contributes -- confirm intent.
        for (int a = 1; a < attrCount; a++) {
            if (a != targetIdx) {
                final String attrName = completeRows.attribute(a).name();
                estimate += regression.getCoef(attrName)
                        * cleaned.instance(r).value(cleaned.attribute(attrName));
            }
        }
        cleaned.instance(r).setValue(targetIdx, estimate);
    }

    this.model = regression.getCoeDesc();

    return cleaned;
}

From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java

License:Open Source License

/**
 * Builds a numeric version of {@code instances}: every non-numeric, non-class attribute
 * is replaced by a numeric attribute of the same name whose values come from {@code F},
 * and numeric attributes are divided by their {@code maxNum} entry when it exceeds 1.
 *
 * <p>When {@code ignoreClass} is set and a class index is defined, the class attribute
 * is removed from the result, which then has no class index.
 *
 * @return the newly built data set; {@code instances} itself is not modified
 */
public Instances getModifiedInstances() {
    final int classIdx = instances.classIndex();

    //Copy attribute list (and change categorical by numerical)
    final ArrayList<Attribute> lAttrs = new ArrayList<Attribute>();
    for (int i = 0; i < instances.numAttributes(); i++) {
        final Attribute attr = instances.attribute(i);
        if (attr.isNumeric() || attr.index() == classIdx) {
            lAttrs.add(attr);
        } else {
            lAttrs.add(new Attribute(attr.name()));
        }
    }

    //Build new instance
    final Instances newInstances = new Instances("Shih instance", lAttrs, instances.numInstances());
    newInstances.setClassIndex(classIdx);
    for (int i = 0; i < instances.numInstances(); i++) {
        final Instance instance = instances.instance(i);
        final Instance cpyInstance = (Instance) instance.copy();
        // Counter of the numeric (non-class) attributes seen so far in this row. It must
        // live outside the attribute loop: when it was re-declared on every iteration it
        // was always 0, so every numeric attribute was scaled by maxNum[0].
        // NOTE(review): assumes maxNum is indexed by that ordinal -- confirm where it is filled.
        int k = 0;
        for (int j = 0; j < instance.numAttributes(); j++) {
            final Attribute attribute = instance.attribute(j);
            if (attribute.index() == classIdx) {
                //The class index is nominal
                cpyInstance.setValue(attribute, instance.stringValue(j));
            } else if (!attribute.isNumeric()) {
                final String elt = attribute.value((int) instance.value(j));
                cpyInstance.setValue(attribute, F.get(new TupleSI(elt, j)));
            } else {
                if (maxNum[k] > 1) {
                    cpyInstance.setValue(attribute, instance.value(j) / maxNum[k]);
                }
                k++;
            }
        }
        newInstances.add(cpyInstance);
    }

    if (ignoreClass && classIdx != -1) {
        // Weka refuses to delete the attribute that is currently the class
        // (IllegalArgumentException), so unset the class index first.
        newInstances.setClassIndex(-1);
        newInstances.deleteAttributeAt(classIdx);
    }
    return newInstances;
}

From source file:lu.lippmann.cdb.lab.mds.MDSResult.java

License:Open Source License

/**
 * Converts the first two columns of {@code coordinates} into a data set named
 * "Projection dataset" with the numeric attributes "X" and "Y", one row per matrix row.
 *
 * @return the newly built data set
 */
public Instances buildInstancesFromMatrix() {
    final ArrayList<Attribute> axes = new ArrayList<Attribute>();
    axes.add(new Attribute("X", 0));
    axes.add(new Attribute("Y", 1));

    final int rowCount = coordinates.numRows();
    final Instances projection = new Instances("Projection dataset", axes, rowCount);
    for (int row = 0; row < rowCount; row++) {
        final double[] xy = { coordinates.get(row, 0), coordinates.get(row, 1) };
        projection.add(new DenseInstance(1.0d, xy));
    }
    return projection;
}

From source file:lu.lippmann.cdb.lab.mds.MDSResult.java

License:Open Source License

/**
 * Returns the instances to display: the centroids when {@code cInstances} is collapsed,
 * otherwise the instances held by {@code cInstances} as they are.
 *
 * @return a new data set containing the centroids, or the original instances
 */
public Instances getCollapsedInstances() {
    if (!cInstances.isCollapsed()) {
        return cInstances.getInstances();
    }

    final List<Instance> centroids = cInstances.getCentroidMap().getCentroids();
    // Same header as the original instances, initially empty.
    final Instances collapsed = new Instances(cInstances.getInstances(), 0);
    for (final Instance centroid : centroids) {
        collapsed.add(centroid);
    }
    return collapsed;
}

From source file:lu.lippmann.cdb.lab.mds.MDSViewBuilder.java

License:Open Source License

/**
 * Builds a panel holding a scatter plot of an MDS result and a "Select data" button.
 *
 * <p>Pressing the button collects the instances whose projected coordinates lie inside
 * the currently visible axis ranges and passes them to {@code listener}.
 *
 * @param instances                 data set the selected rows are taken from when the result is not collapsed
 * @param mdsResult                 MDS result providing the coordinates and the (possibly collapsed) instances
 * @param maxInstances              value shown in the chart title when the result is collapsed
 * @param listener                  receives the selection; may be {@code null}, in which case nothing is notified
 * @param attrNameToUseAsPointTitle forwarded to {@code buildFilteredSeries}; the legend is shown only when empty
 * @return the assembled panel
 * @throws Exception declared by the signature; the visible code does not show which call raises it
 */
public static JXPanel buildMDSViewFromDataSet(final Instances instances, final MDSResult mdsResult,
        final int maxInstances, final Listener<Instances> listener, final String... attrNameToUseAsPointTitle)
        throws Exception {

    final XYSeriesCollection seriesCollection = new XYSeriesCollection();

    final boolean showLegend = attrNameToUseAsPointTitle.length == 0;
    final JFreeChart chart = ChartFactory.createScatterPlot("", "X", "Y", seriesCollection,
            PlotOrientation.VERTICAL, showLegend, true /* tooltips */, false /* URLs */);

    final XYPlot xyPlot = (XYPlot) chart.getPlot();
    xyPlot.setBackgroundPaint(Color.WHITE);
    xyPlot.getDomainAxis().setTickLabelsVisible(false);
    xyPlot.getRangeAxis().setTickLabelsVisible(false);

    //FIXME : should be different for Shih
    final String title;
    if (mdsResult.isNormalized()) {
        title = mdsResult.getCInstances().isCollapsed()
                ? "Collapsed MDS(Instances=" + maxInstances + ")"
                : "MDS";
    } else {
        // The stress value is only part of the title for non-normalized results.
        final String stress = FormatterUtil.DECIMAL_FORMAT
                .format(ClassicMDS.getKruskalStressFromMDSResult(mdsResult));
        title = mdsResult.getCInstances().isCollapsed()
                ? "Collapsed MDS(Instances=" + maxInstances + ",Stress=" + stress + ")"
                : "MDS(Stress=" + stress + ")";
    }
    chart.setTitle(title);

    final SimpleMatrix coordinates = mdsResult.getCoordinates();
    buildFilteredSeries(mdsResult, xyPlot, attrNameToUseAsPointTitle);

    final ChartPanel chartPanel = new ChartPanel(chart);
    chartPanel.setMouseWheelEnabled(true);
    chartPanel.setPreferredSize(new Dimension(1200, 900));
    chartPanel.setBorder(new TitledBorder("MDS Projection"));
    chartPanel.setBackground(Color.WHITE);

    final JButton selectionButton = new JButton("Select data");
    selectionButton.addActionListener(new ActionListener() {

        @Override
        public void actionPerformed(ActionEvent e) {
            // Visible ranges at the time of the click (they change when the user zooms).
            final org.jfree.data.Range visibleX = xyPlot.getDomainAxis().getRange();
            final org.jfree.data.Range visibleY = xyPlot.getRangeAxis().getRange();

            final Instances displayed = mdsResult.getCollapsedInstances();
            final Instances selection = new Instances(displayed, 0);

            final boolean collapsed = mdsResult.getCInstances().isCollapsed();
            final List<Instances> clusters = collapsed
                    ? mdsResult.getCInstances().getCentroidMap().getClusters()
                    : null;

            final int displayedCount = displayed.numInstances();
            for (int i = 0; i < displayedCount; i++) {
                // NOTE(review): the row is read from 'instances' while the loop bound comes from
                // the displayed (possibly collapsed) set -- confirm both are index-aligned.
                final Instance candidate = instances.instance(i);

                final boolean visible = visibleX.contains(coordinates.get(i, 0))
                        && visibleY.contains(coordinates.get(i, 1));
                if (!visible) {
                    continue;
                }

                if (!collapsed) {
                    selection.add(candidate);
                } else if (clusters != null) {
                    // A visible centroid selects every member of its cluster.
                    for (final Instance member : clusters.get(i)) {
                        selection.add(member);
                    }
                }
            }

            if (listener != null) {
                listener.onAction(selection);
            }
        }
    });

    final JXPanel buttonPanel = new JXPanel();
    buttonPanel.add(selectionButton);

    final JXPanel rootPanel = new JXPanel();
    rootPanel.setLayout(new BorderLayout());
    rootPanel.add(chartPanel, BorderLayout.CENTER);
    rootPanel.add(buttonPanel, BorderLayout.SOUTH);
    return rootPanel;
}

From source file:LVCoref.WekaWrapper.java

License:Open Source License

/**
 * Builds the "Coreferences" data set: one row per (mention, antecedent) pair that is
 * either in the same coreference cluster or less than 10 sentences apart.
 *
 * <p>Attributes, in order: coreference, same_head, distance, exact_match, i_pronoun,
 * j_pronoun, i_proper. Boolean attributes are nominal with the labels "true" and "false".
 *
 * @param docs documents whose mentions are paired
 * @return the newly built data set
 * @throws Exception declared by the signature; the visible code does not show which call raises it
 */
public static Instances toArff(Collection<Document> docs) throws Exception {
    // Label order matters: indexOf("true") is 0 and indexOf("false") is 1.
    final FastVector boolLabels = new FastVector();
    boolLabels.addElement("true");
    boolLabels.addElement("false");

    final FastVector attributes = new FastVector();
    attributes.addElement(new Attribute("coreference", boolLabels));
    attributes.addElement(new Attribute("same_head", boolLabels));
    attributes.addElement(new Attribute("distance"));
    attributes.addElement(new Attribute("exact_match", boolLabels));
    attributes.addElement(new Attribute("i_pronoun", boolLabels));
    attributes.addElement(new Attribute("j_pronoun", boolLabels));
    attributes.addElement(new Attribute("i_proper", boolLabels));

    final Instances data = new Instances("Coreferences", attributes, 0);

    for (Document doc : docs) {
        for (Mention m : doc.mentions) {
            // The walk starts at m.prev() and then continues through prevGold().
            for (Mention ant = m.prev(); ant != null; ant = ant.prevGold()) {
                final boolean sameCluster = ant.corefClusterID == m.corefClusterID;
                if (!(sameCluster || m.sentNum - ant.sentNum < 10)) {
                    continue;
                }

                final double[] row = new double[data.numAttributes()];
                final boolean goldCoreferent =
                        ant.node.goldMention.goldCorefClusterID == m.node.goldMention.goldCorefClusterID;
                row[0] = boolLabels.indexOf(Boolean.toString(goldCoreferent));
                row[1] = boolLabels.indexOf(Boolean.toString(Filter.sameHead(ant, m)));
                row[2] = m.node.id - ant.node.id;
                row[3] = boolLabels.indexOf(Boolean.toString(Filter.exactMatch(ant, m)));
                row[4] = boolLabels.indexOf(Boolean.toString(Filter.pronominal(m)));
                row[5] = boolLabels.indexOf(Boolean.toString(Filter.pronominal(ant)));
                row[6] = boolLabels.indexOf(Boolean.toString(Filter.proper(m)));
                data.add(new Instance(1.0, row));
            }
        }
    }
    return data;
}

From source file:LVCoref.WekaWrapper.java

License:Open Source License

/**
 * Builds the "Coreferences" data set from the features produced by a {@code FeatureFactory}:
 * one attribute per feature plus a final "coreferent" attribute, and one row per accepted
 * (mention, antecedent) pair. The number of rows added is printed to standard output.
 *
 * @param docs documents whose mentions are paired; {@code useGoldClusters()} is called on each
 * @return the newly built data set
 * @throws Exception declared by the signature; the visible code does not show which call raises it
 */
public static Instances toArff2(Collection<Document> docs) throws Exception {
    final FeatureFactory factory = new FeatureFactory();
    factory.init(new Flags());

    // Label order matters: indexOf("true") is 0 and indexOf("false") is 1.
    final FastVector boolLabels = new FastVector();
    boolLabels.addElement("true");
    boolLabels.addElement("false");

    // Called for its side effects before types/featNames are read
    // (presumably it populates them -- confirm against FeatureFactory).
    factory.features(null, null);

    final FastVector attributes = new FastVector();
    final int featureCount = factory.types.size();
    for (int f = 0; f < featureCount; f++) {
        final String featureName = factory.featNames.get(f);
        switch (factory.types.get(f)) {
        case BOOL:
            attributes.addElement(new Attribute(featureName, boolLabels));
            break;
        default:
            attributes.addElement(new Attribute(featureName));
            break;
        }
    }
    attributes.addElement(new Attribute("coreferent", boolLabels));

    final Instances data = new Instances("Coreferences", attributes, 0);
    int rowsAdded = 0;
    for (Document doc : docs) {
        doc.useGoldClusters();

        for (Mention m : doc.mentions) {
            int visited = 0;
            // The walk starts at m.prev() and then continues through prevGold().
            for (Mention ant = m.prev(); ant != null; ant = ant.prevGold()) {
                visited++;
                final boolean sameCluster = ant.corefClusterID == m.corefClusterID;
                // NOTE(review): 'visited' is incremented a second time here whenever the pair is
                // not in the same cluster -- kept as in the original, confirm it is intended.
                if (sameCluster || (visited++ < 5 && (m.sentNum - ant.sentNum < 10))) {
                    final List<String> features = factory.features(m, ant);
                    final double[] row = new double[features.size() + 1];
                    int col = 0;
                    // NOTE(review): every feature is looked up among the "true"/"false" labels, so any
                    // other string is stored as -1 -- confirm this is wanted for non-BOOL features.
                    for (String feature : features) {
                        row[col] = boolLabels.indexOf(feature);
                        col++;
                    }
                    final boolean goldCoreferent =
                            ant.node.goldMention.goldCorefClusterID == m.node.goldMention.goldCorefClusterID;
                    row[row.length - 1] = boolLabels.indexOf(Boolean.toString(goldCoreferent));
                    data.add(new Instance(1.0, row));
                    rowsAdded++;
                }
            }
        }
    }

    System.out.println(rowsAdded);

    return data;
}