List of usage examples for weka.core Instances add
@Override public boolean add(Instance instance)
From source file:lu.lippmann.cdb.ext.hydviga.cbr.GapFillingKnowledgeDBAnalyzer.java
License:Open Source License
/** * Main method./*www . j a v a 2 s . c o m*/ * @param args command line arguments */ public static void main(final String[] args) { try { HydroRunner.init(false); Instances newkdb = new Instances(GapFillingKnowledgeDB.getKnowledgeDB()); System.out.println("Considered fictive gaps -> " + getCountOfFictiveGaps(newkdb)); System.out.println(newkdb.toSummaryString()); newkdb = WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb, newkdb.attribute("useDownstream").index(), "false"); newkdb = WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb, newkdb.attribute("useUpstream").index(), "false"); //newkdb=WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,newkdb.attribute("useNearest").index(),"false"); //newkdb=WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,newkdb.attribute("useMostSimilar").index(),"false"); //System.out.println(newkdb.toSummaryString()); Instances withGoodNashSutcliffe = new Instances(newkdb, 0); for (int i = 0; i < newkdb.numInstances(); i++) { if (newkdb.instance(i).value(newkdb.attribute("NashSutcliffe").index()) > 0.5d) { withGoodNashSutcliffe.add(new DenseInstance(1d, newkdb.instance(i).toDoubleArray())); } } System.out.println(withGoodNashSutcliffe.numInstances() + " / " + newkdb.numInstances()); final double perc = (double) getCountOfFictiveGaps(withGoodNashSutcliffe) / getCountOfFictiveGaps(newkdb); System.out.println("Fictive gaps that are infilled with a good Nash-Sutcliffe -> " + getCountOfFictiveGaps(withGoodNashSutcliffe) + " (" + perc + "%)"); WekaDataAccessUtil.saveInstancesIntoARFFFile(withGoodNashSutcliffe, new File("./withGoodNashSutcliffe.arff")); } catch (final Exception e) { e.printStackTrace(); } }
From source file:lu.lippmann.cdb.ext.hydviga.data.StationsDataProvider.java
License:Open Source License
private Instances getDataSetForMap(final Collection<String> sel, final Collection<String> usable) { final Instances ds = new Instances("ds", new ArrayList<Attribute>(), 0); ds.insertAttributeAt(new Attribute("name", new ArrayList<String>(this.coordinatesMap.keySet())), ds.numAttributes());//from www . ja va 2 s . c o m ds.insertAttributeAt(new Attribute("x"), ds.numAttributes()); ds.insertAttributeAt(new Attribute("y"), ds.numAttributes()); ds.insertAttributeAt( new Attribute("status", Arrays.asList(new String[] { SELECTED_STATUS, USABLE_STATUS, NOT_USABLE_STATUS })), ds.numAttributes()); ds.setClassIndex(ds.numAttributes() - 1); final Set<String> coordSelected = new HashSet<String>(); for (final String ssel : sel) { final String coordsKey = coordinatesMap.get(ssel)[0] + "-" + coordinatesMap.get(ssel)[1]; coordSelected.add(coordsKey); } final Set<String> coordUsable = new HashSet<String>(); for (final String uu : usable) { final String coordsKey = coordinatesMap.get(uu)[0] + "-" + coordinatesMap.get(uu)[1]; coordUsable.add(coordsKey); } final Set<String> coordAlreadyLoaded = new HashSet<String>(); for (final Map.Entry<String, double[]> entry : this.coordinatesMap.entrySet()) { final String coordsKey = entry.getValue()[0] + "-" + entry.getValue()[1]; if (coordAlreadyLoaded.contains(coordsKey)) continue; final Instance inst = new DenseInstance(1.0d, new double[] { 0d, 0d, 0d, 0d }); inst.setDataset(ds); inst.setValue(0, entry.getKey()); inst.setValue(1, entry.getValue()[0]); inst.setValue(2, entry.getValue()[1]); //System.out.println(sel+" "+entry.getKey()); inst.setValue(3, (coordSelected.contains(coordsKey)) ? SELECTED_STATUS : ((coordUsable.contains(coordsKey)) ? USABLE_STATUS : NOT_USABLE_STATUS)); ds.add(inst); coordAlreadyLoaded.add(coordsKey); } return ds; }
From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFillerClassifier.java
License:Open Source License
/** * {@inheritDoc}/*from w w w. j av a 2 s .co m*/ */ @Override Instances fillGaps0(final Instances ds) throws Exception { final Instances newds = WekaDataProcessingUtil.buildDataSetWithoutConstantAttributes(ds); final int attrWithMissingIdx = WekaDataStatsUtil.getFirstAttributeWithMissingValue(newds); if (attrWithMissingIdx == -1) throw new IllegalStateException(); final Instances trainingSet = new Instances(newds, 0); for (int i = 0; i < newds.numInstances(); i++) { if (!newds.instance(i).hasMissingValue()) trainingSet.add(newds.instance(i)); } //System.out.println(trainingSet); trainingSet.setClassIndex(attrWithMissingIdx); //System.out.println("Training (size="+trainingSet.numInstances()+") ..."); this.classifier.buildClassifier(trainingSet); //System.out.println("... trained!"); newds.setClassIndex(attrWithMissingIdx); for (int i = 0; i < newds.numInstances(); i++) { if (newds.instance(i).isMissing(attrWithMissingIdx)) { final Instance newrecord = new DenseInstance(newds.instance(i)); newrecord.setDataset(newds); final double newval = this.classifier.classifyInstance(newrecord); newds.instance(i).setValue(attrWithMissingIdx, newval); } } //System.out.println("initial -> "+ds.toSummaryString()); //System.out.println("corrected -> "+newds.toSummaryString()); this.model = this.classifier.toString(); return newds; }
From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFillerRegressions.java
License:Open Source License
/** * {@inheritDoc}//from ww w .jav a2 s . c o m */ @Override Instances fillGaps0(final Instances ds) throws Exception { final Instances newds = WekaDataProcessingUtil.buildDataSetWithoutConstantAttributes(ds); final int numInstances = newds.numInstances(); final int attrWithMissingIdx = WekaDataStatsUtil.getFirstAttributeWithMissingValue(newds); if (attrWithMissingIdx == -1) throw new IllegalStateException(); final Instances trainingSet = new Instances(newds, 0); for (int i = 0; i < numInstances; i++) { if (!newds.instance(i).hasMissingValue()) trainingSet.add(newds.instance(i)); } //System.out.println(trainingSet); final Regression reg = new Regression(trainingSet, attrWithMissingIdx); final double[] coeffs = reg.getCoe(); //System.out.println(reg.getR2()); //System.out.println(reg.getCoeDesc()); for (int i = 0; i < numInstances; i++) { if (newds.instance(i).isMissing(attrWithMissingIdx)) { double newval = coeffs[0]; for (int j = 1; j < trainingSet.numAttributes(); j++) { if (j == attrWithMissingIdx) continue; final String attrName = trainingSet.attribute(j).name(); //System.out.println(reg.getCoef(attrName)+" * "+attrName); newval += reg.getCoef(attrName) * newds.instance(i).value(newds.attribute(attrName)); } //System.out.println("oldval -> "+newds.instance(i).value(attrWithMissingIdx)); //System.out.println("newval -> "+newval); newds.instance(i).setValue(attrWithMissingIdx, newval); } } //System.out.println("corrected -> "+newds); this.model = reg.getCoeDesc(); return newds; }
From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java
License:Open Source License
/** * //from w ww. ja v a 2 s . co m * @return */ public Instances getModifiedInstances() { //Copy attribute list (and change categorical by numerical) final ArrayList<Attribute> lAttrs = new ArrayList<Attribute>(); for (int i = 0; i < instances.numAttributes(); i++) { Attribute attr = instances.attribute(i); if (attr.isNumeric() || attr.index() == instances.classIndex()) { lAttrs.add(attr); } else { Attribute newAttr = new Attribute(attr.name()); lAttrs.add(newAttr); } } //Build new instance final Instances newInstances = new Instances("Shih instance", lAttrs, instances.numInstances()); newInstances.setClassIndex(instances.classIndex()); for (int i = 0; i < instances.numInstances(); i++) { final Instance instance = instances.instance(i); final Instance cpyInstance = (Instance) instance.copy(); for (int j = 0; j < instance.numAttributes(); j++) { Attribute attribute = instance.attribute(j); int k = 0; if (attribute.index() == instances.classIndex()) { //The class index is nominal cpyInstance.setValue(attribute, instance.stringValue(j)); } else if (!attribute.isNumeric()) { String elt = attribute.value((int) instance.value(j)); cpyInstance.setValue(attribute, F.get(new TupleSI(elt, j))); } else { if (maxNum[k] > 1) { cpyInstance.setValue(attribute, instance.value(j) / maxNum[k]); } k++; } } newInstances.add(cpyInstance); } if (ignoreClass && instances.classIndex() != -1) { newInstances.deleteAttributeAt(instances.classIndex()); } return newInstances; }
From source file:lu.lippmann.cdb.lab.mds.MDSResult.java
License:Open Source License
/**
 * Converts the first two columns of {@code coordinates} into a dataset with the two
 * attributes {@code X} and {@code Y}, one instance (weight 1) per matrix row.
 *
 * @return the dataset named "Projection dataset"
 */
public Instances buildInstancesFromMatrix() {
    final ArrayList<Attribute> axes = new ArrayList<Attribute>();
    axes.add(new Attribute("X", 0));
    axes.add(new Attribute("Y", 1));

    final int rowCount = coordinates.numRows();
    final Instances projection = new Instances("Projection dataset", axes, rowCount);

    int row = 0;
    while (row < rowCount) {
        final double[] point = { coordinates.get(row, 0), coordinates.get(row, 1) };
        projection.add(new DenseInstance(1.0d, point));
        row++;
    }
    return projection;
}
From source file:lu.lippmann.cdb.lab.mds.MDSResult.java
License:Open Source License
/**
 * Returns the instances to display: the centroids (in a new dataset sharing the header of the
 * wrapped instances) when {@code cInstances} is collapsed, the wrapped instances otherwise.
 *
 * @return the centroid dataset, or the wrapped dataset itself when not collapsed
 */
public Instances getCollapsedInstances() {
    if (!cInstances.isCollapsed()) {
        return cInstances.getInstances();
    }

    final List<Instance> centroids = cInstances.getCentroidMap().getCentroids();
    final Instances collapsed = new Instances(cInstances.getInstances(), 0);
    for (final Instance centroid : centroids) {
        collapsed.add(centroid);
    }
    return collapsed;
}
From source file:lu.lippmann.cdb.lab.mds.MDSViewBuilder.java
License:Open Source License
/** * // ww w. j a va 2 s .c om */ public static JXPanel buildMDSViewFromDataSet(final Instances instances, final MDSResult mdsResult, final int maxInstances, final Listener<Instances> listener, final String... attrNameToUseAsPointTitle) throws Exception { final XYSeriesCollection dataset = new XYSeriesCollection(); final JFreeChart chart = ChartFactory.createScatterPlot("", // title "X", "Y", // axis labels dataset, // dataset PlotOrientation.VERTICAL, attrNameToUseAsPointTitle.length == 0, // legend? true, // tooltips? yes false // URLs? no ); final XYPlot xyPlot = (XYPlot) chart.getPlot(); xyPlot.setBackgroundPaint(Color.WHITE); xyPlot.getDomainAxis().setTickLabelsVisible(false); xyPlot.getRangeAxis().setTickLabelsVisible(false); //FIXME : should be different for Shih if (!mdsResult.isNormalized()) { String stress = FormatterUtil.DECIMAL_FORMAT .format(ClassicMDS.getKruskalStressFromMDSResult(mdsResult)); chart.setTitle(mdsResult.getCInstances().isCollapsed() ? "Collapsed MDS(Instances=" + maxInstances + ",Stress=" + stress + ")" : "MDS(Stress=" + stress + ")"); } else { chart.setTitle(mdsResult.getCInstances().isCollapsed() ? 
"Collapsed MDS(Instances=" + maxInstances + ")" : "MDS"); } final SimpleMatrix coordinates = mdsResult.getCoordinates(); buildFilteredSeries(mdsResult, xyPlot, attrNameToUseAsPointTitle); final ChartPanel chartPanel = new ChartPanel(chart); chartPanel.setMouseWheelEnabled(true); chartPanel.setPreferredSize(new Dimension(1200, 900)); chartPanel.setBorder(new TitledBorder("MDS Projection")); chartPanel.setBackground(Color.WHITE); final JButton selectionButton = new JButton("Select data"); selectionButton.addActionListener(new ActionListener() { @Override public void actionPerformed(ActionEvent e) { final org.jfree.data.Range XDomainRange = xyPlot.getDomainAxis().getRange(); final org.jfree.data.Range YDomainRange = xyPlot.getRangeAxis().getRange(); final Instances cInstances = mdsResult.getCollapsedInstances(); final Instances selectedInstances = new Instances(cInstances, 0); List<Instances> clusters = null; if (mdsResult.getCInstances().isCollapsed()) { clusters = mdsResult.getCInstances().getCentroidMap().getClusters(); } for (int i = 0; i < cInstances.numInstances(); i++) { final Instance centroid = instances.instance(i); if (XDomainRange.contains(coordinates.get(i, 0)) && YDomainRange.contains(coordinates.get(i, 1))) { if (mdsResult.getCInstances().isCollapsed()) { if (clusters != null) { final Instances elementsOfCluster = clusters.get(i); final int nbElements = elementsOfCluster.numInstances(); for (int k = 0; k < nbElements; k++) { selectedInstances.add(elementsOfCluster.get(k)); } } } else { selectedInstances.add(centroid); } } } if (listener != null) { listener.onAction(selectedInstances); } } }); final JXPanel allPanel = new JXPanel(); allPanel.setLayout(new BorderLayout()); allPanel.add(chartPanel, BorderLayout.CENTER); final JXPanel southPanel = new JXPanel(); southPanel.add(selectionButton); allPanel.add(southPanel, BorderLayout.SOUTH); return allPanel; }
From source file:LVCoref.WekaWrapper.java
License:Open Source License
/**
 * Builds the "Coreferences" dataset with seven attributes (coreference, same_head, distance,
 * exact_match, i_pronoun, j_pronoun, i_proper). For every mention, candidates are walked from
 * {@code m.prev()} through {@code prevGold()}; a row is emitted for each candidate that is in
 * the same coreference cluster or less than 10 sentences before the mention.
 *
 * @param docs documents whose mentions are turned into rows
 * @return the dataset, one instance (weight 1.0) per retained mention pair
 * @throws Exception declared by the original signature
 */
public static Instances toArff(Collection<Document> docs) throws Exception {
    // Labels shared by all boolean attributes: index 0 is "true", index 1 is "false".
    final FastVector true_false = new FastVector();
    true_false.addElement("true");
    true_false.addElement("false");

    final FastVector atts = new FastVector();
    atts.addElement(new Attribute("coreference", true_false));
    atts.addElement(new Attribute("same_head", true_false));
    atts.addElement(new Attribute("distance"));
    atts.addElement(new Attribute("exact_match", true_false));
    atts.addElement(new Attribute("i_pronoun", true_false));
    atts.addElement(new Attribute("j_pronoun", true_false));
    atts.addElement(new Attribute("i_proper", true_false));

    final Instances data = new Instances("Coreferences", atts, 0);

    for (Document d : docs) {
        for (Mention m : d.mentions) {
            for (Mention ant = m.prev(); ant != null; ant = ant.prevGold()) {
                final boolean candidate = ant.corefClusterID == m.corefClusterID
                        || m.sentNum - ant.sentNum < 10;
                if (!candidate) {
                    continue;
                }

                final double[] row = new double[data.numAttributes()];
                row[0] = true_false.indexOf(Boolean.toString(
                        ant.node.goldMention.goldCorefClusterID == m.node.goldMention.goldCorefClusterID));
                row[1] = true_false.indexOf(Boolean.toString(Filter.sameHead(ant, m)));
                row[2] = m.node.id - ant.node.id;
                row[3] = true_false.indexOf(Boolean.toString(Filter.exactMatch(ant, m)));
                row[4] = true_false.indexOf(Boolean.toString(Filter.pronominal(m)));
                row[5] = true_false.indexOf(Boolean.toString(Filter.pronominal(ant)));
                row[6] = true_false.indexOf(Boolean.toString(Filter.proper(m)));
                data.add(new Instance(1.0, row));
            }
        }
    }
    return data;
}
From source file:LVCoref.WekaWrapper.java
License:Open Source License
public static Instances toArff2(Collection<Document> docs) throws Exception { FastVector atts;//from ww w .ja v a2 s .co m Instances data; double[] vals; FastVector attVals; atts = new FastVector(); FeatureFactory ff = new FeatureFactory(); ff.init(new Flags()); FastVector true_false = new FastVector(); true_false.addElement("true"); true_false.addElement("false"); ff.features(null, null); for (int i = 0; i < ff.types.size(); i++) { switch (ff.types.get(i)) { case BOOL: atts.addElement(new Attribute(ff.featNames.get(i), true_false)); break; default: atts.addElement(new Attribute(ff.featNames.get(i))); } } atts.addElement(new Attribute("coreferent", true_false)); //System.out.println(ff.types); //System.out.println(ff.featNames); data = new Instances("Coreferences", atts, 0); int c = 0; for (Document d : docs) { d.useGoldClusters(); for (Mention m : d.mentions) { Mention ant = m.prev(); int cc = 0; while (ant != null) { cc++; if (ant.corefClusterID == m.corefClusterID || cc++ < 5 && m.sentNum - ant.sentNum < 10) { List<String> features = ff.features(m, ant); //System.out.println(features); vals = new double[features.size() + 1]; int idx = 0; for (String fs : features) { vals[idx++] = true_false.indexOf(fs); //System.out.println(fs) } vals[vals.length - 1] = true_false.indexOf(Boolean.toString( ant.node.goldMention.goldCorefClusterID == m.node.goldMention.goldCorefClusterID)); data.add(new Instance(1.0, vals)); c++; //if (ant.corefClusterID == m.corefClusterID) break; } ant = ant.prevGold(); } } } System.out.println(c); return data; }