List of usage examples for the weka.core.Instance method setDataset
public void setDataset(Instances instances);
From source file:focusedCrawler.target.classifier.WekaTargetClassifier.java
License:Open Source License
public double[] distributionForInstance(String target) throws TargetClassifierException { double[] result = null; try {// w ww. j ava2 s. co m double[] values = getValues(new Page(null, target)); weka.core.Instance instanceWeka = new weka.core.Instance(1, values); instanceWeka.setDataset(instances); result = classifier.distributionForInstance(instanceWeka); } catch (Exception ex) { ex.printStackTrace(); throw new TargetClassifierException(ex.getMessage()); } return result; }
From source file:focusedCrawler.target.classifier.WekaTargetClassifier.java
License:Open Source License
public double[] distributionForInstance(Page page) throws TargetClassifierException { double[] result = null; try {//from w ww. j a va 2 s . c o m double[] values = getValues(page); weka.core.Instance instanceWeka = new weka.core.Instance(1, values); instanceWeka.setDataset(instances); result = classifier.distributionForInstance(instanceWeka); } catch (Exception ex) { ex.printStackTrace(); throw new TargetClassifierException(ex.getMessage()); } return result; }
From source file:focusedCrawler.target.TargetClassifierImpl.java
License:Open Source License
public boolean classify(Target target) throws TargetClassifierException { boolean relevant = false; try {//from w w w. java 2 s . com double[] values = getValues(target); weka.core.Instance instanceWeka = new weka.core.Instance(1, values); instanceWeka.setDataset(instances); double classificationResult = classifier.classifyInstance(instanceWeka); if (classificationResult == 0) { relevant = true; } else { relevant = false; } } catch (Exception ex) { ex.printStackTrace(); throw new TargetClassifierException(ex.getMessage()); } return relevant; }
From source file:focusedCrawler.target.TargetClassifierImpl.java
License:Open Source License
public double[] distributionForInstance(Target target) throws TargetClassifierException { double[] result = null; try {//w w w . j a v a2 s.com double[] values = getValues(target); weka.core.Instance instanceWeka = new weka.core.Instance(1, values); instanceWeka.setDataset(instances); result = classifier.distributionForInstance(instanceWeka); } catch (Exception ex) { ex.printStackTrace(); throw new TargetClassifierException(ex.getMessage()); } return result; }
From source file:form.ml.ClassifierTemplate.java
/**
 * Builds a Weka {@code Instance} holding the given text.
 *
 * <p>The text is registered as a string value of the {@code "text"} attribute of the
 * {@code train} dataset, and the resulting instance is bound to that dataset.
 *
 * @param text the string to convert
 * @return a two-attribute instance whose {@code "text"} attribute refers to {@code text}
 */
private Instance makeInstance(String text) {
    final Instance result = new Instance(2);
    final Attribute textAttribute = train.attribute("text");
    // addStringValue returns the index under which the string is stored in the attribute.
    final int storedIndex = textAttribute.addStringValue(text);
    result.setValue(textAttribute, storedIndex);
    result.setDataset(train);
    return result;
}
From source file:fr.loria.synalp.jtrans.phonetiseur.Classifieurs.java
License:Open Source License
/** * => on peut passer null au parametre Postag si on ne veut pas remplir le champ *///from ww w . j ava 2s . c o m public AlignementGraphemesPhonemes phonetiser(String[] tGraphemes, String Postag) throws Exception { ArrayList<String> alGraphemes = new ArrayList<String>(); ArrayList<String> alPhonemes = new ArrayList<String>(); String graphemeCourant, doublePhoneme1er; Instance instance; int indiceGraphemeCourant; for (int i = 0; i < tGraphemes.length; i++) { // Est-ce-que le grapheme i est un simple ou double phoneme ? instance = new Instance(11); instance.setDataset(instancesSimpleOuDoublePhoneme); setval(instance, 0, tGraphemes[i]); setval(instance, 1, (i >= 1) ? tGraphemes[i - 1] : Configuration.GRAPHEME_DEBUT_DE_MOT_VECTEUR); setval(instance, 2, (i >= 2) ? tGraphemes[i - 2] : Configuration.GRAPHEME_DEBUT_DE_MOT_VECTEUR); setval(instance, 3, (i >= 3) ? tGraphemes[i - 3] : Configuration.GRAPHEME_DEBUT_DE_MOT_VECTEUR); setval(instance, 4, (i >= 4) ? tGraphemes[i - 4] : Configuration.GRAPHEME_DEBUT_DE_MOT_VECTEUR); setval(instance, 5, (i < tGraphemes.length - 1) ? tGraphemes[i + 1] : Configuration.GRAPHEME_FIN_DE_MOT_VECTEUR); setval(instance, 6, (i < tGraphemes.length - 2) ? tGraphemes[i + 2] : Configuration.GRAPHEME_FIN_DE_MOT_VECTEUR); setval(instance, 7, (i < tGraphemes.length - 3) ? tGraphemes[i + 3] : Configuration.GRAPHEME_FIN_DE_MOT_VECTEUR); setval(instance, 8, (i < tGraphemes.length - 4) ? 
tGraphemes[i + 4] : Configuration.GRAPHEME_FIN_DE_MOT_VECTEUR); if (Postag != null) { setval(instance, 9, Postag); } instance = appliquerFiltreAUneInstance(filtreSimpleOuDoublePhoneme, instance, instancesSimpleOuDoublePhoneme); if (resultatClassifieur(instance, classifieurSimpleOuDoublePhoneme, instancesSimpleOuDoublePhoneme) .equals(Configuration.VALEUR_SORTIE_VECTEUR_SIMPLE_PHONEME)) { // Cas d'un simple phoneme graphemeCourant = tGraphemes[i]; indiceGraphemeCourant = lexique.getIndiceFromGrapheme(graphemeCourant); if (indiceGraphemeCourant < 0) { System.err.println("unknown grapheme " + graphemeCourant); continue; } instance = new Instance(11); instance.setDataset(tInstancesSimplePhoneme[indiceGraphemeCourant]); setval(instance, 0, tGraphemes[i]); setval(instance, 1, (i >= 1) ? tGraphemes[i - 1] : Configuration.GRAPHEME_DEBUT_DE_MOT_VECTEUR); setval(instance, 2, (i >= 2) ? tGraphemes[i - 2] : Configuration.GRAPHEME_DEBUT_DE_MOT_VECTEUR); setval(instance, 3, (i >= 3) ? tGraphemes[i - 3] : Configuration.GRAPHEME_DEBUT_DE_MOT_VECTEUR); setval(instance, 4, (i >= 4) ? tGraphemes[i - 4] : Configuration.GRAPHEME_DEBUT_DE_MOT_VECTEUR); setval(instance, 5, (i < tGraphemes.length - 1) ? tGraphemes[i + 1] : Configuration.GRAPHEME_FIN_DE_MOT_VECTEUR); setval(instance, 6, (i < tGraphemes.length - 2) ? tGraphemes[i + 2] : Configuration.GRAPHEME_FIN_DE_MOT_VECTEUR); setval(instance, 7, (i < tGraphemes.length - 3) ? tGraphemes[i + 3] : Configuration.GRAPHEME_FIN_DE_MOT_VECTEUR); setval(instance, 8, (i < tGraphemes.length - 4) ? 
tGraphemes[i + 4] : Configuration.GRAPHEME_FIN_DE_MOT_VECTEUR); if (Postag != null) { setval(instance, 9, Postag); } instance = appliquerFiltreAUneInstance(filtreSimplePhoneme, instance, tInstancesSimplePhoneme[indiceGraphemeCourant]); alGraphemes.add(graphemeCourant); alPhonemes.add(resultatClassifieur(instance, tClassifieurSimplePhoneme[indiceGraphemeCourant], tInstancesSimplePhoneme[indiceGraphemeCourant])); } else { // Cas d'un double phoneme // Double phoneme 1 instance = new Instance(11); instance.setDataset(instancesDoublePhoneme1er); setval(instance, 0, tGraphemes[i]); setval(instance, 1, (i >= 1) ? tGraphemes[i - 1] : Configuration.GRAPHEME_DEBUT_DE_MOT_VECTEUR); setval(instance, 2, (i >= 2) ? tGraphemes[i - 2] : Configuration.GRAPHEME_DEBUT_DE_MOT_VECTEUR); setval(instance, 3, (i >= 3) ? tGraphemes[i - 3] : Configuration.GRAPHEME_DEBUT_DE_MOT_VECTEUR); setval(instance, 4, (i >= 4) ? tGraphemes[i - 4] : Configuration.GRAPHEME_DEBUT_DE_MOT_VECTEUR); setval(instance, 5, (i < tGraphemes.length - 1) ? tGraphemes[i + 1] : Configuration.GRAPHEME_FIN_DE_MOT_VECTEUR); setval(instance, 6, (i < tGraphemes.length - 2) ? tGraphemes[i + 2] : Configuration.GRAPHEME_FIN_DE_MOT_VECTEUR); setval(instance, 7, (i < tGraphemes.length - 3) ? tGraphemes[i + 3] : Configuration.GRAPHEME_FIN_DE_MOT_VECTEUR); setval(instance, 8, (i < tGraphemes.length - 4) ? tGraphemes[i + 4] : Configuration.GRAPHEME_FIN_DE_MOT_VECTEUR); if (Postag != null) { setval(instance, 9, Postag); } instance = appliquerFiltreAUneInstance(filtreDoublePhoneme1er, instance, instancesDoublePhoneme1er); doublePhoneme1er = resultatClassifieur(instance, classifieurDoublePhoneme1er, instancesDoublePhoneme1er); alGraphemes.add(tGraphemes[i]); alPhonemes.add(doublePhoneme1er); // Double phoneme 2 instance = new Instance(12); instance.setDataset(instancesDoublePhoneme2eme); setval(instance, 0, tGraphemes[i]); setval(instance, 1, doublePhoneme1er); setval(instance, 2, (i >= 1) ? 
tGraphemes[i - 1] : Configuration.GRAPHEME_DEBUT_DE_MOT_VECTEUR); setval(instance, 3, (i >= 2) ? tGraphemes[i - 2] : Configuration.GRAPHEME_DEBUT_DE_MOT_VECTEUR); setval(instance, 4, (i >= 3) ? tGraphemes[i - 3] : Configuration.GRAPHEME_DEBUT_DE_MOT_VECTEUR); setval(instance, 5, (i >= 4) ? tGraphemes[i - 4] : Configuration.GRAPHEME_DEBUT_DE_MOT_VECTEUR); setval(instance, 6, (i < tGraphemes.length - 1) ? tGraphemes[i + 1] : Configuration.GRAPHEME_FIN_DE_MOT_VECTEUR); setval(instance, 7, (i < tGraphemes.length - 2) ? tGraphemes[i + 2] : Configuration.GRAPHEME_FIN_DE_MOT_VECTEUR); setval(instance, 8, (i < tGraphemes.length - 3) ? tGraphemes[i + 3] : Configuration.GRAPHEME_FIN_DE_MOT_VECTEUR); setval(instance, 9, (i < tGraphemes.length - 4) ? tGraphemes[i + 4] : Configuration.GRAPHEME_FIN_DE_MOT_VECTEUR); if (Postag != null) { setval(instance, 10, Postag); } instance = appliquerFiltreAUneInstance(filtreDoublePhoneme2eme, instance, instancesDoublePhoneme2eme); alGraphemes.add(Configuration.STRING_DE_REMPLACEMENT_GRAPHEME_VIDE); alPhonemes.add( resultatClassifieur(instance, classifieurDoublePhoneme2eme, instancesDoublePhoneme2eme)); } } return new AlignementGraphemesPhonemes(alGraphemes, alPhonemes); }
From source file:GClass.EvaluationInternal.java
License:Open Source License
/**
 * Prints the predictions for the given test file into a String.
 *
 * <p>Instances are read from {@code testFileName} one at a time. For each one a line is
 * appended containing its running number, the predicted value (or {@code missing}), then
 * either the actual class value (numeric class) or the predicted class's probability and
 * the actual class label (nominal class), followed by the selected attribute values.
 *
 * @param classifier the classifier used to predict each instance
 * @param train not used by this method
 * @param testFileName path of the test file; an empty string yields an empty result
 * @param classIndex 1-based index of the class attribute, or {@code -1} to use the last
 *     attribute
 * @param attributesToOutput passed through to {@code attributeValuesString}
 * @return the accumulated prediction text
 * @throws Exception if the file cannot be opened (with the underlying failure as cause),
 *     or if reading or classification fails
 */
protected static String printClassifications(Classifier classifier, Instances train, String testFileName,
        int classIndex, Range attributesToOutput) throws Exception {
    StringBuffer text = new StringBuffer();
    if (testFileName.length() == 0) {
        return text.toString();
    }

    BufferedReader testReader;
    try {
        testReader = new BufferedReader(new FileReader(testFileName));
    } catch (Exception e) {
        throw new Exception("Can't open file " + e.getMessage() + '.', e);
    }

    // try/finally so the reader is released even when reading or classifying throws.
    try {
        Instances test = new Instances(testReader, 1);
        if (classIndex != -1) {
            test.setClassIndex(classIndex - 1);
        } else {
            test.setClassIndex(test.numAttributes() - 1);
        }
        int i = 0;
        while (test.readInstance(testReader)) {
            Instance instance = test.instance(0);
            Instance withMissing = (Instance) instance.copy();
            withMissing.setDataset(test);
            double predValue = classifier.classifyInstance(withMissing);
            if (test.classAttribute().isNumeric()) {
                if (Instance.isMissingValue(predValue)) {
                    text.append(i + " missing ");
                } else {
                    text.append(i + " " + predValue + " ");
                }
                if (instance.classIsMissing()) {
                    text.append("missing");
                } else {
                    text.append(instance.classValue());
                }
                text.append(" " + attributeValuesString(withMissing, attributesToOutput) + "\n");
            } else {
                if (Instance.isMissingValue(predValue)) {
                    text.append(i + " missing ");
                } else {
                    text.append(i + " " + test.classAttribute().value((int) predValue) + " ");
                }
                if (Instance.isMissingValue(predValue)) {
                    text.append("missing ");
                } else {
                    text.append(classifier.distributionForInstance(withMissing)[(int) predValue] + " ");
                }
                text.append(instance.toString(instance.classIndex()) + " "
                        + attributeValuesString(withMissing, attributesToOutput) + "\n");
            }
            // Only one instance is held at a time; drop it before reading the next.
            test.delete(0);
            i++;
        }
    } finally {
        testReader.close();
    }
    return text.toString();
}
From source file:gnusmail.filters.FilterManager.java
License:Open Source License
/** * Extracts Attributes for a given message. A connection is opened and * closed for each mail, as the number of open folders is limited (and we * cannot predict it, since we are iterating over the mails * chronologically):w/*from w ww . ja v a 2 s.c om*/ * */ public Instance makeInstance(Document document) { if (dataset == null) { Logger.getLogger(FilterManager.class.getName()).log(Level.SEVERE, "Dataset is null"); return null; } Instance inst = new DenseInstance(dataset.numAttributes()); inst.setDataset(dataset); for (Filter filter : filterList) { filter.updateInstance(inst, document); } return inst; }
From source file:gr.ntua.ece.cslab.panic.core.models.AbstractWekaModel.java
License:Apache License
/** * Converts an input space point to a Weka instance. * @param point// www . j a v a 2s .co m * @return */ public static Instance convertPointToInstance(InputSpacePoint point, OutputSpacePoint outputPoint) { Instance inst = new Instance(point.numberDimensions() + outputPoint.numberDimensions()); int index = 0; for (String k : point.getKeysAsCollection()) { Attribute att = new Attribute(k, index++); inst.setValue(att, point.getValue(k)); } for (Entry<String, Double> e : outputPoint.getOutputPoints().entrySet()) { if (e.getValue() == null) { inst.setMissing(index++); } else { Attribute att = new Attribute(e.getKey(), index++); inst.setValue(att, e.getValue()); } } //assign instance to dataset FastVector att = new FastVector(point.numberDimensions() + 1); for (String s : point.getKeysAsCollection()) att.addElement(new Attribute(s, index++)); for (String k : outputPoint.getOutputPoints().keySet()) { att.addElement(new Attribute(k, index++)); } Instances dataset = new Instances("instances", att, point.numberDimensions() + 1); dataset.setClassIndex(dataset.numAttributes() - 1); inst.setDataset(dataset); return inst; }
From source file:gr.ntua.ece.cslab.panic.core.models.AbstractWekaModel.java
License:Apache License
public static Instance convertPointToInstance(InputSpacePoint point) { Instance inst = new Instance(point.numberDimensions() + 1); int index = 0; for (String k : point.getKeysAsCollection()) { Attribute att = new Attribute(k, index++); inst.setValue(att, point.getValue(k)); }/*from www .j av a 2 s.co m*/ inst.setMissing(index); //assign instance to dataset FastVector att = new FastVector(point.numberDimensions() + 1); for (String s : point.getKeysAsCollection()) att.addElement(new Attribute(s, index++)); att.addElement(new Attribute("objective", index++)); Instances dataset = new Instances("instances", att, point.numberDimensions() + 1); dataset.setClassIndex(dataset.numAttributes() - 1); inst.setDataset(dataset); return inst; }