Example usage for weka.core Instance setDataset

List of usage examples for weka.core Instance setDataset

Introduction

On this page you can find usage examples for the setDataset method of weka.core.Instance.

Prototype

public void setDataset(Instances instances);

Source Link

Document

Sets the reference to the dataset.

Usage

From source file:focusedCrawler.target.classifier.WekaTargetClassifier.java

License:Open Source License

/**
 * Returns the classifier's class distribution for the given target string.
 *
 * <p>The string is wrapped in a {@code Page} (as the second constructor argument,
 * the first being {@code null}), turned into feature values via {@code getValues},
 * and packed into a Weka instance that is attached to {@code instances} before
 * being handed to the classifier.
 *
 * @param target the string handed to the {@code Page} constructor
 * @return the array returned by {@code classifier.distributionForInstance}
 * @throws TargetClassifierException if any step fails; only the message of the
 *         underlying exception is carried over (its stack trace is printed first)
 */
public double[] distributionForInstance(String target) throws TargetClassifierException {
    try {
        final double[] featureValues = getValues(new Page(null, target));
        final weka.core.Instance wekaInstance = new weka.core.Instance(1, featureValues);
        // The instance needs a dataset reference before the classifier can use it.
        wekaInstance.setDataset(instances);
        return classifier.distributionForInstance(wekaInstance);
    } catch (Exception ex) {
        ex.printStackTrace();
        throw new TargetClassifierException(ex.getMessage());
    }
}

From source file:focusedCrawler.target.classifier.WekaTargetClassifier.java

License:Open Source License

/**
 * Returns the classifier's class distribution for the given page.
 *
 * <p>Feature values are obtained from {@code getValues(page)}, packed into a Weka
 * instance, and the instance is attached to {@code instances} before it is handed
 * to the classifier.
 *
 * @param page the page to extract feature values from
 * @return the array returned by {@code classifier.distributionForInstance}
 * @throws TargetClassifierException if any step fails; only the message of the
 *         underlying exception is carried over (its stack trace is printed first)
 */
public double[] distributionForInstance(Page page) throws TargetClassifierException {
    try {
        final double[] featureValues = getValues(page);
        final weka.core.Instance wekaInstance = new weka.core.Instance(1, featureValues);
        // The instance needs a dataset reference before the classifier can use it.
        wekaInstance.setDataset(instances);
        return classifier.distributionForInstance(wekaInstance);
    } catch (Exception ex) {
        ex.printStackTrace();
        throw new TargetClassifierException(ex.getMessage());
    }
}

From source file:focusedCrawler.target.TargetClassifierImpl.java

License:Open Source License

/**
 * Classifies the given target as relevant or not.
 *
 * <p>Feature values from {@code getValues(target)} are packed into a Weka instance
 * attached to {@code instances}; the target counts as relevant exactly when the
 * classifier returns class value {@code 0}.
 *
 * @param target the target to classify
 * @return {@code true} if the classifier's result equals 0, {@code false} otherwise
 * @throws TargetClassifierException if any step fails; only the message of the
 *         underlying exception is carried over (its stack trace is printed first)
 */
public boolean classify(Target target) throws TargetClassifierException {
    try {
        final double[] featureValues = getValues(target);
        final weka.core.Instance wekaInstance = new weka.core.Instance(1, featureValues);
        // The instance needs a dataset reference before the classifier can use it.
        wekaInstance.setDataset(instances);
        // Class value 0 is the one treated as "relevant".
        return classifier.classifyInstance(wekaInstance) == 0;
    } catch (Exception ex) {
        ex.printStackTrace();
        throw new TargetClassifierException(ex.getMessage());
    }
}

From source file:focusedCrawler.target.TargetClassifierImpl.java

License:Open Source License

/**
 * Returns the classifier's class distribution for the given target.
 *
 * <p>Feature values are obtained from {@code getValues(target)}, packed into a Weka
 * instance, and the instance is attached to {@code instances} before it is handed
 * to the classifier.
 *
 * @param target the target to extract feature values from
 * @return the array returned by {@code classifier.distributionForInstance}
 * @throws TargetClassifierException if any step fails; only the message of the
 *         underlying exception is carried over (its stack trace is printed first)
 */
public double[] distributionForInstance(Target target) throws TargetClassifierException {
    try {
        final double[] featureValues = getValues(target);
        final weka.core.Instance wekaInstance = new weka.core.Instance(1, featureValues);
        // The instance needs a dataset reference before the classifier can use it.
        wekaInstance.setDataset(instances);
        return classifier.distributionForInstance(wekaInstance);
    } catch (Exception ex) {
        ex.printStackTrace();
        throw new TargetClassifierException(ex.getMessage());
    }
}

From source file:form.ml.ClassifierTemplate.java

/**
 * Builds a Weka {@code Instance} carrying the given text.
 *
 * <p>The instance is created with two attribute slots. Only the slot of the
 * {@code "text"} attribute of {@code train} is filled here: the text is registered
 * as a string value on that attribute and the resulting index is stored in the
 * instance. The instance is then attached to {@code train}.
 *
 * @param text the string to convert
 * @return the new instance, with {@code train} as its dataset
 */
private Instance makeInstance(String text) {
    Attribute textAttribute = train.attribute("text");
    Instance result = new Instance(2);
    // String attributes store an index into the attribute's value table.
    double textIndex = textAttribute.addStringValue(text);
    result.setValue(textAttribute, textIndex);
    result.setDataset(train);
    return result;
}

From source file:fr.loria.synalp.jtrans.phonetiseur.Classifieurs.java

License:Open Source License

/**
 * Phonetises a sequence of graphemes, one grapheme at a time.
 *
 * <p>For each grapheme a first classifier decides whether it maps to a single or a
 * double phoneme. In the single case, the classifier dedicated to that grapheme
 * (looked up through {@code lexique}) yields the phoneme; graphemes unknown to the
 * lexicon are reported on {@code System.err} and skipped. In the double case, two
 * classifiers are applied in turn, the second one also receiving the first
 * predicted phoneme as a feature, and the second phoneme is paired with
 * {@code Configuration.STRING_DE_REMPLACEMENT_GRAPHEME_VIDE}.
 *
 * @param tGraphemes the graphemes to phonetise, in order
 * @param Postag value for the Postag attribute; {@code null} may be passed when
 *        that field should not be filled
 * @return the alignment built from the collected graphemes and phonemes
 * @throws Exception propagated from the helpers this method calls
 */
public AlignementGraphemesPhonemes phonetiser(String[] tGraphemes, String Postag) throws Exception {
    ArrayList<String> alGraphemes = new ArrayList<String>();
    ArrayList<String> alPhonemes = new ArrayList<String>();

    for (int i = 0; i < tGraphemes.length; i++) {
        // Is grapheme i a single or a double phoneme?
        Instance instance = new Instance(11);
        instance.setDataset(instancesSimpleOuDoublePhoneme);
        setval(instance, 0, tGraphemes[i]);
        remplirContexteGraphemes(instance, 1, tGraphemes, i, Postag);
        instance = appliquerFiltreAUneInstance(filtreSimpleOuDoublePhoneme, instance,
                instancesSimpleOuDoublePhoneme);

        if (resultatClassifieur(instance, classifieurSimpleOuDoublePhoneme, instancesSimpleOuDoublePhoneme)
                .equals(Configuration.VALEUR_SORTIE_VECTEUR_SIMPLE_PHONEME)) {
            // Single-phoneme case
            String graphemeCourant = tGraphemes[i];
            int indiceGraphemeCourant = lexique.getIndiceFromGrapheme(graphemeCourant);
            if (indiceGraphemeCourant < 0) {
                System.err.println("unknown grapheme " + graphemeCourant);
                continue;
            }
            instance = new Instance(11);
            instance.setDataset(tInstancesSimplePhoneme[indiceGraphemeCourant]);
            setval(instance, 0, tGraphemes[i]);
            remplirContexteGraphemes(instance, 1, tGraphemes, i, Postag);
            instance = appliquerFiltreAUneInstance(filtreSimplePhoneme, instance,
                    tInstancesSimplePhoneme[indiceGraphemeCourant]);

            alGraphemes.add(graphemeCourant);
            alPhonemes.add(resultatClassifieur(instance, tClassifieurSimplePhoneme[indiceGraphemeCourant],
                    tInstancesSimplePhoneme[indiceGraphemeCourant]));
        } else {
            // Double-phoneme case
            // First phoneme of the pair
            instance = new Instance(11);
            instance.setDataset(instancesDoublePhoneme1er);
            setval(instance, 0, tGraphemes[i]);
            remplirContexteGraphemes(instance, 1, tGraphemes, i, Postag);
            instance = appliquerFiltreAUneInstance(filtreDoublePhoneme1er, instance, instancesDoublePhoneme1er);

            String doublePhoneme1er = resultatClassifieur(instance, classifieurDoublePhoneme1er,
                    instancesDoublePhoneme1er);

            alGraphemes.add(tGraphemes[i]);
            alPhonemes.add(doublePhoneme1er);

            // Second phoneme of the pair: one extra attribute (index 1) holds the
            // first predicted phoneme, so the context window starts at index 2.
            instance = new Instance(12);
            instance.setDataset(instancesDoublePhoneme2eme);
            setval(instance, 0, tGraphemes[i]);
            setval(instance, 1, doublePhoneme1er);
            remplirContexteGraphemes(instance, 2, tGraphemes, i, Postag);
            instance = appliquerFiltreAUneInstance(filtreDoublePhoneme2eme, instance,
                    instancesDoublePhoneme2eme);

            alGraphemes.add(Configuration.STRING_DE_REMPLACEMENT_GRAPHEME_VIDE);
            alPhonemes.add(
                    resultatClassifieur(instance, classifieurDoublePhoneme2eme, instancesDoublePhoneme2eme));
        }
    }

    return new AlignementGraphemesPhonemes(alGraphemes, alPhonemes);
}

/**
 * Fills the context-window attributes of an instance for grapheme {@code i}.
 *
 * <p>Starting at attribute index {@code debut}, writes the four graphemes before
 * position {@code i} (nearest first), then the four graphemes after it (nearest
 * first), then {@code Postag} if it is not {@code null}. Positions that fall
 * outside the array are replaced by
 * {@code Configuration.GRAPHEME_DEBUT_DE_MOT_VECTEUR} (before the start) or
 * {@code Configuration.GRAPHEME_FIN_DE_MOT_VECTEUR} (past the end).
 *
 * @param instance the instance to fill; its dataset must already be set
 * @param debut attribute index of the first context attribute
 * @param tGraphemes the graphemes being processed
 * @param i position of the current grapheme in {@code tGraphemes}
 * @param Postag value for the attribute following the window, or {@code null} to skip it
 * @throws Exception propagated from {@code setval}
 */
private void remplirContexteGraphemes(Instance instance, int debut, String[] tGraphemes, int i,
        String Postag) throws Exception {
    // Preceding graphemes: i-1, i-2, i-3, i-4
    for (int k = 1; k <= 4; k++) {
        setval(instance, debut + k - 1,
                (i >= k) ? tGraphemes[i - k] : Configuration.GRAPHEME_DEBUT_DE_MOT_VECTEUR);
    }
    // Following graphemes: i+1, i+2, i+3, i+4
    for (int k = 1; k <= 4; k++) {
        setval(instance, debut + 3 + k,
                (i < tGraphemes.length - k) ? tGraphemes[i + k] : Configuration.GRAPHEME_FIN_DE_MOT_VECTEUR);
    }
    if (Postag != null) {
        setval(instance, debut + 8, Postag);
    }
}

From source file:GClass.EvaluationInternal.java

License:Open Source License

/**
 * Prints the predictions for the given test file into a String.
 *
 * <p>Test instances are read from the file one at a time, classified, appended to
 * the output and then removed from the in-memory dataset. Each instance produces
 * one line starting with its running number:
 * <ul>
 *   <li>numeric class: the predicted value (or {@code missing}), the actual class
 *       value (or {@code missing}), then the selected attribute values;</li>
 *   <li>otherwise: the predicted class label (or {@code missing}), the probability
 *       the classifier assigns to that label (or {@code missing}), the actual class
 *       as a string, then the selected attribute values.</li>
 * </ul>
 *
 * <p>The reader is closed in a {@code finally} block so the file handle is released
 * even when reading or classification fails.
 *
 * @param classifier the classifier used to predict each test instance
 * @param train not used by this method
 * @param testFileName path of the test file; an empty string yields an empty result
 * @param classIndex 1-based index of the class attribute, or {@code -1} to use the
 *        last attribute
 * @param attributesToOutput passed through to {@code attributeValuesString}
 * @return the accumulated prediction lines
 * @throws Exception if the file cannot be opened (the original failure is attached
 *         as the cause), or if reading or classification fails
 */
protected static String printClassifications(Classifier classifier, Instances train, String testFileName,
        int classIndex, Range attributesToOutput) throws Exception {

    StringBuilder text = new StringBuilder();
    if (testFileName.length() != 0) {
        BufferedReader testReader;
        try {
            testReader = new BufferedReader(new FileReader(testFileName));
        } catch (Exception e) {
            throw new Exception("Can't open file " + e.getMessage() + '.', e);
        }
        try {
            Instances test = new Instances(testReader, 1);
            if (classIndex != -1) {
                test.setClassIndex(classIndex - 1);
            } else {
                test.setClassIndex(test.numAttributes() - 1);
            }
            int i = 0;
            while (test.readInstance(testReader)) {
                // Only one instance is held at a time: it is read, used, then deleted below.
                Instance instance = test.instance(0);
                Instance withMissing = (Instance) instance.copy();
                withMissing.setDataset(test);
                double predValue = classifier.classifyInstance(withMissing);
                if (test.classAttribute().isNumeric()) {
                    if (Instance.isMissingValue(predValue)) {
                        text.append(i + " missing ");
                    } else {
                        text.append(i + " " + predValue + " ");
                    }
                    if (instance.classIsMissing()) {
                        text.append("missing");
                    } else {
                        text.append(instance.classValue());
                    }
                    text.append(" " + attributeValuesString(withMissing, attributesToOutput) + "\n");
                } else {
                    if (Instance.isMissingValue(predValue)) {
                        text.append(i + " missing ");
                    } else {
                        text.append(i + " " + test.classAttribute().value((int) predValue) + " ");
                    }
                    if (Instance.isMissingValue(predValue)) {
                        text.append("missing ");
                    } else {
                        text.append(classifier.distributionForInstance(withMissing)[(int) predValue] + " ");
                    }
                    text.append(instance.toString(instance.classIndex()) + " "
                            + attributeValuesString(withMissing, attributesToOutput) + "\n");
                }
                test.delete(0);
                i++;
            }
        } finally {
            testReader.close();
        }
    }
    return text.toString();
}

From source file:gnusmail.filters.FilterManager.java

License:Open Source License

/**
 * Extracts Attributes for a given message. A connection is opened and
 * closed for each mail, as the number of open folders is limited (and we
 * cannot predict it, since we are iterating over the mails
 * chronologically):w/*from w  ww  .  ja  v  a  2  s.c om*/
 * 
 */
public Instance makeInstance(Document document) {
    if (dataset == null) {
        Logger.getLogger(FilterManager.class.getName()).log(Level.SEVERE, "Dataset is null");
        return null;
    }
    Instance inst = new DenseInstance(dataset.numAttributes());
    inst.setDataset(dataset);
    for (Filter filter : filterList) {
        filter.updateInstance(inst, document);
    }

    return inst;
}

From source file:gr.ntua.ece.cslab.panic.core.models.AbstractWekaModel.java

License:Apache License

/**
 * Converts an input space point and its output point to a Weka instance.
 *
 * <p>The instance has one slot per input dimension followed by one slot per output
 * dimension. Input values are written first, in the order of
 * {@code point.getKeysAsCollection()}; output values follow, and an output whose
 * value is {@code null} is marked as missing. The instance is then attached to a
 * freshly built dataset named {@code "instances"} whose attributes are the input
 * keys followed by the output keys, with the last attribute set as the class.
 *
 * @param point the input space point providing the input dimensions
 * @param outputPoint the output space point providing the output dimensions
 * @return the new instance, attached to its own dataset
 */
public static Instance convertPointToInstance(InputSpacePoint point, OutputSpacePoint outputPoint) {
    Instance instance = new Instance(point.numberDimensions() + outputPoint.numberDimensions());
    int position = 0;

    // Input dimensions come first.
    for (String key : point.getKeysAsCollection()) {
        Attribute inputAttribute = new Attribute(key, position++);
        instance.setValue(inputAttribute, point.getValue(key));
    }
    // Output dimensions follow; a null value becomes a missing value.
    for (Entry<String, Double> output : outputPoint.getOutputPoints().entrySet()) {
        if (output.getValue() != null) {
            Attribute outputAttribute = new Attribute(output.getKey(), position++);
            instance.setValue(outputAttribute, output.getValue());
        } else {
            instance.setMissing(position++);
        }
    }

    // Build the dataset the instance is attached to. The position counter is
    // deliberately not reset here, matching the existing behavior.
    final int capacity = point.numberDimensions() + 1;
    FastVector header = new FastVector(capacity);
    for (String key : point.getKeysAsCollection()) {
        header.addElement(new Attribute(key, position++));
    }
    for (String key : outputPoint.getOutputPoints().keySet()) {
        header.addElement(new Attribute(key, position++));
    }

    Instances dataset = new Instances("instances", header, capacity);
    dataset.setClassIndex(dataset.numAttributes() - 1);
    instance.setDataset(dataset);
    return instance;
}

From source file:gr.ntua.ece.cslab.panic.core.models.AbstractWekaModel.java

License:Apache License

/**
 * Converts an input space point to a Weka instance with a missing class value.
 *
 * <p>The instance has one slot per input dimension plus one extra slot. Input
 * values are written in the order of {@code point.getKeysAsCollection()} and the
 * extra slot is marked as missing. The instance is then attached to a freshly
 * built dataset named {@code "instances"} whose attributes are the input keys
 * followed by an {@code "objective"} attribute, which is set as the class.
 *
 * @param point the input space point to convert
 * @return the new instance, attached to its own dataset
 */
public static Instance convertPointToInstance(InputSpacePoint point) {
    Instance instance = new Instance(point.numberDimensions() + 1);
    int position = 0;

    for (String key : point.getKeysAsCollection()) {
        Attribute inputAttribute = new Attribute(key, position++);
        instance.setValue(inputAttribute, point.getValue(key));
    }
    // The slot after the inputs is left as a missing value.
    instance.setMissing(position);

    // Build the dataset the instance is attached to. The position counter is
    // deliberately not reset here, matching the existing behavior.
    final int capacity = point.numberDimensions() + 1;
    FastVector header = new FastVector(capacity);
    for (String key : point.getKeysAsCollection()) {
        header.addElement(new Attribute(key, position++));
    }
    header.addElement(new Attribute("objective", position++));

    Instances dataset = new Instances("instances", header, capacity);
    dataset.setClassIndex(dataset.numAttributes() - 1);
    instance.setDataset(dataset);
    return instance;
}