Example usage for weka.core Instances Instances

List of usage examples for weka.core Instances Instances

Introduction

On this page you can find example usage of the weka.core Instances(String, ArrayList&lt;Attribute&gt;, int) constructor.

Prototype

public Instances(String name, ArrayList<Attribute> attInfo, int capacity) 

Source Link

Document

Creates an empty set of instances.

Usage

From source file:adams.ml.data.InstancesView.java

License:Open Source License

/**
 * Returns a dummy dataset./*www.  j  a  va  2  s  .  co  m*/
 *
 * @return      the dataset
 */
protected static Instances createDummy() {
    ArrayList<Attribute> atts;

    atts = new ArrayList<>();
    atts.add(new Attribute("dummy"));

    return new Instances("dummy", atts, 0);
}

From source file:aprendizadodemaquina.Featurizador.java

License:Open Source License

/**
 * Builds the Instances header describing the data consumed by the
 * classifier: one attribute per feature value (features spanning several
 * values expand into numbered attributes), plus a final nominal
 * "relacao" class attribute listing the known relation types.
 *
 * @return an empty Instances object carrying only the attribute header
 */
public Instances geraInstances() {
    FastVector atributos = new FastVector();

    for (Feature f : features) {
        if (f.tipo().equals("NUMERICO")) {
            if (f.quantosValores() == 1) {
                atributos.addElement(new Attribute(f.nome()));
            } else {
                // Multi-valued numeric features expand into nome1, nome2, ...
                for (int i = 0; i < f.quantosValores(); ++i) {
                    atributos.addElement(new Attribute(f.nome() + (i + 1)));
                }
            }
        } else if (f.tipo().equals("NOMINAL")) {
            // Collect the nominal domain once; reuse it for every expansion.
            FastVector dominio = new FastVector();
            for (String valor : f.valoresPossivis()) {
                dominio.addElement(valor);
            }
            if (f.quantosValores() == 1) {
                atributos.addElement(new Attribute(f.nome(), dominio));
            } else {
                for (int i = 0; i < f.quantosValores(); ++i) {
                    atributos.addElement(new Attribute(f.nome() + (i + 1), dominio));
                }
            }
        }
    }

    // Class attribute: which relation type the instance represents.
    FastVector tiposRelacao = new FastVector();
    for (String s : tiposRelacoes) {
        tiposRelacao.addElement(s);
    }
    atributos.addElement(new Attribute("relacao", tiposRelacao));

    return new Instances("dados_de_treinamento", atributos, 0);
}

From source file:ARFF.ARFFParser.java

/**
 * Initialises the parser from an array of ARFF-parsable descriptors,
 * building one Weka attribute per parameter and an empty dataset
 * (named {@code relationName}) that uses them as its header.
 *
 * @param params objects implementing {@code ARFFParsable}
 */
public ARFFParser(Object[] params) {
    this.atts = new FastVector();
    this.params = new ArrayList<ARFFParsable>();
    for (Object rawParam : params) {
        ARFFParsable parsable = (ARFFParsable) rawParam;
        this.params.add(parsable);
        switch (parsable.getARFFParamType()) {
        case INTEGER:
        case REAL:
            // Integers and reals are both plain numeric attributes in ARFF.
            atts.addElement(new Attribute(parsable.getARFFParamName()));
            break;
        case NOMINAL:
            atts.addElement(new Attribute(parsable.getARFFParamName(), parsable.getARFFParamNominalValues()));
            break;
        case STRING:
            // A null FastVector marks the attribute as a free-form string.
            atts.addElement(new Attribute(parsable.getARFFParamName(), (FastVector) null));
            break;
        }
    }
    data = new Instances(relationName, atts, 0);
}

From source file:arffcreator.arffFrame.java

/**
 * Builds a demo "MyRelation" dataset exercising every Weka attribute
 * type (numeric, nominal, string, date and relational), fills it with
 * two rows, appends the resulting ARFF text to the text area and
 * stores it in {@code dataset}.
 */
private void createActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_createActionPerformed
    // 1. set up attributes
    FastVector atts = new FastVector();
    // - numeric
    atts.addElement(new Attribute("att1"));
    // - nominal, with values val1..val5
    FastVector attVals = new FastVector();
    for (int i = 0; i < 5; i++) {
        attVals.addElement("val" + (i + 1));
    }
    atts.addElement(new Attribute("att2", attVals));
    // - string (null FastVector marks a string attribute)
    atts.addElement(new Attribute("att3", (FastVector) null));
    // - date
    atts.addElement(new Attribute("att4", "yyyy-MM-dd"));
    // - relational: sub-header with one numeric and one nominal column
    FastVector attsRel = new FastVector();
    attsRel.addElement(new Attribute("att5.1"));
    FastVector attValsRel = new FastVector();
    for (int i = 0; i < 5; i++) {
        attValsRel.addElement("val5." + (i + 1));
    }
    attsRel.addElement(new Attribute("att5.2", attValsRel));
    atts.addElement(new Attribute("att5", new Instances("att5", attsRel, 0), 0));

    // 2. create Instances object
    Instances data = new Instances("MyRelation", atts, 0);

    // 3. fill with data — the two rows differ only in their values, so the
    // previously duplicated fill code lives in one helper now.
    addDemoRow(data, attVals, attValsRel, Math.PI, "val3", "This is a string!", "val5.3", "val5.2");
    addDemoRow(data, attVals, attValsRel, Math.E, "val1", "And another one!", "val5.4", "val5.1");

    // 4. output data
    textArea.append(data.toString());
    dataset = data.toString();
}

/**
 * Appends one fully populated demo row to {@code data}.
 *
 * @param data            dataset to append to (attributes: numeric, nominal,
 *                        string, date, relational — in that order)
 * @param attVals         nominal domain of attribute 1
 * @param attValsRel      nominal domain of the relational sub-attribute
 * @param numericBase     value for attribute 0; the relational rows use
 *                        base+1 and base+2
 * @param nominalValue    nominal value for attribute 1
 * @param stringValue     text for the string attribute 2
 * @param firstRelNominal nominal value of the first relational sub-row
 * @param secondRelNominal nominal value of the second relational sub-row
 */
private void addDemoRow(Instances data, FastVector attVals, FastVector attValsRel,
        double numericBase, String nominalValue, String stringValue,
        String firstRelNominal, String secondRelNominal) {
    double[] vals = new double[data.numAttributes()];
    // - numeric
    vals[0] = numericBase;
    // - nominal (stored as index into the domain)
    vals[1] = attVals.indexOf(nominalValue);
    // - string (stored via the attribute's string pool)
    vals[2] = data.attribute(2).addStringValue(stringValue);
    try {
        // - date
        vals[3] = data.attribute(3).parseDate("2015-07-30");
    } catch (ParseException ex) {
        Logger.getLogger(arffFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    // - relational: two sub-rows built against the attribute's own header
    Instances rel = new Instances(data.attribute(4).relation(), 0);
    double[] relRow = new double[2];
    relRow[0] = numericBase + 1;
    relRow[1] = attValsRel.indexOf(firstRelNominal);
    rel.add(new Instance(1.0, relRow));
    relRow = new double[2];
    relRow[0] = numericBase + 2;
    relRow[1] = attValsRel.indexOf(secondRelNominal);
    rel.add(new Instance(1.0, relRow));
    vals[4] = data.attribute(4).addRelation(rel);
    // add the finished row
    data.add(new Instance(1.0, vals));
}

From source file:arffGenerator.TextDirectoryToArff.java

License:Open Source License

/**
 * Builds a labelled text dataset from a directory tree: each entry name
 * directly under {@code filePath} becomes a class value, and each entry
 * that is a directory has its files loaded as instances of that class
 * via {@code cargarAtrribDeClase}.
 *
 * @param filePath root directory whose sub-directories name the classes
 * @return dataset with a string "contents" attribute and a nominal "class" attribute
 * @throws Exception if the path is not a readable directory or a file cannot be read
 */
@Override
public Instances createDatasetSupervised(String filePath) throws Exception {
    File dir = new File(filePath);
    String[] files = dir.list();
    if (files == null) {
        // File.list() returns null when the path is not a readable directory;
        // without this guard the code below throws a NullPointerException.
        throw new IllegalArgumentException("Not a readable directory: " + filePath);
    }
    FastVector classValues = new FastVector(files.length);
    for (int i = 0; i < files.length; i++) {
        classValues.addElement(files[i]);
    }
    FastVector atts = new FastVector(2);
    atts.addElement(new Attribute("contents", (FastVector) null));
    atts.addElement(new Attribute("class", classValues));
    Instances data = new Instances("text_files_in_" + filePath, atts, 0);
    for (int i = 0; i < files.length; i++) {
        File fileAux = new File(filePath + "/" + files[i]);
        if (fileAux.isDirectory()) {
            cargarAtrribDeClase(files[i], filePath + File.separator + files[i], data);
        }
    }
    return data;
}

From source file:arffGenerator.TextDirectoryToArff.java

License:Open Source License

/**
 * Builds an unlabelled text dataset: files under {@code filePath} are
 * loaded as string instances via {@code cargarAtrribDeClase}, with a
 * single empty placeholder class value.
 *
 * @param filePath directory containing the text files
 * @return dataset with a string "contents" attribute and a one-valued "class" attribute
 * @throws Exception if a file cannot be read
 */
@Override
public Instances createDatasetUnsupervised(String filePath) throws Exception {
    // Only one (empty) class value exists, since no labels are available.
    FastVector classValues = new FastVector(1);
    classValues.addElement("");

    FastVector atts = new FastVector(2);
    atts.addElement(new Attribute("contents", (FastVector) null));
    atts.addElement(new Attribute("class", classValues));

    Instances data = new Instances("text_files_in_" + filePath, atts, 0);
    cargarAtrribDeClase(null, filePath, data);
    return data;
}

From source file:assign00.ExperimentShell.java

/**
 * Loads the dataset from {@code file}, splits it 70/30 into training and
 * test sets, standardizes both using statistics fitted on the training
 * split only, trains a neural-network classifier and prints the
 * evaluation summary.
 *
 * @param args the command line arguments (unused)
 * @throws Exception if loading, filtering, training or evaluation fails
 */
public static void main(String[] args) throws Exception {
    Instances dataSet = new DataSource(file).getDataSet();

    // Class is the last attribute; shuffle with a fixed seed for reproducibility.
    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random(1));

    // 70/30 train/test split.
    int trainingSize = (int) Math.round(dataSet.numInstances() * .7);
    int testSize = dataSet.numInstances() - trainingSize;
    Instances training = new Instances(dataSet, 0, trainingSize);
    Instances test = new Instances(dataSet, trainingSize, testSize);

    // Fit the standardization filter on the training data only, then
    // apply it to both splits.
    Standardize standardize = new Standardize();
    standardize.setInputFormat(training);
    Instances newTest = Filter.useFilter(test, standardize);
    Instances newTraining = Filter.useFilter(training, standardize);

    NeuralNetworkClassifier classifier = new NeuralNetworkClassifier();
    classifier.buildClassifier(newTraining);

    Evaluation eval = new Evaluation(newTraining);
    eval.evaluateModel(classifier, newTest);

    System.out.println(eval.toSummaryString("\nResults\n======\n", false));
}

From source file:at.ac.tuwien.ifs.myluceneanalyzers.fa.algorithm.PersianDictionaryCountCompoundWord.java

/**
 * Creates a new {@link PersianDictionaryCountCompoundWord}. Unlike
 * {@link DictionaryCompoundWordTokenFilter} it considers onlyLongestMatch
 * to be true and it will only return subwords of maximal size. <br/>
 * Example: "moonlight" will be returned as "moonlight" only if it is in
 * the dictionary (not as "moonlight, light" as the
 * DictionaryCompoundWordTokenFilter with onlyLongestMatch=true would).
 *
 * @param dictionary
 *          the word dictionary to match against; must not be null
 * @param mapWordCount
 *          per-word counts stored on the filter for later use
 * @param minWordSize
 *          only words longer than this get processed
 * @param minSubwordSize
 *          only subwords longer than this get to the output stream
 * @param maxSubwordSize
 *          only subwords shorter than this get to the output stream
 * @throws Exception if the serialized classifier model cannot be read
 */
public PersianDictionaryCountCompoundWord(CharArraySet dictionary, Map<String, Double> mapWordCount,
        int minWordSize, int minSubwordSize, int maxSubwordSize) throws Exception {
    if (dictionary == null) {
        throw new IllegalArgumentException("dictionary cannot be null");
    }
    this.dictionary = dictionary;
    this.mapWordCount = mapWordCount;
    this.minWordSize = minWordSize;
    this.minSubwordSize = minSubwordSize;
    this.maxSubwordSize = maxSubwordSize;

    // Single numeric feature fed to the classifier.
    attributeHermmean = new Attribute("harmmean");

    // Nominal class attribute "iscorrect" with the two values "1" and "0".
    FastVector fvClassVal = new FastVector(2);
    fvClassVal.addElement("1");
    fvClassVal.addElement("0");
    Attribute classAttribute = new Attribute("iscorrect", fvClassVal);

    // Dataset header: the feature followed by the class attribute.
    FastVector fvWekaAttributes = new FastVector(2);
    fvWekaAttributes.addElement(attributeHermmean);
    fvWekaAttributes.addElement(classAttribute);

    instances = new Instances("Test relation", fvWekaAttributes, 1);
    instances.setClassIndex(1);

    // Pre-trained model loaded from disk and used for classification.
    cls = (Classifier) weka.core.SerializationHelper.read("content/adtree.model");
}

From source file:at.aictopic1.sentimentanalysis.machinelearning.impl.TwitterClassifer.java

/**
 * Classifies the preprocessed text of each given tweet with the trained
 * model, printing every prediction. The per-tweet predictions are
 * currently only logged; the method always returns 0.
 *
 * NOTE(review): the original body contained a leftover debug block that
 * overwrote the {@code tweets} parameter with four hard-coded example
 * tweets, so callers' input was silently ignored. That block has been
 * removed so the passed-in tweets are actually classified.
 *
 * @param tweets tweets whose preprocessed text is classified
 * @return always 0 — TODO: return a meaningful aggregate of the predictions
 */
public Integer classify(Tweet[] tweets) {
    // Copy the attribute header from the training data structure so the
    // prediction instances are compatible with the trained model.
    FastVector attrVector = new FastVector(3);
    Enumeration structAttrs = dataStructure.enumerateAttributes();
    while (structAttrs.hasMoreElements()) {
        attrVector.addElement((Attribute) structAttrs.nextElement());
    }

    int numTweets = tweets.length;
    Instances predictSet = new Instances("predictInstances", attrVector, numTweets);
    // Attribute 2 is the class to be predicted.
    predictSet.setClassIndex(2);

    double prediction = -1;

    System.out.println("PredictSet matches source structure: " + predictSet.equalHeaders(dataStructure));
    System.out.println("PredSet struct: " + predictSet.attribute(0));
    System.out.println("PredSet struct: " + predictSet.attribute(1));
    System.out.println("PredSet struct: " + predictSet.attribute(2));

    for (int i = 0; i < numTweets; i++) {
        String content = (String) tweets[i].getPreprocessedText();

        System.out.println("Tweet content: " + content);

        // Attribute 0 holds a running id, attribute 1 the tweet text,
        // attribute 2 (the class) is left missing for prediction.
        Instance tweetInstance = new Instance(predictSet.numAttributes());
        tweetInstance.setDataset(predictSet);
        tweetInstance.setValue(predictSet.attribute(0), i);
        tweetInstance.setValue(predictSet.attribute(1), content);
        tweetInstance.setClassMissing();

        predictSet.add(tweetInstance);

        try {
            // Turn the string attribute into a word vector, mirroring the
            // representation the model was trained on.
            StringToWordVector filter = new StringToWordVector();
            filter.setInputFormat(predictSet);
            Instances filteredPredictSet = Filter.useFilter(predictSet, filter);

            // Apply model
            prediction = trainedModel.classifyInstance(filteredPredictSet.instance(i));
            filteredPredictSet.instance(i).setClassValue(prediction);
            System.out.println("Classification: " + filteredPredictSet.instance(i).toString());
            System.out.println("Prediction: " + prediction);

        } catch (Exception ex) {
            Logger.getLogger(TwitterClassifer.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    return 0;
}

From source file:at.tuflowgraphy.semanticapps.semdroid.DalvikBaseAnalyzer.java

License:Apache License

/**
 * Converts the activation patterns of the analysis chain's final layer
 * into a Weka dataset: one numeric attribute per raw-pattern position,
 * plus a nominal "Class" attribute whose value is taken from each
 * pattern's TAG_LABEL metadata entry.
 *
 * @return the populated dataset, or null if no activation pattern
 *         package is present
 * @throws RuntimeException if more than one DActivationPatternPackage is found
 */
public Instances getWekaInstances() {
    Instances instances = null;
    List<DActivationPatternPackage> dActivationPatternPackages = mAnalysisChain.getFinalLayers().get(0)
            .getResultAnalysisPackage().getActivationPatternPackages();
    int counter = 0;
    for (DActivationPatternPackage dActivationPatternPackage : dActivationPatternPackages) {
        // Exactly one package is expected; fail loudly on any extra one.
        if (counter > 0) {
            throw new RuntimeException("More than one DActivationPatternPackage found!");
        }
        counter++;
        // Use the first pattern to size the header: one numeric attribute
        // (named "0", "1", ...) per raw-pattern position.
        DActivationPattern activationPatternTemp = dActivationPatternPackage.getActivationPatterns().get(0);
        FastVector fvWekaAttributes = new FastVector(activationPatternTemp.getRawPattern().length);
        for (int j = 0; j < activationPatternTemp.getRawPattern().length; j++) {
            Attribute attribute = new Attribute(j + "");
            fvWekaAttributes.addElement(attribute);
        }

        Set<String> labelSet = getLabelSet(dActivationPatternPackage);

        FastVector classValues = new FastVector(labelSet.size());
        for (String label : labelSet) {
            classValues.addElement(label);
        }

        // The class attribute is appended last; the class index below
        // relies on this position.
        Attribute classAttribute = new Attribute("Class", classValues);
        fvWekaAttributes.addElement(classAttribute);

        instances = new Instances(mAnalysisConfig.getApplicationAnalysisName(), fvWekaAttributes,
                dActivationPatternPackage.getActivationPatterns().size());
        instances.setClassIndex(instances.numAttributes() - 1);

        for (int i = 0; i < dActivationPatternPackage.getActivationPatterns().size(); i++) {
            DActivationPattern activationPattern = dActivationPatternPackage.getActivationPatterns().get(i);
            Instance instance = new Instance(fvWekaAttributes.size());

            // Copy the raw pattern values into the numeric attributes.
            for (int j = 0; j < activationPattern.getRawPattern().length; j++) {
                instance.setValue((Attribute) fvWekaAttributes.elementAt(j),
                        activationPattern.getRawPattern()[j]);
            }

            // setDataset must precede setClassValue(String) so the nominal
            // class value can be resolved against the dataset header.
            instance.setDataset(instances);

            // The metadata entry keyed TAG_LABEL carries this pattern's class label.
            DSimpleStringMetaData metadata = (DSimpleStringMetaData) activationPattern.getMetaData();
            List<String> keys = metadata.getMetaDataKeys();
            for (int k = 0; k < keys.size(); k++) {
                if (keys.get(k).equals(DalvikInputPlugin.TAG_LABEL)) {
                    String label = metadata.getMetaDataEntries().get(k);
                    instance.setClassValue(label);
                    break;
                }
            }

            instances.add(instance);
        }

    }
    return instances;
}