List of usage examples for the `weka.core.Instances` constructor
public Instances(String name, ArrayList<Attribute> attInfo, int capacity)
From source file:adams.ml.data.InstancesView.java
License:Open Source License
/** * Returns a dummy dataset./*www. j a va 2 s . co m*/ * * @return the dataset */ protected static Instances createDummy() { ArrayList<Attribute> atts; atts = new ArrayList<>(); atts.add(new Attribute("dummy")); return new Instances("dummy", atts, 0); }
From source file:aprendizadodemaquina.Featurizador.java
License:Open Source License
/** * Gera o objeto Instances - cabealho dos dados a serem utilizados pelo * classificador, guarda os atributos das instncias e seus tipos * // ww w . j a v a 2 s. c o m * @return Objeto Instances que armazenado na classe Classificador */ public Instances geraInstances() { FastVector atributos = new FastVector(); for (Feature f : features) if (f.tipo().equals("NUMERICO")) { if (f.quantosValores() == 1) atributos.addElement(new Attribute(f.nome())); else for (int i = 0; i < f.quantosValores(); ++i) atributos.addElement(new Attribute(f.nome() + (i + 1))); } else if (f.tipo().equals("NOMINAL")) { FastVector valoresPossiveis = new FastVector(); for (String valor : f.valoresPossivis()) valoresPossiveis.addElement(valor); if (f.quantosValores() == 1) atributos.addElement(new Attribute(f.nome(), valoresPossiveis)); else for (int i = 0; i < f.quantosValores(); ++i) atributos.addElement(new Attribute(f.nome() + (i + 1), valoresPossiveis)); } // Qual relao FastVector valoresPossiveis = new FastVector(); for (String s : tiposRelacoes) valoresPossiveis.addElement(s); atributos.addElement(new Attribute("relacao", valoresPossiveis)); return (new Instances("dados_de_treinamento", atributos, 0)); }
From source file:ARFF.ARFFParser.java
/**
 * Registers every supplied parameter as a Weka attribute and creates the
 * (initially empty) backing dataset.
 *
 * @param params objects that implement {@code ARFFParsable}
 */
public ARFFParser(Object[] params) {
    this.atts = new FastVector();
    this.params = new ArrayList<ARFFParsable>();
    for (Object raw : params) {
        ARFFParsable parsable = (ARFFParsable) (raw);
        this.params.add(parsable);
        switch (parsable.getARFFParamType()) {
        case INTEGER:
        case REAL:
            // Numeric attribute (integers and reals are both numeric in Weka).
            atts.addElement(new Attribute(parsable.getARFFParamName()));
            break;
        case NOMINAL:
            atts.addElement(new Attribute(parsable.getARFFParamName(), parsable.getARFFParamNominalValues()));
            break;
        case STRING:
            // A null FastVector marks the attribute as a free-form string.
            atts.addElement(new Attribute(parsable.getARFFParamName(), (FastVector) null));
            break;
        }
    }
    data = new Instances(relationName, atts, 0);
}
From source file:arffcreator.arffFrame.java
/**
 * Handler for the "create" button: builds a demo dataset exercising every
 * Weka attribute type (numeric, nominal, string, date, relational), fills it
 * with two hand-crafted instances, and shows the resulting ARFF text in the
 * text area.
 */
private void createActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_createActionPerformed
    FastVector atts;
    FastVector attsRel;
    FastVector attVals;
    FastVector attValsRel;
    Instances data;
    Instances dataRel;
    double[] vals;
    double[] valsRel;
    int i;
    // 1. set up attributes
    atts = new FastVector();
    // - numeric
    atts.addElement(new Attribute("att1"));
    // - nominal with five labels val1..val5
    attVals = new FastVector();
    for (i = 0; i < 5; i++)
        attVals.addElement("val" + (i + 1));
    atts.addElement(new Attribute("att2", attVals));
    // - string (a null FastVector marks a string attribute)
    atts.addElement(new Attribute("att3", (FastVector) null));
    // - date, with an explicit parse pattern
    atts.addElement(new Attribute("att4", "yyyy-MM-dd"));
    // - relational: nested header with one numeric and one nominal column
    attsRel = new FastVector();
    // -- numeric
    attsRel.addElement(new Attribute("att5.1"));
    // -- nominal
    attValsRel = new FastVector();
    for (i = 0; i < 5; i++)
        attValsRel.addElement("val5." + (i + 1));
    attsRel.addElement(new Attribute("att5.2", attValsRel));
    dataRel = new Instances("att5", attsRel, 0);
    atts.addElement(new Attribute("att5", dataRel, 0));
    // 2. create Instances object (empty, capacity 0)
    data = new Instances("MyRelation", atts, 0);
    // 3. fill with data
    // first instance
    vals = new double[data.numAttributes()];
    // - numeric
    vals[0] = Math.PI;
    // - nominal values are stored as the index of the chosen label
    vals[1] = attVals.indexOf("val3");
    // - string: the header interns the text and returns its index
    vals[2] = data.attribute(2).addStringValue("This is a string!");
    try {
        // - date
        vals[3] = data.attribute(3).parseDate("2015-07-30");
    } catch (ParseException ex) {
        Logger.getLogger(arffFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    // - relational: build a two-row nested dataset, then register it
    dataRel = new Instances(data.attribute(4).relation(), 0);
    // -- first instance
    valsRel = new double[2];
    valsRel[0] = Math.PI + 1;
    valsRel[1] = attValsRel.indexOf("val5.3");
    dataRel.add(new Instance(1.0, valsRel));
    // -- second instance
    valsRel = new double[2];
    valsRel[0] = Math.PI + 2;
    valsRel[1] = attValsRel.indexOf("val5.2");
    dataRel.add(new Instance(1.0, valsRel));
    vals[4] = data.attribute(4).addRelation(dataRel);
    // add
    data.add(new Instance(1.0, vals));
    // second instance
    vals = new double[data.numAttributes()]; // important: needs NEW array!
    // - numeric
    vals[0] = Math.E;
    // - nominal
    vals[1] = attVals.indexOf("val1");
    // - string
    vals[2] = data.attribute(2).addStringValue("And another one!");
    try {
        // - date
        vals[3] = data.attribute(3).parseDate("2015-07-30");
    } catch (ParseException ex) {
        Logger.getLogger(arffFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    // - relational
    dataRel = new Instances(data.attribute(4).relation(), 0);
    // -- first instance
    valsRel = new double[2];
    valsRel[0] = Math.E + 1;
    valsRel[1] = attValsRel.indexOf("val5.4");
    dataRel.add(new Instance(1.0, valsRel));
    // -- second instance
    valsRel = new double[2];
    valsRel[0] = Math.E + 2;
    valsRel[1] = attValsRel.indexOf("val5.1");
    dataRel.add(new Instance(1.0, valsRel));
    vals[4] = data.attribute(4).addRelation(dataRel);
    // add
    data.add(new Instance(1.0, vals));
    // 4. output data
    textArea.append(data.toString());
    dataset = data.toString();
}
From source file:arffGenerator.TextDirectoryToArff.java
License:Open Source License
@Override public Instances createDatasetSupervised(String filePath) throws Exception { File dir = new File(filePath); File fileAux;// ww w . j a v a2s. c om String[] files = dir.list(); FastVector classValues = new FastVector(files.length); for (int i = 0; i < files.length; i++) { classValues.addElement(files[i]); } FastVector atts = new FastVector(2); atts.addElement(new Attribute("contents", (FastVector) null)); atts.addElement(new Attribute("class", classValues)); Instances data = new Instances("text_files_in_" + filePath, atts, 0); for (int i = 0; i < files.length; i++) { fileAux = new File(filePath + "/" + files[i]); if (fileAux.isDirectory()) { cargarAtrribDeClase(files[i], filePath + File.separator + files[i], data); } } return data; }
From source file:arffGenerator.TextDirectoryToArff.java
License:Open Source License
/**
 * Builds a text dataset without real labels: the nominal "class" attribute
 * holds a single empty value, and every file under {@code filePath} is loaded.
 *
 * @param filePath directory containing the text files
 * @return the populated dataset
 * @throws Exception if a file cannot be read
 */
@Override
public Instances createDatasetUnsupervised(String filePath) throws Exception {
    FastVector attributes = new FastVector(2);
    // A null FastVector marks "contents" as a free-form string attribute.
    attributes.addElement(new Attribute("contents", (FastVector) null));
    FastVector classValues = new FastVector(1);
    classValues.addElement("");
    attributes.addElement(new Attribute("class", classValues));
    Instances dataset = new Instances("text_files_in_" + filePath, attributes, 0);
    cargarAtrribDeClase(null, filePath, dataset);
    return dataset;
}
From source file:assign00.ExperimentShell.java
/** * @param args the command line arguments *///from w ww .ja va 2s . c o m public static void main(String[] args) throws Exception { DataSource source = new DataSource(file); Instances dataSet = source.getDataSet(); //Set up data dataSet.setClassIndex(dataSet.numAttributes() - 1); dataSet.randomize(new Random(1)); //determine sizes int trainingSize = (int) Math.round(dataSet.numInstances() * .7); int testSize = dataSet.numInstances() - trainingSize; Instances training = new Instances(dataSet, 0, trainingSize); Instances test = new Instances(dataSet, trainingSize, testSize); Standardize standardizedData = new Standardize(); standardizedData.setInputFormat(training); Instances newTest = Filter.useFilter(test, standardizedData); Instances newTraining = Filter.useFilter(training, standardizedData); NeuralNetworkClassifier NWC = new NeuralNetworkClassifier(); NWC.buildClassifier(newTraining); Evaluation eval = new Evaluation(newTraining); eval.evaluateModel(NWC, newTest); System.out.println(eval.toSummaryString("\nResults\n======\n", false)); }
From source file:at.ac.tuwien.ifs.myluceneanalyzers.fa.algorithm.PersianDictionaryCountCompoundWord.java
/** * Creates a new {@link PersianDictionaryCountCompoundWord}. Unlike {@link DictionaryCompoundWordTokenFilter} it considers * onlyLongestMatch to be true and it will only return subwords of maximal size. <br/> * Example: "moonlight" will be returned as "moonlight" only if it is in the dictionary (not as "moonlight, light" as * the DictionaryCompoundWordTokenFilter with onlyLongestMatch=true would. * * @param input/*from w w w . ja va 2 s. c om*/ * the {@link TokenStream} to process * @param dictionary * the word dictionary to match against. * @param minWordSize * only words longer than this get processed * @param minSubwordSize * only subwords longer than this get to the output stream * @param maxSubwordSize * only subwords shorter than this get to the output stream * @throws Exception */ public PersianDictionaryCountCompoundWord(CharArraySet dictionary, Map<String, Double> mapWordCount, int minWordSize, int minSubwordSize, int maxSubwordSize) throws Exception { if (dictionary == null) { throw new IllegalArgumentException("dictionary cannot be null"); } this.dictionary = dictionary; this.mapWordCount = mapWordCount; this.minWordSize = minWordSize; this.minSubwordSize = minSubwordSize; this.maxSubwordSize = maxSubwordSize; // Create the attributes attributeHermmean = new Attribute("harmmean"); // Declare the class attribute along with its values FastVector fvClassVal = new FastVector(2); fvClassVal.addElement("1"); fvClassVal.addElement("0"); Attribute classAttribute = new Attribute("iscorrect", fvClassVal); // Create list of instances with one element FastVector fvWekaAttributes = new FastVector(2); fvWekaAttributes.addElement(attributeHermmean); fvWekaAttributes.addElement(classAttribute); instances = new Instances("Test relation", fvWekaAttributes, 1); instances.setClassIndex(1); cls = (Classifier) weka.core.SerializationHelper.read("content/adtree.model"); }
From source file:at.aictopic1.sentimentanalysis.machinelearning.impl.TwitterClassifer.java
/**
 * Classifies tweets with the trained model and prints each prediction.
 *
 * NOTE(review): the method currently overwrites the {@code tweets} parameter
 * with four hard-coded example tweets (the block marked TEST below), and it
 * always returns 0 regardless of the predictions — both look like leftover
 * debug scaffolding; confirm before relying on this method in production.
 *
 * @param tweets the tweets to classify (currently ignored — see note above)
 * @return always 0
 */
public Integer classify(Tweet[] tweets) {
    // TEST: hard-coded example tweets replace the caller-supplied array.
    Tweet exOne = new Tweet("This is good and fantastic");
    exOne.setPreprocessedText("This is good and fantastic");
    Tweet exTwo = new Tweet("Horribly, terribly bad and more");
    exTwo.setPreprocessedText("Horribly, terribly bad and more");
    Tweet exThree = new Tweet(
            "I want to update lj and read my friends list, but I\\'m groggy and sick and blargh.");
    exThree.setPreprocessedText(
            "I want to update lj and read my friends list, but I\\'m groggy and sick and blargh.");
    Tweet exFour = new Tweet("bad hate worst sick");
    exFour.setPreprocessedText("bad hate worst sick");
    tweets = new Tweet[] { exOne, exTwo, exThree, exFour };
    // TEST end
    // Load model
    // loadModel();
    // Convert Tweet to Instance type: build the attribute schema.
    Attribute twitter_id = new Attribute("twitter_id");
    // Attribute body = new Attribute("body");
    FastVector classVal = new FastVector(2);
    classVal.addElement("pos");
    classVal.addElement("neg");
    Attribute class_attr = new Attribute("class_attr", classVal);
    // The schema is copied from the model's dataStructure below, so the
    // locally built twitter_id/class_attr attributes end up unused.
    FastVector attrVector = new FastVector(3);
    // attrVector.addElement(twitter_id);
    // attrVector.addElement(new Attribute("body", (FastVector) null));
    // attrVector.addElement(class_attr);
    // Get the number of tweets and then create predictSet
    int numTweets = tweets.length;
    Enumeration structAttrs = dataStructure.enumerateAttributes();
    while (structAttrs.hasMoreElements()) {
        attrVector.addElement((Attribute) structAttrs.nextElement());
    }
    Instances predictSet = new Instances("predictInstances", attrVector, numTweets);
    // Instances predictSet = new Instances(dataStructure);
    predictSet.setClassIndex(2);
    // init prediction
    double prediction = -1;
    System.out.println("PredictSet matches source structure: " + predictSet.equalHeaders(dataStructure));
    System.out.println("PredSet struct: " + predictSet.attribute(0));
    System.out.println("PredSet struct: " + predictSet.attribute(1));
    System.out.println("PredSet struct: " + predictSet.attribute(2));
    for (int i = 0; i < numTweets; i++) {
        String content = (String) tweets[i].getPreprocessedText();
        System.out.println("Tweet content: " + content);
        // One row per tweet: index, raw text, class left missing for prediction.
        Instance tweetInstance = new Instance(predictSet.numAttributes());
        tweetInstance.setDataset(predictSet);
        tweetInstance.setValue(predictSet.attribute(0), i);
        tweetInstance.setValue(predictSet.attribute(1), content);
        tweetInstance.setClassMissing();
        predictSet.add(tweetInstance);
        try {
            // Apply string filter, then classify the i-th filtered row.
            StringToWordVector filter = new StringToWordVector();
            filter.setInputFormat(predictSet);
            Instances filteredPredictSet = Filter.useFilter(predictSet, filter);
            // Apply model
            prediction = trainedModel.classifyInstance(filteredPredictSet.instance(i));
            filteredPredictSet.instance(i).setClassValue(prediction);
            System.out.println("Classification: " + filteredPredictSet.instance(i).toString());
            System.out.println("Prediction: " + prediction);
        } catch (Exception ex) {
            Logger.getLogger(TwitterClassifer.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    return 0;
}
From source file:at.tuflowgraphy.semanticapps.semdroid.DalvikBaseAnalyzer.java
License:Apache License
/**
 * Converts the single activation-pattern package of the analysis chain into a
 * Weka dataset: one numeric attribute per raw-pattern position plus a nominal
 * "Class" attribute whose values come from the package's label set.
 *
 * @return the populated Instances object (null if no package exists)
 * @throws RuntimeException if more than one DActivationPatternPackage is found
 */
public Instances getWekaInstances() {
    Instances instances = null;
    List<DActivationPatternPackage> dActivationPatternPackages = mAnalysisChain.getFinalLayers().get(0)
            .getResultAnalysisPackage().getActivationPatternPackages();
    int counter = 0;
    for (DActivationPatternPackage dActivationPatternPackage : dActivationPatternPackages) {
        // Exactly one package is expected; fail loudly otherwise.
        if (counter > 0) {
            throw new RuntimeException("More than one DActivationPatternPackage found!");
        }
        counter++;
        // Use the first pattern to size the attribute vector.
        DActivationPattern activationPatternTemp = dActivationPatternPackage.getActivationPatterns().get(0);
        FastVector fvWekaAttributes = new FastVector(activationPatternTemp.getRawPattern().length);
        for (int j = 0; j < activationPatternTemp.getRawPattern().length; j++) {
            // Attributes are named by position: "0", "1", ...
            Attribute attribute = new Attribute(j + "");
            fvWekaAttributes.addElement(attribute);
        }
        Set<String> labelSet = getLabelSet(dActivationPatternPackage);
        FastVector classValues = new FastVector(labelSet.size());
        for (String label : labelSet) {
            classValues.addElement(label);
        }
        Attribute classAttribute = new Attribute("Class", classValues);
        fvWekaAttributes.addElement(classAttribute);
        instances = new Instances(mAnalysisConfig.getApplicationAnalysisName(), fvWekaAttributes,
                dActivationPatternPackage.getActivationPatterns().size());
        instances.setClassIndex(instances.numAttributes() - 1);
        for (int i = 0; i < dActivationPatternPackage.getActivationPatterns().size(); i++) {
            DActivationPattern activationPattern = dActivationPatternPackage.getActivationPatterns().get(i);
            Instance instance = new Instance(fvWekaAttributes.size());
            for (int j = 0; j < activationPattern.getRawPattern().length; j++) {
                instance.setValue((Attribute) fvWekaAttributes.elementAt(j),
                        activationPattern.getRawPattern()[j]);
            }
            instance.setDataset(instances);
            // The class label is stored in the pattern's metadata under TAG_LABEL.
            DSimpleStringMetaData metadata = (DSimpleStringMetaData) activationPattern.getMetaData();
            List<String> keys = metadata.getMetaDataKeys();
            for (int k = 0; k < keys.size(); k++) {
                if (keys.get(k).equals(DalvikInputPlugin.TAG_LABEL)) {
                    String label = metadata.getMetaDataEntries().get(k);
                    instance.setClassValue(label);
                    break;
                }
            }
            instances.add(instance);
        }
    }
    return instances;
}