List of usage examples for the `weka.core.Instances` constructor
public Instances(String name, ArrayList<Attribute> attInfo, int capacity)
From source file:adams.ml.data.InstancesView.java
License:Open Source License
/** * Returns a dummy dataset./*www. j a va 2 s . co m*/ * * @return the dataset */ protected static Instances createDummy() { ArrayList<Attribute> atts; atts = new ArrayList<>(); atts.add(new Attribute("dummy")); return new Instances("dummy", atts, 0); }
From source file:aprendizadodemaquina.Featurizador.java
License:Open Source License
/** * Gera o objeto Instances - cabealho dos dados a serem utilizados pelo * classificador, guarda os atributos das instncias e seus tipos * // ww w . j a v a 2 s. c o m * @return Objeto Instances que armazenado na classe Classificador */ public Instances geraInstances() { FastVector atributos = new FastVector(); for (Feature f : features) if (f.tipo().equals("NUMERICO")) { if (f.quantosValores() == 1) atributos.addElement(new Attribute(f.nome())); else for (int i = 0; i < f.quantosValores(); ++i) atributos.addElement(new Attribute(f.nome() + (i + 1))); } else if (f.tipo().equals("NOMINAL")) { FastVector valoresPossiveis = new FastVector(); for (String valor : f.valoresPossivis()) valoresPossiveis.addElement(valor); if (f.quantosValores() == 1) atributos.addElement(new Attribute(f.nome(), valoresPossiveis)); else for (int i = 0; i < f.quantosValores(); ++i) atributos.addElement(new Attribute(f.nome() + (i + 1), valoresPossiveis)); } // Qual relao FastVector valoresPossiveis = new FastVector(); for (String s : tiposRelacoes) valoresPossiveis.addElement(s); atributos.addElement(new Attribute("relacao", valoresPossiveis)); return (new Instances("dados_de_treinamento", atributos, 0)); }
From source file:ARFF.ARFFParser.java
/**
 * Registers every supplied parameter as a Weka attribute and creates the
 * (initially empty) backing dataset.
 *
 * @param params objects that implement {@code ARFFParsable}
 */
public ARFFParser(Object[] params) {
    this.atts = new FastVector();
    this.params = new ArrayList<ARFFParsable>();
    for (Object raw : params) {
        ARFFParsable parsable = (ARFFParsable) (raw);
        this.params.add(parsable);
        switch (parsable.getARFFParamType()) {
        case INTEGER:
        case REAL:
            // Numeric attribute (integers and reals are both numeric in Weka).
            atts.addElement(new Attribute(parsable.getARFFParamName()));
            break;
        case NOMINAL:
            atts.addElement(new Attribute(parsable.getARFFParamName(), parsable.getARFFParamNominalValues()));
            break;
        case STRING:
            // A null FastVector marks the attribute as a free-form string.
            atts.addElement(new Attribute(parsable.getARFFParamName(), (FastVector) null));
            break;
        }
    }
    data = new Instances(relationName, atts, 0);
}
From source file:arffcreator.arffFrame.java
/**
 * Handler for the "create" button: builds a demo dataset exercising every
 * Weka attribute type (numeric, nominal, string, date, relational), fills it
 * with two hand-crafted instances, and shows the resulting ARFF text in the
 * text area.
 */
private void createActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_createActionPerformed
    FastVector atts;
    FastVector attsRel;
    FastVector attVals;
    FastVector attValsRel;
    Instances data;
    Instances dataRel;
    double[] vals;
    double[] valsRel;
    int i;
    // 1. set up attributes
    atts = new FastVector();
    // - numeric
    atts.addElement(new Attribute("att1"));
    // - nominal with five labels val1..val5
    attVals = new FastVector();
    for (i = 0; i < 5; i++)
        attVals.addElement("val" + (i + 1));
    atts.addElement(new Attribute("att2", attVals));
    // - string (a null FastVector marks a string attribute)
    atts.addElement(new Attribute("att3", (FastVector) null));
    // - date, with an explicit parse pattern
    atts.addElement(new Attribute("att4", "yyyy-MM-dd"));
    // - relational: nested header with one numeric and one nominal column
    attsRel = new FastVector();
    // -- numeric
    attsRel.addElement(new Attribute("att5.1"));
    // -- nominal
    attValsRel = new FastVector();
    for (i = 0; i < 5; i++)
        attValsRel.addElement("val5." + (i + 1));
    attsRel.addElement(new Attribute("att5.2", attValsRel));
    dataRel = new Instances("att5", attsRel, 0);
    atts.addElement(new Attribute("att5", dataRel, 0));
    // 2. create Instances object (empty, capacity 0)
    data = new Instances("MyRelation", atts, 0);
    // 3. fill with data
    // first instance
    vals = new double[data.numAttributes()];
    // - numeric
    vals[0] = Math.PI;
    // - nominal values are stored as the index of the chosen label
    vals[1] = attVals.indexOf("val3");
    // - string: the header interns the text and returns its index
    vals[2] = data.attribute(2).addStringValue("This is a string!");
    try {
        // - date
        vals[3] = data.attribute(3).parseDate("2015-07-30");
    } catch (ParseException ex) {
        Logger.getLogger(arffFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    // - relational: build a two-row nested dataset, then register it
    dataRel = new Instances(data.attribute(4).relation(), 0);
    // -- first instance
    valsRel = new double[2];
    valsRel[0] = Math.PI + 1;
    valsRel[1] = attValsRel.indexOf("val5.3");
    dataRel.add(new Instance(1.0, valsRel));
    // -- second instance
    valsRel = new double[2];
    valsRel[0] = Math.PI + 2;
    valsRel[1] = attValsRel.indexOf("val5.2");
    dataRel.add(new Instance(1.0, valsRel));
    vals[4] = data.attribute(4).addRelation(dataRel);
    // add
    data.add(new Instance(1.0, vals));
    // second instance
    vals = new double[data.numAttributes()]; // important: needs NEW array!
    // - numeric
    vals[0] = Math.E;
    // - nominal
    vals[1] = attVals.indexOf("val1");
    // - string
    vals[2] = data.attribute(2).addStringValue("And another one!");
    try {
        // - date
        vals[3] = data.attribute(3).parseDate("2015-07-30");
    } catch (ParseException ex) {
        Logger.getLogger(arffFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    // - relational
    dataRel = new Instances(data.attribute(4).relation(), 0);
    // -- first instance
    valsRel = new double[2];
    valsRel[0] = Math.E + 1;
    valsRel[1] = attValsRel.indexOf("val5.4");
    dataRel.add(new Instance(1.0, valsRel));
    // -- second instance
    valsRel = new double[2];
    valsRel[0] = Math.E + 2;
    valsRel[1] = attValsRel.indexOf("val5.1");
    dataRel.add(new Instance(1.0, valsRel));
    vals[4] = data.attribute(4).addRelation(dataRel);
    // add
    data.add(new Instance(1.0, vals));
    // 4. output data
    textArea.append(data.toString());
    dataset = data.toString();
}
From source file:arffGenerator.TextDirectoryToArff.java
License:Open Source License
@Override public Instances createDatasetSupervised(String filePath) throws Exception { File dir = new File(filePath); File fileAux;// ww w . j a v a2s. c om String[] files = dir.list(); FastVector classValues = new FastVector(files.length); for (int i = 0; i < files.length; i++) { classValues.addElement(files[i]); } FastVector atts = new FastVector(2); atts.addElement(new Attribute("contents", (FastVector) null)); atts.addElement(new Attribute("class", classValues)); Instances data = new Instances("text_files_in_" + filePath, atts, 0); for (int i = 0; i < files.length; i++) { fileAux = new File(filePath + "/" + files[i]); if (fileAux.isDirectory()) { cargarAtrribDeClase(files[i], filePath + File.separator + files[i], data); } } return data; }
From source file:arffGenerator.TextDirectoryToArff.java
License:Open Source License
/**
 * Builds a text dataset without real labels: the nominal "class" attribute
 * holds a single empty value, and every file under {@code filePath} is loaded.
 *
 * @param filePath directory containing the text files
 * @return the populated dataset
 * @throws Exception if a file cannot be read
 */
@Override
public Instances createDatasetUnsupervised(String filePath) throws Exception {
    FastVector attributes = new FastVector(2);
    // A null FastVector marks "contents" as a free-form string attribute.
    attributes.addElement(new Attribute("contents", (FastVector) null));
    FastVector classValues = new FastVector(1);
    classValues.addElement("");
    attributes.addElement(new Attribute("class", classValues));
    Instances dataset = new Instances("text_files_in_" + filePath, attributes, 0);
    cargarAtrribDeClase(null, filePath, dataset);
    return dataset;
}
From source file:assign00.ExperimentShell.java
/** * @param args the command line arguments *///from w ww .ja va 2s . c o m public static void main(String[] args) throws Exception { DataSource source = new DataSource(file); Instances dataSet = source.getDataSet(); //Set up data dataSet.setClassIndex(dataSet.numAttributes() - 1); dataSet.randomize(new Random(1)); //determine sizes int trainingSize = (int) Math.round(dataSet.numInstances() * .7); int testSize = dataSet.numInstances() - trainingSize; Instances training = new Instances(dataSet, 0, trainingSize); Instances test = new Instances(dataSet, trainingSize, testSize); Standardize standardizedData = new Standardize(); standardizedData.setInputFormat(training); Instances newTest = Filter.useFilter(test, standardizedData); Instances newTraining = Filter.useFilter(training, standardizedData); NeuralNetworkClassifier NWC = new NeuralNetworkClassifier(); NWC.buildClassifier(newTraining); Evaluation eval = new Evaluation(newTraining); eval.evaluateModel(NWC, newTest); System.out.println(eval.toSummaryString("\nResults\n======\n", false)); }
From source file:at.ac.tuwien.ifs.myluceneanalyzers.fa.algorithm.PersianDictionaryCountCompoundWord.java
/** * Creates a new {@link PersianDictionaryCountCompoundWord}. Unlike {@link DictionaryCompoundWordTokenFilter} it considers * onlyLongestMatch to be true and it will only return subwords of maximal size. <br/> * Example: "moonlight" will be returned as "moonlight" only if it is in the dictionary (not as "moonlight, light" as * the DictionaryCompoundWordTokenFilter with onlyLongestMatch=true would. * * @param input/*from w w w . ja va 2 s. c om*/ * the {@link TokenStream} to process * @param dictionary * the word dictionary to match against. * @param minWordSize * only words longer than this get processed * @param minSubwordSize * only subwords longer than this get to the output stream * @param maxSubwordSize * only subwords shorter than this get to the output stream * @throws Exception */ public PersianDictionaryCountCompoundWord(CharArraySet dictionary, Map<String, Double> mapWordCount, int minWordSize, int minSubwordSize, int maxSubwordSize) throws Exception { if (dictionary == null) { throw new IllegalArgumentException("dictionary cannot be null"); } this.dictionary = dictionary; this.mapWordCount = mapWordCount; this.minWordSize = minWordSize; this.minSubwordSize = minSubwordSize; this.maxSubwordSize = maxSubwordSize; // Create the attributes attributeHermmean = new Attribute("harmmean"); // Declare the class attribute along with its values FastVector fvClassVal = new FastVector(2); fvClassVal.addElement("1"); fvClassVal.addElement("0"); Attribute classAttribute = new Attribute("iscorrect", fvClassVal); // Create list of instances with one element FastVector fvWekaAttributes = new FastVector(2); fvWekaAttributes.addElement(attributeHermmean); fvWekaAttributes.addElement(classAttribute); instances = new Instances("Test relation", fvWekaAttributes, 1); instances.setClassIndex(1); cls = (Classifier) weka.core.SerializationHelper.read("content/adtree.model"); }
From source file:at.aictopic1.sentimentanalysis.machinelearning.impl.TwitterClassifer.java
/**
 * Classifies tweets with the trained model and prints each prediction.
 *
 * NOTE(review): the method currently overwrites the {@code tweets} parameter
 * with four hard-coded example tweets (the block marked TEST below), and it
 * always returns 0 regardless of the predictions — both look like leftover
 * debug scaffolding; confirm before relying on this method in production.
 *
 * @param tweets the tweets to classify (currently ignored — see note above)
 * @return always 0
 */
public Integer classify(Tweet[] tweets) {
    // TEST: hard-coded example tweets replace the caller-supplied array.
    Tweet exOne = new Tweet("This is good and fantastic");
    exOne.setPreprocessedText("This is good and fantastic");
    Tweet exTwo = new Tweet("Horribly, terribly bad and more");
    exTwo.setPreprocessedText("Horribly, terribly bad and more");
    Tweet exThree = new Tweet(
            "I want to update lj and read my friends list, but I\\'m groggy and sick and blargh.");
    exThree.setPreprocessedText(
            "I want to update lj and read my friends list, but I\\'m groggy and sick and blargh.");
    Tweet exFour = new Tweet("bad hate worst sick");
    exFour.setPreprocessedText("bad hate worst sick");
    tweets = new Tweet[] { exOne, exTwo, exThree, exFour };
    // TEST end
    // Load model
    // loadModel();
    // Convert Tweet to Instance type: build the attribute schema.
    Attribute twitter_id = new Attribute("twitter_id");
    // Attribute body = new Attribute("body");
    FastVector classVal = new FastVector(2);
    classVal.addElement("pos");
    classVal.addElement("neg");
    Attribute class_attr = new Attribute("class_attr", classVal);
    // The schema is copied from the model's dataStructure below, so the
    // locally built twitter_id/class_attr attributes end up unused.
    FastVector attrVector = new FastVector(3);
    // attrVector.addElement(twitter_id);
    // attrVector.addElement(new Attribute("body", (FastVector) null));
    // attrVector.addElement(class_attr);
    // Get the number of tweets and then create predictSet
    int numTweets = tweets.length;
    Enumeration structAttrs = dataStructure.enumerateAttributes();
    while (structAttrs.hasMoreElements()) {
        attrVector.addElement((Attribute) structAttrs.nextElement());
    }
    Instances predictSet = new Instances("predictInstances", attrVector, numTweets);
    // Instances predictSet = new Instances(dataStructure);
    predictSet.setClassIndex(2);
    // init prediction
    double prediction = -1;
    System.out.println("PredictSet matches source structure: " + predictSet.equalHeaders(dataStructure));
    System.out.println("PredSet struct: " + predictSet.attribute(0));
    System.out.println("PredSet struct: " + predictSet.attribute(1));
    System.out.println("PredSet struct: " + predictSet.attribute(2));
    for (int i = 0; i < numTweets; i++) {
        String content = (String) tweets[i].getPreprocessedText();
        System.out.println("Tweet content: " + content);
        // One row per tweet: index, raw text, class left missing for prediction.
        Instance tweetInstance = new Instance(predictSet.numAttributes());
        tweetInstance.setDataset(predictSet);
        tweetInstance.setValue(predictSet.attribute(0), i);
        tweetInstance.setValue(predictSet.attribute(1), content);
        tweetInstance.setClassMissing();
        predictSet.add(tweetInstance);
        try {
            // Apply string filter, then classify the i-th filtered row.
            StringToWordVector filter = new StringToWordVector();
            filter.setInputFormat(predictSet);
            Instances filteredPredictSet = Filter.useFilter(predictSet, filter);
            // Apply model
            prediction = trainedModel.classifyInstance(filteredPredictSet.instance(i));
            filteredPredictSet.instance(i).setClassValue(prediction);
            System.out.println("Classification: " + filteredPredictSet.instance(i).toString());
            System.out.println("Prediction: " + prediction);
        } catch (Exception ex) {
            Logger.getLogger(TwitterClassifer.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    return 0;
}
From source file:at.tuflowgraphy.semanticapps.semdroid.DalvikBaseAnalyzer.java
License:Apache License
/**
 * Converts the single activation-pattern package of the analysis chain into a
 * Weka dataset: one numeric attribute per raw-pattern position plus a nominal
 * "Class" attribute whose values come from the package's label set.
 *
 * @return the populated Instances object (null if no package exists)
 * @throws RuntimeException if more than one DActivationPatternPackage is found
 */
public Instances getWekaInstances() {
    Instances instances = null;
    List<DActivationPatternPackage> dActivationPatternPackages = mAnalysisChain.getFinalLayers().get(0)
            .getResultAnalysisPackage().getActivationPatternPackages();
    int counter = 0;
    for (DActivationPatternPackage dActivationPatternPackage : dActivationPatternPackages) {
        // Exactly one package is expected; fail loudly otherwise.
        if (counter > 0) {
            throw new RuntimeException("More than one DActivationPatternPackage found!");
        }
        counter++;
        // Use the first pattern to size the attribute vector.
        DActivationPattern activationPatternTemp = dActivationPatternPackage.getActivationPatterns().get(0);
        FastVector fvWekaAttributes = new FastVector(activationPatternTemp.getRawPattern().length);
        for (int j = 0; j < activationPatternTemp.getRawPattern().length; j++) {
            // Attributes are named by position: "0", "1", ...
            Attribute attribute = new Attribute(j + "");
            fvWekaAttributes.addElement(attribute);
        }
        Set<String> labelSet = getLabelSet(dActivationPatternPackage);
        FastVector classValues = new FastVector(labelSet.size());
        for (String label : labelSet) {
            classValues.addElement(label);
        }
        Attribute classAttribute = new Attribute("Class", classValues);
        fvWekaAttributes.addElement(classAttribute);
        instances = new Instances(mAnalysisConfig.getApplicationAnalysisName(), fvWekaAttributes,
                dActivationPatternPackage.getActivationPatterns().size());
        instances.setClassIndex(instances.numAttributes() - 1);
        for (int i = 0; i < dActivationPatternPackage.getActivationPatterns().size(); i++) {
            DActivationPattern activationPattern = dActivationPatternPackage.getActivationPatterns().get(i);
            Instance instance = new Instance(fvWekaAttributes.size());
            for (int j = 0; j < activationPattern.getRawPattern().length; j++) {
                instance.setValue((Attribute) fvWekaAttributes.elementAt(j),
                        activationPattern.getRawPattern()[j]);
            }
            instance.setDataset(instances);
            // The class label is stored in the pattern's metadata under TAG_LABEL.
            DSimpleStringMetaData metadata = (DSimpleStringMetaData) activationPattern.getMetaData();
            List<String> keys = metadata.getMetaDataKeys();
            for (int k = 0; k < keys.size(); k++) {
                if (keys.get(k).equals(DalvikInputPlugin.TAG_LABEL)) {
                    String label = metadata.getMetaDataEntries().get(k);
                    instance.setClassValue(label);
                    break;
                }
            }
            instances.add(instance);
        }
    }
    return instances;
}