List of usage examples for weka.core Instances add
@Override public boolean add(Instance instance)
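Instances.add(Instance) appends a shallow copy of the given instance to the dataset and returns true; the copy, not the caller's object, is what the dataset stores. Before diving into the source-file examples below, here is a minimal, self-contained sketch of the usual pattern (the attribute names and values are illustrative, not taken from any of the sources):

import java.util.ArrayList;
import java.util.Arrays;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

// Minimal sketch of the basic add() pattern; attribute names and
// values here are illustrative, not from the source files below.
public class AddExample {
    public static void main(String[] args) {
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("length"));                            // numeric attribute
        atts.add(new Attribute("class", Arrays.asList("yes", "no"))); // nominal attribute

        Instances data = new Instances("demo", atts, 0);
        data.setClassIndex(data.numAttributes() - 1);

        Instance inst = new DenseInstance(data.numAttributes());
        inst.setDataset(data);   // required before setValue with a nominal label string
        inst.setValue(0, 4.2);
        inst.setValue(1, "yes");
        data.add(inst);          // appends a shallow copy of inst and returns true
    }
}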
From source file:elh.eus.absa.Features.java
License:Open Source License
/**
 * Fills the attribute vectors for the instances existing in the CoNLL tabulated-format corpus given.
 * Attribute vectors contain the features loaded by the creatFeatureSet() function.
 *
 * @param save whether the Instances file should be saved to an arff file or not.
 * @param prefix prefix for the saved arff file name.
 * @return Weka Instances object containing the attribute vectors filled with the features specified
 *         in the parameter file.
 */
public Instances loadInstancesConll(boolean save, String prefix) {
    String savePath = params.getProperty("fVectorDir") + File.separator + "arff" + File.separator + "train_"
            + prefix;
    HashMap<String, Opinion> trainExamples = corpus.getOpinions();

    String nafdir = params.getProperty("kafDir");
    int trainExamplesNum = trainExamples.size();

    int bowWin = 0;
    if (params.containsKey("window")) {
        bowWin = Integer.parseInt(params.getProperty("window"));
        savePath = savePath + "_w" + bowWin;
    }

    //System.out.println("train examples: "+trainExamplesNum);
    // Create the Weka object for the training set
    Instances rsltdata = new Instances("train", atts, trainExamplesNum);

    // setting class attribute (last attribute in train data)
    //traindata.setClassIndex(traindata.numAttributes() - 1);

    System.err.println("Features: loadInstancesConll() - featNum: " + this.featNum
            + " - trainset attrib num -> " + rsltdata.numAttributes() + " - ");
    System.out.println("Features: loadInstancesConll() - featNum: " + this.featNum
            + " - trainset attrib num -> " + rsltdata.numAttributes() + " - ");

    int instId = 1;
    // fill the vectors for each training example
    for (String oId : trainExamples.keySet()) {
        //System.err.println("sentence: "+ corpus.getOpinionSentence(o.getId()));

        // value vector
        double[] values = new double[featNum];
        // first element is the instanceId
        values[rsltdata.attribute("instanceId").index()] = instId;

        LinkedList<String> ngrams = new LinkedList<String>();
        int ngramDim;
        try {
            ngramDim = Integer.valueOf(params.getProperty("wfngrams"));
        } catch (Exception e) {
            ngramDim = 0;
        }

        boolean polNgrams = false;
        if (params.containsKey("polNgrams")) {
            polNgrams = params.getProperty("polNgrams").equalsIgnoreCase("yes");
        }

        String nafPath = nafdir + File.separator + trainExamples.get(oId).getsId().replace(':', '_');
        String taggedFile = "";
        try {
            if (!FileUtilsElh.checkFile(nafPath + ".kaf")) {
                nafPath = NLPpipelineWrapper.tagSentence(corpus.getOpinionSentence(oId), nafPath,
                        corpus.getLang(), params.getProperty("pos-model"), params.getProperty("lemma-model"),
                        postagger);
            } else {
                nafPath = nafPath + ".kaf";
            }
            InputStream reader = new FileInputStream(new File(nafPath));
            taggedFile = IOUtils.toString(reader);
            reader.close();
        } catch (IOException | JDOMException fe) {
            // TODO Auto-generated catch block
            fe.printStackTrace();
        }

        String[] noWindow = taggedFile.split("\n");

        // counter for opinion sentence token number. Used for computing relative values of the features
        int tokNum = noWindow.length;

        //System.err.println("Features::loadInstancesConll - tagged File read lines:"+tokNum);

        List<String> window = Arrays.asList(noWindow);
        Integer end = corpus.getOpinion(oId).getTo();
        // apply window if window active (>0) and if the target is not null (to=0)
        if ((bowWin > 0) && (end > 0)) {
            Integer start = corpus.getOpinion(oId).getFrom();
            Integer from = start - bowWin;
            if (from < 0) {
                from = 0;
            }
            Integer to = end + bowWin;
            if (to > noWindow.length - 1) {
                to = noWindow.length - 1;
            }
            window = Arrays.asList(Arrays.copyOfRange(noWindow, from, to));
        }

        //System.out.println("Sentence: "+corpus.getOpinionSentence(oId)+" - target: "+corpus.getOpinion(oId).getTarget()+
        //        "\n window: from-> "+window.get(0).getForm()+" to-> "+window.get(window.size()-1)+" .\n");
        //System.err.println(Arrays.toString(window.toArray()));

        // word form ngram related features
        for (String wf : window) {
            String[] fields = wf.split("\\s");
            String wfStr = normalize(fields[0], params.getProperty("normalization", "none"));

            // blank line means we found a sentence end. Empty n-gram list and reinitialize.
            if (wf.equals("")) {
                // add ngrams to the feature vector
                checkNgramFeatures(ngrams, values, "", 1, true); //toknum

                // since wf is empty no need to check for clusters and other features.
                continue;
            }

            if (params.containsKey("wfngrams") && ngramDim > 0) {
                if (!savePath.contains("_wf" + ngramDim)) {
                    savePath = savePath + "_wf" + ngramDim;
                }
                // if the current word form is in the ngram list activate the feature in the vector
                if (ngrams.size() >= ngramDim) {
                    ngrams.removeFirst();
                }
                ngrams.add(wfStr);

                // add ngrams to the feature vector
                checkNgramFeatures(ngrams, values, "", 1, false); //toknum
            }

            // Clark cluster info corresponding to the current word form
            if (params.containsKey("clark") && attributeSets.get("ClarkCl").containsKey(wfStr)) {
                if (!savePath.contains("_cl")) {
                    savePath = savePath + "_cl";
                }
                values[rsltdata.attribute("ClarkClId_" + attributeSets.get("ClarkCl").get(wfStr)).index()]++;
            }

            // Brown cluster info corresponding to the current word form
            if (params.containsKey("brown") && attributeSets.get("BrownCl").containsKey(wfStr)) {
                if (!savePath.contains("_br")) {
                    savePath = savePath + "_br";
                }
                values[rsltdata.attribute("BrownClId_" + attributeSets.get("BrownCl").get(wfStr)).index()]++;
            }

            // word2vec cluster info corresponding to the current word form
            if (params.containsKey("word2vec") && attributeSets.get("w2vCl").containsKey(wfStr)) {
                if (!savePath.contains("_w2v")) {
                    savePath = savePath + "_w2v";
                }
                values[rsltdata.attribute("w2vClId_" + attributeSets.get("w2vCl").get(wfStr)).index()]++;
            }
        }

        // empty ngram list and add remaining ngrams to the feature list
        checkNgramFeatures(ngrams, values, "", 1, true); //toknum

        // PoS tagger related attributes: lemmas and pos tags
        if (params.containsKey("lemmaNgrams")
                || (params.containsKey("pos") && !params.getProperty("pos").equalsIgnoreCase("0"))
                || params.containsKey("polarLexiconGeneral") || params.containsKey("polarLexiconDomain")) {
            ngrams = new LinkedList<String>();
            if (params.containsKey("lemmaNgrams")
                    && (!params.getProperty("lemmaNgrams").equalsIgnoreCase("0"))) {
                ngramDim = Integer.valueOf(params.getProperty("lemmaNgrams"));
            } else {
                ngramDim = 3;
            }
            LinkedList<String> posNgrams = new LinkedList<String>();
            int posNgramDim = 0;
            if (params.containsKey("pos")) {
                posNgramDim = Integer.valueOf(params.getProperty("pos"));
            }

            for (String t : window) {
                // lemmas // && (!params.getProperty("lemmaNgrams").equalsIgnoreCase("0"))
                if ((params.containsKey("lemmaNgrams")) || params.containsKey("polarLexiconGeneral")
                        || params.containsKey("polarLexiconDomain")) {
                    if (!savePath.contains("_l" + ngramDim)) {
                        savePath = savePath + "_l" + ngramDim;
                    }

                    // blank line means we found a sentence end. Empty n-gram list and reinitialize.
                    if (t.equals("")) {
                        // check both lemma n-grams and polarity lexicons, and add values to the feature vector
                        checkNgramsAndPolarLexicons(ngrams, values, "lemma", 1, tokNum, true, polNgrams); //toknum

                        // since t is empty no need to check for clusters and other features.
                        continue;
                    }

                    String[] fields = t.split("\\s");
                    if (fields.length < 2) {
                        continue;
                    }
                    String lemma = normalize(fields[1], params.getProperty("normalization", "none"));

                    if (ngrams.size() >= ngramDim) {
                        ngrams.removeFirst();
                    }
                    ngrams.add(lemma);

                    // check both lemma n-grams and polarity lexicons, and add values to the feature vector
                    checkNgramsAndPolarLexicons(ngrams, values, "lemma", 1, tokNum, false, polNgrams);
                }

                // pos tags
                if (params.containsKey("pos") && !params.getProperty("pos").equalsIgnoreCase("0")) {
                    if (!savePath.contains("_p")) {
                        savePath = savePath + "_p";
                    }
                    if (posNgrams.size() >= posNgramDim) {
                        posNgrams.removeFirst();
                    }
                    String[] fields = t.split("\\s");
                    if (fields.length < 3) {
                        continue;
                    }
                    String pos = fields[2];
                    posNgrams.add(pos);

                    // add ngrams to the feature vector
                    checkNgramFeatures(posNgrams, values, "pos", 1, false);
                }
            } //endFor

            // empty ngram list and add remaining ngrams to the feature list
            // check both lemma n-grams and polarity lexicons, and add values to the feature vector
            checkNgramsAndPolarLexicons(ngrams, values, "", 1, tokNum, true, polNgrams);

            // empty pos ngram list and add remaining pos ngrams to the feature list
            checkNgramFeatures(posNgrams, values, "pos", 1, true);
        }

        // add sentence length as a feature
        if (params.containsKey("sentenceLength")
                && (!params.getProperty("sentenceLength").equalsIgnoreCase("no"))) {
            values[rsltdata.attribute("sentenceLength").index()] = tokNum;
        }

        // compute uppercase ratio before normalization (if needed)
        //double upRatio = 0.0;
        //if (params.getProperty("upperCaseRatio", "no").equalsIgnoreCase("yes")) {
        //    String upper = opNormalized.replaceAll("[a-z]", "");
        //    upRatio = (double) upper.length() / (double) opNormalized.length();
        //    values[rsltdata.attribute("upperCaseRation").index()] = upRatio;
        //}

        // create object for the current instance and associate it with the current train dataset.
        Instance inst = new SparseInstance(1.0, values);
        inst.setDataset(rsltdata);

        // add category attribute values
        String cat = trainExamples.get(oId).getCategory();

        if (params.containsKey("categories") && params.getProperty("categories").compareTo("E&A") == 0) {
            if (cat.compareTo("NULL") == 0) {
                inst.setValue(rsltdata.attribute("entCat").index(), cat);
                inst.setValue(rsltdata.attribute("attCat").index(), cat);
            } else {
                String[] splitCat = cat.split("#");
                inst.setValue(rsltdata.attribute("entCat").index(), splitCat[0]);
                inst.setValue(rsltdata.attribute("attCat").index(), splitCat[1]);
            }
            //inst.setValue(attIndexes.get("entAttCat"), cat);
        } else if (params.containsKey("categories") && params.getProperty("categories").compareTo("E#A") == 0) {
            inst.setValue(rsltdata.attribute("entAttCat").index(), cat);
        }

        if (params.containsKey("polarity") && params.getProperty("polarity").compareTo("yes") == 0) {
            // add class value as a double (Weka stores all values as doubles)
            String pol = normalizePolarity(trainExamples.get(oId).getPolarity());
            if (pol != null && !pol.isEmpty()) {
                inst.setValue(rsltdata.attribute("polarityCat"), pol);
            } else {
                //System.err.println("polarity: _"+pol+"_");
                inst.setMissing(rsltdata.attribute("polarityCat"));
            }
        }

        // add instance to train data
        rsltdata.add(inst);

        // store opinion Id and instance Id
        this.opInst.put(oId, instId);
        instId++;
    }

    System.err.println("Features : loadInstancesConll() - training data ready total number of examples -> "
            + trainExamplesNum + " - " + rsltdata.numInstances());

    if (save) {
        try {
            savePath = savePath + ".arff";
            System.err.println("arff written to: " + savePath);
            ArffSaver saver = new ArffSaver();
            saver.setInstances(rsltdata);
            saver.setFile(new File(savePath));
            saver.writeBatch();
        } catch (IOException e1) {
            e1.printStackTrace();
        } catch (Exception e2) {
            e2.printStackTrace();
        }
    }

    return rsltdata;
}
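The core of the example above reduces to one pattern: build a double[] of feature values, wrap it in a SparseInstance, attach the dataset so nominal string values can be resolved, then add. A condensed, self-contained sketch of that pattern follows; the toy attribute layout is an assumption for illustration, not the real Features setup:

import java.util.ArrayList;
import java.util.Arrays;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.SparseInstance;

// Toy attribute layout standing in for the real Features attribute set.
public class SparseAddSketch {
    public static void main(String[] args) {
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("instanceId"));
        atts.add(new Attribute("someNgramFeature"));
        atts.add(new Attribute("polarityCat", Arrays.asList("positive", "negative")));
        Instances data = new Instances("train", atts, 0);

        double[] values = new double[data.numAttributes()]; // mostly zeros, so sparse storage pays off
        values[data.attribute("instanceId").index()] = 1;

        Instance inst = new SparseInstance(1.0, values); // weight 1.0 plus the value vector
        inst.setDataset(data);                           // must precede setValue with a nominal string
        inst.setValue(data.attribute("polarityCat"), "positive");
        data.add(inst);                                  // the dataset stores a shallow copy
    }
}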
From source file:entities.WekaBaselineBOWFeatureVector.java
public Instances fillInstanceSet(ArrayList<BaselineBOWFeatureVector> vList,
        ArrayList<BaselineBOWFeatureVector> vList2) throws IOException {
    ArrayList<Attribute> attributes = initializeWekaFeatureVector();

    Instances isSet = new Instances(vList.get(0).getLabel(), attributes, vList.size());
    isSet.setClassIndex(isSet.numAttributes() - 1);

    for (BaselineBOWFeatureVector BOWv : vList) {
        Instance i = fillFeatureVector(BOWv, isSet);
        isSet.add(i);
    }
    for (BaselineBOWFeatureVector BOWv : vList2) {
        Instance i = fillFeatureVector(BOWv, isSet);
        isSet.add(i);
    }

    ArffSaver saver = new ArffSaver();
    saver.setInstances(isSet);
    saver.setFile(new File("./data/test.arff"));
    saver.writeBatch();

    return isSet;
}
From source file:entities.WekaBOWFeatureVector.java
public Instances fillInstanceSet(ArrayList<BOWFeatureVector> vList, ArrayList<BOWFeatureVector> vList2)
        throws IOException {
    ArrayList<Attribute> attributes = initializeWekaFeatureVector();

    Instances isSet = new Instances(vList.get(0).getLabel(), attributes, vList.size());
    isSet.setClassIndex(isSet.numAttributes() - 1);

    for (BOWFeatureVector BOWv : vList) {
        Instance i = fillFeatureVector(BOWv, isSet);
        isSet.add(i);
    }
    for (BOWFeatureVector BOWv : vList2) {
        Instance i = fillFeatureVector(BOWv, isSet);
        isSet.add(i);
    }

    ArffSaver saver = new ArffSaver();
    saver.setInstances(isSet);
    saver.setFile(new File("./data/test.arff"));
    saver.writeBatch();

    return isSet;
}
From source file:entities.WekaHMMFeatureVector.java
public Instances fillInstanceSet(ArrayList<HMMFeatureVector> vList, ArrayList<HMMFeatureVector> vList2)
        throws IOException {
    //FastVector fvWekaAttributesHmm = new FastVector(3);
    ArrayList<Attribute> attributes = initializeWekaFeatureVector();

    Instances isSet = new Instances("dataset", attributes, vList.size());
    isSet.setClassIndex(isSet.numAttributes() - 1);

    for (HMMFeatureVector HMMv : vList) {
        Instance i = fillFeatureVector(HMMv, isSet);
        isSet.add(i);
    }
    for (HMMFeatureVector HMMv : vList2) {
        Instance i = fillFeatureVector(HMMv, isSet);
        isSet.add(i);
    }

    ArffSaver saver = new ArffSaver();
    saver.setInstances(isSet);
    saver.setFile(new File("./data/test.arff"));
    saver.writeBatch();

    return isSet;
}
From source file:entities.WekaNGGFeatureVector.java
public Instances fillInstanceSet(ArrayList<NGGFeatureVector> vList, ArrayList<NGGFeatureVector> vList2,
        String datasetType) throws IOException {
    ArrayList<Attribute> attributes = initializeWekaFeatureVector();

    Instances isSet = new Instances(vList.get(0).getLabel(), attributes, vList.size());
    isSet.setClassIndex(isSet.numAttributes() - 1);

    for (NGGFeatureVector NGGv : vList) {
        Instance i = fillFeatureVector(NGGv, isSet);
        isSet.add(i);
    }
    for (NGGFeatureVector NGGv : vList2) {
        Instance i = fillFeatureVector(NGGv, isSet);
        isSet.add(i);
    }

    ArffSaver saver = new ArffSaver();
    saver.setInstances(isSet);
    saver.setFile(new File("./data/" + datasetType + ".arff"));
    saver.writeBatch();

    return isSet;
}
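The four fillInstanceSet variants above differ only in the feature-vector element type; the shared build-add-save pattern could be factored once. A hedged sketch of that consolidation follows — the generic signature and the BiFunction adapter are invented for illustration and are not part of the original sources:

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.function.BiFunction;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ArffSaver;

// Hypothetical generic consolidation; fillInstanceSet here is NOT the
// authors' method, and the BiFunction adapter is invented for illustration.
public class GenericFiller {
    public static <V> Instances fillInstanceSet(List<V> first, List<V> second, String relationName,
            ArrayList<Attribute> attributes, BiFunction<V, Instances, Instance> toInstance)
            throws IOException {
        Instances isSet = new Instances(relationName, attributes, first.size() + second.size());
        isSet.setClassIndex(isSet.numAttributes() - 1);
        for (V v : first) {
            isSet.add(toInstance.apply(v, isSet));
        }
        for (V v : second) {
            isSet.add(toInstance.apply(v, isSet));
        }
        ArffSaver saver = new ArffSaver(); // persist a copy, as the four originals do
        saver.setInstances(isSet);
        saver.setFile(new File("./data/test.arff"));
        saver.writeBatch();
        return isSet;
    }
}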
From source file:entity.DifficultyResamplingManager.java
License:Open Source License
/**
 * Called by generateResampledSubdataset.
 *
 * @param originalDataset
 * @param subdatasetDimensions
 * @return
 */
private Instances generateResampledSubdataset(Instances originalDataset,
        SubdatasetDimensions subdatasetDimensions) {

    // creates an empty dataset with the same header as the original
    Instances resampledSubdataset = new Instances(originalDataset);
    resampledSubdataset.delete();

    // randomize dataset instances order
    originalDataset.randomize(RandomizationManager.randomGenerator);

    // calc number of positives to insert
    int positivesToInsert = subdatasetDimensions.getP();
    if (verbose)
        System.out.println("[DifficultyResamplingManager, generateResampledSubdataset] positivesToInsert = "
                + positivesToInsert);

    // calc number of negatives to insert
    int negativesToInsert = subdatasetDimensions.getN();

    // iterates over the original dataset instances
    for (int i = 0; i < originalDataset.numInstances(); i++) {
        // if instance is positive and more are needed in the new dataset, inserts into new dataset
        if ((positivesToInsert > 0) && (originalDataset.instance(i).stringValue(originalDataset.classIndex())
                .equals(Settings.buggyLabel))) {
            resampledSubdataset.add(originalDataset.instance(i));
            positivesToInsert--;
        }

        // if instance is negative and more are needed in the new dataset, inserts into new dataset
        if ((negativesToInsert > 0) && (originalDataset.instance(i).stringValue(originalDataset.classIndex())
                .equals(Settings.nonbuggyLabel))) {
            resampledSubdataset.add(originalDataset.instance(i));
            negativesToInsert--;
        }
    }

    if (verbose)
        System.out.println("[DifficultyResamplingManager, generateResampledSubdataset] resampling finished: "
                + this.printDatasetInfo(resampledSubdataset));

    return resampledSubdataset;
}
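The copy-then-delete idiom above (new Instances(originalDataset) followed by delete()) is one way to obtain an empty dataset that shares the original's header; the Instances(Instances, int) constructor achieves the same without copying the data first. A minimal sketch, assuming original is a populated weka.core.Instances object:

// `original` stands for any populated weka.core.Instances object.
Instances emptyCopy1 = new Instances(original);    // copies header and data ...
emptyCopy1.delete();                               // ... then drops the data, as above

Instances emptyCopy2 = new Instances(original, 0); // header only, no data copied

emptyCopy2.add(original.instance(0));              // either copy accepts compatible instances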
From source file:ergasia2pkg.LP_ROS.java
/**
 * Creates a new MultiLabelInstances object given a map of instance lists.
 *
 * @param labelsetGroup HashMap<String, List<Instance>> from which to create the instances
 * @param mlData MultiLabelInstances used just to get the label metadata
 * @return a new MultiLabelInstances object
 */
private MultiLabelInstances createNewMultilabelInstance(HashMap<String, List<Instance>> labelsetGroup,
        MultiLabelInstances mlData) throws InvalidDataFormatException {
    Instances in = mlData.getDataSet();

    Enumeration enumeration = in.enumerateAttributes();
    ArrayList attlist = Collections.list(enumeration);

    int capacity = 0;
    for (String labelset : labelsetGroup.keySet()) {
        capacity += labelsetGroup.get(labelset).size();
    }

    Instances newInstances = new Instances("sampledDataset", attlist, capacity);
    for (String labelset : labelsetGroup.keySet()) {
        List<Instance> instanceList = (ArrayList<Instance>) labelsetGroup.get(labelset);
        for (Instance inst : instanceList) {
            newInstances.add(inst);
        }
    }

    MultiLabelInstances newData = new MultiLabelInstances(newInstances, mlData.getLabelsMetaData());
    return newData;
}
From source file:ergasia2pkg.ML_RUS.java
/**
 * Method to perform undersampling on the initial dataset. The method
 * removes instances from the dataset according to the algorithm proposed in
 * the paper, utilising the Mean Imbalance Ratio measure.
 *
 * @param mlData MultiLabelInstances object, holds a set of multilabel instances
 * @return MultiLabelInstances object containing the initial labels minus
 *         the labels removed by undersampling
 * @throws Exception
 */
@Override
public MultiLabelInstances transformInstances(MultiLabelInstances mlData) throws Exception {
    // Initialise the label counters
    labelCount(mlData);

    // Clone the dataset into a new object
    MultiLabelInstances mlDataClone = mlData.clone();

    // Clone a new set to contain all the instances that will be returned
    Instances mlDataReturned = mlData.clone().getDataSet();
    mlDataReturned.delete();

    // Calculate the number of samples to remove
    int samplesToDelete = (int) (mlData.getNumInstances() / (100 * P));
    int remainingLabels;

    // Declare two lists of lists, a minorityBag and a majorityBag. The minBag
    // will contain lists (bags) of instances having labels with an imbalance
    // ratio higher than the mean imbalance ratio. These will be set aside and
    // not tampered with in any way. The majBag will also contain lists of
    // instances having labels with an imbalance ratio lower than or equal to
    // the mean imbalance ratio. These instances will be the candidates for deletion.
    List<List<Instance>> minBags = new ArrayList<>();
    List<List<Instance>> majBags = new ArrayList<>();

    // Get an array with the indices of all the labels
    int L[] = mlDataClone.getLabelIndices();

    // Calculate the dataset's mean imbalance ratio
    double meanIR = meanImbalanceRatio(mlDataClone);

    String labelName;
    int i = 0, m = 0, x, labelCounter = 0;

    // Declare a boolean array which will follow the labelset L, and determine
    // whether or not a label's instances should be considered for undersampling.
    // Initialise all its values to true.
    boolean included[] = new boolean[L.length];
    for (int k = 0; k < L.length; k++) {
        included[k] = true;
    }

    Random rand = new Random();

    // Perform the following operation for each label.
    // Note that labels are represented by their integer index, which is then
    // transformed to its string name. This was done to avoid problems and
    // exceptions thrown by methods required below.
    for (int label : L) {
        // Get the label name from the current instance, based on label index
        labelName = mlDataClone.getDataSet().attribute(label).name();
        if (imbalanceRatioPerLabel(mlDataClone, labelName) > meanIR) {
            // If the imbalance ratio of the label is greater than the mean
            // imbalance ratio of the dataset, add it to the minBag corresponding
            // to the specific label.
            minBags.add(new ArrayList<Instance>());
            // Add all instances containing this label to the minBag we just created
            for (int l = 0; l < mlDataClone.getNumInstances(); l++) {
                if (mlDataClone.getDataSet().get(l).value(label) == 1.0) {
                    minBags.get(i).add(mlDataClone.getDataSet().get(l));
                    // Remove the instance from the dataset
                    mlDataClone.getDataSet().delete(l);
                }
            }
            // Set the included flag as false, so that the label is not added to the majBags
            included[labelCounter] = false;
            i++;
        }
        labelCounter++;
    }

    // For every label again
    for (int label : L) {
        // Add a new majBag (one for each label)
        majBags.add(new ArrayList<Instance>());
        // Add all the instances having this label to the majBag. Note that
        // this operation takes place on the cloned dataset, which now contains
        // only the instances not having minority labels.
        for (int l = 0; l < mlDataClone.getNumInstances(); l++) {
            if (mlDataClone.getDataSet().get(l).value(label) == 1.0) {
                majBags.get(m).add(mlDataClone.getDataSet().get(l));
            }
        }
        m++;
    }

    remainingLabels = L.length - minBags.size();

    // While we haven't deleted all the samples yet and we still have labels to delete
    while (samplesToDelete > 0 && remainingLabels > 0) {
        // For each of the INITIAL labels (not only the ones in the cloned dataset)
        for (int j = 0; j < mlData.getNumLabels(); j++) {
            if (included[j]) {
                // If it is to be included (meaning it is a majority label), check
                // if this bag contains instances. If it doesn't, decrease the
                // numbers and go to the next iteration.
                if (majBags.get(j).size() == 0) {
                    included[j] = false;
                    remainingLabels--;
                    continue;
                }
                // Get a random instance from the bag
                x = rand.nextInt(majBags.get(j).size());
                // Based on the instance and the index, get its label
                labelName = majBags.get(j).get(x).attribute(L[j]).name();
                // Remove the instance from the bag
                majBags.get(j).remove(x);
                // If the imbalance ratio of the label has increased beyond the
                // acceptable limit of the mean imbalance ratio, remove this
                // majBag from future candidates.
                if (imbalanceRatioPerLabel(mlDataClone, labelName) >= meanIR) {
                    included[j] = false;
                    remainingLabels--;
                }
                samplesToDelete--;
            }
        }
    }

    // Add the contents of the minBags and the majBags to an empty dataset and return it
    for (List<Instance> list : minBags) {
        for (Instance inst : list) {
            mlDataReturned.add(inst);
        }
    }
    for (List<Instance> list : majBags) {
        for (Instance inst : list) {
            mlDataReturned.add(inst);
        }
    }

    return new MultiLabelInstances(mlDataReturned, mlData.getLabelsMetaData());
}
From source file:EsperTest.CEPListener.java
public void update(EventBean[] newData, EventBean[] oldData) {
    System.out.println("Event received: " + newData[0].getUnderlying());

    if (newData.length > 2) {
        // create the column name and type, these are strings
        // http://weka.wikispaces.com/Creating+an+ARFF+file
        Instances data;
        FastVector atts = new FastVector();

        for (int j = 0; j < columnNumbers.length; j++) {
            FastVector values = new FastVector();
            for (int i = 0; i < labels.NominalCount(j); i++) {
                values.addElement(labels.GetLabel(columnNumbers[j], i));
            }
            atts.addElement(new Attribute(labels.GetHeader(columnNumbers[j]), values));
        }

        data = new Instances("Title", atts, 0);

        for (int i = 0; i < newData.length; i++) {
            Instance inst = new Instance(columnNumbers.length);
            inst.setDataset(data); // needed so setValue(int, String) can resolve nominal labels
            for (int j = 0; j < columnNumbers.length; j++) {
                inst.setValue(j, newData[i].get("eventType").toString());
            }
            data.add(inst);
        }

        Apriori aprioriObj = new weka.associations.Apriori();
        try {
            aprioriObj.buildAssociations(data);
        } catch (Exception e) {
            System.out.println(e);
        }
        FastVector rules[] = aprioriObj.getAllTheRules();
    }
}
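This listener is written against the legacy pre-3.7 Weka API, where FastVector holds attribute values and new Instance(int) creates rows (note that setValue(int, String) throws UnassignedDatasetException unless setDataset is called first, hence the fix above). Under the 3.7+ API the same construction might look like the sketch below; the single eventType attribute and its labels are illustrative assumptions:

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

// Hypothetical 3.7+ equivalent of the legacy construction above;
// the single nominal attribute and its labels are illustrative.
public class ModernCepSketch {
    public static void main(String[] args) {
        ArrayList<String> values = new ArrayList<String>();
        values.add("login");
        values.add("logout");
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("eventType", values)); // ArrayList replaces FastVector

        Instances data = new Instances("Title", atts, 0);
        Instance inst = new DenseInstance(data.numAttributes()); // replaces new Instance(int)
        inst.setDataset(data);
        inst.setValue(0, "login");
        data.add(inst);
    }
}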
From source file:etc.aloe.data.SegmentSet.java
License:Open Source License
/**
 * Convert the segment set into an ExampleSet (ready for feature
 * extraction). The returned example set includes an id attribute, the
 * message text, a label attribute, and several basic features extracted
 * from the segment.
 *
 * @return the resulting ExampleSet
 */
public ExampleSet getBasicExamples() {
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute(ExampleSet.ID_ATTR_NAME));
    attributes.add(new Attribute(ExampleSet.MESSAGE_ATTR_NAME, (List<String>) null));
    attributes.add(new Attribute(ExampleSet.LABEL_ATTR_NAME,
            Arrays.asList(new String[] { "false", "true" })));
    attributes.add(new Attribute(ExampleSet.PARTICIPANT_ATTR_NAME, (List<String>) null));
    attributes.add(new Attribute(DURATION_ATTR_NAME));
    attributes.add(new Attribute(LENGTH_ATTR_NAME));
    attributes.add(new Attribute(CPS_ATTR_NAME));
    attributes.add(new Attribute(RATE_ATTR_NAME));

    Instances instances = new Instances("BasicExamples", attributes, 0);
    instances.setClassIndex(2);

    Attribute idAttr = instances.attribute(ExampleSet.ID_ATTR_NAME);
    Attribute messageAttr = instances.attribute(ExampleSet.MESSAGE_ATTR_NAME);
    Attribute labelAttr = instances.attribute(ExampleSet.LABEL_ATTR_NAME);
    Attribute participantAttr = instances.attribute(ExampleSet.PARTICIPANT_ATTR_NAME);
    Attribute durationAttr = instances.attribute(DURATION_ATTR_NAME);
    Attribute lengthAttr = instances.attribute(LENGTH_ATTR_NAME);
    Attribute cpsAttr = instances.attribute(CPS_ATTR_NAME);
    Attribute rateAttr = instances.attribute(RATE_ATTR_NAME);

    for (int i = 0; i < size(); i++) {
        Segment segment = get(i);
        Instance instance = new DenseInstance(instances.numAttributes());

        String messageStr = segment.concatMessages();
        String participantStr = segment.concatParticipants();

        instance.setValue(idAttr, segment.getId());
        instance.setValue(messageAttr, messageStr);
        instance.setValue(participantAttr, participantStr);
        if (segment.hasTrueLabel()) {
            instance.setValue(labelAttr, segment.getTrueLabel() ? "true" : "false");
        }

        computeRateValues(segment, instance, messageStr, durationAttr, lengthAttr, cpsAttr, rateAttr);

        instances.add(instance);
    }

    return new ExampleSet(instances);
}
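Passing (List<String>) null as the value list, as getBasicExamples() does for the message and participant attributes, declares a free-text string attribute rather than a nominal one. A minimal sketch of that idiom, with illustrative names:

import java.util.ArrayList;
import java.util.List;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

// Minimal sketch: a string attribute holds arbitrary text, unlike a nominal one.
// The attribute and relation names are illustrative.
public class StringAttributeSketch {
    public static void main(String[] args) {
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("message", (List<String>) null)); // null value list -> string attribute

        Instances data = new Instances("texts", atts, 0);
        Instance inst = new DenseInstance(data.numAttributes());
        inst.setValue(atts.get(0), "free-form message text");    // value is interned into the attribute
        data.add(inst);
    }
}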