List of usage examples for weka.core.Instances.setClassIndex
public void setClassIndex(int classIndex)
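Before the project examples below, here is a minimal, self-contained sketch of the call itself. It assumes an ARFF file named data.arff (a placeholder) whose last attribute holds the class; Weka never guesses the class attribute, so it must be set explicitly before training or evaluation.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SetClassIndexExample {
    public static void main(String[] args) throws Exception {
        // Load a dataset; "data.arff" is a placeholder file name.
        Instances data = new DataSource("data.arff").getDataSet();
        // Assume the last attribute is the class and point the class index at it.
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println("Class attribute: " + data.classAttribute().name());
    }
}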
From source file:es.jarias.FMC.ClassCompoundTransformation.java
License:Open Source License
/**
 * @param mlData
 * @return the transformed instances
 * @throws Exception
 */
public Instances transformInstances(MultiLabelInstances mlData) throws Exception {
    data = mlData.getDataSet();
    numLabels = mlData.getNumLabels();
    labelIndices = mlData.getLabelIndices();
    Instances newData = null;

    // This must be different in order to combine ALL class states, not only existing ones.
    // gather distinct label combinations
    // ASSUME CLASSES ARE BINARY
    ArrayList<LabelSet> labelSets = new ArrayList<LabelSet>();
    double[] dblLabels = new double[numLabels];
    double nCombinations = Math.pow(2, numLabels);
    for (int i = 0; i < nCombinations; i++) {
        for (int l = 0; l < numLabels; l++) {
            int digit = (int) Math.pow(2, numLabels - 1 - l);
            dblLabels[l] = (digit & i) / digit;
        }
        LabelSet labelSet = new LabelSet(dblLabels);
        labelSets.add(labelSet);
    }

    // for (int i = 0; i < numInstances; i++) {
    //     // construct labelset
    //     double[] dblLabels = new double[numLabels];
    //     for (int j = 0; j < numLabels; j++) {
    //         int index = labelIndices[j];
    //         dblLabels[j] = Double.parseDouble(data.attribute(index).value((int) data.instance(i).value(index)));
    //     }
    //     LabelSet labelSet = new LabelSet(dblLabels);
    //
    //     // add labelset if not already present
    //     labelSets.add(labelSet);
    // }

    // create class attribute
    ArrayList<String> classValues = new ArrayList<String>(labelSets.size());
    for (LabelSet subset : labelSets) {
        classValues.add(subset.toBitString());
    }
    newClass = new Attribute("class", classValues);

    // for (String s : classValues) {
    //     System.out.print(s + ", ");
    // }
    // System.out.println();

    // remove all labels
    newData = RemoveAllLabels.transformInstances(data, labelIndices);

    // add new class attribute
    newData.insertAttributeAt(newClass, newData.numAttributes());
    newData.setClassIndex(newData.numAttributes() - 1);

    // add class values
    for (int i = 0; i < newData.numInstances(); i++) {
        //System.out.println(newData.instance(i).toString());
        String strClass = "";
        for (int j = 0; j < numLabels; j++) {
            int index = labelIndices[j];
            strClass = strClass + data.attribute(index).value((int) data.instance(i).value(index));
        }
        //System.out.println(strClass);
        newData.instance(i).setClassValue(strClass);
    }
    transformedFormat = new Instances(newData, 0);
    return newData;
}
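The Instances manipulation at the heart of that transformation, appending a nominal class attribute and then pointing the class index at it, can be isolated into a small sketch. This is an illustrative rewrite, not part of the project above; the helper name and attribute name "class" are arbitrary.

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.Instances;

public class AppendClassAttribute {
    // Minimal sketch: append a nominal class attribute to an existing dataset
    // and make it the class, as the label-powerset transformation above does.
    public static Instances addClass(Instances data, ArrayList<String> classValues) {
        Attribute newClass = new Attribute("class", classValues);
        data.insertAttributeAt(newClass, data.numAttributes()); // appended as the last attribute
        data.setClassIndex(data.numAttributes() - 1);           // make it the class attribute
        return data;                                            // class values are still missing until set per instance
    }
}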
From source file:es.jarias.FMC.FMC.java
License:Open Source License
public static void buildModel(MultiLabelInstances trainData, MultiLabelInstances testData, int fold,
        String baseClassifierClass, String discType, String fss, String outPath, String prune) throws Exception {
    double start = System.nanoTime();
    try {
        // DATA PREPROCESSING:
        weka.filters.unsupervised.attribute.Discretize m_unsuperDiscretize = null;
        if (discType.equals("supervised")) {
            // pass
            // Supervised discretization is applied to each model later during the training step.
        } else if (discType.equals("unsupervised")) {
            // Apply a baseline discretization filter:
            m_unsuperDiscretize = new weka.filters.unsupervised.attribute.Discretize();
            m_unsuperDiscretize.setUseEqualFrequency(false);
            m_unsuperDiscretize.setBins(3);
            m_unsuperDiscretize.setInputFormat(trainData.getDataSet());
            trainData = trainData.reintegrateModifiedDataSet(
                    Filter.useFilter(trainData.getDataSet(), m_unsuperDiscretize));
        } else
            throw new Exception("Invalid Discretization Type");

        if (!fss.equals("no") && !fss.equals("CFS"))
            throw new Exception("Invalid FSS strategy");

        if (!prune.equals("full") && !prune.equals("tree") && !prune.equals("best") && !prune.equals("hiton")
                && !prune.equals("bdeu"))
            throw new Exception("Invalid Pruning strategy");

        // Label information
        int m_numLabels = trainData.getNumLabels();
        int[] m_labelIndices = trainData.getLabelIndices();

        // Map for reference:
        HashMap<Integer, Integer> mapLabels = new HashMap<Integer, Integer>(m_numLabels);
        String[] mapLabelsName = new String[m_numLabels];
        for (int l = 0; l < m_numLabels; l++) {
            mapLabels.put(trainData.getLabelIndices()[l], l);
            mapLabelsName[l] = trainData.getDataSet().attribute(trainData.getLabelIndices()[l]).name();
        }

        // Get label combinations:
        int m_numPairs = (m_labelIndices.length * (m_labelIndices.length - 1)) / 2;
        int[][] labelCombinations = new int[m_numPairs][2];
        int counter = 0;
        for (int i = 0; i < m_labelIndices.length; i++) {
            for (int j = i + 1; j < m_labelIndices.length; j++) {
                labelCombinations[counter] = new int[] { m_labelIndices[i], m_labelIndices[j] };
                counter++;
            }
        }

        // Select the pairs:
        int m_numSelected = m_numPairs;
        int m_numSingleton = 0;
        int[] ordered;
        boolean[] selectedPair = new boolean[m_numPairs];
        boolean[] singleton = new boolean[m_numLabels];
        for (int i = 0; i < m_numPairs; i++)
            selectedPair[i] = true;

        if (!prune.equals("full")) {
            m_numSelected = 0;
            selectedPair = new boolean[m_numPairs];

            // Info gain for pruned model:
            double[][] mutualInfoPairs = mutualInfo(trainData.getDataSet(), trainData.getLabelIndices());
            double[] mutualInfo = new double[m_numPairs];
            counter = 0;
            for (int i = 0; i < m_labelIndices.length; i++) {
                Instances tempInstances = new Instances(trainData.getDataSet());
                tempInstances.setClassIndex(m_labelIndices[i]);
                for (int j = i + 1; j < m_labelIndices.length; j++) {
                    mutualInfo[counter] = mutualInfoPairs[i][j];
                    counter++;
                }
            }
            ordered = orderBy(mutualInfo);

            if (prune.equals("tree")) {
                // Each label corresponds to its own connected component
                HashMap<Integer, ArrayList<Integer>> tree_compo = new HashMap<Integer, ArrayList<Integer>>(m_numLabels);
                HashMap<Integer, Integer> tree_index = new HashMap<Integer, Integer>(m_numLabels);
                for (int i = 0; i < m_numLabels; i++) {
                    tree_compo.put(i, new ArrayList<Integer>());
                    tree_compo.get(i).add(i);
                    tree_index.put(i, i);
                }
                for (int i = 0; i < m_numPairs; i++) {
                    if (m_numSelected >= m_numLabels - 1)
                        break;
                    int pairIndex = ordered[i];
                    int pair_i = mapLabels.get(labelCombinations[pairIndex][0]);
                    int pair_j = mapLabels.get(labelCombinations[pairIndex][1]);
                    int conex_i = tree_index.get(pair_i);
                    int conex_j = tree_index.get(pair_j);
                    if (conex_i != conex_j) {
                        ArrayList<Integer> family = tree_compo.get(conex_j);
                        tree_compo.get(conex_i).addAll(family);
                        for (int element : family) {
                            tree_index.put(element, conex_i);
                        }
                        selectedPair[pairIndex] = true;
                        m_numSelected++;
                    }
                }
            } // End of the Chow-Liu algorithm

            if (prune.equals("best") || prune.equals("tree")) {
                int amount = 0;
                if (prune.equals("best"))
                    amount = (int) (m_numLabels * 2);
                int index = 0;
                while (m_numSelected < amount && index < m_numPairs) {
                    if (!selectedPair[ordered[index]]) {
                        m_numSelected++;
                        selectedPair[ordered[index]] = true;
                    }
                    index++;
                }
            } // End of the linear tree and best procedures

            if (prune.equals("hiton")) {
                weka.filters.unsupervised.attribute.Remove m_remove = new weka.filters.unsupervised.attribute.Remove();
                m_remove.setAttributeIndicesArray(trainData.getLabelIndices());
                m_remove.setInvertSelection(true);
                m_remove.setInputFormat(trainData.getDataSet());
                Instances hitonData = Filter.useFilter(trainData.getDataSet(), m_remove);
                HITON hiton = new HITON(hitonData);
                HashSet<Integer>[] markovBlanket = new HashSet[m_numLabels];
                for (int l = 0; l < m_numLabels; l++)
                    markovBlanket[l] = hiton.HITONMB(l);
                for (int p = 0; p < m_numPairs; p++) {
                    int p_i = mapLabels.get(labelCombinations[p][0]);
                    int p_j = mapLabels.get(labelCombinations[p][1]);
                    if (markovBlanket[p_i].contains(p_j) || markovBlanket[p_j].contains(p_i)) {
                        selectedPair[p] = true;
                        m_numSelected++;
                    }
                }
            } // End of the HITON pruning algorithm

            if (prune.equals("bdeu")) {
                weka.filters.unsupervised.attribute.Remove m_remove = new weka.filters.unsupervised.attribute.Remove();
                m_remove.setAttributeIndicesArray(trainData.getLabelIndices());
                m_remove.setInvertSelection(true);
                m_remove.setInputFormat(trainData.getDataSet());
                Instances hitonData = Filter.useFilter(trainData.getDataSet(), m_remove);
                BDeu hiton = new BDeu(hitonData);
                double[] scores = hiton.singleScore;
                double[] pairScores = new double[m_numPairs];
                double[] sumScores = new double[m_numLabels];
                for (int p = 0; p < m_numPairs; p++) {
                    int head = mapLabels.get(labelCombinations[p][0]);
                    int tail = mapLabels.get(labelCombinations[p][1]);
                    pairScores[p] = -1 * (scores[tail] - (hiton.localBdeuScore(tail, new Integer[] { head })));
                    sumScores[tail] += pairScores[p];
                    sumScores[head] += pairScores[p];
                }
                HashSet<Integer>[] parents = new HashSet[m_numLabels];
                for (int i = 0; i < m_numLabels; i++)
                    parents[i] = new HashSet<Integer>();
                ordered = orderBy(pairScores);
                int[] topologicalOrdering = orderBy(sumScores);
                int[] relevance = new int[m_numLabels];
                for (int i = 0; i < m_numLabels; i++)
                    relevance[topologicalOrdering[i]] = i;
                for (int p = 0; p < m_numPairs; p++) {
                    int pair = ordered[p];
                    int head = mapLabels.get(labelCombinations[pair][0]);
                    int tail = mapLabels.get(labelCombinations[pair][1]);
                    if (relevance[head] > relevance[tail]) {
                        int aux = head;
                        head = tail;
                        tail = aux;
                    }
                    // Check if adding this improves
                    parents[tail].add(head);
                    double scoreAdd = hiton.localBdeuScore(tail, parents[tail].toArray(new Integer[parents[tail].size()]));
                    double diff = scores[tail] - scoreAdd;
                    if (diff < 0) {
                        scores[tail] = scoreAdd;
                        selectedPair[pair] = true;
                        m_numSelected++;
                    } else {
                        parents[tail].remove(head);
                    }
                } // End of the BDeu procedure
            } // End of the pruning algorithms

            // Determine singleton variables
            for (int i = 0; i < m_labelIndices.length; i++)
                singleton[i] = true;
            for (int p = 0; p < m_numPairs; p++) {
                if (selectedPair[p]) {
                    singleton[mapLabels.get(labelCombinations[p][0])] = false;
                    singleton[mapLabels.get(labelCombinations[p][1])] = false;
                }
            }
            for (int i = 0; i < m_labelIndices.length; i++)
                if (singleton[i])
                    m_numSingleton++;
            mutualInfo = null;
        }

        // Generate single class datasets from the full ML data and learn models:
        HashMap<Integer, Classifier> models = new HashMap<Integer, Classifier>();
        HashMap<Integer, Classifier> singletonModels = new HashMap<Integer, Classifier>();
        HashMap<Integer, weka.filters.supervised.attribute.AttributeSelection> singletonFilterSel = new HashMap<Integer, weka.filters.supervised.attribute.AttributeSelection>();
        HashMap<Integer, weka.filters.supervised.attribute.Discretize> singletonFilter = new HashMap<Integer, weka.filters.supervised.attribute.Discretize>();
        weka.filters.supervised.attribute.AttributeSelection[] m_selecters = new weka.filters.supervised.attribute.AttributeSelection[m_numPairs];
        weka.filters.supervised.attribute.Discretize[] m_discretizers = new weka.filters.supervised.attribute.Discretize[m_numPairs];
        ClassCompoundTransformation[] converters = new ClassCompoundTransformation[m_numPairs];

        for (int i = 0; i < m_numPairs; i++) {
            if (!selectedPair[i]) {
                continue;
            }
            MultiLabelInstances filteredLabelData = trainData.reintegrateModifiedDataSet(
                    RemoveAllLabels.transformInstances(trainData.getDataSet(),
                            complement(m_labelIndices, labelCombinations[i])));
            converters[i] = new ClassCompoundTransformation();
            Instances singleLabelData = converters[i].transformInstances(filteredLabelData);
            if (discType.equals("supervised")) {
                m_discretizers[i] = new Discretize();
                m_discretizers[i].setInputFormat(singleLabelData);
                singleLabelData = Filter.useFilter(singleLabelData, m_discretizers[i]);
            }
            if (fss.equals("CFS")) {
                m_selecters[i] = new weka.filters.supervised.attribute.AttributeSelection();
                m_selecters[i].setSearch(new weka.attributeSelection.BestFirst());
                m_selecters[i].setEvaluator(new weka.attributeSelection.CfsSubsetEval());
                m_selecters[i].setInputFormat(singleLabelData);
                singleLabelData = Filter.useFilter(singleLabelData, m_selecters[i]);
            }
            models.put(i, (Classifier) Class.forName("weka.classifiers." + baseClassifierClass).newInstance());
            models.get(i).buildClassifier(singleLabelData);
        }

        // Learn singleton models:
        for (int i = 0; i < m_labelIndices.length; i++) {
            if (singleton[i]) {
                Instances singleLabelData = new Instances(trainData.getDataSet());
                singleLabelData.setClassIndex(m_labelIndices[i]);
                singleLabelData = RemoveAllLabels.transformInstances(singleLabelData,
                        complement(m_labelIndices, new int[] { m_labelIndices[i] }));
                if (discType.equals("supervised")) {
                    singletonFilter.put(i, new Discretize());
                    singletonFilter.get(i).setInputFormat(singleLabelData);
                    singleLabelData = Filter.useFilter(singleLabelData, singletonFilter.get(i));
                }
                if (fss.equals("CFS")) {
                    weka.filters.supervised.attribute.AttributeSelection tempFilter = new weka.filters.supervised.attribute.AttributeSelection();
                    tempFilter.setSearch(new weka.attributeSelection.BestFirst());
                    tempFilter.setEvaluator(new weka.attributeSelection.CfsSubsetEval());
                    tempFilter.setInputFormat(singleLabelData);
                    singletonFilterSel.put(i, tempFilter);
                    singleLabelData = Filter.useFilter(singleLabelData, singletonFilterSel.get(i));
                }
                Classifier single;
                single = (Classifier) Class.forName("weka.classifiers." + baseClassifierClass).newInstance();
                single.buildClassifier(singleLabelData);
                singletonModels.put(i, single);
            }
        }
        //
        // END OF THE LEARNING STAGE
        //
        double train = System.nanoTime() - start;
        start = System.nanoTime();

        Writer writerConf = null;
        Writer writerDist = null;
        Writer writerSing = null;
        Writer writerLayo = null;
        try {
            writerConf = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(outPath + "/conf_" + fold + ".txt"), "utf-8"));
            writerDist = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(outPath + "/dist_" + fold + ".txt"), "utf-8"));
            writerSing = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(outPath + "/sing_" + fold + ".txt"), "utf-8"));
            writerLayo = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(outPath + "/layo_" + fold + ".txt"), "utf-8"));

            for (int l = 0; l < m_numLabels; l++) {
                writerLayo.write(trainData.getDataSet().attribute(m_labelIndices[l]).numValues() + "\t");
            }
            writerLayo.write("\n");
            writerLayo.write(m_numSelected + "\t" + m_numSingleton);
            writerLayo.close();

            // Get distributions for instance for each variable pairs:
            double[] distributions;
            for (int i = 0; i < testData.getDataSet().size(); i++) {
                for (int l : testData.getLabelIndices())
                    writerConf.write((int) testData.getDataSet().instance(i).value(l) + "\t");
                writerConf.write("\n");

                Instance inst = testData.getDataSet().get(i);
                if (discType.equals("unsupervised")) {
                    m_unsuperDiscretize.input(inst);
                    inst = m_unsuperDiscretize.output();
                }
                for (int p = 0; p < m_numPairs; p++) {
                    if (!selectedPair[p]) {
                        continue;
                    }
                    Instance processed = converters[p].transformInstance(inst, testData.getLabelIndices());
                    if (discType.equals("supervised")) {
                        m_discretizers[p].input(processed);
                        processed = m_discretizers[p].output();
                        // m_removers[p].input(processed);
                        // processed = m_removers[p].output();
                    }
                    if (!fss.equals("no")) {
                        m_selecters[p].input(processed);
                        processed = m_selecters[p].output();
                    }
                    distributions = models.get(p).distributionForInstance(processed);
                    writerDist.write(mapLabels.get(labelCombinations[p][0]) + "\t"
                            + mapLabels.get(labelCombinations[p][1]) + "\t");
                    for (int d = 0; d < distributions.length; d++)
                        writerDist.write(distributions[d] + "\t");
                    writerDist.write("\n");
                }

                // Get predictions for singleton labels:
                for (int m = 0; m < m_labelIndices.length; m++) {
                    if (singleton[m]) {
                        Instance processed = RemoveAllLabels.transformInstance(inst,
                                complement(m_labelIndices, new int[] { m_labelIndices[m] }));
                        if (discType.equals("supervised")) {
                            singletonFilter.get(m).input(processed);
                            processed = singletonFilter.get(m).output();
                        }
                        if (!fss.equals("no")) {
                            singletonFilterSel.get(m).input(processed);
                            processed = singletonFilterSel.get(m).output();
                        }
                        double[] distribution = singletonModels.get(m).distributionForInstance(processed);
                        double maxValue = 0;
                        int conf = -1;
                        for (int v = 0; v < distribution.length; v++) {
                            if (distribution[v] > maxValue) {
                                maxValue = distribution[v];
                                conf = v;
                            }
                        }
                        writerSing.write(i + "\t" + m + "\t" + conf + "\n");
                    }
                }
            }
            writerConf.close();
            writerDist.close();
            writerSing.close();

            double test = System.nanoTime() - start;
            // train /= 1000000000.0;
            // test /= 1000000000.0;
            // System.out.println(java.lang.String.format("FMC-%s\t%s\t%s\t%d\t%s\t%s\t%.4f\t%.4f", prune, baseClassifierClass, dbName, fold, discType, fss, train, test));
        } catch (IOException ex) {
            // report
        } finally {
            try {
                writerConf.close();
            } catch (Exception ex) {
            }
            try {
                writerDist.close();
            } catch (Exception ex) {
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:es.ubu.XRayDetector.datos.GestorArff.java
License:Open Source License
/**
 * Reads an ARFF file.
 *
 * @param url The path of the ARFF file.
 * @return The instances included in the ARFF file.
 */
public Instances leerArff(String url) {
    BufferedReader reader = null;
    try {
        reader = new BufferedReader(new FileReader(url));
    } catch (FileNotFoundException e) {
        throw new RuntimeException(e);
    }
    ArffReader arff = null;
    try {
        arff = new ArffReader(reader);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    Instances data = arff.getData();
    data.setClassIndex(data.numAttributes() - 1);
    return data;
}
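Calling this reader might look like the sketch below. The file path is a placeholder and a no-argument GestorArff constructor is assumed; the point is that the returned Instances already has its class index set, so it can be passed straight to a classifier.

// Hypothetical usage of leerArff; "train.arff" is a placeholder path.
GestorArff gestor = new GestorArff();
Instances data = gestor.leerArff("train.arff");
System.out.println(data.numInstances() + " instances, class = " + data.classAttribute().name());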
From source file:es.ubu.XRayDetector.modelo.Fachada.java
License:Open Source License
/**
 * Creates a model training a classifier using bagging.
 *
 * @param data Contains all the instances of the ARFF file
 * @param sizeWindow The size of the window
 */
public void createModel(Instances data, String sizeWindow) {
    // create the classifier, set its options and input format
    Classifier cls = null;
    //String separator = System.getProperty("file.separator");
    String path = prop.getPathModel();
    int opcionClasificacion = prop.getTipoClasificacion();

    switch (opcionClasificacion) {
    case 0:
        // CLASSIFIER FOR NOMINAL CLASSES (TRUE, FALSE)
        Classifier base;
        base = new REPTree();
        cls = new Bagging();
        ((Bagging) cls).setNumIterations(25);
        ((Bagging) cls).setBagSizePercent(100);
        ((Bagging) cls).setNumExecutionSlots(Runtime.getRuntime().availableProcessors());
        ((Bagging) cls).setClassifier(base);
        break;
    case 1:
        // LINEAR REGRESSION (NUMERIC CLASSES, 1/0)
        cls = new REPTree();
        break;
    }
    ObjectOutputStream oos = null;
    try {
        data.setClassIndex(data.numAttributes() - 1);
        cls.buildClassifier(data);
        /*if (arffName.contains("mejores"))
            oos = new ObjectOutputStream(new FileOutputStream((path + separator + "Modelos" + separator
                    + "Bagging_" + "mejores_" + sizeWindow + ".model")));
        if (arffName.contains("todas"))*/
        oos = new ObjectOutputStream(new FileOutputStream((path + "todas_" + sizeWindow + ".model")));
        oos.writeObject(cls);
        oos.flush();
        oos.close();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
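Since the method ends by serializing the trained classifier to a .model file, a natural counterpart is loading it back. This is a hedged sketch, not part of the project; the path and window size in the file name are placeholders that mirror the naming scheme used above.

import weka.classifiers.Classifier;
import weka.core.SerializationHelper;

public class LoadSavedModel {
    public static void main(String[] args) throws Exception {
        // Placeholder path following the "todas_<sizeWindow>.model" convention above.
        Classifier cls = (Classifier) SerializationHelper.read("/path/to/todas_24.model");
        System.out.println(cls);
    }
}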
From source file:es.ubu.XRayDetector.modelo.ventana.VentanaAbstracta.java
License:Open Source License
/**
 * This method gets the headers of the features.
 *
 * @param features a List of features
 * @return header with features headers
 */
public Instances getHeader(List<String> features) {
    int capacity = 100000;
    List<String> featuresCopy = null;
    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    ArrayList<String> defect = new ArrayList<String>();
    defect.add("true");
    defect.add("false");

    if (features != null) {
        featuresCopy = new ArrayList<String>(features);
        for (int i = 0; i < featuresCopy.size(); i++) {
            String rest = featuresCopy.get(i).substring(1);
            char first = featuresCopy.get(i).charAt(0);
            first = Character.toLowerCase(first);
            featuresCopy.set(i, (first + rest).replaceAll(" ", ""));
        }
    }
    for (int j = 0; j < ftStandard.getHead().length; j++) {
        if (features == null || featuresCopy.contains(ftStandard.getHead()[j]))
            atts.add(new Attribute(ftStandard.getHead()[j]));
    }
    for (int j = 0; j < ftStandardSaliency.getHead().length; j++) {
        if (features == null || featuresCopy.contains(ftStandard.getHead()[j] + "(S)"))
            atts.add(new Attribute(ftStandardSaliency.getHead()[j] + "(S)"));
    }
    for (int j = 1; j < 6; j++) {
        for (int i = 0; i < ftHaralick.getHead().length; i++) {
            if (features == null || featuresCopy.contains(ftHaralick.getHead()[i]))
                atts.add(new Attribute(ftHaralick.getHead()[i] + "_mean" + j));
        }
    }
    for (int j = 1; j < 6; j++) {
        for (int i = 0; i < ftHaralick.getHead().length; i++) {
            if (features == null || featuresCopy.contains(ftHaralick.getHead()[i]))
                atts.add(new Attribute(ftHaralick.getHead()[i] + "_range" + j));
        }
    }
    for (int j = 1; j < 6; j++) {
        for (int i = 0; i < ftHaralickSaliency.getHead().length; i++) {
            if (features == null || featuresCopy.contains(ftHaralick.getHead()[i] + "(S)"))
                atts.add(new Attribute(ftHaralickSaliency.getHead()[i] + "_mean" + j + "(S)"));
        }
    }
    for (int j = 1; j < 6; j++) {
        for (int i = 0; i < ftHaralickSaliency.getHead().length; i++) {
            if (features == null || featuresCopy.contains(ftHaralick.getHead()[i] + "(S)"))
                atts.add(new Attribute(ftHaralickSaliency.getHead()[i] + "_range" + j + "(S)"));
        }
    }
    for (int j = 1; j < 60; j++) {
        if (features == null || featuresCopy.contains(ftLbp.getHead() + "_" + j))
            atts.add(new Attribute(ftLbp.getHead() + "(" + j + ")"));
    }
    for (int j = 1; j < 60; j++) {
        if (features == null || featuresCopy.contains(ftLbpSaliency.getHead() + "_" + j + "(S)"))
            atts.add(new Attribute(ftLbpSaliency.getHead() + "(" + j + ")(S)"));
    }
    atts.add(new Attribute("Defecto", defect));

    // Capacity is the number of instances.
    Instances header = new Instances("NuevaInstancia", atts, capacity);
    // Set the class attribute
    header.setClassIndex(header.numAttributes() - 1);
    return header;
}
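A header built this way is typically filled by binding new instances to it. The fragment below is a hedged sketch, assuming 'ventana' is an instance of some VentanaAbstracta subclass and using a dummy class value; it only illustrates how the class index set in getHeader carries over to added rows.

// Hedged usage sketch; 'ventana' is a hypothetical VentanaAbstracta subclass instance.
Instances header = ventana.getHeader(null);           // null -> include every feature
DenseInstance row = new DenseInstance(header.numAttributes());
row.setDataset(header);                                // bind the instance to the header's attributes
row.setValue(header.classAttribute(), "false");        // "Defecto" is the class attribute set above
header.add(row);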
From source file:es.upm.dit.gsi.barmas.dataset.utils.DatasetSplitter.java
License:Open Source License
/**
 * @param csvFilePath
 * @return
 * @throws Exception
 */
private Instances getDataFromCSV(String csvFilePath) throws Exception {
    DataSource source = new DataSource(csvFilePath);
    Instances data = source.getDataSet();
    data.setClassIndex(data.numAttributes() - 1);
    return data;
}
From source file:es.upm.dit.gsi.barmas.launcher.WekaClassifiersValidator.java
License:Open Source License
/**
 * @param csvFilePath
 * @return
 * @throws Exception
 */
public static Instances getDataFromCSV(String csvFilePath) throws Exception {
    DataSource source = new DataSource(csvFilePath);
    Instances data = source.getDataSet();
    data.setClassIndex(data.numAttributes() - 1);
    return data;
}
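Because this helper is public and static, it can be called directly; DataSource picks a loader from the file extension (a CSVLoader for .csv files) and the last column is then treated as the class. The sketch below is illustrative only, with a placeholder file path and an arbitrary choice of classifier.

import es.upm.dit.gsi.barmas.launcher.WekaClassifiersValidator;
import weka.classifiers.trees.J48;
import weka.core.Instances;

public class TrainFromCsv {
    public static void main(String[] args) throws Exception {
        // Hypothetical usage; "dataset.csv" is a placeholder path.
        Instances data = WekaClassifiersValidator.getDataFromCSV("dataset.csv");
        J48 tree = new J48();          // any Weka classifier would do here
        tree.buildClassifier(data);    // requires the class index set inside getDataFromCSV
        System.out.println(tree);
    }
}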
From source file:etc.aloe.data.SegmentSet.java
License:Open Source License
/**
 * Convert the segment set into an ExampleSet (ready for feature
 * extraction). The returned example set includes an id attribute, the
 * message text, a label attribute, and several basic features extracted
 * from the segment.
 *
 * @return
 */
public ExampleSet getBasicExamples() {
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute(ExampleSet.ID_ATTR_NAME));
    attributes.add(new Attribute(ExampleSet.MESSAGE_ATTR_NAME, (List<String>) null));
    attributes.add(new Attribute(ExampleSet.LABEL_ATTR_NAME, Arrays.asList(new String[] { "false", "true" })));
    attributes.add(new Attribute(ExampleSet.PARTICIPANT_ATTR_NAME, (List<String>) null));
    attributes.add(new Attribute(DURATION_ATTR_NAME));
    attributes.add(new Attribute(LENGTH_ATTR_NAME));
    attributes.add(new Attribute(CPS_ATTR_NAME));
    attributes.add(new Attribute(RATE_ATTR_NAME));

    Instances instances = new Instances("BasicExamples", attributes, 0);
    instances.setClassIndex(2);

    Attribute idAttr = instances.attribute(ExampleSet.ID_ATTR_NAME);
    Attribute messageAttr = instances.attribute(ExampleSet.MESSAGE_ATTR_NAME);
    Attribute labelAttr = instances.attribute(ExampleSet.LABEL_ATTR_NAME);
    Attribute participantAttr = instances.attribute(ExampleSet.PARTICIPANT_ATTR_NAME);
    Attribute durationAttr = instances.attribute(DURATION_ATTR_NAME);
    Attribute lengthAttr = instances.attribute(LENGTH_ATTR_NAME);
    Attribute cpsAttr = instances.attribute(CPS_ATTR_NAME);
    Attribute rateAttr = instances.attribute(RATE_ATTR_NAME);

    for (int i = 0; i < size(); i++) {
        Segment segment = get(i);
        Instance instance = new DenseInstance(instances.numAttributes());

        String messageStr = segment.concatMessages();
        String participantStr = segment.concatParticipants();

        instance.setValue(idAttr, segment.getId());
        instance.setValue(messageAttr, messageStr);
        instance.setValue(participantAttr, participantStr);
        if (segment.hasTrueLabel()) {
            instance.setValue(labelAttr, segment.getTrueLabel() ? "true" : "false");
        }
        computeRateValues(segment, instance, messageStr, durationAttr, lengthAttr, cpsAttr, rateAttr);

        instances.add(instance);
    }
    return new ExampleSet(instances);
}
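This example hard-codes setClassIndex(2) because the label attribute is added third. When the attribute order is not fixed, an equivalent and more robust pattern is to look the attribute up by name, as in this short hedged fragment reusing the names from the snippet above:

// Equivalent to the hard-coded setClassIndex(2) above, but resilient to attribute reordering.
Attribute labelAttr = instances.attribute(ExampleSet.LABEL_ATTR_NAME);
instances.setClass(labelAttr);   // same effect as instances.setClassIndex(labelAttr.index())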
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
public static void main(String[] args) {
    // Create a test dataset
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute("message", (ArrayList<String>) null));
    attributes.add(new Attribute("id"));
    {
        ArrayList<String> classValues = new ArrayList<String>();
        classValues.add("0");
        classValues.add("1");
        attributes.add(new Attribute("class", classValues));
    }

    Instances instances = new Instances("test", attributes, 0);
    instances.setClassIndex(2);

    String[] messages = new String[] { "No emoticons here", "I have a smiley :)",
            "Two smileys and a frownie :) :) :(", "Several emoticons :( :-( :) :-) ;-) 8-) :-/ :-P" };

    for (int i = 0; i < messages.length; i++) {
        Instance instance = new DenseInstance(instances.numAttributes());
        instance.setValue(instances.attribute(0), messages[i]);
        instance.setValue(instances.attribute(1), i);
        instance.setValue(instances.attribute(2), Integer.toString(i % 2));
        instances.add(instance);
    }

    System.out.println("Before filter:");
    for (int i = 0; i < instances.size(); i++) {
        System.out.println(instances.instance(i).toString());
    }

    try {
        String dictionaryName = "emoticons.txt";
        StringToDictionaryVector filter = new StringToDictionaryVector();
        List<String> termList = StringToDictionaryVector.readDictionaryFile(new File(dictionaryName));
        filter.setTermList(termList);
        filter.setMinTermFreq(1);
        filter.setTFTransform(true);
        filter.setIDFTransform(true);
        filter.setNormalizeDocLength(new SelectedTag(FILTER_NORMALIZE_TEST_ONLY, TAGS_FILTER));
        filter.setOutputWordCounts(true);
        filter.setStringAttribute("message");

        filter.setInputFormat(instances);
        Instances trans1 = Filter.useFilter(instances, filter);
        Instances trans2 = Filter.useFilter(instances, filter);

        System.out.println("\nFirst application:");
        System.out.println(trans1.toString());
        System.out.println("\nSecond application:");
        System.out.println(trans2.toString());
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:eu.linda.analytics.formats.ArffInputFormat.java
@Override
public AbstractList importData4weka(String pathToFile, boolean isForRDFOutput, Analytics analytics) {
    helpfulFuncions.nicePrintMessage("import Arff file " + pathToFile);
    Instances data = null;
    //Instances newData = null;
    try {
        data = ConverterUtils.DataSource.read(pathToFile);

        //NominalToString filter1 = new NominalToString();
        //filter1.setInputFormat(data);
        //data = Filter.useFilter(data, filter1);

        /*/first 2 columns are metadata info used for rdf output
        if (excludeMetadataInfo) {
            String[] options = new String[2];
            options[0] = "-R";                          // "range"
            options[1] = "1,2";                         // first attributes
            Remove remove = new Remove();               // new instance of filter
            remove.setOptions(options);                 // set options
            remove.setInputFormat(data);                // inform filter about dataset **AFTER** setting options
            newData = Filter.useFilter(data, remove);   // apply filter
            newData.setClassIndex(newData.numAttributes() - 1);
            return newData;
        }*/

        data.setClassIndex(data.numAttributes() - 1);
    } catch (Exception ex) {
        Logger.getLogger(ArffInputFormat.class.getName()).log(Level.SEVERE, null, ex);
    }
    return data;
}