List of usage examples for weka.core Instances attribute
public Attribute attribute(String name)
From source file:FFNN.MultiplePerceptron.java
public static void main(String args[]) throws Exception { // System.out.println("input jumlah layer 0/1 :"); // Scanner input = new Scanner(System.in); // int layer = input.nextInt(); // System.out.println("input learning rate"); // double rate = input.nextDouble(); // int hidden = 0; // if(layer==1){ // System.out.println("input jumlah neuron di hidden layer"); // hidden = input.nextInt(); // }/*from www.j ava 2 s .co m*/ // // System.out.print("Masukkan nama file : "); // String filename = input.next(); ConverterUtils.DataSource source = new ConverterUtils.DataSource( ("D:\\Program Files\\Weka-3-8\\data\\iris.arff")); Instances train = source.getDataSet(); // Normalize nm = new Normalize(); // nm.setInputFormat(train); // train = Filter.useFilter(train, nm); for (int i = 0; i < train.numAttributes(); i++) System.out.println(i + ". " + train.attribute(i).name()); System.out.print("Masukkan indeks kelas : "); //int classIdx = input.nextInt(); train.setClassIndex(train.numAttributes() - 1); MultiplePerceptron mlp = new MultiplePerceptron(10000, 1, 13, train); mlp.buildClassifier(train); Evaluation eval = new Evaluation(train); eval.evaluateModel(mlp, train); System.out.println(eval.toSummaryString()); // System.out.println(eval.toMatrixString()); }
From source file:filters.MauiFilter.java
License:Open Source License
/** * Sets the format of the input instances. * /* w w w .j av a2s .com*/ * @param instanceInfo * an Instances object containing the input instance structure * (any instances contained in the object are ignored - only the * structure is required). * @return true if the outputFormat may be collected immediately */ public boolean setInputFormat(Instances instanceInfo) throws Exception { if (instanceInfo.classIndex() >= 0) { throw new Exception("Don't know what do to if class index set!"); } if (!instanceInfo.attribute(keyphrasesAtt).isString() || !instanceInfo.attribute(documentAtt).isString()) { throw new Exception("Keyphrase attribute and document attribute " + "need to be string attributes."); } phraseFilter = new MauiPhraseFilter(); int[] arr = new int[1]; arr[0] = documentAtt; phraseFilter.setAttributeIndicesArray(arr); phraseFilter.setInputFormat(instanceInfo); if (vocabularyName.equals("none")) { numbersFilter = new NumbersFilter(); numbersFilter.setInputFormat(phraseFilter.getOutputFormat()); super.setInputFormat(numbersFilter.getOutputFormat()); } else { super.setInputFormat(phraseFilter.getOutputFormat()); } return false; }
From source file:fr.ign.cogit.geoxygene.util.conversion.ConversionToARFF.java
License:Open Source License
/** * Permet de convertir des FeatureCollections en fichier ARFF * //from w w w . j a va 2 s.c o m * @param featColl la collection en entre * @param outFilePath le fichier en sortie * @throws ParseException * @throws IOException */ public static void export(IFeatureCollection<IFeature> featColl, String outFilePath) throws ParseException, IOException { ArrayList<Attribute> atts = new ArrayList<Attribute>(); Instances data; double[] vals; int i; // 1. Prparation des attributs IFeature feat = featColl.get(0); FeatureType ft = (FeatureType) feat.getFeatureType(); List<GF_AttributeType> lAttributeTypes = ft.getFeatureAttributes(); int nbAttributes = lAttributeTypes.size(); for (i = 0; i < nbAttributes; i++) { GF_AttributeType attT = lAttributeTypes.get(i); if (attT.getValueType().equalsIgnoreCase("String")) { atts.add(new Attribute(attT.getMemberName(), (List<String>) null)); } else { atts.add(new Attribute(attT.getMemberName())); } } // 2 on cre l'instance data = new Instances("MyRelation", atts, 0); // 3 on ajoute les donnes int nbElem = featColl.size(); for (i = 0; i < nbElem; i++) { feat = featColl.get(i); vals = new double[nbAttributes]; for (int j = 0; j < nbAttributes; j++) { GF_AttributeType attT = lAttributeTypes.get(j); if (attT.getValueType().equalsIgnoreCase("String")) { vals[j] = data.attribute(j).addStringValue(feat.getAttribute(attT.getMemberName()).toString()); } else { vals[j] = Double.parseDouble(feat.getAttribute(attT.getMemberName()).toString()); } } DenseInstance densInstance = new DenseInstance(1.0, vals); data.add(densInstance); } ArffSaver arffSaver = new ArffSaver(); arffSaver.setInstances(data); arffSaver.setFile(new File(outFilePath)); arffSaver.writeBatch(); }
From source file:fr.loria.synalp.jtrans.phonetiseur.Classifieurs.java
License:Open Source License
/**
 * Classifies a single instance and maps the predicted class index back to its
 * nominal value taken from the last attribute of the given dataset.
 *
 * @param instance     the instance to classify
 * @param classifieur  the trained classifier to use
 * @param instances    dataset whose last attribute holds the class values
 * @return the nominal value corresponding to the predicted class index
 * @throws Exception if classification fails
 */
private String resultatClassifieur(Instance instance, Classifier classifieur, Instances instances) throws Exception {
    double predictedIndex = classifieur.classifyInstance(instance);
    // The class attribute is assumed to be the last attribute of the dataset.
    int classAttIndex = instances.numAttributes() - 1;
    return instances.attribute(classAttIndex).value((int) predictedIndex);
}
From source file:gnusmail.learning.ClassifierManager.java
License:Open Source License
/**
 * Sets the working dataset and marks its "Label" attribute as the class
 * attribute.
 *
 * @param dataSet the instances to use; must contain an attribute named "Label"
 * @throws IllegalArgumentException if the dataset has no "Label" attribute
 */
public void setDataSet(Instances dataSet) {
    // Instances.attribute(String) returns null when no attribute has that
    // name; fail fast with a clear message instead of letting setClass()
    // throw a bare NullPointerException later.
    if (dataSet.attribute("Label") == null) {
        throw new IllegalArgumentException("Dataset has no attribute named \"Label\"");
    }
    this.dataSet = dataSet;
    this.dataSet.setClass(dataSet.attribute("Label"));
}
From source file:gov.va.chir.tagline.dao.DatasetUtil.java
License:Open Source License
/**
 * Builds a Weka dataset from the given documents, using {@code header} as the
 * attribute template.
 *
 * The header's DOC_ID nominal attribute is first extended (via AddValues) with
 * every document name, because Weka requires all values of a nominal feature
 * to be declared up front. One instance is then created per line of each
 * document, carrying the document id, line id, label (missing when the line
 * is unlabeled) and all remaining features.
 *
 * @param header    template Instances describing the attribute layout;
 *                  assumed to contain DOC_ID, LINE_ID and LABEL attributes —
 *                  TODO confirm against the class constants
 * @param documents the documents whose lines become instances
 * @return a new Instances object with the class index set to the last attribute
 * @throws Exception if the AddValues filter cannot be applied
 */
@SuppressWarnings("unchecked")
public static Instances createDataset(final Instances header, final Collection<Document> documents)
        throws Exception {
    // Update header to include all docIDs from the passed in documents
    // (Weka requires all values for nominal features).
    // TreeSet keeps the ids sorted and de-duplicated.
    final Set<String> docIds = new TreeSet<String>();
    for (Document document : documents) {
        docIds.add(document.getName());
    }
    final AddValues avf = new AddValues();
    avf.setLabels(StringUtils.join(docIds, ","));
    // Have to add 1 because SingleIndex.setValue() has a bug, expecting
    // the passed in index to be 1-based rather than 0-based. Why? I have
    // no idea.
    // Calling path: AddValues.setInputFormat() -->
    //               SingleIndex.setUpper() -->
    //               SingleIndex.setValue()
    avf.setAttributeIndex(String.valueOf(header.attribute(DOC_ID).index() + 1));
    avf.setInputFormat(header);
    final Instances newHeader = Filter.useFilter(header, avf);
    final Instances instances = new Instances(newHeader, documents.size());

    // Map attribute names to Attribute objects for fast lookup below.
    final Map<String, Attribute> attrMap = new HashMap<String, Attribute>();
    final Enumeration<Attribute> en = newHeader.enumerateAttributes();
    while (en.hasMoreElements()) {
        final Attribute attr = en.nextElement();
        attrMap.put(attr.name(), attr);
    }
    // enumerateAttributes() skips the class attribute, so add it explicitly.
    attrMap.put(newHeader.classAttribute().name(), newHeader.classAttribute());

    final Attribute docId = attrMap.get(DOC_ID);
    final Attribute lineId = attrMap.get(LINE_ID);
    final Attribute classAttr = attrMap.get(LABEL);

    // Add one instance per line; line features are merged with (and can be
    // overwritten by) the document-level features via putAll.
    for (Document document : documents) {
        final Map<String, Object> docFeatures = document.getFeatures();
        for (Line line : document.getLines()) {
            final Instance instance = new DenseInstance(attrMap.size());
            final Map<String, Object> lineFeatures = line.getFeatures();
            lineFeatures.putAll(docFeatures);
            instance.setValue(docId, document.getName());
            instance.setValue(lineId, line.getLineId());
            if (line.getLabel() == null) {
                // Unlabeled line: the class value stays missing.
                instance.setMissing(classAttr);
            } else {
                instance.setValue(classAttr, line.getLabel());
            }
            // Fill every remaining attribute from the merged feature map.
            for (Attribute attribute : attrMap.values()) {
                if (!attribute.equals(docId) && !attribute.equals(lineId) && !attribute.equals(classAttr)) {
                    final String name = attribute.name();
                    final Object obj = lineFeatures.get(name);
                    // Doubles and Integers become numeric values; everything
                    // else is stored via toString().
                    if (obj instanceof Double) {
                        instance.setValue(attribute, ((Double) obj).doubleValue());
                    } else if (obj instanceof Integer) {
                        instance.setValue(attribute, ((Integer) obj).doubleValue());
                    } else {
                        instance.setValue(attribute, obj.toString());
                    }
                }
            }
            instances.add(instance);
        }
    }
    // Set last attribute as class.
    instances.setClassIndex(attrMap.size() - 1);
    return instances;
}
From source file:gr.auth.ee.lcs.data.representations.complex.ComplexRepresentation.java
License:Open Source License
/**
 * Build the representation for some instances.
 *
 * For every non-label attribute, populates {@code attributeList[i]} with a
 * representation object: nominal attributes with more than two values become
 * NominalAttribute, two-valued nominals become BooleanAttribute, and numeric
 * attributes become IntervalAttribute spanning the observed [min, max] range.
 * Finally delegates to {@link #createClassRepresentation} for the label slots.
 *
 * @param instances the instances
 */
protected void buildRepresentationFromInstance(final Instances instances) {
    // The last numberOfLabels attributes are labels and are handled by
    // createClassRepresentation() at the end.
    for (int i = 0; i < (instances.numAttributes() - numberOfLabels); i++) {
        final String attributeName = instances.attribute(i).name();
        if (instances.attribute(i).isNominal()) {
            // Collect the nominal value names in declaration order.
            String[] attributeNames = new String[instances.attribute(i).numValues()];
            final Enumeration<?> values = instances.attribute(i).enumerateValues();
            for (int j = 0; j < attributeNames.length; j++) {
                attributeNames[j] = (String) values.nextElement();
            }
            // Create boolean or generic nominal representation depending on arity.
            if (attributeNames.length > 2)
                attributeList[i] = new ComplexRepresentation.NominalAttribute(this.chromosomeSize, attributeName,
                        attributeNames, attributeGeneralizationRate);
            else
                attributeList[i] = new ComplexRepresentation.BooleanAttribute(chromosomeSize, attributeName,
                        attributeGeneralizationRate);
        } else if (instances.attribute(i).isNumeric()) {
            // Scan all samples for the attribute's observed value range.
            // NOTE(review): seeds min/max from instance(0) — assumes the
            // dataset is non-empty; verify callers never pass 0 instances.
            float minValue, maxValue;
            minValue = (float) instances.instance(0).toDoubleArray()[i];
            maxValue = minValue;
            for (int sample = 0; sample < instances.numInstances(); sample++) {
                final float currentVal = (float) instances.instance(sample).toDoubleArray()[i];
                if (currentVal > maxValue)
                    maxValue = currentVal;
                if (currentVal < minValue)
                    minValue = currentVal;
            }
            attributeList[i] = new ComplexRepresentation.IntervalAttribute(this.chromosomeSize, attributeName,
                    minValue, maxValue, precision, attributeGeneralizationRate);
        }
    }
    createClassRepresentation(instances);
}
From source file:gr.auth.ee.lcs.data.representations.complex.GenericMultiLabelRepresentation.java
License:Open Source License
@Override protected void createClassRepresentation(final Instances instances) { for (int i = 0; i < numberOfLabels; i++) { final int labelIndex = (attributeList.length - numberOfLabels) + i; final String attributeName = instances.attribute(labelIndex).name(); attributeList[labelIndex] = new GenericLabel(chromosomeSize, attributeName, labelGeneralizationRate); }//from w w w .ja v a 2 s . c o m }
From source file:gr.auth.ee.lcs.data.representations.complex.StrictMultiLabelRepresentation.java
License:Open Source License
/**
 * Creates the label representations: every label slot at the tail of the
 * attribute list is filled with a strict {@code Label} named after the
 * corresponding attribute of the instances.
 *
 * @param instances the instances whose trailing attributes name the labels
 */
@Override
protected void createClassRepresentation(final Instances instances) {
    // Labels occupy the trailing numberOfLabels positions of attributeList.
    final int firstLabel = attributeList.length - numberOfLabels;
    for (int label = 0; label < numberOfLabels; label++) {
        final int position = firstLabel + label;
        final String name = instances.attribute(position).name();
        attributeList[position] = new Label(chromosomeSize, name);
    }
}
From source file:gr.iti.mklab.visual.quantization.SimpleKMeansWithOutput.java
License:Open Source License
/**
 * Generates a clusterer. Has to initialize all fields of the clusterer that
 * are not being set via options.
 *
 * Runs a k-means loop: optional missing-value replacement, centroid
 * initialization (random or k-means++), then repeated assign/update passes —
 * optionally parallelized across {@code m_executionSlots} — until assignments
 * stop changing or {@code m_MaxIterations} is reached. Progress is printed to
 * stdout each iteration.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {
    // can clusterer handle the data?
    getCapabilities().testWithFail(data);

    m_Iterations = 0;

    m_ReplaceMissingFilter = new ReplaceMissingValues();
    // Work on a copy with no class attribute (clustering is unsupervised).
    Instances instances = new Instances(data);
    instances.setClassIndex(-1);
    if (!m_dontReplaceMissing) {
        m_ReplaceMissingFilter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
    }

    // Per-attribute statistics over the full dataset (missing counts,
    // optional std devs, nominal value counts, means/medians/modes).
    m_FullMissingCounts = new int[instances.numAttributes()];
    if (m_displayStdDevs) {
        m_FullStdDevs = new double[instances.numAttributes()];
    }
    m_FullNominalCounts = new int[instances.numAttributes()][0];
    m_FullMeansOrMediansOrModes = moveCentroid(0, instances, false, false);
    for (int i = 0; i < instances.numAttributes(); i++) {
        m_FullMissingCounts[i] = instances.attributeStats(i).missingCount;
        if (instances.attribute(i).isNumeric()) {
            if (m_displayStdDevs) {
                m_FullStdDevs[i] = Math.sqrt(instances.variance(i));
            }
            if (m_FullMissingCounts[i] == instances.numInstances()) {
                m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean
            }
        } else {
            m_FullNominalCounts[i] = instances.attributeStats(i).nominalCounts;
            if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) {
                m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common
                // value
            }
        }
    }

    m_ClusterCentroids = new Instances(instances, m_NumClusters);
    int[] clusterAssignments = new int[instances.numInstances()];

    if (m_PreserveOrder)
        m_Assignments = clusterAssignments;

    m_DistanceFunction.setInstances(instances);

    Random RandomO = new Random(getSeed());
    int instIndex;
    HashMap initC = new HashMap();
    DecisionTableHashKey hk = null;

    // When preserving order, shuffle a copy so the caller's data order survives.
    Instances initInstances = null;
    if (m_PreserveOrder)
        initInstances = new Instances(instances);
    else
        initInstances = instances;

    if (m_initializeWithKMeansPlusPlus) {
        kMeansPlusPlusInit(initInstances);
    } else {
        // Random initialization: Fisher-Yates style pass picking distinct
        // instances (DecisionTableHashKey de-duplicates) as initial centroids.
        for (int j = initInstances.numInstances() - 1; j >= 0; j--) {
            instIndex = RandomO.nextInt(j + 1);
            hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(), true);
            if (!initC.containsKey(hk)) {
                m_ClusterCentroids.add(initInstances.instance(instIndex));
                initC.put(hk, null);
            }
            initInstances.swap(j, instIndex);

            if (m_ClusterCentroids.numInstances() == m_NumClusters) {
                break;
            }
        }
    }
    // Fewer distinct instances than requested clusters shrinks m_NumClusters.
    m_NumClusters = m_ClusterCentroids.numInstances();

    // removing reference
    initInstances = null;

    int i;
    boolean converged = false;
    int emptyClusterCount;
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    m_ClusterMissingCounts = new int[m_NumClusters][instances.numAttributes()];
    startExecutorPool();

    long start = System.currentTimeMillis();
    // Main k-means loop: assign instances, then recompute centroids, until no
    // assignment changes (converged) or the iteration cap is hit.
    while (!converged) {
        emptyClusterCount = 0;
        m_Iterations++;
        converged = true;
        System.out.print(new Date() + ": " + "Iter " + m_Iterations + " ");

        // Assignment step — serial for small data, parallel otherwise.
        if (m_executionSlots <= 1 || instances.numInstances() < 2 * m_executionSlots) {
            for (i = 0; i < instances.numInstances(); i++) {
                Instance toCluster = instances.instance(i);
                int newC = clusterProcessedInstance(toCluster, true, true);
                if (newC != clusterAssignments[i]) {
                    converged = false;
                }
                clusterAssignments[i] = newC;
            }
        } else {
            converged = launchAssignToClusters(instances, clusterAssignments);
        }

        // update centroids
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        if (m_executionSlots <= 1 || instances.numInstances() < 2 * m_executionSlots) {
            for (i = 0; i < m_NumClusters; i++) {
                if (tempI[i].numInstances() == 0) {
                    // empty cluster
                    emptyClusterCount++;
                } else {
                    moveCentroid(i, tempI[i], true, true);
                }
            }
        } else {
            emptyClusterCount = launchMoveCentroids(tempI);
        }

        if (m_Iterations == m_MaxIterations)
            converged = true;

        // Drop empty clusters; only compact tempI when we are about to exit
        // the loop (converged), otherwise just reallocate for the next pass.
        if (emptyClusterCount > 0) {
            m_NumClusters -= emptyClusterCount;
            if (converged) {
                Instances[] t = new Instances[m_NumClusters];
                int index = 0;
                for (int k = 0; k < tempI.length; k++) {
                    if (tempI[k].numInstances() > 0) {
                        t[index++] = tempI[k];
                    }
                }
                tempI = t;
            } else {
                tempI = new Instances[m_NumClusters];
            }
        }

        if (!converged) {
            m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
        }
        System.out.println("Sum of within cluster distances: " + Utils.sum(m_squaredErrors));
        // reset errors to zero before the next assignment pass
        m_squaredErrors = new double[m_NumClusters];
    }
    long end = System.currentTimeMillis();
    System.out.println("\nClustering completed in " + (end - start) + " ms and converged in " + m_Iterations
            + " iterations");

    // calculate errors (skipped during the loop when fast distance calc is on)
    if (!m_FastDistanceCalc) {
        for (i = 0; i < instances.numInstances(); i++) {
            clusterProcessedInstance(instances.instance(i), true, false);
        }
    }

    // Final per-cluster statistics: optional std devs and cluster sizes.
    if (m_displayStdDevs) {
        m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    }
    m_ClusterSizes = new int[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        if (m_displayStdDevs) {
            double[] vals2 = new double[instances.numAttributes()];
            for (int j = 0; j < instances.numAttributes(); j++) {
                if (instances.attribute(j).isNumeric()) {
                    vals2[j] = Math.sqrt(tempI[i].variance(j));
                } else {
                    vals2[j] = Utils.missingValue();
                }
            }
            m_ClusterStdDevs.add(new DenseInstance(1.0, vals2));
        }
        m_ClusterSizes[i] = tempI[i].numInstances();
    }

    m_executorPool.shutdown();
}