List of usage examples for weka.core Instances classIndex
public int classIndex()
From source file:Classifier.supervised.LinearRegression.java
License:Open Source License
/** * Builds a regression model for the given data. * * @param data the training data to be used for generating the * linear regression function// w w w . ja v a 2 s .c o m * @throws Exception if the classifier could not be built successfully */ public void buildClassifier(Instances data) throws Exception { m_ModelBuilt = false; if (!m_checksTurnedOff) { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); } // Preprocess instances if (!m_checksTurnedOff) { m_TransformFilter = new NominalToBinary(); m_TransformFilter.setInputFormat(data); data = Filter.useFilter(data, m_TransformFilter); m_MissingFilter = new ReplaceMissingValues(); m_MissingFilter.setInputFormat(data); data = Filter.useFilter(data, m_MissingFilter); data.deleteWithMissingClass(); } else { m_TransformFilter = null; m_MissingFilter = null; } m_ClassIndex = data.classIndex(); m_TransformedData = data; // Turn all attributes on for a start m_SelectedAttributes = new boolean[data.numAttributes()]; for (int i = 0; i < data.numAttributes(); i++) { if (i != m_ClassIndex) { m_SelectedAttributes[i] = true; } } m_Coefficients = null; // Compute means and standard deviations m_Means = new double[data.numAttributes()]; m_StdDevs = new double[data.numAttributes()]; for (int j = 0; j < data.numAttributes(); j++) { if (j != data.classIndex()) { m_Means[j] = data.meanOrMode(j); m_StdDevs[j] = Math.sqrt(data.variance(j)); if (m_StdDevs[j] == 0) { m_SelectedAttributes[j] = false; } } } m_ClassStdDev = Math.sqrt(data.variance(m_TransformedData.classIndex())); m_ClassMean = data.meanOrMode(m_TransformedData.classIndex()); // Perform the regression findBestModel(); // Save memory if (m_Minimal) { m_TransformedData = null; m_Means = null; m_StdDevs = null; } else { m_TransformedData = new Instances(data, 0); } m_ModelBuilt = true; }
From source file:cn.edu.xjtu.dbmine.TextDirectoryLoader.java
License:Open Source License
/**
 * Returns the full data set. If the structure has not yet been determined
 * by a call to getStructure, it is determined here before the rest of the
 * data set is processed.
 *
 * <p>Every value of the class attribute is treated as the name of a
 * sub-directory of the source directory. Each file in such a sub-directory
 * becomes one instance: attribute 0 holds the file's text, attribute 1
 * holds the relative file name when {@code m_OutputFilename} is set, and
 * the class attribute holds the index of the sub-directory. Files that
 * cannot be converted are reported on {@code System.err} and skipped.
 *
 * @return the data set, with one instance per successfully converted file
 * @throws IOException if there is no source or parsing fails
 */
public Instances getDataSet() throws IOException {
    if (getDirectory() == null) {
        throw new IOException("No directory/source has been specified");
    }

    String directoryPath = getDirectory().getAbsolutePath();
    FastVector classes = new FastVector();
    Enumeration enm = getStructure().classAttribute().enumerateValues();
    while (enm.hasMoreElements()) {
        classes.addElement(enm.nextElement());
    }

    Instances data = getStructure();
    int fileCount = 0;
    for (int k = 0; k < classes.size(); k++) {
        String subdirPath = (String) classes.elementAt(k);
        File subdir = new File(directoryPath + File.separator + subdirPath);
        String[] files = subdir.list();
        if (files == null) {
            // File.list() returns null when the path is not a directory or
            // cannot be read; skip it instead of failing with an NPE.
            System.err.println("skipping unreadable class directory: " + subdir);
            continue;
        }

        for (int j = 0; j < files.length; j++) {
            String filePath = directoryPath + File.separator + subdirPath + File.separator + files[j];
            BufferedReader br = null;
            try {
                fileCount++;
                if (getDebug()) {
                    System.err.println("processing " + fileCount + " : " + subdirPath + " : " + files[j]);
                }

                // text, optional file name, class
                double[] newInst = new double[m_OutputFilename ? 3 : 2];

                // Read the file line by line; line ends are normalised to "\n".
                br = new BufferedReader(new FileReader(new File(filePath)));
                StringBuffer txtStr = new StringBuffer();
                String line;
                while ((line = br.readLine()) != null) {
                    txtStr.append(line).append("\n");
                }

                newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString());
                if (m_OutputFilename) {
                    newInst[1] = (double) data.attribute(1)
                            .addStringValue(subdirPath + File.separator + files[j]);
                }
                newInst[data.classIndex()] = (double) k;
                data.add(new Instance(1.0, newInst));
            } catch (Exception e) {
                // Best effort: report the file and the cause, then carry on.
                System.err.println("failed to convert file: " + filePath + " (" + e + ")");
            } finally {
                if (br != null) {
                    try {
                        br.close();
                    } catch (IOException ignored) {
                        // nothing useful can be done if closing fails
                    }
                }
            }
        }
    }

    return data;
}
From source file:cn.ict.zyq.bestConf.COMT2.COMT2.java
License:Open Source License
/**
 * Sums, over every instance in {@code omega}, the squared prediction error
 * of {@code model} minus the squared prediction error of {@code modelPi}.
 * A negative result therefore means {@code model} has the smaller total
 * squared error on {@code omega}.
 *
 * @param model   the first model
 * @param modelPi the model whose squared error is subtracted
 * @param omega   the instances to evaluate on; the class value is the target
 * @return the accumulated difference of squared errors
 * @throws Exception if either model fails to classify an instance
 */
private static double computeOmegaDelta(M5P model, M5P modelPi, Instances omega) throws Exception {
    final int classIdx = omega.classIndex();
    double delta = 0.;

    for (int i = 0, total = omega.numInstances(); i < total; i++) {
        Instance current = omega.instance(i);
        double actual = current.value(classIdx);

        double squaredErrModel = Math.pow(actual - model.classifyInstance(current), 2);
        double squaredErrModelPi = Math.pow(actual - modelPi.classifyInstance(current), 2);
        delta += squaredErrModel - squaredErrModelPi;
    }

    return delta;
}
From source file:com.edwardraff.WekaMNIST.java
License:Open Source License
public static void main(String[] args) throws IOException, Exception { String folder = args[0];/*from w w w.j a v a 2 s.c o m*/ String trainPath = folder + "MNISTtrain.arff"; String testPath = folder + "MNISTtest.arff"; System.out.println("Weka Timings"); Instances mnistTrainWeka = new Instances(new BufferedReader(new FileReader(new File(trainPath)))); mnistTrainWeka.setClassIndex(mnistTrainWeka.numAttributes() - 1); Instances mnistTestWeka = new Instances(new BufferedReader(new FileReader(new File(testPath)))); mnistTestWeka.setClassIndex(mnistTestWeka.numAttributes() - 1); //normalize range like into [0, 1] Normalize normalizeFilter = new Normalize(); normalizeFilter.setInputFormat(mnistTrainWeka); mnistTestWeka = Normalize.useFilter(mnistTestWeka, normalizeFilter); mnistTrainWeka = Normalize.useFilter(mnistTrainWeka, normalizeFilter); long start, end; System.out.println("RBF SVM (Full Cache)"); SMO smo = new SMO(); smo.setKernel(new RBFKernel(mnistTrainWeka, 0/*0 causes Weka to cache the whole matrix...*/, 0.015625)); smo.setC(8.0); smo.setBuildLogisticModels(false); evalModel(smo, mnistTrainWeka, mnistTestWeka); System.out.println("RBF SVM (No Cache)"); smo = new SMO(); smo.setKernel(new RBFKernel(mnistTrainWeka, 1, 0.015625)); smo.setC(8.0); smo.setBuildLogisticModels(false); evalModel(smo, mnistTrainWeka, mnistTestWeka); System.out.println("Decision Tree C45"); J48 wekaC45 = new J48(); wekaC45.setUseLaplace(false); wekaC45.setCollapseTree(false); wekaC45.setUnpruned(true); wekaC45.setMinNumObj(2); wekaC45.setUseMDLcorrection(true); evalModel(wekaC45, mnistTrainWeka, mnistTestWeka); System.out.println("Random Forest 50 trees"); int featuresToUse = (int) Math.sqrt(28 * 28);//Weka uses different defaults, so lets make sure they both use the published way RandomForest wekaRF = new RandomForest(); wekaRF.setNumExecutionSlots(1); wekaRF.setMaxDepth(0/*0 for unlimited*/); wekaRF.setNumFeatures(featuresToUse); wekaRF.setNumTrees(50); evalModel(wekaRF, mnistTrainWeka, 
mnistTestWeka); System.out.println("1-NN (brute)"); IBk wekaNN = new IBk(1); wekaNN.setNearestNeighbourSearchAlgorithm(new LinearNNSearch()); wekaNN.setCrossValidate(false); evalModel(wekaNN, mnistTrainWeka, mnistTestWeka); System.out.println("1-NN (Ball Tree)"); wekaNN = new IBk(1); wekaNN.setNearestNeighbourSearchAlgorithm(new BallTree()); wekaNN.setCrossValidate(false); evalModel(wekaNN, mnistTrainWeka, mnistTestWeka); System.out.println("1-NN (Cover Tree)"); wekaNN = new IBk(1); wekaNN.setNearestNeighbourSearchAlgorithm(new CoverTree()); wekaNN.setCrossValidate(false); evalModel(wekaNN, mnistTrainWeka, mnistTestWeka); System.out.println("Logistic Regression LBFGS lambda = 1e-4"); Logistic logisticLBFGS = new Logistic(); logisticLBFGS.setRidge(1e-4); logisticLBFGS.setMaxIts(500); evalModel(logisticLBFGS, mnistTrainWeka, mnistTestWeka); System.out.println("k-means (Loyd)"); int origClassIndex = mnistTrainWeka.classIndex(); mnistTrainWeka.setClassIndex(-1); mnistTrainWeka.deleteAttributeAt(origClassIndex); { long totalTime = 0; for (int i = 0; i < 10; i++) { SimpleKMeans wekaKMeans = new SimpleKMeans(); wekaKMeans.setNumClusters(10); wekaKMeans.setNumExecutionSlots(1); wekaKMeans.setFastDistanceCalc(true); start = System.currentTimeMillis(); wekaKMeans.buildClusterer(mnistTrainWeka); end = System.currentTimeMillis(); totalTime += (end - start); } System.out.println("\tClustering took: " + (totalTime / 10.0) / 1000.0 + " on average"); } }
From source file:com.entopix.maui.filters.MauiFilter.java
License:Open Source License
/** * Sets the format of the input instances. * * @param instanceInfo an Instances object containing the input instance * structure (any instances contained in the object are ignored - only the * structure is required)./*from w w w. j ava2 s . c o m*/ * @return true if the outputFormat may be collected immediately */ public boolean setInputFormat(Instances instanceInfo) throws MauiFilterException { if (instanceInfo.classIndex() >= 0) { throw new MauiFilterException("Don't know what do to if class index set!"); } if (!instanceInfo.attribute(keyphrasesAtt).isString() || !instanceInfo.attribute(documentAtt).isString()) { throw new MauiFilterException( "Keyphrase attribute and document attribute " + "need to be string attributes."); } try { phraseFilter = new MauiPhraseFilter(); int[] arr = new int[1]; arr[0] = documentAtt; phraseFilter.setAttributeIndicesArray(arr); phraseFilter.setInputFormat(instanceInfo); } catch (Exception e) { throw new MauiFilterException("Exception loading MauiPhraseFilter"); } try { if (vocabularyName.equals("none")) { numbersFilter = new NumbersFilter(); numbersFilter.setInputFormat(phraseFilter.getOutputFormat()); super.setInputFormat(numbersFilter.getOutputFormat()); } else { super.setInputFormat(phraseFilter.getOutputFormat()); } } catch (Exception e) { throw new MauiFilterException("Exception loading NumbersFilter"); } return false; }
From source file:com.mycompany.neuralnetwork.NeuralNetworkClassifier.java
@Override public void buildClassifier(Instances instances) throws Exception { int inputCount = instances.numAttributes() - 1; List<Integer> nodesPerLayer = new ArrayList<>(); for (int i = 0; i < layers - 1; i++) { nodesPerLayer.add(inputCount);/* w ww .j a v a 2 s. c o m*/ } nodesPerLayer.add(instances.numDistinctValues(instances.classIndex())); network = new Network(inputCount, nodesPerLayer); ArrayList<Double> errorsPerIteration = new ArrayList<>(); for (int j = 0; j < iterations; j++) { double errorsPer = 0; for (int k = 0; k < instances.numInstances(); k++) { Instance instance = instances.instance(k); List<Double> input = new ArrayList<>(); for (int i = 0; i < instance.numAttributes(); i++) { if (Double.isNaN(instance.value(i)) && i != instance.classIndex()) input.add(0.0); else if (i != instance.classIndex()) input.add(instance.value(i)); } errorsPer += network.train(input, instance.value(instance.classIndex()), learningFactor); } errorsPerIteration.add(errorsPer); } //Display Errors This is used to collect the data for the graph //for (Double d : errorsPerIteration) //{ // System.out.println(d); //} }
From source file:com.openkm.kea.filter.KEAFilter.java
License:Open Source License
/** * Sets the format of the input instances. * * @param instanceInfo an Instances object containing the input * instance structure (any instances contained in the object are * ignored - only the structure is required). * @return true if the outputFormat may be collected immediately *///from ww w . ja va2 s.co m public boolean setInputFormat(Instances instanceInfo) throws Exception { if (instanceInfo.classIndex() >= 0) { throw new Exception("Don't know what do to if class index set!"); } if (!instanceInfo.attribute(m_KeyphrasesAtt).isString() || !instanceInfo.attribute(m_DocumentAtt).isString()) { throw new Exception("Keyphrase attribute and document attribute " + "need to be string attributes."); } m_PunctFilter = new KEAPhraseFilter(); int[] arr = new int[1]; arr[0] = m_DocumentAtt; m_PunctFilter.setAttributeIndicesArray(arr); m_PunctFilter.setInputFormat(instanceInfo); m_PunctFilter.setDisallowInternalPeriods(getDisallowInternalPeriods()); if (m_vocabulary.equals("none")) { m_NumbersFilter = new NumbersFilter(); m_NumbersFilter.setInputFormat(m_PunctFilter.getOutputFormat()); super.setInputFormat(m_NumbersFilter.getOutputFormat()); } else { super.setInputFormat(m_PunctFilter.getOutputFormat()); } return false; }
From source file:com.rapidminer.tools.WekaTools.java
License:Open Source License
/** * Creates a RapidMiner example set from Weka instances. Only a label can be used * as special attributes, other types of special attributes are not * supported. If <code>attributeNamePrefix</code> is not null, the given * string prefix plus a number is used as attribute names. *//* w w w. j a va 2 s. c om*/ public static ExampleSet toRapidMinerExampleSet(Instances instances, String attributeNamePrefix, int datamanagement) { int classIndex = instances.classIndex(); // create example table // 1. Extract attributes List<Attribute> attributes = new ArrayList<Attribute>(); int number = 1; // use for attribute names for (int i = 0; i < instances.numAttributes(); i++) { weka.core.Attribute wekaAttribute = instances.attribute(i); int rapidMinerAttributeValueType = Ontology.REAL; if (wekaAttribute.isNominal()) rapidMinerAttributeValueType = Ontology.NOMINAL; else if (wekaAttribute.isString()) rapidMinerAttributeValueType = Ontology.STRING; Attribute attribute = AttributeFactory.createAttribute(wekaAttribute.name(), rapidMinerAttributeValueType); if ((i != classIndex) && (attributeNamePrefix != null) && (attributeNamePrefix.length() > 0)) { attribute.setName(attributeNamePrefix + "_" + (number++)); } if (wekaAttribute.isNominal()) { for (int a = 0; a < wekaAttribute.numValues(); a++) { String nominalValue = wekaAttribute.value(a); attribute.getMapping().mapString(nominalValue); } } attributes.add(attribute); } Attribute label = null; if (classIndex >= 0) { label = attributes.get(classIndex); label.setName("label"); } // 2. Guarantee alphabetical mapping to numbers for (int j = 0; j < attributes.size(); j++) { Attribute attribute = attributes.get(j); if (attribute.isNominal()) attribute.getMapping().sortMappings(); } // 3. 
Read data MemoryExampleTable table = new MemoryExampleTable(attributes); DataRowFactory factory = new DataRowFactory(datamanagement, '.'); // create data List<DataRow> dataList = new LinkedList<DataRow>(); int numberOfRapidMinerAttributes = instances.numAttributes(); for (int i = 0; i < instances.numInstances(); i++) { Instance instance = instances.instance(i); DataRow dataRow = factory.create(numberOfRapidMinerAttributes); for (int a = 0; a < instances.numAttributes(); a++) { Attribute attribute = table.getAttribute(a); double wekaValue = instance.value(a); if (attribute.isNominal()) { String nominalValue = instances.attribute(a).value((int) wekaValue); dataRow.set(attribute, attribute.getMapping().mapString(nominalValue)); } else { dataRow.set(attribute, wekaValue); } } dataRow.trim(); dataList.add(dataRow); } // handle label extra table.readExamples(new ListDataRowReader(dataList.iterator())); // create and return example set return table.createExampleSet(label); }
From source file:com.spread.experiment.tempuntilofficialrelease.ClassificationViaClustering108.java
License:Open Source License
/** * builds the classifier//from ww w.j a v a 2 s . com * * @param data the training instances * @throws Exception if something goes wrong */ @Override public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // save original header (needed for clusters to classes output) m_OriginalHeader = data.stringFreeStructure(); // remove class attribute for clusterer Instances clusterData = new Instances(data); clusterData.setClassIndex(-1); clusterData.deleteAttributeAt(data.classIndex()); m_ClusteringHeader = clusterData.stringFreeStructure(); if (m_ClusteringHeader.numAttributes() == 0) { System.err.println("Data contains only class attribute, defaulting to ZeroR model."); m_ZeroR = new ZeroR(); m_ZeroR.buildClassifier(data); } else { m_ZeroR = null; // build clusterer m_ActualClusterer = AbstractClusterer.makeCopy(m_Clusterer); m_ActualClusterer.buildClusterer(clusterData); if (!getLabelAllClusters()) { // determine classes-to-clusters mapping ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(m_ActualClusterer); eval.evaluateClusterer(clusterData); double[] clusterAssignments = eval.getClusterAssignments(); int[][] counts = new int[eval.getNumClusters()][m_OriginalHeader.numClasses()]; int[] clusterTotals = new int[eval.getNumClusters()]; double[] best = new double[eval.getNumClusters() + 1]; double[] current = new double[eval.getNumClusters() + 1]; for (int i = 0; i < data.numInstances(); i++) { Instance instance = data.instance(i); if (!instance.classIsMissing()) { counts[(int) clusterAssignments[i]][(int) instance.classValue()]++; clusterTotals[(int) clusterAssignments[i]]++; } } best[eval.getNumClusters()] = Double.MAX_VALUE; ClusterEvaluation.mapClasses(eval.getNumClusters(), 0, counts, clusterTotals, current, best, 0); m_ClustersToClasses = new double[best.length]; System.arraycopy(best, 0, m_ClustersToClasses, 0, best.length); } else { m_ClusterClassProbs = new 
double[m_ActualClusterer.numberOfClusters()][data.numClasses()]; for (int i = 0; i < data.numInstances(); i++) { Instance clusterInstance = clusterData.instance(i); Instance originalInstance = data.instance(i); if (!originalInstance.classIsMissing()) { double[] probs = m_ActualClusterer.distributionForInstance(clusterInstance); for (int j = 0; j < probs.length; j++) { m_ClusterClassProbs[j][(int) originalInstance.classValue()] += probs[j]; } } } for (int i = 0; i < m_ClusterClassProbs.length; i++) { Utils.normalize(m_ClusterClassProbs[i]); } } } }
From source file:com.tum.classifiertest.DataCache.java
License:Open Source License
/**
 * Creates a DataCache by copying data from a weka.core.Instances object.
 *
 * <p>Values are stored as floats in an attribute-major array; missing
 * values are stored as {@code Float.MAX_VALUE} so that they sort to the
 * end. Instance weights and class values are copied, and sorted indices
 * are computed for every attribute except the class.
 *
 * @param origData the data to copy
 * @throws Exception if an attribute is neither numeric nor nominal
 */
public DataCache(Instances origData) throws Exception {
    classIndex = origData.classIndex();
    numAttributes = origData.numAttributes();
    numClasses = origData.numClasses();
    numInstances = origData.numInstances();

    // Number of values per attribute: 0 marks a numeric attribute.
    attNumVals = new int[origData.numAttributes()];
    for (int att = 0; att < attNumVals.length; att++) {
        if (origData.attribute(att).isNumeric()) {
            attNumVals[att] = 0;
        } else if (origData.attribute(att).isNominal()) {
            attNumVals[att] = origData.attribute(att).numValues();
        } else {
            throw new Exception("Only numeric and nominal attributes are supported.");
        }
    }

    /* Array is indexed by attribute first, to speed access in RF splitting. */
    vals = new float[numAttributes][numInstances];
    for (int att = 0; att < numAttributes; att++) {
        for (int row = 0; row < numInstances; row++) {
            // Float.MAX_VALUE makes sure missing values go to the end.
            vals[att][row] = origData.instance(row).isMissing(att)
                    ? Float.MAX_VALUE
                    : (float) origData.instance(row).value(att);
        }
    }

    instWeights = new double[numInstances];
    instClassValues = new int[numInstances];
    for (int row = 0; row < numInstances; row++) {
        instWeights[row] = origData.instance(row).weight();
        instClassValues[row] = (int) origData.instance(row).classValue();
    }

    /* Compute the sorted indices for the whole dataset. Nominal and numeric
     * attributes are sorted the same way; the class attribute is skipped
     * and keeps a null entry. */
    sortedIndices = new int[numAttributes][];
    for (int att = 0; att < numAttributes; att++) {
        if (att != classIndex) {
            sortedIndices[att] = FastRfUtils.sort(vals[att]);
        }
    }
}