List of usage examples for `weka.core.Instances.toString()`
@Override
public String toString()
From source file:mulan.data.ConverterLibSVM.java
License:Open Source License
/** * Converts a multi-label dataset from LibSVM format to the format * that is compatible with Mulan. It constructs one ARFF and one XML file. * * @param path the directory that contains the source file and will contain * the target files//w w w .j a v a 2s.co m * @param sourceFilename the name of the source file * @param relationName the relation name of the arff file that will be * constructed * @param targetFilestem the filestem for the target files (.arff and .xml) */ public static void convertFromLibSVM(String path, String sourceFilename, String targetFilestem, String relationName) { BufferedReader aReader = null; BufferedWriter aWriter = null; int numLabels = 0; int numAttributes = 0; int numInstances = 0; double meanParsedAttributes = 0; // Calculate number of labels and attributes String Line = null; try { aReader = new BufferedReader(new FileReader(path + sourceFilename)); while ((Line = aReader.readLine()) != null) { numInstances++; StringTokenizer strTok = new StringTokenizer(Line, " "); while (strTok.hasMoreTokens()) { String token = strTok.nextToken(); if (token.indexOf(":") == -1) { // parse label info StringTokenizer labelTok = new StringTokenizer(token, ","); while (labelTok.hasMoreTokens()) { String strLabel = labelTok.nextToken(); int intLabel = Integer.parseInt(strLabel); if (intLabel > numLabels) { numLabels = intLabel; } } } else { // parse attribute info meanParsedAttributes++; StringTokenizer attrTok = new StringTokenizer(token, ":"); String strAttrIndex = attrTok.nextToken(); int intAttrIndex = Integer.parseInt(strAttrIndex); if (intAttrIndex > numAttributes) { numAttributes = intAttrIndex; } } } } numLabels++; System.out.println("Number of attributes: " + numAttributes); System.out.println("Number of instances: " + numInstances); System.out.println("Number of classes: " + numLabels); System.out.println("Constructing XML file... 
"); LabelsMetaDataImpl meta = new LabelsMetaDataImpl(); for (int label = 0; label < numLabels; label++) { meta.addRootNode(new LabelNodeImpl("Label" + (label + 1))); } String labelsFilePath = path + targetFilestem + ".xml"; try { LabelsBuilder.dumpLabels(meta, labelsFilePath); System.out.println("Done!"); } catch (LabelsBuilderException e) { File labelsFile = new File(labelsFilePath); if (labelsFile.exists()) { labelsFile.delete(); } System.out.println("Construction of labels XML failed!"); } meanParsedAttributes /= numInstances; boolean Sparse = false; if (meanParsedAttributes < numAttributes) { Sparse = true; System.out.println("Dataset is sparse."); } // Define Instances class to hold data ArrayList<Attribute> attInfo = new ArrayList<Attribute>(numAttributes + numLabels); Attribute[] att = new Attribute[numAttributes + numLabels]; for (int i = 0; i < numAttributes; i++) { att[i] = new Attribute("Att" + (i + 1)); attInfo.add(att[i]); } ArrayList<String> ClassValues = new ArrayList<String>(2); ClassValues.add("0"); ClassValues.add("1"); for (int i = 0; i < numLabels; i++) { att[numAttributes + i] = new Attribute("Label" + (i + 1), ClassValues); attInfo.add(att[numAttributes + i]); } // Re-read file and convert into multi-label arff int countInstances = 0; aWriter = new BufferedWriter(new FileWriter(path + targetFilestem + ".arff")); Instances data = new Instances(relationName, attInfo, 0); aWriter.write(data.toString()); aReader = new BufferedReader(new FileReader(path + sourceFilename)); while ((Line = aReader.readLine()) != null) { countInstances++; // set all values to 0 double[] attValues = new double[numAttributes + numLabels]; Arrays.fill(attValues, 0); Instance tempInstance = new DenseInstance(1, attValues); tempInstance.setDataset(data); // separate class info from attribute info // ensure class info exists StringTokenizer strTok = new StringTokenizer(Line, " "); while (strTok.hasMoreTokens()) { String token = strTok.nextToken(); if (token.indexOf(":") == 
-1) { // parse label info StringTokenizer labelTok = new StringTokenizer(token, ","); while (labelTok.hasMoreTokens()) { String strLabel = labelTok.nextToken(); int intLabel = Integer.parseInt(strLabel); tempInstance.setValue(numAttributes + intLabel, 1); } } else { // parse attribute info StringTokenizer AttrTok = new StringTokenizer(token, ":"); String strAttrIndex = AttrTok.nextToken(); String strAttrValue = AttrTok.nextToken(); tempInstance.setValue(Integer.parseInt(strAttrIndex) - 1, Double.parseDouble(strAttrValue)); } } if (Sparse) { SparseInstance tempSparseInstance = new SparseInstance(tempInstance); aWriter.write(tempSparseInstance.toString() + "\n"); } else { aWriter.write(tempInstance.toString() + "\n"); } } } catch (IOException e) { e.printStackTrace(); } finally { try { if (aReader != null) { aReader.close(); } if (aWriter != null) { aWriter.close(); } } catch (IOException ex) { ex.printStackTrace(); } } }
From source file:myclassifier.Main.java
public static void main(String[] args) throws Exception { System.out.println("C"); File f = new File("weather.nominal.arrf"); if (f.exists() && !f.isDirectory()) { System.out.println("A"); } else {//from w w w . j ava 2 s . c o m System.out.println("B"); } System.out.println("C"); WekaAccessor access = new WekaAccessor(); Instances train_data = access.loadArff("weather.nominal.arrf"); train_data.toString(); train_data.firstInstance().toString(); MyId3 id3 = new MyId3(); id3.buildClassifier(train_data); access.tenFoldCrossValidation(id3, train_data); }
From source file:myclassifier.MyClassifier.java
/** * @param args the command line arguments *///from w w w .ja va2 s . co m public static void main(String[] args) throws Exception { WekaAccessor access = new WekaAccessor(); Instances train_data = access.loadArff("weather.nominal.arff"); train_data.toString(); train_data.firstInstance().toString(); MyId3 tree1 = new MyId3(); MyC45 tree2 = new MyC45(); J48 tree3 = access.buildC45Classifier(train_data); Id3 tree4 = access.buildId3Classifier(train_data); tree1.buildClassifier(train_data); tree2.buildClassifier(train_data); System.out.println("=== My Id3 ==="); access.tenFoldCrossValidation(tree1, train_data); System.out.println("=== My C45 ==="); access.tenFoldCrossValidation(tree2, train_data); System.out.println("=== Weka C45 ==="); access.tenFoldCrossValidation(tree3, train_data); System.out.println("=== Weka Id3 ==="); access.tenFoldCrossValidation(tree4, train_data); }
From source file:nl.detoren.ijc.neural.Voorspeller.java
License:Open Source License
public String voorspel(String bestandsnaam) throws FileNotFoundException, IOException, Exception { // Lees instances BufferedReader reader = new BufferedReader(new FileReader(bestandsnaam)); Instances datapredict = new Instances(reader); datapredict.setClassIndex(datapredict.numAttributes() - 1); Instances predicteddata = new Instances(datapredict); reader.close();/*from w w w. j av a 2 s .co m*/ // Predict instances for (int i = 0; i < datapredict.numInstances(); i++) { double clsLabel = mlp.classifyInstance(datapredict.instance(i)); predicteddata.instance(i).setClassValue(clsLabel); } logger.log(Level.INFO, predicteddata.toString()); // Save instances String outputBestand = bestandsnaam.substring(0, bestandsnaam.length() - 5) + "_solved.arff"; BufferedWriter writer = new BufferedWriter(new FileWriter(outputBestand)); writer.write(predicteddata.toString()); writer.newLine(); writer.flush(); writer.close(); return null; }
From source file:OAT.trading.classification.Weka.java
License:Open Source License
@Override public Prediction predict(InputSample input) { if (classifier == null) { log(Level.WARNING, "null classifier"); return null; }//ww w . ja va 2s . c o m Instances data = getInstances(input); if (data == null) { log(Level.WARNING, "null data"); return null; } if (!isCrossValidating()) { if (isLoggable(Level.FINER)) { log(Level.FINER, data.toString()); } } try { double output = new Evaluation(data).evaluateModelOnce(classifier, data.firstInstance()); return Prediction.valueOf(output < 0.5 ? -1 : 1); } catch (Exception ex) { log(Level.SEVERE, null, ex); } return null; }
From source file:OAT.trading.classification.Weka.java
License:Open Source License
/**
 * Builds the classifier from the given training set. Logs and returns
 * without training when no classifier could be initialized or no instance
 * data could be derived from the samples; unsupported attribute types are
 * reported together with the classifier's capabilities.
 */
@Override
public void train(List<TrainingSample> trainingSet) {
    initClassifier();
    // Guard: initClassifier() may have failed to produce a classifier.
    if (classifier == null) {
        log(Level.WARNING, "null classifier");
        return;
    }
    Instances instances = getInstances(trainingSet);
    if (instances == null) {
        log(Level.WARNING, "null data");
        return;
    }
    // Trace training data, but not while cross-validating.
    if (!isCrossValidating()) {
        log(Level.FINE, "Training set size: {0}", instances.numInstances());
        if (isLoggable(Level.FINER)) {
            log(Level.FINER, instances.toString());
        }
    }
    try {
        classifier.buildClassifier(instances);
    } catch (UnsupportedAttributeTypeException ex) {
        // Report which attribute type was rejected and what is supported.
        log(Level.WARNING, "{1}\nCapabilities: {0}",
                new Object[] { ex.getMessage(), classifier.getCapabilities() });
    } catch (Exception ex) {
        log(Level.SEVERE, null, ex);
    }
}
From source file:org.hypknowsys.wumprep.WUMprepWrapper.java
License:Open Source License
/** * Creates a dummy dataset from the input format, sends it to the script and * reads the script output's ARFF information that in turn is used to set * <code>this</code>' output format. * //from w w w. j a v a 2s . c o m * This mechanism allows a WUMprep script to alter the recordset layout as * long as this change is documented by the output ARFF header. For example, * the <tt>dnsLookup.pl</tt> script changes the <code>host_ip</code> field * to <code>host_dns</code> when performing IP lookups. * * @param instanceInfo * The input format. * @return Object containing the output instance structure. */ public Instances getScriptOutputFormat(Instances instanceInfo) { Instances outputFormat = instanceInfo; Instances testData = new Instances(instanceInfo); Instance testInstance = new Instance(testData.numAttributes()); testData.delete(); testInstance.setDataset(testData); // Initialize the testInstance's attribute values for (int i = 0; i < testInstance.numAttributes(); i++) { String aName = testInstance.attribute(i).name(); if (aName.equals("host_ip")) testInstance.setValue(i, "127.0.0.1"); else if (aName.equals("ts_day")) testInstance.setValue(i, "01"); else if (aName.equals("ts_month")) testInstance.setValue(i, "Jan"); else if (aName.equals("ts_year")) testInstance.setValue(i, "2005"); else if (aName.equals("ts_hour")) testInstance.setValue(i, "11"); else if (aName.equals("ts_minutes")) testInstance.setValue(i, "55"); else if (aName.equals("ts_seconds")) testInstance.setValue(i, "00"); else if (aName.equals("tz")) testInstance.setValue(i, "+0200"); else testInstance.setValue(i, aName + "-dummy"); } testData.add(testInstance); WUMprepWrapper testWrapper = new WUMprepWrapper(m_scriptName, m_args); testWrapper.start(); testWrapper.push(testData.toString()); testWrapper.push((Instance) null); class ErrorReader extends Thread implements Serializable { /** */ private static final long serialVersionUID = -488779846603045891L; PipedReader m_input = null; /** * Helper class 
for reading stderr output from the WUMprep script * * @param input The script's wrapper's stderr pipe reader */ ErrorReader(PipedReader input) { m_input = input; this.start(); } public void run() { try { while (m_input.read() >= 0) ; } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } // read the stderr output new ErrorReader(testWrapper.getErrorPipe()); try { // ignore the stderr output outputFormat = new org.hypknowsys.wumprep4weka.core.Instances(testWrapper.getOutputPipe()); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return outputFormat; }
From source file:org.montp2.m1decol.ter.gramms.filters.FilterTokenizerBoolean.java
License:Open Source License
public void indexingToTokenizer(String inPath, String outPath) throws Exception { WordTokenizer wordTokenizer = new WordTokenizer(); wordTokenizer.setDelimiters("\r \t.,;:'\"()?!"); Instances inputInstances = WekaUtils.loadARFF(inPath); StringToWordVector filter = new StringToWordVector(); filter.setInputFormat(inputInstances); filter.setDoNotOperateOnPerClassBasis(false); filter.setInvertSelection(false);//from ww w . j av a2 s. c o m filter.setLowerCaseTokens(true); filter.setOutputWordCounts(false); filter.setTokenizer(wordTokenizer); filter.setUseStoplist(true); filter.setWordsToKeep(wordsTokeep); Instances outputInstances = Filter.useFilter(inputInstances, filter); OutputStreamUtils.writeSimple(outputInstances.toString(), outPath); }
From source file:org.montp2.m1decol.ter.gramms.filters.FilterTokenizerIDFT.java
License:Open Source License
public void indexingToTokenizer(String inPath, String outPath) throws Exception { WordTokenizer wordTokenizer = new WordTokenizer(); wordTokenizer.setDelimiters("\r \t.,;:'\"()?!"); Instances inputInstances = WekaUtils.loadARFF(inPath); StringToWordVector filter = new StringToWordVector(); filter.setInputFormat(inputInstances); filter.setIDFTransform(true);//ww w. j a v a 2 s.co m filter.setTFTransform(true); filter.setDoNotOperateOnPerClassBasis(false); filter.setInvertSelection(false); filter.setLowerCaseTokens(true); filter.setMinTermFreq(3); filter.setOutputWordCounts(true); filter.setTokenizer(wordTokenizer); filter.setUseStoplist(true); filter.setWordsToKeep(200); Instances outputInstances = Filter.useFilter(inputInstances, filter); OutputStreamUtils.writeSimple(outputInstances.toString(), outPath); }
From source file:org.montp2.m1decol.ter.gramms.filters.FilterTokenizerVector.java
License:Open Source License
/**
 * Tokenizes the ARFF file at {@code inPath} into a word-count bag-of-words
 * representation (no TF/IDF transforms, minimum term frequency 3, keeping
 * 200 words) and writes the filtered dataset to {@code outPath}.
 *
 * @param inPath path of the source ARFF file
 * @param outPath path the filtered ARFF text is written to
 * @throws Exception if loading, filtering or writing fails
 */
public void indexingToTokenizer(String inPath, String outPath) throws Exception {
    Instances inputInstances = WekaUtils.loadARFF(inPath);

    WordTokenizer wordTokenizer = new WordTokenizer();
    wordTokenizer.setDelimiters("\r \t.,;:'\"()?!");

    StringToWordVector filter = new StringToWordVector();
    filter.setDoNotOperateOnPerClassBasis(false);
    filter.setInvertSelection(false);
    filter.setLowerCaseTokens(true);
    filter.setMinTermFreq(3);
    filter.setOutputWordCounts(true);
    filter.setTokenizer(wordTokenizer);
    filter.setUseStoplist(true);
    filter.setWordsToKeep(200);
    // BUGFIX: setInputFormat must be called AFTER all options are set
    // (Weka Filter contract); the original called it first.
    filter.setInputFormat(inputInstances);

    Instances outputInstances = Filter.useFilter(inputInstances, filter);
    OutputStreamUtils.writeSimple(outputInstances.toString(), outPath);
}