Usage examples for the setDirectory method of weka.core.converters.TextDirectoryLoader
public void setDirectory(File dir) throws IOException
From source file:adams.flow.transformer.WekaTextDirectoryReader.java
License:Open Source License
/** * Executes the flow item./*from ww w .j av a 2s . c o m*/ * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; TextDirectoryLoader loader; Instances data; File file; result = null; try { if (m_InputToken.getPayload() instanceof File) file = (File) m_InputToken.getPayload(); else file = new PlaceholderFile((String) m_InputToken.getPayload()); if (file.isDirectory()) { loader = new TextDirectoryLoader(); loader.setDirectory(file); loader.setOutputFilename(m_StoreFilename); loader.setCharSet(m_CharSet); data = loader.getDataSet(); m_OutputToken = new Token(data); updateProvenance(m_OutputToken); } else { result = "Input is not a directory: " + file; } } catch (Exception e) { result = handleException("Failed to load directory with text files: ", e); } return result; }
From source file:io.TextToArffConverter.java
/** * Example class that converts HTML files stored in a directory structure into * and ARFF file using the TextDirectoryLoader converter. //from ww w. ja v a2 s. c o m /** * Expects the first parameter to point to the directory with the text files. * In that directory, each sub-directory represents a class and the text * files in these sub-directories will be labeled as such. * * @param Directory * @param args the commandline arguments * @throws java.io.IOException * @throws Exception if something goes wrong * */ public TextToArffConverter(String Directory) throws IOException { // convert the directory into a dataset TextDirectoryLoader loader = new TextDirectoryLoader(); loader.setDirectory(new File(Directory)); Instances dataRaw = loader.getDataSet(); //System.out.println("\n\nImported data:\n\n" + dataRaw); PrintWriter writer = new PrintWriter( "/home/nikos/NetBeansProjects/NucleosomePatternClassifier/ARFF/Data.txt"); writer.println(dataRaw); }
From source file:nl.uva.expose.classification.WekaClassification.java
private void loadToArff(String dataDir, Instances dRaw) throws IOException { TextDirectoryLoader loader = new TextDirectoryLoader(); loader.setDirectory(new File(dataDir)); dRaw = loader.getDataSet();//from w ww . j av a 2s . c o m System.out.println("\n\nImported data:\n\n" + dRaw); }
From source file:org.ml.classifier.TextDirectoryToArff.java
License:Open Source License
public Instances createDataset(String directoryPath) throws Exception { // FastVector atts = new FastVector(2); // atts.addElement(new Attribute("filename", (FastVector) null)); // atts.addElement(new Attribute("contents", (FastVector) null)); // Instances data = new Instances("text_files_in_" + directoryPath, atts, 0); ////ww w .j a v a 2 s.com // File dir = new File(directoryPath); // String[] files = dir.list(); // for (int i = 0; i < files.length; i++) { // if (files[i].endsWith(".txt")) { // try { // double[] newInst = new double[2]; // newInst[0] = (double)data.attribute(0).addStringValue(files[i]); // File txt = new File(directoryPath + File.separator + files[i]); // InputStreamReader is; // is = new InputStreamReader(new FileInputStream(txt)); // StringBuffer txtStr = new StringBuffer(); // int c; // while ((c = is.read()) != -1) { // txtStr.append((char)c); // } // newInst[1] = (double)data.attribute(1).addStringValue(txtStr.toString()); // data.add(new Instance(1.0, newInst)); // } catch (Exception e) { // //System.err.println("failed to convert file: " + directoryPath + File.separator + files[i]); // } // } // } // convert the directory into a dataset TextDirectoryLoader loader = new TextDirectoryLoader(); loader.setDirectory(new File(directoryPath)); loader.setOutputFilename(false); Instances data = loader.getDataSet(); //System.out.println("\n\nImported data:\n\n" + dataRaw); return data; }
From source file:util.FeatureExtract.java
public static void createArff(String directory) { TextDirectoryLoader loader = new TextDirectoryLoader(); try {//from www. java2 s . co m // convert the directory into a dataset loader.setDirectory(new File(directory)); Instances dataRaw = loader.getDataSet(); // apply the StringToWordVector and tf-idf weighting StringToWordVector filter = new StringToWordVector(); filter.setIDFTransform(true); filter.setInputFormat(dataRaw); Instances dataFiltered = Filter.useFilter(dataRaw, filter); // output the arff file ArffSaver saver = new ArffSaver(); saver.setInstances(dataFiltered); saver.setFile(new File(SpamFilterConfig.getArffFilePath())); saver.writeBatch(); // train with simple cart SimpleCart classifier = new SimpleCart(); classifier.buildClassifier(dataFiltered); System.out.println("\n\nClassifier model:\n\n" + classifier.toString()); // using 10 cross validation Evaluation eval = new Evaluation(dataFiltered); eval.crossValidateModel(classifier, dataFiltered, 10, new Random(1)); System.out.println("\n\nCross fold:\n\n" + eval.toSummaryString()); } catch (Exception ex) { Logger.getLogger(FeatureExtract.class.getName()).log(Level.SEVERE, null, ex); } }