List of usage examples for weka.core.converters.TextDirectoryLoader.getDataSet()
@Override public Instances getDataSet() throws IOException
From source file:adams.flow.transformer.WekaTextDirectoryReader.java
License:Open Source License
/**
 * Executes the flow item: interprets the input token as a directory,
 * loads it with Weka's TextDirectoryLoader and forwards the resulting
 * dataset as the output token.
 *
 * @return null if everything is fine, otherwise the error message
 */
@Override
protected String doExecute() {
    String errorMsg = null;
    try {
        // payload may arrive either as a File or as a path string
        File dir;
        if (m_InputToken.getPayload() instanceof File) {
            dir = (File) m_InputToken.getPayload();
        } else {
            dir = new PlaceholderFile((String) m_InputToken.getPayload());
        }
        if (dir.isDirectory()) {
            TextDirectoryLoader textLoader = new TextDirectoryLoader();
            textLoader.setDirectory(dir);
            textLoader.setOutputFilename(m_StoreFilename);
            textLoader.setCharSet(m_CharSet);
            Instances dataset = textLoader.getDataSet();
            m_OutputToken = new Token(dataset);
            updateProvenance(m_OutputToken);
        } else {
            errorMsg = "Input is not a directory: " + dir;
        }
    } catch (Exception e) {
        errorMsg = handleException("Failed to load directory with text files: ", e);
    }
    return errorMsg;
}
From source file:io.TextToArffConverter.java
/** * Example class that converts HTML files stored in a directory structure into * and ARFF file using the TextDirectoryLoader converter. /* w w w . j a v a 2 s.c o m*/ /** * Expects the first parameter to point to the directory with the text files. * In that directory, each sub-directory represents a class and the text * files in these sub-directories will be labeled as such. * * @param Directory * @param args the commandline arguments * @throws java.io.IOException * @throws Exception if something goes wrong * */ public TextToArffConverter(String Directory) throws IOException { // convert the directory into a dataset TextDirectoryLoader loader = new TextDirectoryLoader(); loader.setDirectory(new File(Directory)); Instances dataRaw = loader.getDataSet(); //System.out.println("\n\nImported data:\n\n" + dataRaw); PrintWriter writer = new PrintWriter( "/home/nikos/NetBeansProjects/NucleosomePatternClassifier/ARFF/Data.txt"); writer.println(dataRaw); }
From source file:nl.uva.expose.classification.WekaClassification.java
/**
 * Loads all text files under {@code dataDir} into a Weka dataset and prints
 * it. Sub-directories of {@code dataDir} are used as class labels by
 * TextDirectoryLoader.
 *
 * <p>Bug fix: the original assigned the loaded dataset to the {@code dRaw}
 * parameter, which has no effect on the caller (Java passes references by
 * value), so the data was silently discarded. The dataset is now returned;
 * the {@code dRaw} parameter is kept only for source compatibility.
 *
 * @param dataDir directory with one sub-directory per class of text files
 * @param dRaw    unused; retained for backward compatibility
 * @return the dataset loaded from {@code dataDir}
 * @throws IOException if the directory cannot be read
 */
private Instances loadToArff(String dataDir, Instances dRaw) throws IOException {
    TextDirectoryLoader loader = new TextDirectoryLoader();
    loader.setDirectory(new File(dataDir));
    Instances loaded = loader.getDataSet();
    System.out.println("\n\nImported data:\n\n" + loaded);
    return loaded;
}
From source file:org.ml.classifier.TextDirectoryToArff.java
License:Open Source License
public Instances createDataset(String directoryPath) throws Exception { // FastVector atts = new FastVector(2); // atts.addElement(new Attribute("filename", (FastVector) null)); // atts.addElement(new Attribute("contents", (FastVector) null)); // Instances data = new Instances("text_files_in_" + directoryPath, atts, 0); ///* w w w. j a v a2 s . com*/ // File dir = new File(directoryPath); // String[] files = dir.list(); // for (int i = 0; i < files.length; i++) { // if (files[i].endsWith(".txt")) { // try { // double[] newInst = new double[2]; // newInst[0] = (double)data.attribute(0).addStringValue(files[i]); // File txt = new File(directoryPath + File.separator + files[i]); // InputStreamReader is; // is = new InputStreamReader(new FileInputStream(txt)); // StringBuffer txtStr = new StringBuffer(); // int c; // while ((c = is.read()) != -1) { // txtStr.append((char)c); // } // newInst[1] = (double)data.attribute(1).addStringValue(txtStr.toString()); // data.add(new Instance(1.0, newInst)); // } catch (Exception e) { // //System.err.println("failed to convert file: " + directoryPath + File.separator + files[i]); // } // } // } // convert the directory into a dataset TextDirectoryLoader loader = new TextDirectoryLoader(); loader.setDirectory(new File(directoryPath)); loader.setOutputFilename(false); Instances data = loader.getDataSet(); //System.out.println("\n\nImported data:\n\n" + dataRaw); return data; }
From source file:preprocess.TextDirectoryLoaderEX.java
License:Open Source License
/**
 * Main method. With arguments, configures a TextDirectoryLoader from them
 * and prints the loaded dataset; without arguments, prints a usage message
 * listing every loader option to stderr.
 *
 * @param args should contain the loader options (see usage output)
 */
public static void main(String[] args) {
    if (args.length > 0) {
        try {
            TextDirectoryLoader loader = new TextDirectoryLoader();
            loader.setOptions(args);
            System.out.println(loader.getDataSet());
        } catch (Exception e) {
            // CLI entry point: print the full trace for the user
            e.printStackTrace();
        }
    } else {
        System.err.println("\nUsage:\n" + "\tTextDirectoryLoader [options]\n" + "\n" + "Options:\n");
        // typed enumeration replaces the raw Enumeration + Option cast and
        // the redundant OptionHandler cast of the original
        Enumeration<Option> enm = new TextDirectoryLoader().listOptions();
        while (enm.hasMoreElements()) {
            Option option = enm.nextElement();
            System.err.println(option.synopsis());
            System.err.println(option.description());
        }
        System.err.println();
    }
}
From source file:util.FeatureExtract.java
public static void createArff(String directory) { TextDirectoryLoader loader = new TextDirectoryLoader(); try {// w ww .j a v a2 s .c om // convert the directory into a dataset loader.setDirectory(new File(directory)); Instances dataRaw = loader.getDataSet(); // apply the StringToWordVector and tf-idf weighting StringToWordVector filter = new StringToWordVector(); filter.setIDFTransform(true); filter.setInputFormat(dataRaw); Instances dataFiltered = Filter.useFilter(dataRaw, filter); // output the arff file ArffSaver saver = new ArffSaver(); saver.setInstances(dataFiltered); saver.setFile(new File(SpamFilterConfig.getArffFilePath())); saver.writeBatch(); // train with simple cart SimpleCart classifier = new SimpleCart(); classifier.buildClassifier(dataFiltered); System.out.println("\n\nClassifier model:\n\n" + classifier.toString()); // using 10 cross validation Evaluation eval = new Evaluation(dataFiltered); eval.crossValidateModel(classifier, dataFiltered, 10, new Random(1)); System.out.println("\n\nCross fold:\n\n" + eval.toSummaryString()); } catch (Exception ex) { Logger.getLogger(FeatureExtract.class.getName()).log(Level.SEVERE, null, ex); } }