Example usage for weka.core.converters TextDirectoryLoader getDataSet

List of usage examples for weka.core.converters TextDirectoryLoader getDataSet

Introduction

In this page you can find the example usage for weka.core.converters TextDirectoryLoader getDataSet.

Prototype

@Override
public Instances getDataSet() throws IOException 

Source Link

Document

Return the full data set.

Usage

From source file:adams.flow.transformer.WekaTextDirectoryReader.java

License: Open Source License

/**
 * Executes the flow item: converts the incoming directory payload into a
 * WEKA dataset via {@link TextDirectoryLoader}.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result = null;

    try {
        Object payload = m_InputToken.getPayload();
        File dir = (payload instanceof File)
                ? (File) payload
                : new PlaceholderFile((String) payload);

        if (!dir.isDirectory()) {
            result = "Input is not a directory: " + dir;
        } else {
            // Load all text files below the directory into a dataset and
            // forward it as the output token.
            TextDirectoryLoader loader = new TextDirectoryLoader();
            loader.setDirectory(dir);
            loader.setOutputFilename(m_StoreFilename);
            loader.setCharSet(m_CharSet);
            m_OutputToken = new Token(loader.getDataSet());
            updateProvenance(m_OutputToken);
        }
    } catch (Exception e) {
        result = handleException("Failed to load directory with text files: ", e);
    }

    return result;
}

From source file:io.TextToArffConverter.java

/**
 * Converts text files stored in a directory structure into a WEKA dataset
 * using the {@link TextDirectoryLoader} converter and dumps the raw
 * instances to a text file.
 *
 * <p>Each sub-directory of {@code Directory} represents a class; the text
 * files inside it are labeled with that class.
 *
 * @param Directory     the directory containing the class sub-directories
 * @throws IOException  if loading the directory or writing the output fails
 */
public TextToArffConverter(String Directory) throws IOException {
    // convert the directory into a dataset
    TextDirectoryLoader loader = new TextDirectoryLoader();
    loader.setDirectory(new File(Directory));
    Instances dataRaw = loader.getDataSet();

    // try-with-resources flushes and closes the writer; the original leaked
    // the PrintWriter, so the output file could end up truncated or empty
    try (PrintWriter writer = new PrintWriter(
            "/home/nikos/NetBeansProjects/NucleosomePatternClassifier/ARFF/Data.txt")) {
        writer.println(dataRaw);
    }
}

From source file:nl.uva.expose.classification.WekaClassification.java

/**
 * Loads the text files under {@code dataDir} into a WEKA dataset and
 * prints it.
 *
 * <p>NOTE(review): the original assigned the loaded dataset to the
 * {@code dRaw} parameter, which is invisible to callers (Java passes
 * object references by value), so the data was silently lost. The dataset
 * is now returned instead; the parameter is kept only so existing call
 * sites still compile, and is ignored.
 *
 * @param dataDir       directory whose sub-directories hold the class-labeled text files
 * @param dRaw          unused; retained for signature compatibility
 * @return              the loaded dataset
 * @throws IOException  if the directory cannot be read
 */
private Instances loadToArff(String dataDir, Instances dRaw) throws IOException {
    TextDirectoryLoader loader = new TextDirectoryLoader();
    loader.setDirectory(new File(dataDir));
    Instances data = loader.getDataSet();
    System.out.println("\n\nImported data:\n\n" + data);
    return data;
}

From source file:org.ml.classifier.TextDirectoryToArff.java

License: Open Source License

/**
 * Converts all text files in the given directory into a WEKA dataset using
 * {@link TextDirectoryLoader}. Each sub-directory of {@code directoryPath}
 * is treated as a class label for the files it contains.
 *
 * <p>The previous hand-rolled file-walking implementation was left behind
 * as commented-out dead code; it has been removed in favor of the loader.
 *
 * @param directoryPath the root directory of the class sub-directories
 * @return the loaded dataset
 * @throws Exception if the directory cannot be read
 */
public Instances createDataset(String directoryPath) throws Exception {
    // Convert the directory into a dataset; the filename attribute is
    // suppressed so only the contents (and class) are kept.
    TextDirectoryLoader loader = new TextDirectoryLoader();
    loader.setDirectory(new File(directoryPath));
    loader.setOutputFilename(false);
    return loader.getDataSet();
}

From source file:preprocess.TextDirectoryLoaderEX.java

License: Open Source License

/**
 * Command-line entry point: configures a {@link TextDirectoryLoader} from
 * the given options and prints the resulting dataset, or prints a usage
 * summary of all supported options when no arguments are supplied.
 *
 * @param args the loader options; must be non-empty to trigger loading
 */
public static void main(String[] args) {
    if (args.length > 0) {
        try {
            TextDirectoryLoader loader = new TextDirectoryLoader();
            loader.setOptions(args);
            System.out.println(loader.getDataSet());
        } catch (Exception e) {
            e.printStackTrace();
        }
    } else {
        System.err.println("\nUsage:\n" + "\tTextDirectoryLoader [options]\n" + "\n" + "Options:\n");

        // Typed enumeration replaces the raw Enumeration and the redundant
        // (OptionHandler) cast of the original.
        Enumeration<Option> enm = new TextDirectoryLoader().listOptions();
        while (enm.hasMoreElements()) {
            Option option = enm.nextElement();
            System.err.println(option.synopsis());
            System.err.println(option.description());
        }

        System.err.println();
    }
}

From source file:util.FeatureExtract.java

/**
 * End-to-end pipeline: loads the text files under {@code directory} into a
 * dataset, converts the text to TF-IDF word vectors, writes the filtered
 * data to an ARFF file, trains a SimpleCart classifier on it, and prints a
 * 10-fold cross-validation summary.
 *
 * @param directory root directory; each sub-directory is a class label
 */
public static void createArff(String directory) {
    TextDirectoryLoader loader = new TextDirectoryLoader();
    try {
        // convert the directory into a dataset
        loader.setDirectory(new File(directory));
        Instances dataRaw = loader.getDataSet();

        // apply the StringToWordVector and tf-idf weighting
        // (input format must be set before Filter.useFilter is called)
        StringToWordVector filter = new StringToWordVector();
        filter.setIDFTransform(true);
        filter.setInputFormat(dataRaw);
        Instances dataFiltered = Filter.useFilter(dataRaw, filter);

        // output the arff file to the configured path
        ArffSaver saver = new ArffSaver();
        saver.setInstances(dataFiltered);
        saver.setFile(new File(SpamFilterConfig.getArffFilePath()));
        saver.writeBatch();

        // train with simple cart
        SimpleCart classifier = new SimpleCart();
        classifier.buildClassifier(dataFiltered);
        System.out.println("\n\nClassifier model:\n\n" + classifier.toString());

        // 10-fold cross validation with a fixed seed for reproducibility
        Evaluation eval = new Evaluation(dataFiltered);
        eval.crossValidateModel(classifier, dataFiltered, 10, new Random(1));

        System.out.println("\n\nCross fold:\n\n" + eval.toSummaryString());
    } catch (Exception ex) {
        // best-effort pipeline: failures are logged, not rethrown
        Logger.getLogger(FeatureExtract.class.getName()).log(Level.SEVERE, null, ex);
    }
}