Example usage for weka.core.converters TextDirectoryLoader TextDirectoryLoader

List of usage examples for weka.core.converters TextDirectoryLoader TextDirectoryLoader

Introduction

On this page you can find an example usage for weka.core.converters TextDirectoryLoader TextDirectoryLoader.

Prototype

public TextDirectoryLoader() 

Source Link

Document

default constructor

Usage

From source file:adams.flow.transformer.WekaTextDirectoryReader.java

License: Open Source License

/**
 * Returns a description of this object, suitable for displaying in the GUI.
 *
 * @return the underlying loader's global info followed by a note naming
 *         the WEKA converter class that is used
 */
@Override
public String globalInfo() {
    String loaderInfo = new TextDirectoryLoader().globalInfo();
    String converterNote = "Uses the WEKA " + TextDirectoryLoader.class.getName() + " converter.";
    return loaderInfo + "\n" + converterNote;
}

From source file:adams.flow.transformer.WekaTextDirectoryReader.java

License: Open Source License

/**
 * Executes the flow item: interprets the incoming token as a directory,
 * loads all text files beneath it into a WEKA dataset and forwards the
 * dataset as the output token.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result = null;

    try {
        Object payload = m_InputToken.getPayload();
        File dir;
        // the input token may carry either a File or a path string
        if (payload instanceof File)
            dir = (File) payload;
        else
            dir = new PlaceholderFile((String) payload);

        if (!dir.isDirectory()) {
            result = "Input is not a directory: " + dir;
        } else {
            TextDirectoryLoader loader = new TextDirectoryLoader();
            loader.setDirectory(dir);
            loader.setOutputFilename(m_StoreFilename);
            loader.setCharSet(m_CharSet);
            m_OutputToken = new Token(loader.getDataSet());
            updateProvenance(m_OutputToken);
        }
    } catch (Exception e) {
        result = handleException("Failed to load directory with text files: ", e);
    }

    return result;
}

From source file:io.TextToArffConverter.java

/**
 * Converts text files stored in a directory structure into a WEKA dataset
 * using the TextDirectoryLoader converter and writes the result to a file.
 *
 * Expects {@code Directory} to point to the directory with the text files.
 * In that directory, each sub-directory represents a class and the text
 * files in these sub-directories will be labeled as such.
 *
 * @param Directory     the directory containing one sub-directory per class
 * @throws IOException  if the directory cannot be loaded or the output
 *                      file cannot be written
 */
public TextToArffConverter(String Directory) throws IOException {
    // convert the directory into a dataset
    TextDirectoryLoader loader = new TextDirectoryLoader();
    loader.setDirectory(new File(Directory));
    Instances dataRaw = loader.getDataSet();
    // try-with-resources: the original never closed the PrintWriter, which
    // leaks the handle and can leave the output file empty/truncated
    // because the stream's buffer is never flushed
    try (PrintWriter writer = new PrintWriter(
            "/home/nikos/NetBeansProjects/NucleosomePatternClassifier/ARFF/Data.txt")) {
        writer.println(dataRaw);
    }
}

From source file:nl.uva.expose.classification.WekaClassification.java

/**
 * Loads all text files beneath {@code dataDir} into a WEKA dataset and
 * prints it to stdout.
 *
 * NOTE(review): Java passes object references by value, so assigning
 * {@code loader.getDataSet()} to the {@code dRaw} parameter has no effect
 * on the caller's variable — the loaded dataset is only observable through
 * the println below. Consider returning the Instances instead; not changed
 * here to preserve the method's signature.
 *
 * @param dataDir directory whose sub-directories serve as class labels
 * @param dRaw    reassigned locally only; the caller's reference is NOT updated
 * @throws IOException if the directory cannot be read
 */
private void loadToArff(String dataDir, Instances dRaw) throws IOException {
    TextDirectoryLoader loader = new TextDirectoryLoader();
    loader.setDirectory(new File(dataDir));
    dRaw = loader.getDataSet(); // local assignment only — see note above
    System.out.println("\n\nImported data:\n\n" + dRaw);
}

From source file:org.ml.classifier.TextDirectoryToArff.java

License: Open Source License

/**
 * Converts the text files beneath {@code directoryPath} into a WEKA
 * dataset using the TextDirectoryLoader converter. Each sub-directory of
 * the given path is treated as a class label for the files it contains.
 *
 * @param directoryPath the root directory holding one sub-directory per class
 * @return the loaded dataset (filename attribute suppressed)
 * @throws Exception if the directory cannot be loaded
 */
public Instances createDataset(String directoryPath) throws Exception {
    // Removed ~27 lines of commented-out manual FastVector/addStringValue
    // code: dead code that duplicated what TextDirectoryLoader does.

    // convert the directory into a dataset; setOutputFilename(false)
    // suppresses the filename attribute, keeping only contents and class
    TextDirectoryLoader loader = new TextDirectoryLoader();
    loader.setDirectory(new File(directoryPath));
    loader.setOutputFilename(false);
    return loader.getDataSet();
}

From source file:preprocess.TextDirectoryLoaderEX.java

License: Open Source License

/**
 * Command-line entry point. With arguments, loads the directory given by
 * the options and prints the resulting dataset; without arguments, prints
 * a usage summary of all supported options to stderr.
 *
 * @param args should contain the name of an input file.
 */
public static void main(String[] args) {
    if (args.length == 0) {
        // no arguments: show usage plus every option the loader supports
        System.err.println("\nUsage:\n" + "\tTextDirectoryLoader [options]\n" + "\n" + "Options:\n");

        Enumeration options = ((OptionHandler) new TextDirectoryLoader()).listOptions();
        while (options.hasMoreElements()) {
            Option opt = (Option) options.nextElement();
            System.err.println(opt.synopsis());
            System.err.println(opt.description());
        }

        System.err.println();
        return;
    }

    try {
        TextDirectoryLoader loader = new TextDirectoryLoader();
        loader.setOptions(args);
        System.out.println(loader.getDataSet());
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:util.FeatureExtract.java

/**
 * Builds an ARFF file from the text files beneath {@code directory}, then
 * trains and cross-validates a SimpleCart classifier on the TF-IDF
 * weighted word vectors.
 *
 * @param directory root directory; each sub-directory is a class label
 */
public static void createArff(String directory) {
    TextDirectoryLoader loader = new TextDirectoryLoader();
    try {
        // load the directory structure into a raw string dataset
        loader.setDirectory(new File(directory));
        Instances rawData = loader.getDataSet();

        // turn the string attribute into TF-IDF weighted word vectors
        StringToWordVector wordVector = new StringToWordVector();
        wordVector.setIDFTransform(true);
        wordVector.setInputFormat(rawData);
        Instances vectorized = Filter.useFilter(rawData, wordVector);

        // persist the filtered data as an ARFF file
        ArffSaver arffSaver = new ArffSaver();
        arffSaver.setInstances(vectorized);
        arffSaver.setFile(new File(SpamFilterConfig.getArffFilePath()));
        arffSaver.writeBatch();

        // train a SimpleCart decision tree on the vectorized data
        SimpleCart cart = new SimpleCart();
        cart.buildClassifier(vectorized);
        System.out.println("\n\nClassifier model:\n\n" + cart.toString());

        // evaluate with 10-fold cross-validation (fixed seed for repeatability)
        Evaluation evaluation = new Evaluation(vectorized);
        evaluation.crossValidateModel(cart, vectorized, 10, new Random(1));

        System.out.println("\n\nCross fold:\n\n" + evaluation.toSummaryString());
    } catch (Exception ex) {
        Logger.getLogger(FeatureExtract.class.getName()).log(Level.SEVERE, null, ex);
    }
}