Example usage for weka.core.converters TextDirectoryLoader setDirectory

List of usage examples for weka.core.converters TextDirectoryLoader setDirectory

Introduction

In this page you can find the example usage for weka.core.converters TextDirectoryLoader setDirectory.

Prototype

public void setDirectory(File dir) throws IOException 

Source Link

Document

Sets the source directory from which the text files will be loaded.

Usage

From source file:adams.flow.transformer.WekaTextDirectoryReader.java

License: Open Source License

/**
 * Executes the flow item./*from ww  w .j  av a  2s  . c  o  m*/
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    TextDirectoryLoader loader;
    Instances data;
    File file;

    result = null;

    try {
        if (m_InputToken.getPayload() instanceof File)
            file = (File) m_InputToken.getPayload();
        else
            file = new PlaceholderFile((String) m_InputToken.getPayload());

        if (file.isDirectory()) {
            loader = new TextDirectoryLoader();
            loader.setDirectory(file);
            loader.setOutputFilename(m_StoreFilename);
            loader.setCharSet(m_CharSet);
            data = loader.getDataSet();
            m_OutputToken = new Token(data);
            updateProvenance(m_OutputToken);
        } else {
            result = "Input is not a directory: " + file;
        }
    } catch (Exception e) {
        result = handleException("Failed to load directory with text files: ", e);
    }

    return result;
}

From source file:io.TextToArffConverter.java

/**
 * Converts text files stored in a directory structure into a dataset using
 * the {@link TextDirectoryLoader} converter and dumps it to a text file.
 *
 * Expects the parameter to point to the directory with the text files.
 * In that directory, each sub-directory represents a class and the text
 * files in these sub-directories will be labeled as such.
 *
 * @param Directory the directory containing one sub-directory per class
 * @throws java.io.IOException if the directory cannot be read or the
 *                             output file cannot be created
 */
public TextToArffConverter(String Directory) throws IOException {
    // convert the directory into a dataset
    TextDirectoryLoader loader = new TextDirectoryLoader();
    loader.setDirectory(new File(Directory));
    Instances dataRaw = loader.getDataSet();
    // try-with-resources guarantees the writer is flushed and closed;
    // the original never closed it, leaking the handle and risking a
    // truncated output file.
    // NOTE(review): output path is hard-coded — consider making it a parameter.
    try (PrintWriter writer = new PrintWriter(
            "/home/nikos/NetBeansProjects/NucleosomePatternClassifier/ARFF/Data.txt")) {
        writer.println(dataRaw);
    }
}

From source file:nl.uva.expose.classification.WekaClassification.java

/**
 * Loads all text files under the given directory into a WEKA dataset and
 * prints the imported data to stdout.
 *
 * NOTE(review): the assignment to {@code dRaw} rebinds only the local
 * parameter copy — Java passes object references by value, so the caller's
 * {@code Instances} variable is NOT updated by this method. Consider
 * returning the dataset instead of taking an "output" parameter.
 *
 * @param dataDir directory whose sub-directories are treated as class labels
 * @param dRaw    intended as an output parameter, but ineffective (see note)
 * @throws IOException if the directory cannot be read
 */
private void loadToArff(String dataDir, Instances dRaw) throws IOException {
    TextDirectoryLoader loader = new TextDirectoryLoader();
    loader.setDirectory(new File(dataDir));
    dRaw = loader.getDataSet();
    System.out.println("\n\nImported data:\n\n" + dRaw);
}

From source file:org.ml.classifier.TextDirectoryToArff.java

License:Open Source License

/**
 * Converts all text files under the given directory into a WEKA dataset.
 * Each sub-directory of {@code directoryPath} is treated as a class label
 * for the text files it contains.
 *
 * (The previous hand-rolled file-walking implementation that was kept here
 * as commented-out code has been removed; {@link TextDirectoryLoader}
 * replaces it entirely.)
 *
 * @param directoryPath directory containing one sub-directory per class
 * @return the loaded dataset
 * @throws Exception if the directory cannot be read or loading fails
 */
public Instances createDataset(String directoryPath) throws Exception {
    // convert the directory into a dataset; suppress the filename
    // attribute so only the file contents (plus class) are kept
    TextDirectoryLoader loader = new TextDirectoryLoader();
    loader.setDirectory(new File(directoryPath));
    loader.setOutputFilename(false);
    return loader.getDataSet();
}

From source file:util.FeatureExtract.java

/**
 * Builds an ARFF file from the text files in the given directory, then
 * trains a SimpleCart classifier on the TF-IDF weighted word vectors and
 * reports 10-fold cross-validation results.
 *
 * @param directory directory with one sub-directory per class of text files
 */
public static void createArff(String directory) {
    try {
        // Load every text file below the directory into a raw dataset.
        TextDirectoryLoader textLoader = new TextDirectoryLoader();
        textLoader.setDirectory(new File(directory));
        Instances rawData = textLoader.getDataSet();

        // Turn the string attribute into TF-IDF weighted word vectors.
        StringToWordVector wordVector = new StringToWordVector();
        wordVector.setIDFTransform(true);
        wordVector.setInputFormat(rawData);
        Instances vectorized = Filter.useFilter(rawData, wordVector);

        // Persist the filtered dataset as an ARFF file.
        ArffSaver arffSaver = new ArffSaver();
        arffSaver.setInstances(vectorized);
        arffSaver.setFile(new File(SpamFilterConfig.getArffFilePath()));
        arffSaver.writeBatch();

        // Train a CART decision tree on the vectorized data.
        SimpleCart cart = new SimpleCart();
        cart.buildClassifier(vectorized);
        System.out.println("\n\nClassifier model:\n\n" + cart.toString());

        // Estimate performance with 10-fold cross-validation
        // (fixed seed keeps the fold split reproducible).
        Evaluation evaluation = new Evaluation(vectorized);
        evaluation.crossValidateModel(cart, vectorized, 10, new Random(1));

        System.out.println("\n\nCross fold:\n\n" + evaluation.toSummaryString());
    } catch (Exception ex) {
        Logger.getLogger(FeatureExtract.class.getName()).log(Level.SEVERE, null, ex);
    }
}