Example usage for weka.core.converters ArffLoader getStructure

List of usage examples for weka.core.converters ArffLoader getStructure

Introduction

On this page you can find usage examples for the getStructure method of weka.core.converters.ArffLoader.

Prototype

@Override
public Instances getStructure() throws IOException 

Source Link

Document

Determines and returns (if possible) the structure (internally the header) of the data set as an empty set of instances.

Usage

From source file:br.com.edu.partition.Tranning.java

/**
 * Trains a {@code JRip} classifier on one ARFF file and scores the instances of another.
 *
 * <p>In both files the last attribute is used as the class attribute. Every test instance is
 * passed to {@code distributionForInstance}; only the value at index 1 of the distribution
 * computed for the <em>last</em> test instance is returned.
 *
 * @param test     path of the ARFF file holding the instances to score
 * @param tranning path of the ARFF file holding the training instances
 * @return element 1 of the class distribution of the last test instance, or {@code null}
 *         when the test set contains no instances
 * @throws IOException if one of the files cannot be read
 * @throws Exception   if building the classifier or computing a distribution fails
 */
public static Double Tranning_JRIP(String test, String tranning) throws IOException, Exception {
    // Training data: read the header first, then the full data set.
    ArffLoader trainLoader = new ArffLoader();
    trainLoader.setFile(new File(tranning));
    trainLoader.getStructure();
    Instances trainData = trainLoader.getDataSet();
    trainData.setClassIndex(trainData.numAttributes() - 1);

    JRip ruleLearner = new JRip();
    ruleLearner.buildClassifier(trainData);

    // Test data: loaded the same way through a fresh loader.
    ArffLoader testLoader = new ArffLoader();
    testLoader.setFile(new File(test));
    testLoader.getStructure();
    Instances testData = testLoader.getDataSet();
    testData.setClassIndex(testData.numAttributes() - 1);

    // Each iteration overwrites the previous score, so only the last instance counts.
    Double lastScore = null;
    for (Instance row : testData) {
        double[] distribution = ruleLearner.distributionForInstance(row);
        lastScore = distribution[1];
    }
    return lastScore;
}

From source file:CEP.GenerateStream.java

/**
 * Streams every instance of the w4ndata ARFF file to the CEP runtime, one event at a time.
 *
 * <p>The ARFF header is read first and a copy of it is handed to
 * {@code HeaderManager.SetStructure}. Rows are then read incrementally; each one is sent with
 * {@code cepRT.sendEvent} and followed by a call to {@code WaitTime}, which receives the
 * string value of the row's first attribute as its timestamp argument.
 *
 * <p>Failures are reported on standard output and are never propagated to the caller.
 */
public void MakeStream() {
    File file = new File("C:\\Users\\Weary\\Documents\\w4ndata\\w4ndata.arff");
    String pc = System.getProperty("user.dir");
    if (pc.contains("gs023850")) {
        file = new File("C:\\Users\\gs023850\\Documents\\w4ndata\\w4ndata.arff");
    }
    try {
        ArffLoader loader = new ArffLoader();
        loader.setFile(file);
        Instances structure = loader.getStructure();

        HeaderManager.SetStructure(new Instances(structure));
        Instance current;
        long previousTimeStamp = 0;
        long wait = 0;

        while ((current = loader.getNextInstance(structure)) != null) {
            String timeStamp = current.stringValue(0);
            cepRT.sendEvent(current);
            System.out.println("Sending event");
            previousTimeStamp = WaitTime(timeStamp, previousTimeStamp, wait);
        }
    } catch (Exception e) {
        // Exceptions use identity equality, so comparing the caught exception with a freshly
        // constructed one is always false; dispatch on the runtime type instead.
        // FileNotFoundException must be tested before its superclass IOException.
        if (e instanceof FileNotFoundException) {
            System.out.println("File not found - could not generate stream");
        } else if (e instanceof IOException) {
            System.out.println("Unable to read file");
        } else if (e instanceof NumberFormatException) {
            System.out.println("Unable to convert to time to number - bad time");
        } else {
            System.out.println(e.toString());
        }
    }
}

From source file:com.daniel.convert.IncrementalClassifier.java

License:Open Source License

/**
 * Loads the ARFF file named by the first argument, builds a {@code BayesNet} on it and prints
 * a 15-fold cross-validation summary to standard output.
 *
 * <p>The last attribute is used as the class attribute. Rows are read one at a time and
 * appended to the header-only data set before the classifier is built. The cross-validation
 * runs on a second, separate {@code BayesNet} over the same data, seeded with
 * {@code new Random(1)}.
 *
 * @param args
 *            the commandline arguments; {@code args[0]} is the path of the ARFF file
 * @return the {@code BayesNet} built on the complete data set
 * @throws Exception
 *             if something goes wrong
 */
public static BayesNet treinar(String[] args) throws Exception {
    // Open the file and read only its header.
    ArffLoader arffSource = new ArffLoader();
    arffSource.setFile(new File(args[0]));

    Instances dataset = arffSource.getStructure();
    int lastAttribute = dataset.numAttributes() - 1;
    dataset.setClassIndex(lastAttribute);

    // Pull the rows in incrementally and collect them in the data set.
    for (Instance row = arffSource.getNextInstance(dataset); row != null;
            row = arffSource.getNextInstance(dataset)) {
        dataset.add(row);
    }

    // Build the model that is returned to the caller.
    BayesNet trainedModel = new BayesNet();
    trainedModel.buildClassifier(dataset);

    // Cross-validate with a fresh classifier so the returned model is left untouched.
    BayesNet foldModel = new BayesNet();
    Evaluation evaluation = new Evaluation(dataset);
    evaluation.crossValidateModel(foldModel, dataset, 15, new Random(1));

    // Print the result in the style of the Weka explorer.
    System.out.println(evaluation.toSummaryString());

    return trainedModel;
}

From source file:cs.man.ac.uk.predict.Predictor.java

License:Open Source License

public static void makePredictionsEnsembleNew(String trainPath, String testPath, String resultPath) {
    System.out.println("Training set: " + trainPath);
    System.out.println("Test set: " + testPath);

    /**//from   ww  w . j  ava  2 s.com
     * The ensemble classifiers. This is a heterogeneous ensemble.
     */
    J48 learner1 = new J48();
    SMO learner2 = new SMO();
    NaiveBayes learner3 = new NaiveBayes();
    MultilayerPerceptron learner5 = new MultilayerPerceptron();

    System.out.println("Training Ensemble.");
    long startTime = System.nanoTime();
    try {
        BufferedReader reader = new BufferedReader(new FileReader(trainPath));
        Instances data = new Instances(reader);
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println("Training data length: " + data.numInstances());

        learner1.buildClassifier(data);
        learner2.buildClassifier(data);
        learner3.buildClassifier(data);
        learner5.buildClassifier(data);

        long endTime = System.nanoTime();
        long nanoseconds = endTime - startTime;
        double seconds = (double) nanoseconds / 1000000000.0;
        System.out.println("Training Ensemble completed in " + nanoseconds + " (ns) or " + seconds + " (s).");
    } catch (IOException e) {
        System.out.println("Could not train Ensemble classifier IOException on training data file.");
    } catch (Exception e) {
        System.out.println("Could not train Ensemble classifier Exception building model.");
    }

    try {
        String line = "";

        // Read the file and display it line by line. 
        BufferedReader in = null;

        // Read in and store each positive prediction in the tree map.
        try {
            //open stream to file
            in = new BufferedReader(new FileReader(testPath));

            while ((line = in.readLine()) != null) {
                if (line.toLowerCase().contains("@data"))
                    break;
            }
        } catch (Exception e) {
        }

        // A different ARFF loader used here (compared to above) as
        // the ARFF file may be extremely large. In which case the whole
        // file cannot be read in. Instead it is read in incrementally.
        ArffLoader loader = new ArffLoader();
        loader.setFile(new File(testPath));

        Instances data = loader.getStructure();
        data.setClassIndex(data.numAttributes() - 1);

        System.out.println("Ensemble Classifier is ready.");
        System.out.println("Testing on all instances avaialable.");

        startTime = System.nanoTime();

        int instanceNumber = 0;

        // label instances
        Instance current;

        while ((current = loader.getNextInstance(data)) != null) {
            instanceNumber += 1;
            line = in.readLine();

            double classification1 = learner1.classifyInstance(current);
            double classification2 = learner2.classifyInstance(current);
            double classification3 = learner3.classifyInstance(current);
            double classification5 = learner5.classifyInstance(current);

            // All classifiers must agree. This is a very primitive ensemble strategy!
            if (classification1 == 1 && classification2 == 1 && classification3 == 1 && classification5 == 1) {
                if (line != null) {
                    //System.out.println("Instance: "+instanceNumber+"\t"+line);
                    //System.in.read();
                }
                Writer.append(resultPath, instanceNumber + "\n");
            }
        }

        in.close();

        System.out.println("Test set instances: " + instanceNumber);

        long endTime = System.nanoTime();
        long duration = endTime - startTime;
        double seconds = (double) duration / 1000000000.0;

        System.out.println("Testing Ensemble completed in " + duration + " (ns) or " + seconds + " (s).");
    } catch (Exception e) {
        System.out.println("Could not test Ensemble classifier due to an error.");
    }
}

From source file:cs.man.ac.uk.predict.Predictor.java

License:Open Source License

public static void makePredictionsEnsembleStream(String trainPath, String testPath, String resultPath) {
    System.out.println("Training set: " + trainPath);
    System.out.println("Test set: " + testPath);

    /**//from  w w  w .  j av a  2s. co  m
     * The ensemble classifiers. This is a heterogeneous ensemble.
     */
    J48 learner1 = new J48();
    SMO learner2 = new SMO();
    NaiveBayes learner3 = new NaiveBayes();
    MultilayerPerceptron learner5 = new MultilayerPerceptron();

    System.out.println("Training Ensemble.");
    long startTime = System.nanoTime();
    try {
        BufferedReader reader = new BufferedReader(new FileReader(trainPath));
        Instances data = new Instances(reader);
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println("Training data length: " + data.numInstances());

        learner1.buildClassifier(data);
        learner2.buildClassifier(data);
        learner3.buildClassifier(data);
        learner5.buildClassifier(data);

        long endTime = System.nanoTime();
        long nanoseconds = endTime - startTime;
        double seconds = (double) nanoseconds / 1000000000.0;
        System.out.println("Training Ensemble completed in " + nanoseconds + " (ns) or " + seconds + " (s).");
    } catch (IOException e) {
        System.out.println("Could not train Ensemble classifier IOException on training data file.");
    } catch (Exception e) {
        System.out.println("Could not train Ensemble classifier Exception building model.");
    }

    try {
        // A different ARFF loader used here (compared to above) as
        // the ARFF file may be extremely large. In which case the whole
        // file cannot be read in. Instead it is read in incrementally.
        ArffLoader loader = new ArffLoader();
        loader.setFile(new File(testPath));

        Instances data = loader.getStructure();
        data.setClassIndex(data.numAttributes() - 1);

        System.out.println("Ensemble Classifier is ready.");
        System.out.println("Testing on all instances avaialable.");

        startTime = System.nanoTime();

        int instanceNumber = 0;

        // label instances
        Instance current;

        while ((current = loader.getNextInstance(data)) != null) {
            instanceNumber += 1;

            double classification1 = learner1.classifyInstance(current);
            double classification2 = learner2.classifyInstance(current);
            double classification3 = learner3.classifyInstance(current);
            double classification5 = learner5.classifyInstance(current);

            // All classifiers must agree. This is a very primitive ensemble strategy!
            if (classification1 == 1 && classification2 == 1 && classification3 == 1 && classification5 == 1) {
                Writer.append(resultPath, instanceNumber + "\n");
            }
        }

        System.out.println("Test set instances: " + instanceNumber);

        long endTime = System.nanoTime();
        long duration = endTime - startTime;
        double seconds = (double) duration / 1000000000.0;

        System.out.println("Testing Ensemble completed in " + duration + " (ns) or " + seconds + " (s).");
    } catch (Exception e) {
        System.out.println("Could not test Ensemble classifier due to an error.");
    }
}

From source file:cs.man.ac.uk.predict.Predictor.java

License:Open Source License

public static void makePredictionsJ48(String trainPath, String testPath, String resultPath) {
    /**//from www  .ja va 2s .c  o  m
     * The decision tree classifier.
     */
    J48 learner = new J48();

    System.out.println("Training set: " + trainPath);
    System.out.println("Test set: " + testPath);

    System.out.println("Training J48");
    long startTime = System.nanoTime();
    try {
        BufferedReader reader = new BufferedReader(new FileReader(trainPath));
        Instances data = new Instances(reader);
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println("Training data length: " + data.numInstances());
        learner.buildClassifier(data);

        long endTime = System.nanoTime();
        long nanoseconds = endTime - startTime;
        double seconds = (double) nanoseconds / 1000000000.0;
        System.out.println("Training J48 completed in " + nanoseconds + " (ns) or " + seconds + " (s)");
    } catch (IOException e) {
        System.out.println("Could not train J48 classifier IOException on training data file");
    } catch (Exception e) {
        System.out.println("Could not train J48 classifier Exception building model");
    }

    try {
        // Prepare data for testing
        //BufferedReader reader = new BufferedReader( new FileReader(testPath));
        //Instances data = new Instances(reader);
        //data.setClassIndex(data.numAttributes() - 1);

        ArffLoader loader = new ArffLoader();
        loader.setFile(new File(testPath));
        Instances data = loader.getStructure();
        data.setClassIndex(data.numAttributes() - 1);

        System.out.println("J48 Classifier is ready.");
        System.out.println("Testing on all instances avaialable.");
        System.out.println("Test set instances: " + data.numInstances());

        startTime = System.nanoTime();

        int instanceNumber = 0;

        // label instances
        Instance current;

        //for (int i = 0; i < data.numInstances(); i++) 
        while ((current = loader.getNextInstance(data)) != null) {
            instanceNumber += 1;

            //double classification = learner.classifyInstance(data.instance(i));
            double classification = learner.classifyInstance(current);
            //String instanceClass= Double.toString(data.instance(i).classValue());

            if (classification == 1)// Predicted positive, actually negative
            {
                Writer.append(resultPath, instanceNumber + "\n");
            }
        }

        long endTime = System.nanoTime();
        long duration = endTime - startTime;
        double seconds = (double) duration / 1000000000.0;

        System.out.println("Testing J48 completed in " + duration + " (ns) or " + seconds + " (s)");
    } catch (Exception e) {
        System.out.println("Could not test J48 classifier due to an error");
    }
}

From source file:de.citec.sc.matoll.classifiers.WEKAclassifier.java

/**
 * Classifies a single provenance entry and returns the predicted class with its probability.
 *
 * <p>The entry's annotation is set to 1, the entry is written to {@code tmp.arff} through
 * {@code writeVectors}, and that file is read back instance by instance. For each instance
 * the classifier {@code cls} supplies the predicted class value and the class distribution;
 * the map receives the predicted class (truncated to {@code int}) as key and the
 * distribution entry at that index as value.
 *
 * @param provenance     the entry to classify; its annotation is overwritten with 1
 * @param pattern_lookup passed through to {@code writeVectors}
 * @param pos_lookup     passed through to {@code writeVectors}
 * @return a map from predicted class index to the distribution value for that class
 * @throws IOException if the temporary ARFF file cannot be read
 * @throws Exception   if writing the vectors or classifying fails
 */
public HashMap<Integer, Double> predict(Provenance provenance, Set<String> pattern_lookup,
        Set<String> pos_lookup) throws IOException, Exception {

    // We want to predict that the entry is true, so it is annotated with 1 before writing.
    provenance.setAnnotation(1);
    List<Provenance> tmp_prov = new ArrayList<Provenance>();
    tmp_prov.add(provenance);
    writeVectors(tmp_prov, "tmp.arff", pattern_lookup, pos_lookup);

    ArffLoader loader = new ArffLoader();
    loader.setFile(new File("tmp.arff"));
    Instances structure = loader.getStructure();
    structure.setClassIndex(structure.numAttributes() - 1);
    HashMap<Integer, Double> hm = new HashMap<Integer, Double>();

    Instance current;
    while ((current = loader.getNextInstance(structure)) != null) {
        // The original notes state the predicted value can only be 0 or 1 (two classes).
        double value = cls.classifyInstance(current);
        double[] percentage = cls.distributionForInstance(current);

        int prediction = (int) value;
        hm.put(prediction, percentage[prediction]);
    }
    return hm;
}

From source file:fr.loria.synalp.jtrans.phonetiseur.Classifieurs.java

License:Open Source License

/**
 * Reads the ARFF file {@code repertoireFichiersARFF + partialFilename + ".arff"} as UTF-8 and
 * returns the result of {@code ArffLoader.getStructure()} with the last attribute set as the
 * class attribute.
 *
 * @param partialFilename file name without directory and without the {@code .arff} extension
 * @return the instances object produced by {@code getStructure()}, class index set
 * @throws IOException if the file cannot be opened or read
 */
private Instances loadInstances(String partialFilename) throws IOException {
    final String arffPath = repertoireFichiersARFF + partialFilename + ".arff";

    // Weka does not document which charset it uses, so the reader is installed by hand to
    // guarantee the file is decoded as UTF-8 (possibly overkill).
    ArffLoader utf8Loader = new ArffLoader() {
        {
            m_sourceReader = new InputStreamReader(new FileInputStream(arffPath), "UTF-8");
        }
    };

    Instances header = utf8Loader.getStructure();
    int lastAttribute = header.numAttributes() - 1;
    header.setClassIndex(lastAttribute);
    return header;
}

From source file:gov.va.chir.tagline.dao.FileDao.java

License:Open Source License

/**
 * Reads the header of an ARFF file.
 *
 * @param file the ARFF file to open
 * @return the value returned by {@code ArffLoader.getStructure()} for that file
 * @throws IOException if the file cannot be opened or its header cannot be read
 */
private static Instances loadHeader(final File file) throws IOException {
    final ArffLoader headerReader = new ArffLoader();
    headerReader.setFile(file);
    final Instances header = headerReader.getStructure();
    return header;
}

From source file:gr.demokritos.iit.cpgislanddetection.analysis.VectorSequenceDetector.java

License:Apache License

/**
 * Builds a {@code NaiveBayesUpdateable} classifier from a fixed ARFF file, feeding it the
 * file's instances one at a time.
 *
 * @param sequences not read by this constructor
 * @param labels    not read by this constructor
 * @throws FileNotFoundException declared by the original signature
 * @throws IOException           if the ARFF file cannot be read
 * @throws Exception             if building or updating the classifier fails
 */
public VectorSequenceDetector(List<BaseSequence> sequences, List<String> labels)
        throws FileNotFoundException, IOException, Exception {

    // Original author's plan (translated): for all sequences, take each sequence's vector
    // from the analyzer, attach its label, and update the classifier.
    // NOTE(review): neither parameter is used and the trained classifier is only a local
    // variable, so it appears to be discarded when the constructor returns - confirm intent.

    // Open the data file and read its header.
    final File arffFile = new File("/Desktop/filesForWeka/2o_peirama/dataForWeka.arff");
    final ArffLoader arffSource = new ArffLoader();
    arffSource.setFile(arffFile);

    // The last attribute is the class attribute.
    final Instances header = arffSource.getStructure();
    header.setClassIndex(header.numAttributes() - 1);

    // Initialise the classifier on the header, then train it row by row.
    final NaiveBayesUpdateable incrementalBayes = new NaiveBayesUpdateable();
    incrementalBayes.buildClassifier(header);

    Instance row = arffSource.getNextInstance(header);
    while (row != null) {
        incrementalBayes.updateClassifier(row);
        row = arffSource.getNextInstance(header);
    }
}