Java tutorial
/** * Copyright 2012, Wisdom Omuya. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.deafgoat.ml.prognosticator; // Java import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; // Weka import weka.core.Instances; import weka.core.converters.CSVLoader; import weka.core.converters.ConverterUtils.DataSource; /** * Utility to read from several data sources. This class provides an interface * to read from ARFF, CSV, and MongoDB. For MongoDB, it works with simple * collections with no nested fields. */ public class InstancesReader { /** * @return all instances read from the data source */ public Instances getInstances() { return _instances; } /** * Read from a .ARFF file * * @throws IOException * If the ARFF file can not be found * @return the set of instances read */ public Instances readFromARFF() throws IOException { BufferedReader reader = new BufferedReader(new FileReader(_filename)); _instances = new Instances(reader); reader.close(); return _instances; } /** * Read from a .CSV file * * @throws IOException * @return the set of instances read */ public Instances readFromCSV() throws IOException { CSVLoader loader = new CSVLoader(); loader.setSource(new File(_filename)); _instances = loader.getDataSet(); return _instances; } /** * Read directly from a MongoDB collection * * @param csv * The CSV file to read into * @throws Exception * If collection can not be read * @return the set of instances read */ public Instances readFromMongoDB(String csv) throws Exception { _m2v = new Mongo2CSV(_host, _port, _db, _coll); _m2v.setFields(_fields); _m2v.writeCSV(csv); _m2v.close(); _source = new DataSource(csv); _instances = _source.getDataSet(); return _instances; } /** * working collection */ private String _coll; /** * working database */ private String _db; /** * working host */ private String _host; /** * working instances to read to */ private Instances _instances; /** * working port */ private Integer _port; /** * sets of fields to read */ private String[] _fields; /** * the file to write to */ private String _filename; /** * Mongo2CSV handle */ private Mongo2CSV _m2v; /** * handler for data source */ private DataSource _source; /** * @param filename * The source file to read from */ public InstancesReader(String filename) { _filename = filename; } /** * @param host * The hostname of the database * @param port * The port number of the database * @param db * The name of the database * @param coll * The name of the collection * @param fields * The set of fields to read from the collection */ public InstancesReader(String host, Integer port, String db, String coll, String[] fields) { _host = host; _port = port; _db = db; _coll = coll; _fields = fields; } }