# Source code for qikify.models.dataset

"""
.. warning:: Deprecated in version 0.2.

"""
import numpy as np
import gzip, StringIO, sys, os, csv, pandas

from qikify.helpers import is1D
from qikify.term_helpers import colors
from .dotdict import dotdict

class Dataset(dotdict):
    """Fundamental data structure of the Qikify framework.

    The loaded table is assigned to ``self.raw``. ``__repr__`` summarises
    every entry reported by ``self.keys()``.
    NOTE(review): this assumes ``dotdict`` exposes attributes as mapping
    entries, so that ``raw`` shows up in ``keys()`` -- confirm against
    ``dotdict``.
    """

    def __init__(self, filename=None, files=None, dataset=None):
        """Build a dataset from one file, several files, or existing data.

        The sources are checked in the order ``filename``, ``files``,
        ``dataset``; the first one that is not ``None`` is used. When all
        three are ``None``, ``self.raw`` is set to ``None``.

        Args:
            filename: path of an individual file to load.
            files: sequence of file paths; the files are loaded and
                concatenated row-wise with a fresh index.
            dataset: an existing dataset, stored as-is.

        Raises:
            ValueError: if ``files`` is empty, or if a file does not have a
                supported extension (see ``_loadfile``).
        """
        if filename is not None:
            self.raw = self._loadfile(filename)
        elif files is not None:
            if len(files) == 0:
                raise ValueError("files must contain at least one filename.")
            frames = [self._loadfile(path) for path in files]
            if len(frames) == 1:
                # Nothing to merge; keep the loaded frame untouched.
                self.raw = frames[0]
            else:
                # A single concat avoids re-copying the accumulated rows
                # once per additional file.
                self.raw = pandas.concat(frames, axis=0, ignore_index=True)
        elif dataset is not None:
            self.raw = dataset
        else:
            self.raw = None

    def _loadfile(self, filename):
        """Read one CSV file, plain or gzip-compressed, with pandas.

        The file type is taken from the text after the last ``'.'`` in
        ``filename``: ``csv`` is read directly, ``gz`` is read as
        gzip-compressed CSV.

        Args:
            filename: path of the file to read.

        Returns:
            The object returned by ``pandas.read_csv``.

        Raises:
            ValueError: if the extension is neither ``csv`` nor ``gz``.
        """
        filetype = filename.split('.')[-1]
        if filetype == 'csv':
            return pandas.read_csv(filename)
        if filetype == 'gz':
            # Let pandas decompress while parsing instead of reading the
            # whole payload into an intermediate in-memory buffer first.
            return pandas.read_csv(filename, compression='gzip')
        raise ValueError("Wrong file type, expected .csv or .csv.gz.")

    def __repr__(self):
        """Return a summary table: one row per entry with #rows and #cols.

        Entries for which ``is1D`` is true are reported with one column.
        Entries that are ``None`` (e.g. ``raw`` on an empty dataset) are
        reported as 0 rows and 0 columns.
        """
        palette = colors()
        rule = '===============================================\n'
        header = (palette.GREEN + rule +
                  'Dataset #Rows #Cols \n' +
                  rule + palette.ENDC)

        rows = []
        for key in self.keys():
            value = self[key]
            if value is None:
                # None has no .shape; report it as empty rather than
                # letting repr() raise AttributeError.
                n_rows, n_cols = 0, 0
            elif is1D(value):
                n_rows, n_cols = value.shape[0], 1
            else:
                n_rows, n_cols = value.shape[0], value.shape[1]
            rows.append('%-30s %5d %5d\n' % (key, n_rows, n_cols))
        return header + ''.join(rows)
if __name__ == "__main__":
    # When run as a script, just construct an empty dataset.
    d = Dataset()