CSV file reader : CSV File « Development Class « Java






CSV file reader

  
/*
 *  Copyright (C) 2010 takaji
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
//package dakside.csv;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * CSV file reader
 * 
 * @author Takaji
 */
public final class CSVFileReader {

    private CSVFormat format;
    private final BufferedReader reader;
    private int col = 1;
    private int row = 1;
    private StringBuilder element = new StringBuilder(); // store current
    // element
    private CSVLine currentLine = null; // current CSV line
    private boolean isOpened = false; // a string is opened with text delimiter

    // <editor-fold defaultstate="collapsed" desc="Constructors">
    public CSVFileReader(Reader reader) {
        //validate parameter
        checkNotNull(reader, "Reader cannot be null");
        this.reader = new BufferedReader(reader);
        this.format = new CSVFormat();
    }

    public CSVFileReader(Reader reader, CSVFormat format) {
        //validate parameter
        checkNotNull(reader, "Reader cannot be null");
        this.reader = new BufferedReader(reader);
        this.format = format;
    }

    public CSVFileReader(BufferedReader reader) throws FileNotFoundException {
        //validate parameter
        checkNotNull(reader, "Reader cannot be null");
        this.reader = reader;
        this.format = new CSVFormat();
    }

    public CSVFileReader(BufferedReader reader, CSVFormat format) {
        //validate parameter
        checkNotNull(reader, "Reader cannot be null");
        this.reader = reader;
        this.format = format;
    }

    public CSVFileReader(String filename) throws CSVException {
        //validate parameter
        checkNotNull(filename, "File name cannot be empty");
        try {
            this.reader = new BufferedReader(new FileReader(filename));
        } catch (FileNotFoundException ex) {
            Logger.getLogger(CSVFileReader.class.getName()).log(Level.SEVERE, null, ex);
            throw new CSVException("Cannot create reading buffer", ex);
        }
        this.format = new CSVFormat();
    }

    public CSVFileReader(File f) {
        //validate parameter
        checkNotNull(f, "File object cannot be null");
        try {
            this.reader = new BufferedReader(new FileReader(f));
        } catch (FileNotFoundException ex) {
            Logger.getLogger(CSVFileReader.class.getName()).log(Level.SEVERE, null, ex);
            throw new CSVException("Cannot create reading buffer", ex);
        }
        this.format = new CSVFormat();
    }

    public CSVFileReader(InputStream input) {
        //validate parameter
        checkNotNull(input, "InputStream cannot be null");
        reader = new BufferedReader(new InputStreamReader(input));
        this.format = new CSVFormat();
    }

    public CSVFileReader(String filename, CSVFormat format) {
        //validate parameter
        checkNotNull(filename, "File name cannot be empty");
        try {
            this.reader = new BufferedReader(new FileReader(filename));
        } catch (FileNotFoundException ex) {
            Logger.getLogger(CSVFileReader.class.getName()).log(Level.SEVERE, null, ex);
            throw new CSVException("Cannot create reading buffer", ex);
        }
        this.format = format;
    }

    public CSVFileReader(File f, CSVFormat format) {
        //validate parameter
        checkNotNull(f, "File object cannot be null");
        try {
            this.reader = new BufferedReader(new FileReader(f));
        } catch (FileNotFoundException ex) {
            Logger.getLogger(CSVFileReader.class.getName()).log(Level.SEVERE, null, ex);
            throw new CSVException("Cannot create reading buffer", ex);
        }
        this.format = format;
    }

    public CSVFileReader(InputStream input, CSVFormat format) {
        //validate parameter
        checkNotNull(input, "InputStream cannot be null");
        reader = new BufferedReader(new InputStreamReader(input));
        this.format = format;
    }

    // </editor-fold>
    /**
     * read next char
     */
    private int read() throws IOException {
        int next = -1;
        next = reader.read();
        if (next != -1) {
            col++;
        }
        return next;
    }

    /**
     * Do carriage return
     */
    private void carriageReturn() {
        synchronized (this) {
            row++;
            col = 1;
        }
    }

    /**
     * Finished read a cell, flush to current line
     */
    private void flushElement() {
        currentLine.add(element.toString());
        element.setLength(0); // reset current element
        isOpened = false;
    }

    /**
     * Parse a CSV string to data
     * 
     * @param rawData
     * @return a simple CSV object with data. null if the raw data cannot be
     *         parse
     * @throw CSVException
     */
    public CSVLine readLine() throws CSVException {
        if (reader == null) {
            throw new CSVException("No reader found.");
        }
        synchronized (this) {
            element = new StringBuilder(); // store current
            // element
            currentLine = null; // current CSV line
            isOpened = false; // a string is opened with text delimiter

            try {
                int currentInt = -1;
                // parse data
                while ((currentInt = read()) != -1) {
                    if (currentLine == null) {
                        currentLine = new CSVLine();
                    }

                    // reach field delimiter
                    if (currentInt == format.getTextDelimiter()) {
                        if (isOpened) {
                            // check next char is escape?
                            if ((currentInt = read()) != -1) {
                                if (currentInt == format.getTextDelimiter()) {
                                    // is special char
                                    element.append(format.getTextDelimiter());
                                    continue;
                                } else {
                                    // is close string then flush
                                    flushElement();
                                    // next char must be field delimiter
                                    //or a line terminator
                                    if (currentInt == format.getFieldDelimiter()) {
                                        continue;
                                    } else if (currentInt == format.getLineTerminator()) {
                                        carriageReturn();
                                        return currentLine;
                                    } else {
                                        raiseExpected(format.getFieldDelimiter(),
                                                (char) currentInt);
                                    }
                                    continue;
                                }
                            } else {
                                flushElement();
                                continue;
                            }
                        } else {
                            // is first open of the string
                            isOpened = true;
                            continue;
                        }
                    }

                    if (!isOpened) {
                        // ignore character
                        if (format.isIgnored((char) currentInt)) {
                            continue; // just ignore
                        } else if (currentInt == format.getLineTerminator()) {
                            flushElement();

                            if (currentLine != null) {
                                carriageReturn();
                            }
                            return currentLine;
                        }
                    }

                    // meet field delimiter
                    if (currentInt == format.getFieldDelimiter()) {
                        if (isOpened) {
                            element.append((char) currentInt);
                            continue;
                        } else {
                            flushElement();
                            continue;
                        }
                    }

                    // otherwise, it's a normal character, just add
                    element.append((char) currentInt);
                }// continue while

                // flush last element if needed
                if (element != null && currentLine != null) {
                    flushElement();
                }
            } catch (Exception ex) {
                ex.printStackTrace();
                raiseException(ex);
            }
            // if currentLine exists, return carriage
            if (currentLine != null) {
                carriageReturn();
            }
        }// end of synchronize
        return currentLine;
    }

    // <editor-fold defaultstate="collapsed" desc="Exception related tasks">
    /**
     * Expected char [expected] but found another char [actual] so we
     * throw an exception here.
     * @param expected
     * @param actual
     */
    private void raiseExpected(char expected, char actual) {
        raiseException(new Exception(String.format(
                "Invalid data. Expected [%s] but [%s] was found.", String.valueOf(expected), String.valueOf(actual))));
    }

    private void raiseException(Exception ex) {
        throw new CSVException("Error happened while processing at row: " + row
                + " - col: " + (col - 1) + ")", ex);
    }

    /**
     * Check if argument null then throw a new CSV Exception
     * @param argument
     */
    public static void checkNotNull(Object argument, String message) throws CSVException {
        if (argument == null) {
            throw new CSVException(message);
        }
    }

    /**
     * Close current reader and underline stream
     */
    public void close() {
        try {
            synchronized (reader) {
                reader.close();
            }
        } catch (Exception ex) {
            Logger.getLogger(CSVFileReader.class.getName()).log(Level.SEVERE,
                    null, ex);
        }
    }
    // </editor-fold>

    // <editor-fold defaultstate="collapsed" desc="Quick CSV reading">
    /**
     * Parse a CSV stream to data
     * 
     * @param stream
     *            InputStream
     * @return a simple CSV object with data. null if the raw data cannot be
     *         parse
     */
    public static CSVFile parse(InputStream stream) throws CSVException {
        try {
            CSVFileReader reader = new CSVFileReader(stream);
            CSVFile file = new CSVFile();
            CSVLine line = null;
            do {
                line = reader.readLine();
                file.append(line);
            } while (line != null);
            return file;
        } catch (Exception ex) {
            throw new CSVException(ex);
        }
    }

    /**
     * Parse file content to CSV
     * 
     * @param path
     * @return
     */
    public static CSVFile parseFile(String path) {
        if (path == null || !(new File(path)).exists()) {
            throw new CSVException("Invalid path");
        }
        try {
            return parse(new FileInputStream(path));
        } catch (Exception ex) {
            Logger.getLogger(CSVFile.class.getName()).log(Level.SEVERE, null,
                    ex);
            throw new CSVException(ex);
        }
    }
    // </editor-fold>
}
class CSVFile {

    private ArrayList<CSVLine> lines; //lines inside a CSV file

    /**
     * Default constructor
     */
    public CSVFile() {
        this.lines = new ArrayList<CSVLine>();
    }

    public CSVLine[] getLines() {
        return this.lines.toArray(new CSVLine[0]);
    }

    /**
     * Get line at a specified index
     * @param idx
     * @return
     * @throws IndexOutOfBoundsException
     */
    public CSVLine getLine(int idx) throws IndexOutOfBoundsException{
        return this.lines.get(idx);
    }

    /**
     * Add a new line to current csv
     * @return new line object
     */
    public CSVLine newLine() {
        CSVLine line = new CSVLine();
        this.lines.add(line);
        return line;
    }

    /**
     * get number of line
     * @return number of line
     */
    public int size() {
        return this.lines.size();
    }

    /**
     * discard a line at the specified index
     * @param idx
     */
    public void discard(int idx) {
        if (idx >= 0 && idx < size()) {
            this.lines.remove(idx);
        }
    }

    /**
     * discard empty record
     */
    public void discardEmpty() {
        for (int i = this.lines.size() - 1; i > -1; i--) {
            if (this.lines.get(i) == null || this.lines.get(i).isEmpty()) {
                discard(i);
            }
        }
    }

    /**
     * append a CSV line to file (line is ignored if null)
     * @param line
     */
    void append(CSVLine line) {
        if (line == null) {
            return;
        }
        this.lines.add(line);
    }
}
class CSVException extends RuntimeException {

    private static final long serialVersionUID = 1L;

    public CSVException() {
    }

    public CSVException(Throwable throwable) {
        super(throwable);
    }

    public CSVException(String message) {
        super(message);
    }

    public CSVException(String message, Throwable throwable) {
        super(message, throwable);
    }

}
class CSVLine {

    private ArrayList<Object> elements;

    /**
     * Default constructor
     */
    public CSVLine() {
        this.elements = new ArrayList<Object>();
    }

    /**
     * Get all elements (cells) inside a line
     * @return empty String array if there's no element found inside
     */
    public Object[] getElements() {
        return this.elements.toArray();
    }

    /**
     * Get element at index
     * @param idx
     * @return
     * @throws IndexOutOfBoundsException
     */
    public Object getElementAt(int idx) throws IndexOutOfBoundsException {
        return elements.get(idx);
    }

    /**
     * Set element at
     * @param idx
     * @param value
     * @throws IndexOutOfBoundsException
     */
    public void setElementAt(int idx, Object value) throws IndexOutOfBoundsException {
        elements.set(idx, value);
    }

    /**
     * count elements inside this line
     * @return
     */
    public int size() {
        return elements.size();
    }

    /**
     * Add a new element
     */
    public CSVLine add(Object obj) {
        this.elements.add(obj);
        return this;
    }

    /**
     * if CSV line is empty
     * @return
     */
    public boolean isEmpty() {
        return this.elements.isEmpty();
    }

    /**
     * Remove all elements
     */
    public void clear() {
        this.elements.clear();
    }

    /**
     * Remove element at a specified index
     * @param idx
     * @throws IndexOutOfBoundsException
     */
    public void remove(int idx) throws IndexOutOfBoundsException {
        elements.remove(idx);
    }

    /**
     * Trim down or expand with null cell to a new length
     * @param newLength
     */
    public void setLength(int newLength) {
        if (elements.size() > newLength) {
            //trim down
            while (elements.size() > newLength) {
                elements.remove(elements.size() - 1);
            }
        } else {
            //add more
            while (elements.size() < newLength) {
                add(null);
            }
        }
    }
}
class CSVFormat {

    private String charset;
    private char fieldDelimiter;
    private char textDelimiter;
    private char lineTerminator;
    private char[] ignoreCharacters;

    /**
     * Construct a standard CSV format
     */
    public CSVFormat() {
        this.setCharset("UTF-8");
        this.setFieldDelimiter(',');
        this.setTextDelimiter('"');

        //auto detect line separator
        String s = System.getProperty("line.separator");
        if (s.length() > 0) {
            this.setLineTerminator(s.charAt(0));
            char[] ignoreChars = new char[s.length() - 1];
            for (int i = 1; i < s.length(); i++) {
                ignoreChars[i-1] = s.charAt(i);
            }
            this.setIgnoreCharacters(ignoreChars);
        } else {
            this.setLineTerminator('\n');
            this.setIgnoreCharacters(new char[]{'\r'});
        }
    }

    /**
     * @param lineTerminator
     *            the lineTerminator to set
     */
    public void setLineTerminator(char lineTerminator) {
        this.lineTerminator = lineTerminator;
    }

    /**
     * @return the lineTerminator
     */
    public char getLineTerminator() {
        return lineTerminator;
    }

    /**
     * @param ignoreCharacters
     *            the ignoreCharacters to set
     */
    public void setIgnoreCharacters(char[] ignoreCharacters) {
        this.ignoreCharacters = ignoreCharacters;
    }

    /**
     * @return the ignoreCharacters
     */
    public char[] getIgnoreCharacters() {
        return ignoreCharacters;
    }

    /**
     * @param charset
     *            the charset to set
     */
    public void setCharset(String charset) {
        this.charset = charset;
    }

    /**
     * @return the charset
     */
    public String getCharset() {
        return charset;
    }

    /**
     * @param fieldDelimiter
     *            the fieldDelimiter to set
     */
    public void setFieldDelimiter(char fieldDelimiter) {
        this.fieldDelimiter = fieldDelimiter;
    }

    /**
     * @return the fieldDelimiter
     */
    public char getFieldDelimiter() {
        return fieldDelimiter;
    }

    /**
     * @param textDelimiter
     *            the textDelimiter to set
     */
    public void setTextDelimiter(char textDelimiter) {
        this.textDelimiter = textDelimiter;
    }

    /**
     * @return the textDelimiter
     */
    public char getTextDelimiter() {
        return textDelimiter;
    }

    /**
     * is ignored characters
     * @param c
     * @return
     */
    public boolean isIgnored(char c) {
        return Arrays.binarySearch(ignoreCharacters, c) >= 0;
    }
}

   
    
  








Related examples in the same category

1.A utility class that parses a Comma Separated Values (CSV) file
2.Simple demo of CSV parser classSimple demo of CSV parser class
3.CSV in action: lines from a file and printCSV in action: lines from a file and print
4.Simple demo of CSV matching using Regular Expressions
5.Helper class to write table data to a csv-file (comma separated values).
6.Builds a bracketed CSV list from the array
7.Builds a CSV list from the specified String[], separator string and quote string
8.Builds a CSV list from the specified two dimensional String[][], separator string and quote string.
9.The csv tokenizer class allows an application to break a Comma Separated Value format into tokens.
10.The CSVQuoter is a helper class to encode a string for the CSV file format.
11.A stream based parser for parsing delimited text data from a file or a stream
12.Reads CSV (Comma Separated Value) files
13.Writes CSV (Comma Separated Value) files
14.Csv Converter
15.CVS reader
16.CSV Writer
17.CSV parser
18.Csv Reader
19.A very simple CSV parser released under a commercial-friendly license.
20.A very simple CSV reader released under a commercial-friendly license.
21.A very simple CSV writer released under a commercial-friendly license.
22.CSV file writer
23.CSV Tokenizer Util
24.Parse a line of text in CSV format and returns array of Strings Implementation of parsing is extracted from open-csv.
25.CSV Writer
26.Parse comma-separated list of ints and return as array
27.Parse comma-separated list of longs and return as array