A utility class that parses a Comma Separated Values (CSV) file : CSV File « Development Class « Java






A utility class that parses a Comma Separated Values (CSV) file

    
/*
Java and XSLT
By Eric M.Burke
1st Edition September 2001 
ISBN: 0-596-00143-6
*/


import java.io.IOException;
import java.util.*;
import org.xml.sax.*;


import java.io.*;
import java.net.URL;

import org.xml.sax.*;
import org.xml.sax.helpers.*;


/**
 * A utility class that parses a Comma Separated Values (CSV) file
 * and outputs its contents using SAX2 events. The format of CSV that
 * this class reads is identical to the export format for Microsoft
 * Excel. For simple values, the CSV file may look like this:
 * <pre>
 * a,b,c
 * d,e,f
 * </pre>
 * Quotes are used as delimiters when the values contain commas:
 * <pre>
 * a,"b,c",d
 * e,"f,g","h,i"
 * </pre>
 * And double quotes are used when the values contain quotes. This parser
 * is smart enough to trim spaces around commas, as well.
 *
 * @author Eric M. Burke
 */
public class CSVXMLReader extends AbstractXMLReader {

    // an empty attribute for use with SAX
    private static final Attributes EMPTY_ATTR = new AttributesImpl();

    /**
     * Parse a CSV file. SAX events are delivered to the ContentHandler
     * that was registered via <code>setContentHandler</code>.
     *
     * @param input the comma separated values file to parse.
     */
    public void parse(InputSource input) throws IOException,
            SAXException {
        // if no handler is registered to receive events, don't bother
        // to parse the CSV file
        ContentHandler ch = getContentHandler();
        if (ch == null) {
            return;
        }

        // convert the InputSource into a BufferedReader
        BufferedReader br = null;
        if (input.getCharacterStream() != null) {
            br = new BufferedReader(input.getCharacterStream());
        } else if (input.getByteStream() != null) {
            br = new BufferedReader(new InputStreamReader(
                    input.getByteStream()));
        } else if (input.getSystemId() != null) {
            java.net.URL url = new URL(input.getSystemId());
            br = new BufferedReader(new InputStreamReader(url.openStream()));
        } else {
            throw new SAXException("Invalid InputSource object");
        }

        ch.startDocument();

        // emit <csvFile>
        ch.startElement("","","csvFile",EMPTY_ATTR);

        // read each line of the file until EOF is reached
        String curLine = null;
        while ((curLine = br.readLine()) != null) {
            curLine = curLine.trim();
            if (curLine.length() > 0) {
                // create the <line> element
                ch.startElement("","","line",EMPTY_ATTR);
                // output data from this line
                parseLine(curLine, ch);
                // close the </line> element
                ch.endElement("","","line");
            }
        }

        // emit </csvFile>
        ch.endElement("","","csvFile");
        ch.endDocument();
    }

    // Break an individual line into tokens. This is a recursive function
    // that extracts the first token, then recursively parses the
    // remainder of the line.
    private void parseLine(String curLine, ContentHandler ch)
        throws IOException, SAXException {

        String firstToken = null;
        String remainderOfLine = null;
        int commaIndex = locateFirstDelimiter(curLine);
        if (commaIndex > -1) {
            firstToken = curLine.substring(0, commaIndex).trim();
            remainderOfLine = curLine.substring(commaIndex+1).trim();
        } else {
            // no commas, so the entire line is the token
            firstToken = curLine;
        }

        // remove redundant quotes
        firstToken = cleanupQuotes(firstToken);

        // emit the <value> element
        ch.startElement("","","value",EMPTY_ATTR);
        ch.characters(firstToken.toCharArray(), 0, firstToken.length());
        ch.endElement("","","value");

        // recursively process the remainder of the line
        if (remainderOfLine != null) {
            parseLine(remainderOfLine, ch);
        }
    }

    // locate the position of the comma, taking into account that
    // a quoted token may contain ignorable commas.
    private int locateFirstDelimiter(String curLine) {
        if (curLine.startsWith("\"")) {
            boolean inQuote = true;
            int numChars = curLine.length();
            for (int i=1; i<numChars; i++) {
                char curChar = curLine.charAt(i);
                if (curChar == '"') {
                    inQuote = !inQuote;
                } else if (curChar == ',' && !inQuote) {
                    return i;
                }
            }
            return -1;
        } else {
            return curLine.indexOf(',');
        }
    }

    // remove quotes around a token, as well as pairs of quotes
    // within a token.
    private String cleanupQuotes(String token) {
        StringBuffer buf = new StringBuffer();
        int length = token.length();
        int curIndex = 0;

        if (token.startsWith("\"") && token.endsWith("\"")) {
            curIndex = 1;
            length--;
        }

        boolean oneQuoteFound = false;
        boolean twoQuotesFound = false;

        while (curIndex < length) {
            char curChar = token.charAt(curIndex);
            if (curChar == '"') {
                twoQuotesFound = (oneQuoteFound) ? true : false;
                oneQuoteFound = true;
            } else {
                oneQuoteFound = false;
                twoQuotesFound = false;
            }

            if (twoQuotesFound) {
                twoQuotesFound = false;
                oneQuoteFound = false;
                curIndex++;
                continue;
            }

            buf.append(curChar);
            curIndex++;
        }

        return buf.toString();
    }
}

/**
 * An abstract class that implements the SAX2 XMLReader interface. The
 * intent of this class is to make it easy for subclasses to act as
 * SAX2 XMLReader implementations. This makes it possible, for example, for
 * them to emit SAX2 events that can be fed into an XSLT processor for
 * transformation.
 *
 * @author Eric M. Burke
 */
abstract class AbstractXMLReader implements org.xml.sax.XMLReader {
    private Map featureMap = new HashMap();
    private Map propertyMap = new HashMap();
    private EntityResolver entityResolver;
    private DTDHandler dtdHandler;
    private ContentHandler contentHandler;
    private ErrorHandler errorHandler;

    /**
     * The only abstract method in this class. Derived classes can parse
     * any source of data and emit SAX2 events to the ContentHandler.
     */
    public abstract void parse(InputSource input) throws IOException,
            SAXException;

    public boolean getFeature(String name)
            throws SAXNotRecognizedException, SAXNotSupportedException {
        Boolean featureValue = (Boolean) this.featureMap.get(name);
        return (featureValue == null) ? false
                : featureValue.booleanValue();
    }

    public void setFeature(String name, boolean value)
            throws SAXNotRecognizedException, SAXNotSupportedException {
        this.featureMap.put(name, new Boolean(value));
    }

    public Object getProperty(String name)
            throws SAXNotRecognizedException, SAXNotSupportedException {
        return this.propertyMap.get(name);
    }

    public void setProperty(String name, Object value)
            throws SAXNotRecognizedException, SAXNotSupportedException {
        this.propertyMap.put(name, value);
    }

    public void setEntityResolver(EntityResolver entityResolver) {
        this.entityResolver = entityResolver;
    }

    public EntityResolver getEntityResolver() {
        return this.entityResolver;
    }

    public void setDTDHandler(DTDHandler dtdHandler) {
        this.dtdHandler = dtdHandler;
    }

    public DTDHandler getDTDHandler() {
        return this.dtdHandler;
    }

    public void setContentHandler(ContentHandler contentHandler) {
        this.contentHandler = contentHandler;
    }

    public ContentHandler getContentHandler() {
        return this.contentHandler;
    }

    public void setErrorHandler(ErrorHandler errorHandler) {
        this.errorHandler = errorHandler;
    }

    public ErrorHandler getErrorHandler() {
        return this.errorHandler;
    }

    public void parse(String systemId) throws IOException, SAXException {
        parse(new InputSource(systemId));
    }
}


           
         
    
    
    
  








Related examples in the same category

1.Simple demo of CSV parser classSimple demo of CSV parser class
2.CSV in action: lines from a file and printCSV in action: lines from a file and print
3.Simple demo of CSV matching using Regular Expressions
4.Helper class to write table data to a csv-file (comma separated values).
5.Builds a bracketed CSV list from the array
6.Builds a CSV list from the specified String[], separator string and quote string
7.Builds a CSV list from the specified two dimensional String[][], separator string and quote string.
8.The csv tokenizer class allows an application to break a Comma Separated Value format into tokens.
9.The CSVQuoter is a helper class to encode a string for the CSV file format.
10.A stream based parser for parsing delimited text data from a file or a stream
11.Reads CSV (Comma Separated Value) files
12.Writes CSV (Comma Separated Value) files
13.Csv Converter
14.CVS reader
15.CSV Writer
16.CSV parser
17.Csv Reader
18.A very simple CSV parser released under a commercial-friendly license.
19.A very simple CSV reader released under a commercial-friendly license.
20.A very simple CSV writer released under a commercial-friendly license.
21.CSV file reader
22.CSV file writer
23.CSV Tokenizer Util
24.Parse a line of text in CSV format and returns array of Strings Implementation of parsing is extracted from open-csv.
25.CSV Writer
26.Parse comma-separated list of ints and return as array
27.Parse comma-separated list of longs and return as array