com.opengamma.strata.collect.io.CsvFile.java Source code

Java tutorial

Introduction

Here is the source code for com.opengamma.strata.collect.io.CsvFile.java

Source

/**
 * Copyright (C) 2015 - present by OpenGamma Inc. and the OpenGamma group of companies
 * 
 * Please see distribution for license.
 */
package com.opengamma.strata.collect.io;

import static com.opengamma.strata.collect.Guavate.toImmutableList;

import java.io.Reader;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.io.CharSource;
import com.google.common.io.CharStreams;
import com.opengamma.strata.collect.ArgChecker;
import com.opengamma.strata.collect.Unchecked;

/**
 * A CSV file.
 * <p>
 * Represents a CSV file together with the ability to parse it from a {@link CharSource}.
 * The separator may be specified, allowing TSV files (tab-separated) and other similar formats to be parsed.
 * <p>
 * This class loads the entire CSV file into memory.
 * To process the CSV file row-by-row, use {@link CsvIterator}.
 * <p>
 * The CSV file format is a general-purpose comma-separated value format.
 * The format is parsed line-by-line, with lines separated by CR, LF or CRLF.
 * Each line can contain one or more fields.
 * Each field is separated by a comma character ({@literal ,}) or tab.
 * Any field may be quoted using a double quote at the start and end.
 * The content of a quoted field may include commas and additional double quotes.
 * Two adjacent double quotes in a quoted field will be replaced by a single double quote.
 * Quoted fields are not trimmed. Non-quoted fields are trimmed.
 * <p>
 * The first line may be treated as a header row.
 * The header row is accessed separately from the data rows.
 * <p>
 * Blank lines are ignored.
 * A line is treated as a comment, and ignored, if it starts with a hash '#' or semicolon ';'.
 */
public final class CsvFile {

    /**
     * The header row, ordered as the headers appear in the file.
     */
    private final ImmutableList<String> headers;
    /**
     * The header map, transformed for case-insensitive searching.
     */
    private final ImmutableMap<String, Integer> searchHeaders;
    /**
     * The data rows in the CSV file.
     */
    private final ImmutableList<CsvRow> rows;

    //------------------------------------------------------------------------
    /**
     * Parses the specified source as a CSV file, using a comma as the separator.
     * 
     * @param source  the CSV file resource
     * @param headerRow  whether the source has a header row, an empty source must still contain the header
     * @return the CSV file
     * @throws UncheckedIOException if an IO exception occurs
     * @throws IllegalArgumentException if the file cannot be parsed
     */
    public static CsvFile of(CharSource source, boolean headerRow) {
        return of(source, headerRow, ',');
    }

    /**
     * Parses the specified source as a CSV file where the separator is specified and might not be a comma.
     * <p>
     * This overload allows the separator to be controlled.
     * For example, a tab-separated file is very similar to a CSV file, the only difference is the separator.
     * 
     * @param source  the file resource
     * @param headerRow  whether the source has a header row, an empty source must still contain the header
     * @param separator  the separator used to separate each field, typically a comma, but a tab is sometimes used
     * @return the CSV file
     * @throws UncheckedIOException if an IO exception occurs
     * @throws IllegalArgumentException if the file cannot be parsed
     */
    public static CsvFile of(CharSource source, boolean headerRow, char separator) {
        ArgChecker.notNull(source, "source");
        List<String> lines = Unchecked.wrap(() -> source.readLines());
        return create(lines, headerRow, separator);
    }

    /**
     * Parses the specified reader as a CSV file, using a comma as the separator.
     * <p>
     * This factory method takes a {@link Reader}.
     * Callers are encouraged to use {@link CharSource} instead of {@code Reader}
     * as it allows the resource to be safely managed.
     * <p>
     * To use a separator other than a comma, such as a tab, use {@link #of(Reader, boolean, char)}.
     * 
     * @param reader  the file resource
     * @param headerRow  whether the source has a header row, an empty source must still contain the header
     * @return the CSV file
     * @throws UncheckedIOException if an IO exception occurs
     * @throws IllegalArgumentException if the file cannot be parsed
     */
    public static CsvFile of(Reader reader, boolean headerRow) {
        return of(reader, headerRow, ',');
    }

    /**
     * Parses the specified reader as a CSV file where the separator is specified and might not be a comma.
     * <p>
     * This factory method takes a {@link Reader}.
     * Callers are encouraged to use {@link CharSource} instead of {@code Reader}
     * as it allows the resource to be safely managed.
     * <p>
     * This factory method allows the separator to be controlled.
     * For example, a tab-separated file is very similar to a CSV file, the only difference is the separator.
     * 
     * @param reader  the file resource
     * @param headerRow  whether the source has a header row, an empty source must still contain the header
     * @param separator  the separator used to separate each field, typically a comma, but a tab is sometimes used
     * @return the CSV file
     * @throws UncheckedIOException if an IO exception occurs
     * @throws IllegalArgumentException if the file cannot be parsed
     */
    public static CsvFile of(Reader reader, boolean headerRow, char separator) {
        // report the argument under the name this overload actually declares
        ArgChecker.notNull(reader, "reader");
        List<String> lines = Unchecked.wrap(() -> CharStreams.readLines(reader));
        return create(lines, headerRow, separator);
    }

    // creates the file from the raw lines, splitting off the first parsed line as the header if requested
    private static CsvFile create(List<String> lines, boolean headerRow, char separator) {
        List<ImmutableList<String>> parsedCsv = parseAll(lines, separator);
        if (!headerRow) {
            return new CsvFile(ImmutableList.of(), ImmutableMap.of(), ImmutableList.copyOf(parsedCsv));
        }
        if (parsedCsv.isEmpty()) {
            throw new IllegalArgumentException("Could not read header row from empty CSV file");
        }
        // the first non-blank, non-comment line is the header, the remainder are data rows
        ImmutableList<String> headers = parsedCsv.remove(0);
        return new CsvFile(headers, buildSearchHeaders(headers), ImmutableList.copyOf(parsedCsv));
    }

    //------------------------------------------------------------------------
    /**
     * Obtains an instance from a list of headers and rows.
     * <p>
     * The headers may be an empty list.
     * All the rows must contain a list of the same size, matching the header if present.
     * If there are no headers, the size of the first row is used as the expected size.
     * 
     * @param headers  the headers, empty if no headers
     * @param rows  the data rows
     * @return the CSV file
     * @throws IllegalArgumentException if the rows do not match the headers
     */
    public static CsvFile of(List<String> headers, List<? extends List<String>> rows) {
        ArgChecker.notNull(headers, "headers");
        ArgChecker.notNull(rows, "rows");
        // without headers, the first row defines the expected number of columns
        int size = (headers.isEmpty() && !rows.isEmpty() ? rows.get(0).size() : headers.size());
        if (rows.stream().anyMatch(row -> row.size() != size)) {
            throw new IllegalArgumentException("Invalid data rows, each row must have same columns as header row");
        }
        ImmutableList<String> copiedHeaders = ImmutableList.copyOf(headers);
        ImmutableList<ImmutableList<String>> copiedRows = rows.stream().map(row -> ImmutableList.copyOf(row))
                .collect(toImmutableList());
        return new CsvFile(copiedHeaders, buildSearchHeaders(copiedHeaders), copiedRows);
    }

    //------------------------------------------------------------------------
    // parses the CSV file format, dropping lines that parse to nothing (blank, comment or separator-only)
    // the returned list is mutable so that the caller can remove the header row
    private static List<ImmutableList<String>> parseAll(List<String> lines, char separator) {
        List<ImmutableList<String>> parsedLines = new ArrayList<>();
        for (String line : lines) {
            ImmutableList<String> parsed = parseLine(line, separator);
            if (!parsed.isEmpty()) {
                parsedLines.add(parsed);
            }
        }
        return parsedLines;
    }

    // parse a single line, returning an empty list if the line is empty, a comment or has no field content
    // throws IllegalArgumentException if a quoted field is never closed
    static ImmutableList<String> parseLine(String line, char separator) {
        if (line.length() == 0 || line.startsWith("#") || line.startsWith(";")) {
            return ImmutableList.of();
        }
        ImmutableList.Builder<String> builder = ImmutableList.builder();
        int start = 0;
        // append a separator so that the last field is terminated in the same way as all the others
        String terminated = line + separator;
        int nextSeparator = terminated.indexOf(separator, start);
        while (nextSeparator >= 0) {
            String possible = terminated.substring(start, nextSeparator).trim();
            if (possible.startsWith("\"")) {
                while (true) {
                    // drop the opening quote and all escaped quote pairs,
                    // if the remainder ends with a quote then that quote closes the field
                    if (possible.substring(1).replace("\"\"", "").endsWith("\"")) {
                        // strip the enclosing quotes and unescape doubled quotes
                        possible = possible.substring(1, possible.length() - 1).replace("\"\"", "\"");
                        break;
                    } else {
                        // the separator just found was inside the quoted content, so extend to the next one
                        nextSeparator = terminated.indexOf(separator, nextSeparator + 1);
                        if (nextSeparator < 0) {
                            throw new IllegalArgumentException("Mismatched quotes on line: " + line);
                        }
                        possible = terminated.substring(start, nextSeparator).trim();
                    }
                }
            }
            builder.add(possible);
            start = nextSeparator + 1;
            nextSeparator = terminated.indexOf(separator, start);
        }
        ImmutableList<String> fields = builder.build();
        if (!hasContent(fields)) {
            return ImmutableList.of();
        }
        return fields;
    }

    // determines whether there is any content on a line
    // this handles lines that contain separators but nothing else
    private static boolean hasContent(ImmutableList<String> fields) {
        for (String field : fields) {
            if (!field.trim().isEmpty()) {
                return true;
            }
        }
        return false;
    }

    // build the search headers, mapping each lower-cased header to its column index
    static ImmutableMap<String, Integer> buildSearchHeaders(ImmutableList<String> headers) {
        // need to allow duplicate headers and only store the first instance
        Map<String, Integer> searchHeaders = new HashMap<>();
        for (int i = 0; i < headers.size(); i++) {
            String searchHeader = headers.get(i).toLowerCase(Locale.ENGLISH);
            searchHeaders.putIfAbsent(searchHeader, i);
        }
        return ImmutableMap.copyOf(searchHeaders);
    }

    //------------------------------------------------------------------------
    /**
     * Restricted constructor.
     * <p>
     * Each list of column values is wrapped in a {@link CsvRow} that shares the headers and search headers.
     * 
     * @param headers  the header row
     * @param searchHeaders  the map from lower-cased header to column index
     * @param rows  the data rows
     */
    private CsvFile(ImmutableList<String> headers, ImmutableMap<String, Integer> searchHeaders,
            ImmutableList<ImmutableList<String>> rows) {

        this.headers = headers;
        this.searchHeaders = searchHeaders;
        this.rows = rows.stream().map(cols -> new CsvRow(headers, this.searchHeaders, cols))
                .collect(toImmutableList());
    }

    //------------------------------------------------------------------------
    /**
     * Gets the header row.
     * <p>
     * If there is no header row, an empty list is returned.
     * 
     * @return the header row
     */
    public ImmutableList<String> headers() {
        return headers;
    }

    /**
     * Gets all data rows in the file.
     * 
     * @return the data rows
     */
    public ImmutableList<CsvRow> rows() {
        return rows;
    }

    /**
     * Gets the number of data rows.
     * 
     * @return the number of data rows
     */
    public int rowCount() {
        return rows.size();
    }

    /**
     * Gets a single row.
     * 
     * @param index  the row index, zero-based
     * @return the row
     */
    public CsvRow row(int index) {
        return rows.get(index);
    }

    //-------------------------------------------------------------------------
    /**
     * Checks if this CSV file equals another.
     * <p>
     * The comparison checks the content, which is the headers and the data rows.
     * 
     * @param obj  the other file, null returns false
     * @return true if equal
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (obj instanceof CsvFile) {
            CsvFile other = (CsvFile) obj;
            return headers.equals(other.headers) && rows.equals(other.rows);
        }
        return false;
    }

    /**
     * Returns a suitable hash code for the CSV file.
     * 
     * @return the hash code
     */
    @Override
    public int hashCode() {
        return headers.hashCode() ^ rows.hashCode();
    }

    /**
     * Returns a string describing the CSV file.
     * <p>
     * The string consists of the class name followed by the headers.
     * 
     * @return the descriptive string
     */
    @Override
    public String toString() {
        return "CsvFile" + headers.toString();
    }

}