Example usage for org.apache.commons.csv CSVParser getCurrentLineNumber

List of usage examples for org.apache.commons.csv CSVParser getCurrentLineNumber

Introduction

On this page you can find example usages of the getCurrentLineNumber method of org.apache.commons.csv.CSVParser.

Prototype

public long getCurrentLineNumber() 

Source Link

Document

Returns the current line number in the input stream.

Usage

From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.csv.CSVFileReader.java

/**
 * Ingests a CSV stream in two passes and writes it out as tab-delimited rows.
 *
 * <p>First pass: every record read from {@code csvReader} is copied to a temporary
 * file while the type of each column is inferred. A column starts out as a candidate
 * for numeric, integer, time and date, and loses a candidacy as soon as one of its
 * values fails the corresponding check. The inferred types are then recorded on the
 * {@link DataVariable}s attached to {@code dataTable}.
 *
 * <p>Second pass: the temporary file is re-read and one tab-joined line per record is
 * printed to {@code finalOut}, with each cell formatted according to its column type.
 *
 * <p>On success {@code csvReader} and {@code finalOut} are closed by this method. On
 * any exit, normal or exceptional, the temporary file is deleted and the parser that
 * is current at that point is closed on a best-effort basis.
 *
 * @param csvReader reader over the CSV data; the first record is taken as the header
 * @param dataTable table that receives the variable list and the record count
 * @param finalOut  destination for the tab-delimited output
 * @return the number of records written in the second pass
 * @throws IOException if a header name is null or empty, a record is inconsistent
 *                     with the header, the two passes disagree on the record count,
 *                     or the underlying I/O fails
 */
public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter finalOut) throws IOException {

    List<DataVariable> variableList = new ArrayList<>();
    CSVParser parser = new CSVParser(csvReader, inFormat.withHeader());
    // Assigned once the header has been validated; deleted in the finally block.
    File firstPassTempFile = null;

    try {
        Map<String, Integer> headers = parser.getHeaderMap();

        int i = 0;
        for (String varName : headers.keySet()) {
            if (varName == null || varName.isEmpty()) {
                // TODO:
                // Add a sensible variable name validation algorithm.
                // -- L.A. 4.0 alpha 1
                throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.invalidHeader"));
            }

            DataVariable dv = new DataVariable();
            dv.setName(varName);
            dv.setLabel(varName);
            dv.setInvalidRanges(new ArrayList<>());
            dv.setSummaryStatistics(new ArrayList<>());
            dv.setUnf("UNF:6:NOTCALCULATED");
            dv.setCategories(new ArrayList<>());
            variableList.add(dv);

            // Every variable starts out as a discrete character variable;
            // it is re-typed after the first pass if the data allows it.
            dv.setTypeCharacter();
            dv.setIntervalDiscrete();
            dv.setFileOrder(i);
            dv.setDataTable(dataTable);
            i++;
        }

        dataTable.setVarQuantity((long) variableList.size());
        dataTable.setDataVariables(variableList);

        boolean[] isNumericVariable = new boolean[headers.size()];
        boolean[] isIntegerVariable = new boolean[headers.size()];
        boolean[] isTimeVariable = new boolean[headers.size()];
        boolean[] isDateVariable = new boolean[headers.size()];

        for (i = 0; i < headers.size(); i++) {
            // OK, let's assume that every variable is numeric;
            // but we'll go through the file and examine every value; the
            // moment we find a value that's not a legit numeric one, we'll
            // assume that it is in fact a String.
            isNumericVariable[i] = true;
            isIntegerVariable[i] = true;
            isDateVariable[i] = true;
            isTimeVariable[i] = true;
        }

        // First, "learning" pass.
        // (we'll save the incoming stream in another temp file:)
        SimpleDateFormat[] selectedDateTimeFormat = new SimpleDateFormat[headers.size()];
        SimpleDateFormat[] selectedDateFormat = new SimpleDateFormat[headers.size()];

        firstPassTempFile = File.createTempFile("firstpass-", ".csv");

        try (CSVPrinter csvFilePrinter = new CSVPrinter(
                // TODO allow other parsers of tabular data to use this parser by changing inFormat
                new FileWriter(firstPassTempFile.getAbsolutePath()), inFormat)) {
            // Write headers
            csvFilePrinter.printRecord(headers.keySet());
            for (CSVRecord record : parser.getRecords()) {
                // Checks if #records = #columns in header
                if (!record.isConsistent()) {
                    List<String> args = Arrays.asList(String.valueOf(parser.getCurrentLineNumber() - 1),
                            String.valueOf(headers.size()), String.valueOf(record.size()));
                    throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args));
                }

                for (i = 0; i < headers.size(); i++) {
                    String varString = record.get(i);
                    // A column stays "integer" only while every value is empty, "null",
                    // or an allowed first character followed by digits only.
                    isIntegerVariable[i] = isIntegerVariable[i] && varString != null
                            && (varString.isEmpty() || varString.equals("null")
                                    || (firstNumCharSet.contains(varString.charAt(0))
                                            && StringUtils.isNumeric(varString.substring(1))));
                    if (isNumericVariable[i]) {
                        // If variable might be "numeric" test to see if this value is a parsable number:
                        if (varString != null && !varString.isEmpty()) {
                            if (varString.equalsIgnoreCase("NaN") || varString.equalsIgnoreCase("NA")
                                    || varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf")
                                    || varString.equalsIgnoreCase("-Inf") || varString.equalsIgnoreCase("null")) {
                                // Special tokens are accepted as numeric; move on to the next column.
                                continue;
                            }
                            try {
                                Double.parseDouble(varString);
                                // Parsable number; the column is still numeric, next column.
                                continue;
                            } catch (NumberFormatException ex) {
                                // the token failed to parse as a double
                                // so the column is a string variable.
                            }
                            isNumericVariable[i] = false;
                        }
                    }

                    // If this is not a numeric column, see if it is a date column
                    // by parsing the cell as a date or date-time value:
                    if (!isNumericVariable[i]) {

                        Date dateResult = null;

                        if (isTimeVariable[i]) {
                            if (varString != null && !varString.isEmpty()) {
                                boolean isTime = false;

                                if (selectedDateTimeFormat[i] != null) {
                                    // A format was already selected for this column;
                                    // every later value must match it in full.
                                    ParsePosition pos = new ParsePosition(0);
                                    dateResult = selectedDateTimeFormat[i].parse(varString, pos);

                                    if (dateResult != null && pos.getIndex() == varString.length()) {
                                        // OK, successfully parsed a value!
                                        isTime = true;
                                    }
                                } else {
                                    for (SimpleDateFormat format : TIME_FORMATS) {
                                        ParsePosition pos = new ParsePosition(0);
                                        dateResult = format.parse(varString, pos);
                                        if (dateResult != null && pos.getIndex() == varString.length()) {
                                            // OK, successfully parsed a value!
                                            isTime = true;
                                            selectedDateTimeFormat[i] = format;
                                            break;
                                        }
                                    }
                                }
                                if (!isTime) {
                                    isTimeVariable[i] = false;
                                    // if the token didn't parse as a time value,
                                    // we will still try to parse it as a date, below.
                                    // unless this column is NOT a date.
                                } else {
                                    // And if it is a time value, we are going to assume it's
                                    // NOT a date.
                                    isDateVariable[i] = false;
                                }
                            }
                        }

                        if (isDateVariable[i]) {
                            if (varString != null && !varString.isEmpty()) {
                                boolean isDate = false;

                                // TODO:
                                // Strictly speaking, we should be doing the same thing
                                // here as with the time formats above; select the
                                // first one that works, then insist that all the
                                // other values in this column match it... but we
                                // only have one, as of now, so it should be ok.
                                // -- L.A. 4.0 beta
                                for (SimpleDateFormat format : DATE_FORMATS) {
                                    // Strict parsing - it will throw an
                                    // exception if it doesn't parse!
                                    format.setLenient(false);
                                    try {
                                        format.parse(varString);
                                        isDate = true;
                                        selectedDateFormat[i] = format;
                                        break;
                                    } catch (ParseException ex) {
                                        // Not this format; try the next one.
                                    }
                                }
                                isDateVariable[i] = isDate;
                            }
                        }
                    }
                }

                csvFilePrinter.printRecord(record);
            }
        }
        dataTable.setCaseQuantity(parser.getRecordNumber());
        parser.close();
        csvReader.close();

        // Re-type the variables that we've determined are numerics:
        for (i = 0; i < headers.size(); i++) {
            if (isNumericVariable[i]) {
                dataTable.getDataVariables().get(i).setTypeNumeric();

                if (isIntegerVariable[i]) {
                    dataTable.getDataVariables().get(i).setIntervalDiscrete();
                } else {
                    dataTable.getDataVariables().get(i).setIntervalContinuous();
                }
            } else if (isDateVariable[i] && selectedDateFormat[i] != null) {
                // Dates are still Strings, i.e., they are "character" and "discrete";
                // But we add special format values for them:
                // NOTE(review): this records DATE_FORMATS[0] rather than the format that
                // actually matched (selectedDateFormat[i]); the two only agree while
                // DATE_FORMATS has a single entry, as the TODO above says.
                dataTable.getDataVariables().get(i).setFormat(DATE_FORMATS[0].toPattern());
                dataTable.getDataVariables().get(i).setFormatCategory("date");
            } else if (isTimeVariable[i] && selectedDateTimeFormat[i] != null) {
                // Same for time values:
                dataTable.getDataVariables().get(i).setFormat(selectedDateTimeFormat[i].toPattern());
                dataTable.getDataVariables().get(i).setFormatCategory("time");
            }
        }

        // Second, final pass.
        try (BufferedReader secondPassReader = new BufferedReader(new FileReader(firstPassTempFile))) {
            parser = new CSVParser(secondPassReader, inFormat.withHeader());
            String[] caseRow = new String[headers.size()];

            for (CSVRecord record : parser) {
                if (!record.isConsistent()) {
                    List<String> args = Arrays.asList(String.valueOf(parser.getCurrentLineNumber() - 1),
                            String.valueOf(headers.size()), String.valueOf(record.size()));
                    throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args));
                }

                for (i = 0; i < headers.size(); i++) {
                    String varString = record.get(i);
                    if (isNumericVariable[i]) {
                        if (varString == null || varString.isEmpty() || varString.equalsIgnoreCase("NA")) {
                            // Missing value - represented as an empty string in
                            // the final tab file
                            caseRow[i] = "";
                        } else if (varString.equalsIgnoreCase("NaN")) {
                            // "Not a Number" special value:
                            caseRow[i] = "NaN";
                        } else if (varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf")) {
                            // Positive infinity:
                            caseRow[i] = "Inf";
                        } else if (varString.equalsIgnoreCase("-Inf")) {
                            // Negative infinity:
                            caseRow[i] = "-Inf";
                        } else if (varString.equalsIgnoreCase("null")) {
                            // By request from Gus - "NULL" is recognized as a
                            // numeric zero:
                            caseRow[i] = isIntegerVariable[i] ? "0" : "0.0";
                        } else {
                            // No re-formatting is done on any other numeric values.
                            // We save them exactly as they were, for archival purposes,
                            // rather than normalizing them to scientific notation.
                            caseRow[i] = varString;
                        }
                    } else if (isTimeVariable[i] || isDateVariable[i]) {
                        // Time and Dates are stored NOT quoted (don't ask).
                        if (varString != null) {
                            // Dealing with quotes:
                            // remove the leading and trailing quotes, if present:
                            varString = varString.replaceFirst("^\"*", "");
                            varString = varString.replaceFirst("\"*$", "");
                            caseRow[i] = varString;
                        } else {
                            caseRow[i] = "";
                        }
                    } else {
                        // Treat as a String:
                        // Strings are stored in tab files quoted;
                        // Missing values are stored as an empty string
                        // between two tabs (or one tab and the new line);
                        // Empty strings stored as "" (quoted empty string).
                        // For the purposes  of this CSV ingest reader, we are going
                        // to assume that all the empty strings in the file are
                        // indeed empty strings, and NOT missing values:
                        if (varString != null) {
                            // escape the quotes, newlines, and tabs:
                            varString = varString.replace("\"", "\\\"");
                            varString = varString.replace("\n", "\\n");
                            varString = varString.replace("\t", "\\t");
                            // final pair of quotes:
                            varString = "\"" + varString + "\"";
                            caseRow[i] = varString;
                        } else {
                            caseRow[i] = "\"\"";
                        }
                    }
                }
                finalOut.println(StringUtils.join(caseRow, "\t"));
            }
        }
        long linecount = parser.getRecordNumber();
        finalOut.close();
        parser.close();
        dbglog.fine("Tmp File: " + firstPassTempFile);

        // Both passes must have seen the same number of records.
        if (dataTable.getCaseQuantity().intValue() != linecount) {
            List<String> args = Arrays.asList(String.valueOf(dataTable.getCaseQuantity().intValue()),
                    String.valueOf(linecount));
            throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.line_mismatch", args));
        }
        return (int) linecount;
    } finally {
        // Best-effort cleanup that also runs when either pass fails part-way.
        // On the success path the parser has already been closed above, so this
        // second close is a no-op; a failure here must not mask the real exception.
        try {
            parser.close();
        } catch (IOException ex) {
            dbglog.fine("Failed to close CSV parser: " + ex.getMessage());
        }
        // Firstpass file is deleted to prevent tmp from filling up.
        if (firstPassTempFile != null) {
            firstPassTempFile.delete();
        }
    }
}

From source file:org.cast.cwm.service.UserSpreadsheetReader.java

/**
 * Read spreadsheet of user information and generate potential users.
 * Returns true if all was sucessful and users could be created as specified.
 * /* w w  w.  j  a va2s .  com*/
 * This method does NOT modify the datastore.
 * 
 * @param stream the input stream of CSV data
 * @return true if no errors encountered.
 */
@Override
public boolean readInput(InputStream stream) {
    potentialUsers = new ArrayList<PotentialUserSave>();
    potentialSites = new HashMap<String, Site>();
    potentialPeriods = new HashMap<Site, Map<String, Period>>();

    CSVParser parser;
    try {
        parser = CSVFormat.EXCEL.withHeader().withIgnoreEmptyLines().withIgnoreSurroundingSpaces()
                .parse(new InputStreamReader(new BOMInputStream(stream), "UTF-8"));
    } catch (IOException e) {
        log.error(e.getMessage());
        globalError = e.getMessage();
        return false;
    }

    // Make our own secondary mapping of header names to fields, by
    // lowercasing and removing spaces from all header names
    headerMap = parser.getHeaderMap();
    for (String hdr : new HashSet<String>(headerMap.keySet())) {
        String normalized = hdr.toLowerCase().replaceAll("\\s", "");
        if (!normalized.equals(hdr)) {
            headerMap.put(normalized, headerMap.get(hdr));
        }
    }

    globalError = checkRequiredHeaders(headerMap);
    if (!Strings.isEmpty(globalError))
        return false;

    // Read the CSV file, create PotentialUserSave objects, record error messages, add to potentialUsers List
    try {
        boolean errors = false; // have errors been encountered?
        for (CSVRecord record : parser) {

            try {
                User user = createUserObject(record);
                String messages = populateUserObject(user, record);
                if (Strings.isEmpty(messages))
                    messages = validateUser(user);

                // Add a PotentialUserSave to the list.
                potentialUsers.add(new PotentialUserSave(modelProvider.modelOf(user), messages, record));
                if (!Strings.isEmpty(messages))
                    errors = true;

            } catch (ArrayIndexOutOfBoundsException e) {
                // This can happen if the last row is missing values; Excel doesn't fill them out to the last column
                log.error("Caught exception importing line {}: {}", parser.getCurrentLineNumber(),
                        e.getClass());
                potentialUsers.add(new PotentialUserSave(null, "Data missing from CSV.\n", record));
                errors = true;
            } catch (Exception e) {
                e.printStackTrace();
                log.error("Caught exception importing line {}: {}", parser.getCurrentLineNumber(),
                        e.getClass());
                potentialUsers.add(new PotentialUserSave(null, "Error: " + e, record));
                errors = true;
            }
        }

        // If CSV file has only one line, it is either empty or has unrecognized LF/CR values.
        if (parser.getCurrentLineNumber() == 1) {
            potentialUsers.add(
                    new PotentialUserSave(null, "Empty or Corrupted File.  Note: Save as Windows CSV.", null));
            globalError = "Empty or Corrupted File - LF/CR values may be invalid!";
            throw new CharacterCodingException();
        }
        return (!errors);

    } catch (CharacterCodingException e) {
        log.error("Empty or Corrupted File - only 1 line found - CR/LF issue?. {}", e.getClass());
        return false;
    }

}

From source file:org.wso2.carbon.event.simulator.csvFeedSimulation.core.CSVFeedEventSimulator.java

/**
 * This method must be called within a synchronized block to avoid multiple file simulators from running simultaneously.
 * Read the values from uploaded CSV file and convert those values into event and send those events to
 * input handler/*from ww  w .ja v a2 s . c o  m*/
 * <p>
 * <p>
 * To read the CSV file It uses CSV parser Library.
 * {@link <a href="https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVParser.html">CSVParser</a>}
 * </p>
 * <p>
 * <p>
 * CSV file can be separated by one of these fallowing character , , ; , \t by default
 * It has capability to have user defined delimiter
 * Any field may be quoted (with double quotes)
 * Fields with embedded commas or delimiter characters must be double quoted.
 * </p>
 * <p>
 * Initialize CSVParser
 *
 * @param executionPlanDto ExecutionPlanDto
 * @param csvFileConfig    CSVFileSimulationDto
 */
private void sendEvent(ExecutionPlanDto executionPlanDto, CSVFileSimulationDto csvFileConfig) {

    /*
      return no of events read from CSV file during ever iteration
     */
    long noOfEvents = 0;
    int delay = csvFileConfig.getDelay();
    /*
    Reader for reading character streams from file
     */
    Reader in = null;
    /*
    CSVParser to read CSV Values
     */
    CSVParser csvParser = null;
    if (delay <= 0) {
        log.warn("Events will be sent continuously since the delay between events are set to " + delay
                + "milliseconds");
        delay = 0;
    }

    try {
        /*
        Initialize Reader
         */
        in = new FileReader(String.valueOf(Paths.get(System.getProperty("java.io.tmpdir"),
                csvFileConfig.getFileDto().getFileInfo().getFileName())));

        /*
        Initialize CSVParser with appropriate CSVFormat according to delimiter
         */

        switch (csvFileConfig.getDelimiter()) {
        case ",":
            csvParser = CSVParser.parse(in, CSVFormat.DEFAULT);
            break;
        case ";":
            csvParser = CSVParser.parse(in, CSVFormat.EXCEL);
            break;
        case "\\t":
            csvParser = CSVParser.parse(in, CSVFormat.TDF);
            break;
        default:
            csvParser = CSVParser.parse(in, CSVFormat.newFormat(csvFileConfig.getDelimiter().charAt(0)));
        }

        int attributeSize = executionPlanDto.getInputStreamDtoMap().get(csvFileConfig.getStreamName())
                .getStreamAttributeDtos().size();

        /*
        Iterate through the CSV file line by line
         */

        for (CSVRecord record : csvParser) {
            try {
                synchronized (this) {
                    if (isStopped) {
                        isStopped = false;
                        break;
                    }
                    if (isPaused) {
                        this.wait();
                    }
                }

                if (record.size() != attributeSize) {
                    log.warn("No of attribute is not equal to attribute size: " + attributeSize + " is needed"
                            + "in Row no:" + noOfEvents + 1);
                }
                String[] attributes = new String[attributeSize];
                noOfEvents = csvParser.getCurrentLineNumber();

                for (int i = 0; i < record.size(); i++) {
                    attributes[i] = record.get(i);
                }

                //convert Attribute values into event
                Event event = EventConverter.eventConverter(csvFileConfig.getStreamName(), attributes,
                        executionPlanDto);
                // TODO: 13/12/16 delete sout
                System.out.println("Input Event " + Arrays.deepToString(event.getEventData()));
                //

                //send the event to input handler
                send(csvFileConfig.getStreamName(), event);

                //delay between two events
                if (delay > 0) {
                    Thread.sleep(delay);
                }
            } catch (EventSimulationException e) {
                log.error("Event dropped due to Error occurred during generating an event" + e.getMessage());
            } catch (InterruptedException e) {
                log.error("Error occurred during send event" + e.getMessage());
            }
        }

    } catch (IllegalArgumentException e) {
        // TODO: 02/12/16 proper error message
        throw new EventSimulationException("File Parameters are null" + e.getMessage());
    } catch (FileNotFoundException e) {
        throw new EventSimulationException(
                "File not found :" + csvFileConfig.getFileDto().getFileInfo().getFileName());
    } catch (IOException e) {
        throw new EventSimulationException("Error occurred while reading the file");
    } finally {
        try {
            if (in != null && csvParser != null)
                in.close();
            csvParser.close();
        } catch (IOException e) {
            throw new EventSimulationException("Error occurred during closing the file");
        }
    }
}