List of usage examples for org.apache.commons.csv CSVParser getCurrentLineNumber
public long getCurrentLineNumber()
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.csv.CSVFileReader.java
public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter finalOut) throws IOException { List<DataVariable> variableList = new ArrayList<>(); CSVParser parser = new CSVParser(csvReader, inFormat.withHeader()); Map<String, Integer> headers = parser.getHeaderMap(); int i = 0;// www. jav a2 s .co m for (String varName : headers.keySet()) { if (varName == null || varName.isEmpty()) { // TODO: // Add a sensible variable name validation algorithm. // -- L.A. 4.0 alpha 1 throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.invalidHeader")); } DataVariable dv = new DataVariable(); dv.setName(varName); dv.setLabel(varName); dv.setInvalidRanges(new ArrayList<>()); dv.setSummaryStatistics(new ArrayList<>()); dv.setUnf("UNF:6:NOTCALCULATED"); dv.setCategories(new ArrayList<>()); variableList.add(dv); dv.setTypeCharacter(); dv.setIntervalDiscrete(); dv.setFileOrder(i); dv.setDataTable(dataTable); i++; } dataTable.setVarQuantity((long) variableList.size()); dataTable.setDataVariables(variableList); boolean[] isNumericVariable = new boolean[headers.size()]; boolean[] isIntegerVariable = new boolean[headers.size()]; boolean[] isTimeVariable = new boolean[headers.size()]; boolean[] isDateVariable = new boolean[headers.size()]; for (i = 0; i < headers.size(); i++) { // OK, let's assume that every variable is numeric; // but we'll go through the file and examine every value; the // moment we find a value that's not a legit numeric one, we'll // assume that it is in fact a String. isNumericVariable[i] = true; isIntegerVariable[i] = true; isDateVariable[i] = true; isTimeVariable[i] = true; } // First, "learning" pass. // (we'll save the incoming stream in another temp file:) SimpleDateFormat[] selectedDateTimeFormat = new SimpleDateFormat[headers.size()]; SimpleDateFormat[] selectedDateFormat = new SimpleDateFormat[headers.size()]; File firstPassTempFile = File.createTempFile("firstpass-", ".csv"); try (CSVPrinter csvFilePrinter = new CSVPrinter( // TODO allow other parsers of tabular data to use this parser by changin inFormat new FileWriter(firstPassTempFile.getAbsolutePath()), inFormat)) { //Write headers csvFilePrinter.printRecord(headers.keySet()); for (CSVRecord record : parser.getRecords()) { // Checks if #records = #columns in header if (!record.isConsistent()) { List<String> args = Arrays.asList(new String[] { "" + (parser.getCurrentLineNumber() - 1), "" + headers.size(), "" + record.size() }); throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args)); } for (i = 0; i < headers.size(); i++) { String varString = record.get(i); isIntegerVariable[i] = isIntegerVariable[i] && varString != null && (varString.isEmpty() || varString.equals("null") || (firstNumCharSet.contains(varString.charAt(0)) && StringUtils.isNumeric(varString.substring(1)))); if (isNumericVariable[i]) { // If variable might be "numeric" test to see if this value is a parsable number: if (varString != null && !varString.isEmpty()) { boolean isNumeric = false; boolean isInteger = false; if (varString.equalsIgnoreCase("NaN") || varString.equalsIgnoreCase("NA") || varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf") || varString.equalsIgnoreCase("-Inf") || varString.equalsIgnoreCase("null")) { continue; } else { try { Double testDoubleValue = new Double(varString); continue; } catch (NumberFormatException ex) { // the token failed to parse as a double // so the column is a string variable. } } isNumericVariable[i] = false; } } // If this is not a numeric column, see if it is a date collumn // by parsing the cell as a date or date-time value: if (!isNumericVariable[i]) { Date dateResult = null; if (isTimeVariable[i]) { if (varString != null && !varString.isEmpty()) { boolean isTime = false; if (selectedDateTimeFormat[i] != null) { ParsePosition pos = new ParsePosition(0); dateResult = selectedDateTimeFormat[i].parse(varString, pos); if (dateResult != null && pos.getIndex() == varString.length()) { // OK, successfully parsed a value! isTime = true; } } else { for (SimpleDateFormat format : TIME_FORMATS) { ParsePosition pos = new ParsePosition(0); dateResult = format.parse(varString, pos); if (dateResult != null && pos.getIndex() == varString.length()) { // OK, successfully parsed a value! isTime = true; selectedDateTimeFormat[i] = format; break; } } } if (!isTime) { isTimeVariable[i] = false; // if the token didn't parse as a time value, // we will still try to parse it as a date, below. // unless this column is NOT a date. } else { // And if it is a time value, we are going to assume it's // NOT a date. isDateVariable[i] = false; } } } if (isDateVariable[i]) { if (varString != null && !varString.isEmpty()) { boolean isDate = false; // TODO: // Strictly speaking, we should be doing the same thing // here as with the time formats above; select the // first one that works, then insist that all the // other values in this column match it... but we // only have one, as of now, so it should be ok. // -- L.A. 4.0 beta for (SimpleDateFormat format : DATE_FORMATS) { // Strict parsing - it will throw an // exception if it doesn't parse! format.setLenient(false); try { format.parse(varString); isDate = true; selectedDateFormat[i] = format; break; } catch (ParseException ex) { //Do nothing } } isDateVariable[i] = isDate; } } } } csvFilePrinter.printRecord(record); } } dataTable.setCaseQuantity(parser.getRecordNumber()); parser.close(); csvReader.close(); // Re-type the variables that we've determined are numerics: for (i = 0; i < headers.size(); i++) { if (isNumericVariable[i]) { dataTable.getDataVariables().get(i).setTypeNumeric(); if (isIntegerVariable[i]) { dataTable.getDataVariables().get(i).setIntervalDiscrete(); } else { dataTable.getDataVariables().get(i).setIntervalContinuous(); } } else if (isDateVariable[i] && selectedDateFormat[i] != null) { // Dates are still Strings, i.e., they are "character" and "discrete"; // But we add special format values for them: dataTable.getDataVariables().get(i).setFormat(DATE_FORMATS[0].toPattern()); dataTable.getDataVariables().get(i).setFormatCategory("date"); } else if (isTimeVariable[i] && selectedDateTimeFormat[i] != null) { // Same for time values: dataTable.getDataVariables().get(i).setFormat(selectedDateTimeFormat[i].toPattern()); dataTable.getDataVariables().get(i).setFormatCategory("time"); } } // Second, final pass. try (BufferedReader secondPassReader = new BufferedReader(new FileReader(firstPassTempFile))) { parser = new CSVParser(secondPassReader, inFormat.withHeader()); String[] caseRow = new String[headers.size()]; for (CSVRecord record : parser) { if (!record.isConsistent()) { List<String> args = Arrays.asList(new String[] { "" + (parser.getCurrentLineNumber() - 1), "" + headers.size(), "" + record.size() }); throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args)); } for (i = 0; i < headers.size(); i++) { String varString = record.get(i); if (isNumericVariable[i]) { if (varString == null || varString.isEmpty() || varString.equalsIgnoreCase("NA")) { // Missing value - represented as an empty string in // the final tab file caseRow[i] = ""; } else if (varString.equalsIgnoreCase("NaN")) { // "Not a Number" special value: caseRow[i] = "NaN"; } else if (varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf")) { // Positive infinity: caseRow[i] = "Inf"; } else if (varString.equalsIgnoreCase("-Inf")) { // Negative infinity: caseRow[i] = "-Inf"; } else if (varString.equalsIgnoreCase("null")) { // By request from Gus - "NULL" is recognized as a // numeric zero: caseRow[i] = isIntegerVariable[i] ? "0" : "0.0"; } else { /* No re-formatting is done on any other numeric values. * We'll save them as they were, for archival purposes. * The alternative solution - formatting in sci. notation * is commented-out below. */ caseRow[i] = varString; /* if (isIntegerVariable[i]) { try { Integer testIntegerValue = new Integer(varString); caseRow[i] = testIntegerValue.toString(); } catch (NumberFormatException ex) { throw new IOException("Failed to parse a value recognized as an integer in the first pass! (?)"); } } else { try { Double testDoubleValue = new Double(varString); if (testDoubleValue.equals(0.0)) { caseRow[i] = "0.0"; } else { // One possible implementation: // // Round our fractional values to 15 digits // (minimum number of digits of precision guaranteed by // type Double) and format the resulting representations // in a IEEE 754-like "scientific notation" - for ex., // 753.24 will be encoded as 7.5324e2 BigDecimal testBigDecimal = new BigDecimal(varString, doubleMathContext); caseRow[i] = String.format(FORMAT_IEEE754, testBigDecimal); // Strip meaningless zeros and extra + signs: caseRow[i] = caseRow[i].replaceFirst("00*e", "e"); caseRow[i] = caseRow[i].replaceFirst("\\.e", ".0e"); caseRow[i] = caseRow[i].replaceFirst("e\\+00", ""); caseRow[i] = caseRow[i].replaceFirst("^\\+", ""); } } catch (NumberFormatException ex) { throw new IOException("Failed to parse a value recognized as numeric in the first pass! (?)"); } } */ } } else if (isTimeVariable[i] || isDateVariable[i]) { // Time and Dates are stored NOT quoted (don't ask). if (varString != null) { // Dealing with quotes: // remove the leading and trailing quotes, if present: varString = varString.replaceFirst("^\"*", ""); varString = varString.replaceFirst("\"*$", ""); caseRow[i] = varString; } else { caseRow[i] = ""; } } else { // Treat as a String: // Strings are stored in tab files quoted; // Missing values are stored as an empty string // between two tabs (or one tab and the new line); // Empty strings stored as "" (quoted empty string). // For the purposes of this CSV ingest reader, we are going // to assume that all the empty strings in the file are // indeed empty strings, and NOT missing values: if (varString != null) { // escape the quotes, newlines, and tabs: varString = varString.replace("\"", "\\\""); varString = varString.replace("\n", "\\n"); varString = varString.replace("\t", "\\t"); // final pair of quotes: varString = "\"" + varString + "\""; caseRow[i] = varString; } else { caseRow[i] = "\"\""; } } } finalOut.println(StringUtils.join(caseRow, "\t")); } } long linecount = parser.getRecordNumber(); finalOut.close(); parser.close(); dbglog.fine("Tmp File: " + firstPassTempFile); // Firstpass file is deleted to prevent tmp from filling up. firstPassTempFile.delete(); if (dataTable.getCaseQuantity().intValue() != linecount) { List<String> args = Arrays .asList(new String[] { "" + dataTable.getCaseQuantity().intValue(), "" + linecount }); throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.line_mismatch", args)); } return (int) linecount; }
From source file:org.cast.cwm.service.UserSpreadsheetReader.java
/** * Read spreadsheet of user information and generate potential users. * Returns true if all was sucessful and users could be created as specified. * /* w w w. j a va2s . com*/ * This method does NOT modify the datastore. * * @param stream the input stream of CSV data * @return true if no errors encountered. */ @Override public boolean readInput(InputStream stream) { potentialUsers = new ArrayList<PotentialUserSave>(); potentialSites = new HashMap<String, Site>(); potentialPeriods = new HashMap<Site, Map<String, Period>>(); CSVParser parser; try { parser = CSVFormat.EXCEL.withHeader().withIgnoreEmptyLines().withIgnoreSurroundingSpaces() .parse(new InputStreamReader(new BOMInputStream(stream), "UTF-8")); } catch (IOException e) { log.error(e.getMessage()); globalError = e.getMessage(); return false; } // Make our own secondary mapping of header names to fields, by // lowercasing and removing spaces from all header names headerMap = parser.getHeaderMap(); for (String hdr : new HashSet<String>(headerMap.keySet())) { String normalized = hdr.toLowerCase().replaceAll("\\s", ""); if (!normalized.equals(hdr)) { headerMap.put(normalized, headerMap.get(hdr)); } } globalError = checkRequiredHeaders(headerMap); if (!Strings.isEmpty(globalError)) return false; // Read the CSV file, create PotentialUserSave objects, record error messages, add to potentialUsers List try { boolean errors = false; // have errors been encountered? for (CSVRecord record : parser) { try { User user = createUserObject(record); String messages = populateUserObject(user, record); if (Strings.isEmpty(messages)) messages = validateUser(user); // Add a PotentialUserSave to the list. potentialUsers.add(new PotentialUserSave(modelProvider.modelOf(user), messages, record)); if (!Strings.isEmpty(messages)) errors = true; } catch (ArrayIndexOutOfBoundsException e) { // This can happen if the last row is missing values; Excel doesn't fill them out to the last column log.error("Caught exception importing line {}: {}", parser.getCurrentLineNumber(), e.getClass()); potentialUsers.add(new PotentialUserSave(null, "Data missing from CSV.\n", record)); errors = true; } catch (Exception e) { e.printStackTrace(); log.error("Caught exception importing line {}: {}", parser.getCurrentLineNumber(), e.getClass()); potentialUsers.add(new PotentialUserSave(null, "Error: " + e, record)); errors = true; } } // If CSV file has only one line, it is either empty or has unrecognized LF/CR values. if (parser.getCurrentLineNumber() == 1) { potentialUsers.add( new PotentialUserSave(null, "Empty or Corrupted File. Note: Save as Windows CSV.", null)); globalError = "Empty or Corrupted File - LF/CR values may be invalid!"; throw new CharacterCodingException(); } return (!errors); } catch (CharacterCodingException e) { log.error("Empty or Corrupted File - only 1 line found - CR/LF issue?. {}", e.getClass()); return false; } }
From source file:org.wso2.carbon.event.simulator.csvFeedSimulation.core.CSVFeedEventSimulator.java
/** * This method must be called within a synchronized block to avoid multiple file simulators from running simultaneously. * Read the values from uploaded CSV file and convert those values into event and send those events to * input handler/*from ww w .ja v a2 s . c o m*/ * <p> * <p> * To read the CSV file It uses CSV parser Library. * {@link <a href="https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVParser.html">CSVParser</a>} * </p> * <p> * <p> * CSV file can be separated by one of these fallowing character , , ; , \t by default * It has capability to have user defined delimiter * Any field may be quoted (with double quotes) * Fields with embedded commas or delimiter characters must be double quoted. * </p> * <p> * Initialize CSVParser * * @param executionPlanDto ExecutionPlanDto * @param csvFileConfig CSVFileSimulationDto */ private void sendEvent(ExecutionPlanDto executionPlanDto, CSVFileSimulationDto csvFileConfig) { /* return no of events read from CSV file during ever iteration */ long noOfEvents = 0; int delay = csvFileConfig.getDelay(); /* Reader for reading character streams from file */ Reader in = null; /* CSVParser to read CSV Values */ CSVParser csvParser = null; if (delay <= 0) { log.warn("Events will be sent continuously since the delay between events are set to " + delay + "milliseconds"); delay = 0; } try { /* Initialize Reader */ in = new FileReader(String.valueOf(Paths.get(System.getProperty("java.io.tmpdir"), csvFileConfig.getFileDto().getFileInfo().getFileName()))); /* Initialize CSVParser with appropriate CSVFormat according to delimiter */ switch (csvFileConfig.getDelimiter()) { case ",": csvParser = CSVParser.parse(in, CSVFormat.DEFAULT); break; case ";": csvParser = CSVParser.parse(in, CSVFormat.EXCEL); break; case "\\t": csvParser = CSVParser.parse(in, CSVFormat.TDF); break; default: csvParser = CSVParser.parse(in, CSVFormat.newFormat(csvFileConfig.getDelimiter().charAt(0))); } int attributeSize = executionPlanDto.getInputStreamDtoMap().get(csvFileConfig.getStreamName()) .getStreamAttributeDtos().size(); /* Iterate through the CSV file line by line */ for (CSVRecord record : csvParser) { try { synchronized (this) { if (isStopped) { isStopped = false; break; } if (isPaused) { this.wait(); } } if (record.size() != attributeSize) { log.warn("No of attribute is not equal to attribute size: " + attributeSize + " is needed" + "in Row no:" + noOfEvents + 1); } String[] attributes = new String[attributeSize]; noOfEvents = csvParser.getCurrentLineNumber(); for (int i = 0; i < record.size(); i++) { attributes[i] = record.get(i); } //convert Attribute values into event Event event = EventConverter.eventConverter(csvFileConfig.getStreamName(), attributes, executionPlanDto); // TODO: 13/12/16 delete sout System.out.println("Input Event " + Arrays.deepToString(event.getEventData())); // //send the event to input handler send(csvFileConfig.getStreamName(), event); //delay between two events if (delay > 0) { Thread.sleep(delay); } } catch (EventSimulationException e) { log.error("Event dropped due to Error occurred during generating an event" + e.getMessage()); } catch (InterruptedException e) { log.error("Error occurred during send event" + e.getMessage()); } } } catch (IllegalArgumentException e) { // TODO: 02/12/16 proper error message throw new EventSimulationException("File Parameters are null" + e.getMessage()); } catch (FileNotFoundException e) { throw new EventSimulationException( "File not found :" + csvFileConfig.getFileDto().getFileInfo().getFileName()); } catch (IOException e) { throw new EventSimulationException("Error occurred while reading the file"); } finally { try { if (in != null && csvParser != null) in.close(); csvParser.close(); } catch (IOException e) { throw new EventSimulationException("Error occurred during closing the file"); } } }