List of usage examples for org.apache.commons.csv CSVParser getRecordNumber
public long getRecordNumber()
From source file:ch.silviowangler.i18n.ResourceBundler.java
public void generateResourceBundle() throws IOException { CSVParser records = CSVFormat.RFC4180.withDelimiter(separator.charAt(0)).withFirstRecordAsHeader() .withQuoteMode(QuoteMode.ALL) .parse(new InputStreamReader(new FileInputStream(this.csvFile), this.inputEncoding)); final Map<String, Integer> headers = records.getHeaderMap(); processHeader(headers.keySet());// w w w. ja va 2 s. com for (CSVRecord record : records) { processData(record); } final int propertiesFilesAmount = this.propertiesStore.size(); LOGGER.info("Will generate {} properties files with {} records each", propertiesFilesAmount, records.getRecordNumber()); // Properties Dateien schreiben for (int i = 0; i < propertiesFilesAmount; i++) { Map<String, String> properties = this.propertiesStore.get(i); File outputFile = new File(this.outputDir, this.bundleBaseName + "_" + this.languages.get(i) + ".properties"); LOGGER.info("Writing {} to {}", outputFile.getName(), outputFile.getParentFile().getAbsolutePath()); FileOutputStream outputStream = new FileOutputStream(outputFile); try (OutputStreamWriter writer = new OutputStreamWriter(outputStream, this.native2ascii ? Consts.ASCII : this.outputEncoding)) { properties.forEach((key, value) -> { try { writer.append(key).append("=").append(value).append("\n"); } catch (IOException e) { e.printStackTrace(); } }); writer.flush(); } } }
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.csv.CSVFileReader.java
public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter finalOut) throws IOException { List<DataVariable> variableList = new ArrayList<>(); CSVParser parser = new CSVParser(csvReader, inFormat.withHeader()); Map<String, Integer> headers = parser.getHeaderMap(); int i = 0;/* www . j a va 2 s . c om*/ for (String varName : headers.keySet()) { if (varName == null || varName.isEmpty()) { // TODO: // Add a sensible variable name validation algorithm. // -- L.A. 4.0 alpha 1 throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.invalidHeader")); } DataVariable dv = new DataVariable(); dv.setName(varName); dv.setLabel(varName); dv.setInvalidRanges(new ArrayList<>()); dv.setSummaryStatistics(new ArrayList<>()); dv.setUnf("UNF:6:NOTCALCULATED"); dv.setCategories(new ArrayList<>()); variableList.add(dv); dv.setTypeCharacter(); dv.setIntervalDiscrete(); dv.setFileOrder(i); dv.setDataTable(dataTable); i++; } dataTable.setVarQuantity((long) variableList.size()); dataTable.setDataVariables(variableList); boolean[] isNumericVariable = new boolean[headers.size()]; boolean[] isIntegerVariable = new boolean[headers.size()]; boolean[] isTimeVariable = new boolean[headers.size()]; boolean[] isDateVariable = new boolean[headers.size()]; for (i = 0; i < headers.size(); i++) { // OK, let's assume that every variable is numeric; // but we'll go through the file and examine every value; the // moment we find a value that's not a legit numeric one, we'll // assume that it is in fact a String. isNumericVariable[i] = true; isIntegerVariable[i] = true; isDateVariable[i] = true; isTimeVariable[i] = true; } // First, "learning" pass. 
// (we'll save the incoming stream in another temp file:) SimpleDateFormat[] selectedDateTimeFormat = new SimpleDateFormat[headers.size()]; SimpleDateFormat[] selectedDateFormat = new SimpleDateFormat[headers.size()]; File firstPassTempFile = File.createTempFile("firstpass-", ".csv"); try (CSVPrinter csvFilePrinter = new CSVPrinter( // TODO allow other parsers of tabular data to use this parser by changin inFormat new FileWriter(firstPassTempFile.getAbsolutePath()), inFormat)) { //Write headers csvFilePrinter.printRecord(headers.keySet()); for (CSVRecord record : parser.getRecords()) { // Checks if #records = #columns in header if (!record.isConsistent()) { List<String> args = Arrays.asList(new String[] { "" + (parser.getCurrentLineNumber() - 1), "" + headers.size(), "" + record.size() }); throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args)); } for (i = 0; i < headers.size(); i++) { String varString = record.get(i); isIntegerVariable[i] = isIntegerVariable[i] && varString != null && (varString.isEmpty() || varString.equals("null") || (firstNumCharSet.contains(varString.charAt(0)) && StringUtils.isNumeric(varString.substring(1)))); if (isNumericVariable[i]) { // If variable might be "numeric" test to see if this value is a parsable number: if (varString != null && !varString.isEmpty()) { boolean isNumeric = false; boolean isInteger = false; if (varString.equalsIgnoreCase("NaN") || varString.equalsIgnoreCase("NA") || varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf") || varString.equalsIgnoreCase("-Inf") || varString.equalsIgnoreCase("null")) { continue; } else { try { Double testDoubleValue = new Double(varString); continue; } catch (NumberFormatException ex) { // the token failed to parse as a double // so the column is a string variable. 
} } isNumericVariable[i] = false; } } // If this is not a numeric column, see if it is a date collumn // by parsing the cell as a date or date-time value: if (!isNumericVariable[i]) { Date dateResult = null; if (isTimeVariable[i]) { if (varString != null && !varString.isEmpty()) { boolean isTime = false; if (selectedDateTimeFormat[i] != null) { ParsePosition pos = new ParsePosition(0); dateResult = selectedDateTimeFormat[i].parse(varString, pos); if (dateResult != null && pos.getIndex() == varString.length()) { // OK, successfully parsed a value! isTime = true; } } else { for (SimpleDateFormat format : TIME_FORMATS) { ParsePosition pos = new ParsePosition(0); dateResult = format.parse(varString, pos); if (dateResult != null && pos.getIndex() == varString.length()) { // OK, successfully parsed a value! isTime = true; selectedDateTimeFormat[i] = format; break; } } } if (!isTime) { isTimeVariable[i] = false; // if the token didn't parse as a time value, // we will still try to parse it as a date, below. // unless this column is NOT a date. } else { // And if it is a time value, we are going to assume it's // NOT a date. isDateVariable[i] = false; } } } if (isDateVariable[i]) { if (varString != null && !varString.isEmpty()) { boolean isDate = false; // TODO: // Strictly speaking, we should be doing the same thing // here as with the time formats above; select the // first one that works, then insist that all the // other values in this column match it... but we // only have one, as of now, so it should be ok. // -- L.A. 4.0 beta for (SimpleDateFormat format : DATE_FORMATS) { // Strict parsing - it will throw an // exception if it doesn't parse! 
format.setLenient(false); try { format.parse(varString); isDate = true; selectedDateFormat[i] = format; break; } catch (ParseException ex) { //Do nothing } } isDateVariable[i] = isDate; } } } } csvFilePrinter.printRecord(record); } } dataTable.setCaseQuantity(parser.getRecordNumber()); parser.close(); csvReader.close(); // Re-type the variables that we've determined are numerics: for (i = 0; i < headers.size(); i++) { if (isNumericVariable[i]) { dataTable.getDataVariables().get(i).setTypeNumeric(); if (isIntegerVariable[i]) { dataTable.getDataVariables().get(i).setIntervalDiscrete(); } else { dataTable.getDataVariables().get(i).setIntervalContinuous(); } } else if (isDateVariable[i] && selectedDateFormat[i] != null) { // Dates are still Strings, i.e., they are "character" and "discrete"; // But we add special format values for them: dataTable.getDataVariables().get(i).setFormat(DATE_FORMATS[0].toPattern()); dataTable.getDataVariables().get(i).setFormatCategory("date"); } else if (isTimeVariable[i] && selectedDateTimeFormat[i] != null) { // Same for time values: dataTable.getDataVariables().get(i).setFormat(selectedDateTimeFormat[i].toPattern()); dataTable.getDataVariables().get(i).setFormatCategory("time"); } } // Second, final pass. 
try (BufferedReader secondPassReader = new BufferedReader(new FileReader(firstPassTempFile))) { parser = new CSVParser(secondPassReader, inFormat.withHeader()); String[] caseRow = new String[headers.size()]; for (CSVRecord record : parser) { if (!record.isConsistent()) { List<String> args = Arrays.asList(new String[] { "" + (parser.getCurrentLineNumber() - 1), "" + headers.size(), "" + record.size() }); throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args)); } for (i = 0; i < headers.size(); i++) { String varString = record.get(i); if (isNumericVariable[i]) { if (varString == null || varString.isEmpty() || varString.equalsIgnoreCase("NA")) { // Missing value - represented as an empty string in // the final tab file caseRow[i] = ""; } else if (varString.equalsIgnoreCase("NaN")) { // "Not a Number" special value: caseRow[i] = "NaN"; } else if (varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf")) { // Positive infinity: caseRow[i] = "Inf"; } else if (varString.equalsIgnoreCase("-Inf")) { // Negative infinity: caseRow[i] = "-Inf"; } else if (varString.equalsIgnoreCase("null")) { // By request from Gus - "NULL" is recognized as a // numeric zero: caseRow[i] = isIntegerVariable[i] ? "0" : "0.0"; } else { /* No re-formatting is done on any other numeric values. * We'll save them as they were, for archival purposes. * The alternative solution - formatting in sci. notation * is commented-out below. */ caseRow[i] = varString; /* if (isIntegerVariable[i]) { try { Integer testIntegerValue = new Integer(varString); caseRow[i] = testIntegerValue.toString(); } catch (NumberFormatException ex) { throw new IOException("Failed to parse a value recognized as an integer in the first pass! 
(?)"); } } else { try { Double testDoubleValue = new Double(varString); if (testDoubleValue.equals(0.0)) { caseRow[i] = "0.0"; } else { // One possible implementation: // // Round our fractional values to 15 digits // (minimum number of digits of precision guaranteed by // type Double) and format the resulting representations // in a IEEE 754-like "scientific notation" - for ex., // 753.24 will be encoded as 7.5324e2 BigDecimal testBigDecimal = new BigDecimal(varString, doubleMathContext); caseRow[i] = String.format(FORMAT_IEEE754, testBigDecimal); // Strip meaningless zeros and extra + signs: caseRow[i] = caseRow[i].replaceFirst("00*e", "e"); caseRow[i] = caseRow[i].replaceFirst("\\.e", ".0e"); caseRow[i] = caseRow[i].replaceFirst("e\\+00", ""); caseRow[i] = caseRow[i].replaceFirst("^\\+", ""); } } catch (NumberFormatException ex) { throw new IOException("Failed to parse a value recognized as numeric in the first pass! (?)"); } } */ } } else if (isTimeVariable[i] || isDateVariable[i]) { // Time and Dates are stored NOT quoted (don't ask). if (varString != null) { // Dealing with quotes: // remove the leading and trailing quotes, if present: varString = varString.replaceFirst("^\"*", ""); varString = varString.replaceFirst("\"*$", ""); caseRow[i] = varString; } else { caseRow[i] = ""; } } else { // Treat as a String: // Strings are stored in tab files quoted; // Missing values are stored as an empty string // between two tabs (or one tab and the new line); // Empty strings stored as "" (quoted empty string). 
// For the purposes of this CSV ingest reader, we are going // to assume that all the empty strings in the file are // indeed empty strings, and NOT missing values: if (varString != null) { // escape the quotes, newlines, and tabs: varString = varString.replace("\"", "\\\""); varString = varString.replace("\n", "\\n"); varString = varString.replace("\t", "\\t"); // final pair of quotes: varString = "\"" + varString + "\""; caseRow[i] = varString; } else { caseRow[i] = "\"\""; } } } finalOut.println(StringUtils.join(caseRow, "\t")); } } long linecount = parser.getRecordNumber(); finalOut.close(); parser.close(); dbglog.fine("Tmp File: " + firstPassTempFile); // Firstpass file is deleted to prevent tmp from filling up. firstPassTempFile.delete(); if (dataTable.getCaseQuantity().intValue() != linecount) { List<String> args = Arrays .asList(new String[] { "" + dataTable.getCaseQuantity().intValue(), "" + linecount }); throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.line_mismatch", args)); } return (int) linecount; }
From source file:nl.utwente.ewi.caes.tactiletriana.simulation.devices.UncontrollableLoad.java
/** * * @param profileNumber - A number between 0 and 5 (inclusive) which selects * the profile data on which this instance is based *//*from w ww .j a v a 2 s .c o m*/ public UncontrollableLoad(int profileNumber, Simulation simulation) { super("Uncontrollable Load", simulation); if (profileNumber < 0 || profileNumber > 5) { throw new IllegalArgumentException("profileNumber must be in the range of 0 to 5"); } this.profileNumber = profileNumber; //Load the profile data into an array from the CSV file containing power consumptions for 6 houses. if (profile == null) { profile = new double[6][525608]; try { File csvData = new File("src/main/resources/datasets/watt_house_profiles_year.csv"); // Jan Harm: je kan gewoon een format aanmaken :) CSVFormat format = CSVFormat.DEFAULT.withDelimiter(';'); CSVParser parser = CSVParser.parse(csvData, Charset.defaultCharset(), format); for (CSVRecord csvRecord : parser) { for (int p = 0; p < 6; p++) { profile[p][(int) parser.getRecordNumber()] = Double.parseDouble(csvRecord.get(p)); } } } catch (IOException e) { throw new RuntimeException("Error while parsing house profile dataset", e); } } }
From source file:org.nuxeo.ecm.csv.core.CSVImporterWork.java
/**
 * Returns the parser's current record number plus one.
 *
 * @param parser the parser whose record number is read
 * @return {@code parser.getRecordNumber() + 1}
 */
protected long getLineNumber(CSVParser parser) {
    final long recordNumber = parser.getRecordNumber();
    return recordNumber + 1;
}
From source file:org.nuxeo.ecm.user.center.profile.UserProfileImporter.java
public void doImport(CoreSession session, CSVParser parser, UserProfileService userProfileService) throws IOException { log.info(String.format("Importing CSV file: %s", dataFileName)); DocumentType docType = Framework.getLocalService(SchemaManager.class) .getDocumentType(UserProfileConstants.USER_PROFILE_DOCTYPE); if (docType == null) { log.error("The type " + UserProfileConstants.USER_PROFILE_DOCTYPE + " does not exist"); return;/*from w w w. ja v a 2 s .co m*/ } Map<String, Integer> header = parser.getHeaderMap(); if (header == null) { // empty file? log.error("No header line, empty file?"); return; } // find the index for the required name and type values Integer nameIndex = header.get(UserProfileImporter.USER_PROFILE_IMPORTER_USERNAME_COL); if (nameIndex == null) { log.error("Missing 'username' column"); return; } long docsUpdatedCount = 0; totalRecords = parser.getRecordNumber(); try { int batchSize = config.getBatchSize(); long lineNumber = 0; for (CSVRecord record : parser.getRecords()) { lineNumber++; currentRecord = lineNumber; try { if (importLine(record, lineNumber, nameIndex, docType, session, userProfileService, header)) { docsUpdatedCount++; if (docsUpdatedCount % batchSize == 0) { commitOrRollbackTransaction(); startTransaction(); } } } catch (NuxeoException e) { // try next line Throwable unwrappedException = unwrapException(e); logImportError(lineNumber, "Error while importing line: %s", unwrappedException.getMessage()); log.debug(unwrappedException, unwrappedException); } } session.save(); } finally { commitOrRollbackTransaction(); startTransaction(); } log.info(String.format("Done importing %s entries from CSV file: %s", docsUpdatedCount, dataFileName)); }
From source file:testes.CveCsvReader.java
public static void main(String[] args) { int contCandidate = 0, contEntry = 0, contReserved = 0, contReject = 0, contDisputed = 0, contEqual = 0; String last = ""; try {//from ww w. j a v a 2 s. com File csvData = new File("dataset/base_dados_cve.csv"); // File csvDataOut = new File("dataset/cve_out.csv"); // FileWriter outFile = new FileWriter(csvDataOut); // CSVPrinter csvPrinter = new CSVPrinter((Appendable) outFile, CSVFormat.RFC4180); CSVParser parser = CSVParser.parse(csvData, Charset.forName("ISO-8859-1"), CSVFormat.RFC4180); for (CSVRecord csvRecord : parser) { //System.out.println("Nmero de campos: " + csvRecord.size()); //System.out.println(csvRecord.get(0)); if (csvRecord.get(1).equals("Candidate")) { contCandidate++; } else if (csvRecord.get(1).equals("Entry")) { contEntry++; } if (csvRecord.get(2).startsWith("** RESERVED **")) { contReserved++; } else if (csvRecord.get(2).startsWith("** REJECT **")) { contReject++; } else if (csvRecord.get(2).startsWith("** DISPUTED **")) { contDisputed++; } else { if (last.equals(csvRecord.get(2))) { contEqual++; } else { // csvPrinter.printRecord(csvRecord); } last = csvRecord.get(2); } } System.out.println("Nmero de Registros: " + parser.getRecordNumber()); //csvPrinter.close(); } catch (IOException ex) { Logger.getLogger(CveCsvReader.class.getName()).log(Level.SEVERE, null, ex); } System.out.println("Nmero CANDIDATE: " + contCandidate); System.out.println("Nmero ENTRY: " + contEntry); System.out.println("Nmero REJECT: " + contReject); System.out.println("Nmero RESERVED: " + contReserved); System.out.println("Nmero DISPUTED: " + contDisputed); System.out.println("Nmero IGUAIS: " + contEqual); }