Example usage for org.apache.commons.csv CSVParser getRecordNumber

List of usage examples for org.apache.commons.csv CSVParser getRecordNumber

Introduction

On this page you can find example usages of org.apache.commons.csv.CSVParser.getRecordNumber().

Prototype

public long getRecordNumber() 

Source Link

Document

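Returns the current record number in the input stream. Note that this is a count of the records parsed so far, not a line number: if the CSV input contains multi-line values, the two will differ.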

Usage

From source file:ch.silviowangler.i18n.ResourceBundler.java

/**
 * Reads the configured CSV file and writes one {@code .properties} file per entry of
 * {@code propertiesStore}.
 *
 * <p>The header names are handed to {@code processHeader} and every following record to
 * {@code processData}. Afterwards each map in {@code propertiesStore} is written as
 * {@code key=value} lines to {@code <bundleBaseName>_<language>.properties} in {@code outputDir}.
 *
 * @throws IOException if the CSV file cannot be read or a properties file cannot be written
 */
public void generateResourceBundle() throws IOException {

    final long recordCount;

    // try-with-resources closes the parser (and with it the underlying file reader)
    // on every exit path, including failures inside processHeader/processData.
    try (CSVParser records = CSVFormat.RFC4180.withDelimiter(separator.charAt(0)).withFirstRecordAsHeader()
            .withQuoteMode(QuoteMode.ALL)
            .parse(new InputStreamReader(new FileInputStream(this.csvFile), this.inputEncoding))) {

        final Map<String, Integer> headers = records.getHeaderMap();

        processHeader(headers.keySet());

        for (CSVRecord record : records) {
            processData(record);
        }

        // Read the counter while the parser is still open; it now holds the number of parsed records.
        recordCount = records.getRecordNumber();
    }

    final int propertiesFilesAmount = this.propertiesStore.size();
    LOGGER.info("Will generate {} properties files with {} records each", propertiesFilesAmount,
            recordCount);

    // Write the properties files
    for (int i = 0; i < propertiesFilesAmount; i++) {
        Map<String, String> properties = this.propertiesStore.get(i);
        File outputFile = new File(this.outputDir,
                this.bundleBaseName + "_" + this.languages.get(i) + ".properties");

        LOGGER.info("Writing {} to {}", outputFile.getName(), outputFile.getParentFile().getAbsolutePath());

        // Both resources are declared in the try header so the file stream is released
        // even if the writer cannot be created (e.g. unsupported encoding).
        try (FileOutputStream outputStream = new FileOutputStream(outputFile);
                OutputStreamWriter writer = new OutputStreamWriter(outputStream,
                        this.native2ascii ? Consts.ASCII : this.outputEncoding)) {
            // A plain loop (instead of Map.forEach) lets a write failure propagate as
            // IOException rather than being printed and silently producing a truncated file.
            for (Map.Entry<String, String> entry : properties.entrySet()) {
                writer.append(entry.getKey()).append("=").append(entry.getValue()).append("\n");
            }
            writer.flush();
        }
    }
}

From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.csv.CSVFileReader.java

/**
 * Ingests a CSV stream in two passes and writes the result as tab-delimited rows.
 *
 * <p>The first pass copies every record into a temporary file while determining, per column,
 * whether all values are numeric, integer, time or date values. The second pass re-reads the
 * temporary file and prints each record to {@code finalOut} as one tab-separated line, formatted
 * according to the detected column types. The temporary file is written and read as UTF-8 and is
 * deleted on every exit path.
 *
 * @param csvReader reader supplying the CSV input; closed once the first pass has completed
 * @param dataTable table that receives the variable definitions and the case (record) count
 * @param finalOut writer receiving the tab-delimited output; closed once the second pass has completed
 * @return the number of records read in the second pass
 * @throws IOException if a header name is null or empty, a record's size does not match the header,
 *         the record counts of the two passes differ, or an I/O error occurs
 */
public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter finalOut) throws IOException {

    List<DataVariable> variableList = new ArrayList<>();
    CSVParser parser = new CSVParser(csvReader, inFormat.withHeader());
    Map<String, Integer> headers = parser.getHeaderMap();

    int i = 0;
    for (String varName : headers.keySet()) {
        if (varName == null || varName.isEmpty()) {
            // TODO:
            // Add a sensible variable name validation algorithm.
            // -- L.A. 4.0 alpha 1
            throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.invalidHeader"));
        }

        DataVariable dv = new DataVariable();
        dv.setName(varName);
        dv.setLabel(varName);
        dv.setInvalidRanges(new ArrayList<>());
        dv.setSummaryStatistics(new ArrayList<>());
        dv.setUnf("UNF:6:NOTCALCULATED");
        dv.setCategories(new ArrayList<>());
        variableList.add(dv);

        // Every variable starts out as a discrete character variable;
        // numeric columns are re-typed after the first pass.
        dv.setTypeCharacter();
        dv.setIntervalDiscrete();
        dv.setFileOrder(i);
        dv.setDataTable(dataTable);
        i++;
    }

    dataTable.setVarQuantity((long) variableList.size());
    dataTable.setDataVariables(variableList);

    boolean[] isNumericVariable = new boolean[headers.size()];
    boolean[] isIntegerVariable = new boolean[headers.size()];
    boolean[] isTimeVariable = new boolean[headers.size()];
    boolean[] isDateVariable = new boolean[headers.size()];

    for (i = 0; i < headers.size(); i++) {
        // OK, let's assume that every variable is numeric;
        // but we'll go through the file and examine every value; the
        // moment we find a value that's not a legit numeric one, we'll
        // assume that it is in fact a String.
        isNumericVariable[i] = true;
        isIntegerVariable[i] = true;
        isDateVariable[i] = true;
        isTimeVariable[i] = true;
    }

    // First, "learning" pass.
    // (we'll save the incoming stream in another temp file:)
    SimpleDateFormat[] selectedDateTimeFormat = new SimpleDateFormat[headers.size()];
    SimpleDateFormat[] selectedDateFormat = new SimpleDateFormat[headers.size()];

    File firstPassTempFile = File.createTempFile("firstpass-", ".csv");

    // Everything that touches the temp file runs inside try/finally so the file is
    // removed even when one of the passes fails with an exception.
    try {
        // The temp file is written (and later re-read) with an explicit UTF-8 charset so that
        // characters the platform default charset cannot encode are not replaced on the way through.
        try (CSVPrinter csvFilePrinter = new CSVPrinter(
                // TODO allow other parsers of tabular data to use this parser by changing inFormat
                new java.io.OutputStreamWriter(new java.io.FileOutputStream(firstPassTempFile),
                        java.nio.charset.StandardCharsets.UTF_8),
                inFormat)) {
            //Write  headers
            csvFilePrinter.printRecord(headers.keySet());
            for (CSVRecord record : parser.getRecords()) {
                // Checks if #records = #columns in header
                if (!record.isConsistent()) {
                    List<String> args = Arrays.asList(new String[] { "" + (parser.getCurrentLineNumber() - 1),
                            "" + headers.size(), "" + record.size() });
                    throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args));
                }

                for (i = 0; i < headers.size(); i++) {
                    String varString = record.get(i);
                    // A column stays "integer" only while every value is empty, "null", or a
                    // character from firstNumCharSet followed by digits only.
                    isIntegerVariable[i] = isIntegerVariable[i] && varString != null
                            && (varString.isEmpty() || varString.equals("null")
                                    || (firstNumCharSet.contains(varString.charAt(0))
                                            && StringUtils.isNumeric(varString.substring(1))));
                    if (isNumericVariable[i]) {
                        // If variable might be "numeric" test to see if this value is a parsable number:
                        if (varString != null && !varString.isEmpty()) {

                            if (varString.equalsIgnoreCase("NaN") || varString.equalsIgnoreCase("NA")
                                    || varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf")
                                    || varString.equalsIgnoreCase("-Inf") || varString.equalsIgnoreCase("null")) {
                                // Special numeric token: the column is still numeric, go to the next column.
                                continue;
                            } else {
                                try {
                                    // Only the parse outcome matters; the value itself is not needed.
                                    Double.parseDouble(varString);
                                    continue;
                                } catch (NumberFormatException ex) {
                                    // the token failed to parse as a double
                                    // so the column is a string variable.
                                }
                            }
                            isNumericVariable[i] = false;
                        }
                    }

                    // If this is not a numeric column, see if it is a date column
                    // by parsing the cell as a date or date-time value:
                    if (!isNumericVariable[i]) {

                        Date dateResult = null;

                        if (isTimeVariable[i]) {
                            if (varString != null && !varString.isEmpty()) {
                                boolean isTime = false;

                                if (selectedDateTimeFormat[i] != null) {
                                    // A format was already selected for this column;
                                    // the value must match it completely.
                                    ParsePosition pos = new ParsePosition(0);
                                    dateResult = selectedDateTimeFormat[i].parse(varString, pos);

                                    if (dateResult != null && pos.getIndex() == varString.length()) {
                                        // OK, successfully parsed a value!
                                        isTime = true;
                                    }
                                } else {
                                    // No format selected yet: take the first one that
                                    // consumes the whole value.
                                    for (SimpleDateFormat format : TIME_FORMATS) {
                                        ParsePosition pos = new ParsePosition(0);
                                        dateResult = format.parse(varString, pos);
                                        if (dateResult != null && pos.getIndex() == varString.length()) {
                                            // OK, successfully parsed a value!
                                            isTime = true;
                                            selectedDateTimeFormat[i] = format;
                                            break;
                                        }
                                    }
                                }
                                if (!isTime) {
                                    isTimeVariable[i] = false;
                                    // if the token didn't parse as a time value,
                                    // we will still try to parse it as a date, below.
                                    // unless this column is NOT a date.
                                } else {
                                    // And if it is a time value, we are going to assume it's
                                    // NOT a date.
                                    isDateVariable[i] = false;
                                }
                            }
                        }

                        if (isDateVariable[i]) {
                            if (varString != null && !varString.isEmpty()) {
                                boolean isDate = false;

                                // TODO:
                                // Strictly speaking, we should be doing the same thing
                                // here as with the time formats above; select the
                                // first one that works, then insist that all the
                                // other values in this column match it... but we
                                // only have one, as of now, so it should be ok.
                                // -- L.A. 4.0 beta
                                for (SimpleDateFormat format : DATE_FORMATS) {
                                    // Strict parsing - it will throw an
                                    // exception if it doesn't parse!
                                    format.setLenient(false);
                                    try {
                                        format.parse(varString);
                                        isDate = true;
                                        selectedDateFormat[i] = format;
                                        break;
                                    } catch (ParseException ex) {
                                        //Do nothing
                                    }
                                }
                                isDateVariable[i] = isDate;
                            }
                        }
                    }
                }

                csvFilePrinter.printRecord(record);
            }
        }
        dataTable.setCaseQuantity(parser.getRecordNumber());
        parser.close();
        csvReader.close();

        // Re-type the variables that we've determined are numerics:
        for (i = 0; i < headers.size(); i++) {
            if (isNumericVariable[i]) {
                dataTable.getDataVariables().get(i).setTypeNumeric();

                if (isIntegerVariable[i]) {
                    dataTable.getDataVariables().get(i).setIntervalDiscrete();
                } else {
                    dataTable.getDataVariables().get(i).setIntervalContinuous();
                }
            } else if (isDateVariable[i] && selectedDateFormat[i] != null) {
                // Dates are still Strings, i.e., they are "character" and "discrete";
                // But we add special format values for them:
                dataTable.getDataVariables().get(i).setFormat(DATE_FORMATS[0].toPattern());
                dataTable.getDataVariables().get(i).setFormatCategory("date");
            } else if (isTimeVariable[i] && selectedDateTimeFormat[i] != null) {
                // Same for time values:
                dataTable.getDataVariables().get(i).setFormat(selectedDateTimeFormat[i].toPattern());
                dataTable.getDataVariables().get(i).setFormatCategory("time");
            }
        }
        // Second, final pass.
        // The temp file is read back with the same UTF-8 charset it was written with.
        try (BufferedReader secondPassReader = new BufferedReader(new java.io.InputStreamReader(
                new java.io.FileInputStream(firstPassTempFile), java.nio.charset.StandardCharsets.UTF_8))) {
            parser = new CSVParser(secondPassReader, inFormat.withHeader());
            String[] caseRow = new String[headers.size()];

            for (CSVRecord record : parser) {
                if (!record.isConsistent()) {
                    List<String> args = Arrays.asList(new String[] { "" + (parser.getCurrentLineNumber() - 1),
                            "" + headers.size(), "" + record.size() });
                    throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args));
                }

                for (i = 0; i < headers.size(); i++) {
                    String varString = record.get(i);
                    if (isNumericVariable[i]) {
                        if (varString == null || varString.isEmpty() || varString.equalsIgnoreCase("NA")) {
                            // Missing value - represented as an empty string in
                            // the final tab file
                            caseRow[i] = "";
                        } else if (varString.equalsIgnoreCase("NaN")) {
                            // "Not a Number" special value:
                            caseRow[i] = "NaN";
                        } else if (varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf")) {
                            // Positive infinity:
                            caseRow[i] = "Inf";
                        } else if (varString.equalsIgnoreCase("-Inf")) {
                            // Negative infinity:
                            caseRow[i] = "-Inf";
                        } else if (varString.equalsIgnoreCase("null")) {
                            // By request from Gus - "NULL" is recognized as a
                            // numeric zero:
                            caseRow[i] = isIntegerVariable[i] ? "0" : "0.0";
                        } else {
                            // No re-formatting is done on any other numeric values.
                            // We'll save them as they were, for archival purposes.
                            caseRow[i] = varString;
                        }
                    } else if (isTimeVariable[i] || isDateVariable[i]) {
                        // Time and Dates are stored NOT quoted (don't ask).
                        if (varString != null) {
                            // Dealing with quotes:
                            // remove the leading and trailing quotes, if present:
                            varString = varString.replaceFirst("^\"*", "");
                            varString = varString.replaceFirst("\"*$", "");
                            caseRow[i] = varString;
                        } else {
                            caseRow[i] = "";
                        }
                    } else {
                        // Treat as a String:
                        // Strings are stored in tab files quoted;
                        // Missing values are stored as an empty string
                        // between two tabs (or one tab and the new line);
                        // Empty strings stored as "" (quoted empty string).
                        // For the purposes  of this CSV ingest reader, we are going
                        // to assume that all the empty strings in the file are
                        // indeed empty strings, and NOT missing values:
                        if (varString != null) {
                            // escape the quotes, newlines, and tabs:
                            varString = varString.replace("\"", "\\\"");
                            varString = varString.replace("\n", "\\n");
                            varString = varString.replace("\t", "\\t");
                            // final pair of quotes:
                            varString = "\"" + varString + "\"";
                            caseRow[i] = varString;
                        } else {
                            caseRow[i] = "\"\"";
                        }
                    }
                }
                finalOut.println(StringUtils.join(caseRow, "\t"));
            }
        }
        long linecount = parser.getRecordNumber();
        finalOut.close();
        parser.close();
        dbglog.fine("Tmp File: " + firstPassTempFile);
        // Both passes must have seen the same number of records.
        if (dataTable.getCaseQuantity().intValue() != linecount) {
            List<String> args = Arrays
                    .asList(new String[] { "" + dataTable.getCaseQuantity().intValue(), "" + linecount });
            throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.line_mismatch", args));
        }
        return (int) linecount;
    } finally {
        // Firstpass file is deleted to prevent tmp from filling up.
        firstPassTempFile.delete();
    }
}

From source file:nl.utwente.ewi.caes.tactiletriana.simulation.devices.UncontrollableLoad.java

/**
 * Creates an uncontrollable load based on one of six house consumption profiles.
 *
 * <p>The profile data is read from the CSV dataset only while the {@code profile} field is still
 * {@code null}; once it has been loaded successfully, later constructions reuse it.
 *
 * @param profileNumber a number between 0 and 5 (inclusive) which selects
 * the profile data on which this instance is based
 * @param simulation the simulation handed to the superclass constructor
 * @throws IllegalArgumentException if {@code profileNumber} is not in the range 0 to 5
 * @throws RuntimeException if the profile dataset cannot be read
 */
public UncontrollableLoad(int profileNumber, Simulation simulation) {
    super("Uncontrollable Load", simulation);

    if (profileNumber < 0 || profileNumber > 5) {
        throw new IllegalArgumentException("profileNumber must be in the range of 0 to 5");
    }

    this.profileNumber = profileNumber;

    //Load the profile data into an array from the CSV file containing power consumptions for 6 houses.
    if (profile == null) {
        // Fill a local array and publish it only after the whole file was read, so a failed
        // load does not leave a half-filled array behind that later instances would reuse.
        double[][] loadedProfile = new double[6][525608];
        File csvData = new File("src/main/resources/datasets/watt_house_profiles_year.csv");
        // Jan Harm: you can simply create a format :)
        CSVFormat format = CSVFormat.DEFAULT.withDelimiter(';');
        // try-with-resources closes the parser and the underlying file on every exit path.
        try (CSVParser parser = CSVParser.parse(csvData, Charset.defaultCharset(), format)) {
            for (CSVRecord csvRecord : parser) {
                // NOTE(review): the record number is already 1 for the first record, so row 0
                // is never written - confirm whether that offset is intended.
                int row = (int) parser.getRecordNumber();
                for (int p = 0; p < 6; p++) {
                    loadedProfile[p][row] = Double.parseDouble(csvRecord.get(p));
                }
            }
        } catch (IOException e) {
            throw new RuntimeException("Error while parsing house profile dataset", e);
        }
        profile = loadedProfile;
    }
}

From source file:org.nuxeo.ecm.csv.core.CSVImporterWork.java

/**
 * Derives a line number from the parser's record counter.
 *
 * @param parser the CSV parser whose current record number is read
 * @return the parser's current record number plus one
 */
protected long getLineNumber(CSVParser parser) {
    final long recordsRead = parser.getRecordNumber();
    return 1 + recordsRead;
}

From source file:org.nuxeo.ecm.user.center.profile.UserProfileImporter.java

/**
 * Imports user profiles from the records supplied by the given CSV parser.
 *
 * <p>The import is skipped (with an error log entry) when the user profile document type does
 * not exist, the parser has no header map, or the header lacks the username column. Otherwise
 * every record is passed to {@code importLine}; the transaction is committed and restarted after
 * each {@code batchSize} updated documents and once more when the import ends. A
 * {@code NuxeoException} raised for a single line is logged and the import continues with the
 * next line.
 *
 * @param session the core session, saved after all records were processed
 * @param parser the CSV parser providing the header map and the records
 * @param userProfileService service handed through to {@code importLine}
 * @throws IOException if the records cannot be read from the parser
 */
public void doImport(CoreSession session, CSVParser parser, UserProfileService userProfileService)
        throws IOException {
    log.info(String.format("Importing CSV file: %s", dataFileName));

    DocumentType docType = Framework.getLocalService(SchemaManager.class)
            .getDocumentType(UserProfileConstants.USER_PROFILE_DOCTYPE);
    if (docType == null) {
        log.error("The type " + UserProfileConstants.USER_PROFILE_DOCTYPE + " does not exist");
        return;
    }

    Map<String, Integer> header = parser.getHeaderMap();

    if (header == null) {
        // empty file?
        log.error("No header line, empty file?");
        return;
    }

    // find the index for the required name and type values
    Integer nameIndex = header.get(UserProfileImporter.USER_PROFILE_IMPORTER_USERNAME_COL);
    if (nameIndex == null) {
        log.error("Missing 'username' column");
        return;
    }

    long docsUpdatedCount = 0;
    // Reset the total up front; the real value is only known once the records were parsed.
    totalRecords = parser.getRecordNumber();
    try {
        int batchSize = config.getBatchSize();
        long lineNumber = 0;

        // The parser's record counter only covers records that were already parsed, so it is
        // read again after getRecords() has consumed the input; before that it does not
        // reflect the records of this file.
        Iterable<CSVRecord> records = parser.getRecords();
        totalRecords = parser.getRecordNumber();

        for (CSVRecord record : records) {
            lineNumber++;
            currentRecord = lineNumber;

            try {
                if (importLine(record, lineNumber, nameIndex, docType, session, userProfileService, header)) {
                    docsUpdatedCount++;
                    // Commit in batches to keep the transactions small.
                    if (docsUpdatedCount % batchSize == 0) {
                        commitOrRollbackTransaction();
                        startTransaction();
                    }
                }
            } catch (NuxeoException e) {
                // try next line
                Throwable unwrappedException = unwrapException(e);
                logImportError(lineNumber, "Error while importing line: %s", unwrappedException.getMessage());
                log.debug(unwrappedException, unwrappedException);
            }
        }

        session.save();
    } finally {
        commitOrRollbackTransaction();
        startTransaction();
    }
    log.info(String.format("Done importing %s entries from CSV file: %s", docsUpdatedCount, dataFileName));
}

From source file:testes.CveCsvReader.java

/**
 * Reads the CVE dataset CSV file and prints the number of records together with counters for
 * the values found in the second and third column.
 *
 * <p>An {@code IOException} while reading is logged at {@code SEVERE} level; the counters
 * collected so far are still printed.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    int contCandidate = 0, contEntry = 0, contReserved = 0, contReject = 0, contDisputed = 0, contEqual = 0;
    String last = "";

    File csvData = new File("dataset/base_dados_cve.csv");

    // try-with-resources closes the parser and the underlying file reader on every exit path.
    try (CSVParser parser = CSVParser.parse(csvData, Charset.forName("ISO-8859-1"), CSVFormat.RFC4180)) {
        for (CSVRecord csvRecord : parser) {
            String secondColumn = csvRecord.get(1);
            if (secondColumn.equals("Candidate")) {
                contCandidate++;
            } else if (secondColumn.equals("Entry")) {
                contEntry++;
            }

            String thirdColumn = csvRecord.get(2);
            if (thirdColumn.startsWith("** RESERVED **")) {
                contReserved++;
            } else if (thirdColumn.startsWith("** REJECT **")) {
                contReject++;
            } else if (thirdColumn.startsWith("** DISPUTED **")) {
                contDisputed++;
            } else {
                // Count records whose third column repeats the previous unmarked record's value.
                if (last.equals(thirdColumn)) {
                    contEqual++;
                }

                last = thirdColumn;
            }
        }
        System.out.println("Nmero de Registros: " + parser.getRecordNumber());
    } catch (IOException ex) {
        Logger.getLogger(CveCsvReader.class.getName()).log(Level.SEVERE, null, ex);
    }
    System.out.println("Nmero CANDIDATE: " + contCandidate);
    System.out.println("Nmero ENTRY: " + contEntry);

    System.out.println("Nmero REJECT: " + contReject);
    System.out.println("Nmero RESERVED: " + contReserved);
    System.out.println("Nmero DISPUTED: " + contDisputed);

    System.out.println("Nmero IGUAIS: " + contEqual);
}