List of usage examples for org.apache.commons.csv CSVRecord size
public int size()
From source file:com.kdmanalytics.toif.report.internal.importWizard.TsvImportWizardPage.java
/**
 * Perform the actual load: parse the selected TSV file and create a
 * FindingData entry (with its citing status) for each importable row,
 * then refresh the FindingView.
 *
 * The first record is treated as a header row; its cell text is used to
 * map column indexes to column names for all subsequent data rows.
 *
 * @return always true (errors are reported via printed stack traces, not
 *         the return value)
 */
public boolean finish() {
  // Check source file
  final String name = editor.getStringValue();
  setErrorMessage("Importing " + name + " into " + project + "...");
  IPath location = new Path(name);
  File file = location.toFile();
  Reader in = null;
  CSVParser parser = null;
  try {
    in = new FileReader(file);
    // Tab-separated variant of the Excel CSV dialect; blank lines are skipped.
    CSVFormat format = CSVFormat.EXCEL.withDelimiter('\t').withIgnoreEmptyLines();
    parser = new CSVParser(in, format);
    System.err.println("FILE: " + name);
    // Maps column index -> column name, filled in from the header row.
    Map<Integer, String> lookup = new HashMap<Integer, String>();
    boolean header = true;
    for (CSVRecord record : parser) {
      int size = record.size();
      // Per-row import state, reset for every record.
      IFile ifile = null;
      String tool = null;
      String description = null;
      int line = 0;
      int offset = 0;
      int trust = 0;
      Boolean status = null;
      int kdmLine = 0;
      String cwe = null;
      String sfp = null;
      // Read the header first
      if (header) {
        System.err.print(" ");
        for (int i = 0; i < size; i++) {
          if (i > 0) System.err.print(",");
          String cell = record.get(i);
          lookup.put(i, cell);
          System.err.print(cell);
        }
        header = false;
        System.err.println();
        System.err.println(" ------------------------------------------");
      }
      // Otherwise this is a data row
      else {
        for (int i = 0; i < size; i++) {
          String cell = record.get(i);
          String colName = lookup.get(i);
          if ("Resource".equals(colName)) {
            // Resolve the resource path against the project to find the IFile.
            IFileGroup group = new FileGroup(cell);
            try {
              IResource resource = MemberUtil.findMembers(project, group);
              if (resource != null) {
                ifile = (IFile) resource;
              }
            } catch (CoreException e) {
              e.printStackTrace();
            }
          } else if ("SFP".equals(colName)) {
            sfp = cell;
          } else if ("CWE".equals(colName)) {
            cwe = cell;
          }
          // Valid is *old* name for "Citing Status"
          else if ("Valid".equals(colName)) {
            if (cell != null && !cell.trim().isEmpty()) {
              status = Boolean.parseBoolean(cell);
            }
          } else if ("Citing Status".equals(colName)) {
            if (cell != null && !cell.trim().isEmpty()) {
              status = Boolean.parseBoolean(cell);
            }
          } else if ("Trust".equals(colName)) {
            if (cell != null && !cell.trim().isEmpty()) {
              try {
                trust = Integer.parseInt(cell);
              } catch (NumberFormatException e) {
                // Non-numeric trust values are ignored; trust keeps its default of 0.
              }
            }
          } else if ("Confidence".equals(colName)) {
            // "Confidence" is an alternate column name for the trust value.
            if (cell != null && !cell.trim().isEmpty()) {
              try {
                trust = Integer.parseInt(cell);
              } catch (NumberFormatException e) {
                // Ignored; trust keeps its previous value.
              }
            }
          } else if ("Line Number".equals(colName)) {
            if (cell != null && !cell.trim().isEmpty()) {
              try {
                line = Integer.parseInt(cell);
              } catch (NumberFormatException e) {
                // Ignored; line keeps its default of 0.
              }
            }
          } else if ("KDM Line Number".equals(colName)) {
            if (cell != null && !cell.trim().isEmpty()) {
              try {
                kdmLine = Integer.parseInt(cell);
              } catch (NumberFormatException e) {
                // Ignored; kdmLine keeps its default of 0.
              }
            }
          }
          // "Generator Tool" is *old* name for "SCA Tool"
          else if ("Generator Tool".equals(colName)) {
            tool = cell;
          } else if ("SCA tool".equalsIgnoreCase(colName)) {
            tool = cell;
          } else if ("Weakness Description".equals(colName)) {
            description = cell;
          } else {
            System.err.println("WARNING: Unknown column name '" + colName + "'");
          }
        }
        // Debug dump of the parsed row.
        System.err.print(" ");
        System.err.print(sfp);
        System.err.print(",");
        System.err.print(cwe);
        System.err.print(",");
        System.err.print(status);
        System.err.print(",");
        System.err.print(trust);
        System.err.print(",");
        System.err.print(ifile);
        System.err.print(",");
        System.err.print(line);
        System.err.print(",");
        System.err.print(kdmLine);
        System.err.print(",");
        System.err.print(tool);
        System.err.print(",");
        System.err.print(description);
        System.err.println();
        if (ifile != null) {
          // Create an associated finding. This will allow us to
          // set the citing status for the finding. If the
          // finding does not actually exist in the database this information
          // is still stored in case the finding exists in the future.
          FindingData finding = new FindingData(ifile, tool, description, line, offset, cwe, sfp);
          if (status != null) {
            finding.cite(status);
          }
        }
      }
    }
    // Refresh the FindingView so the imported citings become visible immediately.
    try {
      IWorkbenchWindow window = PlatformUI.getWorkbench().getActiveWorkbenchWindow();
      if (window != null) {
        IWorkbenchPage page = window.getActivePage();
        if (page != null) {
          FindingView view = (FindingView) page.showView("com.kdmanalytics.toif.views.FindingView");
          view.refresh();
        }
      }
    } catch (PartInitException e) {
      e.printStackTrace();
    }
  } catch (IOException e) {
    e.printStackTrace();
  } finally {
    if (parser != null) {
      try {
        parser.close();
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
    if (in != null) {
      try {
        in.close();
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  }
  // NOTE(review): a commented-out alternative that scheduled the import as a
  // background ToifReportImportJob was removed for clarity.
  return true;
}
From source file:com.archimatetool.csv.importer.CSVImporter.java
/** * @param csvRecord/* w w w . ja v a 2 s .co m*/ * @param fields * @return True if csvRecord matches a header with given fields */ private boolean isHeaderRecord(CSVRecord csvRecord, String[] fields) { if (csvRecord.getRecordNumber() != 1 && csvRecord.size() != fields.length) { return false; } for (int i = 0; i < fields.length; i++) { String field = fields[i]; if (!field.equalsIgnoreCase(csvRecord.get(i))) { return false; } } return true; }
From source file:com.adobe.aem.demo.communities.Loader.java
private static List<NameValuePair> buildNVP(CSVRecord record, int start) { List<NameValuePair> nameValuePairs = new ArrayList<NameValuePair>(); nameValuePairs.add(new BasicNameValuePair("_charset_", "UTF-8")); for (int i = start; i < record.size() - 1; i = i + 2) { if (record.get(i) != null && record.get(i + 1) != null && record.get(i).length() > 0) { // We have a non String hint to pass to the POST servlet String name = record.get(i); String value = record.get(i + 1); if (value.equals("TRUE")) { value = "true"; }// w w w . j a va 2s . c o m if (value.equals("FALSE")) { value = "false"; } int hint = name.indexOf("@"); if (hint > 0) { logger.debug(name.substring(0, hint) + "@TypeHint:" + name.substring(1 + hint)); nameValuePairs.add(new BasicNameValuePair(name.substring(0, hint) + "@TypeHint", name.substring(1 + hint))); name = name.substring(0, hint); } else { nameValuePairs.add(new BasicNameValuePair(name + "@TypeHint", "String")); } // We have multiple values to pass to the POST servlet, e.g. for a String[] int multiple = value.indexOf("|"); if (multiple > 0) { List<String> values = Arrays.asList(value.split("\\|", -1)); for (String currentValue : values) { nameValuePairs.add(new BasicNameValuePair(name, currentValue)); logger.debug(name + " " + currentValue); } } else { nameValuePairs.add(new BasicNameValuePair(name, value)); } logger.debug("Setting property " + name + " with value " + value); } } return nameValuePairs; }
From source file:com.datascience.cascading.scheme.CsvScheme.java
/**
 * Validates that the input file has the same number of columns as the
 * headers configured on the format.
 *
 * @param flowProcess the current flow process
 * @param tap the tap providing the source data
 * @return true if the header record's column count matches the configured
 *         header length
 */
protected boolean validateHeaders(FlowProcess<JobConf> flowProcess, Tap tap) {
    // The original declared an unused local (`int i = headerRecord.size();`);
    // it has been removed.
    CSVRecord headerRecord = getHeaderRecord(flowProcess, tap);
    return headerRecord.size() == format.getHeader().length;
}
From source file:com.datascience.cascading.scheme.CsvScheme.java
/**
 * Checks whether the given fields match the columns in the source header
 * record.
 *
 * @param flowProcess the current flow process
 * @param tap the tap providing the source header
 * @param sourceFields the fields to compare against the header
 * @return true when the field count equals the header column count
 */
protected boolean doFieldsMatchColumns(FlowProcess<JobConf> flowProcess, Tap tap, Fields sourceFields) {
    CSVRecord header = getHeaderRecord(flowProcess, tap);
    // The match is purely positional: same number of fields as header columns.
    int expected = header.size();
    int actual = sourceFields.size();
    return actual == expected;
}
From source file:com.datascience.cascading.scheme.CsvScheme.java
/**
 * Detects the header fields from the source's header record.
 *
 * @param flowProcess the current flow process
 * @param tap the tap providing the source header
 * @param genericNames when true, synthesize positional names ("col0",
 *        "col1", ...) instead of using the header cell text
 * @return the detected fields
 */
@SuppressWarnings("unchecked")
protected Fields detectHeader(FlowProcess<JobConf> flowProcess, Tap tap, boolean genericNames) {
    CSVRecord headerRecord = getHeaderRecord(flowProcess, tap);
    int columns = headerRecord.size();
    String[] names = new String[columns];
    for (int col = 0; col < columns; col++) {
        // Either a synthetic positional name or the actual header cell.
        names[col] = genericNames ? String.format("col%d", col) : headerRecord.get(col);
    }
    return new Fields(names);
}
From source file:com.datascience.cascading.scheme.CsvScheme.java
/**
 * Validates that every passed field is present in the source headers.
 *
 * @param flowProcess the current flow process
 * @param tap the tap providing the source header
 * @param sourceFields the fields that must all appear in the header
 * @return true when every field name occurs in the header record
 */
protected boolean validateFields(FlowProcess<JobConf> flowProcess, Tap tap, Fields sourceFields) {
    CSVRecord headerRecord = getHeaderRecord(flowProcess, tap);
    int columns = headerRecord.size();
    // More fields than columns can never all be present.
    if (sourceFields.size() > columns) {
        return false;
    }
    // Collect the header names once, then check membership for each field.
    List<String> headerNames = new ArrayList<String>();
    for (int col = 0; col < columns; col++) {
        headerNames.add(headerRecord.get(col));
    }
    boolean allPresent = true;
    for (int f = 0; f < sourceFields.size() && allPresent; f++) {
        allPresent = headerNames.contains(sourceFields.get(f));
    }
    return allPresent;
}
From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.csv.CSVFileReader.java
/**
 * Reads a CSV file in two passes and writes it out as a tab-delimited file.
 *
 * Pass 1 ("learning"): streams the records to a temp file while inferring,
 * per column, whether the values are numeric, integer, date, or time.
 * Between passes the inferred types are written onto the DataTable's
 * variables. Pass 2 ("final"): re-reads the temp file and emits each row to
 * finalOut with type-appropriate formatting (numerics as-is with special
 * values normalized, dates/times unquoted, strings quoted and escaped).
 *
 * @param csvReader reader over the incoming CSV data (closed by this method)
 * @param dataTable table whose variables are created and re-typed here
 * @param finalOut destination for the tab-delimited output (closed by this method)
 * @return the number of data records ingested
 * @throws IOException on invalid headers, inconsistent records, or a
 *         line-count mismatch between the two passes
 */
public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter finalOut) throws IOException {
    List<DataVariable> variableList = new ArrayList<>();
    CSVParser parser = new CSVParser(csvReader, inFormat.withHeader());
    Map<String, Integer> headers = parser.getHeaderMap();
    int i = 0;
    // Create one DataVariable per header column, initially typed as
    // character/discrete; pass 1 may re-type them below.
    for (String varName : headers.keySet()) {
        if (varName == null || varName.isEmpty()) {
            // TODO:
            // Add a sensible variable name validation algorithm.
            // -- L.A. 4.0 alpha 1
            throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.invalidHeader"));
        }
        DataVariable dv = new DataVariable();
        dv.setName(varName);
        dv.setLabel(varName);
        dv.setInvalidRanges(new ArrayList<>());
        dv.setSummaryStatistics(new ArrayList<>());
        dv.setUnf("UNF:6:NOTCALCULATED");
        dv.setCategories(new ArrayList<>());
        variableList.add(dv);
        dv.setTypeCharacter();
        dv.setIntervalDiscrete();
        dv.setFileOrder(i);
        dv.setDataTable(dataTable);
        i++;
    }
    dataTable.setVarQuantity((long) variableList.size());
    dataTable.setDataVariables(variableList);
    // Per-column type flags, refined while scanning the data in pass 1.
    boolean[] isNumericVariable = new boolean[headers.size()];
    boolean[] isIntegerVariable = new boolean[headers.size()];
    boolean[] isTimeVariable = new boolean[headers.size()];
    boolean[] isDateVariable = new boolean[headers.size()];
    for (i = 0; i < headers.size(); i++) {
        // OK, let's assume that every variable is numeric;
        // but we'll go through the file and examine every value; the
        // moment we find a value that's not a legit numeric one, we'll
        // assume that it is in fact a String.
        isNumericVariable[i] = true;
        isIntegerVariable[i] = true;
        isDateVariable[i] = true;
        isTimeVariable[i] = true;
    }
    // First, "learning" pass.
    // (we'll save the incoming stream in another temp file:)
    SimpleDateFormat[] selectedDateTimeFormat = new SimpleDateFormat[headers.size()];
    SimpleDateFormat[] selectedDateFormat = new SimpleDateFormat[headers.size()];
    File firstPassTempFile = File.createTempFile("firstpass-", ".csv");
    try (CSVPrinter csvFilePrinter = new CSVPrinter(
            // TODO allow other parsers of tabular data to use this parser by changin inFormat
            new FileWriter(firstPassTempFile.getAbsolutePath()), inFormat)) {
        //Write headers
        csvFilePrinter.printRecord(headers.keySet());
        for (CSVRecord record : parser.getRecords()) {
            // Checks if #records = #columns in header
            if (!record.isConsistent()) {
                List<String> args = Arrays.asList(new String[] { "" + (parser.getCurrentLineNumber() - 1),
                        "" + headers.size(), "" + record.size() });
                throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args));
            }
            for (i = 0; i < headers.size(); i++) {
                String varString = record.get(i);
                // A column stays "integer" only while every value looks like an
                // optionally-signed run of digits, is empty, or is "null".
                isIntegerVariable[i] = isIntegerVariable[i] && varString != null && (varString.isEmpty()
                        || varString.equals("null") || (firstNumCharSet.contains(varString.charAt(0))
                                && StringUtils.isNumeric(varString.substring(1))));
                if (isNumericVariable[i]) {
                    // If variable might be "numeric" test to see if this value is a parsable number:
                    if (varString != null && !varString.isEmpty()) {
                        boolean isNumeric = false;
                        boolean isInteger = false;
                        if (varString.equalsIgnoreCase("NaN") || varString.equalsIgnoreCase("NA")
                                || varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf")
                                || varString.equalsIgnoreCase("-Inf") || varString.equalsIgnoreCase("null")) {
                            // Recognized special numeric tokens keep the column numeric.
                            continue;
                        } else {
                            try {
                                Double testDoubleValue = new Double(varString);
                                continue;
                            } catch (NumberFormatException ex) {
                                // the token failed to parse as a double
                                // so the column is a string variable.
                            }
                        }
                        isNumericVariable[i] = false;
                    }
                }
                // If this is not a numeric column, see if it is a date collumn
                // by parsing the cell as a date or date-time value:
                if (!isNumericVariable[i]) {
                    Date dateResult = null;
                    if (isTimeVariable[i]) {
                        if (varString != null && !varString.isEmpty()) {
                            boolean isTime = false;
                            if (selectedDateTimeFormat[i] != null) {
                                // A format was already chosen for this column;
                                // every later value must match it exactly.
                                ParsePosition pos = new ParsePosition(0);
                                dateResult = selectedDateTimeFormat[i].parse(varString, pos);
                                if (dateResult != null && pos.getIndex() == varString.length()) {
                                    // OK, successfully parsed a value!
                                    isTime = true;
                                }
                            } else {
                                // Try each known time format until one consumes
                                // the whole token, then lock it in for the column.
                                for (SimpleDateFormat format : TIME_FORMATS) {
                                    ParsePosition pos = new ParsePosition(0);
                                    dateResult = format.parse(varString, pos);
                                    if (dateResult != null && pos.getIndex() == varString.length()) {
                                        // OK, successfully parsed a value!
                                        isTime = true;
                                        selectedDateTimeFormat[i] = format;
                                        break;
                                    }
                                }
                            }
                            if (!isTime) {
                                isTimeVariable[i] = false;
                                // if the token didn't parse as a time value,
                                // we will still try to parse it as a date, below.
                                // unless this column is NOT a date.
                            } else {
                                // And if it is a time value, we are going to assume it's
                                // NOT a date.
                                isDateVariable[i] = false;
                            }
                        }
                    }
                    if (isDateVariable[i]) {
                        if (varString != null && !varString.isEmpty()) {
                            boolean isDate = false;
                            // TODO:
                            // Strictly speaking, we should be doing the same thing
                            // here as with the time formats above; select the
                            // first one that works, then insist that all the
                            // other values in this column match it... but we
                            // only have one, as of now, so it should be ok.
                            // -- L.A. 4.0 beta
                            for (SimpleDateFormat format : DATE_FORMATS) {
                                // Strict parsing - it will throw an
                                // exception if it doesn't parse!
                                format.setLenient(false);
                                try {
                                    format.parse(varString);
                                    isDate = true;
                                    selectedDateFormat[i] = format;
                                    break;
                                } catch (ParseException ex) {
                                    //Do nothing
                                }
                            }
                            isDateVariable[i] = isDate;
                        }
                    }
                }
            }
            // Echo the record unchanged into the temp file for pass 2.
            csvFilePrinter.printRecord(record);
        }
    }
    dataTable.setCaseQuantity(parser.getRecordNumber());
    parser.close();
    csvReader.close();
    // Re-type the variables that we've determined are numerics:
    for (i = 0; i < headers.size(); i++) {
        if (isNumericVariable[i]) {
            dataTable.getDataVariables().get(i).setTypeNumeric();
            if (isIntegerVariable[i]) {
                dataTable.getDataVariables().get(i).setIntervalDiscrete();
            } else {
                dataTable.getDataVariables().get(i).setIntervalContinuous();
            }
        } else if (isDateVariable[i] && selectedDateFormat[i] != null) {
            // Dates are still Strings, i.e., they are "character" and "discrete";
            // But we add special format values for them:
            dataTable.getDataVariables().get(i).setFormat(DATE_FORMATS[0].toPattern());
            dataTable.getDataVariables().get(i).setFormatCategory("date");
        } else if (isTimeVariable[i] && selectedDateTimeFormat[i] != null) {
            // Same for time values:
            dataTable.getDataVariables().get(i).setFormat(selectedDateTimeFormat[i].toPattern());
            dataTable.getDataVariables().get(i).setFormatCategory("time");
        }
    }
    // Second, final pass.
    try (BufferedReader secondPassReader = new BufferedReader(new FileReader(firstPassTempFile))) {
        parser = new CSVParser(secondPassReader, inFormat.withHeader());
        String[] caseRow = new String[headers.size()];
        for (CSVRecord record : parser) {
            if (!record.isConsistent()) {
                List<String> args = Arrays.asList(new String[] { "" + (parser.getCurrentLineNumber() - 1),
                        "" + headers.size(), "" + record.size() });
                throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", args));
            }
            for (i = 0; i < headers.size(); i++) {
                String varString = record.get(i);
                if (isNumericVariable[i]) {
                    if (varString == null || varString.isEmpty() || varString.equalsIgnoreCase("NA")) {
                        // Missing value - represented as an empty string in
                        // the final tab file
                        caseRow[i] = "";
                    } else if (varString.equalsIgnoreCase("NaN")) {
                        // "Not a Number" special value:
                        caseRow[i] = "NaN";
                    } else if (varString.equalsIgnoreCase("Inf") || varString.equalsIgnoreCase("+Inf")) {
                        // Positive infinity:
                        caseRow[i] = "Inf";
                    } else if (varString.equalsIgnoreCase("-Inf")) {
                        // Negative infinity:
                        caseRow[i] = "-Inf";
                    } else if (varString.equalsIgnoreCase("null")) {
                        // By request from Gus - "NULL" is recognized as a
                        // numeric zero:
                        caseRow[i] = isIntegerVariable[i] ? "0" : "0.0";
                    } else {
                        /* No re-formatting is done on any other numeric values.
                         * We'll save them as they were, for archival purposes.
                         * (A commented-out alternative implementation that
                         * re-formatted fractional values into an IEEE 754-like
                         * scientific notation was removed for clarity.) */
                        caseRow[i] = varString;
                    }
                } else if (isTimeVariable[i] || isDateVariable[i]) {
                    // Time and Dates are stored NOT quoted (don't ask).
                    if (varString != null) {
                        // Dealing with quotes:
                        // remove the leading and trailing quotes, if present:
                        varString = varString.replaceFirst("^\"*", "");
                        varString = varString.replaceFirst("\"*$", "");
                        caseRow[i] = varString;
                    } else {
                        caseRow[i] = "";
                    }
                } else {
                    // Treat as a String:
                    // Strings are stored in tab files quoted;
                    // Missing values are stored as an empty string
                    // between two tabs (or one tab and the new line);
                    // Empty strings stored as "" (quoted empty string).
                    // For the purposes of this CSV ingest reader, we are going
                    // to assume that all the empty strings in the file are
                    // indeed empty strings, and NOT missing values:
                    if (varString != null) {
                        // escape the quotes, newlines, and tabs:
                        varString = varString.replace("\"", "\\\"");
                        varString = varString.replace("\n", "\\n");
                        varString = varString.replace("\t", "\\t");
                        // final pair of quotes:
                        varString = "\"" + varString + "\"";
                        caseRow[i] = varString;
                    } else {
                        caseRow[i] = "\"\"";
                    }
                }
            }
            finalOut.println(StringUtils.join(caseRow, "\t"));
        }
    }
    long linecount = parser.getRecordNumber();
    finalOut.close();
    parser.close();
    dbglog.fine("Tmp File: " + firstPassTempFile);
    // Firstpass file is deleted to prevent tmp from filling up.
    firstPassTempFile.delete();
    // Both passes must have seen the same number of records.
    if (dataTable.getCaseQuantity().intValue() != linecount) {
        List<String> args = Arrays
                .asList(new String[] { "" + dataTable.getCaseQuantity().intValue(), "" + linecount });
        throw new IOException(BundleUtil.getStringFromBundle("ingest.csv.line_mismatch", args));
    }
    return (int) linecount;
}
From source file:com.kdmanalytics.toif.ui.common.AdaptorConfiguration.java
/**
 * Parse the header row of the adaptor configuration CSV.
 *
 * Records each column's header text, captures the index of every well-known
 * column in its COLUMN_* field, and registers all columns (lower-cased) in
 * columnMap. Legacy header names are accepted and, for the count columns,
 * rewritten to their current names before registration. Columns that are not
 * recognized are collected in extraColumns.
 *
 * @param record the CSV header record
 */
private void parseHeader(CSVRecord record) {
  int size = record.size();
  headers = new LinkedList<String>();
  // Import the cells
  for (int i = 0; i < size; i++) {
    String text = record.get(i);
    headers.add(text);
    if (COLUMN_SFP_STRING.equalsIgnoreCase(text)) COLUMN_SFP = i;
    else if (COLUMN_CWE_STRING.equalsIgnoreCase(text)) COLUMN_CWE = i;
    else if (COLUMN_SHOW_STRING.equalsIgnoreCase(text)) COLUMN_SHOW = i;
    // The old header name for the "show" column is still accepted.
    else if (COLUMN_SHOW_STRING_OLD.equalsIgnoreCase(text)) COLUMN_SHOW = i;
    else if (COLUMN_CPPCHECK_STRING.equalsIgnoreCase(text)) COLUMN_CPPCHECK = i;
    else if (COLUMN_RATS_STRING.equalsIgnoreCase(text)) COLUMN_RATS = i;
    else if (COLUMN_SPLINT_STRING.equalsIgnoreCase(text)) COLUMN_SPLINT = i;
    else if (COLUMN_JLINT_STRING.equalsIgnoreCase(text)) COLUMN_JLINT = i;
    else if (COLUMN_FINDBUGS_STRING.equalsIgnoreCase(text)) COLUMN_FINDBUGS = i;
    else if (COLUMN_COUNT_C_STRING1.equalsIgnoreCase(text)) {
      COLUMN_COUNT_C = i;
      // Convert to new name
      text = COLUMN_COUNT_C_STRING2;
    } else if (COLUMN_COUNT_JAVA_STRING1.equalsIgnoreCase(text)) {
      COLUMN_COUNT_JAVA = i;
      // Convert to new name
      text = COLUMN_COUNT_JAVA_STRING2;
    } else if (COLUMN_COUNT_C_STRING2.equalsIgnoreCase(text)) COLUMN_COUNT_C = i;
    else if (COLUMN_COUNT_JAVA_STRING2.equalsIgnoreCase(text)) COLUMN_COUNT_JAVA = i;
    else {
      // Not one of the well-known columns; track it separately.
      extraColumns.add(text);
    }
    // Register under the (possibly renamed) text, so legacy count columns
    // appear in the map under their new names.
    columnMap.put(text.toLowerCase(), i);
  }
}
From source file:com.kdmanalytics.toif.ui.common.AdaptorConfiguration.java
/** * Parse the record as a row of data/*from w w w.ja v a2 s . c o m*/ * * @param record * @param rcount * @return */ private int parseData(CSVRecord record, int rcount) { int size = record.size(); List<Object> row = new LinkedList<Object>(); // Import the cells for (int i = 0; i < size; i++) { String text = record.get(i); row.add(getCell(i, text)); } if (row.size() > COLUMN_CWE) { String cwe = (String) row.get(COLUMN_CWE); // Fix the CWE ID and replace the value cwe = fixSfpCweIdentifier(cwe); row.remove(COLUMN_CWE); row.add(COLUMN_CWE, cwe); String sfp = (String) row.get(COLUMN_SFP); // Fix the CWE ID and replace the value sfp = fixSfpCweIdentifier(sfp); row.remove(COLUMN_SFP); row.add(COLUMN_SFP, sfp); // Only add a new row if this is a non-empty row and the CWE // does not exist in the map yet. if (!cwe.isEmpty() && !rowMap.containsKey(cwe)) { data.add(row); rowMap.put(cwe, rcount); sfpMap.put(cwe, (String) row.get(COLUMN_SFP)); ShowField showState = (ShowField) row.get(COLUMN_SHOW); visibilityMap.put((String) row.get(COLUMN_CWE), showState.toBoolean()); // We just added a new row rcount++; dirty = true; } } return rcount; }