List of usage examples for org.apache.commons.csv CSVRecord size
public int size()
From source file:org.thegalactic.context.io.ContextSerializerCsv.java
/** * Read a context from a csv file./* ww w . j ava 2s . com*/ * * The following format is respected: * * The first line contains the attribute names, the other lines contains the * observations identifier followed by boolean values * * ~~~ * "",a,b,c,d,e * 1,1,0,1,0,0 * 2,1,1,0,0,0 * 3,0,1,0,1,1 * 4,0,0,1,0,1 * ~~~ * * If the first attribute is the empty string, the first column corresponds * to the individual identifiers. In the other case, the individual * identifiers will be generated by successive integers. * * ~~~ * a,b,c,d,e * 1,0,1,0,0 * 1,1,0,0,0 * 0,1,0,1,1 * 0,0,1,0,1 * ~~~ * * @param context a context to read * @param file a file * * @throws IOException When an IOException occurs */ public void read(Context context, BufferedReader file) throws IOException { // Parse the file CSVParser parser = CSVFormat.RFC4180.parse(file); // Get the records and record size List<CSVRecord> records = parser.getRecords(); int length = records.size(); // Verify length if (length == 0) { throw new IOException("CSV cannot be empty"); } // Get the attributes and the attribute size CSVRecord attributes = records.get(0); int size = attributes.size(); // Detect invalid attribute size if (size == 1 && attributes.get(0).equals("")) { throw new IOException("Attribute size cannot be 0"); } // Index of the first attribute int first = 0; if (attributes.get(0).equals("")) { first = 1; } // Get the attributes for (int i = first; i < size; i++) { String attribute = attributes.get(i); // Detect duplicated attribute if (!context.addToAttributes(attribute)) { throw new IOException("Duplicated attribute"); } // Detect empty attribute if ("".equals(attribute)) { throw new IOException("Empty attribute"); } } // Get the data for (int j = 1; j < length; j++) { // Get the current record CSVRecord record = records.get(j); // Detect incorrect size if (record.size() != size) { throw new IOException("Line does not have the correct number of attributes"); } // Get the observation identifier String 
identifier; if (first == 1) { identifier = record.get(0); } else { identifier = String.valueOf(j); } // Detect duplicated identifier if (!context.addToObservations(identifier)) { throw new IOException("Duplicated identifier"); } // Add the extent/intent for the current identifier and current attribute for (int i = first; i < size; i++) { if (record.get(i).equals("1")) { context.addExtentIntent(identifier, attributes.get(i)); } } } // Close the parser parser.close(); context.setBitSets(); }
From source file:org.totschnig.myexpenses.task.CsvImportTask.java
/**
 * Returns the trimmed value of the column at {@code index}, or the empty
 * string when the record has no column at that position.
 *
 * @param record the CSV record to read from
 * @param index  zero-based column index
 * @return the trimmed cell content, or {@code ""} if {@code index} is at or
 *         beyond the record size
 */
private String saveGetFromRecord(CSVRecord record, int index) {
    if (index >= record.size()) {
        return "";
    }
    String cell = record.get(index);
    return cell.trim();
}
From source file:org.transitime.utils.csv.CsvBaseReader.java
/** * Parse the CSV file. Reads in the header info and then each line. Calls * the abstract handleRecord() method for each record. Adds each resulting * CSV object to the gtfsObjecgts array. *//*w ww . j a v a2 s . c o m*/ private void parse() { CSVRecord record = null; try { IntervalTimer timer = new IntervalTimer(); logger.debug("Parsing CSV file {} ...", fileName); // Open the file for reading. Use UTF-8 format since that will work // for both regular ASCII format and UTF-8 extended format files // since UTF-8 was designed to be backwards compatible with ASCII. // This way will work for Chinese and other character sets. Use // InputStreamReader so can specify that using UTF-8 format. Use // BufferedReader so that can determine if first character is an // optional BOM (Byte Order Mark) character used to indicate that // file is in UTF-8 format. BufferedReader allows us to read in // first character and then discard if it is a BOM character or // reset the reader to back to the beginning if it is not. This // way the CSV parser will process the file starting with the first // true character. Reader in = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), "UTF-8")); // Deal with the possible BOM character at the beginning of the file in.mark(1); int firstRead = in.read(); final int BOM_CHARACTER = 0xFEFF; if (firstRead != BOM_CHARACTER) in.reset(); // Get ready to parse the CSV file. // Allow lines to be comments if they start with "-" so that can // easily comment out problems and also test what happens when // certain data is missing. Using the '-' character so can // comment out line that starts with "--", which is what is // used for SQL. CSVFormat formatter = CSVFormat.DEFAULT.withHeader().withCommentMarker('-'); // Parse the file Iterable<CSVRecord> records = formatter.parse(in); logger.debug("Finished CSV parsing of file {}. 
Took {} msec.", fileName, timer.elapsedMsec()); int lineNumberWhenLogged = 0; timer = new IntervalTimer(); IntervalTimer loggingTimer = new IntervalTimer(); Iterator<CSVRecord> iterator = records.iterator(); while (iterator.hasNext()) { // Determine the record to process record = iterator.next(); // If blank line then skip it. This way avoid error messages since // expected data column won't exist if (record.size() == 0) continue; // Process the record using appropriate handler // and create the corresponding CSV object T gtfsObject; try { gtfsObject = handleRecord(record, supplemental); } catch (ParseException e) { logger.error("ParseException occurred for record {} " + "(comment lines not included when determing record #) for " + "filename {} . {}", record.getRecordNumber(), fileName, e.getMessage()); // Continue even though there was an error so that all errors // logged at once. continue; } catch (NumberFormatException e) { logger.error("NumberFormatException occurred for record {} " + "(comment lines not included when determing record #) " + "for filename {} . {}", record.getRecordNumber(), fileName, e.getMessage()); // Continue even though there was an error so that all errors // logged at once. continue; } // Add the newly created CSV object to the object list if (gtfsObject != null) gtfsObjects.add(gtfsObject); // Log info if it has been a while. Check only every 20,000 // lines to see if the 10 seconds has gone by. If so, then log // number of lines. By only looking at timer every 20,000 lines // not slowing things down by for every line doing system call // for to get current time. 
final int LINES_TO_PROCESS_BEFORE_CHECKING_IF_SHOULD_LOG = 20000; final long SECONDS_ELSAPSED_UNTIL_SHOULD_LOG = 5; if (record.getRecordNumber() >= lineNumberWhenLogged + LINES_TO_PROCESS_BEFORE_CHECKING_IF_SHOULD_LOG) { lineNumberWhenLogged = (int) record.getRecordNumber(); if (loggingTimer.elapsedMsec() > SECONDS_ELSAPSED_UNTIL_SHOULD_LOG * Time.MS_PER_SEC) { logger.info(" Processed {} lines. Took {} msec...", lineNumberWhenLogged, timer.elapsedMsec()); loggingTimer = new IntervalTimer(); } } } // End of while iterating over records // Close up the file reader in.close(); // Determine number of records for logging message long numberRecords = 0; if (record != null) numberRecords = record.getRecordNumber(); logger.info("Finished parsing {} records from file {} . Took {} msec.", numberRecords, fileName, timer.elapsedMsec()); } catch (FileNotFoundException e) { if (required) logger.error("Required CSV file {} not found.", fileName); else logger.info("CSV file {} not found but OK because this file " + "not required.", fileName); } catch (IOException e) { logger.error("IOException occurred when reading in filename {}.", fileName, e); } }
From source file:org.wheatinitiative.vitro.webapp.ontology.update.OntologyChangeParser.java
/** * @param args//from ww w . java2 s . c o m * @throws IOException */ @SuppressWarnings({ "unchecked", "null", "static-access" }) public ArrayList<AtomicOntologyChange> parseFile(String diffPath) throws IOException { AtomicOntologyChange changeObj; ArrayList<AtomicOntologyChange> changeObjects = new ArrayList<AtomicOntologyChange>(); int countColumns = 0; String URI = null; String rename = null; String sourceURI = null; String destinationURI = null; StringTokenizer stArr = null; InputStreamReader in = new InputStreamReader(this.getClass().getResourceAsStream(diffPath)); Iterable<CSVRecord> records = CSVFormat.TDF.parse(in); Iterator<CSVRecord> rows = records.iterator(); //CSVReader readFile = new SimpleReader(); //readFile.setSeperator('\t'); //List<String[]> rows = readFile.parse(in); int rowNum = 0; while (rows.hasNext()) { rowNum++; CSVRecord row = rows.next(); if (row.size() != 5) { log.error("Invalid PromptDiff data at row " + (rowNum + 1) + ". Expected 5 columns; found " + row.size()); } else { changeObj = new AtomicOntologyChange(); if (row.get(0) != null && row.get(0).length() > 0) { changeObj.setSourceURI(row.get(0)); } if (row.get(1) != null && row.get(1).length() > 0) { changeObj.setDestinationURI(row.get(1)); } if (row.get(4) != null && row.get(4).length() > 0) { changeObj.setNotes(row.get(4)); } if ("Yes".equals(row.get(2))) { changeObj.setAtomicChangeType(AtomicChangeType.RENAME); } else if ("Delete".equals(row.get(3))) { changeObj.setAtomicChangeType(AtomicChangeType.DELETE); } else if ("Add".equals(row.get(3))) { changeObj.setAtomicChangeType(AtomicChangeType.ADD); } else { log.error("Invalid rename or change type data: '" + row.get(2) + " " + row.get(3) + "'"); } log.debug(changeObj); changeObjects.add(changeObj); } } if (changeObjects.size() == 0) { log.debug("No ABox updates are required."); } return changeObjects; }
From source file:org.wso2.carbon.event.simulator.csvFeedSimulation.core.CSVFeedEventSimulator.java
/** * This method must be called within a synchronized block to avoid multiple file simulators from running simultaneously. * Read the values from uploaded CSV file and convert those values into event and send those events to * input handler//from w w w.j a v a2 s.c o m * <p> * <p> * To read the CSV file It uses CSV parser Library. * {@link <a href="https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVParser.html">CSVParser</a>} * </p> * <p> * <p> * CSV file can be separated by one of these fallowing character , , ; , \t by default * It has capability to have user defined delimiter * Any field may be quoted (with double quotes) * Fields with embedded commas or delimiter characters must be double quoted. * </p> * <p> * Initialize CSVParser * * @param executionPlanDto ExecutionPlanDto * @param csvFileConfig CSVFileSimulationDto */ private void sendEvent(ExecutionPlanDto executionPlanDto, CSVFileSimulationDto csvFileConfig) { /* return no of events read from CSV file during ever iteration */ long noOfEvents = 0; int delay = csvFileConfig.getDelay(); /* Reader for reading character streams from file */ Reader in = null; /* CSVParser to read CSV Values */ CSVParser csvParser = null; if (delay <= 0) { log.warn("Events will be sent continuously since the delay between events are set to " + delay + "milliseconds"); delay = 0; } try { /* Initialize Reader */ in = new FileReader(String.valueOf(Paths.get(System.getProperty("java.io.tmpdir"), csvFileConfig.getFileDto().getFileInfo().getFileName()))); /* Initialize CSVParser with appropriate CSVFormat according to delimiter */ switch (csvFileConfig.getDelimiter()) { case ",": csvParser = CSVParser.parse(in, CSVFormat.DEFAULT); break; case ";": csvParser = CSVParser.parse(in, CSVFormat.EXCEL); break; case "\\t": csvParser = CSVParser.parse(in, CSVFormat.TDF); break; default: csvParser = CSVParser.parse(in, CSVFormat.newFormat(csvFileConfig.getDelimiter().charAt(0))); } int attributeSize = 
executionPlanDto.getInputStreamDtoMap().get(csvFileConfig.getStreamName()) .getStreamAttributeDtos().size(); /* Iterate through the CSV file line by line */ for (CSVRecord record : csvParser) { try { synchronized (this) { if (isStopped) { isStopped = false; break; } if (isPaused) { this.wait(); } } if (record.size() != attributeSize) { log.warn("No of attribute is not equal to attribute size: " + attributeSize + " is needed" + "in Row no:" + noOfEvents + 1); } String[] attributes = new String[attributeSize]; noOfEvents = csvParser.getCurrentLineNumber(); for (int i = 0; i < record.size(); i++) { attributes[i] = record.get(i); } //convert Attribute values into event Event event = EventConverter.eventConverter(csvFileConfig.getStreamName(), attributes, executionPlanDto); // TODO: 13/12/16 delete sout System.out.println("Input Event " + Arrays.deepToString(event.getEventData())); // //send the event to input handler send(csvFileConfig.getStreamName(), event); //delay between two events if (delay > 0) { Thread.sleep(delay); } } catch (EventSimulationException e) { log.error("Event dropped due to Error occurred during generating an event" + e.getMessage()); } catch (InterruptedException e) { log.error("Error occurred during send event" + e.getMessage()); } } } catch (IllegalArgumentException e) { // TODO: 02/12/16 proper error message throw new EventSimulationException("File Parameters are null" + e.getMessage()); } catch (FileNotFoundException e) { throw new EventSimulationException( "File not found :" + csvFileConfig.getFileDto().getFileInfo().getFileName()); } catch (IOException e) { throw new EventSimulationException("Error occurred while reading the file"); } finally { try { if (in != null && csvParser != null) in.close(); csvParser.close(); } catch (IOException e) { throw new EventSimulationException("Error occurred during closing the file"); } } }
From source file:org.wso2.carbon.event.simulator.randomdatafeedsimulation.bean.CustomBasedAttribute.java
/**
 * Splits the given data list into separate values and assigns the result to customDataList.
 * <p>
 * The string is parsed as comma separated values with {@code /} configured as the quote character.
 * NOTE(review): the documented input format is {@code "CEP,Siddhi",ESB,DAS}, which uses double quotes,
 * while the parser is configured with {@code /} as quote - confirm which one callers actually send.
 * </p>
 *
 * @param customData String that has data list values
 * @throws EventSimulationException if the data cannot be read or is rejected as an illegal argument
 */
public void setCustomData(String customData) {
    List<String> dataList = new ArrayList<>();
    // try-with-resources makes sure the parser is released on every path
    try (CSVParser csvParser = CSVParser.parse(customData, CSVFormat.newFormat(',').withQuote('/'))) {
        for (CSVRecord record : csvParser) {
            for (int i = 0; i < record.size(); i++) {
                dataList.add(record.get(i));
            }
        }
    } catch (IOException e) {
        throw new EventSimulationException("I/O error occurs :" + e.getMessage());
    } catch (IllegalArgumentException e) {
        throw new EventSimulationException("Data set is null :" + e.getMessage());
    }
    customDataList = dataList.toArray(new String[dataList.size()]);
}
From source file:org.zanata.adapter.glossary.GlossaryCSVReader.java
/**
 * Reads a glossary CSV and groups the resulting entries by translation locale.
 * <p>
 * The first record is the header. For every following record, one glossary entry is created per
 * translation column; each entry holds the source term (column 0) and the translated term of that
 * column, plus the part of speech and description read from the columns registered in the
 * description map. The reader is always closed before returning.
 *
 * @param reader        source of the CSV content
 * @param qualifiedName qualified name assigned to every created entry
 * @return glossary entries keyed by translation locale
 * @throws IOException      if the CSV content cannot be read
 * @throws RuntimeException if validation fails or the source locale of the file does not match the
 *                          configured source language
 */
public Map<LocaleId, List<GlossaryEntry>> extractGlossary(Reader reader, String qualifiedName)
        throws IOException {
    try {
        Iterable<CSVRecord> parsed = CSVFormat.RFC4180.parse(reader);
        List<CSVRecord> allRows = Lists.newArrayList(parsed);

        validateCSVEntries(allRows);
        Map<String, Integer> descColumns = setupDescMap(allRows);
        Map<Integer, LocaleId> localeColumns = setupLocalesMap(allRows, descColumns);

        LocaleId sourceLocale = localeColumns.get(0);
        if (!srcLang.equals(sourceLocale)) {
            throw new RuntimeException("input source language '" + srcLang
                    + "' does not match source language in file '" + sourceLocale + "'");
        }

        Map<LocaleId, List<GlossaryEntry>> entriesByLocale = Maps.newHashMap();

        // Row 0 is the header, data starts at row 1
        for (int rowIndex = 1; rowIndex < allRows.size(); rowIndex++) {
            CSVRecord current = allRows.get(rowIndex);

            // Column 0 is the source term, translations start at column 1
            for (int col = 1; col < current.size(); col++) {
                if (!localeColumns.containsKey(col)) {
                    // first column that is not a locale ends the translation columns
                    break;
                }

                GlossaryEntry glossaryEntry = new GlossaryEntry();
                glossaryEntry.setSrcLang(sourceLocale);
                glossaryEntry.setPos(current.get(descColumns.get(POS)));
                glossaryEntry.setDescription(current.get(descColumns.get(DESC)));
                glossaryEntry.setQualifiedName(new QualifiedName(qualifiedName));

                GlossaryTerm sourceTerm = new GlossaryTerm();
                sourceTerm.setLocale(sourceLocale);
                sourceTerm.setContent(current.get(0));
                glossaryEntry.getGlossaryTerms().add(sourceTerm);

                LocaleId targetLocale = localeColumns.get(col);
                String targetContent = current.get(col);

                GlossaryTerm targetTerm = new GlossaryTerm();
                targetTerm.setLocale(targetLocale);
                targetTerm.setContent(targetContent);
                glossaryEntry.getGlossaryTerms().add(targetTerm);

                List<GlossaryEntry> bucket = entriesByLocale.get(targetLocale);
                if (bucket == null) {
                    bucket = Lists.newArrayList();
                }
                bucket.add(glossaryEntry);
                entriesByLocale.put(targetLocale, bucket);
            }
        }
        return entriesByLocale;
    } finally {
        reader.close();
    }
}
From source file:org.zanata.adapter.glossary.GlossaryCSVReader.java
/** * Basic validation of CVS file format - At least 2 rows in the CVS file - * Empty content validation - All row must have the same column count */// www . j a v a2s . c om private void validateCSVEntries(@Nonnull List<CSVRecord> records) { if (records.isEmpty()) { throw new RuntimeException("Invalid CSV file - empty file"); } if (records.size() < 2) { throw new RuntimeException("Invalid CSV file - no entries found"); } for (int i = 1; i < records.size(); i++) { CSVRecord record = records.get(i); //checking each row size is matching with header size if (records.get(0).size() != record.size()) { throw new RuntimeException("Invalid CSV file - inconsistency of columns with header"); } } }
From source file:org.zanata.adapter.glossary.GlossaryCSVReader.java
/**
 * Builds the column-index to locale mapping from the header row.
 * <p>
 * Header cells are read from column 0 onwards and turned into locales (after trimming) until the
 * first column registered in the description map, or the end of the header, is reached.
 * Format of CSV: {source locale},{locale},{locale}...,pos,description
 *
 * @param records        all records of the file, header row first
 * @param descriptionMap column indexes of the description columns, which are not locales
 * @return map from column index to the locale declared in the header
 */
private Map<Integer, LocaleId> setupLocalesMap(List<CSVRecord> records,
        Map<String, Integer> descriptionMap) {
    Map<Integer, LocaleId> localeColMap = new HashMap<Integer, LocaleId>();
    CSVRecord headerRow = records.get(0);
    // Strictly less than the header size: valid column indexes end at size - 1, so an inclusive
    // bound would read past the last column whenever no description column stops the loop first.
    for (int col = 0; col < headerRow.size() && !descriptionMap.containsValue(col); col++) {
        LocaleId locale = new LocaleId(StringUtils.trim(headerRow.get(col)));
        localeColMap.put(col, locale);
    }
    return localeColMap;
}
From source file:org.zanata.adapter.glossary.GlossaryCSVReader.java
/**
 * Registers the positions of the last 2 columns of the header row:
 * {source locale},{locale},{locale}...,pos,description
 * <p>
 * POS is mapped to the second to last column index and DESC to the last one.
 *
 * @param records all records of the file, header row first
 * @return map from description key (POS, DESC) to its column index
 */
private Map<String, Integer> setupDescMap(List<CSVRecord> records) {
    Map<String, Integer> columnByKey = new HashMap<String, Integer>();
    int columnCount = records.get(0).size();
    int posColumn = columnCount - 2;
    int descColumn = columnCount - 1;
    columnByKey.put(POS, posColumn);
    columnByKey.put(DESC, descColumn);
    return columnByKey;
}