List of usage examples for org.apache.commons.csv CSVFormat DEFAULT
CSVFormat DEFAULT
To view the source code for org.apache.commons.csv CSVFormat DEFAULT, click the Source Link below.
From source file:com.streamsets.pipeline.lib.csv.TestCsvParser.java
@Test public void testParserRecords() throws Exception { CsvParser parser = new CsvParser(getReader("TestCsvParser-default.csv"), CSVFormat.DEFAULT.withHeader((String[]) null).withSkipHeaderRecord(true), -1); try {/* w w w .ja va 2s.co m*/ Assert.assertEquals(12, parser.getReaderPosition()); String[] record = parser.read(); Assert.assertEquals(20, parser.getReaderPosition()); Assert.assertNotNull(record); Assert.assertArrayEquals(new String[] { "a", "b", "c", "d" }, record); record = parser.read(); Assert.assertEquals(33, parser.getReaderPosition()); Assert.assertNotNull(record); Assert.assertArrayEquals(new String[] { "w", "x", "y", "z", "extra" }, record); Assert.assertNull(parser.read()); Assert.assertEquals(33, parser.getReaderPosition()); } finally { parser.close(); } }
From source file:de.tudarmstadt.ukp.experiments.argumentation.sequence.evaluation.GenerateCrossDomainCVReport.java
/** * Merges id2outcome files from sub-folders with cross-domain and creates a new folder * with overall results// w w w. j a va2 s . c om * * @param folder folder * @throws java.io.IOException */ public static void aggregateDomainResults(File folder, String subDirPrefix, final String taskFolderSubText, String outputFolderName) throws IOException { // list all sub-folders File[] folders = folder.listFiles(new FileFilter() { @Override public boolean accept(File pathname) { return pathname.isDirectory() && pathname.getName().contains(taskFolderSubText); } }); if (folders.length == 0) { throw new IllegalArgumentException("No sub-folders 'SVMHMMTestTask*' found in " + folder); } // write to a file File outFolder = new File(folder, outputFolderName); File output = new File(outFolder, subDirPrefix); output.mkdirs(); File outCsv = new File(output, TOKEN_LEVEL_PREDICTIONS_CSV); CSVPrinter csvPrinter = new CSVPrinter(new FileWriter(outCsv), SVMHMMUtils.CSV_FORMAT); csvPrinter.printComment(SVMHMMUtils.CSV_COMMENT); ConfusionMatrix cm = new ConfusionMatrix(); for (File domain : folders) { File tokenLevelPredictionsCsv = new File(domain, subDirPrefix + "/" + TOKEN_LEVEL_PREDICTIONS_CSV); if (!tokenLevelPredictionsCsv.exists()) { throw new IllegalArgumentException( "Cannot locate tokenLevelPredictions.csv: " + tokenLevelPredictionsCsv); } CSVParser csvParser = new CSVParser(new FileReader(tokenLevelPredictionsCsv), CSVFormat.DEFAULT.withCommentMarker('#')); for (CSVRecord csvRecord : csvParser) { // copy record csvPrinter.printRecord(csvRecord); // update confusion matrix cm.increaseValue(csvRecord.get(0), csvRecord.get(1)); } } // write to file FileUtils.writeStringToFile(new File(outFolder, "confusionMatrix.txt"), cm.toString() + "\n" + cm.printNiceResults() + "\n" + cm.printLabelPrecRecFm() + "\n" + cm.printClassDistributionGold()); // write csv IOUtils.closeQuietly(csvPrinter); }
From source file:co.cask.hydrator.transforms.ParseCSV.java
/**
 * Configures the transform: resolves the configured CSV format name to a
 * {@link CSVFormat} constant and parses the configured output schema.
 *
 * @param context transform context supplied by the framework
 * @throws IllegalArgumentException if the format name or the schema is invalid
 */
@Override
public void initialize(TransformContext context) throws Exception {
    super.initialize(context);
    String csvFormatString = config.format.toLowerCase();
    switch (csvFormatString) {
    case "default":
        csvFormat = CSVFormat.DEFAULT;
        break;
    case "excel":
        csvFormat = CSVFormat.EXCEL;
        break;
    case "mysql":
        csvFormat = CSVFormat.MYSQL;
        break;
    case "rfc4180":
        csvFormat = CSVFormat.RFC4180;
        break;
    case "tdf":
        csvFormat = CSVFormat.TDF;
        break;
    default:
        // The previous message contained an unsubstituted "{}" placeholder
        // (SLF4J syntax in a plain string) and a missing space before the list.
        throw new IllegalArgumentException("Format '" + config.format
                + "' specified is not one of the allowed formats. Allowed formats are "
                + "DEFAULT, EXCEL, MYSQL, RFC4180 and TDF");
    }

    try {
        outSchema = Schema.parseJson(config.schema);
        fields = outSchema.getFields();
    } catch (IOException e) {
        // Preserve the cause so the underlying JSON parse error is not lost.
        throw new IllegalArgumentException(
                "Format of schema specified is invalid. Please check the format.", e);
    }
}
From source file:geovista.readers.csv.GeogCSVReader_old.java
/**
 * Streams the CSV from {@code is}, echoes each record to stdout, then attempts to
 * build a typed column-major table: data[0] = headers, data[i + 1] = column i as
 * int[], double[] or String[], with recognized null markers mapped to sentinels.
 *
 * NOTE(review): {@code fileContent} is never assigned — the shredder call that
 * populated it is commented out — so every use below ({@code fileContent[0]} etc.)
 * throws a NullPointerException. The {@code columns} parameter is also unused.
 */
public Object[] readFileStreaming(InputStream is, ArrayList<Integer> columns) {
    BufferedReader in = new BufferedReader(new InputStreamReader(is));
    Iterable<CSVRecord> parser = null;
    try {
        parser = CSVFormat.DEFAULT.withDelimiter(this.delimiter).parse(in);
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    int count = 0;
    // Debug pass: print each record; 'count' is never read afterwards.
    for (CSVRecord rec : parser) {
        // eDays.add(rec.get(0));
        // type.add(rec.get(10) + " - " + rec.get(8));
        System.out.println(rec.get(0));
        System.out.println(rec.toString());
        count++;
    }
    // Remnants of the old CSV "shredder" that used to fill fileContent:
    // CSVParser shredder = new CSVParser()
    // CSVParser shredder = new CSVParser(is);
    // shredder.setCommentStart("#;!");
    // shredder.setEscapes("nrtf", "\n\r\t\f");
    String[] headers = null;
    String[] types = null;
    int[] dataTypes = null;
    String[][] fileContent = null; // NOTE(review): stays null — see method comment
    int dataBegin;
    Object[] data;
    try {
        // fileContent = shredder.getAllValues();
    } catch (Exception ex) {
        ex.printStackTrace();
    }
    types = fileContent[0];// first line tells us types
    dataTypes = new int[types.length];
    int len;
    if (types[0].equalsIgnoreCase("int") || types[0].equalsIgnoreCase("double")
            || types[0].equalsIgnoreCase("string")) {
        // Explicit type row present: row 0 = types, row 1 = headers, data from row 2.
        dataBegin = 2;
        headers = fileContent[1];
        data = new Object[headers.length + 1];// plus one for the headers
        // themselves
        len = fileContent.length - dataBegin;
        for (int i = 0; i < headers.length; i++) {
            if (types[i].equalsIgnoreCase("int")) {
                data[i + 1] = new int[len];
                dataTypes[i] = GeogCSVReader_old.DATA_TYPE_INT;
            } else if (types[i].equalsIgnoreCase("double")) {
                data[i + 1] = new double[len];
                dataTypes[i] = GeogCSVReader_old.DATA_TYPE_DOUBLE;
            } else if (types[i].equalsIgnoreCase("string")) {
                data[i + 1] = new String[len];
                dataTypes[i] = GeogCSVReader_old.DATA_TYPE_STRING;
            } else {
                throw new IllegalArgumentException("GeogCSVReader.readFile, unknown type = " + types[i]);
            }
        }
    } else {
        // No type row: row 0 = headers, data from row 1; infer types from samples.
        dataBegin = 1;
        headers = fileContent[0];
        data = new Object[headers.length + 1];// plus one for the headers
        // themselves
        len = fileContent.length - dataBegin;
        for (int i = 0; i < headers.length; i++) {
            String firstString = fileContent[1][i];
            String secondString = fileContent[2][i];
            String thirdString = fileContent[3][i];
            // NOTE(review): indexes rows by fileContent[0].length (the COLUMN
            // count) — looks like the last row was intended; confirm before use.
            String lastString = fileContent[fileContent[0].length][i];
            if (isNumeric(firstString) && isNumeric(secondString) && isNumeric(thirdString)
                    && isNumeric(lastString)) {
                if (isInt(fileContent, i) == false) {
                    // if (isDouble(firstString) || isDouble(secondString)
                    // || isDouble(thirdString) || isDouble(lastString)) {
                    data[i + 1] = new double[len];
                    dataTypes[i] = GeogCSVReader_old.DATA_TYPE_DOUBLE;
                } else {
                    data[i + 1] = new int[len];
                    dataTypes[i] = GeogCSVReader_old.DATA_TYPE_INT;
                }
            } else {
                data[i + 1] = new String[len];
                dataTypes[i] = GeogCSVReader_old.DATA_TYPE_STRING;
            }
        }
    }
    data[0] = headers;
    String[] line = null;
    // Fill the typed column arrays row by row.
    for (int row = dataBegin; row < len + dataBegin; row++) {
        line = fileContent[row];
        int[] ints = null;
        double[] doubles = null;
        String[] strings = null;
        for (int column = 0; column < line.length; column++) {
            String item = line[column];
            if (dataTypes[column] == GeogCSVReader_old.DATA_TYPE_INT) {
                if (Arrays.binarySearch(GeogCSVReader_old.NULL_STRINGS, item) >= 0) {
                    // Recognized null marker: store the int sentinel.
                    ints = (int[]) data[column + 1];
                    ints[row - dataBegin] = GeogCSVReader_old.NULL_INT;
                } else {
                    ints = (int[]) data[column + 1];
                    try {
                        ints[row - dataBegin] = Integer.parseInt(item);
                    } catch (NumberFormatException nfe) {
                        // Unparseable value degrades to the sentinel, with a warning.
                        logger.warning("could not parse " + item + " in column " + column);
                        // nfe.printStackTrace();
                        ints[row - dataBegin] = GeogCSVReader_old.NULL_INT;
                    }
                }
            } else if (dataTypes[column] == GeogCSVReader_old.DATA_TYPE_DOUBLE) {
                if (Arrays.binarySearch(GeogCSVReader_old.NULL_STRINGS, item) >= 0) {
                    doubles = (double[]) data[column + 1];
                    doubles[row - dataBegin] = GeogCSVReader_old.NULL_DOUBLE;
                } else {
                    doubles = (double[]) data[column + 1];
                    doubles[row - dataBegin] = parseDouble(item);
                }
            } else if (dataTypes[column] == GeogCSVReader_old.DATA_TYPE_STRING) {
                strings = (String[]) data[column + 1];
                strings[row - dataBegin] = item;
            } else {
                // NOTE(review): indexes 'types' by 'row' — 'column' looks intended.
                throw new IllegalArgumentException("GeogCSVReader.readFile, unknown type = " + types[row]);
            } // end if
        } // next column
    } // next row
    return data;
}
From source file:edu.emory.mathcs.nlp.zzz.CSVRadiology.java
public void tokenize(String inputFile, int outputStart) throws Exception { CSVParser parser = new CSVParser(IOUtils.createBufferedReader(inputFile), CSVFormat.DEFAULT); String inputPath = FileUtils.getPath(inputFile) + "/"; List<CSVRecord> records = parser.getRecords(); Tokenizer tokenizer = new EnglishTokenizer(); P_BEFORE = new ArrayList<>(); P_AFTER = new ArrayList<>(); for (String s : BEFORE) P_BEFORE.add(new Pair<>(Pattern.compile(s), "\n" + s)); for (String s : AFTER) P_AFTER.add(new Pair<>(Pattern.compile(s), s + "\n")); for (int i = 0; i < records.size(); i++) { PrintStream fout = IOUtils.createBufferedPrintStream(getOuputFilename(inputPath, i + outputStart)); for (List<Token> tokens : tokenizer.segmentize(records.get(i).get(0))) print(fout, tokens);//from w w w . ja v a 2 s.co m fout.close(); } parser.close(); }
From source file:geovista.readers.csv.GeogCSVReader.java
/**
 * Streams the CSV from {@code is}, echoes each record to stdout, then attempts to
 * build a typed column-major table: data[0] = headers, data[i + 1] = column i as
 * int[], double[] or String[], with recognized null markers mapped to sentinels.
 *
 * NOTE(review): {@code fileContent} is never assigned — the shredder call that
 * populated it is commented out — so every use below ({@code fileContent[0]} etc.)
 * throws a NullPointerException. The {@code columns} parameter is also unused.
 */
public Object[] readFileStreaming(InputStream is, ArrayList<Integer> columns) {
    BufferedReader in = new BufferedReader(new InputStreamReader(is));
    Iterable<CSVRecord> parser = null;
    try {
        parser = CSVFormat.DEFAULT.withDelimiter(this.currDelimiter).parse(in);
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    int count = 0;
    // Debug pass: print each record; 'count' is never read afterwards.
    for (CSVRecord rec : parser) {
        // eDays.add(rec.get(0));
        // type.add(rec.get(10) + " - " + rec.get(8));
        System.out.println(rec.get(0));
        System.out.println(rec.toString());
        count++;
    }
    // Remnants of the old CSV "shredder" that used to fill fileContent:
    // CSVParser shredder = new CSVParser()
    // CSVParser shredder = new CSVParser(is);
    // shredder.setCommentStart("#;!");
    // shredder.setEscapes("nrtf", "\n\r\t\f");
    String[] headers = null;
    String[] types = null;
    int[] dataTypes = null;
    String[][] fileContent = null; // NOTE(review): stays null — see method comment
    int dataBegin;
    Object[] data;
    try {
        // fileContent = shredder.getAllValues();
    } catch (Exception ex) {
        ex.printStackTrace();
    }
    types = fileContent[0];// first line tells us types
    dataTypes = new int[types.length];
    int len;
    if (types[0].equalsIgnoreCase("int") || types[0].equalsIgnoreCase("double")
            || types[0].equalsIgnoreCase("string")) {
        // Explicit type row present: row 0 = types, row 1 = headers, data from row 2.
        dataBegin = 2;
        headers = fileContent[1];
        data = new Object[headers.length + 1];// plus one for the headers
        // themselves
        len = fileContent.length - dataBegin;
        for (int i = 0; i < headers.length; i++) {
            if (types[i].equalsIgnoreCase("int")) {
                data[i + 1] = new int[len];
                dataTypes[i] = GeogCSVReader.DATA_TYPE_INT;
            } else if (types[i].equalsIgnoreCase("double")) {
                data[i + 1] = new double[len];
                dataTypes[i] = GeogCSVReader.DATA_TYPE_DOUBLE;
            } else if (types[i].equalsIgnoreCase("string")) {
                data[i + 1] = new String[len];
                dataTypes[i] = GeogCSVReader.DATA_TYPE_STRING;
            } else {
                throw new IllegalArgumentException("GeogCSVReader.readFile, unknown type = " + types[i]);
            }
        }
    } else {
        // No type row: row 0 = headers, data from row 1; infer types from samples.
        dataBegin = 1;
        headers = fileContent[0];
        data = new Object[headers.length + 1];// plus one for the headers
        // themselves
        len = fileContent.length - dataBegin;
        for (int i = 0; i < headers.length; i++) {
            String firstString = fileContent[1][i];
            String secondString = fileContent[2][i];
            String thirdString = fileContent[3][i];
            // NOTE(review): indexes rows by fileContent[0].length (the COLUMN
            // count) — looks like the last row was intended; confirm before use.
            String lastString = fileContent[fileContent[0].length][i];
            if (isNumeric(firstString) && isNumeric(secondString) && isNumeric(thirdString)
                    && isNumeric(lastString)) {
                if (isInt(fileContent, i) == false) {
                    // if (isDouble(firstString) || isDouble(secondString)
                    // || isDouble(thirdString) || isDouble(lastString)) {
                    data[i + 1] = new double[len];
                    dataTypes[i] = GeogCSVReader.DATA_TYPE_DOUBLE;
                } else {
                    data[i + 1] = new int[len];
                    dataTypes[i] = GeogCSVReader.DATA_TYPE_INT;
                }
            } else {
                data[i + 1] = new String[len];
                dataTypes[i] = GeogCSVReader.DATA_TYPE_STRING;
            }
        }
    }
    data[0] = headers;
    String[] line = null;
    // Fill the typed column arrays row by row.
    for (int row = dataBegin; row < len + dataBegin; row++) {
        line = fileContent[row];
        int[] ints = null;
        double[] doubles = null;
        String[] strings = null;
        for (int column = 0; column < line.length; column++) {
            String item = line[column];
            if (dataTypes[column] == GeogCSVReader.DATA_TYPE_INT) {
                if (Arrays.binarySearch(GeogCSVReader.NULL_STRINGS, item) >= 0) {
                    // Recognized null marker: store the int sentinel.
                    ints = (int[]) data[column + 1];
                    ints[row - dataBegin] = GeogCSVReader.NULL_INT;
                } else {
                    ints = (int[]) data[column + 1];
                    try {
                        ints[row - dataBegin] = Integer.parseInt(item);
                    } catch (NumberFormatException nfe) {
                        // Unparseable value degrades to the sentinel, with a warning.
                        logger.warning("could not parse " + item + " in column " + column);
                        // nfe.printStackTrace();
                        ints[row - dataBegin] = GeogCSVReader.NULL_INT;
                    }
                }
            } else if (dataTypes[column] == GeogCSVReader.DATA_TYPE_DOUBLE) {
                if (Arrays.binarySearch(GeogCSVReader.NULL_STRINGS, item) >= 0) {
                    doubles = (double[]) data[column + 1];
                    doubles[row - dataBegin] = GeogCSVReader.NULL_DOUBLE;
                } else {
                    doubles = (double[]) data[column + 1];
                    doubles[row - dataBegin] = parseDouble(item);
                }
            } else if (dataTypes[column] == GeogCSVReader.DATA_TYPE_STRING) {
                strings = (String[]) data[column + 1];
                strings[row - dataBegin] = item;
            } else {
                // NOTE(review): indexes 'types' by 'row' — 'column' looks intended.
                throw new IllegalArgumentException("GeogCSVReader.readFile, unknown type = " + types[row]);
            } // end if
        } // next column
    } // next row
    return data;
}
From source file:com.kumarvv.setl.utils.CsvParser.java
/**
 * Opens the given file as a CSV parser using the platform default charset and
 * the DEFAULT format.
 *
 * @param file file to parse; may be {@code null}
 * @return a parser over the file, or {@code null} when {@code file} is {@code null}
 * @throws IOException if the file cannot be opened or read
 */
protected CSVParser parseFile(File file) throws IOException {
    return file == null
            ? null
            : CSVParser.parse(file, Charset.defaultCharset(), CSVFormat.DEFAULT);
}
From source file:edu.emory.mathcs.nlp.zzz.CSVSentiment.java
public void toTSV(String inputFile) throws Exception { CSVParser parser = new CSVParser(IOUtils.createBufferedReader(inputFile), CSVFormat.DEFAULT); PrintStream fout = IOUtils.createBufferedPrintStream(inputFile + ".tsv"); List<CSVRecord> records = parser.getRecords(); List<Token> tokens;//from ww w . j a va 2s . c o m CSVRecord record; int label; System.out.println(inputFile); for (int i = 0; i < records.size(); i++) { if (i == 0) continue; record = records.get(i); label = toIntLabel(record.get(0)); tokens = tokenizer.tokenize(record.get(6)); fout.println(label + "\t" + Joiner.join(tokens, " ", Token::getWordForm)); } fout.close(); parser.close(); }
From source file:com.publictransitanalytics.scoregenerator.datalayer.directories.GTFSReadingTripDetailsDirectory.java
/**
 * Reads a GTFS trips file and registers each trip's route and service type via
 * {@code populateTripDetail}.
 *
 * @param tripReader reader over the trips CSV (first row is the header)
 * @throws IOException          if the CSV cannot be read
 * @throws InterruptedException if populating a trip detail is interrupted
 */
private void parseTripsFile(final Reader tripReader) throws IOException, InterruptedException {
    // try-with-resources: the parser was previously never closed. Closing it
    // also closes tripReader; a second close by the caller is a harmless no-op.
    try (CSVParser tripParser = new CSVParser(tripReader, CSVFormat.DEFAULT.withHeader())) {
        final List<CSVRecord> tripRecords = tripParser.getRecords();
        for (CSVRecord record : tripRecords) {
            final String rawTripId = record.get("trip_id");
            final String routeId = record.get("route_id");
            final String serviceType = record.get("service_id");

            populateTripDetail(rawTripId, routeId, serviceType);
        }
    }
}
From source file:licenseUtil.LicensingList.java
public void readFromSpreadsheet(String spreadsheetFN) throws IOException, IncompleteLicenseObjectException { logger.info("read spreadsheet from \"" + spreadsheetFN + "\""); InputStreamReader inputStreamReader = null; try {//from www. j av a 2 s . c o m inputStreamReader = new InputStreamReader(new FileInputStream(spreadsheetFN), "UTF-8"); } catch (FileNotFoundException e) { e.printStackTrace(); } BufferedReader bufferedReader = new BufferedReader(inputStreamReader); CSVParser parser = new CSVParser(bufferedReader, CSVFormat.DEFAULT.withHeader().withDelimiter(columnDelimiter)); for (CSVRecord record : parser) { add(new LicensingObject(record)); } }