Usage examples for `org.apache.commons.csv.CSVFormat#newFormat(char)` collected from open-source projects.
public static CSVFormat newFormat(final char delimiter)
From source file:cz.pichlik.goodsentiment.common.CSVFormats.java
/**
 * Builds the CSV format shared by this application: comma-delimited, CRLF
 * record separator, double-quoted fields, and the empty string as the null token.
 *
 * @param header optional column names; when at least one is given, the
 *               returned format carries them as a header
 * @return the configured {@link CSVFormat}
 */
public static CSVFormat format(String... header) {
    CSVFormat base = CSVFormat.newFormat(',')
            .withRecordSeparator("\r\n")
            .withQuote('"')
            .withNullString("");
    if (header.length > 0) {
        return base.withHeader(header);
    }
    return base;
}
From source file:com.marklogic.contentpump.utilities.CSVParserFormatter.java
/**
 * Builds a {@link CSVFormat} for parsing with the given control characters.
 * Missing column names are always allowed.
 *
 * @param delimiter               field delimiter character
 * @param encapsulator            quote (encapsulator) character
 * @param ignoreSurroundingSpaces whether whitespace around fields is ignored
 * @param ignoreEmptyLines        whether blank lines are skipped
 * @return the configured format
 */
public static CSVFormat getFormat(char delimiter, char encapsulator,
        boolean ignoreSurroundingSpaces, boolean ignoreEmptyLines) {
    return CSVFormat.newFormat(delimiter)
            .withIgnoreEmptyLines(ignoreEmptyLines)
            .withIgnoreSurroundingSpaces(ignoreSurroundingSpaces)
            .withAllowMissingColumnNames(true)
            .withQuote(encapsulator);
}
From source file:cz.pichlik.goodsentiment.MockDataGenerator.java
/**
 * Generates a mock sentiment CSV file for the given date containing between
 * 150 and 249 random records.
 *
 * <p>Fixes over the previous version: the record count is drawn once before
 * the loop (the old loop condition called {@code rg().nextInt(100)} on every
 * iteration, re-randomizing the bound mid-loop), and the printer is managed
 * by try-with-resources (the old {@code finally { printer.close(); }} threw
 * {@code NullPointerException} if the {@code CSVPrinter} constructor failed
 * before assignment).
 *
 * @param file destination file; overwritten if it exists
 * @param date logical date used when generating record timestamps
 * @throws IOException if the file cannot be written
 */
private static void generateFile(File file, LocalDate date) throws IOException {
    CSVFormat format = CSVFormat.newFormat(',')
            .withRecordSeparator("\r\n")
            .withQuote('"')
            .withHeader("id", "sentimentCode", "orgUnit", "latitude", "longitude",
                    "city", "gender", "yearsInCompany", "timestamp")
            .withNullString("");
    try (OutputStreamWriter output = new OutputStreamWriter(new FileOutputStream(file));
            CSVPrinter printer = new CSVPrinter(output, format)) {
        // Draw the record count once; the bound must not change while looping.
        int recordCount = 150 + rg().nextInt(100);
        for (int i = 0; i < recordCount; i++) {
            long id = sequence++;
            int sentimentCode = generateSentiment();
            String orgUnit = generateOrgUnit();
            Object[] geo = generateGeo();
            Object latitude = geo[0];
            Object longitude = geo[1];
            Object city = geo[2];
            String gender = generateGender();
            int daysInCompany = generateYearsInCompany();
            LocalDateTime timestamp = generateTimestamp(date);
            printer.printRecord(id, sentimentCode, orgUnit, latitude, longitude,
                    city, gender, daysInCompany, timestamp);
        }
    }
}
From source file:com.lithium.flow.util.CsvFormats.java
/**
 * Builds a {@link CSVFormat} from configuration. The {@code csv.format} key
 * selects a named preset ({@code default}, {@code excel}, {@code mysql},
 * {@code rfc4180}, {@code tdf}) or {@code custom}, which assembles a format
 * from the individual {@code csv.*} keys. Unrecognized values fall back to
 * {@link CSVFormat#DEFAULT}.
 *
 * @param config source of the {@code csv.*} settings, never null
 * @return the configured format
 */
@Nonnull
public static CSVFormat fromConfig(@Nonnull Config config) {
    checkNotNull(config);
    String name = config.getString("csv.format", "default");
    if ("excel".equals(name)) {
        return CSVFormat.EXCEL;
    }
    if ("mysql".equals(name)) {
        return CSVFormat.MYSQL;
    }
    if ("rfc4180".equals(name)) {
        return CSVFormat.RFC4180;
    }
    if ("tdf".equals(name)) {
        return CSVFormat.TDF;
    }
    if ("custom".equals(name)) {
        return CSVFormat.newFormat(getChar(config, "csv.delimiter", ','))
                .withAllowMissingColumnNames(getBoolean(config, "csv.allowMissingColumnNames"))
                .withCommentMarker(getChar(config, "csv.commentMarker"))
                .withEscape(getChar(config, "csv.escape"))
                .withHeader(getHeader(config, "csv.header"))
                .withIgnoreEmptyLines(getBoolean(config, "csv.ignoreEmptyLines"))
                .withIgnoreSurroundingSpaces(getBoolean(config, "csv.ignoreSurroundingSpaces"))
                .withNullString(getString(config, "csv.nullString"))
                .withQuote(getChar(config, "csv.quote"))
                .withQuoteMode(getQuoteMode(config, "csv.quoteMode"))
                .withRecordSeparator(getString(config, "csv.recordSeparator"))
                .withSkipHeaderRecord(getBoolean(config, "csv.skipHeaderRecord"));
    }
    // "default" and any unknown value both map to the library default format.
    return CSVFormat.DEFAULT;
}
From source file:com.github.jferard.pgloaderutils.sniffer.csv.CSVOptionalHeaderSniffer.java
/**
 * Creates a header sniffer for the given single-byte CSV control characters.
 * An escape character is only configured when it differs from the quote
 * character; the original guard is preserved here.
 *
 * @param delimiter field delimiter byte
 * @param quote     quote byte
 * @param escape    escape byte; ignored when equal to {@code quote}
 * @param charset   charset used to decode the input
 * @return a sniffer configured with the matching {@link CSVFormat}
 */
public static CSVOptionalHeaderSniffer getSniffer(final byte delimiter, final byte quote,
        final byte escape, final Charset charset) {
    CSVFormat format = CSVFormat.newFormat((char) delimiter)
            .withQuote((char) quote)
            .withQuoteMode(QuoteMode.MINIMAL)
            .withAllowMissingColumnNames();
    if (escape != quote) {
        format = format.withEscape((char) escape);
    }
    return new CSVOptionalHeaderSniffer(format, charset);
}
From source file:com.gs.obevo.db.impl.core.changetypes.CsvReaderDataSource.java
/**
 * Returns the standard CSV format used by Obevo by both readers (for deploy)
 * and writers (for reverse-engineering) of CSV: CRLF record separator,
 * double-quoted fields, backslash escapes, surrounding spaces ignored.
 *
 * @param delim     field delimiter character
 * @param nullToken string that represents a null value
 * @return the shared format
 */
public static CSVFormat getCsvFormat(char delim, String nullToken) {
    return CSVFormat.newFormat(delim)
            .withRecordSeparator("\r\n")
            .withIgnoreSurroundingSpaces(true)
            .withQuote('"')
            .withEscape('\\')
            .withNullString(nullToken);
}
From source file:jp.co.cyberagent.parquet.msgpack.compat.TestParquetCompatibility.java
/**
 * Compares the parquet resource under test against the reference
 * {@code nation.csv}, choosing column names based on whether the parquet
 * file was produced by Impala (its filename contains "impala").
 */
@Test
public void testing() throws Exception {
    JSONIterator parquet = ParquetAsJSONIterator.fromResource(parquetFilename);
    boolean impala = parquetFilename.contains("impala");
    CSVHeaderMap headerMap = CSVHeaderMap.builder()
            .add(impala ? "n_nationkey" : "nation_key", CSVColumnWriter.NUMBER)
            .add(impala ? "n_name" : "name", CSVColumnWriter.STRING)
            .add(impala ? "n_regionkey" : "region_key", CSVColumnWriter.NUMBER)
            .add(impala ? "n_comment" : "comment_col", CSVColumnWriter.STRING)
            .build();
    JSONIterator csv = CSVAsJSONIterator.fromResource(
            "test-data/parquet-python/nation.csv", CSVFormat.newFormat('|'), headerMap);
    assertThat(parquet, is(sameAs(csv)));
}
From source file:com.datascience.cascading.CsvSchemeTest.java
/**
 * Tests the CSV scheme source with headers: reads the input with a
 * comma-delimited, headered source format and writes it tab-delimited,
 * then compares against the expected fixture.
 *
 * @param inputPath path of the CSV input fixture to read
 */
private void testCsvSourceWithHeaders(String inputPath) throws Exception {
    final String sinkPath = "src/test/resources/output/source-with-headers";
    final String expectedPath = "src/test/resources/expected/with-headers.txt";
    CSVFormat sourceFormat = CSVFormat.newFormat(',')
            .withQuote('"')
            .withHeader("id", "first name", "last name")
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');
    CSVFormat sinkFormat = CSVFormat.newFormat('\t')
            .withEscape('\\')
            .withRecordSeparator('\n');
    testScheme(inputPath, sourceFormat, sinkPath, sinkFormat, expectedPath, true);
}
From source file:fr.univ_tours.li.mdjedaini.ideb.io.CsvLogLoader.java
/** * /*www .j a v a 2s. c o m*/ * @param arg_sessionFilePath * @return */ public Session loadSession(String arg_sessionFilePath) { Session result = new Session(); try { Reader in = new FileReader(arg_sessionFilePath); Iterable<CSVRecord> records = CSVFormat.newFormat(';').withFirstRecordAsHeader().parse(in); // each record is a query for (CSVRecord record : records) { // System.out.println("I am parsing the line: " + record); String cubeName = record.get("cube"); EAB_Cube cube = this.be.getBenchmarkData().getInternalCubeList().get(cubeName); QueryTriplet q_tmp = new QueryTriplet(cube); // extract measures String currentMeasure = record.get("Measures"); // only add measure if not empty if (!currentMeasure.equals("[]")) { Pattern p = Pattern.compile("([a-zA-Z_0-9][a-zA-Z_0-9 ]+)"); Matcher m = p.matcher(currentMeasure); // manage multiple measures while (m.find()) { //System.out.println("Current measure: " + currentMeasure + " --- trouve: " + m.groupCount()); String measure = m.group(1); // System.out.println("Measure: " + measure); // add the current measure to the current query MeasureFragment mf = new MeasureFragment(q_tmp, measure); if (null == mf.getMeasure()) { int i = 2; } q_tmp.addMeasure(mf); } } // extract GBS String currentProjection = record.get("GroupBy"); // only add projections if not empty if (!currentProjection.equals("[]")) { Pattern p = Pattern.compile("([a-zA-Z_0-9][a-zA-Z_0-9 ]+)"); Matcher m = p.matcher(currentProjection); // manage multiple group by while (m.find()) { //System.out.println("Group " + i + ": " + m.group(i)); String level = m.group(1); // System.out.println("Level: " + level); EAB_Level l_tmp = cube.getLevelByAtomicName(level); ProjectionFragment pf_tmp = new ProjectionFragment(q_tmp, l_tmp); if (null == pf_tmp.getLevel()) { int i = 2; } q_tmp.addProjection(pf_tmp); } } // extract filters String currentSelection = record.get("Filters"); // only add projections if not empty if (!currentSelection.equals("[]")) { Pattern p = 
Pattern.compile("([a-zA-Z_0-9][a-zA-Z_0-9 ]+)=>\\[EQUAL ([a-zA-Z_0-9& ]+)\\]"); Matcher m = p.matcher(currentSelection); // manage multiple occurrences while (m.find()) { // System.out.println("Current selection: " + currentSelection + " --- trouve: " + m.groupCount()); String level = m.group(1); String member = m.group(2); EAB_Level l_tmp = cube.getLevelByAtomicName(level); // System.out.println("Cube: " + cubeName); // System.out.println("Level: " + level); // System.out.println("Member: " + member); if (null == l_tmp) { int i = 2; } String dimName = l_tmp.getHierarchy().getDimension().getMondrianDimension().getName(); String hieName = l_tmp.getHierarchy().getName(); //hieName.spl SelectionFragment sf_tmp = new SelectionFragment(q_tmp, dimName, hieName, level, member); if (null != sf_tmp.getMemberValue()) { q_tmp.addSelection(sf_tmp); } } } // add the query to the session result.addQuery(q_tmp); QueryConverter qc = new QueryConverter(this.be); try { System.out.println("******************"); System.out.println("Record:" + record); QueryMdx q_mdx = qc.toMdx(q_tmp); System.out.println("MDX with my converter:"); System.out.println(q_mdx); q_mdx.execute(Boolean.TRUE); // System.out.println("-----"); // System.out.println("Query: " + q_tmp); // System.out.println("-----"); // System.out.println("Mdx: " + qc.toMdx(q_tmp)); // System.out.println("******************"); } catch (Exception arg_e) { System.out.println("******************"); System.out.println("Exception: " + arg_e.getClass().getName()); System.out.println("Record:" + record); // System.out.println("-----"); // System.out.println("Query: " + q_tmp); // System.out.println("-----"); //qc.toMdx(q_tmp); //System.out.println("******************"); //System.err.println("Exception avec: "); //System.err.println("Record: " + record); } } // end foreach record } catch (Exception arg_e) { arg_e.printStackTrace(); } // SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS"); // // // add the name as 
metadata of the session // result.addMetaData("name", arg_sessionFilePath); // // System.out.println("I am parsing the file: " + arg_sessionFilePath); // // // pattern for extracting cube name // Pattern p = Pattern.compile("from \\[(.*?)\\].*"); // // File file = new File(arg_sessionFilePath); // // // try { // //BufferedReader br = new BufferedReader(new FileReader(file)); // BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(arg_sessionFilePath), "UTF-8")); // String line = null; // // String currentQuery = ""; // // // pour parser une requete, je cherche "select" // // je prends toutes les lignes suivantes, jusqu'a rencontrer une ligne vide... // while ((line = br.readLine()) != null) { // // if(line.contains("select")) { // // // look for the time before query execution // String date = line.substring(0, 23); // Date d = sdf.parse(date); // Long tsBefore = d.getTime(); // // // je recupere la position du mot "select" dans la ligne // Integer position = line.indexOf("select"); // currentQuery = line.substring(position, line.length()); // // String line_tmp = br.readLine(); // while(!line_tmp.equals("")) { // currentQuery += System.lineSeparator(); // //currentQuery += System.lineSeparator(); // currentQuery += line_tmp; // line_tmp = br.readLine(); // } // // // extract cubename from the query text // // Normally, the pattern is always found! 
// Matcher m = p.matcher(currentQuery); // m.find(); // String cubeName = m.group(1); // // //System.out.println(currentQuery); // //System.out.println("cubeName: " + cubeName); // //System.out.println("-------"); // // // look for the execution time // while(!line_tmp.contains("exec:")) { // line_tmp = br.readLine(); // } // // // here the line contains exec // // look for the time before query execution // date = line_tmp.substring(0, 23); // d = sdf.parse(date); // Long tsAfter = d.getTime(); // // Query q_tmp = new QueryMdx(this.be.getInternalCubeByName(cubeName), currentQuery); // // result.addQuery(q_tmp, tsBefore, tsAfter); // } // // } // // br.close(); // } catch(Exception arg_e) { // arg_e.printStackTrace(); // } return result; }
From source file:com.datascience.hadoop.CsvOutputFormat.java
/**
 * Creates a CSV format from a Hadoop configuration. The base settings
 * (delimiter, header skipping, record separator, empty-line and whitespace
 * handling, null string) are always applied; header columns, escape, quote,
 * and quote mode are only applied when their resolved values are present.
 *
 * @param conf Hadoop configuration carrying the csv writer settings
 * @return the configured {@link CSVFormat}
 */
private static CSVFormat createFormat(Configuration conf) {
    char delimiter = conf.get(CSV_WRITER_DELIMITER, DEFAULT_CSV_WRITER_DELIMITER).charAt(0);
    CSVFormat format = CSVFormat.newFormat(delimiter)
            .withSkipHeaderRecord(
                    conf.getBoolean(CSV_WRITER_SKIP_HEADER, DEFAULT_CSV_WRITER_SKIP_HEADER))
            .withRecordSeparator(
                    conf.get(CSV_WRITER_RECORD_SEPARATOR, DEFAULT_CSV_WRITER_RECORD_SEPARATOR))
            .withIgnoreEmptyLines(conf.getBoolean(
                    CSV_WRITER_IGNORE_EMPTY_LINES, DEFAULT_CSV_WRITER_IGNORE_EMPTY_LINES))
            .withIgnoreSurroundingSpaces(conf.getBoolean(
                    CSV_WRITER_IGNORE_SURROUNDING_SPACES,
                    DEFAULT_CSV_WRITER_IGNORE_SURROUNDING_SPACES))
            .withNullString(conf.get(CSV_WRITER_NULL_STRING, DEFAULT_CSV_WRITER_NULL_STRING));

    String[] columns = conf.getStrings(CSV_WRITER_COLUMNS);
    if (columns != null && columns.length > 0) {
        format = format.withHeader(columns);
    }

    String escape = conf.get(CSV_WRITER_ESCAPE_CHARACTER, DEFAULT_CSV_WRITER_ESCAPE_CHARACTER);
    if (escape != null) {
        format = format.withEscape(escape.charAt(0));
    }

    String quote = conf.get(CSV_WRITER_QUOTE_CHARACTER, DEFAULT_CSV_WRITER_QUOTE_CHARACTER);
    if (quote != null) {
        format = format.withQuote(quote.charAt(0));
    }

    String quoteMode = conf.get(CSV_WRITER_QUOTE_MODE, DEFAULT_CSV_WRITER_QUOTE_MODE);
    if (quoteMode != null) {
        format = format.withQuoteMode(QuoteMode.valueOf(quoteMode));
    }
    return format;
}