Usage examples for `org.apache.commons.csv.CSVFormat#newFormat(char)` collected from open-source projects.
public static CSVFormat newFormat(final char delimiter)
From source file:cz.pichlik.goodsentiment.common.CSVFormats.java
/**
 * Builds the CSV format shared by this application: comma-delimited, CRLF
 * record separator, double-quoted fields, and the empty string as the null token.
 *
 * @param header optional column names; when at least one is given, the
 *               returned format carries them as a header
 * @return the configured {@link CSVFormat}
 */
public static CSVFormat format(String... header) {
    CSVFormat base = CSVFormat.newFormat(',')
            .withRecordSeparator("\r\n")
            .withQuote('"')
            .withNullString("");
    if (header.length > 0) {
        return base.withHeader(header);
    }
    return base;
}
From source file:com.marklogic.contentpump.utilities.CSVParserFormatter.java
/**
 * Builds a {@link CSVFormat} for parsing with the given control characters.
 * Missing column names are always allowed.
 *
 * @param delimiter               field delimiter character
 * @param encapsulator            quote (encapsulator) character
 * @param ignoreSurroundingSpaces whether whitespace around fields is ignored
 * @param ignoreEmptyLines        whether blank lines are skipped
 * @return the configured format
 */
public static CSVFormat getFormat(char delimiter, char encapsulator,
        boolean ignoreSurroundingSpaces, boolean ignoreEmptyLines) {
    return CSVFormat.newFormat(delimiter)
            .withIgnoreEmptyLines(ignoreEmptyLines)
            .withIgnoreSurroundingSpaces(ignoreSurroundingSpaces)
            .withAllowMissingColumnNames(true)
            .withQuote(encapsulator);
}
From source file:cz.pichlik.goodsentiment.MockDataGenerator.java
/**
 * Generates a mock sentiment CSV file for the given date containing between
 * 150 and 249 random records.
 *
 * <p>Fixes over the previous version: the record count is drawn once before
 * the loop (the old loop condition called {@code rg().nextInt(100)} on every
 * iteration, re-randomizing the bound mid-loop), and the printer is managed
 * by try-with-resources (the old {@code finally { printer.close(); }} threw
 * {@code NullPointerException} if the {@code CSVPrinter} constructor failed
 * before assignment).
 *
 * @param file destination file; overwritten if it exists
 * @param date logical date used when generating record timestamps
 * @throws IOException if the file cannot be written
 */
private static void generateFile(File file, LocalDate date) throws IOException {
    CSVFormat format = CSVFormat.newFormat(',')
            .withRecordSeparator("\r\n")
            .withQuote('"')
            .withHeader("id", "sentimentCode", "orgUnit", "latitude", "longitude",
                    "city", "gender", "yearsInCompany", "timestamp")
            .withNullString("");
    try (OutputStreamWriter output = new OutputStreamWriter(new FileOutputStream(file));
            CSVPrinter printer = new CSVPrinter(output, format)) {
        // Draw the record count once; the bound must not change while looping.
        int recordCount = 150 + rg().nextInt(100);
        for (int i = 0; i < recordCount; i++) {
            long id = sequence++;
            int sentimentCode = generateSentiment();
            String orgUnit = generateOrgUnit();
            Object[] geo = generateGeo();
            Object latitude = geo[0];
            Object longitude = geo[1];
            Object city = geo[2];
            String gender = generateGender();
            int daysInCompany = generateYearsInCompany();
            LocalDateTime timestamp = generateTimestamp(date);
            printer.printRecord(id, sentimentCode, orgUnit, latitude, longitude,
                    city, gender, daysInCompany, timestamp);
        }
    }
}
From source file:com.lithium.flow.util.CsvFormats.java
/**
 * Builds a {@link CSVFormat} from configuration. The {@code csv.format} key
 * selects a named preset ({@code default}, {@code excel}, {@code mysql},
 * {@code rfc4180}, {@code tdf}) or {@code custom}, which assembles a format
 * from the individual {@code csv.*} keys. Unrecognized values fall back to
 * {@link CSVFormat#DEFAULT}.
 *
 * @param config source of the {@code csv.*} settings, never null
 * @return the configured format
 */
@Nonnull
public static CSVFormat fromConfig(@Nonnull Config config) {
    checkNotNull(config);
    String name = config.getString("csv.format", "default");
    if ("excel".equals(name)) {
        return CSVFormat.EXCEL;
    }
    if ("mysql".equals(name)) {
        return CSVFormat.MYSQL;
    }
    if ("rfc4180".equals(name)) {
        return CSVFormat.RFC4180;
    }
    if ("tdf".equals(name)) {
        return CSVFormat.TDF;
    }
    if ("custom".equals(name)) {
        return CSVFormat.newFormat(getChar(config, "csv.delimiter", ','))
                .withAllowMissingColumnNames(getBoolean(config, "csv.allowMissingColumnNames"))
                .withCommentMarker(getChar(config, "csv.commentMarker"))
                .withEscape(getChar(config, "csv.escape"))
                .withHeader(getHeader(config, "csv.header"))
                .withIgnoreEmptyLines(getBoolean(config, "csv.ignoreEmptyLines"))
                .withIgnoreSurroundingSpaces(getBoolean(config, "csv.ignoreSurroundingSpaces"))
                .withNullString(getString(config, "csv.nullString"))
                .withQuote(getChar(config, "csv.quote"))
                .withQuoteMode(getQuoteMode(config, "csv.quoteMode"))
                .withRecordSeparator(getString(config, "csv.recordSeparator"))
                .withSkipHeaderRecord(getBoolean(config, "csv.skipHeaderRecord"));
    }
    // "default" and any unknown value both map to the library default format.
    return CSVFormat.DEFAULT;
}
From source file:com.github.jferard.pgloaderutils.sniffer.csv.CSVOptionalHeaderSniffer.java
/**
 * Creates a header sniffer for the given single-byte CSV control characters.
 * An escape character is only configured when it differs from the quote
 * character; the original guard is preserved here.
 *
 * @param delimiter field delimiter byte
 * @param quote     quote byte
 * @param escape    escape byte; ignored when equal to {@code quote}
 * @param charset   charset used to decode the input
 * @return a sniffer configured with the matching {@link CSVFormat}
 */
public static CSVOptionalHeaderSniffer getSniffer(final byte delimiter, final byte quote,
        final byte escape, final Charset charset) {
    CSVFormat format = CSVFormat.newFormat((char) delimiter)
            .withQuote((char) quote)
            .withQuoteMode(QuoteMode.MINIMAL)
            .withAllowMissingColumnNames();
    if (escape != quote) {
        format = format.withEscape((char) escape);
    }
    return new CSVOptionalHeaderSniffer(format, charset);
}
From source file:com.gs.obevo.db.impl.core.changetypes.CsvReaderDataSource.java
/**
 * Returns the standard CSV format used by Obevo by both readers (for deploy)
 * and writers (for reverse-engineering) of CSV: CRLF record separator,
 * double-quoted fields, backslash escapes, surrounding spaces ignored.
 *
 * @param delim     field delimiter character
 * @param nullToken string that represents a null value
 * @return the shared format
 */
public static CSVFormat getCsvFormat(char delim, String nullToken) {
    return CSVFormat.newFormat(delim)
            .withRecordSeparator("\r\n")
            .withIgnoreSurroundingSpaces(true)
            .withQuote('"')
            .withEscape('\\')
            .withNullString(nullToken);
}
From source file:jp.co.cyberagent.parquet.msgpack.compat.TestParquetCompatibility.java
/**
 * Compares the parquet resource under test against the reference
 * {@code nation.csv}, choosing column names based on whether the parquet
 * file was produced by Impala (its filename contains "impala").
 */
@Test
public void testing() throws Exception {
    JSONIterator parquet = ParquetAsJSONIterator.fromResource(parquetFilename);
    boolean impala = parquetFilename.contains("impala");
    CSVHeaderMap headerMap = CSVHeaderMap.builder()
            .add(impala ? "n_nationkey" : "nation_key", CSVColumnWriter.NUMBER)
            .add(impala ? "n_name" : "name", CSVColumnWriter.STRING)
            .add(impala ? "n_regionkey" : "region_key", CSVColumnWriter.NUMBER)
            .add(impala ? "n_comment" : "comment_col", CSVColumnWriter.STRING)
            .build();
    JSONIterator csv = CSVAsJSONIterator.fromResource(
            "test-data/parquet-python/nation.csv", CSVFormat.newFormat('|'), headerMap);
    assertThat(parquet, is(sameAs(csv)));
}
From source file:com.datascience.cascading.CsvSchemeTest.java
/**
 * Tests the CSV scheme source with headers: reads the input with a
 * comma-delimited, headered source format and writes it tab-delimited,
 * then compares against the expected fixture.
 *
 * @param inputPath path of the CSV input fixture to read
 */
private void testCsvSourceWithHeaders(String inputPath) throws Exception {
    final String sinkPath = "src/test/resources/output/source-with-headers";
    final String expectedPath = "src/test/resources/expected/with-headers.txt";
    CSVFormat sourceFormat = CSVFormat.newFormat(',')
            .withQuote('"')
            .withHeader("id", "first name", "last name")
            .withSkipHeaderRecord()
            .withEscape('\\')
            .withRecordSeparator('\n');
    CSVFormat sinkFormat = CSVFormat.newFormat('\t')
            .withEscape('\\')
            .withRecordSeparator('\n');
    testScheme(inputPath, sourceFormat, sinkPath, sinkFormat, expectedPath, true);
}
From source file:fr.univ_tours.li.mdjedaini.ideb.io.CsvLogLoader.java
/** * /*www .j a v a 2s. c o m*/ * @param arg_sessionFilePath * @return */ public Session loadSession(String arg_sessionFilePath) { Session result = new Session(); try { Reader in = new FileReader(arg_sessionFilePath); Iterable<CSVRecord> records = CSVFormat.newFormat(';').withFirstRecordAsHeader().parse(in); // each record is a query for (CSVRecord record : records) { // System.out.println("I am parsing the line: " + record); String cubeName = record.get("cube"); EAB_Cube cube = this.be.getBenchmarkData().getInternalCubeList().get(cubeName); QueryTriplet q_tmp = new QueryTriplet(cube); // extract measures String currentMeasure = record.get("Measures"); // only add measure if not empty if (!currentMeasure.equals("[]")) { Pattern p = Pattern.compile("([a-zA-Z_0-9][a-zA-Z_0-9 ]+)"); Matcher m = p.matcher(currentMeasure); // manage multiple measures while (m.find()) { //System.out.println("Current measure: " + currentMeasure + " --- trouve: " + m.groupCount()); String measure = m.group(1); // System.out.println("Measure: " + measure); // add the current measure to the current query MeasureFragment mf = new MeasureFragment(q_tmp, measure); if (null == mf.getMeasure()) { int i = 2; } q_tmp.addMeasure(mf); } } // extract GBS String currentProjection = record.get("GroupBy"); // only add projections if not empty if (!currentProjection.equals("[]")) { Pattern p = Pattern.compile("([a-zA-Z_0-9][a-zA-Z_0-9 ]+)"); Matcher m = p.matcher(currentProjection); // manage multiple group by while (m.find()) { //System.out.println("Group " + i + ": " + m.group(i)); String level = m.group(1); // System.out.println("Level: " + level); EAB_Level l_tmp = cube.getLevelByAtomicName(level); ProjectionFragment pf_tmp = new ProjectionFragment(q_tmp, l_tmp); if (null == pf_tmp.getLevel()) { int i = 2; } q_tmp.addProjection(pf_tmp); } } // extract filters String currentSelection = record.get("Filters"); // only add projections if not empty if (!currentSelection.equals("[]")) { Pattern p = 
Pattern.compile("([a-zA-Z_0-9][a-zA-Z_0-9 ]+)=>\\[EQUAL ([a-zA-Z_0-9& ]+)\\]"); Matcher m = p.matcher(currentSelection); // manage multiple occurrences while (m.find()) { // System.out.println("Current selection: " + currentSelection + " --- trouve: " + m.groupCount()); String level = m.group(1); String member = m.group(2); EAB_Level l_tmp = cube.getLevelByAtomicName(level); // System.out.println("Cube: " + cubeName); // System.out.println("Level: " + level); // System.out.println("Member: " + member); if (null == l_tmp) { int i = 2; } String dimName = l_tmp.getHierarchy().getDimension().getMondrianDimension().getName(); String hieName = l_tmp.getHierarchy().getName(); //hieName.spl SelectionFragment sf_tmp = new SelectionFragment(q_tmp, dimName, hieName, level, member); if (null != sf_tmp.getMemberValue()) { q_tmp.addSelection(sf_tmp); } } } // add the query to the session result.addQuery(q_tmp); QueryConverter qc = new QueryConverter(this.be); try { System.out.println("******************"); System.out.println("Record:" + record); QueryMdx q_mdx = qc.toMdx(q_tmp); System.out.println("MDX with my converter:"); System.out.println(q_mdx); q_mdx.execute(Boolean.TRUE); // System.out.println("-----"); // System.out.println("Query: " + q_tmp); // System.out.println("-----"); // System.out.println("Mdx: " + qc.toMdx(q_tmp)); // System.out.println("******************"); } catch (Exception arg_e) { System.out.println("******************"); System.out.println("Exception: " + arg_e.getClass().getName()); System.out.println("Record:" + record); // System.out.println("-----"); // System.out.println("Query: " + q_tmp); // System.out.println("-----"); //qc.toMdx(q_tmp); //System.out.println("******************"); //System.err.println("Exception avec: "); //System.err.println("Record: " + record); } } // end foreach record } catch (Exception arg_e) { arg_e.printStackTrace(); } // SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS"); // // // add the name as 
metadata of the session // result.addMetaData("name", arg_sessionFilePath); // // System.out.println("I am parsing the file: " + arg_sessionFilePath); // // // pattern for extracting cube name // Pattern p = Pattern.compile("from \\[(.*?)\\].*"); // // File file = new File(arg_sessionFilePath); // // // try { // //BufferedReader br = new BufferedReader(new FileReader(file)); // BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(arg_sessionFilePath), "UTF-8")); // String line = null; // // String currentQuery = ""; // // // pour parser une requete, je cherche "select" // // je prends toutes les lignes suivantes, jusqu'a rencontrer une ligne vide... // while ((line = br.readLine()) != null) { // // if(line.contains("select")) { // // // look for the time before query execution // String date = line.substring(0, 23); // Date d = sdf.parse(date); // Long tsBefore = d.getTime(); // // // je recupere la position du mot "select" dans la ligne // Integer position = line.indexOf("select"); // currentQuery = line.substring(position, line.length()); // // String line_tmp = br.readLine(); // while(!line_tmp.equals("")) { // currentQuery += System.lineSeparator(); // //currentQuery += System.lineSeparator(); // currentQuery += line_tmp; // line_tmp = br.readLine(); // } // // // extract cubename from the query text // // Normally, the pattern is always found! 
// Matcher m = p.matcher(currentQuery); // m.find(); // String cubeName = m.group(1); // // //System.out.println(currentQuery); // //System.out.println("cubeName: " + cubeName); // //System.out.println("-------"); // // // look for the execution time // while(!line_tmp.contains("exec:")) { // line_tmp = br.readLine(); // } // // // here the line contains exec // // look for the time before query execution // date = line_tmp.substring(0, 23); // d = sdf.parse(date); // Long tsAfter = d.getTime(); // // Query q_tmp = new QueryMdx(this.be.getInternalCubeByName(cubeName), currentQuery); // // result.addQuery(q_tmp, tsBefore, tsAfter); // } // // } // // br.close(); // } catch(Exception arg_e) { // arg_e.printStackTrace(); // } return result; }
From source file:com.datascience.hadoop.CsvOutputFormat.java
/**
 * Creates a CSV format from a Hadoop configuration. The base settings
 * (delimiter, header skipping, record separator, empty-line and whitespace
 * handling, null string) are always applied; header columns, escape, quote,
 * and quote mode are only applied when their resolved values are present.
 *
 * @param conf Hadoop configuration carrying the csv writer settings
 * @return the configured {@link CSVFormat}
 */
private static CSVFormat createFormat(Configuration conf) {
    char delimiter = conf.get(CSV_WRITER_DELIMITER, DEFAULT_CSV_WRITER_DELIMITER).charAt(0);
    CSVFormat format = CSVFormat.newFormat(delimiter)
            .withSkipHeaderRecord(
                    conf.getBoolean(CSV_WRITER_SKIP_HEADER, DEFAULT_CSV_WRITER_SKIP_HEADER))
            .withRecordSeparator(
                    conf.get(CSV_WRITER_RECORD_SEPARATOR, DEFAULT_CSV_WRITER_RECORD_SEPARATOR))
            .withIgnoreEmptyLines(conf.getBoolean(
                    CSV_WRITER_IGNORE_EMPTY_LINES, DEFAULT_CSV_WRITER_IGNORE_EMPTY_LINES))
            .withIgnoreSurroundingSpaces(conf.getBoolean(
                    CSV_WRITER_IGNORE_SURROUNDING_SPACES,
                    DEFAULT_CSV_WRITER_IGNORE_SURROUNDING_SPACES))
            .withNullString(conf.get(CSV_WRITER_NULL_STRING, DEFAULT_CSV_WRITER_NULL_STRING));

    String[] columns = conf.getStrings(CSV_WRITER_COLUMNS);
    if (columns != null && columns.length > 0) {
        format = format.withHeader(columns);
    }

    String escape = conf.get(CSV_WRITER_ESCAPE_CHARACTER, DEFAULT_CSV_WRITER_ESCAPE_CHARACTER);
    if (escape != null) {
        format = format.withEscape(escape.charAt(0));
    }

    String quote = conf.get(CSV_WRITER_QUOTE_CHARACTER, DEFAULT_CSV_WRITER_QUOTE_CHARACTER);
    if (quote != null) {
        format = format.withQuote(quote.charAt(0));
    }

    String quoteMode = conf.get(CSV_WRITER_QUOTE_MODE, DEFAULT_CSV_WRITER_QUOTE_MODE);
    if (quoteMode != null) {
        format = format.withQuoteMode(QuoteMode.valueOf(quoteMode));
    }
    return format;
}