Example usage for org.apache.commons.csv CSVFormat DEFAULT

List of usage examples for org.apache.commons.csv CSVFormat DEFAULT

Introduction

This page lists example usages of org.apache.commons.csv CSVFormat.DEFAULT.

Prototype

CSVFormat DEFAULT

Document

Standard comma separated format, as for RFC 4180 but allowing empty lines.
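
For orientation, here is a minimal sketch of parsing with this format. It assumes Commons CSV 1.x is on the classpath; the input string and class name are made up.

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

import java.io.IOException;
import java.io.StringReader;

public class DefaultFormatSketch {
    public static void main(String[] args) throws IOException {
        // CSVFormat.DEFAULT: comma delimiter, double-quote quoting, CRLF record
        // separator, empty lines are skipped rather than rejected.
        try (CSVParser parser = new CSVParser(new StringReader("a,b,c\r\n1,2,3"), CSVFormat.DEFAULT)) {
            for (CSVRecord record : parser) {
                System.out.println(record.get(0) + " | " + record.get(1) + " | " + record.get(2));
            }
        }
    }
}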

Usage

From source file:com.streamsets.pipeline.lib.parser.delimited.TestDelimitedCharDataParser.java

@Test
public void testParseWithHeaderWithOffset() throws Exception {
    OverrunReader reader = new OverrunReader(new StringReader("A,B\na,b\ne,f"), 1000, true, false);
    DataParser parser = new DelimitedCharDataParser(getContext(), "id", reader, 8, 0, CSVFormat.DEFAULT,
            CsvHeader.WITH_HEADER, -1, CsvRecordType.LIST);
    Assert.assertEquals("8", parser.getOffset());
    Record record = parser.parse();
    Assert.assertNotNull(record);
    Assert.assertEquals("id::8", record.getHeader().getSourceId());
    Assert.assertEquals("e",
            record.get().getValueAsList().get(0).getValueAsMap().get("value").getValueAsString());
    Assert.assertEquals("A",
            record.get().getValueAsList().get(0).getValueAsMap().get("header").getValueAsString());
    Assert.assertEquals("f",
            record.get().getValueAsList().get(1).getValueAsMap().get("value").getValueAsString());
    Assert.assertEquals("B",
            record.get().getValueAsList().get(1).getValueAsMap().get("header").getValueAsString());
    Assert.assertEquals("11", parser.getOffset());
    record = parser.parse();
    Assert.assertNull(record);
    Assert.assertEquals("-1", parser.getOffset());
    parser.close();
}
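
The header handling in this test can be reproduced with Commons CSV alone. A sketch (not StreamSets code; the input mirrors the test above):

try (CSVParser parser = CSVParser.parse("A,B\na,b\ne,f", CSVFormat.DEFAULT.withHeader())) {
    // withHeader() with no arguments promotes the first record ("A,B") to the header,
    // so subsequent records can be read by column name.
    for (CSVRecord record : parser) {
        System.out.println(record.get("A") + "," + record.get("B"));
    }
}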

From source file:com.datascience.cascading.scheme.CsvScheme.java

/**
 * Creates a new CSV scheme with the given source and sink {@link cascading.tuple.Fields}.
 * <p>
 * Strict mode is enabled when using this constructor.
 * <p>
 * The CSV input/output encoding set defaults to {@code UTF-8}
 *
 * @param fields The source and sink fields.
 * @see com.datascience.cascading.scheme.CsvScheme
 */
public CsvScheme(Fields fields) {
    this(fields, fields, CSVFormat.DEFAULT, StandardCharsets.UTF_8, true);
}
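
A hypothetical call to this constructor (the field names are made up); CSVFormat.DEFAULT, UTF-8 and strict parsing are supplied by the delegation above:

CsvScheme scheme = new CsvScheme(new Fields("id", "name", "email"));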

From source file:io.ecarf.core.cloud.task.processor.reason.phase1.DoReasonTask5.java

/**
 * Loops through the instance triples stored in the term's file and writes every
 * triple that can be inferred from the schema triples to the supplied writer.
 *
 * @param term the term whose instance triples file is read
 * @param select the selected columns
 * @param schemaTriples the schema triples to match the instance triples against
 * @param rows the number of rows returned by the select query (only used for logging)
 * @param table the table the triples were selected from
 * @param writer the writer the inferred triples are written to
 * @return the number of inferred triples
 * @throws IOException if the triples file cannot be read
 */
private int inferAndSaveTriplesToFile(Term term, List<String> select, Set<Triple> schemaTriples,
        BigInteger rows, String table, PrintWriter writer) throws IOException {

    int inferredTriples = 0;
    int failedTriples = 0;

    // loop through the instance triples probably stored in a file and generate all the triples matching the schema triples set
    try (BufferedReader r = new BufferedReader(new FileReader(term.getFilename()), Constants.GZIP_BUF_SIZE)) {

        Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(r);

        // records will contain lots of duplicates
        Set<String> inferredAlready = new HashSet<String>();

        try {

            for (CSVRecord record : records) {

                String values = ((select.size() == 1) ? record.get(0) : StringUtils.join(record.values(), ','));

                if (!inferredAlready.contains(values)) {
                    inferredAlready.add(values);

                    NTriple instanceTriple = new NTriple();

                    if (select.size() == 1) {
                        instanceTriple.set(select.get(0), record.get(0));
                    } else {

                        instanceTriple.set(select, record.values());
                    }

                    for (Triple schemaTriple : schemaTriples) {
                        Rule rule = GenericRule.getRule(schemaTriple);
                        Triple inferredTriple = rule.head(schemaTriple, instanceTriple);

                        if (inferredTriple != null) {
                            writer.println(inferredTriple.toCsv());
                            inferredTriples++;
                        }
                    }

                    // this is just to avoid any memory issues
                    if (inferredAlready.size() > MAX_CACHE) {
                        inferredAlready.clear();
                        log.info("Cleared cache of inferred terms");
                    }
                } else {
                    this.duplicates++;
                }

            }
        } catch (Exception e) {
            log.error("Failed to parse selected terms", e);
            failedTriples++;
        }
    }

    //inferredFiles.add(inferredTriplesFile);
    log.info("\nSelect Triples: " + rows + ", Inferred: " + inferredTriples + ", Triples for term: " + term
            + ", Failed Triples: " + failedTriples);

    return inferredTriples;
}
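
The Commons CSV idiom at the core of this method is CSVFormat.DEFAULT.parse(Reader), which returns a CSVParser that streams records lazily instead of loading the whole file. A stripped-down sketch (the file name is hypothetical):

try (BufferedReader r = new BufferedReader(new FileReader("instance-triples.csv"))) {
    for (CSVRecord record : CSVFormat.DEFAULT.parse(r)) {
        // record.get(i) returns the i-th column of the current row
        System.out.println(record.get(0));
    }
}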

From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask7.java

/**
 * Loops through the instance triples stored in the query result file and writes every
 * triple that can be inferred from the schema triples to the supplied writer.
 *
 * @param queryResult the query result whose file is read
 * @param productiveTerms collects the terms for which matching schema triples were found
 * @param table the table the triples were selected from
 * @param writer the writer the inferred triples are written to
 * @return the number of inferred triples
 * @throws IOException if reading or parsing the triples file fails
 */
protected int inferAndSaveTriplesToFile(QueryResult queryResult, Set<Long> productiveTerms, String table,
        PrintWriter writer) throws IOException {

    //Term term, List<String> select, Set<Triple> schemaTriples
    log.info("********************** Starting Inference Round **********************");

    int inferredTriples = 0;
    //int failedTriples = 0;

    // loop through the instance triples probably stored in a file and generate all the triples matching the schema triples set
    try (BufferedReader r = new BufferedReader(new FileReader(queryResult.getFilename()),
            Constants.GZIP_BUF_SIZE)) {

        Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(r);

        // records will contain lots of duplicates
        //Set<String> inferredAlready = new HashSet<String>();

        try {

            Long term;

            for (CSVRecord record : records) {

                //String values = ((select.size() == 1) ? record.get(0): StringUtils.join(record.values(), ','));

                //if(!inferredAlready.contains(values)) {
                //inferredAlready.add(values);

                /*ETriple instanceTriple = new ETriple();
                instanceTriple.setSubject(record.get(0));
                instanceTriple.setPredicate(record.get(1));
                instanceTriple.setObject(record.get(2));*/

                ETriple instanceTriple = ETriple.fromCSV(record.values());

                // TODO review for OWL ruleset
                if (SchemaURIType.RDF_TYPE.id == instanceTriple.getPredicate()) {

                    term = instanceTriple.getObject(); // object

                } else {

                    term = instanceTriple.getPredicate(); // predicate
                }

                Set<Triple> schemaTriples = schemaTerms.get(term);

                if ((schemaTriples != null) && !schemaTriples.isEmpty()) {
                    productiveTerms.add(term);

                    for (Triple schemaTriple : schemaTriples) {
                        Rule rule = GenericRule.getRule(schemaTriple);
                        Triple inferredTriple = rule.head(schemaTriple, instanceTriple);

                        if (inferredTriple != null) {
                            writer.println(inferredTriple.toCsv());
                            inferredTriples++;
                        }
                    }
                }

                // this is just to avoid any memory issues
                //if(inferredAlready.size() > MAX_CACHE) {
                //   inferredAlready.clear();
                //   log.info("Cleared cache of inferred terms");
                //}
                //} else {
                //this.duplicates++;
                //}

            }
        } catch (Exception e) {
            log.error("Failed to parse selected terms", e);
            throw new IOException(e);
            //failedTriples++;
        }
    }

    log.info("Total Rows: " + queryResult.getStats().getTotalRows() + ", Total Processed Bytes: "
            + queryResult.getStats().getTotalProcessedGBytes() + " GB" + ", Inferred: " + inferredTriples);

    log.info("********************** Completed Inference Round **********************");

    return inferredTriples;
}

From source file:net.sourceforge.ganttproject.io.GanttCSVOpen.java

/**
 * Create tasks from file.
 *
 * @return true when the file has been loaded successfully
 * @throws IOException
 *           on parse error or input read failure
 */
public boolean load() throws IOException {
    CSVParser parser = new CSVParser(myInputSupplier.get(),
            CSVFormat.DEFAULT.withEmptyLinesIgnored(false).withSurroundingSpacesIgnored(true));
    int numGroup = 0;
    RecordGroup currentGroup = null;
    boolean searchHeader = true;
    List<CSVRecord> records = parser.getRecords();
    for (CSVRecord record : records) {
        if (record.size() == 0) {
            // If line is empty then current record group is probably finished.
            // Let's search for the next group header.
            searchHeader = true;
            continue;
        }
        if (searchHeader) {
            // Record is not empty and we're searching for header.
            if (numGroup < myRecordGroups.size() && myRecordGroups.get(numGroup).isHeader(record)) {
                // If next group acknowledges the header, then we give it the turn,
                // otherwise it was just an empty line in the current group
                searchHeader = false;
                currentGroup = myRecordGroups.get(numGroup);
                parser.readHeader(record);
                currentGroup.setHeader(Lists.newArrayList(record.iterator()));
                numGroup++;
                continue;
            }
            searchHeader = false;
        }
        assert currentGroup != null;
        currentGroup.process(record);
    }
    for (RecordGroup group : myRecordGroups) {
        group.postProcess();
    }
    // Succeeded
    return true;
}
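
Note that this example appears to target an early Commons CSV snapshot. In the released 1.x API the same format configuration would look roughly like the sketch below (method names differ, and parser.readHeader(record) has no direct 1.x equivalent):

// Keep empty lines so they can separate record groups, and trim surrounding spaces.
CSVFormat format = CSVFormat.DEFAULT
        .withIgnoreEmptyLines(false)
        .withIgnoreSurroundingSpaces(true);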

From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask6.java

/**
 * Loops through the instance triples stored in the query result file and writes every
 * triple that can be inferred from the schema triples to the supplied writer.
 *
 * @param queryResult the query result whose file is read
 * @param productiveTerms collects the terms for which matching schema triples were found
 * @param table the table the triples were selected from
 * @param writer the writer the inferred triples are written to
 * @return the number of inferred triples
 * @throws IOException if reading or parsing the triples file fails
 */
protected int inferAndSaveTriplesToFile(QueryResult queryResult, Set<String> productiveTerms, String table,
        PrintWriter writer) throws IOException {

    //Term term, List<String> select, Set<Triple> schemaTriples
    log.info("********************** Starting Inference Round **********************");

    int inferredTriples = 0;
    //int failedTriples = 0;

    // loop through the instance triples probably stored in a file and generate all the triples matching the schema triples set
    try (BufferedReader r = new BufferedReader(new FileReader(queryResult.getFilename()),
            Constants.GZIP_BUF_SIZE)) {

        Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(r);

        // records will contain lots of duplicates
        //Set<String> inferredAlready = new HashSet<String>();

        try {

            String term;

            for (CSVRecord record : records) {

                //String values = ((select.size() == 1) ? record.get(0): StringUtils.join(record.values(), ','));

                //if(!inferredAlready.contains(values)) {
                //inferredAlready.add(values);

                NTriple instanceTriple = (NTriple) NTriple.fromCSV(record.values());//new NTriple();

                /*instanceTriple.setSubject(record.get(0));
                instanceTriple.setPredicate(record.get(1));
                instanceTriple.setObject(record.get(2));*/

                // TODO review for OWL ruleset
                if (SchemaURIType.RDF_TYPE.getUri().equals(instanceTriple.getPredicate())) {
                    term = instanceTriple.getObject(); // object
                } else {
                    term = instanceTriple.getPredicate(); // predicate
                }

                Set<Triple> schemaTriples = schemaTerms.get(term);

                if ((schemaTriples != null) && !schemaTriples.isEmpty()) {
                    productiveTerms.add(term);

                    for (Triple schemaTriple : schemaTriples) {
                        Rule rule = GenericRule.getRule(schemaTriple);
                        Triple inferredTriple = rule.head(schemaTriple, instanceTriple);

                        if (inferredTriple != null) {
                            writer.println(inferredTriple.toCsv());
                            inferredTriples++;
                        }
                    }
                }

                // this is just to avoid any memory issues
                //if(inferredAlready.size() > MAX_CACHE) {
                //   inferredAlready.clear();
                //   log.info("Cleared cache of inferred terms");
                //}
                //} else {
                //this.duplicates++;
                //}

            }
        } catch (Exception e) {
            log.error("Failed to parse selected terms", e);
            throw new IOException(e);
            //failedTriples++;
        }
    }

    log.info("Total Rows: " + queryResult.getStats().getTotalRows() + ", Total Processed Bytes: "
            + queryResult.getStats().getTotalProcessedGBytes() + " GB" + ", Inferred: " + inferredTriples);

    log.info("********************** Completed Inference Round **********************");

    return inferredTriples;
}

From source file:com.datascience.cascading.scheme.CsvScheme.java

/**
 * Creates a new CSV scheme with the given source and sink {@link cascading.tuple.Fields}.
 * <p>
 * Strict mode is enabled when using this constructor.
 *
 * @param fields  The source and sink fields.
 * @param charset The character set with which to read and write CSV files.
 * @see com.datascience.cascading.scheme.CsvScheme
 */
public CsvScheme(Fields fields, Charset charset) {
    this(fields, fields, CSVFormat.DEFAULT, charset, true);
}
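
A hypothetical call to this constructor with an explicit character set (field names are made up):

CsvScheme latin1Scheme = new CsvScheme(new Fields("id", "name"), StandardCharsets.ISO_8859_1);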

From source file:com.streamsets.pipeline.lib.parser.delimited.TestDelimitedCharDataParser.java

@Test(expected = IOException.class)
public void testClose() throws Exception {
    OverrunReader reader = new OverrunReader(new StringReader("A,B\na,b"), 1000, true, false);
    DataParser parser = new DelimitedCharDataParser(getContext(), "id", reader, 0, 0, CSVFormat.DEFAULT,
            CsvHeader.IGNORE_HEADER, -1, CsvRecordType.LIST);
    parser.close();
    parser.parse();
}

From source file:it.newfammulfin.api.EntryResource.java

@POST
@Consumes("text/csv")
@Produces(MediaType.TEXT_PLAIN)
public Response importFromCsv(String csvData,
        @DefaultValue("false") @QueryParam("invertSign") final boolean invertSign) {
    final Group group = (Group) requestContext.getProperty(GroupRetrieverRequestFilter.GROUP);
    final Map<String, Key<Chapter>> chapterStringsMap = new HashMap<>();
    final List<CSVRecord> records;
    try {
        records = CSVParser.parse(csvData, CSVFormat.DEFAULT.withHeader()).getRecords();
    } catch (IOException e) {
        return Response.status(Response.Status.INTERNAL_SERVER_ERROR)
                .entity(String.format("Unexpected %s: %s.", e.getClass().getSimpleName(), e.getMessage()))
                .build();
    }
    //check users
    final Set<String> userIds = new HashSet<>();
    for (String columnName : records.get(0).toMap().keySet()) {
        if (columnName.startsWith("by:")) {
            String userId = columnName.replaceFirst("by:", "");
            if (!group.getUsersMap().keySet().contains(Key.create(RegisteredUser.class, userId))) {
                return Response.status(Response.Status.INTERNAL_SERVER_ERROR)
                        .entity(String.format("User %s not found in this group.", userId)).build();
            }
            userIds.add(userId);
        }
    }
    //build chapters
    final Set<String> chapterStringsSet = new HashSet<>();
    for (CSVRecord record : records) {
        chapterStringsSet.add(record.get("chapters"));
    }
    final List<Key<?>> createdKeys = new ArrayList<>();
    try {
        OfyService.ofy().transact(new Work<List<Key<?>>>() {
            @Override
            public List<Key<?>> run() {
                for (String chapterStrings : chapterStringsSet) {
                    List<String> pieces = Arrays.asList(chapterStrings.split(CSV_CHAPTERS_SEPARATOR));
                    Key<Chapter> parentChapterKey = null;
                    for (int i = 0; i < pieces.size(); i++) {
                        String partialChapterString = Joiner.on(CSV_CHAPTERS_SEPARATOR)
                                .join(pieces.subList(0, i + 1));
                        Key<Chapter> chapterKey = chapterStringsMap.get(partialChapterString);
                        if (chapterKey == null) {
                            chapterKey = OfyService.ofy().load().type(Chapter.class).ancestor(group)
                                    .filter("name", pieces.get(i)).filter("parentChapterKey", parentChapterKey)
                                    .keys().first().now();
                            chapterStringsMap.put(partialChapterString, chapterKey);
                        }
                        if (chapterKey == null) {
                            Chapter chapter = new Chapter(pieces.get(i), Key.create(group), parentChapterKey);
                            OfyService.ofy().save().entity(chapter).now();
                            chapterKey = Key.create(chapter);
                            createdKeys.add(chapterKey);
                            LOG.info(String.format("%s created.", chapter));
                        }
                        chapterStringsMap.put(partialChapterString, chapterKey);
                        parentChapterKey = chapterKey;
                    }
                }
                //build entries
                DateTimeFormatter formatter = DateTimeFormat.forPattern("dd/MM/YY");
                Key<Group> groupKey = Key.create(group);
                for (CSVRecord record : records) {
                    Entry entry = new Entry();
                    entry.setGroupKey(groupKey);
                    entry.setDate(LocalDate.parse(record.get("date"), formatter));
                    entry.setAmount(Money.of(CurrencyUnit.of(record.get("currency").toUpperCase()),
                            (invertSign ? -1 : 1) * Double.parseDouble(record.get("value"))));
                    if (!record.get("chapters").isEmpty()) {
                        entry.setChapterKey(chapterStringsMap.get(record.get("chapters")));
                    }
                    entry.setPayee(record.get("payee"));
                    for (String tag : record.get("tags").split(CSV_TAGS_SEPARATOR)) {
                        if (!tag.trim().isEmpty()) {
                            entry.getTags().add(tag);
                        }
                    }
                    entry.setDescription(record.get("description"));
                    entry.setNote(record.get("notes"));
                    int scale = Math.max(DEFAULT_SHARE_SCALE, entry.getAmount().getScale());
                    //by shares
                    for (String userId : userIds) {
                        String share = record.get("by:" + userId);
                        double value;
                        if (share.contains("%")) {
                            entry.setByPercentage(true);
                            value = Double.parseDouble(share.replace("%", ""));
                            value = entry.getAmount().getAmount().doubleValue() * value / 100d;
                        } else {
                            value = (invertSign ? -1 : 1) * Double.parseDouble(share);
                        }
                        entry.getByShares().put(Key.create(RegisteredUser.class, userId),
                                BigDecimal.valueOf(value).setScale(scale, RoundingMode.DOWN));
                    }
                    boolean equalByShares = checkAndBalanceZeroShares(entry.getByShares(),
                            entry.getAmount().getAmount());
                    entry.setByPercentage(entry.isByPercentage() || equalByShares);
                    //for shares
                    for (String userId : userIds) {
                        String share = record.get("for:" + userId);
                        double value;
                        if (share.contains("%")) {
                            entry.setForPercentage(true);
                            value = Double.parseDouble(share.replace("%", ""));
                            value = entry.getAmount().getAmount().doubleValue() * value / 100d;
                        } else {
                            value = (invertSign ? -1 : 1) * Double.parseDouble(share);
                        }
                        entry.getForShares().put(Key.create(RegisteredUser.class, userId),
                                BigDecimal.valueOf(value).setScale(scale, RoundingMode.DOWN));
                    }
                    boolean equalForShares = checkAndBalanceZeroShares(entry.getForShares(),
                            entry.getAmount().getAmount());
                    entry.setForPercentage(entry.isForPercentage() || equalForShares);
                    OfyService.ofy().save().entity(entry).now();
                    createdKeys.add(Key.create(entry));
                    EntryOperation operation = new EntryOperation(Key.create(group), Key.create(entry),
                            new Date(),
                            Key.create(RegisteredUser.class, securityContext.getUserPrincipal().getName()),
                            EntryOperation.Type.IMPORT);
                    OfyService.ofy().save().entity(operation).now();
                    LOG.info(String.format("%s created.", entry));
                }
                return createdKeys;
            }
        });
        //count keys
        int numberOfCreatedChapters = 0;
        int numberOfCreatedEntries = 0;
        for (Key<?> key : createdKeys) {
            if (key.getKind().equals(Entry.class.getSimpleName())) {
                numberOfCreatedEntries = numberOfCreatedEntries + 1;
            } else if (key.getKind().equals(Chapter.class.getSimpleName())) {
                numberOfCreatedChapters = numberOfCreatedChapters + 1;
            }
        }
        return Response.ok(String.format("Done: %d chapters and %d entries created.", numberOfCreatedChapters,
                numberOfCreatedEntries)).build();
    } catch (RuntimeException e) {
        LOG.warning(String.format("Unexpected %s: %s.", e.getClass().getSimpleName(), e.getMessage()));
        return Response.status(Response.Status.INTERNAL_SERVER_ERROR)
                .entity(String.format("Unexpected %s: %s.", e.getClass().getSimpleName(), e.getMessage()))
                .build();
    }
}
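
The CSV-specific part of this endpoint reduces to parsing a string whose first row is the header and then reading columns by name. A minimal sketch with made-up data:

String csv = "date,currency,value,chapters,payee,tags,description,notes\n"
        + "01/02/15,EUR,12.50,Food,Shop,,,";
try (CSVParser parser = CSVParser.parse(csv, CSVFormat.DEFAULT.withHeader())) {
    for (CSVRecord record : parser.getRecords()) {
        // toMap() exposes the header-to-value mapping of a single record
        System.out.println(record.toMap().keySet() + " -> " + record.get("value"));
    }
}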

From source file:com.datascience.cascading.scheme.CsvScheme.java

/**
 * Creates a new CSV scheme with the given source and sink {@link cascading.tuple.Fields}.
 * <p>
 * The CSV input/output encoding set defaults to {@code UTF-8}
 *
 * @param fields The source and sink fields.
 * @param strict Indicates whether to parse records in strict parsing mode. When strict mode is disabled, single record
 *               parse errors will be caught and logged.
 * @see com.datascience.cascading.scheme.CsvScheme
 */
public CsvScheme(Fields fields, boolean strict) {
    this(fields, fields, CSVFormat.DEFAULT, StandardCharsets.UTF_8, strict);
}
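
A hypothetical call to this constructor with strict mode disabled (field names are made up), so single malformed records are logged and skipped instead of failing the flow:

CsvScheme lenientScheme = new CsvScheme(new Fields("id", "name"), false);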