List of usage examples for org.apache.commons.csv CSVFormat DEFAULT
CSVFormat DEFAULT
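CSVFormat.DEFAULT is the standard comma-separated value format of Apache Commons CSV: comma delimiter, double-quote as the quote character, CRLF as the record separator, and empty lines ignored. Before the project examples below, here is a minimal sketch of the core parse loop they all build on (the input string is illustrative):

import java.io.Reader;
import java.io.StringReader;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class CsvFormatDefaultExample {
    public static void main(String[] args) throws Exception {
        // Illustrative input; any Reader works the same way.
        Reader in = new StringReader("a,b,c\r\n1,2,3\r\n");
        try (CSVParser parser = CSVFormat.DEFAULT.parse(in)) {
            for (CSVRecord record : parser) {
                // Columns are addressed by zero-based index with CSVFormat.DEFAULT.
                System.out.println(record.get(0) + " | " + record.get(1) + " | " + record.get(2));
            }
        }
    }
}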
From source file:com.streamsets.pipeline.lib.parser.delimited.TestDelimitedCharDataParser.java
@Test
public void testParseWithHeaderWithOffset() throws Exception {
    OverrunReader reader = new OverrunReader(new StringReader("A,B\na,b\ne,f"), 1000, true, false);
    DataParser parser = new DelimitedCharDataParser(getContext(), "id", reader, 8, 0, CSVFormat.DEFAULT,
            CsvHeader.WITH_HEADER, -1, CsvRecordType.LIST);
    Assert.assertEquals("8", parser.getOffset());
    Record record = parser.parse();
    Assert.assertNotNull(record);
    Assert.assertEquals("id::8", record.getHeader().getSourceId());
    Assert.assertEquals("e", record.get().getValueAsList().get(0).getValueAsMap().get("value").getValueAsString());
    Assert.assertEquals("A", record.get().getValueAsList().get(0).getValueAsMap().get("header").getValueAsString());
    Assert.assertEquals("f", record.get().getValueAsList().get(1).getValueAsMap().get("value").getValueAsString());
    Assert.assertEquals("B", record.get().getValueAsList().get(1).getValueAsMap().get("header").getValueAsString());
    Assert.assertEquals("11", parser.getOffset());
    record = parser.parse();
    Assert.assertNull(record);
    Assert.assertEquals("-1", parser.getOffset());
    parser.close();
}
From source file:com.datascience.cascading.scheme.CsvScheme.java
/**
 * Creates a new CSV scheme with the given source and sink {@link cascading.tuple.Fields}.
 * <p>
 * Strict mode is enabled when using this constructor.
 * <p>
 * The CSV input/output character set defaults to {@code UTF-8}.
 *
 * @param fields The source and sink fields.
 * @see com.datascience.cascading.scheme.CsvScheme
 */
public CsvScheme(Fields fields) {
    this(fields, fields, CSVFormat.DEFAULT, StandardCharsets.UTF_8, true);
}
From source file:io.ecarf.core.cloud.task.processor.reason.phase1.DoReasonTask5.java
/**
 * @param term
 * @param select
 * @param schemaTriples
 * @param rows
 * @param table
 * @param writer
 * @return
 * @throws IOException
 */
private int inferAndSaveTriplesToFile(Term term, List<String> select, Set<Triple> schemaTriples,
        BigInteger rows, String table, PrintWriter writer) throws IOException {
    int inferredTriples = 0;
    int failedTriples = 0;
    // loop through the instance triples, probably stored in a file, and generate all the triples
    // matching the schema triples set
    try (BufferedReader r = new BufferedReader(new FileReader(term.getFilename()), Constants.GZIP_BUF_SIZE)) {
        Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(r);
        // records will contain lots of duplicates
        Set<String> inferredAlready = new HashSet<String>();
        try {
            for (CSVRecord record : records) {
                String values = (select.size() == 1) ? record.get(0) : StringUtils.join(record.values(), ',');
                if (!inferredAlready.contains(values)) {
                    inferredAlready.add(values);
                    NTriple instanceTriple = new NTriple();
                    if (select.size() == 1) {
                        instanceTriple.set(select.get(0), record.get(0));
                    } else {
                        instanceTriple.set(select, record.values());
                    }
                    for (Triple schemaTriple : schemaTriples) {
                        Rule rule = GenericRule.getRule(schemaTriple);
                        Triple inferredTriple = rule.head(schemaTriple, instanceTriple);
                        if (inferredTriple != null) {
                            writer.println(inferredTriple.toCsv());
                            inferredTriples++;
                        }
                    }
                    // this is just to avoid any memory issues
                    if (inferredAlready.size() > MAX_CACHE) {
                        inferredAlready.clear();
                        log.info("Cleared cache of inferred terms");
                    }
                } else {
                    this.duplicates++;
                }
            }
        } catch (Exception e) {
            log.error("Failed to parse selected terms", e);
            failedTriples++;
        }
    }
    log.info("\nSelect Triples: " + rows + ", Inferred: " + inferredTriples + ", Triples for term: " + term
            + ", Failed Triples: " + failedTriples);
    return inferredTriples;
}
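The CSVFormat.DEFAULT.parse(reader) call above returns a CSVParser, which implements Iterable<CSVRecord> and streams records as they are read rather than loading the whole file into memory. A minimal sketch of that streaming pattern in isolation (the file name and column handling are illustrative):

import java.io.BufferedReader;
import java.io.FileReader;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;

public class StreamRecordsExample {
    public static void main(String[] args) throws Exception {
        // "triples.csv" is a hypothetical file; the loop sees one CSVRecord per line.
        try (BufferedReader reader = new BufferedReader(new FileReader("triples.csv"))) {
            for (CSVRecord record : CSVFormat.DEFAULT.parse(reader)) {
                // record.values() exposes the columns as a String[]
                System.out.println(String.join(",", record.values()));
            }
        }
    }
}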
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask7.java
/**
 * @param queryResult
 * @param productiveTerms
 * @param table
 * @param writer
 * @return
 * @throws IOException
 */
protected int inferAndSaveTriplesToFile(QueryResult queryResult, Set<Long> productiveTerms, String table,
        PrintWriter writer) throws IOException {
    log.info("********************** Starting Inference Round **********************");
    int inferredTriples = 0;
    // loop through the instance triples, probably stored in a file, and generate all the triples
    // matching the schema triples set
    try (BufferedReader r = new BufferedReader(new FileReader(queryResult.getFilename()),
            Constants.GZIP_BUF_SIZE)) {
        Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(r);
        // records will contain lots of duplicates
        try {
            Long term;
            for (CSVRecord record : records) {
                ETriple instanceTriple = ETriple.fromCSV(record.values());
                // TODO review for OWL ruleset
                if (SchemaURIType.RDF_TYPE.id == instanceTriple.getPredicate()) {
                    term = instanceTriple.getObject(); // object
                } else {
                    term = instanceTriple.getPredicate(); // predicate
                }
                Set<Triple> schemaTriples = schemaTerms.get(term);
                if ((schemaTriples != null) && !schemaTriples.isEmpty()) {
                    productiveTerms.add(term);
                    for (Triple schemaTriple : schemaTriples) {
                        Rule rule = GenericRule.getRule(schemaTriple);
                        Triple inferredTriple = rule.head(schemaTriple, instanceTriple);
                        if (inferredTriple != null) {
                            writer.println(inferredTriple.toCsv());
                            inferredTriples++;
                        }
                    }
                }
            }
        } catch (Exception e) {
            log.error("Failed to parse selected terms", e);
            throw new IOException(e);
        }
    }
    log.info("Total Rows: " + queryResult.getStats().getTotalRows() + ", Total Processed Bytes: "
            + queryResult.getStats().getTotalProcessedGBytes() + " GB" + ", Inferred: " + inferredTriples);
    log.info("********************** Completed Inference Round **********************");
    return inferredTriples;
}
From source file:net.sourceforge.ganttproject.io.GanttCSVOpen.java
/**
 * Create tasks from file.
 *
 * @throws IOException on parse error or input read failure
 */
public boolean load() throws IOException {
    CSVParser parser = new CSVParser(myInputSupplier.get(),
            CSVFormat.DEFAULT.withEmptyLinesIgnored(false).withSurroundingSpacesIgnored(true));
    int numGroup = 0;
    RecordGroup currentGroup = null;
    boolean searchHeader = true;
    List<CSVRecord> records = parser.getRecords();
    for (CSVRecord record : records) {
        if (record.size() == 0) {
            // If the line is empty then the current record group is probably finished.
            // Let's search for the next group header.
            searchHeader = true;
            continue;
        }
        if (searchHeader) {
            // Record is not empty and we're searching for a header.
            if (numGroup < myRecordGroups.size() && myRecordGroups.get(numGroup).isHeader(record)) {
                // If the next group acknowledges the header, then we give it the turn;
                // otherwise it was just an empty line in the current group.
                searchHeader = false;
                currentGroup = myRecordGroups.get(numGroup);
                parser.readHeader(record);
                currentGroup.setHeader(Lists.newArrayList(record.iterator()));
                numGroup++;
                continue;
            }
            searchHeader = false;
        }
        assert currentGroup != null;
        currentGroup.process(record);
    }
    for (RecordGroup group : myRecordGroups) {
        group.postProcess();
    }
    // Succeeded
    return true;
}
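Note that withEmptyLinesIgnored and withSurroundingSpacesIgnored come from the pre-1.0 Commons CSV snapshot bundled with GanttProject; the released 1.x API spells them withIgnoreEmptyLines and withIgnoreSurroundingSpaces. A minimal sketch of the same empty-line-as-group-separator idea against the 1.x API (the input data is illustrative, and the blank-line record shape is an assumption noted in the comments):

import java.io.Reader;
import java.io.StringReader;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class RecordGroupsExample {
    public static void main(String[] args) throws Exception {
        // Two "groups" of records separated by a blank line (illustrative data).
        Reader in = new StringReader("Name,Start\nT1,2015-01-01\n\nResource,Role\nAlice,Dev\n");
        CSVFormat format = CSVFormat.DEFAULT
                .withIgnoreEmptyLines(false)       // keep blank lines so they can mark group boundaries
                .withIgnoreSurroundingSpaces(true);
        try (CSVParser parser = new CSVParser(in, format)) {
            for (CSVRecord record : parser) {
                // Assumption: in released 1.x a blank line surfaces as a single empty column,
                // whereas the snapshot above reports record.size() == 0.
                boolean boundary = record.size() == 1 && record.get(0).isEmpty();
                System.out.println(boundary ? "-- group boundary --" : String.join("|", record.values()));
            }
        }
    }
}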
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask6.java
/**
 * @param queryResult
 * @param productiveTerms
 * @param table
 * @param writer
 * @return
 * @throws IOException
 */
protected int inferAndSaveTriplesToFile(QueryResult queryResult, Set<String> productiveTerms, String table,
        PrintWriter writer) throws IOException {
    log.info("********************** Starting Inference Round **********************");
    int inferredTriples = 0;
    // loop through the instance triples, probably stored in a file, and generate all the triples
    // matching the schema triples set
    try (BufferedReader r = new BufferedReader(new FileReader(queryResult.getFilename()),
            Constants.GZIP_BUF_SIZE)) {
        Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(r);
        // records will contain lots of duplicates
        try {
            String term;
            for (CSVRecord record : records) {
                NTriple instanceTriple = (NTriple) NTriple.fromCSV(record.values());
                // TODO review for OWL ruleset
                if (SchemaURIType.RDF_TYPE.getUri().equals(instanceTriple.getPredicate())) {
                    term = instanceTriple.getObject(); // object
                } else {
                    term = instanceTriple.getPredicate(); // predicate
                }
                Set<Triple> schemaTriples = schemaTerms.get(term);
                if ((schemaTriples != null) && !schemaTriples.isEmpty()) {
                    productiveTerms.add(term);
                    for (Triple schemaTriple : schemaTriples) {
                        Rule rule = GenericRule.getRule(schemaTriple);
                        Triple inferredTriple = rule.head(schemaTriple, instanceTriple);
                        if (inferredTriple != null) {
                            writer.println(inferredTriple.toCsv());
                            inferredTriples++;
                        }
                    }
                }
            }
        } catch (Exception e) {
            log.error("Failed to parse selected terms", e);
            throw new IOException(e);
        }
    }
    log.info("Total Rows: " + queryResult.getStats().getTotalRows() + ", Total Processed Bytes: "
            + queryResult.getStats().getTotalProcessedGBytes() + " GB" + ", Inferred: " + inferredTriples);
    log.info("********************** Completed Inference Round **********************");
    return inferredTriples;
}
From source file:com.datascience.cascading.scheme.CsvScheme.java
/**
 * Creates a new CSV scheme with the given source and sink {@link cascading.tuple.Fields}.
 * <p>
 * Strict mode is enabled when using this constructor.
 *
 * @param fields  The source and sink fields.
 * @param charset The character set with which to read and write CSV files.
 * @see com.datascience.cascading.scheme.CsvScheme
 */
public CsvScheme(Fields fields, Charset charset) {
    this(fields, fields, CSVFormat.DEFAULT, charset, true);
}
From source file:com.streamsets.pipeline.lib.parser.delimited.TestDelimitedCharDataParser.java
@Test(expected = IOException.class)
public void testClose() throws Exception {
    OverrunReader reader = new OverrunReader(new StringReader("A,B\na,b"), 1000, true, false);
    DataParser parser = new DelimitedCharDataParser(getContext(), "id", reader, 0, 0, CSVFormat.DEFAULT,
            CsvHeader.IGNORE_HEADER, -1, CsvRecordType.LIST);
    parser.close();
    parser.parse();
}
From source file:it.newfammulfin.api.EntryResource.java
@POST
@Consumes("text/csv")
@Produces(MediaType.TEXT_PLAIN)
public Response importFromCsv(String csvData,
        @DefaultValue("false") @QueryParam("invertSign") final boolean invertSign) {
    final Group group = (Group) requestContext.getProperty(GroupRetrieverRequestFilter.GROUP);
    final Map<String, Key<Chapter>> chapterStringsMap = new HashMap<>();
    final List<CSVRecord> records;
    try {
        records = CSVParser.parse(csvData, CSVFormat.DEFAULT.withHeader()).getRecords();
    } catch (IOException e) {
        return Response.status(Response.Status.INTERNAL_SERVER_ERROR)
                .entity(String.format("Unexpected %s: %s.", e.getClass().getSimpleName(), e.getMessage()))
                .build();
    }
    // check users
    final Set<String> userIds = new HashSet<>();
    for (String columnName : records.get(0).toMap().keySet()) {
        if (columnName.startsWith("by:")) {
            String userId = columnName.replaceFirst("by:", "");
            if (!group.getUsersMap().keySet().contains(Key.create(RegisteredUser.class, userId))) {
                return Response.status(Response.Status.INTERNAL_SERVER_ERROR)
                        .entity(String.format("User %s not found in this group.", userId)).build();
            }
            userIds.add(userId);
        }
    }
    // build chapters
    final Set<String> chapterStringsSet = new HashSet<>();
    for (CSVRecord record : records) {
        chapterStringsSet.add(record.get("chapters"));
    }
    final List<Key<?>> createdKeys = new ArrayList<>();
    try {
        OfyService.ofy().transact(new Work<List<Key<?>>>() {
            @Override
            public List<Key<?>> run() {
                for (String chapterStrings : chapterStringsSet) {
                    List<String> pieces = Arrays.asList(chapterStrings.split(CSV_CHAPTERS_SEPARATOR));
                    Key<Chapter> parentChapterKey = null;
                    for (int i = 0; i < pieces.size(); i++) {
                        String partialChapterString = Joiner.on(CSV_CHAPTERS_SEPARATOR)
                                .join(pieces.subList(0, i + 1));
                        Key<Chapter> chapterKey = chapterStringsMap.get(partialChapterString);
                        if (chapterKey == null) {
                            chapterKey = OfyService.ofy().load().type(Chapter.class).ancestor(group)
                                    .filter("name", pieces.get(i))
                                    .filter("parentChapterKey", parentChapterKey)
                                    .keys().first().now();
                            chapterStringsMap.put(partialChapterString, chapterKey);
                        }
                        if (chapterKey == null) {
                            Chapter chapter = new Chapter(pieces.get(i), Key.create(group), parentChapterKey);
                            OfyService.ofy().save().entity(chapter).now();
                            chapterKey = Key.create(chapter);
                            createdKeys.add(chapterKey);
                            LOG.info(String.format("%s created.", chapter));
                        }
                        chapterStringsMap.put(partialChapterString, chapterKey);
                        parentChapterKey = chapterKey;
                    }
                }
                // build entries
                DateTimeFormatter formatter = DateTimeFormat.forPattern("dd/MM/YY");
                Key<Group> groupKey = Key.create(group);
                for (CSVRecord record : records) {
                    Entry entry = new Entry();
                    entry.setGroupKey(groupKey);
                    entry.setDate(LocalDate.parse(record.get("date"), formatter));
                    entry.setAmount(Money.of(CurrencyUnit.of(record.get("currency").toUpperCase()),
                            (invertSign ? -1 : 1) * Double.parseDouble(record.get("value"))));
                    if (!record.get("chapters").isEmpty()) {
                        entry.setChapterKey(chapterStringsMap.get(record.get("chapters")));
                    }
                    entry.setPayee(record.get("payee"));
                    for (String tag : record.get("tags").split(CSV_TAGS_SEPARATOR)) {
                        if (!tag.trim().isEmpty()) {
                            entry.getTags().add(tag);
                        }
                    }
                    entry.setDescription(record.get("description"));
                    entry.setNote(record.get("notes"));
                    int scale = Math.max(DEFAULT_SHARE_SCALE, entry.getAmount().getScale());
                    // "by" shares
                    for (String userId : userIds) {
                        String share = record.get("by:" + userId);
                        double value;
                        if (share.contains("%")) {
                            entry.setByPercentage(true);
                            value = Double.parseDouble(share.replace("%", ""));
                            value = entry.getAmount().getAmount().doubleValue() * value / 100d;
                        } else {
                            value = (invertSign ? -1 : 1) * Double.parseDouble(share);
                        }
                        entry.getByShares().put(Key.create(RegisteredUser.class, userId),
                                BigDecimal.valueOf(value).setScale(scale, RoundingMode.DOWN));
                    }
                    boolean equalByShares = checkAndBalanceZeroShares(entry.getByShares(),
                            entry.getAmount().getAmount());
                    entry.setByPercentage(entry.isByPercentage() || equalByShares);
                    // "for" shares
                    for (String userId : userIds) {
                        String share = record.get("for:" + userId);
                        double value;
                        if (share.contains("%")) {
                            entry.setForPercentage(true);
                            value = Double.parseDouble(share.replace("%", ""));
                            value = entry.getAmount().getAmount().doubleValue() * value / 100d;
                        } else {
                            value = (invertSign ? -1 : 1) * Double.parseDouble(share);
                        }
                        entry.getForShares().put(Key.create(RegisteredUser.class, userId),
                                BigDecimal.valueOf(value).setScale(scale, RoundingMode.DOWN));
                    }
                    boolean equalForShares = checkAndBalanceZeroShares(entry.getForShares(),
                            entry.getAmount().getAmount());
                    entry.setForPercentage(entry.isForPercentage() || equalForShares);
                    OfyService.ofy().save().entity(entry).now();
                    createdKeys.add(Key.create(entry));
                    EntryOperation operation = new EntryOperation(Key.create(group), Key.create(entry),
                            new Date(),
                            Key.create(RegisteredUser.class, securityContext.getUserPrincipal().getName()),
                            EntryOperation.Type.IMPORT);
                    OfyService.ofy().save().entity(operation).now();
                    LOG.info(String.format("%s created.", entry));
                }
                return createdKeys;
            }
        });
        // count created keys
        int numberOfCreatedChapters = 0;
        int numberOfCreatedEntries = 0;
        for (Key<?> key : createdKeys) {
            if (key.getKind().equals(Entry.class.getSimpleName())) {
                numberOfCreatedEntries = numberOfCreatedEntries + 1;
            } else if (key.getKind().equals(Chapter.class.getSimpleName())) {
                numberOfCreatedChapters = numberOfCreatedChapters + 1;
            }
        }
        return Response.ok(String.format("Done: %d chapters and %d entries created.",
                numberOfCreatedChapters, numberOfCreatedEntries)).build();
    } catch (RuntimeException e) {
        LOG.warning(String.format("Unexpected %s: %s.", e.getClass().getSimpleName(), e.getMessage()));
        return Response.status(Response.Status.INTERNAL_SERVER_ERROR)
                .entity(String.format("Unexpected %s: %s.", e.getClass().getSimpleName(), e.getMessage()))
                .build();
    }
}
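The CSVParser.parse(csvData, CSVFormat.DEFAULT.withHeader()) call above reads the column names from the first record, which is what lets the loop fetch fields with record.get("date"), record.get("value"), and so on. A minimal sketch of that header-based access on its own (the column names and data are illustrative):

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class HeaderAccessExample {
    public static void main(String[] args) throws Exception {
        // Illustrative header and row; withHeader() takes column names from the first record.
        String csvData = "date,value,currency\n01/02/15,12.50,EUR\n";
        try (CSVParser parser = CSVParser.parse(csvData, CSVFormat.DEFAULT.withHeader())) {
            for (CSVRecord record : parser.getRecords()) {
                System.out.println(record.get("date") + " -> " + record.get("value")
                        + " " + record.get("currency"));
            }
        }
    }
}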
From source file:com.datascience.cascading.scheme.CsvScheme.java
/**
 * Creates a new CSV scheme with the given source and sink {@link cascading.tuple.Fields}.
 * <p>
 * The CSV input/output character set defaults to {@code UTF-8}.
 *
 * @param fields The source and sink fields.
 * @param strict Indicates whether to parse records in strict parsing mode. When strict mode is disabled,
 *               single-record parse errors are caught and logged.
 * @see com.datascience.cascading.scheme.CsvScheme
 */
public CsvScheme(Fields fields, boolean strict) {
    this(fields, fields, CSVFormat.DEFAULT, StandardCharsets.UTF_8, strict);
}
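All three CsvScheme convenience constructors shown above delegate to the same five-argument form, filling in CSVFormat.DEFAULT, UTF-8, or strict mode as needed. A sketch of calling that form directly to swap in a custom format (this assumes the five-argument constructor is public, as the delegation suggests; the fields and the tab-delimited variant are illustrative):

import java.nio.charset.StandardCharsets;

import org.apache.commons.csv.CSVFormat;

import cascading.tuple.Fields;
import com.datascience.cascading.scheme.CsvScheme;

public class CsvSchemeUsage {
    public static void main(String[] args) {
        Fields fields = new Fields("id", "name");
        // The defaults the one-argument constructor fills in, spelled out explicitly.
        CsvScheme defaults = new CsvScheme(fields, fields, CSVFormat.DEFAULT, StandardCharsets.UTF_8, true);
        // Hypothetical variant: a tab-delimited format through the same constructor.
        CsvScheme tsv = new CsvScheme(fields, fields, CSVFormat.DEFAULT.withDelimiter('\t'),
                StandardCharsets.UTF_8, true);
    }
}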