List of usage examples for org.apache.commons.csv CSVFormat parse
public CSVParser parse(final Reader in) throws IOException
From source file:edu.umd.umiacs.clip.tools.io.BZIP2Files.java
/**
 * Streams the CSV records of a bzip2-compressed file.
 *
 * <p>The file is read through a {@code BUFFER_SIZE} buffer, decompressed, and decoded with a
 * UTF-8 decoder configured via {@code onMalformedInput(IGNORE)}.
 *
 * <p>The returned stream keeps the file open while it is consumed. Close the stream
 * (for example with try-with-resources) to close the parser and, through it, the file.
 *
 * @param format CSV dialect used to parse the decompressed text
 * @param path   location of the compressed file
 * @return a sequential stream over the parsed records
 * @throws IOException if the file cannot be opened or the decompressor/parser cannot be set up
 */
protected static Stream<CSVRecord> records(CSVFormat format, Path path) throws IOException {
    java.io.InputStream raw = newInputStream(path);
    try {
        org.apache.commons.csv.CSVParser parser = format.parse(new BufferedReader(new InputStreamReader(
                new BZip2CompressorInputStream(new BufferedInputStream(raw, BUFFER_SIZE)),
                UTF_8.newDecoder().onMalformedInput(IGNORE))));
        // Tie the parser's lifetime to the stream so callers can release the file handle.
        return StreamSupport.stream(parser.spliterator(), false).onClose(() -> {
            try {
                parser.close();
            } catch (IOException e) {
                throw new java.io.UncheckedIOException(e);
            }
        });
    } catch (IOException | RuntimeException e) {
        // Setup failed part-way (e.g. invalid bzip2 header): do not leak the opened file.
        try {
            raw.close();
        } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
From source file:edu.umd.umiacs.clip.tools.io.GZIPFiles.java
/**
 * Streams the CSV records of a gzip-compressed file.
 *
 * <p>The file is read through a {@code BUFFER_SIZE} buffer, decompressed, and decoded with a
 * UTF-8 decoder configured via {@code onMalformedInput(IGNORE)}.
 *
 * <p>The returned stream keeps the file open while it is consumed. Close the stream
 * (for example with try-with-resources) to close the parser and, through it, the file.
 *
 * @param format CSV dialect used to parse the decompressed text
 * @param path   location of the compressed file
 * @return a sequential stream over the parsed records
 * @throws IOException if the file cannot be opened or the decompressor/parser cannot be set up
 */
protected static Stream<CSVRecord> records(CSVFormat format, Path path) throws IOException {
    java.io.InputStream raw = newInputStream(path);
    try {
        org.apache.commons.csv.CSVParser parser = format.parse(new BufferedReader(new InputStreamReader(
                new GZIPInputStream(new BufferedInputStream(raw, BUFFER_SIZE)),
                UTF_8.newDecoder().onMalformedInput(IGNORE))));
        // Tie the parser's lifetime to the stream so callers can release the file handle.
        return StreamSupport.stream(parser.spliterator(), false).onClose(() -> {
            try {
                parser.close();
            } catch (IOException e) {
                throw new java.io.UncheckedIOException(e);
            }
        });
    } catch (IOException | RuntimeException e) {
        // Setup failed part-way (e.g. invalid gzip header): do not leak the opened file.
        try {
            raw.close();
        } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
From source file:edu.umd.umiacs.clip.tools.io.AllFiles.java
private static Stream<CSVRecord> overridenRecords(CSVFormat format, Path path) throws IOException { return StreamSupport.stream(format.parse( new BufferedReader(new InputStreamReader(new BufferedInputStream(newInputStream(path), BUFFER_SIZE), UTF_8.newDecoder().onMalformedInput(IGNORE)))) .spliterator(), false);//from w ww.j a va2s . com }
From source file:ch.eitchnet.csvrestendpoint.components.CsvDataHandler.java
public <T> T getCsvData(String name, CsvDataMarshaller<T> marshaller) { File csvDataDir = getCsvDataDir(); if (!csvDataDir.isDirectory()) { logger.error("CSV Data Dir is not a directory at " + csvDataDir.getAbsolutePath()); return null; }//from w ww . j a va 2 s . c o m String csvFileName = name + ".csv"; File csvFile = new File(csvDataDir, csvFileName); if (!csvFile.isFile()) { logger.error("CSV File is not a file at " + csvFile.getAbsolutePath()); return null; } try (Reader reader = new InputStreamReader(new FileInputStream(csvFile), marshaller.getCharset())) { CSVFormat csvFormat = marshaller.getCsvFormat(); CSVParser csvParser = csvFormat.parse(reader); return marshaller.marshall(csvParser); } catch (IOException e) { logger.error(e.getMessage(), e); return null; } }
From source file:eu.fthevenet.binjr.data.codec.CsvDecoder.java
/**
 * Returns the column headers of the CSV file.
 *
 * <p>Only the first record of the stream is read; it is handed to
 * {@code parseColumnHeaders} to produce the header list.
 *
 * <p>NOTE(review): the first record is fetched with {@code iterator().next()} without a
 * {@code hasNext()} check, so an empty stream presumably surfaces as an unchecked exception
 * from the iterator - confirm against callers.
 *
 * @param in an input stream for the CSV file.
 * @return the column headers of the CSV file.
 * @throws IOException                      in the event of an I/O error.
 * @throws DecodingDataFromAdapterException if an error occurred while decoding the CSV file.
 */
public List<String> getDataColumnHeaders(InputStream in) throws IOException, DecodingDataFromAdapterException {
    try (BufferedReader source = new BufferedReader(new InputStreamReader(in, encoding))) {
        final CSVRecord firstRow = CSVFormat.DEFAULT
                .withAllowMissingColumnNames(false)
                .withDelimiter(delimiter)
                .parse(source)
                .iterator()
                .next();
        return this.parseColumnHeaders(firstRow);
    }
}
From source file:eu.fthevenet.binjr.data.codec.CsvDecoder.java
/** * Decodes data from the provided stream and invoke the provided {@link Consumer} for each decoded record. * * @param in the {@link InputStream} for the CSV file * @param headers a list of the headers to keep from decoded records * @param mapToResult the function to invoke for reach decoded record * @throws IOException in the event of an I/O error. * @throws DecodingDataFromAdapterException if an error occurred while decoding the CSV file. *///from w w w . ja v a 2 s .co m public void decode(InputStream in, List<String> headers, Consumer<DataSample<T>> mapToResult) throws IOException, DecodingDataFromAdapterException { try (Profiler ignored = Profiler.start("Building time series from csv data", logger::trace)) { try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, encoding))) { CSVFormat csvFormat = CSVFormat.DEFAULT.withAllowMissingColumnNames(false).withFirstRecordAsHeader() .withSkipHeaderRecord().withDelimiter(delimiter); Iterable<CSVRecord> records = csvFormat.parse(reader); for (CSVRecord csvRecord : records) { ZonedDateTime timeStamp = dateParser.apply(csvRecord.get(0)); DataSample<T> tRecord = new DataSample<>(timeStamp); for (String h : headers) { tRecord.getCells().put(h, numberParser.apply(csvRecord.get(h))); } mapToResult.accept(tRecord); } } } }
From source file:eu.fthevenet.binjr.data.codec.CsvDecoder.java
@Override public Map<TimeSeriesInfo<T>, TimeSeriesProcessor<T>> decode(InputStream in, List<TimeSeriesInfo<T>> seriesInfo) throws IOException, DecodingDataFromAdapterException { try (Profiler ignored = Profiler.start("Building time series from csv data", logger::trace)) { try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, encoding))) { CSVFormat csvFormat = CSVFormat.DEFAULT.withAllowMissingColumnNames(false).withFirstRecordAsHeader() .withSkipHeaderRecord().withDelimiter(delimiter); Iterable<CSVRecord> records = csvFormat.parse(reader); Map<TimeSeriesInfo<T>, TimeSeriesProcessor<T>> series = new HashMap<>(); final AtomicLong nbpoints = new AtomicLong(0); for (CSVRecord csvRecord : records) { nbpoints.incrementAndGet(); ZonedDateTime timeStamp = dateParser.apply(csvRecord.get(0)); for (TimeSeriesInfo<T> info : seriesInfo) { T val = numberParser.apply(csvRecord.get(info.getBinding().getLabel())); XYChart.Data<ZonedDateTime, T> point = new XYChart.Data<>(timeStamp, val); TimeSeriesProcessor<T> l = series.computeIfAbsent(info, k -> timeSeriesFactory.create()); l.addSample(point);//from www .j a v a2s.c o m } } logger.trace(() -> String.format("Built %d series with %d samples each (%d total samples)", seriesInfo.size(), nbpoints.get(), seriesInfo.size() * nbpoints.get())); return series; } } }
From source file:com.thinkbiganalytics.discovery.parsers.csv.CSVFileSchemaParser.java
@Override public Schema parse(InputStream is, Charset charset, TableSchemaType target) throws IOException { Validate.notNull(target, "target must not be null"); Validate.notNull(is, "stream must not be null"); Validate.notNull(charset, "charset must not be null"); validate();/*w w w .ja va 2s. c o m*/ // Parse the file String sampleData = ParserHelper.extractSampleLines(is, charset, numRowsToSample); Validate.notEmpty(sampleData, "No data in file"); CSVFormat format = createCSVFormat(sampleData); try (Reader reader = new StringReader(sampleData)) { CSVParser parser = format.parse(reader); DefaultFileSchema fileSchema = populateSchema(parser); fileSchema.setCharset(charset.name()); // Convert to target schema with proper derived types Schema targetSchema = convertToTarget(target, fileSchema); return targetSchema; } }
From source file:com.thinkbiganalytics.discovery.parsers.csv.CSVAutoDetect.java
private Character guessDelimiter(List<LineStats> lineStats, String value, Character quote, boolean headerRow) throws IOException { // Assume delimiter exists in first line and compare to subsequent lines if (lineStats.size() > 0) { LineStats firstLineStat = lineStats.get(0); Map<Character, Integer> firstLineDelimCounts = firstLineStat.calcDelimCountsOrdered(); if (firstLineDelimCounts != null && firstLineDelimCounts.size() > 0) { List<Character> candidates = new ArrayList<>(); // Attempt to parse given delimiter Set<Character> firstLineDelimKeys = firstLineDelimCounts.keySet(); for (Character delim : firstLineDelimKeys) { CSVFormat format; if (headerRow) { format = CSVFormat.DEFAULT.withFirstRecordAsHeader().withDelimiter(delim).withQuote(quote); } else { format = CSVFormat.DEFAULT.withDelimiter(delim).withQuote(quote); }//from w w w . j a v a2 s .c om try (StringReader sr = new StringReader(value)) { try (CSVParser parser = format.parse(sr)) { if (parser.getHeaderMap() != null) { int size = parser.getHeaderMap().size(); List<CSVRecord> records = parser.getRecords(); boolean match = records.stream().allMatch(record -> record.size() == size); if (match) { return delim; } } } } Integer delimCount = firstLineDelimCounts.get(delim); boolean match = true; for (int i = 1; i < lineStats.size() && match; i++) { LineStats thisLine = lineStats.get(i); Integer rowDelimCount = thisLine.delimStats.get(delim); match = delimCount.equals(rowDelimCount); } if (match) { candidates.add(delim); } } if (candidates.size() > 0) { // All agree on a single delimiter if (candidates.size() == 1) { return candidates.get(0); } else { int count = 0; // Return highest delimiter from candidates for (Character delim : firstLineDelimKeys) { if (candidates.get(count++) != null) { return delim; } } } } } } return null; }
From source file:io.swagger.inflector.controllers.SwaggerOperationController.java
/**
 * Handles one request for this operation and produces the JAX-RS response.
 *
 * <p>Argument collection: {@code args[0]} is the request context created from {@code ctx};
 * each following slot holds the converted value of the operation parameter at the same position.
 * <ul>
 *   <li>Form parameters, multipart request: the entity is split with {@code MultipartStream}
 *       using the media type's {@code boundary}. Part headers are tokenised with
 *       {@code CSVFormat} (';' between entries, then '=' or ':' between key and value). Parts
 *       with a filename are written to a temp file and recorded in {@code inputStreams};
 *       other parts are stored as strings under the part name.</li>
 *   <li>Form parameters, other media types: the entity is read once as UTF-8 and split on
 *       '&amp;' and '='; values are URL-decoded before conversion.</li>
 *   <li>{@code query}, {@code path} and {@code header} parameters go through
 *       {@code validator.convertAndValidate}; {@code body} parameters are read with
 *       {@code EntityProcessorFactory.readValue} and validated against the body schema.</li>
 * </ul>
 * Conversion and validation failures are collected; if any exist, an {@code ApiException}
 * carrying {@code config.getInvalidRequestStatusCode()} and the joined messages is thrown.
 *
 * <p>Dispatch: when {@code method} is non-null it is invoked reflectively on
 * {@code controller}. A {@code ResponseContext} result is translated into a {@code Response}
 * (status, headers, entity, content type, output-schema validation); any other result is
 * wrapped in {@code Response.ok()}. An {@code ApiException} found in the cause chain of an
 * invocation failure is rethrown as is; other failures become an internal-error
 * {@code ApiException}.
 *
 * <p>Fallback when {@code method} is null: an example response is built from
 * {@code operation.getResponses()}, using the first sorted key that starts with "2" or equals
 * "default" (a "3xx" key is remembered as a fallback), with example headers and an example
 * entity from {@code ExampleBuilder}.
 *
 * <p>The {@code finally} block deletes every file recorded in {@code inputStreams}.
 *
 * <p>NOTE(review): {@code args} is sized with {@code parameters.size()} before the
 * {@code parameters != null} check, so a null parameter list would fail before the guard.
 * <br>NOTE(review): the {@code FileOutputStream} opened for an uploaded part is not closed in
 * this method.
 * <br>NOTE(review): {@code IOException}s while reading the entity are only passed to
 * {@code printStackTrace()}; processing continues with whatever was parsed.
 *
 * @param ctx the container request being processed
 * @return the response produced by the controller method, or an example response when no
 *         method is bound
 */
@Override public Response apply(ContainerRequestContext ctx) { List<Parameter> parameters = operation.getParameters(); final RequestContext requestContext = createContext(ctx); String path = ctx.getUriInfo().getPath(); Map<String, Map<String, String>> formMap = new HashMap<String, Map<String, String>>(); Map<String, File> inputStreams = new HashMap<String, File>(); Object[] args = new Object[parameters.size() + 1]; if (parameters != null) { int i = 0; args[i] = requestContext;//from w ww . j av a 2s . c o m i += 1; List<ValidationMessage> missingParams = new ArrayList<ValidationMessage>(); UriInfo uri = ctx.getUriInfo(); String formDataString = null; String[] parts = null; Set<String> existingKeys = new HashSet<String>(); for (Iterator<String> x = uri.getQueryParameters().keySet().iterator(); x.hasNext();) { existingKeys.add(x.next() + ": qp"); } for (Iterator<String> x = uri.getPathParameters().keySet().iterator(); x.hasNext();) { existingKeys.add(x.next() + ": pp"); } for (Iterator<String> x = ctx.getHeaders().keySet().iterator(); x.hasNext();) { String key = x.next(); // if(!commonHeaders.contains(key)) // existingKeys.add(key); } MediaType mt = requestContext.getMediaType(); for (Parameter p : parameters) { Map<String, String> headers = new HashMap<String, String>(); String name = null; if (p instanceof FormParameter) { if (formDataString == null) { // can only read stream once if (mt.isCompatible(MediaType.MULTIPART_FORM_DATA_TYPE)) { // get the boundary String boundary = mt.getParameters().get("boundary"); if (boundary != null) { try { InputStream output = ctx.getEntityStream(); MultipartStream multipartStream = new MultipartStream(output, boundary.getBytes()); boolean nextPart = multipartStream.skipPreamble(); while (nextPart) { String header = multipartStream.readHeaders(); // process headers if (header != null) { CSVFormat format = CSVFormat.DEFAULT.withDelimiter(';') .withRecordSeparator("="); Iterable<CSVRecord> records = format.parse(new 
StringReader(header)); for (CSVRecord r : records) { for (int j = 0; j < r.size(); j++) { String string = r.get(j); Iterable<CSVRecord> outerString = CSVFormat.DEFAULT .withDelimiter('=').parse(new StringReader(string)); for (CSVRecord outerKvPair : outerString) { if (outerKvPair.size() == 2) { String key = outerKvPair.get(0).trim(); String value = outerKvPair.get(1).trim(); if ("name".equals(key)) { name = value; } headers.put(key, value); } else { Iterable<CSVRecord> innerString = CSVFormat.DEFAULT .withDelimiter(':') .parse(new StringReader(string)); for (CSVRecord innerKVPair : innerString) { if (innerKVPair.size() == 2) { String key = innerKVPair.get(0).trim(); String value = innerKVPair.get(1).trim(); if ("name".equals(key)) { name = value; } headers.put(key, value); } } } } if (name != null) { formMap.put(name, headers); } } } } String filename = extractFilenameFromHeaders(headers); if (filename != null) { try { File file = new File(Files.createTempDir(), filename); file.deleteOnExit(); file.getParentFile().deleteOnExit(); FileOutputStream fo = new FileOutputStream(file); multipartStream.readBodyData(fo); inputStreams.put(name, file); } catch (Exception e) { LOGGER.error("Failed to extract uploaded file", e); } } else { ByteArrayOutputStream bo = new ByteArrayOutputStream(); multipartStream.readBodyData(bo); String value = bo.toString(); headers.put(name, value); } if (name != null) { formMap.put(name, headers); } headers = new HashMap<>(); name = null; nextPart = multipartStream.readBoundary(); } } catch (IOException e) { e.printStackTrace(); } } } else { try { formDataString = IOUtils.toString(ctx.getEntityStream(), "UTF-8"); parts = formDataString.split("&"); for (String part : parts) { String[] kv = part.split("="); existingKeys.add(kv[0] + ": fp"); } } catch (IOException e) { e.printStackTrace(); } } } } } for (Parameter parameter : parameters) { String in = parameter.getIn(); Object o = null; try { if ("formData".equals(in)) { SerializableParameter sp 
= (SerializableParameter) parameter; String name = parameter.getName(); if (mt.isCompatible(MediaType.MULTIPART_FORM_DATA_TYPE)) { // look in the form map Map<String, String> headers = formMap.get(name); if (headers != null && headers.size() > 0) { if ("file".equals(sp.getType())) { o = inputStreams.get(name); } else { Object obj = headers.get(parameter.getName()); if (obj != null) { JavaType jt = parameterClasses[i]; Class<?> cls = jt.getRawClass(); List<String> os = Arrays.asList(obj.toString()); try { o = validator.convertAndValidate(os, parameter, cls, definitions); } catch (ConversionException e) { missingParams.add(e.getError()); } catch (ValidationException e) { missingParams.add(e.getValidationMessage()); } } } } } else { if (formDataString != null) { for (String part : parts) { String[] kv = part.split("="); if (kv != null) { if (kv.length > 0) { existingKeys.remove(kv[0] + ": fp"); } if (kv.length == 2) { // TODO how to handle arrays here? String key = kv[0]; try { String value = URLDecoder.decode(kv[1], "utf-8"); if (parameter.getName().equals(key)) { JavaType jt = parameterClasses[i]; Class<?> cls = jt.getRawClass(); try { o = validator.convertAndValidate(Arrays.asList(value), parameter, cls, definitions); } catch (ConversionException e) { missingParams.add(e.getError()); } catch (ValidationException e) { missingParams.add(e.getValidationMessage()); } } } catch (UnsupportedEncodingException e) { LOGGER.error("unable to decode value for " + key); } } } } } } } else { try { String paramName = parameter.getName(); if ("query".equals(in)) { existingKeys.remove(paramName + ": qp"); } if ("path".equals(in)) { existingKeys.remove(paramName + ": pp"); } JavaType jt = parameterClasses[i]; Class<?> cls = jt.getRawClass(); if ("body".equals(in)) { if (ctx.hasEntity()) { BodyParameter body = (BodyParameter) parameter; o = EntityProcessorFactory.readValue(ctx.getMediaType(), ctx.getEntityStream(), cls); if (o != null) { validate(o, body.getSchema(), 
SchemaValidator.Direction.INPUT); } } else if (parameter.getRequired()) { ValidationException e = new ValidationException(); e.message(new ValidationMessage() .message("The input body `" + paramName + "` is required")); throw e; } } if ("query".equals(in)) { o = validator.convertAndValidate(uri.getQueryParameters().get(parameter.getName()), parameter, cls, definitions); } else if ("path".equals(in)) { o = validator.convertAndValidate(uri.getPathParameters().get(parameter.getName()), parameter, cls, definitions); } else if ("header".equals(in)) { o = validator.convertAndValidate(ctx.getHeaders().get(parameter.getName()), parameter, cls, definitions); } } catch (ConversionException e) { missingParams.add(e.getError()); } catch (ValidationException e) { missingParams.add(e.getValidationMessage()); } } } catch (NumberFormatException e) { LOGGER.error("Couldn't find " + parameter.getName() + " (" + in + ") to " + parameterClasses[i], e); } args[i] = o; i += 1; } if (existingKeys.size() > 0) { LOGGER.debug("unexpected keys: " + existingKeys); } if (missingParams.size() > 0) { StringBuilder builder = new StringBuilder(); builder.append("Input error"); if (missingParams.size() > 1) { builder.append("s"); } builder.append(": "); int count = 0; for (ValidationMessage message : missingParams) { if (count > 0) { builder.append(", "); } if (message != null && message.getMessage() != null) { builder.append(message.getMessage()); } else { builder.append("no additional input"); } count += 1; } int statusCode = config.getInvalidRequestStatusCode(); ApiError error = new ApiError().code(statusCode).message(builder.toString()); throw new ApiException(error); } } try { if (method != null) { LOGGER.info("calling method " + method + " on controller " + this.controller + " with args " + Arrays.toString(args)); try { Object response = method.invoke(controller, args); if (response instanceof ResponseContext) { ResponseContext wrapper = (ResponseContext) response; ResponseBuilder builder = 
Response.status(wrapper.getStatus()); // response headers for (String key : wrapper.getHeaders().keySet()) { List<String> v = wrapper.getHeaders().get(key); if (v.size() == 1) { builder.header(key, v.get(0)); } else { builder.header(key, v); } } // entity if (wrapper.getEntity() != null) { builder.entity(wrapper.getEntity()); // content type if (wrapper.getContentType() != null) { builder.type(wrapper.getContentType()); } else { final ContextResolver<ContentTypeSelector> selector = providersProvider.get() .getContextResolver(ContentTypeSelector.class, MediaType.WILDCARD_TYPE); if (selector != null) { selector.getContext(getClass()).apply(ctx.getAcceptableMediaTypes(), builder); } } if (operation.getResponses() != null) { String responseCode = String.valueOf(wrapper.getStatus()); io.swagger.models.Response responseSchema = operation.getResponses() .get(responseCode); if (responseSchema == null) { // try default response schema responseSchema = operation.getResponses().get("default"); } if (responseSchema != null && responseSchema.getSchema() != null) { validate(wrapper.getEntity(), responseSchema.getSchema(), SchemaValidator.Direction.OUTPUT); } else { LOGGER.debug( "no response schema for code " + responseCode + " to validate against"); } } } return builder.build(); } return Response.ok().entity(response).build(); } catch (IllegalArgumentException | IllegalAccessException | InvocationTargetException e) { for (Throwable cause = e.getCause(); cause != null;) { if (cause instanceof ApiException) { throw (ApiException) cause; } final Throwable next = cause.getCause(); cause = next == cause || next == null ? 
null : next; } throw new ApiException(ApiErrorUtils.createInternalError(), e); } } Map<String, io.swagger.models.Response> responses = operation.getResponses(); if (responses != null) { String[] keys = new String[responses.keySet().size()]; Arrays.sort(responses.keySet().toArray(keys)); int code = 0; String defaultKey = null; for (String key : keys) { if (key.startsWith("2")) { defaultKey = key; code = Integer.parseInt(key); break; } if ("default".equals(key)) { defaultKey = key; code = 200; break; } if (key.startsWith("3")) { // we use the 3xx responses as defaults defaultKey = key; code = Integer.parseInt(key); } } if (defaultKey != null) { ResponseBuilder builder = Response.status(code); io.swagger.models.Response response = responses.get(defaultKey); if (response.getHeaders() != null && response.getHeaders().size() > 0) { for (String key : response.getHeaders().keySet()) { Property headerProperty = response.getHeaders().get(key); Object output = ExampleBuilder.fromProperty(headerProperty, definitions); if (output instanceof ArrayExample) { output = ((ArrayExample) output).asString(); } else if (output instanceof ObjectExample) { LOGGER.debug( "not serializing output example, only primitives or arrays of primitives are supported"); } else { output = ((Example) output).asString(); } builder.header(key, output); } } Map<String, Object> examples = response.getExamples(); if (examples != null) { for (MediaType mediaType : requestContext.getAcceptableMediaTypes()) { for (String key : examples.keySet()) { if (MediaType.valueOf(key).isCompatible(mediaType)) { builder.entity(examples.get(key)).type(mediaType); return builder.build(); } } } } Object output = ExampleBuilder.fromProperty(response.getSchema(), definitions); if (output != null) { ResponseContext resp = new ResponseContext().entity(output); setContentType(requestContext, resp, operation); builder.entity(output); if (resp.getContentType() != null) { // this comes from the operation itself 
builder.type(resp.getContentType()); } else { // get acceptable content types List<EntityProcessor> processors = EntityProcessorFactory.getProcessors(); MediaType responseMediaType = null; // take first compatible one for (EntityProcessor processor : processors) { if (responseMediaType != null) { break; } for (MediaType mt : requestContext.getAcceptableMediaTypes()) { LOGGER.debug("checking type " + mt.toString() + " against " + processor.getClass().getName()); if (processor.supports(mt)) { builder.type(mt); responseMediaType = mt; break; } } } if (responseMediaType == null) { // no match based on Accept header, use first processor in list for (EntityProcessor processor : processors) { List<MediaType> supportedTypes = processor.getSupportedMediaTypes(); if (supportedTypes.size() > 0) { builder.type(supportedTypes.get(0)); break; } } } } builder.entity(output); } return builder.build(); } else { LOGGER.debug("no response type to map to, assume 200"); code = 200; } return Response.status(code).build(); } return Response.ok().build(); } finally { for (String key : inputStreams.keySet()) { File file = inputStreams.get(key); if (file != null) { LOGGER.debug("deleting file " + file.getPath()); file.delete(); } } } }