Usage examples for com.fasterxml.jackson.dataformat.csv.CsvMapper#writeValueAsString
@SuppressWarnings("resource") public String writeValueAsString(Object value) throws JsonProcessingException
From source file:edu.cmu.cs.lti.discoursedb.annotation.lightside.io.LightSideService.java
/** * Exports data in a format that can be imported into LightSide and then annotated with a classifier that was training with * data generated by the exportAnnotations methods. * /*ww w. j a v a 2 s.com*/ * @param outputFilePath path to the output file to which the extracted data should be written * @param contributions contributions that should be exported for annotation */ @Transactional(readOnly = true) public void exportDataForAnnotation(String outputFilePath, Iterable<Contribution> contributions) { Assert.hasText(outputFilePath, "Path to the output file cannot be empty."); File outputFile = new File(outputFilePath); Assert.isTrue(!outputFile.isDirectory(), outputFilePath + " points to a directory but should point to a file."); StringBuilder output = new StringBuilder(); CsvMapper mapper = new CsvMapper(); try { if (!outputFile.exists()) { //only add header once output.append(mapper.writeValueAsString(new String[] { TEXT_COL, ID_COL })); } for (Contribution contrib : contributions) { output.append(mapper.writeValueAsString( new String[] { contrib.getCurrentRevision().getText(), String.valueOf(contrib.getId()) })); } FileUtils.writeStringToFile(outputFile, output.toString(), true); } catch (IOException e) { log.error("Error writing exported data to csv"); } }
From source file:edu.cmu.cs.lti.discoursedb.annotation.lightside.io.LightSideService.java
@Transactional(readOnly = true) private String generateLightSideOutput(List<RawDataInstance> data) throws JsonProcessingException { StringBuilder output = new StringBuilder(); CsvMapper mapper = new CsvMapper(); //generate list of binary label types Set<String> binaryLabelTypes = data.stream().parallel() .flatMap(instance -> instance.getAnnotations().entrySet().stream()) .filter(m -> m.getValue().toLowerCase().equals(LABEL_ASSIGNED_VAL)) .map(m -> m.getKey().toLowerCase()).collect(Collectors.toSet()); //generate header Set<String> types = data.stream().parallel() .flatMap(instance -> instance.getAnnotations().entrySet().stream()) .map(m -> m.getKey().toLowerCase()).collect(Collectors.toSet()); Assert.isTrue(!types.contains(TEXT_COL), "No feature with the name \"" + TEXT_COL + "\" is allowed."); List<String> header = new ArrayList<>(types.size() + 1); header.add(TEXT_COL);//from w w w .j a va 2s. c o m header.addAll(types); output.append(mapper.writeValueAsString(header)); //generate data vectors for (RawDataInstance instance : data) { List<String> featVector = new ArrayList<>(header.size()); featVector.add(instance.getText()); Map<String, String> curInstAnnos = instance.getAnnotations(); for (String type : types) { //Label assigned to current instance if (curInstAnnos.containsKey(type)) { featVector.add(curInstAnnos.get(type)); } //Label not assigned to current instance - handle missing value else { if (binaryLabelTypes.contains(type)) { //missing binary label interpreted as "false" featVector.add(LABEL_MISSING_VAL); } else { //missing value on interpreted as "null" featVector.add(VALUE_MISSING_VAL); } } } Assert.isTrue(featVector.size() == header.size(), "Error writing feature vector. Wrong size."); output.append(mapper.writeValueAsString(featVector)); } return output.toString(); }