List of usage examples for org.apache.commons.csv CSVFormat.DEFAULT
CSVFormat.DEFAULT is the standard comma-separated value format: comma delimiter, double-quote as the quote character, CRLF record separator, and empty lines ignored (as RFC 4180, except that empty lines are allowed).
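Before the project examples below, here is a minimal, self-contained sketch of the typical round trip with CSVFormat.DEFAULT. The file name data.csv and the column names name and value are placeholder assumptions for illustration, not taken from any of the projects listed here:

import java.io.FileReader;
import java.io.Reader;
import java.io.StringWriter;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;

public class CsvDefaultSketch {
    public static void main(String[] args) throws Exception {
        // Parsing: withHeader() with no arguments treats the first record as the
        // header, so fields can be read by column name.
        try (Reader in = new FileReader("data.csv")) { // placeholder file name
            for (CSVRecord record : CSVFormat.DEFAULT.withHeader().parse(in)) {
                System.out.println(record.get("name")); // placeholder column name
            }
        }

        // Printing: withHeader(...) makes the CSVPrinter emit the header record
        // before any data records.
        StringWriter out = new StringWriter();
        try (CSVPrinter printer = new CSVPrinter(out, CSVFormat.DEFAULT.withHeader("name", "value"))) {
            printer.printRecord("a", 1);
        }
        System.out.print(out);
    }
}

The examples that follow show the same two directions (parsing and printing) in real projects, including header handling, custom delimiters, and streaming output.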
From source file:onlinenewspopularity.DataFormatter.java
/**
 * Reads the file and randomly populates the data.
 *
 * @return matrix list. The list has the following elements:
 *         1. List of features (mx1 ArrayList)
 *         2. Target column name
 *         3. Data for training (n1xm matrix)
 *         4. Target values for training data (n1x1 matrix)
 *         5. Test data (n2xm matrix)
 *         6. Target values for test data (n2x1 matrix)
 *         NOTE: n1 is the length of the training data set and n2 is the length
 *         of the test data set, where n2 = Constants.SIZE*Constants.TEST_SET_RATIO
 *         and n1 = Constants.SIZE-n2.
 * @throws Exception
 */
public List<Matrix> readData() throws Exception {
    try {
        try (Reader br = new FileReader(new File(fileName))) {
            Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(br);
            List features = new ArrayList<>();
            String predictColName;

            // The first record is the header; collect the feature column names.
            Iterator<CSVRecord> itr = records.iterator();
            CSVRecord header = itr.next();
            features.add(Constants.FEATURE_COL1_NAME);
            for (int i = Constants.INITIAL_FEATURE_INDEX; i < header.size() - 1; i++) {
                features.add(header.get(i).trim());
            }
            predictColName = header.get(header.size() - 1).trim();

            trainStat = new double[2][features.size()];
            double[][] data = new double[Constants.SIZE][features.size()];
            double[][] res = new double[Constants.SIZE][1];

            boolean[] validFeature = new boolean[features.size()];
            int featureCount = 1;
            for (int i = 0; i < validFeature.length; i++) {
                validFeature[i] = Boolean.FALSE; // Not a valid feature by default
            }

            List indices = new ArrayList<>();
            int n = Constants.SIZE;
            for (int i = 0; i < n; i++) {
                indices.add(i);
            }

            Random randGen = new Random();
            validFeature[0] = Boolean.TRUE; // theta_0 is a valid feature

            // Scatter the remaining records into randomly chosen rows of the data matrix.
            int i = 0;
            for (CSVRecord record : records) {
                if (i < Constants.SIZE && !indices.isEmpty()) {
                    int index = (int) indices.get(randGen.nextInt(indices.size()));
                    for (int j = 0; j <= features.size(); j++) {
                        if (j == 0) {
                            data[index][j] = 1.0;
                        } else if (j == features.size()) {
                            res[index][0] = Double.parseDouble(record.get(record.size() - 1));
                        } else {
                            data[index][j] = Double
                                    .parseDouble(record.get(j + Constants.INITIAL_FEATURE_INDEX - 1));
                            if (data[index][j] != 0) {
                                if (validFeature[j] == Boolean.FALSE) {
                                    featureCount++;
                                    validFeature[j] = Boolean.TRUE;
                                }
                            }
                        }
                    }
                    indices.remove((Object) index);
                } else {
                    break;
                }
                i++;
            }

            // Remove empty features
            if (featureCount < features.size()) {
                List featuresCopy = new ArrayList<>();
                featuresCopy.addAll(features);
                double[][] newData = new double[Constants.SIZE][featureCount];
                int k = 0;
                int var = 0;
                for (int j = 0; j < featuresCopy.size(); j++) {
                    if (validFeature[j] == Boolean.TRUE) {
                        for (i = 0; i < Constants.SIZE; i++) {
                            newData[i][k] = data[i][j];
                        }
                        k++;
                    } else {
                        LOGGER.log(Level.INFO, "Removing empty feature: {0}", features.get(j - var));
                        features.remove(j - var);
                        var++;
                    }
                }
                data = newData;
            }

            // Split into training and test partitions.
            int testLen = (int) (Constants.TEST_SET_RATIO * Constants.SIZE);
            int trainLen = Constants.SIZE - testLen;
            Matrix tmpx = new Matrix(data);
            Matrix tmpy = new Matrix(res);

            List temp = new ArrayList<>();
            temp.add(features);
            temp.add(predictColName);
            temp.add(tmpx.getMatrix(0, trainLen - 1, 0, tmpx.getColumnDimension() - 1));
            temp.add(tmpy.getMatrix(0, trainLen - 1, 0, tmpy.getColumnDimension() - 1));
            temp.add(tmpx.getMatrix(trainLen, tmpx.getRowDimension() - 1, 0, tmpx.getColumnDimension() - 1));
            temp.add(tmpy.getMatrix(trainLen, tmpy.getRowDimension() - 1, 0, tmpy.getColumnDimension() - 1));
            return temp;
        }
    } catch (Exception e) {
        LOGGER.log(Level.WARNING, "{0}: {1}", new Object[] { e.getClass().getName(), e.getMessage() });
        throw e;
    }
}
From source file:org.ag.es.etl.IndexObjBuilder.java
public static Index buildIndexFromCSVEntry(String csv_str, String[] time_fields, String[] event_attributes,
        String index, String type, Optional<String> id_field) {
    try {
        CSVFormat csvFileFormat = CSVFormat.DEFAULT.withHeader(event_attributes).withDelimiter(';');
        Map<String, String> csv_as_map = CSVParser.parse(csv_str, csvFileFormat).getRecords().get(0).toMap();

        for (String time_field : time_fields) {
            csv_as_map.put(time_field, DateFormatExtractor.buildDateTime(csv_as_map.get(time_field)).toString());
        }

        if (id_field.isPresent()) {
            return new Index.Builder(csv_as_map).index(index).type(type).id(id_field.get()).build();
        } else {
            return new Index.Builder(csv_as_map).index(index).type(type).build();
        }
    } catch (Exception e) {
        return null;
    }
}
From source file:org.ag.es.etl.IndexObjBuilder.java
public static Index buildIndexFromCSVEntry(String csv_str, String[] event_attributes, String index_field,
        String type_field, Optional<String> id_field, boolean drop_meta) {
    try {
        CSVFormat csvFileFormat = CSVFormat.DEFAULT.withHeader(event_attributes).withDelimiter(';');
        Map<String, String> csv_as_map = CSVParser.parse(csv_str, csvFileFormat).getRecords().get(0).toMap();

        String index = csv_as_map.get(index_field);
        String type = csv_as_map.get(type_field);
        String id = id_field.isPresent() ? csv_as_map.get(id_field.get()) : null;

        if (drop_meta) {
            csv_as_map.remove(index_field);
            csv_as_map.remove(type_field);
            if (id_field.isPresent()) {
                csv_as_map.remove(id_field.get());
            }
        }

        if (id_field.isPresent()) {
            return new Index.Builder(csv_as_map).index(index).type(type).id(id).build();
        } else {
            return new Index.Builder(csv_as_map).index(index).type(type).build();
        }
    } catch (Exception e) {
        return null;
    }
}
From source file:org.andresoft.datasource.FileReformatTest.java
@Test
public void testReformatChicagoFoodInspectionCsv() throws IOException {
    // Input columns: Inspection ID | DBA Name | AKA Name | License # | Facility Type | Risk |
    // Address | City | State | Zip | Inspection Date | Inspection Type | Results | Violations |
    // Latitude | Longitude | Location
    System.setProperty("line.separator", "\n");

    Reader in = new FileReader("/Development/andresoft/hadoop_data/Food_Inspections_chicago.csv");
    File file = new File("/Development/andresoft/hadoop_data/Food_Inspections_chicago_nomalized-2.csv");
    FileWriter fw = new FileWriter(file);
    final CSVPrinter printer = CSVFormat.DEFAULT
            .withHeader("Inspection ID", "DBA Name", "AKA Name", "License #", "Facility Type", "Risk",
                    "Address", "City", "State", "Zip", "Inspection Date", "Inspection Type", "Results",
                    "Violation Number", "Violation", "Comments", "Latitude", "Longitude", "Location")
            .print(fw);

    final CSVParser parser = new CSVParser(in, CSVFormat.EXCEL.withHeader());
    for (CSVRecord record : parser) {
        String inspectionId = record.get("Inspection ID");
        String dbaName = record.get("DBA Name");
        String akaName = record.get("AKA Name");
        String licenseNum = record.get("License #");
        String facility = record.get("Facility Type");
        String risk = record.get("Risk");
        String address = record.get("Address");
        String city = record.get("City");
        String state = record.get("State");
        String zip = record.get("Zip");
        String inspectionDate = record.get("Inspection Date");
        String inspectionType = record.get("Inspection Type");
        String results = record.get("Results");
        String violations = record.get("Violations");
        String latitude = record.get("Latitude");
        String longitude = record.get("Longitude");
        String location = record.get("Location");

        // The Violations column packs multiple violations into one field, separated by '|';
        // emit one output record per violation.
        String[] violationsArray = violations.split("\\|");
        for (String v : violationsArray) {
            String comments = "None";
            String violation = "None";
            String[] violationWithComment = v.split("Comments:");
            if (violationWithComment.length == 2) {
                violation = violationWithComment[0];
                comments = violationWithComment[1];
            } else {
                violation = violationWithComment[0];
            }
            if (!StringUtils.isBlank(violation)) {
                int violationNumberEndIndex = violation.indexOf('.');
                int violationNumber = Integer.valueOf(violation.substring(0, violationNumberEndIndex).trim());
                printer.printRecord(inspectionId, dbaName, akaName, licenseNum, facility, risk, address, city,
                        state, zip, inspectionDate, inspectionType, results, violationNumber,
                        violation.substring(violationNumberEndIndex + 1), comments, latitude, longitude,
                        location);
            }
        }
    }
    printer.close();
    in.close();
}
From source file:org.apache.ambari.server.api.services.serializers.CsvSerializer.java
/**
 * Serialize the result into a CSV-formatted text document.
 * <p/>
 * It is expected that the result set is a collection of flat resources - no sub-resources will be
 * included in the output. The root of the tree structure may have a column map (csv_column_map)
 * and a column order (csv_column_order) property set to indicate the header record and ordering
 * of the columns.
 * <p/>
 * The csv_column_map is a map of resource property names to descriptive header names. If it is
 * not specified, a header record will not be serialized.
 * <p/>
 * The csv_column_order is a list of resource property names declaring the order of the columns.
 * If it is not specified, the order is taken from the key order of csv_column_map or the "natural"
 * ordering of the resource property names, either of which may be unpredictable.
 *
 * @param result internal result
 * @return a String containing the CSV-formatted document
 */
@Override
public Object serialize(Result result) {
    if (result.getStatus().isErrorState()) {
        return serializeError(result.getStatus());
    } else {
        CSVPrinter csvPrinter = null;

        try {
            // A StringBuffer to store the CSV-formatted document while building it. It may be
            // necessary to use file-based storage if the data set is expected to be really large.
            StringBuffer buffer = new StringBuffer();
            TreeNode<Resource> root = result.getResultTree();

            if (root != null) {
                csvPrinter = new CSVPrinter(buffer, CSVFormat.DEFAULT);

                // TODO: recursively handle tree structure, for now only handle single level of detail
                if ("true".equalsIgnoreCase(root.getStringProperty("isCollection"))) {
                    List<String> fieldNameOrder = processHeader(csvPrinter, root);

                    Collection<TreeNode<Resource>> children = root.getChildren();
                    if (children != null) {
                        // Iterate over the child nodes of the collection and add each as a new
                        // record in the CSV document.
                        for (TreeNode<Resource> child : children) {
                            processRecord(csvPrinter, child, fieldNameOrder);
                        }
                    }
                }
            }

            return buffer.toString();
        } catch (IOException e) {
            // todo: exception handling. Create ResultStatus 500 and call serializeError
            throw new RuntimeException("Unable to serialize to csv: " + e, e);
        } finally {
            if (csvPrinter != null) {
                try {
                    csvPrinter.close();
                } catch (IOException ex) {
                }
            }
        }
    }
}
From source file:org.apache.ambari.server.api.services.serializers.CsvSerializer.java
@Override
public Object serializeError(ResultStatus error) {
    CSVPrinter csvPrinter = null;

    try {
        StringBuffer buffer = new StringBuffer();
        csvPrinter = new CSVPrinter(buffer, CSVFormat.DEFAULT);
        csvPrinter.printRecord(Arrays.asList("status", "message"));
        csvPrinter.printRecord(Arrays.asList(error.getStatus().getStatus(), error.getMessage()));
        return buffer.toString();
    } catch (IOException e) {
        // todo: exception handling. Create ResultStatus 500 and call serializeError
        throw new RuntimeException("Unable to serialize to csv: " + e, e);
    } finally {
        if (csvPrinter != null) {
            try {
                csvPrinter.close();
            } catch (IOException ex) {
            }
        }
    }
}
From source file:org.apache.ambari.server.api.services.serializers.CsvSerializerTest.java
@Test
public void testSerializeResources_NoColumnInfo() throws Exception {
    Result result = new ResultImpl(true);
    result.setResultStatus(new ResultStatus(ResultStatus.STATUS.OK));
    TreeNode<Resource> tree = result.getResultTree();

    List<TreeMap<String, Object>> data = new ArrayList<TreeMap<String, Object>>() {
        {
            add(new TreeMap<String, Object>() {
                {
                    put("property1", "value1a");
                    put("property2", "value2a");
                    put("property3", "value3a");
                    put("property4", "value4a");
                }
            });
            add(new TreeMap<String, Object>() {
                {
                    put("property1", "value1'b");
                    put("property2", "value2'b");
                    put("property3", "value3'b");
                    put("property4", "value4'b");
                }
            });
            add(new TreeMap<String, Object>() {
                {
                    put("property1", "value1,c");
                    put("property2", "value2,c");
                    put("property3", "value3,c");
                    put("property4", "value4,c");
                }
            });
        }
    };

    tree.setName("items");
    tree.setProperty("isCollection", "true");

    addChildResource(tree, "resource", 0, data.get(0));
    addChildResource(tree, "resource", 1, data.get(1));
    addChildResource(tree, "resource", 2, data.get(2));

    replayAll();

    // execute test
    Object o = new CsvSerializer().serialize(result).toString().replace("\r", "");

    verifyAll();

    assertNotNull(o);

    StringReader reader = new StringReader(o.toString());
    CSVParser csvParser = new CSVParser(reader, CSVFormat.DEFAULT);
    List<CSVRecord> records = csvParser.getRecords();
    assertNotNull(records);
    assertEquals(3, records.size());

    int i = 0;
    for (CSVRecord record : records) {
        TreeMap<String, Object> actualData = data.get(i++);
        assertEquals(actualData.size(), record.size());
        for (String item : record) {
            assertTrue(actualData.containsValue(item));
        }
    }

    csvParser.close();
}
From source file:org.apache.ambari.server.serveraction.kerberos.AbstractKerberosDataFileBuilder.java
/**
 * Opens the data file for writing.
 * <p/>
 * This may be called multiple times and the appropriate action will occur depending on if the
 * file has been previously opened or closed.
 *
 * @throws java.io.IOException
 */
public void open() throws IOException {
    if (isClosed()) {
        if (file == null) {
            throw new IOException("Missing file path");
        } else {
            csvPrinter = new CSVPrinter(new FileWriter(file, true), CSVFormat.DEFAULT);

            // If the file is empty, write the header; else don't write the header.
            if (file.length() == 0) {
                // Write the header....
                Iterable<?> headerRecord = getHeaderRecord();
                csvPrinter.printRecord(headerRecord);
            }
        }
    }
}
From source file:org.apache.ambari.server.serveraction.kerberos.AbstractKerberosDataFileReader.java
/**
 * Opens the data file for reading.
 * <p/>
 * This may be called multiple times and the appropriate action will occur depending on if the
 * file has been previously opened or closed.
 *
 * @throws java.io.IOException
 */
public void open() throws IOException {
    if (isClosed()) {
        csvParser = CSVParser.parse(file, Charset.defaultCharset(), CSVFormat.DEFAULT.withHeader());
    }
}
From source file:org.apache.ambari.view.hive.resources.jobs.JobService.java
/**
 * Get job results in csv format
 */
@GET
@Path("{jobId}/results/csv")
@Produces("text/csv")
public Response getResultsCSV(@PathParam("jobId") String jobId, @Context HttpServletResponse response,
        @QueryParam("columns") final String requestedColumns) {
    try {
        JobController jobController = getResourceManager().readController(jobId);
        final Cursor resultSet = jobController.getResults();
        resultSet.selectColumns(requestedColumns);

        StreamingOutput stream = new StreamingOutput() {
            @Override
            public void write(OutputStream os) throws IOException, WebApplicationException {
                Writer writer = new BufferedWriter(new OutputStreamWriter(os));
                CSVPrinter csvPrinter = new CSVPrinter(writer, CSVFormat.DEFAULT);
                try {
                    while (resultSet.hasNext()) {
                        csvPrinter.printRecord(resultSet.next().getRow());
                        writer.flush();
                    }
                } finally {
                    writer.close();
                }
            }
        };

        return Response.ok(stream).build();
    } catch (WebApplicationException ex) {
        throw ex;
    } catch (ItemNotFound itemNotFound) {
        throw new NotFoundFormattedException(itemNotFound.getMessage(), itemNotFound);
    } catch (Exception ex) {
        throw new ServiceFormattedException(ex.getMessage(), ex);
    }
}