List of usage examples for org.apache.commons.csv CSVFormat.DEFAULT
CSVFormat.DEFAULT is the standard comma-separated value format: comma delimiter, double-quote as the quote character, CRLF record separator, and empty lines ignored (as RFC 4180, except that empty lines are allowed).
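Before the project examples below, here is a minimal, self-contained sketch of the typical round trip with CSVFormat.DEFAULT. The file name data.csv and the column names name and value are placeholder assumptions for illustration, not taken from any of the projects listed here:

import java.io.FileReader;
import java.io.Reader;
import java.io.StringWriter;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;

public class CsvDefaultSketch {
    public static void main(String[] args) throws Exception {
        // Parsing: withHeader() with no arguments treats the first record as the
        // header, so fields can be read by column name.
        try (Reader in = new FileReader("data.csv")) { // placeholder file name
            for (CSVRecord record : CSVFormat.DEFAULT.withHeader().parse(in)) {
                System.out.println(record.get("name")); // placeholder column name
            }
        }

        // Printing: withHeader(...) makes the CSVPrinter emit the header record
        // before any data records.
        StringWriter out = new StringWriter();
        try (CSVPrinter printer = new CSVPrinter(out, CSVFormat.DEFAULT.withHeader("name", "value"))) {
            printer.printRecord("a", 1);
        }
        System.out.print(out);
    }
}

The examples that follow show the same two directions (parsing and printing) in real projects, including header handling, custom delimiters, and streaming output.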
From source file:onlinenewspopularity.DataFormatter.java
/**
 * Reads the file and randomly populates the data.
 *
 * @return matrix list. The list has the following elements:
 *         1. List of features (mx1 ArrayList)
 *         2. Target column name
 *         3. Data for training (n1xm matrix)
 *         4. Target values for training data (n1x1 matrix)
 *         5. Test data (n2xm matrix)
 *         6. Target values for test data (n2x1 matrix)
 *         NOTE: n1 is the length of the training data set and n2 is the length
 *         of the test data set, where n2 = Constants.SIZE*Constants.TEST_SET_RATIO
 *         and n1 = Constants.SIZE-n2.
 * @throws Exception
 */
public List<Matrix> readData() throws Exception {
    try {
        try (Reader br = new FileReader(new File(fileName))) {
            Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(br);
            List features = new ArrayList<>();
            String predictColName;

            // The first record is the header; collect the feature column names.
            Iterator<CSVRecord> itr = records.iterator();
            CSVRecord header = itr.next();
            features.add(Constants.FEATURE_COL1_NAME);
            for (int i = Constants.INITIAL_FEATURE_INDEX; i < header.size() - 1; i++) {
                features.add(header.get(i).trim());
            }
            predictColName = header.get(header.size() - 1).trim();

            trainStat = new double[2][features.size()];
            double[][] data = new double[Constants.SIZE][features.size()];
            double[][] res = new double[Constants.SIZE][1];

            boolean[] validFeature = new boolean[features.size()];
            int featureCount = 1;
            for (int i = 0; i < validFeature.length; i++) {
                validFeature[i] = Boolean.FALSE; // Not a valid feature by default
            }

            List indices = new ArrayList<>();
            int n = Constants.SIZE;
            for (int i = 0; i < n; i++) {
                indices.add(i);
            }

            Random randGen = new Random();
            validFeature[0] = Boolean.TRUE; // theta_0 is a valid feature

            // Scatter the remaining records into randomly chosen rows of the data matrix.
            int i = 0;
            for (CSVRecord record : records) {
                if (i < Constants.SIZE && !indices.isEmpty()) {
                    int index = (int) indices.get(randGen.nextInt(indices.size()));
                    for (int j = 0; j <= features.size(); j++) {
                        if (j == 0) {
                            data[index][j] = 1.0;
                        } else if (j == features.size()) {
                            res[index][0] = Double.parseDouble(record.get(record.size() - 1));
                        } else {
                            data[index][j] = Double
                                    .parseDouble(record.get(j + Constants.INITIAL_FEATURE_INDEX - 1));
                            if (data[index][j] != 0) {
                                if (validFeature[j] == Boolean.FALSE) {
                                    featureCount++;
                                    validFeature[j] = Boolean.TRUE;
                                }
                            }
                        }
                    }
                    indices.remove((Object) index);
                } else {
                    break;
                }
                i++;
            }

            // Remove empty features
            if (featureCount < features.size()) {
                List featuresCopy = new ArrayList<>();
                featuresCopy.addAll(features);
                double[][] newData = new double[Constants.SIZE][featureCount];
                int k = 0;
                int var = 0;
                for (int j = 0; j < featuresCopy.size(); j++) {
                    if (validFeature[j] == Boolean.TRUE) {
                        for (i = 0; i < Constants.SIZE; i++) {
                            newData[i][k] = data[i][j];
                        }
                        k++;
                    } else {
                        LOGGER.log(Level.INFO, "Removing empty feature: {0}", features.get(j - var));
                        features.remove(j - var);
                        var++;
                    }
                }
                data = newData;
            }

            // Split into training and test partitions.
            int testLen = (int) (Constants.TEST_SET_RATIO * Constants.SIZE);
            int trainLen = Constants.SIZE - testLen;
            Matrix tmpx = new Matrix(data);
            Matrix tmpy = new Matrix(res);

            List temp = new ArrayList<>();
            temp.add(features);
            temp.add(predictColName);
            temp.add(tmpx.getMatrix(0, trainLen - 1, 0, tmpx.getColumnDimension() - 1));
            temp.add(tmpy.getMatrix(0, trainLen - 1, 0, tmpy.getColumnDimension() - 1));
            temp.add(tmpx.getMatrix(trainLen, tmpx.getRowDimension() - 1, 0, tmpx.getColumnDimension() - 1));
            temp.add(tmpy.getMatrix(trainLen, tmpy.getRowDimension() - 1, 0, tmpy.getColumnDimension() - 1));
            return temp;
        }
    } catch (Exception e) {
        LOGGER.log(Level.WARNING, "{0}: {1}", new Object[] { e.getClass().getName(), e.getMessage() });
        throw e;
    }
}
From source file:org.ag.es.etl.IndexObjBuilder.java
public static Index buildIndexFromCSVEntry(String csv_str, String[] time_fields, String[] event_attributes,
        String index, String type, Optional<String> id_field) {
    try {
        CSVFormat csvFileFormat = CSVFormat.DEFAULT.withHeader(event_attributes).withDelimiter(';');
        Map<String, String> csv_as_map = CSVParser.parse(csv_str, csvFileFormat).getRecords().get(0).toMap();

        for (String time_field : time_fields) {
            csv_as_map.put(time_field, DateFormatExtractor.buildDateTime(csv_as_map.get(time_field)).toString());
        }

        if (id_field.isPresent()) {
            return new Index.Builder(csv_as_map).index(index).type(type).id(id_field.get()).build();
        } else {
            return new Index.Builder(csv_as_map).index(index).type(type).build();
        }
    } catch (Exception e) {
        return null;
    }
}
From source file:org.ag.es.etl.IndexObjBuilder.java
public static Index buildIndexFromCSVEntry(String csv_str, String[] event_attributes, String index_field,
        String type_field, Optional<String> id_field, boolean drop_meta) {
    try {
        CSVFormat csvFileFormat = CSVFormat.DEFAULT.withHeader(event_attributes).withDelimiter(';');
        Map<String, String> csv_as_map = CSVParser.parse(csv_str, csvFileFormat).getRecords().get(0).toMap();

        String index = csv_as_map.get(index_field);
        String type = csv_as_map.get(type_field);
        String id = id_field.isPresent() ? csv_as_map.get(id_field.get()) : null;

        if (drop_meta) {
            csv_as_map.remove(index_field);
            csv_as_map.remove(type_field);
            if (id_field.isPresent()) {
                csv_as_map.remove(id_field.get());
            }
        }

        if (id_field.isPresent()) {
            return new Index.Builder(csv_as_map).index(index).type(type).id(id).build();
        } else {
            return new Index.Builder(csv_as_map).index(index).type(type).build();
        }
    } catch (Exception e) {
        return null;
    }
}
From source file:org.andresoft.datasource.FileReformatTest.java
@Test
public void testReformatChicagoFoodInspectionCsv() throws IOException {
    // Input columns: Inspection ID | DBA Name | AKA Name | License # | Facility Type | Risk |
    // Address | City | State | Zip | Inspection Date | Inspection Type | Results | Violations |
    // Latitude | Longitude | Location
    System.setProperty("line.separator", "\n");

    Reader in = new FileReader("/Development/andresoft/hadoop_data/Food_Inspections_chicago.csv");
    File file = new File("/Development/andresoft/hadoop_data/Food_Inspections_chicago_nomalized-2.csv");
    FileWriter fw = new FileWriter(file);
    final CSVPrinter printer = CSVFormat.DEFAULT
            .withHeader("Inspection ID", "DBA Name", "AKA Name", "License #", "Facility Type", "Risk",
                    "Address", "City", "State", "Zip", "Inspection Date", "Inspection Type", "Results",
                    "Violation Number", "Violation", "Comments", "Latitude", "Longitude", "Location")
            .print(fw);

    final CSVParser parser = new CSVParser(in, CSVFormat.EXCEL.withHeader());
    for (CSVRecord record : parser) {
        String inspectionId = record.get("Inspection ID");
        String dbaName = record.get("DBA Name");
        String akaName = record.get("AKA Name");
        String licenseNum = record.get("License #");
        String facility = record.get("Facility Type");
        String risk = record.get("Risk");
        String address = record.get("Address");
        String city = record.get("City");
        String state = record.get("State");
        String zip = record.get("Zip");
        String inspectionDate = record.get("Inspection Date");
        String inspectionType = record.get("Inspection Type");
        String results = record.get("Results");
        String violations = record.get("Violations");
        String latitude = record.get("Latitude");
        String longitude = record.get("Longitude");
        String location = record.get("Location");

        // The Violations column packs multiple violations into one field, separated by '|';
        // emit one output record per violation.
        String[] violationsArray = violations.split("\\|");
        for (String v : violationsArray) {
            String comments = "None";
            String violation = "None";
            String[] violationWithComment = v.split("Comments:");
            if (violationWithComment.length == 2) {
                violation = violationWithComment[0];
                comments = violationWithComment[1];
            } else {
                violation = violationWithComment[0];
            }
            if (!StringUtils.isBlank(violation)) {
                int violationNumberEndIndex = violation.indexOf('.');
                int violationNumber = Integer.valueOf(violation.substring(0, violationNumberEndIndex).trim());
                printer.printRecord(inspectionId, dbaName, akaName, licenseNum, facility, risk, address, city,
                        state, zip, inspectionDate, inspectionType, results, violationNumber,
                        violation.substring(violationNumberEndIndex + 1), comments, latitude, longitude,
                        location);
            }
        }
    }
    printer.close();
    in.close();
}
From source file:org.apache.ambari.server.api.services.serializers.CsvSerializer.java
/**
 * Serialize the result into a CSV-formatted text document.
 * <p/>
 * It is expected that the result set is a collection of flat resources - no sub-resources will be
 * included in the output. The root of the tree structure may have a column map (csv_column_map)
 * and a column order (csv_column_order) property set to indicate the header record and ordering
 * of the columns.
 * <p/>
 * The csv_column_map is a map of resource property names to descriptive header names. If it is
 * not specified, a header record will not be serialized.
 * <p/>
 * The csv_column_order is a list of resource property names declaring the order of the columns.
 * If it is not specified, the order is taken from the key order of csv_column_map or the "natural"
 * ordering of the resource property names, either of which may be unpredictable.
 *
 * @param result internal result
 * @return a String containing the CSV-formatted document
 */
@Override
public Object serialize(Result result) {
    if (result.getStatus().isErrorState()) {
        return serializeError(result.getStatus());
    } else {
        CSVPrinter csvPrinter = null;

        try {
            // A StringBuffer to store the CSV-formatted document while building it. It may be
            // necessary to use file-based storage if the data set is expected to be really large.
            StringBuffer buffer = new StringBuffer();
            TreeNode<Resource> root = result.getResultTree();

            if (root != null) {
                csvPrinter = new CSVPrinter(buffer, CSVFormat.DEFAULT);

                // TODO: recursively handle tree structure, for now only handle single level of detail
                if ("true".equalsIgnoreCase(root.getStringProperty("isCollection"))) {
                    List<String> fieldNameOrder = processHeader(csvPrinter, root);

                    Collection<TreeNode<Resource>> children = root.getChildren();
                    if (children != null) {
                        // Iterate over the child nodes of the collection and add each as a new
                        // record in the CSV document.
                        for (TreeNode<Resource> child : children) {
                            processRecord(csvPrinter, child, fieldNameOrder);
                        }
                    }
                }
            }

            return buffer.toString();
        } catch (IOException e) {
            // todo: exception handling. Create ResultStatus 500 and call serializeError
            throw new RuntimeException("Unable to serialize to csv: " + e, e);
        } finally {
            if (csvPrinter != null) {
                try {
                    csvPrinter.close();
                } catch (IOException ex) {
                }
            }
        }
    }
}
From source file:org.apache.ambari.server.api.services.serializers.CsvSerializer.java
@Override
public Object serializeError(ResultStatus error) {
    CSVPrinter csvPrinter = null;

    try {
        StringBuffer buffer = new StringBuffer();
        csvPrinter = new CSVPrinter(buffer, CSVFormat.DEFAULT);
        csvPrinter.printRecord(Arrays.asList("status", "message"));
        csvPrinter.printRecord(Arrays.asList(error.getStatus().getStatus(), error.getMessage()));
        return buffer.toString();
    } catch (IOException e) {
        // todo: exception handling. Create ResultStatus 500 and call serializeError
        throw new RuntimeException("Unable to serialize to csv: " + e, e);
    } finally {
        if (csvPrinter != null) {
            try {
                csvPrinter.close();
            } catch (IOException ex) {
            }
        }
    }
}
From source file:org.apache.ambari.server.api.services.serializers.CsvSerializerTest.java
@Test
public void testSerializeResources_NoColumnInfo() throws Exception {
    Result result = new ResultImpl(true);
    result.setResultStatus(new ResultStatus(ResultStatus.STATUS.OK));
    TreeNode<Resource> tree = result.getResultTree();

    List<TreeMap<String, Object>> data = new ArrayList<TreeMap<String, Object>>() {
        {
            add(new TreeMap<String, Object>() {
                {
                    put("property1", "value1a");
                    put("property2", "value2a");
                    put("property3", "value3a");
                    put("property4", "value4a");
                }
            });
            add(new TreeMap<String, Object>() {
                {
                    put("property1", "value1'b");
                    put("property2", "value2'b");
                    put("property3", "value3'b");
                    put("property4", "value4'b");
                }
            });
            add(new TreeMap<String, Object>() {
                {
                    put("property1", "value1,c");
                    put("property2", "value2,c");
                    put("property3", "value3,c");
                    put("property4", "value4,c");
                }
            });
        }
    };

    tree.setName("items");
    tree.setProperty("isCollection", "true");

    addChildResource(tree, "resource", 0, data.get(0));
    addChildResource(tree, "resource", 1, data.get(1));
    addChildResource(tree, "resource", 2, data.get(2));

    replayAll();

    // execute test
    Object o = new CsvSerializer().serialize(result).toString().replace("\r", "");

    verifyAll();

    assertNotNull(o);

    StringReader reader = new StringReader(o.toString());
    CSVParser csvParser = new CSVParser(reader, CSVFormat.DEFAULT);
    List<CSVRecord> records = csvParser.getRecords();
    assertNotNull(records);
    assertEquals(3, records.size());

    int i = 0;
    for (CSVRecord record : records) {
        TreeMap<String, Object> actualData = data.get(i++);
        assertEquals(actualData.size(), record.size());
        for (String item : record) {
            assertTrue(actualData.containsValue(item));
        }
    }

    csvParser.close();
}
From source file:org.apache.ambari.server.serveraction.kerberos.AbstractKerberosDataFileBuilder.java
/**
 * Opens the data file for writing.
 * <p/>
 * This may be called multiple times and the appropriate action will occur depending on if the
 * file has been previously opened or closed.
 *
 * @throws java.io.IOException
 */
public void open() throws IOException {
    if (isClosed()) {
        if (file == null) {
            throw new IOException("Missing file path");
        } else {
            csvPrinter = new CSVPrinter(new FileWriter(file, true), CSVFormat.DEFAULT);

            // If the file is empty, write the header; else don't write the header.
            if (file.length() == 0) {
                // Write the header....
                Iterable<?> headerRecord = getHeaderRecord();
                csvPrinter.printRecord(headerRecord);
            }
        }
    }
}
From source file:org.apache.ambari.server.serveraction.kerberos.AbstractKerberosDataFileReader.java
/**
 * Opens the data file for reading.
 * <p/>
 * This may be called multiple times and the appropriate action will occur depending on if the
 * file has been previously opened or closed.
 *
 * @throws java.io.IOException
 */
public void open() throws IOException {
    if (isClosed()) {
        csvParser = CSVParser.parse(file, Charset.defaultCharset(), CSVFormat.DEFAULT.withHeader());
    }
}
From source file:org.apache.ambari.view.hive.resources.jobs.JobService.java
/**
 * Get job results in csv format
 */
@GET
@Path("{jobId}/results/csv")
@Produces("text/csv")
public Response getResultsCSV(@PathParam("jobId") String jobId, @Context HttpServletResponse response,
        @QueryParam("columns") final String requestedColumns) {
    try {
        JobController jobController = getResourceManager().readController(jobId);
        final Cursor resultSet = jobController.getResults();
        resultSet.selectColumns(requestedColumns);

        StreamingOutput stream = new StreamingOutput() {
            @Override
            public void write(OutputStream os) throws IOException, WebApplicationException {
                Writer writer = new BufferedWriter(new OutputStreamWriter(os));
                CSVPrinter csvPrinter = new CSVPrinter(writer, CSVFormat.DEFAULT);
                try {
                    while (resultSet.hasNext()) {
                        csvPrinter.printRecord(resultSet.next().getRow());
                        writer.flush();
                    }
                } finally {
                    writer.close();
                }
            }
        };

        return Response.ok(stream).build();
    } catch (WebApplicationException ex) {
        throw ex;
    } catch (ItemNotFound itemNotFound) {
        throw new NotFoundFormattedException(itemNotFound.getMessage(), itemNotFound);
    } catch (Exception ex) {
        throw new ServiceFormattedException(ex.getMessage(), ex);
    }
}