org.andresoft.datasource.FileReformatTest.java Source code

Java tutorial

Introduction

Here is the source code for org.andresoft.datasource.FileReformatTest.java

Source

package org.andresoft.datasource;

import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.lang.StringUtils;
import org.junit.Test;

/**
 * Data-preparation "test" that normalizes a food-inspection CSV export.
 *
 * <p>The input has one row per inspection with the columns:
 * Inspection ID | DBA Name | AKA Name | License # | Facility Type | Risk | Address | City |
 * State | Zip | Inspection Date | Inspection Type | Results | Violations | Latitude | Longitude |
 * Location.
 *
 * <p>The {@code Violations} cell packs several violations separated by {@code '|'}; each one has
 * the shape {@code "<number>. <text> Comments: <comments>"}. The output has one row per
 * violation, with the single {@code Violations} column replaced by
 * {@code Violation Number}, {@code Violation} and {@code Comments}. Inspections whose
 * {@code Violations} cell is blank produce no output rows.
 */
public class FileReformatTest {
    /** CSV export to read; it must start with a header row. */
    private static final String INPUT_PATH = "/Development/andresoft/hadoop_data/Food_Inspections_chicago.csv";

    /** Normalized CSV to (over)write. */
    private static final String OUTPUT_PATH =
            "/Development/andresoft/hadoop_data/Food_Inspections_chicago_nomalized-2.csv";

    /** Marker separating a violation's description from the inspector's comments. */
    private static final String COMMENTS_MARKER = "Comments:";

    /** Value written to the Comments column when a violation carries no comment text. */
    private static final String NO_COMMENTS = "None";

    /**
     * Reads {@link #INPUT_PATH}, explodes every inspection into one row per violation and writes
     * the result to {@link #OUTPUT_PATH}.
     *
     * @throws IOException if either file cannot be opened, read or written
     * @throws IllegalArgumentException if a non-blank violation does not start with a
     *         {@code "<number>."} prefix, or if the input lacks one of the expected columns
     */
    @Test
    public void testReformatChicagoFoodInpectionCsv() throws IOException {
        // Ask the CSV library for LF record endings directly. Overriding the JVM-wide
        // "line.separator" property does not affect CSVPrinter (CSVFormat.DEFAULT carries its
        // own CRLF record separator) and would leak into every test that runs afterwards.
        final CSVFormat outputFormat = CSVFormat.DEFAULT
                .withRecordSeparator("\n")
                .withHeader("Inspection ID", "DBA Name", "AKA Name", "License #", "Facility Type", "Risk",
                        "Address", "City", "State", "Zip", "Inspection Date", "Inspection Type", "Results",
                        "Violation Number", "Violation", "Comments", "Latitude", "Longitude", "Location");

        // try-with-resources closes (and therefore flushes) everything even when a malformed
        // row aborts the loop. The reader is opened first so a missing input file fails before
        // the output file is created or truncated.
        try (Reader in = new FileReader(INPUT_PATH);
                CSVParser parser = new CSVParser(in, CSVFormat.EXCEL.withHeader());
                FileWriter fw = new FileWriter(new File(OUTPUT_PATH));
                CSVPrinter printer = outputFormat.print(fw)) {

            for (CSVRecord record : parser) {
                String inspectionId = record.get("Inspection ID");
                String dbaName = record.get("DBA Name");
                String akaName = record.get("AKA Name");
                String licenseNum = record.get("License #");
                String facility = record.get("Facility Type");
                String risk = record.get("Risk");
                String address = record.get("Address");
                String city = record.get("City");
                String state = record.get("State");
                String zip = record.get("Zip");
                String inspectionDate = record.get("Inspection Date");
                String inspectionType = record.get("Inspection Type");
                String results = record.get("Results");
                String violations = record.get("Violations");
                String latitude = record.get("Latitude");
                String longitude = record.get("Longitude");
                String location = record.get("Location");

                for (String entry : violations.split("\\|")) {
                    // Split on the FIRST marker only, so a comment that itself contains
                    // "Comments:" is kept whole instead of being discarded.
                    int markerIndex = entry.indexOf(COMMENTS_MARKER);
                    String violation = markerIndex >= 0 ? entry.substring(0, markerIndex) : entry;
                    String comments = markerIndex >= 0
                            ? entry.substring(markerIndex + COMMENTS_MARKER.length())
                            : NO_COMMENTS;
                    if (comments.isEmpty()) {
                        // Marker present but nothing after it: treat as "no comment".
                        comments = NO_COMMENTS;
                    }

                    if (StringUtils.isBlank(violation)) {
                        // Empty Violations cell or stray separator: nothing to emit.
                        continue;
                    }

                    int numberEndIndex = violation.indexOf('.');
                    int violationNumber = parseViolationNumber(inspectionId, violation, numberEndIndex);

                    // The violation text is written exactly as found after the dot (untrimmed).
                    printer.printRecord(inspectionId, dbaName, akaName, licenseNum, facility, risk, address,
                            city, state, zip, inspectionDate, inspectionType, results, violationNumber,
                            violation.substring(numberEndIndex + 1), comments, latitude, longitude,
                            location);
                }
            }
        }
    }

    /**
     * Extracts the numeric code that precedes the first {@code '.'} of a violation entry.
     *
     * @param inspectionId ID of the inspection being processed, used only for error messages
     * @param violation the violation text, expected to look like {@code "32. SOME TEXT"}
     * @param dotIndex index of the first {@code '.'} in {@code violation}, or -1 if absent
     * @return the violation number
     * @throws IllegalArgumentException if there is no dot or the text before it is not an integer
     */
    private static int parseViolationNumber(String inspectionId, String violation, int dotIndex) {
        if (dotIndex < 0) {
            throw new IllegalArgumentException("Inspection " + inspectionId
                    + ": violation has no \"<number>.\" prefix: \"" + violation + "\"");
        }
        String prefix = violation.substring(0, dotIndex).trim();
        try {
            return Integer.parseInt(prefix);
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException("Inspection " + inspectionId
                    + ": violation number \"" + prefix + "\" is not an integer in: \"" + violation + "\"", e);
        }
    }

}