com.trifork.stamdata.importer.jobs.cpr.CPRParser.java Source code

Java tutorial

Introduction

Here is the source code for com.trifork.stamdata.importer.jobs.cpr.CPRParser.java

Source

/**
 * The contents of this file are subject to the Mozilla Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * Contributor(s): Contributors are attributed in the source code
 * where applicable.
 *
 * The Original Code is "Stamdata".
 *
 * The Initial Developer of the Original Code is Trifork Public A/S.
 *
 * Portions created for the Original Code are Copyright 2011,
 * Lægemiddelstyrelsen. All Rights Reserved.
 *
 * Portions created for the FMKi Project are Copyright 2011,
 * National Board of e-Health (NSI). All Rights Reserved.
 */

package com.trifork.stamdata.importer.jobs.cpr;

import static com.trifork.stamdata.importer.util.Dates.yyyyMMddHHmm;
import static com.trifork.stamdata.importer.util.Dates.yyyy_MM_dd;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Formatter;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.trifork.stamdata.importer.jobs.cpr.models.BarnRelation;
import com.trifork.stamdata.importer.jobs.cpr.models.ForaeldreMyndighedRelation;
import com.trifork.stamdata.importer.jobs.cpr.models.Klarskriftadresse;
import com.trifork.stamdata.importer.jobs.cpr.models.NavneBeskyttelse;
import com.trifork.stamdata.importer.jobs.cpr.models.Navneoplysninger;
import com.trifork.stamdata.importer.jobs.cpr.models.Personoplysninger;
import com.trifork.stamdata.importer.jobs.cpr.models.UmyndiggoerelseVaergeRelation;
import org.apache.log4j.Logger;
import org.joda.time.DateTime;
import org.joda.time.LocalDateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

/**
 * Parser for fixed-width CPR files.
 *
 * <p>Every non-empty line starts with a three-digit record type (columns 0-2)
 * that decides how the rest of the line is interpreted. All column offsets in
 * this class are zero-based, with an inclusive start and an exclusive end. The
 * file must be terminated by a record of type {@code 999}; no other record may
 * follow it.
 */
public class CPRParser {
    private static final String FILE_ENCODING = "ISO-8859-1";

    private static final Logger logger = Logger.getLogger(CPRParser.class);

    // Record types handled by parseLine(). Any other type is ignored.
    private static final int VALID_FROM_RECORD = 0;
    private static final int PERSONOPLYSNINGER_RECORD = 1;
    private static final int KLARSKRIFTADRESSE_RECORD = 3;
    private static final int BESKYTTELSE_RECORD = 4;
    private static final int NAVNEOPLYSNINGER_RECORD = 8;
    private static final int BARN_RELATION_RECORD = 14;
    private static final int FORAELDRE_MYNDIGHED_RECORD = 16;
    private static final int UMYNDIGGOERELSE_VAERGE_RECORD = 17;
    private static final int END_RECORD = 999;

    /** The only protection code (columns 13-16 of a type 4 record) that is imported. */
    private static final String NAVNE_BESKYTTELSE_CODE = "0001";

    /** Placeholder used in twelve-character timestamp fields that carry no date. */
    private static final String EMPTY_DATE_STRING = "000000000000";

    /**
     * When {@code true}, a date that does not survive a parse/print round trip
     * aborts parsing with an exception; when {@code false} the problem is only
     * logged and the parsed value is used anyway.
     */
    static boolean haltOnDateErrors = true;

    private static final Pattern datePattern = Pattern.compile("([\\d]{4})-([\\d]{2})-([\\d]{2})");
    private static final Pattern timestampPattern = Pattern
            .compile("([\\d]{4})([\\d]{2})([\\d]{2})([\\d]{2})([\\d]{2})");

    /**
     * Parses a complete CPR file.
     *
     * @param f the file to read; it is decoded as ISO-8859-1
     * @return the dataset holding every entity found in the file
     * @throws Exception if the file cannot be read, a field cannot be parsed,
     *         or the end record is missing or followed by further records
     */
    public static CPRDataset parse(File f) throws Exception {
        // Open the stream on its own so that it is released even if wrapping
        // it in a reader fails.
        FileInputStream input = new FileInputStream(f);

        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(input, FILE_ENCODING));
            return parseFileContents(reader);
        } finally {
            // The readers hold no resources besides the underlying stream.
            input.close();
        }
    }

    /**
     * Reads all lines from the reader and feeds them to {@link #parseLine}.
     *
     * @param reader source of the CPR lines; not closed by this method
     * @return the populated dataset
     * @throws Exception on I/O errors, malformed fields, a missing end record,
     *         or records that follow the end record
     */
    private static CPRDataset parseFileContents(BufferedReader reader) throws Exception {
        boolean endRecordReached = false;
        CPRDataset cpr = new CPRDataset();

        // readLine() returns null at end of stream. Reader.ready() only tells
        // whether a read would block, so it is not a reliable end-of-file test.
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.length() == 0) {
                continue;
            }

            int recordType = getRecordType(line);
            if (recordType == END_RECORD) {
                endRecordReached = true;
            } else if (endRecordReached) {
                throw new Exception("Slut-record midt i cpr-filen");
            } else {
                parseLine(recordType, line, cpr);
            }
        }

        if (!endRecordReached) {
            throw new Exception("Slut-record mangler i cpr-filen");
        }

        return cpr;
    }

    /**
     * Interprets a single line according to its record type and stores the
     * result in the dataset. Record types without a case below are ignored.
     *
     * @param recordType the record type read from the start of the line
     * @param line the complete line
     * @param cpr the dataset that receives the parsed entity
     * @throws Exception if a numeric or date field in the line is invalid
     */
    static void parseLine(int recordType, String line, CPRDataset cpr) throws Exception {
        switch (recordType) {
        case VALID_FROM_RECORD:
            cpr.setValidFrom(getValidFrom(line));
            break;
        case PERSONOPLYSNINGER_RECORD:
            cpr.addEntity(personoplysninger(line));
            break;
        case KLARSKRIFTADRESSE_RECORD:
            cpr.addEntity(klarskriftadresse(line));
            break;
        case BESKYTTELSE_RECORD:
            // Only records carrying the name protection code are imported.
            if (NAVNE_BESKYTTELSE_CODE.equals(cut(line, 13, 17))) {
                cpr.addEntity(navneBeskyttelse(line));
            }
            break;
        case NAVNEOPLYSNINGER_RECORD:
            cpr.addEntity(navneoplysninger(line));
            break;
        case BARN_RELATION_RECORD:
            cpr.addEntity(barnRelation(line));
            break;
        case FORAELDRE_MYNDIGHED_RECORD:
            cpr.addEntity(foraeldreMyndighedRelation(line));
            break;
        case UMYNDIGGOERELSE_VAERGE_RECORD:
            cpr.addEntity(umyndiggoerelseVaergeRelation(line));
            break;
        default:
            // Record types that are not imported are skipped silently.
            break;
        }
    }

    /**
     * Builds an {@link UmyndiggoerelseVaergeRelation} from a type 17 record line.
     *
     * @throws Exception if one of the date fields is invalid
     */
    static UmyndiggoerelseVaergeRelation umyndiggoerelseVaergeRelation(String line) throws Exception {
        UmyndiggoerelseVaergeRelation u = new UmyndiggoerelseVaergeRelation();
        u.setCpr(cut(line, 3, 13));
        u.setUmyndigStartDato(parseDate(yyyy_MM_dd, line, 13, 23));
        u.setUmyndigStartDatoMarkering(cut(line, 23, 24));
        u.setUmyndigSletteDato(parseDate(yyyy_MM_dd, line, 24, 34));
        u.setType(cut(line, 34, 38));
        u.setRelationCpr(cut(line, 38, 48));
        u.setRelationCprStartDato(parseDate(yyyy_MM_dd, line, 48, 58));
        u.setVaergesNavn(cut(line, 58, 92).trim());
        u.setVaergesNavnStartDato(parseDate(yyyy_MM_dd, line, 92, 102));
        u.setRelationsTekst1(cut(line, 102, 136).trim());
        u.setRelationsTekst2(cut(line, 136, 170).trim());
        u.setRelationsTekst3(cut(line, 170, 204).trim());
        u.setRelationsTekst4(cut(line, 204, 238).trim());
        u.setRelationsTekst5(cut(line, 238, 272).trim());
        return u;
    }

    /**
     * Builds a {@link ForaeldreMyndighedRelation} from a type 16 record line.
     *
     * @throws Exception if one of the date fields is invalid
     */
    static ForaeldreMyndighedRelation foraeldreMyndighedRelation(String line) throws Exception {
        ForaeldreMyndighedRelation f = new ForaeldreMyndighedRelation();
        f.setCpr(cut(line, 3, 13));
        f.setType(cut(line, 13, 17));
        f.setForaeldreMyndighedStartDato(parseDate(yyyy_MM_dd, line, 17, 27));
        f.setForaeldreMyndighedMarkering(cut(line, 27, 28));
        f.setForaeldreMyndighedSlettedato(parseDate(yyyy_MM_dd, line, 28, 38));
        f.setRelationCpr(cut(line, 38, 48));
        f.setRelationCprStartDato(parseDate(yyyy_MM_dd, line, 48, 58));
        return f;
    }

    /** Builds a {@link BarnRelation} from a type 14 record line. */
    static BarnRelation barnRelation(String line) {
        BarnRelation b = new BarnRelation();
        b.setCpr(cut(line, 3, 13));
        b.setBarnCpr(cut(line, 13, 23));
        return b;
    }

    /**
     * Builds a {@link Navneoplysninger} from a type 8 record line.
     *
     * @throws Exception if the start timestamp is invalid
     */
    static Navneoplysninger navneoplysninger(String line) throws Exception {
        Navneoplysninger n = new Navneoplysninger();
        n.setCpr(cut(line, 3, 13));
        n.setFornavn(cut(line, 13, 63).trim());
        n.setFornavnMarkering(cut(line, 63, 64));
        n.setMellemnavn(cut(line, 64, 104).trim());
        n.setMellemnavnMarkering(cut(line, 104, 105));
        n.setEfternavn(cut(line, 105, 145).trim());
        n.setEfternavnMarkering(cut(line, 145, 146));
        n.setStartDato(parseDate(yyyyMMddHHmm, line, 146, 158));
        n.setStartDatoMarkering(cut(line, 158, 159));
        n.setAdresseringsNavn(cut(line, 159, 193).trim());
        return n;
    }

    /**
     * Builds a {@link NavneBeskyttelse} from a type 4 record line. The caller
     * is responsible for checking the protection code in columns 13-16.
     *
     * @throws Exception if one of the date fields is invalid
     */
    static NavneBeskyttelse navneBeskyttelse(String line) throws Exception {
        NavneBeskyttelse n = new NavneBeskyttelse();
        n.setCpr(cut(line, 3, 13));
        n.setNavneBeskyttelseStartDato(parseDate(yyyy_MM_dd, line, 17, 27));
        n.setNavneBeskyttelseSletteDato(parseDate(yyyy_MM_dd, line, 27, 37));
        return n;
    }

    /**
     * Builds a {@link Klarskriftadresse} from a type 3 record line.
     *
     * @throws Exception if the postal code, municipality code or street code
     *         is not a valid integer
     */
    static Klarskriftadresse klarskriftadresse(String line) throws Exception {
        Klarskriftadresse k = new Klarskriftadresse();

        k.setCpr(cut(line, 3, 13));
        k.setNavnTilAdressering(cut(line, 13, 47).trim());
        k.setCoNavn(cut(line, 47, 81).trim());
        k.setLokalitet(cut(line, 81, 115).trim());
        k.setByNavn(cut(line, 149, 183).trim());
        k.setPostNummer(parseLong(line, 183, 187));
        k.setPostDistrikt(cut(line, 187, 207).trim());
        k.setKommuneKode(parseLong(line, 207, 211));
        k.setVejKode(parseLong(line, 211, 215));
        k.setHusNummer(removeLeadingZeros(cut(line, 215, 219).trim()));
        k.setEtage(removeLeadingZeros(cut(line, 219, 221).trim()));
        k.setSideDoerNummer(cut(line, 221, 225).trim());
        k.setBygningsNummer(cut(line, 225, 229).trim());

        // This is a duplicate of the address field.

        k.setVejnavnTilAdressering(cut(line, 229, 249).trim());

        // FIXME: This is not actually the complete street name,
        // rather it is a shortened version. The complete address is not included
        // in this record type.

        k.setVejNavn(cut(line, 229, 249).trim());

        return k;
    }

    /**
     * Builds a {@link Personoplysninger} from a type 1 record line.
     *
     * @throws Exception if one of the date fields is invalid
     */
    static Personoplysninger personoplysninger(String line) throws Exception {
        Personoplysninger p = new Personoplysninger();

        p.setCpr(cut(line, 3, 13));
        p.setGaeldendeCpr(cut(line, 13, 23).trim());
        p.setStatus(cut(line, 23, 25));
        p.setStatusDato(parseDate(yyyyMMddHHmm, line, 25, 37));
        p.setStatusMakering(cut(line, 37, 38));
        p.setKoen(cut(line, 38, 39));
        p.setFoedselsdato(parseDate(yyyy_MM_dd, line, 39, 49));
        p.setFoedselsdatoMarkering("*".equals(cut(line, 49, 50)));
        p.setStartDato(parseDate(yyyy_MM_dd, line, 50, 60));
        p.setStartDatoMarkering(cut(line, 60, 61));
        p.setSlutdato(parseDate(yyyy_MM_dd, line, 61, 71));
        p.setSlutDatoMarkering(cut(line, 71, 72));
        p.setStilling(cut(line, 72, 106).trim());

        return p;
    }

    /**
     * Gets the record type of a line in the CPR file.
     *
     * @throws Exception if the first three characters are not an integer
     */
    private static int getRecordType(String line) throws Exception {
        return readInt(line, 0, 3);
    }

    /**
     * Extracts the columns {@code [beginIndex, endIndex)} from a line without
     * ever failing on short lines: the result is truncated at the end of the
     * line and is empty when the line ends at or before {@code beginIndex}.
     */
    private static String cut(String line, int beginIndex, int endIndex) {
        if (line.length() <= beginIndex) {
            return "";
        }

        return line.substring(beginIndex, Math.min(line.length(), endIndex));
    }

    /**
     * Parses the columns {@code [from, to)} as an {@code int}.
     *
     * @throws Exception if the field is not a valid integer
     */
    private static int readInt(String line, int from, int to) throws Exception {
        try {
            return Integer.parseInt(cut(line, from, to));
        } catch (NumberFormatException e) {
            throw integerParseError(line, from, to, e);
        }
    }

    /**
     * Parses the columns {@code [from, to)} as a {@code Long}.
     *
     * @throws Exception if the field is not a valid integer
     */
    private static Long parseLong(String line, int from, int to) throws Exception {
        try {
            return Long.parseLong(cut(line, from, to));
        } catch (NumberFormatException e) {
            throw integerParseError(line, from, to, e);
        }
    }

    /** Creates the exception reported when an integer field cannot be parsed. */
    private static Exception integerParseError(String line, int from, int to, NumberFormatException cause) {
        return new Exception("Der opstod en fejl under parsning af heltal i linien: [" + line
                + "], p\u00e5 positionen from: " + from + ", to: " + to, cause);
    }

    /**
     * Parses the columns {@code [from, to)} as a date.
     *
     * @return the parsed date, or {@code null} when the field is not completely
     *         filled in (it is truncated or has leading/trailing whitespace) or
     *         equals the all-zero timestamp placeholder
     * @throws Exception if the field is filled in but is not a valid date and
     *         {@link #haltOnDateErrors} is set, or if it cannot be parsed at all
     */
    private static Date parseDate(DateTimeFormatter format, String line, int from, int to) throws Exception {
        String dateString = cut(line, from, to);

        boolean completelyFilled = dateString.trim().length() == to - from;
        if (!completelyFilled || dateString.equals(EMPTY_DATE_STRING)) {
            return null;
        }

        return parseDateAndCheckValidity(dateString, format, line);
    }

    /**
     * Reads the date in columns 19-26 ({@code yyyyMMdd}) of a type 0 record.
     *
     * @throws Exception if the field cannot be parsed as a date
     */
    private static Date getValidFrom(String line) throws Exception {
        // SimpleDateFormat is not thread-safe, so a fresh instance is used per call.
        SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
        return sdf.parse(cut(line, 19, 27));
    }

    /**
     * Strips leading {@code '0'} characters.
     *
     * @return the remainder of the string, {@code ""} if it consisted only of
     *         zeros (or was empty), or {@code null} if the input was {@code null}
     */
    private static String removeLeadingZeros(String str) {
        if (str == null) {
            return null;
        }

        for (int index = 0; index < str.length(); index++) {
            if (str.charAt(index) != '0') {
                return str.substring(index);
            }
        }

        return "";
    }

    /**
     * Repairs out-of-range components in a CPR date string.
     *
     * <p>For {@code yyyy-MM-dd} dates a month or day of {@code 00} becomes
     * {@code 01}. For {@code yyyyMMddHHmm} timestamps the same applies, and in
     * addition hours of 24 or more and minutes of 60 or more are reset to
     * {@code 00}. A string in neither format is logged as an error and
     * returned unchanged.
     *
     * @param date the raw date string
     * @return the repaired string in the same format as the input
     */
    static String fixWeirdDate(String date) {
        Matcher dateMatcher = datePattern.matcher(date);
        String fixedDate;

        if (dateMatcher.matches()) {
            fixedDate = fixDate(dateMatcher);
        } else {
            Matcher timeMatcher = timestampPattern.matcher(date);
            if (timeMatcher.matches()) {
                fixedDate = fixTime(timeMatcher);
            } else {
                logger.error("Unexpected date format=" + date);
                return date;
            }
        }

        if (logger.isTraceEnabled() && !fixedDate.equals(date)) {
            logger.trace("Fixing CPR date from=" + date + " to=" + fixedDate);
        }

        return fixedDate;
    }

    /** Rebuilds a {@code yyyyMMddHHmm} timestamp from a successful match, fixing invalid parts. */
    private static String fixTime(Matcher timeMatcher) {
        int year = Integer.parseInt(timeMatcher.group(1));
        int month = Integer.parseInt(timeMatcher.group(2));
        int day = Integer.parseInt(timeMatcher.group(3));
        int hours = Integer.parseInt(timeMatcher.group(4));
        int minutes = Integer.parseInt(timeMatcher.group(5));

        if (month == 0) {
            month = 1;
        }
        if (day == 0) {
            day = 1;
        }
        if (hours >= 24) {
            hours = 0;
        }
        if (minutes >= 60) {
            minutes = 0;
        }

        // Locale.ROOT keeps the digits ASCII regardless of the default locale,
        // so the result can always be parsed again.
        return String.format(Locale.ROOT, "%04d%02d%02d%02d%02d", year, month, day, hours, minutes);
    }

    /** Rebuilds a {@code yyyy-MM-dd} date from a successful match, fixing invalid parts. */
    private static String fixDate(Matcher dateMatcher) {
        int year = Integer.parseInt(dateMatcher.group(1));
        int month = Integer.parseInt(dateMatcher.group(2));
        int day = Integer.parseInt(dateMatcher.group(3));

        if (month == 0) {
            month = 1;
        }
        if (day == 0) {
            day = 1;
        }

        // Locale.ROOT keeps the digits ASCII regardless of the default locale,
        // so the result can always be parsed again.
        return String.format(Locale.ROOT, "%04d-%02d-%02d", year, month, day);
    }

    /**
     * Parses a date string after repairing it with {@link #fixWeirdDate} and
     * verifies it by printing the parsed value back with the same formatter.
     *
     * @param dateString the raw date field
     * @param format the formatter used for both parsing and printing
     * @param line the complete line, used only for the error message
     * @return the parsed date
     * @throws Exception if the printed value differs from the repaired input
     *         and {@link #haltOnDateErrors} is set; otherwise the mismatch is
     *         logged and the parsed date is still returned
     */
    private static Date parseDateAndCheckValidity(String dateString, DateTimeFormatter format, String line)
            throws Exception {
        dateString = fixWeirdDate(dateString);
        LocalDateTime date = format.parseLocalDateTime(dateString);
        String formattedDate = format.print(date);

        if (!formattedDate.equals(dateString)) {
            String errorMessage = "Ugyldig dato: " + dateString + " fra linjen [" + line + "]";

            if (haltOnDateErrors) {
                throw new Exception(errorMessage);
            } else {
                logger.error(errorMessage);
            }
        }

        return date.toDate();
    }
}