List of usage examples for org.joda.time LocalDate parse
public static LocalDate parse(String str, DateTimeFormatter formatter)
From source file: org.filteredpush.qc.date.DateUtils.java
License: Apache License
/** * Given a string that may represent a date or range of dates, or date time or range of date times, * attempt to extract a standard date from that string. * * @param verbatimEventDate a string containing a verbatim event date. * @param yearsBeforeSuspect Dates that parse to a year prior to this year are marked as suspect. * @param assumemmddyyyy if true, assume that dates in the form nn-nn-nnnn are mm-dd-yyyy, if false, assume * that these are dd-mm-yyyy, if null, such dates are tested for ambiguity. * * @return an EventResult with a resultState for the nature of the match and result for the resulting date. */ public static EventResult extractDateFromVerbatimER(String verbatimEventDate, int yearsBeforeSuspect, Boolean assumemmddyyyy) { EventResult result = new EventResult(); String resultDate = null; // Remove some common no data comments if (verbatimEventDate != null && verbatimEventDate.contains("[no date]")) { verbatimEventDate = verbatimEventDate.replace("[no date]", ""); } if (verbatimEventDate != null && verbatimEventDate.contains("[no year]")) { verbatimEventDate = verbatimEventDate.replace("[no year]", ""); } // Strip off leading and trailing [] if (verbatimEventDate != null && verbatimEventDate.startsWith("[") && verbatimEventDate.endsWith("]")) { verbatimEventDate = verbatimEventDate.substring(1); verbatimEventDate = verbatimEventDate.substring(0, verbatimEventDate.length() - 1); } if (verbatimEventDate != null && verbatimEventDate.matches(".*\\[[0-9]+\\].*")) { verbatimEventDate = verbatimEventDate.replace("[", "").replace("]", ""); } // Strip off leading and trailing quotation marks if (verbatimEventDate != null && verbatimEventDate.startsWith("\"") && verbatimEventDate != null && verbatimEventDate.endsWith("\"")) { verbatimEventDate = verbatimEventDate.substring(1, verbatimEventDate.length() - 1); } // strip off leading and trailing whitespace if (verbatimEventDate != null && (verbatimEventDate.startsWith(" ") || 
verbatimEventDate.endsWith(" "))) { verbatimEventDate = verbatimEventDate.trim(); } // strip off trailing period after number if (verbatimEventDate != null && verbatimEventDate.endsWith(".") && verbatimEventDate.matches(".*[0-9]\\.$")) { verbatimEventDate = verbatimEventDate.substring(0, verbatimEventDate.length() - 1); logger.debug(verbatimEventDate); } // Stop before doing work if provided verbatim string is null. if (isEmpty(verbatimEventDate)) { return result; } if (verbatimEventDate.matches("^[0-9]{4}[-][0-9]{2}[-][0-9]{2}/[0-9]{4}[-][0-9]{2}[-][0-9]{2}$")) { // if verbatim date is a ISO formatted range with identical first and last dates (/), use just one. // Example: 1982-12-11/1982-12-11 changed to 1982-12-11 String[] bits = verbatimEventDate.split("/"); if (bits.length == 2 && bits[0].equals(bits[1])) { verbatimEventDate = bits[0]; } } if (verbatimEventDate.matches("^[0-9]{4}[/][0-9]{2}[/][0-9]{2}-[0-9]{4}[/][0-9]{2}[/][0-9]{2}$")) { // if verbatim date is a range with identical first and last dates (-), use just one. // Example: 1982/12/11-1982/12/11 changed to 1982/12/11 String[] bits = verbatimEventDate.split("-"); if (bits.length == 2 && bits[0].equals(bits[1])) { verbatimEventDate = bits[0]; } } if (verbatimEventDate .matches("^[0-9]{1,2}[-. ][0-9]{1,2}[-. ][0-9]{4}/[0-9]{1,2}[-. ][0-9]{1,2}[-. ][0-9]{4}$")) { // if verbatim date is a range with identical first and last dates (/), use just one. // Example: 12-11-1982/12-11-1982 changed to 12-11-1982 String[] bits = verbatimEventDate.split("/"); if (bits.length == 2 && bits[0].equals(bits[1])) { verbatimEventDate = bits[0]; } } if (verbatimEventDate .matches("^[0-9]{1,2}[./ ][0-9]{1,2}[./ ][0-9]{4}[-][0-9]{1,2}[./ ][0-9]{1,2}[./ ][0-9]{4}$")) { // if verbatim date is a range with identical first and last dates (-), use just one. 
// Example: 12/11/1982-12/11/1982 changed to 12/11/1982 String[] bits = verbatimEventDate.split("-"); if (bits.length == 2 && bits[0].equals(bits[1])) { verbatimEventDate = bits[0]; } } if (verbatimEventDate.matches("^[0-9]{4}[-]([0-9]{1,2}|[A-Za-z]+)[-][0-9]{1,2}.*")) { // Both separators are the same. // Example 1982-02-05 // Example 1982-Feb-05 // Example 1982-02-05 // Example 1982-02-05T05:03:06 try { DateTimeParser[] parsers = { DateTimeFormat.forPattern("yyyy/MM/dd").getParser(), DateTimeFormat.forPattern("yyyy/MMM/dd").getParser(), DateTimeFormat.forPattern("yyyy-MMM-dd").getParser(), ISODateTimeFormat.dateOptionalTimeParser().getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers).toFormatter(); DateMidnight parseDate = LocalDate.parse(verbatimEventDate, formatter).toDateMidnight(); resultDate = parseDate.toString("yyyy-MM-dd"); result.setResultState(EventResult.EventQCResultState.DATE); result.setResult(resultDate); } catch (Exception e) { logger.debug(e.getMessage()); } } if (verbatimEventDate.matches("^[0-9]{4}[/]([0-9]{1,2}|[A-Za-z]+)[/][0-9]{1,2}.*")) { // Both separators are the same. 
// Example 1982/02/05 // Example 1982/Feb/05 // Example 1982-02-05 // Example 1982/02/05T05:03:06 try { DateTimeParser[] parsers = { DateTimeFormat.forPattern("yyyy/MM/dd").getParser(), DateTimeFormat.forPattern("yyyy/MMM/dd").getParser(), DateTimeFormat.forPattern("yyyy-MMM-dd").getParser(), ISODateTimeFormat.dateOptionalTimeParser().getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers).toFormatter(); DateMidnight parseDate = LocalDate.parse(verbatimEventDate, formatter).toDateMidnight(); resultDate = parseDate.toString("yyyy-MM-dd"); result.setResultState(EventResult.EventQCResultState.DATE); result.setResult(resultDate); } catch (Exception e) { logger.debug(e.getMessage()); } } if (verbatimEventDate.matches("^[0-9]{4}[.,][0-9]{1,2}[.,][0-9]{1,2}$")) { // Example 1982.02.05 // Example 1982,02,05 // Cases where the 1-2 digit numbers are both smaller than 12 are treated as ambiguous. String resultDateMD = null; String resultDateDM = null; DateMidnight parseDate1 = null; DateMidnight parseDate2 = null; try { DateTimeParser[] parsers = { DateTimeFormat.forPattern("yyyy.MM.dd").getParser(), DateTimeFormat.forPattern("yyyy,MM,dd").getParser(), DateTimeFormat.forPattern("yyyy,MM.dd").getParser(), DateTimeFormat.forPattern("yyyy.MM,dd").getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers).toFormatter(); parseDate1 = LocalDate.parse(verbatimEventDate, formatter).toDateMidnight(); resultDateMD = parseDate1.toString("yyyy-MM-dd"); } catch (Exception e) { logger.debug(e.getMessage()); } try { DateTimeParser[] parsers = { DateTimeFormat.forPattern("yyyy.dd.MM").getParser(), DateTimeFormat.forPattern("yyyy,dd,MM").getParser(), DateTimeFormat.forPattern("yyyy,dd.MM").getParser(), DateTimeFormat.forPattern("yyyy.dd,MM").getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers).toFormatter(); parseDate2 = LocalDate.parse(verbatimEventDate, 
formatter).toDateMidnight(); resultDateDM = parseDate2.toString("yyyy-MM-dd"); } catch (Exception e) { logger.debug(e.getMessage()); } if (resultDateMD != null && resultDateDM == null) { result.setResultState(EventResult.EventQCResultState.DATE); result.setResult(resultDateMD); } else if (resultDateMD == null && resultDateDM != null) { result.setResultState(EventResult.EventQCResultState.DATE); result.setResult(resultDateDM); } else if (resultDateMD != null && resultDateDM != null) { if (resultDateMD.equals(resultDateDM)) { result.setResultState(EventResult.EventQCResultState.DATE); result.setResult(resultDateDM); } else { result.setResultState(EventResult.EventQCResultState.AMBIGUOUS); Interval range = null; if (parseDate1.isBefore(parseDate2)) { result.setResult(resultDateMD + "/" + resultDateDM); } else { result.setResult(resultDateDM + "/" + resultDateMD); } } } } if (verbatimEventDate.matches("^[0-9]{1,2}[-/ ][0-9]{4}")) { // Example 02/1982 try { DateTimeParser[] parsers = { DateTimeFormat.forPattern("MM-yyyy").getParser(), DateTimeFormat.forPattern("MM/yyyy").getParser(), DateTimeFormat.forPattern("MM yyyy").getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers).toFormatter(); DateMidnight parseDate = LocalDate.parse(verbatimEventDate, formatter).toDateMidnight(); resultDate = parseDate.toString("yyyy-MM"); result.setResultState(EventResult.EventQCResultState.RANGE); result.setResult(resultDate); } catch (Exception e) { logger.debug(e.getMessage()); } } if (verbatimEventDate.matches("^[0-9]{4}[0-9]{1,2}[0-9]{1,2}[?]$")) { // Example: 19720325 try { DateTimeParser[] parsers = { DateTimeFormat.forPattern("yyyyMMdd").getParser(), DateTimeFormat.forPattern("yyyyMMdd?").getParser(), ISODateTimeFormat.dateOptionalTimeParser().getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers).toFormatter() .withLocale(Locale.CHINESE); DateMidnight parseDate = 
LocalDate.parse(verbatimEventDate, formatter).toDateMidnight(); resultDate = parseDate.toString("yyyy-MM-dd"); result.setResultState(EventResult.EventQCResultState.DATE); result.setResult(resultDate); } catch (Exception e) { logger.debug(e.getMessage()); } } if (verbatimEventDate.matches("^[0-9]{4}[-][0-9]{3}/[0-9]{4}[-][0-9]{3}$")) { // Example: 1982-145 try { String[] bits = verbatimEventDate.split("/"); DateTimeParser[] parsers = { DateTimeFormat.forPattern("yyyy-D").getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers).toFormatter(); LocalDate parseStartDate = LocalDate.parse(bits[0], formatter); LocalDate parseEndDate = LocalDate.parse(bits[1], formatter); resultDate = parseStartDate.toString("yyyy-MM-dd") + "/" + parseEndDate.toString("yyyy-MM-dd"); logger.debug(resultDate); result.setResultState(EventResult.EventQCResultState.RANGE); result.setResult(resultDate); } catch (Exception e) { logger.debug(e.getMessage()); } } if (result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN) && verbatimEventDate.matches("^[0-9]{4}0000$")) { // case 19800000 verbatimEventDate = verbatimEventDate.substring(0, 4); } if (result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN) && verbatimEventDate.matches("^[0-9]{4}$")) { // Example: 1962 try { DateTimeParser[] parsers = { DateTimeFormat.forPattern("yyyy").getParser(), }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers).toFormatter(); DateMidnight parseDate = LocalDate.parse(verbatimEventDate, formatter).toDateMidnight(); resultDate = parseDate.toString("yyyy"); result.setResultState(EventResult.EventQCResultState.RANGE); result.setResult(resultDate); } catch (Exception e) { logger.debug(e.getMessage()); } } if (result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN) && verbatimEventDate.matches("^[12][0-9]{1}00[']{0,1}s$")) { // Example: 1900s try { String verbatimEventDateDelta = 
verbatimEventDate.replace("'s", "s"); DateTimeParser[] parsers = { DateTimeFormat.forPattern("yyyy's").getParser(), }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers).toFormatter(); DateMidnight parseDate = LocalDate.parse(verbatimEventDateDelta, formatter).toDateMidnight(); DateMidnight endDate = parseDate.plusYears(100).minusDays(1); resultDate = parseDate.toString("yyyy") + "-01-01/" + endDate.toString("yyyy") + "-12-31"; result.setResultState(EventResult.EventQCResultState.RANGE); result.setResult(resultDate); } catch (Exception e) { logger.debug(e.getMessage()); } } if (result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN) && verbatimEventDate.matches("^[12][0-9]{2}0[']{0,1}s$")) { // Example: 1970s try { String verbatimEventDateDelta = verbatimEventDate.replace("'s", "s"); DateTimeParser[] parsers = { DateTimeFormat.forPattern("yyyy's").getParser(), }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers).toFormatter(); DateMidnight parseDate = LocalDate.parse(verbatimEventDateDelta, formatter).toDateMidnight(); DateMidnight endDate = parseDate.plusYears(10).minusDays(1); resultDate = parseDate.toString("yyyy") + "-01-01/" + endDate.toString("yyyy") + "-12-31"; result.setResultState(EventResult.EventQCResultState.RANGE); result.setResult(resultDate); } catch (Exception e) { logger.debug(e.getMessage()); } } if (result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN) && verbatimEventDate.matches("^[A-Za-z]{3,9}[.]{0,1}[ ]{0,1}[-/ ][0-9]{4}$")) { // Example: Jan-1980 // Example: Jan./1980 // Example: January 1980 try { DateTimeParser[] parsers = { DateTimeFormat.forPattern("MMM-yyyy").getParser(), DateTimeFormat.forPattern("MMM/yyyy").getParser(), DateTimeFormat.forPattern("MMM /yyyy").getParser(), DateTimeFormat.forPattern("MMM yyyy").getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers).toFormatter(); String cleaned = 
verbatimEventDate.replace(".", ""); DateMidnight parseDate = LocalDate.parse(cleaned, formatter).toDateMidnight(); resultDate = parseDate.toString("yyyy-MM"); result.setResultState(EventResult.EventQCResultState.RANGE); result.setResult(resultDate); } catch (Exception e) { logger.debug(e.getMessage()); } } if (result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN)) { // Example: 04/03/1994 (ambiguous) // Example: 04/20/1994 // Example: 20/04/1994 String resultDateMD = null; String resultDateDM = null; DateMidnight parseDate1 = null; DateMidnight parseDate2 = null; if (assumemmddyyyy == null || assumemmddyyyy) { try { DateTimeParser[] parsers = { DateTimeFormat.forPattern("MM/dd/yyyy").getParser(), DateTimeFormat.forPattern("MM/dd yyyy").getParser(), DateTimeFormat.forPattern("MM/dd-yyyy").getParser(), DateTimeFormat.forPattern("MM/dd, yyyy").getParser(), DateTimeFormat.forPattern("MM/dd,yyyy").getParser(), DateTimeFormat.forPattern("MM dd yyyy").getParser(), DateTimeFormat.forPattern("MM-dd-yyyy").getParser(), DateTimeFormat.forPattern("MM.dd.yyyy").getParser(), DateTimeFormat.forPattern("MM. dd. yyyy").getParser(), DateTimeFormat.forPattern("MM. dd. 
yyyy.").getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers) .toFormatter(); parseDate1 = LocalDate.parse(verbatimEventDate, formatter).toDateMidnight(); resultDateMD = parseDate1.toString("yyyy-MM-dd"); } catch (Exception e) { logger.debug(e.getMessage()); } } if (assumemmddyyyy == null || !assumemmddyyyy) { try { DateTimeParser[] parsers = { DateTimeFormat.forPattern("dd/MM/yyyy").getParser(), DateTimeFormat.forPattern("dd/MM yyyy").getParser(), DateTimeFormat.forPattern("dd/MM-yyyy").getParser(), DateTimeFormat.forPattern("dd/MM, yyyy").getParser(), DateTimeFormat.forPattern("dd/MM,yyyy").getParser(), DateTimeFormat.forPattern("dd MM yyyy").getParser(), DateTimeFormat.forPattern("dd-MM-yyyy").getParser(), DateTimeFormat.forPattern("dd.MM.yyyy").getParser(), DateTimeFormat.forPattern("dd. MM. yyyy").getParser(), DateTimeFormat.forPattern("dd. MM. yyyy.").getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers) .toFormatter(); parseDate2 = LocalDate.parse(verbatimEventDate, formatter).toDateMidnight(); resultDateDM = parseDate2.toString("yyyy-MM-dd"); } catch (Exception e) { logger.debug(e.getMessage()); } } if (resultDateMD != null && resultDateDM == null) { result.setResultState(EventResult.EventQCResultState.DATE); result.setResult(resultDateMD); } else if (resultDateMD == null && resultDateDM != null) { result.setResultState(EventResult.EventQCResultState.DATE); result.setResult(resultDateDM); } else if (resultDateMD != null && resultDateDM != null) { if (resultDateMD.equals(resultDateDM)) { result.setResultState(EventResult.EventQCResultState.DATE); result.setResult(resultDateDM); } else { result.setResultState(EventResult.EventQCResultState.AMBIGUOUS); Interval range = null; if (parseDate1.isBefore(parseDate2)) { result.setResult(resultDateMD + "/" + resultDateDM); } else { result.setResult(resultDateDM + "/" + resultDateMD); } } } } if 
(result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN) && verbatimEventDate.matches("^([0-9]{1,2}|[A-Za-z]+)[-/.]([0-9]{1,2}|[A-Za-z]+)[-/. ][0-9]{4}$")) { // Example: 03/Jan/1982 // Example: Jan-03-1982 try { DateTimeParser[] parsers = { DateTimeFormat.forPattern("MMM/dd/yyyy").getParser(), DateTimeFormat.forPattern("dd/MMM/yyyy").getParser(), DateTimeFormat.forPattern("MMM/dd yyyy").getParser(), DateTimeFormat.forPattern("dd/MMM yyyy").getParser(), DateTimeFormat.forPattern("MMM-dd-yyyy").getParser(), DateTimeFormat.forPattern("dd-MMM-yyyy").getParser(), DateTimeFormat.forPattern("MMM-dd yyyy").getParser(), DateTimeFormat.forPattern("dd-MMM yyyy").getParser(), DateTimeFormat.forPattern("MMM.dd.yyyy").getParser(), DateTimeFormat.forPattern("dd.MMM.yyyy").getParser(), DateTimeFormat.forPattern("MM.dd.yyyy").getParser(), DateTimeFormat.forPattern("dd.MM.yyyy").getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers).toFormatter(); DateMidnight parseDate = LocalDate.parse(verbatimEventDate, formatter.withLocale(Locale.ENGLISH)) .toDateMidnight(); resultDate = parseDate.toString("yyyy-MM-dd"); result.setResultState(EventResult.EventQCResultState.DATE); result.setResult(resultDate); } catch (Exception e) { logger.debug(e.getMessage()); } } if (result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN) && verbatimEventDate.matches("^[X*]{2}[-/. ]([0-9]{1,2}|[A-Za-z]+)[-/. 
][0-9]{4}$")) { // Example: XX-04-1982 (XX for day) // Example: XX-Jan-1995 try { DateTimeParser[] parsers = { DateTimeFormat.forPattern("MMM/yyyy").getParser(), DateTimeFormat.forPattern("MMM yyyy").getParser(), DateTimeFormat.forPattern("MMM-yyyy").getParser(), DateTimeFormat.forPattern("MMM yyyy").getParser(), DateTimeFormat.forPattern("MMM.yyyy").getParser(), DateTimeFormat.forPattern("MM.yyyy").getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers).toFormatter(); DateMidnight parseDate = LocalDate .parse(verbatimEventDate.substring(3), formatter.withLocale(Locale.ENGLISH)) .toDateMidnight(); resultDate = parseDate.toString("yyyy-MM"); result.setResultState(EventResult.EventQCResultState.RANGE); result.setResult(resultDate); } catch (Exception e) { logger.debug(e.getMessage()); } } if (result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN) && verbatimEventDate.matches("^[X*]{2}[-/. ][X*]{2,3}[-/. ][0-9]{4}$")) { // Example: XX-XXX-1995 // Example: **-**-1995 try { DateTimeParser[] parsers = { DateTimeFormat.forPattern("yyyy").getParser(), }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers).toFormatter(); String yearBit = verbatimEventDate.substring(verbatimEventDate.length() - 4); DateMidnight parseDate = LocalDate.parse(yearBit, formatter.withLocale(Locale.ENGLISH)) .toDateMidnight(); resultDate = parseDate.toString("yyyy"); result.setResultState(EventResult.EventQCResultState.RANGE); result.setResult(resultDate); } catch (Exception e) { logger.debug(e.getMessage()); } } if (verbatimEventDate.matches("^[0-9]{4}[-][0-9]{3}$")) { // Example: 1994-128 (three digits after year = day of year). 
if (result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN)) { try { DateTimeParser[] parsers = { DateTimeFormat.forPattern("yyyy-D").getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers) .toFormatter(); LocalDate parseDate = LocalDate.parse(verbatimEventDate, formatter); resultDate = parseDate.toString("yyyy-MM-dd"); logger.debug(resultDate); result.setResultState(EventResult.EventQCResultState.DATE); result.setResult(resultDate); } catch (Exception e) { logger.debug(e.getMessage()); } } } if (result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN)) { try { // Example: 1983-15 (two digits after year may fall into subsequent blocks). // Example: 1933-Mar DateTimeParser[] parsers = { DateTimeFormat.forPattern("yyyy/M").getParser(), DateTimeFormat.forPattern("yyyy-M").getParser(), DateTimeFormat.forPattern("yyyy-MMM").getParser(), DateTimeFormat.forPattern("yyyy.MMM").getParser(), DateTimeFormat.forPattern("yyyy.MMM.").getParser(), DateTimeFormat.forPattern("yyyy MMM.").getParser(), DateTimeFormat.forPattern("yyyy MMM").getParser(), DateTimeFormat.forPattern("yyyy. MMM.").getParser(), DateTimeFormat.forPattern("yyyy. 
MMM").getParser(), DateTimeFormat.forPattern("yyyy/MMM").getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers).toFormatter(); String cleaned = cleanMonth(verbatimEventDate); LocalDate parseDate = LocalDate.parse(cleaned, formatter.withLocale(Locale.ENGLISH)); resultDate = parseDate.toString("yyyy-MM"); // resultDate = parseDate.dayOfMonth().withMinimumValue() + "/" + parseDate.dayOfMonth().withMaximumValue(); logger.debug(resultDate); if (verbatimEventDate.matches("^[0-9]{4}[-][0-9]{2}$")) { String century = verbatimEventDate.substring(0, 2); String startBit = verbatimEventDate.substring(0, 4); String endBit = verbatimEventDate.substring(5, 7); // 1815-16 won't parse here, passes to next block // 1805-06 could be month or abbreviated year // 1805-03 should to be month if (Integer.parseInt(startBit) >= Integer.parseInt(century + endBit)) { result.setResultState(EventResult.EventQCResultState.RANGE); result.setResult(resultDate); } else { result.setResultState(EventResult.EventQCResultState.SUSPECT); result.setResult(resultDate); } } else { result.setResultState(EventResult.EventQCResultState.RANGE); result.setResult(resultDate); } } catch (Exception e) { logger.debug(e.getMessage()); } } if (result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN) && verbatimEventDate.matches("^[0-9]{4}[-][0-9]{2}$")) { // Example: 1884-85 (two digits look like year later in century). 
try { String century = verbatimEventDate.substring(0, 2); String startBit = verbatimEventDate.substring(0, 4); String endBit = verbatimEventDate.substring(5, 7); String assembly = startBit + "/" + century + endBit; logger.debug(assembly); Interval parseDate = Interval.parse(assembly); logger.debug(parseDate); resultDate = parseDate.getStart().toString("yyyy") + "/" + parseDate.getEnd().toString("yyyy"); result.setResultState(EventResult.EventQCResultState.RANGE); result.setResult(resultDate); } catch (Exception e) { logger.debug(e.getMessage()); } } if (result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN) && verbatimEventDate.matches("^[0-9]{4}[0-9]{2}[0-9]{2}$") && !verbatimEventDate.endsWith("0000")) { // Example: 19950315 try { DateTimeParser[] parsers = { DateTimeFormat.forPattern("yyyyMMdd").getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers).toFormatter(); DateMidnight parseDate = LocalDate.parse(verbatimEventDate, formatter.withLocale(Locale.ENGLISH)) .toDateMidnight(); resultDate = parseDate.toString("yyyy-MM-dd"); result.setResultState(EventResult.EventQCResultState.DATE); result.setResult(resultDate); } catch (Exception e) { logger.debug(e.getMessage()); } } if (result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN)) { // Example: 1845 try { DateTimeParser[] parsers = { DateTimeFormat.forPattern("yyyy").getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers).toFormatter(); LocalDate parseDate = LocalDate.parse(verbatimEventDate, formatter); resultDate = parseDate.dayOfYear().withMinimumValue() + "/" + parseDate.dayOfYear().withMaximumValue(); logger.debug(resultDate); result.setResultState(EventResult.EventQCResultState.RANGE); result.setResult(resultDate); } catch (Exception e) { logger.debug(e.getMessage()); } } if (result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN)) { // Multiple yyyy-mmm-ddd, mmm-dd-yyyy, 
dd-mmm-yyyy patterns. try { DateTimeParser[] parsers = { DateTimeFormat.forPattern("yyyy MMM dd").getParser(), DateTimeFormat.forPattern("yyyy MMM. dd").getParser(), DateTimeFormat.forPattern("yyyy, MMM dd").getParser(), DateTimeFormat.forPattern("yyyy, MMM. dd").getParser(), DateTimeFormat.forPattern("yyyy.MMM.dd").getParser(), DateTimeFormat.forPattern("yyyy.MMM.dd.").getParser(), DateTimeFormat.forPattern("yyyy. MMM. dd").getParser(), DateTimeFormat.forPattern("yyyy. MMM. dd.").getParser(), DateTimeFormat.forPattern("yyyy. MMM dd.").getParser(), DateTimeFormat.forPattern("yyyy. MMM dd").getParser(), DateTimeFormat.forPattern("yyyy MMM. dd.").getParser(), DateTimeFormat.forPattern("yyyy: MMM. dd.").getParser(), DateTimeFormat.forPattern("yyyy: MMM. dd").getParser(), DateTimeFormat.forPattern("yyyy: MMM dd").getParser(), DateTimeFormat.forPattern("yyyy:MMM dd").getParser(), DateTimeFormat.forPattern("yyyy:MMM. dd").getParser(), DateTimeFormat.forPattern("yyyy:MMM.dd").getParser(), DateTimeFormat.forPattern("yyyy MMM dd'st'").getParser(), DateTimeFormat.forPattern("yyyy MMM. dd'st'").getParser(), DateTimeFormat.forPattern("yyyy MMM dd'nd'").getParser(), DateTimeFormat.forPattern("yyyy MMM. dd'nd'").getParser(), DateTimeFormat.forPattern("yyyy MMM dd'rd'").getParser(), DateTimeFormat.forPattern("yyyy MMM. dd'rd'").getParser(), DateTimeFormat.forPattern("yyyy MMM dd'th'").getParser(), DateTimeFormat.forPattern("yyyy MMM. dd'th'").getParser(), DateTimeFormat.forPattern("MMM dd, yyyy").getParser(), DateTimeFormat.forPattern("MMM dd., yyyy").getParser(), DateTimeFormat.forPattern("MMMdd, yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'st', yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'nd', yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'rd', yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'd', yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'th', yyyy").getParser(), DateTimeFormat.forPattern("MMM. 
dd, yyyy").getParser(), DateTimeFormat.forPattern("MMM.dd, yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd'st', yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd'nd', yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd'rd', yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd'th', yyyy").getParser(), DateTimeFormat.forPattern("MMM.dd,yyyy").getParser(), DateTimeFormat.forPattern("MMM.dd'st',yyyy").getParser(), DateTimeFormat.forPattern("MMM.dd'nd',yyyy").getParser(), DateTimeFormat.forPattern("MMM.dd'rd',yyyy").getParser(), DateTimeFormat.forPattern("MMM.dd'd',yyyy").getParser(), DateTimeFormat.forPattern("MMM.dd'th',yyyy").getParser(), DateTimeFormat.forPattern("MMM.dd.yyyy").getParser(), DateTimeFormat.forPattern("MMM.dd'st'.yyyy").getParser(), DateTimeFormat.forPattern("MMM.dd'nd'.yyyy").getParser(), DateTimeFormat.forPattern("MMM.dd'rd'.yyyy").getParser(), DateTimeFormat.forPattern("MMM.dd'd'.yyyy").getParser(), DateTimeFormat.forPattern("MMM.dd'th'.yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd'st'. yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd'nd'. yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd'rd'. yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd'd'. yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd'th'. yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'st'. yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'nd'. yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'rd'. yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'd'. yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'th'. 
yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'st'.yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'nd'.yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'rd'.yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'd'.yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'th'.yyyy").getParser(), DateTimeFormat.forPattern("MMM-dd-yyyy").getParser(), DateTimeFormat.forPattern("MMM-dd yyyy").getParser(), DateTimeFormat.forPattern("MMM-dd, yyyy").getParser(), DateTimeFormat.forPattern("dd-MMM-yyyy").getParser(), DateTimeFormat.forPattern("dd.MMM.yyyy").getParser(), DateTimeFormat.forPattern("dd,MMM,yyyy").getParser(), DateTimeFormat.forPattern("dd.MMM.,yyyy").getParser(), DateTimeFormat.forPattern("dd. MMM.,yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd. yyyy").getParser(), DateTimeFormat.forPattern("MMM, dd yyyy").getParser(), DateTimeFormat.forPattern("MMM, dd. yyyy").getParser(), DateTimeFormat.forPattern("MMM, dd, yyyy").getParser(), DateTimeFormat.forPattern("MMM, dd., yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd, yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd/yyyy").getParser(), DateTimeFormat.forPattern("MMM dd,yyyy").getParser(), DateTimeFormat.forPattern("MMM dd, yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd,yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd-yyyy").getParser(), DateTimeFormat.forPattern("MMM.dd-yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd, yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd., yyyy").getParser(), DateTimeFormat.forPattern("MMM., dd, yyyy").getParser(), DateTimeFormat.forPattern("MMM.,dd, yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd'' yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'' yyyy").getParser(), DateTimeFormat.forPattern("dd. MMM. yyyy").getParser(), DateTimeFormat.forPattern("dd. 
MMM.yyyy").getParser(), DateTimeFormat.forPattern("dd MMM., yyyy").getParser(), DateTimeFormat.forPattern("dd MMM.,yyyy").getParser(), DateTimeFormat.forPattern("dd MMM,.yyyy").getParser(), DateTimeFormat.forPattern("dd MMM,. yyyy").getParser(), DateTimeFormat.forPattern("dd MMM..yyyy").getParser(), DateTimeFormat.forPattern("dd MMM, yyyy").getParser(), DateTimeFormat.forPattern("dd MMM yyyy").getParser(), DateTimeFormat.forPattern("dd MMM,yyyy").getParser(), DateTimeFormat.forPattern("dd MMM.yyyy").getParser(), DateTimeFormat.forPattern("ddMMM.yyyy").getParser(), DateTimeFormat.forPattern("ddMMM. yyyy").getParser(), DateTimeFormat.forPattern("dd.MMM-yyyy").getParser(), DateTimeFormat.forPattern("dd-MMM-yyyy").getParser(), DateTimeFormat.forPattern("dd.MMM yyyy").getParser(), DateTimeFormat.forPattern("dd. MMM yyyy").getParser(), DateTimeFormat.forPattern("dd, MMM, yyyy").getParser(), DateTimeFormat.forPattern("dd, MMM; yyyy").getParser(), DateTimeFormat.forPattern("dd. MMM; yyyy").getParser(), DateTimeFormat.forPattern("dd MMM-yyyy").getParser(), DateTimeFormat.forPattern("dd-MMM yyyy").getParser(), DateTimeFormat.forPattern("ddMMMyyyy").getParser(), DateTimeFormat.forPattern("MMM dd yyyy").getParser(), DateTimeFormat.forPattern("MMM dd/yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'st' yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'nd' yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'rd' yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'd' yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'th' yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd'st' yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd'nd' yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd'rd' yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd'd' yyyy").getParser(), DateTimeFormat.forPattern("MMM. 
dd'th' yyyy").getParser(), DateTimeFormat.forPattern("MMMdd yyyy").getParser(), DateTimeFormat.forPattern("MMM.dd yyyy").getParser(), DateTimeFormat.forPattern("dd MMM, yyyy").getParser(), DateTimeFormat.forPattern("dd'st' MMM, yyyy").getParser(), DateTimeFormat.forPattern("dd'nd' MMM, yyyy").getParser(), DateTimeFormat.forPattern("dd'rd' MMM, yyyy").getParser(), DateTimeFormat.forPattern("dd'd' MMM, yyyy").getParser(), DateTimeFormat.forPattern("dd'th MMM', yyyy").getParser(), DateTimeFormat.forPattern("dd MMM., yyyy").getParser(), DateTimeFormat.forPattern("dd'st' MMM., yyyy").getParser(), DateTimeFormat.forPattern("dd'nd' MMM., yyyy").getParser(), DateTimeFormat.forPattern("dd'rd' MMM., yyyy").getParser(), DateTimeFormat.forPattern("dd'th' MMM., yyyy").getParser(), DateTimeFormat.forPattern("dd MMM yyyy").getParser(), DateTimeFormat.forPattern("dd'st' MMM yyyy").getParser(), DateTimeFormat.forPattern("dd'nd' MMM yyyy").getParser(), DateTimeFormat.forPattern("dd'rd' MMM yyyy").getParser(), DateTimeFormat.forPattern("dd'd' MMM yyyy").getParser(), DateTimeFormat.forPattern("dd'th' MMM yyyy").getParser(), DateTimeFormat.forPattern("dd MMM. yyyy").getParser(), DateTimeFormat.forPattern("dd'st' MMM. yyyy").getParser(), DateTimeFormat.forPattern("dd'nd' MMM. yyyy").getParser(), DateTimeFormat.forPattern("dd'rd' MMM. yyyy").getParser(), DateTimeFormat.forPattern("dd'd' MMM. yyyy").getParser(), DateTimeFormat.forPattern("dd'th' MMM. 
yyyy").getParser(), DateTimeFormat.forPattern("dd'st' MMM, yyyy").getParser(), DateTimeFormat.forPattern("dd'nd' MMM, yyyy").getParser(), DateTimeFormat.forPattern("dd'rd' MMM, yyyy").getParser(), DateTimeFormat.forPattern("dd'd' MMM, yyyy").getParser(), DateTimeFormat.forPattern("dd'th' MMM, yyyy").getParser(), DateTimeFormat.forPattern("dd/MMM/yyyy").getParser(), DateTimeFormat.forPattern("dd/MMM yyyy").getParser(), DateTimeFormat.forPattern("MMM/dd yyyy").getParser(), DateTimeFormat.forPattern("MMM/dd/yyyy").getParser(), DateTimeFormat.forPattern("MMM dd. yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'st'. yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'nd'. yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'rd'. yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'th'. yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd. yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd'st'. yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd'nd'. yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd'rd'. yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd'th'. yyyy").getParser(), DateTimeFormat.forPattern("MMM dd.yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd.yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd-yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd'st'-yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd'nd'-yyyy").getParser(), DateTimeFormat.forPattern("MMM. dd'rd'-yyyy").getParser(), DateTimeFormat.forPattern("MMM. 
dd'th'-yyyy").getParser(), DateTimeFormat.forPattern("MMM dd-yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'st'-yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'nd'-yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'rd'-yyyy").getParser(), DateTimeFormat.forPattern("MMM dd'th'-yyyy").getParser(), DateTimeFormat.forPattern("yyyy-MMM-dd").getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers).toFormatter(); String cleaned = cleanMonth(verbatimEventDate); cleaned = cleaned.replace("''", "'"); try { // Specify English locale, or local default will be used LocalDate parseDate = LocalDate.parse(cleaned, formatter.withLocale(Locale.ENGLISH)); resultDate = parseDate.toString("yyyy-MM-dd"); } catch (Exception e) { try { logger.debug(e.getMessage()); LocalDate parseDate = LocalDate.parse(cleaned, formatter.withLocale(Locale.FRENCH)); resultDate = parseDate.toString("yyyy-MM-dd"); } catch (Exception e1) { try { logger.debug(e1.getMessage()); LocalDate parseDate = LocalDate.parse(cleaned, formatter.withLocale(Locale.ITALIAN)); resultDate = parseDate.toString("yyyy-MM-dd"); } catch (Exception e2) { try { logger.debug(e2.getMessage()); LocalDate parseDate = LocalDate.parse(cleaned, formatter.withLocale(Locale.GERMAN)); resultDate = parseDate.toString("yyyy-MM-dd"); } catch (Exception e3) { try { logger.debug(e2.getMessage()); LocalDate parseDate = LocalDate.parse(cleaned, formatter.withLocale(Locale.forLanguageTag("es"))); resultDate = parseDate.toString("yyyy-MM-dd"); } catch (Exception e4) { logger.debug(e2.getMessage()); LocalDate parseDate = LocalDate.parse(cleaned, formatter.withLocale(Locale.forLanguageTag("pt"))); resultDate = parseDate.toString("yyyy-MM-dd"); } } } } } logger.debug(resultDate); result.setResultState(EventResult.EventQCResultState.DATE); result.setResult(resultDate); } catch (Exception e) { logger.debug(e.getMessage()); } } logger.debug(result.getResultState()); if 
(result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN)) { // Example: jan.-1992 // Example: January 1992 if (verbatimEventDate.matches(".*[0-9]{4}.*")) { try { DateTimeParser[] parsers = { DateTimeFormat.forPattern("MMM, yyyy").getParser(), DateTimeFormat.forPattern("MMM., yyyy").getParser(), DateTimeFormat.forPattern("MMM.,yyyy").getParser(), DateTimeFormat.forPattern("MMM.-yyyy").getParser(), DateTimeFormat.forPattern("MMM.yyyy").getParser(), DateTimeFormat.forPattern("MMM. yyyy").getParser(), DateTimeFormat.forPattern("MMM-yyyy").getParser(), DateTimeFormat.forPattern("MMM -yyyy").getParser(), DateTimeFormat.forPattern("MMM yyyy").getParser(), DateTimeFormat.forPattern("MMM/yyyy").getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers) .toFormatter(); String cleaned = cleanMonth(verbatimEventDate); // Strip off a trailing period after a final year if (cleaned.matches("^.*[0-9]{4}[.]$")) { cleaned = cleaned.replaceAll("[.]$", ""); } LocalDate parseDate = LocalDate.parse(cleaned, formatter.withLocale(Locale.ENGLISH)); resultDate = parseDate.toString("yyyy-MM"); logger.debug(resultDate); result.setResultState(EventResult.EventQCResultState.RANGE); result.setResult(resultDate); } catch (Exception e) { logger.debug(e.getMessage()); } } } if (result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN) && verbatimEventDate.matches("^[0-9]{4}([- ]+| to |[/ ]+)[0-9]{4}$")) { // Example: 1882-1995 // Example: 1882 to 1885 // Example: 1882/1885 try { String cleaned = verbatimEventDate.replace(" ", ""); cleaned = cleaned.replace("-", "/"); if (cleaned.matches("^[0-9]{4}to[0-9]{4}$")) { int len = verbatimEventDate.length(); int lastYear = len - 4; cleaned = verbatimEventDate.substring(0, 4) + "/" + verbatimEventDate.substring(lastYear, len); } logger.debug(cleaned); Interval parseDate = Interval.parse(cleaned); logger.debug(parseDate); resultDate = parseDate.getStart().toString("yyyy") + "/" + 
parseDate.getEnd().toString("yyyy"); result.setResultState(EventResult.EventQCResultState.RANGE); result.setResult(resultDate); } catch (Exception e) { logger.debug(e.getMessage()); } } if (result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN) && verbatimEventDate .matches("^[A-Za-z]+[.]{0,1}( and | to |[-][ ]{0,1}| [-] )[A-Za-z]+[.]{0,1}(, |[/ .])[0-9]{4}$")) { logger.debug(verbatimEventDate); // Example: Jan to Feb 1882 // Example: Jan-Feb/1882 verbatimEventDate = verbatimEventDate.replace(", ", " "); if (verbatimEventDate.matches("^[A-Za-z]+[.]{0,1}[-][A-Za-z]+[.]{0,1}[.][0-9]{4}$")) { // transform case with multiple periods to slash before year. verbatimEventDate = verbatimEventDate.substring(0, verbatimEventDate.length() - 5) + "/" + verbatimEventDate.substring(verbatimEventDate.length() - 4); logger.debug(verbatimEventDate); } if (verbatimEventDate.matches("^[A-Za-z]+[.]{0,1}[ ][-][ ]{1}[A-Za-z]+[.]{0,1}[/ .][0-9]{4}$")) { // remove space around dash. verbatimEventDate = verbatimEventDate.replace(" - ", "-"); logger.debug(verbatimEventDate); } if (verbatimEventDate.matches("^[A-Za-z]+[.]{0,1}[-][ ]{1}[A-Za-z]+[.]{0,1}[/ .][0-9]{4}$")) { // remove space trailing after dash. 
verbatimEventDate = verbatimEventDate.replace("- ", "-"); logger.debug(verbatimEventDate); } if (verbatimEventDate.matches("^[A-Za-z]+[.]{0,1} and {1}[A-Za-z]+[.]{0,1}[/ .][0-9]{4}$")) { // replace and with dash verbatimEventDate = verbatimEventDate.replace(" and ", "-"); logger.debug(verbatimEventDate); } if (verbatimEventDate.matches("^[A-Za-z]+[.]{0,1} to {1}[A-Za-z]+[.]{0,1}[/ .][0-9]{4}$")) { // replace to with dash verbatimEventDate = verbatimEventDate.replace(" to ", "-"); logger.debug(verbatimEventDate); } try { String[] bits = verbatimEventDate.replace(" ", "/").split("-"); if (bits != null && bits.length == 2) { String year = verbatimEventDate.substring(verbatimEventDate.length() - 4, verbatimEventDate.length()); String startBit = bits[0] + "/" + year; DateTimeParser[] parsers = { DateTimeFormat.forPattern("MMM/yyyy").getParser(), DateTimeFormat.forPattern("MMM./yyyy").getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers) .toFormatter(); LocalDate parseStartDate = LocalDate.parse(cleanMonth(startBit), formatter.withLocale(Locale.ENGLISH)); LocalDate parseEndDate = LocalDate.parse(cleanMonth(bits[1]), formatter.withLocale(Locale.ENGLISH)); resultDate = parseStartDate.toString("yyyy-MM") + "/" + parseEndDate.toString("yyyy-MM"); logger.debug(resultDate); result.setResultState(EventResult.EventQCResultState.RANGE); result.setResult(resultDate); } } catch (Exception e) { logger.debug(e.getMessage()); } } if (result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN) && verbatimEventDate.matches( "^[0-9]{1,2}[ /.]{0,1}[A-Za-z]+[.]{0,1}( - |[-])[0-9]{1,2}[ /.]{0,1}[A-Za-z]+[.]{0,1}[/ -.][0-9]{4}$")) { logger.debug(verbatimEventDate); // Example: 05/Jan/1882-03/Feb/1885 if (verbatimEventDate.matches( "^[0-9]{1,2}[ /.]{0,1}[A-Za-z]+[.]{0,1}[-][0-9]{1,2}[ /.]{0,1}[A-Za-z]+[.]{0,1}[-][0-9]{4}$")) { // transform case with multiple dashes to slash before year. 
verbatimEventDate = verbatimEventDate.substring(0, verbatimEventDate.length() - 5) + "/" + verbatimEventDate.substring(verbatimEventDate.length() - 4); logger.debug(verbatimEventDate); } if (verbatimEventDate.matches( "^[0-9]{1,2}[ /.]{0,1}[A-Za-z]+[.]{0,1}[-][0-9]{1,2}[ /.]{0,1}[A-Za-z]+[.]{0,1}[.][0-9]{4}$")) { // transform case with multiple periods to slash before year. verbatimEventDate = verbatimEventDate.substring(0, verbatimEventDate.length() - 5) + "/" + verbatimEventDate.substring(verbatimEventDate.length() - 4); logger.debug(verbatimEventDate); } try { String[] bits = verbatimEventDate.replace(" - ", "-").replace(" ", "/").split("-"); if (bits != null && bits.length == 2) { String year = verbatimEventDate.substring(verbatimEventDate.length() - 4, verbatimEventDate.length()); String startBit = bits[0] + "/" + year; logger.debug(cleanMonth(startBit)); logger.debug(cleanMonth(bits[1])); DateTimeParser[] parsers = { DateTimeFormat.forPattern("dd MMM/yyyy").getParser(), DateTimeFormat.forPattern("dd.MMM/yyyy").getParser(), DateTimeFormat.forPattern("dd/MMM/yyyy").getParser(), DateTimeFormat.forPattern("ddMMM/yyyy").getParser(), DateTimeFormat.forPattern("dd MMM./yyyy").getParser(), DateTimeFormat.forPattern("dd.MMM./yyyy").getParser(), DateTimeFormat.forPattern("dd/MMM./yyyy").getParser(), DateTimeFormat.forPattern("ddMMM./yyyy").getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers) .toFormatter(); LocalDate parseStartDate = LocalDate.parse(cleanMonth(startBit), formatter.withLocale(Locale.ENGLISH)); LocalDate parseEndDate = LocalDate.parse(cleanMonth(bits[1]), formatter.withLocale(Locale.ENGLISH)); resultDate = parseStartDate.toString("yyyy-MM-dd") + "/" + parseEndDate.toString("yyyy-MM-dd"); logger.debug(resultDate); result.setResultState(EventResult.EventQCResultState.RANGE); result.setResult(resultDate); } } catch (Exception e) { logger.debug(e.getMessage()); } } if 
(result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN) && verbatimEventDate.matches( "^[A-Za-z]+[.]{0,1}[ ]{0,1}[0-9]{1,2}( - |[-]| to | and | et )[A-Za-z]+[.]{0,1}[ ]{0,1}[0-9]{1,2}[/ .,][ ]{0,1}[0-9]{4}$")) { logger.debug(verbatimEventDate); // Example: Aug. 5 - Sept. 8, 1943 try { String[] bits = verbatimEventDate.replace(" to ", "-").replace(" - ", "-").replace(" and ", "-") .replace(" et ", "-").replace(", ", " ").replace(" ", "/").split("-"); if (bits != null && bits.length == 2) { String year = verbatimEventDate.substring(verbatimEventDate.length() - 4, verbatimEventDate.length()); String startBit = bits[0] + "/" + year; logger.debug(cleanMonth(startBit)); logger.debug(cleanMonth(bits[1])); DateTimeParser[] parsers = { DateTimeFormat.forPattern("MMM/dd/yyyy").getParser(), DateTimeFormat.forPattern("MMM./dd/yyyy").getParser(), DateTimeFormat.forPattern("MMM.dd/yyyy").getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers) .toFormatter(); LocalDate parseStartDate = LocalDate.parse(cleanMonth(startBit), formatter.withLocale(Locale.ENGLISH)); LocalDate parseEndDate = LocalDate.parse(cleanMonth(bits[1]), formatter.withLocale(Locale.ENGLISH)); resultDate = parseStartDate.toString("yyyy-MM-dd") + "/" + parseEndDate.toString("yyyy-MM-dd"); logger.debug(resultDate); result.setResultState(EventResult.EventQCResultState.RANGE); result.setResult(resultDate); } } catch (Exception e) { logger.debug(e.getMessage()); } } if (result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN) && verbatimEventDate.matches( "^[0-9]{1,2}([ ]{0,1}[-][ ]{0,1}| and | et | to )[0-9]{1,2}[ /.]{0,1}[A-Za-z]+[.]{0,1}[/ -.][0-9]{4}$")) { // Example: 11 et 14 VII 1910 // Example: 05-02 Jan./1992 String toCheck = verbatimEventDate; toCheck = toCheck.replace(" - ", "-").replace(" et ", "-").replace(" and ", "-").replace(" to ", "-"); // Note: "and" has different semantics than "to", may imply that a specimen record // 
represents two occurrences (e.g. flower on one date, fruit on another) rather than // a range, but dwc:eventDate representation for both forms on one event is a range. if (toCheck.matches("^[0-9]{1,2}[-][0-9]{1,2}[ /.]{0,1}[A-Za-z]+[.]{0,1}[-][0-9]{4}$")) { // transform case with multiple dashes to slash before year. toCheck = toCheck.substring(0, toCheck.length() - 5) + "/" + toCheck.substring(toCheck.length() - 4); logger.debug(toCheck); } if (toCheck.matches("^[0-9]{1,2}[-][0-9]{1,2}[ /.]{0,1}[A-Za-z]+[.]{0,1}[.][0-9]{4}$")) { // transform case with multiple periods to slash before year. toCheck = toCheck.substring(0, toCheck.length() - 5) + "/" + toCheck.substring(toCheck.length() - 4); logger.debug(toCheck); } try { String[] bits = toCheck.replace(" ", "/").split("-"); if (bits != null && bits.length == 2) { String year = toCheck.substring(toCheck.length() - 4, toCheck.length()); logger.debug(cleanMonth(bits[1])); DateTimeParser[] parsers = { DateTimeFormat.forPattern("dd MMM/yyyy").getParser(), DateTimeFormat.forPattern("dd.MMM/yyyy").getParser(), DateTimeFormat.forPattern("dd/MMM/yyyy").getParser(), DateTimeFormat.forPattern("ddMMM/yyyy").getParser(), DateTimeFormat.forPattern("dd MMM./yyyy").getParser(), DateTimeFormat.forPattern("dd.MMM./yyyy").getParser(), DateTimeFormat.forPattern("dd/MMM./yyyy").getParser(), DateTimeFormat.forPattern("ddMMM./yyyy").getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers) .toFormatter(); LocalDate parseEndDate = LocalDate.parse(cleanMonth(bits[1]), formatter.withLocale(Locale.ENGLISH)); String startMonthYear = parseEndDate.toString("MMM/yyyy"); String startBit = bits[0] + "/" + startMonthYear; logger.debug(startBit); LocalDate parseStartDate = LocalDate.parse(startBit, formatter.withLocale(Locale.ENGLISH)); resultDate = parseStartDate.toString("yyyy-MM-dd") + "/" + parseEndDate.toString("yyyy-MM-dd"); logger.debug(resultDate); 
result.setResultState(EventResult.EventQCResultState.RANGE); result.setResult(resultDate); } } catch (Exception e) { logger.debug(e.getMessage()); } } if (result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN) && verbatimEventDate.matches("^[0-9]{2}[-. ]XXX[-. ][0-9]{4}$")) { // Example: 05-XXX-1884 try { String start = verbatimEventDate.substring(verbatimEventDate.length() - 4) + "-01-" + verbatimEventDate.substring(0, 2); String end = verbatimEventDate.substring(verbatimEventDate.length() - 4) + "-12-" + verbatimEventDate.substring(0, 2); EventResult compositeResult = DateUtils.extractDateFromVerbatimER(start + "/" + end, yearsBeforeSuspect, assumemmddyyyy); logger.debug(compositeResult.getResultState()); if (compositeResult.getResultState().equals(EventResult.EventQCResultState.RANGE)) { result.setResultState(EventResult.EventQCResultState.RANGE); result.setResult(compositeResult.getResult()); logger.debug(result.getResult()); } } catch (Exception e) { logger.debug(e.getMessage()); } } if (result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN) && verbatimEventDate.matches("^[0-9]{4}-[0-9]{2}/[0-9]{4}-[0-9]{2}$")) { // Example: 1885-03/1886-04 try { Interval parseDate = Interval.parse(verbatimEventDate); logger.debug(parseDate); resultDate = parseDate.getStart().toString("yyyy-MM") + "/" + parseDate.getEnd().toString("yyyy-MM"); result.setResultState(EventResult.EventQCResultState.RANGE); result.setResult(resultDate); } catch (Exception e) { logger.debug(e.getMessage()); } } if (result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN)) { // Example: 1995-03-05/1996-05-08 try { Interval parseDate = Interval.parse(verbatimEventDate); logger.debug(parseDate); resultDate = parseDate.getStart().toString("yyyy-MM-dd") + "/" + parseDate.getEnd().toString("yyyy-MM-dd"); result.setResultState(EventResult.EventQCResultState.RANGE); result.setResult(resultDate); } catch (Exception e) { logger.debug(e.getMessage()); } } if 
(result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN)) { // Example: Jan,15-18 1882 // Example: Jan. 17 and 18 1882 String cleaned = verbatimEventDate.trim(); if (verbatimEventDate.matches("^[A-Za-z.]+[ ,]+[0-9]{1,2} and [0-9]{0,2}[ ,]+[0-9]{4}$")) { // Note: "and" has different semantics than "to", may imply that a specimen record // represents two occurrences (e.g. flower on one date, fruit on another) rather than // a range, but dwc:eventDate representation for both forms on one event is a range. cleaned = cleaned.replace(" and ", " to "); } if (verbatimEventDate.matches("^[A-Za-z.]+[ ,]+[0-9]{1,2}-[0-9]{0,2}[ ,]+[0-9]{4}$")) { cleaned = cleaned.replace("-", " to "); } if (cleaned.contains(" to ")) { String[] bits = cleaned.split(" to "); String yearRegex = ".*([0-9]{4}).*"; Matcher yearMatcher = Pattern.compile(yearRegex).matcher(cleaned); String monthRegex = "([A-Za-z.]+).*"; Matcher monthMatcher = Pattern.compile(monthRegex).matcher(cleaned); if (yearMatcher.matches() && monthMatcher.matches()) { String year = yearMatcher.group(1); String month = monthMatcher.group(1); if (bits.length == 2) { if (!bits[0].contains(year)) { bits[0] = bits[0] + " " + year; } if (!bits[1].contains(year)) { bits[1] = bits[1] + " " + year; } if (!bits[1].contains(month)) { bits[1] = month + " " + bits[1]; } Map<String, String> resultBit0 = DateUtils.extractDateFromVerbatim(bits[0]); if (resultBit0.size() > 0 && resultBit0.get("resultState").equals("date")) { Map<String, String> resultBit1 = DateUtils.extractDateFromVerbatim(bits[1]); if (resultBit1.size() > 0 && resultBit1.get("resultState").equals("date")) { result.setResultState(EventResult.EventQCResultState.RANGE); result.setResult(resultBit0.get("result") + "/" + resultBit1.get("result")); } } logger.debug(bits[0]); logger.debug(bits[1]); } } } } // Now test to see if result is sane. 
if (result != null && !result.getResultState().equals(EventResult.EventQCResultState.NOT_RUN)) { Interval testExtract = DateUtils.extractDateInterval(result.getResult()); if (testExtract == null || testExtract.getStart().getYear() < yearsBeforeSuspect) { result.setResultState(EventResult.EventQCResultState.SUSPECT); logger.debug(result.getResult()); logger.debug(testExtract); } else { logger.debug(result.getResult()); } if (!verbatimEventDate.matches(".*[0-9]{4}.*") && yearsBeforeSuspect > 999) { result = new EventResult(); logger.debug(result.getResult()); } } return result; }
From source file:org.filteredpush.qc.date.DateUtils.java
License:Apache License
/** * Test to see if a string appears to represent a date range of more than one day. * // w ww . j a v a 2s . c o m * @param eventDate to check * @return true if a date range, false otherwise. */ public static boolean isRange(String eventDate) { boolean isRange = false; if (eventDate != null) { String[] dateBits = eventDate.split("/"); if (dateBits != null && dateBits.length == 2) { //probably a range. DateTimeParser[] parsers = { DateTimeFormat.forPattern("yyyy-MM").getParser(), DateTimeFormat.forPattern("yyyy").getParser(), ISODateTimeFormat.dateOptionalTimeParser().getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers).toFormatter(); try { // must be at least a 4 digit year. if (dateBits[0].length() > 3 && dateBits[1].length() > 3) { DateMidnight startDate = LocalDate.parse(dateBits[0], formatter).toDateMidnight(); DateMidnight endDate = LocalDate.parse(dateBits[1], formatter).toDateMidnight(); // both start date and end date must parse as dates. isRange = true; } } catch (Exception e) { // not a date range e.printStackTrace(); logger.debug(e.getMessage()); } } else if (dateBits != null && dateBits.length == 1) { logger.debug(dateBits[0]); // Date bits does not contain a / // Is eventDate in the form yyyy-mm-dd, if so, not a range DateTimeParser[] parsers = { DateTimeFormat.forPattern("yyyy-MM-dd").getParser(), }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers).toFormatter(); try { DateMidnight date = DateMidnight.parse(eventDate, formatter); isRange = false; } catch (Exception e) { logger.debug(e.getMessage()); // not parsable with the yyyy-mm-dd parser. DateTimeParser[] parsers2 = { DateTimeFormat.forPattern("yyyy-MM").getParser(), DateTimeFormat.forPattern("yyyy").getParser(), }; formatter = new DateTimeFormatterBuilder().append(null, parsers2).toFormatter(); try { // must be at least a 4 digit year. 
if (dateBits[0].length() > 3) { DateMidnight startDate = DateMidnight.parse(dateBits[0], formatter); // date must parse as either year or year and month dates. isRange = true; } } catch (Exception e1) { // not a date range } } } } return isRange; }
From source file:org.filteredpush.qc.date.DateUtils.java
License:Apache License
/** * Does eventDate match an ISO date that contains a time (including the instant of * midnight (a time with all zero elements)). * //from ww w.j a va 2 s. co m * @param eventDate string to check for an ISO date with a time. * @return true if eventDate is an ISO date that includes a time, or if eventDate is an * ISO date range either the start or end of which contains a time. */ public static boolean containsTime(String eventDate) { boolean result = false; if (!isEmpty(eventDate)) { if (eventDate.endsWith("UTC")) { eventDate = eventDate.replace("UTC", "Z"); } DateTimeParser[] parsers = { ISODateTimeFormat.dateHour().getParser(), ISODateTimeFormat.dateTimeParser().getParser(), ISODateTimeFormat.dateHourMinute().getParser(), ISODateTimeFormat.dateHourMinuteSecond().getParser(), ISODateTimeFormat.dateTime().getParser() }; DateTimeFormatter formatter = new DateTimeFormatterBuilder().append(null, parsers).toFormatter(); if (eventDate.matches("^[0-9]{4}[-][0-9]{2}[-][0-9]{2}[Tt].+")) { try { LocalDate match = LocalDate.parse(eventDate, formatter); result = true; logger.debug(match); } catch (Exception e) { // not a date with a time logger.error(e.getMessage()); } } if (isRange(eventDate) && eventDate.contains("/") && !result) { String[] bits = eventDate.split("/"); if (bits != null && bits.length > 1) { // does either start or end date contain a time? if (bits[0].matches("^[0-9]{4}[-][0-9]{2}[-][0-9]{2}[Tt].+")) { try { LocalDate match = LocalDate.parse(bits[0], formatter); result = true; logger.debug(match); } catch (Exception e) { // not a date with a time logger.error(e.getMessage()); } } if (bits[1].matches("^[0-9]{4}[-][0-9]{2}[-][0-9]{2}[Tt].+")) { try { LocalDate match = LocalDate.parse(bits[1], formatter); result = true; logger.debug(match); } catch (Exception e) { // not a date with a time logger.error(e.getMessage()); } } } } } return result; }
From source file:org.fixb.quickfix.QuickFixFieldExtractor.java
License:Apache License
@SuppressWarnings("unchecked") private <T> T getFieldValueFromMap(FieldMap message, Class<T> type, int tag) { try {//from w ww . ja v a2 s .co m if (type == String.class) { return (T) message.getString(tag); } else if (type == Boolean.class || type == boolean.class) { return (T) Boolean.valueOf(message.getBoolean(tag)); } else if (type == Character.class || type == char.class) { return (T) Character.valueOf(message.getChar(tag)); } else if (type == Integer.class || type == int.class) { return (T) Integer.valueOf(message.getInt(tag)); } else if (type == Double.class || type == double.class) { return (T) Double.valueOf(message.getDouble(tag)); } else if (type == BigDecimal.class) { return (T) message.getDecimal(tag); } else if (type == Instant.class) { return (T) new Instant(message.getUtcTimeStamp(tag)); } else if (type == LocalDate.class) { String dateString = message.getString(tag); return (T) LocalDate.parse(dateString, new DateTimeFormatterBuilder().appendPattern("yyyyMMdd").toFormatter()); } else if (type == LocalTime.class) { Date utcTime = message.getUtcTimeOnly(tag); return (T) new DateTime(utcTime, DateTimeZone.getDefault()).toLocalTime(); } else if (type == LocalDateTime.class) { Date utcTime = message.getUtcTimeStamp(tag); return (T) new DateTime(utcTime, DateTimeZone.getDefault()).toLocalDateTime(); } else if (type == DateTime.class) { Date utcDate = message.getUtcTimeStamp(tag); return (T) new DateTime(utcDate, DateTimeZone.UTC); } else if (type == Date.class) { return (T) message.getUtcTimeStamp(tag); } else if (type.isEnum()) { int fieldValue = message.getInt(tag); for (T enumValue : type.getEnumConstants()) { int ordValue = ((Enum) enumValue).ordinal() + 1; if (ordValue == fieldValue) { return enumValue; } } throw new IllegalArgumentException("Invalid ordinal of enum type " + type + ": " + fieldValue); } else { throw new IllegalArgumentException("Can't map type to FIX type: " + type); } } catch (FieldNotFound e) { return null; } catch (FieldException 
e) { throw new FixException(e); } }
From source file:org.fuin.objects4j.common.LocalDateAdapter.java
License:Open Source License
/**
 * Converts a string into a local date using the ISO local date parser.
 *
 * @param str the text to convert, may be null
 * @return the parsed date, or null when str is null
 */
@Override
public final LocalDate unmarshal(final String str) {
    return (str == null) ? null : LocalDate.parse(str, ISODateTimeFormat.localDateParser());
}
From source file:org.fuin.objects4j.common.LocalDateAdapter.java
License:Open Source License
/**
 * Converts the stored string into a local date using the ISO local date parser.
 *
 * @param str the stored text, may be null
 * @return the parsed date, or null when str is null
 */
@Override
public final LocalDate convertToEntityAttribute(final String str) {
    return (str != null) ? LocalDate.parse(str, ISODateTimeFormat.localDateParser()) : null;
}
From source file:org.killbill.billing.jaxrs.resources.JaxRsResourceBase.java
License:Apache License
/**
 * Parses inputDate with LOCAL_DATE_FORMATTER on a best-effort basis.
 *
 * @param inputDate the text to parse, may be null
 * @return the parsed date, or null when inputDate is null or cannot be parsed
 */
private LocalDate extractLocalDate(final String inputDate) {
    if (inputDate == null) {
        return null;
    }
    try {
        return LocalDate.parse(inputDate, LOCAL_DATE_FORMATTER);
    } catch (final IllegalArgumentException ignored) {
        // an unparsable value is expected here and deliberately reported as "no date"
        return null;
    }
}
From source file:org.mousephenotype.dcc.exportlibrary.datastructure.converters.DatatypeConverter.java
License:Apache License
public static Calendar parseDate(String lexicalDate) { if (lexicalDate == null || lexicalDate.equals("")) { logger.trace("parsing is null or empty"); return null; }//w w w .j a v a 2 s . c om logger.trace("parsing date {}", lexicalDate); LocalDate localDate = LocalDate.parse(lexicalDate, DateTimeFormat.forPattern(dateXMLpattern)); return localDate.toDateTimeAtStartOfDay().toGregorianCalendar(); }
From source file:org.supercsv.cellprocessor.joda.ParseLocalDate.java
License:Apache License
/**
 * {@inheritDoc}
 *
 * <p>This implementation produces a {@code LocalDate} from the text using the supplied formatter.
 */
@Override
protected LocalDate parse(final String string, final DateTimeFormatter formatter) {
    // LocalDate.parse(String, DateTimeFormatter) is defined as formatter.parseLocalDate(String)
    return formatter.parseLocalDate(string);
}
From source file:org.vaadin.addons.javaee.fields.converter.StringToLocalDateConverter.java
License:Apache License
@Override public LocalDate convertToModel(String value, Class<? extends LocalDate> targetType, Locale locale) throws Converter.ConversionException { if (value == null) { return null; }//from w w w . jav a 2 s . c o m return LocalDate.parse(value, DateTimeFormat.shortDate().withLocale(locale)); }