Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package be.fedict.lodtools.cpsv; import com.google.common.net.HttpHeaders; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; import java.text.MessageFormat; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.time.format.DateTimeParseException; import java.util.Collections; import java.util.Locale; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Model; import org.eclipse.rdf4j.rio.RDFFormat; import org.eclipse.rdf4j.rio.Rio; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * * @author Bart.Hanssens */ public class EliMatcher { private final static Logger LOG = LoggerFactory.getLogger(EliMatcher.class); private final static Pattern p = Pattern .compile("(Wet|Loi|Decreet|Dcret) (van|de|du) " + "(\\d{1,2} \\w+ \\d{4}) (.*)"); private final static String ELI = "https://id.belgium.be" + "/_query/eli/match?date={0}&type={1}&q={2}"; private final static DateTimeFormatter DATE_FR = DateTimeFormatter.ofPattern("dd MMMM yyyy", Locale.forLanguageTag("fr")); private final static DateTimeFormatter DATE_NL = DateTimeFormatter.ofPattern("dd MMMM yyyy", Locale.forLanguageTag("nl")); /** * Try to match framework using date, type and title * * @param date publication date * @param type document type * @param title title of the document * @return */ private static Set<IRI> match(String date, String type, String title) { Set<IRI> matched = Collections.emptySet(); URLConnection conn; try { String enc = URLEncoder.encode(title, StandardCharsets.UTF_8.toString()); URL u = new URL(MessageFormat.format(ELI, date, type, enc)); conn = u.openConnection(); conn.setRequestProperty(HttpHeaders.ACCEPT, RDFFormat.NTRIPLES.getDefaultMIMEType()); try (InputStream in = conn.getInputStream()) { Model m = Rio.parse(in, "http://pubserv.belgif.be", RDFFormat.NTRIPLES); matched = m.subjects().stream().map(IRI.class::cast).collect(Collectors.toSet()); LOG.info("Mapped {}", title); } } catch (MalformedURLException ex) { LOG.error("Could not build url"); } catch (FileNotFoundException ex) { LOG.warn("No matching ELI for {}", title); } catch (IOException ex) { LOG.error("Error matching: {}", ex); } return matched; } /** * Parse string to date * * @param str * @return */ private static String parseDate(String str) { LocalDate date = null; try { date = LocalDate.parse(str, DATE_FR); } catch (DateTimeParseException dte) { try { date = LocalDate.parse(str, DATE_NL); } catch (DateTimeParseException dte2) { LOG.error("Exception in parsing {} to date", str); } } return (date != null) ? date.format(DateTimeFormatter.ISO_LOCAL_DATE) : ""; } /** * Find a match with * * @param str * @return matched */ public static Set<IRI> match(String str) { Matcher matcher = p.matcher(str); if (!matcher.matches()) { return Collections.EMPTY_SET; } String m1 = matcher.group(1); String type = (m1.equals("Wet") || m1.equals("Loi")) ? "LAW" : "DECREE"; String date = parseDate(matcher.group(3)); String title = m1 + " " + matcher.group(4); return match(date, type, title); } }