List of usage examples for org.jsoup.nodes.Document.normalise()
public Document normalise()
From source file:com.astrientlabs.nyt.NYT.java
public String extractImageURL(int session, String memberType, String name) throws IOException { String url = "http://memberguide.gpo.gov/" + session + "/" + memberType + "/" + name; try {/*from w w w .j a v a 2 s .c om*/ Connection c = Jsoup.connect(url); c.userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.1) Gecko/20100101 Firefox/10.0.1"); Document doc = c.get(); doc.normalise(); Element content = doc.getElementById("ctl00_ContentPlaceHolder1_pic"); if (content != null) { String src = content.attr("src"); //System.out.println(src + " vs " + doc.baseUri()); if (src != null) { URL u = new URL("http://memberguide.gpo.gov/" + session + "/" + memberType + "/" + src); return u.toString(); } } } catch (Exception e) { e.printStackTrace(); } return null; }
From source file:mx.clickfactura.util.TipoCambioUtil.java
public String getTipoCambio(String fecha) throws CustomBadRequestException, CustomNotFoundException, Exception { Pattern pattern = Pattern.compile("^\\d{4}\\-\\d{2}\\-\\d{2}$"); Matcher matcher = null;/*from w w w . j a va 2 s . c om*/ matcher = pattern.matcher(fecha.trim()); if (!matcher.matches()) { throw new CustomBadRequestException("Fecha invalida, el formato debe ser: yyyy-MM-dd"); } SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); Calendar cal = new GregorianCalendar(); cal.setTime(sdf.parse(fecha)); String dia = (cal.get(Calendar.DATE) < 10) ? "0" + cal.get(Calendar.DATE) : cal.get(Calendar.DATE) + ""; String mes = ((cal.get(Calendar.MONTH) + 1) < 10) ? "0" + (cal.get(Calendar.MONTH) + 1) : (cal.get(Calendar.MONTH) + 1) + ""; String anio = cal.get(Calendar.YEAR) + ""; String fechaInicial = dia + "%2F" + mes + "%2F" + anio; CloseableHttpClient client = HttpClients.createDefault(); CookieStore cookies = new BasicCookieStore(); String[] fechaSeparada = fecha.split("-"); HttpGet get = new HttpGet("http://www.dof.gob.mx/indicadores_detalle.php?cod_tipo_indicador=158&dfecha=" + fechaInicial + "&hfecha=" + fechaInicial); HttpContext httpContext = new BasicHttpContext(); httpContext.setAttribute(HttpClientContext.COOKIE_STORE, cookies); CloseableHttpResponse response = client.execute(get, httpContext); //System.out.println(response.toString()); //System.out.println(response.getStatusLine()); //System.out.println(response.getEntity().getContentLength()); InputStream in = response.getEntity().getContent(); Header encoding = response.getEntity().getContentEncoding(); String body = IOUtils.toString(in, "UTF-8"); //System.out.println(body); Document doc = Jsoup.parse(body, "UTF-8"); doc = doc.normalise(); //System.out.println(doc.toString()); Elements e = doc.select("table"); Iterator iterator = e.iterator(); pattern = Pattern.compile("^\\d{2}\\.\\d{6}$"); matcher = null; String tipoCambio = null; while (iterator.hasNext()) { Element xd = (Element) 
iterator.next(); if (xd.getElementsByClass("txt").hasAttr("height")) { if (xd.getElementsByClass("txt").text().split(" ").length == 6) { String cambio = xd.getElementsByClass("txt").text().split(" ")[5]; matcher = pattern.matcher(cambio.trim()); if (matcher.matches()) { tipoCambio = cambio; //System.out.println(tipoCambio); break; } } } } client.close(); response.close(); if (tipoCambio == null || tipoCambio.isEmpty()) { throw new CustomNotFoundException("No hay un tipo de cambio para el da: " + fecha); } return tipoCambio; }