Example usage for org.jsoup.nodes Document normalise

List of usage examples for org.jsoup.nodes Document normalise

Introduction

On this page you can find example usages of org.jsoup.nodes.Document.normalise().

Prototype

public Document normalise() 

Source Link

Document

Normalise the document.

Usage

From source file:com.astrientlabs.nyt.NYT.java

public String extractImageURL(int session, String memberType, String name) throws IOException {
     String url = "http://memberguide.gpo.gov/" + session + "/" + memberType + "/" + name;

     try {/*from   w w  w  .j  a v  a  2 s .c om*/
         Connection c = Jsoup.connect(url);
         c.userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.1) Gecko/20100101 Firefox/10.0.1");

         Document doc = c.get();
         doc.normalise();

         Element content = doc.getElementById("ctl00_ContentPlaceHolder1_pic");
         if (content != null) {
             String src = content.attr("src");
             //System.out.println(src + " vs " + doc.baseUri());

             if (src != null) {
                 URL u = new URL("http://memberguide.gpo.gov/" + session + "/" + memberType + "/" + src);
                 return u.toString();
             }
         }
     } catch (Exception e) {
         e.printStackTrace();
     }

     return null;
 }

From source file:mx.clickfactura.util.TipoCambioUtil.java

public String getTipoCambio(String fecha) throws CustomBadRequestException, CustomNotFoundException, Exception {

    Pattern pattern = Pattern.compile("^\\d{4}\\-\\d{2}\\-\\d{2}$");
    Matcher matcher = null;/*from w w  w  .  j a  va  2 s . c  om*/

    matcher = pattern.matcher(fecha.trim());

    if (!matcher.matches()) {
        throw new CustomBadRequestException("Fecha invalida, el formato debe ser: yyyy-MM-dd");
    }

    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");

    Calendar cal = new GregorianCalendar();

    cal.setTime(sdf.parse(fecha));

    String dia = (cal.get(Calendar.DATE) < 10) ? "0" + cal.get(Calendar.DATE) : cal.get(Calendar.DATE) + "";
    String mes = ((cal.get(Calendar.MONTH) + 1) < 10) ? "0" + (cal.get(Calendar.MONTH) + 1)
            : (cal.get(Calendar.MONTH) + 1) + "";
    String anio = cal.get(Calendar.YEAR) + "";

    String fechaInicial = dia + "%2F" + mes + "%2F" + anio;

    CloseableHttpClient client = HttpClients.createDefault();
    CookieStore cookies = new BasicCookieStore();
    String[] fechaSeparada = fecha.split("-");
    HttpGet get = new HttpGet("http://www.dof.gob.mx/indicadores_detalle.php?cod_tipo_indicador=158&dfecha="
            + fechaInicial + "&hfecha=" + fechaInicial);

    HttpContext httpContext = new BasicHttpContext();
    httpContext.setAttribute(HttpClientContext.COOKIE_STORE, cookies);
    CloseableHttpResponse response = client.execute(get, httpContext);

    //System.out.println(response.toString());      
    //System.out.println(response.getStatusLine());
    //System.out.println(response.getEntity().getContentLength());
    InputStream in = response.getEntity().getContent();
    Header encoding = response.getEntity().getContentEncoding();

    String body = IOUtils.toString(in, "UTF-8");
    //System.out.println(body);

    Document doc = Jsoup.parse(body, "UTF-8");

    doc = doc.normalise();

    //System.out.println(doc.toString());
    Elements e = doc.select("table");

    Iterator iterator = e.iterator();

    pattern = Pattern.compile("^\\d{2}\\.\\d{6}$");
    matcher = null;

    String tipoCambio = null;

    while (iterator.hasNext()) {
        Element xd = (Element) iterator.next();
        if (xd.getElementsByClass("txt").hasAttr("height")) {
            if (xd.getElementsByClass("txt").text().split(" ").length == 6) {

                String cambio = xd.getElementsByClass("txt").text().split(" ")[5];
                matcher = pattern.matcher(cambio.trim());

                if (matcher.matches()) {
                    tipoCambio = cambio;
                    //System.out.println(tipoCambio);
                    break;
                }

            }

        }

    }

    client.close();
    response.close();

    if (tipoCambio == null || tipoCambio.isEmpty()) {
        throw new CustomNotFoundException("No hay un tipo de cambio para el da: " + fecha);

    }

    return tipoCambio;

}