Example usage for org.jsoup.nodes Document getElementsByTag

List of usage examples for org.jsoup.nodes Document getElementsByTag

Introduction

On this page you can find example usages of org.jsoup.nodes Document getElementsByTag.

Prototype

public Elements getElementsByTag(String tagName) 

Source Link

Document

Finds elements, including and recursively under this element, with the specified tag name.

Usage

From source file:com.webbfontaine.valuewebb.timer.RatesUpdater.java

/**
 * Scrapes the page returned by {@code getPage()} and builds a map from currency code to rate.
 *
 * <p>The first {@code <table>} whose text contains "Dollar" is treated as the rates table. The
 * {@code <th>} cells of its first row are used to locate the "Pairs Code" and "Selling" columns
 * (both default to column 0 when no matching header is found). For every data row the pair code
 * is split around "GHS": pairs that start with "GHS" are stored under the trailing currency code
 * with the inverted rate, all others are stored under the text preceding the last "GHS" with the
 * rate as published.
 *
 * <p>Rows that cannot be interpreted (too few cells, non-numeric rate, zero rate that would have
 * to be inverted, pair code without "GHS") are logged and skipped instead of aborting the whole
 * update.
 *
 * @return the rates found; empty when no rates table could be located
 */
public HashMap<String, BigDecimal> ratesFromBank() {
    HashMap<String, BigDecimal> rates = new HashMap<String, BigDecimal>();

    Document doc = getPage();
    Element tableOfRates = null;
    for (Element table : doc.getElementsByTag("table")) {
        if (table.text().contains("Dollar")) {
            tableOfRates = table;
            break;
        }
    }

    if (tableOfRates == null) {
        LOGGER.error("Error reading rates from URL");
        return rates;
    }

    Elements trs = tableOfRates.getElementsByTag("tr");
    if (trs.isEmpty()) {
        // Nothing to read the header from; treat it like a missing table.
        LOGGER.error("Error reading rates from URL");
        return rates;
    }

    int pairsCodeIndex = 0;
    int sellingIndex = 0;
    Elements columns = trs.get(0).getElementsByTag("th");
    for (int i = 0; i < columns.size(); ++i) {
        String header = columns.get(i).text();
        if (header.equalsIgnoreCase("Pairs Code")) {
            pairsCodeIndex = i;
        }
        if (header.equalsIgnoreCase("Selling")) {
            sellingIndex = i;
        }
    }

    int highestIndex = Math.max(pairsCodeIndex, sellingIndex);
    for (Element tr : trs) {
        Elements tds = tr.getElementsByTag("td");
        if (tds.isEmpty()) {
            // Rows without <td> cells (e.g. the header row) carry no rate.
            continue;
        }
        if (tds.size() <= highestIndex) {
            LOGGER.error("Skipping rates row with too few cells: " + tr.text());
            continue;
        }

        String currPair = tds.get(pairsCodeIndex).text().trim();
        String rateText = tds.get(sellingIndex).text().trim().replace(",", "");

        BigDecimal rate;
        try {
            rate = new BigDecimal(rateText);
        } catch (NumberFormatException e) {
            LOGGER.error("Skipping rates row with non-numeric rate '" + rateText + "' for " + currPair);
            continue;
        }

        String curr;
        if (currPair.startsWith("GHS")) {
            if (rate.signum() == 0) {
                // Inverting a zero rate would throw ArithmeticException.
                LOGGER.error("Skipping rates row with zero rate for " + currPair);
                continue;
            }
            curr = currPair.substring(3);
            rate = BigDecimal.ONE.divide(rate, Constants.FRACTION_DIGITS_NUMBER_4,
                    Utils.getRoundingMode());
        } else {
            int ghsPos = currPair.lastIndexOf("GHS");
            if (ghsPos < 0) {
                LOGGER.error("Skipping rates row with unexpected pair code '" + currPair + "'");
                continue;
            }
            curr = currPair.substring(0, ghsPos);
        }

        rates.put(curr, rate);
    }
    return rates;
}

From source file:org.keycloak.testsuite.util.saml.RequiredConsentBuilder.java

/**
 * Builds the GET/POST request that answers a consent page.
 *
 * <p>Only the first {@code <form>} of the page is used. Of the inputs with id "kc-login" and
 * "kc-cancel", only the one matching the configured {@code approveConsent} decision is
 * submitted; every other input is submitted with its current value.
 *
 * @param consentPage HTML of the consent page
 * @param currentURI URI the page was loaded from; a POST form action is resolved against it
 * @return the request submitting the form
 * @throws IllegalArgumentException if the page contains no form
 */
public HttpUriRequest handleConsentPage(String consentPage, URI currentURI) {
    final org.jsoup.nodes.Document page = Jsoup.parse(consentPage);
    final List<NameValuePair> params = new LinkedList<>();

    for (Element form : page.getElementsByTag("form")) {
        final String action = form.attr("action");
        final boolean post = "post".equalsIgnoreCase(form.attr("method"));

        for (Element input : form.getElementsByTag("input")) {
            final String id = input.id();
            final boolean approveButton = Objects.equals(id, "kc-login");
            final boolean cancelButton = !approveButton && Objects.equals(id, "kc-cancel");

            if (!approveButton && !cancelButton) {
                // Ordinary field: always submitted with its current value.
                params.add(new BasicNameValuePair(input.attr("name"), input.val()));
                continue;
            }

            // Decision buttons: submit only the one that matches the requested outcome.
            final boolean selected = approveButton ? approveConsent : !approveConsent;
            if (selected) {
                params.add(new BasicNameValuePair(input.attr("name"), input.attr("value")));
            }
        }

        if (!post) {
            final UriBuilder builder = UriBuilder.fromPath(action);
            for (NameValuePair param : params) {
                builder.queryParam(param.getName(), param.getValue());
            }
            return new HttpGet(builder.build());
        }

        final HttpPost request = new HttpPost(currentURI.resolve(action));
        try {
            request.setEntity(new UrlEncodedFormEntity(params, "UTF-8"));
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
        return request;
    }

    throw new IllegalArgumentException("Invalid consent page: " + consentPage);
}

From source file:org.sonatype.nexus.proxy.maven.routing.internal.AbstractHttpRemoteStrategy.java

/**
 * Returns {@code true} if remote server (proxies by {@link MavenProxyRepository}) is recognized as server that MUST
 * NOT be trusted for any automatic routing feature.
 * /*from  ww w  .j a v  a 2s.com*/
 * @throws StrategyFailedException if server is recognized as blacklisted.
 */
protected void checkIsBlacklistedRemoteServer(final MavenProxyRepository mavenProxyRepository)
        throws StrategyFailedException, IOException {
    // check URL first, we currently test HTTP and HTTPS only for blacklist, if not, just skip this
    // but do not report blacklist at all (nor attempt)
    final String remoteUrl;
    try {
        remoteUrl = getRemoteUrlOf(mavenProxyRepository);
    } catch (MalformedURLException e) {
        // non HTTP/HTTPS, just return
        return;
    }
    final HttpClient httpClient = createHttpClientFor(mavenProxyRepository);
    {
        // NEXUS-5849: Artifactory will happily serve Central prefixes, effectively shading all the other artifacts from
        // it's group
        final HttpGet get = new HttpGet(remoteUrl);
        final BasicHttpContext httpContext = new BasicHttpContext();
        httpContext.setAttribute(HttpClientFactory.HTTP_CTX_KEY_REPOSITORY, mavenProxyRepository);
        final HttpResponse response = httpClient.execute(get, httpContext);

        try {
            if (response.containsHeader("X-Artifactory-Id")) {
                log.debug("Remote server of proxy {} recognized as ARTF by response header",
                        mavenProxyRepository);
                throw new StrategyFailedException("Server proxied by " + mavenProxyRepository
                        + " proxy repository is not supported by automatic routing discovery");
            }
            if (response.getStatusLine().getStatusCode() >= 200
                    && response.getStatusLine().getStatusCode() <= 499) {
                if (response.getEntity() != null) {
                    final Document document = Jsoup.parse(response.getEntity().getContent(), null, remoteUrl);
                    final Elements addressElements = document.getElementsByTag("address");
                    if (!addressElements.isEmpty()) {
                        final String addressText = addressElements.get(0).text();
                        if (addressText != null
                                && addressText.toLowerCase(Locale.ENGLISH).startsWith("artifactory")) {
                            log.debug("Remote server of proxy {} recognized as ARTF by address element in body",
                                    mavenProxyRepository);
                            throw new StrategyFailedException("Server proxied by " + mavenProxyRepository
                                    + " proxy repository is not supported by automatic routing discovery");
                        }
                    }
                }
            }
        } finally {
            EntityUtils.consumeQuietly(response.getEntity());
        }
    }
}

From source file:com.liato.bankdroid.banking.banks.Volvofinans.java

/**
 * Logs in and refreshes the list of card accounts.
 *
 * <p>The account overview is fetched as JSON. For every entry in its {@code data} array an
 * {@link Account} is added whose balance is {@code disponibeltBelopp - limit}. When the entry's
 * {@code namnUrl} HTML contains a link with an {@code href}, the statement URL derived from it is
 * remembered in {@code mAccountUrlMappings} under the account number.
 *
 * @throws LoginException if username or password is missing
 * @throws BankException if the response cannot be fetched or parsed, or contains no accounts
 */
@Override
public void update() throws BankException, LoginException, BankChoiceException {
    super.update();
    if (username == null || password == null || username.length() == 0 || password.length() == 0) {
        throw new LoginException(res.getText(R.string.invalid_username_password).toString());
    }
    urlopen = login();
    try {
        String response = urlopen
                .open("https://inloggad.volvofinans.se/privat/kund/kortkonto/oversikt/kortkonton.html");
        try {
            JSONObject object = (JSONObject) new JSONTokener(response).nextValue();
            JSONArray data = object.getJSONArray("data");

            int length = data.length();
            for (int index = 0; index < length; index++) {
                JSONObject account = data.getJSONObject(index);
                Document d = Jsoup.parse(account.getString("namnUrl"));
                Element link = d.getElementsByTag("a").first();
                // jsoup's attr() never returns null (it yields "" for a missing attribute),
                // so presence has to be tested with hasAttr().
                if (link != null && link.hasAttr("href")) {
                    mAccountUrlMappings.put(account.getString("kontonummer"),
                            link.attr("href").replace("/info.html", "/info/kontoutdrag.html"));
                }
                accounts.add(new Account(
                        String.format("%s (%s)", account.getString("namn"), account.getString("kontonummer")),
                        Helpers.parseBalance(account.getString("disponibeltBelopp"))
                                .subtract(Helpers.parseBalance(account.getString("limit"))),
                        account.getString("kontonummer")));
            }
        } catch (JSONException e) {
            throw new BankException(e.getMessage());
        }
        if (accounts.isEmpty()) {
            throw new BankException(res.getText(R.string.no_accounts_found).toString());
        }
    } catch (ClientProtocolException e) {
        throw new BankException(e.getMessage());
    } catch (IOException e) {
        throw new BankException(e.getMessage());
    } finally {
        // Must run on every exit path, including the failures above.
        super.updateComplete();
    }
}

From source file:eu.masconsult.bgbanking.banks.procreditbank.ProcreditClient.java

/**
 * Fetches the accounts page and converts the rows of its first table into bank accounts.
 *
 * <p>Rows for which {@code obtainBankAccountFromHtmlTableRow} returns {@code null} are skipped.
 *
 * @param authtoken token used to obtain the HTTP client for the request
 * @return the accounts found on the page, possibly empty
 * @throws IOException if the request fails
 * @throws ParseException if the server answers with an unexpected status, or the page does not
 *         contain the expected accounts table
 * @throws AuthenticationException if the server answers with a temporary redirect, which is
 *         treated as an expired session
 */
@Override
public List<RawBankAccount> getBankAccounts(String authtoken)
        throws IOException, ParseException, AuthenticationException {

    DefaultHttpClient httpClient = getHttpClient(authtoken);

    // Create an array that will hold the server-side account
    final ArrayList<RawBankAccount> bankAccounts = new ArrayList<RawBankAccount>();

    // Get the accounts list
    Log.i(TAG, "Getting from: " + GET_BANK_ACCOUNTS_URI);
    final HttpGet get = new HttpGet(GET_BANK_ACCOUNTS_URI);
    get.setHeader("Accept", "*/*");

    Log.v(TAG, "sending " + get.toString());
    final HttpResponse resp = httpClient.execute(get);
    final int statusCode = resp.getStatusLine().getStatusCode();

    if (statusCode == HttpStatus.SC_OK) {
        String response = EntityUtils.toString(resp.getEntity());
        Log.v(TAG, "response = " + response);
        // Our request to the server was successful, now we need to parse
        // the result
        Document doc = Jsoup.parse(response, BASE_URL);

        // first() returns null instead of throwing when nothing matches, which lets an
        // unexpected page be reported through the declared ParseException.
        Element table = doc.getElementsByTag("table").first();
        Element tableBody = table == null ? null : table.getElementsByTag("tbody").first();
        if (tableBody == null) {
            throw new ParseException("no accounts table found in response");
        }

        for (Element row : tableBody.getElementsByTag("tr")) {
            RawBankAccount bankAccount = obtainBankAccountFromHtmlTableRow(row);
            if (bankAccount != null) {
                bankAccounts.add(bankAccount);
            }
        }
    } else if (statusCode == HttpStatus.SC_MOVED_TEMPORARILY) {
        // TODO: validate session has expired
        Log.e(TAG, "Authentication exception in getting bank accounts");
        throw new AuthenticationException("session has expired");
    } else {
        throw new ParseException("status after get accounts: " + statusCode + " "
                + resp.getStatusLine().getReasonPhrase());
    }

    return bankAccounts;
}

From source file:mx.itdurango.rober.siitdocentes.asynctasks.GruposTask.java

/**
 * Processes the HTML returned by the group-list request, breaking it down into a list of
 * {@code Grupos} that is handed to the groups list view.
 *
 * <p>Only the first table of the document is read. Rows whose first cell does not yield a
 * subject key (for example the header row) are ignored. If the document contains no table at
 * all, the list view is given an empty adapter.
 *
 * @param html HTML body of the request result
 */
public void procesa(String html) {
    // Parse the HTML into a document that can be queried.
    Document doc = Jsoup.parse(html);
    // List that will hold the information of every group.
    ArrayList<Grupos> gcs = new ArrayList<Grupos>();

    // The first table holds the content of interest; first() yields null when there is none.
    Element tabla = doc.getElementsByTag("table").first();
    if (tabla != null) {
        // Walk every row of the table, turning each one into an object.
        for (Element tr : tabla.getElementsByTag("tr")) {
            // Object that stores the information of the current group.
            Grupos gc = new Grupos();
            // Tracks the (1-based) column being evaluated.
            int columna = 1;
            for (Element td : tr.getElementsByTag("td")) {
                switch (columna) {
                case 1: {
                    // Column 1 holds the subject in the form
                    // <b> SUBJECT_KEY  </b> <br> SUBJECT NAME
                    // Drop the opening bold tag, then split so that index 0 is the key and
                    // index 1 the name. The separator tolerates the different ways a line
                    // break may be serialized (<br>, <br/>, <br />, surrounding whitespace).
                    String[] m = td.html().replace("<b>", "").split("</b>\\s*<br\\s*/?>");
                    if (m.length >= 2) {
                        gc.setClave(m[0]);
                        gc.setNombre(m[1]);
                    }
                    break;
                }
                case 2:
                    // Column 2 holds the group.
                    gc.setGrupo(td.html());
                    break;
                case 3:
                    // Column 3 holds the number of enrolled students.
                    gc.setAlumnos(td.html());
                    break;
                case 4: {
                    // Column 4 holds the link used to enter partial grades, in the form
                    // <img src="http://siit.itdurango.edu.mx/img/iconos/captura_calif.gif"
                    // onclick="window.location = &quot;calificaciones_parciales.php?periodo=20141&amp;materia=IT8851&amp;grupo=8TA&quot;"
                    // alt="..." style="cursor:pointer">
                    // Splitting on "&quot;" leaves the URL with its parameters at index 1.
                    String[] separado = td.html().split("&quot;");
                    if (separado.length >= 2) {
                        gc.setUrl(separado[1].replace("&amp;", "&"));
                    }
                    break;
                }
                default:
                    // Further columns are not used.
                    break;
                }
                columna++;
            }
            // A null key means this row is not a subject, most likely the table header.
            if (gc.getClave() != null) {
                gcs.add(gc);
            }
        }
    }
    // Build a new adapter from the collected information and assign it to the groups list view.
    context.lvGrupos.setAdapter(new GruposAdapter(context, R.layout.item_grupos, gcs));
}

From source file:com.aestasit.markdown.slidery.converters.TextTemplateConverter.java

/**
 * Publishes the slide document to the template binding under the keys "document", "body"
 * (first {@code <body>} element, or {@code null} if there is none) and "slides".
 *
 * @param binding template variables to extend
 * @param slidesDocument parsed slide document
 */
private void bindDomElements(HashMap<String, Object> binding, Document slidesDocument) {
    binding.put("document", slidesDocument);

    final Object bodyElement = slidesDocument.getElementsByTag("body").first();
    binding.put("body", bodyElement);

    final Object slides = getSlideCollection(slidesDocument);
    binding.put("slides", slides);
}

From source file:it.polito.tellmefirst.web.rest.enhance.Enhancer.java

/**
 * Looks up an image URL for a resource through the MediaWiki APIs.
 *
 * <p>The Wikipedia edition is chosen from the URI ("en" when it starts with
 * {@code http://dbpedia}, "it" otherwise). The page image of the article titled {@code label}
 * is tried first; if there is none, the entity is looked up on Wikidata and the first image
 * listed for it is used.
 *
 * @param uri resource URI, used to pick the language edition and for logging
 * @param label article title; spaces are replaced by '+' before it is put into the query
 * @return a {@code Special:Redirect/file} URL for the image, or {@code DEFAULT_IMAGE} when no
 *         image is found or any step fails
 */
public String getImageFromMediaWiki(String uri, String label) {
    LOG.debug("[getImageFromMediaWiki] - BEGIN");
    try {
        String lang = (uri.startsWith("http://dbpedia")) ? "en" : "it";

        String filePageURL = "https://" + lang + ".wikipedia.org/wiki/Special:Redirect/file/";
        String commonsFilePageURL = "https://commons.wikimedia.org/wiki/Special:Redirect/file/";

        String queryStart = "https://" + lang + ".wikipedia.org/w/api.php?action=query&prop=pageimages&titles=";
        String queryEnd = "&format=xml";
        String query = queryStart + label.replace(" ", "+") + queryEnd;

        LOG.debug("Call to Wikimedia Commons service for the resource " + uri + ": " + query);
        String xml = restManager.getStringFromAPI(query);
        Document doc = Jsoup.parse(xml);
        Elements elementsFound = doc.getElementsByTag("page");
        String imageFileName = elementsFound.attr("pageimage");

        // Compare by content: '==' on strings tests identity and only matches by accident.
        if (!imageFileName.isEmpty()) {
            LOG.debug("[getImageFromMediaWiki] - END");
            return filePageURL + imageFileName;
        }

        LOG.debug("No images at all from Wikipedia page " + uri + ". We'll search on Wikidata.");

        String findQidStart = "https://wikidata.org/w/api.php?action=wbgetentities&format=xml&sites=" + lang
                + "wiki&titles=";
        String findQidEnd = "&props=info&format=xml";
        String findQid = findQidStart + label.replace(" ", "+") + findQidEnd;

        LOG.debug("Call to Wikimedia Commons service for the resource " + uri + ": " + findQid);
        xml = restManager.getStringFromAPI(findQid);
        doc = Jsoup.parse(xml);
        elementsFound = doc.getElementsByTag("entity");
        String qid = elementsFound.attr("title");

        if (qid.isEmpty()) {
            // No entity in the answer: there is nothing to ask images for.
            LOG.debug("[getImageFromMediaWiki] - END");
            return DEFAULT_IMAGE;
        }

        //XXX weak API but is the state of art; waiting for a better one https://phabricator.wikimedia.org/T95026
        findQidStart = "https://www.wikidata.org/w/api.php?action=query&prop=images&titles=";
        findQidEnd = "&format=xml";
        findQid = findQidStart + qid + findQidEnd;

        LOG.debug("Call to Wikimedia Commons service for the resource " + uri + ": " + findQid);
        xml = restManager.getStringFromAPI(findQid);
        doc = Jsoup.parse(xml);
        elementsFound = doc.getElementsByTag("im");
        imageFileName = elementsFound.attr("title").replace("File:", "");

        LOG.debug("[getImageFromMediaWiki] - END");
        return imageFileName.isEmpty() ? DEFAULT_IMAGE : commonsFilePageURL + imageFileName;
    } catch (Exception e) {
        // Best effort: any failure (network, parsing, null arguments) falls back to the default.
        LOG.error("[getImageFromMediaWiki] - EXCEPTION: ", e);
    }
    return DEFAULT_IMAGE;
}

From source file:org.keycloak.testsuite.util.saml.UpdateProfileBuilder.java

/**
 * Builds the GET/POST request that submits the Update Profile form.
 *
 * <p>Only the first {@code <form>} of the page is used. Each input whose name is a key of the
 * configured {@code parameters} is submitted with the configured value; other inputs are left
 * out. Configured names that match no input are reported with a warning.
 *
 * @param loginPage HTML of the update profile page
 * @param currentURI URI the page was loaded from; a POST form action is resolved against it
 *        (when {@code null}, the action is used as is)
 * @return the request submitting the form
 * @throws IllegalArgumentException if the page contains no form
 */
public HttpUriRequest handleUpdateProfile(String loginPage, URI currentURI) {
    org.jsoup.nodes.Document theUpdateProfilePage = Jsoup.parse(loginPage);
    Set<String> unusedParams = new HashSet<>(this.parameters.keySet());

    List<NameValuePair> formParameters = new LinkedList<>();
    for (Element form : theUpdateProfilePage.getElementsByTag("form")) {
        String method = form.attr("method");
        String action = form.attr("action");
        boolean isPost = "post".equalsIgnoreCase(method);

        for (Element input : form.getElementsByTag("input")) {
            String name = input.attr("name");
            if (this.parameters.containsKey(name)) {
                formParameters.add(new BasicNameValuePair(name, this.parameters.get(name)));
                unusedParams.remove(name);
            }
        }

        if (!unusedParams.isEmpty()) {
            LOG.warnf("Unused parameter names at Update Profile page: %s", unusedParams);
        }

        if (isPost) {
            // Resolve against the page URI so that a relative form action still yields an
            // absolute target; an absolute action is returned unchanged by resolve().
            URI target = currentURI != null ? currentURI.resolve(action) : URI.create(action);
            HttpPost res = new HttpPost(target);

            UrlEncodedFormEntity formEntity;
            try {
                formEntity = new UrlEncodedFormEntity(formParameters, "UTF-8");
            } catch (UnsupportedEncodingException e) {
                throw new RuntimeException(e);
            }
            res.setEntity(formEntity);

            return res;
        } else {
            UriBuilder b = UriBuilder.fromPath(action);
            for (NameValuePair parameter : formParameters) {
                b.queryParam(parameter.getName(), parameter.getValue());
            }
            return new HttpGet(b.build());
        }
    }

    throw new IllegalArgumentException("Invalid update profile form: " + loginPage);
}

From source file:jobhunter.dice.Client.java

/**
 * Downloads the job page at {@code url} and builds a {@link Job} from its {@code <meta>} tags.
 *
 * <p>The {@code twitter:text:*} entries supply position, company name, address and salary; the
 * HTML-unescaped job description and skills entries are concatenated, in document order, into
 * the description. Progress is reported through {@code update} before connecting, before
 * parsing and when done.
 *
 * @return the job populated from the page
 * @throws IOException if the page cannot be fetched
 */
public Job execute() throws IOException, URISyntaxException {
    l.debug("Connecting to {}", url);

    update("Connecting", 1L);
    final Document page = Jsoup.connect(url).get();

    update("Parsing HTML", 2L);
    final Job job = Job.of();
    job.setPortal(DicePlugin.portal);
    job.setLink(url);

    final StringBuilder descriptionText = new StringBuilder();

    for (Element meta : page.getElementsByTag("meta")) {
        l.debug("Checking {}", meta.toString());

        final String metaName = meta.attr("name");
        final String metaContent = meta.attr("content");

        switch (metaName) {
        case "twitter:text:job_title":
            job.setPosition(metaContent);
            break;
        case "twitter:text:company":
            job.getCompany().setName(metaContent);
            break;
        case "twitter:text:city":
            job.setAddress(metaContent);
            break;
        case "twitter:text:salary":
            job.setSalary(metaContent);
            break;
        case "twitter:text:job_description_web":
        case "twitter:text:skills":
            // Both entries contribute to the free-text description.
            descriptionText.append(StringEscapeUtils.unescapeHtml4(metaContent));
            break;
        default:
            // Any other meta tag is irrelevant here.
            break;
        }
    }

    job.setDescription(descriptionText.toString());

    update("Done", 3L);
    return job;
}