Example usage for org.jsoup.nodes Document getElementById

List of usage examples for org.jsoup.nodes Document getElementById

Introduction

In this page you can find the example usage for org.jsoup.nodes Document getElementById.

Prototype

public Element getElementById(String id) 

Source Link

Document

Find an element by ID, including or under this element.

Usage

From source file:com.rest.samples.getTipoCambioBanxico.java

public static void main(String[] args) {
    String url = "http://www.banxico.org.mx/tipcamb/llenarTiposCambioAction.do?idioma=sp";
    try {//w ww  . jav  a  2 s .c  om
        HttpClient hc = HttpClientBuilder.create().build();
        HttpGet request = new HttpGet(url);
        request.setHeader("User-Agent", "Mozilla/5.0");
        request.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");

        HttpResponse res = hc.execute(request);
        if (res.getStatusLine().getStatusCode() != 200) {
            throw new RuntimeException("Failed : HTTP eror code: " + res.getStatusLine().getStatusCode());
        }

        BufferedReader rd = new BufferedReader(new InputStreamReader(res.getEntity().getContent()));
        StringBuffer result = new StringBuffer();
        String line = "";
        while ((line = rd.readLine()) != null) {
            result.append(line);
        }
        Document doc = Jsoup.parse(result.toString());
        Element tipoCambioFix = doc.getElementById("FIX_DATO");
        System.out.println(tipoCambioFix.text());

    } catch (IOException ex) {
        Logger.getLogger(SamplesUseHttpclient.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:edu.uci.ics.crawler4j.examples.login.LoginCrawlController.java

public static void main(String[] args) throws Exception {
    //        if (args.length != 2) {
    //            System.out.println("Needed parameters: ");
    //            System.out.println("\t rootFolder (it will contain intermediate crawl data)");
    //            System.out.println("\t numberOfCralwers (number of concurrent threads)");
    //            return;
    //        }//w  ww  .  j  a va  2 s.c o  m

    /*
       * crawlStorageFolder is a folder where intermediate crawl data is
     * stored.
     */
    String crawlStorageFolder = "/tmp/test_crawler/";

    /*
     * numberOfCrawlers shows the number of concurrent threads that should
     * be initiated for crawling.
     */
    int numberOfCrawlers = 1;

    CrawlConfig config = new CrawlConfig();

    config.setCrawlStorageFolder(crawlStorageFolder);

    /*
     * Be polite: Make sure that we don't send more than 1 request per
     * second (1000 milliseconds between requests).
     */
    config.setPolitenessDelay(1000);

    /*
     * You can set the maximum crawl depth here. The default value is -1 for
     * unlimited depth
     */
    config.setMaxDepthOfCrawling(0);

    /*
     * You can set the maximum number of pages to crawl. The default value
     * is -1 for unlimited number of pages
     */
    config.setMaxPagesToFetch(1000);

    /*
     * Do you need to set a proxy? If so, you can use:
     * config.setProxyHost("proxyserver.example.com");
     * config.setProxyPort(8080);
     *
     * If your proxy also needs authentication:
     * config.setProxyUsername(username); config.getProxyPassword(password);
     */

    /*
     * This config parameter can be used to set your crawl to be resumable
     * (meaning that you can resume the crawl from a previously
     * interrupted/crashed crawl). Note: if you enable resuming feature and
     * want to start a fresh crawl, you need to delete the contents of
     * rootFolder manually.
     */
    config.setResumableCrawling(false);

    config.setIncludeHttpsPages(true);
    HttpClient client = new DefaultHttpClient();
    HttpResponse response = client.execute(new HttpGet("http://58921.com/user/login"));

    HttpEntity entity = response.getEntity();
    String content = EntityUtils.toString(entity, HTTP.UTF_8);
    Document doc = Jsoup.parse(content);
    Elements elements = doc.getElementById("user_login_form").children();
    Element tokenEle = elements.last();
    String token = tokenEle.val();
    System.out.println(token);
    LoginConfiguration somesite;
    try {
        somesite = new LoginConfiguration("58921.com", new URL("http://58921.com/user/login"),
                new URL("http://58921.com/user/login/ajax?ajax=submit&__q=user/login"));
        somesite.addParam("form_id", "user_login_form");
        somesite.addParam("mail", "paxbeijing@gmail.com");
        somesite.addParam("pass", "cetas123");
        somesite.addParam("submit", "");
        somesite.addParam("form_token", token);
        config.addLoginConfiguration(somesite);
    } catch (MalformedURLException e) {
        e.printStackTrace();
    }

    /*
     * Instantiate the controller for this crawl.
     */
    PageFetcher pageFetcher = new PageFetcher(config);
    RobotstxtConfig robotstxtConfig = new RobotstxtConfig();
    robotstxtConfig.setEnabled(false);
    RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher);
    CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer);

    /*
     * For each crawl, you need to add some seed urls. These are the first
     * URLs that are fetched and then the crawler starts following links
     * which are found in these pages
     */

    controller.addSeed("http://58921.com/alltime?page=60");

    /*
     * Start the crawl. This is a blocking operation, meaning that your code
     * will reach the line after this only when crawling is finished.
     */
    controller.start(LoginCrawler.class, numberOfCrawlers);

    controller.env.close();
}

From source file:Main.java

public static Element getElement(Document doc, String id) {
    return doc.getElementById(id);
}

From source file:io.github.carlomicieli.footballdb.starter.pages.TableBuilderTests.java

private static Element notTableElement() {
    String html = "<div id=\"id\">ooo</div>";
    Document doc = JsoupHelper.fromHtml(html);
    return doc.getElementById("id");
}

From source file:io.github.carlomicieli.footballdb.starter.pages.TableBuilderTests.java

private static Element tableElement() {
    String html = "<table id=\"my-table\">" + "<tr><td>1</td><td>one</td><td>3</td></tr>"
            + "<tr><td colspan=\"3\"></td></tr>" + "<tr><td>2</td><td>two</td><td>1</td></tr>" + "</table>";
    Document doc = JsoupHelper.fromHtml(html);
    return doc.getElementById("my-table");
}

From source file:com.feilong.tools.jsoup.JsoupUtil.java

/**
 * getElementById.//from ww  w. j av  a 2  s. c o m
 * 
 * @param url
 *            the url
 * @param id
 *            the id
 * @return getElementById
 * @throws JsoupUtilException
 *             the jsoup util exception
 * @see #getDocument(String)
 * @see org.jsoup.nodes.Element#getElementById(String)
 */
public static Element getElementById(String url, String id) throws JsoupUtilException {
    Validate.notEmpty(url);
    Validate.notEmpty(id);
    Document document = getDocument(url);
    Element element = document.getElementById(id);
    return element;
}

From source file:com.shareplaylearn.OauthPasswordFlow.java

public static LoginInfo googleLogin(String username, String password, String clientId, String callbackUri)
        throws URISyntaxException, IOException, AuthorizationException, UnauthorizedException {

    CloseableHttpClient httpClient = HttpClients.custom().build();
    String oAuthQuery = "client_id=" + clientId + "&";
    oAuthQuery += "response_type=code&";
    oAuthQuery += "scope=openid email&";
    oAuthQuery += "redirect_uri=" + callbackUri;
    URI oAuthUrl = new URI("https", null, "accounts.google.com", 443, "/o/oauth2/auth", oAuthQuery, null);
    Connection oauthGetCoonnection = Jsoup.connect(oAuthUrl.toString());
    Connection.Response oauthResponse = oauthGetCoonnection.method(Connection.Method.GET).execute();
    if (oauthResponse.statusCode() != 200) {
        String errorMessage = "Error contacting Google's oauth endpoint: " + oauthResponse.statusCode() + " / "
                + oauthResponse.statusMessage();
        if (oauthResponse.body() != null) {
            errorMessage += oauthResponse.body();
        }//from www.j  ava2s  . c o m
        throw new AuthorizationException(errorMessage);
    }
    Map<String, String> oauthCookies = oauthResponse.cookies();
    Document oauthPage = oauthResponse.parse();
    Element oauthForm = oauthPage.getElementById("gaia_loginform");
    System.out.println(oauthForm.toString());
    Connection oauthPostConnection = Jsoup.connect("https://accounts.google.com/ServiceLoginAuth");
    HashMap<String, String> formParams = new HashMap<>();

    for (Element child : oauthForm.children()) {
        System.out.println("Tag name: " + child.tagName());
        System.out.println("attrs: " + Arrays.toString(child.attributes().asList().toArray()));
        if (child.tagName().equals("input") && child.hasAttr("name")) {

            String keyName = child.attr("name");
            String keyValue = null;

            if (child.hasAttr("value")) {
                keyValue = child.attr("value");
            }

            if (keyName != null && keyName.trim().length() != 0 && keyValue != null
                    && keyValue.trim().length() != 0) {
                oauthPostConnection.data(keyName, keyValue);
                formParams.put(keyName, keyValue);
            }
        }
    }
    oauthPostConnection.cookies(oauthCookies);
    formParams.put("Email", username);
    formParams.put("Passwd-hidden", password);
    //oauthPostConnection.followRedirects(false);
    System.out.println("form post params were: ");
    for (Map.Entry<String, String> kvp : formParams.entrySet()) {
        //DO NOT let passwords end up in the logs ;)
        if (kvp.getKey().equals("Passwd")) {
            continue;
        }
        System.out.println(kvp.getKey() + "," + kvp.getValue());
    }
    System.out.println("form cookies were: ");
    for (Map.Entry<String, String> cookie : oauthCookies.entrySet()) {
        System.out.println(cookie.getKey() + "," + cookie.getValue());
    }
    //System.exit(0);
    Connection.Response postResponse = null;
    try {
        postResponse = oauthPostConnection.method(Connection.Method.POST).timeout(5000).execute();
    } catch (Throwable t) {
        System.out.println("Failed to post login information to googles endpoint :/ " + t.getMessage());
        System.out.println("This usually means the connection is bad, shareplaylearn.com is down, or "
                + " google is being a punk - login manually and check.");
        assertTrue(false);
    }
    if (postResponse.statusCode() != 200) {
        String errorMessage = "Failed to validate credentials: " + oauthResponse.statusCode() + " / "
                + oauthResponse.statusMessage();
        if (oauthResponse.body() != null) {
            errorMessage += oauthResponse.body();
        }
        throw new UnauthorizedException(errorMessage);
    }
    System.out.println("Response headers (after post to google form & following redirect):");
    for (Map.Entry<String, String> header : postResponse.headers().entrySet()) {
        System.out.println(header.getKey() + "," + header.getValue());
    }
    System.out.println("Final response url was: " + postResponse.url().toString());
    String[] args = postResponse.url().toString().split("&");
    LoginInfo loginInfo = new LoginInfo();
    for (String arg : args) {
        if (arg.startsWith("access_token")) {
            loginInfo.accessToken = arg.split("=")[1].trim();
        } else if (arg.startsWith("id_token")) {
            loginInfo.idToken = arg.split("=")[1].trim();
        } else if (arg.startsWith("expires_in")) {
            loginInfo.expiry = arg.split("=")[1].trim();
        }
    }

    //Google doesn't actually throw a 401 or anything - it just doesn't redirect
    //and sends you back to it's login page to try again.
    //So this is what happens with an invalid password.
    if (loginInfo.accessToken == null || loginInfo.idToken == null) {
        //Document oauthPostResponse = postResponse.parse();
        //System.out.println("*** Oauth response from google *** ");
        //System.out.println(oauthPostResponse.toString());
        throw new UnauthorizedException(
                "Error retrieving authorization: did you use the correct username/password?");
    }
    String[] idTokenFields = loginInfo.idToken.split("\\.");
    if (idTokenFields.length < 3) {
        throw new AuthorizationException("Error parsing id token " + loginInfo.idToken + "\n" + "it only had "
                + idTokenFields.length + " field!");
    }
    String jwtBody = new String(Base64.decodeBase64(idTokenFields[1]), StandardCharsets.UTF_8);
    loginInfo.idTokenBody = new Gson().fromJson(jwtBody, OauthJwt.class);
    loginInfo.id = loginInfo.idTokenBody.sub;
    return loginInfo;
}

From source file:com.shareplaylearn.utilities.OauthPasswordFlow.java

public static LoginInfo googleLogin(String username, String password, String clientId, String callbackUri)
        throws URISyntaxException, IOException, AuthorizationException, UnauthorizedException {

    CloseableHttpClient httpClient = HttpClients.custom().build();
    String oAuthQuery = "client_id=" + clientId + "&";
    oAuthQuery += "response_type=code&";
    oAuthQuery += "scope=openid email&";
    oAuthQuery += "redirect_uri=" + callbackUri;
    URI oAuthUrl = new URI("https", null, "accounts.google.com", 443, "/o/oauth2/auth", oAuthQuery, null);
    Connection oauthGetCoonnection = Jsoup.connect(oAuthUrl.toString());
    Connection.Response oauthResponse = oauthGetCoonnection.method(Connection.Method.GET).execute();
    if (oauthResponse.statusCode() != 200) {
        String errorMessage = "Error contacting Google's oauth endpoint: " + oauthResponse.statusCode() + " / "
                + oauthResponse.statusMessage();
        if (oauthResponse.body() != null) {
            errorMessage += oauthResponse.body();
        }/*from w ww. ja v a2 s.co m*/
        throw new AuthorizationException(errorMessage);
    }
    Map<String, String> oauthCookies = oauthResponse.cookies();
    Document oauthPage = oauthResponse.parse();
    Element oauthForm = oauthPage.getElementById("gaia_loginform");
    System.out.println(oauthForm.toString());
    Connection oauthPostConnection = Jsoup.connect("https://accounts.google.com/ServiceLoginAuth");
    HashMap<String, String> formParams = new HashMap<>();
    for (Element child : oauthForm.children()) {
        if (child.tagName().equals("input") && child.hasAttr("name")) {

            String keyName = child.attr("name");
            String keyValue = null;

            if (keyName.equals("Email")) {
                keyValue = username;
            } else if (keyName.equals("Passwd")) {
                keyValue = password;
            } else if (child.hasAttr("value")) {
                keyValue = child.attr("value");
            }

            if (keyValue != null) {
                oauthPostConnection.data(keyName, keyValue);
                formParams.put(keyName, keyValue);
            }
        }
    }
    oauthPostConnection.cookies(oauthCookies);
    //oauthPostConnection.followRedirects(false);
    System.out.println("form post params were: ");
    for (Map.Entry<String, String> kvp : formParams.entrySet()) {
        //DO NOT let passwords end up in the logs ;)
        if (kvp.getKey().equals("Passwd")) {
            continue;
        }
        System.out.println(kvp.getKey() + "," + kvp.getValue());
    }
    System.out.println("form cookies were: ");
    for (Map.Entry<String, String> cookie : oauthCookies.entrySet()) {
        System.out.println(cookie.getKey() + "," + cookie.getValue());
    }
    Connection.Response postResponse = null;
    try {
        postResponse = oauthPostConnection.method(Connection.Method.POST).timeout(5000).execute();
    } catch (Throwable t) {
        System.out.println("Failed to post login information to googles endpoint :/ " + t.getMessage());
        System.out.println("This usually means the connection is bad, shareplaylearn.com is down, or "
                + " google is being a punk - login manually and check.");
        assertTrue(false);
    }
    if (postResponse.statusCode() != 200) {
        String errorMessage = "Failed to validate credentials: " + oauthResponse.statusCode() + " / "
                + oauthResponse.statusMessage();
        if (oauthResponse.body() != null) {
            errorMessage += oauthResponse.body();
        }
        throw new UnauthorizedException(errorMessage);
    }
    System.out.println("Response headers (after post to google form & following redirect):");
    for (Map.Entry<String, String> header : postResponse.headers().entrySet()) {
        System.out.println(header.getKey() + "," + header.getValue());
    }
    System.out.println("Final response url was: " + postResponse.url().toString());
    String[] args = postResponse.url().toString().split("&");
    LoginInfo loginInfo = new LoginInfo();
    for (String arg : args) {
        if (arg.startsWith("access_token")) {
            loginInfo.accessToken = arg.split("=")[1].trim();
        } else if (arg.startsWith("id_token")) {
            loginInfo.idToken = arg.split("=")[1].trim();
        } else if (arg.startsWith("expires_in")) {
            loginInfo.expiry = arg.split("=")[1].trim();
        }
    }

    //Google doesn't actually throw a 401 or anything - it just doesn't redirect
    //and sends you back to it's login page to try again.
    //So this is what happens with an invalid password.
    if (loginInfo.accessToken == null || loginInfo.idToken == null) {
        //Document oauthPostResponse = postResponse.parse();
        //System.out.println("*** Oauth response from google *** ");
        //System.out.println(oauthPostResponse.toString());
        throw new UnauthorizedException(
                "Error retrieving authorization: did you use the correct username/password?");
    }
    String[] idTokenFields = loginInfo.idToken.split("\\.");
    if (idTokenFields.length < 3) {
        throw new AuthorizationException("Error parsing id token " + loginInfo.idToken + "\n" + "it only had "
                + idTokenFields.length + " field!");
    }
    String jwtBody = new String(Base64.decodeBase64(idTokenFields[1]), StandardCharsets.UTF_8);
    loginInfo.idTokenBody = new Gson().fromJson(jwtBody, OauthJwt.class);
    loginInfo.id = loginInfo.idTokenBody.sub;
    return loginInfo;
}

From source file:com.adavr.player.media.XVClient.java

private Map<String, String> getFlashVars(String url) throws ClientException {
    client.get(url);//from  w w w .j  a  v a 2s.  c om
    Document document = client.getJsoupDocument();
    Element player = document.getElementById("player");
    Elements candidates = player.getElementsByAttribute("flashvars");
    if (candidates.isEmpty()) {
        return null;
    }
    return parseFlashVars(candidates.get(0).attr("flashvars"));
}

From source file:io.github.carlomicieli.footballdb.starter.parsers.SeasonGamesParser.java

private Optional<Element> gamesTable(Document doc) {
    return Optional.ofNullable(doc.getElementById("games"));
}