Example usage for org.jsoup.nodes Document select

List of usage examples for org.jsoup.nodes Document select

Introduction

On this page you can find example usages of the org.jsoup.nodes.Document.select method.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:app.data.parse.WebPageUtil.java

/**
 * Requests a web page and collects its final URL, title and a short description.
 *
 * <p>Title lookup order: {@code og:title}, {@code twitter:title}, then the document
 * title. Description lookup order: {@code og:description}, {@code twitter:description},
 * {@code meta[name=description]}, then the first {@code <p>} of the body whose text has
 * at least 20 characters (or the last {@code <p>} when none is that long). The
 * description is finally passed through {@code ellipsis(.., 140, "...")}.
 *
 * @param url          address of the page to inspect
 * @param urlInfoCache optional cache keyed by the (possibly rewritten) request URL;
 *                     may be {@code null}, in which case nothing is cached
 * @return the cached entry when one exists, otherwise a newly populated {@code WebPageInfo}
 * @throws IOException propagated from the underlying page request
 */
public static WebPageInfo parse(String url, Cache<String, WebPageInfo> urlInfoCache) throws IOException {
    String original = url;

    // hit toutiao.io
    // fixme http://toutiao.io/shares/640539/url
    if (original.startsWith("https://toutiao.io/posts/")) {
        original = original.replace("/posts/", "/k/");
    }

    // check cache
    if (urlInfoCache != null) {
        WebPageInfo cached = urlInfoCache.getIfPresent(original);
        if (cached != null) {
            return cached;
        }
    }

    WebPageInfo info = new WebPageInfo();

    // attach url
    Document doc = requestUrl(original);
    info.url = doc.baseUri(); // or doc.location()

    // hit gold.xitu.io: the entry page links to the shared article, so follow that link
    if (info.url.startsWith("http://gold.xitu.io/entry/")) {
        String sharedLink = doc.select("div[class=ellipsis]").select("a[class=share-link]").attr("href");
        // attr() yields "" when no link is present; keep the entry page instead of requesting ""
        if (!StringUtils.isEmpty(sharedLink)) {
            doc = requestUrl(sharedLink);
            info.url = doc.baseUri(); // or doc.location()
        }
    }

    info.url = smartUri(info.url);

    // get title
    String title = firstMetaContent(doc, "meta[property=og:title]", "meta[property=twitter:title]");
    info.title = StringUtils.isEmpty(title) ? doc.title() : title;

    // get desc
    String description = firstMetaContent(doc, "meta[property=og:description]",
            "meta[property=twitter:description]", "meta[name=description]");
    if (StringUtils.isEmpty(description)) {
        Element body = doc.body();
        if (body != null) {
            for (Element paragraph : body.select("p")) {
                description = paragraph.text();
                if (description != null && description.length() >= 20) {
                    break;
                }
            }
        }
    }
    info.description = ellipsis(description, 140, "...");

    // cache info
    if (urlInfoCache != null) {
        urlInfoCache.put(original, info);
    }
    return info;
}

/**
 * Returns the {@code content} attribute found by the first query that yields a
 * non-empty value, or {@code ""} when none does.
 */
private static String firstMetaContent(Document doc, String... cssQueries) {
    for (String cssQuery : cssQueries) {
        // select() never returns null; attr() returns "" when nothing matches
        String content = doc.select(cssQuery).attr("content");
        if (!StringUtils.isEmpty(content)) {
            return content;
        }
    }
    return "";
}

From source file:com.hp.test.framework.htmparse.UpdateTestCaseDesciption.java

/**
 * Walks the rows of every {@code table[id=tableStyle]} in the given HTML file and,
 * for each row whose first cell contains a link, passes that link's {@code href}
 * to {@code getTestDescription}.
 *
 * <p>A file that cannot be read is reported on standard output and skipped.
 *
 * @param path location of the HTML file to scan
 */
public static void getTestCaseHtmlPath(String path) {
    Document htmlFile;
    try {
        htmlFile = Jsoup.parse(new File(path), "UTF-8");
    } catch (IOException e) {
        System.out.println("Exception in parse Current Run html file" + e.getMessage());
        // nothing to walk; previously execution fell through to a NullPointerException
        return;
    }

    for (Element table : htmlFile.select("table[id=tableStyle]")) {
        for (Element row : table.select("tr")) {
            // only the first cell of a row is inspected
            Element firstCell = row.select("td").first();
            if (firstCell == null) {
                continue;
            }
            Element anchor = firstCell.select("a").first();
            if (anchor == null) {
                continue;
            }
            // read the attribute directly instead of overwriting the link text and
            // splitting the serialized element on quote characters
            String target = anchor.attr("href");
            if (target.length() > 0) {
                getTestDescription(target);
            }
        }
    }
}

From source file:com.hp.test.framework.htmparse.UpdateTestCaseDesciption.java

/**
 * Reads the HTML file at {@code basepath + path} and records, under the key
 * {@code path} in {@code TestCaseDesMap}, the text of the second cell of the first
 * {@code table[id=tableStyle]} row that has one. An existing entry is never
 * overwritten.
 *
 * <p>A file that cannot be read is reported on standard output and skipped.
 *
 * @param path report location relative to {@code basepath}; also used as the map key
 */
public static void getTestDescription(String path) {
    Document htmlFile;
    try {
        htmlFile = Jsoup.parse(new File(basepath + path), "UTF-8");
    } catch (IOException e) {
        System.out.println("Exception in parse Current Run html file" + e.getMessage());
        // nothing to read; previously execution fell through to a NullPointerException
        return;
    }

    if (TestCaseDesMap.containsKey(path)) {
        return;
    }

    for (Element table : htmlFile.select("table[id=tableStyle]")) {
        for (Element row : table.select("tr")) {
            Elements cells = row.select("td");
            if (cells.size() > 1) {
                // take the cell text directly rather than serializing and re-parsing it
                TestCaseDesMap.put(path, cells.get(1).text());
                return; // later rows could not replace the entry anyway
            }
        }
    }
}

From source file:com.itcs.commons.email.EmailAutoconfigClient.java

/**
 * Copies host, port and transport-security flags of every {@code outgoingServer}
 * element of type {@code smtp} into {@code settings}.
 *
 * @param doc      document containing the {@code outgoingServer} elements
 * @param settings map receiving the SMTP entries; the SSL and TLS flags are stored as
 *                 the strings {@code "true"} or {@code "false"}
 */
private static void extractOutgoingServerSettings(Document doc, Map<String, String> settings) {
    for (Element server : doc.select("outgoingServer")) {
        if (!server.attr("type").equals("smtp")) {
            continue;
        }

        settings.put(EnumEmailSettingKeys.SMTP_SERVER.getKey(), server.select("hostname").text());
        settings.put(EnumEmailSettingKeys.SMTP_PORT.getKey(), server.select("port").text());

        // both flags derive from the same socketType text
        String socketType = server.select("socketType").text();
        settings.put(EnumEmailSettingKeys.SMTP_SSL_ENABLED.getKey(),
                String.valueOf(socketType.equals("SSL")));
        settings.put(EnumEmailSettingKeys.TRANSPORT_TLS.getKey(),
                String.valueOf(socketType.equals("STARTTLS")));
    }
}

From source file:com.hp.test.framework.htmparse.UpdateTestCaseDesciption.java

/**
 * Rewrites the {@code table[id=tableStyle]} of the HTML report at {@code path} in place.
 *
 * <ul>
 *   <li>Each cell whose text contains "Test Method" has its text replaced with the
 *       description stored in {@code TestCaseDesMap} under the cell's first
 *       double-quoted value (a placeholder is stored and used when there is none).</li>
 *   <li>The first following cell showing a pass/fail/skip image records that status in
 *       {@code TestcaseStatusMap} under the same key.</li>
 *   <li>Header cells "Method Type" and "Test Case Name" are renamed.</li>
 * </ul>
 *
 * <p>A file that cannot be parsed is reported on standard output and left untouched.
 *
 * @param path location of the HTML report to rewrite
 * @throws IOException if the modified report cannot be written back
 */
public static void replaceDetailsTable(String path) throws IOException {
    File source = new File(path);
    Document report;
    try {
        report = Jsoup.parse(source, "UTF-8");
    } catch (IOException e) {
        System.out.println("Unable to open [" + source.getAbsolutePath() + "] for parsing!");
        // no document to rewrite; previously execution fell through to a NullPointerException
        return;
    }
    // children() holds live element references, so the edits below show up in the output
    Elements dom = report.children();

    String pendingKey = ""; // key of the last "Test Method" cell still waiting for a status
    for (Element td : report.select("table[id=tableStyle] td")) {
        String cellHtml = td.toString();

        // later checks win when several images appear in one cell
        String status = "";
        if (cellHtml.contains("pass.png")) {
            status = "pass";
        }
        if (cellHtml.contains("fail.png")) {
            status = "fail";
        }
        if (cellHtml.contains("skip.png")) {
            status = "skip";
        }

        if (TestCaseDesMap.containsKey(pendingKey) && status.length() > 1) {
            TestcaseStatusMap.put(pendingKey, status);
            pendingKey = "";
        }

        if (td.text().contains("Test Method")) {
            // the key is the first double-quoted value in the serialized cell
            String[] quoted = cellHtml.split("\"");
            if (quoted.length < 2) {
                // no quoted value: nothing to key on (this used to throw ArrayIndexOutOfBounds)
                continue;
            }
            String key = quoted[1];

            String description;
            if (TestCaseDesMap.containsKey(key)) {
                description = TestCaseDesMap.get(key);
            } else {
                description = "  ---------       ";
                TestCaseDesMap.put(key, description);
            }
            pendingKey = key;
            td.text(description);
        }
    }

    for (Element th : report.select("table[id=tableStyle] th")) {
        if (th.text().contains("Method Type")) {
            th.text("TestCase Description");
        }
        if (th.text().contains("Test Case Name")) {
            th.text("Testng Method");
        }
    }

    if (!source.canWrite()) {
        System.out.println("Can't write this file!");
    }
    // write with the same charset the file was read with, and close the writer on failure too
    try (BufferedWriter bw = java.nio.file.Files.newBufferedWriter(source.toPath(),
            java.nio.charset.StandardCharsets.UTF_8)) {
        bw.write(dom.toString());
    }
}

From source file:net.intelliant.util.UtilCommon.java

/**
 * Rewrites image sources in {@code html} to point into {@code imageUploadLocation}.
 *
 * <p>For every {@code <img>} whose {@code src} matches a jpg/jpeg/png/gif extension,
 * all occurrences of that {@code src} string in the markup are replaced with
 * {@code imageUploadLocation} followed by the file-name part of the original source.
 *
 * @param html markup to rewrite; returned unchanged when empty
 * @return the markup with the matching image sources replaced
 */
public static String getModifiedHtmlWithAbsoluteImagePath(String html) {
    if (UtilValidate.isEmpty(html)) {
        return html;
    }
    org.jsoup.nodes.Document doc = Jsoup.parse(html);
    Elements images = doc.select("img[src~=(?i)\\.(jpg|jpeg|png|gif)]");

    Set<String> handledSources = new HashSet<String>();
    for (Element image : images) {
        String src = image.attr("src");
        if (!handledSources.add(src)) {
            continue; // this source string has already been replaced everywhere
        }

        int separatorIndex = src.lastIndexOf("/");
        if (separatorIndex == -1) {
            // just in case someone plays with the html source
            separatorIndex = src.lastIndexOf("\\");
        }
        // With no separator the index is -1, so substring(0) keeps the whole value.
        // Previously this case appended the literal text "null" to the location.
        String fileName = src.substring(separatorIndex + 1);

        String finalLocation = new StringBuilder(imageUploadLocation).append(fileName).toString();
        html = StringUtil.replaceString(html, src, finalLocation);
    }
    return html;
}

From source file:com.itcs.commons.email.EmailAutoconfigClient.java

/**
 * Tells whether the autoconfig document for the address's domain declares an
 * {@code incomingServer} element with the given {@code type} attribute.
 *
 * @param emailAddress address whose domain settings are looked up
 * @param type         value compared against each element's {@code type} attribute
 * @return {@code true} when a matching element exists; {@code false} when there are no
 *         autoconfig settings, no match, or the lookup throws
 */
private static boolean existsIncommingType(String emailAddress, String type) {
    if (!existsAutoconfigSettings(emailAddress)) {
        return false;
    }

    boolean found = false;
    try {
        String domain = isGmailAddress(emailAddress) ? "gmail.com" : extractDomain(emailAddress);
        Document doc = settingsCache.get(domain);
        for (Element server : doc.select("incomingServer")) {
            if (server.attr("type").equals(type)) {
                found = true;
                break;
            }
        }
    } catch (Exception ex) {
        // any failure during the lookup is reported as "not found"
    }
    return found;
}

From source file:com.itcs.commons.email.EmailAutoconfigClient.java

/**
 * Copies host, port and SSL flag of every {@code incomingServer} element whose
 * {@code type} attribute equals {@code type} into {@code settings}.
 *
 * @param doc      document containing the {@code incomingServer} elements
 * @param settings map receiving the inbound entries; the SSL flag is stored as the
 *                 string {@code "true"} or {@code "false"}
 * @param type     server type to extract
 */
private static void extractIncommingServerSettings(Document doc, Map<String, String> settings, String type) {
    for (Element server : doc.select("incomingServer")) {
        if (!server.attr("type").equals(type)) {
            continue;
        }

        settings.put(EnumEmailSettingKeys.INBOUND_SERVER.getKey(), server.select("hostname").text());
        settings.put(EnumEmailSettingKeys.INBOUND_PORT.getKey(), server.select("port").text());

        boolean sslEnabled = server.select("socketType").text().trim().equals("SSL");
        settings.put(EnumEmailSettingKeys.INBOUND_SSL_ENABLED.getKey(), String.valueOf(sslEnabled));
    }
}

From source file:org.wso2.carbon.appmgt.sampledeployer.main.ApplicationPublisher.java

/**
 * Signs in to the web application under {@code webContext} and then reports
 * {@code hitCount} page hits to the statistics endpoint, one per second.
 *
 * <p>Sign-in flow: fetch the application page, read its {@code sessionDataKey} input,
 * post the credentials to {@code backEndUrl + "/commonauth"}, read the
 * {@code SAMLResponse} input from the reply and post it back to the application to
 * obtain the token passed along with every hit. For the {@code /notifi} and
 * {@code /travelBooking} contexts the reported page URL changes at one fifth and at
 * half of the hits.
 *
 * <p>The method returns early when an expected form input is missing or when the
 * thread is interrupted while waiting between hits.
 *
 * @param webContext   context path of the web application, e.g. {@code /notifi}
 * @param trackingCode tracking code forwarded with every hit
 * @param hitCount     number of hits to report
 */
private static void accsesWebPages(String webContext, String trackingCode, int hitCount) {
    String webAppurl = "http://" + ipAddress + ":8280" + webContext + "/1.0.0/";
    String statisticsUrl = "http://" + ipAddress + ":8280/statistics/";
    try {
        String loginHtmlPage = httpHandler.getHtml(webAppurl);
        Element sessionInput = Jsoup.parse(loginHtmlPage).select("input[name=sessionDataKey]").first();
        if (sessionInput == null) {
            // first() returns null when nothing matches; previously this was a NullPointerException
            log.error("No sessionDataKey input found at " + webAppurl);
            return;
        }
        String sessionDataKey = sessionInput.val();

        String responceHtml = httpHandler.doPostHttps(backEndUrl + "/commonauth",
                "username=admin&password=admin&sessionDataKey=" + sessionDataKey, "none",
                "application/x-www-form-urlencoded; charset=UTF-8");
        Element samlInput = Jsoup.parse(responceHtml).select("input[name=SAMLResponse]").first();
        if (samlInput == null) {
            log.error("No SAMLResponse input found in the authentication response for " + webAppurl);
            return;
        }
        String samlResponse = samlInput.val();

        String appmSamlSsoTokenId = httpHandler.doPostHttp(webAppurl,
                "SAMLResponse=" + URLEncoder.encode(samlResponse, "UTF-8"), "appmSamlSsoTokenId",
                "application/x-www-form-urlencoded; charset=UTF-8");

        for (int i = 0; i < hitCount; i++) {
            if (webContext.equals("/notifi")) {
                if (i == hitCount / 5) {
                    webAppurl += "member/";
                } else if (i == hitCount / 2) {
                    webAppurl = appendPageToUrl("admin", webAppurl, false);
                }
            } else if (webContext.equals("/travelBooking")) {
                if (i == hitCount / 5) {
                    webAppurl = appendPageToUrl("booking-step1.jsp", webAppurl, true);
                } else if (i == hitCount / 2) {
                    webAppurl = appendPageToUrl("booking-step2.jsp", webAppurl, false);
                }
            }
            httpHandler.doGet(statisticsUrl, trackingCode, appmSamlSsoTokenId, webAppurl);
            log.info("Web Page : " + webAppurl + " Hit count : " + i);
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                // restore the flag for the caller and stop generating hits
                Thread.currentThread().interrupt();
                return;
            }
        }
    } catch (IOException e) {
        log.error("Error while accessing web pages of " + webContext, e);
    }
}

From source file:dsll.pinterest.crawler.Reduce.java

/**
 * Re-crawls a pin page and merges newly found boards and related pins into the
 * stored pin document.
 *
 * <p>The pin is looked up by the part of {@code url} after {@code "/pin/"}. Boards are
 * de-duplicated by {@code name}, related pins by {@code src}; the merged lists are
 * written back with a {@code $set} update.
 *
 * @param url            address of the pin page; must contain {@code "/pin/"}
 * @param pinsCollection collection holding the pin documents, keyed by {@code ID}
 * @return a message naming the updated pin
 * @throws JSONException            if the stored or newly built JSON cannot be read
 * @throws IOException              if the pin page cannot be fetched
 * @throws IllegalArgumentException if {@code url} has no id after {@code "/pin/"}
 */
private static Text updatePinContent(String url, DBCollection pinsCollection)
        throws JSONException, IOException {
    String[] urlParts = url.split("/pin/");
    if (urlParts.length < 2) {
        throw new IllegalArgumentException("Not a pin URL: " + url);
    }
    String id = urlParts[1];

    // close the cursor once the stored pin has been read
    DBObject oldPin;
    DBCursor cursor = pinsCollection.find(new BasicDBObject("ID", id));
    try {
        oldPin = cursor.next();
    } finally {
        cursor.close();
    }
    JSONArray oldBoards = new JSONArray(oldPin.get("board").toString());
    JSONArray oldRltPin = new JSONArray(oldPin.get("related_pins").toString());

    Document doc = Jsoup.connect(url).get();

    // pin board: fall back to empty strings when the sidebar or header is missing
    Element sidebar = doc.select("div[class=Module CloseupSidebar]").first();
    Element boardEle = sidebar == null ? null : sidebar.select("div[class=boardHeader]").first();
    String boardName = "";
    String boardSrc = "";
    if (boardEle != null) {
        boardName = boardEle.select("h3[class=title]").text().trim();
        boardSrc = "https://www.pinterest.com" + boardEle.select("a").attr("href").trim();
    }
    // NOTE(review): JSONObject.append stores the value inside a JSONArray under the key;
    // kept because stored documents were presumably written the same way — confirm
    JSONObject b = new JSONObject();
    b.append("name", boardName);
    b.append("src", boardSrc);
    JSONArray board = new JSONArray();
    board.put(b);

    // related pins: none when the wrapper is missing
    Element relatedWrapper = doc
            .select("div[class=closeupBottom] div[class=Module CloseupBottom] div[class=relatedPinsWrapper]")
            .first();
    JSONArray relatedPins = new JSONArray();
    if (relatedWrapper != null) {
        for (Element relatedPinsCont : relatedWrapper.select("div[class=pinWrapper]")) {
            JSONObject relatedPin = new JSONObject();
            relatedPin.append("src",
                    "https://www.pinterest.com" + relatedPinsCont.select("div[class=pinHolder] > a").attr("href"));
            relatedPins.put(relatedPin);
        }
    }

    // process new boards
    List<String> oldBoardNames = new ArrayList<String>();
    for (int i = 0; i < oldBoards.length(); i++) {
        oldBoardNames.add(oldBoards.getJSONObject(i).getString("name"));
    }
    for (int i = 0; i < board.length(); i++) {
        JSONObject candidate = board.getJSONObject(i);
        if (!oldBoardNames.contains(candidate.getString("name"))) {
            oldBoards.put(candidate);
        }
    }

    // process new related pins
    List<String> oldRelatedPins = new ArrayList<String>();
    for (int i = 0; i < oldRltPin.length(); i++) {
        oldRelatedPins.add(oldRltPin.getJSONObject(i).getString("src"));
    }
    for (int i = 0; i < relatedPins.length(); i++) {
        JSONObject candidate = relatedPins.getJSONObject(i);
        // compare as String: get("src") returned an Object that never equals the
        // String entries collected above, so duplicates were always re-added
        String src = candidate.getString("src");
        if (oldRelatedPins.contains(src)) {
            continue;
        }
        oldRelatedPins.add(src); // also skip repeats within this page
        oldRltPin.put(candidate);
    }

    BasicDBObject newAttr = new BasicDBObject();
    newAttr.append("board", oldBoards);
    newAttr.append("related_pins", oldRltPin);
    BasicDBObject update = new BasicDBObject().append("$set", newAttr);

    pinsCollection.update(new BasicDBObject("ID", id), update);

    return new Text("Pin " + id + " updated.");
}