Example usage for org.jsoup.nodes Document select

List of usage examples for org.jsoup.nodes Document select

Introduction

On this page you can find usage examples for the select method of org.jsoup.nodes.Document.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:it.polito.tellmefirst.apimanager.ImageManager.java

/**
 * Fetches a page and reads the dimensions declared on its first {@code <img>} element.
 *
 * <p>The page is requested with a 10 second timeout. Any failure (network error, no image,
 * missing or non-numeric {@code width}/{@code height} attribute) is logged and results in
 * {@code {0, 0}}; the method never throws.
 *
 * @param pageURL address of the page to fetch
 * @return a two-element array {@code {width, height}}, or {@code {0, 0}} when the size could
 *         not be determined
 */
public int[] scrapeImageSizeFromPage(String pageURL) {
    LOG.debug("[scrapeImageSizeFromPage] - BEGIN");
    int[] result = { 0, 0 };
    try {
        Document doc = Jsoup.connect(pageURL).timeout(10 * 1000).get();
        Element image = doc.select("img").first();
        if (image == null) {
            // Explicit check instead of letting a NullPointerException fall into the catch-all.
            LOG.error("[scrapeImageSizeFromPage] - no <img> element found at " + pageURL);
        } else {
            // Parse both values before storing either, so a bad height cannot leave a
            // half-filled result behind.
            int width = Integer.parseInt(image.attr("width").trim());
            int height = Integer.parseInt(image.attr("height").trim());
            result[0] = width;
            result[1] = height;
        }
    } catch (Exception e) {
        LOG.error("[scrapeImageSizeFromPage] - EXCEPTION: ", e);
    }
    LOG.debug("[scrapeImageSizeFromPage] - END");
    return result;
}

From source file:org.brunocvcunha.taskerbox.impl.custom.slickdeals.SlickDealsEmailAction.java

/**
 * Sends an e-mail about a SlickDeals post.
 *
 * <p>The body starts with the post URL, followed by the HTML of every element matching
 * {@code .post_message} on that page, each preceded by {@code <br>}. The subject is
 * {@code "SlickDeals - "} plus the post title. If the page cannot be fetched, the stack trace
 * is printed and the e-mail is still sent with whatever body was built so far.
 *
 * @param url address of the post to fetch and include in the body
 * @param postTitle title appended to the e-mail subject
 */
@Override
public void spreadAction(final String url, String postTitle) {
    EmailAction email = getEmailAction();

    EmailValueVO emailVO = new EmailValueVO();
    emailVO.setTitle("SlickDeals - " + postTitle);

    // The buffer never leaves this method, so the unsynchronized StringBuilder is sufficient.
    StringBuilder body = new StringBuilder();
    body.append(url);

    try {
        Document doc = TaskerboxHttpBox.getInstance().getDocumentForURL(url);

        for (Element post : doc.select(".post_message")) {
            body.append("<br>").append(post.html());
        }
    } catch (IllegalStateException | IOException | URISyntaxException e) {
        // ClientProtocolException is an IOException, so it is handled here as well.
        e.printStackTrace();
    }

    emailVO.setBody(body.toString());

    email.action(emailVO);
}

From source file:com.webcrawler.MailCrawlerService.java

/**
 * Gets the link elements./* ww  w.java 2  s . c  o  m*/
 *
 * @param doc the doc
 * @param tagSelector the tag selector
 * @return the link elements
 */
private Elements getLinkElements(Document doc, String tagSelector) {
    return doc.select(tagSelector);
}

From source file:hu.petabyte.redflags.engine.gear.parser.DocFamilyFetcher.java

/**
 * Adds document-family information to a notice when its data tab links to a family tab.
 *
 * <p>The notice's DATA tab is requested first. Only if that page contains a link whose
 * {@code href} matches {@code tabId=4} is the DOCUMENT_FAMILY tab requested and parsed into
 * the notice. {@code determineDocFamilyId} is always called before returning, whether or not
 * either request produced a response.
 *
 * @param notice the notice to enrich
 * @return the notice, possibly replaced by the result of {@code parseDocFamilyTab}
 * @throws Exception if any of the called helpers fails
 */
@Override
protected Notice processImpl(Notice notice) throws Exception {
    TedResponse r = ted.get().requestNoticeTabQuietly(notice.getId(), lang, Tab.DATA);
    if (null != r) {
        Document dataTab = r.getParsedDocument();
        // The attribute selector must be closed with ']'; an unbalanced bracket is rejected
        // by jsoup versions that validate the query.
        // NOTE(review): presumably tabId=4 is the document family tab - confirm.
        if (!dataTab.select("a[href~=tabId=4]").isEmpty()) {
            TedResponse r2 = ted.get().requestNoticeTabQuietly(notice.getId(), lang, Tab.DOCUMENT_FAMILY);
            if (null != r2) {
                Document docFamilyTab = r2.getParsedDocument();
                notice = parseDocFamilyTab(notice, docFamilyTab);
            }
        }
    }

    determineDocFamilyId(notice);

    return notice;
}

From source file:com.dsh105.nexus.command.module.information.TimeCommand.java

/**
 * Looks up the local time for the place named by the command arguments and replies with it.
 *
 * <p>The arguments are joined with spaces and sent to the geocoding endpoint. On an "OK"
 * status, the coordinates of the first result are used to fetch the time page, and the text
 * of its first {@code localtime} element is sent back together with the formatted address.
 * Any other status is reported to the user as "Invalid request".
 *
 * @param event the command invocation
 * @return {@code false} when no arguments were supplied, {@code true} otherwise
 * @throws TimeDataLookupException if anything fails during the lookup
 */
@Override
public boolean onCommand(CommandPerformEvent event) {
    if (event.getArgs().length <= 0) {
        return false;
    }

    String query = StringUtil.combineSplit(0, event.getArgs(), " ");
    try {
        HttpResponse<JsonNode> geocode = Unirest.get(GOOGLE_COORDS_URL + query)
                .header("accept", "application/json").asJson();
        JSONArray results = geocode.getBody().getObject().getJSONArray("results");

        boolean statusOk = geocode.getBody().getObject().getString("status").equalsIgnoreCase("OK");
        if (statusOk) {
            // Only the first geocoding match is considered.
            double latitude = results.getJSONObject(0).getJSONObject("geometry").getJSONObject("location")
                    .getDouble("lat");
            double longitude = results.getJSONObject(0).getJSONObject("geometry").getJSONObject("location")
                    .getDouble("lng");
            String address = results.getJSONObject(0).getString("formatted_address");

            Document timePage = Jsoup.connect(TIME_URL + latitude + "/" + longitude).get();
            String localTime = timePage.select("localtime").first().text();
            event.respond("Time in " + Colors.BOLD + address + ": " + localTime);
        } else {
            event.errorWithPing("Invalid request");
        }
    } catch (Exception e) {
        throw new TimeDataLookupException("An error occurred in the lookup process", e);
    }
    return true;
}

From source file:com.johan.vertretungsplan.parser.UntisMonitorParser.java

/**
 * Downloads and parses a page, then optionally follows its meta-refresh redirects.
 *
 * <p>The page is fetched with {@code httpGet}, every {@code &nbsp;} entity is stripped, and
 * the parsed document is appended to {@code docs}. When {@code following} is set and the page
 * has a {@code <meta http-equiv="refresh">} element whose {@code content} names a target after
 * {@code url=}, that target is resolved against the directory of {@code url} and loaded
 * recursively, until the target equals {@code startUrl}.
 *
 * @param url address of the page to load
 * @param encoding character encoding passed to {@code httpGet}
 * @param following whether meta-refresh redirects should be followed
 * @param docs list that receives every parsed document, in load order
 * @param startUrl address at which following stops
 * @throws IOException if a page cannot be fetched
 */
private void loadUrl(String url, String encoding, boolean following, List<Document> docs, String startUrl)
        throws IOException {
    String html = httpGet(url, encoding).replace("&nbsp;", "");
    Document doc = Jsoup.parse(html);
    docs.add(doc);
    if (!following) {
        return;
    }

    Element meta = doc.select("meta[http-equiv=refresh]").first();
    if (meta == null) {
        return;
    }

    // Locate "url=" case-insensitively but keep the attribute's original casing, so that
    // case-sensitive redirect targets are not mangled.
    String content = meta.attr("content");
    int marker = -1;
    for (int i = 0; i + 4 <= content.length(); i++) {
        if (content.regionMatches(true, i, "url=", 0, 4)) {
            marker = i;
            break;
        }
    }
    if (marker < 0) {
        // A refresh without a target just reloads the same page; there is nothing to follow.
        return;
    }

    String redirectUrl = url.substring(0, url.lastIndexOf("/") + 1) + content.substring(marker + 4);
    // Compared ignoring case so following stops wherever the previous lower-casing
    // implementation stopped, and also when only the letter case differs.
    if (!redirectUrl.equalsIgnoreCase(startUrl)) {
        loadUrl(redirectUrl, encoding, true, docs, startUrl);
    }
}

From source file:ru.xxlabaza.popa.pack.PackingService.java

/**
 * Inlines local JavaScript files into the document.
 *
 * <p>Every {@code <script>} whose {@code src} ends in {@code .js} and does not start with
 * {@code http} is resolved against the build directory. Its file content is compressed
 * unless the file name ends in {@code .min.js}, then written into the element in place of
 * the {@code src} attribute.
 *
 * @param document the document whose script elements are rewritten in place
 */
private void processJavaScript(Document document) {
    document.select("script[src$=.js]:not([src^=http])").forEach(scriptTag -> {
        Path scriptFile = build.resolve(createPath(scriptTag.attr("src")));
        log.info("Processing script '{}'", scriptFile);

        String source = FileSystemUtils.getContent(scriptFile);
        // Files that are already minified are embedded as they are.
        boolean alreadyMinified = scriptFile.getFileName().toString().endsWith(".min.js");
        String inlined = alreadyMinified ? source : compressService.compress(source, JAVASCRIPT);

        scriptTag.removeAttr("src");
        scriptTag.html(inlined);
    });
}

From source file:org.sonatype.nexus.testsuite.misc.nxcm4389.NXCM4389FavIconIT.java

/**
 * Asserts that the favicons referenced by an HTML page are actually available.
 *
 * <p>Two links are checked: {@code link[rel=icon]} in the page itself, and
 * {@code link[rel=shortcut icon]} in the document returned by {@code extractIELink}.
 *
 * @param text the HTML source to inspect
 * @throws IOException if checking a referenced icon fails
 */
private void assertFavIcons(final String text) throws IOException {
    final Document page = Jsoup.parse(text);

    // The standard icon link; its href is an absolute URL here.
    final String iconHref = page.select("link[rel=icon]").attr("href");
    assertExists(iconHref);

    final Document ieDocument = extractIELink(page);

    // The "shortcut icon" link; its href is an absolute URL here as well.
    final String shortcutIconHref = ieDocument.select("link[rel=shortcut icon]").attr("href");
    assertExists(shortcutIconHref);
}

From source file:hello.Scraper.java

/**
 * Splits an HTML response into the elements found in its list items.
 *
 * <p>The body is parsed, the {@code li} elements under {@code ul} under {@code body} are
 * selected, and a visitor is run over that selection; every visited node that is an
 * {@link Element} is added to the result.
 *
 * @param payload the HTTP response whose body holds the HTML
 * @return the collected elements, in visiting order
 */
@Splitter(inputChannel = "channel1", outputChannel = "channel2")
public List<Element> scrape(ResponseEntity<String> payload) {
    final Document parsed = Jsoup.parse(payload.getBody());
    final Elements listItems = parsed.select("body").select("ul").select("li");
    final List<Element> collected = new ArrayList<Element>();

    listItems.traverse(new NodeVisitor() {
        @Override
        public void head(Node node, int depth) {
            // Text, comment and other non-element nodes are skipped.
            if (!(node instanceof Element)) {
                return;
            }
            collected.add((Element) node);
        }

        @Override
        public void tail(Node node, int depth) {
            // Nothing to do when leaving a node.
        }
    });

    return collected;
}

From source file:it.polito.tellmefirst.apimanager.ImageManager.java

/**
 * Fetches a page and returns the URL of its first {@code <img>} element.
 *
 * <p>The page is requested with a 10 second timeout. A protocol-relative {@code src}
 * (starting with {@code //}) is prefixed with {@code http:}; any other non-empty
 * {@code src} is resolved to an absolute URL against the page. Failures are logged and
 * produce an empty string; the method never throws. The elapsed time is logged at debug
 * level in whole seconds.
 *
 * @param pageURL address of the page to fetch
 * @return the image URL, or an empty string when none could be determined
 */
public String scrapeDBpediaImageFromPage(String pageURL) {
    LOG.debug("[scrapeDBpediaImageFromPage] - BEGIN url=" + pageURL);
    long startTime = System.currentTimeMillis();
    String result = "";
    try {
        Document doc = Jsoup.connect(pageURL).timeout(10 * 1000).get();
        Element image = doc.select("img").first();
        if (image == null) {
            // Explicit check instead of letting a NullPointerException fall into the catch-all.
            LOG.error("[scrapeDBpediaImageFromPage] - no <img> element found at " + pageURL);
        } else {
            String src = image.attr("src");
            if (src.startsWith("//")) {
                result = "http:" + src;
            } else if (!src.isEmpty()) {
                // Prefixing "http:" here would yield "http:http://..." for absolute sources
                // and a bare "http:" for a missing attribute.
                result = image.absUrl("src");
            }
        }
    } catch (Exception e) {
        LOG.error("[scrapeDBpediaImageFromPage] - EXCEPTION: ", e);
    }
    long endTime = System.currentTimeMillis();
    long duration = (endTime - startTime) / 1000;
    //no prod
    LOG.debug("########### [scrapeDBpediaImageFromPage] took " + duration + " seconds. ###########");
    LOG.debug("[scrapeDBpediaImageFromPage] - END");

    return result;
}