Example usage for org.jsoup.nodes Document select

List of usage examples for org.jsoup.nodes Document select

Introduction

On this page you can find example usages of the org.jsoup.nodes Document select method.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:it.polito.tellmefirst.web.rest.apimanager.ImageManager.java

/**
 * Downloads {@code pageURL} and reads the {@code width} and {@code height} attributes of the
 * first {@code img} found inside a {@code div.fullImageLink} element.
 *
 * @param pageURL address of the page to download and inspect
 * @return a two-element array {@code {width, height}}; {@code {0, 0}} when the page cannot be
 *         fetched, contains no matching image, or either attribute is not an integer
 */
public int[] scrapeImageSizeFromPage(String pageURL) {
    LOG.debug("[scrapeImageSizeFromPage] - BEGIN");
    int[] result = { 0, 0 };
    try {
        Document doc = Jsoup.connect(pageURL).get();
        Element image = doc.select("div.fullImageLink").select("img").first();
        if (image == null) {
            // first() yields null on an empty selection; report it explicitly instead of
            // relying on a NullPointerException being caught below.
            LOG.error("[scrapeImageSizeFromPage] - no image found in div.fullImageLink for page: " + pageURL);
        } else {
            // Parse both values before touching the result so that a malformed height
            // cannot leave a half-filled {width, 0} array behind.
            int width = Integer.parseInt(image.attr("width"));
            int height = Integer.parseInt(image.attr("height"));
            result[0] = width;
            result[1] = height;
        }
    } catch (Exception e) {
        LOG.error("[scrapeImageSizeFromPage] - EXCEPTION: ", e);
    }
    LOG.debug("[scrapeImageSizeFromPage] - END");
    return result;
}

From source file:com.isoftstone.proxy.api.sdk.KuaidailiProxySDK.java

/**
 * Builds a list of proxies from the rows matched by {@code #list table tr}.
 * <p>
 * The first matched row is always skipped (presumably the table header). For every other row
 * the text of the first {@code td} is used as the IP and the text of the second {@code td}
 * as the port. Rows that do not provide both cells are ignored.
 *
 * @param doc parsed page containing the proxy table
 * @return the proxies found, in document order; empty when no data rows match
 * @throws NumberFormatException if a port cell does not contain an integer
 */
private List<ProxyVo> parseHtml(Document doc) {
    Elements rows = doc.select("#list table tr");
    List<ProxyVo> proxyList = new ArrayList<ProxyVo>();
    for (int i = 1; i < rows.size(); i++) {
        Element row = rows.get(i);
        Element ipEle = row.select("td:eq(0)").first();
        Element portEle = row.select("td:eq(1)").first();
        if (ipEle == null || portEle == null) {
            // first() returns null on an empty selection (e.g. a th-only or single-cell row);
            // skip the row rather than failing the whole parse with a NullPointerException.
            continue;
        }
        ProxyVo proxyVo = new ProxyVo();
        proxyVo.setProxyIp(ipEle.text());
        proxyVo.setProxyPort(Integer.parseInt(portEle.text()));
        proxyList.add(proxyVo);
    }
    return proxyList;
}

From source file:org.manalith.ircbot.plugin.linuxpkgfinder.PhPortageProvider.java

/**
 * Queries the phportage XML service for an exact match of {@code arg} and formats the first
 * package returned.
 *
 * @param arg package name to search for; it is URL-encoded before being placed in the query
 * @return the formatted package line, the bare {@code "[Gentoo]  "} prefix when the service
 *         reports zero results, an empty string when the service's result code is non-zero,
 *         or {@code ": " + message} when the lookup throws
 */
@Override
public String find(String arg) {
    String result = "";

    try {
        // Encode the user-supplied keyword: names such as "g++" or input containing '&'
        // would otherwise corrupt the query string.
        String url = "http://darkcircle.kr/phportage/phportage.xml?k=" + java.net.URLEncoder.encode(arg, "UTF-8")
                + "&limit=1&similarity=exact" + "&showmasked=true&livebuild=false";

        Document d = Jsoup.connect(url).get();
        String code = d.select("result>code").get(0).text();
        if (NumberUtils.toInt(code) == 0) {
            if (NumberUtils.toInt(d.select("result>actualnumofres").get(0).text()) == 0) {
                result = "[Gentoo]  ";
            } else {
                Element e = d.select("result>packages>pkg").get(0);
                String pkgname = e.select("category").get(0).text() + "/" + e.select("name").get(0).text();

                String ver = e.select("version").get(0).text();
                String description = e.select("description").get(0).text();

                result = "[Gentoo] \u0002" + pkgname + "\u0002 - " + description + ", " + ver;
            }
        }

    } catch (Exception e) {
        // Also reached when an expected XML element is missing (get(0) on an empty selection).
        logger.error(e.getMessage(), e);
        result = ": " + e.getMessage();
    }

    return result;
}

From source file:it.polito.tellmefirst.apimanager.VideoManager.java

/**
 * Extracts a video id from the text of the second {@code id} element in {@code input}.
 * <p>
 * The id is whatever follows the last occurrence of {@code "video:"} in that text; when the
 * marker is absent the whole text is returned.
 *
 * @param input markup to parse
 * @return the extracted id, or {@code null} when fewer than two {@code id} elements exist or
 *         nothing follows the marker
 */
public String extractVideoIdFromResult(String input) {

    LOG.debug("[extractVideoIdFromResult] - BEGIN");

    final String marker = "video:";
    String result = null;
    Document doc = Jsoup.parse(input);

    Elements ids = doc.select("id");

    if (ids.size() > 1) {
        String idDirty = ids.get(1).text();
        LOG.debug("ID dirty: " + idDirty);
        // lastIndexOf/substring instead of split(): split drops trailing empty strings, so a
        // text of exactly "video:" produced an empty array and an ArrayIndexOutOfBoundsException.
        int markerAt = idDirty.lastIndexOf(marker);
        String candidate = markerAt < 0 ? idDirty : idDirty.substring(markerAt + marker.length());
        if (candidate.isEmpty()) {
            LOG.error("no video id available");
        } else {
            result = candidate;
        }
    } else {
        LOG.error("no video id available");
    }

    LOG.debug("[extractVideoIdFromResult] - END");
    return result;
}

From source file:org.commonjava.indy.ftest.core.urls.StoreOneAndSourceStoreUrlInHtmlListingTest.java

/**
 * Stores one file in the hosted store, requests the HTML listing of its parent directory and
 * asserts that every {@code a.source-link} anchor in the listing has an {@code href} equal to
 * the store's content URL.
 */
@Test
public void storeOneFileAndVerifyItInParentDirectoryListing() throws Exception {
    final byte[] data = "this is a test".getBytes("UTF-8");
    final ByteArrayInputStream stream = new ByteArrayInputStream(data);
    final String root = "/path/to/";
    final String path = root + "foo.txt";

    client.content().store(hosted, STORE, path, stream);

    final IndyClientHttp http = getHttp();

    final HttpGet request = http.newRawGet(client.content().contentUrl(hosted, STORE, root));

    request.addHeader("Accept", "text/html");

    // NOTE(review): the client itself is left open because its ownership is not visible here;
    // confirm whether newClient() hands out a client the caller must close.
    final CloseableHttpClient hc = http.newClient();

    final String html;
    // Release the response and its entity stream even when reading the body fails.
    try (CloseableHttpResponse response = hc.execute(request);
            InputStream listing = response.getEntity().getContent()) {
        // TODO: Charset!!
        html = IOUtils.toString(listing);
    }

    final Document doc = Jsoup.parse(html);
    for (final Element item : doc.select("a.source-link")) {
        final String fname = item.text();
        System.out.printf("Listing contains: '%s'\n", fname);
        final String href = item.attr("href");
        final String expected = client.content().contentUrl(hosted, STORE);

        assertThat(fname + " does not have a href", href, notNullValue());
        assertThat(fname + " has incorrect link: '" + href + "' (" + href.getClass().getName()
                + ")\nshould be: '" + expected + "' (String)", href, equalTo(expected));
    }
}

From source file:org.commonjava.indy.ftest.core.urls.StoreOneAndVerifyInHtmlListingTest.java

/**
 * Stores one file in the hosted store, requests the HTML listing of its parent directory and
 * asserts that every {@code a.item-link} anchor in the listing has an {@code href} equal to
 * the content URL built from the store, the directory and the anchor's text.
 */
@Test
public void storeOneFileAndVerifyItInParentDirectoryListing() throws Exception {
    final byte[] data = "this is a test".getBytes("UTF-8");
    final ByteArrayInputStream stream = new ByteArrayInputStream(data);
    final String root = "/path/to/";
    final String path = root + "foo.txt";

    client.content().store(hosted, STORE, path, stream);

    final IndyClientHttp http = getHttp();

    final HttpGet request = http.newRawGet(client.content().contentUrl(hosted, STORE, root));

    request.addHeader("Accept", "text/html");

    // NOTE(review): the client itself is left open because its ownership is not visible here;
    // confirm whether newClient() hands out a client the caller must close.
    final CloseableHttpClient hc = http.newClient();

    final String html;
    // Release the response and its entity stream even when reading the body fails.
    try (CloseableHttpResponse response = hc.execute(request);
            InputStream listing = response.getEntity().getContent()) {
        // TODO: Charset!!
        html = IOUtils.toString(listing);
    }

    final Document doc = Jsoup.parse(html);
    for (final Element item : doc.select("a.item-link")) {
        final String fname = item.text();
        System.out.printf("Listing contains: '%s'\n", fname);
        final String href = item.attr("href");
        final String expected = client.content().contentUrl(hosted, STORE, root, fname);

        assertThat(fname + " does not have a href", href, notNullValue());
        assertThat(fname + " has incorrect link: '" + href + "' (" + href.getClass().getName()
                + ")\nshould be: '" + expected + "' (String)", href, equalTo(expected));
    }
}

From source file:org.mashupmedia.task.MetaTaskScheduler.java

/**
 * Loads the latest-final-release page through the proxy manager, reads the release type and
 * version from it, logs both and saves the version under
 * {@code MashUpMediaConstants.LATEST_RELEASE_FINAL_VERSION}.
 * <p>
 * Nothing is saved when the proxy manager returns no file; an {@link IOException} is logged
 * and not rethrown.
 */
public void getMashupMediaLatestReleaseInformation() {
    final String releasePage = "http://www.mashupmedia.org/latest-release/final";
    try {
        final ProxyTextFile releaseFile = (ProxyTextFile) proxyManager.loadProxyFile(releasePage,
                ProxyType.TEXT_FILE);

        if (releaseFile != null) {
            final Elements releaseRows = Jsoup.parse(releaseFile.getText())
                    .select("div.view-latest-final-release div.views-row");
            final String releaseType = releaseRows.select("div.views-field-field-release-type").text();
            final String version = releaseRows.select("div.views-field-field-version").text();

            logger.info("Found latest release information, type = " + releaseType + ", version = " + version);
            configurationManager.saveConfiguration(MashUpMediaConstants.LATEST_RELEASE_FINAL_VERSION, version);
        } else {
            logger.info(
                    "Unable to find latest release from page: http://www.mashupmedia.org/latest-release/final");
        }
    } catch (IOException e) {
        logger.error("Unable to get latest version information from www.mashupmedia.org", e);
    }
}

From source file:com.clonephpscrapper.crawler.ClonePhpScrapper.java

/**
 * Crawls the clonephp.com front page, stores every category link it finds through
 * {@code objClonePhpDaoImpl}, then crawls each stored category on a pool of five worker
 * threads and prints each worker's result as it completes.
 * <p>
 * The pool is always shut down before this method returns, including when it exits with an
 * exception. If the calling thread is interrupted while waiting for a worker, the interrupt
 * flag is restored and the remaining results are not awaited.
 */
public void crawledCategories() throws URISyntaxException, IOException, InterruptedException, Exception {

    String url = "http://clonephp.com/";

    String response = new GetRequestHandler().doGetRequest(new URL(url));

    Document doc = Jsoup.parse(response);

    Elements ele = doc.select("table[class=dir] tbody tr td table[class=dir_cat] tbody tr th a");

    for (Element ele1 : ele) {
        objCategories = new Categories();

        String categoryName = ele1.text();
        String categoryUrl = "http://clonephp.com/" + ele1.attr("href");

        System.out.println("CATEGORY_NAME : " + categoryName);
        System.out.println("CATEGORY_URL  : " + categoryUrl);

        objCategories.setCategoryName(categoryName);
        objCategories.setCategoryUrl(categoryUrl);

        objClonePhpDaoImpl.insertCategoriesData(objCategories);
    }

    // Read the categories before creating the pool so a DAO failure cannot leak threads.
    List<Categories> listCatogories = objClonePhpDaoImpl.getCategoriesDataList();

    List<Future<String>> list = new ArrayList<Future<String>>();
    ExecutorService executor = Executors.newFixedThreadPool(5);
    try {
        for (Categories listCatogory : listCatogories) {
            try {
                // Assumes CrawlingEachUrlData implements Callable<String>, matching the
                // Future<String> the results are collected into.
                Callable<String> worker = new CrawlingEachUrlData(listCatogory, objClonePhpDaoImpl);
                Future<String> future = executor.submit(worker);
                list.add(future);
            } catch (Exception exx) {
                System.out.println(exx);
            }
        }

        for (Future<String> fut : list) {
            try {
                // Future.get() blocks until the task completes, so output appears with a delay.
                System.out.println(new Date() + "::" + fut.get());
            } catch (InterruptedException ep) {
                // Restore the flag for the caller and stop waiting; continuing would make
                // each remaining get() observe the interrupt again.
                Thread.currentThread().interrupt();
                break;
            } catch (ExecutionException ep) {
                ep.printStackTrace();
            }
        }
    } finally {
        // Always release the worker threads, even when submission or waiting fails.
        executor.shutdown();
    }

}

From source file:org.commonjava.aprox.folo.ftest.urls.StoreOneAndSourceStoreUrlInHtmlListingTest.java

/**
 * Stores one file through the tracked content API, requests the HTML listing of its parent
 * directory and asserts that every {@code a.source-link} anchor in the listing has an
 * {@code href} equal to the store's content URL.
 */
@Test
public void storeOneFileAndVerifyItInParentDirectoryListing() throws Exception {
    final byte[] data = "this is a test".getBytes("UTF-8");
    final ByteArrayInputStream stream = new ByteArrayInputStream(data);
    final String root = "/path/to/";
    final String path = root + "foo.txt";
    final String track = "track";

    content.store(track, hosted, STORE, path, stream);

    final AproxClientHttp http = getHttp();

    final HttpGet request = http.newRawGet(content.contentUrl(track, hosted, STORE, root));

    request.addHeader("Accept", "text/html");

    // NOTE(review): the client itself is left open because its ownership is not visible here;
    // confirm whether newClient() hands out a client the caller must close.
    final CloseableHttpClient hc = http.newClient();

    final String html;
    // Release the response and its entity stream even when reading the body fails.
    try (CloseableHttpResponse response = hc.execute(request);
            InputStream listing = response.getEntity().getContent()) {
        // TODO: Charset!!
        html = IOUtils.toString(listing);
    }

    final Document doc = Jsoup.parse(html);
    for (final Element item : doc.select("a.source-link")) {
        final String fname = item.text();
        System.out.printf("Listing contains: '%s'\n", fname);
        final String href = item.attr("href");
        final String expected = client.content().contentUrl(hosted, STORE);

        assertThat(fname + " does not have a href", href, notNullValue());
        assertThat(fname + " has incorrect link: '" + href + "' (" + href.getClass().getName()
                + ")\nshould be: '" + expected + "' (String)", href, equalTo(expected));
    }
}

From source file:org.commonjava.aprox.folo.ftest.urls.StoreOneAndVerifyInHtmlListingTest.java

/**
 * Stores one file through the tracked content API, requests the HTML listing of its parent
 * directory and asserts that every {@code a.item-link} anchor in the listing has an
 * {@code href} equal to the content URL built from the store, the directory and the
 * anchor's text.
 */
@Test
public void storeOneFileAndVerifyItInParentDirectoryListing() throws Exception {
    final byte[] data = "this is a test".getBytes("UTF-8");
    final ByteArrayInputStream stream = new ByteArrayInputStream(data);
    final String root = "/path/to/";
    final String path = root + "foo.txt";
    final String track = "track";

    content.store(track, hosted, STORE, path, stream);

    final AproxClientHttp http = getHttp();

    final HttpGet request = http.newRawGet(content.contentUrl(track, hosted, STORE, root));

    request.addHeader("Accept", "text/html");

    // NOTE(review): the client itself is left open because its ownership is not visible here;
    // confirm whether newClient() hands out a client the caller must close.
    final CloseableHttpClient hc = http.newClient();

    final String html;
    // Release the response and its entity stream even when reading the body fails.
    try (CloseableHttpResponse response = hc.execute(request);
            InputStream listing = response.getEntity().getContent()) {
        // TODO: Charset!!
        html = IOUtils.toString(listing);
    }

    final Document doc = Jsoup.parse(html);
    for (final Element item : doc.select("a.item-link")) {
        final String fname = item.text();
        System.out.printf("Listing contains: '%s'\n", fname);
        final String href = item.attr("href");
        final String expected = client.content().contentUrl(hosted, STORE, root, fname);

        assertThat(fname + " does not have a href", href, notNullValue());
        assertThat(fname + " has incorrect link: '" + href + "' (" + href.getClass().getName()
                + ")\nshould be: '" + expected + "' (String)", href, equalTo(expected));
    }
}