Example usage for org.jsoup.select Elements select

List of usage examples for org.jsoup.select Elements select

Introduction

On this page you can find example usages of the org.jsoup.select.Elements.select method.

Prototype

public Elements select(String query) 

Source Link

Document

Find matching elements within this element list.

Usage

From source file:com.manisha.allmybooksarepacked.service.BookParser.java

/**
 * Reads the language entry from the parsed page.
 *
 * <p>Selects the elements matching {@code PathMapping.LANGUAGE}, removes every
 * {@code <b>} element matched within them from the document, and returns the
 * inner HTML that is left.
 *
 * @return the trimmed inner HTML of the selected elements
 */
private String findLanguage() {
    final Elements languageNodes = doc.select(PathMapping.LANGUAGE);
    final Elements boldLabels = languageNodes.select("b");
    boldLabels.remove();
    final String remainingHtml = languageNodes.html();
    return remainingHtml.trim();
}

From source file:com.manisha.allmybooksarepacked.service.BookParser.java

/**
 * Extracts the shipping weight from the parsed page.
 *
 * <p>Selects the elements matching {@code PathMapping.WEIGHT}, removes the
 * {@code <b>} and {@code <a>} elements matched within them, strips parentheses
 * from the remaining inner HTML and parses the first space-separated token.
 *
 * @return the parsed weight, or {@code null} when the first token is blank or
 *         is not a valid number
 */
private Double findShippingWeight() {
    Elements weight = doc.select(PathMapping.WEIGHT);
    weight.select("b").remove();
    weight.select("a").remove();
    // Trim before splitting: " ".split(" ") yields an empty array, so indexing
    // [0] on untrimmed whitespace-only markup would throw.
    String cleaned = weight.html().replace("(", "").replace(")", "").trim();
    String str = cleaned.split(" ")[0];
    if (!StringUtils.isNotBlank(str)) {
        return null;
    }
    try {
        return Double.valueOf(str);
    } catch (NumberFormatException ignored) {
        // Best-effort scrape: a non-numeric first token means no usable weight.
        return null;
    }
}

From source file:com.manisha.allmybooksarepacked.service.BookParser.java

/**
 * Extracts the publisher name from the parsed page.
 *
 * <p>Selects the elements matching {@code PathMapping.PUBLISHER}, removes the
 * {@code <b>} elements matched within them, keeps the text before the first
 * "(" and then drops everything from the last ";" onwards.
 *
 * @return the publisher text; the whole remaining markup (trimmed) is used
 *         when it contains no "("
 */
private String findPublisher() {
    Elements publisher = doc.select(PathMapping.PUBLISHER);
    publisher.select("b").remove();
    String html = publisher.html();
    // indexOf returns -1 when there is no parenthesis; substring(0, -1) would throw.
    int parenIndex = html.indexOf('(');
    String str = (parenIndex >= 0 ? html.substring(0, parenIndex) : html).trim();
    int semicolonIndex = str.lastIndexOf(';');
    if (semicolonIndex != -1) {
        str = str.substring(0, semicolonIndex);
    }
    return str;
}

From source file:com.manisha.allmybooksarepacked.service.BookParser.java

/**
 * Extracts the page count from the parsed page.
 *
 * <p>Uses the elements matching {@code PathMapping.PAGES_HARDCOVER}; when their
 * markup (after removing {@code <b>} elements) is blank, falls back to
 * {@code PathMapping.PAGES_PAPERBACK}.
 *
 * @return the page count, or {@code null} when no number could be parsed
 */
private Integer findPages() {
    Elements pages = doc.select(PathMapping.PAGES_HARDCOVER);
    pages.select("b").remove();
    if (!StringUtils.isNotBlank(pages.html())) {
        // No hardcover entry: try the paperback entry instead.
        pages = doc.select(PathMapping.PAGES_PAPERBACK);
        pages.select("b").remove();
    }
    return parseLeadingInteger(pages.html());
}

/**
 * Parses the first space-separated token of {@code html} as an integer,
 * ignoring thousands separators (",").
 *
 * @param html markup whose first token is expected to be a number
 * @return the parsed value, or {@code null} when the token is empty or not a number
 */
private Integer parseLeadingInteger(String html) {
    // Trimming guarantees split(" ") returns at least one token.
    String firstToken = html.trim().split(" ")[0].replace(",", "");
    try {
        return Integer.valueOf(firstToken);
    } catch (NumberFormatException ignored) {
        // Best-effort scrape: a non-numeric token means no usable page count.
        return null;
    }
}

From source file:org.mashupmedia.task.MetaTaskScheduler.java

/**
 * Looks up the latest final release published on www.mashupmedia.org and
 * saves its version in the configuration.
 *
 * <p>The release page is loaded through {@code proxyManager}; when nothing is
 * returned an info message is logged and nothing is saved. An
 * {@link IOException} is logged as an error and not propagated.
 */
public void getMashupMediaLatestReleaseInformation() {
    final String releaseUrl = "http://www.mashupmedia.org/latest-release/final";
    try {
        final ProxyTextFile releasePage =
                (ProxyTextFile) proxyManager.loadProxyFile(releaseUrl, ProxyType.TEXT_FILE);
        if (releasePage != null) {
            final Elements releaseRows = Jsoup.parse(releasePage.getText())
                    .select("div.view-latest-final-release div.views-row");
            final String releaseType = releaseRows.select("div.views-field-field-release-type").text();
            final String version = releaseRows.select("div.views-field-field-version").text();
            logger.info("Found latest release information, type = " + releaseType + ", version = " + version);
            configurationManager.saveConfiguration(MashUpMediaConstants.LATEST_RELEASE_FINAL_VERSION, version);
        } else {
            logger.info(
                    "Unable to find latest release from page: http://www.mashupmedia.org/latest-release/final");
        }
    } catch (IOException e) {
        logger.error("Unable to get latest version information from www.mashupmedia.org", e);
    }
}

From source file:org.javiermoreno.torrentscratcher.Runner.java

/**
 * Fills in the FilmAffinity id, original title and description of a movie by
 * scraping www.filmaffinity.com.
 *
 * <p>Searches by the movie's title. When the response has no
 * {@code og:title} property it is treated as a result list and the first
 * result's page is fetched instead. I/O failures are logged and the movie is
 * returned with whatever was set so far.
 *
 * @param movie the movie to enrich; its title is used as the search text
 * @return the same {@code movie} instance
 */
public Movie enrichMovieWithFilmAffinity(Movie movie) {
    try {
        String url = "http://www.filmaffinity.com/es/search.php?stext={title}&stype=all";
        String title = URLEncoder.encode(movie.getTitle(), "UTF-8");
        url = url.replace("{title}", title);
        Document doc = Jsoup.connect(url).get();
        if (doc.select("[property=og:title]").isEmpty()) {
            // several results found, take the first
            Element firstResult = doc.select(".item-search .mc-title a").first();
            if (firstResult == null) {
                // filmaffinity search engine failed
                log.warn("FilmAffinity 404: " + movie.getTitle());
                return movie;
            }
            url = "http://www.filmaffinity.com" + firstResult.attr("href");
            doc = Jsoup.connect(url).get();
        }
        movie.setFilmAffinityId(doc.select("div.rate-movie-box").attr("data-movie-id"));
        Elements infoValues = doc.select("dl.movie-info").select("dd");
        movie.setOriginalTitle(cleanOriginalTitle(infoValues.eq(0).text()));
        // NOTE(review): index 11 is a fixed position in the <dd> list — confirm against the page layout.
        movie.setDescription(infoValues.eq(11).text());
    } catch (IOException ex) {
        // Pass the exception itself so the stack trace is not lost.
        log.warn("FilmAffinity lookup failed for: " + movie.getTitle(), ex);
    }
    return movie;
}

/**
 * Removes parenthesised and bracketed annotations and a trailing standalone
 * "aka" marker from a scraped title.
 *
 * @param rawTitle the title text as scraped
 * @return the cleaned, trimmed title
 */
private String cleanOriginalTitle(String rawTitle) {
    return rawTitle
            .replaceAll("\\([^\\(]*\\)", "")
            // Stop at the first closing bracket so "[a] Title [b]" keeps "Title".
            .replaceAll("\\[[^\\]]*\\]", "")
            // Word boundary keeps titles that merely end in "aka" (e.g. "Baraka") intact.
            .replaceAll("\\baka\\s*$", "")
            .trim();
}

From source file:org.hmzb.test.HttpClientTest.java

/**
 * Fetches a task list page, sums the numeric value in the eighth cell of every
 * data row whose fourth cell matches {@code regex}, and prints the total.
 *
 * <p>The first row of {@code table.list} and then the last remaining row are
 * dropped before summing (presumably header and footer rows — confirm against
 * the page).
 *
 * <p>NOTE(review): the request carries a hard-coded session cookie; this looks
 * like a credential and should not live in source control.
 * NOTE(review): the pattern {@code ".*?.*"} matches every project name.
 *
 * @throws IOException if the page cannot be fetched
 */
@Test
public final void testPMS() throws IOException {
    String url = "http://pms.local.17173.com/task_list_department.php?action=search&employment_id=&state=0&time_id=plan&start_date=2014-01-01&end_date=2014-05-16&x=24&y=5";
    String cookieValue = "SUV=1381469482625841; NUV=1381507200000; sohutag=8HsmeSc5NCwmcyc5NCwmYjc5NCwmYSc5NCwmZjc5NCwmZyc5Njwmbjc5NCwmaSc5NCwmdyc5NCwmaCc5NCwmYyc5NCwmZSc5NCwmbSc5NH0; __utma=113262040.1666690635.1382600575.1382600575.1382600575.1; vjuids=c639cb6b1.142370b45c7.0.ef4cedbb; Hm_lvt_0245ebe4fb30a09e371e4f011dec1f6a=1388137801; live_17173_unique=e7de7aed49953586fc1da607967cf847; _ga=GA1.2.1666690635.1382600575; pgv_pvi=2611450780; vjlast=1383902955.1399958818.22; ermpdockData=1,2,4,13,17; DIFF=1400117702510; IPLOC=CN3501; ErmpToken=Q1k1MzIw; ErmpTicket=MTAuNS4xNS4xNg; ppinf=2|1401269453|1402479053|bG9naW5pZDowOnx1c2VyaWQ6MTY6cHR6aHVmQDE3MTczLmNvbXxzZXJ2aWNldXNlOjMwOjAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMHxjcnQ6MTA6MjAxMi0xMS0yMHxlbXQ6MTowfGFwcGlkOjQ6MTA3N3x0cnVzdDoxOjF8cGFydG5lcmlkOjE6MHxyZWxhdGlvbjowOnx1dWlkOjE2OmRiYmNhNTA3ZjNmMjRjMnR8dWlkOjk6czg3MDM4OTcwfHVuaXFuYW1lOjQ0OiVFNiU5MCU5QyVFNyU4QiU5MCVFNyVCRCU5MSVFNSU4RiU4QjMxNDI4NjcxfA; pprdig=Hs7tIw6klJdNasYa5mYo4aOzZnr2dL96PkIAMo8K4KGp4UM2yhx2LHuNOZ5zX7s4pKShi4GnXYFIIyAW-BWRJCAgmI2qeorvqshYjT5gs4gWKGgJNtoQAbdIt1liIK-Bt1aX_mYueEHUA_yRDVhRxRVLVt3mtlgywukd-stCIOE; lastdomain=1402479053|cHR6aHVmQDE3MTczLmNvbXw|17173.com; PHPSESSID=qcr7raandp6l0k7g9vpg0lgn22; PMS_cypms_username=fuzhu; PMS_cypms_auth=c0b47dad95a0e7ef7505d9ce057b6651";
    Document resultDoc = Jsoup.connect(url).header("cookie", cookieValue).timeout(20000).get();
    Elements rows = resultDoc.select("table.list").select("tr");
    // Drop the first row, then the last of the rows that remain.
    rows.remove(0);
    rows.remove(rows.size() - 1);

    String regex = ".*?.*";
    double totalTime = 0d;
    for (Element row : rows) {
        Elements cells = row.select("td");
        String projectName = cells.get(3).text();
        double realTime = Double.parseDouble(cells.get(7).text());
        if (!projectName.matches(regex)) {
            continue;
        }
        totalTime += realTime;
    }
    System.out.println(totalTime);
}

From source file:Leitura.Jxr.java

/**
 * Returns the text of every {@code <pre>} element in {@code document} after
 * stripping JXR line-number anchors and comment elements.
 *
 * <p>The matched line-number, comment and Javadoc-comment elements are removed
 * from the document itself, not from a copy.
 *
 * @return the combined text of the {@code <pre>} elements without the removed parts
 * @throws IOException declared by the signature; nothing in this body throws it
 */
public String leituraJxr() throws IOException {
    Elements preBlocks = document.getElementsByTag("pre");
    Elements lineNumberAnchors = preBlocks.select("a.jxr_linenumber");
    lineNumberAnchors.remove();
    for (int i = 0; i < preBlocks.size(); i++) {
        Element preBlock = preBlocks.get(i);
        preBlock.getElementsByClass("jxr_comment").remove();
        preBlock.getElementsByClass("jxr_javadoccomment").remove();
    }
    // Return the code text without the stripped noise.
    return preBlocks.text();
}

From source file:com.maxl.java.aips2xml.Aips2Xml.java

/**
 * Converts the HTML of a monograph into the tag vocabulary used by this tool
 * and returns the resulting markup as a single string.
 *
 * <p>The HTML is parsed with jsoup, {@code div} elements are renamed by class
 * ({@code fi}, {@code title}, {@code owner}, {@code paragraph}, ...), styling
 * attributes are stripped, tables are simplified, a {@code swissmedicno5}
 * element holding {@code regnr_str} is inserted after {@code owner}, and the
 * body markup is post-processed with a series of string replacements. Lines
 * are concatenated without separators in the returned string.
 *
 * @param med_title title to write into the {@code title} element
 * @param html_str  HTML to convert
 * @param regnr_str text to write into the {@code swissmedicno5} element
 * @return the converted markup
 */
static String convertHtmlToXml(String med_title, String html_str, String regnr_str) {
    Document mDoc = Jsoup.parse(html_str);
    mDoc.outputSettings().escapeMode(EscapeMode.xhtml);
    mDoc.outputSettings().prettyPrint(true);
    mDoc.outputSettings().indentAmount(4);

    // <div id="monographie"> -> <fi>
    mDoc.select("div[id=monographie]").tagName("fi").removeAttr("id");
    // <div class="MonTitle"> -> <title>
    mDoc.select("div[class=MonTitle]").tagName("title").removeAttr("class").removeAttr("id");
    // Beautify the title to the best of my possibilities ... still not good enough!
    String title_str = mDoc.select("title").text().trim().replaceAll("<br />", "").replaceAll("(\\t|\\r?\\n)+",
            "");
    if (!title_str.equals(med_title) && SHOW_ERRORS) {
        System.err.println(med_title + " differs from " + title_str);
    }
    // Fallback solution: use title from the header AIPS.xml file - the titles look all pretty good!
    mDoc.select("title").first().text(med_title);
    // <div class="ownerCompany"> -> <owner>
    Element owner_elem = mDoc.select("div[class=ownerCompany]").first();
    if (owner_elem != null) {
        owner_elem.tagName("owner").removeAttr("class");
        String owner_str = mDoc.select("owner").text();
        mDoc.select("owner").first().text(owner_str);
    } else {
        // No owner in the source: insert an <owner> placeholder after the title.
        mDoc.select("title").after("<owner></owner>");
        if (DB_LANGUAGE.equals("de")) {
            mDoc.select("owner").first().text("k.A.");
        } else if (DB_LANGUAGE.equals("fr")) {
            mDoc.select("owner").first().text("n.s.");
        }
    }

    // <div class="paragraph"> -> <paragraph>
    mDoc.select("div[class=paragraph]").tagName("paragraph").removeAttr("class").removeAttr("id");
    // <div class="absTitle"> -> <paragraphtitle>
    mDoc.select("div[class=absTitle]").tagName("paragraphtitle").removeAttr("class");
    // <div class="untertitle1"> -> <paragraphsubtitle>
    mDoc.select("div[class=untertitle1]").tagName("paragraphsubtitle").removeAttr("class");
    // <div class="untertitle"> -> <paragraphsubtitle>
    mDoc.select("div[class=untertitle]").tagName("paragraphsubtitle").removeAttr("class");
    // <div class="shortCharacteristic"> -> <characteristic>
    mDoc.select("div[class=shortCharacteristic]").tagName("characteristic").removeAttr("class");
    // <div class="image"> -> <image>
    mDoc.select("div[class=image]").tagName("image").removeAttr("class");

    // <p class="spacing1"> -> <p> / <p class="noSpacing"> -> <p>
    mDoc.select("p[class]").tagName("p").removeAttr("class");
    // Every <span> becomes <i> (e.g. <span style="font-style:italic">)
    mDoc.select("span").tagName("i").removeAttr("style");
    // <i class="indention1"> -> <i> / <i class="indention2"> -> <i>
    mDoc.select("i[class=indention1]").tagName("i").removeAttr("class");
    mDoc.select("i[class=indention2]").tagName("i").removeAttr("class");

    // Replace the content of non-empty titles by their plain text, dropping nested markup.
    Elements elems = mDoc.select("paragraphtitle");
    for (Element e : elems) {
        if (!e.text().isEmpty()) {
            e.text(e.text());
        }
    }
    elems = mDoc.select("paragraphsubtitle");
    for (Element e : elems) {
        if (!e.text().isEmpty()) {
            e.text(e.text());
        }
    }

    // Here we take care of tables
    // <table class="s21"> -> <table>
    mDoc.select("table[class]").removeAttr("class");
    mDoc.select("table").removeAttr("cellspacing").removeAttr("cellpadding").removeAttr("border");
    mDoc.select("colgroup").remove();
    mDoc.select("td").removeAttr("class").removeAttr("colspan").removeAttr("rowspan");
    mDoc.select("tr").removeAttr("class");
    // Drop the remaining classed <div>s that carry no text.
    elems = mDoc.select("div[class]");
    for (Element e : elems) {
        if (e.text().isEmpty()) {
            e.remove();
        }
    }

    mDoc.select("tbody").unwrap();
    // Remove nested table (a nasty table-in-a-table)
    Elements nested_table = mDoc.select("table").select("tr").select("td").select("table");
    if (!nested_table.isEmpty()) {
        nested_table.select("table").unwrap();
    }

    // Here we take care of the images
    mDoc.select("img").removeAttr("style").removeAttr("align").removeAttr("border");

    // Subs and sups
    mDoc.select("sub[class]").tagName("sub").removeAttr("class");
    mDoc.select("sup[class]").tagName("sup").removeAttr("class");
    mDoc.select("td").select("sub").tagName("td-sub");
    mDoc.select("td").select("sup").tagName("td-sup");
    // Remove floating <td-sup> tags
    mDoc.select("p").select("td-sup").tagName("sup");
    mDoc.select("p").select("td-sub").tagName("sub");

    // Box
    mDoc.select("div[class=box]").tagName("box").removeAttr("class");

    // Insert swissmedicno5 after <owner> tag (closing tag was previously malformed)
    mDoc.select("owner").after("<swissmedicno5></swissmedicno5>");
    mDoc.select("swissmedicno5").first().text(regnr_str);

    // Remove html, head and body tags
    String xml_str = mDoc.select("body").first().html();

    xml_str = xml_str.replaceAll("<sup> </sup>", "");
    xml_str = xml_str.replaceAll("<sub> </sub>", "");
    xml_str = xml_str.replaceAll("<p> <i>", "<p><i>");
    xml_str = xml_str.replaceAll("</p> </td>", "</p></td>");
    xml_str = xml_str.replaceAll("<p> </p>", "<p></p>"); // MUST be improved, the space is not a real space!!
    // NOTE(review): the pattern below is empty in this copy of the file, which makes
    // replaceAll insert "- " at every position. It presumably held a special
    // (bullet-like) character that was lost — restore it from the original source.
    xml_str = xml_str.replaceAll("", "- ");
    xml_str = xml_str.replaceAll("<br />", "");
    // Drop lines that contain only spaces or tabs.
    xml_str = xml_str.replaceAll("(?m)^[ \t]*\r?\n", "");

    // Remove multiple instances of <p></p>: at most two consecutive ones are kept.
    Scanner scanner = new Scanner(xml_str);
    StringBuilder new_xml_str = new StringBuilder(xml_str.length());
    int counter = 0;
    while (scanner.hasNextLine()) {
        String line = scanner.nextLine();
        if (line.trim().equals("<p></p>")) {
            counter++;
        } else {
            counter = 0;
        }
        if (counter < 3) {
            new_xml_str.append(line);
        }
    }
    scanner.close();

    return new_xml_str.toString();
}

From source file:co.dilaver.quoter.fragments.QODFragment.java

/**
 * Parses the quote-of-the-day response, stores the quote and author, and
 * updates the views.
 *
 * <p>The HTML is read from {@code parse.text["*"]}. The first two rows of the
 * table selected below are used as quote and author. When a previous quote is
 * already held and the new one differs, a "refreshing" snackbar is shown.
 *
 * @param response the JSON response to parse
 * @throws JSONException if an expected JSON key is missing, or if the HTML
 *         does not contain the expected table with at least two rows
 */
private void parseQodResponse(JSONObject response) throws JSONException {
    String content = response.getJSONObject("parse").getJSONObject("text").getString("*");

    Document doc = Jsoup.parse(content);
    Elements table = doc.select("table[style=\"text-align:center; width:100%\"]");
    Elements rows = table.select("tr");
    if (rows.size() < 2) {
        // Report an unexpected layout through the declared exception instead of
        // letting rows.get(...) throw an unchecked IndexOutOfBoundsException.
        throw new JSONException("Quote of the day table not found in response");
    }
    Elements qod = rows.get(0).select("td");
    Elements author = rows.get(1).select("td");
    Whitelist whitelist = Whitelist.none();

    // Strip all tags, then decode HTML entities.
    String newQuote = Html.fromHtml(Jsoup.clean(qod.toString(), whitelist)).toString();
    String newAuthor = Html.fromHtml(Jsoup.clean(author.toString(), whitelist).replace("~", "")).toString();

    boolean hasPrevious = !qodString.isEmpty() && !authorString.isEmpty();
    boolean changed = !qodString.equals(newQuote) || !authorString.equals(newAuthor);
    if (hasPrevious && changed) {
        Snackbar.make(rootLayout, getString(R.string.str_Refreshing), Snackbar.LENGTH_SHORT).show();
    }

    qodString = newQuote;
    authorString = newAuthor;

    sharedPrefStorage.setQodText(qodString);
    sharedPrefStorage.setQodAuthor(authorString);

    // Diagnostic output only, so log at debug level rather than error.
    Log.d(TAG, "quote: " + qodString);
    Log.d(TAG, "author: " + authorString);

    qodText.setText(getString(R.string.str_WithinQuotation, qodString));
    qodAuthor.setText(authorString);
}