Example usage for org.jsoup.nodes Element text

List of usage examples for org.jsoup.nodes Element text

Introduction

This page collects usage examples for the text() method of org.jsoup.nodes.Element.

Prototype

public String text() 

Source Link

Document

Gets the combined text of this element and all its children.

Usage

From source file:com.jimplush.goose.outputformatters.DefaultOutputFormatter.java

/**
 * Removes elements under {@code topNode} that have fewer than five stop words in their text,
 * which would indicate that the element is some sort of link rather than article content.
 * Elements that contain an {@code object} or {@code embed} tag are always kept.
 */
private void removeParagraphsWithFewWords() {
    if (logger.isDebugEnabled()) {
        logger.debug("removeParagraphsWithFewWords starting...");
    }

    Elements allNodes = this.topNode.getAllElements();
    for (Element el : allNodes) {
        try {
            // Count the stop words that appear in this node's combined text.
            WordStats stopWords = StopWords.getStopWordCount(el.text());

            // The tag lookups stay behind the cheap stop-word test so they only run when needed.
            if (stopWords.getStopWordCount() < 5 && el.getElementsByTag("object").isEmpty()
                    && el.getElementsByTag("embed").isEmpty()) {
                el.remove();
            }
        } catch (IllegalArgumentException e) {
            // Hand the exception to the logger as well so the stack trace is not lost.
            logger.error(e.getMessage(), e);
        }
    }
}

From source file:com.example.android.expandingcells.ExpandingCells.java

/**
 * Requests the daily horoscope feed asynchronously. On success, the text of the first
 * {@code <p>} element in the first item's description is stored in {@code horoscopeText},
 * combined into {@code Horo}, written to the second entry of {@code NewsList}, and the adapter
 * is notified. If the description has no {@code <p>} element, nothing is updated.
 */
public void getHoroscope() {

    AsyncHttpClient client = new AsyncHttpClient();
    client.get(
            "http://pipes.yahoo.com/pipes/pipe.run?_id=_omfgXdL3BGGadhGdrq02Q&_render=json&sign=Virgo&url=http%3A%2F%2Fwww.astrology.com%2Fhoroscopes%2Fdaily-horoscope.rss",
            new JsonHttpResponseHandler() {
                @Override
                public void onSuccess(JSONObject response) {
                    try {
                        JSONArray horoscopeJsonResults = response.getJSONObject("value").getJSONArray("items");

                        // Pass the index value based on what sunsign, here, 0->Aries, 1->Taurus ... etc
                        String horoscopeFullString = horoscopeJsonResults.getJSONObject(0)
                                .getString("description");

                        Document doc = Jsoup.parse(horoscopeFullString);
                        Element p = doc.select("p").first();
                        if (p == null) {
                            // first() yields null for an empty selection; bail out instead of
                            // crashing the callback with a NullPointerException.
                            Log.d("DEBUG", "Horoscope description contains no <p> element");
                            return;
                        }
                        horoscopeText = p.text();
                        Horo_Image = "drawable://" + R.drawable.mb_horoscope;
                        Horo = "Daily Horoscope for " + CommonLib.findZodiacSign("9", "6") + ": \n"
                                + horoscopeText;
                        Log.d("DEBUG", Horo);
                        Log.d("DEBUG", "Horo");
                        NewsList.get(1).setNews(Horo_Image, "Horoscope", Horo);
                        // One notification is enough; the original issued it twice in a row.
                        adapter.notifyDataSetChanged();
                        Log.d("DEBUG", "Horo1");
                    } catch (JSONException e) {
                        // Log through the platform logger (with the stack trace) rather than stderr.
                        Log.d("DEBUG", "pipes", e);
                    }
                }
            });
}

From source file:accountgen.controller.Controller.java

/**
 * Sets the person's phone to the text of the first element matching {@code .tel} in the
 * document. The person is left unchanged when the document has no such element.
 *
 * @param doc document to search
 * @param p   person whose phone is set
 */
private void setPhone(Document doc, Person p) {
    Element tel = doc.select(".tel").first();
    if (tel == null) {
        // first() yields null for an empty selection; avoid a NullPointerException.
        return;
    }
    p.setPhone(tel.text());
}

From source file:accountgen.controller.Controller.java

/**
 * Sets the person's email to the first space-separated token of the text of the first element
 * matching {@code .email} in the document. The person is left unchanged when the document has
 * no such element.
 *
 * @param doc document to search
 * @param p   person whose email is set
 */
private void setEmail(Document doc, Person p) {
    Element em = doc.select(".email").first();
    if (em == null) {
        // first() yields null for an empty selection; avoid a NullPointerException.
        return;
    }
    // Only the part before the first space is kept.
    p.setEmail(em.text().split(" ")[0]);
}

From source file:com.amastigote.xdu.query.module.WaterAndElectricity.java

/**
 * Fetches the page for {@code METINFO_SUFFIX} via {@code getPage} (with an empty request body)
 * and returns the text of its non-empty {@code td} cells, each shortened by the second loop
 * below.
 *
 * @return the processed cell texts, in the order the {@code td} selection yielded them
 * @throws IOException declared by this method; presumably raised by {@code getPage} — confirm
 */
private List<String> query_metInfo() throws IOException {
    Document document = getPage("", METINFO_SUFFIX);
    Elements elements = document.select("td");

    List<String> stringArrayList = new ArrayList<>();

    // Keep only the cells whose text is non-empty.
    for (Element td : elements) {
        String tmp = td.text();
        if (!"".equals(tmp)) {
            stringArrayList.add(tmp);
        }
    }

    // Replace every entry with the part that follows the searched delimiter.
    // NOTE(review): as written the delimiter literal is the empty string, so indexOf returns 0
    // and exactly the first character of each entry is dropped. The literal may have lost a
    // non-ASCII character (e.g. a full-width colon) to an encoding problem — confirm.
    for (int i = 0; i < stringArrayList.size(); i++) {
        stringArrayList.set(i, stringArrayList.get(i).substring(stringArrayList.get(i).indexOf("") + 1));
    }

    /*
     * NOTE(review): the original comment describing the layout of stringArrayList was
     * unreadable here (its characters had been replaced by '?'). It mentioned "(3n)",
     * presumably meaning three entries per record — TODO confirm against the upstream source.
     */
    return stringArrayList;
}

From source file:net.devietti.ArchConfMapServlet.java

/**
 * Returns the URL of the external conference website (not the WikiCFP page) for the given
 * eventid./*from www  .j  av  a  2s .  c om*/
 */
private void getConfLink(HttpServletRequest req, HttpServletResponse resp) throws IOException {
    String eids = req.getParameter("eventid");
    if (eids == null) {
        error("missing required URL parameter: eventid");
        return;
    }
    Integer eid;
    try {
        eid = Integer.valueOf(eids);
    } catch (NumberFormatException e) {
        error(e.getMessage());
        return;
    }
    if (eid == null || eid == 0) {
        error("error parsing eventid");
        return;
    }

    // pull down the CFP
    Document cfp = getURL("http://www.wikicfp.com/cfp/servlet/event.showcfp?eventid=" + eids);

    for (Element a : cfp.select("tr td[align=center] a")) {
        Element td = a.parent();
        if (td.text().contains("Link:") && a.hasAttr("href") && a.attr("href").contains("http://")) {
            // got the link!
            resp.setContentType("application/json");
            resp.getWriter().println(GSON.toJson(a.attr("href")));
            return;
        }
    }

    error("no matching link");
}

From source file:net.pixomania.crawler.W3C.parser.rules.editors.version.VersionEditorRule1.java

/**
 * Extracts the editors listed under the "version 1" headings of a spec.
 *
 * <p>The rule selects every {@code dt} containing "version 1" together with the {@code dd}
 * siblings that follow such a {@code dt}. The text of the most recent {@code dt} is remembered
 * as the version. Each {@code dd} is split on {@code <br>} markup; a {@code dd} without such
 * a break is parsed as one person, otherwise every non-empty part is parsed separately. Links
 * found next to a name become the person's email (when the href contains "@") or a website.
 *
 * @param url URL of the spec, used only in the warning message
 * @param doc parsed spec document
 * @return the editors found, or {@code null} when nothing matched or no person could be parsed
 */
@Override
public ArrayList<Person> run(String url, Document doc) {
    ArrayList<Person> editorList = new ArrayList<>();

    Elements editors = doc.select("dt:contains(version 1), dt:contains(version 1) ~ dd");
    if (editors.isEmpty()) {
        return null;
    }

    boolean skip = false;
    String version = "";
    for (Element editor : editors) {
        Element prev = editor.previousElementSibling();
        if (prev != null) {
            // A preceding heading that is not a "version 1" heading switches skipping on.
            if (prev.tagName().equals("dt") && !isVersionOneHeading(prev.text())) {
                skip = true;
            }

            if (skip) {
                // Skipping ends once the following sibling is a "version 1" heading; the
                // current element itself is skipped either way.
                Element following = editor.nextElementSibling();
                if (following != null && isVersionOneHeading(following.text())) {
                    skip = false;
                }
                continue;
            }
        }

        if (editor.tagName().equals("dt")) {
            version = editor.text();
            continue;
        }

        String[] splitted = editor.html().split("<br />|<br clear=\"none\" />");

        if (splitted.length < 2) {
            String text = editor.text();
            if (isSectionNote(text)) {
                Log.log("warning", "Spec " + url + " may refer to a different section!");
                continue;
            }
            if (isOrganisationLabel(text)) {
                continue;
            }
            Person result = NameParser.parse(text);
            if (result == null) {
                continue;
            }

            result.setVersion(version);
            attachLinks(result, editor);
            editorList.add(result);
        } else {
            for (String split : splitted) {
                if (split.isEmpty()) {
                    continue;
                }
                if (isSectionNote(split)) {
                    Log.log("warning", "Spec " + url + " may refer to a different section!");
                    continue;
                }
                if (isOrganisationLabel(split)) {
                    continue;
                }
                Document newdoc = Jsoup.parse(split.replaceAll("\n", ""));
                Person result = NameParser.parse(newdoc.text());
                if (result == null) {
                    continue;
                }

                result.setVersion(version);
                attachLinks(result, newdoc);
                editorList.add(result);
            }
        }

        // Stop at the next heading unless it is another "editors (version 1" heading.
        Element next = editor.nextElementSibling();
        if (next != null && next.tag().getName().equals("dt")
                && !next.text().trim().toLowerCase(java.util.Locale.ROOT).startsWith("editors (version 1")) {
            break;
        }
    }

    if (editorList.isEmpty()) {
        return null;
    }

    return editorList;
}

/** Tells whether trimmed, lower-cased heading text starts with "version 1" or "editors (version 1". */
private static boolean isVersionOneHeading(String headingText) {
    // Locale.ROOT keeps the comparison independent of the JVM's default locale.
    String normalized = headingText.trim().toLowerCase(java.util.Locale.ROOT);
    return normalized.startsWith("version 1") || normalized.startsWith("editors (version 1");
}

/** Tells whether the text is one of the known remarks that stand in place of an editor name. */
private static boolean isSectionNote(String text) {
    String lower = text.toLowerCase(java.util.Locale.ROOT);
    return lower.startsWith("(in alphabetic")
            || lower.startsWith("see acknowl")
            || lower.startsWith("the w3")
            || lower.startsWith("(see ac")
            || lower.startsWith("see participants")
            || lower.contains("note:");
}

/** Tells whether the text is exactly one of the organisation labels "WHATWG:" or "W3C:". */
private static boolean isOrganisationLabel(String text) {
    return text.equals("WHATWG:") || text.equals("W3C:");
}

/** Copies every non-empty anchor href under {@code scope} to the person as an email or a website. */
private static void attachLinks(Person person, Element scope) {
    for (Element anchor : scope.select("a")) {
        String href = anchor.attr("href");
        if (href.isEmpty()) {
            continue;
        }
        if (href.contains("@")) {
            person.setEmail(href.replace("mailto:", ""));
        } else {
            person.addWebsite(href);
        }
    }
}

From source file:com.amastigote.xdu.query.module.WaterAndElectricity.java

/**
 * Posts a form body for the given duration to the page identified by {@code PAYINFO_SUFFIX}
 * and returns the text of the non-empty {@code td} cells of the result, each shortened by the
 * second loop below.
 *
 * @param duration {@code ONE_MONTH} or {@code THREE_MONTH}
 * @return the processed cell texts, in the order the {@code td} selection yielded them
 * @throws IOException declared by this method; presumably raised by the page helpers — confirm
 * @throws IllegalArgumentException if {@code duration} is any other value
 */
private List<String> query_payInfo(Duration duration) throws IOException {
    // Called before VIEWSTATE is read below; presumably it refreshes that value — confirm.
    getPageAttributes(PAYINFO_SUFFIX);
    // Form body is assembled piece by piece: But_Seach3, __VIEWSTATE, HiddenField_webName
    // (left empty) and HiddenField_UserID.
    String OUTPUT_DATA = "But_Seach3=";
    switch (duration) {
    case ONE_MONTH:
        OUTPUT_DATA += ONE_MONTH;
        break;
    case THREE_MONTH:
        OUTPUT_DATA += THREE_MONTH;
        break;
    default:
        throw new IllegalArgumentException("Bad parameter, check document for help");
    }
    OUTPUT_DATA += "&__VIEWSTATE=";
    OUTPUT_DATA += VIEWSTATE;
    OUTPUT_DATA += "&HiddenField_webName=";
    OUTPUT_DATA += "&HiddenField_UserID=";
    OUTPUT_DATA += ID;

    Document document = getPage(OUTPUT_DATA, PAYINFO_SUFFIX);
    Elements elements = document.select("td");

    List<String> stringArrayList = new ArrayList<>();

    // Keep only the cells whose text is non-empty.
    for (Element td : elements) {
        String tmp = td.text();
        if (!"".equals(tmp)) {
            stringArrayList.add(tmp);
        }
    }

    // Strip the label in front of each value.
    // NOTE(review): as written, all three literals below are the empty string. contains("") is
    // therefore always true, every entry loses its first two characters, and the final set()
    // is never reached; an entry of length one would make substring(2) throw. The literals
    // probably held non-ASCII delimiters that were lost to an encoding problem — confirm.
    for (int i = 0; i < stringArrayList.size(); i++) {
        if (stringArrayList.get(i).contains("")) {
            stringArrayList.set(i, stringArrayList.get(i).substring(stringArrayList.get(i).indexOf("") + 2));
            continue;
        }
        stringArrayList.set(i, stringArrayList.get(i).substring(stringArrayList.get(i).indexOf("") + 1));
    }

    /*
     * NOTE(review): the original comment describing the layout of stringArrayList was
     * unreadable here (its characters had been replaced by '?'). It mentioned "(2n)",
     * presumably meaning two entries per record — TODO confirm against the upstream source.
     */
    return stringArrayList;
}

From source file:com.amastigote.xdu.query.module.WaterAndElectricity.java

/**
 * Posts a form body for the given duration to the page identified by {@code USEINFO_SUFFIX}
 * and returns the text of the non-empty {@code td} cells of the result, with spaces removed,
 * some cells split in two, and each entry shortened by the last loop below.
 *
 * @param duration {@code ONE_MONTH} or {@code THREE_MONTH}
 * @return the processed cell texts, in the order the {@code td} selection yielded them
 * @throws IOException declared by this method; presumably raised by the page helpers — confirm
 * @throws IllegalArgumentException if {@code duration} is any other value
 */
private List<String> query_useInfo(Duration duration) throws IOException {
    // Called before VIEWSTATE is read below; presumably it refreshes that value — confirm.
    getPageAttributes(USEINFO_SUFFIX);
    // Form body is assembled piece by piece: But_Seach3, __VIEWSTATE, HiddenField_webName
    // (left empty) and HiddenField_UserID.
    String OUTPUT_DATA = "But_Seach3=";
    switch (duration) {
    case ONE_MONTH:
        OUTPUT_DATA += ONE_MONTH;
        break;
    case THREE_MONTH:
        OUTPUT_DATA += THREE_MONTH;
        break;
    default:
        throw new IllegalArgumentException("Bad parameter, check document for help");
    }
    OUTPUT_DATA += "&__VIEWSTATE=";
    OUTPUT_DATA += VIEWSTATE;
    OUTPUT_DATA += "&HiddenField_webName=";
    OUTPUT_DATA += "&HiddenField_UserID=";
    OUTPUT_DATA += ID;

    Document document = getPage(OUTPUT_DATA, USEINFO_SUFFIX);
    Elements elements = document.select("td");

    List<String> stringArrayList = new ArrayList<>();

    for (Element td : elements) {
        String tmp = td.text();
        // Drop every space character before testing for emptiness.
        tmp = tmp.replaceAll(" ", "");
        if (!"".equals(tmp)) {
            // A cell containing the marker is split into the part before the marker and the
            // part starting at the marker, stored as two consecutive entries.
            // NOTE(review): the marker reads "???" here, which looks like mis-encoded
            // non-ASCII text rather than three literal question marks — confirm.
            if (tmp.contains("???")) {
                stringArrayList.add(tmp.substring(0, tmp.indexOf("???")));
                stringArrayList.add(tmp.substring(tmp.indexOf("???")));
                continue;
            }
            stringArrayList.add(tmp);
        }
    }

    // Replace every entry with the part that follows the searched delimiter.
    // NOTE(review): as written the delimiter literal is the empty string, so indexOf returns 0
    // and exactly the first character of each entry is dropped. The literal may have lost a
    // non-ASCII character to an encoding problem — confirm.
    for (int i = 0; i < stringArrayList.size(); i++) {
        stringArrayList.set(i, stringArrayList.get(i).substring(stringArrayList.get(i).indexOf("") + 1));
    }

    /*
     * NOTE(review): the original comment describing the layout of stringArrayList was
     * unreadable here (its characters had been replaced by '?'). It mentioned "(5n)",
     * presumably meaning five entries per record — TODO confirm against the upstream source.
     */
    return stringArrayList;
}

From source file:feedzilla.Feed.java

/**
 * Fills this feed's fields from the direct children of the given entry element.
 *
 * <p>Children are dispatched on their node name. Once a {@code source} child has been seen,
 * later {@code title} and {@code link} children are stored in the source fields instead of the
 * entry's own. Node names that are not handled are reported through the debug log.
 *
 * @param entry element whose children are read
 */
private void parser(Element entry) {
    boolean sourceSeen = false;
    for (Element child : entry.children()) {
        final String tag = child.nodeName();
        if (tag.equals("id")) {
            // The number is taken from the part after the first ':' of the id text.
            String[] idParts = child.text().split(":");
            this.id = Integer.parseInt(idParts[1]);
        } else if (tag.equals("title")) {
            String titleText = child.text();
            if (sourceSeen) {
                this.source_title = titleText;
            } else {
                this.title = titleText;
            }
        } else if (tag.equals("summary")) {
            // Only the text in front of the first "<br" is kept.
            this.summary = child.text().split("<br")[0];
        } else if (tag.equals("published")) {
            this.published = child.text();
        } else if (tag.equals("updated")) {
            this.updated = child.text();
        } else if (tag.equals("author")) {
            this.author = child.text();
        } else if (tag.equals("link")) {
            String href = child.attr("href");
            if (sourceSeen) {
                this.source_link = href;
            } else {
                this.link = href;
            }
        } else if (tag.equals("rights")) {
            this.copyright = child.text();
        } else if (tag.equals("source")) {
            sourceSeen = true;
        } else {
            Log.debug("Unknow TAG: " + child.nodeName());
        }
    }
}