Example usage for org.jsoup.select Elements text

List of usage examples for org.jsoup.select Elements text

Introduction

On this page you can find example usages of the text() method of org.jsoup.select.Elements.

Prototype

public String text() 

Source Link

Document

Get the combined text of all the matched elements.

Usage

From source file:GIST.IzbirkomExtractor.TableExtractor.java

/**
     * Cleans up leftover HTML markup from the content of a table cell.
     * <p>
     * Every {@code <div>} inside the cell is treated as one address line. Lines whose
     * cleaned-up text is blank are skipped. If a line contains {@code <strong>} elements,
     * the first occurrence of their combined text inside the line is surrounded with
     * single spaces.
     *
     * @param cell_content element holding the HTML of the table cell
     * @return a list containing each non-blank line of the cell content with all HTML markup removed
     */
    private ArrayList<String> cleanLeftoverHTML(Element cell_content) {

        ArrayList<String> streets_and_numbers = new ArrayList<String>();

        /* <div>s designate separate lines inside the table cell */
        for (Element addr_line : cell_content.getElementsByTag("div")) {

            /* skip empty address lines */
            String addr_line_text = cleanupUNICODE(addr_line.text());
            if (StringUtils.isBlank(addr_line_text)) {
                continue;
            }

            /* <strong> is not particularly useful, but can designate placement of simple separators like space */
            Elements streets = addr_line.getElementsByTag("strong");
            if (!streets.isEmpty()) {
                String street_text = streets.text();
                /*
                 * Both sides of replaceFirst() are regex-aware: the pattern is quoted, and the
                 * replacement must be quoted too, otherwise a '$' or '\' in the street text is
                 * interpreted as a group reference / escape and throws or corrupts the line.
                 */
                addr_line_text = addr_line_text.replaceFirst(Pattern.quote(street_text),
                        java.util.regex.Matcher.quoteReplacement(" " + street_text + " "));
            }

            streets_and_numbers.add(addr_line_text);
        }
        return streets_and_numbers;
    }

From source file:com.FFLive.Player.java

/**
 * Loads the data for this player into the instance fields.
 * <p>
 * For the pseudo player with ID {@code "-1"} the score is scraped from the
 * event-history page of entry 1 for the current gameweek and the name is set to
 * {@code "Average"}. For every other ID the player's JSON document is downloaded and
 * its fields are copied into this object.
 * <p>
 * Connection refusals and socket timeouts are retried until {@code timeoutCheck()}
 * reports more than three attempts; other I/O problems and missing JSON fields are
 * logged and the method returns without completing the load.
 */
public void getPlayer() {
    try {
        if (playerID.equals("-1")) {
            //Average Team Player...
            Document doc = Jsoup.connect("http://fantasy.premierleague.com/entry/1/event-history/" + GW + "/")
                    .get();
            Elements averageScore = doc.select("div.ismUnit.ismSize2of5.ismLastUnit")
                    .select("div.ismSBSecondaryVal");
            if (averageScore.isEmpty()) {
                playerScore = 0;
            } else {
                try {
                    // Strip everything but digits before parsing
                    playerScore = Integer.parseInt(averageScore.text().replaceAll("\\D+", ""));
                } catch (NumberFormatException n) {
                    Main.log.log(2, "Issue saving Average Team..." + n + "\n");
                }
            }
            playerName = "Average";
        } else {
            Main.log.log(7, "Fetching Player " + playerID + "\n");
            //Connects to the players info page
            InputStream playerJson = new URL(
                    "http://fantasy.premierleague.com/web/api/elements/" + playerID + "/").openStream();
            JSONObject playerValues;
            try {
                //Reads the data into a JSON object (via casting into a regular object)
                Reader reader = new InputStreamReader(playerJson, "UTF-8");
                playerValues = (JSONObject) JSONValue.parse(reader);
            } finally {
                // Always release the connection, even when parsing fails
                playerJson.close();
            }

            //TODO Check if there are values overlength
            //Max Length Ref playerCount INT DEFAULT 1 NOT NULL, firstName VARCHAR(40), lastName VARCHAR(40),
            //webName VARCHAR(50), score INT, gameweekBreakdown VARCHAR(250), breakdown VARCHAR(250),
            //teamName VARCHAR(40), currentFixture VARCHAR(40), nextFixture VARCHAR(40), status VARCHAR(10),
            //news VARCHAR(250), photo VARCHAR(30))

            //Adds Required Data. A missing key makes get() return null, so toString()
            //throws the NullPointerException handled at the bottom of this method.
            firstName = playerValues.get("first_name").toString();
            lastName = playerValues.get("second_name").toString();
            playerName = playerValues.get("web_name").toString();
            playerTeam = playerValues.get("team_name").toString();
            teamNumber = Integer.parseInt(playerValues.get("team_id").toString());
            position = playerValues.get("type_name").toString();
            playerScore = Integer.parseInt(playerValues.get("event_total").toString());
            gameweekBreakdown = playerValues.get("event_explain").toString();
            currentFixture = playerValues.get("current_fixture").toString();
            nextFixture = playerValues.get("next_fixture").toString();
            status = playerValues.get("status").toString();
            news = playerValues.get("news").toString();
            photo = playerValues.get("photo").toString();
        }
    } catch (ConnectException c) {
        retryGetPlayerAfterTimeout();
    } catch (SocketTimeoutException e) {
        retryGetPlayerAfterTimeout();
    } catch (UnknownHostException g) {
        Main.log.log(6, "No Connection... Skipping\n");
    } catch (NoRouteToHostException h) {
        Main.log.log(6, "No Connection... Skipping\n");
    } catch (IOException f) {
        Main.log.log(6, "In getPlayer: " + f + "\n");
    } catch (NullPointerException n) {
        Main.log.log(2, "Missing Player Field with ID:" + playerID + " " + n + "\n");
        Main.log.log(9, n);
    }
}

/**
 * Retries {@link #getPlayer()} after a connection timeout, or gives up once
 * {@code timeoutCheck()} reports more than three attempts.
 * NOTE(review): assumes timeoutCheck() counts the attempts made so far - confirm.
 */
private void retryGetPlayerAfterTimeout() {
    if (timeoutCheck() > 3) {
        Main.log.log(2, "Too Many Timeouts.. Skipping\n");
        // Actually skip: without this return the retry would recurse without bound
        return;
    }
    Main.log.log(6, "Timeout Connecting, Retrying...\n");
    getPlayer();
}

From source file:com.elevenpaths.googleindexretriever.GoogleSearch.java

/**
 * Runs the configured query and returns the combined text of the elements
 * matching the {@code .st} selector in the result document.
 *
 * @return the text of the matched elements
 * @throws EmptyQueryException if the query is empty
 * @throws ManyResultsException if more than one element matches {@code .st}
 * @throws CaptchaException declared for the data fetch; not thrown directly here
 * @throws UnsupportedEncodingException declared for the data fetch; not thrown directly here
 */
public String getResults()
        throws EmptyQueryException, ManyResultsException, CaptchaException, UnsupportedEncodingException {
    if (this.query.isEmpty()) {
        throw new EmptyQueryException();
    }

    final Elements snippets = getData(this.query).select(".st");

    // Zero or one match is acceptable; anything more is ambiguous.
    if (snippets.size() <= 1) {
        return snippets.text();
    }
    throw new ManyResultsException();
}

From source file:gov.medicaid.screening.dao.impl.OIGDAOBean.java

/**
 * Parses the excluded provider profile details page.
 *
 * @param page the details page/*from  w  w w .j  a  va  2s  .  c  o m*/
 * @return the parsed license details
 * @throws ParsingException if the expected tags were not found
 */
private ProviderProfile parseProfile(Document page) throws ParsingException {
    ProviderProfile profile = new ProviderProfile();

    // name
    User user = new User();
    profile.setUser(user);
    user.setLastName(page.select("th:containsOwn(Last Name) + td").text());
    user.setFirstName(page.select("th:containsOwn(First Name) + td").text());

    // business
    String businessName = page.select("th:containsOwn(Entity) + td").text();
    if (!"N/A".equals(businessName)) {
        Business business = new Business();
        profile.setBusiness(business);
        business.setName(businessName);
    }

    // DOB
    Date dob = parseDate(page.select("th:has(acronym:containsOwn(DOB)) + td").text(), DATE_FORMAT);
    if (dob != null) {
        profile.setDob(dob);
    }

    // exclusion type
    ExclusionType exclusionType = new ExclusionType();
    profile.setExclusionType(exclusionType);
    exclusionType.setName(page.select("th:containsOwn(Excl. Type) + td").text());

    // specialty
    List<Specialty> specialties = new ArrayList<Specialty>();
    Specialty specialty = new Specialty();
    specialties.add(specialty);
    specialty.setName(page.select("th:containsOwn(Specialty) + td").text());
    profile.setSpecialties(specialties);

    // address
    Elements addrElement = page.select("th:containsOwn(Address) + td");
    String addr = addrElement.text();
    Element addrNextRow = addrElement.parents().first().nextElementSibling();
    if ("".equals(addrNextRow.select("th").text())) {
        addr += " " + addrNextRow.select("td").text();
    }
    Address address = new Address();
    address.setLocation(addr);
    profile.setAddresses(Arrays.asList(new Address[] { address }));

    Date date = parseDate(page.select("th:containsOwn(Excl. Date) + td").text(), DATE_FORMAT);
    if (date != null) {
        profile.setRequestEffectiveDate(date);
    }

    return profile;
}

From source file:com.quarterfull.newsAndroid.NewsDetailFragment.java

/**
 * Shows a dialog describing the image or link that was long-pressed inside the WebView.
 * <p>
 * For images the dialog receives the file name part of the image URL, the image's
 * {@code title} attribute (falling back to {@code alt} when the title is empty) and the
 * image URL. For plain links it receives the link text and the URL. Nothing is shown
 * when the view is not a WebView, when there is no hit-test result, or when the hit
 * carries no usable URL.
 *
 * @param menu     the context menu being built (not used)
 * @param v        the view the context menu was requested for
 * @param menuInfo extra information about the item (not used)
 */
public void onCreateContextMenu(ContextMenu menu, View v, ContextMenu.ContextMenuInfo menuInfo) {
    if (!(v instanceof WebView)) {
        return;
    }
    WebView.HitTestResult result = ((WebView) v).getHitTestResult();
    if (result == null) {
        return;
    }
    int type = result.getType();

    Document htmldoc = Jsoup.parse(html);

    FragmentTransaction ft = getFragmentManager().beginTransaction();

    if (type == WebView.HitTestResult.IMAGE_TYPE
            || type == WebView.HitTestResult.SRC_IMAGE_ANCHOR_TYPE) {
        String imageUrl = result.getExtra();
        // getExtra() may be null; only http(s) and file URLs are handled
        if (imageUrl == null || !(imageUrl.startsWith("http") || imageUrl.startsWith("file"))) {
            return;
        }

        URL mImageUrl;
        try {
            mImageUrl = new URL(imageUrl);
        } catch (MalformedURLException e) {
            return;
        }

        // Title of the dialog is the file name part of the URL
        String imgsrcval = imageUrl.substring(imageUrl.lastIndexOf('/') + 1);

        // Look the image up in the article HTML; it may legitimately be absent,
        // so check for an empty match instead of catching NullPointerException.
        Elements imgtag = htmldoc.getElementsByAttributeValueContaining("src", imageUrl);
        String imgtitle = imgtag.isEmpty() ? "" : imgtag.first().attr("title");
        String imgaltval = imgtag.isEmpty() ? "" : imgtag.first().attr("alt");

        String title = imgsrcval;
        int titleIcon = android.R.drawable.ic_menu_gallery;
        String text = (imgtitle.isEmpty()) ? imgaltval : imgtitle;

        // Create and show the dialog.
        DialogFragment newFragment = NewsDetailImageDialogFragment.newInstanceImage(title,
                titleIcon, text, mImageUrl);
        newFragment.show(ft, "menu_fragment_dialog");
    } else if (type == WebView.HitTestResult.SRC_ANCHOR_TYPE) {
        String url = result.getExtra();
        if (url == null) {
            return;
        }

        // Validate the URL before querying the document with it
        URL mUrl;
        try {
            mUrl = new URL(url);
        } catch (MalformedURLException e) {
            return;
        }
        String text = htmldoc.getElementsByAttributeValueContaining("href", url).text();

        // Create and show the dialog.
        DialogFragment newFragment = NewsDetailImageDialogFragment.newInstanceUrl(text,
                mUrl.toString());
        newFragment.show(ft, "menu_fragment_dialog");
    }
    // Other hit types (EMAIL_TYPE, GEO_TYPE, PHONE_TYPE, EDIT_TEXT_TYPE) are not handled.
}

From source file:Leitura.Jxr.java

/**
 * Returns the combined text of all {@code <pre>} elements of the document after
 * stripping the JXR line-number anchors and the comment / Javadoc-comment elements.
 * Keyword and string-literal elements are left in place.
 * Note that the matched nodes are removed from the document itself.
 *
 * @return the source text without line numbers and comments
 * @throws IOException declared by the signature; not thrown directly by this method
 */
public String leituraJxr() throws IOException {
    final Elements preBlocks = document.getElementsByTag("pre");

    // Line-number anchors can be dropped for all blocks in one go
    preBlocks.select("a.jxr_linenumber").remove();

    // Comment markup is removed block by block, plain comments before Javadoc
    final String[] commentClasses = { "jxr_comment", "jxr_javadoccomment" };
    for (Element pre : preBlocks) {
        for (String commentClass : commentClasses) {
            pre.getElementsByClass(commentClass).remove();
        }
    }

    return preBlocks.text();
}

From source file:com.qkj.qkjmanage.action.OilManageAction.java

/**
 * Scrapes three values from the gold600 Qinghai page and writes them to the
 * current HTTP response as a JSON array.
 * <p>
 * The values are the texts of the elements with CSS classes {@code JO_330q63},
 * {@code JO_331q63} and {@code JO_332q63}, in that order (presumably the current
 * oil prices - verify against the page). A class without matches contributes an
 * empty string.
 *
 * @throws Exception if fetching the page or writing the response fails; the
 *         original exception is logged and attached as the cause
 */
public void getOilPrice() throws Exception {
    List<String> prices = new ArrayList<>();
    try {
        Document doc = Jsoup.connect("http://ny.gold600.com/qinghai.html").get();

        // One entry per price element, order matters for the consumer of the array
        for (String priceClass : new String[] { "JO_330q63", "JO_331q63", "JO_332q63" }) {
            prices.add(doc.getElementsByClass(priceClass).text());
        }

        JSONArray jsonArray = JSONArray.fromObject(prices);
        HttpServletResponse response = ServletActionContext.getResponse();
        response.setContentType("text/html;charset=UTF-8");
        response.getWriter().print(jsonArray);
    } catch (Exception e) {
        log.error(this.getClass().getName() + "!getOilPrice ??:", e);
        throw new Exception(this.getClass().getName() + "!getOilPrice ??:", e);
    }
}

From source file:abelymiguel.miralaprima.GetPrima.java

/**
 * Scrapes the current risk premium for an index and stores it in the database.
 * <p>
 * The value is taken from the last {@code .price} element of the provider page; the
 * change is taken from the {@code .trending_up} element or, if that is empty, from the
 * {@code .trending_down} element (in which case delta and percent are negated). The
 * trend text is split at its first space into a delta part and a percent part, and the
 * last character of the percent part is dropped before parsing. When neither trend
 * element has text, delta and percent are zero.
 * <p>
 * On any failure the exception is logged and the latest values stored in the database
 * are returned instead.
 *
 * @param country_code country whose database record is updated or created
 * @param providerUrl  base URL of the data provider
 * @param indexName    index name appended to {@code providerUrl}
 * @return a map with the keys {@code prima_value}, {@code prima_delta} and {@code prima_percent}
 */
private HashMap<String, Float> getPrimaDataBloom(String country_code, String providerUrl, String indexName) {

    HashMap<String, Float> respuestaJson = new HashMap<String, Float>();

    try {
        Document doc = Jsoup.connect(providerUrl + indexName).get();

        // Dots are stripped from the price text before parsing
        String priceText = doc.select(".price").last().text();
        Float prima_value = Float.valueOf(priceText.replace(".", ""));

        String upText = doc.select(".trending_up").text();
        String downText = doc.select(".trending_down").text();
        boolean rising = !upText.equals("");
        boolean falling = !rising && !downText.equals("");

        Float prima_delta;
        Float prima_percent;
        if (rising || falling) {
            String trend = rising ? upText : downText;
            int gap = trend.indexOf(" ");

            // "<delta> <percent>" followed by one trailing character that is dropped
            float delta = Float.parseFloat(trend.substring(0, gap).replace(",", ""));
            float percent = Float.parseFloat(trend.substring(gap + 1, trend.length() - 1));
            if (falling) {
                delta = delta * -1;
                percent = percent * -1;
            }
            prima_delta = delta;
            prima_percent = percent;
        } else {
            prima_delta = 0f;
            prima_percent = 0f;
        }

        respuestaJson.put("prima_value", prima_value);
        respuestaJson.put("prima_delta", prima_delta);
        respuestaJson.put("prima_percent", prima_percent);

        // One record per day: update today's record if there is one, else insert
        if (!isSameDay(country_code)) {
            this.storePrimaInDB(prima_value, prima_delta, prima_percent, country_code);
        } else {
            this.updatePrimaInDB(prima_value, prima_delta, prima_percent,
                    this.getLatestPrimaIdFromDB(country_code));
        }
    } catch (Exception ex) {
        Logger.getLogger(GetPrima.class.getName()).log(Level.SEVERE, null, ex);

        // Fall back to the most recent stored values
        HashMap<String, Object> primaJson = getLatestPrimaFromDB(country_code);
        for (String key : new String[] { "prima_value", "prima_delta", "prima_percent" }) {
            respuestaJson.put(key, (Float) primaJson.get(key));
        }
    }

    return respuestaJson;
}

From source file:net.kevxu.purdueassist.course.ScheduleDetail.java

/**
 * Builds a {@code ScheduleDetailEntry} from a detailed class information page.
 * <p>
 * For every table whose {@code summary} marks it as the class information table, the
 * first {@code ddlabel} element supplies the basic info and the first {@code dddefault}
 * element supplies the details. The details must contain exactly one seating table
 * whose first {@code tbody} has three or four children: child 1 holds the seats, child 2
 * the waitlist seats and, when present, child 3 the cross-list seats. The seating table
 * is then removed from the document and the remaining detail HTML is handed to
 * {@code setRemainingInfo}.
 *
 * @param document the page to parse
 * @return the populated entry
 * @throws HtmlParseException if the page does not have the expected structure
 * @throws CourseNotFoundException if there is no class table and the page's message
 *         table states that no detailed class information was found
 * @throws ResultNotMatchException declared by the signature; not thrown directly here
 */
private ScheduleDetailEntry parseDocument(Document document)
        throws HtmlParseException, CourseNotFoundException, ResultNotMatchException {
    ScheduleDetailEntry entry = new ScheduleDetailEntry(term, crn);
    Elements classTables = document.getElementsByAttributeValue("summary",
            "This table is used to present the detailed class information.");

    // No class table at all: decide between "course not found" and "unparseable page"
    if (classTables.isEmpty()) {
        Elements messages = document.getElementsByAttributeValue("summary",
                "This layout table holds message information");
        boolean notFoundReported = !messages.isEmpty()
                && messages.text().contains("No detailed class information found");
        if (!notFoundReported) {
            throw new HtmlParseException(
                    "Course table not found, but page does not contain message stating no course found.");
        }
        throw new CourseNotFoundException(messages.text());
    }

    for (Element classTable : classTables) {
        // basic info for the selected course
        Element basicInfo = classTable.getElementsByClass("ddlabel").first();
        if (basicInfo == null) {
            throw new HtmlParseException("Basic info element empty.");
        }
        setBasicInfo(entry, basicInfo.text());

        // detailed course info
        Element detailedInfo = classTable.getElementsByClass("dddefault").first();
        if (detailedInfo == null) {
            throw new HtmlParseException("Detailed info element empty.");
        }

        // seat info
        Elements seatTables = detailedInfo.getElementsByAttributeValue(
                "summary", "This layout table is used to present the seating numbers.");
        if (seatTables.size() != 1) {
            throw new HtmlParseException(
                    "Seat detail elements size not 1. We have " + seatTables.size() + ".");
        }

        Elements seatRows = seatTables.first().getElementsByTag("tbody").first().children();
        int rowCount = seatRows.size();
        // Three or four children are accepted, although the message only mentions 3
        if (rowCount != 3 && rowCount != 4) {
            throw new HtmlParseException("Seat detail entry elements size not 3. We have "
                    + rowCount + ".");
        }
        setSeats(entry, seatRows.get(1).text());
        setWaitlistSeats(entry, seatRows.get(2).text());
        if (rowCount == 4) {
            setCrosslistSeats(entry, seatRows.get(3).text());
        }

        // drop the seat table so it does not show up in the remaining info
        seatTables.remove();
        setRemainingInfo(entry, detailedInfo.html());
    }

    return entry;
}