Example usage for org.jsoup.nodes Document select

List of usage examples for org.jsoup.nodes Document select

Introduction

On this page you can find example usages of org.jsoup.nodes Document select.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:com.amastigote.xdu.query.module.EduSystem.java

/**
 * Fetches the course-selection page and converts the lesson table into JSON.
 *
 * <p>The result is a {@link JSONObject} with a single key {@code "ARRAY"} holding one
 * object per lesson. Each lesson object carries the lesson attributes (cells 1, 2, 4-8 of
 * its first row) plus a {@code CourseKey.TIME_AND_LOCATION_DERAIL} array with one
 * time/location entry per table row the lesson spans (cells 12-18 of the first row,
 * cells 0-6 of every continuation row).
 *
 * @return the lessons wrapped in a JSON object, or {@code null} when
 *         {@code checkIsLogin(ID)} reports that the session is not logged in
 * @throws IOException   if the page cannot be fetched or parsed
 * @throws JSONException if a value cannot be stored in the JSON structure
 */
private @Nullable JSONObject lessonsQuery() throws IOException, JSONException {
    if (!checkIsLogin(ID))
        return null;

    URL url = new URL(SYS_HOST + "xkAction.do?actionType=6");
    HttpURLConnection httpURLConnection = (HttpURLConnection) url.openConnection();
    httpURLConnection.setRequestProperty("Cookie", "JSESSIONID=" + SYS_JSESSIONID);
    httpURLConnection.connect();

    Document document = Jsoup.parse(httpURLConnection.getInputStream(), "gb2312",
            httpURLConnection.getURL().toString());
    // Drop the &nbsp; entities (as gradesQuery does) so that blank cells yield empty text.
    // Removing plain spaces instead would glue tag names and attributes together.
    document = Jsoup.parse(document.toString().replaceAll("&nbsp;", ""));
    Elements lessons = document.select("table[class=titleTop2]");
    // The second matching table is the one that holds the lesson rows.
    Element lessonsElement = lessons.get(1);

    Elements lessonsInfo = lessonsElement.select("tr[onmouseout=this.className='even';]");
    int lessons_quantity = lessonsInfo.size();
    JSONArray jsonArray = new JSONArray();

    // i is advanced manually because one lesson may consume several table rows.
    for (int i = 0; i < lessons_quantity;) {
        Element lessonInfo = lessonsInfo.get(i);
        Elements lessonDetails = lessonInfo.select("td");

        // Skip rows whose section-time cell is empty.
        if (lessonDetails.get(14).text().equals("")) {
            i++;
            continue;
        }

        JSONObject JLessonObject = new JSONObject();
        JLessonObject.put(CourseKey.ID, lessonDetails.get(1).text());
        JLessonObject.put(CourseKey.NAME, lessonDetails.get(2).text());
        JLessonObject.put(CourseKey.CREDIT, lessonDetails.get(4).text());
        JLessonObject.put(CourseKey.LENGTH, lessonDetails.get(5).text());
        JLessonObject.put(CourseKey.ATTR, lessonDetails.get(6).text());
        JLessonObject.put(CourseKey.EXAM_TYPE, lessonDetails.get(7).text());
        JLessonObject.put(CourseKey.TEACHER, lessonDetails.get(8).text());

        JSONArray JLessonTimeAndPosArray = new JSONArray();
        JSONObject JLessonTimeAndPos = new JSONObject();

        JLessonTimeAndPos.put(CourseKey.WEEK, lessonDetails.get(12).text());
        JLessonTimeAndPos.put(CourseKey.WEEK_DAY, lessonDetails.get(13).text());
        JLessonTimeAndPos.put(CourseKey.SECTION_TIME, lessonDetails.get(14).text());
        JLessonTimeAndPos.put(CourseKey.SECTION_LENGTH, lessonDetails.get(15).text());
        JLessonTimeAndPos.put(CourseKey.CAMPUS, lessonDetails.get(16).text());
        JLessonTimeAndPos.put(CourseKey.BUILDING, lessonDetails.get(17).text());
        JLessonTimeAndPos.put(CourseKey.CLASSROOM, lessonDetails.get(18).text());

        JLessonTimeAndPosArray.put(JLessonTimeAndPos);

        i++;

        // The rowspan of the first cell tells how many rows belong to this lesson;
        // a missing attribute means the lesson occupies a single row.
        String rowSpanAttr = lessonDetails.get(0).attr("rowspan");
        int row_span = "".equals(rowSpanAttr) ? 1 : Integer.parseInt(rowSpanAttr);

        // Continuation rows contain only the seven time/location cells. The extra bound
        // on i keeps a too-large rowspan from reading past the last row.
        for (int j = 0; j < row_span - 1 && i < lessons_quantity; j++, i++) {
            Elements EExtraTimeAndPos = lessonsInfo.get(i).select("td");
            JSONObject JExtraLessonTimeAndPos = new JSONObject();

            JExtraLessonTimeAndPos.put(CourseKey.WEEK, EExtraTimeAndPos.get(0).text());
            JExtraLessonTimeAndPos.put(CourseKey.WEEK_DAY, EExtraTimeAndPos.get(1).text());
            JExtraLessonTimeAndPos.put(CourseKey.SECTION_TIME, EExtraTimeAndPos.get(2).text());
            JExtraLessonTimeAndPos.put(CourseKey.SECTION_LENGTH, EExtraTimeAndPos.get(3).text());
            JExtraLessonTimeAndPos.put(CourseKey.CAMPUS, EExtraTimeAndPos.get(4).text());
            JExtraLessonTimeAndPos.put(CourseKey.BUILDING, EExtraTimeAndPos.get(5).text());
            JExtraLessonTimeAndPos.put(CourseKey.CLASSROOM, EExtraTimeAndPos.get(6).text());

            JLessonTimeAndPosArray.put(JExtraLessonTimeAndPos);
        }

        JLessonObject.put(CourseKey.TIME_AND_LOCATION_DERAIL, JLessonTimeAndPosArray);
        jsonArray.put(JLessonObject);
    }

    return new JSONObject().put("ARRAY", jsonArray);
}

From source file:com.muzima.view.forms.HTMLFormWebViewActivity.java

/**
 * Builds the form HTML with the stored JSON payload embedded in it.
 *
 * <p>When {@code formData} has no JSON payload the template HTML is returned untouched.
 * Otherwise the payload is wrapped in a {@code <div id='pre_populate_data'>} element that
 * is prepended to the template's body.
 *
 * @return the template HTML, with the payload div prepended to the body when a payload exists
 */
private String prePopulateData() {
    if (null == formData.getJsonPayload()) {
        return formTemplate.getHtml();
    }

    Document template = Jsoup.parse(formTemplate.getHtml());
    String payloadDiv = "<div id='pre_populate_data'>" + formData.getJsonPayload() + "</div>";
    template.select("body").prepend(payloadDiv);
    return template.toString();
}

From source file:com.amastigote.xdu.query.module.EduSystem.java

/**
 * Fetches the grades page and converts it into nested JSON.
 *
 * <p>The outer object is keyed by the trimmed text of each {@code <b>} element; the i-th
 * {@code <b>} is paired with the i-th {@code td[class=pageAlign]} element. Inside that
 * element the {@code td[align=center]} cells are read in groups of seven: cell 0 is stored
 * as {@code GradeKey.ID}, cell 2 becomes the key of the course entry, and cells 4, 5 and 6
 * are stored as {@code GradeKey.CREDIT}, {@code GradeKey.ATTR} and {@code GradeKey.GRADE}.
 *
 * @return the grades grouped by title, or {@code null} when {@code checkIsLogin(ID)} fails
 * @throws IOException   if the page cannot be fetched or parsed
 * @throws JSONException if a value cannot be stored in the JSON structure
 */
private @Nullable JSONObject gradesQuery() throws IOException, JSONException {
    if (!checkIsLogin(ID)) {
        return null;
    }

    HttpURLConnection connection =
            (HttpURLConnection) new URL(SYS_HOST + GRADE_QUERY_SUFFIX).openConnection();
    connection.setRequestProperty("Cookie", "JSESSIONID=" + SYS_JSESSIONID);
    connection.connect();

    Document rawPage = Jsoup.parse(connection.getInputStream(), "gb2312",
            connection.getURL().toString());
    // Re-parse with the &nbsp; entities stripped from the serialized markup.
    Document page = Jsoup.parse(rawPage.toString().replaceAll("&nbsp;", ""));

    JSONObject result = new JSONObject();
    Elements semesterBlocks = page.select("td[class=pageAlign]");
    Elements semesterTitles = page.select("b");

    for (int s = 0; s < semesterTitles.size(); s++) {
        String semesterName = semesterTitles.get(s).text().trim();
        Element semesterBlock = semesterBlocks.get(s);
        Elements cells = semesterBlock.select("td[align=center]");

        JSONObject semester = new JSONObject();
        // Seven consecutive cells describe one course; a trailing partial group is ignored.
        int courseCount = cells.size() / 7;
        for (int c = 0; c < courseCount; c++) {
            int first = c * 7;
            String courseName = cells.get(first + 2).text().trim();

            JSONObject course = new JSONObject();
            course.put(GradeKey.ID, cells.get(first).text().trim());
            course.put(GradeKey.CREDIT, cells.get(first + 4).text().trim());
            course.put(GradeKey.ATTR, cells.get(first + 5).text().trim());
            course.put(GradeKey.GRADE, cells.get(first + 6).text().trim());
            semester.put(courseName, course);
        }
        result.put(semesterName, semester);
    }
    return result;
}

From source file:abelymiguel.miralaprima.GetPrima.java

/**
 * Scrapes the current risk-premium value and its delta from the provider page, derives the
 * percentage change, persists the figures and returns them.
 *
 * <p>The value is read from the first {@code .numero} element (dots are removed before
 * parsing) and the delta from the first {@code .text-success} element (text after the first
 * {@code '>'}, if any). The percentage is {@code 100 * delta / (value - delta)} rounded to
 * two decimals. If scraping, parsing or persisting throws, the failure is logged and the
 * latest figures stored in the database are returned instead.
 *
 * @param country_code country whose stored record is updated, created, or read as fallback
 * @param providerUrl  base URL of the provider
 * @param indexName    suffix appended to {@code providerUrl} to form the page URL
 * @return a map with the keys {@code prima_value}, {@code prima_delta} and {@code prima_percent}
 * @throws IOException if the provider page cannot be fetched
 */
private HashMap<String, Float> getPrimaDataDMacro(String country_code, String providerUrl, String indexName)
        throws IOException {
    HashMap<String, Float> respuestaJson = new HashMap<String, Float>();
    HashMap<String, Object> primaJson;

    Float prima_value;
    Float prima_delta;
    Float prima_percent;

    Document doc;
    doc = Jsoup.connect(providerUrl + indexName).get();

    try {
        Element riskPremium = doc.select(".numero").first();
        prima_value = Float.valueOf(riskPremium.text().replace(".", ""));

        Element riskDelta = doc.select(".text-success").first();
        String deltaText = riskDelta.text();
        String deltaStr = deltaText.substring(deltaText.indexOf(">") + 1);
        prima_delta = Float.valueOf(deltaStr);

        prima_percent = 100 * prima_delta / (prima_value - prima_delta);
        // Pin the format symbols: with the default locale a comma decimal separator
        // ("1,23") would make Float.valueOf throw and silently trigger the DB fallback.
        DecimalFormat df = new DecimalFormat("0.00",
                new java.text.DecimalFormatSymbols(java.util.Locale.US));
        String percentStr = df.format(prima_percent);
        prima_percent = Float.valueOf(percentStr);

        respuestaJson.put("prima_value", prima_value);
        respuestaJson.put("prima_delta", prima_delta);
        respuestaJson.put("prima_percent", prima_percent);

        // Overwrite today's record if one exists, otherwise insert a new one.
        if (isSameDay(country_code)) {
            this.updatePrimaInDB(prima_value, prima_delta, prima_percent,
                    this.getLatestPrimaIdFromDB(country_code));

        } else {
            this.storePrimaInDB(prima_value, prima_delta, prima_percent, country_code);
        }
    } catch (Exception ex) {
        // Best effort: any failure above (missing element, unparsable number, DB error)
        // falls back to the most recent stored figures.
        Logger.getLogger(GetPrima.class.getName()).log(Level.SEVERE, null, ex);
        primaJson = getLatestPrimaFromDB(country_code);
        respuestaJson.put("prima_value", (Float) primaJson.get("prima_value"));
        respuestaJson.put("prima_delta", (Float) primaJson.get("prima_delta"));
        respuestaJson.put("prima_percent", (Float) primaJson.get("prima_percent"));
    }

    return respuestaJson;

}

From source file:se.vgregion.portal.iframe.controller.CSViewController.java

/**
 * Resolves the value of every dynamic field configured on the portlet.
 *
 * <p>The configured field list is stripped of spaces and split on commas. For each name the
 * {@code value} attribute of the first element in the document body whose {@code name}
 * attribute equals it is read, and line breaks are removed. A value containing {@code "<>"}
 * is rejected. Any failure is logged and yields an empty map.
 *
 * @param portletConfig configuration providing the field list and the RD-encode flag
 * @return field name to value, or an empty map if anything went wrong
 */
private Map<String, String> lookupDynamicValue(PortletConfig portletConfig) {
    Map<String, String> valuesByField = new HashMap<String, String>();
    try {
        String[] fieldNames = portletConfig.getDynamicField().replace(" ", "").split(",");

        for (int idx = 0; idx < fieldNames.length; idx++) {
            String fieldName = fieldNames[idx];
            Document fieldsDoc = getDynamicFieldsDocument(portletConfig);

            String rawValue = fieldsDoc.select("body").get(0)
                    .getElementsByAttributeValue("name", fieldName).get(0)
                    .attr("value");
            String value = rawValue.replaceAll("\n\r", "").replaceAll("\r", "").replaceAll("\n", "");

            if (value.contains("<>")) {
                throw new Exception("Invalid value format [" + value + "]");
            }
            valuesByField.put(fieldName, value);
        }

        if (portletConfig.isRdEncode()) {
            addSpecialFieldForRaindance(valuesByField, portletConfig);
        }

        return valuesByField;
    } catch (Exception e) {
        LOGGER.error(e.getMessage(), e);
        return Collections.emptyMap();
    } finally {
        // Clear the ThreadLocal so a later request served by this reused thread does not see it.
        dynamicFieldsDocThreadLocal.remove();
    }
}

From source file:me.vertretungsplan.parser.DSBMobileParser.java

/**
 * Downloads one schedule page, parses it with the parser matching its format and then
 * follows a {@code <meta http-equiv=refresh>} redirect, if present, to the next page.
 *
 * <p>The format is detected from the page content or from the {@code PARAM_TYPE} setting:
 * Untis, DaVinci or Indiware. A page that only states that no content was provided ends
 * processing without error and without following redirects.
 *
 * @param v        schedule that receives the parsed days
 * @param url      page to load; it is recorded in {@code usedUrls}
 * @param usedUrls URLs already visited, used to stop redirect loops
 * @throws IOException                   on download failure or when the number of DaVinci
 *                                       headings and tables differ
 * @throws IncompatibleScheduleException if the page format is not recognised
 */
private void loadScheduleFromUrl(SubstitutionSchedule v, String url, List<String> usedUrls)
        throws IOException, JSONException, CredentialInvalidException, IncompatibleScheduleException {
    usedUrls.add(url);
    String html = httpGet(url, data.has(PARAM_ENCODING) ? data.optString(PARAM_ENCODING, null) : "UTF-8");
    Document doc = Jsoup.parse(html);

    // Locale.ROOT keeps the keyword checks independent of the default locale
    // (e.g. the Turkish dotless i would break the "untis" / "davinci" matches).
    String lowerHtml = doc.html().toLowerCase(java.util.Locale.ROOT);
    String type = data.optString(PARAM_TYPE, "");

    if (doc.title().toLowerCase(java.util.Locale.ROOT).contains("untis") || lowerHtml.contains("untis")
            || type.equals("untis")) {
        parseMultipleMonitorDays(v, doc, data);
    } else if (lowerHtml.contains("created by davinci") || type.equals("davinci")) {
        Elements titles = doc.select("h2");
        Elements tables = doc.select("h2 + p + table");
        if (titles.size() != tables.size())
            throw new IOException("Anzahl \u00dcberschriften != Anzahl Tabellen");
        for (int i = 0; i < titles.size(); i++) {
            SubstitutionScheduleDay day = new SubstitutionScheduleDay();
            String date = titles.get(i).text();
            day.setDateString(date);
            day.setDate(ParserUtils.parseDate(date));
            DaVinciParser.parseDaVinciTable(tables.get(i), v, day, colorProvider);
            v.addDay(day);
        }
    } else if (doc.select(".tdaktionen").size() > 0 || type.equals("indiware")) {
        new IndiwareParser(scheduleData, cookieProvider).parseIndiwarePage(v, doc.html());
    } else if (doc.text().matches(".*F\u00fcr diesen Bereich.*wurde kein Inhalt bereitgestellt\\.")) {
        // Placeholder page without content: nothing to parse and no redirect to follow.
        return;
    } else {
        throw new IncompatibleScheduleException();
    }

    Element meta = doc.select("meta[http-equiv=refresh]").first();
    if (meta != null) {
        String content = meta.attr("content");

        // Find "url=" case-insensitively but cut the target out of the ORIGINAL attribute,
        // so that case-sensitive paths survive.
        int urlPos = -1;
        for (int i = 0; i + 4 <= content.length(); i++) {
            if (content.regionMatches(true, i, "url=", 0, 4)) {
                urlPos = i;
                break;
            }
        }

        // A refresh without a target only reloads the same page; there is nothing to follow.
        if (urlPos >= 0) {
            String redirectUrl = url.substring(0, url.lastIndexOf("/") + 1)
                    + content.substring(urlPos + 4);
            if (!usedUrls.contains(redirectUrl)) {
                loadScheduleFromUrl(v, redirectUrl, usedUrls);
            }
        }
    }
}

From source file:gui.InboxPanel.java

/**
 * Returns the body HTML of the text pane with every {@code <ds>} element removed.
 * The resulting HTML is also printed to standard output.
 *
 * @return inner HTML of the parsed body after the {@code <ds>} elements were removed
 */
private String getPlain() {
    Document parsed = Jsoup.parseBodyFragment(BodyTextPane.getText());
    parsed.select("ds").remove();

    String bodyHtml = parsed.select("body").html();
    System.out.println("DS nya diilangin : \n" + bodyHtml);

    return bodyHtml;
}

From source file:com.github.binlee1990.spider.movie.spider.MovieCrawler.java

/**
 * Copies the cover image URL from the page onto the film.
 *
 * <p>The URL is the {@code src} attribute of the first {@code img[src]} inside
 * {@code .fm-intro}. The film is left unchanged when no such image exists or the
 * attribute is blank.
 *
 * @param doc  parsed film page
 * @param film film whose cover URL is set
 */
private void setFilmCover(Document doc, Film film) {
    Elements covers = doc.select(".fm-intro img[src]");
    if (!CollectionUtils.isNotEmpty(covers)) {
        return;
    }

    Element firstCover = covers.first();
    String src = firstCover.attr("src");
    if (StringUtils.isNotBlank(src)) {
        film.setCoverUrl(src);
    }
}

From source file:gui.InboxPanel.java

/**
 * Extracts the text of the {@code <ds>} elements from the text pane's HTML.
 *
 * <p>{@code select} never returns {@code null}; with no {@code <ds>} element it yields an
 * empty selection whose text is the empty string. The previous null check was therefore
 * always true and its {@code return null} branch unreachable, so this method has never
 * returned {@code null}.
 *
 * @return the combined text of all {@code <ds>} elements, or {@code ""} if there are none
 */
private String getSign() {
    String html = BodyTextPane.getText();
    Document doc = Jsoup.parseBodyFragment(html);
    return doc.select("ds").text();
}

From source file:com.fluidops.iwb.provider.HTMLProvider.java

/**
 * Downloads the configured page, dumps its media, imports, links and table rows to
 * {@code HTMLdata.txt} (relative to the working directory) and turns the table rows into
 * statements.
 *
 * <p>Table rows are all {@code tr} elements below any {@code tbody}. Rows from index 3 up to
 * (but excluding) the last two are processed: row 3 supplies, from its first four cells, the
 * type URI and the three predicate URIs; every later row produces a type statement, a label
 * and three literal statements from its first four cells.
 *
 * @param res list that receives the generated statements
 * @throws Exception if the page cannot be fetched, the file cannot be written, or a
 *                   processed row has fewer than four cells
 */
@Override
public void gather(List<Statement> res) throws Exception {

    String url = config.url;
    Document doc = Jsoup.connect(url).get();
    Elements links = doc.select("a[href]");
    Elements media = doc.select("[src]");
    Elements imports = doc.select("link[href]");
    Elements article = doc.getElementsByTag("tbody").select("tr");
    Elements tableElem;
    URI nameURI = null;
    URI roadsURI = null;
    URI sideURI = null;
    URI totalURI = null;

    File file = new File("HTMLdata.txt");
    PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(file)));

    // try/finally guarantees the dump file is flushed and closed even when a row is
    // malformed or statement creation throws.
    try {
        out.println("Media");
        print("\nMedia: (%d)", media.size());
        for (Element el : media) {
            if (el.tagName().equals("img")) {
                print(" * %s: <%s> %sx%s (%s)", el.tagName(), el.attr("abs:src"), el.attr("width"),
                        el.attr("height"), trim(el.attr("alt"), 20));
                out.printf(" \n * %s: <%s> %sx%s (%s)", el.tagName(), el.attr("abs:src"), el.attr("width"),
                        el.attr("height"), trim(el.attr("alt"), 20));
                out.println();
            } else {
                print(" * %s: <%s>", el.tagName(), el.attr("abs:src"));
                out.printf(" \n * %s: <%s>", el.tagName(), el.attr("abs:src"));
                out.println();
            }

        }

        out.println("Imports");
        print("\nImports: (%d)", imports.size());
        for (Element link : imports) {
            print(" * %s <%s> (%s)", link.tagName(), link.attr("abs:href"), link.attr("rel"));
            out.printf(" * %s <%s> (%s)", link.tagName(), link.attr("abs:href"), link.attr("rel"));
            out.println();
        }

        out.println("Links");
        print("\nLinks: (%d)", links.size());
        for (Element link : links) {
            print(" * a: <%s> (%s)", link.attr("abs:href"), trim(link.text(), 35));
            out.printf(" * a: <%s> (%s)", link.attr("abs:href"), link.text());
            out.println();
        }

        out.println("Article");
        print("\nArticle: (%d)", article.size());

        // Skip the first three rows and the last two; row 3 is treated as the header row.
        for (int i = 3; i < article.size() - 2; i++) {
            tableElem = article.get(i).select("td");
            out.println();

            if (i == 3) {
                // Header row: its cells become the type URI and the predicate URIs.
                nameURI = ProviderUtils.objectToUri(tableElem.get(0).text());
                roadsURI = ProviderUtils.objectToUri(tableElem.get(1).text());
                sideURI = ProviderUtils.objectToUri(tableElem.get(2).text());
                totalURI = ProviderUtils.objectToUri(tableElem.get(3).text());

            } else {

                // Data row: the first cell identifies the subject of all five statements.
                res.add(ProviderUtils.createStatement(ProviderUtils.objectToUri(tableElem.get(0).text()), RDF.TYPE,
                        nameURI));
                res.add(ProviderUtils.createLiteralStatement(ProviderUtils.objectToUri(tableElem.get(0).text()),
                        RDFS.LABEL, tableElem.get(0).text()));
                res.add(ProviderUtils.createLiteralStatement(ProviderUtils.objectToUri(tableElem.get(0).text()),
                        roadsURI, tableElem.get(1).text()));
                res.add(ProviderUtils.createLiteralStatement(ProviderUtils.objectToUri(tableElem.get(0).text()),
                        sideURI, tableElem.get(2).text()));
                res.add(ProviderUtils.createLiteralStatement(ProviderUtils.objectToUri(tableElem.get(0).text()),
                        totalURI, tableElem.get(3).text()));

                for (Element el : tableElem) {
                    out.printf("\n * (%s): (%s)", el.tagName(), el.text());
                    out.println();

                }
            }
            out.println();
            out.printf("\n * a (%s) (%d): (%s)", article.get(i).tagName(), tableElem.size(), article.get(i).text());
            out.println();
        }
    } finally {
        out.close();
    }
}