Example usage for org.jsoup.select Elements first

List of usage examples for org.jsoup.select Elements first

Introduction

On this page you can find example usages of org.jsoup.select.Elements.first().

Prototype

public Element first() 

Source Link

Document

Get the first matched element.

Usage

From source file:de.stkl.gbgvertretungsplan.sync.SyncAdapter.java

/**
 * Collects the page-level data of a substitution plan: the last update time, the date the
 * plan belongs to, the first three "daily information" rows and the given data type.
 *
 * @param root     element whose descendants are searched for the header and info tables
 * @param dataType value stored (as a string) under {@code Sync.GENERAL_DATA_DATATYPE}
 * @return map from {@code Sync.GENERAL_DATA_*} keys to the extracted text; entries whose
 *         source element is missing are absent
 */
private Map<String, String> parseGeneralData(Element root, int dataType) {
    Map<String, String> generalData = new HashMap<String, String>();

    // last update time and day
    Element updateTime = root.select("table.mon_head td:eq(2) p").first();
    if (updateTime != null) {
        Pattern pat = Pattern.compile("(Stand: [\\.:0-9 ]+)", Pattern.DOTALL);
        Matcher matcher = pat.matcher(updateTime.text());
        if (matcher.find())
            generalData.put(Sync.GENERAL_DATA_UPDATETIME, matcher.group(1));
    }

    // date the substitution table belongs to
    Element belongingDate = root.select("div.mon_title").first();
    if (belongingDate != null)
        generalData.put(Sync.GENERAL_DATA_DATE, belongingDate.text());

    // daily information
    Elements dailyInfos = root.select("table.info tr");
    int i = 0; // counts only rows that actually contain cells
    for (Element info : dailyInfos) {
        Elements e = info.select("td");
        if (e.isEmpty())
            continue;

        String title = joinTextNodes(e.first());
        // description only if available (second cell of the row)
        String description = e.size() > 1 ? joinTextNodes(e.get(1)) : "";

        String keyTitle = "", keyDescription = "";
        switch (i) {
        case 0:
            keyTitle = Sync.GENERAL_DATA_DAILYINFO_1_TITLE;
            keyDescription = Sync.GENERAL_DATA_DAILYINFO_1_DESCRIPTION;
            break;
        case 1:
            keyTitle = Sync.GENERAL_DATA_DAILYINFO_2_TITLE;
            keyDescription = Sync.GENERAL_DATA_DAILYINFO_2_DESCRIPTION;
            break;
        case 2:
            keyTitle = Sync.GENERAL_DATA_DAILYINFO_3_TITLE;
            keyDescription = Sync.GENERAL_DATA_DAILYINFO_3_DESCRIPTION;
            break;
        default:
            // only three daily-info slots exist; further rows are ignored
            break;
        }
        if (!keyTitle.equals("")) {
            generalData.put(keyTitle, title);
            generalData.put(keyDescription, description);
        }
        i++;
    }

    generalData.put(Sync.GENERAL_DATA_DATATYPE, String.valueOf(dataType));

    return generalData;
}

/** Joins the direct text nodes of a cell, one per line, and trims the result. */
private String joinTextNodes(Element cell) {
    StringBuilder text = new StringBuilder();
    for (TextNode node : cell.textNodes())
        text.append(node.text()).append('\n');
    return text.toString().trim();
}

From source file:com.github.binlee1990.spider.video.spider.PersonCrawler.java

/**
 * Copies the "wanted", "watched" and "owned" counters from the page into the video.
 * A counter is left untouched when its element is missing or setting it fails.
 *
 * @param doc   parsed video page
 * @param video video receiving the counters
 */
private void setVideoCount(Document doc, Video video) {
    Elements counterSpans = doc.select("div#video_favorite_edit span");
    if (!CollectionUtils.isNotEmpty(counterSpans)) {
        return;
    }

    Elements wantedLinks = counterSpans.select("#subscribed a");
    if (CollectionUtils.isNotEmpty(wantedLinks)) {
        String wantedText = wantedLinks.first().text();
        try {
            video.setCountWanted(Integer.valueOf(wantedText));
        } catch (Exception ignored) {
            // best effort: keep the counter unset
        }
    }

    Elements watchedLinks = counterSpans.select("#watched a");
    if (CollectionUtils.isNotEmpty(watchedLinks)) {
        String watchedText = watchedLinks.first().text();
        try {
            video.setCountWatched(Integer.valueOf(watchedText));
        } catch (Exception ignored) {
            // best effort: keep the counter unset
        }
    }

    Elements ownedLinks = counterSpans.select("#owned a");
    if (CollectionUtils.isNotEmpty(ownedLinks)) {
        String ownedText = ownedLinks.first().text();
        try {
            video.setCountOwned(Integer.valueOf(ownedText));
        } catch (Exception ignored) {
            // best effort: keep the counter unset
        }
    }
}

From source file:org.confab.PhpBB3Parser.java

/**
 * Parses each post for a particular topic.
 *
 * @param  html         Html containing the posts to be parsed
 * @param  parent       Thread passed to the constructor of every created Post
 * @return              List of Post objects, in the order their tables are selected
 */
public List<Post> parsePosts(Document html, ForumThread parent) {
    Utilities.debug("Starting parsePosts");
    List<Post> ret = new ArrayList<Post>();

    // Each post should have its own table
    Elements div_posts = html.select("div#posts");
    assert !div_posts.isEmpty();
    Elements posts_table = div_posts.select("table[id~=(post\\d+)]");
    assert !posts_table.isEmpty();

    for (Element el_post : posts_table) {
        Post new_post = new Post(parent);

        // Get post id (id=post\d+)
        new_post.id = el_post.attr("id").replace("post", "").trim();
        assert new_post.id != null;

        // Get post message
        // NOTE(review): first() returns null for an empty selection, so with assertions
        // disabled a post without a message div fails with a NullPointerException here.
        Elements el_message = el_post.select("div[id~=(post_message_\\d+)]");
        assert !el_message.isEmpty();
        new_post.message = el_message.first().text();
        assert new_post.message != null;
        Utilities.debug("new_post.message: " + new_post.message);

        // Get post author
        Elements el_author = el_post.select(".bigusername");
        assert !el_author.isEmpty();
        // check the author object before writing into it, not afterwards
        assert new_post.author != null;
        new_post.author.username = el_author.first().text();
        Utilities.debug("new_post.author: " + new_post.author);

        ret.add(new_post);
    }

    Utilities.debug("Finished parsePosts");
    return ret;
}

From source file:prince.app.ccm.tools.Task.java

/**
 * Builds the URL-encoded login parameters ({@code usuario} and {@code contrasena}) from the
 * inputs found inside the {@code contenido_right} element of the given page, and stores the
 * form's target URL in {@code log}.
 *
 * @param html     HTML of the login page
 * @param username value sent for the {@code usuario} input
 * @param password value sent for the {@code contrasena} input
 * @return the parameters joined with {@code &}, in the order their inputs appear; empty if
 *         neither input is present
 * @throws UnsupportedEncodingException declared by {@link URLEncoder#encode(String, String)}
 */
public String getFormParams(String html, String username, String password) throws UnsupportedEncodingException {

    System.out.println("Extracting form's data...");

    Document doc = Jsoup.parse(html);

    // Container holding the login inputs, and the first <form> of the page.
    // NOTE(review): both lookups yield null when the page layout differs, which surfaces as a
    // NullPointerException below - confirm callers only pass the expected login page.
    Element loginform = doc.getElementById("contenido_right");
    Elements loginaction = doc.getElementsByTag("form");
    Element form = loginaction.first();
    log = MAIN_PAGE + form.attr("action");
    Log.e(TAG, "Action: " + log);

    // build parameters list
    StringBuilder result = new StringBuilder();
    int paramCount = 0;
    for (Element inputElement : loginform.getElementsByTag("input")) {
        String key = inputElement.attr("name");

        String value;
        if (key.equals("usuario")) {
            value = username;
        } else if (key.equals("contrasena")) {
            value = password;
        } else {
            continue; // every other input is not part of the login request
        }

        if (result.length() > 0) {
            result.append('&');
        }
        result.append(key).append('=').append(URLEncoder.encode(value, "UTF-8"));
        paramCount++;
    }

    // Never log the parameter string itself: it contains the user's credentials.
    Log.d(TAG, "Done in getFormParams: " + paramCount + " parameter(s)");
    return result.toString();
}

From source file:com.github.binlee1990.transformers.spider.PersonCrawler.java

/**
 * Scrapes one video detail page and stores the video together with its actress and
 * category links.
 * <p>
 * The page is skipped when its URL does not start with the detail-page prefix, when it is
 * not HTML, when no identification code can be found, or when a video with the same
 * identification code is already returned by {@code videoMapper}.
 *
 * @param page the fetched page to process
 */
@Override
public void visit(Page page) {
    String url = page.getWebURL().getURL();

    logger.info(url);
    if (!url.startsWith("http://www.javlibrary.com/cn/?v=jav")) {
        return;
    }

    if (!(page.getParseData() instanceof HtmlParseData)) {
        return;
    }
    HtmlParseData htmlParseData = (HtmlParseData) page.getParseData();
    String html = htmlParseData.getHtml();

    Document doc = Jsoup.parse(html);

    // Without an identification code the video cannot be de-duplicated, so skip the page
    // instead of failing on a null first() element.
    Elements idElements = doc.select("div#video_id td.text");
    if (!CollectionUtils.isNotEmpty(idElements)) {
        logger.info("skip " + url + ": no video identification code found");
        return;
    }
    String videoIdentificationCode = idElements.first().text();

    Video queryVideo = new Video();
    queryVideo.setIdentificationCode(videoIdentificationCode);
    Video video = videoMapper.queryByVideo(queryVideo);

    if (null != video) {
        return;
    }

    video = new Video();
    video.setUrl(url);

    Date now = new Date();
    video.setCreateTime(now);
    video.setUpdateTime(now);

    Elements titleElements = doc.select("div#video_title a");
    if (CollectionUtils.isNotEmpty(titleElements)) {
        video.setTitle(titleElements.first().text());
    }

    video.setIdentificationCode(videoIdentificationCode);

    Elements rdElements = doc.select("div#video_date td.text");
    if (CollectionUtils.isNotEmpty(rdElements)) {
        String releaseDate = rdElements.first().text();
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        try {
            Date date = sdf.parse(releaseDate);
            video.setReleaseDate(date);
        } catch (ParseException ignored) {
            // best effort: leave the release date unset
        }
    }

    Elements dmElements = doc.select("div#video_length span.text");
    if (CollectionUtils.isNotEmpty(dmElements)) {
        String durationMinutes = dmElements.first().text();
        try {
            video.setDurationMinutes(Integer.valueOf(durationMinutes));
        } catch (NumberFormatException ignored) {
            // best effort, like the other numeric fields: leave the duration unset
        }
    }

    Elements dElements = doc.select("div#video_director td.text");
    if (CollectionUtils.isNotEmpty(dElements)) {
        video.setDirector(dElements.first().text());
    }

    Elements pElements = doc.select("div#video_maker td.text");
    if (CollectionUtils.isNotEmpty(pElements)) {
        video.setProducer(pElements.first().text());
    }

    Elements disElements = doc.select("div#video_label td.text");
    if (CollectionUtils.isNotEmpty(disElements)) {
        video.setDistributor(disElements.first().text());
    }

    Elements countElements = doc.select("div#video_favorite_edit span");
    if (CollectionUtils.isNotEmpty(countElements)) {
        Elements countWantedElements = countElements.select("#subscribed a");
        if (CollectionUtils.isNotEmpty(countWantedElements)) {
            String countWanted = countWantedElements.first().text();
            try {
                video.setCountWanted(Integer.valueOf(countWanted));
            } catch (Exception ignored) {
                // best effort: leave the counter unset
            }
        }

        Elements countWatchedElements = countElements.select("#watched a");
        if (CollectionUtils.isNotEmpty(countWatchedElements)) {
            String countWatched = countWatchedElements.first().text();
            try {
                video.setCountWatched(Integer.valueOf(countWatched));
            } catch (Exception ignored) {
                // best effort: leave the counter unset
            }
        }

        Elements countOwnedElements = countElements.select("#owned a");
        if (CollectionUtils.isNotEmpty(countOwnedElements)) {
            String countOwned = countOwnedElements.first().text();
            try {
                video.setCountOwned(Integer.valueOf(countOwned));
            } catch (Exception ignored) {
                // best effort: leave the counter unset
            }
        }
    }

    Elements sElements = doc.select("div#video_review td.text span.score");
    if (CollectionUtils.isNotEmpty(sElements)) {
        // the score is rendered in parentheses; strip them before parsing
        String score = sElements.first().text();
        score = StringUtils.replace(score, "(", "");
        score = StringUtils.replace(score, ")", "");
        if (StringUtils.isNotBlank(score)) {
            try {
                video.setScore(Float.valueOf(score));
            } catch (Exception ignored) {
                // best effort: leave the score unset
            }
        }
    }

    Elements actressElements = doc.select("div#video_cast span.star");
    if (CollectionUtils.isNotEmpty(actressElements)) {
        video.setSingleFemaleFlag(actressElements.size() <= 1);
    }

    videoMapper.insertSelective(video);
    // re-read the stored video to obtain its id
    int videoId = videoMapper.queryByVideo(video).getId();

    logger.info("handle " + videoId + "\n" + JSON.toJSONString(video));

    if (CollectionUtils.isNotEmpty(actressElements)) {
        actressElements.stream().forEach(a -> {
            String aName = a.text().trim();

            if (StringUtils.isNotBlank(aName)) {
                Actress queryActress = new Actress();
                queryActress.setName(aName);
                Actress actress = actressMapper.queryByActress(queryActress);
                if (null != actress) {
                    VideoActress va = new VideoActress();
                    va.setActressCode(actress.getCode());
                    va.setVideoId(videoId);
                    videoActressMapper.insertSelective(va);
                } else {
                    actress = new Actress();
                    actress.setName(aName);
                    actressMapper.insertSelective(actress);
                    // NOTE(review): the looked-up id is never used, and the code below is
                    // taken from the freshly built object rather than the stored row -
                    // confirm insertSelective populates the code.
                    int actressId = actressMapper.queryByActress(actress).getId();

                    VideoActress va = new VideoActress();
                    va.setActressCode(actress.getCode());
                    va.setVideoId(videoId);
                    videoActressMapper.insertSelective(va);
                }
            }
        });
    }

    Elements categoryElements = doc.select("div#video_genres span.genre");
    if (CollectionUtils.isNotEmpty(categoryElements)) {
        categoryElements.stream().forEach(c -> {
            String cDescription = c.text().trim();

            if (StringUtils.isNotBlank(cDescription)) {
                Category queryCategory = new Category();
                queryCategory.setSubtype(cDescription);
                Category category = categoryMapper.queryByCategory(queryCategory);
                if (null != category) {
                    VideoCategory vc = new VideoCategory();
                    vc.setCategoryId(category.getId());
                    vc.setCategoryDescription(category.getSubtype());
                    vc.setVideoId(videoId);
                    videoCategoryMapper.insertSelective(vc);
                } else {
                    category = new Category();
                    category.setSubtype(cDescription);
                    categoryMapper.insertSelective(category);
                    int categoryId = categoryMapper.queryByCategory(category).getId();

                    VideoCategory vc = new VideoCategory();
                    vc.setCategoryId(categoryId);
                    vc.setCategoryDescription(category.getSubtype());
                    vc.setVideoId(videoId);
                    videoCategoryMapper.insertSelective(vc);
                }
            }
        });
    }
}

From source file:org.confab.VBulletinParser.java

/**
 * Builds the forums listed in the collapsible forum tables of a board page.
 *
 * @param root   parsed board page
 * @param parent board passed to the constructor of every created Forum (sub-forums included)
 * @return one Forum per usable table row, with title, url, viewer count, description and
 *         any sub-forums linked from the same cell
 */
public List<Forum> parseForums(Document root, BulletinBoard parent) {
    Utilities.debug("parseForums");

    List<Forum> forums = new ArrayList<Forum>();

    // every row of the forum tables is a candidate
    Elements rows = root.select("tbody[id*=collapseobj_forumbit_] tr");
    assert !rows.isEmpty();

    for (Element row : rows) {
        Forum forum = new Forum(parent);

        // cells of this row
        Elements cells = row.select("td");
        assert !cells.isEmpty() : row.html();

        // xbox360achievements keeps its many sub-forums in a nested table. Their links are
        // already picked up through the parent cell, so header rows and rows with fewer
        // than three cells are not parsed on their own.
        boolean isHeaderRow = !cells.select("td.thead").isEmpty();
        if (isHeaderRow || cells.size() < 3) {
            continue;
        }

        // the second cell holds the title link, sub-forum links and the description
        Element titleCell = cells.get(1);
        Elements links = titleCell.select("a");
        assert !links.isEmpty() : cells.html();
        Element titleLink = links.first();

        forum.url = titleLink.attr("href");
        assert forum.url != null;
        Utilities.debug("new_forum.url : " + forum.url);

        assert links.first() != null;
        forum.title = titleLink.text();
        assert forum.title != null;
        Utilities.debug("new_forum.title : " + forum.title);

        // whatever links remain after the title link are sub-forums
        links.remove(titleLink);
        for (Element link : links) {
            Forum child = new Forum(parent);
            child.url = link.attr("href");
            assert child.url != null;
            child.title = link.text();
            assert child.title != null;
            forum.subForums.add(child);
            Utilities.debug("added subForum: " + child.title);
        }

        // number of users viewing the forum, "0" when the row does not say
        Element viewing = row.select(":matchesOwn((\\d+ Viewing))").first();
        forum.numViewing = (viewing == null) ? "0" : viewing.text();
        Utilities.debug("new_forum.numViewing : " + forum.numViewing);

        // description text, empty when absent
        Element description = titleCell.select("div.smallfont").first();
        forum.description = (description == null) ? "" : description.text();
        Utilities.debug("new_forum.description : " + forum.description);

        Utilities.debug("new_forum.parent.url : " + forum.parent.url);

        forums.add(forum);
        Utilities.debug("-----");
    }
    Utilities.debug("end parseForums");
    return forums;
}

From source file:eu.masconsult.bgbanking.banks.sgexpress.SGExpressClient.java

/**
 * Loads the account list page with the given token and converts the rows of its first
 * result table into bank accounts.
 *
 * @param authTokenString JSON form of the auth token
 * @return the accounts for which a row could be converted
 * @throws ParseException when {@code #main}, the result table or its rows cannot be found
 */
@Override
public List<RawBankAccount> getBankAccounts(String authTokenString)
        throws IOException, ParseException, AuthenticationException {
    AuthToken token = AuthToken.fromJson(authTokenString);
    String page = loadPageWithAuth(getHttpClient(), token, LIST_ACCOUNTS_XML_ID);
    Document document = Jsoup.parse(page, BASE_URL);

    Element main = document.getElementById("main");
    if (main == null) {
        throw new ParseException("getBankAccounts: can't find #main");
    }

    Elements resultTables = main.select("section.result table.data");
    if (resultTables == null || resultTables.isEmpty()) {
        throw new ParseException("getBankAccounts: can't find table section.result table.data");
    }

    Elements tableRows = resultTables.first().getElementsByTag("tr");
    if (tableRows == null || tableRows.isEmpty()) {
        throw new ParseException("getBankAccounts: first table is empty");
    }

    List<RawBankAccount> accounts = new ArrayList<RawBankAccount>(tableRows.size());

    // a header row names the type applied to the rows that follow it
    String currentType = "undef";
    for (Element tableRow : tableRows) {
        boolean isHeader = !tableRow.getElementsByTag("th").isEmpty();
        if (isHeader) {
            currentType = tableRow.child(0).text();
            continue;
        }

        RawBankAccount account = obtainBankAccountFromHtmlTableRow(currentType, tableRow);
        if (account != null) {
            accounts.add(account);
        }
    }

    return accounts;
}

From source file:org.jresponder.message.MessageRefImpl.java

/**
 * Render a message in the context of a particular subscriber
 * and subscription./* www . j  a va  2s  . com*/
 */
@Override
public boolean populateMessage(MimeMessage aMimeMessage, SendConfig aSendConfig, Subscriber aSubscriber,
        Subscription aSubscription) {

    try {

        // prepare context
        Map<String, Object> myRenderContext = new HashMap<String, Object>();
        myRenderContext.put("subscriber", aSubscriber);
        myRenderContext.put("subscription", aSubscription);
        myRenderContext.put("config", aSendConfig);
        myRenderContext.put("message", this);

        // render the whole file
        String myRenderedFileContents = TextRenderUtil.getInstance().render(fileContents, myRenderContext);

        // now parse again with Jsoup
        Document myDocument = Jsoup.parse(myRenderedFileContents);

        String myHtmlBody = "";
        String myTextBody = "";

        // html body
        Elements myBodyElements = myDocument.select("#htmlbody");
        if (!myBodyElements.isEmpty()) {
            myHtmlBody = myBodyElements.html();
        }

        // text body
        Elements myJrTextBodyElements = myDocument.select("#textbody");
        if (!myJrTextBodyElements.isEmpty()) {
            myTextBody = TextUtil.getInstance().getWholeText(myJrTextBodyElements.first());
        }

        // now build the actual message
        MimeMessage myMimeMessage = aMimeMessage;
        // wrap it in a MimeMessageHelper - since some things are easier with that
        MimeMessageHelper myMimeMessageHelper = new MimeMessageHelper(myMimeMessage);

        // set headers

        // subject
        myMimeMessageHelper.setSubject(TextRenderUtil.getInstance()
                .render((String) propMap.get(MessageRefProp.JR_SUBJECT.toString()), myRenderContext));

        // TODO: implement DKIM, figure out subetha

        String mySenderEmailPattern = aSendConfig.getSenderEmailPattern();
        String mySenderEmail = TextRenderUtil.getInstance().render(mySenderEmailPattern, myRenderContext);
        myMimeMessage.setSender(new InternetAddress(mySenderEmail));

        myMimeMessageHelper.setTo(aSubscriber.getEmail());

        // from
        myMimeMessageHelper.setFrom(
                TextRenderUtil.getInstance()
                        .render((String) propMap.get(MessageRefProp.JR_FROM_EMAIL.toString()), myRenderContext),
                TextRenderUtil.getInstance()
                        .render((String) propMap.get(MessageRefProp.JR_FROM_NAME.toString()), myRenderContext));

        // see how to set body

        // if we have both text and html, then do multipart
        if (myTextBody.trim().length() > 0 && myHtmlBody.trim().length() > 0) {

            // create wrapper multipart/alternative part
            MimeMultipart ma = new MimeMultipart("alternative");
            myMimeMessage.setContent(ma);
            // create the plain text
            BodyPart plainText = new MimeBodyPart();
            plainText.setText(myTextBody);
            ma.addBodyPart(plainText);
            // create the html part
            BodyPart html = new MimeBodyPart();
            html.setContent(myHtmlBody, "text/html");
            ma.addBodyPart(html);
        }

        // if only HTML, then just use that
        else if (myHtmlBody.trim().length() > 0) {
            myMimeMessageHelper.setText(myHtmlBody, true);
        }

        // if only text, then just use that
        else if (myTextBody.trim().length() > 0) {
            myMimeMessageHelper.setText(myTextBody, false);
        }

        // if neither text nor HTML, then the message is being skipped,
        // so we just return null
        else {
            return false;
        }

        return true;

    } catch (MessagingException e) {
        throw new RuntimeException(e);
    } catch (UnsupportedEncodingException e) {
        throw new RuntimeException(e);
    }

}

From source file:tkbautobooking.BookingSystem.java

/**
 * Reads the hidden access token from the login page HTML into {@code login_hidden_token}.
 *
 * @throws Exception when the page does not contain exactly one element named
 *                   "access_token", or that element has no "value" attribute
 */
private void praseLoginPage() throws Exception {

    Elements tokenInputs = Jsoup.parse(LoginPageHTML).getElementsByAttributeValue("name", "access_token");

    // exactly one token element carrying a value is expected
    boolean exactlyOne = tokenInputs.size() == 1;
    if (!exactlyOne || !tokenInputs.first().hasAttr("value")) {
        throw new Exception("Prase Login Page fail !");
    }

    login_hidden_token = tokenInputs.first().attr("value");

}

From source file:net.devietti.ArchConfMapServlet.java

/**
 * Fetch info for a list of conferences from WikiCFP.
 *
 * @param confs search terms, joined with "+" into the WikiCFP query
 * @return one entry per search hit whose short name is found in {@code CONFERENCE_NAMES}
 *         and which carries an event id
 * @throws IOException declared for the page fetch
 */
private List<Conf> getConfInfo(List<String> confs) throws IOException {
    String query = StringUtils.join(confs, "+");
    List<Conf> results = new LinkedList<Conf>();

    /*
     * NB: year=f returns hits for this year and future years. This is exactly what we want, since
     * we automatically discard conferences that have already happened.
     */
    Document doc = getURL("http://www.wikicfp.com/cfp/servlet/tool.search?year=f&q=" + query);

    Elements rows = doc.select("div[class=contsec] table table tr");
    for (Iterator<Element> iter = rows.iterator(); iter.hasNext();) {
        final Element firstRow = iter.next();
        final Elements confName = firstRow.select("td a");
        if (confName.isEmpty())
            continue;

        // make sure we match one of the conferences we're interested in
        String cn = confName.first().text().split(" ")[0];
        int found = Arrays.binarySearch(CONFERENCE_NAMES, cn);
        if (found < 0)
            continue; // not found

        final String confFullName = firstRow.select("td").get(1).text();
        // don't match other ICS conferences, eg Information, Communication, Society
        if (CONFERENCE_NAMES[found].equals("ICS")) {
            if (!confFullName.toLowerCase().contains("supercomputing")) {
                continue;
            }
        }
        // don't match other CC conferences, eg Creative Construction
        if (CONFERENCE_NAMES[found].equals("CC")) {
            if (!confFullName.toLowerCase().contains("compiler")) {
                continue;
            }
        }

        final Conf conf = new Conf();
        conf.name = confName.first().text();

        /*
         * we found a hit! The conference information is split across two <tr> table elements.
         * Conference name and link to cfp are in the first <tr>, and dates, location and deadline
         * in the second.
         */

        if (!iter.hasNext())
            break; // the table ended before the second row of this hit
        final Element secondRow = iter.next();
        final Elements secondRowCells = secondRow.select("td");

        String dates = secondRowCells.first().text();
        int dash = dates.indexOf('-');
        String startDate = dates.substring(0, dash).trim();
        conf.start = cfpDateFormat.parseDateTime(startDate);
        conf.end = cfpDateFormat.parseDateTime(dates.substring(dash + 1).trim());

        conf.dates = cfpDateFormat.print(conf.start) + " - " + cfpDateFormat.print(conf.end);
        // same month and year: use the compact "month day-day year" form
        if (conf.start.year().equals(conf.end.year())
                && conf.start.monthOfYear().equals(conf.end.monthOfYear())) {
            conf.dates = monthFormat.print(conf.start) + " " + dayFormat.print(conf.start) + "-"
                    + dayFormat.print(conf.end) + " " + yearFormat.print(conf.start);
        }

        String deadline = secondRowCells.get(2).text().trim();
        if (deadline.contains("(")) { // abstract deadline may be in parentheses
            deadline = deadline.substring(0, deadline.indexOf('(')).trim();
        }
        conf.deadline = cfpDateFormat.parseDateTime(deadline);

        conf.url = "http://www.wikicfp.com" + confName.attr("href");
        /*
         * extract the WikiCFP eventid from the link, so that, later on, the client can pull the
         * cfp page and get the direct conference site link.
         */

        com.shopobot.util.URL url = new com.shopobot.util.URL(conf.url);
        String[] eid = url.getParameters("eventid");
        if (0 == eid.length)
            continue;
        try {
            conf.eventid = Integer.valueOf(eid[0]);
        } catch (NumberFormatException e) {
            // report the offending value, not the array reference
            error("invalid event id " + eid[0]);
            continue;
        }

        conf.location = secondRowCells.get(1).text();

        results.add(conf);
    }
    return results;
}