Example usage for org.jsoup.nodes Element select

List of usage examples for org.jsoup.nodes Element select

Introduction

On this page you can find example usages of the org.jsoup.nodes.Element.select method.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:lolthx.autohome.buy.AutohomePriceListFetch.java

/**
 * Parses a fetched page and saves one price record per {@code li.price-item} element.
 *
 * <p>For each item the method reads the post time, post id, poster, the labelled
 * detail entries under {@code div.price-item-bd li} and the seller block, copies them
 * into an {@code AutohomePriceInfoBean} and calls {@code saveOnNotExist()}.
 * Items whose post time is rejected by {@code isTime(postTime, start, end)} or whose
 * post id is blank are skipped. An exception raised while handling one item is printed
 * and that item is skipped; processing continues with the next item.
 *
 * <p>NOTE(review): most label/separator literals below are {@code ""} or {@code "?"}.
 * They look like non-ASCII labels that were lost to an encoding problem; confirm
 * against the upstream source before relying on this method.
 *
 * @param result raw HTML of the page; blank input is ignored
 * @param task   supplies the date window, url, project name and the {@code extra}
 *               string, which is split at the first {@code ":"} into forum id and keyword
 * @throws Exception declared by the overridden method
 */
@Override
public void parse(String result, Task task) throws Exception {
    if (StringUtils.isBlank(result)) {
        return;
    }

    Date start = task.getStartDate();
    Date end = task.getEndDate();

    Document doc = Jsoup.parse(result);
    Elements lis = doc.select("li.price-item");

    // NOTE(review): one bean instance is shared by every item in the loop below, so a
    // value set for one item stays in place for later items that do not overwrite it.
    AutohomePriceInfoBean bean = new AutohomePriceInfoBean();

    for (Element li : lis) {

        try {
            Elements postTimeEl = li.select("div.user-name span");
            String postTime = "";
            if (!postTimeEl.isEmpty()) {
                // NOTE(review): replaceAll("", "") is a no-op as written and the "?"
                // separator looks like a placeholder for a lost character - confirm.
                postTime = StringUtils.trim(
                        StringUtils.substringBefore(postTimeEl.first().text(), "?").replaceAll("", ""));

                // skip items that isTime rejects for the task's start/end dates
                if (!isTime(postTime, start, end)) {
                    continue;
                }
            }
            bean.setPostTime(postTime);
            bean.setUrl(task.getUrl());
            bean.setForumId(StringUtils.substringBefore(task.getExtra(), ":"));
            bean.setProjectName(task.getProjectName());
            bean.setKeyword(StringUtils.substringAfter(task.getExtra(), ":"));

            // post id: the part of the share link's data-target after the last "_"
            Elements id = li.select("div.price-share a.share");
            if (!id.isEmpty()) {
                String idStr = id.first().attr("data-target");
                idStr = StringUtils.substringAfterLast(idStr, "_");
                if (StringUtils.isBlank(idStr)) {
                    continue;
                }

                bean.setId(idStr);
            }

            // poster: profile url, id (last path segment of the url) and display name
            Elements user = li.select("div.user-name a");
            if (!user.isEmpty()) {
                String userUrl = user.first().absUrl("href");
                String userId = StringUtils.substringAfterLast(userUrl, "/");
                String userName = user.first().text();

                bean.setUserId(userId);
                bean.setUserUrl(userUrl);
                bean.setUserName(userName);
            }

            // Detail entries: each entry's text is matched against a label prefix and the
            // remainder is stored in the corresponding bean field.
            // NOTE(review): the prefixes here are "" or "?"; an empty prefix matches any
            // non-null text, so as written those branches run for every entry.
            Elements dataLis = li.select("div.price-item-bd li");
            for (Element dataLi : dataLis) {
                String data = dataLi.text();

                if (StringUtils.startsWith(data, "")) {
                    bean.setCar(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }

                if (StringUtils.startsWith(data, "")) {
                    bean.setPrice(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }

                if (StringUtils.startsWith(data, "")) {
                    bean.setGuidePrice(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }

                if (StringUtils.startsWith(data, "?")) {
                    bean.setTotalPrice(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }

                if (StringUtils.startsWith(data, "")) {
                    bean.setPurchaseTax(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }

                if (StringUtils.startsWith(data, "?")) {
                    bean.setCommercialInsurance(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }

                if (StringUtils.startsWith(data, "")) {
                    bean.setVehicleUseTax(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }
                if (StringUtils.startsWith(data, "")) {
                    bean.setCompulsoryInsurance(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }
                if (StringUtils.startsWith(data, "")) {
                    bean.setLicenseFee(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }
                if (StringUtils.startsWith(data, "?")) {
                    bean.setPromotion(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }
                if (StringUtils.startsWith(data, "")) {
                    bean.setBuyTime(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }
                if (StringUtils.startsWith(data, "")) {
                    // purchase area: "province,city"; a single value is used for both fields
                    String area = StringUtils.trim(StringUtils.substringAfter(data, ""));
                    String[] pAndC = StringUtils.splitByWholeSeparator(area, ",", 2);

                    if (pAndC.length == 1) {
                        bean.setBuyProvince(pAndC[0]);
                        bean.setBuyCity(pAndC[0]);
                    }

                    if (pAndC.length == 2) {
                        bean.setBuyProvince(pAndC[0]);
                        bean.setBuyCity(pAndC[1]);
                    }

                }
                if (StringUtils.startsWith(data, "")) {
                    Elements level = dataLi.select("span.level");
                    // text of span.level, stored as the seller comment
                    if (!level.isEmpty()) {
                        bean.setSellerComment(level.first().text());
                    }

                    // seller link: url, display name and id (last path segment of the url)
                    Elements seller = dataLi.select("a.title");
                    if (!seller.isEmpty()) {
                        String sellerUrl = seller.first().absUrl("href");
                        String sellerName = seller.first().text();
                        String sellerId = StringUtils.substringAfterLast(sellerUrl, "/");

                        bean.setSellerId(sellerId);
                        bean.setSellerName(sellerName);
                        bean.setSellerUrl(sellerUrl);
                    }

                    // seller phone number
                    Elements sellerPhone = dataLi.select("em.phone-num");
                    if (!sellerPhone.isEmpty()) {
                        bean.setSellerPhone(sellerPhone.first().text());
                    }

                    // seller address: disabled in the original, never extracted
                    // Elements sellerAddress =
                    // dataLi.select("em.phone-num");

                }
                if (StringUtils.startsWith(data, "?")) {
                    bean.setBuyFeeling(StringUtils.trim(StringUtils.substringAfter(data, "")));
                }
            }
            bean.saveOnNotExist();
        } catch (Exception e) {
            // best effort: report the failure and move on to the next item
            e.printStackTrace();
            continue;
        }
    }
}

From source file:me.rkfg.xmpp.bot.plugins.CoolStoryPlugin.java

/**
 * Downloads a story from the given website and returns it as plain text.
 *
 * <p>The element matching the site's CSS query is reduced to text: nested {@code div}s
 * are dropped, images are replaced by their {@code src} value, and {@code br}/{@code p}
 * boundaries become line breaks. When re-rolling is enabled and the text exceeds the
 * configured length or line limits, another story is fetched until the configured
 * number of rolls is used up; the last fetched text is returned in that case.
 *
 * @param website source of the page URL and the CSS query that locates the story
 * @return the story text, or {@code ERROR_COULD_NOT_PARSE} when the query matches nothing
 * @throws IOException if the page cannot be downloaded
 */
private String fetchStory(Website website) throws IOException {
    //noinspection ConstantConditions
    for (int attempt = 1; ; attempt++) {
        final Document page = Jsoup.connect(website.getUrlString()).userAgent(DEFAULT_UA).get();
        page.outputSettings(new Document.OutputSettings().prettyPrint(false));
        logger.info("Fetched a story from {}", page.location());

        final Element body = page.select(website.getCssQuery()).first();
        if (body == null) {
            return ERROR_COULD_NOT_PARSE;
        }

        body.select("div").remove();
        for (Element image : body.select("img")) {
            image.replaceWith(new TextNode(image.attr("src"), ""));
        }
        // Mark line breaks with a literal backslash-n so they survive HTML cleaning,
        // then turn the markers into real newlines.
        body.select("br").after("\\n");
        body.select("p").before("\\n\\n");
        final String markedHtml = body.html();
        final String htmlWithNewlines = markedHtml.replaceAll("\\\\n", "\n");

        final Document.OutputSettings rawOutput = new Document.OutputSettings().prettyPrint(false);
        final String text = Jsoup.clean(htmlWithNewlines, "", Whitelist.none(), rawOutput).trim();
        final int length = text.length();
        final int lines = countLines(text);

        final boolean tooLong = length > CONFIG_MAX_STORY_LENGTH || lines > CONFIG_MAX_STORY_LINES;
        final boolean rollAgain = CONFIG_REROLL_LONG_STORIES && tooLong && attempt <= CONFIG_MAX_ROLLS;
        if (!rollAgain) {
            return text;
        }
    }
}

From source file:me.vertretungsplan.parser.SVPlanParser.java

/**
 * Reads the plan date and, when present, the last-change timestamp from an SVPlan
 * block and stores both on {@code day}.
 *
 * <p>The date text comes from the first available source: the elements matching
 * {@code .svp-plandatum-heute, .svp-plandatum-morgen, .Titel}, or else the document
 * title when it starts with the "Vertretungsplan f&uuml;r " prefix. If neither yields
 * a value the placeholder "Unbekanntes Datum" is used.
 *
 * <p>The prefix literal previously read "fr " (the u-umlaut had been lost), which never
 * matches real German text. The patterns below accept the correct spelling and, for
 * backward compatibility, the old umlaut-less form.
 *
 * @param svp element holding the plan's date and upload markers
 * @param doc document whose title is the fallback date source
 * @param day schedule day that receives the date and last-change values
 */
private void setDate(Element svp, Document doc, SubstitutionScheduleDay day) {
    String date = "Unbekanntes Datum";
    if (!svp.select(".svp-plandatum-heute, .svp-plandatum-morgen, .Titel").isEmpty()) {
        date = svp.select(".svp-plandatum-heute, .svp-plandatum-morgen, .Titel").text()
                .replaceAll("Vertretungsplan (f\u00fc?r )?", "").trim();
    } else {
        final String title = doc.title();
        // The prefix is non-empty, so a shorter result means the prefix was present.
        final String withoutPrefix = title.replaceFirst("^Vertretungsplan f\u00fc?r ", "");
        if (withoutPrefix.length() != title.length()) {
            date = withoutPrefix;
        }
    }
    date = date.replaceAll("\\s+", " ");
    day.setDateString(date);
    day.setDate(ParserUtils.parseDate(date));
    if (!svp.select(".svp-uploaddatum, .Stand").isEmpty()) {
        String lastChange = svp.select(".svp-uploaddatum, .Stand").text().replace("Aktualisierung: ", "")
                .replace("Stand: ", "");
        day.setLastChangeString(lastChange);
        day.setLastChange(ParserUtils.parseDateTime(lastChange));
    }
}

From source file:fr.arlefebvre.pronostics.controller.UEFATeamsController.java

/**
 * Returns the UEFA national teams scraped from the FIFA ranking page, sorted by name.
 *
 * <p>A non-empty result is kept in {@code pseudoCache} and served from there on later
 * calls. An empty result (for example after a download failure) is never cached, so
 * the next call tries the download again and can still populate the cache.
 *
 * <p>Rows without a team-name or country-code cell, and tables without a
 * {@code tbody}, are skipped instead of failing the whole request.
 *
 * @return the teams sorted by name; empty when the page could not be loaded or parsed
 */
@RequestMapping("/uefa/teams")
public List<Team> teams() {
    if (pseudoCache != null && !pseudoCache.isEmpty()) {
        return pseudoCache;
    }
    ArrayList<Team> result = new ArrayList<Team>();
    String uri = "http://fr.fifa.com/fifa-world-ranking/ranking-table/men/uefa.html";

    try {
        // Connect to the site and load the HTML document.
        Document doc = Jsoup.connect(uri).get();
        for (Element table : doc.getElementsByClass("table")) {
            Element tbody = table.getElementsByTag("tbody").first();
            if (tbody == null) {
                continue;
            }
            for (Element row : tbody.children()) {
                Element teamNameElement = row.getElementsByClass("tbl-teamname").first();
                Element countryCodeElement = row.getElementsByClass("tbl-countrycode").first();
                if (teamNameElement == null || countryCodeElement == null) {
                    continue;
                }
                Team team = new Team();
                team.setName(teamNameElement.text());
                team.setCountryCode(countryCodeElement.text());
                Element flag = teamNameElement.select("img").first();
                if (flag != null) {
                    team.setImgUrl(flag.absUrl("src"));
                }
                team.setNationalTeam(true);
                result.add(team);
            }
        }
    } catch (IOException e) {
        // Best effort: report the failure and return whatever was collected.
        e.printStackTrace();
    }

    result.sort((t1, t2) -> t1.getName().compareTo(t2.getName()));
    // Cache only real data; caching an empty list would block every later refresh.
    if (!result.isEmpty()) {
        pseudoCache = result;
    }
    return result;
}

From source file:de.stkl.gbgvertretungsplan.sync.SyncAdapter.java

/**
 * Extracts the data rows of the {@code table.mon_list} table below {@code root}.
 *
 * <p>Every table row after the first becomes one or more result rows, each holding the
 * text of the row's {@code td} cells. The first cell is treated as a comma-separated
 * list of class names: the row is emitted once per class name, with the trimmed name
 * as the first value.
 *
 * @param root element to search for the table
 * @return the parsed rows; empty when no {@code table.mon_list} exists
 */
private List<List<String>> parseRows(Element root) {
    List<List<String>> allRows = new ArrayList<List<String>>();
    Element table = root.select("table.mon_list").first();
    if (table == null) {
        // first() yields null when nothing matches; report "no rows" instead of an NPE
        return allRows;
    }

    Elements rows = table.select("tr:gt(0)");
    for (Element row : rows) {
        List<String> values = new ArrayList<String>();
        String[] pendingClasses = null;

        int column = 0;
        for (Element categ : row.select("td")) {
            String text = categ.text();
            if (column == 0) {
                // First cell: the class field, possibly several classes separated by ","
                pendingClasses = text.split(",");
                if (pendingClasses.length == 0) {
                    // nothing left after splitting (e.g. ","): keep the raw cell text
                    values.add(text);
                }
            } else {
                values.add(text);
            }
            column++;
        }

        if (pendingClasses == null || pendingClasses.length == 0) {
            allRows.add(values);
        } else {
            // One output row per class name, sharing the remaining cell values.
            for (String classN : pendingClasses) {
                List<String> perClass = new ArrayList<String>(values);
                perClass.add(0, classN.trim());
                allRows.add(perClass);
            }
        }
    }

    return allRows;
}

From source file:net.orzo.data.Web.java

/**
 * Runs a CSS query below {@code root} and either collects the matches or passes each
 * match to a script callback.
 *
 * @param root   element the query starts from
 * @param select CSS query
 * @param fn     callback invoked once per matching element, or {@code null} to collect
 *               the matches instead
 * @return the matching elements when {@code fn} is {@code null}; {@code null} when a
 *         callback was supplied
 * @throws RuntimeException wrapping anything thrown by the query or the callback
 */
public List<Element> queryPage(Element root, String select, ScriptFunction fn) {
    try {
        if (fn == null) {
            List<Element> matches = new ArrayList<>();
            Iterators.addAll(matches, root.select(select).iterator());
            return matches;
        }

        for (Element match : root.select(select)) {
            MethodHandle handle = fn.getBoundInvokeHandle(match);
            handle.invoke(match);
        }
        // callback mode produces no list
        return null;

    } catch (Throwable ex) {
        throw new RuntimeException(ex);
    }
}

From source file:com.anhao.spring.service.impl.PhotosServiceImpl.java

/**
 * Loads the detail page of a wallpaper and stores one photo/tag link per tag listed
 * under {@code #tags li}.
 *
 * <p>The photo id is looked up once per call rather than once per tag, and tag entries
 * without a {@code .tagname} element are skipped.
 *
 * @param wallpaperId id of the wallpaper, appended to the detail-page URL
 */
private void getWallpaperTags(String wallpaperId) {
    String wallpaperUrl = "http://alpha.wallhaven.cc/wallpaper/" + wallpaperId;
    Document docDetails = getWallpaperHtmlDocument(wallpaperUrl);
    if (docDetails == null) {
        // defensive: the helper's failure behaviour is not visible from here
        return;
    }

    Elements tags = docDetails.select("#tags li");
    if (tags.isEmpty()) {
        return;
    }

    // The photo id depends only on wallpaperId, so resolve it once for all tags.
    String photosId = jobPhotosDAO.findByWallpaperId(wallpaperId);

    for (Element tag : tags) {
        Element tagName = tag.select(".tagname").first();
        if (tagName == null) {
            continue;
        }
        String tagText = tagName.text();
        String tagId = tagDAO.findByTagName(tagText);

        System.out.println("wallpaperId:" + wallpaperId + "====tag name " + tagText);

        PhotosTag photosTag = new PhotosTag();
        photosTag.setPhotoId(photosId);
        photosTag.setTagId(tagId);
        photostagDAO.add(photosTag);
    }
}

From source file:hello.Scraper.java

/**
 * Converts one scraped list element into a {@code DumpEntry}.
 *
 * <p>The element's own text must start with a 19-character GMT timestamp in the form
 * {@code yyyy-MM-dd HH:mm:ss}. The first {@code a} child supplies the id (its text) and
 * the reference (its {@code href}); without a link the id is {@code "private data"} and
 * the reference is {@code null}. The status is the own text of the first {@code span}.
 *
 * @param payload element describing one entry
 * @return the converted entry
 * @throws ParseException            if the leading timestamp is missing, too short or malformed
 * @throws IndexOutOfBoundsException if the element contains no {@code span}
 */
@Transformer(inputChannel = "channel3", outputChannel = "channel4")
public DumpEntry convert(Element payload) throws ParseException {
    final int timestampLength = 19; // length of "yyyy-MM-dd HH:mm:ss"
    String ownText = payload.ownText();
    if (ownText.length() < timestampLength) {
        throw new ParseException("Entry text too short for a timestamp: \"" + ownText + "\"", 0);
    }
    String dateStr = ownText.substring(0, timestampLength);

    // "HH" is hour-of-day (0-23). The previous "hh" (1-12 clock) read 12:xx:xx as
    // 00:xx:xx because the input carries no AM/PM marker.
    DateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    format.setTimeZone(TimeZone.getTimeZone("GMT"));

    Date timestamp = format.parse(dateStr);

    Elements list = payload.select("a");
    String id;
    String ref;
    if (list.isEmpty()) {
        id = "private data";
        ref = null;
    } else {
        Element a = list.get(0);
        id = a.ownText();
        ref = a.attr("href");
    }

    Element span = payload.select("span").get(0);
    String status = span.ownText();

    return new DumpEntry(timestamp, id, ref, status);
}

From source file:com.johan.vertretungsplan.parser.UntisCommonParser.java

/**
 * Parses a "Nachrichten zum Tag" (messages of the day) table from an Untis
 * substitution plan.
 *
 * <p>Every row that does not contain the heading text "Nachrichten zum Tag" yields one
 * message: the decoded contents of its {@code td} cells joined with newlines. Rows
 * without any {@code td} cell are skipped.
 *
 * @param table
 *            the <code>table</code> element of the HTML document that should be
 *            parsed
 * @param data
 *            data from the school (from <code>Schule.getData()</code>); not read by
 *            this method
 * @param tag
 *            the {@link VertretungsplanTag} in which the messages are stored
 */
protected void parseNachrichten(Element table, JSONObject data, VertretungsplanTag tag) {
    Elements zeilen = table.select("tr:not(:contains(Nachrichten zum Tag))");
    for (Element zeile : zeilen) {
        Elements spalten = zeile.select("td");
        if (spalten.isEmpty()) {
            // e.g. a header-only row: nothing to report, and the old substring(1) threw here
            continue;
        }
        StringBuilder info = new StringBuilder();
        boolean first = true;
        for (Element spalte : spalten) {
            if (!first) {
                info.append('\n');
            }
            info.append(TextNode.createFromEncoded(spalte.html(), null).getWholeText());
            first = false;
        }
        tag.getNachrichten().add(info.toString());
    }
}

From source file:de.stkl.gbgvertretungsplan.sync.SyncAdapter.java

/**
 * Reads the column headlines of the {@code table.mon_list} table below {@code root}.
 *
 * @param root element to search for the table
 * @return the text of every {@code th} in the table's first row, in document order;
 *         empty when no {@code table.mon_list} exists
 */
private List<String> parseCategories(Element root) {
    List<String> categories = new ArrayList<String>();
    Element table = root.select("table.mon_list").first();
    if (table == null) {
        // first() yields null when nothing matches; report "no categories" instead of an NPE
        return categories;
    }
    // category headlines
    for (Element headline : table.select("tr:first-child th")) {
        categories.add(headline.text());
    }

    return categories;
}