Example usage for org.jsoup.nodes Document select

List of usage examples for org.jsoup.nodes Document select

Introduction

On this page you can find example usages of org.jsoup.nodes Document select.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:com.FFLive.Player.java

/**
 * Populates this player's fields from the Fantasy Premier League site.
 * <p>
 * When {@code playerID} is {@code "-1"} the "average team" pseudo-player is built: the average
 * score is scraped from the event-history page for gameweek {@code GW} and the name is set to
 * {@code "Average"}. Otherwise the player's JSON record is downloaded and copied field by field.
 * <p>
 * Connection timeouts are retried by calling this method again. Once {@code timeoutCheck()}
 * reports more than three, the player is skipped.
 * NOTE(review): this relies on {@code timeoutCheck()} counting attempts across calls - confirm.
 * <p>
 * All failures are logged through {@code Main.log}; no exception is propagated for the cases
 * handled below.
 */
public void getPlayer() {
    try {
        if (playerID.equals("-1")) {
            //Average Team Player...
            Document doc = Jsoup.connect("http://fantasy.premierleague.com/entry/1/event-history/" + GW + "/")
                    .get();
            Elements averageScore = doc.select("div.ismUnit.ismSize2of5.ismLastUnit")
                    .select("div.ismSBSecondaryVal");
            if (averageScore.isEmpty()) {
                playerScore = 0;
            } else {
                try {
                    // Strip everything that is not a digit before parsing the score text
                    playerScore = Integer.parseInt(averageScore.text().replaceAll("\\D+", ""));
                } catch (NumberFormatException n) {
                    Main.log.log(2, "Issue saving Average Team..." + n + "\n");
                }
            }
            playerName = "Average";
        } else {
            Main.log.log(7, "Fetching Player " + playerID + "\n");
            //Connects to the players info page; both stream and reader are closed when the block exits
            try (InputStream playerJson = new URL(
                    "http://fantasy.premierleague.com/web/api/elements/" + playerID + "/").openStream();
                    Reader reader = new InputStreamReader(playerJson, "UTF-8")) {
                //Reads the data into a JSON object (via casting into a regular object)
                JSONObject playerValues = (JSONObject) JSONValue.parse(reader);

                //TODO Check if there are values overlength
                //Max Length Ref playerCount INT DEFAULT 1 NOT NULL, firstName VARCHAR(40), lastName VARCHAR(40),
                //webName VARCHAR(50), score INT, gameweekBreakdown VARCHAR(250), breakdown VARCHAR(250),
                //teamName VARCHAR(40), currentFixture VARCHAR(40), nextFixture VARCHAR(40), status VARCHAR(10),
                //news VARCHAR(250), photo VARCHAR(30))

                //Adds Required Data. A missing key makes get(...) return null, and the resulting
                //NullPointerException is reported by the catch block at the bottom.
                firstName = playerValues.get("first_name").toString();
                lastName = playerValues.get("second_name").toString();
                playerName = playerValues.get("web_name").toString();
                playerTeam = playerValues.get("team_name").toString();
                teamNumber = Integer.parseInt(playerValues.get("team_id").toString());
                position = playerValues.get("type_name").toString();
                playerScore = Integer.parseInt(playerValues.get("event_total").toString());
                gameweekBreakdown = playerValues.get("event_explain").toString();
                currentFixture = playerValues.get("current_fixture").toString();
                nextFixture = playerValues.get("next_fixture").toString();
                status = playerValues.get("status").toString();
                news = playerValues.get("news").toString();
                photo = playerValues.get("photo").toString();
            }
        }
    } catch (ConnectException | SocketTimeoutException e) {
        if (timeoutCheck() > 3) {
            Main.log.log(2, "Too Many Timeouts.. Skipping\n");
            // Give up here; without this return the method kept retrying forever
            return;
        }
        Main.log.log(6, "Timeout Connecting, Retrying...\n");
        getPlayer();
    } catch (UnknownHostException | NoRouteToHostException e) {
        Main.log.log(6, "No Connection... Skipping\n");
    } catch (IOException f) {
        Main.log.log(6, "In getPlayer: " + f + "\n");
    } catch (NullPointerException n) {
        Main.log.log(2, "Missing Player Field with ID:" + playerID + " " + n + "\n");
        Main.log.log(9, n);
    }
}

From source file:fusion.Fusion.java

/**
 * Looks up {@code val1} in the Big Huge Labs thesaurus and reports whether {@code val2} appears
 * among the returned {@code <w>} entries. On a match the two values are registered as synonyms
 * of each other.
 *
 * @param val1 the value whose synonyms are fetched
 * @param val2 the value searched for in the fetched synonyms
 * @return {@code true} if {@code val2} was found and both values were linked, {@code false} otherwise
 * @throws IOException if the thesaurus request fails
 */
private static boolean isSynonym(Value val1, Value val2) throws IOException {
    // NOTE(review): the API key is embedded in the URL and val1's value is not URL-encoded.
    String lookupUrl = "http://words.bighugelabs.com/api/2/92eae7f933f0f63404b3438ca46861e5/"
            + val1.getValue() + "/xml";
    Elements wordNodes = Jsoup.connect(lookupUrl).get().select("w");

    // The combined inner HTML of the <w> elements is split into one candidate per line
    String[] candidates = wordNodes.html().split("\n");
    if (!Arrays.asList(candidates).contains(val2.getValue())) {
        return false;
    }

    val1.addToSynonyms(val2);
    val2.addToSynonyms(val1);
    return true;
}

From source file:io.seldon.importer.articles.dynamicextractors.FirstElementAttrValueDateDynamicExtractor.java

/**
 * Extracts a date-valued attribute from the first element matching a CSS selector and
 * normalises it to {@code yyyy-MM-dd HH:mm:ss}.
 * <p>
 * {@code extractor_args[0]} is the CSS selector; the remaining entries are attribute names tried
 * in order until one yields a non-blank value. The value is parsed as
 * {@code yyyy-MM-dd'T'HH:mm:ss} first and as {@code yyyy-MM-dd HH:mm:ss} second. If neither
 * format matches, an error is logged and the raw attribute value is returned unchanged.
 *
 * @param attributeDetail carries the extractor arguments (selector followed by attribute names)
 * @param url             not used by this extractor
 * @param articleDoc      the parsed article to search
 * @return the reformatted date, the raw attribute value if it could not be parsed, or
 *         {@code null} if no attribute value was read
 * @throws Exception declared by the overridden method
 */
@Override
public String extract(AttributeDetail attributeDetail, String url, Document articleDoc) throws Exception {

    String attrib_value = null;

    if ((attributeDetail.extractor_args != null) && (attributeDetail.extractor_args.size() >= 2)) {
        String cssSelector = attributeDetail.extractor_args.get(0);
        // Check the selector BEFORE querying: jsoup rejects an empty query with an exception,
        // so the blank check is only useful if it guards the select call.
        if (StringUtils.isNotBlank(cssSelector)) {
            Element element = articleDoc.select(cssSelector).first();
            if (element != null) {
                // start at 1: index 0 is the cssSelector itself
                for (int i = 1; i < attributeDetail.extractor_args.size(); i++) {
                    String candidate = element.attr(attributeDetail.extractor_args.get(i));
                    if (candidate != null) {
                        attrib_value = candidate;
                        if (StringUtils.isNotBlank(attrib_value)) {
                            break;
                        }
                    }
                }
            }
        }
    }

    if (attrib_value != null) {
        String pubtext = attrib_value;
        Date result = null;
        try {
            result = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ENGLISH).parse(pubtext);
        } catch (ParseException e) {
            logger.info("Failed to parse date withUTC format " + pubtext);
        }
        // try a simpler format, but only when the first one did not match; otherwise a
        // misleading failure would be logged for a value that was parsed successfully
        if (result == null) {
            try {
                result = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH).parse(pubtext);
            } catch (ParseException e) {
                logger.info("Failed to parse date " + pubtext);
            }
        }

        if (result != null) {
            attrib_value = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(result);
        } else {
            logger.error("Failed to parse date " + pubtext);
        }
    }

    return attrib_value;
}

From source file:gov.medicaid.screening.dao.impl.NurseAnesthetistsLicenseDAOBean.java

/**
 * Parses the provider details from the response.
 * <p>
 * The member name is mandatory. Every other label on the page is optional and is copied to the
 * profile's single {@link License} (or to the profile itself, for the address) only when it is
 * not blank. The license number is copied unconditionally.
 *
 * @param page the response page
 * @return the parsed provider details
 * @throws ParsingException if the page is not of the expected format
 */
private ProviderProfile parseProvider(Document page) throws ParsingException {
    ProviderProfile profile = new ProviderProfile();
    String fullName = page.select("#ctl00_PageContent_CertRecert_lblMemberName").text();
    if (Util.isBlank(fullName)) { // this should always be present
        throw new ParsingException(ErrorCode.MITA50002.getDesc());
    }
    profile.setUser(parseName(fullName));

    ArrayList<License> licenses = new ArrayList<License>();
    License license = new License();
    licenses.add(license);
    profile.setLicenses(licenses);

    String issueDate = page.select("#ctl00_PageContent_CertRecert_lblInitialCertificationDate").text();
    if (Util.isNotBlank(issueDate)) {
        license.setOriginalIssueDate(parseDate(issueDate, DATE_FORMAT));
    }
    String renewalDate = page.select("#ctl00_PageContent_CertRecert_lblRecertBeginDate").text();
    if (Util.isNotBlank(renewalDate)) {
        license.setRenewalDate(parseDate(renewalDate, DATE_FORMAT));
    }

    String renewalEndDate = page.select("#ctl00_PageContent_CertRecert_lblRecertEndDate").text();
    if (Util.isNotBlank(renewalEndDate)) {
        // Store the end date in its own property; writing it through setRenewalDate
        // overwrote the recertification begin date set just above.
        license.setRenewalEndDate(parseDate(renewalEndDate, DATE_FORMAT));
    }
    String status = page.select("#ctl00_PageContent_CertRecert_lblRecertStatus").text();
    if (Util.isNotBlank(status)) {
        LicenseStatus licenseStatus = new LicenseStatus();
        licenseStatus.setName(status);
        license.setStatus(licenseStatus);
    }

    String licenseName = page.select("#ctl00_PageContent_CertRecert_lblAnesthesiaProgramName").text();
    if (Util.isNotBlank(licenseName)) {
        LicenseType type = new LicenseType();
        type.setName(licenseName);
        license.setType(type);
    }

    String licenseNo = page.select("#ctl00_PageContent_CertRecert_lblAANANumber").text();
    license.setLicenseNumber(licenseNo);

    String address = page.select("#ctl00_PageContent_CertRecert_lblMemberResidence").text();
    if (Util.isNotBlank(address)) {
        List<Address> addresses = new ArrayList<Address>();
        addresses.add(parseAddress(address));
        profile.setAddresses(addresses);
    }

    return profile;
}

From source file:org.apache.marmotta.ldclient.provider.phpbb.PHPBBForumProvider.java

/**
 * Collects further URLs from the document that need to be requested to complete the resource
 * data, e.g. the remaining pages of a paged HTML listing.
 * <p>
 * This implementation returns the absolute URL of every link whose {@code href} contains both
 * {@code start=} and {@code viewforum.php}. Links that cannot be resolved to an absolute URL
 * are left out.
 *
 * @param resource   not used by this implementation
 * @param document   the parsed page to search for paging links
 * @param requestUrl not used by this implementation
 * @return the absolute URLs of the matching links, in document order; empty if there are none
 */
@Override
protected List<String> findAdditionalRequestUrls(String resource, Document document, String requestUrl) {
    // anchors whose href contains both "start=" and "viewforum.php"
    final String pagingLinkQuery = "a[href*='start='][href*='viewforum.php']";

    List<String> pageUrls = new LinkedList<String>();
    for (Element pagingLink : document.select(pagingLinkQuery)) {
        String absoluteHref = pagingLink.absUrl("href");
        // absUrl yields an empty string when no absolute URL can be built
        if (!absoluteHref.isEmpty()) {
            pageUrls.add(absoluteHref);
        }
    }
    return pageUrls;
}

From source file:com.amastigote.xdu.query.module.WaterAndElectricity.java

/**
 * Requests the pay-info page for the given duration and returns the text of its non-empty
 * table cells, each with a leading prefix cut off.
 * <p>
 * NOTE(review): several string literals below are empty ({@code ""}). They look like they lost
 * non-ASCII characters to an encoding problem; restore them from the upstream source before
 * relying on the prefix-stripping logic.
 *
 * @param duration the period to query; only ONE_MONTH and THREE_MONTH are accepted
 * @return the processed cell texts, in document order
 * @throws IOException declared for the page helpers called here
 * @throws IllegalArgumentException if {@code duration} is any other value
 */
private List<String> query_payInfo(Duration duration) throws IOException {
    getPageAttributes(PAYINFO_SUFFIX);
    // Assemble the form data string that is handed to getPage below
    String OUTPUT_DATA = "But_Seach3=";
    switch (duration) {
    case ONE_MONTH:
        // presumably a class-level constant named like the enum value - TODO confirm
        OUTPUT_DATA += ONE_MONTH;
        break;
    case THREE_MONTH:
        OUTPUT_DATA += THREE_MONTH;
        break;
    default:
        throw new IllegalArgumentException("Bad parameter, check document for help");
    }
    OUTPUT_DATA += "&__VIEWSTATE=";
    OUTPUT_DATA += VIEWSTATE;
    OUTPUT_DATA += "&HiddenField_webName=";
    OUTPUT_DATA += "&HiddenField_UserID=";
    OUTPUT_DATA += ID;

    Document document = getPage(OUTPUT_DATA, PAYINFO_SUFFIX);
    Elements elements = document.select("td");

    List<String> stringArrayList = new ArrayList<>();

    // Keep only cells that have text
    for (Element td : elements) {
        String tmp = td.text();
        if (!"".equals(tmp)) {
            stringArrayList.add(tmp);
        }
    }

    // As written, contains("") is true for every string and indexOf("") is 0, so each entry
    // loses its first two characters (an entry shorter than two characters would make
    // substring throw) and the statement after `continue` is never reached.
    // NOTE(review): the two branches presumably searched for different label separators.
    for (int i = 0; i < stringArrayList.size(); i++) {
        if (stringArrayList.get(i).contains("")) {
            stringArrayList.set(i, stringArrayList.get(i).substring(stringArrayList.get(i).indexOf("") + 2));
            continue;
        }
        stringArrayList.set(i, stringArrayList.get(i).substring(stringArrayList.get(i).indexOf("") + 1));
    }

    /*
     * NOTE(review): the original comment here described the layout of the returned list but its
     * non-ASCII text is unreadable. The surviving fragments mention a size of 0, a size of
     * 2n for n records, and a warning involving null - confirm against the upstream source.
     */
    return stringArrayList;
}

From source file:ru.org.linux.user.AddPhotoWebTest.java

@Test
/**/*w  w  w. j  a va2s .c om*/
 * ?  
 */
public void testInvalid3Image() throws IOException {
    String auth = WebHelper.doLogin(resource, "JB", "passwd");
    ClientResponse cr = WebHelper.addPhoto(resource, "src/main/webapp/img/twitter.png", auth);
    assertEquals(HttpStatus.SC_BAD_REQUEST, cr.getStatus());
    Document doc = Jsoup.parse(cr.getEntityInputStream(), "UTF-8", resource.getURI().toString());
    assertEquals(
            "!   ?: ?  ",
            doc.select(".error").text()); // ?  
}

From source file:com.amastigote.xdu.query.module.WaterAndElectricity.java

/**
 * Requests the use-info page for the given duration and returns the text of its non-empty
 * table cells, split and trimmed as described in the body.
 * <p>
 * NOTE(review): the literals {@code "???"} and {@code ""} below look like they lost non-ASCII
 * characters to an encoding problem; restore them from the upstream source before relying on
 * the splitting and prefix-stripping logic.
 *
 * @param duration the period to query; only ONE_MONTH and THREE_MONTH are accepted
 * @return the processed cell texts, in document order
 * @throws IOException declared for the page helpers called here
 * @throws IllegalArgumentException if {@code duration} is any other value
 */
private List<String> query_useInfo(Duration duration) throws IOException {
    getPageAttributes(USEINFO_SUFFIX);
    // Assemble the form data string that is handed to getPage below
    String OUTPUT_DATA = "But_Seach3=";
    switch (duration) {
    case ONE_MONTH:
        // presumably a class-level constant named like the enum value - TODO confirm
        OUTPUT_DATA += ONE_MONTH;
        break;
    case THREE_MONTH:
        OUTPUT_DATA += THREE_MONTH;
        break;
    default:
        throw new IllegalArgumentException("Bad parameter, check document for help");
    }
    OUTPUT_DATA += "&__VIEWSTATE=";
    OUTPUT_DATA += VIEWSTATE;
    OUTPUT_DATA += "&HiddenField_webName=";
    OUTPUT_DATA += "&HiddenField_UserID=";
    OUTPUT_DATA += ID;

    Document document = getPage(OUTPUT_DATA, USEINFO_SUFFIX);
    Elements elements = document.select("td");

    List<String> stringArrayList = new ArrayList<>();

    for (Element td : elements) {
        String tmp = td.text();
        // Remove every space from the cell text
        tmp = tmp.replaceAll(" ", "");
        if (!"".equals(tmp)) {
            // A cell containing the marker is split in two: the text before the marker, and
            // the text from the marker onwards
            if (tmp.contains("???")) {
                stringArrayList.add(tmp.substring(0, tmp.indexOf("???")));
                stringArrayList.add(tmp.substring(tmp.indexOf("???")));
                continue;
            }
            stringArrayList.add(tmp);
        }
    }

    // As written, indexOf("") is 0, so each entry loses its first character (an empty entry
    // would make substring throw).
    // NOTE(review): this presumably cut everything up to a label separator.
    for (int i = 0; i < stringArrayList.size(); i++) {
        stringArrayList.set(i, stringArrayList.get(i).substring(stringArrayList.get(i).indexOf("") + 1));
    }

    /*
     * NOTE(review): the original comment here described the layout of the returned list but its
     * non-ASCII text is unreadable. The surviving fragments mention a size of 0, five fields
     * per record (size 5n for n records), and a warning involving null - confirm against the
     * upstream source.
     */
    return stringArrayList;
}

From source file:com.github.binlee1990.spider.video.spider.PersonCrawler.java

/**
 * Links the video to every cast member listed on the page.
 * <p>
 * For each non-blank name under {@code div#video_cast span.star}, the actress is looked up by
 * name. If she exists and no link for this video and name is found, a new {@link VideoActress}
 * row is inserted with the current time as both create and update time. Unknown names and
 * existing links are skipped.
 *
 * @param doc     the parsed video page
 * @param videoId the id of the video the cast belongs to
 */
private void createVideoActress(Document doc, int videoId) {
    Elements castSpans = doc.select("div#video_cast span.star");
    if (CollectionUtils.isEmpty(castSpans)) {
        return;
    }

    castSpans.forEach(castSpan -> {
        String actressName = castSpan.text().trim();
        if (StringUtils.isBlank(actressName)) {
            return;
        }

        Actress criteria = new Actress();
        criteria.setName(actressName);
        Actress knownActress = actressMapper.queryByActress(criteria);
        if (knownActress == null) {
            return;
        }

        // Insert only when the link lookup returns nothing
        if (videoActressMapper.queryByVideoIdAndActressName(videoId, actressName) != null) {
            return;
        }

        VideoActress link = new VideoActress();
        link.setActressCode(knownActress.getCode());
        link.setVideoId(videoId);

        Date timestamp = new Date();
        link.setCreateTime(timestamp);
        link.setUpdateTime(timestamp);
        videoActressMapper.insertSelective(link);
    });
}

From source file:com.github.binlee1990.spider.video.spider.PersonCrawler.java

/**
 * Copies the "wanted", "watched" and "owned" counters from the page onto the video.
 * <p>
 * Each counter is read from the first link matching its selector inside the
 * {@code div#video_favorite_edit span} elements. A counter that is missing or not a valid
 * integer is left untouched on the video.
 *
 * @param doc   the parsed video page
 * @param video the video whose counters are updated
 */
private void setVideoCount(Document doc, Video video) {
    Elements countElements = doc.select("div#video_favorite_edit span");
    if (!CollectionUtils.isNotEmpty(countElements)) {
        return;
    }

    Integer countWanted = readCountOrNull(countElements, "#subscribed a");
    if (countWanted != null) {
        video.setCountWanted(countWanted);
    }

    Integer countWatched = readCountOrNull(countElements, "#watched a");
    if (countWatched != null) {
        video.setCountWatched(countWatched);
    }

    Integer countOwned = readCountOrNull(countElements, "#owned a");
    if (countOwned != null) {
        video.setCountOwned(countOwned);
    }
}

/**
 * Reads the text of the first element matching {@code cssQuery} within {@code scope} as an integer.
 *
 * @return the parsed number, or {@code null} if nothing matches or the text is not an integer
 */
private Integer readCountOrNull(Elements scope, String cssQuery) {
    Elements matches = scope.select(cssQuery);
    if (!CollectionUtils.isNotEmpty(matches)) {
        return null;
    }
    try {
        return Integer.valueOf(matches.first().text());
    } catch (NumberFormatException ignored) {
        // Best effort: a counter that is not a plain integer is skipped, not treated as an error
        return null;
    }
}