List of usage examples for org.jsoup.nodes Document select
public Elements select(String cssQuery)
From source file:com.FFLive.Player.java
public void getPlayer() { try {/*from w w w.ja v a 2 s . c o m*/ if (playerID.equals("-1")) { //Average Team Player... Document doc = Jsoup.connect("http://fantasy.premierleague.com/entry/1/event-history/" + GW + "/") .get(); Elements averageScore = doc.select("div.ismUnit.ismSize2of5.ismLastUnit") .select("div.ismSBSecondaryVal"); if (averageScore.isEmpty()) { playerScore = 0; } else { try { playerScore = Integer.parseInt(averageScore.text().replaceAll("\\D+", "")); } catch (NumberFormatException n) { Main.log.log(2, "Issue saving Average Team..." + n + "\n"); } } playerName = "Average"; } else { Main.log.log(7, "Fetching Player " + playerID + "\n"); //Connects to the players info page InputStream playerJson = new URL( "http://fantasy.premierleague.com/web/api/elements/" + playerID + "/").openStream(); //Reads the data into a JSON object (via casting into a regular object) Reader reader = new InputStreamReader(playerJson, "UTF-8"); JSONObject playerValues = (JSONObject) JSONValue.parse(reader); //TODO Check if there are values overlength //Max Length Ref playerCount INT DEFAULT 1 NOT NULL, firstName VARCHAR(40), lastName VARCHAR(40), //webName VARCHAR(50), score INT, gameweekBreakdown VARCHAR(250), breakdown VARCHAR(250), //teamName VARCHAR(40), currentFixture VARCHAR(40), nextFixture VARCHAR(40), status VARCHAR(10), //news VARCHAR(250), photo VARCHAR(30)) //Adds Required Data firstName = playerValues.get("first_name").toString(); lastName = playerValues.get("second_name").toString(); playerName = playerValues.get("web_name").toString(); playerTeam = playerValues.get("team_name").toString(); teamNumber = Integer.parseInt(playerValues.get("team_id").toString()); position = playerValues.get("type_name").toString(); /* JSONObject test = (JSONObject)JSONValue.parse(playerValues.get("fixture_history").toString()); String summary = test.get("summary").toString(); String all = test.get("all").toString(); */ playerScore = 
Integer.parseInt(playerValues.get("event_total").toString()); gameweekBreakdown = playerValues.get("event_explain").toString(); //scoreBreakdown = playerValues.get("fixture_history").toString(); currentFixture = playerValues.get("current_fixture").toString(); nextFixture = playerValues.get("next_fixture").toString(); status = playerValues.get("status").toString(); news = playerValues.get("news").toString(); photo = playerValues.get("photo").toString(); /* System.out.println(firstName); System.out.println(lastName); System.out.println(playerName); System.out.println(playerTeam); System.out.println(position); System.out.println(summary); System.out.println(all); System.out.println(playerScore); System.out.println(scoreBreakdown); System.out.println(currentFixture); System.out.println(nextFixture); System.out.println(status); System.out.println(news); System.out.println(photo);*/ } } catch (ConnectException c) { if (timeoutCheck() > 3) { Main.log.log(2, "Too Many Timeouts.. Skipping\n"); } Main.log.log(6, "Timeout Connecting, Retrying...\n"); getPlayer(); } catch (SocketTimeoutException e) { if (timeoutCheck() > 3) { Main.log.log(2, "Too Many Timeouts.. Skipping\n"); } Main.log.log(6, "Timeout Connecting, Retrying...\n"); getPlayer(); } catch (UnknownHostException g) { Main.log.log(6, "No Connection... Skipping\n"); } catch (NoRouteToHostException h) { Main.log.log(6, "No Connection... Skipping\n"); } catch (IOException f) { Main.log.log(6, "In getPlayer: " + f + "\n"); } catch (NullPointerException n) { Main.log.log(2, "Missing Player Field with ID:" + playerID + " " + n + "\n"); Main.log.log(9, n); } }
From source file:fusion.Fusion.java
/**
 * Asks the thesaurus endpoint whether {@code val2} is listed as a synonym of {@code val1} and,
 * if so, registers each value as a synonym of the other.
 *
 * @param val1 the value whose synonyms are looked up; its value is appended to the request URL
 * @param val2 the candidate synonym
 * @return {@code true} if {@code val2}'s value appears among the returned {@code w} entries
 * @throws IOException if the thesaurus request fails
 */
private static boolean isSynonym(Value val1, Value val2) throws IOException {
    Document response = Jsoup
            .connect("http://words.bighugelabs.com/api/2/92eae7f933f0f63404b3438ca46861e5/" + val1.getValue()
                    + "/xml")
            .get();

    // The combined inner HTML of all <w> entries is split on newlines to get one candidate per entry.
    String[] candidates = response.select("w").html().split("\n");

    if (!Arrays.asList(candidates).contains(val2.getValue())) {
        return false;
    }

    // The relation is recorded in both directions.
    val1.addToSynonyms(val2);
    val2.addToSynonyms(val1);
    return true;
}
From source file:io.seldon.importer.articles.dynamicextractors.FirstElementAttrValueDateDynamicExtractor.java
@Override public String extract(AttributeDetail attributeDetail, String url, Document articleDoc) throws Exception { String attrib_value = null;/*ww w. j a va 2 s . c o m*/ if ((attributeDetail.extractor_args != null) && (attributeDetail.extractor_args.size() >= 2)) { String cssSelector = attributeDetail.extractor_args.get(0); Element element = articleDoc.select(cssSelector).first(); if (StringUtils.isNotBlank(cssSelector)) { int arg_count = 0; for (String value_name : attributeDetail.extractor_args) { if (arg_count > 0) { // skip the first one, its the cssSelector if (element != null && element.attr(value_name) != null) { attrib_value = element.attr(value_name); if (StringUtils.isNotBlank(attrib_value)) { break; } } } arg_count++; } } } if (attrib_value != null) { String pubtext = attrib_value; SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ENGLISH); Date result = null; try { result = df.parse(pubtext); } catch (ParseException e) { logger.info("Failed to parse date withUTC format " + pubtext); } // try a simpler format df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH); try { result = df.parse(pubtext); } catch (ParseException e) { logger.info("Failed to parse date " + pubtext); } if (result != null) { attrib_value = dateFormatter.format(result); } else { logger.error("Failed to parse date " + pubtext); } } return attrib_value; }
From source file:gov.medicaid.screening.dao.impl.NurseAnesthetistsLicenseDAOBean.java
/** * Parses the provider details from the response. * * @param page the response page//from w ww . ja v a2s .com * @return the parsed provider details * @throws ParsingException if the page is not of the expected format */ private ProviderProfile parseProvider(Document page) throws ParsingException { ProviderProfile profile = new ProviderProfile(); String fullName = page.select("#ctl00_PageContent_CertRecert_lblMemberName").text(); if (Util.isBlank(fullName)) { // this should always be present throw new ParsingException(ErrorCode.MITA50002.getDesc()); } profile.setUser(parseName(fullName)); ArrayList<License> licenses = new ArrayList<License>(); License license = new License(); licenses.add(license); profile.setLicenses(licenses); String issueDate = page.select("#ctl00_PageContent_CertRecert_lblInitialCertificationDate").text(); if (Util.isNotBlank(issueDate)) { license.setOriginalIssueDate(parseDate(issueDate, DATE_FORMAT)); } String renewalDate = page.select("#ctl00_PageContent_CertRecert_lblRecertBeginDate").text(); if (Util.isNotBlank(renewalDate)) { license.setRenewalDate(parseDate(renewalDate, DATE_FORMAT)); } String renewalEndDate = page.select("#ctl00_PageContent_CertRecert_lblRecertEndDate").text(); if (Util.isNotBlank(renewalEndDate)) { license.setRenewalDate(parseDate(renewalEndDate, DATE_FORMAT)); } String status = page.select("#ctl00_PageContent_CertRecert_lblRecertStatus").text(); if (Util.isNotBlank(status)) { LicenseStatus licenseStatus = new LicenseStatus(); licenseStatus.setName(status); license.setStatus(licenseStatus); } String licenseName = page.select("#ctl00_PageContent_CertRecert_lblAnesthesiaProgramName").text(); if (Util.isNotBlank(licenseName)) { LicenseType type = new LicenseType(); type.setName(licenseName); license.setType(type); } String licenseNo = page.select("#ctl00_PageContent_CertRecert_lblAANANumber").text(); license.setLicenseNumber(licenseNo); String address = 
page.select("#ctl00_PageContent_CertRecert_lblMemberResidence").text(); if (Util.isNotBlank(address)) { List<Address> addresses = new ArrayList<Address>(); addresses.add(parseAddress(address)); profile.setAddresses(addresses); } return profile; }
From source file:org.apache.marmotta.ldclient.provider.phpbb.PHPBBForumProvider.java
/** * Try to find further URLs in the document that need to be requested to complete the resource * data.//from w w w. j av a 2 s . c o m * Used e.g. to parse the result of paging in HTML pages. The default implementation returns an * empty list. * <p/> * This implementation tries to locate the paging area of PHPBB and selects the last link of the * paging, which will be the "next" page. * * @param document * @param requestUrl * @return */ @Override protected List<String> findAdditionalRequestUrls(String resource, Document document, String requestUrl) { List<String> result = new LinkedList<String>(); // return the next page in the result list String foo = "a[href*='start='][href*='viewforum.php']"; List<Element> values = document.select(foo); for (Element e : values) { String baseUrl = e.absUrl("href"); if (baseUrl.length() > 0) { result.add(baseUrl); } } return result; }
From source file:com.amastigote.xdu.query.module.WaterAndElectricity.java
/**
 * Queries the payment-info page for the given duration and returns the text of its table cells.
 *
 * <p>NOTE(review): several string literals in this method (the arguments of {@code contains}
 * and {@code indexOf} below) and the original trailing comment contained non-ASCII text that
 * was lost to an encoding problem; they now read as empty strings. As written,
 * {@code contains("")} is always true, so every entry simply loses its first two characters
 * (and an entry shorter than two characters makes {@code substring} throw). Restore the
 * literals from the upstream source before relying on this code.
 *
 * @param duration the period to query; only {@code ONE_MONTH} and {@code THREE_MONTH} are accepted
 * @return the non-empty {@code td} texts after prefix stripping. NOTE(review): the lost comment
 *         appeared to say that the list has 2n entries for n records, is empty when there are
 *         no records, and is never null - confirm.
 * @throws IOException declared for the page helpers called here
 * @throws IllegalArgumentException if {@code duration} is any other value
 */
private List<String> query_payInfo(Duration duration) throws IOException {
    // presumably refreshes page state such as VIEWSTATE for the pay-info page - verify in getPageAttributes
    getPageAttributes(PAYINFO_SUFFIX);
    // Request body, assembled as key=value pairs joined with '&'.
    String OUTPUT_DATA = "But_Seach3=";
    switch (duration) {
    case ONE_MONTH:
        // The value appended here is a name declared elsewhere in the class, not the case label.
        OUTPUT_DATA += ONE_MONTH;
        break;
    case THREE_MONTH:
        OUTPUT_DATA += THREE_MONTH;
        break;
    default:
        throw new IllegalArgumentException("Bad parameter, check document for help");
    }
    OUTPUT_DATA += "&__VIEWSTATE=";
    OUTPUT_DATA += VIEWSTATE;
    // HiddenField_webName is sent with an empty value.
    OUTPUT_DATA += "&HiddenField_webName=";
    OUTPUT_DATA += "&HiddenField_UserID=";
    OUTPUT_DATA += ID;
    Document document = getPage(OUTPUT_DATA, PAYINFO_SUFFIX);
    Elements elements = document.select("td");
    // Keep only the table cells that have text.
    List<String> stringArrayList = new ArrayList<>();
    for (Element td : elements) {
        String tmp = td.text();
        if (!"".equals(tmp)) {
            stringArrayList.add(tmp);
        }
    }
    // Strip a leading label from every entry. NOTE(review): the marker literals were lost (see
    // the method comment); presumably two different markers selected the +2 and +1 offsets.
    for (int i = 0; i < stringArrayList.size(); i++) {
        if (stringArrayList.get(i).contains("")) {
            stringArrayList.set(i, stringArrayList.get(i).substring(stringArrayList.get(i).indexOf("") + 2));
            continue;
        }
        stringArrayList.set(i, stringArrayList.get(i).substring(stringArrayList.get(i).indexOf("") + 1));
    }
    return stringArrayList;
}
From source file:ru.org.linux.user.AddPhotoWebTest.java
/**
 * Uploads {@code src/main/webapp/img/twitter.png} as a user photo while logged in as "JB" and
 * expects the server to reject it with HTTP 400 and a specific error message.
 *
 * <p>NOTE(review): the original description and the expected error text were non-ASCII and
 * were lost to an encoding problem. The expected-message literal below is mangled and must be
 * restored from the upstream source for this assertion to be meaningful.
 */
@Test
public void testInvalid3Image() throws IOException {
    String auth = WebHelper.doLogin(resource, "JB", "passwd");
    ClientResponse cr = WebHelper.addPhoto(resource, "src/main/webapp/img/twitter.png", auth);
    // The upload must be rejected.
    assertEquals(HttpStatus.SC_BAD_REQUEST, cr.getStatus());
    // Parse the response body so the error block can be selected; the resource URI is the base URI.
    Document doc = Jsoup.parse(cr.getEntityInputStream(), "UTF-8", resource.getURI().toString());
    assertEquals(
            "! ?: ? ",
            doc.select(".error").text()); // text of the element(s) with class "error"
}
From source file:com.amastigote.xdu.query.module.WaterAndElectricity.java
/**
 * Queries the usage-info page for the given duration and returns the text of its table cells.
 *
 * <p>NOTE(review): the marker literals in this method ({@code "???"} and the empty argument of
 * {@code indexOf}) and the original trailing comment contained non-ASCII text that was lost to
 * an encoding problem. As written, {@code indexOf("")} is always 0, so every entry simply loses
 * its first character. Restore the literals from the upstream source before relying on this code.
 *
 * @param duration the period to query; only {@code ONE_MONTH} and {@code THREE_MONTH} are accepted
 * @return the non-empty {@code td} texts after splitting and prefix stripping. NOTE(review):
 *         the lost comment appeared to say that the list has 5n entries for n records, is empty
 *         when there are no records, and is never null - confirm.
 * @throws IOException declared for the page helpers called here
 * @throws IllegalArgumentException if {@code duration} is any other value
 */
private List<String> query_useInfo(Duration duration) throws IOException {
    // presumably refreshes page state such as VIEWSTATE for the use-info page - verify in getPageAttributes
    getPageAttributes(USEINFO_SUFFIX);
    // Request body, assembled as key=value pairs joined with '&'.
    String OUTPUT_DATA = "But_Seach3=";
    switch (duration) {
    case ONE_MONTH:
        // The value appended here is a name declared elsewhere in the class, not the case label.
        OUTPUT_DATA += ONE_MONTH;
        break;
    case THREE_MONTH:
        OUTPUT_DATA += THREE_MONTH;
        break;
    default:
        throw new IllegalArgumentException("Bad parameter, check document for help");
    }
    OUTPUT_DATA += "&__VIEWSTATE=";
    OUTPUT_DATA += VIEWSTATE;
    // HiddenField_webName is sent with an empty value.
    OUTPUT_DATA += "&HiddenField_webName=";
    OUTPUT_DATA += "&HiddenField_UserID=";
    OUTPUT_DATA += ID;
    Document document = getPage(OUTPUT_DATA, USEINFO_SUFFIX);
    Elements elements = document.select("td");
    List<String> stringArrayList = new ArrayList<>();
    for (Element td : elements) {
        String tmp = td.text();
        // Remove spaces from the cell text (the pattern may originally have been a non-ASCII space - confirm).
        tmp = tmp.replaceAll(" ", "");
        if (!"".equals(tmp)) {
            // A cell containing the marker is split into two entries: the text before the marker
            // and the text from the marker onwards. NOTE(review): marker literal is mangled.
            if (tmp.contains("???")) {
                stringArrayList.add(tmp.substring(0, tmp.indexOf("???")));
                stringArrayList.add(tmp.substring(tmp.indexOf("???")));
                continue;
            }
            stringArrayList.add(tmp);
        }
    }
    // Strip a leading label from every entry. NOTE(review): the separator literal was lost.
    for (int i = 0; i < stringArrayList.size(); i++) {
        stringArrayList.set(i, stringArrayList.get(i).substring(stringArrayList.get(i).indexOf("") + 1));
    }
    return stringArrayList;
}
From source file:com.github.binlee1990.spider.video.spider.PersonCrawler.java
/**
 * Links the given video to every actress named in the page's cast section.
 *
 * <p>For each non-blank name under {@code div#video_cast span.star}, the actress is looked up by
 * name. If she exists and no link between the video and that name is stored yet, a new
 * {@code VideoActress} row is inserted with the current time as both create and update time.
 * Names without a matching actress are skipped.
 *
 * @param doc the parsed video page
 * @param videoId the id of the video the cast belongs to
 */
private void createVideoActress(Document doc, int videoId) {
    Elements castEntries = doc.select("div#video_cast span.star");
    if (!CollectionUtils.isNotEmpty(castEntries)) {
        return;
    }

    castEntries.stream()
            .map(entry -> entry.text().trim())
            .filter(name -> StringUtils.isNotBlank(name))
            .forEach(name -> {
                Actress probe = new Actress();
                probe.setName(name);
                Actress known = actressMapper.queryByActress(probe);
                if (null == known) {
                    return;
                }

                // Nothing to do when the link is already stored.
                if (null != videoActressMapper.queryByVideoIdAndActressName(videoId, name)) {
                    return;
                }

                VideoActress link = new VideoActress();
                link.setActressCode(known.getCode());
                link.setVideoId(videoId);
                Date createdAt = new Date();
                link.setCreateTime(createdAt);
                link.setUpdateTime(createdAt);
                videoActressMapper.insertSelective(link);
            });
}
From source file:com.github.binlee1990.spider.video.spider.PersonCrawler.java
private void setVideoCount(Document doc, Video video) { Elements countElements = doc.select("div#video_favorite_edit span"); if (CollectionUtils.isNotEmpty(countElements)) { Elements countWantedElements = countElements.select("#subscribed a"); if (CollectionUtils.isNotEmpty(countWantedElements)) { String countWanted = countWantedElements.first().text(); try { video.setCountWanted(Integer.valueOf(countWanted)); } catch (Exception e) { }// ww w . j a v a 2s.c o m } Elements countWatchedElements = countElements.select("#watched a"); if (CollectionUtils.isNotEmpty(countWatchedElements)) { String countWatched = countWatchedElements.first().text(); try { video.setCountWatched(Integer.valueOf(countWatched)); } catch (Exception e) { } } Elements countOwnedElements = countElements.select("#owned a"); if (CollectionUtils.isNotEmpty(countOwnedElements)) { String countOwned = countOwnedElements.first().text(); try { video.setCountOwned(Integer.valueOf(countOwned)); } catch (Exception e) { } } } }