List of usage examples for org.jsoup.nodes.Document#select
public Elements select(String cssQuery)
From source file:cn.cuizuoli.appranking.service.GooglePlayService.java
/** * getAppRankingList/* ww w.jav a 2 s. com*/ * @param feedType * @return */ public List<AppRanking> getAppRankingList(FeedType feedType, Category category) { List<AppRanking> appRankingList = new ArrayList<AppRanking>(); try { if (feedType.getMediaType() == MediaType.GOOGLE) { String url = StringUtils.EMPTY; if (category == Category.ALL) { url = getHotUrl(feedType); } else { url = getUrl(feedType, category); } log.info("Google Play -> " + url); if (StringUtils.isNotBlank(url)) { Document doc = appRankingRestTemplate.getForObject(url, Document.class); Elements elements = doc.select(".card-list>.card"); Iterator<Element> iterator = elements.iterator(); int i = 1; while (iterator.hasNext()) { Element element = iterator.next(); String appId = element.attr("data-docid"); String name = element.select(".details .title").attr("title"); String uri = element.select(".details .title").attr("href"); String artist = element.select(".details .subtitle").attr("title"); String price = element.select(".details button.price.buy>span").text(); String image170 = element.select(".cover .cover-image").attr("data-cover-small"); String image340 = element.select(".cover .cover-image").attr("data-cover-large"); AppRanking appRanking = new AppRanking(); appRanking.setAppId(appId); appRanking.setDeviceType(DeviceType.ANDROID); appRanking.setCountry(Country.JAPAN); appRanking.setMediaType(MediaType.GOOGLE); appRanking.setFeedType(feedType); appRanking.setRanking(i); appRanking.setTitle(name + " - " + artist); appRanking.setCategory(category.getCode()); appRanking.setUri(GOOGLE_PLAY_DOMAIN + uri); appRanking.setName(name); appRanking.setArtist(artist); appRanking.setPrice(price); appRanking.setImage53(image170); appRanking.setImage75(image170); appRanking.setImage100(image340); appRankingList.add(appRanking); i++; } } } } catch (HttpStatusCodeException e) { log.error(ExceptionUtils.getFullStackTrace(e)); } catch (Exception e) { log.error(ExceptionUtils.getFullStackTrace(e)); } return 
appRankingList; }
From source file:namedatabasescraper.PageScraper.java
@SuppressWarnings("OverridableMethodCallInConstructor") public PageScraper(File file, String dirname, String selector, String charset) throws IOException { filename = file.getAbsolutePath();//from w ww.j av a 2 s .co m this.dirname = dirname; this.id = this.createScraperId(); String html = FileUtils.readFileToString(file, charset); this.names = new ArrayList<>(); Document soup = Jsoup.parse(html); //Elements nameElements = soup.select("a.nom"); //Elements nameElements = soup.select("div > a:not(.n1)"); Elements nameElements = soup.select(selector); for (Element nameElement : nameElements) { String name = nameElement.text(); names.add(name); } logger.log(Level.INFO, "Scraped " + this.names.size() + " names from page {0}", file.getName()); }
From source file:me.vertretungsplan.parser.ESchoolParser.java
@Override public SubstitutionSchedule getSubstitutionSchedule() throws IOException, JSONException, CredentialInvalidException { if (!(scheduleData.getAuthenticationData() instanceof NoAuthenticationData) && (credential == null || !(credential instanceof PasswordCredential) || ((PasswordCredential) credential).getPassword() == null || ((PasswordCredential) credential).getPassword().isEmpty())) { throw new IOException("no login"); }// w w w .j a v a 2s . c o m List<NameValuePair> nvps = new ArrayList<>(); nvps.add(new BasicNameValuePair("wp", scheduleData.getData().getString(PARAM_ID))); nvps.add(new BasicNameValuePair("go", "vplan")); nvps.add(new BasicNameValuePair("content", "x14")); nvps.add(new BasicNameValuePair("sortby", "S")); String url = BASE_URL + "?" + URLEncodedUtils.format(nvps, "UTF-8"); Document doc = Jsoup.parse(httpGet(url, ENCODING)); if (doc.select("form[name=loginform]").size() > 0 && scheduleData.getAuthenticationData() instanceof PasswordAuthenticationData) { // Login required List<NameValuePair> formParams = new ArrayList<>(); formParams.add(new BasicNameValuePair("password", ((PasswordCredential) credential).getPassword())); formParams.add(new BasicNameValuePair("login", "")); doc = Jsoup.parse(httpPost(url, ENCODING, formParams)); if (doc.select("font[color=red]").text().contains("fehlgeschlagen")) { throw new CredentialInvalidException(); } } SubstitutionSchedule schedule = parseESchoolSchedule(doc); return schedule; }
From source file:mobi.jenkinsci.ci.client.sso.GoogleSsoHandler.java
/**
 * Completes the second step of the Google sign-in: posts the one-time password,
 * then posts the hidden form returned in response, and returns the redirect
 * target of that final post.
 *
 * @param httpClient  client used for both posts
 * @param httpContext context shared by the requests
 * @param response    response of the previous step, used to build the OTP post
 * @param otp         one-time password entered by the user
 * @return value of the {@code Location} header of the final redirect
 * @throws IOException if a request fails, the expected form is missing, the
 *                     final response is not a 302, or it lacks a
 *                     {@code Location} header
 */
@Override
public String doTwoStepAuthentication(final HttpClient httpClient, final HttpContext httpContext,
        final HttpResponse response, final String otp) throws IOException {
    final HttpPost formPost = getOtpFormPost(httpContext, response, otp);
    Element otpResponseForm;
    try {
        final HttpResponse otpResponse = httpClient.execute(formPost, httpContext);
        if (otpResponse.getStatusLine().getStatusCode() != HttpURLConnection.HTTP_OK) {
            throw getException(otpResponse);
        }

        final Document otpResponseDoc = Jsoup.parse(otpResponse.getEntity().getContent(), "UTF-8", "");
        otpResponseForm = otpResponseDoc.select("form[id=hiddenpost]").first();
        if (otpResponseForm == null) {
            // No follow-up form: either the page carries an error div or the response is unusable.
            final Element errorDiv = otpResponseDoc.select("div[id=error]").first();
            if (errorDiv == null) {
                throw new IOException(
                        "2nd-step authentication FAILED: Google did not return positive response form.");
            } else {
                throw new TwoPhaseAuthenticationRequiredException(getDivText(errorDiv),
                        GOOGLE_ANDROID_APPS_AUTHENTICATOR2_APP_ID);
            }
        }
    } finally {
        formPost.releaseConnection();
    }

    final HttpPost formCompletePost = JenkinsFormAuthHttpClient
            .getPostForm(JenkinsFormAuthHttpClient.getLatestRedirectedUrl(httpContext), otpResponseForm, null);
    try {
        final HttpResponse otpCompleteResponse = httpClient.execute(formCompletePost, httpContext);
        if (otpCompleteResponse.getStatusLine().getStatusCode() != HttpURLConnection.HTTP_MOVED_TEMP) {
            throw new IOException(
                    String.format("2nd-step authentication failed: Google returned HTTP-Status:%d %s",
                            otpCompleteResponse.getStatusLine().getStatusCode(),
                            otpCompleteResponse.getStatusLine().getReasonPhrase()));
        }
        // getFirstHeader returns null when the header is absent; fail with the
        // declared IOException instead of a NullPointerException.
        if (!otpCompleteResponse.containsHeader("Location")) {
            throw new IOException(
                    "2nd-step authentication failed: Google redirect response has no Location header");
        }
        return otpCompleteResponse.getFirstHeader("Location").getValue();
    } finally {
        formCompletePost.releaseConnection();
    }
}
From source file:net.poemerchant.scraper.ShopScraper.java
private String scrapeItemJSArray(Document doc) { Element scriptDataElem = doc.select("script").last(); // a with href String raw = scriptDataElem.data(); raw = StringUtils.substringBetween(raw, "new R(", ")).run();"); return raw;/*from ww w. j a v a 2 s .c om*/ }
From source file:neembuu.release1.externalImpl.linkhandler.SaveVideoYoutubeLinkHandlerProvider.java
private BasicLinkHandler.Builder saveVideoExtraction(TrialLinkHandler tlh, int retryCount) throws Exception { String url = tlh.getReferenceLinkString(); BasicLinkHandler.Builder linkHandlerBuilder = BasicLinkHandler.Builder.create(); try {// www .j av a 2s. c o m DefaultHttpClient httpClient = NHttpClient.getNewInstance(); String requestUrl = "http://www.save-video.com/download.php?url=" + URLEncoder.encode(url, "UTF-8"); final String responseString = NHttpClientUtils.getData(requestUrl, httpClient); //Set the group name as the name of the video String nameOfVideo = getVideoName(url); String fileName = "text"; linkHandlerBuilder.setGroupName(nameOfVideo); long c_duration = -1; Document doc = Jsoup.parse(responseString); Elements elements = doc.select(".sv-download-links ul li a"); for (Element element : elements) { String singleUrl = element.attr("href"); if (!singleUrl.startsWith("DownloadFile.php")) { fileName = element.text(); singleUrl = Utils.normalize(singleUrl); LOGGER.log(Level.INFO, "Normalized URL: {0}", singleUrl); long length = NHttpClientUtils.calculateLength(singleUrl, httpClient); //LOGGER.log(Level.INFO,"Length: " + length); if (length <= 0) { continue; /*skip this url*/ } BasicOnlineFile.Builder fileBuilder = linkHandlerBuilder.createFile(); try { // finding video/audio length // String dur = StringUtils.stringBetweenTwoStrings(singleUrl, "dur=", "&"); // long duration = (int)(Double.parseDouble(dur)*1000); // if(c_duration < 0 ){ c_duration = duration; } // fileBuilder.putLongPropertyValue(PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS, duration); // LOGGER.log(Level.INFO,"dur="+dur); } catch (NumberFormatException a) { // ignore } try { // finding the quality short name // String type = fileName.substring(fileName.indexOf("(")+1); String type = fileName; fileBuilder.putStringPropertyValue(PropertyProvider.StringProperty.VARIANT_DESCRIPTION, type); LOGGER.log(Level.INFO, "type={0}", type); } catch (Exception a) { a.printStackTrace(); 
} fileName = nameOfVideo + " " + fileName; fileBuilder.setName(fileName).setUrl(singleUrl).setSize(length).next(); } } for (OnlineFile of : linkHandlerBuilder.getFiles()) { long dur = of.getPropertyProvider() .getLongPropertyValue(PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS); if (dur < 0 && c_duration > 0 && of.getPropertyProvider() instanceof BasicPropertyProvider) { ((BasicPropertyProvider) of.getPropertyProvider()).putLongPropertyValue( PropertyProvider.LongProperty.MEDIA_DURATION_IN_MILLISECONDS, c_duration); } } } catch (Exception ex) { ex.printStackTrace(); } return linkHandlerBuilder; }
From source file:org.brunocvcunha.taskerbox.impl.jobs.DiceJobSeeker.java
@Override protected void execute() throws Exception { try {/*from w w w . j a va 2 s . c om*/ for (int x = 1; x < this.maxPages; x++) { int uniqueCount = 0; // DefaultHttpClient client = // TaskerboxHttpBox.getInstance().buildNewHttpClient(); String seekUrl = "http://www.dice.com/job/results?n=50&q=" + URLEncoder.encode(this.search) + "&o=" + (x * 50); logInfo(log, "... Seeking " + seekUrl); HttpEntity entity = TaskerboxHttpBox.getInstance().getEntityForURL(seekUrl); String result = TaskerboxHttpBox.getInstance().readResponseFromEntity(entity); if (result.contains("Sorry, no jobs were found that match your criteria")) { System.err.println("Busca encerrada."); this.bootstrapHttpClient(true); break; // return; } try { Document doc = Jsoup.parse(result); Elements el = doc.select("div#SRcolContainer").select("tr"); for (val item : el) { Elements jobEls = item.select("a"); if (jobEls.size() < 3) { continue; } String url = jobEls.get(0).attr("href"); if (url.equals("")) { continue; } url = "http://www.dice.com" + url; if (url.contains("?src=")) { url = url.substring(0, url.indexOf("?src=")); } String jobTitle = jobEls.get(0).text(); String company = jobEls.get(1).text(); String location = jobEls.get(2).text(); // System.out.println("==============="); // System.out.println(item.html()); // System.out.println("==============="); if (!jobTitle.equalsIgnoreCase("Job Title")) { handleJob(jobTitle, company, location, url); uniqueCount++; } } if (uniqueCount == 0) { logInfo(log, "DICE BREAK -- NO UNIQUE COUNT"); break; } try { Thread.sleep(10000L); } catch (InterruptedException e) { e.printStackTrace(); } } catch (Exception e) { e.printStackTrace(); } } } catch (Exception e) { e.printStackTrace(); } }
From source file:free.rm.skytube.businessobjects.VideoStream.ParseStreamMetaData.java
/**
 * Builds the "video unavailable" message shown on a page, in the form
 * {@code "<message>: <submessage>"}.
 *
 * @param pageContent HTML of the page, parsed relative to {@code pageUrl}
 * @return the combined message, or the generic
 *         {@code error_stream_err_unavailable} string when either element is
 *         missing or anything else goes wrong
 */
private String getJplayerJsonErrorMessage(String pageContent) {
    try {
        Document page = Jsoup.parse(pageContent, pageUrl);
        // first() is null when nothing matches; the resulting failure is handled below.
        String headline = page.select("h1[id=\"unavailable-message\"]").first().text();
        String detail = page.select("[id=\"unavailable-submessage\"]").first().text();
        return headline + ": " + detail;
    } catch (Throwable tr) {
        Log.e(TAG, "Error has occurred while retrieving video availability status", tr);
        return SkyTubeApp.getStr(R.string.error_stream_err_unavailable);
    }
}
From source file:org.eclipseplugins.impexeditor.core.utils.ImpexHttpClient.java
private String getCSrfToken(final String jSessionid) throws IOException { //<meta name="_csrf" content="c1dee1f7-8c79-43b1-8f3f-767662abc87a" /> final Document doc = Jsoup.connect(hostName).cookie("JSESSIONID", jSessionid).get(); final Elements csrfMetaElt = doc.select("meta[name=_csrf]"); final String csrfToken = csrfMetaElt.attr("content"); return csrfToken; }
From source file:emily.command.fun.FMLCommand.java
/**
 * Downloads a random page from fmylife.com and appends the trimmed text of
 * every article link to {@code items}. An {@link IOException} during the
 * download is reported through {@code Launcher.logToDiscord}, and nothing is
 * added.
 */
private void getFMLItems() {
    final Document page;
    try {
        page = Jsoup.connect("http://fmylife.com/random")
                .timeout(30_000)
                .userAgent(BotConfig.USER_AGENT)
                .get();
    } catch (IOException e) {
        Launcher.logToDiscord(e, "fml-command", "boken");
        return;
    }
    if (page == null) {
        return;
    }
    // Article links live inside p.block and point at /article/...
    for (Element link : page.select("p.block a[href^=/article/]")) {
        items.add(link.text().trim());
    }
}