Example usage for org.jsoup.nodes Document select

List of usage examples for org.jsoup.nodes Document select

Introduction

On this page you can find example usages of org.jsoup.nodes.Document.select.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:cn.cuizuoli.appranking.service.GooglePlayService.java

/**
 * getAppRankingList/* ww  w.jav  a  2  s. com*/
 * @param feedType
 * @return
 */
public List<AppRanking> getAppRankingList(FeedType feedType, Category category) {
    List<AppRanking> appRankingList = new ArrayList<AppRanking>();
    try {
        if (feedType.getMediaType() == MediaType.GOOGLE) {
            String url = StringUtils.EMPTY;
            if (category == Category.ALL) {
                url = getHotUrl(feedType);
            } else {
                url = getUrl(feedType, category);
            }
            log.info("Google Play -> " + url);
            if (StringUtils.isNotBlank(url)) {

                Document doc = appRankingRestTemplate.getForObject(url, Document.class);
                Elements elements = doc.select(".card-list>.card");
                Iterator<Element> iterator = elements.iterator();
                int i = 1;
                while (iterator.hasNext()) {
                    Element element = iterator.next();
                    String appId = element.attr("data-docid");
                    String name = element.select(".details .title").attr("title");
                    String uri = element.select(".details .title").attr("href");
                    String artist = element.select(".details .subtitle").attr("title");
                    String price = element.select(".details button.price.buy>span").text();
                    String image170 = element.select(".cover .cover-image").attr("data-cover-small");
                    String image340 = element.select(".cover .cover-image").attr("data-cover-large");
                    AppRanking appRanking = new AppRanking();
                    appRanking.setAppId(appId);
                    appRanking.setDeviceType(DeviceType.ANDROID);
                    appRanking.setCountry(Country.JAPAN);
                    appRanking.setMediaType(MediaType.GOOGLE);
                    appRanking.setFeedType(feedType);
                    appRanking.setRanking(i);
                    appRanking.setTitle(name + " - " + artist);
                    appRanking.setCategory(category.getCode());
                    appRanking.setUri(GOOGLE_PLAY_DOMAIN + uri);
                    appRanking.setName(name);
                    appRanking.setArtist(artist);
                    appRanking.setPrice(price);
                    appRanking.setImage53(image170);
                    appRanking.setImage75(image170);
                    appRanking.setImage100(image340);
                    appRankingList.add(appRanking);
                    i++;
                }

            }
        }
    } catch (HttpStatusCodeException e) {
        log.error(ExceptionUtils.getFullStackTrace(e));
    } catch (Exception e) {
        log.error(ExceptionUtils.getFullStackTrace(e));
    }
    return appRankingList;
}

From source file:namedatabasescraper.PageScraper.java

/**
 * Reads an HTML file and collects the text of every element matched by a CSS selector.
 *
 * @param file     HTML file to scrape
 * @param dirname  directory name stored on this scraper
 * @param selector jsoup CSS query that picks the name elements, for example
 *                 {@code a.nom} or {@code div > a:not(.n1)}
 * @param charset  name of the character set used to decode {@code file}
 * @throws IOException propagated from reading {@code file}
 */
@SuppressWarnings("OverridableMethodCallInConstructor")
public PageScraper(File file, String dirname, String selector, String charset) throws IOException {
    this.filename = file.getAbsolutePath();
    this.dirname = dirname;
    // Called only after filename and dirname have been assigned.
    this.id = this.createScraperId();

    final String markup = FileUtils.readFileToString(file, charset);
    this.names = new ArrayList<>();
    final Elements matches = Jsoup.parse(markup).select(selector);
    for (Element match : matches) {
        this.names.add(match.text());
    }
    logger.log(Level.INFO, "Scraped " + this.names.size() + " names from page {0}", file.getName());
}

From source file:me.vertretungsplan.parser.ESchoolParser.java

/**
 * Downloads the substitution plan page, submitting the password first when the site answers
 * with its login form, and parses the resulting page into a {@link SubstitutionSchedule}.
 *
 * @return the schedule produced by {@code parseESchoolSchedule} for the downloaded page
 * @throws IOException if authentication is configured but no non-empty password credential
 *         is available ("no login"); NOTE(review): presumably also raised by the HTTP
 *         helpers - confirm
 * @throws JSONException NOTE(review): presumably raised when the schedule data lacks the id
 *         parameter - confirm
 * @throws CredentialInvalidException if the page returned after login contains a red
 *         message with the text "fehlgeschlagen"
 */
@Override
public SubstitutionSchedule getSubstitutionSchedule()
        throws IOException, JSONException, CredentialInvalidException {
    if (!(scheduleData.getAuthenticationData() instanceof NoAuthenticationData)) {
        // instanceof is false for null, so a missing credential is rejected here too.
        final boolean passwordUsable = credential instanceof PasswordCredential
                && ((PasswordCredential) credential).getPassword() != null
                && !((PasswordCredential) credential).getPassword().isEmpty();
        if (!passwordUsable) {
            throw new IOException("no login");
        }
    }

    final List<NameValuePair> query = new ArrayList<>();
    query.add(new BasicNameValuePair("wp", scheduleData.getData().getString(PARAM_ID)));
    query.add(new BasicNameValuePair("go", "vplan"));
    query.add(new BasicNameValuePair("content", "x14"));
    query.add(new BasicNameValuePair("sortby", "S"));
    final String url = BASE_URL + "?" + URLEncodedUtils.format(query, "UTF-8");

    Document doc = Jsoup.parse(httpGet(url, ENCODING));

    final boolean loginFormShown = !doc.select("form[name=loginform]").isEmpty();
    if (loginFormShown && scheduleData.getAuthenticationData() instanceof PasswordAuthenticationData) {
        // The site asked for a login: post the password to the same URL and continue with
        // the page that comes back.
        final List<NameValuePair> loginForm = new ArrayList<>();
        loginForm.add(new BasicNameValuePair("password", ((PasswordCredential) credential).getPassword()));
        loginForm.add(new BasicNameValuePair("login", ""));
        doc = Jsoup.parse(httpPost(url, ENCODING, loginForm));

        final String redText = doc.select("font[color=red]").text();
        if (redText.contains("fehlgeschlagen")) {
            throw new CredentialInvalidException();
        }
    }

    return parseESchoolSchedule(doc);
}

From source file:mobi.jenkinsci.ci.client.sso.GoogleSsoHandler.java

/**
 * Completes Google's second authentication step: submits the one-time password form, then
 * posts the hidden follow-up form Google returns and reports where that post redirects to.
 *
 * @param httpClient  client used for both POST requests
 * @param httpContext context shared by the requests; also used to look up the latest
 *                    redirected URL
 * @param response    response from which the one-time password form is built
 * @param otp         one-time password entered by the user
 * @return the value of the {@code Location} header of the final redirect
 * @throws IOException if the OTP response carries neither the hidden form nor an error
 *         block, if the final response is not a 302 redirect, or if that redirect has no
 *         {@code Location} header; a non-200 OTP response raises whatever
 *         {@code getException} builds, and an error block raises
 *         {@code TwoPhaseAuthenticationRequiredException}
 */
@Override
public String doTwoStepAuthentication(final HttpClient httpClient, final HttpContext httpContext,
        final HttpResponse response, final String otp) throws IOException {
    final HttpPost formPost = getOtpFormPost(httpContext, response, otp);
    Element otpResponseForm;
    try {
        final HttpResponse otpResponse = httpClient.execute(formPost, httpContext);
        if (otpResponse.getStatusLine().getStatusCode() != HttpURLConnection.HTTP_OK) {
            throw getException(otpResponse);
        }

        final Document otpResponseDoc = Jsoup.parse(otpResponse.getEntity().getContent(), "UTF-8", "");
        otpResponseForm = otpResponseDoc.select("form[id=hiddenpost]").first();
        if (otpResponseForm == null) {
            // No follow-up form: either Google rendered an error block, or the page is
            // something unexpected.
            final Element errorDiv = otpResponseDoc.select("div[id=error]").first();
            if (errorDiv == null) {
                throw new IOException(
                        "2nd-step authentication FAILED: Google did not return positive response form.");
            } else {
                throw new TwoPhaseAuthenticationRequiredException(getDivText(errorDiv),
                        GOOGLE_ANDROID_APPS_AUTHENTICATOR2_APP_ID);
            }
        }
    } finally {
        formPost.releaseConnection();
    }

    final HttpPost formCompletePost = JenkinsFormAuthHttpClient
            .getPostForm(JenkinsFormAuthHttpClient.getLatestRedirectedUrl(httpContext), otpResponseForm, null);
    try {
        final HttpResponse otpCompleteResponse = httpClient.execute(formCompletePost, httpContext);
        if (otpCompleteResponse.getStatusLine().getStatusCode() != HttpURLConnection.HTTP_MOVED_TEMP) {
            throw new IOException(
                    String.format("2nd-step authentication failed: Google returned HTTP-Status:%d %s",
                            otpCompleteResponse.getStatusLine().getStatusCode(),
                            otpCompleteResponse.getStatusLine().getReasonPhrase()));
        }

        // A redirect without a Location header would otherwise fail with a
        // NullPointerException instead of the declared IOException.
        if (!otpCompleteResponse.containsHeader("Location")) {
            throw new IOException(
                    "2nd-step authentication failed: Google redirect response has no Location header");
        }
        return otpCompleteResponse.getFirstHeader("Location").getValue();
    } finally {
        formCompletePost.releaseConnection();
    }
}

From source file:net.poemerchant.scraper.ShopScraper.java

/**
 * Extracts the raw argument text of the {@code new R(...)} call found in the last
 * {@code <script>} element of the page.
 *
 * @param doc parsed page to search
 * @return the text between {@code "new R("} and {@code ")).run();"}, or {@code null} when
 *         the page has no script element or the markers are not present
 */
private String scrapeItemJSArray(Document doc) {
    Element lastScript = doc.select("script").last();
    if (lastScript == null) {
        // Same "not found" result that substringBetween gives for missing markers.
        return null;
    }
    return StringUtils.substringBetween(lastScript.data(), "new R(", ")).run();");
}

From source file:neembuu.release1.externalImpl.linkhandler.SaveVideoYoutubeLinkHandlerProvider.java

/**
 * Asks save-video.com for the download links of a video and registers one file per usable
 * link on a new link-handler builder.
 *
 * <p>Links whose href starts with {@code DownloadFile.php} and links whose length cannot be
 * determined (length &lt;= 0) are skipped. Failures during the extraction are logged and
 * not propagated, so the builder may come back with few or no files.
 *
 * <p>Media duration is not extracted here; the code that once derived it from the link was
 * already disabled, and the bookkeeping that depended on it has been removed.
 *
 * @param tlh        handler supplying the reference link of the video
 * @param retryCount not used by this implementation
 * @return the builder, with the video name as group name and one file per accepted link
 * @throws Exception declared for callers; only a failure to read the reference link from
 *         {@code tlh} happens outside the logged section
 */
private BasicLinkHandler.Builder saveVideoExtraction(TrialLinkHandler tlh, int retryCount) throws Exception {
    String url = tlh.getReferenceLinkString();
    BasicLinkHandler.Builder linkHandlerBuilder = BasicLinkHandler.Builder.create();

    try {
        DefaultHttpClient httpClient = NHttpClient.getNewInstance();
        String requestUrl = "http://www.save-video.com/download.php?url=" + URLEncoder.encode(url, "UTF-8");

        final String responseString = NHttpClientUtils.getData(requestUrl, httpClient);

        // The group is named after the video.
        String nameOfVideo = getVideoName(url);
        linkHandlerBuilder.setGroupName(nameOfVideo);

        Elements links = Jsoup.parse(responseString).select(".sv-download-links ul li a");
        for (Element link : links) {
            String singleUrl = link.attr("href");
            if (singleUrl.startsWith("DownloadFile.php")) {
                continue;
            }

            // The link text doubles as the variant description and as the file-name suffix.
            String variant = link.text();
            singleUrl = Utils.normalize(singleUrl);
            LOGGER.log(Level.INFO, "Normalized URL: {0}", singleUrl);

            long length = NHttpClientUtils.calculateLength(singleUrl, httpClient);
            if (length <= 0) {
                continue; // skip this url
            }

            BasicOnlineFile.Builder fileBuilder = linkHandlerBuilder.createFile();

            try {
                fileBuilder.putStringPropertyValue(PropertyProvider.StringProperty.VARIANT_DESCRIPTION,
                        variant);
                LOGGER.log(Level.INFO, "type={0}", variant);
            } catch (Exception a) {
                // A missing description must not prevent the file from being registered.
                LOGGER.log(Level.WARNING, "Could not store variant description", a);
            }

            fileBuilder.setName(nameOfVideo + " " + variant).setUrl(singleUrl).setSize(length).next();
        }
    } catch (Exception ex) {
        LOGGER.log(Level.SEVERE, "save-video.com link extraction failed", ex);
    }

    return linkHandlerBuilder;
}

From source file:org.brunocvcunha.taskerbox.impl.jobs.DiceJobSeeker.java

/**
 * Pages through Dice search results for {@code search} and passes every listed job to
 * {@code handleJob}.
 *
 * <p>Paging stops when a page reports that no jobs were found, when a page yields no job
 * rows, when {@code maxPages} is reached, or when the thread is interrupted during the
 * 10-second pause between pages. Other failures are printed and do not propagate; a failure
 * while parsing one page does not stop the following pages.
 *
 * <p>NOTE(review): the loop starts at {@code x = 1}, so the first request already uses
 * offset 50 and offset 0 is never requested - confirm this is intended.
 *
 * @throws Exception declared by the overridden method; failures inside the loop are caught
 *         here
 */
@Override
protected void execute() throws Exception {
    try {
        for (int x = 1; x < this.maxPages; x++) {
            int uniqueCount = 0;

            // Encode with an explicit charset; the one-argument overload is deprecated
            // because it depends on the platform default encoding.
            String seekUrl = "http://www.dice.com/job/results?n=50&q="
                    + URLEncoder.encode(this.search, "UTF-8") + "&o=" + (x * 50);
            logInfo(log, "... Seeking " + seekUrl);
            HttpEntity entity = TaskerboxHttpBox.getInstance().getEntityForURL(seekUrl);
            String result = TaskerboxHttpBox.getInstance().readResponseFromEntity(entity);

            if (result.contains("Sorry, no jobs were found that match your criteria")) {
                System.err.println("Busca encerrada.");
                this.bootstrapHttpClient(true);
                break;
            }

            try {
                Document doc = Jsoup.parse(result);

                Elements el = doc.select("div#SRcolContainer").select("tr");

                for (val item : el) {
                    // A job row carries at least three links: title, company, location.
                    Elements jobEls = item.select("a");
                    if (jobEls.size() < 3) {
                        continue;
                    }

                    String url = jobEls.get(0).attr("href");
                    if (url.equals("")) {
                        continue;
                    }
                    url = "http://www.dice.com" + url;

                    // Drop the query string starting at the "src" parameter.
                    if (url.contains("?src=")) {
                        url = url.substring(0, url.indexOf("?src="));
                    }

                    String jobTitle = jobEls.get(0).text();
                    String company = jobEls.get(1).text();
                    String location = jobEls.get(2).text();

                    // The header row of the table is not a job.
                    if (!jobTitle.equalsIgnoreCase("Job Title")) {
                        handleJob(jobTitle, company, location, url);
                        uniqueCount++;
                    }
                }

                if (uniqueCount == 0) {
                    logInfo(log, "DICE BREAK -- NO UNIQUE COUNT");
                    break;
                }

                try {
                    Thread.sleep(10000L);
                } catch (InterruptedException e) {
                    // Keep the interrupt visible to the caller and stop paging instead of
                    // carrying on as if nothing had happened.
                    Thread.currentThread().interrupt();
                    logInfo(log, "Interrupted while waiting between Dice pages; stopping");
                    return;
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:free.rm.skytube.businessobjects.VideoStream.ParseStreamMetaData.java

/**
 * Builds the "video unavailable" message shown on a page by joining the text of the
 * {@code unavailable-message} heading and the {@code unavailable-submessage} element with
 * {@code ": "}.
 *
 * @param pageContent HTML of the page, parsed relative to {@code pageUrl}
 * @return the combined message, or the {@code error_stream_err_unavailable} string resource
 *         when the message cannot be read from the page for any reason
 */
private String getJplayerJsonErrorMessage(String pageContent) {
    StringBuilder errorMessage = new StringBuilder();

    try {
        Document document = Jsoup.parse(pageContent, pageUrl);
        String headline = document.select("h1[id=\"unavailable-message\"]").first().text();
        String detail = document.select("[id=\"unavailable-submessage\"]").first().text();
        errorMessage.append(headline).append(": ").append(detail);
    } catch (Throwable tr) {
        // Any failure, including a missing element, falls back to the generic text.
        Log.e(TAG, "Error has occurred while retrieving video availability status", tr);
        errorMessage = new StringBuilder(SkyTubeApp.getStr(R.string.error_stream_err_unavailable));
    }

    return errorMessage.toString();
}

From source file:org.eclipseplugins.impexeditor.core.utils.ImpexHttpClient.java

/**
 * Fetches the page at {@code hostName} with the given session cookie and reads the CSRF
 * token from its {@code _csrf} meta element.
 *
 * @param jSessionid value sent as the {@code JSESSIONID} cookie
 * @return the {@code content} attribute of the {@code meta[name=_csrf]} selection
 * @throws IOException if the page cannot be fetched
 */
private String getCSrfToken(final String jSessionid) throws IOException {
    // The token is published in a tag such as:
    //   <meta name="_csrf" content="c1dee1f7-8c79-43b1-8f3f-767662abc87a" />
    return Jsoup.connect(hostName)
            .cookie("JSESSIONID", jSessionid)
            .get()
            .select("meta[name=_csrf]")
            .attr("content");
}

From source file:emily.command.fun.FMLCommand.java

/**
 * Downloads a random page from fmylife.com and appends the trimmed text of every article
 * link inside a {@code p.block} paragraph to {@code items}.
 *
 * <p>An {@link IOException} from the request is reported through
 * {@code Launcher.logToDiscord} and leaves {@code items} untouched.
 */
private void getFMLItems() {
    final Document page;
    try {
        page = Jsoup.connect("http://fmylife.com/random")
                .timeout(30_000)
                .userAgent(BotConfig.USER_AGENT)
                .get();
    } catch (IOException e) {
        Launcher.logToDiscord(e, "fml-command", "boken");
        return;
    }
    if (page == null) {
        return;
    }

    for (Element articleLink : page.select("p.block a[href^=/article/]")) {
        items.add(articleLink.text().trim());
    }
}