Example usage for org.jsoup.nodes Document select

List of usage examples for org.jsoup.nodes Document select

Introduction

On this page you can find example usages of org.jsoup.nodes Document select.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:com.gote.downloader.kgs.KGSDownloader.java

/**
 * Try to found out if a game has been already played or by looking into archives page by page.
 * /*from  w w  w . j  av a2  s  .  co  m*/
 * @param pGame Game to found and update
 * @param pPlayerArchivePages List of archive pages
 */
private void retrieveAndUpdateGame(Game pGame, List<Document> pPlayerArchivePages) {
    stage = "Etape 3/3 - Rcupration de la partie";
    for (Document playerArchivePage : pPlayerArchivePages) {
        Elements tableRows = playerArchivePage.select("tr");

        for (Element row : tableRows) {
            if (Pattern.matches(regexGame, row.toString())) {
                // LOGGER.log(Level.INFO, "[TRACE] New row checked " + row.toString());

                // "Visible", "Blanc", "Noir", "Genre", "Debutee le", "Type", "Resultat"
                Elements tableCells = row.getElementsByTag("td");

                String gameUrl = isPublicGame(tableCells.get(GAMEURL));

                // May check with time if you can leave or continue
                if (gameUrl != null && !gameUrl.isEmpty()) {
                    if (gameUrl.toLowerCase().contains(pGame.getBlack().getPseudo().toLowerCase())
                            && gameUrl.toLowerCase().contains(pGame.getWhite().getPseudo().toLowerCase())) {
                        pGame.setGameUrl(gameUrl);
                        pGame.setResult(getStdResultFromKGSResult(tableCells.get(RESULT).text()));
                        File sgf = new File(AppUtil.PATH_TO_TOURNAMENTS + tournament.getTitle() + "/"
                                + AppUtil.PATH_TO_SGFS + tournament.getTitle().trim() + "_round"
                                + pGame.getBlack().getPseudo() + "_" + pGame.getWhite().getPseudo() + ".sgf");
                        try {
                            URL url = new URL(gameUrl);
                            FileUtils.copyURLToFile(url, sgf);
                        } catch (MalformedURLException e) {
                            log(Level.WARNING, "URL " + gameUrl + " malformee", e);
                        } catch (IOException e) {
                            log(Level.WARNING, "Erreur lors de l'ecriture du fichier", e);
                        }

                        // Leave the process
                        return;
                    }
                } else {
                    log(Level.INFO, "La partie " + tableCells
                            + " n'est pas visible ou un probleme a eu lieu lors de la recuperation de l'url");
                }
            }
        }
    }
}

From source file:org.jasig.portlet.proxy.search.AnchorSearchStrategy.java

/**
 * Searches the anchors of the given document for the requested search terms.
 *
 * <p>The search terms are lower-cased and split on single spaces. For every {@code a[href]}
 * element and every term contained in the anchor's lower-cased text, one {@link SearchResult}
 * is produced (so an anchor matching several terms yields several results). Each result carries
 * the anchor text as title and summary, plus a maximized VIEW render URL whose
 * {@code proxy.url} parameter is the anchor's absolute href.
 *
 * @param searchQuery the query holding the search terms
 * @param request the event request, used to read the {@code anchorWhitelistRegex} preference
 * @param document the parsed document whose anchors are searched
 * @return the list of results, empty when nothing matched
 */
@Override
public List<SearchResult> search(SearchRequest searchQuery, EventRequest request, Document document) {
    List<SearchResult> matches = new ArrayList<SearchResult>();
    final String[] whitelistRegexes = request.getPreferences().getValues("anchorWhitelistRegex",
            new String[] {});
    final String[] terms = searchQuery.getSearchTerms().toLowerCase().split(" ");

    for (Element anchor : document.select("a[href]")) {
        final String anchorUrl = anchor.attr("abs:href");
        final String anchorText = anchor.text();
        final String lowerCaseText = anchorText.toLowerCase();

        for (String term : terms) {
            if (!lowerCaseText.contains(term)) {
                continue;
            }

            log.debug("found a match, term: [" + term + "], anchor URL: [" + anchorUrl
                    + "], anchor text: [" + anchorText + "]");

            SearchResult match = new SearchResult();
            match.setTitle(anchorText);
            match.setSummary(anchorText);

            // Render URL that opens the matched link through the proxy, maximized.
            PortletUrl portletUrl = new PortletUrl();
            portletUrl.setPortletMode(PortletMode.VIEW.toString());
            portletUrl.setType(PortletUrlType.RENDER);
            portletUrl.setWindowState(WindowState.MAXIMIZED.toString());

            PortletUrlParameter proxyUrlParam = new PortletUrlParameter();
            proxyUrlParam.setName("proxy.url");
            proxyUrlParam.getValue().add(anchorUrl);
            portletUrl.getParam().add(proxyUrlParam);

            new SearchUtil().updateUrls(anchorUrl, request, whitelistRegexes);

            match.setPortletUrl(portletUrl);
            matches.add(match);
        }
    }
    return matches;
}

From source file:gov.medicaid.screening.dao.impl.MedicaidCertifiedProvidersDAOBean.java

/**
 * Retrieves all results from the source site.
 *
 * <p>The start page is fetched first so that the client holds the token the site expects, then
 * the criteria are posted to {@code showprovideroutput.cfm} and every result row is parsed into
 * a provider profile. Rows for which {@code parseProfile} returns {@code null} are skipped.
 *
 * @param criteria the search criteria.
 * @return the providers matched
 * @throws URISyntaxException if the URL could not be correctly constructed
 * @throws IOException for any I/O related errors
 * @throws ServiceException for any other errors encountered
 */
private SearchResult<ProviderProfile> getAllResults(MedicaidCertifiedProviderSearchCriteria criteria)
        throws URISyntaxException, IOException, ServiceException {
    DefaultHttpClient client = new DefaultHttpClient();
    try {
        client.setRedirectStrategy(new LaxRedirectStrategy());

        // we need to get a token from the start page, this will be stored in the client
        HttpGet getFrontPage = new HttpGet(new URIBuilder(getSearchURL()).build());
        HttpResponse response = client.execute(getFrontPage);
        verifyAndAuditCall(getSearchURL(), response);
        EntityUtils.consume(response.getEntity()); // releases the connection

        // our client is now valid, pass the criteria to the search page
        String postSearchURL = Util.replaceLastURLPart(getSearchURL(), "showprovideroutput.cfm");
        HttpPost searchPage = new HttpPost(new URIBuilder(postSearchURL).build());

        // "All" means no filter: county falls back to "0", the free-text fields to empty.
        boolean searchAll = "All".equals(criteria.getCriteria());
        HttpEntity entity = postForm(postSearchURL, client, searchPage,
                new String[][] { { "ProviderCatagory", criteria.getType() },
                        { "WhichArea", criteria.getCriteria() }, { "Submit", "Submit" },
                        { "SelectCounty", searchAll ? "0" : criteria.getValue() },
                        { "CityToFind", searchAll ? "" : criteria.getValue() },
                        { "ProviderToFind", searchAll ? "" : criteria.getValue() } },
                true);

        // this now holds the search results, parse every row (the first row is skipped)
        Document page = Jsoup.parse(EntityUtils.toString(entity));
        List<ProviderProfile> allProviders = new ArrayList<ProviderProfile>();
        for (Element row : page.select("div#body table tbody tr:gt(0)")) {
            ProviderProfile profile = parseProfile(row.children());
            if (profile != null) {
                allProviders.add(profile);
            }
        }

        SearchResult<ProviderProfile> results = new SearchResult<ProviderProfile>();
        results.setItems(allProviders);
        return results;
    } finally {
        // The client is created per call; shut its connection manager down so that
        // connections are not leaked, including when an exception is thrown above.
        client.getConnectionManager().shutdown();
    }
}

From source file:gov.medicaid.screening.dao.impl.MarriageAndFamilyTherapyLicenseDAOBean.java

/**
 * Performs a search for all possible results.
 *
 * @param criteria The search criteria./* ww w  .j  a  v  a2 s.c  o  m*/
 * @param identifier The value to be searched.
 * @param host The host where to perform search.
 * @param pageNumber The page number requested
 * @return the search result for licenses
 * @throws URISyntaxException When an error occurs while building the URL.
 * @throws IOException When an error occurs while parsing response.
 * @throws ParseException When an error occurs while parsing response.
 * @throws PersistenceException if any db related error is encountered
 * @throws ServiceException When an error occurs while trying to perform search.
 */
private SearchResult<License> getAllResults(String criteria, String identifier, String host, int pageNumber)
        throws URISyntaxException, ParseException, PersistenceException, IOException, ServiceException {
    HttpClient client = new DefaultHttpClient();
    URIBuilder builder = new URIBuilder(host).setPath("/search.asp");
    String hostId = builder.build().toString();

    builder.setParameter("qry", criteria).setParameter("crit", identifier).setParameter("p", "s")
            .setParameter("rsp", pageNumber + "");

    URI uri = builder.build();
    HttpGet httpget = new HttpGet(uri);

    SearchResult<License> searchResults = new SearchResult<License>();

    HttpResponse response = client.execute(httpget);
    int statusCode = response.getStatusLine().getStatusCode();
    if (statusCode == HttpStatus.SC_OK) {

        HttpEntity entity = response.getEntity();
        SearchResult<License> nextResults = null;
        // licenses list
        List<License> licenseList = new ArrayList<License>();
        if (entity != null) {
            String result = EntityUtils.toString(entity);
            Document document = Jsoup.parse(result);
            Elements trs = document.select("tr[bgcolor]");
            for (Element tr : trs) {
                Elements tds = tr.children();
                licenseList.add(parseLicenseInfo(tds));
            }
            // check if there is next page
            Element next = document.select("a:containsOwn(Next)").first();
            if (next != null) {
                nextResults = getAllResults(criteria, identifier, host, pageNumber + 1);
            }
            if (nextResults != null) {
                licenseList.addAll(nextResults.getItems());
            }
        }

        searchResults.setItems(licenseList);
    }
    verifyAndAuditCall(hostId, response);

    return searchResults;
}

From source file:net.devietti.ArchConfMapServlet.java

/**
 * Writes, as a JSON string, the URL of the external conference website (not the WikiCFP page)
 * for the event identified by the {@code eventid} request parameter.
 *
 * <p>The WikiCFP page of the event is downloaded and scanned for an anchor whose parent's text
 * contains "Link:" and whose href contains an absolute http(s) URL. The first such href is
 * written to the response. An error is reported through {@code error(...)} when the parameter
 * is missing, is not a non-zero integer, or when no matching link is found.
 *
 * @param req the request carrying the {@code eventid} parameter
 * @param resp the response the JSON-encoded link is written to
 * @throws IOException if writing to the response fails
 */
private void getConfLink(HttpServletRequest req, HttpServletResponse resp) throws IOException {
    String eids = req.getParameter("eventid");
    if (eids == null) {
        error("missing required URL parameter: eventid");
        return;
    }
    int eid;
    try {
        eid = Integer.parseInt(eids);
    } catch (NumberFormatException e) {
        error(e.getMessage());
        return;
    }
    if (eid == 0) {
        error("error parsing eventid");
        return;
    }

    // pull down the CFP; use the parsed id so that only a canonical integer (no sign prefix,
    // leading zeros or other raw request text) ever reaches the outbound URL
    Document cfp = getURL("http://www.wikicfp.com/cfp/servlet/event.showcfp?eventid=" + eid);

    for (Element a : cfp.select("tr td[align=center] a")) {
        Element parent = a.parent();
        // attr() yields "" for a missing attribute, so no separate hasAttr check is needed
        String href = a.attr("href");
        boolean absoluteLink = href.contains("http://") || href.contains("https://");
        if (absoluteLink && parent.text().contains("Link:")) {
            // got the link!
            resp.setContentType("application/json");
            resp.getWriter().println(GSON.toJson(href));
            return;
        }
    }

    error("no matching link");
}

From source file:io.seldon.importer.articles.dynamicextractors.AllElementsAttrValueDynamicExtractor.java

/**
 * Collects the value of one attribute from every element matching a CSS selector and joins the
 * non-blank values with commas.
 *
 * <p>{@code extractor_args} must hold at least two entries: the CSS selector and the attribute
 * name. Each attribute value is stripped and lower-cased; blank values are skipped.
 *
 * @param attributeDetail the attribute configuration providing {@code extractor_args}
 * @param url the article URL (not used by this extractor)
 * @param articleDoc the parsed article document
 * @return the comma-separated values (possibly empty), or {@code null} when fewer than two
 *         extractor arguments are configured or the selector is blank
 * @throws Exception declared by the overridden method
 */
@Override
public String extract(AttributeDetail attributeDetail, String url, Document articleDoc) throws Exception {
    if ((attributeDetail.extractor_args == null) || (attributeDetail.extractor_args.size() < 2)) {
        return null;
    }

    String cssSelector = attributeDetail.extractor_args.get(0);
    String attributeName = attributeDetail.extractor_args.get(1);

    // Check the selector BEFORE querying: jsoup rejects an empty query with an exception,
    // so the blank check is only effective when it comes first.
    if (StringUtils.isBlank(cssSelector)) {
        return null;
    }

    Elements elements = articleDoc.select(cssSelector);
    StringBuilder sb = new StringBuilder();
    for (Element e : elements) {
        String eText = StringUtils.strip(e.attr(attributeName)).toLowerCase();
        if (StringUtils.isBlank(eText)) {
            continue;
        }
        // Only non-blank values are appended, so a non-empty builder means "not the first".
        if (sb.length() > 0) {
            sb.append(",");
        }
        sb.append(eText);
    }
    return sb.toString();
}

From source file:com.shalzz.attendance.activity.LoginActivity.java

/**
 * Creates the listener that handles the page containing the hidden form fields.
 *
 * <p>The response is parsed as HTML and every {@code input[type=hidden]} element with a
 * non-empty name and a non-empty value is stored into {@code data}.
 *
 * @return the listener to register for the hidden-data request
 */
private Response.Listener<String> getHiddenDataSuccessListener() {
    return new Response.Listener<String>() {
        @Override
        public void onResponse(String response) {
            // Log tag is the (anonymous) listener class name.
            final String tag = getClass().getName();

            Log.i(tag, "Collected hidden data.");
            Document page = Jsoup.parse(response);
            Log.i(tag, "Parsing hidden data...");

            // Get Hidden values
            for (Element hiddenInput : page.select("input[type=hidden]")) {
                String fieldName = hiddenInput.attr("name");
                String fieldValue = hiddenInput.attr("value");
                if (fieldName.isEmpty() || fieldValue.isEmpty()) {
                    continue;
                }
                data.put(fieldName, fieldValue);
            }
            Log.i(tag, "Parsed hidden data.");
        }
    };
}

From source file:com.normalexception.app.rx8club.task.AdminTask.java

/**
 * Posts the admin action for the thread and, for a move, parses the response page.
 *
 * <p>For {@code DELETE_THREAD} the delete response is sent along with the post; every other
 * action posts {@code null} instead. For {@code MOVE_THREAD} the response content is parsed to
 * read the thread title and to fill {@code selectOptions} with the destination forums
 * (option text mapped to its integer value). I/O failures are logged and otherwise ignored.
 *
 * @param params unused
 * @return always {@code null}
 */
@Override
protected Void doInBackground(Void... params) {
    try {
        Log.d(TAG, progressText.get(doType));

        if (this.doType != DELETE_THREAD) {
            HtmlFormUtils.adminTypePost(doType, token, thread, null);
        } else {
            HtmlFormUtils.adminTypePost(doType, token, thread, deleteResponse);
        }

        // Only a move needs the response page to be inspected.
        if (this.doType != MOVE_THREAD) {
            return null;
        }

        String responseUrl = HtmlFormUtils.getResponseUrl();
        Log.d(TAG, "Response: " + responseUrl);

        Document page = Jsoup.parse(HtmlFormUtils.getResponseContent());

        threadTitle = HtmlFormUtils.getInputElementValueByName(page, "title");
        Log.d(TAG, "Thread Title: " + threadTitle);

        for (Element option : page.select("select[name=destforumid] > option")) {
            int forumId = Integer.parseInt(option.attr("value"));
            selectOptions.put(option.text(), forumId);
        }

        Log.d(TAG, "Parsed " + selectOptions.keySet().size() + " options");
    } catch (IOException e) {
        // Also covers ClientProtocolException, which is an IOException.
        Log.e(TAG, e.getMessage(), e);
    }
    return null;
}

From source file:com.maxl.java.aips2xml.Aips2Xml.java

/**
 * Wraps the given XML fragment in a {@code <kompendium>} root element and prepends a header
 * made of a {@code <date>} element (current date) and a {@code <lang>} element.
 *
 * <p>The fragment is run through jsoup's HTML parser, so a few parser artefacts are undone
 * afterwards: {@code tbody} elements are unwrapped, {@code img} elements without a
 * {@code src} attribute are removed, and the remaining {@code img} elements are renamed to
 * {@code image}.
 *
 * @param xml_str the XML fragment to wrap
 * @return the pretty-printed {@code <kompendium>} element including the header
 */
static String addHeaderToXml(String xml_str) {
    Document mDoc = Jsoup.parse("<kompendium>\n" + xml_str + "</kompendium>");
    mDoc.outputSettings().escapeMode(EscapeMode.xhtml);
    mDoc.outputSettings().prettyPrint(true);
    mDoc.outputSettings().indentAmount(4);

    Element root = mDoc.select("kompendium").first();

    // Add date. Work on the element just created rather than on select("date"), so that
    // <date> elements contained in xml_str are never touched.
    Element dateElem = root.prependElement("date");
    dateElem.text(new Date().toString());

    // Add language right after the header date only
    dateElem.after("<lang></lang>");
    Element langElem = dateElem.nextElementSibling();
    // Constant-first comparison: an unset DB_LANGUAGE leaves <lang> empty instead of throwing.
    if ("de".equals(DB_LANGUAGE)) {
        langElem.text("DE");
    } else if ("fr".equals(DB_LANGUAGE)) {
        langElem.text("FR");
    }

    // Fool jsoup.parse which seems to have its own "life"
    mDoc.select("tbody").unwrap();
    for (Element img_e : mDoc.select("img")) {
        if (!img_e.hasAttr("src")) {
            img_e.unwrap();
        }
    }
    mDoc.select("img").tagName("image");

    return root.outerHtml();
}