List of usage examples for org.jsoup.nodes Document select
public Elements select(String cssQuery)
From source file:com.gote.downloader.kgs.KGSDownloader.java
/** * Try to found out if a game has been already played or by looking into archives page by page. * /*from w w w . j av a2 s . co m*/ * @param pGame Game to found and update * @param pPlayerArchivePages List of archive pages */ private void retrieveAndUpdateGame(Game pGame, List<Document> pPlayerArchivePages) { stage = "Etape 3/3 - Rcupration de la partie"; for (Document playerArchivePage : pPlayerArchivePages) { Elements tableRows = playerArchivePage.select("tr"); for (Element row : tableRows) { if (Pattern.matches(regexGame, row.toString())) { // LOGGER.log(Level.INFO, "[TRACE] New row checked " + row.toString()); // "Visible", "Blanc", "Noir", "Genre", "Debutee le", "Type", "Resultat" Elements tableCells = row.getElementsByTag("td"); String gameUrl = isPublicGame(tableCells.get(GAMEURL)); // May check with time if you can leave or continue if (gameUrl != null && !gameUrl.isEmpty()) { if (gameUrl.toLowerCase().contains(pGame.getBlack().getPseudo().toLowerCase()) && gameUrl.toLowerCase().contains(pGame.getWhite().getPseudo().toLowerCase())) { pGame.setGameUrl(gameUrl); pGame.setResult(getStdResultFromKGSResult(tableCells.get(RESULT).text())); File sgf = new File(AppUtil.PATH_TO_TOURNAMENTS + tournament.getTitle() + "/" + AppUtil.PATH_TO_SGFS + tournament.getTitle().trim() + "_round" + pGame.getBlack().getPseudo() + "_" + pGame.getWhite().getPseudo() + ".sgf"); try { URL url = new URL(gameUrl); FileUtils.copyURLToFile(url, sgf); } catch (MalformedURLException e) { log(Level.WARNING, "URL " + gameUrl + " malformee", e); } catch (IOException e) { log(Level.WARNING, "Erreur lors de l'ecriture du fichier", e); } // Leave the process return; } } else { log(Level.INFO, "La partie " + tableCells + " n'est pas visible ou un probleme a eu lieu lors de la recuperation de l'url"); } } } } }
From source file:org.jasig.portlet.proxy.search.AnchorSearchStrategy.java
@Override public List<SearchResult> search(SearchRequest searchQuery, EventRequest request, Document document) { List<SearchResult> results = new ArrayList<SearchResult>(); final String[] whitelistRegexes = request.getPreferences().getValues("anchorWhitelistRegex", new String[] {}); String searchTerms = searchQuery.getSearchTerms().toLowerCase(); Elements links = document.select("a[href]"); for (Element link : links) { String linkUrl = link.attr("abs:href"); for (String searchTerm : searchTerms.split(" ")) { if (link.text().toLowerCase().contains(searchTerm)) { log.debug("found a match, term: [" + searchTerm + "], anchor URL: [" + linkUrl + "], anchor text: [" + link.text() + "]"); SearchResult result = new SearchResult(); result.setTitle(link.text()); result.setSummary(link.text()); PortletUrl pUrl = new PortletUrl(); pUrl.setPortletMode(PortletMode.VIEW.toString()); pUrl.setType(PortletUrlType.RENDER); pUrl.setWindowState(WindowState.MAXIMIZED.toString()); PortletUrlParameter param = new PortletUrlParameter(); param.setName("proxy.url"); param.getValue().add(linkUrl); pUrl.getParam().add(param); new SearchUtil().updateUrls(linkUrl, request, whitelistRegexes); result.setPortletUrl(pUrl); results.add(result);//from w w w.ja v a2 s . c om } } } return results; }
From source file:gov.medicaid.screening.dao.impl.MedicaidCertifiedProvidersDAOBean.java
/** * Retrieves all results from the source site. * * @param criteria the search criteria.// w ww.j ava2 s. co m * @return the providers matched * @throws URISyntaxException if the URL could not be correctly constructed * @throws IOException for any I/O related errors * @throws ServiceException for any other errors encountered */ private SearchResult<ProviderProfile> getAllResults(MedicaidCertifiedProviderSearchCriteria criteria) throws URISyntaxException, IOException, ServiceException { DefaultHttpClient client = new DefaultHttpClient(); client.setRedirectStrategy(new LaxRedirectStrategy()); // we need to get a token from the start page, this will be stored in the client HttpGet getFrontPage = new HttpGet(new URIBuilder(getSearchURL()).build()); HttpResponse response = client.execute(getFrontPage); verifyAndAuditCall(getSearchURL(), response); EntityUtils.consume(response.getEntity()); // releases the connection // our client is now valid, pass the criteria to the search page String postSearchURL = Util.replaceLastURLPart(getSearchURL(), "showprovideroutput.cfm"); HttpPost searchPage = new HttpPost(new URIBuilder(postSearchURL).build()); HttpEntity entity = postForm(postSearchURL, client, searchPage, new String[][] { { "ProviderCatagory", criteria.getType() }, { "WhichArea", criteria.getCriteria() }, { "Submit", "Submit" }, { "SelectCounty", "All".equals(criteria.getCriteria()) ? "0" : criteria.getValue() }, { "CityToFind", "All".equals(criteria.getCriteria()) ? "" : criteria.getValue() }, { "ProviderToFind", "All".equals(criteria.getCriteria()) ? 
"" : criteria.getValue() } }, true); // this now holds the search results, parse every row Document page = Jsoup.parse(EntityUtils.toString(entity)); List<ProviderProfile> allProviders = new ArrayList<ProviderProfile>(); Elements rows = page.select("div#body table tbody tr:gt(0)"); for (Element row : rows) { ProviderProfile profile = parseProfile(row.children()); if (profile != null) { allProviders.add(profile); } } SearchResult<ProviderProfile> results = new SearchResult<ProviderProfile>(); results.setItems(allProviders); return results; }
From source file:gov.medicaid.screening.dao.impl.MarriageAndFamilyTherapyLicenseDAOBean.java
/** * Performs a search for all possible results. * * @param criteria The search criteria./* ww w .j a v a2 s.c o m*/ * @param identifier The value to be searched. * @param host The host where to perform search. * @param pageNumber The page number requested * @return the search result for licenses * @throws URISyntaxException When an error occurs while building the URL. * @throws IOException When an error occurs while parsing response. * @throws ParseException When an error occurs while parsing response. * @throws PersistenceException if any db related error is encountered * @throws ServiceException When an error occurs while trying to perform search. */ private SearchResult<License> getAllResults(String criteria, String identifier, String host, int pageNumber) throws URISyntaxException, ParseException, PersistenceException, IOException, ServiceException { HttpClient client = new DefaultHttpClient(); URIBuilder builder = new URIBuilder(host).setPath("/search.asp"); String hostId = builder.build().toString(); builder.setParameter("qry", criteria).setParameter("crit", identifier).setParameter("p", "s") .setParameter("rsp", pageNumber + ""); URI uri = builder.build(); HttpGet httpget = new HttpGet(uri); SearchResult<License> searchResults = new SearchResult<License>(); HttpResponse response = client.execute(httpget); int statusCode = response.getStatusLine().getStatusCode(); if (statusCode == HttpStatus.SC_OK) { HttpEntity entity = response.getEntity(); SearchResult<License> nextResults = null; // licenses list List<License> licenseList = new ArrayList<License>(); if (entity != null) { String result = EntityUtils.toString(entity); Document document = Jsoup.parse(result); Elements trs = document.select("tr[bgcolor]"); for (Element tr : trs) { Elements tds = tr.children(); licenseList.add(parseLicenseInfo(tds)); } // check if there is next page Element next = document.select("a:containsOwn(Next)").first(); if (next != null) { nextResults = getAllResults(criteria, 
identifier, host, pageNumber + 1); } if (nextResults != null) { licenseList.addAll(nextResults.getItems()); } } searchResults.setItems(licenseList); } verifyAndAuditCall(hostId, response); return searchResults; }
From source file:net.devietti.ArchConfMapServlet.java
/** * Returns the URL of the external conference website (not the WikiCFP page) for the given * eventid.//from www . j a v a 2s.c o m */ private void getConfLink(HttpServletRequest req, HttpServletResponse resp) throws IOException { String eids = req.getParameter("eventid"); if (eids == null) { error("missing required URL parameter: eventid"); return; } Integer eid; try { eid = Integer.valueOf(eids); } catch (NumberFormatException e) { error(e.getMessage()); return; } if (eid == null || eid == 0) { error("error parsing eventid"); return; } // pull down the CFP Document cfp = getURL("http://www.wikicfp.com/cfp/servlet/event.showcfp?eventid=" + eids); for (Element a : cfp.select("tr td[align=center] a")) { Element td = a.parent(); if (td.text().contains("Link:") && a.hasAttr("href") && a.attr("href").contains("http://")) { // got the link! resp.setContentType("application/json"); resp.getWriter().println(GSON.toJson(a.attr("href"))); return; } } error("no matching link"); }
From source file:io.seldon.importer.articles.dynamicextractors.AllElementsAttrValueDynamicExtractor.java
/**
 * Collects the value of one attribute from every element matched by a CSS selector and joins
 * the values with commas.
 *
 * <p>{@code extractor_args[0]} is the CSS selector and {@code extractor_args[1]} the attribute
 * name. Each value is stripped and lower-cased; blank values are skipped.
 *
 * @param attributeDetail carries the extractor arguments
 * @param url             the article URL (not used by this extractor)
 * @param articleDoc      the parsed article document
 * @return the comma-separated values (empty when nothing matched), or {@code null} when fewer
 *         than two extractor arguments are given or the selector is blank
 * @throws Exception declared by the overridden method
 */
@Override
public String extract(AttributeDetail attributeDetail, String url, Document articleDoc) throws Exception {
    if (attributeDetail.extractor_args == null || attributeDetail.extractor_args.size() < 2) {
        return null;
    }

    String cssSelector = attributeDetail.extractor_args.get(0);
    String attributeName = attributeDetail.extractor_args.get(1);

    // Validate the selector BEFORE handing it to jsoup: select() rejects an empty query
    // with an exception, which made the original post-select blank check ineffective.
    if (StringUtils.isBlank(cssSelector)) {
        return null;
    }

    Elements elements = articleDoc.select(cssSelector);
    StringBuilder sb = new StringBuilder();
    for (Element e : elements) {
        String eText = StringUtils.strip(e.attr(attributeName));
        eText = eText.toLowerCase();
        if (StringUtils.isBlank(eText)) {
            continue;
        }
        // Only non-blank values are appended, so a non-empty builder means "not first".
        if (sb.length() > 0) {
            sb.append(",");
        }
        sb.append(eText);
    }
    return sb.toString();
}
From source file:com.shalzz.attendance.activity.LoginActivity.java
private Response.Listener<String> getHiddenDataSuccessListener() { return new Response.Listener<String>() { @Override// w w w .j a v a2s. c o m public void onResponse(String response) { Log.i(getClass().getName(), "Collected hidden data."); Document doc = Jsoup.parse(response); Log.i(getClass().getName(), "Parsing hidden data..."); // Get Hidden values Elements hiddenvalues = doc.select("input[type=hidden]"); for (Element hiddenvalue : hiddenvalues) { String name = hiddenvalue.attr("name"); String val = hiddenvalue.attr("value"); if (name.length() != 0 && val.length() != 0) { data.put(name, val); } } Log.i(getClass().getName(), "Parsed hidden data."); } }; }
From source file:com.normalexception.app.rx8club.task.AdminTask.java
@Override protected Void doInBackground(Void... params) { try {//from w w w. j av a 2 s .co m Log.d(TAG, progressText.get(doType)); if (this.doType == DELETE_THREAD) { HtmlFormUtils.adminTypePost(doType, token, thread, deleteResponse); } else HtmlFormUtils.adminTypePost(doType, token, thread, null); if (this.doType == MOVE_THREAD) { String response = HtmlFormUtils.getResponseUrl(); Log.d(TAG, "Response: " + response); Document doc = Jsoup.parse(HtmlFormUtils.getResponseContent()); threadTitle = HtmlFormUtils.getInputElementValueByName(doc, "title"); Log.d(TAG, "Thread Title: " + threadTitle); Elements selects = doc.select("select[name=destforumid] > option"); for (Element select : selects) { selectOptions.put(select.text(), Integer.parseInt(select.attr("value"))); } Log.d(TAG, "Parsed " + selectOptions.keySet().size() + " options"); } } catch (ClientProtocolException e) { Log.e(TAG, e.getMessage(), e); } catch (IOException e) { Log.e(TAG, e.getMessage(), e); } return null; }
From source file:com.maxl.java.aips2xml.Aips2Xml.java
static String addHeaderToXml(String xml_str) { Document mDoc = Jsoup.parse("<kompendium>\n" + xml_str + "</kompendium>"); mDoc.outputSettings().escapeMode(EscapeMode.xhtml); mDoc.outputSettings().prettyPrint(true); mDoc.outputSettings().indentAmount(4); // Add date//from w w w .j av a2s . c o m Date df = new Date(); String date_str = df.toString(); mDoc.select("kompendium").first().prependElement("date"); mDoc.select("date").first().text(date_str); // Add language mDoc.select("date").after("<lang></lang>"); if (DB_LANGUAGE.equals("de")) mDoc.select("lang").first().text("DE"); else if (DB_LANGUAGE.equals("fr")) mDoc.select("lang").first().text("FR"); // Fool jsoup.parse which seems to have its own "life" mDoc.select("tbody").unwrap(); Elements img_elems = mDoc.select("img"); for (Element img_e : img_elems) { if (!img_e.hasAttr("src")) img_e.unwrap(); } mDoc.select("img").tagName("image"); String final_xml_str = mDoc.select("kompendium").first().outerHtml(); return final_xml_str; }