List of usage examples for the org.jsoup.nodes.Document method select
public Elements select(String cssQuery)
From source file:com.msds.km.service.Impl.DrivingLicenseRecognitionServcieiImpl.java
/** * html???/*from www . j ava 2s . c o m*/ * * @param html * ??xml?java * @return * @throws Exception */ private DrivingLicense parseDrivingLicense(String html) throws Exception { if (html.isEmpty()) { logger.info(""); return null; } Document document = Jsoup.parse(html); if (document == null) { logger.info("html"); return null; } Elements fieldsets = document.select("div[class=left result] fieldset"); if (fieldsets.size() != 1) { logger.info("?"); return null; } Element regResult = fieldsets.first(); String result = regResult.html().trim(); // String removedStr = "<legend></legend>"; if (result.startsWith(removedStr)) { result = result.substring(removedStr.length()); } // ??xml result = StringEscapeUtils.unescapeXml(result); // result = "<drivingLicense>" + result + "</drivingLicense>"; return XMLUtils.toObject(result, DrivingLicense.class); }
From source file:gov.medicaid.screening.dao.impl.AccreditedBirthCentersLicenseDAOBean.java
/** * Retrieves all results from the source site. * /*from w w w .jav a 2 s. c o m*/ * @return the birth centers matched * @throws URISyntaxException * if the URL could not be correctly constructed * @throws IOException * for any I/O related errors * @throws ServiceException * for any other errors encountered */ private List<AccreditedBirthCenter> getAllResults() throws URISyntaxException, IOException, ServiceException { DefaultHttpClient client = new DefaultHttpClient(); client.setRedirectStrategy(new LaxRedirectStrategy()); HttpGet getFrontPage = new HttpGet(new URIBuilder(getSearchURL()).build()); HttpResponse response = client.execute(getFrontPage); verifyAndAuditCall(getSearchURL(), response); Document page = Jsoup.parse(EntityUtils.toString(response.getEntity())); List<AccreditedBirthCenter> allCenters = new ArrayList<AccreditedBirthCenter>(); Elements rows = page.select("table#wp-table-reloaded-id-1-no-1 tbody tr"); for (Element row : rows) { AccreditedBirthCenter center = parseCenter(row.children()); if (center != null) { allCenters.add(center); } } return allCenters; }
From source file:cd.go.contrib.elasticagents.dockerswarm.elasticagent.executors.AgentStatusReportExecutorTest.java
@Test public void shouldPrintMessageWhenLogIsNotAvailable() throws Exception { final Service service = mockedService("elastic-agent-id", "abcd-xyz"); when(statusReportRequest.getJobIdentifier()).thenReturn(null); when(statusReportRequest.getElasticAgentId()).thenReturn("elastic-agent-id"); when(client.listServices()).thenReturn(Arrays.asList(service)); when(client.serviceLogs("abcd-xyz", stdout(), stderr())).thenReturn(new StubbedLogStream("")); GoPluginApiResponse response = executor.execute(); assertThat(response.responseCode(), is(200)); final Map<String, String> responseMap = GSON.fromJson(response.responseBody(), new TypeToken<Map<String, String>>() { }.getType());//from ww w .j ava2s. c om assertTrue(responseMap.containsKey("view")); final Document document = Jsoup.parse(responseMap.get("view")); assertThat(document.select(".service-logs").text(), is("Logs not available for this agent.")); }
From source file:accountgen.controller.Controller.java
/**
 * Sets the person's email to the part of the first {@code .email} element's text that
 * precedes the first space.
 *
 * @param doc the parsed page to read from
 * @param p the person to update
 */
private void setEmail(Document doc, Person p) {
    final String emailText = doc.select(".email").first().text();
    final String[] tokens = emailText.split(" ");
    p.setEmail(tokens[0]);
}
From source file:accountgen.controller.Controller.java
/**
 * Sets the person's phone number to the text of the first {@code .tel} element.
 *
 * @param doc the parsed page to read from
 * @param p the person to update
 */
private void setPhone(Document doc, Person p) {
    final String phoneText = doc.select(".tel").first().text();
    p.setPhone(phoneText);
}
From source file:com.webcrawler.manager.impl.ImageManagerImpl.java
/**
 * Downloads the page at {@code url} on the executor service and describes every {@code img}
 * element that has a {@code src} attribute.
 *
 * <p>If the page cannot be fetched or queried, the stack trace is printed and an empty list is
 * returned instead of failing the call.
 *
 * @param url address of the page to inspect; must be non-null and non-empty
 * @return one {@code ImageDTO} per image, holding the absolute source URL and the file name
 *     derived from it by {@code getFileName}
 * @throws IllegalArgumentException if {@code url} is null or empty
 * @throws InterruptedException if the calling thread is interrupted while waiting for the task
 * @throws ExecutionException if the submitted task itself throws
 * @throws IOException declared by the method's contract
 */
@Override
public List<ImageDTO> getImageData(final String url)
        throws IOException, IllegalArgumentException, InterruptedException, ExecutionException {
    if (url == null || url.isEmpty()) {
        throw new IllegalArgumentException("Set URL first");
    }

    Callable<List<ImageDTO>> callable = new Callable<List<ImageDTO>>() {
        @Override
        public List<ImageDTO> call() throws Exception {
            System.out.println("Retrieving image data from url " + url);
            List<ImageDTO> images = new ArrayList<ImageDTO>();

            Elements imageElements;
            try {
                // Select only <img> elements so the reported count matches what is returned;
                // a bare [src] query would also count scripts, iframes and similar elements.
                imageElements = Jsoup.connect(url).get().select("img[src]");
            } catch (Exception e) {
                // Best effort: a page that cannot be loaded yields no images.
                e.printStackTrace();
                return images;
            }

            System.out.println("# of images: " + imageElements.size());
            for (Element image : imageElements) {
                // Resolve the absolute URL once and reuse it for both fields.
                String absoluteSrc = image.attr("abs:src");
                ImageDTO dto = new ImageDTO();
                dto.setUrlAddress(absoluteSrc);
                dto.setFileName(getFileName(absoluteSrc));
                images.add(dto);
            }
            return images;
        }
    };

    Future<List<ImageDTO>> result = executorService.submit(callable);
    return result.get();
}
From source file:accountgen.controller.Controller.java
/**
 * Sets the person's gender from the {@code alt} attribute of the first {@code img} inside the
 * first {@code .content} element inside the first {@code .bcs} element.
 *
 * @param doc the parsed page to read from
 * @param p the person to update
 */
private void setGender(Document doc, Person p) {
    final Element bcsBlock = doc.select(".bcs").first();
    final Element contentBlock = bcsBlock.select(".content").first();
    final Element genderImage = contentBlock.select("img").first();
    p.setGender(genderImage.attr("alt"));
}
From source file:com.mycompany.grabberrasskazov.threads.ThreadForPageSave.java
public void indexStory(String pageUrl) { try {/* ww w .jav a2 s . c o m*/ String oldId = pageUrl.replace(GlobalVars.mainSite, ""); if (!mainBean.storyExists(oldId)) { Stories r = new Stories(); Document doc = Jsoup.connect(pageUrl) .userAgent("Opera/9.80 (X11; Linux x86_64) " + "Presto/2.12.388 Version/12.16").get(); Elements nameBlockElements = doc.select("b:containsOwn(?)"); Element nameBlock = nameBlockElements.get(0); nameBlock = nameBlock.parent().parent(); nameBlockElements = nameBlock.select("td:eq(1)"); nameBlock = nameBlockElements.get(0); String storyName = nameBlock.text(); r.setStoryName(storyName); // Start of processing writer Elements writerBlockElements = doc.select("b:containsOwn(?:)"); Element writerBlock = writerBlockElements.get(0); writerBlock = writerBlock.parent().parent(); writerBlockElements = writerBlock.select("td:eq(1)"); writerBlock = writerBlockElements.get(0); String writersUrl = writerBlock.select("a:eq(0)").attr("href"); String writersName = writerBlock.select("a:eq(0)").text(); String writersContacts = writerBlock.select("a:eq(1)").attr("href"); StoryWriters storyWriter = new StoryWriters(); storyWriter.setOldId(writersUrl); storyWriter.setWriterEmail(writersContacts); storyWriter.setWriterName(writersName); storyWriter = mainBean.saveWriter(storyWriter); Set<StoriesToWritersRelations> storiesToWritersRelationses = new HashSet<StoriesToWritersRelations>(); StoriesToWritersRelations storiesToWritersRelations = new StoriesToWritersRelations(); storiesToWritersRelations.setStories(r); storiesToWritersRelations.setStoryWriters(storyWriter); r.setStoriesToWritersRelationses(storiesToWritersRelationses); // End of processing writer Set<StoriesToCategoriessRelations> catsRelationses = new HashSet<>(); Elements katsInfo = doc.select("a[href*=ras.shtml?kat]"); for (Element kat : katsInfo) { String katId = kat.attr("href"); StoryCategories cat = mainBean.getCat(katId); StoriesToCategoriessRelations catsRelations = new 
StoriesToCategoriessRelations(); catsRelations.setStoryCategories(cat); catsRelations.setStories(r); catsRelationses.add(catsRelations); } r.setStoriesToCategoriessRelationses(catsRelationses); Elements textBlocks = doc.select("p[align=justify]"); Element textBlock = textBlocks.get(0); String textStr = textBlock.html(); r.setStoryText(textStr.replace("\"", "'")); r.setOldId(oldId); mainBean.saveStory(r); } } catch (IOException ex) { ex.printStackTrace(); } }
From source file:accountgen.controller.Controller.java
private void setGEO(Document doc, Person p) { String geo_x = doc.select("#geo").first().text().split(", ")[0]; String geo_y = doc.select("#geo").first().text().split(", ")[1]; p.setGEOX(geo_x);//www. j a v a 2 s . co m p.setGEOY(geo_y); }
From source file:com.johan.vertretungsplan.parser.UntisInfoParser.java
@Override public Vertretungsplan getVertretungsplan() throws IOException, JSONException { new LoginHandler(schule).handleLogin(executor, cookieStore, username, password); Document navbarDoc = Jsoup.parse(getNavbarDoc().replace(" ", "")); Element select = navbarDoc.select("select[name=week]").first(); Vertretungsplan v = new Vertretungsplan(); List<VertretungsplanTag> tage = new ArrayList<VertretungsplanTag>(); String info = navbarDoc.select(".description").text(); String stand;/*from ww w.j av a 2 s .c o m*/ try { stand = info.substring(info.indexOf("Stand:")); } catch (Exception e) { stand = ""; } for (Element option : select.children()) { String week = option.attr("value"); String letter = data.optString("letter", "w"); if (data.optBoolean("single_classes", false)) { int classNumber = 1; for (String klasse : getAllClasses()) { String paddedNumber = String.format("%05d", classNumber); String url; if (data.optBoolean("w_after_number", false)) url = baseUrl + "/" + week + "/" + letter + "/" + letter + paddedNumber + ".htm"; else url = baseUrl + "/" + letter + "/" + week + "/" + letter + paddedNumber + ".htm"; Document doc = Jsoup.parse(httpGet(url, schule.getData().getString("encoding"))); Elements days = doc.select("#vertretung > p > b, #vertretung > b"); for (Element day : days) { VertretungsplanTag tag = getTagByDatum(tage, day.text()); tag.setStand(stand); tag.setDatum(day.text()); Element next = null; if (day.parent().tagName().equals("p")) { next = day.parent().nextElementSibling().nextElementSibling(); } else next = day.parent().select("p").first().nextElementSibling(); if (next.className().equals("subst")) { //Vertretungstabelle if (next.text().contains("Vertretungen sind nicht freigegeben")) continue; parseVertretungsplanTable(next, data, tag); } else { //Nachrichten parseNachrichten(next, data, tag); next = next.nextElementSibling().nextElementSibling(); parseVertretungsplanTable(next, data, tag); } writeTagByDatum(tage, tag); } classNumber++; } } else { 
String url; if (data.optBoolean("w_after_number", false)) url = baseUrl + "/" + week + "/" + letter + "/" + letter + "00000.htm"; else url = baseUrl + "/" + letter + "/" + week + "/" + letter + "00000.htm"; Document doc = Jsoup.parse(httpGet(url, schule.getData().getString("encoding"))); Elements days = doc.select("#vertretung > p > b, #vertretung > b"); for (Element day : days) { VertretungsplanTag tag = getTagByDatum(tage, day.text()); tag.setStand(stand); tag.setDatum(day.text()); Element next = null; if (day.parent().tagName().equals("p")) { next = day.parent().nextElementSibling().nextElementSibling(); } else next = day.parent().select("p").first().nextElementSibling(); if (next.className().equals("subst")) { //Vertretungstabelle if (next.text().contains("Vertretungen sind nicht freigegeben")) continue; parseVertretungsplanTable(next, data, tag); } else { //Nachrichten parseNachrichten(next, data, tag); next = next.nextElementSibling().nextElementSibling(); parseVertretungsplanTable(next, data, tag); } tage.add(tag); } } v.setTage(tage); } return v; }