List of usage examples for org.jsoup.nodes.Document.select
public Elements select(String cssQuery)
From source file:io.seldon.importer.articles.dynamicextractors.FirstElementAttrUppercaseValueDynamicExtractor.java
@Override public String extract(AttributeDetail attributeDetail, String url, Document articleDoc) throws Exception { String attrib_value = null;//from w ww. j a v a 2s.c om if ((attributeDetail.extractor_args != null) && (attributeDetail.extractor_args.size() >= 2)) { String cssSelector = attributeDetail.extractor_args.get(0); Element element = articleDoc.select(cssSelector).first(); if (StringUtils.isNotBlank(cssSelector)) { int arg_count = 0; for (String value_name : attributeDetail.extractor_args) { if (arg_count > 0) { // skip the first one, its the cssSelector if (element != null && element.attr(value_name) != null) { attrib_value = element.attr(value_name); if (StringUtils.isNotBlank(attrib_value)) { break; } } } arg_count++; } } } attrib_value = (attrib_value != null) ? attrib_value.toUpperCase() : attrib_value; return attrib_value; }
From source file:com.example.android.expandingcells.ExpandingCells.java
public void getHoroscope() { AsyncHttpClient client = new AsyncHttpClient(); client.get(// w w w . j av a 2 s.c o m "http://pipes.yahoo.com/pipes/pipe.run?_id=_omfgXdL3BGGadhGdrq02Q&_render=json&sign=Virgo&url=http%3A%2F%2Fwww.astrology.com%2Fhoroscopes%2Fdaily-horoscope.rss", new JsonHttpResponseHandler() { @Override public void onSuccess(JSONObject response) { JSONArray horoscopeJsonResults = null; try { horoscopeJsonResults = response.getJSONObject("value").getJSONArray("items"); // Pass the index value based on what sunsign, here, 0->Aries, 1->Taurus ... etc String horoscopeFullString = horoscopeJsonResults.getJSONObject(0) .getString("description").toString(); Document doc = Jsoup.parse(horoscopeFullString); Element p = doc.select("p").first(); horoscopeText = p.text(); Horo_Image = "drawable://" + R.drawable.mb_horoscope; Horo = "Daily Horoscope for " + CommonLib.findZodiacSign("9", "6") + ": \n" + horoscopeText; Log.d("DEBUG", Horo); Log.d("DEBUG", "Horo"); NewsList.get(1).setNews(Horo_Image, "Horoscope", Horo); adapter.notifyDataSetChanged(); Log.d("DEBUG", "Horo1"); adapter.notifyDataSetChanged(); //itemList.add(new Bytes(Horo_Image,"Horoscope", Horo)); // Replace this with birth date //todoAdapter.add("Daily Horoscope for " + CommonLib.findZodiacSign("12", "6") + ": \n" + horoscopeText); } catch (JSONException e) { e.printStackTrace(); Log.d("DEBUG", "pipes"); } } }); }
From source file:net.niyonkuru.koodroid.html.SubscribersHandler.java
@Override public ArrayList<ContentProviderOperation> parse(Document doc, ContentResolver resolver) throws HandlerException { final ArrayList<ContentProviderOperation> batch = new ArrayList<ContentProviderOperation>(); Element subscriberLi = doc.select("div#banSelector li:has(div)").first(); while (subscriberLi != null) { String text = subscriberLi.text(); /* this assumes the name and phone number are separated by a space */ int separator = text.lastIndexOf(' ') + 1; String subscriberId = text.substring(separator).replaceAll("\\D", ""); if (subscriberId.length() != 10) throw new HandlerException(getString(R.string.parser_error_unexpected_input)); final ContentProviderOperation.Builder builder; final Uri subscriberUri = Subscribers.buildSubscriberUri(subscriberId); if (subscriberExists(subscriberUri, resolver)) { builder = ContentProviderOperation.newUpdate(subscriberUri); builder.withValue(Subscribers.UPDATED, System.currentTimeMillis()); } else {//from w w w.j a v a 2s. c o m builder = ContentProviderOperation.newInsert(Subscribers.CONTENT_URI); } builder.withValue(Subscribers.SUBSCRIBER_ID, subscriberId); String fullName = ""; String[] names = text.substring(0, separator).split("\\s"); for (String name : names) { fullName += ParserUtils.capitalize(name) + " "; } builder.withValue(Subscribers.SUBSCRIBER_FULL_NAME, fullName.trim()); if (subscriberLi.hasAttr("onClick")) { String switchUrl = subscriberLi.attr("onClick"); /* extract only the url */ switchUrl = switchUrl.substring(switchUrl.indexOf('/'), switchUrl.lastIndexOf('\'')); builder.withValue(Subscribers.SUBSCRIBER_SWITCHER, switchUrl); } else { /* this is the default subscriber as it doesn't have a switcher url */ ContentValues cv = new ContentValues(1); cv.put(Settings.SUBSCRIBER, subscriberId); resolver.insert(Settings.CONTENT_URI, cv); } builder.withValue(Subscribers.SUBSCRIBER_EMAIL, mParent); batch.add(builder.build()); subscriberLi = subscriberLi.nextElementSibling(); } if (batch.size() == 0) throw new 
HandlerException(getString(R.string.parser_error_unexpected_input)); JSONObject metadata = new JSONObject(); try { metadata.put("subscribers", batch.size()); metadata.put("language", getString(R.string.locale)); } catch (JSONException ignored) { } Crittercism.setMetadata(metadata); Crittercism.setUsername(mParent); return batch; }
From source file:io.seldon.importer.articles.dynamicextractors.AllElementsTextListValueDynamicExtractor.java
@Override public String extract(AttributeDetail attributeDetail, String url, Document articleDoc) throws Exception { String attrib_value = null;//from w w w . j a va2 s. com if ((attributeDetail.extractor_args != null) && (attributeDetail.extractor_args.size() >= 1)) { String cssSelector = attributeDetail.extractor_args.get(0); Elements elements = articleDoc.select(cssSelector); if (StringUtils.isNotBlank(cssSelector)) { if (elements != null) { StringBuilder sb = new StringBuilder(); boolean isFirstInList = true; for (Element e : elements) { String eText = e.text(); eText = StringUtils.strip(eText); if (StringUtils.isBlank(eText)) continue; eText = eText.toLowerCase(); if (isFirstInList) { isFirstInList = false; } else { sb.append(","); } sb.append(eText); } attrib_value = sb.toString(); } } } return attrib_value; }
From source file:com.aestasit.markdown.slidery.converters.TextTemplateConverter.java
protected void transformDocument(final Document slidesDocument, final Configuration config) { if (!config.notesIncluded()) { for (Element notesElement : slidesDocument.select("aside")) { notesElement.remove();//from www .j a va 2s . c o m } } if ("true".equals(config.getOption("renderSyntaxHighlighting"))) { renderSyntaxHighlightingHtml(slidesDocument, config); } }
From source file:coding.cowboys.scrapers.DvcMagicResalesScraper.java
public List<ResortWrapper> findResorts() { List<ResortWrapper> wrappers = new ArrayList<ResortWrapper>(); Document doc = null; try {/* w ww .j a va 2s. c o m*/ doc = Jsoup.connect(SiteUrls.DVC_MAGIC_RESALES).timeout(60000).get(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } if (doc != null) { for (Element element : doc.select("table#listALL")) { for (Element row : element.select("tr")) { if (row.hasClass("stat-Active")) { ResortWrapper wrapper = new ResortWrapper(); Elements data = row.select("td"); wrapper.setResort(getResortFromText(data.get(0).text())); wrapper.setUseYear(data.get(1).text()); wrapper.setPoints(data.get(2).text()); wrapper.setPricePerPoint(data.get(3).text()); wrapper.setTotalPrice(data.get(4).text()); wrapper.setPointSummary(data.get(0).text().replace(wrapper.getResort(), "")); wrapper.setUrl("http://www.dvcmagicresales.com/dvcmr/resales-all-listings/"); wrappers.add(wrapper); } } } } else { System.out.println("DVC Magic Resales returned null"); } return wrappers; }
From source file:com.aurel.track.exchange.docx.exporter.PreprocessImage.java
/** * Gets the image captions in a map keyed by itemID_attachmentID * The key is saved also in the <img> tag's "alt" attribute for later use from word * @param doc/*ww w .j a v a 2s . co m*/ * @param personID * @param imageCaptionsMap * @return */ private String getImageCaptions(Document doc, Integer personID, Map<String, ImageOrTableCaption> imageCaptionsMap) { Elements imgElements = doc.select("img"); if (imgElements != null) { for (Iterator<Element> iterator = imgElements.iterator(); iterator.hasNext();) { Element imageElement = iterator.next(); String sourceAttribute = imageElement.attr("src"); String style = imageElement.attr("style"); //remove the width and height attributes from html img to avoid java.lang.OutOfMemoryError: Java heap space imageElement.removeAttr("width"); imageElement.removeAttr("height"); ALIGN align = null; if (style != null) { if (style.contains("float:left")) { align = ALIGN.LEFT; } else { if (style.contains("float:right")) { align = ALIGN.RIGHT; } } } String altAttribute = imageElement.attr("alt"); Map<String, String> map = getTemporaryFilePathMap(sourceAttribute, personID); if (map != null) { imageElement.attr("src", map.get("temporaryFilePath")); //save imageCaption into the map and now use the "alt" attribute for storing the merged key //which will be transformed in nonvisualdrawingprops.getDescr() by XHTMLImporterImpl to set the caption on the ms word side String imageCaption = null; if (altAttribute != null && !"".equals(altAttribute)) { //probably from previously removed figcaption but it may also be explicitly set imageCaption = altAttribute; } else { imageCaption = map.get("description"); } globalCounter++; counterWithinChapter++; imageElement.attr("alt", String.valueOf(globalCounter)); if (imageCaption == null) { //add anyway to the map even as empty string because this marks the image to be added to the List of figures imageCaption = ""; } imageCaptionsMap.put(String.valueOf(globalCounter), new ImageOrTableCaption(chapterNo, 
counterWithinChapter, imageCaption, align)); } } } return doc.body().html(); }
From source file:accountgen.controller.Controller.java
/**
 * Fills the person from the list items found under {@code .extra} (items with class
 * {@code lab} excluded). Values are read by fixed position, so the page layout must match.
 *
 * <p>Item 7 is parsed as {@code month/year} for the card expiry, item 13 as
 * {@code <year> <brand...> <model>} for the vehicle, and items 16 and 17 take the first word
 * inside the parentheses as weight and height.
 *
 * @param doc the parsed page
 * @param p   the person to populate
 */
private void getListTags(Document doc, Person p) {
    Elements li = doc.select(".extra").select("li:not(.lab)");

    p.setUsername(li.get(2).text());
    p.setPassword(li.get(3).text());
    p.setMmn(li.get(4).text());
    p.setMastercard(li.get(6).text());
    p.setSsn("");

    // expiry is "month/year"; the day is pinned to the 1st, the time of day stays "now"
    String[] expiryParts = li.get(7).text().split("/");
    Date d = new Date();
    d.setDate(1);
    d.setYear(Integer.parseInt(expiryParts[1]) - 1900);
    d.setMonth(Integer.parseInt(expiryParts[0]) - 1);
    p.setExpires(d);

    p.setCvv2(li.get(8).text());
    p.setFavoritecolor(li.get(9).text());
    p.setOccupation(li.get(10).text());
    p.setCompany(li.get(11).text());
    p.setWebsite(li.get(12).text());

    // vehicle text: first token = year, last token = model, everything between = brand
    String[] vehicleTokens = li.get(13).text().split(" ");
    Vehicle v = new Vehicle();
    v.setModel(vehicleTokens[vehicleTokens.length - 1].trim());
    v.setYear(Integer.parseInt(vehicleTokens[0].trim()));
    // Build the brand from the middle tokens. Deleting the year/model substrings from the
    // whole text (as done previously) also removed them from inside the brand itself,
    // e.g. "1999 BMW M" produced "BW".
    StringBuilder brand = new StringBuilder();
    for (int i = 1; i < vehicleTokens.length - 1; i++) {
        if (i > 1) {
            brand.append(' ');
        }
        brand.append(vehicleTokens[i]);
    }
    v.setBrand(brand.toString().trim());
    p.setVehicle(v);

    p.setUpsnr(li.get(14).text());
    p.setBloodtype(li.get(15).text());
    p.setWeight(li.get(16).text().split("\\(")[1].split(" ")[0]);
    p.setHeight(li.get(17).text().split("\\(")[1].split(" ")[0]);
    p.setGuid(li.get(18).text());
}
From source file:com.amastigote.xdu.query.module.WaterAndElectricity.java
/**
 * Loads the page for the given host suffix and stores the value of its hidden
 * {@code __VIEWSTATE} input in the {@code VIEWSTATE} field.
 *
 * @param host_suffix passed through to {@code getPage}
 * @throws IOException when the page cannot be loaded or contains no hidden
 *                     {@code __VIEWSTATE} input
 */
private void getPageAttributes(String host_suffix) throws IOException {
    Document document = getPage("", host_suffix);
    Elements elements_VIEWSTATE = document.select("input[type=\"hidden\"][name=\"__VIEWSTATE\"]");
    if (elements_VIEWSTATE.isEmpty()) {
        // report an unexpected page through the declared exception instead of letting
        // get(0) fail with an unchecked IndexOutOfBoundsException
        throw new IOException("No hidden __VIEWSTATE input found for host suffix: " + host_suffix);
    }
    VIEWSTATE = elements_VIEWSTATE.get(0).attr("value");
}
From source file:com.aestheticsw.jobkeywords.service.termextractor.impl.indeed.IndeedClient.java
/**
 * Fetches the given Indeed job page and returns its job-details markup: the elements matching
 * {@code #job_header > b > font}, a newline, then the elements matching {@code #job_summary}.
 * <p/>
 *
 * The URL is expected to be one that Indeed returned for a job summary. Parsing relies on
 * JSoup.
 * <p/>
 *
 * TODO convert from JSoup to HtmlCleaner
 *
 * @param url the job-details page to fetch
 * @return header markup and summary markup separated by a newline
 * @throws RuntimeException wrapping the IOException when the page cannot be fetched
 */
public String getIndeedJobDetails(String url) {
    log.debug("Indeed job-details query: " + url);

    final Document page;
    try {
        page = Jsoup.connect(url).get();
    } catch (IOException e) {
        throw new RuntimeException("Indeed-search or JSoup response parser failed, URL: " + url, e);
    }

    final String headerMarkup = page.select("#job_header > b > font").toString();
    final String summaryMarkup = page.select("#job_summary").toString();
    return headerMarkup + "\n" + summaryMarkup;
}