List of usage examples for org.jsoup.nodes.Document#toString()
public String toString()
From source file:com.amastigote.xdu.query.module.EduSystem.java
private @Nullable JSONObject lessonsQuery() throws IOException, JSONException { if (!checkIsLogin(ID)) return null; URL url = new URL(SYS_HOST + "xkAction.do?actionType=6"); HttpURLConnection httpURLConnection = (HttpURLConnection) url.openConnection(); httpURLConnection.setRequestProperty("Cookie", "JSESSIONID=" + SYS_JSESSIONID); httpURLConnection.connect();//from w ww.j a v a 2 s. c o m Document document = Jsoup.parse(httpURLConnection.getInputStream(), "gb2312", httpURLConnection.getURL().toString()); document = Jsoup.parse(document.toString().replaceAll(" ", "")); Elements lessons = document.select("table[class=titleTop2]"); Element lessonsElement = lessons.get(1); Elements lessonsInfo = lessonsElement.select("tr[onmouseout=this.className='even';]"); int lessons_quantity = lessonsInfo.size(); JSONArray jsonArray = new JSONArray(); for (int i = 0; i < lessons_quantity;) { Element lessonInfo = lessonsInfo.get(i); Elements lessonDetails = lessonInfo.select("td"); // if (lessonDetails.get(14).text().equals("")) { i++; continue; } JSONObject JLessonObject = new JSONObject(); JLessonObject.put(CourseKey.ID, lessonDetails.get(1).text()); JLessonObject.put(CourseKey.NAME, lessonDetails.get(2).text()); JLessonObject.put(CourseKey.CREDIT, lessonDetails.get(4).text()); JLessonObject.put(CourseKey.LENGTH, lessonDetails.get(5).text()); JLessonObject.put(CourseKey.ATTR, lessonDetails.get(6).text()); JLessonObject.put(CourseKey.EXAM_TYPE, lessonDetails.get(7).text()); JLessonObject.put(CourseKey.TEACHER, lessonDetails.get(8).text()); JSONArray JLessonTimeAndPosArray = new JSONArray(); JSONObject JLessonTimeAndPos = new JSONObject(); JLessonTimeAndPos.put(CourseKey.WEEK, lessonDetails.get(12).text()); JLessonTimeAndPos.put(CourseKey.WEEK_DAY, lessonDetails.get(13).text()); JLessonTimeAndPos.put(CourseKey.SECTION_TIME, lessonDetails.get(14).text()); JLessonTimeAndPos.put(CourseKey.SECTION_LENGTH, lessonDetails.get(15).text()); JLessonTimeAndPos.put(CourseKey.CAMPUS, 
lessonDetails.get(16).text()); JLessonTimeAndPos.put(CourseKey.BUILDING, lessonDetails.get(17).text()); JLessonTimeAndPos.put(CourseKey.CLASSROOM, lessonDetails.get(18).text()); JLessonTimeAndPosArray.put(JLessonTimeAndPos); i++; //??Array int row_span; //row_span?1 if ("".equals(lessonInfo.select("td").get(0).attr("rowspan"))) { row_span = 1; } else { row_span = Integer.parseInt(lessonInfo.select("td").get(0).attr("rowspan")); } //row_span?1?? for (int j = 0; j < row_span - 1; j++, i++) { Elements EExtraTimeAndPos = lessonsInfo.get(i).select("td"); JSONObject JExtraLessonTimeAndPos = new JSONObject(); JExtraLessonTimeAndPos.put(CourseKey.WEEK, EExtraTimeAndPos.get(0).text()); JExtraLessonTimeAndPos.put(CourseKey.WEEK_DAY, EExtraTimeAndPos.get(1).text()); JExtraLessonTimeAndPos.put(CourseKey.SECTION_TIME, EExtraTimeAndPos.get(2).text()); JExtraLessonTimeAndPos.put(CourseKey.SECTION_LENGTH, EExtraTimeAndPos.get(3).text()); JExtraLessonTimeAndPos.put(CourseKey.CAMPUS, EExtraTimeAndPos.get(4).text()); JExtraLessonTimeAndPos.put(CourseKey.BUILDING, EExtraTimeAndPos.get(5).text()); JExtraLessonTimeAndPos.put(CourseKey.CLASSROOM, EExtraTimeAndPos.get(6).text()); JLessonTimeAndPosArray.put(JExtraLessonTimeAndPos); } JLessonObject.put(CourseKey.TIME_AND_LOCATION_DERAIL, JLessonTimeAndPosArray); jsonArray.put(JLessonObject); } return new JSONObject().put("ARRAY", jsonArray); }
From source file:com.zacwolf.commons.email.Email.java
public String getAsHTML() { final org.jsoup.nodes.Document doc = Jsoup.parse(getBody(), "UTF-8"); prepareImgs(doc, null);//from w ww . j a va 2 s .c o m prepare(doc); return doc.toString(); }
From source file:ie.nuim.cs.dri.metadata.WebSearch.java
/** * * @param title/*from w w w. j av a 2s . c o m*/ * @return */ public String searchCiteSeer(String title) { String htmlString = ""; try { String url = "http://citeseerx.ist.psu.edu/search?" + buildCiteSeerSearchTitle(title); Document doc = Jsoup.connect(url).timeout(30000).get(); htmlString = doc.toString(); // Document doc = Jsoup.parse(getGS()); } catch (IOException ex) { // System.out.println("The server took longer than usual to respond, please try again later on. "+ ex.getLocalizedMessage() +"\n"+ex.getCause()); Logger.getLogger(WebSearch.class.getName()).log(Level.SEVERE, null, ex); } return htmlString; }
From source file:com.muzima.view.forms.HTMLFormWebViewActivity.java
/**
 * Embeds the stored form data into the form template.
 *
 * <p>When the form data has a JSON payload, it is placed as the text of a
 * {@code <div id="pre_populate_data">} inserted as the first child of the
 * template's body.
 *
 * @return the template HTML unchanged when there is no JSON payload, otherwise
 *         the serialized document including the payload element
 */
private String prePopulateData() {
    String json = formData.getJsonPayload();
    if (json == null) {
        return formTemplate.getHtml();
    }

    Document document = Jsoup.parse(formTemplate.getHtml());
    // Insert the payload as text, not markup: concatenating it into an HTML string
    // would let any '<' in the JSON be parsed as tags, corrupting the payload and
    // allowing stored data to inject elements into the form.
    document.body().prependElement("div").attr("id", "pre_populate_data").text(json);
    return document.toString();
}
From source file:com.zacwolf.commons.email.Email.java
public Multipart getAsMultipart() throws MessagingException { /** First we create the "related" htmlmultipart for the html email content: * ?/* ww w . j a v a 2s . co m*/ * msg.setContent() * ? * htmlmultipart [MimeMultipart("related")] * ? * htmlmessageBodyPart [MimeBodyPart] * * EmailAttachment(INLINE) [MimeBodyPart] * * EmailAttachment(INLINE) [BodyPart] * * * **/ final Multipart htmlmultipart = new MimeMultipart("related"); final BodyPart htmlmessageBodyPart = new MimeBodyPart(); htmlmultipart.addBodyPart(htmlmessageBodyPart); final org.jsoup.nodes.Document doc = Jsoup.parse(getBody(), "UTF-8"); prepareImgs(doc, htmlmultipart); prepare(doc); htmlmessageBodyPart.setContent(doc.toString(), "text/html; charset=utf-8"); // populate the top multipart Multipart msgmultipart = htmlmultipart; if (getBodyPlainText() != null) {// Now create a plain-text body part /** * If there is a plain text attachment (and their should always be one), * then an "alternative" type MimeMultipart is added to the structure * ? * msg.setContent() * ? * msgmultipart [MimeMultipart("alternative")] * ? * htmlcontent [MimeBodyPart] * ? * htmlmultipart [MimeMultipart("related")] * ? * htmlmessageBodyPart [MimeBodyPart] * * EmailAttachment(INLINE) [MimeBodyPart] * * EmailAttachment(INLINE) [MimeBodyPart] * * * * plaintxtBodypart [MimeBodyPart] * .setText(message_plaintxt) * * * */ msgmultipart = new MimeMultipart("alternative"); final BodyPart plaintxtBodyPart = new MimeBodyPart(); plaintxtBodyPart.setText(getBodyPlainText()); final BodyPart htmlBodyPart = new MimeBodyPart(); htmlBodyPart.setContent(htmlmultipart); msgmultipart.addBodyPart(plaintxtBodyPart); msgmultipart.addBodyPart(htmlBodyPart); } /** * If there are non-inline attachments, then a "mixed" type * MimeMultipart object has to be added to the structure * ? * msg.setContent() * ? * msgmultipart [MimeMultipart("mixed")] * ? * wrap [MimeBodyPart] * ? * msgmultipart [MimeMultipart("alternative")] * ? * htmlcontent [MimeBodyPart] * ? 
* htmlmultipart [MimeMultipart("related")] * ? * htmlmessageBodyPart [MimeBodyPart] * * EmailAttachment(INLINE) [MimeBodyPart] * * EmailAttachment(INLINE) [MimeBodyPart] * * * * plaintxtBodypart [MimeBodyPart] * .setText(message_plaintxt) * * * * EmailAttachment (non-inline) [MimeBodyPart] * * EmailAttachment (non-inline) [MimeBodyPart] * * * */ Multipart mixed = msgmultipart; final Set<EmailAttachment> noninlineattachments = new HashSet<EmailAttachment>(); for (EmailAttachment attach : getAttachments().values()) if (attach.disposition != null && !attach.disposition.equals(MimeBodyPart.INLINE)) noninlineattachments.add(attach); // If there are non-IN-LINE attachments, we'll have to create another layer "mixed" MultiPart object if (!noninlineattachments.isEmpty()) { mixed = new MimeMultipart("mixed"); //Multiparts are not themselves containers, so create a wrapper BodyPart container final BodyPart wrap = new MimeBodyPart(); wrap.setContent(msgmultipart); mixed.addBodyPart(wrap); for (EmailAttachment attach : noninlineattachments) mixed.addBodyPart(attach); } return mixed; }
From source file:dk.dma.msinm.service.MessageService.java
/**
 * Rewrites the {@code href} of anchors and the {@code src} of images in the given
 * HTML via {@code externalizeLinks}, parsing the markup against the application's
 * base URI so that the URLs point back to the MSI-NM server.
 *
 * @param html the HTML to process
 * @return the processed HTML; blank or {@code null} input is returned unchanged
 */
public String externalizeHtml(String html) {
    // Nothing to rewrite in blank input.
    if (!StringUtils.isNotBlank(html)) {
        return html;
    }

    Document parsed = Jsoup.parse(html, app.getBaseUri());
    externalizeLinks(parsed, "a", "href");
    externalizeLinks(parsed, "img", "src");
    return parsed.toString();
}
From source file:com.aquest.emailmarketing.web.controllers.BroadcastTemplateController.java
/** * Define content.// w ww .j av a 2 s . c om * * @param model the model * @param broadcastTemplate1 the broadcast template1 * @param result the result * @param principal the principal * @return the string * @throws IOException */ @RequestMapping(value = "/defineBcastTemplateContent", method = RequestMethod.POST) public String defineContent(Model model, @Valid @ModelAttribute("broadcastTemplate") BroadcastTemplate broadcastTemplate1, @RequestParam(value = "fromUrl", required = false) String fromUrl, @RequestParam(value = "optimize", required = false) boolean optimize, @RequestParam(value = "baseurl", required = false) String baseUrl, @RequestParam(value = "rel2abs", required = false) boolean rel2abs, BindingResult result, Principal principal) throws IOException { String htmlBodyPrep = ""; BroadcastTemplate broadcastTemplate = broadcastTemplateService .getBroadcastTemplateById(broadcastTemplate1.getId()); broadcastTemplate.setB_template_subject(broadcastTemplate1.getB_template_subject()); if (fromUrl != "") { Document doc = Jsoup.connect(fromUrl).get(); htmlBodyPrep = doc.outerHtml(); broadcastTemplate.setHtmlbody(htmlBodyPrep); System.out.println(htmlBodyPrep); } if (broadcastTemplate1.getHtmlbody() != null) { htmlBodyPrep = broadcastTemplate1.getHtmlbody(); broadcastTemplate.setHtmlbody(htmlBodyPrep); } if (rel2abs == true) { if (baseUrl != null) { System.out.println(baseUrl); Document doc = Jsoup.parse(broadcastTemplate.getHtmlbody(), baseUrl); System.out.println(doc.toString()); Elements images = doc.select("img"); for (Element e : images) { e.attr("src", e.absUrl("src")); System.out.println(e.absUrl("src")); } broadcastTemplate.setHtmlbody(doc.outerHtml()); htmlBodyPrep = doc.outerHtml(); } else { // ovde staviti error handling } } if (optimize == true) { // /* PREMAILER API OPTIONS // * line_length - Line length used by to_plain_text. Boolean, default is 65. // warn_level - What level of CSS compatibility warnings to show (see Warnings). 
// NONE = 0 // SAFE = 1 // POOR = 2 // RISKY = 3 // link_query_string - A string to append to every a href="" link. Do not include the initial ?. // base_url - Used to calculate absolute URLs for local files. // css - Manually specify CSS stylesheets. // css_to_attributes - Copy related CSS attributes into HTML attributes (e.g. background-color to bgcolor) // css_string - Pass CSS as a string // remove_ids - Remove ID attributes whenever possible and convert IDs used as anchors to hashed to avoid collisions in webmail programs. Default is false. // remove_classes - Remove class attributes. Default is false. // remove_comments - Remove html comments. Default is false. // preserve_styles - Whether to preserve any link rel=stylesheet and style elements. Default is false. // preserve_reset - Whether to preserve styles associated with the MailChimp reset code // with_html_string - Whether the html param should be treated as a raw string. // verbose - Whether to print errors and warnings to $stderr. Default is false. // adapter - Which HTML parser to use, either :nokogiri or :hpricot. Default is :hpricot. 
// */ Premailer premailer = new Premailer(); PremailerInterface premailerInterface = premailer.getPremailerInstance(); Map<String, Object> options = new HashMap<String, Object>(); options.put("with_html_string", true); options.put("base_url", fromUrl); premailerInterface.init(broadcastTemplate.getHtmlbody(), options); //premailerInterface.init(htmlBodyPrep, options); broadcastTemplate.setHtmlbody(premailerInterface.inline_css()); System.out.println(premailerInterface.inline_css()); premailer.destroyInstance(); } broadcastTemplate.setPlaintext(broadcastTemplate1.getPlaintext()); System.out.println(broadcastTemplate.toString()); String bcast_id = broadcastTemplateService.SaveOrUpdate(broadcastTemplate); // Find URLs in html body and add tracking code Urls urls = new Urls(); String html = broadcastTemplate.getHtmlbody(); List<String> urlList = new ArrayList<String>(); Document doc = Jsoup.parse(html); Elements links = doc.select("a[href]"); for (Element link : links) { if (link.attr("abs:href").length() > 5) { urlList.add(link.attr("abs:href")); } } model.addAttribute("urlList", urlList); model.addAttribute("urls", urls); // Google Analytics - utmCampaign List List<String> utmCampaignList = new ArrayList<String>(); utmCampaignList.add("[BROADAST_NAME]"); model.addAttribute("utmCampaignList", utmCampaignList); // Google Analytics - utmSource List List<String> utmSourceList = new ArrayList<String>(); utmSourceList.add("[CAMPAIGN_NAME]"); model.addAttribute("utmSourceList", utmSourceList); // Google Analytics - utmContent List List<String> utmContentList = new ArrayList<String>(); utmContentList.add("[EMAIL]"); // ovde dodati sve varijabilne podatke iz CM_EMAIL_BROADCAST_LIST model.addAttribute("utmContentList", utmContentList); model.addAttribute("broadcastTemplate", broadcastTemplate); return "bcasttemptracking"; }
From source file:com.aquest.emailmarketing.web.controllers.BroadcastController.java
/** * Define content./* ww w. j a v a2s .c o m*/ * * @param model the model * @param broadcast1 the broadcast1 * @param result the result * @param principal the principal * @return the string * @throws IOException */ @RequestMapping(value = "/defineContent", method = RequestMethod.POST) public String defineContent(Model model, @Valid @ModelAttribute("broadcast") Broadcast broadcast1, @RequestParam(value = "fromUrl", required = false) String fromUrl, @RequestParam(value = "optimize", required = false) boolean optimize, @RequestParam(value = "baseurl", required = false) String baseUrl, @RequestParam(value = "rel2abs", required = false) boolean rel2abs, BindingResult result, Principal principal) throws IOException { String htmlBodyPrep = ""; Broadcast broadcast = broadcastService.getBroadcastById(broadcast1.getId()); broadcast.setSubject(broadcast1.getSubject()); if (fromUrl != "") { Document doc = Jsoup.connect(fromUrl).get(); htmlBodyPrep = doc.outerHtml(); broadcast.setHtmlbody(htmlBodyPrep); System.out.println(htmlBodyPrep); } if (broadcast1.getHtmlbody() != null) { htmlBodyPrep = broadcast1.getHtmlbody(); broadcast.setHtmlbody(htmlBodyPrep); System.out.println("Da vidimo: " + htmlBodyPrep); } if (rel2abs == true) { if (baseUrl != null) { System.out.println(baseUrl); Document doc = Jsoup.parse(broadcast.getHtmlbody(), baseUrl); System.out.println(doc.toString()); Elements images = doc.select("img"); for (Element e : images) { e.attr("src", e.absUrl("src")); System.out.println(e.absUrl("src")); } broadcast.setHtmlbody(doc.outerHtml()); htmlBodyPrep = doc.outerHtml(); } else { // ovde staviti error handling } } if (optimize == true) { // /* PREMAILER API OPTIONS // * line_length - Line length used by to_plain_text. Boolean, default is 65. // warn_level - What level of CSS compatibility warnings to show (see Warnings). // NONE = 0 // SAFE = 1 // POOR = 2 // RISKY = 3 // link_query_string - A string to append to every a href="" link. Do not include the initial ?. 
// base_url - Used to calculate absolute URLs for local files. // css - Manually specify CSS stylesheets. // css_to_attributes - Copy related CSS attributes into HTML attributes (e.g. background-color to bgcolor) // css_string - Pass CSS as a string // remove_ids - Remove ID attributes whenever possible and convert IDs used as anchors to hashed to avoid collisions in webmail programs. Default is false. // remove_classes - Remove class attributes. Default is false. // remove_comments - Remove html comments. Default is false. // preserve_styles - Whether to preserve any link rel=stylesheet and style elements. Default is false. // preserve_reset - Whether to preserve styles associated with the MailChimp reset code // with_html_string - Whether the html param should be treated as a raw string. // verbose - Whether to print errors and warnings to $stderr. Default is false. // adapter - Which HTML parser to use, either :nokogiri or :hpricot. Default is :hpricot. // */ Premailer premailer = new Premailer(); PremailerInterface premailerInterface = premailer.getPremailerInstance(); Map<String, Object> options = new HashMap<String, Object>(); options.put("with_html_string", true); options.put("base_url", fromUrl); premailerInterface.init(broadcast.getHtmlbody(), options); //premailerInterface.init(htmlBodyPrep, options); broadcast.setHtmlbody(premailerInterface.inline_css()); System.out.println(premailerInterface.inline_css()); premailer.destroyInstance(); } broadcast.setPlaintext(broadcast1.getPlaintext()); broadcastService.SaveOrUpdate(broadcast); // Find URLs in html body and add tracking code Urls urls = new Urls(); String html = broadcast.getHtmlbody(); //HashSet to avoid duplicates Set<String> urlList = new HashSet<String>(); Document doc = Jsoup.parse(html); Elements links = doc.select("a[href]"); for (Element link : links) { if (link.attr("abs:href").length() > 5) { urlList.add(link.attr("abs:href")); } } model.addAttribute("urlList", urlList); 
model.addAttribute("urls", urls); // Google Analytics - utmCampaign List List<String> utmCampaignList = new ArrayList<String>(); utmCampaignList.add("[BROADAST_NAME]"); model.addAttribute("utmCampaignList", utmCampaignList); // Google Analytics - utmSource List List<String> utmSourceList = new ArrayList<String>(); utmSourceList.add("[CAMPAIGN_NAME]"); model.addAttribute("utmSourceList", utmSourceList); // Google Analytics - utmContent List List<String> utmContentList = new ArrayList<String>(); utmContentList.add("[EMAIL]"); //TODO: add all variables from CM_EMAIL_BROADCAST_LIST model.addAttribute("utmContentList", utmContentList); model.addAttribute("broadcast", broadcast); return "tracking"; }
From source file:blackman.matt.board.Post.java
/** * Formats the HTML on the post text to accurately display it on the post. * * @param post The unformatted text of the post. * @return A formatted version of the post. *//* w w w . jav a 2 s .c om*/ private String formatPostBody(String post) { Document formattedText = Jsoup.parse(post); Pattern p = Pattern.compile("^/.*/index\\.html"); // Red Text Elements redTexts = formattedText.getElementsByClass("heading"); for (Element text : redTexts) { text.wrap("<font color=\"#AF0A0F\"><strong></strong></font>"); } // Green text Elements greenTexts = formattedText.getElementsByClass("quote"); for (Element text : greenTexts) { text.wrap("<font color=\"#789922\"></font>"); } // Board Links Elements boardLinks = formattedText.select("a"); for (Element link : boardLinks) { String url = link.attr("href"); Matcher m = p.matcher(url); if (m.matches()) { link.attr("href", "http://8chan.co" + url); } } // Reply links Elements replyLinks = formattedText.select("a[onclick^=highlightReply"); for (Element reply : replyLinks) { repliedTo.add(reply.attr("href").split("#")[1]); boardLinks.attr("href", "http://8chan.co" + reply.attr("href")); } // Post too long text removal Elements tooLongs = formattedText.getElementsByClass("toolong"); for (Element text : tooLongs) { text.text(""); } return formattedText.toString(); }
From source file:com.aliyun.openservices.odps.console.commands.DescribeResourceCommand.java
@Override public String runHtml(Document dom) throws ODPSConsoleException, OdpsException { Odps odps = getCurrentOdps();//ww w. j a v a2 s . c om if (!(odps.resources().exists(projectName, resourceName))) { throw new ODPSConsoleException("Resource not found : " + resourceName); } Resource r = odps.resources().get(projectName, resourceName); Element element = dom.body().appendElement("div").appendElement("dl"); element.appendElement("dt").text("Name"); element.appendElement("dd").text(r.getName()); element.appendElement("dt").text("Owner"); element.appendElement("dd").text(r.getOwner()); element.appendElement("dt").text("Type"); element.appendElement("dd").text(String.valueOf(r.getType())); if (r.getType() == Resource.Type.TABLE) { TableResource tr = (TableResource) r; String tableSource = tr.getSourceTable().getProject() + "." + tr.getSourceTable().getName(); if (tr.getSourceTablePartition() != null) { tableSource += " partition(" + tr.getSourceTablePartition().toString() + ")"; } element.appendElement("dt").text("SourceTableName"); element.appendElement("dd").text(tableSource); } element.appendElement("dt").text("Comment"); element.appendElement("dd").text(r.getComment()); element.appendElement("dt").text("CreatedTime"); element.appendElement("dd").text(DATE_FORMAT.format(r.getCreatedTime())); element.appendElement("dt").text("LastModifiedTime"); element.appendElement("dd").text(DATE_FORMAT.format(r.getLastModifiedTime())); return dom.toString(); }