List of usage examples for org.jsoup.nodes Document outputSettings
OutputSettings outputSettings
To view the source code for org.jsoup.nodes Document outputSettings, click the Source Link.
From source file:com.maxl.java.aips2xml.Aips2Xml.java
static String updateSectionPackungen(String title, Map<String, ArrayList<String>> pack_info, String regnr_str, String content_str, List<String> tIndex_list) { Document doc = Jsoup.parse(content_str, "UTF-16"); List<String> pinfo_str = new ArrayList<String>(); int index = 0; // Extract swissmedicno5 registration numbers List<String> swissmedicno5_list = Arrays.asList(regnr_str.split("\\s*,\\s*")); for (String s : swissmedicno5_list) { // Extract original / generika info + Selbstbehalt info from "add_info_map" String orggen_str = ""; String flagsb_str = ""; String addinfo_str = add_info_map.get(s); if (addinfo_str != null) { List<String> ai_list = Arrays.asList(addinfo_str.split("\\s*;\\s*")); if (ai_list != null) { if (!ai_list.get(0).isEmpty()) orggen_str = ", " + ai_list.get(0); if (!ai_list.get(1).isEmpty()) flagsb_str = ", " + ai_list.get(1); }/*w ww . j a va 2 s . c om*/ } // Now generate many swissmedicno8 = swissmedicno5 + ***, check if they're keys and retrieve package info String swissmedicno8_key = ""; for (int n = 0; n < 1000; ++n) { if (n < 10) swissmedicno8_key = s + String.valueOf(n).format("00%d", n); else if (n < 100) swissmedicno8_key = s + String.valueOf(n).format("0%d", n); else swissmedicno8_key = s + String.valueOf(n).format("%d", n); // Check if swissmedicno8_key is a key of the map if (pack_info.containsKey(swissmedicno8_key)) { ArrayList<String> pi_row = package_info.get(swissmedicno8_key); if (pi_row != null) { // --> Add "ausser Handel" information String withdrawn_str = ""; if (pi_row.get(10).length() > 0) withdrawn_str = ", " + pi_row.get(10); // --> Add public price information if (pi_row.get(7).length() > 0) { // Remove double spaces in title String medtitle = capitalizeFully(pi_row.get(1).replaceAll("\\s+", " "), 1); // Remove [QAP?] -> not an easy one! 
medtitle = medtitle.replaceAll("\\[(.*?)\\?\\] ", ""); pinfo_str.add("<p class=\"spacing1\">" + medtitle + ", " + pi_row.get(7) + withdrawn_str + " [" + pi_row.get(5) + pi_row.get(11) + pi_row.get(12) + flagsb_str + orggen_str + "]</p>"); } else { // Remove double spaces in title String medtitle = capitalizeFully(pi_row.get(1).replaceAll("\\s+", " "), 1); // Remove [QAP?] -> not an easy one! medtitle = medtitle.replaceAll("\\[(.*?)\\?\\] ", ""); if (DB_LANGUAGE.equals("de")) { pinfo_str.add("<p class=\"spacing1\">" + medtitle + ", " + "k.A." + withdrawn_str + " [" + pi_row.get(5) + pi_row.get(11) + pi_row.get(12) + flagsb_str + orggen_str + "]</p>"); } else if (DB_LANGUAGE.equals("fr")) { pinfo_str.add("<p class=\"spacing1\">" + medtitle + ", " + "prix n.s." + withdrawn_str + " [" + pi_row.get(5) + pi_row.get(11) + pi_row.get(12) + flagsb_str + orggen_str + "]</p>"); } } // --> Add "tindex_str" and "application_str" (see SqlDatabase.java) if (index == 0) { tIndex_list.add(pi_row.get(9)); // therapeutic index tIndex_list.add(pi_row.get(6)); // application area index++; } } } } } // In case the pinfo_str is empty due to malformed XML /* if (pinfo_str.isEmpty()) html_utils.extractPackSection(); */ // In case nothing was found if (index == 0) { tIndex_list.add(""); tIndex_list.add(""); } // Replace original package information with pinfo_str String p_str = ""; mPackSection_str = ""; for (String p : pinfo_str) { p_str += p; } // Generate a html-deprived string file mPackSection_str = p_str.replaceAll("\\<p.*?\\>", ""); mPackSection_str = mPackSection_str.replaceAll("<\\/p\\>", "\n"); // Remove last \n if (mPackSection_str.length() > 0) mPackSection_str = mPackSection_str.substring(0, mPackSection_str.length() - 1); doc.outputSettings().escapeMode(EscapeMode.xhtml); Element div7800 = doc.select("[id=Section7800]").first(); if (div7800 != null) { div7800.html("<div class=\"absTitle\">Packungen</div>" + p_str); } else { Element div18 = doc.select("[id=section18]").first(); 
if (div18 != null) { div18.html("<div class=\"absTitle\">Packungen</div>" + p_str); } else { if (SHOW_ERRORS) System.err.println(">> ERROR: elem is null, sections 18/7800 does not exist: " + title); } } return doc.html(); }
From source file:com.blackducksoftware.tools.nrt.generator.NRTReportGenerator.java
private void writeOutLicenseText(String componentName, PrintStream outputTextFile) { try {//from ww w . ja v a 2 s. c om outputTextFile.println(); outputTextFile.println("License texts (" + (componentMap.get(componentName).getLicenses() != null ? componentMap.get(componentName).getLicenses().size() : "0") + ")"); int licenseCounter = 0; if (componentMap.get(componentName).getLicenses() != null) { for (LicenseModel license : componentMap.get(componentName).getLicenseModels()) { String licenseName = license.getName() != null ? license.getName() + "(Taken from KnowledgeBase)" : "license_" + licenseCounter + "(Taken from scanned file)"; if (nrtConfig.isTextFileOutput()) { outputTextFile.println(); outputTextFile.println( "=========================================================================="); outputTextFile.println(licenseName); outputTextFile.print(StringEscapeUtils.unescapeHtml(Jsoup.clean(license.getText(), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)))); } licenseCounter++; } // for all licenses } // if licenses exist } catch (Exception e) { log.error("Error writing out licenses", e); } }
From source file:com.maxl.java.aips2sqlite.RealExpertInfo.java
private String updateSectionPackungen(String title, String atc_code, Map<String, ArrayList<String>> pack_info, String regnr_str, String content_str, List<String> tIndex_list) { Document doc = Jsoup.parse(content_str, "UTF-16"); // package info string for original List<String> pinfo_originals_str = new ArrayList<String>(); // package info string for generika List<String> pinfo_generics_str = new ArrayList<String>(); // package info string for the rest List<String> pinfo_str = new ArrayList<String>(); // String containg all barcodes List<String> barcode_list = new ArrayList<String>(); int index = 0; // Extract swissmedicno5 registration numbers List<String> swissmedicno5_list = Arrays.asList(regnr_str.split("\\s*,\\s*")); for (String smno5 : swissmedicno5_list) { // Extract original / generika info + Selbstbehalt info from // "add_info_map" String orggen_str = ""; // O=Original, G=Generika String flagsb_str = ""; // SB=Selbstbehalt String addinfo_str = m_add_info_map.get(smno5); if (addinfo_str != null) { List<String> ai_list = Arrays.asList(addinfo_str.split("\\s*;\\s*")); if (ai_list != null) { if (!ai_list.get(0).isEmpty()) orggen_str = ", " + ai_list.get(0); // O + G if (!ai_list.get(1).isEmpty()) flagsb_str = ", " + ai_list.get(1); // SB }//from w ww . j av a 2 s . 
c o m } // Now generate many swissmedicno8 = swissmedicno5 + ***, check if they're keys and retrieve package info String swissmedicno8_key = ""; for (int n = 0; n < 1000; ++n) { swissmedicno8_key = getSwissmedicNo8(smno5, n); // Check if swissmedicno8_key is a key of the map if (pack_info.containsKey(swissmedicno8_key)) { ArrayList<String> pi_row = m_package_info.get(swissmedicno8_key); if (pi_row != null) { // This string is used for "shopping carts" and contatins: // Prparatname | Package size | Package unit | Public price // | Exfactory price | Spezialittenliste, Swissmedic Kategorie, Limitations // | EAN code | Pharma code String barcode_html = ""; String pup = pi_row.get(7); // public price String efp = pi_row.get(8); // exfactory price String fep = ""; String fap = ""; String vat = ""; String eancode = pi_row.get(14); int visible = 0xff; // by default visible to all! int has_free_samples = 0x00; // by default no free samples // Exctract fep and fap pricing information // FAP = Fabrikabgabepreis = EFP? // FEP = Fachhandelseinkaufspreis // EFP = FAP < FEP < PUP if (m_map_products != null && eancode != null && m_map_products.containsKey(eancode)) { Product product = m_map_products.get(eancode); // Correct these prices, if necessary... the m_map_products info comes from the owner directly! 
// @maxl: Added on 30.08.2015 if (product.efp > 0.0f) efp = String.format("CHF %.2f", product.efp); if (product.pp > 0.0f) pup = String.format("CHF %.2f", product.pp); if (product.fap > 0.0f) fap = String.format("CHF %.2f", product.fap); if (product.fep > 0.0f) fep = String.format("CHF %.2f", product.fep); if (product.vat > 0.0f) vat = String.format("%.2f", product.vat); visible = product.visible; has_free_samples = product.free_sample; } // Some articles are listed in swissmedic_packages file but are not in the refdata file if (pi_row.get(10).equals("a.H.")) { pi_row.set(10, "ev.nn.i.H."); } if (pi_row.get(10).equals("p.c.")) { pi_row.set(10, "ev.ep.e.c."); } // Add only if medication is "in Handel" -> check pi_row.get(10) if (pi_row.get(10).isEmpty() || pi_row.get(10).equals("ev.nn.i.H.") || pi_row.get(10).equals("ev.ep.e.c.")) { // --> Extract EAN-13 or EAN-12 and generate barcodes try { if (!eancode.isEmpty()) { BarCode bc = new BarCode(); if (eancode.length() == 12) { int cs = bc.getChecksum(eancode); eancode += cs; } String barcodeImg64 = bc.encode(eancode); barcode_html = "<p class=\"barcode\">" + barcodeImg64 + "</p>"; barcode_list.add(barcode_html); } } catch (IOException e) { e.printStackTrace(); } m_list_of_packages.add(pi_row.get(1) + "|" + pi_row.get(3) + "|" + pi_row.get(4) + "|" + efp + "|" + pup + "|" + fap + "|" + fep + "|" + vat + "|" + pi_row.get(5) + ", " + pi_row.get(11) + ", " + pi_row.get(12) + "|" + eancode + "|" + pi_row.get(15) + "|" + visible + "|" + has_free_samples + "\n"); m_list_of_eancodes.add(eancode); } // Remove double spaces in title and capitalize String medtitle = capitalizeFully(pi_row.get(1).replaceAll("\\s+", " "), 1); // Remove [QAP?] -> not an easy one! medtitle = medtitle.replaceAll("\\[(.*?)\\?\\] ", ""); // --> Add "ausser Handel" information String withdrawn_str = ""; if (pi_row.get(10).length() > 0) withdrawn_str = ", " + pi_row.get(10); // --> Add ex factory price information String price_efp = !efp.isEmpty() ? 
"EFP" + efp.replace("CHF", "") : "FEP" + fep.replace("CHF", ""); String price_pp = !pup.isEmpty() ? ", PP" + pup.replace("CHF", "") : ""; if (efp.length() > 0 || fep.length() > 0) { // The rest of the package information String append_str = ", " + price_efp + price_pp + withdrawn_str + " [" + pi_row.get(5) + pi_row.get(11) + pi_row.get(12) + flagsb_str + orggen_str + "]"; // Generate package info string if (orggen_str.equals(", O")) pinfo_originals_str.add( "<p class=\"spacing1\">" + medtitle + append_str + "</p>" + barcode_html); else if (orggen_str.equals(", G")) pinfo_generics_str.add( "<p class=\"spacing1\">" + medtitle + append_str + "</p>" + barcode_html); else pinfo_str.add( "<p class=\"spacing1\">" + medtitle + append_str + "</p>" + barcode_html); } else { // // @maxl (10.01.2014): Price for swissmedicNo8 pack is not listed in bag_preparations.xml!! // pinfo_str.add("<p class=\"spacing1\">" + medtitle + withdrawn_str + " [" + pi_row.get(5) + "]</p>" + barcode_html); } // --> Add "tindex_str" and "application_str" (see // SqlDatabase.java) if (index == 0) { tIndex_list.add(pi_row.get(9)); // therapeutic index tIndex_list.add(pi_row.get(6)); // application area index++; } } } } } // Re-order the string alphabetically if (!m_list_of_packages.isEmpty()) { Collections.sort(m_list_of_packages, new AlphanumComp()); } if (!pinfo_originals_str.isEmpty()) { Collections.sort(pinfo_originals_str, new AlphanumComp()); } if (!pinfo_generics_str.isEmpty()) { Collections.sort(pinfo_generics_str, new AlphanumComp()); } if (!pinfo_str.isEmpty()) { Collections.sort(pinfo_str, new AlphanumComp()); } // Concatenate lists... 
pinfo_originals_str.addAll(pinfo_generics_str); pinfo_originals_str.addAll(pinfo_str); // Put everything in pinfo_str pinfo_str = pinfo_originals_str; // In case nothing was found if (index == 0) { tIndex_list.add(""); tIndex_list.add(""); } /* * Replace package information */ if (CmlOptions.PLAIN == false) { // Replace original package information with pinfo_str String p_str = ""; for (String p : pinfo_str) { p_str += p; } // Generate a html-deprived string file m_pack_info_str = p_str.replaceAll("<p class=\"spacing1\">[<](/)?img[^>]*[>]</p>", ""); m_pack_info_str = m_pack_info_str.replaceAll("<p class=\"barcode\">[<](/)?img[^>]*[>]</p>", ""); m_pack_info_str = m_pack_info_str.replaceAll("\\<p.*?\\>", ""); m_pack_info_str = m_pack_info_str.replaceAll("<\\/p\\>", "\n"); // Remove last \n if (m_pack_info_str.length() > 0) m_pack_info_str = m_pack_info_str.substring(0, m_pack_info_str.length() - 1); doc.outputSettings().escapeMode(EscapeMode.xhtml); Element div7800 = doc.select("[id=Section7800]").first(); // Initialize section titles String packages_title = "Packungen"; String swiss_drg_title = "Swiss DRG"; if (CmlOptions.DB_LANGUAGE.equals("fr")) { packages_title = "Prsentation"; swiss_drg_title = "Swiss DRG"; } // Generate html for chapter "Packagungen" and subchapter "Swiss DRGs" // ** Chapter "Packungen" String section_html = "<div class=\"absTitle\">" + packages_title + "</div>" + p_str; // ** Subchapter "Swiss DRGs" // Loop through list of dosages for a particular atc code and format appropriately if (atc_code != null) { // Update DRG footnote super scripts String footnotes = "1"; String fn = m_swiss_drg_footnote.get(atc_code); if (fn != null) footnotes += (", " + fn); // Generate Swiss DRG string String drg_str = ""; ArrayList<String> dosages = m_swiss_drg_info.get(atc_code); // For most atc codes, there are NO special DRG sanctioned dosages... 
if (dosages != null) { System.out.println(title + " (DRG)"); for (String drg : dosages) drg_str += "<p class=\"spacing1\">" + drg + "</p>"; if (!drg_str.isEmpty()) { section_html += ("<p class=\"paragraph\"></p><div class=\"absTitle\">" + swiss_drg_title + "<sup>" + footnotes + "</sup></div>" + drg_str); } section_html += "<p class=\"noSpacing\"></p>"; if (CmlOptions.DB_LANGUAGE.equals("de")) { section_html += "<p class=\"spacing1\"><sup>1</sup> Alle Spitler mssen im Rahmen der jhrlichen Datenerhebung (Detaillieferung) die SwissDRG AG zwingend ber die Hhe der in Rechnung gestellten Zusatzentgelte informieren.</p>"; section_html += "<p class=\"spacing1\"><sup>2</sup> Eine zustzliche Abrechnung ist im Zusammenhang mit einer Fallpauschale der Basis-DRGs L60 oder L71 nicht mglich.</p>"; section_html += "<p class=\"spacing1\"><sup>3</sup> Eine Abrechnung des Zusatzentgeltes ist nur ber die in der Anlage zum Fallpauschalenkatalog aufgefhrten Dosisklassen mglich.</p>"; section_html += "<p class=\"spacing1\"><sup>4</sup> Dieses Zusatzentgelt ist nur abrechenbar fr Patienten mit einem Alter < 15 Jahre.</p>"; section_html += "<p class=\"spacing1\"><sup>5</sup> Dieses Zusatzentgelt darf nicht zustzlich zur DRG A91Z abgerechnet werden, da in dieser DRG Apheresen die Hauptleistung darstellen. 
" + "Die Verfahrenskosten der Apheresen sind in dieser DRG bereits vollumfnglich enthalten.</p>"; } else if (CmlOptions.DB_LANGUAGE.equals("fr")) { section_html += "<p class=\"spacing1\"><sup>1</sup> Tous les hpitaux doivent imprativement informer SwissDRG SA lors du relev (relev dtaill) sur le montant des rmunrations supplmentaires factures.</p>"; section_html += "<p class=\"spacing1\"><sup>2</sup> Une facturation supplmentaire aux forfaits par cas des DRG de base L60 ou L71 nest pas possible.</p>"; section_html += "<p class=\"spacing1\"><sup>3</sup> Une facturation des rmunration supplmentaires n'est possible que pour les classes de dosage dfinies dans cette annexe.</p>"; section_html += "<p class=\"spacing1\"><sup>4</sup> Cette rmunration supplmentaire n'est facturable que pour les patients gs de moins de 15 ans.</p>"; section_html += "<p class=\"spacing1\"><sup>5</sup> Cette rmunration supplmentaire ne peut pas tre facture en plus du DRG A91Z, la prestation principale de ce DRG tant l'aphrse. " + "Les cots du traitement par aphrse sont dj intgralement compris dans le DRG.</p>"; } } } if (div7800 != null) { div7800.html(section_html); } else { Element div18 = doc.select("[id=section18]").first(); if (div18 != null) { div18.html(section_html); } else { if (CmlOptions.SHOW_ERRORS) System.err.println(">> ERROR: elem is null, sections 18/7800 does not exist: " + title); } } } return doc.html(); }
From source file:org.asqatasun.rules.doc.utils.rga33.extractor.Rgaa3Extractor.java
private static void createTestcaseFiles() throws IOException { File srcDir = new File(RGAA3_TESTCASE_PATH); for (File file : srcDir.listFiles()) { String fileName = file.getName().replace("Rgaa30Rule", "").replace(".java", ""); String theme = fileName.substring(0, 2); String crit = fileName.substring(2, 4); String test = fileName.substring(4, 6); String testKey = Integer.valueOf(theme).toString() + "-" + Integer.valueOf(crit).toString() + "-" + Integer.valueOf(test).toString(); String wrongKey = theme + "." + crit + "." + test; for (File testcase : file.listFiles()) { if (testcase.isFile() && testcase.getName().contains("html")) { Document doc = Jsoup.parse(FileUtils.readFileToString(testcase)); Element detail = doc.select(".test-detail").first(); if (detail == null) { System.out.println(doc.outerHtml()); } else { detail.tagName("div"); detail.text(""); for (Element el : detail.children()) { el.remove();//from w w w. j a va 2s . co m } if (!detail.hasAttr("lang")) { detail.attr("lang", "fr"); } detail.append("\n" + RGAA3.get(testKey).ruleRawHtml + "\n"); doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml); doc.outputSettings().outline(false); doc.outputSettings().indentAmount(4); String outputHtml = doc.outerHtml(); if (outputHtml.contains(wrongKey)) { outputHtml = outputHtml.replaceAll(wrongKey, RGAA3.get(testKey).getRuleDot()); } FileUtils.writeStringToFile(testcase, outputHtml); } } } } }
From source file:org.b3log.solo.plugin.list.ListHandler.java
@Override public void action(final Event<JSONObject> event) throws EventException { final JSONObject data = event.getData(); final JSONObject article = data.optJSONObject(Article.ARTICLE); String content = article.optString(Article.ARTICLE_CONTENT); final Document doc = Jsoup.parse(content, StringUtils.EMPTY, Parser.htmlParser()); doc.outputSettings().prettyPrint(false); final StringBuilder listBuilder = new StringBuilder(); listBuilder.append("<link rel=\"stylesheet\" type=\"text/css\" href=\"" + Latkes.getStaticServePath() + "/plugins/list/style.css\" />"); final Elements hs = doc.select("h1, h2, h3, h4, h5"); listBuilder.append("<ul class='b3-solo-list'>"); for (int i = 0; i < hs.size(); i++) { final Element element = hs.get(i); final String tagName = element.tagName().toLowerCase(); final String text = element.text(); final String id = "b3_solo_" + tagName + "_" + i; element.before("<span id='" + id + "'></span>"); listBuilder.append("<li class='b3-solo-list-").append(tagName).append("'><a href='#").append(id) .append("'>").append(text).append("</a></li>"); }// w w w. ja v a 2 s . c o m listBuilder.append("</ul>"); final Element body = doc.getElementsByTag("body").get(0); content = listBuilder.toString() + body.html(); article.put(Article.ARTICLE_CONTENT, content); }
From source file:org.b3log.symphony.service.ArticleQueryService.java
/** * Markdowns the specified article content. * * <ul>/* w ww. j a v a2s . c o m*/ * <li>Markdowns article content/reward content</li> * <li>Generates secured article content/reward content</li> * </ul> * * @param article the specified article content */ public void markdown(final JSONObject article) { String content = article.optString(Article.ARTICLE_CONTENT); final int articleType = article.optInt(Article.ARTICLE_TYPE); if (Article.ARTICLE_TYPE_C_THOUGHT != articleType) { content = Markdowns.toHTML(content); content = Markdowns.clean(content, Latkes.getServePath() + article.optString(Article.ARTICLE_PERMALINK)); } else { final Document.OutputSettings outputSettings = new Document.OutputSettings(); outputSettings.prettyPrint(false); content = Jsoup.clean(content, Latkes.getServePath() + article.optString(Article.ARTICLE_PERMALINK), Whitelist.relaxed().addAttributes(":all", "id", "target", "class").addTags("span", "hr") .addAttributes("iframe", "src", "width", "height") .addAttributes("audio", "controls", "src"), outputSettings); content = content.replace("\n", "\\n").replace("'", "\\'").replace("\"", "\\\""); } article.put(Article.ARTICLE_CONTENT, content); if (article.optInt(Article.ARTICLE_REWARD_POINT) > 0) { String rewardContent = article.optString(Article.ARTICLE_REWARD_CONTENT); rewardContent = Markdowns.toHTML(rewardContent); rewardContent = Markdowns.clean(rewardContent, Latkes.getServePath() + article.optString(Article.ARTICLE_PERMALINK)); article.put(Article.ARTICLE_REWARD_CONTENT, rewardContent); } }
From source file:org.b3log.symphony.util.Markdowns.java
/** * Converts the specified markdown text to HTML. * * @param markdownText the specified markdown text * @return converted HTML, returns an empty string "" if the specified markdown text is "" or {@code null}, returns * 'markdownErrorLabel' if exception/* ww w.jav a 2 s.com*/ */ public static String toHTML(final String markdownText) { if (Strings.isEmptyOrNull(markdownText)) { return ""; } final String cachedHTML = getHTML(markdownText); if (null != cachedHTML) { return cachedHTML; } final ExecutorService pool = Executors.newSingleThreadExecutor(); final long[] threadId = new long[1]; final Callable<String> call = () -> { threadId[0] = Thread.currentThread().getId(); String html = LANG_PROPS_SERVICE.get("contentRenderFailedLabel"); if (MARKED_AVAILABLE) { html = toHtmlByMarked(markdownText); if (!StringUtils.startsWith(html, "<p>")) { html = "<p>" + html + "</p>"; } } else { com.vladsch.flexmark.ast.Node document = PARSER.parse(markdownText); html = RENDERER.render(document); if (!StringUtils.startsWith(html, "<p>")) { html = "<p>" + html + "</p>"; } } final Document doc = Jsoup.parse(html); final List<org.jsoup.nodes.Node> toRemove = new ArrayList<>(); doc.traverse(new NodeVisitor() { @Override public void head(final org.jsoup.nodes.Node node, int depth) { if (node instanceof org.jsoup.nodes.TextNode) { final org.jsoup.nodes.TextNode textNode = (org.jsoup.nodes.TextNode) node; final org.jsoup.nodes.Node parent = textNode.parent(); if (parent instanceof Element) { final Element parentElem = (Element) parent; if (!parentElem.tagName().equals("code")) { String text = textNode.getWholeText(); boolean nextIsBr = false; final org.jsoup.nodes.Node nextSibling = textNode.nextSibling(); if (nextSibling instanceof Element) { nextIsBr = "br".equalsIgnoreCase(((Element) nextSibling).tagName()); } if (null != userQueryService) { try { final Set<String> userNames = userQueryService.getUserNames(text); for (final String userName : userNames) { text = text.replace('@' + userName 
+ (nextIsBr ? "" : " "), "@<a href='" + Latkes.getServePath() + "/member/" + userName + "'>" + userName + "</a> "); } text = text.replace("@participants ", "@<a href='https://hacpai.com/article/1458053458339' class='ft-red'>participants</a> "); } finally { JdbcRepository.dispose(); } } if (text.contains("@<a href=")) { final List<org.jsoup.nodes.Node> nodes = Parser.parseFragment(text, parentElem, ""); final int index = textNode.siblingIndex(); parentElem.insertChildren(index, nodes); toRemove.add(node); } else { textNode.text(Pangu.spacingText(text)); } } } } } @Override public void tail(org.jsoup.nodes.Node node, int depth) { } }); toRemove.forEach(node -> node.remove()); doc.select("pre>code").addClass("hljs"); doc.select("a").forEach(a -> { String src = a.attr("href"); if (!StringUtils.startsWithIgnoreCase(src, Latkes.getServePath())) { try { src = URLEncoder.encode(src, "UTF-8"); } catch (final Exception e) { } a.attr("href", Latkes.getServePath() + "/forward?goto=" + src); a.attr("target", "_blank"); } }); doc.outputSettings().prettyPrint(false); String ret = doc.select("body").html(); ret = StringUtils.trim(ret); // cache it putHTML(markdownText, ret); return ret; }; Stopwatchs.start("Md to HTML"); try { final Future<String> future = pool.submit(call); return future.get(MD_TIMEOUT, TimeUnit.MILLISECONDS); } catch (final TimeoutException e) { LOGGER.log(Level.ERROR, "Markdown timeout [md=" + markdownText + "]"); Callstacks.printCallstack(Level.ERROR, new String[] { "org.b3log" }, null); final Set<Thread> threads = Thread.getAllStackTraces().keySet(); for (final Thread thread : threads) { if (thread.getId() == threadId[0]) { thread.stop(); break; } } } catch (final Exception e) { LOGGER.log(Level.ERROR, "Markdown failed [md=" + markdownText + "]", e); } finally { pool.shutdownNow(); Stopwatchs.end(); } return LANG_PROPS_SERVICE.get("contentRenderFailedLabel"); }
From source file:org.b3log.symphony.util.Markdowns.java
/** * Gets the safe HTML content of the specified content. * * @param content the specified content/*from www. j ava 2s. c o m*/ * @param baseURI the specified base URI, the relative path value of href will starts with this URL * @return safe HTML content */ public static String clean(final String content, final String baseURI) { final Document.OutputSettings outputSettings = new Document.OutputSettings(); outputSettings.prettyPrint(false); final String tmp = Jsoup.clean(content, baseURI, Whitelist.relaxed().addAttributes(":all", "id", "target", "class") .addTags("span", "hr", "kbd", "samp", "tt", "del", "s", "strike", "u") .addAttributes("iframe", "src", "width", "height", "border", "marginwidth", "marginheight") .addAttributes("audio", "controls", "src") .addAttributes("video", "controls", "src", "width", "height") .addAttributes("source", "src", "media", "type") .addAttributes("object", "width", "height", "data", "type") .addAttributes("param", "name", "value") .addAttributes("input", "type", "disabled", "checked").addAttributes("embed", "src", "type", "width", "height", "wmode", "allowNetworking"), outputSettings); final Document doc = Jsoup.parse(tmp, baseURI, Parser.htmlParser()); final Elements ps = doc.getElementsByTag("p"); for (final Element p : ps) { p.removeAttr("style"); } final Elements iframes = doc.getElementsByTag("iframe"); for (final Element iframe : iframes) { final String src = StringUtils.deleteWhitespace(iframe.attr("src")); if (StringUtils.startsWithIgnoreCase(src, "javascript") || StringUtils.startsWithIgnoreCase(src, "data:")) { iframe.remove(); } } final Elements objs = doc.getElementsByTag("object"); for (final Element obj : objs) { final String data = StringUtils.deleteWhitespace(obj.attr("data")); if (StringUtils.startsWithIgnoreCase(data, "data:") || StringUtils.startsWithIgnoreCase(data, "javascript")) { obj.remove(); continue; } final String type = StringUtils.deleteWhitespace(obj.attr("type")); if 
(StringUtils.containsIgnoreCase(type, "script")) { obj.remove(); } } final Elements embeds = doc.getElementsByTag("embed"); for (final Element embed : embeds) { final String data = StringUtils.deleteWhitespace(embed.attr("src")); if (StringUtils.startsWithIgnoreCase(data, "data:") || StringUtils.startsWithIgnoreCase(data, "javascript")) { embed.remove(); continue; } } final Elements as = doc.getElementsByTag("a"); for (final Element a : as) { a.attr("rel", "nofollow"); final String href = a.attr("href"); if (href.startsWith(Latkes.getServePath())) { continue; } a.attr("target", "_blank"); } final Elements audios = doc.getElementsByTag("audio"); for (final Element audio : audios) { audio.attr("preload", "none"); } final Elements videos = doc.getElementsByTag("video"); for (final Element video : videos) { video.attr("preload", "none"); } String ret = doc.body().html(); ret = ret.replaceAll("(</?br\\s*/?>\\s*)+", "<br>"); // patch for Jsoup issue return ret; }
From source file:org.dswarm.xmlenhancer.XMLEnhancer.java
private static void unescapeEntity(final PrintWriter out, final Node node) { node.ownerDocument().outputSettings().escapeMode(Entities.EscapeMode.xhtml) .syntax(Document.OutputSettings.Syntax.xml).prettyPrint(false); if (node instanceof TextNode) { final TextNode textNode = (TextNode) node; final String wholeText = textNode.getWholeText(); out.print(wholeText);//from ww w . jav a 2 s . c om return; } final String nodeString = node.toString(); final String unescapedNodeString = Parser.unescapeEntities(nodeString, true); out.print(unescapedNodeString); }