List of usage examples for org.jsoup.nodes Element removeAttr
public Node removeAttr(String attributeKey)
From source file:com.aurel.track.exchange.docx.exporter.PreprocessImage.java
/** * Gets the image captions in a map keyed by itemID_attachmentID * The key is saved also in the <img> tag's "alt" attribute for later use from word * @param doc/*from w w w . jav a2 s .co m*/ * @param personID * @param imageCaptionsMap * @return */ private String getImageCaptions(Document doc, Integer personID, Map<String, ImageOrTableCaption> imageCaptionsMap) { Elements imgElements = doc.select("img"); if (imgElements != null) { for (Iterator<Element> iterator = imgElements.iterator(); iterator.hasNext();) { Element imageElement = iterator.next(); String sourceAttribute = imageElement.attr("src"); String style = imageElement.attr("style"); //remove the width and height attributes from html img to avoid java.lang.OutOfMemoryError: Java heap space imageElement.removeAttr("width"); imageElement.removeAttr("height"); ALIGN align = null; if (style != null) { if (style.contains("float:left")) { align = ALIGN.LEFT; } else { if (style.contains("float:right")) { align = ALIGN.RIGHT; } } } String altAttribute = imageElement.attr("alt"); Map<String, String> map = getTemporaryFilePathMap(sourceAttribute, personID); if (map != null) { imageElement.attr("src", map.get("temporaryFilePath")); //save imageCaption into the map and now use the "alt" attribute for storing the merged key //which will be transformed in nonvisualdrawingprops.getDescr() by XHTMLImporterImpl to set the caption on the ms word side String imageCaption = null; if (altAttribute != null && !"".equals(altAttribute)) { //probably from previously removed figcaption but it may also be explicitly set imageCaption = altAttribute; } else { imageCaption = map.get("description"); } globalCounter++; counterWithinChapter++; imageElement.attr("alt", String.valueOf(globalCounter)); if (imageCaption == null) { //add anyway to the map even as empty string because this marks the image to be added to the List of figures imageCaption = ""; } imageCaptionsMap.put(String.valueOf(globalCounter), new 
ImageOrTableCaption(chapterNo, counterWithinChapter, imageCaption, align)); } } } return doc.body().html(); }
From source file:com.near.chimerarevo.fragments.PostFragment.java
/**
 * Adds the text of each qualifying paragraph via {@code addText}.
 * <p>
 * A paragraph is skipped when its inner HTML starts with "&amp;", "&lt;iframe" or "&lt;!", or
 * contains "&lt;h", "&lt;ol", "&lt;ul", "&lt;pre" or "&lt;tr". For the remaining paragraphs the
 * images are passed to {@code parseNormalImages} and removed, "href" is stripped from links
 * whose target starts with "#", and the remaining HTML (without "&lt;br /&gt;" and newlines,
 * trimmed) is added as text when non-empty.
 *
 * @param ps the paragraph elements to process; they are modified in place
 */
private void parseParagraphs(Elements ps) {
    for (Element p : ps) {
        // Serialize the paragraph once; the original re-rendered it for every single check.
        final String html = p.html();
        final boolean skipped = html.startsWith("&")
                || html.startsWith("<iframe")
                || html.startsWith("<!")
                || html.contains("<h")
                || html.contains("<ol")
                || html.contains("<ul")
                || html.contains("<pre")
                || html.contains("<tr");
        if (skipped) {
            continue;
        }

        parseNormalImages(p.select("img"));
        // select again instead of reusing the list above, exactly as the original did
        p.select("img").remove();

        for (Element lnk : p.getElementsByTag("a")) {
            if (lnk.attr("href").startsWith("#")) {
                lnk.removeAttr("href");
            }
        }

        // must be re-rendered here: images and in-page hrefs were just removed
        String txt = p.html().replace("<br />", "").replace("\n", "").trim();
        if (txt.length() > 0) {
            addText(txt, true, Typeface.DEFAULT);
        }
    }
}
From source file:com.near.chimerarevo.fragments.PostFragment.java
private void parseBulletedLists(Elements itms) { String bld = ""; for (Element itm : itms) { Elements str = itm.getElementsByTag("li"); for (Element itm2 : str) { if (itm2.children().size() >= 1) { Elements ch = itm2.getElementsByTag("a"); for (Element c : ch) { if (c.attr("href").contains("#")) c.removeAttr("href"); }//from w ww . j a v a2s . c o m } bld += ("\u2022 " + itm2.outerHtml() + "<br />"); } } addText(bld, true, Typeface.DEFAULT); }
From source file:no.kantega.publishing.admin.content.htmlfilter.ConvertUnderlineToEditorStyleFilter.java
@Override public Document runFilter(Document document) { for (Element span : document.getElementsByTag("span")) { String style = span.attr("style"); if (isNotBlank(style)) { String textDecoration = getSubAttributeValue(style, "text-decoration"); if ("underline".equalsIgnoreCase(textDecoration)) { span.removeAttr("style"); span.tagName("u"); }//from w ww. j a va 2 s. c o m } } return document; }
From source file:no.kantega.publishing.admin.content.htmlfilter.ReplaceStyleAlignWithAttributeAlignFilter.java
@Override public Document runFilter(Document document) { for (String tag : tags) { for (Element element : document.getElementsByTag(tag)) { String style = element.attr("style"); if (isNotBlank(style)) { if (style.contains("right")) { element.attr("align", "right"); } else if (style.contains("left")) { element.attr("align", "left"); } else if (style.contains("center")) { element.attr("align", "center"); }//from w ww . j a va 2 s . c o m element.removeAttr("style"); } } } return document; }
From source file:org.b3log.symphony.util.Markdowns.java
/** * Gets the safe HTML content of the specified content. * * @param content the specified content/* w w w . ja va 2 s. c om*/ * @param baseURI the specified base URI, the relative path value of href will starts with this URL * @return safe HTML content */ public static String clean(final String content, final String baseURI) { final Document.OutputSettings outputSettings = new Document.OutputSettings(); outputSettings.prettyPrint(false); final String tmp = Jsoup.clean(content, baseURI, Whitelist.relaxed().addAttributes(":all", "id", "target", "class") .addTags("span", "hr", "kbd", "samp", "tt", "del", "s", "strike", "u") .addAttributes("iframe", "src", "width", "height", "border", "marginwidth", "marginheight") .addAttributes("audio", "controls", "src") .addAttributes("video", "controls", "src", "width", "height") .addAttributes("source", "src", "media", "type") .addAttributes("object", "width", "height", "data", "type") .addAttributes("param", "name", "value") .addAttributes("input", "type", "disabled", "checked").addAttributes("embed", "src", "type", "width", "height", "wmode", "allowNetworking"), outputSettings); final Document doc = Jsoup.parse(tmp, baseURI, Parser.htmlParser()); final Elements ps = doc.getElementsByTag("p"); for (final Element p : ps) { p.removeAttr("style"); } final Elements iframes = doc.getElementsByTag("iframe"); for (final Element iframe : iframes) { final String src = StringUtils.deleteWhitespace(iframe.attr("src")); if (StringUtils.startsWithIgnoreCase(src, "javascript") || StringUtils.startsWithIgnoreCase(src, "data:")) { iframe.remove(); } } final Elements objs = doc.getElementsByTag("object"); for (final Element obj : objs) { final String data = StringUtils.deleteWhitespace(obj.attr("data")); if (StringUtils.startsWithIgnoreCase(data, "data:") || StringUtils.startsWithIgnoreCase(data, "javascript")) { obj.remove(); continue; } final String type = StringUtils.deleteWhitespace(obj.attr("type")); if 
(StringUtils.containsIgnoreCase(type, "script")) { obj.remove(); } } final Elements embeds = doc.getElementsByTag("embed"); for (final Element embed : embeds) { final String data = StringUtils.deleteWhitespace(embed.attr("src")); if (StringUtils.startsWithIgnoreCase(data, "data:") || StringUtils.startsWithIgnoreCase(data, "javascript")) { embed.remove(); continue; } } final Elements as = doc.getElementsByTag("a"); for (final Element a : as) { a.attr("rel", "nofollow"); final String href = a.attr("href"); if (href.startsWith(Latkes.getServePath())) { continue; } a.attr("target", "_blank"); } final Elements audios = doc.getElementsByTag("audio"); for (final Element audio : audios) { audio.attr("preload", "none"); } final Elements videos = doc.getElementsByTag("video"); for (final Element video : videos) { video.attr("preload", "none"); } String ret = doc.body().html(); ret = ret.replaceAll("(</?br\\s*/?>\\s*)+", "<br>"); // patch for Jsoup issue return ret; }