Example usage for org.jsoup.nodes Element removeAttr

List of usage examples for org.jsoup.nodes Element removeAttr

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.removeAttr.

Prototype

public Node removeAttr(String attributeKey) 

Source Link

Document

Remove an attribute from this element.

Usage

From source file:com.aurel.track.exchange.docx.exporter.PreprocessImage.java

/**
 * Gets the image captions in a map keyed by itemID_attachmentID
 * The key is saved also in the <img> tag's "alt" attribute for later use from word
 * @param doc/*from  w w w . jav a2 s  .co m*/
 * @param personID
 * @param imageCaptionsMap
 * @return
 */
private String getImageCaptions(Document doc, Integer personID,
        Map<String, ImageOrTableCaption> imageCaptionsMap) {
    Elements imgElements = doc.select("img");
    if (imgElements != null) {
        for (Iterator<Element> iterator = imgElements.iterator(); iterator.hasNext();) {
            Element imageElement = iterator.next();
            String sourceAttribute = imageElement.attr("src");
            String style = imageElement.attr("style");
            //remove the width and height attributes from html img to avoid java.lang.OutOfMemoryError: Java heap space
            imageElement.removeAttr("width");
            imageElement.removeAttr("height");
            ALIGN align = null;
            if (style != null) {
                if (style.contains("float:left")) {
                    align = ALIGN.LEFT;
                } else {
                    if (style.contains("float:right")) {
                        align = ALIGN.RIGHT;
                    }
                }
            }
            String altAttribute = imageElement.attr("alt");
            Map<String, String> map = getTemporaryFilePathMap(sourceAttribute, personID);
            if (map != null) {
                imageElement.attr("src", map.get("temporaryFilePath"));
                //save imageCaption into the map and now use the "alt" attribute for storing the merged key
                //which will be transformed  in nonvisualdrawingprops.getDescr() by XHTMLImporterImpl to set the caption on the ms word side
                String imageCaption = null;
                if (altAttribute != null && !"".equals(altAttribute)) {
                    //probably from previously removed figcaption but it may also be explicitly set
                    imageCaption = altAttribute;
                } else {
                    imageCaption = map.get("description");
                }
                globalCounter++;
                counterWithinChapter++;
                imageElement.attr("alt", String.valueOf(globalCounter));
                if (imageCaption == null) {
                    //add anyway to the map even as empty string because this marks the image to be added to the List of figures 
                    imageCaption = "";
                }
                imageCaptionsMap.put(String.valueOf(globalCounter),
                        new ImageOrTableCaption(chapterNo, counterWithinChapter, imageCaption, align));
            }
        }
    }
    return doc.body().html();
}

From source file:com.near.chimerarevo.fragments.PostFragment.java

/**
 * Adds the text of each simple paragraph to the view via {@code addText}.
 * <p>
 * A paragraph is skipped when its inner HTML starts with {@code "&"}, {@code "<iframe"} or
 * {@code "<!"}, or contains {@code "<h"}, {@code "<ol"}, {@code "<ul"}, {@code "<pre"} or
 * {@code "<tr"}. For the remaining paragraphs the images are handed to
 * {@code parseNormalImages} and removed from the paragraph, the {@code href} attribute of
 * anchors pointing to an in-page fragment (href starting with {@code "#"}) is dropped, and the
 * resulting HTML - without {@code "<br />"} and line breaks, trimmed - is added if non-empty.
 *
 * @param ps the paragraph elements to process; they are modified in place
 */
private void parseParagraphs(Elements ps) {
    for (Element p : ps) {
        // Serialize the paragraph once for all filter checks instead of once per check.
        String html = p.html();
        boolean skipped = html.startsWith("&") || html.startsWith("<iframe") || html.startsWith("<!")
                || html.contains("<h") || html.contains("<ol") || html.contains("<ul")
                || html.contains("<pre") || html.contains("<tr");
        if (skipped) {
            continue;
        }

        parseNormalImages(p.select("img"));
        p.select("img").remove();

        for (Element lnk : p.getElementsByTag("a")) {
            if (lnk.attr("href").startsWith("#")) {
                lnk.removeAttr("href");
            }
        }

        // Must be serialized again: images and fragment hrefs were removed above.
        String txt = p.html().replace("<br />", "").replace("\n", "").trim();
        if (txt.length() > 0) {
            addText(txt, true, Typeface.DEFAULT);
        }
    }
}

From source file:com.near.chimerarevo.fragments.PostFragment.java

/**
 * Renders list items as bullet lines and adds them as one text block via {@code addText}.
 * <p>
 * Every {@code <li>} found under the given elements contributes one line consisting of a
 * bullet character, the item's outer HTML and a {@code "<br />"}. Before that, anchors inside
 * an item that has at least one child element lose their {@code href} attribute when it
 * contains {@code "#"}.
 * <p>
 * NOTE(review): this method tests {@code contains("#")} while {@code parseParagraphs} tests
 * {@code startsWith("#")}; as written, absolute links with a fragment are stripped here too.
 * Confirm which of the two is intended.
 *
 * @param itms the list elements to process; their anchors are modified in place
 */
private void parseBulletedLists(Elements itms) {
    // Builder instead of repeated String concatenation inside the nested loops.
    StringBuilder bld = new StringBuilder();
    for (Element itm : itms) {
        for (Element listItem : itm.getElementsByTag("li")) {
            if (listItem.children().size() >= 1) {
                for (Element anchor : listItem.getElementsByTag("a")) {
                    if (anchor.attr("href").contains("#")) {
                        anchor.removeAttr("href");
                    }
                }
            }
            bld.append("\u2022 ").append(listItem.outerHtml()).append("<br />");
        }
    }
    addText(bld.toString(), true, Typeface.DEFAULT);
}

From source file:no.kantega.publishing.admin.content.htmlfilter.ConvertUnderlineToEditorStyleFilter.java

/**
 * Converts underlined {@code <span>} elements into {@code <u>} elements.
 * <p>
 * A span qualifies when its {@code style} attribute is not blank and the value that
 * {@code getSubAttributeValue} extracts for {@code "text-decoration"} equals
 * {@code "underline"} (case-insensitively). For such a span the whole {@code style}
 * attribute is removed and the tag is renamed to {@code u}.
 *
 * @param document the document to filter; modified in place
 * @return the same document instance
 */
@Override
public Document runFilter(Document document) {
    for (Element candidate : document.getElementsByTag("span")) {
        String inlineStyle = candidate.attr("style");
        if (!isNotBlank(inlineStyle)) {
            continue;
        }
        boolean underlined = "underline"
                .equalsIgnoreCase(getSubAttributeValue(inlineStyle, "text-decoration"));
        if (underlined) {
            candidate.removeAttr("style");
            candidate.tagName("u");
        }
    }
    return document;
}

From source file:no.kantega.publishing.admin.content.htmlfilter.ReplaceStyleAlignWithAttributeAlignFilter.java

/**
 * Replaces inline style alignment with the {@code align} attribute.
 * <p>
 * For every element of each tag in {@code tags} that has a non-blank {@code style}
 * attribute, {@code align} is set to {@code "right"}, {@code "left"} or {@code "center"} -
 * the first of these words, checked in that order, that occurs in the style text. The
 * {@code style} attribute is then removed, whether or not one of the words was found.
 * <p>
 * NOTE(review): the match is a plain substring test on the whole style text, so a
 * declaration such as {@code margin-left} also counts as "left" - confirm this is intended.
 *
 * @param document the document to filter; modified in place
 * @return the same document instance
 */
@Override
public Document runFilter(Document document) {
    for (String tag : tags) {
        for (Element element : document.getElementsByTag(tag)) {
            String inlineStyle = element.attr("style");
            if (!isNotBlank(inlineStyle)) {
                continue;
            }

            String alignment = null;
            if (inlineStyle.contains("right")) {
                alignment = "right";
            } else if (inlineStyle.contains("left")) {
                alignment = "left";
            } else if (inlineStyle.contains("center")) {
                alignment = "center";
            }

            if (alignment != null) {
                element.attr("align", alignment);
            }
            element.removeAttr("style");
        }
    }
    return document;
}

From source file:org.b3log.symphony.util.Markdowns.java

/**
 * Gets the safe HTML content of the specified content.
 *
 * @param content the specified content/* w  w w . ja  va 2 s.  c om*/
 * @param baseURI the specified base URI, the relative path value of href will starts with this URL
 * @return safe HTML content
 */
public static String clean(final String content, final String baseURI) {
    final Document.OutputSettings outputSettings = new Document.OutputSettings();
    outputSettings.prettyPrint(false);

    final String tmp = Jsoup.clean(content, baseURI,
            Whitelist.relaxed().addAttributes(":all", "id", "target", "class")
                    .addTags("span", "hr", "kbd", "samp", "tt", "del", "s", "strike", "u")
                    .addAttributes("iframe", "src", "width", "height", "border", "marginwidth", "marginheight")
                    .addAttributes("audio", "controls", "src")
                    .addAttributes("video", "controls", "src", "width", "height")
                    .addAttributes("source", "src", "media", "type")
                    .addAttributes("object", "width", "height", "data", "type")
                    .addAttributes("param", "name", "value")
                    .addAttributes("input", "type", "disabled", "checked").addAttributes("embed", "src", "type",
                            "width", "height", "wmode", "allowNetworking"),
            outputSettings);
    final Document doc = Jsoup.parse(tmp, baseURI, Parser.htmlParser());

    final Elements ps = doc.getElementsByTag("p");
    for (final Element p : ps) {
        p.removeAttr("style");
    }

    final Elements iframes = doc.getElementsByTag("iframe");
    for (final Element iframe : iframes) {
        final String src = StringUtils.deleteWhitespace(iframe.attr("src"));
        if (StringUtils.startsWithIgnoreCase(src, "javascript")
                || StringUtils.startsWithIgnoreCase(src, "data:")) {
            iframe.remove();
        }
    }

    final Elements objs = doc.getElementsByTag("object");
    for (final Element obj : objs) {
        final String data = StringUtils.deleteWhitespace(obj.attr("data"));
        if (StringUtils.startsWithIgnoreCase(data, "data:")
                || StringUtils.startsWithIgnoreCase(data, "javascript")) {
            obj.remove();

            continue;
        }

        final String type = StringUtils.deleteWhitespace(obj.attr("type"));
        if (StringUtils.containsIgnoreCase(type, "script")) {
            obj.remove();
        }
    }

    final Elements embeds = doc.getElementsByTag("embed");
    for (final Element embed : embeds) {
        final String data = StringUtils.deleteWhitespace(embed.attr("src"));
        if (StringUtils.startsWithIgnoreCase(data, "data:")
                || StringUtils.startsWithIgnoreCase(data, "javascript")) {
            embed.remove();

            continue;
        }
    }

    final Elements as = doc.getElementsByTag("a");
    for (final Element a : as) {
        a.attr("rel", "nofollow");

        final String href = a.attr("href");
        if (href.startsWith(Latkes.getServePath())) {
            continue;
        }

        a.attr("target", "_blank");
    }

    final Elements audios = doc.getElementsByTag("audio");
    for (final Element audio : audios) {
        audio.attr("preload", "none");
    }

    final Elements videos = doc.getElementsByTag("video");
    for (final Element video : videos) {
        video.attr("preload", "none");
    }

    String ret = doc.body().html();
    ret = ret.replaceAll("(</?br\\s*/?>\\s*)+", "<br>"); // patch for Jsoup issue

    return ret;
}