Example usage for the org.jsoup.safety.Whitelist constructor Whitelist()

Introduction

On this page you can find example usages of the org.jsoup.safety.Whitelist constructor Whitelist().

Prototype

public Whitelist()

Source Link

Document

Create a new, empty whitelist.

Usage

From source file:net.duckling.falcon.xss.JSONConfig.java

/**
 * Builds a {@link Whitelist} from a JSON configuration file.
 *
 * <p>The file is read as UTF-8 and parsed as JSON. When the top-level value is a
 * {@code JSONObject}, a new empty whitelist is handed to {@code addTags} and
 * {@code addProtocols} together with that object and then returned. For any other
 * top-level value the result of {@link Whitelist#none()} is returned.
 *
 * @param filename path of the JSON configuration file
 * @return the configured whitelist, or {@code Whitelist.none()} if the file's
 *         top-level JSON value is not an object
 * @throws IOException if the file cannot be read
 * @throws ParseException if the file content is not valid JSON
 */
public static Whitelist parse(String filename) throws IOException, ParseException {
    String jsonText = FileUtils.readFileToString(new File(filename), "UTF-8");
    Object root = new JSONParser().parse(jsonText);

    // Anything other than a JSON object cannot carry a configuration.
    if (!(root instanceof JSONObject)) {
        return Whitelist.none();
    }

    JSONObject settings = (JSONObject) root;
    Whitelist result = new Whitelist();
    addTags(result, settings);
    addProtocols(result, settings);
    return result;
}

From source file:com.mycollab.core.utils.StringUtils.java

/**
 * Creates a whitelist that permits a broad set of text, list, table and image tags,
 * a fixed set of attributes on some of them, and a restricted set of URL protocols
 * for the link-carrying attributes.
 *
 * @return a newly built whitelist; every call returns a fresh instance
 */
private static Whitelist relaxed() {
    Whitelist wl = new Whitelist();

    // Permitted elements.
    wl.addTags("a", "b", "blockquote", "br", "caption", "cite", "code", "col", "colgroup",
            "dd", "div", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5", "h6", "i", "img", "li", "ol",
            "pre", "q", "small", "span", "strike", "strong", "sub", "sup", "table", "tbody", "td",
            "tfoot", "th", "thead", "tr", "u", "ul");

    // Permitted attributes per element.
    wl.addAttributes("a", "href", "title");
    wl.addAttributes("blockquote", "cite");
    wl.addAttributes("col", "span", "width");
    wl.addAttributes("colgroup", "span", "width");
    wl.addAttributes("img", "align", "alt", "height", "src", "title", "width");
    wl.addAttributes("ol", "start", "type");
    wl.addAttributes("q", "cite");
    wl.addAttributes("table", "summary", "width");
    wl.addAttributes("td", "abbr", "axis", "colspan", "rowspan", "width");
    wl.addAttributes("th", "abbr", "axis", "colspan", "rowspan", "scope", "width");
    wl.addAttributes("ul", "type");

    // Permitted URL protocols for attributes that hold links.
    wl.addProtocols("a", "href", "ftp", "http", "https", "mailto");
    wl.addProtocols("blockquote", "cite", "http", "https");
    wl.addProtocols("cite", "cite", "http", "https");
    wl.addProtocols("img", "src", "http", "https");
    wl.addProtocols("q", "cite", "http", "https");

    return wl;
}

From source file:com.elevenpaths.googleindexretriever.GoogleSearch.java

/**
 * Sends the query to Google and returns the cleaned result page.
 *
 * <p>The request carries the abuse token cookie and the referer when they are set.
 * The response body is parsed and then reduced to the tags allowed by jsoup's basic
 * whitelist, plus the {@code class} attribute on {@code span} elements.
 *
 * @param query
 *            text to search for; it is passed through {@code stripXSS} and URL-encoded
 * @return the cleaned web page as a {@link Document}, or {@code null} if an
 *         {@link IOException} occurred while fetching or parsing the response
 * @throws CaptchaException
 *             if the server answers with HTTP status 503; before throwing, the referer
 *             and captcha id are stored and the captcha image is requested
 * @throws EmptyQueryException
 *             if the {@code query} field of this object is {@code null} or empty
 * @throws UnsupportedEncodingException
 *             declared because of {@link URLEncoder#encode(String, String)}
 */
private Document getData(String query)
        throws CaptchaException, EmptyQueryException, UnsupportedEncodingException {
    // The null test must come first: calling isEmpty() on a null reference would throw
    // a NullPointerException instead of the intended EmptyQueryException.
    // NOTE(review): this validates the field, not the 'query' parameter - confirm that is intended.
    if (this.query == null || this.query.isEmpty()) {
        throw new EmptyQueryException();
    }

    Document doc = null;

    String request = "https://www.google.com/search?q=" + URLEncoder.encode(stripXSS(query), "UTF-8");
    if (!tokenCookie.isEmpty()) {
        request = request + "&google_abuse=" + URLEncoder.encode(tokenCookie, "UTF-8");
    }

    try {
        Connection conn = Jsoup.connect(request).method(Method.GET)
                .userAgent("Mozilla/5.0 (Windows NT 6.3; WOW64; rv:36.0) Gecko/20100101 Firefox/48.0")
                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
                .header("Cookie", tokenCookie).header("Connection", "keep-alive").ignoreHttpErrors(true)
                .timeout(5000);

        if (!referer.isEmpty()) {
            conn.header("Referer", referer);
        }

        Connection.Response response = conn.execute();

        // A 503 is treated as the captcha challenge page.
        if (response.statusCode() == 503) {
            referer = response.url().toString();
            idCaptcha = getIDCaptcha(response.parse());

            // Re-use the query string of the challenge URL when requesting the captcha image.
            getCaptcha("https://ipv4.google.com/sorry/image?id=" + idCaptcha + "&hl=es&"
                    + referer.substring(referer.indexOf('?') + 1));

            throw new CaptchaException();
        }

        doc = Jsoup.parse(response.body());

        // Clean the response. basic() is a static factory, so no instance is needed.
        Whitelist wl = Whitelist.basic();
        wl.addAttributes("span", "class");
        Cleaner clean = new Cleaner(wl);
        doc = clean.clean(doc);
    } catch (IOException e) {
        // Best effort: the failure is reported and null is returned to the caller.
        e.printStackTrace();
    }

    return doc;
}

From source file:org.eclipse.skalli.commons.HtmlUtils.java

/**
 * Builds the {@link Whitelist whitelist} of HTML tags, attributes and URL protocols
 * intended for use when rendering HTML/JSP pages with jsoup.
 *
 * <p>The permitted tags come from {@code ALLOWED_TAGS}. Attributes are enabled for
 * {@code a}, {@code ul}, {@code ol}, {@code li}, {@code blockquote} and {@code q},
 * and link-carrying attributes are limited to the protocols listed below.
 *
 * @return a newly created whitelist
 */
@SuppressWarnings("nls")
public static Whitelist getWhiteList() {
    return new Whitelist()
            .addTags(ALLOWED_TAGS)
            // attributes
            .addAttributes("a", "href", "target", "name", "title", "rel")
            .addAttributes("ul", "type")
            .addAttributes("ol", "start", "type")
            .addAttributes("li", "value")
            .addAttributes("blockquote", "cite")
            .addAttributes("q", "cite")
            // protocols
            .addProtocols("a", "href", "http", "https", "mailto")
            .addProtocols("blockquote", "cite", "http", "https")
            .addProtocols("cite", "cite", "http", "https")
            .addProtocols("q", "cite", "http", "https");
}

From source file:org.finra.herd.core.HerdStringUtils.java

/**
 * Strips HTML tags from a given input String, while retaining the tags named in a whitelist.
 *
 * <p>Each whitelist entry is reduced to its alphabetic characters before use, so an entry
 * may be given as a bare name or with surrounding markup characters. Digits are removed
 * as well, which means a name such as {@code h1} is reduced to {@code h}. On every
 * retained tag the {@code class} attribute is retained too.
 *
 * @param fragment the specified String
 * @param whitelistTags the specified whitelist tags
 *
 * @return cleaned String with allowed tags
 */
public static String stripHtml(String fragment, String... whitelistTags) {

    // Parse out html tags except those from a given list of whitelist tags
    Document dirty = Jsoup.parseBodyFragment(fragment);

    Whitelist whitelist = new Whitelist();

    for (String whitelistTag : whitelistTags) {
        // Get the actual tag name from the whitelist tag by dropping every non-alphabetic character.
        // The "\\p" prefix is required: without it the expression is a negated set of the literal
        // characters "{IsAlphabetic}" and would strip ordinary letters such as 'd' or 'v'.
        // this is vulnerable in general to complex tags but will suffice for our simple needs
        String tagName = StringUtils.removePattern(whitelistTag, "[^\\p{IsAlphabetic}]");

        // Add all specified tags to the whitelist while preserving their class attribute
        whitelist.addTags(tagName).addAttributes(tagName, "class");
    }

    Cleaner cleaner = new Cleaner(whitelist);
    Document clean = cleaner.clean(dirty);
    // Set character encoding to UTF-8 and make sure no line-breaks are added
    clean.outputSettings().escapeMode(Entities.EscapeMode.base).charset(StandardCharsets.UTF_8)
            .prettyPrint(false);

    // return 'cleaned' html body
    return clean.body().html();
}