Example usage for org.jsoup.safety Whitelist Whitelist

List of usage examples for org.jsoup.safety Whitelist Whitelist

Introduction

On this page you can find example usages of the org.jsoup.safety.Whitelist constructor Whitelist().

Prototype

public Whitelist() 

Source Link

Document

Create a new, empty whitelist.

Usage

From source file:net.duckling.falcon.xss.JSONConfig.java

/**
 * Builds a {@link Whitelist} from a JSON configuration file.
 *
 * <p>The file is read as UTF-8 text and handed to a {@code JSONParser}. When the parsed root
 * is a {@code JSONObject}, a fresh empty whitelist is populated through {@code addTags} and
 * {@code addProtocols}; any other root value yields {@link Whitelist#none()}.
 *
 * @param filename path of the JSON configuration file
 * @return the whitelist built from the configuration, or {@code Whitelist.none()} when the
 *         root of the document is not a JSON object
 * @throws IOException propagated from reading the file
 * @throws ParseException propagated from the JSON parser
 */
public static Whitelist parse(String filename) throws IOException, ParseException {
    String content = FileUtils.readFileToString(new File(filename), "UTF-8");
    Object root = new JSONParser().parse(content);

    // Anything other than a JSON object cannot carry a configuration.
    if (!(root instanceof JSONObject)) {
        return Whitelist.none();
    }

    JSONObject settings = (JSONObject) root;
    Whitelist result = new Whitelist();
    addTags(result, settings);
    addProtocols(result, settings);
    return result;
}

From source file:com.mycollab.core.utils.StringUtils.java

/**
 * Creates a whitelist permitting a broad set of structural and text-formatting tags,
 * a fixed set of attributes on some of them, and a restricted set of URL protocols
 * for the link-like attributes.
 *
 * @return a newly built whitelist; every call returns a separate instance
 */
private static Whitelist relaxed() {
    Whitelist rules = new Whitelist();

    // Allowed elements.
    rules.addTags("a", "b", "blockquote", "br", "caption", "cite", "code", "col", "colgroup",
            "dd", "div", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5", "h6", "i", "img", "li", "ol",
            "pre", "q", "small", "span", "strike", "strong", "sub", "sup", "table", "tbody", "td",
            "tfoot", "th", "thead", "tr", "u", "ul");

    // Allowed attributes per element.
    rules.addAttributes("a", "href", "title");
    rules.addAttributes("blockquote", "cite");
    rules.addAttributes("col", "span", "width");
    rules.addAttributes("colgroup", "span", "width");
    rules.addAttributes("img", "align", "alt", "height", "src", "title", "width");
    rules.addAttributes("ol", "start", "type");
    rules.addAttributes("q", "cite");
    rules.addAttributes("table", "summary", "width");
    rules.addAttributes("td", "abbr", "axis", "colspan", "rowspan", "width");
    rules.addAttributes("th", "abbr", "axis", "colspan", "rowspan", "scope", "width");
    rules.addAttributes("ul", "type");

    // Allowed URL protocols for attributes that carry links.
    rules.addProtocols("a", "href", "ftp", "http", "https", "mailto");
    rules.addProtocols("blockquote", "cite", "http", "https");
    rules.addProtocols("cite", "cite", "http", "https");
    rules.addProtocols("img", "src", "http", "https");
    rules.addProtocols("q", "cite", "http", "https");

    return rules;
}

From source file:com.elevenpaths.googleindexretriever.GoogleSearch.java

/**
 * Sends the query to Google and returns the cleaned result page.
 *
 * <p>When Google answers with HTTP 503, the response URL is stored as the referer, the
 * captcha id is extracted from the response, the captcha image is requested, and a
 * {@link CaptchaException} is thrown. Otherwise the response body is parsed and cleaned
 * with the basic whitelist extended by the {@code class} attribute on {@code span}.
 *
 * @param query
 *            text of the search to send to Google
 * @return the cleaned result page, or {@code null} if an {@link IOException} occurred
 *         while talking to the server
 * @throws CaptchaException
 *             if Google answered with status 503
 * @throws EmptyQueryException
 *             if the stored query is {@code null} or empty
 * @throws UnsupportedEncodingException
 *             declared by {@link URLEncoder#encode(String, String)}
 */
private Document getData(String query)
        throws CaptchaException, EmptyQueryException, UnsupportedEncodingException {
    // The null test must come first; the reverse order threw a NullPointerException
    // for a null query instead of the documented EmptyQueryException.
    // NOTE(review): this validates the field, while the request below is built from the
    // parameter - confirm that callers always pass the same value.
    if (this.query == null || this.query.isEmpty()) {
        throw new EmptyQueryException();
    }

    Connection conn = null;
    Document doc = null;

    String request = "https://www.google.com/search?q=" + URLEncoder.encode(stripXSS(query), "UTF-8");
    if (!tokenCookie.isEmpty()) {
        request = request + "&google_abuse=" + URLEncoder.encode(tokenCookie, "UTF-8");
    }

    try {
        conn = Jsoup.connect(request).method(Method.GET)
                .userAgent("Mozilla/5.0 (Windows NT 6.3; WOW64; rv:36.0) Gecko/20100101 Firefox/48.0")
                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
                .header("Cookie", tokenCookie).header("Connection", "keep-alive").ignoreHttpErrors(true)
                .timeout(5000);

        if (!referer.isEmpty()) {
            conn.header("Referer", referer);
        }

        Connection.Response response = conn.execute();

        if (response.statusCode() == 503) {
            // Remember where the captcha challenge came from and fetch its image
            // before signalling the caller.
            referer = response.url().toString();
            idCaptcha = getIDCaptcha(response.parse());

            getCaptcha("https://ipv4.google.com/sorry/image?id=" + idCaptcha + "&hl=es&"
                    + referer.substring(referer.indexOf('?') + 1));

            throw new CaptchaException();
        }

        doc = Jsoup.parse(response.body());

        // Clean the response. basic() is a static factory, so it is called on the class
        // rather than on a throw-away instance.
        Whitelist wl = Whitelist.basic();
        wl.addAttributes("span", "class");
        Cleaner clean = new Cleaner(wl);
        doc = clean.clean(doc);
    } catch (IOException e) {
        // Best effort: the failure is reported and the caller receives null.
        e.printStackTrace();
    }

    return doc;
}

From source file:org.eclipse.skalli.commons.HtmlUtils.java

/**
 * Builds the {@link Whitelist whitelist} of HTML tags, attributes and URL protocols that
 * are accepted when rendering HTML/JSP pages. The result is meant to be passed to jsoup's
 * cleaning facilities.
 *
 * @return a newly created whitelist containing {@code ALLOWED_TAGS} plus the attribute
 *         and protocol rules configured below
 */
@SuppressWarnings("nls")
public static Whitelist getWhiteList() {
    Whitelist rules = new Whitelist();
    rules.addTags(ALLOWED_TAGS);

    // Attributes accepted on individual elements.
    rules.addAttributes("a", "href", "target", "name", "title", "rel");
    rules.addAttributes("ul", "type");
    rules.addAttributes("ol", "start", "type");
    rules.addAttributes("li", "value");
    rules.addAttributes("blockquote", "cite");
    rules.addAttributes("q", "cite");

    // Protocols accepted in link-carrying attributes.
    rules.addProtocols("a", "href", "http", "https", "mailto");
    rules.addProtocols("blockquote", "cite", "http", "https");
    rules.addProtocols("cite", "cite", "http", "https");
    rules.addProtocols("q", "cite", "http", "https");

    return rules;
}

From source file:org.finra.herd.core.HerdStringUtils.java

/**
 * Strips HTML tags from a given input String, allowing some tags to be retained via a whitelist.
 *
 * <p>Each whitelist entry is reduced to its alphabetic characters to obtain the tag name, so
 * entries may be given as {@code "<b>"} or {@code "b"}. For every retained tag the
 * {@code class} attribute is retained as well.
 *
 * @param fragment the specified String
 * @param whitelistTags the specified whitelist tags
 *
 * @return cleaned String with allowed tags
 */
public static String stripHtml(String fragment, String... whitelistTags) {

    // Parse out html tags except those from a given list of whitelist tags
    Document dirty = Jsoup.parseBodyFragment(fragment);

    Whitelist whitelist = new Whitelist();

    for (String whitelistTag : whitelistTags) {
        // Get the actual tag name from the whitelist tag by dropping every non-alphabetic
        // character. The property must be written \p{IsAlphabetic}: without the 'p' the
        // pattern is a negated class of the literal characters "{IsAlphabetic}", which
        // mangles names such as "div" (-> "i") or "strong" (-> "st").
        // This is vulnerable in general to complex tags (digits as in "h1" are dropped and
        // attributes are merged into the name) but will suffice for our simple needs.
        String tagName = StringUtils.removePattern(whitelistTag, "[^\\p{IsAlphabetic}]");

        // Add the tag to the whitelist and keep its class attribute
        whitelist.addTags(tagName).addAttributes(tagName, "class");
    }

    Cleaner cleaner = new Cleaner(whitelist);
    Document clean = cleaner.clean(dirty);
    // Set character encoding to UTF-8 and make sure no line-breaks are added
    clean.outputSettings().escapeMode(Entities.EscapeMode.base).charset(StandardCharsets.UTF_8)
            .prettyPrint(false);

    // return 'cleaned' html body
    return clean.body().html();
}