List of usage examples for the org.jsoup.safety.Whitelist constructor Whitelist()
public Whitelist()
From source file:net.duckling.falcon.xss.JSONConfig.java
/**
 * Builds a {@link Whitelist} from the JSON configuration stored in the given file.
 *
 * <p>The file is read as UTF-8 and handed to a {@code JSONParser}. When the top-level value is
 * a {@code JSONObject}, a new whitelist is populated through {@code addTags} and
 * {@code addProtocols}; for any other top-level value {@link Whitelist#none()} is returned.
 *
 * @param filename path of the JSON configuration file
 * @return the populated whitelist, or {@code Whitelist.none()} when the parsed root is not a
 *         JSON object
 * @throws IOException if reading the file fails
 * @throws ParseException if the JSON parser rejects the file content
 */
public static Whitelist parse(String filename) throws IOException, ParseException {
    String json = FileUtils.readFileToString(new File(filename), "UTF-8");
    Object root = new JSONParser().parse(json);

    // Anything other than a JSON object carries no usable configuration.
    if (!(root instanceof JSONObject)) {
        return Whitelist.none();
    }

    JSONObject config = (JSONObject) root;
    Whitelist result = new Whitelist();
    addTags(result, config);
    addProtocols(result, config);
    return result;
}
From source file:com.mycollab.core.utils.StringUtils.java
/**
 * Creates a whitelist that permits a broad set of text-formatting, list, table, link and
 * image tags together with a fixed selection of their attributes.
 *
 * <p>URL-bearing attributes are restricted to {@code http} and {@code https}; the
 * {@code href} of {@code a} additionally accepts {@code ftp} and {@code mailto}.
 *
 * @return a newly created whitelist with the tags, attributes and protocols listed below
 */
private static Whitelist relaxed() {
    Whitelist whitelist = new Whitelist();

    // Allowed elements.
    whitelist.addTags(
            "a", "b", "blockquote", "br", "caption", "cite", "code", "col", "colgroup", "dd",
            "div", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5", "h6", "i", "img", "li", "ol",
            "pre", "q", "small", "span", "strike", "strong", "sub", "sup", "table", "tbody", "td",
            "tfoot", "th", "thead", "tr", "u", "ul");

    // Allowed attributes per element.
    whitelist
            .addAttributes("a", "href", "title")
            .addAttributes("blockquote", "cite")
            .addAttributes("col", "span", "width")
            .addAttributes("colgroup", "span", "width")
            .addAttributes("img", "align", "alt", "height", "src", "title", "width")
            .addAttributes("ol", "start", "type")
            .addAttributes("q", "cite")
            .addAttributes("table", "summary", "width")
            .addAttributes("td", "abbr", "axis", "colspan", "rowspan", "width")
            .addAttributes("th", "abbr", "axis", "colspan", "rowspan", "scope", "width")
            .addAttributes("ul", "type");

    // Allowed URL schemes for attributes that carry a link.
    whitelist
            .addProtocols("a", "href", "ftp", "http", "https", "mailto")
            .addProtocols("blockquote", "cite", "http", "https")
            .addProtocols("cite", "cite", "http", "https")
            .addProtocols("img", "src", "http", "https")
            .addProtocols("q", "cite", "http", "https");

    return whitelist;
}
From source file:com.elevenpaths.googleindexretriever.GoogleSearch.java
/** * Make the query to google and return the data. * * @param query//from w w w . ja v a2 s .c o m * textfield for google * @return webpage in Document format */ private Document getData(String query) throws CaptchaException, EmptyQueryException, UnsupportedEncodingException { if (this.query.isEmpty() || this.query == null) { throw new EmptyQueryException(); } Connection conn = null; Document doc = null; String request = "https://www.google.com/search?q=" + URLEncoder.encode(stripXSS(query), "UTF-8"); if (!tokenCookie.isEmpty()) { request = request + "&google_abuse=" + URLEncoder.encode(tokenCookie, "UTF-8"); } try { conn = Jsoup.connect(request).method(Method.GET) .userAgent("Mozilla/5.0 (Windows NT 6.3; WOW64; rv:36.0) Gecko/20100101 Firefox/48.0") .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") .header("Cookie", tokenCookie).header("Connection", "keep-alive").ignoreHttpErrors(true) .timeout(5000); if (!referer.isEmpty()) { conn.header("Referer", referer); } Connection.Response response = conn.execute(); if (response.statusCode() == 503) { referer = response.url().toString(); idCaptcha = getIDCaptcha(response.parse()); getCaptcha("https://ipv4.google.com/sorry/image?id=" + idCaptcha + "&hl=es&" + referer.substring(referer.indexOf('?') + 1)); throw new CaptchaException(); } doc = Jsoup.parse(response.body()); // Clean the response Whitelist wl = new Whitelist().basic(); wl.addAttributes("span", "class"); Cleaner clean = new Cleaner(wl); doc = clean.clean(doc); } catch (IOException e) { //System.out.println(e.getMessage()); e.printStackTrace(); } return doc; }
From source file:org.eclipse.skalli.commons.HtmlUtils.java
/** * Returns a {@link Whitelist whitelist} of HTML tags and attributes that can safely be used * when rendering HTML/JSP pages. Use the returned whitelist with {@link JSoup}. *///w ww . ja va 2 s . co m @SuppressWarnings("nls") public static Whitelist getWhiteList() { Whitelist whitelist = new Whitelist(); whitelist.addTags(ALLOWED_TAGS).addAttributes("a", "href", "target", "name", "title", "rel") .addAttributes("ul", "type").addAttributes("ol", "start", "type").addAttributes("li", "value") .addAttributes("blockquote", "cite").addAttributes("q", "cite") .addProtocols("a", "href", "http", "https", "mailto") .addProtocols("blockquote", "cite", "http", "https").addProtocols("cite", "cite", "http", "https") .addProtocols("q", "cite", "http", "https"); return whitelist; }
From source file:org.finra.herd.core.HerdStringUtils.java
/** * Strips HTML tags from a given input String, allows some tags to be retained via a whitelist * * @param fragment the specified String// w w w .j a v a 2 s . c o m * @param whitelistTags the specified whitelist tags * * @return cleaned String with allowed tags */ public static String stripHtml(String fragment, String... whitelistTags) { // Parse out html tags except those from a given list of whitelist tags Document dirty = Jsoup.parseBodyFragment(fragment); Whitelist whitelist = new Whitelist(); for (String whitelistTag : whitelistTags) { // Get the actual tag name from the whitelist tag // this is vulnerable in general to complex tags but will suffice for our simple needs whitelistTag = StringUtils.removePattern(whitelistTag, "[^\\{IsAlphabetic}]"); // Add all specified tags to the whitelist while preserving inline css whitelist.addTags(whitelistTag).addAttributes(whitelistTag, "class"); } Cleaner cleaner = new Cleaner(whitelist); Document clean = cleaner.clean(dirty); // Set character encoding to UTF-8 and make sure no line-breaks are added clean.outputSettings().escapeMode(Entities.EscapeMode.base).charset(StandardCharsets.UTF_8) .prettyPrint(false); // return 'cleaned' html body return clean.body().html(); }