Example usage for the org.apache.commons.validator.UrlValidator constructor

List of usage examples for the org.apache.commons.validator.UrlValidator constructor

Introduction

On this page you can find example usages of the org.apache.commons.validator.UrlValidator constructor.

Prototype

public UrlValidator(int options) 

Source Link

Document

Initialize a UrlValidator with the given validation options.

Usage

From source file:com.illustrationfinder.IllustrationFinderController.java

/**
 * Handles {@code GET /} requests that carry the {@code url}, {@code preferred-width} and
 * {@code preferred-height} parameters and renders the illustration finder view.
 *
 * <p>The model always receives {@code pUrl} (the URL as submitted) and {@code isUrlValid}.
 * When the URL is a valid http/https URL, the images found for it are encoded as PNG,
 * converted to Base64 strings and stored under {@code images}.
 *
 * @param modelMap         model populated for the view
 * @param pUrl             URL of the page to find illustrations for
 * @param pPreferredWidth  preferred image width, parsed with {@link Integer#parseInt(String)}
 * @param pPreferredHeight preferred image height, parsed with {@link Integer#parseInt(String)}
 * @return the {@code /IllustrationFinderView} model-and-view
 * @throws NumberFormatException if the URL is valid but a preferred dimension is not an integer
 */
@RequestMapping(value = "/", method = RequestMethod.GET, params = { "url", "preferred-width",
        "preferred-height" })
public ModelAndView showIllustrationFinderResults(ModelMap modelMap, @RequestParam(value = "url") String pUrl,
        @RequestParam(value = "preferred-width") String pPreferredWidth,
        @RequestParam(value = "preferred-height") String pPreferredHeight) {
    final ModelAndView modelAndView = new ModelAndView("/IllustrationFinderView");

    // Add the URL to attributes
    modelMap.addAttribute("pUrl", pUrl);

    // Validate the URL exactly as it will be fetched. HTML-escaping it first would rewrite
    // characters the validator rejects (e.g. '<' or '"') into entities made of characters it
    // accepts, letting through URLs that differ from the one passed to getImages() below.
    final boolean isUrlValid = pUrl != null
            && new UrlValidator(new String[] { "http", "https" }).isValid(pUrl);

    modelMap.addAttribute("isUrlValid", isUrlValid);

    if (!isUrlValid) {
        return modelAndView;
    }

    // Get the images
    try {
        final IPostProcessor postProcessor = new HtmlPostProcessor();
        final GoogleSearchEngine searchEngine = new GoogleSearchEngine();
        final IImageProcessor<BufferedImage, BufferedImageOp> imageProcessor = new BufferedImageProcessor();

        // NOTE(review): non-numeric dimensions still surface as NumberFormatException here.
        imageProcessor.setPreferredSize(
                new Dimension(Integer.parseInt(pPreferredWidth), Integer.parseInt(pPreferredHeight)));

        final IllustrationFinder illustrationFinder = new IllustrationFinder();
        illustrationFinder.setPostProcessor(postProcessor);
        illustrationFinder.setSearchEngine(searchEngine);
        illustrationFinder.setImageProcessor(imageProcessor);

        final List<BufferedImage> images = illustrationFinder.getImages(new URL(pUrl));

        // Convert images to base64 strings
        final List<String> imagesAsStrings = new ArrayList<>();

        if (images != null) {
            for (BufferedImage image : images) {
                try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
                    ImageIO.write(image, "png", baos);
                    imagesAsStrings.add(DatatypeConverter.printBase64Binary(baos.toByteArray()));
                } catch (IOException ignored) {
                    // Best effort: an image that cannot be encoded is left out of the result.
                }
            }
        }

        modelMap.addAttribute("images", imagesAsStrings);
    } catch (IOException ignored) {
        // Deliberately ignored: the view is rendered without the "images" attribute.
        // A malformed URL is not expected here because the URL was validated above.
    }

    return modelAndView;
}

From source file:ecar.util.HtmlSanitizer.java

/**
 * Sanitizes an HTML fragment by walking its tokens and rebuilding a cleaned copy.
 *
 * <p>Every token produced by {@code tokenize(html)} is classified as a comment, an opening
 * tag, a closing tag or anything else, and the result object is filled as follows:
 * <ul>
 *   <li>{@code invalidTags} collects every rejected comment, tag, attribute or style value;</li>
 *   <li>{@code val} accumulates the comments, the raw unknown tags, the cleaned allowed tags
 *       and the remaining tokens;</li>
 *   <li>{@code html} accumulates accepted tags as-is and every other token that reaches the
 *       end of the loop after {@code htmlEncodeApexesAndTags};</li>
 *   <li>{@code text} accumulates only the non-accepted tokens that reach the end of the loop;</li>
 *   <li>{@code isValid} is true when nothing was added to {@code invalidTags}.</li>
 * </ul>
 * Tags still open when the input ends are closed in both {@code html} and {@code val}.
 *
 * <p>NOTE(review): {@code tokenize}, the {@code *Pattern} fields, {@code encode},
 * {@code removeLineFeed} and {@code htmlEncodeApexesAndTags} are defined outside this
 * snippet; their exact behavior is not visible here.
 *
 * @param html          the markup to sanitize
 * @param allowedTags   pattern searched (via {@code find()}) in the lower-cased tag name to accept a tag
 * @param forbiddenTags pattern searched (via {@code find()}) in the lower-cased tag name to drop a tag;
 *                      checked before {@code allowedTags}
 * @return the populated {@code SanitizeResult}
 */
public static SanitizeResult sanitizer(String html, Pattern allowedTags, Pattern forbiddenTags) {
    SanitizeResult ret = new SanitizeResult();
    // Accepted tags that still await their closing tag, innermost on top.
    Stack<String> openTags = new Stack();

    List<String> tokens = tokenize(html);

    // ------------------- LOOP for every token --------------------------
    for (String token : tokens) {
        // Stays false for plain text and rejected markup; such tokens are encoded at the
        // bottom of the loop instead of being copied verbatim into ret.html.
        boolean isAcceptedToken = false;

        Matcher startMatcher = tagStartPattern.matcher(token);
        Matcher endMatcher = tagClosePattern.matcher(token);

        // --------------------------------------------------------------------------------
        // COMMENT <!-- ......... -->
        // Kept in ret.val (terminated with "-->" when the token lacks it) and reported as
        // invalid; the continue keeps it out of ret.html and ret.text.
        if (commentPattern.matcher(token).find()) {
            ret.val = ret.val + token + (token.endsWith("-->") ? "" : "-->");
            ret.invalidTags.add(token + (token.endsWith("-->") ? "" : "-->"));
            continue;

            // --------------------------------------------------------------------------------
            // OPEN TAG <tag .........>
        } else if (startMatcher.find()) {

            // tag name extraction
            String tag = startMatcher.group(1).toLowerCase();

            // -----------------------------------------------------
            // FORBIDDEN TAG <script .........>
            // Reported and dropped from every output.
            if (forbiddenTags.matcher(tag).find()) {
                ret.invalidTags.add("<" + tag + ">");
                continue;

                // -------------------------------------------------- WELL
                // KNOWN TAG
            } else if (allowedTags.matcher(tag).find()) {

                String cleanToken = "<" + tag;
                String tokenBody = startMatcher.group(2);

                // first test table consistency
                // table tbody tfoot thead th tr td
                // Stack.search returns -1 when the element is absent, so "< 1" means the
                // required ancestor is not currently open; the tag is then dropped.
                if ("thead".equals(tag) || "tbody".equals(tag) || "tfoot".equals(tag) || "tr".equals(tag)) {
                    if (openTags.search("table") < 1) {
                        ret.invalidTags.add("<" + tag + ">");
                        continue;
                    }
                } else if ("td".equals(tag) || "th".equals(tag)) {
                    if (openTags.search("tr") < 1) {
                        ret.invalidTags.add("<" + tag + ">");
                        continue;
                    }
                }

                // then test properties
                Matcher attributes = attributesPattern.matcher(tokenBody);

                // Set when an <a>, <img> or <embed> carries an accepted URL.
                boolean foundURL = false; // URL flag
                while (attributes.find()) {

                    String attr = attributes.group(1).toLowerCase();
                    String val = attributes.group(2);

                    // we will accept href in case of <A>
                    if ("a".equals(tag) && "href".equals(attr)) { // <a
                        // href="......">
                        String[] customSchemes = { "http", "https" };
                        if (new UrlValidator(customSchemes).isValid(val)) {
                            foundURL = true;
                        } else {
                            // may be it is a mailto?
                            // case <a
                            // href="mailto:pippo@pippo.com?subject=...."
                            // Everything after the first '@' is checked by prefixing it with
                            // "http://www." and validating the result as an http URL.
                            if (val.toLowerCase().startsWith("mailto:") && val.indexOf("@") >= 0) {
                                String val1 = "http://www." + val.substring(val.indexOf("@") + 1);
                                if (new UrlValidator(customSchemes).isValid(val1)) {
                                    foundURL = true;
                                } else {
                                    // Rejected: the attribute is still emitted below, but with
                                    // an empty value.
                                    ret.invalidTags.add(attr + " " + val);
                                    val = "";
                                }
                            } else {
                                ret.invalidTags.add(attr + " " + val);
                                val = "";
                            }
                        }

                    } else if (tag.matches("img|embed") && "src".equals(attr)) { // <img src="......">
                        String[] customSchemes = { "http", "https" };
                        if (new UrlValidator(customSchemes).isValid(val)) {
                            foundURL = true;
                        } else {
                            ret.invalidTags.add(attr + " " + val);
                            val = "";
                        }

                    } else if ("href".equals(attr) || "src".equals(attr)) { // <tag
                        // src/href="......">
                        // skipped: href/src on any other tag is reported and the attribute is
                        // left out (this continue targets the attribute loop).
                        ret.invalidTags.add(tag + " " + attr + " " + val);
                        continue;

                    } else if (attr.matches("width|height")) { // <tag
                        // width/height="......">
                        // matches() must cover the whole value: only digits, optionally
                        // followed by '%', are kept.
                        if (!val.toLowerCase().matches("\\d+%|\\d+$")) { // test
                            // numeric
                            // values
                            ret.invalidTags.add(tag + " " + attr + " " + val);
                            continue;
                        }

                    } else if ("style".equals(attr)) { // <tag
                        // style="......">

                        // then test properties
                        Matcher styles = stylePattern.matcher(val);
                        // Rebuilt from the declarations that pass the checks below.
                        String cleanStyle = "";

                        while (styles.find()) {
                            String styleName = styles.group(1).toLowerCase();
                            String styleValue = styles.group(2);

                            // suppress invalid styles values
                            if (forbiddenStylePattern.matcher(styleValue).find()) {
                                ret.invalidTags.add(tag + " " + attr + " " + styleValue);
                                continue;
                            }

                            // check if valid url
                            Matcher urlStyleMatcher = urlStylePattern.matcher(styleValue);
                            if (urlStyleMatcher.find()) {
                                String[] customSchemes = { "http", "https" };
                                String url = urlStyleMatcher.group(1);
                                if (!new UrlValidator(customSchemes).isValid(url)) {
                                    ret.invalidTags.add(tag + " " + attr + " " + styleValue);
                                    continue;
                                }
                            }

                            cleanStyle = cleanStyle + styleName + ":" + encode(styleValue) + ";";

                        }
                        val = cleanStyle;

                    } else if (attr.startsWith("on")) { // skip all
                        // javascript events
                        ret.invalidTags.add(tag + " " + attr + " " + val);
                        continue;

                    } else { // by default encode all properties
                        val = encode(val);
                    }

                    cleanToken = cleanToken + " " + attr + "=\"" + val + "\"";
                }
                cleanToken = cleanToken + ">";

                isAcceptedToken = true;

                // for <img>, <embed> and <a>: without an accepted URL the whole tag is
                // discarded (the cleaned token becomes empty)
                if (tag.matches("a|img|embed") && !foundURL) {
                    isAcceptedToken = false;
                    cleanToken = "";
                }

                token = cleanToken;

                // push the tag if it requires closure and it is accepted
                // (otherwise it is encoded)
                if (isAcceptedToken && !(standAloneTags.matcher(tag).find() || selfClosed.matcher(tag).find()))
                    openTags.push(tag);

                // --------------------------------------------------------------------------------
                // UNKNOWN TAG
                // Neither forbidden nor allowed: reported, and the raw token is kept in
                // ret.val only.
            } else {
                ret.invalidTags.add(token);
                ret.val = ret.val + token;
                continue;

            }

            // --------------------------------------------------------------------------------
            // CLOSE TAG </tag>
        } else if (endMatcher.find()) {
            String tag = endMatcher.group(1).toLowerCase();

            // is self closing: a closing tag for such a tag is reported and dropped
            if (selfClosed.matcher(tag).find()) {
                ret.invalidTags.add(token);
                continue;
            }
            if (forbiddenTags.matcher(tag).find()) {
                ret.invalidTags.add("/" + tag);
                continue;
            }
            if (!allowedTags.matcher(tag).find()) {
                ret.invalidTags.add(token);
                ret.val = ret.val + token;
                continue;
            } else {

                String cleanToken = "";

                // check tag position in the stack (1-based distance from the top, -1 if absent)
                int pos = openTags.search(tag);
                // if found on top ok; otherwise every tag opened after it is closed as well.
                // When the tag is not on the stack nothing is popped and cleanToken stays empty.
                for (int i = 1; i <= pos; i++) {
                    // pop all elements before tag and close it
                    String poppedTag = openTags.pop();
                    cleanToken = cleanToken + "</" + poppedTag + ">";
                    isAcceptedToken = true;
                }

                token = cleanToken;
            }

        }

        ret.val = ret.val + token;

        if (isAcceptedToken) {
            ret.html = ret.html + token;
            // ret.text = ret.text + " ";
        } else {
            // Plain text and discarded tags are passed through htmlEncodeApexesAndTags
            // before being appended.
            String sanToken = htmlEncodeApexesAndTags(token);
            ret.html = ret.html + sanToken;
            ret.text = ret.text + htmlEncodeApexesAndTags(removeLineFeed(token));
        }

    }

    // must close remaining tags
    while (openTags.size() > 0) {
        // pop the innermost open tag and append its closing tag
        String poppedTag = openTags.pop();
        ret.html = ret.html + "</" + poppedTag + ">";
        ret.val = ret.val + "</" + poppedTag + ">";
    }

    // set boolean value
    ret.isValid = ret.invalidTags.size() == 0;

    return ret;
}

From source file:com.tmh.web.filter.xss.HtmlSanitizer.java

/**
 * Sanitizes an HTML fragment by walking its tokens and rebuilding a cleaned copy.
 *
 * <p>Every token produced by {@code tokenize(html)} is classified as a comment, a token
 * matching {@code styleScriptPattern}, an opening tag, a closing tag or anything else, and
 * the result object is filled as follows:
 * <ul>
 *   <li>{@code invalidTags} collects every rejected comment, token, tag, attribute or style value;</li>
 *   <li>{@code val} accumulates the comments, the style-script tokens, the raw unknown tags,
 *       the cleaned allowed tags and the remaining tokens;</li>
 *   <li>{@code html} accumulates accepted tags as-is and every other token that reaches the
 *       end of the loop after {@code htmlEncodeApexesAndTags};</li>
 *   <li>{@code text} accumulates only the non-accepted tokens that reach the end of the loop;</li>
 *   <li>{@code isValid} is true when nothing was added to {@code invalidTags}.</li>
 * </ul>
 * Tags still open when the input ends are closed in both {@code html} and {@code val}.
 *
 * <p>NOTE(review): {@code tokenize}, the {@code *Pattern} fields, {@code encode},
 * {@code removeLineFeed} and {@code htmlEncodeApexesAndTags} are defined outside this
 * snippet; their exact behavior is not visible here.
 *
 * @param html          the markup to sanitize
 * @param allowedTags   pattern searched (via {@code find()}) in the lower-cased tag name to accept a tag
 * @param forbiddenTags pattern searched (via {@code find()}) in the lower-cased tag name to drop a tag;
 *                      checked before {@code allowedTags}
 * @return the populated {@code SanitizeResult}
 */
public static SanitizeResult sanitizer(String html, Pattern allowedTags, Pattern forbiddenTags) {
    SanitizeResult ret = new SanitizeResult();
    // Accepted tags that still await their closing tag, innermost on top.
    Stack<String> openTags = new Stack();

    List<String> tokens = tokenize(html);

    // -------------------   LOOP for every token --------------------------
    for (String token : tokens) {
        // Stays false for plain text and rejected markup; such tokens are encoded at the
        // bottom of the loop instead of being copied verbatim into ret.html.
        boolean isAcceptedToken = false;
        Matcher startMatcher = tagStartPattern.matcher(token);
        Matcher endMatcher = tagClosePattern.matcher(token);

        //  COMMENT    <!-- ......... -->
        // Kept in ret.val (terminated with "-->" when the token lacks it) and reported as
        // invalid; the continue keeps it out of ret.html and ret.text.
        if (commentPattern.matcher(token).find()) {
            ret.val = ret.val + token + (token.endsWith("-->") ? "" : "-->");
            ret.invalidTags.add(token + (token.endsWith("-->") ? "" : "-->"));
            continue;

        } //  STYLE SCRIPT   style=xss:expression
          // Reported and kept raw in ret.val only; checked before the tag branches.
        else if (styleScriptPattern.matcher(token).find()) {
            ret.val = ret.val + token;
            ret.invalidTags.add(token);
            continue;

            //  OPEN TAG    <tag .........>
        } else if (startMatcher.find()) {

            //tag name extraction
            String tag = startMatcher.group(1).toLowerCase();

            //-----------------------------------------------------  FORBIDDEN TAG   <script .........>
            // Reported and dropped from every output.
            if (forbiddenTags.matcher(tag).find()) {
                ret.invalidTags.add("<" + tag + ">");
                continue;

                // --------------------------------------------------  WELL KNOWN TAG
            } else if (allowedTags.matcher(tag).find()) {

                String cleanToken = "<" + tag;
                String tokenBody = startMatcher.group(2);

                //first test table consistency
                //table tbody tfoot thead th tr td
                // Stack.search returns -1 when the element is absent, so "< 1" means the
                // required ancestor is not currently open; the tag is then dropped.
                if ("thead".equals(tag) || "tbody".equals(tag) || "tfoot".equals(tag) || "tr".equals(tag)) {
                    if (openTags.search("table") < 1) {
                        ret.invalidTags.add("<" + tag + ">");
                        continue;
                    }
                } else if ("td".equals(tag) || "th".equals(tag)) {
                    if (openTags.search("tr") < 1) {
                        ret.invalidTags.add("<" + tag + ">");
                        continue;
                    }
                }

                // then test properties
                Matcher attributes = attributesPattern.matcher(tokenBody);

                // Set when an <a>, <img> or <embed> carries an accepted URL.
                boolean foundURL = false; // URL flag
                while (attributes.find()) {

                    String attr = attributes.group(1).toLowerCase();
                    String val = attributes.group(2);

                    // we will accept href in case of <A>
                    if ("a".equals(tag) && "href".equals(attr)) { // <a href="......">
                        String[] customSchemes = { "http", "https" };
                        if (new UrlValidator(customSchemes).isValid(val)) {
                            foundURL = true;
                        } else {
                            // may be it is a mailto?
                            //  case <a href="mailto:pippo@pippo.com?subject=...."
                            // Everything after the first '@' is checked by prefixing it with
                            // "http://www." and validating the result as an http URL.
                            if (val.toLowerCase().startsWith("mailto:") && val.indexOf("@") >= 0) {
                                String val1 = "http://www." + val.substring(val.indexOf("@") + 1);
                                if (new UrlValidator(customSchemes).isValid(val1)) {
                                    foundURL = true;
                                } else {
                                    // Rejected: the attribute is still emitted below, but with
                                    // an empty value.
                                    ret.invalidTags.add(attr + " " + val);
                                    val = "";
                                }
                            } else {
                                ret.invalidTags.add(attr + " " + val);
                                val = "";
                            }
                        }

                    } else if (tag.matches("img|embed") && "src".equals(attr)) { // <img src="......">
                        String[] customSchemes = { "http", "https" };
                        if (new UrlValidator(customSchemes).isValid(val)) {
                            foundURL = true;
                        } else {
                            ret.invalidTags.add(attr + " " + val);
                            val = "";
                        }

                    } else if ("href".equals(attr) || "src".equals(attr)) { // <tag src/href="......">   skipped
                        // href/src on any other tag is reported and the attribute is left out
                        // (this continue targets the attribute loop).
                        ret.invalidTags.add(tag + " " + attr + " " + val);
                        continue;

                    } else if (attr.matches("width|height")) { // <tag width/height="......">
                        // matches() must cover the whole value: only digits, optionally
                        // followed by '%', are kept.
                        if (!val.toLowerCase().matches("\\d+%|\\d+$")) { // test numeric values
                            ret.invalidTags.add(tag + " " + attr + " " + val);
                            continue;
                        }

                    } else if ("style".equals(attr)) { // <tag style="......">

                        // then test properties
                        Matcher styles = stylePattern.matcher(val);
                        // Rebuilt from the declarations that pass the checks below.
                        String cleanStyle = "";

                        while (styles.find()) {
                            String styleName = styles.group(1).toLowerCase();
                            String styleValue = styles.group(2);

                            // suppress invalid styles values
                            if (forbiddenStylePattern.matcher(styleValue).find()) {
                                ret.invalidTags.add(tag + " " + attr + " " + styleValue);
                                continue;
                            }

                            // check if valid url
                            Matcher urlStyleMatcher = urlStylePattern.matcher(styleValue);
                            if (urlStyleMatcher.find()) {
                                String[] customSchemes = { "http", "https" };
                                String url = urlStyleMatcher.group(1);
                                if (!new UrlValidator(customSchemes).isValid(url)) {
                                    ret.invalidTags.add(tag + " " + attr + " " + styleValue);
                                    continue;
                                }
                            }

                            cleanStyle = cleanStyle + styleName + ":" + encode(styleValue) + ";";

                        }
                        val = cleanStyle;

                    } else if (attr.startsWith("on")) { // skip all javascript events
                        ret.invalidTags.add(tag + " " + attr + " " + val);
                        continue;

                    } else { // by default encode all properties
                        val = encode(val);
                    }

                    cleanToken = cleanToken + " " + attr + "=\"" + val + "\"";
                }
                cleanToken = cleanToken + ">";

                isAcceptedToken = true;

                // for <img>, <embed> and <a>: without an accepted URL the whole tag is
                // discarded (the cleaned token becomes empty)
                if (tag.matches("a|img|embed") && !foundURL) {
                    isAcceptedToken = false;
                    cleanToken = "";
                }

                token = cleanToken;

                // push the tag if it requires closure and it is accepted (otherwise it is encoded)
                if (isAcceptedToken && !(standAloneTags.matcher(tag).find() || selfClosed.matcher(tag).find()))
                    openTags.push(tag);

                //   UNKNOWN TAG
                // Neither forbidden nor allowed: reported, and the raw token is kept in
                // ret.val only.
            } else {
                ret.invalidTags.add(token);
                ret.val = ret.val + token;
                continue;

            }

            //   CLOSE TAG </tag>
        } else if (endMatcher.find()) {
            String tag = endMatcher.group(1).toLowerCase();

            //is self closing: a closing tag for such a tag is reported and dropped
            if (selfClosed.matcher(tag).find()) {
                ret.invalidTags.add(token);
                continue;
            }
            if (forbiddenTags.matcher(tag).find()) {
                ret.invalidTags.add("/" + tag);
                continue;
            }
            if (!allowedTags.matcher(tag).find()) {
                ret.invalidTags.add(token);
                ret.val = ret.val + token;
                continue;
            } else {

                String cleanToken = "";

                // check tag position in the stack (1-based distance from the top, -1 if absent)
                int pos = openTags.search(tag);
                // if found on top ok; otherwise every tag opened after it is closed as well.
                // When the tag is not on the stack nothing is popped and cleanToken stays empty.
                for (int i = 1; i <= pos; i++) {
                    //pop all elements before tag and close it
                    String poppedTag = openTags.pop();
                    cleanToken = cleanToken + "</" + poppedTag + ">";
                    isAcceptedToken = true;
                }

                token = cleanToken;
            }

        }

        ret.val = ret.val + token;

        if (isAcceptedToken) {
            ret.html = ret.html + token;
            //ret.text = ret.text + " ";
        } else {
            // Plain text and discarded tags are passed through htmlEncodeApexesAndTags
            // before being appended.
            String sanToken = htmlEncodeApexesAndTags(token);
            ret.html = ret.html + sanToken;
            ret.text = ret.text + htmlEncodeApexesAndTags(removeLineFeed(token));
        }

    }

    // must close remaining tags
    while (openTags.size() > 0) {
        //pop the innermost open tag and append its closing tag
        String poppedTag = openTags.pop();
        ret.html = ret.html + "</" + poppedTag + ">";
        ret.val = ret.val + "</" + poppedTag + ">";
    }

    //set boolean value
    ret.isValid = ret.invalidTags.size() == 0;

    return ret;
}

From source file:com.reizes.shiva.utils.CommonUtil.java

/**
 * Checks whether the given string is an acceptable http/https URL.
 *
 * <p>The checks are applied in order; the first one that succeeds accepts the URL:
 * <ol>
 *   <li>{@code UrlValidator} restricted to the {@code http} and {@code https} schemes;</li>
 *   <li>the same validator applied to {@code protocol://host} after the host has been
 *       converted with {@link IDN#toASCII(String)}, so internationalized host names pass;</li>
 *   <li>a lenient fallback: the protocol starts with {@code http} and the host consists only
 *       of the characters allowed by the fallback pattern and does not start with a quote or a dot.</li>
 * </ol>
 *
 * @param url the candidate URL; {@code null} is treated as invalid
 * @return {@code true} if the URL is accepted by one of the checks above; {@code false} if it
 *         is {@code null}, longer than 255 characters, not parseable by {@link URL}, or rejected
 *         by every check
 */
public static boolean isValidHttpUrl(String url) {
    // Reject null instead of failing with a NullPointerException, and cap the length at 255.
    if (url == null || url.length() > 255) {
        return false;
    }

    String[] schemes = { "http", "https" };
    UrlValidator urlValidator = new UrlValidator(schemes);
    if (urlValidator.isValid(url)) {
        return true;
    }

    // Fall back to parsing the URL so that protocol and host can be inspected separately.
    URL urlTemp;
    try {
        urlTemp = new URL(url);
    } catch (MalformedURLException e) {
        return false;
    }
    String protocol = urlTemp.getProtocol();
    String host = urlTemp.getHost();

    String asciiHost;
    try {
        asciiHost = IDN.toASCII(host);
    } catch (IllegalArgumentException e) {
        // IDN.toASCII rejects hosts that cannot be converted; treat that as an invalid URL
        // rather than letting the exception escape.
        return false;
    }
    String forUnicodeUrl = protocol + "://" + asciiHost;
    if (urlValidator.isValid(forUnicodeUrl)) {
        // Valid once the host is expressed in its ASCII form.
        return true;
    }

    // Hosts starting with a double quote or a dot are never accepted.
    if (host.startsWith("\"") || host.startsWith(".")) {
        return false;
    }

    // Lenient last resort: host made only of the characters listed below.
    String regex = "([a-zA-Z0-9-.\\-&/%=?:#$(),.+;~\\_]+)";
    return protocol.startsWith("http") && host.matches(regex);
}

From source file:com.serli.maven.plugin.quality.mojo.LicenseMojo.java

/**
 * @param project/* w w  w . ja  va2  s  .c o  m*/
 *          not null
 * @param url
 *          not null
 * @return a valid URL object from the url string
 * @throws IOException
 *           if any
 */
protected static URL getLicenseURL(MavenProject project, String url) throws IOException {
    URL licenseUrl = null;
    UrlValidator urlValidator = new UrlValidator(UrlValidator.ALLOW_ALL_SCHEMES);
    // UrlValidator does not accept file URLs because the file
    // URLs do not contain a valid authority (no hostname).
    // As a workaround accept license URLs that start with the
    // file scheme.
    if (urlValidator.isValid(url) || StringUtils.defaultString(url).startsWith("file://")) {
        try {
            licenseUrl = new URL(url);
        } catch (MalformedURLException e) {
            throw new MalformedURLException(
                    "The license url '" + url + "' seems to be invalid: " + e.getMessage());
        }
    } else {
        File licenseFile = new File(project.getBasedir(), url);
        if (!licenseFile.exists()) {
            // Workaround to allow absolute path names while
            // staying compatible with the way it was...
            licenseFile = new File(url);
        }
        if (!licenseFile.exists()) {
            throw new IOException("Maven can't find the file '" + licenseFile + "' on the system.");
        }
        try {
            licenseUrl = licenseFile.toURI().toURL();
        } catch (MalformedURLException e) {
            throw new MalformedURLException(
                    "The license url '" + url + "' seems to be invalid: " + e.getMessage());
        }
    }

    return licenseUrl;
}

From source file:com.vportal.portlet.vlinksman.service.impl.LinksServiceImpl.java

/**
 * Validates the name and URL of a link.
 *
 * @param name the link name; must not be null/empty according to {@code Validator.isNull}
 * @param url  the link URL; must not be null/empty and must be a valid
 *             {@code http}, {@code https} or {@code ftp} URL
 * @throws InvalidNameLinkException if {@code name} is missing
 * @throws InvalidUrlLinkException  if {@code url} is missing or is not a valid URL
 */
public void _validate(String name, String url) throws SystemException, PortalException, RemoteException {

    if (Validator.isNull(name)) {
        throw new InvalidNameLinkException();
    }
    if (Validator.isNull(url)) {
        throw new InvalidUrlLinkException();
    }

    // The validator used to be constructed but never consulted, so malformed URLs were
    // accepted as long as they were non-empty.
    String[] schemes = { "http", "https", "ftp" };
    UrlValidator urlValidator = new UrlValidator(schemes);
    if (!urlValidator.isValid(url)) {
        throw new InvalidUrlLinkException();
    }
}

From source file:nl.ivo2u.tiny.boundary.TinyUrlTest.java

/** Checks that an https URL whose path contains plus signs and dots is accepted. */
@Test
public void name() {
    final String candidate =
            "https://www.ivonet.nl/2019/02/05/java-ee-8-+-payara-5-+-microprofile-2.1-+-docker-in-about-a-minute/";
    final UrlValidator httpValidator = new UrlValidator(new String[] { "http", "https" });

    final boolean accepted = httpValidator.isValid(candidate);

    assertTrue(accepted);
}

From source file:nl.ivo2u.tiny.controller.TinyRestController.java

/**
 * Tells whether the given URL is rejected by a validator limited to http and https.
 *
 * @param url the URL to check
 * @return {@code true} when the URL is not a valid http/https URL
 */
private boolean isWrongUrl(final String url) {
    final UrlValidator httpValidator = new UrlValidator(new String[] { "http", "https" });
    final boolean accepted = httpValidator.isValid(url);
    return !accepted;
}

From source file:org.apache.roller.weblogger.util.HTMLSanitizer.java

/**
 * Sanitizes an HTML fragment token by token.
 *
 * <p>The input is split with {@code tokenize} and every token is classified as a
 * comment, an opening tag, a closing tag, or anything else:
 * <ul>
 * <li>comments are recorded as invalid, appended to {@code val} (terminated if
 *     needed) and left out of {@code html};</li>
 * <li>opening tags matching {@code forbiddenTags} are recorded as invalid and dropped;</li>
 * <li>opening tags matching {@code allowedTags} are rebuilt attribute by attribute:
 *     URLs in {@code href}/{@code src}/style values must be valid http or https URLs
 *     (a {@code mailto:} href is accepted when its domain forms a valid URL),
 *     {@code width}/{@code height} must be numeric or a percentage, attributes
 *     starting with {@code on} are dropped, and remaining values go through
 *     {@code encode}; {@code a}, {@code img} and {@code embed} tags without a valid
 *     URL are discarded;</li>
 * <li>other opening or closing tags are recorded as invalid and appended to
 *     {@code val} only;</li>
 * <li>allowed closing tags close every tag opened since the matching opening tag;</li>
 * <li>tokens that are not accepted are written to {@code html} and {@code text}
 *     through {@code htmlEncodeApexesAndTags}.</li>
 * </ul>
 * Tags still open at the end are closed in {@code html} and {@code val}.
 *
 * @param html the markup to sanitize
 * @param allowedTags pattern matched against lower-cased tag names that may be kept
 * @param forbiddenTags pattern matched against lower-cased tag names that are always dropped
 * @return the result holder; {@code isValid} is true only when no invalid tag or
 *         attribute was recorded
 */
public static SanitizeResult sanitizer(String html, Pattern allowedTags, Pattern forbiddenTags) {
    SanitizeResult ret = new SanitizeResult();
    Stack<String> openTags = new Stack<String>();

    List<String> tokens = tokenize(html);

    // Tag and attribute names are compared with ASCII literals, so case folding must not
    // depend on the JVM default locale (e.g. Turkish maps 'I' to a dotless 'i').
    // Fully qualified because the import block is outside this method.
    final java.util.Locale caseLocale = java.util.Locale.ROOT;

    // One validator serves every href/src/style URL check made during this call.
    final UrlValidator urlValidator = new UrlValidator(new String[] { "http", "https" });

    // -------------------   LOOP for every token --------------------------
    for (String token : tokens) {
        boolean isAcceptedToken = false;

        Matcher startMatcher = tagStartPattern.matcher(token);
        Matcher endMatcher = tagClosePattern.matcher(token);

        //--------------------------------------------------------------------------------  COMMENT    <!-- ......... -->
        if (commentPattern.matcher(token).find()) {
            // comments never reach ret.html; an unterminated one is closed before being recorded
            ret.val = ret.val + token + (token.endsWith("-->") ? "" : "-->");
            ret.invalidTags.add(token + (token.endsWith("-->") ? "" : "-->"));
            continue;

            //--------------------------------------------------------------------------------  OPEN TAG    <tag .........>
        } else if (startMatcher.find()) {

            //tag name extraction
            String tag = startMatcher.group(1).toLowerCase(caseLocale);

            //-----------------------------------------------------  FORBIDDEN TAG   <script .........>
            if (forbiddenTags.matcher(tag).find()) {
                ret.invalidTags.add("<" + tag + ">");
                continue;

                // --------------------------------------------------  WELL KNOWN TAG
            } else if (allowedTags.matcher(tag).find()) {

                String cleanToken = "<" + tag;
                String tokenBody = startMatcher.group(2);

                //first test table consistency
                //table tbody tfoot thead th tr td
                if ("thead".equals(tag) || "tbody".equals(tag) || "tfoot".equals(tag) || "tr".equals(tag)) {
                    // Stack.search is 1-based and returns -1 when absent
                    if (openTags.search("table") < 1) {
                        ret.invalidTags.add("<" + tag + ">");
                        continue;
                    }
                } else if ("td".equals(tag) || "th".equals(tag)) {
                    if (openTags.search("tr") < 1) {
                        ret.invalidTags.add("<" + tag + ">");
                        continue;
                    }
                }

                // then test properties
                Matcher attributes = attributesPattern.matcher(tokenBody);

                boolean foundURL = false; // URL flag
                while (attributes.find()) {

                    String attr = attributes.group(1).toLowerCase(caseLocale);
                    String val = attributes.group(2);

                    // we will accept href in case of <A>
                    if ("a".equals(tag) && "href".equals(attr)) { // <a href="......">
                        if (urlValidator.isValid(val)) {
                            foundURL = true;
                        } else {
                            // may be it is a mailto?
                            //  case <a href="mailto:pippo@pippo.com?subject=...."
                            if (val.toLowerCase(caseLocale).startsWith("mailto:") && val.indexOf('@') >= 0) {
                                // validate the mail domain by turning it into an http URL
                                String val1 = "http://www." + val.substring(val.indexOf('@') + 1);
                                if (urlValidator.isValid(val1)) {
                                    foundURL = true;
                                } else {
                                    ret.invalidTags.add(attr + " " + val);
                                    val = "";
                                }
                            } else {
                                ret.invalidTags.add(attr + " " + val);
                                val = "";
                            }
                        }

                    } else if (tag.matches("img|embed") && "src".equals(attr)) { // <img src="......">
                        if (urlValidator.isValid(val)) {
                            foundURL = true;
                        } else {
                            ret.invalidTags.add(attr + " " + val);
                            val = "";
                        }

                    } else if ("href".equals(attr) || "src".equals(attr)) { // <tag src/href="......">   skipped
                        ret.invalidTags.add(tag + " " + attr + " " + val);
                        continue;

                    } else if (attr.matches("width|height")) { // <tag width/height="......">
                        if (!val.toLowerCase(caseLocale).matches("\\d+%|\\d+$")) { // test numeric values
                            ret.invalidTags.add(tag + " " + attr + " " + val);
                            continue;
                        }

                    } else if ("style".equals(attr)) { // <tag style="......">

                        // then test properties
                        Matcher styles = stylePattern.matcher(val);
                        String cleanStyle = "";

                        while (styles.find()) {
                            String styleName = styles.group(1).toLowerCase(caseLocale);
                            String styleValue = styles.group(2);

                            // suppress invalid styles values
                            if (forbiddenStylePattern.matcher(styleValue).find()) {
                                ret.invalidTags.add(tag + " " + attr + " " + styleValue);
                                continue;
                            }

                            // check if valid url
                            Matcher urlStyleMatcher = urlStylePattern.matcher(styleValue);
                            if (urlStyleMatcher.find()) {
                                String url = urlStyleMatcher.group(1);
                                if (!urlValidator.isValid(url)) {
                                    ret.invalidTags.add(tag + " " + attr + " " + styleValue);
                                    continue;
                                }
                            }

                            cleanStyle = cleanStyle + styleName + ":" + encode(styleValue) + ";";

                        }
                        val = cleanStyle;

                    } else if (attr.startsWith("on")) { // skip all javascript events
                        ret.invalidTags.add(tag + " " + attr + " " + val);
                        continue;

                    } else { // by default encode all properies
                        val = encode(val);
                    }

                    cleanToken = cleanToken + " " + attr + "=\"" + val + "\"";
                }
                cleanToken = cleanToken + ">";

                isAcceptedToken = true;

                // for <img> and <a>
                if (tag.matches("a|img|embed") && !foundURL) {
                    isAcceptedToken = false;
                    cleanToken = "";
                }

                token = cleanToken;

                // push the tag if require closure and it is accepted (otherwirse is encoded)
                if (isAcceptedToken
                        && !(standAloneTags.matcher(tag).find() || selfClosed.matcher(tag).find())) {
                    openTags.push(tag);
                }

                // --------------------------------------------------------------------------------  UNKNOWN TAG
            } else {
                ret.invalidTags.add(token);
                ret.val = ret.val + token;
                continue;

            }

            // --------------------------------------------------------------------------------  CLOSE TAG </tag>
        } else if (endMatcher.find()) {
            String tag = endMatcher.group(1).toLowerCase(caseLocale);

            //is self closing
            if (selfClosed.matcher(tag).find()) {
                ret.invalidTags.add(token);
                continue;
            }
            if (forbiddenTags.matcher(tag).find()) {
                ret.invalidTags.add("/" + tag);
                continue;
            }
            if (!allowedTags.matcher(tag).find()) {
                ret.invalidTags.add(token);
                ret.val = ret.val + token;
                continue;
            } else {

                String cleanToken = "";

                // check tag position in the stack
                int pos = openTags.search(tag);
                // if found on top ok; when the tag is not open (pos == -1) nothing is emitted
                for (int i = 1; i <= pos; i++) {
                    //pop all elements before tag and close it
                    String poppedTag = openTags.pop();
                    cleanToken = cleanToken + "</" + poppedTag + ">";
                    isAcceptedToken = true;
                }

                token = cleanToken;
            }

        }

        ret.val = ret.val + token;

        if (isAcceptedToken) {
            ret.html = ret.html + token;
            //ret.text = ret.text + " ";
        } else {
            String sanToken = htmlEncodeApexesAndTags(token);
            ret.html = ret.html + sanToken;
            ret.text = ret.text + htmlEncodeApexesAndTags(removeLineFeed(token));
        }

    }

    // must close remaining tags
    while (openTags.size() > 0) {
        //pop all elements before tag and close it
        String poppedTag = openTags.pop();
        ret.html = ret.html + "</" + poppedTag + ">";
        ret.val = ret.val + "</" + poppedTag + ">";
    }

    //set boolean value
    ret.isValid = ret.invalidTags.size() == 0;

    return ret;
}

From source file:org.easyrec.util.core.Web.java

/**
 * This function checks the syntax of a given url
 * and returns true in case of the right syntax.
 *
 * @param sUrl String/*  ww w. java 2s.  c  om*/
 * @return boolean
 */
public static boolean isValidUrl(String sUrl) {
    String[] schemes = { "http", "https" };
    UrlValidator urlValidator = new UrlValidator(schemes);
    if (urlValidator.isValid(sUrl)) {
        return true;
    } else {
        return urlValidator.isValid(sUrl.replaceFirst("^https?://localhost", "http://www.example.com"));
    }
}