Java Path to URL getUriNormalizedContainerAndPathWithoutSlash(String stringUriValue, String containerUrl, boolean normalizeUrlMode, boolean matchBaseUrlMode)

Here you can find the source of getUriNormalizedContainerAndPathWithoutSlash(String stringUriValue, String containerUrl, boolean normalizeUrlMode, boolean matchBaseUrlMode)

Description

TODO: replace this implementation by simply parsing the value with java.net.URI.

License

Open Source License

Parameter

Parameter Description
stringUriValue (encoded) absolute ex. http://data.ozwillo.com/dc//type/sample.marka.field//1 or relative (then uses provided containerUrl) ex. type//sample.marka.field///1
containerUrl used only for default mode or if given URI is relative, not used to check absolute URL
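normalizeUrlMode if true, parses the value with java.net.URI, normalizes it, and requires an absolute URI to use the http or https scheme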
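matchBaseUrlMode if true (and normalizeUrlMode is false), splits off the base URL by pattern matching, collapses repeated slashes, and requires an absolute URI to use the http or https scheme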

Exception

Parameter Description
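URISyntaxException if normalizeUrlMode is set and the value cannot be parsed as a URI
MalformedURLException if the value is an absolute URI whose scheme is not http or https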

Return

the given Datacore URI's container URL (i.e. base URL, or null if URI is relative) and (undecoded) URL Path without slash ex. [ "http://data.ozwillo.com/", "dc/type/sample.marka.field/1" ]

Declaration

public static String[] getUriNormalizedContainerAndPathWithoutSlash(String stringUriValue, String containerUrl,
        boolean normalizeUrlMode, boolean matchBaseUrlMode) throws URISyntaxException, MalformedURLException 

Method Source Code

//package com.java2s;
//License from project: Open Source License 

import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;

import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class Main {
    /**
     * Matches the base URL at the very start of an absolute URI: the scheme (captured as
     * group 1), then "://", then every character up to (and excluding) the next slash.
     * When it does not match, the value is treated as a relative URI.
     * URI scheme characters : see http://stackoverflow.com/questions/3641722/valid-characters-for-uri-schemes
     */
    private static final Pattern ANY_BASE_URL_PATTERN = Pattern
            .compile("^([a-zA-Z][a-zA-Z0-9\\.\\-\\+]*)://[^/]+");
    /** Matches any run of one or more slashes. */
    private static final Pattern MULTI_SLASH_PATTERN = Pattern.compile("/+");
    /** Matches the (possibly empty) run of slashes at the start of a string. */
    private static final Pattern FRONT_SLASHES_PATTERN = Pattern.compile("^/*");

    /**
     * Splits a Datacore URI into its base (container) URL and its path without leading slash.
     * Does not URL decode.
     * <ul>
     * <li>If normalizeUrlMode : parses the value with {@link URI}, normalizes it (which also
     * collapses repeated slashes in the path), and if it is absolute checks that its scheme
     * is http or https (case-insensitively) and that it has a host.</li>
     * <li>Else if matchBaseUrlMode : uses pattern matching to split off the base URL,
     * collapses repeated slashes, and if the value is absolute checks that its scheme is
     * http or https (case-insensitively).</li>
     * <li>Else ("exact container" mode) : merely splits at the given containerUrl's length,
     * without checking that the value actually starts with it.</li>
     * </ul>
     * TODO replace by merely using URI to parse !
     * LATER optimize with containerUrl cache...
     *
     * @param stringUriValue (encoded) absolute ex. http://data.ozwillo.com/dc//type/sample.marka.field//1
     * or relative ex. type//sample.marka.field///1 ; must not be null
     * @param containerUrl used only in the default mode (both flags false), where it must be
     * non-null and no longer than stringUriValue ; never used to check an absolute URL
     * @param normalizeUrlMode selects the URI-parsing mode ; takes precedence over matchBaseUrlMode
     * @param matchBaseUrlMode selects the pattern-matching mode
     * @return a two-element array : [0] the base URL, which is the scheme and authority without
     * trailing slash in the two parsing modes (null if the URI is relative, so that the caller
     * decides on a default) or containerUrl as given in the default mode ; [1] the (undecoded)
     * remainder without leading slash,
     * ex. [ "http://data.ozwillo.com", "dc/type/sample.marka.field/1" ]
     * @throws URISyntaxException in normalizeUrlMode, if the value is not a valid URI or is an
     * absolute URI without host
     * @throws MalformedURLException if the value is an absolute URI whose scheme is not http(s)
     */
    public static String[] getUriNormalizedContainerAndPathWithoutSlash(String stringUriValue, String containerUrl,
            boolean normalizeUrlMode, boolean matchBaseUrlMode) throws URISyntaxException, MalformedURLException {
        if (normalizeUrlMode) {
            return splitUsingUriParser(stringUriValue);
        }
        if (matchBaseUrlMode) {
            return splitUsingBaseUrlPattern(stringUriValue);
        }
        // default, "exact container" mode :
        // fastest, assumes that URI starts exactly with containerUrl ending by single slash
        // (no relative URL or https vs http or additional slash)
        String urlPathWithoutSlash = stringUriValue.substring(containerUrl.length());
        return new String[] { containerUrl, urlPathWithoutSlash };
    }

    /** Implements normalizeUrlMode : parses and normalizes with java.net.URI, then splits. */
    private static String[] splitUsingUriParser(String stringUriValue)
            throws URISyntaxException, MalformedURLException {
        // NB. Datacore URIs should ALSO be URLs
        // normalize() resolves "." / ".." and collapses repeated slashes, it does not decode ;
        // from ex. http://localhost:8180//dc/type//country/UK
        URI uriValue = new URI(stringUriValue).normalize();
        if (uriValue.isAbsolute()) {
            if (!isHttpOrS(uriValue.getScheme())) {
                throw new MalformedURLException(
                        "Datacore URIs should be HTTP(S) but is " + uriValue.getScheme());
            }
            if (uriValue.getHost() == null) {
                // opaque (http:foo), authority-less (http:/foo) or registry-based authority
                throw new URISyntaxException(stringUriValue, "Absolute Datacore URIs should have a host");
            }
        }

        // The URI string always ends with path[?query][#fragment], so the base URL is whatever
        // precedes that tail. Measuring the tail on the raw (still encoded) components keeps
        // the split right even with user info or a non canonical port, which rebuilding the
        // base from host and port would not.
        String normalizedUri = uriValue.toString();
        int tailLength = uriValue.getRawPath().length(); // never null : URI is hierarchical here
        if (uriValue.getRawQuery() != null) {
            tailLength += 1 + uriValue.getRawQuery().length(); // 1 for '?'
        }
        if (uriValue.getRawFragment() != null) {
            tailLength += 1 + uriValue.getRawFragment().length(); // 1 for '#'
        }
        int baseLength = normalizedUri.length() - tailLength;

        // and not uriValue.getPath() else would already be decoded
        String urlPathWithoutSlash = normalizedUri.substring(baseLength);
        if (urlPathWithoutSlash.length() != 0 && urlPathWithoutSlash.charAt(0) == '/') {
            urlPathWithoutSlash = urlPathWithoutSlash.substring(1); // ex. dc/type/country/UK
        }

        // no base URL parsed (relative URI) : DON'T set default, let caller decide
        String uriBaseUrl = (baseLength == 0) ? null : normalizedUri.substring(0, baseLength);
        return new String[] { uriBaseUrl, urlPathWithoutSlash };
    }

    /** Implements matchBaseUrlMode : splits off the base URL by pattern, then cleans up slashes. */
    private static String[] splitUsingBaseUrlPattern(String stringUriValue) throws MalformedURLException {
        // ex. http://data.ozwillo.com/dc//type/sample.marka.field//1
        Matcher baseUrlMatcher = ANY_BASE_URL_PATTERN.matcher(stringUriValue);
        String uriBaseUrl = null; // stays null for a relative URI : let caller decide on a default
        String remainder = stringUriValue;
        if (baseUrlMatcher.find()) {
            // checking that URI is an HTTP(S) one
            if (!isHttpOrS(baseUrlMatcher.group(1))) {
                throw new MalformedURLException("Datacore URIs should be HTTP(S)");
            }
            uriBaseUrl = baseUrlMatcher.group(); // full match without end slash, ex. http://data.ozwillo.com
            // the pattern is anchored at the start, so what follows the match is the whole rest
            remainder = stringUriValue.substring(baseUrlMatcher.end()); // ex. /dc//type/sample.marka.field//1
        }
        return new String[] { uriBaseUrl, collapseSlashesAndStripLeading(remainder) };
    }

    /** Replaces every run of slashes by a single one, then removes the leading slash if any. */
    private static String collapseSlashesAndStripLeading(String path) {
        String singleSlashed = MULTI_SLASH_PATTERN.matcher(path).replaceAll("/"); // ex. /dc/type/sample.marka.field/1
        return FRONT_SLASHES_PATTERN.matcher(singleSlashed).replaceAll(""); // ex. dc/type/sample.marka.field/1
    }

    /**
     * Tells whether the given URI scheme is http or https, ignoring case and independently
     * of the default locale.
     * @param protocol the scheme to check, may be null
     * @return true if it is http or https, false otherwise including when null
     */
    private static boolean isHttpOrS(String protocol) {
        return "http".equalsIgnoreCase(protocol) || "https".equalsIgnoreCase(protocol);
    }
}

Related

  1. fileToURIString(File file)
  2. getUri(String baseApiUrl, String path)
  3. getURL(String aPath)
  4. getUrl(String baseUrl, String absPath)
  5. getURL(String host, int port, String path, boolean isHTTPS)
  6. getUrl(String inPath)