Java Path Normalize normalizePath(String pathname)

Here you can find the source of normalizePath(String pathname)

Description

Normalizes a path as per <a href="http://tools.ietf.org/html/rfc3986#section-5.2.4">rfc3986</a>.

License

Open Source License

Parameter

Parameter Description
pathname raw path

Return

normalized path

Declaration

public static String normalizePath(String pathname) 

Method Source Code

//package com.java2s;
/* From www.java2s.com
 * Copyright 2014 Red Hat, Inc.
 *
 *  All rights reserved. This program and the accompanying materials
 *  are made available under the terms of the Eclipse Public License v1.0
 *  and Apache License v2.0 which accompanies this distribution.
 *
 *  The Eclipse Public License is available at
 *  http://www.eclipse.org/legal/epl-v10.html
 *
 *  The Apache License v2.0 is available at
 *  http://www.opensource.org/licenses/apache2.0.php
 *
 *  You may elect to redistribute this code under either of these licenses.
 */

public class Main {
    /**
     * Normalizes a path as per <a href="http://tools.ietf.org/html/rfc3986#section-5.2.4">rfc3986</a>.
     *
     * Percent-encoded unreserved characters (ALPHA, DIGIT, '-', '.', '_', '~') are decoded first, then dot
     * segments are removed.
     *
     * There are 2 extra transformations that are not part of the spec but kept for backwards compatibility:
     *
     * double slash // will be converted to single slash and the path will always start with slash.
     *
     * @param pathname raw path, may be {@code null} or empty
     * @return normalized path; {@code "/"} when {@code pathname} is {@code null} or empty
     * @throws IllegalArgumentException if the path contains a '%' that is not followed by two ASCII hex digits
     */
    public static String normalizePath(String pathname) {
        // a missing or empty path normalizes to the root path
        if (pathname == null || pathname.length() == 0) {
            return "/";
        }

        StringBuilder ibuf = new StringBuilder(pathname.length() + 1);

        // Not standard!!!
        if (pathname.charAt(0) != '/') {
            ibuf.append('/');
        }

        ibuf.append(pathname);

        // decode unreserved chars described in
        // http://tools.ietf.org/html/rfc3986#section-2.4
        // (the buffer shrinks whenever an escape is decoded, so the length is re-read on every iteration)
        for (int i = 0; i < ibuf.length(); i++) {
            if (ibuf.charAt(i) == '%') {
                decodeUnreserved(ibuf, i);
            }
        }

        // remove dots as described in
        // http://tools.ietf.org/html/rfc3986#section-5.2.4
        return removeDots(ibuf);
    }

    /**
     * Decodes, in place, the percent-escape starting at {@code start} when it encodes an unreserved character.
     * Escapes of any other octet are left untouched.
     *
     * @param path  buffer being normalized
     * @param start index of the '%' character
     * @throws IllegalArgumentException if fewer than two characters follow the '%', or if they are not both
     *                                  ASCII hex digits
     */
    private static void decodeUnreserved(StringBuilder path, int start) {
        if (start + 3 > path.length()) {
            throw new IllegalArgumentException("Invalid position for escape character: " + start);
        }

        // Parse the two digits by hand: Integer.parseInt would also accept a sign ("%+1") and non-ASCII
        // Unicode digits, neither of which is a valid percent-encoding.
        int high = hexValue(path.charAt(start + 1));
        int low = hexValue(path.charAt(start + 2));
        if (high < 0 || low < 0) {
            throw new IllegalArgumentException("Invalid escape sequence: " + path.substring(start, start + 3));
        }

        int unescaped = (high << 4) | low;
        if (isUnreserved(unescaped)) {
            // replace "%XX" with the single decoded character
            path.setCharAt(start, (char) unescaped);
            path.delete(start + 1, start + 3);
        }
    }

    /** Returns the value of an ASCII hex digit, or -1 when {@code c} is not one. */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /** Tells whether the octet is one of ALPHA / DIGIT / '-' / '.' / '_' / '~'. */
    private static boolean isUnreserved(int octet) {
        return (octet >= 0x41 && octet <= 0x5A) // upper case ALPHA
                || (octet >= 0x61 && octet <= 0x7A) // lower case ALPHA
                || (octet >= 0x30 && octet <= 0x39) // DIGIT
                || octet == 0x2D // HYPHEN
                || octet == 0x2E // PERIOD
                || octet == 0x5F // UNDERSCORE
                || octet == 0x7E; // TILDE
    }

    /**
     * Removes dot segments as per <a href="http://tools.ietf.org/html/rfc3986#section-5.2.4">rfc3986</a>.
     *
     * One extra transformation that is not part of the spec is kept for backwards compatibility:
     * double slash // will be converted to single slash.
     *
     * Unlike {@link #normalizePath(String)}, no leading slash is added: a relative input such as
     * {@code "./a"} yields {@code "a"}.
     *
     * @param path raw path
     * @return path without dot segments, or {@code null} when {@code path} is {@code null}
     */
    public static String removeDots(CharSequence path) {

        if (path == null) {
            return null;
        }

        // the input may be swapped for "/" below, so work on a local instead of the parameter
        CharSequence input = path;
        final StringBuilder obuf = new StringBuilder(input.length());

        int i = 0;
        while (i < input.length()) {
            if (matches(input, i, "./")) {
                i += 2;
            } else if (matches(input, i, "../")) {
                i += 3;
            } else if (matches(input, i, "/./")) {
                // preserve last slash
                i += 2;
            } else if (matches(input, i, "/.", true)) {
                // trailing "/." becomes "/"
                input = "/";
                i = 0;
            } else if (matches(input, i, "/../")) {
                // preserve last slash
                i += 3;
                dropLastSegment(obuf);
            } else if (matches(input, i, "/..", true)) {
                // trailing "/.." becomes "/" and discards the previous segment
                input = "/";
                i = 0;
                dropLastSegment(obuf);
            } else if (matches(input, i, ".", true) || matches(input, i, "..", true)) {
                // the remaining input is only "." or "..": nothing left to emit
                break;
            } else {
                if (input.charAt(i) == '/') {
                    i++;
                    // Not standard!!!
                    // but common // -> /
                    if (obuf.length() == 0 || obuf.charAt(obuf.length() - 1) != '/') {
                        obuf.append('/');
                    }
                }
                // copy the segment up to (not including) the next slash
                int pos = indexOfSlash(input, i);
                if (pos != -1) {
                    obuf.append(input, i, pos);
                    i = pos;
                } else {
                    obuf.append(input, i, input.length());
                    break;
                }
            }
        }

        return obuf.toString();
    }

    /**
     * Removes the last segment and its preceding "/" (if any) from the output buffer. When the buffer holds
     * no "/" at all the whole content is that last segment, so the buffer is emptied.
     */
    private static void dropLastSegment(StringBuilder obuf) {
        obuf.setLength(Math.max(obuf.lastIndexOf("/"), 0));
    }

    /** Tells whether {@code path} contains {@code what} at offset {@code start}. */
    private static boolean matches(CharSequence path, int start, String what) {
        return matches(path, start, what, false);
    }

    /**
     * Tells whether {@code path} contains {@code what} at offset {@code start}; when {@code exact} is set,
     * {@code what} must also be everything that remains of {@code path} from {@code start}.
     */
    private static boolean matches(CharSequence path, int start, String what, boolean exact) {
        int remaining = path.length() - start;
        if (exact ? remaining != what.length() : remaining < what.length()) {
            return false;
        }

        for (int i = 0; i < what.length(); i++) {
            if (path.charAt(start + i) != what.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    /** Returns the index of the first '/' at or after {@code start}, or -1 when there is none. */
    private static int indexOfSlash(CharSequence str, int start) {
        for (int i = start; i < str.length(); i++) {
            if (str.charAt(i) == '/') {
                return i;
            }
        }

        return -1;
    }
}

Related

  1. normalizePath(String path)
  2. normalizePath(String path, char sep)
  3. normalizePath(String path, String oldFileSeperator, String newFileSeparator)
  4. normalizePath(String path, String separator)
  5. normalizePath(String pathFragment)
  6. normalizePathElement(String name)
  7. normalizePathname(String pathname)
  8. normalizePathPart(final String path)
  9. normalizePaths(String path)