Java String Decode by Charset decodeComponent(final String s, final Charset charset)

Here you can find the source of decodeComponent(final String s, final Charset charset)

Description

Decodes a part of a URL that was encoded by a browser.

License

Apache License

Parameter

Parameter Description
s The string to decode (can be empty).
charset The charset to use to decode the string (should really be StandardCharsets#UTF_8).

Exception

Parameter Description
IllegalArgumentException if the string contains a malformed escape sequence.

Return

The decoded string, or s if there's nothing to decode. If the string to decode is null, returns an empty string.

Declaration

@SuppressWarnings("fallthrough")
public static String decodeComponent(final String s, final Charset charset) 

Method Source Code


//package com.java2s;
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.nio.charset.Charset;

public class Main {
    /**
     * Decodes a percent-encoded URL component.
     * <p>
     * The input is expected to be percent-encoded as per RFC 3986, Section 2
     * (the encoding produced by the JavaScript functions {@code encodeURI}
     * and {@code encodeURIComponent}, but not {@code escape}).  For example,
     * &eacute; (Unicode {@code U+00E9}, UTF-8 {@code 0xC3 0xA9}) is encoded
     * as {@code %C3%A9} or {@code %c3%a9}.
     * <p>
     * Decoding rules applied by this method:
     * <ul>
     *   <li>{@code +} is decoded to a space;</li>
     *   <li>{@code %%} is decoded to a single {@code %};</li>
     *   <li>{@code %XY} (two hexadecimal digits, either case) is decoded to
     *       the byte {@code 0xXY};</li>
     *   <li>ASCII characters are copied as a single byte;</li>
     *   <li>literal non-ASCII characters are encoded with {@code charset},
     *       so that they survive the final byte-to-string conversion.</li>
     * </ul>
     * The resulting bytes are then converted to a string using
     * {@code charset}.  This is similar in purpose to
     * {@link java.net.URLDecoder#decode(String, String)}.  If the input
     * contains neither {@code %} nor {@code +}, nothing is allocated and the
     * argument itself is returned.
     *
     * @param s       The string to decode (can be empty or {@code null}).
     * @param charset The charset to use to decode the string (should really
     *                be {@link java.nio.charset.StandardCharsets#UTF_8}).
     * @return The decoded string, or {@code s} if there's nothing to decode.
     * If the string to decode is {@code null}, returns an empty string.
     * @throws IllegalArgumentException if the string contains a malformed
     *                                  escape sequence.
     */
    public static String decodeComponent(final String s, final Charset charset) {
        if (s == null) {
            return "";
        }
        final int size = s.length();
        if (!needsDecoding(s, size)) {
            return s;
        }
        // One byte per input char is enough for ASCII and escape sequences;
        // the buffer is only grown when a non-ASCII run needs more room.
        byte[] buf = new byte[size];
        int pos = 0; // position in `buf'.
        for (int i = 0; i < size; i++) {
            char c = s.charAt(i);
            if (c == '+') {
                buf[pos++] = ' '; // "+" -> " "
            } else if (c == '%') {
                if (i == size - 1) {
                    throw new IllegalArgumentException("unterminated escape" + " sequence at end of string: " + s);
                }
                c = s.charAt(++i);
                if (c == '%') {
                    buf[pos++] = '%'; // "%%" -> "%"
                    continue;
                }
                if (i == size - 1) {
                    throw new IllegalArgumentException("partial escape" + " sequence at end of string: " + s);
                }
                final char high = decodeHexNibble(c);
                final char low = decodeHexNibble(s.charAt(++i));
                if (high == Character.MAX_VALUE || low == Character.MAX_VALUE) {
                    throw new IllegalArgumentException("invalid escape sequence `%" + s.charAt(i - 1) + s.charAt(i)
                            + "' at index " + (i - 2) + " of: " + s);
                }
                buf[pos++] = (byte) (high * 16 + low);
            } else if (c <= 0x7F) {
                buf[pos++] = (byte) c;
            } else {
                // A plain (byte) cast would keep only the low 8 bits of the
                // char and corrupt the text.  Encode the whole run of
                // non-ASCII chars at once so surrogate pairs stay together.
                int end = i + 1;
                while (end < size && s.charAt(end) > 0x7F) {
                    end++;
                }
                final byte[] encoded = s.substring(i, end).getBytes(charset);
                // Room needed: what is already written, this run, and at most
                // one byte for every input char that is still to be processed.
                final int required = pos + encoded.length + (size - end);
                if (required > buf.length) {
                    final byte[] grown = new byte[required];
                    System.arraycopy(buf, 0, grown, 0, pos);
                    buf = grown;
                }
                System.arraycopy(encoded, 0, buf, pos, encoded.length);
                pos += encoded.length;
                i = end - 1; // the loop increment moves past the run.
            }
        }
        return new String(buf, 0, pos, charset);
    }

    /**
     * Tells whether the string contains at least one character that
     * {@link #decodeComponent(String, Charset)} has to rewrite.
     *
     * @param s    The string to scan.
     * @param size The length of {@code s}.
     * @return {@code true} if {@code s} contains a {@code %} or a {@code +}.
     */
    private static boolean needsDecoding(final String s, final int size) {
        for (int i = 0; i < size; i++) {
            final char c = s.charAt(i);
            if (c == '%' || c == '+') {
                return true;
            }
        }
        return false;
    }

    /**
     * Helper to decode half of a hexadecimal number from a string.
     *
     * @param c The ASCII character of the hexadecimal number to decode.
     *          Must be in the range {@code [0-9a-fA-F]}.
     * @return The hexadecimal value represented in the ASCII character
     * given, or {@link Character#MAX_VALUE} if the character is invalid.
     */
    private static char decodeHexNibble(final char c) {
        if ('0' <= c && c <= '9') {
            return (char) (c - '0');
        } else if ('a' <= c && c <= 'f') {
            return (char) (c - 'a' + 10);
        } else if ('A' <= c && c <= 'F') {
            return (char) (c - 'A' + 10);
        } else {
            return Character.MAX_VALUE;
        }
    }
}

Related

  1. decode(String s, Charset encoding)
  2. decode(String url, Charset charset)
  3. decode(String value, Charset charset)
  4. decode(String value, Charset charset)
  5. decodeCharset(String value, String charset)
  6. decodeFormFields(final String content, final Charset charset)
  7. decodeURL(@Nullable String str, Charset charSet)
  8. decodeWithDefaultCharSet(String urlToDecode)
  9. getCharsetDecoder(String charsetName)