001/*
002 *  jDTAUS Core Utilities
003 *  Copyright (C) 2005 Christian Schulte
004 *  <cs@schulte.it>
005 *
006 *  This library is free software; you can redistribute it and/or
007 *  modify it under the terms of the GNU Lesser General Public
008 *  License as published by the Free Software Foundation; either
009 *  version 2.1 of the License, or any later version.
010 *
011 *  This library is distributed in the hope that it will be useful,
012 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
013 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014 *  Lesser General Public License for more details.
015 *
016 *  You should have received a copy of the GNU Lesser General Public
017 *  License along with this library; if not, write to the Free Software
018 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
019 *
020 */
021package org.jdtaus.core.nio.util;
022
023import java.io.BufferedReader;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.InputStreamReader;
027import java.net.URL;
028import java.nio.ByteBuffer;
029import java.nio.CharBuffer;
030import java.nio.charset.Charset;
031import java.nio.charset.spi.CharsetProvider;
032import java.util.Enumeration;
033import java.util.HashMap;
034import java.util.Iterator;
035import java.util.LinkedList;
036import java.util.List;
037import java.util.Map;
038
/**
 * Charset coder and decoder utility.
 * <p>Charset provider implementations cannot be used in every environment
 * (e.g. WebStart, Maven) unless they are installed in the JRE extensions
 * directory where they are available to the system classloader. This class
 * reads the same service provider files as the platform implementation
 * ({@code java.nio.charset.spi.CharsetProvider}) but loads the
 * {@code CharsetProvider} classes using the current thread's context
 * classloader, falling back to the system classloader only if no context
 * classloader is set.</p>
 * <p>Providers are discovered once, on first use, and charsets are cached by
 * the name they were requested with. All access to these caches is
 * synchronized on {@code Charsets.class}.</p>
 *
 * @author <a href="mailto:cs@schulte.it">Christian Schulte</a>
 * @version $JDTAUS: Charsets.java 8743 2012-10-07 03:06:20Z schulte $
 */
public class Charsets
{
    //--Charsets----------------------------------------------------------------

    /** Name of the service provider resources listing provider classes. */
    private static final String PROVIDER_RESOURCE =
        "META-INF/services/java.nio.charset.spi.CharsetProvider";

    /**
     * Cached {@code CharsetProvider} instances.
     * Guarded by {@code Charsets.class}.
     */
    private static final List providers = new LinkedList();

    /**
     * Cached {@code Charset} instances by name.
     * Guarded by {@code Charsets.class}.
     */
    private static final Map charsets = new HashMap( 100 );

    /**
     * Flag indicating that the service provider files have been read.
     * Guarded by {@code Charsets.class}. A separate flag is needed because
     * an empty provider list is a valid result of a completed scan.
     */
    private static boolean providersLoaded;

    /** Private constructor. */
    private Charsets()
    {
        super();
    }

    /**
     * Gets a charset for the given name.
     * <p>Lookup order: the cache, then every loaded {@code CharsetProvider}
     * in discovery order, then {@code Charset.forName}.</p>
     *
     * @param name the name of the charset to return.
     *
     * @return a {@code Charset} corresponding to {@code name}.
     *
     * @throws IOException if reading the service provider files fails.
     * @throws ClassNotFoundException if a service provider file defines
     * a class which cannot be loaded.
     * @throws InstantiationException if creating an instance of a
     * {@code CharsetProvider} fails.
     * @throws IllegalAccessException if a {@code CharsetProvider} class
     * does not define a public no-arg constructor.
     * @throws java.nio.charset.IllegalCharsetNameException if {@code name} is
     * no valid charset name.
     * @throws java.nio.charset.UnsupportedCharsetException if {@code name} is
     * not supported.
     */
    private static Charset getCharset( final String name )
        throws IOException, ClassNotFoundException, InstantiationException,
               IllegalAccessException
    {
        // A single lock covers provider loading and both caches; neither the
        // list nor the map is safe to read while another thread updates it.
        synchronized ( Charsets.class )
        {
            if ( !providersLoaded )
            {
                // Publish the providers only after every file was processed
                // so that a failure never leaves a partially filled list.
                providers.addAll( loadProviders() );
                providersLoaded = true;
            }

            Charset charset = ( Charset ) charsets.get( name );

            if ( charset == null )
            {
                // Search all available providers for a charset matching
                // "name".
                for ( final Iterator it = providers.iterator();
                      it.hasNext() && charset == null; )
                {
                    charset = ( ( CharsetProvider ) it.next() ).
                        charsetForName( name );

                }

                // Fall back to platform charsets if nothing is found so far.
                if ( charset == null )
                {
                    charset = Charset.forName( name );
                }

                charsets.put( name, charset );
            }

            return charset;
        }
    }

    /**
     * Instantiates all providers named in the service provider files visible
     * to the current thread's context classloader, or to the system
     * classloader if no context classloader is set.
     *
     * @return a new list holding the provider instances in discovery order.
     *
     * @throws IOException if reading the service provider files fails.
     * @throws ClassNotFoundException if a listed class cannot be loaded.
     * @throws InstantiationException if creating a provider instance fails.
     * @throws IllegalAccessException if a provider class does not define a
     * public no-arg constructor.
     */
    private static List loadProviders()
        throws IOException, ClassNotFoundException, InstantiationException,
               IllegalAccessException
    {
        ClassLoader classLoader =
            Thread.currentThread().getContextClassLoader();

        if ( classLoader == null )
        {
            classLoader = ClassLoader.getSystemClassLoader();
        }

        assert classLoader != null :
            "Expected system classloader to always be available.";

        final List loaded = new LinkedList();
        final Enumeration providerFiles =
            classLoader.getResources( PROVIDER_RESOURCE );

        if ( providerFiles != null )
        {
            while ( providerFiles.hasMoreElements() )
            {
                readProviderFile( ( URL ) providerFiles.nextElement(),
                                  classLoader, loaded );

            }
        }

        return loaded;
    }

    /**
     * Reads one service provider file and appends an instance of every class
     * it names to {@code loaded}.
     * <p>On each line everything from the first {@code '#'} on is a comment;
     * surrounding whitespace and blank lines are ignored.</p>
     *
     * @param url the location of the service provider file.
     * @param classLoader the classloader to load the named classes with.
     * @param loaded the list to add the created instances to.
     *
     * @throws IOException if reading the file fails.
     * @throws ClassNotFoundException if a listed class cannot be loaded.
     * @throws InstantiationException if creating a provider instance fails.
     * @throws IllegalAccessException if a provider class does not define a
     * public no-arg constructor.
     */
    private static void readProviderFile( final URL url,
                                          final ClassLoader classLoader,
                                          final List loaded )
        throws IOException, ClassNotFoundException, InstantiationException,
               IllegalAccessException
    {
        final InputStream in = url.openStream();

        try
        {
            final BufferedReader reader =
                new BufferedReader( new InputStreamReader( in, "UTF-8" ) );

            String line;
            while ( ( line = reader.readLine() ) != null )
            {
                // Strip a trailing comment instead of dropping the line so
                // that "pkg.Provider # note" still registers the provider.
                final int comment = line.indexOf( '#' );
                final String className =
                    ( comment < 0 ? line : line.substring( 0, comment ) ).
                    trim();

                if ( className.length() > 0 )
                {
                    loaded.add(
                        classLoader.loadClass( className ).newInstance() );

                }
            }
        }
        finally
        {
            in.close();
        }
    }

    /**
     * Gets a charset for the given name, reporting failures to set up the
     * charset providers as {@code AssertionError}.
     *
     * @param name the name of the charset to return.
     *
     * @return a {@code Charset} corresponding to {@code name}.
     *
     * @throws java.nio.charset.IllegalCharsetNameException if {@code name} is
     * no valid charset name.
     * @throws java.nio.charset.UnsupportedCharsetException if {@code name} is
     * not supported.
     */
    private static Charset lookupCharset( final String name )
    {
        try
        {
            return getCharset( name );
        }
        catch ( final ClassNotFoundException e )
        {
            throw new AssertionError( e );
        }
        catch ( final InstantiationException e )
        {
            throw new AssertionError( e );
        }
        catch ( final IllegalAccessException e )
        {
            throw new AssertionError( e );
        }
        catch ( final IOException e )
        {
            throw new AssertionError( e );
        }
    }

    /**
     * Encodes a given string to an array of bytes representing the characters
     * of the string in a given charset.
     *
     * @param str the string to encode.
     * @param charset the name of the charset to use.
     *
     * @return a new array holding exactly the encoded bytes of {@code str}.
     *
     * @throws NullPointerException if {@code str} or {@code charset} is
     * {@code null}.
     * @throws java.nio.charset.IllegalCharsetNameException if {@code charset}
     * is no valid charset name.
     * @throws java.nio.charset.UnsupportedCharsetException if {@code charset}
     * is not supported.
     */
    public static byte[] encode( final String str, final String charset )
    {
        if ( str == null )
        {
            throw new NullPointerException( "str" );
        }
        if ( charset == null )
        {
            throw new NullPointerException( "charset" );
        }

        final ByteBuffer buf = lookupCharset( charset ).encode( str );

        // Copy only the bytes between position and limit; the backing array,
        // if any, may be larger than the encoded data.
        final byte[] ret = new byte[ buf.remaining() ];
        buf.get( ret );
        return ret;
    }

    /**
     * Decodes the bytes of a given array to a string.
     *
     * @param bytes the bytes to decode.
     * @param charset the name of the charset to use.
     *
     * @return the string decoded from all of {@code bytes}.
     *
     * @throws NullPointerException if {@code bytes} or {@code charset} is
     * {@code null}.
     * @throws java.nio.charset.IllegalCharsetNameException if {@code charset}
     * is no valid charset name.
     * @throws java.nio.charset.UnsupportedCharsetException if {@code charset}
     * is not supported.
     */
    public static String decode( final byte[] bytes, final String charset )
    {
        if ( bytes == null )
        {
            throw new NullPointerException( "bytes" );
        }

        return decode( bytes, 0, bytes.length, charset );
    }

    /**
     * Decodes the bytes of a given array to a string.
     *
     * @param bytes the bytes to decode.
     * @param off the offset from where to start decoding.
     * @param count the number of bytes to decode starting at {@code off}.
     * @param charset the name of the charset to use.
     *
     * @return the string decoded from the {@code count} bytes starting at
     * {@code off}.
     *
     * @throws NullPointerException if {@code bytes} or {@code charset} is
     * {@code null}.
     * @throws IndexOutOfBoundsException if {@code off} is negative or greater
     * than the length of {@code bytes} or {@code count} is negative or
     * {@code off + count} is greater than the length of {@code bytes}.
     * @throws java.nio.charset.IllegalCharsetNameException if {@code charset}
     * is no valid charset name.
     * @throws java.nio.charset.UnsupportedCharsetException if {@code charset}
     * is not supported.
     */
    public static String decode( final byte[] bytes, final int off,
                                   final int count, final String charset )
    {
        if ( bytes == null )
        {
            throw new NullPointerException( "bytes" );
        }
        if ( charset == null )
        {
            throw new NullPointerException( "charset" );
        }
        if ( off < 0 || off > bytes.length )
        {
            throw new ArrayIndexOutOfBoundsException( off );
        }
        // Compare against the remaining length instead of computing
        // off + count, which could overflow and pass the check.
        if ( count < 0 || count > bytes.length - off )
        {
            throw new ArrayIndexOutOfBoundsException(
                "off=" + off + ", count=" + count + ", length=" +
                bytes.length );

        }

        // CharBuffer.toString() yields exactly the remaining characters,
        // whether or not the buffer is backed by an accessible array.
        return lookupCharset( charset ).
            decode( ByteBuffer.wrap( bytes, off, count ) ).toString();

    }

    //----------------------------------------------------------------Charsets--
}