org.marketcetera.util.test.UnicodeDataTest.java Source code

Java tutorial

Introduction

Here is the source code for org.marketcetera.util.test.UnicodeDataTest.java

Source

package org.marketcetera.util.test;

import java.io.File;
import java.io.FileOutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;
import org.apache.commons.lang.CharEncoding;
import org.junit.Test;

import static org.junit.Assert.*;
import static org.marketcetera.util.test.UnicodeData.*;

/**
 * @author tlerios@marketcetera.com
 * @since 0.6.0
 * @version $Id$
 */

/* $License$ */

public class UnicodeDataTest extends TestCaseBase {
    private static final Charset NAT = Charset.defaultCharset();
    private static final Charset UTF8 = Charset.forName(CharEncoding.UTF_8);
    private static final Charset UTF16BE = Charset.forName(CharEncoding.UTF_16BE);
    private static final Charset UTF16LE = Charset.forName(CharEncoding.UTF_16LE);
    private static final Charset UTF32BE = Charset.forName("UTF-32BE");
    private static final Charset UTF32LE = Charset.forName("UTF-32LE");

    private static final String TEST_FILE_PREFIX = DIR_TARGET + File.separator + "unicode_";

    private static void singleValid(String str, char[] chars, int[] ucps, byte[] nat, byte[] utf8, byte[] utf16be,
            byte[] utf16le, byte[] utf32be, byte[] utf32le) {
        assertArrayEquals(str.toCharArray(), chars);
        int i = 0;
        int j = 0;
        while (i < str.length()) {
            int ucp = str.codePointAt(i);
            assertEquals("At code point position " + j, ucp, ucps[j++]);
            i += Character.charCount(ucp);
        }
        assertArrayEquals(str.getBytes(), nat);
        assertArrayEquals(str.getBytes(UTF8), utf8);
        assertArrayEquals(str.getBytes(UTF16BE), utf16be);
        assertArrayEquals(str.getBytes(UTF16LE), utf16le);
        assertArrayEquals(str.getBytes(UTF32BE), utf32be);
        assertArrayEquals(str.getBytes(UTF32LE), utf32le);
    }

    private static void singleInvalid(Charset cs, byte[] encoded) {
        CharsetDecoder dec = cs.newDecoder();
        dec.onMalformedInput(CodingErrorAction.REPORT);
        try {
            dec.decode(ByteBuffer.wrap(encoded));
            fail();
        } catch (CharacterCodingException ex) {
            // Desired.
        }
    }

    private static void writeFile(Charset charset, String fileName, byte[] data) throws Exception {
        FileOutputStream out = new FileOutputStream(TEST_FILE_PREFIX + fileName + ".xml");
        out.write(("<?xml version=\"1.0\" encoding=\"" + charset.name() + "\"?><root>").getBytes(charset));
        out.write(data);
        out.write("</root>".getBytes(charset));
        out.close();
    }

    @Test
    public void valid() {
        singleValid(SPACE, SPACE_CHARS, SPACE_UCPS, SPACE_NAT, SPACE_UTF8, SPACE_UTF16BE, SPACE_UTF16LE,
                SPACE_UTF32BE, SPACE_UTF32LE);
        singleValid(HELLO_EN, HELLO_EN_CHARS, HELLO_EN_UCPS, HELLO_EN_NAT, HELLO_EN_UTF8, HELLO_EN_UTF16BE,
                HELLO_EN_UTF16LE, HELLO_EN_UTF32BE, HELLO_EN_UTF32LE);
        singleValid(LANGUAGE_NO, LANGUAGE_NO_CHARS, LANGUAGE_NO_UCPS, LANGUAGE_NO_NAT, LANGUAGE_NO_UTF8,
                LANGUAGE_NO_UTF16BE, LANGUAGE_NO_UTF16LE, LANGUAGE_NO_UTF32BE, LANGUAGE_NO_UTF32LE);
        singleValid(HELLO_GR, HELLO_GR_CHARS, HELLO_GR_UCPS, HELLO_GR_NAT, HELLO_GR_UTF8, HELLO_GR_UTF16BE,
                HELLO_GR_UTF16LE, HELLO_GR_UTF32BE, HELLO_GR_UTF32LE);
        singleValid(HOUSE_AR, HOUSE_AR_CHARS, HOUSE_AR_UCPS, HOUSE_AR_NAT, HOUSE_AR_UTF8, HOUSE_AR_UTF16BE,
                HOUSE_AR_UTF16LE, HOUSE_AR_UTF32BE, HOUSE_AR_UTF32LE);
        singleValid(GOODBYE_JA, GOODBYE_JA_CHARS, GOODBYE_JA_UCPS, GOODBYE_JA_NAT, GOODBYE_JA_UTF8,
                GOODBYE_JA_UTF16BE, GOODBYE_JA_UTF16LE, GOODBYE_JA_UTF32BE, GOODBYE_JA_UTF32LE);
        singleValid(GOATS_LNB, GOATS_LNB_CHARS, GOATS_LNB_UCPS, GOATS_LNB_NAT, GOATS_LNB_UTF8, GOATS_LNB_UTF16BE,
                GOATS_LNB_UTF16LE, GOATS_LNB_UTF32BE, GOATS_LNB_UTF32LE);
        singleValid(G_CLEF_MSC, G_CLEF_MSC_CHARS, G_CLEF_MSC_UCPS, G_CLEF_MSC_NAT, G_CLEF_MSC_UTF8,
                G_CLEF_MSC_UTF16BE, G_CLEF_MSC_UTF16LE, G_CLEF_MSC_UTF32BE, G_CLEF_MSC_UTF32LE);
        singleValid(COMBO, COMBO_CHARS, COMBO_UCPS, COMBO_NAT, COMBO_UTF8, COMBO_UTF16BE, COMBO_UTF16LE,
                COMBO_UTF32BE, COMBO_UTF32LE);
    }

    @Test
    public void invalid() {
        CharsetEncoder enc = UTF8.newEncoder();
        assertFalse(enc.canEncode(INVALID));
        assertFalse(enc.canEncode(new String(INVALID_CHARS)));
        assertFalse(enc.canEncode(new String(INVALID_UCPS, 0, INVALID_UCPS.length)));

        singleInvalid(UTF8, INVALID_UTF8);
        singleInvalid(UTF16BE, INVALID_UTF16BE);
        singleInvalid(UTF16LE, INVALID_UTF16LE);
        singleInvalid(UTF32BE, INVALID_UTF32BE);
        singleInvalid(UTF32LE, INVALID_UTF32LE);
    }

    /**
     * Writes a set of files that can opened using external
     * unicode-aware tools (e.g. Firefox) to check visually the test
     * data.
     *
     * @throws Exception Thrown if an I/O error occurs.
     */

    @Test
    public void writeFiles() throws Exception {
        writeFile(NAT, "native", COMBO_NAT);
        writeFile(UTF8, "utf8", COMBO_UTF8);
        writeFile(UTF16BE, "utf16be", COMBO_UTF16BE);
        writeFile(UTF16LE, "utf16le", COMBO_UTF16LE);
        writeFile(UTF32BE, "utf32be", COMBO_UTF32BE);
        writeFile(UTF32LE, "utf32le", COMBO_UTF32LE);
    }
}