Java String Normalize normalizeWidth(String text)

Here you can find the source of normalizeWidth(String text)

Description

Normalize the <a href="https://en.wikipedia.org/wiki/Halfwidth_and_fullwidth_forms"> width</a> of characters in the supplied text.

License

Open Source License

Parameter

Parameter Description
text the text whose character widths should be normalized

Return

Normalized-width text

Declaration

public static String normalizeWidth(String text) 

Method Source Code

//package com.java2s;
/**************************************************************************
 OmegaT - Computer Assisted Translation (CAT) tool 
  with fuzzy matching, translation memory, keyword search, 
  glossaries, and translation leveraging into updated projects.
    //from w w w  . ja  va2 s.  co m
 Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
       2007 Didier Briel and Tiago Saboga
       2007 Zoltan Bartko - bartkozoltan@bartkozoltan.com
       2008 Andrzej Sawula
       2010-2013 Alex Buloichik
       2015 Zoltan Bartko, Aaron Madlon-Kay
       2016 Aaron Madlon-Kay
       Home page: http://www.omegat.org/
       Support center: http://groups.yahoo.com/group/OmegaT/
    
 This file is part of OmegaT.
    
 OmegaT is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.
    
 OmegaT is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
    
 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **************************************************************************/

import java.text.Normalizer;

public class Main {
    /**
     * Fullwidth replacements for the halfwidth Katakana block, indexed by
     * {@code ch - 0xFF61}. Covers U+FF61 through U+FF9F (63 entries). The
     * last two entries are the combining voiced / semi-voiced sound marks,
     * which the final NFC pass composes with the preceding kana.
     */
    private static final String HALFWIDTH_KATAKANA_TO_FULLWIDTH =
            /* FF61 */ "\u3002\u300C\u300D\u3001\u30FB\u30F2\u30A1\u30A3"
            /* FF69 */ + "\u30A5\u30A7\u30A9\u30E3\u30E5\u30E7\u30C3\u30FC"
            /* FF71 */ + "\u30A2\u30A4\u30A6\u30A8\u30AA\u30AB\u30AD\u30AF"
            /* FF79 */ + "\u30B1\u30B3\u30B5\u30B7\u30B9\u30BB\u30BD\u30BF"
            /* FF81 */ + "\u30C1\u30C4\u30C6\u30C8\u30CA\u30CB\u30CC\u30CD"
            /* FF89 */ + "\u30CE\u30CF\u30D2\u30D5\u30D8\u30DB\u30DE\u30DF"
            /* FF91 */ + "\u30E0\u30E1\u30E2\u30E4\u30E6\u30E8\u30E9\u30EA"
            /* FF99 */ + "\u30EB\u30EC\u30ED\u30EF\u30F3\u3099\u309A";

    /**
     * Expansions for the first run of squared Latin abbreviations, indexed by
     * {@code ch - 0x3371}. Covers U+3371 through U+337A.
     */
    private static final String[] SQUARED_LATIN_1 = {
        /* 3371 */ "hPa", "da", "AU", "bar", "oV", "pc", "dm", "dm\u00B2",
        /* 3379 */ "dm\u00B3", "IU",
    };

    /**
     * Expansions for the second run of squared Latin abbreviations, indexed
     * by {@code ch - 0x3380}. Covers U+3380 through U+33DF (96 entries).
     */
    private static final String[] SQUARED_LATIN_2 = {
        /* 3380 */ "pA", "nA", "\u03BCA", "mA", "kA", "KB", "MB", "GB",
        /* 3388 */ "cal", "kcal", "pF", "nF", "\u03BCF", "\u03BCg", "mg", "kg",
        /* 3390 */ "Hz", "kHz", "MHz", "GHz", "THz", "\u03BC\u2113", "m\u2113", "d\u2113",
        /* 3398 */ "k\u2113", "fm", "nm", "\u03BCm", "mm", "cm", "km", "mm\u00B2",
        /* 33A0 */ "cm\u00B2", "m\u00B2", "km\u00B2", "mm\u00B3", "cm\u00B3", "m\u00B3", "km\u00B3", "m/s",
        /* 33A8 */ "m/s\u00B2", "Pa", "kPa", "MPa", "GPa", "rad", "rad/s", "rad/s\u00B2",
        /* 33B0 */ "ps", "ns", "\u03BCs", "ms", "pV", "nV", "\u03BCV", "mV",
        /* 33B8 */ "kV", "MV", "pW", "nW", "\u03BCW", "mW", "kW", "MW",
        /* 33C0 */ "k\u03A9", "M\u03A9", "a.m.", "Bq", "cc", "cd", "C/kg", "Co.",
        /* 33C8 */ "dB", "Gy", "ha", "HP", "in", "KK", "KM", "Kt",
        /* 33D0 */ "lm", "ln", "log", "lx", "mb", "mil", "mol", "pH",
        /* 33D8 */ "p.m.", "PPM", "PR", "sr", "Sv", "Wb", "V/m", "A/m",
    };

    /**
     * Normalize the
     * <a href="https://en.wikipedia.org/wiki/Halfwidth_and_fullwidth_forms">
     * width</a> of characters in the supplied text. Specifically:
     * <ul>
     * <li>Fullwidth ASCII variants (U+FF01-U+FF5E) and the ideographic space
     * (U+3000) become their ASCII equivalents
     * <li>Halfwidth Katakana (U+FF61-U+FF9F) becomes fullwidth
     * <li>Halfwidth Hangul (U+FFA0-U+FFDC) becomes fullwidth
     * <li>Halfwidth forms U+FFE8-U+FFEE become their regular symbols
     * <li>Letter-like symbols and squared Latin abbreviations are expanded
     * to plain letter sequences
     * </ul>
     * If any character was changed, the result is additionally normalized to
     * NFC so that, for example, a kana followed by a converted voiced sound
     * mark is composed into a single character. If nothing was changed the
     * original {@code text} instance is returned untouched, so the
     * composition of unrelated characters is not affected.
     * <p>
     * This method was adapted from <a href=
     * "https://bitbucket.org/okapiframework/okapi/src/52143104fcfc7eda204d04dfbbc273189f3a7f0f/okapi/steps/fullwidthconversion/src/main/java/net/sf/okapi/steps/fullwidthconversion/FullWidthConversionStep.java">
     * FullWidthConversionStep.java</a> in the Okapi Framework under GPLv2+.
     *
     * @param text
     *            the text to normalize; must not be {@code null}
     * @return Normalized-width text
     * @throws NullPointerException
     *             if {@code text} is {@code null}
     */
    public static String normalizeWidth(String text) {
        // Build the output by appending rather than editing in place: every
        // input char is looked at exactly once and no index bookkeeping is
        // needed when one char expands to several.
        StringBuilder sb = new StringBuilder(text.length());

        for (int i = 0; i < text.length(); i++) {
            char ch = text.charAt(i);
            String expansion = expandSymbol(ch);
            if (expansion != null) {
                sb.append(expansion);
            } else {
                sb.append(convertWidth(ch));
            }
        }

        String result = sb.toString();

        if (text.equals(result)) {
            // No characters were changed. Return the original text so that
            // composition of unrelated characters is not affected.
            return text;
        }

        return normalizeUnicode(result);
    }

    /**
     * Map a single char to its width-normalized counterpart.
     *
     * @return the replacement char, or {@code ch} itself when it has no
     *         one-to-one width mapping
     */
    private static char convertWidth(char ch) {
        // Fullwidth ASCII variants sit at a fixed offset from ASCII.
        if (ch >= 0xFF01 && ch <= 0xFF5E) {
            return (char) (ch - 0xFEE0);
        }
        // Ideographic space
        if (ch == 0x3000) {
            return ' ';
        }
        // Halfwidth Katakana
        if (ch >= 0xFF61 && ch <= 0xFF9F) {
            return HALFWIDTH_KATAKANA_TO_FULLWIDTH.charAt(ch - 0xFF61);
        }
        // Halfwidth Hangul
        if (ch >= 0xFFA0 && ch <= 0xFFDC) {
            return widenHangul(ch);
        }
        // Remaining halfwidth forms (line, arrows, square, circle)
        switch (ch) {
        case 0xFFE8:
            return (char) 0x2502;
        case 0xFFE9:
            return (char) 0x2190;
        case 0xFFEA:
            return (char) 0x2191;
        case 0xFFEB:
            return (char) 0x2192;
        case 0xFFEC:
            return (char) 0x2193;
        case 0xFFED:
            return (char) 0x25A0;
        case 0xFFEE:
            return (char) 0x25CB;
        default:
            return ch;
        }
    }

    /**
     * Map a halfwidth Hangul letter (U+FFA0-U+FFDC) to the corresponding
     * Hangul compatibility jamo. The halfwidth block has gaps between its
     * runs, so each run has its own offset; chars in the gaps are returned
     * unchanged.
     */
    private static char widenHangul(char ch) {
        // Filler
        if (ch == 0xFFA0) {
            return (char) 0x3164;
        }
        // Consonants, U+FFA1 (KIYEOK) through U+FFBE (HIEUH) inclusive
        if (ch >= 0xFFA1 && ch <= 0xFFBE) {
            return (char) (ch - 0xCE70);
        }
        // Vowels, in four runs of the halfwidth block
        if (ch >= 0xFFC2 && ch <= 0xFFC7) {
            return (char) (ch - 0xCE73);
        }
        if (ch >= 0xFFCA && ch <= 0xFFCF) {
            return (char) (ch - 0xCE75);
        }
        if (ch >= 0xFFD2 && ch <= 0xFFD7) {
            return (char) (ch - 0xCE77);
        }
        if (ch >= 0xFFDA && ch <= 0xFFDC) {
            return (char) (ch - 0xCE79);
        }
        return ch;
    }

    /**
     * Expand a letter-like symbol or squared Latin abbreviation into its
     * plain letter sequence.
     *
     * @return the expansion, or {@code null} when {@code ch} is not one of
     *         the handled symbols
     */
    private static String expandSymbol(char ch) {
        // Squared Latin Abbreviations 1
        if (ch >= 0x3371 && ch <= 0x337A) {
            return SQUARED_LATIN_1[ch - 0x3371];
        }
        // Squared Latin Abbreviations 2
        if (ch >= 0x3380 && ch <= 0x33DF) {
            return SQUARED_LATIN_2[ch - 0x3380];
        }
        switch (ch) {
        // Letter-like symbols
        case 0x2100:
            return "a/c";
        case 0x2101:
            return "a/s";
        case 0x2103:
            return "\u00B0C";
        case 0x2105:
            return "c/o";
        case 0x2109:
            return "\u00B0F";
        case 0x2116:
            return "No";
        case 0x212A:
            return "K";
        case 0x212B:
            return "\u00C5";
        // Squared Latin Abbreviations 3
        case 0x33FF:
            return "gal";
        default:
            return null;
        }
    }

    /**
     * Apply Unicode NFC normalization to a string.
     *
     * @param text
     *            the text to normalize
     * @return {@code text} as a string if it is already in NFC form,
     *         otherwise its NFC-normalized form
     */
    public static String normalizeUnicode(CharSequence text) {
        return Normalizer.isNormalized(text, Normalizer.Form.NFC) ? text.toString()
                : Normalizer.normalize(text, Normalizer.Form.NFC);
    }
}

Related

  1. normalizeUnicode(String input)
  2. normalizeUnicode(String str)
  3. normalizeUnicodeDiacritics(String text)
  4. normalizeWhitespace(final String str)
  5. normalizeWhiteSpace(String str)
  6. normalizeWord(String word)