For a given character c, return the version of the character that doesn't have an accent, stroke, etc. - Java java.lang

Java examples for java.lang:char

Description

For a given character c, return the version of the character that doesn't have an accent, stroke, etc.

Demo Code

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
//package com.java2s;

public class Main {
    /**
     * Folds a letter that carries a non-diacritic modification (stroke, hook,
     * topbar, tail, descender, ...) to its plain base letter.
     *
     * <p>The mapping is a fixed table covering selected Latin and Cyrillic
     * letters. Case is preserved: a capital input yields a capital base letter
     * and a small input yields a small base letter.
     *
     * @param c the character to check
     * @return the base character, or {@code 0x00} if {@code c} is not in the
     *         table and therefore needs no folding
     */
    public static char foldNonDiacriticChar(final char c) {
        // Case labels are grouped by the base letter they fold to, so that a
        // case mismatch between input and output is easy to spot.
        switch (c) {

        // ---------------- LATIN CAPITAL LETTERS ----------------
        case 0x0181: // B WITH HOOK
        case 0x0182: // B WITH TOPBAR
            return 0x0042; // LATIN CAPITAL LETTER B
        case 0x0187: // C WITH HOOK
            return 0x0043; // LATIN CAPITAL LETTER C
        case 0x0110: // D WITH STROKE
        case 0x018A: // D WITH HOOK
        case 0x018B: // D WITH TOPBAR
            return 0x0044; // LATIN CAPITAL LETTER D
        case 0x0191: // F WITH HOOK
            return 0x0046; // LATIN CAPITAL LETTER F
        case 0x0193: // G WITH HOOK
        case 0x01E4: // G WITH STROKE
            return 0x0047; // LATIN CAPITAL LETTER G
        case 0x0126: // H WITH STROKE
            return 0x0048; // LATIN CAPITAL LETTER H
        case 0x0197: // I WITH STROKE
            return 0x0049; // LATIN CAPITAL LETTER I
        case 0x0198: // K WITH HOOK
            return 0x004B; // LATIN CAPITAL LETTER K
        case 0x0141: // L WITH STROKE
            return 0x004C; // LATIN CAPITAL LETTER L
        case 0x019D: // N WITH LEFT HOOK
        case 0x0220: // N WITH LONG RIGHT LEG
            return 0x004E; // LATIN CAPITAL LETTER N
        case 0x00D8: // O WITH STROKE
        case 0x019F: // O WITH MIDDLE TILDE
        case 0x01FE: // O WITH STROKE AND ACUTE
            return 0x004F; // LATIN CAPITAL LETTER O
        case 0x01A4: // P WITH HOOK
            return 0x0050; // LATIN CAPITAL LETTER P
        case 0x0166: // T WITH STROKE
        case 0x01AC: // T WITH HOOK
        case 0x01AE: // T WITH RETROFLEX HOOK
            return 0x0054; // LATIN CAPITAL LETTER T
        case 0x01B2: // V WITH HOOK
            return 0x0056; // LATIN CAPITAL LETTER V
        case 0x01B3: // Y WITH HOOK
            return 0x0059; // LATIN CAPITAL LETTER Y
        case 0x01B5: // Z WITH STROKE
        case 0x0224: // Z WITH HOOK
            return 0x005A; // LATIN CAPITAL LETTER Z

        // ---------------- LATIN SMALL LETTERS ----------------
        case 0x0180: // B WITH STROKE
        case 0x0183: // B WITH TOPBAR
        case 0x0253: // B WITH HOOK
            return 0x0062; // LATIN SMALL LETTER B
        case 0x0188: // C WITH HOOK
        case 0x0255: // C WITH CURL
            return 0x0063; // LATIN SMALL LETTER C
        case 0x0111: // D WITH STROKE
        case 0x018C: // D WITH TOPBAR
        case 0x0221: // D WITH CURL
        case 0x0256: // D WITH TAIL
        case 0x0257: // D WITH HOOK
            return 0x0064; // LATIN SMALL LETTER D
        case 0x0192: // F WITH HOOK
            return 0x0066; // LATIN SMALL LETTER F
        case 0x01E5: // G WITH STROKE
        case 0x0260: // G WITH HOOK
            return 0x0067; // LATIN SMALL LETTER G
        case 0x0127: // H WITH STROKE
        case 0x0266: // H WITH HOOK
            return 0x0068; // LATIN SMALL LETTER H
        case 0x0268: // I WITH STROKE
            return 0x0069; // LATIN SMALL LETTER I
        case 0x029D: // J WITH CROSSED-TAIL
            return 0x006A; // LATIN SMALL LETTER J
        case 0x0199: // K WITH HOOK
            return 0x006B; // LATIN SMALL LETTER K
        case 0x0142: // L WITH STROKE
        case 0x019A: // L WITH BAR
        case 0x0234: // L WITH CURL
        case 0x026B: // L WITH MIDDLE TILDE
        case 0x026C: // L WITH BELT
        case 0x026D: // L WITH RETROFLEX HOOK
            return 0x006C; // LATIN SMALL LETTER L
        case 0x0271: // M WITH HOOK
            return 0x006D; // LATIN SMALL LETTER M
        case 0x019E: // N WITH LONG RIGHT LEG
        case 0x0235: // N WITH CURL
        case 0x0272: // N WITH LEFT HOOK
        case 0x0273: // N WITH RETROFLEX HOOK
            return 0x006E; // LATIN SMALL LETTER N
        case 0x00F8: // O WITH STROKE
        case 0x01FF: // O WITH STROKE AND ACUTE
            return 0x006F; // LATIN SMALL LETTER O
        case 0x01A5: // P WITH HOOK
            return 0x0070; // LATIN SMALL LETTER P
        case 0x02A0: // Q WITH HOOK
            return 0x0071; // LATIN SMALL LETTER Q
        case 0x027C: // R WITH LONG LEG
        case 0x027D: // R WITH TAIL
            return 0x0072; // LATIN SMALL LETTER R
        case 0x0282: // S WITH HOOK
            return 0x0073; // LATIN SMALL LETTER S
        case 0x0167: // T WITH STROKE
        case 0x01AB: // T WITH PALATAL HOOK
        case 0x01AD: // T WITH HOOK
        case 0x0236: // T WITH CURL
        case 0x0288: // T WITH RETROFLEX HOOK
            return 0x0074; // LATIN SMALL LETTER T
        case 0x028B: // V WITH HOOK
            return 0x0076; // LATIN SMALL LETTER V
        case 0x01B4: // Y WITH HOOK
            return 0x0079; // LATIN SMALL LETTER Y
        case 0x01B6: // Z WITH STROKE
        case 0x0225: // Z WITH HOOK
        case 0x0290: // Z WITH RETROFLEX HOOK
        case 0x0291: // Z WITH CURL
            return 0x007A; // LATIN SMALL LETTER Z
        case 0x025A: // SCHWA WITH HOOK
            return 0x0259; // LATIN SMALL LETTER SCHWA
        case 0x0286: // ESH WITH CURL
            return 0x0283; // LATIN SMALL LETTER ESH
        case 0x01BA: // EZH WITH TAIL
        case 0x0293: // EZH WITH CURL
            return 0x0292; // LATIN SMALL LETTER EZH

        // ---------------- CYRILLIC CAPITAL LETTERS ----------------
        case 0x0490: // GHE WITH UPTURN
        case 0x0492: // GHE WITH STROKE
        case 0x0494: // GHE WITH MIDDLE HOOK
            return 0x0413; // CYRILLIC CAPITAL LETTER GHE
        case 0x0496: // ZHE WITH DESCENDER
            return 0x0416; // CYRILLIC CAPITAL LETTER ZHE
        case 0x0498: // ZE WITH DESCENDER
            return 0x0417; // CYRILLIC CAPITAL LETTER ZE
        case 0x048A: // SHORT I WITH TAIL
            return 0x0419; // CYRILLIC CAPITAL LETTER SHORT I
        case 0x049A: // KA WITH DESCENDER
        case 0x049C: // KA WITH VERTICAL STROKE
        case 0x049E: // KA WITH STROKE
        case 0x04C3: // KA WITH HOOK
            return 0x041A; // CYRILLIC CAPITAL LETTER KA
        case 0x04C5: // EL WITH TAIL
            return 0x041B; // CYRILLIC CAPITAL LETTER EL
        case 0x04CD: // EM WITH TAIL
            return 0x041C; // CYRILLIC CAPITAL LETTER EM
        case 0x04A2: // EN WITH DESCENDER
        case 0x04C7: // EN WITH HOOK
        case 0x04C9: // EN WITH TAIL
            return 0x041D; // CYRILLIC CAPITAL LETTER EN
        case 0x04A6: // PE WITH MIDDLE HOOK
            return 0x041F; // CYRILLIC CAPITAL LETTER PE
        case 0x048E: // ER WITH TICK
            return 0x0420; // CYRILLIC CAPITAL LETTER ER
        case 0x04AA: // ES WITH DESCENDER
            return 0x0421; // CYRILLIC CAPITAL LETTER ES
        case 0x04AC: // TE WITH DESCENDER
            return 0x0422; // CYRILLIC CAPITAL LETTER TE
        case 0x04B2: // HA WITH DESCENDER
            return 0x0425; // CYRILLIC CAPITAL LETTER HA
        case 0x047C: // OMEGA WITH TITLO
            return 0x0460; // CYRILLIC CAPITAL LETTER OMEGA
        case 0x04B0: // STRAIGHT U WITH STROKE
            return 0x04AE; // CYRILLIC CAPITAL LETTER STRAIGHT U
        // NOTE(review): the capital CHE variants fold to ABKHASIAN CHE while the
        // small CHE WITH VERTICAL STROKE (0x04B9) folds to plain CHE; that
        // asymmetry is kept as-is here -- confirm which base letter is intended.
        case 0x04B6: // CHE WITH DESCENDER
        case 0x04B8: // CHE WITH VERTICAL STROKE
        case 0x04BE: // ABKHASIAN CHE WITH DESCENDER
        case 0x04CB: // KHAKASSIAN CHE
            return 0x04BC; // CYRILLIC CAPITAL LETTER ABKHASIAN CHE

        // ---------------- CYRILLIC SMALL LETTERS ----------------
        case 0x0491: // GHE WITH UPTURN
        case 0x0493: // GHE WITH STROKE
        case 0x0495: // GHE WITH MIDDLE HOOK
            return 0x0433; // CYRILLIC SMALL LETTER GHE
        case 0x0497: // ZHE WITH DESCENDER
            return 0x0436; // CYRILLIC SMALL LETTER ZHE
        case 0x0499: // ZE WITH DESCENDER
            return 0x0437; // CYRILLIC SMALL LETTER ZE
        case 0x048B: // SHORT I WITH TAIL
            return 0x0439; // CYRILLIC SMALL LETTER SHORT I
        case 0x049B: // KA WITH DESCENDER
        case 0x049D: // KA WITH VERTICAL STROKE
        case 0x049F: // KA WITH STROKE
        case 0x04C4: // KA WITH HOOK
            return 0x043A; // CYRILLIC SMALL LETTER KA
        case 0x04C6: // EL WITH TAIL
            return 0x043B; // CYRILLIC SMALL LETTER EL
        case 0x04CE: // EM WITH TAIL
            return 0x043C; // CYRILLIC SMALL LETTER EM
        case 0x04A3: // EN WITH DESCENDER
        case 0x04C8: // EN WITH HOOK
        case 0x04CA: // EN WITH TAIL
            return 0x043D; // CYRILLIC SMALL LETTER EN
        case 0x04A7: // PE WITH MIDDLE HOOK
            return 0x043F; // CYRILLIC SMALL LETTER PE
        case 0x048F: // ER WITH TICK
            return 0x0440; // CYRILLIC SMALL LETTER ER
        case 0x04AB: // ES WITH DESCENDER
            return 0x0441; // CYRILLIC SMALL LETTER ES
        case 0x04AD: // TE WITH DESCENDER
            return 0x0442; // CYRILLIC SMALL LETTER TE
        case 0x04B3: // HA WITH DESCENDER
            // Previously returned the CAPITAL letter HA (0x0425), changing case.
            return 0x0445; // CYRILLIC SMALL LETTER HA
        case 0x04B9: // CHE WITH VERTICAL STROKE
            return 0x0447; // CYRILLIC SMALL LETTER CHE
        case 0x047D: // OMEGA WITH TITLO
            return 0x0461; // CYRILLIC SMALL LETTER OMEGA
        case 0x04B1: // STRAIGHT U WITH STROKE
            return 0x04AF; // CYRILLIC SMALL LETTER STRAIGHT U
        case 0x04B7: // CHE WITH DESCENDER
        case 0x04BF: // ABKHASIAN CHE WITH DESCENDER
        case 0x04CC: // KHAKASSIAN CHE
            // Previously returned the CAPITAL letter ABKHASIAN CHE (0x04BC),
            // changing case; the small form keeps the same base letter.
            return 0x04BD; // CYRILLIC SMALL LETTER ABKHASIAN CHE

        default:
            // Not in the table: signal "no folding needed".
            return 0x00;
        }
    }
}

Related Tutorials