Example usage for java.lang String codePointAt

List of usage examples for java.lang.String.codePointAt

Introduction

On this page you can find example usages of java.lang.String.codePointAt.

Prototype

public int codePointAt(int index) 

Source Link

Document

Returns the character (Unicode code point) at the specified index.

Usage

From source file:de.pangaea.fixo3.xml.ProcessXmlFiles.java

/**
 * Builds a URI in the {@code EYP} namespace from a free-text label.
 *
 * <p>Spaces, underscores, hyphens, slashes, ampersands, parentheses, apostrophes and plus
 * signs are removed from the label, its first character is upper-cased when it is not
 * already upper case, and the result is appended to {@code EYP.ns}.
 *
 * @param s the label to convert; must not be {@code null}
 * @return {@code EYP.ns} followed by the cleaned-up label, or just {@code EYP.ns} when
 *         nothing is left of the label after the removals
 */
private String toUri(String s) {
    // A single character class replaces the former chain of one-character replaceAll calls.
    // That chain also contained replaceAll("", ""), which never changes the string
    // (NOTE(review): possibly a character lost to an encoding problem - confirm).
    s = s.replaceAll("[ _\\-/&()'+]", "");

    // codePointAt(0) throws StringIndexOutOfBoundsException on an empty string, so a label
    // made up only of removed characters must skip the capitalisation step.
    if (!s.isEmpty()) {
        int first = s.codePointAt(0);
        if (!Character.isUpperCase(first)) {
            // Upper-case the whole first code point so a surrogate pair is never split.
            int firstLength = Character.charCount(first);
            s = s.substring(0, firstLength).toUpperCase() + s.substring(firstLength);
        }
    }

    return EYP.ns.toString() + s;
}

From source file:org.apache.pdfbox.pdmodel.font.PDCIDFontType2.java

/**
 * Returns the GID for the given character code.
 *
 * @param code character code/*w  w w . j  a  v a2s. c om*/
 * @return GID
 * @throws IOException
 */
@Override
public int codeToGID(int code) throws IOException {
    if (!isEmbedded) {
        // The conforming reader shall select glyphs by translating characters from the
        // encoding specified by the predefined CMap to one of the encodings in the TrueType
        // font's 'cmap' table. The means by which this is accomplished are implementation-
        // dependent.

        boolean hasUnicodeMap = parent.getCMapUCS2() != null;

        if (cid2gid != null) {
            // Acrobat allows non-embedded GIDs - todo: can we find a test PDF for this?
            int cid = codeToCID(code);
            return cid2gid[cid];
        } else if (hasIdentityCid2Gid || !hasUnicodeMap) {
            // same as above, but for the default Identity CID2GIDMap or when there is no
            // ToUnicode CMap to fallback to, see PDFBOX-2599 and PDFBOX-2560
            // todo: can we find a test PDF for the Identity case?
            return codeToCID(code);
        } else {
            // fallback to the ToUnicode CMap, test with PDFBOX-1422 and PDFBOX-2560
            String unicode = parent.toUnicode(code);
            if (unicode == null) {
                LOG.warn("Failed to find a character mapping for " + code + " in " + getName());
                return 0;
            } else if (unicode.length() > 1) {
                LOG.warn("Trying to map multi-byte character using 'cmap', result will be poor");
            }

            // a non-embedded font always has a cmap (otherwise FontMapper won't load it)
            return cmap.getGlyphId(unicode.codePointAt(0));
        }
    } else {
        // If the TrueType font program is embedded, the Type 2 CIDFont dictionary shall contain
        // a CIDToGIDMap entry that maps CIDs to the glyph indices for the appropriate glyph
        // descriptions in that font program.

        int cid = codeToCID(code);
        if (cid2gid != null) {
            // use CIDToGIDMap
            if (cid < cid2gid.length) {
                return cid2gid[cid];
            } else {
                return 0;
            }
        } else {
            // "Identity" is the default CIDToGIDMap
            if (cid < ttf.getNumberOfGlyphs()) {
                return cid;
            } else {
                // out of range CIDs map to GID 0
                return 0;
            }
        }
    }
}

From source file:org.cosmo.common.util.Util.java

/**
 * Returns a copy of the input that contains only characters permitted by the XML 1.0
 * {@code Char} production (see https://www.w3.org/TR/xml/#charsets): tab, line feed,
 * carriage return, U+0020..U+D7FF, U+E000..U+FFFD and U+10000..U+10FFFF.
 *
 * <p>The string is walked by Unicode code point, so a valid surrogate pair is kept as one
 * character while an unpaired surrogate is dropped.
 *
 * @author Donoiu Cristian, GPL
 * @param s the String whose non-valid characters we want to remove; may be {@code null}
 * @return the input String stripped of non-valid characters; an empty String if the
 *         input is {@code null} or empty
 */
public static String removeInvalidXMLCharacters(String s) {
    // The documented contract is an empty result for null/empty input; without this guard
    // a null argument raised NullPointerException.
    if (s == null || s.isEmpty()) {
        return "";
    }

    StringBuilder out = new StringBuilder(s.length());
    int i = 0;
    while (i < s.length()) {
        int codePoint = s.codePointAt(i);
        boolean valid = (codePoint >= 0x20 && codePoint <= 0xD7FF)
                || codePoint == 0x9 || codePoint == 0xA || codePoint == 0xD
                || (codePoint >= 0xE000 && codePoint <= 0xFFFD)
                || (codePoint >= 0x10000 && codePoint <= 0x10FFFF);
        if (valid) {
            out.appendCodePoint(codePoint);
        }
        // Advance by the number of Java chars (1 or 2) that encode this code point.
        i += Character.charCount(codePoint);
    }
    return out.toString();
}

From source file:net.sf.ufsc.ServiceLoader.java

/**
 * Reads one line from a provider-configuration file and records the provider class name
 * it contains, if any.
 *
 * <p>Everything from the first {@code '#'} onwards is ignored and the remainder is
 * trimmed. A non-empty remainder is checked for embedded blanks or tabs and for being a
 * dot-separated sequence of Java identifier characters; violations are reported through
 * {@code fail}. The name is appended to {@code names} unless it is already a key of
 * {@code providers} or already in {@code names}.
 *
 * @param service the service type, passed through to {@code fail}
 * @param u       the URL of the configuration file, passed through to {@code fail}
 * @param r       the reader positioned at the line to parse
 * @param lc      the number of the line being parsed
 * @param names   the list collecting newly found provider class names
 * @return {@code lc + 1}, or {@code -1} when the reader has no more lines
 * @throws IOException if reading the line fails
 * @throws ServiceConfigurationError declared for malformed lines (NOTE(review):
 *         presumably thrown by {@code fail} - confirm)
 */
private int parseLine(Class<?> service, URL u, BufferedReader r, int lc, List<String> names)
        throws IOException, ServiceConfigurationError {
    String entry = r.readLine();
    if (entry == null) {
        return -1;
    }

    // Cut off a trailing comment, then surrounding whitespace.
    final int hash = entry.indexOf('#');
    if (hash >= 0) {
        entry = entry.substring(0, hash);
    }
    entry = entry.trim();

    final int length = entry.length();
    if (length == 0) {
        return lc + 1;
    }

    if (entry.indexOf(' ') >= 0 || entry.indexOf('\t') >= 0) {
        fail(service, u, lc, "Illegal configuration-file syntax");
    }

    int codePoint = entry.codePointAt(0);
    if (!Character.isJavaIdentifierStart(codePoint)) {
        fail(service, u, lc, "Illegal provider-class name: " + entry);
    }

    // Walk the rest of the name one code point at a time.
    int index = Character.charCount(codePoint);
    while (index < length) {
        codePoint = entry.codePointAt(index);
        if (codePoint != '.' && !Character.isJavaIdentifierPart(codePoint)) {
            fail(service, u, lc, "Illegal provider-class name: " + entry);
        }
        index += Character.charCount(codePoint);
    }

    if (!(providers.containsKey(entry) || names.contains(entry))) {
        names.add(entry);
    }
    return lc + 1;
}

From source file:org.codelibs.fess.helper.ViewHelper.java

/**
 * Escapes the given text and turns its escaped highlight markers back into highlight tags.
 *
 * <p>The text is escaped with {@code LaFunctions.h}. Starting at the first occurrence of
 * {@code escapedHighlightPre}, the escaped text is scanned backwards until an ISO control
 * character or a member of {@code hihglightTerminalCharSet} is found; everything up to and
 * including that character is discarded. If no such character is found, or the marker
 * does not occur at all, the whole escaped text is kept.
 *
 * @param text the raw text to escape
 * @return the kept part of the escaped text with {@code escapedHighlightPre} and
 *         {@code escapedHighlightPost} replaced (as regular expressions) by
 *         {@code highlightTagPre} and {@code highlightTagPost}
 */
protected String escapeHighlight(final String text) {
    final String escaped = LaFunctions.h(text);

    // Ends at -1 when the marker is missing or the backwards scan reaches the start.
    int cursor = escaped.indexOf(escapedHighlightPre);
    for (; cursor >= 0; cursor--) {
        final int codePoint = escaped.codePointAt(cursor);
        if (Character.isISOControl(codePoint) || hihglightTerminalCharSet.contains(codePoint)) {
            break;
        }
    }

    final String kept = escaped.substring(cursor + 1);
    final String withOpeningTags = kept.replaceAll(escapedHighlightPre, highlightTagPre);
    return withOpeningTags.replaceAll(escapedHighlightPost, highlightTagPost);
}

From source file:org.komusubi.feeder.sns.twitter.TweetMessage.java

/**
 * Adds a script to this message, splitting it into several tweets when its tweeted
 * length exceeds {@code TweetScript.MESSAGE_LENGTH_MAX}.
 *
 * <p>A {@code TweetScript}, or a script that fits, is added unchanged. An over-long script
 * is split at the last line feed found at or before the maximum length; the first part is
 * added as a {@code TweetScript} and the rest is added recursively. When no line feed is
 * found, the line is cut into consecutive chunks of at most
 * {@code TweetScript.MESSAGE_LENGTH_MAX} chars.
 *
 * @param script the script to add
 * @return always {@code true}
 */
@Override
public boolean add(Script script) {
    if (script instanceof TweetScript) {
        super.add(script);
        return true;
    }

    String line = script.line();
    // FIXME code point count
    if (TweetScript.lengthAfterTweeted(line) <= TweetScript.MESSAGE_LENGTH_MAX) {
        super.add(script);
        return true;
    }

    // lengthAfterTweeted size different from script.codePointCount by url shortening.
    // adjust position when match max length.
    int adjust = script.codePointCount() == TweetScript.MESSAGE_LENGTH_MAX ? 1 : 0;
    // The tweeted length can exceed the raw length, so the scan must not start beyond
    // the last char of the line (codePointAt would throw).
    int start = Math.min(TweetScript.MESSAGE_LENGTH_MAX - adjust, line.length() - 1);
    // edit value to 140 character
    for (int position = start; position >= 0; position--) {
        if (line.codePointAt(position) != '\n') {
            continue;
        }
        super.add(new TweetScript(fragment, line.substring(0, position)));
        this.add(new ScriptLine(line.substring(position + 1))); // call recursively.
        return true;
    }

    // not found line feed '\n'
    int offset = 0;
    while (offset < line.length()
            && TweetScript.lengthAfterTweeted(line.substring(offset)) > TweetScript.MESSAGE_LENGTH_MAX) {
        // The end index is relative to the current offset and clamped to the line length.
        // The former absolute end index (MESSAGE_LENGTH_MAX) produced an empty chunk on the
        // second pass and an exception on the third.
        int end = Math.min(offset + TweetScript.MESSAGE_LENGTH_MAX, line.length());
        super.add(new TweetScript(fragment, line.substring(offset, end)));
        offset = end;
    }
    if (line.length() - offset > 0) {
        super.add(new TweetScript(fragment, line.substring(offset)));
    }
    return true;
}

From source file:com.weibo.api.motan.core.extension.ExtensionLoader.java

/**
 * Parses one line of an SPI configuration file and records the provider class name it
 * contains, if any.
 *
 * <p>Everything from the first {@code '#'} onwards is ignored and the remainder is
 * trimmed; an empty remainder is skipped. Otherwise the remainder is checked for embedded
 * blanks or tabs and for being a dot-separated sequence of Java identifier characters,
 * with violations reported through {@code failThrows}. The name is appended to
 * {@code names} unless it is already there.
 *
 * @param type       the extension type, passed through to {@code failThrows}
 * @param url        the URL of the configuration file, passed through to {@code failThrows}
 * @param line       the raw line to parse
 * @param lineNumber the number of the line, passed through to {@code failThrows}
 * @param names      the list collecting provider class names
 * @throws IOException declared by the signature; nothing visible in this method raises it
 * @throws ServiceConfigurationError declared for malformed lines (NOTE(review):
 *         presumably thrown by {@code failThrows} - confirm)
 */
private void parseLine(Class<T> type, URL url, String line, int lineNumber, List<String> names)
        throws IOException, ServiceConfigurationError {
    // Cut off a trailing comment, then surrounding whitespace.
    final int commentStart = line.indexOf('#');
    final String withoutComment = commentStart >= 0 ? line.substring(0, commentStart) : line;
    final String entry = withoutComment.trim();
    if (entry.isEmpty()) {
        return;
    }

    final boolean hasBlank = entry.indexOf(' ') >= 0;
    if (hasBlank || entry.indexOf('\t') >= 0) {
        failThrows(type, url, lineNumber, "Illegal spi configuration-file syntax");
    }

    int codePoint = entry.codePointAt(0);
    if (!Character.isJavaIdentifierStart(codePoint)) {
        failThrows(type, url, lineNumber, "Illegal spi provider-class name: " + entry);
    }

    // Walk the rest of the name one code point at a time.
    int index = Character.charCount(codePoint);
    while (index < entry.length()) {
        codePoint = entry.codePointAt(index);
        if (codePoint != '.' && !Character.isJavaIdentifierPart(codePoint)) {
            failThrows(type, url, lineNumber, "Illegal spi provider-class name: " + entry);
        }
        index += Character.charCount(codePoint);
    }

    if (names.contains(entry)) {
        return;
    }
    names.add(entry);
}

From source file:me.Wundero.Ray.utils.TextUtils.java

/**
 * Gets the total width of a string: the sum of {@code getWidth} over every Unicode code
 * point in the text, rounded up to the next integer.
 *
 * @param text         the text to measure; must not be {@code null}
 * @param isBold       passed through to {@code getWidth} for every code point
 * @param forceUnicode passed through to {@code getWidth} for every code point
 * @return the summed width, rounded up
 */
public static int getStringWidth(String text, boolean isBold, boolean forceUnicode) {
    double width = 0;
    for (int i = 0; i < text.length();) {
        final int codePoint = text.codePointAt(i);
        width += getWidth(codePoint, isBold, forceUnicode);
        // Step over both chars of a surrogate pair; advancing by one char would measure
        // the low surrogate a second time as if it were a character of its own.
        i += Character.charCount(codePoint);
    }
    return (int) Math.ceil(width);
}

From source file:com.microsoft.windowsazure.mobileservices.MobileServiceTableBase.java

/**
 * Checks whether a string contains at least one ISO control character.
 *
 * @param s the string to inspect; must not be {@code null}
 * @return {@code true} if any code point of {@code s} satisfies
 *         {@link Character#isISOControl(int)}, {@code false} otherwise
 */
protected boolean containsControlCharacter(String s) {
    final int end = s.length();
    int index = 0;

    // Walk the string one code point at a time and stop at the first control character.
    while (index < end) {
        final int current = s.codePointAt(index);
        if (Character.isISOControl(current)) {
            return true;
        }
        index += Character.charCount(current);
    }

    return false;
}

From source file:org.sd.token.StandardTokenizerOptions.java

/**
 * Collects the distinct Unicode code points that occur in a string.
 *
 * @param string the string to scan; may be {@code null}
 * @return the set of code points in {@code string}, or {@code null} when the string is
 *         {@code null} or empty
 */
private Set<Integer> computeCodePoints(String string) {
    Set<Integer> result = null;

    if (string != null && !"".equals(string)) {
        result = new HashSet<Integer>();
        final int len = string.length();
        int charPos = 0;
        while (charPos < len) {
            final int codePoint = string.codePointAt(charPos);
            result.add(codePoint);
            // Skip both chars of a surrogate pair; stepping one char at a time would also
            // add the bare low surrogate, which is not a character of the string.
            charPos += Character.charCount(codePoint);
        }
    }

    return result;
}