Example usage for java.lang String codePointAt

List of usage examples for java.lang String codePointAt

Introduction

This page collects example usages of java.lang.String.codePointAt.

Prototype

public int codePointAt(int index) 

Source Link

Document

Returns the character (Unicode code point) at the specified index.

Usage

From source file:de.unwesen.packrat.api.FeedReader.java

/**
 * Parses a JSON web search response, counts the words used in the result
 * titles and starts a {@code WebSearchMachine} on the words ranked by count.
 *
 * <p>Outcomes reported through {@code handler}:
 * <ul>
 * <li>{@code ERR_SERVER} if {@code responseStatus} is not 200,</li>
 * <li>{@code ERR_EMPTY_RESPONSE} if no usable word is left after filtering,</li>
 * <li>{@code ERR_SERIALIZATION} if the response cannot be parsed as expected.</li>
 * </ul>
 *
 * @param data the raw JSON response text
 * @param handler receives error messages and is passed on to the search machine
 */
private void handleWebSearchResults(String data, final Handler handler) {
    try {
        // First check response status. If that is != 200, we may have an error
        // message to log, and definitely can bail out early.
        JSONObject result = new JSONObject(data);
        int status = result.getInt("responseStatus");
        if (200 != status) {
            Log.e(LTAG, "Server error: " + result.getString("responseDetails"));
            handler.obtainMessage(ERR_SERVER).sendToTarget();
            return;
        }

        JSONObject d = result.getJSONObject("responseData");
        JSONArray res = d.getJSONArray("results");

        // Count the occurrences of various words across all returned titles.
        // If a word is known to designate media type, we'll ignore it. We'll
        // also ignore words shorter than MIN_WORD_LENGTH.
        HashMap<String, Integer> wordCount = new HashMap<String, Integer>();
        for (int i = 0; i < res.length(); ++i) {
            JSONObject entry = res.getJSONObject(i);

            String title = entry.getString("titleNoFormatting");

            String[] words = title.split(" ");
            for (String word : words) {
                if (MIN_WORD_LENGTH > word.length()) {
                    // Too short
                    continue;
                }

                Integer type = sMediaTypes.get(word);
                if (null != type) {
                    // This word is a media type keyword, so we'll ignore it.
                    continue;
                }

                word = word.toLowerCase();
                Integer count = wordCount.get(word);
                if (null == count) {
                    wordCount.put(word, 1);
                } else {
                    wordCount.put(word, count + 1);
                }
            }
        }

        // Now that we've counted words, filter out all words that contain
        // non-letters in front of or between letters. Those are likely not
        // good candidates for further searching. Trailing non-letters are
        // fine, we just can't use them for searches, so they are cut off.
        HashMap<String, Integer> filteredWordCount = new HashMap<String, Integer>();
        for (String word : wordCount.keySet()) {
            // Index of the last UTF-16 unit of the most recent letter/digit.
            int lastLetter = -1;
            // Index of the most recent non-letter.
            int lastNonLetter = -1;

            // Step by whole code points so that letters outside the BMP are
            // not mistaken for a letter followed by a non-letter.
            for (int i = 0; i < word.length();) {
                int codePoint = word.codePointAt(i);
                int width = Character.charCount(codePoint);
                if (Character.isLetter(codePoint) || Character.isDigit(codePoint)) {
                    lastLetter = i + width - 1;
                    if (-1 != lastNonLetter) {
                        // A letter following a non-letter: the word has
                        // non-letters in the middle, no need to look further.
                        break;
                    }
                } else {
                    lastNonLetter = i;
                    if (-1 == lastLetter) {
                        // Non-letters preceding letters, that word should
                        // be discarded.
                        break;
                    }
                }
                i += width;
            }

            // Decide under which key (if any) this word is kept.
            String key = null;
            if (-1 == lastNonLetter) {
                // Word is pure letters, keep it.
                key = word;
            } else if (-1 != lastLetter && lastNonLetter > lastLetter) {
                // Word has trailing non-letters, cut them.
                key = word.substring(0, lastLetter + 1);
            }
            // Otherwise the word is pure non-letters or has non-letters in
            // front of or between letters: discard it.

            if (null != key) {
                // Several source words can map to the same key ("foo" and
                // "foo,"), so add the counts up instead of overwriting.
                int count = wordCount.get(word);
                Integer previous = filteredWordCount.get(key);
                filteredWordCount.put(key, null == previous ? count : previous + count);
            }
        }

        // Next filter step is optional: if we had more than one title to go
        // through, then chances are that words with only one count should be
        // ignored. If we had only one title, that's not an optimization we can
        // safely make.
        if (1 < res.length()) {
            wordCount = filteredWordCount;
            filteredWordCount = new HashMap<String, Integer>();
            for (String word : wordCount.keySet()) {
                int count = wordCount.get(word);
                if (count > 1) {
                    filteredWordCount.put(word, count);
                }
            }
        }

        // If we're left with no results, give up right here.
        if (0 == filteredWordCount.size()) {
            handler.obtainMessage(ERR_EMPTY_RESPONSE).sendToTarget();
            return;
        }

        // If we've got results, sort them by descending count.
        List<HashMap.Entry> wordList = new LinkedList<HashMap.Entry>(filteredWordCount.entrySet());
        Collections.sort(wordList, new Comparator() {
            public int compare(Object o1, Object o2) {
                return -1 * ((Comparable) ((HashMap.Entry) (o1)).getValue())
                        .compareTo(((HashMap.Entry) (o2)).getValue());
            }
        });

        // With the resulting wordList, we'll generate search terms, preferring
        // more words over fewer words, and words with a higher count over words
        // with a lower count.
        WebSearchMachine machine = new WebSearchMachine(wordList, handler);
        machine.nextTerm();

    } catch (JSONException ex) {
        handler.obtainMessage(ERR_SERIALIZATION).sendToTarget();
    }
}

From source file:org.apache.pdfbox.pdmodel.PDPageContentStream.java

/**
 * Writes the given text to the content stream followed by the {@code Tj} operator.
 *
 * <p>When the current font will be subset, every Unicode code point of the text is
 * registered with the font first.
 *
 * @param text The Unicode text to show.
 * @throws IOException If an io exception occurs.
 * @throws IllegalStateException If the stream is not in text mode or no font has been set.
 */
public void showText(String text) throws IOException {
    if (!inTextMode) {
        throw new IllegalStateException("Must call beginText() before showText()");
    }
    if (fontStack.isEmpty()) {
        throw new IllegalStateException("Must call setFont() before showText()");
    }

    final PDFont currentFont = fontStack.peek();

    if (currentFont.willBeSubset()) {
        // Walk the text one code point at a time so surrogate pairs count once.
        int position = 0;
        while (position < text.length()) {
            final int cp = text.codePointAt(position);
            currentFont.addToSubset(cp);
            position += Character.charCount(cp);
        }
    }

    COSWriter.writeString(currentFont.encode(text), output);
    write(" ");
    writeOperator("Tj");
}

From source file:org.rzo.yajsw.os.ms.win.w32.WindowsXPProcess.java

/**
 * Asks {@code wmic} for the command line of the process with this object's pid.
 *
 * <p>The query output is redirected to the file {@code wmic.tmp}; the third line
 * of that file is returned, with NUL characters removed if the line starts with one.
 * Up to three attempts are made, waiting 10 seconds after each failed attempt.
 *
 * @return the command line, or {@code "?"} if it could not be determined or the
 *         thread was interrupted while waiting to retry
 */
public String getCommandLineInternalWMI() {
    String result = "?";
    // if the server is overloaded we may not get an answer -> try 3 times
    for (int k = 0; k < 3 && "?".equals(result); k++) {
        try {
            WindowsXPProcess p = new WindowsXPProcess();
            new File("wmic.tmp").delete();
            p.setCommand("cmd /C wmic process where processid=" + getPid() + " get commandline > wmic.tmp");
            p.setVisible(false);
            p.start();
            p.waitFor(30000);

            String l;
            BufferedReader br = new BufferedReader(new FileReader("wmic.tmp"));
            try {
                // The value of interest is on the third line.
                br.readLine();
                br.readLine();
                l = br.readLine();
            } finally {
                // Release the file handle even when reading fails.
                br.close();
            }

            if (l == null || l.length() == 0) {
                // No usable answer: fail explicitly so the attempt is retried below.
                throw new IllegalStateException("wmic returned no command line for pid " + getPid());
            }

            if (l.charAt(0) == 0) {
                // The line contains NUL characters; drop all of them.
                StringBuilder s = new StringBuilder(l.length());
                for (int i = 0; i < l.length(); i++) {
                    char c = l.charAt(i);
                    if (c != 0) {
                        s.append(c);
                    }
                }
                l = s.toString();
            }
            result = l;
        } catch (Exception e) {
            e.printStackTrace();
            try {
                Thread.sleep(10000);
            } catch (InterruptedException e1) {
                e1.printStackTrace();
                // Keep the interrupt visible to the caller.
                Thread.currentThread().interrupt();
                return result;
            }
        }
    }
    return result;

}

From source file:org.apache.pdfbox.text.PDFTextStripper.java

/**
 * Handles the LTR and RTL direction of the given words. The whole implementation stands and falls with the given
 * word. If the word is a full line, the results will be the best. If the word contains of single words or
 * characters, the order of the characters in a word or words in a line may wrong, due to RTL and LTR marks and
 * characters!/*  w  w  w.j  a  va2  s.  c om*/
 * 
 * Based on http://www.nesterovsky-bros.com/weblog/2013/07/28/VisualToLogicalConversionInJava.aspx
 * 
 * @param word The word that shall be processed
 * @return new word with the correct direction of the containing characters
 */
private String handleDirection(String word) {
    Bidi bidi = new Bidi(word, Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT);

    // if there is pure LTR text no need to process further
    if (!bidi.isMixed() && bidi.getBaseLevel() == Bidi.DIRECTION_LEFT_TO_RIGHT) {
        return word;
    }

    // collect individual bidi information
    int runCount = bidi.getRunCount();
    byte[] levels = new byte[runCount];
    Integer[] runs = new Integer[runCount];

    for (int i = 0; i < runCount; i++) {
        levels[i] = (byte) bidi.getRunLevel(i);
        runs[i] = i;
    }

    // reorder individual parts based on their levels
    Bidi.reorderVisually(levels, 0, runs, 0, runCount);

    // collect the parts based on the direction within the run
    StringBuilder result = new StringBuilder();

    for (int i = 0; i < runCount; i++) {
        int index = runs[i];
        int start = bidi.getRunStart(index);
        int end = bidi.getRunLimit(index);

        int level = levels[index];

        if ((level & 1) != 0) {
            while (--end >= start) {
                char character = word.charAt(end);
                if (Character.isMirrored(word.codePointAt(end))) {
                    if (MIRRORING_CHAR_MAP.containsKey(character)) {
                        result.append(MIRRORING_CHAR_MAP.get(character));
                    } else {
                        result.append(character);
                    }
                } else {
                    result.append(character);
                }
            }
        } else {
            result.append(word, start, end);
        }
    }

    return result.toString();
}

From source file:com.repeatability.pdf.PDFTextStripper.java

/**
 * Handles the LTR and RTL direction of the given words. The whole implementation stands and falls with the given
 * word. If the word is a full line, the results will be the best. If the word consists of single words or
 * characters, the order of the characters in a word or words in a line may be wrong, due to RTL and LTR marks and
 * characters!
 * 
 * Based on http://www.nesterovsky-bros.com/weblog/2013/07/28/VisualToLogicalConversionInJava.aspx
 * 
 * @param word The word that shall be processed
 * @return new word with the correct direction of the containing characters
 */
// kwa: visibility changed from private to protected
protected String handleDirection(String word) {
    Bidi bidi = new Bidi(word, Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT);

    // if there is pure LTR text no need to process further
    if (!bidi.isMixed() && bidi.getBaseLevel() == Bidi.DIRECTION_LEFT_TO_RIGHT) {
        return word;
    }

    // collect individual bidi information
    int runCount = bidi.getRunCount();
    byte[] levels = new byte[runCount];
    Integer[] runs = new Integer[runCount];

    for (int i = 0; i < runCount; i++) {
        levels[i] = (byte) bidi.getRunLevel(i);
        runs[i] = i;
    }

    // reorder individual parts based on their levels
    Bidi.reorderVisually(levels, 0, runs, 0, runCount);

    // collect the parts based on the direction within the run
    StringBuilder result = new StringBuilder();

    for (int i = 0; i < runCount; i++) {
        int index = runs[i];
        int start = bidi.getRunStart(index);
        int end = bidi.getRunLimit(index);

        int level = levels[index];

        if ((level & 1) != 0) {
            // odd level: emit the run from its end towards its start
            int pos = end;
            while (pos > start) {
                int codePoint = word.codePointBefore(pos);

                // Characters stored as a surrogate pair are emitted whole, so
                // the reversal never produces a low surrogate before its high one.
                if (Character.charCount(codePoint) == 2 && pos - 2 >= start) {
                    result.appendCodePoint(codePoint);
                    pos -= 2;
                    continue;
                }

                char character = word.charAt(--pos);
                if (Character.isMirrored(character) && MIRRORING_CHAR_MAP.containsKey(character)) {
                    result.append(MIRRORING_CHAR_MAP.get(character));
                } else {
                    result.append(character);
                }
            }
        } else {
            result.append(word, start, end);
        }
    }

    return result.toString();
}

From source file:com.flexoodb.common.FlexUtils.java

/**
 * Returns a copy of the given string that keeps only characters whose code is below 128.
 *
 * @param s the string to filter, must not be {@code null}
 * @return the characters of {@code s} below 128, in their original order
 */
public static String removeNonASCII(String s) {
    final StringBuilder ascii = new StringBuilder();

    for (int pos = 0; pos < s.length(); pos++) {
        final char unit = s.charAt(pos);
        // Code points below 128 always occupy exactly one UTF-16 unit, and
        // surrogate units are far above 128, so a per-unit check is enough.
        if (unit < 128) {
            ascii.append(unit);
        }
    }
    return ascii.toString();
}

From source file:com.flexoodb.common.FlexUtils.java

/**
 * Returns a copy of the given string without the code points that fall outside
 * the accepted ranges: tab, line feed, carriage return, U+0020..U+D7FF,
 * U+E000..U+FFFD and U+10000..U+10FFFF.
 *
 * @param s the string to filter, must not be {@code null}
 * @return the accepted code points of {@code s}, in their original order
 */
public static String removeInvalidXMLCharacters(String s) {
    final StringBuilder kept = new StringBuilder();

    for (int pos = 0; pos < s.length();) {
        final int cp = s.codePointAt(pos);
        pos += Character.charCount(cp);

        final boolean allowedControl = cp == 0x9 || cp == 0xA || cp == 0xD;
        final boolean allowedBmp = (cp >= 0x20 && cp <= 0xD7FF) || (cp >= 0xE000 && cp <= 0xFFFD);
        final boolean allowedSupplementary = cp >= 0x10000 && cp <= 0x10FFFF;

        if (allowedControl || allowedBmp || allowedSupplementary) {
            kept.appendCodePoint(cp);
        }
    }
    return kept.toString();
}

From source file:com.crushpaper.Servlet.java

/**
 * Appends the string as an RTF value, escaping characters outside the 7-bit ASCII range.
 *
 * <p>A line feed is written as {@code \par}. Every UTF-16 unit above 127 is written as
 * {@code \}{@code uN?}, where N is the unit as a signed 16-bit decimal number, so
 * characters stored as a surrogate pair produce two such escapes. All other characters
 * are copied unchanged.
 *
 * @param result the builder to append to
 * @param value the text to append, must not be {@code null}
 */
private void appendRtfString(StringBuilder result, String value) {
    // Inspired by http://blog.stuartlewis.com/2010/09/18/java-rtf-and-unicode-characters/
    // NOTE(review): backslash and curly braces are copied unescaped, as before;
    // confirm that callers never pass them in plain text.
    for (int i = 0; i < value.length(); i++) {
        char unit = value.charAt(i);

        if (unit == '\n') {
            result.append("\\par\n");
        } else if (unit > 127) {
            // RTF expects a signed 16-bit number here, so units above 32767
            // (including surrogates) are written as negative values.
            result.append("\\u").append((int) (short) unit).append('?');
        } else {
            result.append(unit);
        }
    }
}

From source file:bfile.util.StringUtils.java

/**
 * <p>Capitalizes a String changing the first character to title case as
 * per {@link Character#toTitleCase(int)}. No other characters are changed.</p>
 *
 * <p>For a word based algorithm, see {@link org.apache.commons.lang3.text.WordUtils#capitalize(String)}.
 * A {@code null} input String returns {@code null}.</p>
 *
 * <pre>/*from   ww  w. j  a  v a 2  s. co  m*/
 * StringUtils.capitalize(null)  = null
 * StringUtils.capitalize("")    = ""
 * StringUtils.capitalize("cat") = "Cat"
 * StringUtils.capitalize("cAt") = "CAt"
 * StringUtils.capitalize("'cat'") = "'cat'"
 * </pre>
 *
 * @param str the String to capitalize, may be null
 * @return the capitalized String, {@code null} if null String input
 * @see org.apache.commons.lang3.text.WordUtils#capitalize(String)
 * @see #uncapitalize(String)
 * @since 2.0
 */
public static String capitalize(final String str) {
    int strLen;
    if (str == null || (strLen = str.length()) == 0) {
        return str;
    }

    final int firstCodepoint = str.codePointAt(0);
    final int newCodePoint = Character.toTitleCase(firstCodepoint);
    if (firstCodepoint == newCodePoint) {
        // already capitalized
        return str;
    }

    int newCodePoints[] = new int[strLen]; // cannot be longer than the char array
    int outOffset = 0;
    newCodePoints[outOffset++] = newCodePoint; // copy the first codepoint
    for (int inOffset = Character.charCount(firstCodepoint); inOffset < strLen;) {
        final int codepoint = str.codePointAt(inOffset);
        newCodePoints[outOffset++] = codepoint; // copy the remaining ones
        inOffset += Character.charCount(codepoint);
    }
    return new String(newCodePoints, 0, outOffset);
}

From source file:bfile.util.StringUtils.java

/**
 * <p>Uncapitalizes a String, changing the first character to lower case as
 * per {@link Character#toLowerCase(int)}. No other characters are changed.</p>
 *
 * <p>For a word based algorithm, see {@link org.apache.commons.lang3.text.WordUtils#uncapitalize(String)}.
 * A {@code null} input String returns {@code null}.</p>
 *
 * <pre>/*  www . ja va2 s  . c  om*/
 * StringUtils.uncapitalize(null)  = null
 * StringUtils.uncapitalize("")    = ""
 * StringUtils.uncapitalize("cat") = "cat"
 * StringUtils.uncapitalize("Cat") = "cat"
 * StringUtils.uncapitalize("CAT") = "cAT"
 * </pre>
 *
 * @param str the String to uncapitalize, may be null
 * @return the uncapitalized String, {@code null} if null String input
 * @see org.apache.commons.lang3.text.WordUtils#uncapitalize(String)
 * @see #capitalize(String)
 * @since 2.0
 */
public static String uncapitalize(final String str) {
    int strLen;
    if (str == null || (strLen = str.length()) == 0) {
        return str;
    }

    final int firstCodepoint = str.codePointAt(0);
    final int newCodePoint = Character.toLowerCase(firstCodepoint);
    if (firstCodepoint == newCodePoint) {
        // already capitalized
        return str;
    }

    int newCodePoints[] = new int[strLen]; // cannot be longer than the char array
    int outOffset = 0;
    newCodePoints[outOffset++] = newCodePoint; // copy the first codepoint
    for (int inOffset = Character.charCount(firstCodepoint); inOffset < strLen;) {
        final int codepoint = str.codePointAt(inOffset);
        newCodePoints[outOffset++] = codepoint; // copy the remaining ones
        inOffset += Character.charCount(codepoint);
    }
    return new String(newCodePoints, 0, outOffset);
}