Java String Clean cleanText(String s)

Here you can find the source of cleanText(String s)

Description

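Cleans text pasted from word processors such as MS Word: trims the input and replaces typographic characters (curly quotes, dashes, arrows, special spaces, bullets) with plain equivalents, printing a warning for each non-ASCII character it encounters.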

License

Open Source License

Declaration

public static String cleanText(String s) 

Method Source Code

//package com.java2s;

public class Main {
    /** Code point 8226 (U+2022); every recognised bullet variant is normalised to this character. */
    public static final char BULLET = 8226;

    /**
     * Trims the input and replaces typographic characters, as typically produced by
     * copy/pasting text from MS Word, with plain equivalents.
     *
     * <p>Replacements performed, character by character:
     * <ul>
     *   <li>210, 211, 8220, 8221 become {@code "}</li>
     *   <li>212, 213, 8216, 8217 become {@code '}</li>
     *   <li>8218 becomes {@code ,}</li>
     *   <li>8594 (right arrow) becomes {@code ->}</li>
     *   <li>65533, 208, 209, 8211, 8212 become {@code -}</li>
     *   <li>8195, 160 become a plain space</li>
     *   <li>165 and {@link #BULLET} become {@link #BULLET}</li>
     * </ul>
     * Every other character is copied unchanged.
     *
     * <p>A warning is written to {@code System.out} for each character above 127, and a second
     * one if that character has no replacement. Trimming happens before conversion, so a
     * leading or trailing character that is converted to a space (e.g. 160) remains in the
     * result as a space.
     *
     * <p>NOTE(review): the codes 208-213 and 165 are also the Latin-1 code points U+00D0..U+00D5
     * and U+00A5, so genuine occurrences of those letters/symbols are rewritten as well.
     * Presumably acceptable for the intended input; confirm with callers.
     *
     * @param s the text to clean; may be {@code null}
     * @return the cleaned text, or {@code null} if {@code s} is {@code null}
     */
    public static String cleanText(String s) {
        if (s == null) {
            // Null in, null out: avoids an uninformative NullPointerException from trim().
            return null;
        }

        String trimmed = s.trim();
        int len = trimmed.length();
        // Local, single-threaded buffer: StringBuilder avoids StringBuffer's locking.
        StringBuilder cleanValue = new StringBuilder(len);

        for (int i = 0; i < len; i++) {
            char ch = trimmed.charAt(i);
            boolean nonAscii = ch > 127;

            if (nonAscii) {
                System.out.println(
                        "WARNING: Non ASCII character " + ch + " (" + (int) ch + ") in following string\n" + trimmed);
            }

            String replacement = replacementFor(ch);
            if (replacement != null) {
                cleanValue.append(replacement);
                continue;
            }

            if (nonAscii) {
                System.out.println("WARNING: Unhandled non ASCII character " + ch + "(" + (int) ch + ")");
            }
            cleanValue.append(ch);
        }

        return cleanValue.toString();
    }

    /**
     * Looks up the plain-text replacement for a single character.
     *
     * @param ch the character to convert
     * @return the replacement text, or {@code null} if the character has no mapping
     */
    private static String replacementFor(char ch) {
        // These strange character codes are what we see after copy/pasting text
        // from MS Word.
        switch (ch) {
        case 210: // Open double quote
        case 8220:
        case 211: // Close double quote
        case 8221:
            return "\"";

        case 212: // Open single quote
        case 8216:
        case 213: // Close single quote
        case 8217:
            return "'";

        case 8218:
            return ",";

        case 8594: // Right arrow
            return "->";

        case 65533:
        case 208: // Dashes
        case 209:
        case 8211:
        case 8212:
            return "-";

        case 8195: // Funny space which messes up Java parsing.
        case 160:
            return " ";

        case 165: // Bullet point
        case BULLET:
            // Convert all bullets to a single special value.
            // It will be converted later.
            return String.valueOf(BULLET);

        default:
            return null;
        }
    }
}

Related

  1. cleanStringForFilename(String originalString)
  2. cleanStringForFilePath(final String dirty)
  3. cleanStringForJavaName(String original)
  4. cleanStringFromWhitespaces(String text)
  5. cleanText(final String input)
  6. cleanText(String s)
  7. cleanText(String t)
  8. cleanText(String text)
  9. cleanText(String text)