// Java tutorial
/******************************************************************************* * Copyright (c) 2009 Andrey Loskutov. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * Contributor: Andrey Loskutov - initial API and implementation *******************************************************************************/ package de.loskutov.anyedit.util; import java.io.UnsupportedEncodingException; import java.nio.charset.Charset; import java.nio.charset.IllegalCharsetNameException; import java.nio.charset.UnsupportedCharsetException; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import org.eclipse.core.resources.ResourcesPlugin; import org.eclipse.jface.preference.IPreferenceStore; import de.loskutov.anyedit.AnyEditToolsPlugin; import de.loskutov.anyedit.IAnyEditConstants; import de.loskutov.anyedit.Messages; /** * @author Andrey */ public class TextUtil { public static final String SYSTEM_CHARSET = ResourcesPlugin.getEncoding(); /** The predefined line delimiters */ private static final char[] CR = { '\r' }; private static final char[] LF = { '\n' }; private static final char[] CRLF = { '\r', '\n' }; private static final char[] EMPTY = {}; public static final String DEFAULT_CHARACTERS_REQUIRED_IN_PATH = "."; public static final String WINDOOF_DEF = " \n\"'*?><|=(){};&$,%@"; public static final String LINUX_DEF = " \n\"'*?><|=(){};&$,%@:"; // ':' is invalid in some cases too public static final String DEFAULT_CHARACTERS_DISALLOWED_IN_PATH = EclipseUtils.isWindows() ? 
WINDOOF_DEF : LINUX_DEF; public static final String DEFAULT_LINE_SEPARATOR_REGEX = ":|\\s+"; private static final String INVALID_PATH_ENDS_CHARACTERS = "/\\"; // private static final String VARIABLE_DELIMITERS = "${}()"; public static final boolean DEFAULT_UNICODIFY_ALL = false; public static final int DEFAULT_BASE64_LINE_LENGTH = 100; private static TextUtil instance; public boolean useRequiredInPathChars; private String charsDisallowedInPath; private String charsRequiredInPath; private String lineSeparatorRegex; private int base64LineLength; private boolean unicodifyAll; private static final Pattern WHITE_SPACE_PATTERN = Pattern.compile("(\\n|\\r| |\\\t)"); private static final Pattern UNICODE_PATTERN = Pattern.compile("\\\\u[0-9a-fA-F]{2,4}"); /** $HOME (group "one") or ( $(HOME) or ${HOME} ) (group "two") */ // XXX Java 6 doesn't support named groups!!!: (?<one>) causes crash // private static final Pattern VARIABLE_PATTERN = Pattern.compile("(~/)|\\$((?<one>\\w+)|[\\{\\(](?<two>\\w+)[\\)\\}])"); private static final Pattern VARIABLE_PATTERN = Pattern.compile("(~/)|\\$((\\w+)|[\\{\\(](\\w+)[\\)\\}])"); private TextUtil() { useRequiredInPathChars = true; charsDisallowedInPath = DEFAULT_CHARACTERS_DISALLOWED_IN_PATH; charsRequiredInPath = DEFAULT_CHARACTERS_REQUIRED_IN_PATH; base64LineLength = DEFAULT_BASE64_LINE_LENGTH; unicodifyAll = DEFAULT_UNICODIFY_ALL; } private static synchronized TextUtil getInstance() { if (instance == null) { instance = new TextUtil(); } return instance; } /** * @param string * in the "camel" notation like "beMyCamel" * @return the resulting string in usual notation like "be_my_camel" */ public static String fromCamelToUnderscore(String string) { int size = string.length(); StringBuffer sb = new StringBuffer(size); for (int i = 0; i < size; i++) { char c = string.charAt(i); if (i > 0 && i < size - 1) { char next = string.charAt(i + 1); char prev = string.charAt(i - 1); if (Character.isUpperCase(c) && Character.isJavaIdentifierPart(next) 
&& Character.isJavaIdentifierPart(prev) && !Character.isUpperCase(next)) { sb.append('_'); c = Character.toLowerCase(c); } } sb.append(c); } return sb.toString(); } /** * @param string * in the "underscore" notation like "be_my_camel" * @return the resulting string in "camel" notation like "beMyCamel" */ public static String fromUnderscoreToCamel(String string) { int size = string.length(); StringBuffer sb = new StringBuffer(size); boolean skipChar = false; boolean toUpper = false; for (int i = 0; i < size; i++) { char c = string.charAt(i); skipChar = i > 0 && c == '_'; if (skipChar && i < size - 1 && !Character.isJavaIdentifierPart(string.charAt(i + 1))) { skipChar = false; } else if (i == size - 1) { skipChar = false; } if (!skipChar) { if (toUpper) { sb.append(Character.toUpperCase(c)); } else { if (i > 0) { if (Character.isJavaIdentifierPart(string.charAt(i - 1))) { sb.append(Character.toLowerCase(c)); } else { sb.append(c); } } else { sb.append(Character.toLowerCase(c)); } } } toUpper = skipChar; } return sb.toString(); } /** * @param string in the "camel" notation like "beMyCamel" * @param toCamel determines the direction of conversion, true-to Camel, false-to Pascal * @return the resulting string in Pascal notation like "BeMyCamel" */ public static String fromCamelCaseToPascalCaseBidirectional(String string, boolean toCamel) { int size = string.length(); StringBuffer sb = new StringBuffer(size); boolean isNewWord = true; for (int i = 0; i < size; i++) { char c = string.charAt(i); if (isNewWord == true && !Character.isWhitespace(c)) { isNewWord = false; if (Character.isLetter(c)) { if (toCamel) { if (Character.isUpperCase(c)) { c = Character.toLowerCase(c); } } else { if (Character.isLowerCase(c)) { c = Character.toUpperCase(c); } } } } else { if (Character.isWhitespace(c)) { isNewWord = true; } } sb.append(c); } return sb.toString(); } /** Check String to match real path name * @return false if this path is may be not a File/Dir path, i.e. 
contains * not alloved characters etc. */ public boolean isPath(String path) { if (path == null) { return false; } path = path.trim(); if (path.length() == 0 || path.length() > 400) { return false; } String disallowed = getCharsDisallowedInPath(); for (int i = 0; i < disallowed.length(); i++) { if (path.indexOf(disallowed.charAt(i)) >= 0) { return false; } } if (isUseRequiredInPathChars()) { String required = getCharsRequiredInPath(); for (int i = 0; i < required.length(); i++) { if (path.indexOf(required.charAt(i)) >= 0) { return true; } } return false; } return true; } /** * Check if given string can contain real <b>file</b> path name * @return false if this path is may be not a <b>file</b> path, i.e. contains * not alloved characters etc. */ public boolean isFilePath(String path) { if (path == null || (path = path.trim()).length() == 0) { return false; } int lastIdx = path.length() - 1; for (int i = 0; i < INVALID_PATH_ENDS_CHARACTERS.length(); i++) { if (path.charAt(lastIdx) == INVALID_PATH_ENDS_CHARACTERS.charAt(i)) { return false; } } return isPath(path); } /** * Check if given string can contain real <b>Java type</b> name * @return false if this type is may be not a <b>Java type</b> name, i.e. contains * not alloved characters etc. */ public boolean isJavaType(String type) { if (type == null || (type = type.trim()).length() == 0) { return false; } if (!Character.isJavaIdentifierStart(type.charAt(0))) { return false; } for (int i = 1; i < type.length(); i++) { if (!Character.isJavaIdentifierPart(type.charAt(i))) { return false; } } return true; } /** * @param path string to check * @return a copy of the string, with leading and trailing whitespace * and not in path allowed characters (leading and trailing) omitted. 
*/ public String trimPath(String path) { if (path == null) { return path; // shit in, shit out } path = path.trim(); if (path.length() == 0) { return path; // shit in, shit out } if (EclipseUtils.isWindows()) { path = path.replace('/', '\\'); // make "\test.txt" to "test.txt" but do not touch "\\share\text.txt" // "\test.txt" causes problems by selecting file in "open resource" dialog if (path.charAt(0) == '\\' && path.length() > 1 && path.charAt(1) != '\\') { path = path.substring(1); } } String disallowed = getCharsDisallowedInPath().replace("$", ""); /* * trim leading characters */ for (int i = 0; i < disallowed.length(); i++) { if (path.charAt(0) == disallowed.charAt(i)) { path = path.substring(1); if (path.length() > 0) { i = -1; // start search again with new first character } else { break; } } } if (path.length() == 0) { return path; // shit in, shit out } /* * trim trailing characters */ disallowed = getCharsDisallowedInPath().replace(")", ""); disallowed = disallowed.replace("}", ""); for (int i = 0; i < disallowed.length(); i++) { if (path.charAt(path.length() - 1) == disallowed.charAt(i)) { path = path.substring(0, path.length() - 1); if (path.length() > 0) { i = -1; // start search again with new last character } else { break; } } } int length = path.length(); path = path.trim(); if (length != path.length()) { // start again!!! 
return trimPath(path); } return path; } public static class LineAndCaret { public String line; public int caret; public LineAndCaret(String line, int caret) { this.line = line; this.caret = caret; } } public String findPath(/* @Nonnull */ LineAndCaret position) { if (badData(position)) { return null; // shit in, shit out } position = resolveVariables(position); if (badData(position)) { return null; // shit in, shit out } String line = position.line; /** * we search for nearest to caret 'invalid' path characters in both directions */ int backwardSearchIdx = -1; String disallowed = getCharsDisallowedInPath(); for (int i = 0; i < disallowed.length(); i++) { char charAt = disallowed.charAt(i); int matchIdx = indexOf(line, charAt, position.caret, backwardSearchIdx, false); // search nearest to caret, also biggest if (matchIdx > backwardSearchIdx) { backwardSearchIdx = matchIdx; } } int forwardSearchIdx = line.length(); for (int i = 0; i < disallowed.length(); i++) { int matchIdx = indexOf(line, disallowed.charAt(i), position.caret, forwardSearchIdx, true); // search nearest to caret, also smaller if (matchIdx != -1 && matchIdx < forwardSearchIdx) { forwardSearchIdx = matchIdx; } } if (EclipseUtils.isWindows() && disallowed.indexOf(':') < 0) { int matchIdx = indexOf(line, ':', position.caret, forwardSearchIdx, true); // search nearest to caret, also smaller if (matchIdx != -1 && matchIdx < forwardSearchIdx) { forwardSearchIdx = matchIdx; } } /** * now we have (or not) both ends of new line: check for identity with line and for * needed path characters (like '.') inside */ if (forwardSearchIdx == line.length() && backwardSearchIdx == -1) { return trimPath(line); } else if (forwardSearchIdx - backwardSearchIdx > 1) { line = line.substring(backwardSearchIdx + 1, forwardSearchIdx); if (isFilePath(line)) { return trimPath(line); } } return null; } private boolean badData(LineAndCaret position) { String line = position.line; return line == null || line.length() < 2 || 
position.caret >= line.length() || position.caret < 0; } private static LineAndCaret resolveVariables(LineAndCaret position) { String line = position.line; Matcher matcher = VARIABLE_PATTERN.matcher(line); if (!matcher.find()) { return position; } int newCaret = position.caret; StringBuffer sb = new StringBuffer(); do { String var = matcher.group(1); if (var == null) { // XXX Java 6 doesn't support named groups!!! // String var = matcher.group("one"); var = matcher.group(3); if (var == null) { // var = matcher.group("two"); var = matcher.group(4); if (var == null) { // paranoia break; } } } int start = matcher.start(); int end = matcher.end(); // for unresolved variables just use "null". Shit in, shit out. String value = getEnv(var); matcher.appendReplacement(sb, value); if (position.caret >= start && position.caret < end) { // caret inside current variable: place it at the end of the current input newCaret = sb.length() - 1; } else if (position.caret >= end) { // caret after current variable: update it with the diff newCaret += value.length() - (end - start); } } while (matcher.find()); matcher.appendTail(sb); return new LineAndCaret(sb.toString(), newCaret); } private static String getEnv(String var) { if ("~/".equals(var)) { return System.getProperty("user.home") + "/"; } return System.getenv(var) + ""; } public String trimJavaType(String type) { if (type == null || (type = type.trim()).length() == 0) { return type; // shit in, shit out } // trick: compute virtual "caret" in the middle of string int caretIdx = type.length() / 2; return findJavaType(type, caretIdx); } /** * Search for occurencies of line references in text, like * <pre> * foo/Foo.java RegexSeparator 156 * </pre> * If the regular expression separator can't be compiled it just uses the separator as string. * @param line * @param startOffset * @return integer value guessed as line reference in text (this is not a offset in given line!!!) 
*/ public int findLineReferenceRegex(String line, int startOffset) { try { Pattern p = Pattern.compile("(" + getLineSeparatorRegex() + ")(\\d+)"); Matcher m = p.matcher(line); if (m.find()) { int groupCount = m.groupCount(); if (groupCount == 0) { return -1; } String group = m.group(groupCount); if (group == null) { return -1; } try { return Integer.parseInt(group); } catch (Exception e) { // ignore, there was no line info? return -1; } } } catch (PatternSyntaxException e) { //since input values in preferences dialog are checked for valid patterns //exception normally can't happen. return findLineReference(line, startOffset); } return -1; } /** * Search for occurencies of line references in text, like * <pre> * foo/Foo.java:156 * </pre> * @return integer value guessed as line reference in text (this is not a offset in given line!!!) */ private int findLineReference(String line, int startOffset) { if (line == null || line.length() == 0 || startOffset >= line.length() || startOffset < 0) { return -1; // shit in, shit out } // search for first ':', if any int doppIndx = line.indexOf(getLineSeparatorRegex(), startOffset); // means > -1 and not the same occurence if (doppIndx > startOffset) { // try to find most common occurence: after first ':' int firstTry = findLineReference(line, doppIndx); // found? ok. if (firstTry >= 0) { return firstTry; } // else: we doesn't have line info after ':' or it is before! 
} int startChar = -1, stopChar = -1; boolean digit; for (int i = startOffset; i < line.length(); i++) { digit = Character.isDigit(line.charAt(i)); if (digit) { if (startChar < 0) { // let see on pevious character: is it letter, then // followed digit cannot be line number, but is part of // path or java name like 6 in Base64.java:125 if (i - 1 >= 0 && Character.isLetter(line.charAt(i - 1))) { continue; } startChar = i; } stopChar = i + 1; } else if (startChar >= 0) { stopChar = i; break; } } if (startChar >= 0 && stopChar > 0) { line = line.substring(startChar, stopChar); int result = Integer.parseInt(line); return result; } return -1; } public String findJavaType(String line, int caretOffset) { if (line == null || line.length() == 0 || caretOffset >= line.length() || caretOffset < 0) { return null; // shit in, shit out } /** * we search for nearest to caret 'invalid' java characters in both directions */ int forwardSearchIdx = caretOffset; for (int i = caretOffset; i < line.length(); i++) { if (Character.isJavaIdentifierPart(line.charAt(i))) { forwardSearchIdx++; } else { break; } } int backwardSearchIdx = caretOffset; for (int i = caretOffset; i >= 0; i--) { if (Character.isJavaIdentifierPart(line.charAt(i))) { backwardSearchIdx--; } else { break; } } if (backwardSearchIdx < 0) { backwardSearchIdx = 0; } // find first valid first java character for (int i = backwardSearchIdx; i < forwardSearchIdx; i++) { if (Character.isJavaIdentifierStart(line.charAt(i))) { backwardSearchIdx = i; break; } } /** * now we have (or not) both ends of new line: check for identity with line and for * needed path characters (like '.') inside */ if (forwardSearchIdx == line.length() && backwardSearchIdx == 0) { return line; } else if (forwardSearchIdx > backwardSearchIdx) { return line.substring(backwardSearchIdx, forwardSearchIdx); } return null; } public static int indexOf(String line, char c, int startOffset, int stopOffset, boolean forward) { int i = startOffset; while (forward ? 
i < stopOffset : i > stopOffset) { if (line.charAt(i) == c) { return i; } if (forward) { i++; } else { i--; } } return -1; } public String getCharsDisallowedInPath() { return charsDisallowedInPath; } public String getCharsRequiredInPath() { return charsRequiredInPath; } public String getLineSeparatorRegex() { return lineSeparatorRegex; } public boolean isUseRequiredInPathChars() { return useRequiredInPathChars; } public void setCharsDisallowedInPath(String string) { charsDisallowedInPath = string; } public void setCharsRequiredInPath(String string) { charsRequiredInPath = string; } public void setLineSeparatorRegex(String string) { lineSeparatorRegex = string; } public void setUseRequiredInPathChars(boolean b) { useRequiredInPathChars = b; } /* @SuppressFBWarnings("NP_NULL_PARAM_DEREF_ALL_TARGETS_DANGEROUS") */ public String base64decode(String base64, String charset) { Base64Preferences prefs = new Base64Preferences(); prefs.put(null, base64); byte[] byteArray = prefs.getByteArray(null, null); if (byteArray == null) { // not base64 encoded => return input back return base64; } try { return new String(byteArray, charset); } catch (UnsupportedEncodingException e) { return new String(byteArray); } } public String base64encode(String plainText, String charset) { Base64Preferences prefs = new Base64Preferences(); prefs.putByteArray(null, plainText.getBytes()); return prefs.get(null, null); } public String base64trim(String text, String lineDelim) { text = WHITE_SPACE_PATTERN.matcher(text).replaceAll(""); StringBuffer resultText = new StringBuffer(); for (int i = 0; i < text.length(); i += base64LineLength) { if ((i + base64LineLength) >= text.length()) { resultText.append(text.substring(i)); } else { resultText.append(text.substring(i, i + base64LineLength)); } resultText.append(lineDelim); } String string = resultText.toString(); if (string.endsWith(lineDelim)) { string = string.substring(0, string.length() - lineDelim.length()); } return string; } public String 
toUnicode(String input) { StringBuffer ret = new StringBuffer(); for (int i = 0; i < input.length(); i++) { char ch = input.charAt(i); if (unicodifyAll || (!Character.isWhitespace(ch) && ch < 0x20 || ch > 0x7e)) { ret.append("\\u"); // requires 1.5 VM // ret.append(String.format("%1$04x", new Object[] { Integer.valueOf(ch) })); ret.append(leading4Zeros(Integer.toHexString(ch))); } else { ret.append(ch); } } return ret.toString(); } /** * @param hexString max 4 characters length * @return same string with leading zeros */ private char[] leading4Zeros(String hexString) { char[] chars = "0000".toCharArray(); int length = hexString.length(); hexString.getChars(0, length, chars, 4 - length); return chars; } /** * * @param charset may be null. If null, no checks for the supported encoding would be * performed * @param input non null * @throws UnsupportedOperationException if given charset does not support characters * from given text */ public String fromUnicode(String charset, String input) throws UnsupportedOperationException { StringBuffer ret = new StringBuffer(); Matcher matcher = UNICODE_PATTERN.matcher(input); String error = null; while (matcher.find()) { try { String uniValue = matcher.group().substring(2); String newValue = new String(new char[] { (char) Integer.parseInt(uniValue, 16) }); if (charset != null) { error = canEncode(charset, newValue, uniValue); if (error != null) { break; } } matcher.appendReplacement(ret, quoteReplacement(newValue)); } catch (NumberFormatException t) { matcher.appendReplacement(ret, quoteReplacement(matcher.group())); } } if (error != null) { throw new UnsupportedOperationException(error); } matcher.appendTail(ret); return ret.toString(); } // TODO already exists in 1.5 JDK, but here to be compatible with 1.4 public static String quoteReplacement(String s) { if (s.indexOf('\\') == -1 && s.indexOf('$') == -1) { return s; } int length = s.length(); StringBuffer sb = new StringBuffer(length + 10); for (int i = 0; i < length; i++) { 
char c = s.charAt(i); if (c == '\\') { sb.append('\\').append('\\'); } else if (c == '$') { sb.append('\\').append('$'); } else { sb.append(c); } } return sb.toString(); } /** * * @param charset non null * @param text non null * @param unicodeValue * @return null if text could be encoded, error message otherwise */ public static String canEncode(String charset, CharSequence text, String unicodeValue) { Charset cs; try { cs = Charset.forName(charset); } catch (IllegalCharsetNameException e) { return "Charset name '" + charset + "' is illegal."; } catch (UnsupportedCharsetException e) { return "Charset '" + charset + "' is not supported."; } if (cs.canEncode() && cs.newEncoder().canEncode(text)) { return null; } return "Charset '" + charset + "' does not support encoding for \\u" + unicodeValue + "."; } public static boolean isValidLineSeparatorRegex(String regex) { try { Pattern.compile(regex); return true; } catch (PatternSyntaxException e) { AnyEditToolsPlugin.errorDialog(Messages.OpenLineSeparatorRegex_WarningInvalidRegex, e); return false; } } public static synchronized void updateTextUtils() { TextUtil textUtils = getInstance(); IPreferenceStore store = AnyEditToolsPlugin.getDefault().getPreferenceStore(); textUtils.setCharsDisallowedInPath(store.getString(IAnyEditConstants.CHARACTERS_DISALLOWED_IN_PATH)); textUtils.setCharsRequiredInPath(store.getString(IAnyEditConstants.CHARACTERS_REQUIRED_IN_PATH)); textUtils.setLineSeparatorRegex(store.getString(IAnyEditConstants.LINE_SEPARATOR_REGEX)); textUtils.setUseRequiredInPathChars(store.getBoolean(IAnyEditConstants.USE_REQUIRED_IN_PATH_CHARACTERS)); textUtils.base64LineLength = store.getInt(IAnyEditConstants.BASE64_LINE_LENGTH); if (textUtils.base64LineLength <= 0) { // paranoia textUtils.base64LineLength = DEFAULT_BASE64_LINE_LENGTH; } textUtils.unicodifyAll = store.getBoolean(IAnyEditConstants.UNICODIFY_ALL); } public static TextUtil getDefaultTextUtilities() { updateTextUtils(); return getInstance(); } public 
static boolean convertTabsToSpaces(StringBuffer line, int tabWidth, boolean removeTrailing, boolean ignoreBlankLines, boolean replaceAllTabs, boolean useModulo4Tabs) { char lastChar; boolean changed = false; if (removeTrailing) { changed = removeTrailingSpace(line, ignoreBlankLines); } int lineLength = line.length(); int spacesCount = 0; int tabsCount = 0; int lastIdx = 0; for (; lastIdx < lineLength; lastIdx++) { lastChar = line.charAt(lastIdx); if (lastChar == '\t') { changed = true; tabsCount++; } else if (lastChar == ' ') { spacesCount++; } else { break; } } if (tabsCount > 0) { spacesCount = calculateSpaces4Tabs(spacesCount, tabsCount, tabWidth, useModulo4Tabs); // delete whitespace to 'last' index, replace with spaces line.delete(0, lastIdx); line.insert(0, fillWith(spacesCount, ' ')); } if (replaceAllTabs) { if (lastIdx >= lineLength) { lastIdx = 0; } changed |= replaceAllTabs(line, lastIdx, tabWidth); } return changed; } private static int calculateSpaces4Tabs(int spacesCount, int tabsCount, int tabWidth, boolean useModulo4Tabs) { if (!useModulo4Tabs) { return spacesCount + tabsCount * tabWidth; } /* * This does work well if and only if all three conditions below are met: * 1) the same tab size was used as the one set in AnyEdit preferences * 2) spaces wasn't "cross over" mixed with tabs multiple times in a line * 3) spaces prepends tabs */ return spacesCount - (spacesCount % tabWidth) + tabsCount * tabWidth; } private static int calculateTabs4Spaces(int spacesCount, int tabWidth) { int tabs = spacesCount / tabWidth; int rest = spacesCount % tabWidth != 0 ? 
1 : 0; return tabs + rest; } private static boolean replaceAllTabs(StringBuffer line, int start, int tabWidth) { String spaces = null; boolean changed = false; for (int i = start; i < line.length(); i++) { char c = line.charAt(i); if (c == '\t') { if (spaces == null) { spaces = String.valueOf(fillWith(tabWidth, ' ')); } line.replace(i, i + 1, spaces); changed = true; } } return changed; } private static boolean replaceAllSpaces(StringBuffer line, int start, int tabWidth) { boolean changed = false; int spacesCount = 0; int lastIdx = start; int firstIdx = start; for (; lastIdx < line.length(); lastIdx++) { char c = line.charAt(lastIdx); if (c == ' ') { if (spacesCount == 0) { firstIdx = lastIdx; } spacesCount++; } else if (spacesCount > 0) { int tabsCount = calculateTabs4Spaces(spacesCount, tabWidth); line.replace(firstIdx, lastIdx, String.valueOf(fillWith(tabsCount, '\t'))); changed = true; spacesCount = 0; lastIdx = firstIdx + tabsCount; } } if (spacesCount > 0) { int tabsCount = calculateTabs4Spaces(spacesCount, tabWidth); line.replace(firstIdx, lastIdx, String.valueOf(fillWith(tabsCount, '\t'))); changed = true; } return changed; } public static boolean removeTrailingSpace(StringBuffer line, boolean ignoreBlankLine) { boolean changed = false; char lastChar; int lineLength = line.length(); int lastCharsLength = getLineEnd(line).length; int lastIdx = lineLength - lastCharsLength - 1; if (ignoreBlankLine) { boolean nonWhitespaceFound = false; for (int i = lastIdx; i >= 0; i--) { lastChar = line.charAt(i); if (lastChar != ' ' && lastChar != '\t') { nonWhitespaceFound = true; break; } } if (nonWhitespaceFound == false) { return false; } } while (lastIdx >= 0) { lastChar = line.charAt(lastIdx); if (lastChar != ' ' && lastChar != '\t') { break; } lastIdx--; } if (lastIdx != lineLength - lastCharsLength - 1) { line.delete(lastIdx + 1, lineLength - lastCharsLength); changed = true; } return changed; } public static boolean convertSpacesToTabs(StringBuffer line, int 
tabWidth, boolean removeTrailing, boolean ignoreBlankLines, boolean replaceAllSpaces) { boolean changed = false; if (removeTrailing) { changed = removeTrailingSpace(line, ignoreBlankLines); } int lineLength = line.length(); int spacesCount = 0; int tabsCount = 0; int lastIdx = 0; char lastChar = '?'; for (; lastIdx < lineLength; lastIdx++) { lastChar = line.charAt(lastIdx); if (lastChar == ' ') { changed = true; spacesCount++; } else if (lastChar == '\t') { tabsCount++; } else { break; } } if (spacesCount > 0) { boolean isComment = lastChar == '*'; int additionalTabs = spacesCount / tabWidth; if (additionalTabs == 0 && tabsCount == 0) { if (replaceAllSpaces) { additionalTabs = 1; spacesCount = 0; } else { // XXX remove leading spaces, except for javadoc if (!isComment) { line.delete(0, lastIdx); changed = true; } return changed; } } if (additionalTabs == 0 && !replaceAllSpaces) { line.delete(0, tabsCount + spacesCount); if (tabsCount > 0) { line.insert(0, fillWith(tabsCount, '\t')); } // XXX add extra space for javadoc if (isComment) { line.insert(tabsCount, fillWith(1, ' ')); } return true; } tabsCount += additionalTabs; // modulo rest int extraSpaces = spacesCount % tabWidth; // delete whitespace to 'last' index, replace with tabs line.delete(0, lastIdx); line.insert(0, fillWith(tabsCount, '\t')); // if some last spaces exists, add them back if (extraSpaces > 0) { if (replaceAllSpaces) { line.insert(tabsCount, fillWith(1, '\t')); } else { line.insert(tabsCount, fillWith(extraSpaces, ' ')); } } } if (replaceAllSpaces) { changed |= replaceAllSpaces(line, tabsCount, tabWidth); } return changed; } private static char[] getLineEnd(StringBuffer line) { if (line == null) { return EMPTY; } int lastIdx = line.length() - 1; if (lastIdx < 0) { return EMPTY; } char last = line.charAt(lastIdx); if (last == '\n') { if (lastIdx > 0) { if (line.charAt(lastIdx - 1) == '\r') { return CRLF; // windows } } return LF; // unix } else if (last == '\r') { return CR; // mac } else { 
return EMPTY; } } /** * @return number of occurencies of c in s */ public static int count(String s, char c) { if (s == null) { return 0; } int count = 0; for (int i = 0; i < s.length(); i++) { if (s.charAt(i) == c) { count++; } } return count; } /** * @return char array with specified amount of given characters. */ private static char[] fillWith(int length, char c) { char[] chars = new char[length]; for (int i = 0; i < length; i++) { chars[i] = c; } return chars; } /** * Characters used for escape operations */ private static final String[][] HTML_ESCAPE_CHARS = { { "<", "<" }, { ">", ">" }, { "&", "&" }, { """, "\"" }, { "à", "\u00e0" }, { "À", "\u00c0" }, { "â", "\u00e2" }, { "ä", "\u00e4" }, { "Ä", "\u00c4" }, { "Â", "\u00c2" }, { "å", "\u00e5" }, { "Å", "\u00c5" }, { "æ", "\u00e6" }, { "Æ", "\u00c6" }, { "ç", "\u00e7" }, { "Ç", "\u00c7" }, { "é", "\u00e9" }, { "É", "\u00c9" }, { "á", "\u00e1" }, { "Á", "\u00c1" }, { "è", "\u00e8" }, { "È", "\u00c8" }, { "ê", "\u00ea" }, { "Ê", "\u00ca" }, { "ë", "\u00eb" }, { "Ë", "\u00cb" }, { "ï", "\u00ef" }, { "Ï", "\u00cf" }, { "í", "\u00ed" }, { "Í", "\u00cd" }, { "ã", "\u00e3" }, { "Ã", "\u00c3" }, { "õ", "\u00f5" }, { "Õ", "\u00d5" }, { "ó", "\u00f3" }, { "Ó", "\u00d3" }, { "ô", "\u00f4" }, { "Ô", "\u00d4" }, { "ö", "\u00f6" }, { "Ö", "\u00d6" }, { "ø", "\u00f8" }, { "Ø", "\u00d8" }, { "ß", "\u00df" }, { "ù", "\u00f9" }, { "Ù", "\u00d9" }, { "ú", "\u00fa" }, { "Ú", "\u00da" }, { "û", "\u00fb" }, { "Û", "\u00db" }, { "ü", "\u00fc" }, { "Ü", "\u00dc" }, { " ", " " }, { "®", "\u00AE" }, { "©", "\u00A9" }, { "€", "\u20A0" }, { "€", "\u20AC" } }; /** * Get html entity for escape character * @return null, if no entity found for given character */ public static String getEntityForChar(char ch) { switch (ch) { case '<': return "<"; case '>': return ">"; case '&': return "&"; case '"': return """; case '\u00e0': return "à"; case '\u00e1': return "á"; case '\u00c0': return "À"; case '\u00c1': return "Á"; case '\u00e2': return 
"â"; case '\u00c2': return "Â"; case '\u00e4': return "ä"; case '\u00c4': return "Ä"; case '\u00e5': return "å"; case '\u00c5': return "Å"; case '\u00e3': return "ã"; case '\u00c3': return "Ã"; case '\u00e6': return "æ"; case '\u00c6': return "Æ"; case '\u00e7': return "ç"; case '\u00c7': return "Ç"; case '\u00e9': return "é"; case '\u00c9': return "É"; case '\u00e8': return "è"; case '\u00c8': return "È"; case '\u00ea': return "ê"; case '\u00ca': return "Ê"; case '\u00eb': return "ë"; case '\u00cb': return "Ë"; case '\u00ed': return "í"; case '\u00cd': return "Í"; case '\u00ef': return "ï"; case '\u00cf': return "Ï"; case '\u00f5': return "õ"; case '\u00d5': return "Õ"; case '\u00f3': return "ó"; case '\u00f4': return "ô"; case '\u00d3': return "Ó"; case '\u00d4': return "Ô"; case '\u00f6': return "ö"; case '\u00d6': return "Ö"; case '\u00f8': return "ø"; case '\u00d8': return "Ø"; case '\u00df': return "ß"; case '\u00f9': return "ù"; case '\u00d9': return "Ù"; case '\u00fa': return "ú"; case '\u00da': return "Ú"; case '\u00fb': return "û"; case '\u00db': return "Û"; case '\u00fc': return "ü"; case '\u00dc': return "Ü"; case '\u00AE': return "®"; case '\u00A9': return "©"; case '\u20A0': return "€"; case '\u20AC': return "€"; // case '' : return "€"; // case '\u20AC': return "€"; // euro // be carefull with this one (non-breaking white space) //case ' ' : return " "; default: { //Submitted by S. Bayer. 
int ci = 0xffff & ch; if (ci < 160) { // nothing special only 7 Bit return null; } // Not 7 Bit use the unicode system return "&#" + ci + ";"; } } } /** * change escape characters to html entities (from http://www.rgagnon.com/howto.html) * @param s string to be modified * @return string with escape characters, changed to html entities */ public static String escapeText(String s) { if (s == null) { // shit in, shit out return null; } StringBuffer sb = new StringBuffer(); int n = s.length(); char c; String entity; for (int i = 0; i < n; i++) { c = s.charAt(i); entity = getEntityForChar(c); if (entity != null) { sb.append(entity); } else { sb.append(c); } } return sb.toString(); } /** * change html entities to escape characters (from http://www.rgagnon.com/howto.html) * @param s string to unescape * @return new string with html entities changed to escape characters */ public static String unescapeText(String s) { int i, j, k; if (s != null && (i = s.indexOf('&')) > -1) { j = s.indexOf(';', i); if (j > i) { String temp = s.substring(i, j + 1); // search in escape[][] if temp is there k = 0; int arraySize = HTML_ESCAPE_CHARS.length; while (k < arraySize) { if (HTML_ESCAPE_CHARS[k][0].equals(temp)) { break; } k++; } // now we found html escape character if (k < arraySize) { // replace it to ASCII s = new StringBuffer(s.substring(0, i)).append(HTML_ESCAPE_CHARS[k][1]) .append(s.substring(j + 1)).toString(); return unescapeText(s); // recursive call } else if (k == arraySize) { s = new StringBuffer(s.substring(0, i)).append('&').append(unescapeText(s.substring(i + 1))) .toString(); return s; } } } return s; } /** * get index of first non-whitespace letter (one of " \t\r\n") * @return -1 if no such (non-whitespace) character found from given * startOffset (inclusive) */ private static int indexOfNextWord(String line, int startOffset, int lastIdx) { int size = line.length(); char c; boolean continueSequence = lastIdx + 1 == startOffset; for (int i = startOffset; i < size; 
i++) { c = line.charAt(i); if (Character.isWhitespace(c)) { continueSequence = false; continue; } else if (continueSequence) { continue; } return i; } return -1; } public static String capitalize(String line) { StringBuffer sb = new StringBuffer(line); int size = line.length(); boolean changed = false; char c; int lastWordIdx = 0; for (int i = 0; i < size; i++) { i = indexOfNextWord(line, i, lastWordIdx); if (i < 0) { break; } c = line.charAt(i); if (Character.isLowerCase(c)) { c = Character.toUpperCase(c); sb.setCharAt(i, c); changed = true; } lastWordIdx = i; } if (changed) { return new String(sb); } return line; } public static String invertCase(String line) { char[] chars = line.toCharArray(); char c; boolean changed = false; for (int i = 0; i < chars.length; i++) { c = chars[i]; // XXX DOESN'T WORK WITH UNICODE SPECIAL CHARS!!!! if (Character.isLowerCase(c)) { chars[i] = Character.toUpperCase(c); changed = true; } else if (Character.isUpperCase(c)) { chars[i] = Character.toLowerCase(c); changed = true; } } if (changed) { return String.valueOf(chars); } return line; } }