Usage examples for com.google.common.base.CharMatcher.forPredicate
public static CharMatcher forPredicate(final Predicate<? super Character> predicate)
From source file:com.facebook.presto.bytecode.BytecodeUtils.java
public static String toJavaIdentifierString(String className) { // replace invalid characters with '_' return CharMatcher.forPredicate(Character::isJavaIdentifierPart).negate().replaceFrom(className, '_'); }
From source file:org.apache.mahout.knn.tools.Vectorize20NewsGroups.java
static Multiset<String> parse(File f) throws IOException { return Files.readLines(f, Charsets.UTF_8, new LineProcessor<Multiset<String>>() { private boolean readingHeaders = true; private Splitter header = Splitter.on(":").limit(2); private Splitter words = Splitter.on(CharMatcher.forPredicate(new Predicate<Character>() { @Override// ww w . j a va 2 s. c om public boolean apply(Character ch) { return !Character.isLetterOrDigit(ch) && ch != '.' && ch != '/' && ch != ':'; } })).omitEmptyStrings().trimResults(); private Pattern quotedLine = Pattern.compile("(^In article .*)|(^> .*)|(.*writes:$)|(^\\|>)"); private Multiset<String> counts = HashMultiset.create(); @Override public boolean processLine(String line) throws IOException { if (readingHeaders && line.length() == 0) { readingHeaders = false; } if (readingHeaders) { Iterator<String> i = header.split(line).iterator(); String head = i.next().toLowerCase(); if (legalHeaders.contains(head)) { addText(counts, i.next()); } } else { boolean quote = quotedLine.matcher(line).matches(); if (includeQuotes || !quote) { addText(counts, line); } } return true; } @Override public Multiset<String> getResult() { return counts; } private void addText(Multiset<String> v, String line) { for (String word : words.split(line)) { v.add(word.toLowerCase()); } } }); }