List of usage examples for com.google.common.base CharMatcher INVISIBLE
CharMatcher INVISIBLE
To view the source code for com.google.common.base CharMatcher INVISIBLE, click Source Link.
From source file:net.conquiris.qs.QSStrings.java
/**
 * Returns a printable representation of a character.
 *
 * @param c the character to render
 * @return the hexadecimal code of {@code c} when {@code CharMatcher.INVISIBLE} matches it,
 *         otherwise the character itself as a one-character string
 */
static String toVisibleString(char c) {
    if (CharMatcher.INVISIBLE.matches(c)) {
        // The hex string used to be computed and thrown away, so invisible
        // characters were returned unchanged; it must be returned.
        return Integer.toHexString(c);
    }
    return Character.toString(c);
}
From source file:io.github.valters.xsdiff.report.NodeToString.java
/**
 * Strips all leading and trailing invisible characters (whitespace and line breaks included).
 * Used to see if a document is actually empty and the alternative printer should be used.
 *
 * @param str the text to trim; passed through {@code Strings.nullToEmpty} first
 * @return the text without leading and trailing invisible characters
 */
public String trim(final String str) {
    final CharMatcher invisible = CharMatcher.invisible();
    final String text = Strings.nullToEmpty(str);
    return invisible.trimFrom(text);
}
From source file:org.aliuge.crawler.extractor.selector.action.string.StringFilterAction.java
/**
 * Removes from the content every character class listed in {@code set}, then, when
 * {@code filterString} is not blank, every character contained in {@code filterString}.
 *
 * @param content the text to filter
 * @return the filtered text
 */
@Override
public String doAction(String content) {
    for (CharType ct : set) {
        // Each case ends with an explicit break: without it, selecting INVISIBLE
        // fell through and also stripped whitespace, digits and letters.
        switch (ct) {
        case INVISIBLE:
            content = CharMatcher.INVISIBLE.removeFrom(content);
            break;
        case BREAKING_WHITESPACE:
            content = CharMatcher.BREAKING_WHITESPACE.removeFrom(content);
            break;
        case DIGIT:
            content = CharMatcher.DIGIT.removeFrom(content);
            break;
        case LETTER:
            content = CharMatcher.JAVA_LETTER.removeFrom(content);
            break;
        default:
            break;
        }
    }
    if (StringUtils.isNotBlank(filterString)) {
        content = CharMatcher.anyOf(filterString).removeFrom(content);
    }
    return content;
}
From source file:org.nuxeo.ecm.core.filter.CharacterFilteringServiceImpl.java
/**
 * Stores the contributed descriptor and rebuilds {@code charsToRemove} from it.
 *
 * <p>The matcher covers ISO control characters other than CR, LF and TAB, invisible
 * characters that are not whitespace, and any additional characters the descriptor
 * lists as disallowed (Java escape sequences are unescaped first).
 *
 * @param contrib the contribution, cast to {@code CharacterFilteringServiceDescriptor}
 * @param point the extension point name
 * @param contributor the contributing component (not used here)
 * @throws RuntimeException if {@code point} is not {@code FILTERING_XP}
 */
@Override
public void registerContribution(Object contrib, String point, ComponentInstance contributor) {
    if (!FILTERING_XP.equals(point)) {
        throw new RuntimeException("Unknown extension point: " + point);
    }
    desc = (CharacterFilteringServiceDescriptor) contrib;

    // Control characters are removed, except the ones explicitly preserved.
    CharMatcher preserved = CharMatcher.anyOf("\r\n\t");
    CharMatcher controlsToDrop = CharMatcher.JAVA_ISO_CONTROL.and(preserved.negate());
    // Invisible characters are removed too, unless they are whitespace.
    CharMatcher invisibleNonWhitespace = CharMatcher.INVISIBLE.and(CharMatcher.WHITESPACE.negate());
    charsToRemove = controlsToDrop.or(invisibleNonWhitespace);

    List<String> extraChars = desc.getDisallowedChars();
    if (extraChars == null || extraChars.isEmpty()) {
        return;
    }
    StringBuilder unescaped = new StringBuilder();
    for (String escaped : extraChars) {
        unescaped.append(StringEscapeUtils.unescapeJava(escaped));
    }
    charsToRemove = charsToRemove.or(CharMatcher.anyOf(unescaped.toString()));
}
From source file:net.orpiske.tcs.wc.reduce.CountReducerTable.java
/** * Gets a mutation ...//w w w. j av a2 s . co m * @param name The name of the table * @param obj A string object * @return A mutation object */ private static Mutation getMutation(String name, String obj) { org.apache.cassandra.thrift.Column c = new org.apache.cassandra.thrift.Column(); // We really, really need to filter this, otherwise we save the // data with lots of invisible chars in the DB CharMatcher legalChars = CharMatcher.INVISIBLE; String filtered = legalChars.removeFrom(obj); c.setName(ByteBufferUtil.bytes(name)); c.setValue(ByteBufferUtil.bytes(filtered)); c.setTimestamp(System.currentTimeMillis()); Mutation m = new Mutation(); m.setColumn_or_supercolumn(new ColumnOrSuperColumn()); m.column_or_supercolumn.setColumn(c); return m; }
From source file:ch.bd.qv.quiz.ejb.UploadBean.java
/** * purge the current question database and reloads it. This operation is * transaction which means, if the new questions cannot be inserted, a rollback * is commited and the old data will be restored. * @param bytes a csv file/*from w w w. ja va 2 s .c o m*/ */ @TransactionAttribute(TransactionAttributeType.REQUIRED) public void purgeAndUpload(byte[] bytes) { truncateTables(); String inputfile = new String(bytes, Charset.forName("UTF-8")); Iterable<String> lines = Splitter.on("\n").omitEmptyStrings().trimResults(CharMatcher.INVISIBLE) .split(inputfile); for (String line : lines) { LOGGER.debug("LINE: " + line); List<String> question = Lists.newArrayList(Splitter.on(";").split(line)); LOGGER.debug(Joiner.on(":").join(question)); switch (question.get(0).replaceAll("\\W", "").toUpperCase()) { case "FREE": makeFreeQuestion(question); break; case "RADIO": makeRadioQuestion(question); break; case "CHECK": makeCheckQuestion(question); break; default: throw new IllegalArgumentException("type: " + question.get(0) + " is not known. check csv"); } } }
From source file:edu.ehu.galan.cvalue.ProcessLinguisticFilters.java
private List<Candidate> cleanCandidates(List<Candidate> candidates) { List<Candidate> cand = new ArrayList<>(); for (Candidate candidate : candidates) { String printable = CharMatcher.INVISIBLE.removeFrom(candidate.getText()); int size = candidate.getText().length(); if (printable.length() > 0) { if (!cand.contains(candidate)) { candidate.incrementFreq(1); cand.add(candidate);//from ww w .ja v a2s . co m } else { int idx = cand.indexOf(candidate); cand.get(idx).incrementFreq(1); } } } if (cand.size() > 60000) { logger.warn("The candidates of CValue are > 5.000 .... an empty list will be returned"); List<Candidate> cands = new ArrayList<>(); return cands; } return cand; }
From source file:org.aliuge.crawler.extractor.selector.StringElementCssSelector.java
/**
 * Trims leading and trailing invisible characters from the text.
 *
 * @param text the text to trim
 * @return the trimmed text, or {@code text} unchanged when {@code StringUtils.isNotBlank}
 *         rejects it
 */
private String trimInvisibleChar(String text) {
    return StringUtils.isNotBlank(text) ? CharMatcher.INVISIBLE.trimFrom(text) : text;
}
From source file:org.aliuge.crawler.url.WebURL.java
/**
 * Sets the name after trimming leading and trailing invisible characters.
 * The current name is left untouched when {@code StringUtils.isNoneBlank} rejects the argument.
 *
 * @param name the new name
 */
public void setName(String name) {
    if (!StringUtils.isNoneBlank(name)) {
        return;
    }
    this.name = CharMatcher.INVISIBLE.trimFrom(name);
}
From source file:humanize.Humanize.java
/** * <p>/* w w w . j a v a 2s. co m*/ * Transforms a text into a representation suitable to be used in an URL. * </p> * * <table border="0" cellspacing="0" cellpadding="3" width="100%"> * <tr> * <th class="colFirst">Input</th> * <th class="colLast">Output</th> * </tr> * <tr> * <td>"J'tudie le franais"</td> * <td>"j-etudie-le-francais"</td> * </tr> * <tr> * <td>"Lo siento, no hablo espaol"</td> * <td>"lo-siento-no-hablo-espanol"</td> * </tr> * <tr> * <td>"\nsome@mail.com\n"</td> * <td>"some-mail-com"</td> * </tr> * </table> * * @param text * The text to be slugified * @param params * The slugify parameterization object * @return a slugified representation of text specified */ public static String slugify(final String text, final SlugifyParams params) { String result = unidecode(text); result = PUNCTUATION.matcher(result).replaceAll("-"); result = ONLY_SLUG_CHARS.matcher(result).replaceAll(""); result = CharMatcher.WHITESPACE.trimFrom(result); result = HYPEN_SPACE.matcher(result).replaceAll(params.separator); result = CharMatcher.INVISIBLE.removeFrom(result); return params.isToLowerCase ? result.toLowerCase() : result; }