Example usage for com.google.common.base CharMatcher INVISIBLE

List of usage examples for com.google.common.base CharMatcher INVISIBLE

Introduction

On this page you can find example usages of com.google.common.base CharMatcher INVISIBLE.

Prototype

CharMatcher INVISIBLE

To view the source code for com.google.common.base CharMatcher INVISIBLE, use the link below.

Click Source Link

Document

Determines whether a character is invisible; that is, if its Unicode category is any of SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and PRIVATE_USE according to ICU4J.

Usage

From source file:net.conquiris.qs.QSStrings.java

/**
 * Renders a character in a form that is visible when printed.
 *
 * @param c the character to render
 * @return the hexadecimal value of {@code c} if {@link CharMatcher#INVISIBLE} matches it,
 *         otherwise the character itself as a one-character string
 */
static String toVisibleString(char c) {
    if (CharMatcher.INVISIBLE.matches(c)) {
        // The hex form was previously computed and thrown away, so invisible
        // characters were returned unchanged.
        return Integer.toHexString(c);
    }
    return Character.toString(c);
}

From source file:io.github.valters.xsdiff.report.NodeToString.java

/**
 * Strips leading and trailing invisible characters (whitespace, line breaks and the like) from the
 * given string. Used to see whether a document is actually empty, in which case an alternative
 * printer should be used.
 *
 * @param str the text to trim; {@code null} is treated as the empty string
 * @return the trimmed text
 */
public String trim(final String str) {
    final String nonNull = Strings.nullToEmpty(str);
    return CharMatcher.invisible().trimFrom(nonNull);
}

From source file:org.aliuge.crawler.extractor.selector.action.string.StringFilterAction.java

/**
 * Removes from {@code content} the character classes listed in {@code set}, then removes every
 * character contained in {@code filterString} when that string is not blank.
 *
 * @param content the text to filter
 * @return the filtered text
 */
@Override
public String doAction(String content) {
    for (CharType ct : set) {
        // Every case ends with a break: without it a single configured type would fall
        // through and also strip all the classes listed after it.
        switch (ct) {
        case INVISIBLE:
            content = CharMatcher.INVISIBLE.removeFrom(content);
            break;
        case BREAKING_WHITESPACE:
            content = CharMatcher.BREAKING_WHITESPACE.removeFrom(content);
            break;
        case DIGIT:
            content = CharMatcher.DIGIT.removeFrom(content);
            break;
        case LETTER:
            content = CharMatcher.JAVA_LETTER.removeFrom(content);
            break;
        default:
            break;
        }
    }
    if (StringUtils.isNotBlank(filterString)) {
        content = CharMatcher.anyOf(filterString).removeFrom(content);
    }
    return content;
}

From source file:org.nuxeo.ecm.core.filter.CharacterFilteringServiceImpl.java

/**
 * Registers the character filtering descriptor and builds the matcher of characters to remove.
 *
 * @param contrib the contribution, cast to {@code CharacterFilteringServiceDescriptor}
 * @param point the extension point name; must equal {@code FILTERING_XP}
 * @param contributor the contributing component (not used here)
 * @throws RuntimeException if {@code point} is not {@code FILTERING_XP}
 */
@Override
public void registerContribution(Object contrib, String point, ComponentInstance contributor) {
    if (!FILTERING_XP.equals(point)) {
        throw new RuntimeException("Unknown extension point: " + point);
    }

    desc = (CharacterFilteringServiceDescriptor) contrib;

    // ISO control characters are removed, except CR, LF and TAB.
    CharMatcher preserved = CharMatcher.anyOf("\r\n\t");
    CharMatcher removableControls = CharMatcher.JAVA_ISO_CONTROL.and(preserved.negate());
    // Invisible characters are removed as well, unless they are whitespace.
    CharMatcher nonWhitespaceInvisibles = CharMatcher.INVISIBLE.and(CharMatcher.WHITESPACE.negate());
    charsToRemove = removableControls.or(nonWhitespaceInvisibles);

    List<String> disallowed = desc.getDisallowedChars();
    if (disallowed == null || disallowed.isEmpty()) {
        return;
    }

    // Contributed entries are Java-escaped; unescape each one and remove the resulting characters too.
    StringBuilder extraChars = new StringBuilder();
    for (String escaped : disallowed) {
        extraChars.append(StringEscapeUtils.unescapeJava(escaped));
    }
    charsToRemove = charsToRemove.or(CharMatcher.anyOf(extraChars.toString()));
}

From source file:net.orpiske.tcs.wc.reduce.CountReducerTable.java

/**
 * Builds a mutation wrapping a single column.
 *
 * @param name the value used as the column name
 * @param obj the string stored as the column value, after invisible characters are removed
 * @return a mutation whose column-or-supercolumn holds the new column
 */
private static Mutation getMutation(String name, String obj) {
    // We really, really need to filter this, otherwise we save the
    // data with lots of invisible chars in the DB
    String visibleOnly = CharMatcher.INVISIBLE.removeFrom(obj);

    org.apache.cassandra.thrift.Column column = new org.apache.cassandra.thrift.Column();
    column.setName(ByteBufferUtil.bytes(name));
    column.setValue(ByteBufferUtil.bytes(visibleOnly));
    column.setTimestamp(System.currentTimeMillis());

    ColumnOrSuperColumn holder = new ColumnOrSuperColumn();
    holder.setColumn(column);

    Mutation mutation = new Mutation();
    mutation.setColumn_or_supercolumn(holder);
    return mutation;
}

From source file:ch.bd.qv.quiz.ejb.UploadBean.java

/**
 * purge the current question database and reloads it. This operation is 
 * transaction which means, if the new questions cannot be inserted, a rollback
 * is commited and the old data will be restored. 
 * @param bytes a csv file/*from   w w  w. ja  va  2 s  .c  o m*/
 */
@TransactionAttribute(TransactionAttributeType.REQUIRED)
public void purgeAndUpload(byte[] bytes) {
    truncateTables();
    String inputfile = new String(bytes, Charset.forName("UTF-8"));
    Iterable<String> lines = Splitter.on("\n").omitEmptyStrings().trimResults(CharMatcher.INVISIBLE)
            .split(inputfile);
    for (String line : lines) {
        LOGGER.debug("LINE: " + line);
        List<String> question = Lists.newArrayList(Splitter.on(";").split(line));
        LOGGER.debug(Joiner.on(":").join(question));

        switch (question.get(0).replaceAll("\\W", "").toUpperCase()) {

        case "FREE":
            makeFreeQuestion(question);
            break;
        case "RADIO":
            makeRadioQuestion(question);
            break;
        case "CHECK":
            makeCheckQuestion(question);
            break;
        default:
            throw new IllegalArgumentException("type: " + question.get(0) + " is not known. check csv");
        }
    }
}

From source file:edu.ehu.galan.cvalue.ProcessLinguisticFilters.java

/**
 * Collapses the given candidates into a list of distinct candidates, counting occurrences.
 *
 * <p>A candidate is kept only if its text still contains at least one character after invisible
 * characters are removed. The first occurrence of a candidate is added to the result and its
 * frequency incremented by one; every later occurrence that is equal to it increments the
 * frequency of the one already in the result.
 *
 * @param candidates the raw candidates
 * @return the distinct candidates, or an empty list if there are more than 60000 of them
 */
private List<Candidate> cleanCandidates(List<Candidate> candidates) {
    List<Candidate> cand = new ArrayList<>();
    for (Candidate candidate : candidates) {
        String printable = CharMatcher.INVISIBLE.removeFrom(candidate.getText());
        if (printable.isEmpty()) {
            continue;
        }
        // Look the candidate up once instead of contains() followed by indexOf().
        int idx = cand.indexOf(candidate);
        if (idx < 0) {
            candidate.incrementFreq(1);
            cand.add(candidate);
        } else {
            cand.get(idx).incrementFreq(1);
        }
    }
    if (cand.size() > 60000) {
        // The message now states the limit that is actually enforced.
        logger.warn("The candidates of CValue are > 60.000 .... an empty list will be returned");
        return new ArrayList<>();
    }
    return cand;
}

From source file:org.aliuge.crawler.extractor.selector.StringElementCssSelector.java

/**
 * Trims invisible characters from both ends of the text.
 *
 * @param text the text to trim
 * @return the trimmed text, or {@code text} unchanged when it is blank
 */
private String trimInvisibleChar(String text) {
    return StringUtils.isNotBlank(text) ? CharMatcher.INVISIBLE.trimFrom(text) : text;
}

From source file:org.aliuge.crawler.url.WebURL.java

/**
 * Sets the name after trimming invisible characters from both ends. A blank name is ignored and
 * the current value is kept.
 *
 * @param name the new name
 */
public void setName(String name) {
    if (!StringUtils.isNoneBlank(name)) {
        return;
    }
    this.name = CharMatcher.INVISIBLE.trimFrom(name);
}

From source file:humanize.Humanize.java

/**
 * <p>/* w  w  w  . j a  v a  2s.  co  m*/
 * Transforms a text into a representation suitable to be used in an URL.
 * </p>
 * 
 * <table border="0" cellspacing="0" cellpadding="3" width="100%">
 * <tr>
 * <th class="colFirst">Input</th>
 * <th class="colLast">Output</th>
 * </tr>
 * <tr>
 * <td>"J'tudie le franais"</td>
 * <td>"j-etudie-le-francais"</td>
 * </tr>
 * <tr>
 * <td>"Lo siento, no hablo espaol"</td>
 * <td>"lo-siento-no-hablo-espanol"</td>
 * </tr>
 * <tr>
 * <td>"\nsome@mail.com\n"</td>
 * <td>"some-mail-com"</td>
 * </tr>
 * </table>
 * 
 * @param text
 *            The text to be slugified
 * @param params
 *            The slugify parameterization object
 * @return a slugified representation of text specified
 */
public static String slugify(final String text, final SlugifyParams params) {
    String result = unidecode(text);
    result = PUNCTUATION.matcher(result).replaceAll("-");
    result = ONLY_SLUG_CHARS.matcher(result).replaceAll("");
    result = CharMatcher.WHITESPACE.trimFrom(result);
    result = HYPEN_SPACE.matcher(result).replaceAll(params.separator);
    result = CharMatcher.INVISIBLE.removeFrom(result);
    return params.isToLowerCase ? result.toLowerCase() : result;
}