Example usage for org.apache.commons.lang3 CharEncoding isSupported

List of usage examples for org.apache.commons.lang3 CharEncoding isSupported

Introduction

On this page you can find example usages of the org.apache.commons.lang3 CharEncoding isSupported method.

Prototype

public static boolean isSupported(final String name) 

Source Link

Document

<p>Returns whether the named charset is supported.</p> <p>This is similar to <a href="http://download.oracle.com/javase/1.4.2/docs/api/java/nio/charset/Charset.html#isSupported%28java.lang.String%29">java.nio.charset.Charset.isSupported(String)</a> but handles more formats.</p>

Usage

From source file:com.norconex.importer.handler.tagger.AbstractCharStreamTagger.java

/**
 * Wraps the document stream in a character reader and hands it to
 * {@code tagTextDocument}.
 *
 * <p>The character set is resolved from the metadata in this order: the
 * {@code Content-Encoding} value, then the {@code charset} value, then the
 * {@code charset=} parameter of the first {@code Content-Type} value that
 * mentions a charset. When nothing usable is found, or the resolved name is
 * not supported by {@link CharEncoding#isSupported(String)}, UTF-8 is
 * used.</p>
 *
 * @param reference document reference, passed through unchanged
 * @param document the raw document content
 * @param metadata metadata used to resolve the charset, also passed through
 * @param parsed passed through unchanged
 * @throws ImporterHandlerException if the reader cannot be created for the
 *         resolved charset
 */
@Override
protected final void tagApplicableDocument(String reference, InputStream document, ImporterMetadata metadata,
        boolean parsed) throws ImporterHandlerException {

    String charset = metadata.getString("Content-Encoding", null);
    if (charset == null) {
        charset = metadata.getString("charset", null);
    }
    if (charset == null) {
        for (String type : metadata.getStrings("Content-Type")) {
            if (type.contains("charset")) {
                // Keep only the charset token: drop any parameters that
                // follow it and any quotes/whitespace around it, so values
                // such as charset="ISO-8859-1"; boundary=x are honoured
                // instead of silently falling back to UTF-8.
                String value = StringUtils.substringAfter(type, "charset=");
                value = StringUtils.substringBefore(value, ";");
                charset = StringUtils.trimToNull(StringUtils.strip(value, " \t\"'"));
                break;
            }
        }
    }
    if (StringUtils.isBlank(charset) || !CharEncoding.isSupported(charset)) {
        charset = CharEncoding.UTF_8;
    }
    try {
        // The reader is not closed here.
        // NOTE(review): presumably the caller owns the stream - confirm.
        InputStreamReader is = new InputStreamReader(document, charset);
        tagTextDocument(reference, is, metadata, parsed);
    } catch (UnsupportedEncodingException e) {
        throw new ImporterHandlerException(e);
    }
}

From source file:com.norconex.importer.parser.impl.xfdl.XFDLParser.java

/**
 * Parses the document content as text and writes the result to
 * {@code output}.
 *
 * <p>The content is wrapped in a buffered stream, which is given to a
 * {@code CharsetDetector} and then read through a reader. The detected
 * charset is used when there is a match and its name is supported by
 * {@link CharEncoding#isSupported(String)}; otherwise UTF-8 is used.</p>
 *
 * @param doc the document to parse
 * @param output where the parsed content is written
 * @return always {@code null}
 * @throws DocumentParserException if reading or parsing fails
 */
@Override
public List<ImporterDocument> parseDocument(ImporterDocument doc, Writer output)
        throws DocumentParserException {
    try {
        //TODO have a generic utility method for this?
        BufferedInputStream content = new BufferedInputStream(doc.getContent());

        CharsetDetector charsetDetector = new CharsetDetector();
        charsetDetector.enableInputFilter(true);
        charsetDetector.setText(content);
        CharsetMatch detected = charsetDetector.detect();

        // Fall back to UTF-8 unless a supported charset was detected.
        String encoding;
        if (detected == null || !CharEncoding.isSupported(detected.getName())) {
            encoding = CharEncoding.UTF_8;
        } else {
            encoding = detected.getName();
        }

        InputStreamReader decoder = new InputStreamReader(content, encoding);
        parse(new BufferedReader(decoder), output, doc.getMetadata());
    } catch (IOException | ParserConfigurationException | SAXException e) {
        throw new DocumentParserException("Could not parse " + doc.getReference(), e);
    }
    return null;
}

From source file:com.norconex.commons.lang.url.URLNormalizer.java

/**
 * Create a new <code>URLNormalizer</code> instance.
 *
 * <p>Leading and trailing whitespace is removed first. Any remaining space
 * characters are then replaced with <code>%20</code> (a warning is logged)
 * and the result is checked for valid URI syntax.</p>
 *
 * @param url the url to normalize
 * @throws URLException if the URL syntax is invalid or UTF-8 is not
 *         supported by the running system
 */
public URLNormalizer(String url) {
    super();
    // Trim before escaping/validating: once spaces have been turned into
    // %20 there is nothing left for trim() to remove, and a leading space
    // would otherwise become "%20http://..." and be rejected as invalid.
    String fixedURL = StringUtils.trim(url);
    try {
        if (StringUtils.contains(fixedURL, " ")) {
            LOG.warn("URL syntax is invalid as it contains space "
                    + "character(s). Replacing them with %20. URL: " + url);
            fixedURL = StringUtils.replace(fixedURL, " ", "%20");
        }
        // make sure URL is valid
        new URI(fixedURL);
    } catch (URISyntaxException e) {
        throw new URLException("Invalid URL syntax: " + url, e);
    }
    if (!CharEncoding.isSupported(CharEncoding.UTF_8)) {
        throw new URLException("UTF-8 is not supported by your system.");
    }
    this.url = fixedURL;
}