Example usage for org.apache.commons.io ByteOrderMark UTF_8

List of usage examples for org.apache.commons.io ByteOrderMark UTF_8

Introduction

On this page you can find example usages of org.apache.commons.io ByteOrderMark UTF_8.

Prototype

ByteOrderMark UTF_8

To view the source code for org.apache.commons.io ByteOrderMark UTF_8, click Source Link.

Document

UTF-8 BOM

Usage

From source file:com.ibm.wala.cast.ipa.callgraph.CAstCallGraphUtil.java

/**
 * Creates a source module for a script URL whose input stream has any leading
 * byte order mark stripped.
 *
 * <p>The file name is derived from {@code script.getFile()} by replacing the
 * literal sequences {@code %5c} with {@code /} and {@code %20} with a space.
 *
 * @param script     URL of the script; its file part names the backing file
 * @param scriptName name handed to the module; asserted to be a suffix of the
 *                   derived file name
 * @return a {@code SourceFileModule} whose {@code getInputStream()} excludes a
 *         UTF-8, UTF-16 or UTF-32 byte order mark
 */
public static SourceFileModule makeSourceModule(URL script, String scriptName) {
    // Literal replacement: neither search string contains regex syntax, so this
    // yields the same result as the regex-based replaceAll.
    String hackedName = script.getFile().replace("%5c", "/").replace("%20", " ");

    File scriptFile = new File(hackedName);

    assert hackedName.endsWith(scriptName) : scriptName + " does not match file " + script.getFile();

    return new SourceFileModule(scriptFile, scriptName, null) {
        @Override
        public InputStream getInputStream() {
            BOMInputStream bs = new BOMInputStream(super.getInputStream(), false, ByteOrderMark.UTF_8,
                    ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE,
                    ByteOrderMark.UTF_32BE);
            try {
                if (bs.hasBOM()) {
                    System.err.println("removing BOM " + bs.getBOM());
                }
                return bs;
            } catch (IOException e) {
                // BOM detection failed. Release the stream that was already opened
                // before handing out a fresh one, so the first stream is not leaked.
                try {
                    bs.close();
                } catch (IOException ignored) {
                    // Best effort: the fallback stream below is what the caller gets.
                }
                return super.getInputStream();
            }
        }
    };
}

From source file:ee.ria.xroad.proxy.testsuite.testcases.EmptyMultipartRequest.java

/**
 * Supplies the query body for this test case: the UTF-8 byte order mark bytes
 * when {@code addUtf8Bom} is set, otherwise no bytes at all.
 *
 * @param fileName   not read by this implementation
 * @param addUtf8Bom whether the stream should contain the UTF-8 BOM bytes
 * @return an in-memory stream over the chosen bytes
 */
@Override
protected InputStream getQueryInputStream(String fileName, boolean addUtf8Bom) throws Exception {
    final byte[] payload;
    if (addUtf8Bom) {
        payload = ByteOrderMark.UTF_8.getBytes();
    } else {
        payload = new byte[] {};
    }
    return new ByteArrayInputStream(payload);
}

From source file:ca.nines.ise.dom.DOMStream.java

/**
 * Builds a DOMStream by reading every line of an input stream, keeping a
 * label for the origin of the data so it can be attached to log messages.
 *
 * <p>If a byte order mark is found, its charset is logged and used for
 * decoding; without one the input is decoded as UTF-8. A non-UTF-8 charset
 * is logged as well. Each line is NFKC-normalized and curly double quotes
 * (U+201C, U+201D) are replaced by a plain double quote; the first time that
 * happens a message is logged.
 *
 * @param in     stream supplying the raw bytes
 * @param source origin label set on every message logged here
 * @throws java.io.IOException if detecting the byte order mark or reading
 *                             the lines fails
 */
public DOMStream(InputStream in, String source) throws IOException {
    lines = new ArrayList<>();
    boolean smartQuotesReported = false;

    BOMInputStream bomStream = new BOMInputStream(in, ByteOrderMark.UTF_8, ByteOrderMark.UTF_32LE,
            ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE);
    bom = bomStream.getBOM();
    if (bom == null) {
        // No byte order mark present: decode as UTF-8.
        encoding = "UTF-8";
    } else {
        Message bomNotice = Message.builder("builder.bom").setSource(source)
                .addNote("The byte order mark was " + bom.getCharsetName()).build();
        Log.addMessage(bomNotice);
        encoding = bom.getCharsetName();
    }

    boolean utf8 = encoding.equals("UTF-8");
    if (!utf8) {
        Message encodingNotice = Message.builder("builder.notutf8").setSource(source)
                .addNote("The incorrect encoding is " + encoding).build();
        Log.addMessage(encodingNotice);
    }

    BufferedReader reader = new BufferedReader(new InputStreamReader(bomStream, encoding));
    StringBuilder text = new StringBuilder();
    Pattern smartQuotes = Pattern.compile("\u201C|\u201D");

    for (String raw = reader.readLine(); raw != null; raw = reader.readLine()) {
        String cleaned = Normalizer.normalize(raw, Form.NFKC);
        Matcher quoteMatcher = smartQuotes.matcher(cleaned);
        if (quoteMatcher.find()) {
            cleaned = quoteMatcher.replaceAll("\"");
            if (!smartQuotesReported) {
                smartQuotesReported = true;
                // The reported number is the count of lines stored so far.
                Message quoteNotice = Message.builder("builder.smartquotes").setSource(source)
                        .addNote("The first occurence of smart quotes was at line " + lines.size()).build();
                Log.addMessage(quoteNotice);
            }
        }
        lines.add(cleaned);
        text.append(cleaned).append("\n");
    }

    content = text.toString().trim();
}

From source file:com.vistatec.ocelot.xliff.okapi.OkapiXLIFFFactory.java

/**
 * Determines the XLIFF version declared by a file.
 *
 * <p>The file is decoded using the charset of its byte order mark, or UTF-8
 * when it has none. Events are read until an element with local name
 * {@code xliff} carries an attribute accepted by
 * {@code isXliffVersionAttributeName}; the value {@code "2.0"} maps to
 * {@link XLIFFVersion#XLIFF20}, any other value to {@link XLIFFVersion#XLIFF12}.
 *
 * @param detectVersion the file to inspect
 * @return the detected XLIFF version
 * @throws IOException           if the file cannot be opened or read
 * @throws XMLStreamException    if the XML cannot be parsed
 * @throws IllegalStateException if no version attribute is found before the
 *                               end of the document
 */
@Override
public XLIFFVersion detectXLIFFVersion(File detectVersion) throws IOException, XMLStreamException {
    try (BOMInputStream bomInputStream = new BOMInputStream(new FileInputStream(detectVersion),
            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_32BE,
            ByteOrderMark.UTF_32LE)) {
        String charsetName = "UTF-8";
        if (bomInputStream.hasBOM()) {
            charsetName = bomInputStream.getBOMCharsetName();
        }

        XMLInputFactory xml = XMLInputFactory.newInstance();
        // The file comes from outside the application; version detection never
        // needs external entities, so refuse to resolve them (XXE hardening).
        xml.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);

        XMLEventReader reader = xml.createXMLEventReader(bomInputStream, charsetName);
        try {
            while (reader.hasNext()) {
                XMLEvent event = reader.nextEvent();
                if (event.getEventType() != XMLEvent.START_ELEMENT) {
                    continue;
                }
                StartElement startElement = (StartElement) event;
                if (!startElement.getName().getLocalPart().equals("xliff")) {
                    continue;
                }
                @SuppressWarnings("unchecked")
                Iterator<Attribute> attrs = startElement.getAttributes();
                while (attrs.hasNext()) {
                    Attribute attr = attrs.next();
                    if (isXliffVersionAttributeName(attr.getName())) {
                        return "2.0".equals(attr.getValue()) ? XLIFFVersion.XLIFF20 : XLIFFVersion.XLIFF12;
                    }
                }
            }
            throw new IllegalStateException("Could not detect XLIFF version");
        } finally {
            // Release the reader on every exit path, not only on success.
            try {
                reader.close();
            } catch (XMLStreamException ignored) {
                // A failure while closing must not replace the detection result or
                // the original exception; the stream itself is closed by the
                // enclosing try-with-resources.
            }
        }
    }
}

From source file:ee.ria.xroad.proxy.testsuite.testcases.AttachmentBig.java

/**
 * Produces the request content type and body stream for this test case.
 *
 * <p>The body is the read end of a pipe. Optionally the UTF-8 byte order mark
 * bytes are written into the multipart stream first, then an {@code MpWriter}
 * is started on a new thread with that multipart stream.
 *
 * @param addUtf8Bom whether to write the UTF-8 BOM bytes before starting the writer
 * @return the content type (including the multipart boundary) paired with the
 *         piped input stream
 */
@Override
protected Pair<String, InputStream> getRequestInput(boolean addUtf8Bom) throws Exception {
    PipedOutputStream pipeSink = new PipedOutputStream();
    InputStream requestBody = new PipedInputStream(pipeSink);
    MultiPartOutputStream multipart = new MultiPartOutputStream(pipeSink);

    if (addUtf8Bom) {
        byte[] bomBytes = ByteOrderMark.UTF_8.getBytes();
        multipart.write(bomBytes);
    }

    Thread writerThread = new Thread(new MpWriter(multipart));
    writerThread.start();

    String contentType = "multipart/related; charset=UTF-8; " + "boundary=" + multipart.getBoundary();
    return Pair.of(contentType, requestBody);
}

From source file:net.sf.jmimemagic.detectors.TextFileDetector.java

/**
 * DOCUMENT ME!/*from w  w w  .  j  a v  a 2s.c  o m*/
 *
 * @param data DOCUMENT ME!
 * @param offset DOCUMENT ME!
 * @param length DOCUMENT ME!
 * @param bitmask DOCUMENT ME!
 * @param comparator DOCUMENT ME!
 * @param mimeType DOCUMENT ME!
 * @param params DOCUMENT ME!
 *
 * @return DOCUMENT ME!
 */
public String[] process(byte[] data, int offset, int length, long bitmask, char comparator, String mimeType,
        Map params) {
    log.debug("processing stream data");

    Perl5Util util = new Perl5Util();

    try {
        BOMInputStream bomIn = new BOMInputStream(new ByteArrayInputStream(data), ByteOrderMark.UTF_8,
                ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE);
        if (bomIn.hasBOM()) {
            return new String[] { "text/plain" };
        }
    } catch (IOException e) {
        log.error("TextFileDetector: error detecting byte order mark");
    }

    try {
        String s = new String(data, "UTF-8");

        if (!util.match("/[^[:ascii:][:space:]]/", s)) {
            return new String[] { "text/plain" };
        }
    } catch (UnsupportedEncodingException e) {
        log.error("TextFileDetector: failed to process data");
    }

    return null;
}

From source file:com.examples.with.different.packagename.idnaming.BOMInputStream.java

/**
 * Constructs a new BOM InputStream that excludes a {@link ByteOrderMark#UTF_8} BOM.
 *
 * @param delegate
 *            the InputStream to delegate to
 */
public BOMInputStream(InputStream delegate) {
    // Forwards to the three-argument form with include=false and UTF-8 as the only BOM.
    this(delegate, false, ByteOrderMark.UTF_8);
}

From source file:com.examples.with.different.packagename.coverage.BOMInputStreamTest.java

/**
 * Reads a three-byte stream and checks that every byte comes back unchanged,
 * that end of stream follows, and that no byte order mark is reported.
 */
public void testReadWithoutBOM() throws Exception {
    final byte[] payload = { 'A', 'B', 'C' };
    final BOMInputStream stream = new BOMInputStream(createDataStream(payload, false));

    // Each byte must be returned in order.
    for (byte expected : payload) {
        assertEquals(expected, stream.read());
    }
    assertEquals(-1, stream.read());

    assertFalse("hasBOM()", stream.hasBOM());
    assertFalse("hasBOM(UTF-8)", stream.hasBOM(ByteOrderMark.UTF_8));
    assertNull("getBOM", stream.getBOM());
}

From source file:com.examples.with.different.packagename.idnaming.BOMInputStream.java

/**
 * Constructs a new BOM InputStream that detects a {@link ByteOrderMark#UTF_8} and optionally includes it.
 *
 * @param delegate
 *            the InputStream to delegate to
 * @param include
 *            true to include the UTF-8 BOM or false to exclude it
 */
public BOMInputStream(InputStream delegate, boolean include) {
    // Forwards to the three-argument form with UTF-8 as the only BOM.
    this(delegate, include, ByteOrderMark.UTF_8);
}

From source file:com.examples.with.different.packagename.coverage.BOMInputStreamTest.java

/**
 * Reads from a stream with no bytes and checks that end of stream is reported
 * at once and that no byte order mark is detected.
 */
public void testReadEmpty() throws Exception {
    final byte[] nothing = new byte[0];
    final BOMInputStream stream = new BOMInputStream(createDataStream(nothing, false));

    assertEquals(-1, stream.read());

    assertFalse("hasBOM()", stream.hasBOM());
    assertFalse("hasBOM(UTF-8)", stream.hasBOM(ByteOrderMark.UTF_8));
    assertNull("getBOM", stream.getBOM());
}