List of usage examples for org.apache.commons.io ByteOrderMark UTF_8
ByteOrderMark UTF_8
To view the source code for org.apache.commons.io ByteOrderMark UTF_8, click the Source Link.
From source file:com.ibm.wala.cast.ipa.callgraph.CAstCallGraphUtil.java
public static SourceFileModule makeSourceModule(URL script, String scriptName) { String hackedName = script.getFile().replaceAll("%5c", "/").replaceAll("%20", " "); File scriptFile = new File(hackedName); assert hackedName.endsWith(scriptName) : scriptName + " does not match file " + script.getFile(); return new SourceFileModule(scriptFile, scriptName, null) { @Override// w ww . j a v a 2s . co m public InputStream getInputStream() { BOMInputStream bs = new BOMInputStream(super.getInputStream(), false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE); try { if (bs.hasBOM()) { System.err.println("removing BOM " + bs.getBOM()); } return bs; } catch (IOException e) { return super.getInputStream(); } } }; }
From source file:ee.ria.xroad.proxy.testsuite.testcases.EmptyMultipartRequest.java
/**
 * Supplies the query body for this test case: either just the UTF-8 byte
 * order mark or no bytes at all.
 *
 * @param fileName   not used by this implementation
 * @param addUtf8Bom whether the returned stream should contain the UTF-8 BOM bytes
 * @return a stream over the BOM bytes, or an empty stream
 */
@Override
protected InputStream getQueryInputStream(String fileName, boolean addUtf8Bom) throws Exception {
    final byte[] payload;
    if (addUtf8Bom) {
        payload = ByteOrderMark.UTF_8.getBytes();
    } else {
        payload = new byte[0];
    }
    return new ByteArrayInputStream(payload);
}
From source file:ca.nines.ise.dom.DOMStream.java
/** * Construct a DOMStream from an input stream and record the source of the * input data./*from w ww . jav a 2s . c o m*/ * * @param in * @param source * @throws java.io.IOException */ public DOMStream(InputStream in, String source) throws IOException { lines = new ArrayList<>(); boolean warnedSmartQuotes = false; BOMInputStream bomStream = new BOMInputStream(in, ByteOrderMark.UTF_8, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE); bom = bomStream.getBOM(); if (bom != null) { Message m = Message.builder("builder.bom").setSource(source) .addNote("The byte order mark was " + bom.getCharsetName()).build(); Log.addMessage(m); encoding = bom.getCharsetName(); } else { encoding = "UTF-8"; } if (!encoding.equals("UTF-8")) { Message m = Message.builder("builder.notutf8").setSource(source) .addNote("The incorrect encoding is " + encoding).build(); Log.addMessage(m); } BufferedReader buffer = new BufferedReader(new InputStreamReader(bomStream, encoding)); String line; StringBuilder sb = new StringBuilder(); Pattern p = Pattern.compile("\u201C|\u201D"); while ((line = buffer.readLine()) != null) { line = Normalizer.normalize(line, Form.NFKC); Matcher m = p.matcher(line); if (m.find()) { line = m.replaceAll("\""); if (!warnedSmartQuotes) { warnedSmartQuotes = true; Message msg = Message.builder("builder.smartquotes").setSource(source) .addNote("The first occurence of smart quotes was at line " + lines.size()).build(); Log.addMessage(msg); } } lines.add(line); sb.append(line).append("\n"); } content = sb.toString().trim(); }
From source file:com.vistatec.ocelot.xliff.okapi.OkapiXLIFFFactory.java
/**
 * Detects the XLIFF version of a file by reading the version attribute of
 * its {@code xliff} element.
 *
 * <p>The decoding charset is taken from the file's byte order mark, or
 * UTF-8 when there is none. Parsing stops at the first {@code xliff} start
 * element that carries a version attribute.
 *
 * @param detectVersion the XLIFF file to inspect
 * @return {@code XLIFF20} when the version attribute is exactly {@code "2.0"},
 *         otherwise {@code XLIFF12}
 * @throws IOException           if the file cannot be opened or read
 * @throws XMLStreamException    if the content is not parseable XML
 * @throws IllegalStateException if no {@code xliff} element with a version attribute is found
 */
@Override
public XLIFFVersion detectXLIFFVersion(File detectVersion) throws IOException, XMLStreamException {
    try (BOMInputStream bomInputStream = new BOMInputStream(new FileInputStream(detectVersion),
            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE,
            ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_32LE)) {
        String encoding = "UTF-8";
        if (bomInputStream.hasBOM()) {
            encoding = bomInputStream.getBOMCharsetName();
        }

        XMLInputFactory xml = XMLInputFactory.newInstance();
        // Only the root element's attributes are needed; never resolve external
        // entities while sniffing an arbitrary file (XXE hardening).
        xml.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);

        XMLEventReader reader = xml.createXMLEventReader(bomInputStream, encoding);
        try {
            while (reader.hasNext()) {
                XMLEvent event = reader.nextEvent();
                if (event.getEventType() != XMLEvent.START_ELEMENT) {
                    continue;
                }
                StartElement startElement = (StartElement) event;
                if (!startElement.getName().getLocalPart().equals("xliff")) {
                    continue;
                }
                @SuppressWarnings("unchecked")
                Iterator<Attribute> attrs = startElement.getAttributes();
                while (attrs.hasNext()) {
                    Attribute attr = attrs.next();
                    if (isXliffVersionAttributeName(attr.getName())) {
                        return "2.0".equals(attr.getValue()) ? XLIFFVersion.XLIFF20 : XLIFFVersion.XLIFF12;
                    }
                }
            }
        } finally {
            // XMLEventReader is not AutoCloseable; release it on every exit path
            // (previously it was closed only when a version was found).
            try {
                reader.close();
            } catch (XMLStreamException ignored) {
                // a close failure must not mask the result or the original exception
            }
        }
        throw new IllegalStateException("Could not detect XLIFF version");
    }
}
From source file:ee.ria.xroad.proxy.testsuite.testcases.AttachmentBig.java
@Override protected Pair<String, InputStream> getRequestInput(boolean addUtf8Bom) throws Exception { PipedOutputStream os = new PipedOutputStream(); PipedInputStream is = new PipedInputStream(os); MultiPartOutputStream mpos = new MultiPartOutputStream(os); if (addUtf8Bom) { mpos.write(ByteOrderMark.UTF_8.getBytes()); }// w w w . jav a2 s . c om new Thread(new MpWriter(mpos)).start(); return Pair.of("multipart/related; charset=UTF-8; " + "boundary=" + mpos.getBoundary(), (InputStream) is); }
From source file:net.sf.jmimemagic.detectors.TextFileDetector.java
/** * DOCUMENT ME!/*from w w w . j a v a 2s.c o m*/ * * @param data DOCUMENT ME! * @param offset DOCUMENT ME! * @param length DOCUMENT ME! * @param bitmask DOCUMENT ME! * @param comparator DOCUMENT ME! * @param mimeType DOCUMENT ME! * @param params DOCUMENT ME! * * @return DOCUMENT ME! */ public String[] process(byte[] data, int offset, int length, long bitmask, char comparator, String mimeType, Map params) { log.debug("processing stream data"); Perl5Util util = new Perl5Util(); try { BOMInputStream bomIn = new BOMInputStream(new ByteArrayInputStream(data), ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE); if (bomIn.hasBOM()) { return new String[] { "text/plain" }; } } catch (IOException e) { log.error("TextFileDetector: error detecting byte order mark"); } try { String s = new String(data, "UTF-8"); if (!util.match("/[^[:ascii:][:space:]]/", s)) { return new String[] { "text/plain" }; } } catch (UnsupportedEncodingException e) { log.error("TextFileDetector: failed to process data"); } return null; }
From source file:com.examples.with.different.packagename.idnaming.BOMInputStream.java
/**
 * Creates a BOM-aware stream that looks only for a {@link ByteOrderMark#UTF_8}
 * byte order mark and leaves it out of the data that is read.
 *
 * @param delegate
 *            the underlying InputStream to read from
 */
public BOMInputStream(InputStream delegate) {
    this(delegate, false, ByteOrderMark.UTF_8);
}
From source file:com.examples.with.different.packagename.coverage.BOMInputStreamTest.java
/**
 * Reading data that has no byte order mark yields every byte unchanged,
 * then end-of-stream, and no BOM is reported.
 */
public void testReadWithoutBOM() throws Exception {
    byte[] data = new byte[] { 'A', 'B', 'C' };
    BOMInputStream in = new BOMInputStream(createDataStream(data, false));

    // Every input byte comes back in order...
    for (byte expected : data) {
        assertEquals(expected, in.read());
    }
    // ...followed by end-of-stream.
    assertEquals(-1, in.read());

    assertFalse("hasBOM()", in.hasBOM());
    assertFalse("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
    assertNull("getBOM", in.getBOM());
}
From source file:com.examples.with.different.packagename.idnaming.BOMInputStream.java
/**
 * Creates a BOM-aware stream that looks only for a {@link ByteOrderMark#UTF_8}
 * byte order mark and either keeps it in or leaves it out of the data that is read.
 *
 * @param delegate
 *            the underlying InputStream to read from
 * @param include
 *            true to include the UTF-8 BOM or false to exclude it
 */
public BOMInputStream(InputStream delegate, boolean include) {
    this(delegate, include, ByteOrderMark.UTF_8);
}
From source file:com.examples.with.different.packagename.coverage.BOMInputStreamTest.java
public void testReadEmpty() throws Exception { byte[] data = new byte[] {}; BOMInputStream in = new BOMInputStream(createDataStream(data, false)); assertEquals(-1, in.read());//from w w w .j av a 2 s. co m assertFalse("hasBOM()", in.hasBOM()); assertFalse("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8)); assertNull("getBOM", in.getBOM()); }