Usage examples for com.itextpdf.tool.xml.parser.io.EncodingUtil#getDeclaredEncoding
public static String getDeclaredEncoding(final String decl)
From source file:pdfreader.XMLParser.java
License:Open Source License
/** * Detects encoding from a stream.//from w w w .ja v a2 s.co m * * @param in the stream * @return a Reader with the deduced encoding. * @throws IOException if IO went wrong * @throws UnsupportedEncodingException if unsupported encoding was detected */ public InputStreamReader detectEncoding(final InputStream in) throws IOException, UnsupportedEncodingException { // we expect a '>' in the first 100 characters in.mark(1028); byte b4[] = new byte[4]; int count = in.read(b4); if (count != 4) throw new IOException("Insufficient length"); String encoding = XMLUtil.getEncodingName(b4); String decl = null; if (encoding.equals("UTF-8")) { StringBuffer sb = new StringBuffer(); int c; while ((c = in.read()) != -1) { if (c == '>') break; sb.append((char) c); } decl = sb.toString(); } else if (encoding.equals("CP037")) { ByteArrayOutputStream bi = new ByteArrayOutputStream(); int c; while ((c = in.read()) != -1) { if (c == 0x6e) // that's '>' in ebcdic break; bi.write(c); } decl = new String(bi.toByteArray(), "CP037"); } if (decl != null) { decl = EncodingUtil.getDeclaredEncoding(decl); if (decl != null) encoding = decl; } in.reset(); return new InputStreamReader(in, IanaEncodings.getJavaEncoding(encoding)); }