List of usage examples for org.apache.commons.io.input.BOMInputStream.hasBOM()
public boolean hasBOM() throws IOException
From source file:com.ibm.wala.cast.ipa.callgraph.CAstCallGraphUtil.java
/**
 * Builds a {@link SourceFileModule} for a script URL whose input stream has a leading
 * byte order mark removed.
 *
 * <p>The file part of the URL is converted to a local path by replacing the escape
 * {@code %5c} with {@code /} and {@code %20} with a space; no other escapes are decoded.
 *
 * @param script URL pointing at the script file
 * @param scriptName name the resulting path is expected to end with; this is only
 *        verified by an {@code assert}, so it is unchecked when assertions are disabled
 * @return a module whose {@code getInputStream()} excludes a UTF-8, UTF-16 or UTF-32 BOM
 *         when one is present
 */
public static SourceFileModule makeSourceModule(URL script, String scriptName) {
    // Literal replacement: neither pattern contains regex metacharacters, so
    // String.replace yields the same result as the regex-based replaceAll.
    String hackedName = script.getFile().replace("%5c", "/").replace("%20", " ");
    File scriptFile = new File(hackedName);

    assert hackedName.endsWith(scriptName) : scriptName + " does not match file " + script.getFile();

    return new SourceFileModule(scriptFile, scriptName, null) {
        @Override
        public InputStream getInputStream() {
            // include == false: a detected BOM is dropped from the returned stream.
            BOMInputStream bs = new BOMInputStream(super.getInputStream(), false,
                    ByteOrderMark.UTF_8,
                    ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
                    ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
            try {
                if (bs.hasBOM()) {
                    System.err.println("removing BOM " + bs.getBOM());
                }
                return bs;
            } catch (IOException e) {
                // BOM detection failed. Release the stream we already opened before
                // falling back to a fresh, unwrapped one; otherwise it would leak.
                try {
                    bs.close();
                } catch (IOException ignored) {
                    // Best effort: the fallback stream below is what the caller gets.
                }
                return super.getInputStream();
            }
        }
    };
}
From source file:cn.dreampie.resource.LessSource.java
private String loadResource(Resource resource, Charset charset) throws IOException { BOMInputStream inputStream = new BOMInputStream(resource.getInputStream()); try {//from www .j a v a 2s .co m if (inputStream.hasBOM()) { logger.debug("BOM found %s", inputStream.getBOMCharsetName()); return IOUtils.toString(inputStream, inputStream.getBOMCharsetName()); } else { logger.debug("Using charset " + charset.name()); return IOUtils.toString(inputStream, charset.name()); } } finally { inputStream.close(); } }
From source file:net.sf.jmimemagic.detectors.TextFileDetector.java
/**
 * Reports {@code text/plain} for data that either starts with a byte order mark or
 * consists solely of ASCII and whitespace characters.
 *
 * <p>Two checks are applied to the whole {@code data} array, in order:
 * <ol>
 *   <li>a UTF-8, UTF-16LE or UTF-16BE byte order mark at the start of the data;</li>
 *   <li>after decoding the data as UTF-8, the absence of any character matching
 *       {@code [^[:ascii:][:space:]]}.</li>
 * </ol>
 * A failure in either check is logged and treated as "no match".
 *
 * @param data bytes to inspect
 * @param offset not used by this implementation
 * @param length not used by this implementation
 * @param bitmask not used by this implementation
 * @param comparator not used by this implementation
 * @param mimeType not used by this implementation
 * @param params not used by this implementation
 *
 * @return a one-element array containing {@code "text/plain"} when either check
 *         succeeds, otherwise {@code null}
 */
public String[] process(byte[] data, int offset, int length, long bitmask, char comparator, String mimeType,
        Map params) {
    log.debug("processing stream data");

    Perl5Util util = new Perl5Util();

    // Check 1: does the data begin with a recognised byte order mark?
    boolean startsWithBom = false;
    try {
        BOMInputStream bomProbe = new BOMInputStream(new ByteArrayInputStream(data),
                ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE);
        startsWithBom = bomProbe.hasBOM();
    } catch (IOException e) {
        log.error("TextFileDetector: error detecting byte order mark");
    }
    if (startsWithBom) {
        return new String[] { "text/plain" };
    }

    // Check 2: no character outside the ASCII and whitespace classes.
    boolean asciiOnly = false;
    try {
        String text = new String(data, "UTF-8");
        asciiOnly = !util.match("/[^[:ascii:][:space:]]/", text);
    } catch (UnsupportedEncodingException e) {
        log.error("TextFileDetector: failed to process data");
    }

    return asciiOnly ? new String[] { "text/plain" } : null;
}
From source file:net.sourceforge.users.dragomerlin.vcs2icsCalendarConverter.ConvertSingleFile.java
private static BufferedReader detectEncodingAndOpenFile(File inFile) throws IOException { String encodingType = null;/* www. j a va 2 s. c o m*/ BufferedReader input = null; BOMInputStream bomIn = null; // Detect file encoding encodingType = TestDetector.main(inFile.getAbsolutePath().toString()); // Entire file reading. FileReader always assumes default encoding is // OK! // We must check for BOM in UTF files and remove them with // org.apache.commons.io.input.BOMInputStream because // java doesn't do that automatically. See Oracle bug 4508058. if (encodingType == null) { // ASCII expected input = new BufferedReader(new InputStreamReader(new FileInputStream(inFile))); } else if (encodingType.startsWith("UTF-8")) { // UTF-8 requires an exclusive call to BOMInputStream bomIn = new BOMInputStream(new FileInputStream(inFile)); input = new BufferedReader(new InputStreamReader(bomIn, encodingType)); if (bomIn.hasBOM()) System.out.println("This file has UTF-8 BOM, removing it"); else System.out.println("This file has UTF-8 without BOM"); } else if (encodingType.startsWith("UTF-")) { // The other UTF cases except UTF-8 bomIn = new BOMInputStream(new FileInputStream(inFile), ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE); input = new BufferedReader(new InputStreamReader(bomIn, encodingType)); System.out.println("This file has " + bomIn.getBOMCharsetName() + " BOM, removing it"); } else { // Any other encoding input = new BufferedReader(new InputStreamReader(new FileInputStream(inFile), encodingType)); } return input; }
From source file:org.apache.any23.util.StreamUtils.java
public static Document inputStreamToDocument(InputStream is) throws MalformedByteSequenceException { DocumentBuilderFactory factory = null; DocumentBuilder builder = null; Document doc = null;//from ww w .j av a2 s . c o m try { factory = DocumentBuilderFactory.newInstance(); builder = factory.newDocumentBuilder(); } catch (ParserConfigurationException e) { logger.error("Error converting InputStream to Document: {}", e); } try { BOMInputStream bomIn = new BOMInputStream(is, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_32LE); if (bomIn.hasBOM()) { @SuppressWarnings("unused") int firstNonBOMByte = bomIn.read(); // Skips BOM } doc = builder.parse(bomIn); } catch (SAXException | IOException e) { logger.error("Error converting InputStream to Document: {}", e); } return doc; }
From source file:org.jasig.resource.aggr.ResourcesAggregatorImpl.java
/**
 * Aggregate the specified Deque of elements into a single element. The provided MessageDigest is used for
 * building the file name based on the hash of the file contents. The callback is used for type specific
 * operations.
 *
 * <p>The content of every element is appended to a temporary file, each followed by a line separator.
 * The temporary file is then moved to a file named after the URL-safe Base64 form of the digest plus
 * {@code extension}.
 *
 * @param digest digest used to derive the output file name; it is reset before and after use
 * @param elements elements to aggregate, in order
 * @param skinDirectories directories passed to {@code findFile} to locate each element's file
 * @param outputRoot root against which the result path is made relative, and below which the
 *        output is written when {@code alternateOutput} is {@code null}
 * @param alternateOutput directory to write the aggregate into, or {@code null} to mirror the
 *        head element's path under {@code outputRoot}
 * @param extension suffix for both the temporary file and the final file name
 * @param callback type-specific operations: compressing a resource and building the result element
 * @return {@code null} if {@code elements} is null or empty, or if no characters were written;
 *         the head element itself if it is the only element and {@code resourcesDao.isAbsolute}
 *         accepts it; otherwise the element produced by {@code callback.getAggregateElement}
 * @throws IOException if a resource cannot be read (wrapped with the file name) or a file
 *         operation fails
 */
protected <T extends BasicInclude> T aggregateList(final MessageDigest digest, final Deque<T> elements,
        final List<File> skinDirectories, final File outputRoot, final File alternateOutput,
        final String extension, final AggregatorCallback<T> callback) throws IOException {
    // Nothing to aggregate.
    if (null == elements || elements.size() == 0) {
        return null;
    }

    // reference to the head of the list
    final T headElement = elements.getFirst();
    // A lone element that the DAO reports as absolute is returned untouched.
    if (elements.size() == 1 && this.resourcesDao.isAbsolute(headElement)) {
        return headElement;
    }

    final File tempFile = File.createTempFile("working.", extension);
    final File aggregateOutputFile;
    try {
        //Make sure we're working with a clean MessageDigest
        digest.reset();
        TrimmingWriter trimmingWriter = null;
        try {
            // Output chain: writer -> digest stream -> buffered stream -> temp file.
            // Presumably MessageDigestOutputStream feeds every written byte into 'digest'
            // (project type, not visible here - verify).
            final BufferedOutputStream bufferedFileStream = new BufferedOutputStream(
                    new FileOutputStream(tempFile));
            final MessageDigestOutputStream digestStream = new MessageDigestOutputStream(bufferedFileStream,
                    digest);
            final OutputStreamWriter aggregateWriter = new OutputStreamWriter(digestStream, this.encoding);
            trimmingWriter = new TrimmingWriter(aggregateWriter);

            for (final T element : elements) {
                final File resourceFile = this.findFile(skinDirectories, element.getValue());

                FileInputStream fis = null;
                try {
                    fis = new FileInputStream(resourceFile);
                    // Wrap in BOMInputStream so a leading byte order mark does not end up
                    // in the middle of the aggregated output.
                    final BOMInputStream bomIs = new BOMInputStream(new BufferedInputStream(fis));
                    if (bomIs.hasBOM()) {
                        logger.debug("Stripping UTF-8 BOM from: " + resourceFile);
                    }
                    final Reader resourceIn = new InputStreamReader(bomIs, this.encoding);
                    // Elements flagged as compressed are copied verbatim; all others go
                    // through the callback's compress step.
                    if (element.isCompressed()) {
                        IOUtils.copy(resourceIn, trimmingWriter);
                    } else {
                        callback.compress(resourceIn, trimmingWriter);
                    }
                } catch (IOException e) {
                    // Re-throw with the offending file name for easier diagnosis.
                    throw new IOException(
                            "Failed to read '" + resourceFile + "' for skin: " + skinDirectories.get(0), e);
                } finally {
                    // Only fis is closed explicitly; the wrappers around it are not closed separately.
                    IOUtils.closeQuietly(fis);
                }
                // Separate consecutive resources with a line break.
                trimmingWriter.write(SystemUtils.LINE_SEPARATOR);
            }
        } finally {
            // The writer is closed here, before the digest is read below.
            IOUtils.closeQuietly(trimmingWriter);
        }

        // Nothing was written (per the writer's own character count): no aggregate is
        // produced. The outer finally still removes the temp file.
        if (trimmingWriter.getCharCount() == 0) {
            return null;
        }

        // temp file is created, get checksum
        final String checksum = Base64.encodeBase64URLSafeString(digest.digest());
        digest.reset();

        // create a new file name
        final String newFileName = checksum + extension;

        // Build the new file name and path
        if (alternateOutput == null) {
            final String elementRelativePath = FilenameUtils.getFullPath(headElement.getValue());
            final File directoryInOutputRoot = new File(outputRoot, elementRelativePath);
            // create the same directory structure in the output root
            // (the boolean result of mkdirs() is not checked)
            directoryInOutputRoot.mkdirs();
            aggregateOutputFile = new File(directoryInOutputRoot, newFileName).getCanonicalFile();
        } else {
            aggregateOutputFile = new File(alternateOutput, newFileName).getCanonicalFile();
        }

        //Move the aggregate file into the correct location
        // (any existing file with the same name is removed first)
        FileUtils.deleteQuietly(aggregateOutputFile);
        FileUtils.moveFile(tempFile, aggregateOutputFile);
    } finally {
        //Make sure the temp file gets deleted
        // (covers the early return and failure paths; presumably a no-op once the move succeeded)
        FileUtils.deleteQuietly(tempFile);
    }

    final String newResultValue = RelativePath.getRelativePath(outputRoot, aggregateOutputFile);

    this.logAggregation(elements, newResultValue);

    return callback.getAggregateElement(newResultValue, elements);
}
From source file:org.languagetool.commandline.Main.java
/**
 * Opens a reader on the given file, or on standard input when {@code isStdIn(filename)}
 * holds.
 *
 * <p>For files, the charset is chosen as follows: the explicit {@code encoding} if given;
 * otherwise the charset indicated by a UTF-8/UTF-16/UTF-32 byte order mark if the file
 * starts with one; otherwise the platform default charset. The BOM stream is created with
 * the include flag set, so BOM bytes are left in the stream and reach the reader.
 *
 * @param filename file to read, or a name that {@code isStdIn} treats as standard input
 * @param encoding charset name to decode with, or {@code null} to auto-detect as above
 * @return a reader over the buffered input
 * @throws IOException if the file cannot be opened, BOM detection fails, or the charset
 *         name is not supported; a file stream opened by this method is closed first
 */
private InputStreamReader getInputStreamReader(String filename, String encoding) throws IOException {
    String charsetName = encoding != null ? encoding : Charset.defaultCharset().name();

    if (isStdIn(filename)) {
        // Standard input is never closed here, not even on failure.
        return new InputStreamReader(new BufferedInputStream(System.in), charsetName);
    }

    InputStream fileIn = new FileInputStream(new File(filename));
    boolean handedOff = false;
    try {
        BOMInputStream bomIn = new BOMInputStream(fileIn, true,
                ByteOrderMark.UTF_8,
                ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE,
                ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_32LE);
        if (bomIn.hasBOM() && encoding == null) {
            charsetName = bomIn.getBOMCharsetName();
        }
        InputStreamReader reader = new InputStreamReader(new BufferedInputStream(bomIn), charsetName);
        handedOff = true;
        return reader;
    } finally {
        if (!handedOff) {
            // BOM detection failed or the charset name was rejected: the caller never
            // receives a reader, so the file handle must be released here.
            try {
                fileIn.close();
            } catch (IOException ignored) {
                // The original failure is already propagating and is more useful.
            }
        }
    }
}
From source file:org.languagetool.gui.Main.java
private void loadFile(File file) { try (FileInputStream inputStream = new FileInputStream(file)) { BOMInputStream bomIn = new BOMInputStream(inputStream, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_32LE); String charsetName;//www .j a v a 2 s . c o m if (bomIn.hasBOM()) { bom = bomIn.getBOM(); charsetName = bom.getCharsetName(); } else { // No BOM found bom = null; charsetName = null; } String fileContents = StringTools.readStream(bomIn, charsetName); textArea.setText(fileContents); currentFile = file; updateTitle(); if (recentFiles.contains(file.getAbsolutePath())) { recentFiles.remove(file.getAbsolutePath()); } recentFiles.add(file.getAbsolutePath()); localStorage.saveProperty("recentFiles", recentFiles); updateRecentFilesMenu(); } catch (IOException e) { Tools.showError(e); } }