List of usage examples for the constructor of org.apache.commons.io.input.BOMInputStream
public BOMInputStream(InputStream delegate)
From source file:de.uzk.hki.da.metadata.EadMetsMetadataStructure.java
public void replaceMetsRefsInEad(File eadFile, HashMap<String, String> eadReplacements) throws JDOMException, IOException { File targetEadFile = eadFile; SAXBuilder builder = XMLUtils.createNonvalidatingSaxBuilder(); FileInputStream fileInputStream = new FileInputStream(Path.makeFile(workPath, eadFile.getPath())); BOMInputStream bomInputStream = new BOMInputStream(fileInputStream); Reader reader = new InputStreamReader(bomInputStream, "UTF-8"); InputSource is = new InputSource(reader); is.setEncoding("UTF-8"); Document currentEadDoc = builder.build(is); String namespaceUri = eadDoc.getRootElement().getNamespace().getURI(); XPath xPath = XPath.newInstance(C.EAD_XPATH_EXPRESSION); // Case of new DDB EAD with namespace xmlns="urn:isbn:1-931666-22-9" if (!namespaceUri.equals("")) { xPath = XPath.newInstance("//isbn:daoloc/@href"); xPath.addNamespace("isbn", eadDoc.getRootElement().getNamespace().getURI()); }//w ww. j a v a2s. c o m @SuppressWarnings("rawtypes") List allNodes = xPath.selectNodes(currentEadDoc); for (Object node : allNodes) { Attribute attr = (Attribute) node; for (String replacement : eadReplacements.keySet()) { if (attr.getValue().equals(replacement)) { attr.setValue(eadReplacements.get(replacement)); } } } XMLOutputter outputter = new XMLOutputter(); outputter.setFormat(Format.getPrettyFormat()); outputter.output(currentEadDoc, new FileWriter(Path.makeFile(workPath, targetEadFile.getPath()))); fileInputStream.close(); bomInputStream.close(); reader.close(); }
From source file:com.github.anba.es6draft.util.Resources.java
/** * Load the exclusion xml-list for invalid test cases from {@link InputStream} *///w w w.j a va 2 s . c o m private static Set<String> readExcludeXML(InputStream is) throws IOException { Set<String> exclude = new HashSet<>(); Reader reader = new InputStreamReader(new BOMInputStream(is), StandardCharsets.UTF_8); NodeList ns = xml(reader).getDocumentElement().getElementsByTagName("test"); for (int i = 0, len = ns.getLength(); i < len; ++i) { exclude.add(((Element) ns.item(i)).getAttribute("id")); } return exclude; }
From source file:crawlercommons.sitemaps.SiteMapParserSAX.java
/**
 * Parses a plain-text sitemap, which lists one URL per line and carries no priorities,
 * modification dates or similar metadata.
 *
 * <p>Empty lines are skipped. Once {@code MAX_URLS} entries have been added, further
 * lines are still read but no longer added.
 *
 * @param sitemapUrl
 *            URL to sitemap file
 * @param stream
 *            content stream
 * @return The site map
 * @throws IOException
 *             if there is an error reading in the site map content
 */
protected SiteMap processText(URL sitemapUrl, InputStream stream) throws IOException {
    LOG.debug("Processing textual Sitemap");

    SiteMap textSiteMap = new SiteMap(sitemapUrl);
    textSiteMap.setType(SitemapType.TEXT);

    @SuppressWarnings("resource")
    BufferedReader lines = new BufferedReader(new InputStreamReader(new BOMInputStream(stream), UTF_8));

    int position = 1;
    for (String url = lines.readLine(); url != null; url = lines.readLine()) {
        if (url.isEmpty() || position > MAX_URLS) {
            continue;
        }
        addUrlIntoSitemap(url, textSiteMap, null, null, null, position);
        position++;
    }

    textSiteMap.setProcessed(true);
    return textSiteMap;
}
From source file:com.hpe.application.automation.tools.run.RunLoadRunnerScript.java
private void createHtmlReports(FilePath buildWorkDir, String scriptName, FilePath outputHTML, FilePath xsltOnNode) throws IOException, InterruptedException, XMLStreamException { if (!buildWorkDir.exists()) { throw new IllegalArgumentException("Build worker doesn't exist"); }//from www.j ava2 s . com if ("".equals(scriptName)) { throw new IllegalArgumentException("Script name is empty"); } if (!xsltOnNode.exists()) { throw new IllegalArgumentException("LR Html report doesn't exist on the node"); } try { TransformerFactory factory = TransformerFactory.newInstance(); StreamSource xslStream = new StreamSource(xsltOnNode.read()); Transformer transformer = factory.newTransformer(xslStream); CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder(); decoder.onMalformedInput(CodingErrorAction.REPLACE).replacement(); final InputStreamReader inputStreamReader = new InputStreamReader( new BOMInputStream(buildWorkDir.child(scriptName).child("Results.xml").read()), decoder); StreamSource in = new StreamSource(new LrScriptResultsSanitizer(inputStreamReader)); StreamResult out = new StreamResult(outputHTML.write()); transformer.transform(in, out); final URL lrHtmlCSSPath = jenkinsInstance.pluginManager.uberClassLoader.getResource(LR_SCRIPT_HTML_CSS); if (lrHtmlCSSPath == null) { throw new LrScriptParserException( "For some reason the jenkins instance is null - is it an improper set tests?"); } FilePath lrScriptHtmlReportCss = buildWorkDir.child(scriptName).child(LR_SCRIPT_HTML_REPORT_CSS); lrScriptHtmlReportCss.copyFrom(lrHtmlCSSPath); logger.println("The generated HTML file is:" + outputHTML); } catch (TransformerConfigurationException e) { logger.println("TransformerConfigurationException"); logger.println(e); } catch (TransformerException e) { logger.println("TransformerException"); logger.println(e); } catch (LrScriptParserException e) { logger.println("General exception"); logger.println(e); } }
From source file:crawlercommons.sitemaps.SiteMapParserSAX.java
/** * Decompress the gzipped content and process the resulting XML Sitemap. * /* w w w. ja v a2 s . c o m*/ * @param url * - URL of the gzipped content * @param response * - Gzipped content * @return the site map * @throws UnknownFormatException * if there is an error parsing the gzip * @throws IOException * if there is an error reading in the gzip {@link java.net.URL} */ protected AbstractSiteMap processGzippedXML(URL url, byte[] response) throws IOException, UnknownFormatException { LOG.debug("Processing gzipped XML"); InputStream is = new ByteArrayInputStream(response); // Remove .gz ending String xmlUrl = url.toString().replaceFirst("\\.gz$", ""); LOG.debug("XML url = {}", xmlUrl); BOMInputStream decompressed = new BOMInputStream(new GZIPInputStream(is)); InputSource in = new InputSource(decompressed); in.setSystemId(xmlUrl); return processXml(url, in); }
From source file:ee.ria.xroad.common.message.SaxSoapParserImpl.java
/**
 * Wraps the SOAP stream in a {@link BOMInputStream} when {@code hasUtf8Charset} accepts
 * the content type; otherwise returns the stream unchanged.
 */
private InputStream excludeUtf8Bom(String contentType, InputStream soapStream) {
    if (!hasUtf8Charset(contentType)) {
        return soapStream;
    }
    return new BOMInputStream(soapStream);
}
From source file:net.billylieurance.azuresearch.AbstractAzureSearchQuery.java
/** * * @param is An InputStream holding some XML that needs parsing * @return a parsed Document from the XML in the stream *//*from w w w . j a v a 2 s.c o m*/ public Document loadXMLFromStream(InputStream is) { DocumentBuilderFactory factory; DocumentBuilder builder; BOMInputStream bis; String dumpable = ""; try { factory = DocumentBuilderFactory.newInstance(); builder = factory.newDocumentBuilder(); bis = new BOMInputStream(is); if (_debug) { java.util.Scanner s = new java.util.Scanner(bis).useDelimiter("\\A"); dumpable = s.hasNext() ? s.next() : ""; // convert String into InputStream InputStream istwo = new java.io.ByteArrayInputStream(dumpable.getBytes()); return builder.parse(istwo); } else { return builder.parse(bis); } } catch (IOException e) { e.printStackTrace(); } catch (SAXException e) { if (e instanceof SAXParseException) { SAXParseException ex = (SAXParseException) e; System.out.println("Line: " + ex.getLineNumber()); System.out.println("Col: " + ex.getColumnNumber()); System.out.println("Data: " + dumpable); } e.printStackTrace(); } catch (ParserConfigurationException e) { e.printStackTrace(); } return null; }
From source file:net.sourceforge.subsonic.controller.CaptionsController.java
/**
 * Writes a captions file to the HTTP response.
 *
 * <p>For the VTT format the file is copied to the response unchanged. For any other
 * format the file is decoded as UTF-8 when it starts with a UTF-8 byte order mark, and
 * as Latin otherwise, then written to the response as UTF-8.
 *
 * @throws IOException if reading the file or writing the response fails
 */
private void send(File captionsFile, HttpServletResponse response, String format) throws IOException {
    if (CAPTION_FORMAT_VTT.equals(format)) {
        Files.copy(captionsFile, response.getOutputStream());
        return;
    }

    BOMInputStream captionsIn = null;
    Reader decoded = null;
    try {
        captionsIn = new BOMInputStream(new FileInputStream(captionsFile));

        String sourceEncoding;
        if (ByteOrderMark.UTF_8.equals(captionsIn.getBOM())) {
            sourceEncoding = StringUtil.ENCODING_UTF8;
        } else {
            sourceEncoding = StringUtil.ENCODING_LATIN;
        }

        decoded = new InputStreamReader(captionsIn, sourceEncoding);
        IOUtils.copy(decoded, response.getOutputStream(), StringUtil.ENCODING_UTF8);
    } finally {
        IOUtils.closeQuietly(captionsIn);
        IOUtils.closeQuietly(decoded);
    }
}
From source file:net.sourceforge.subsonic.controller.CaptionsController.java
/**
 * Converts a captions file with {@code SrtToVtt.convert} and writes the result to the
 * HTTP response as UTF-8.
 *
 * <p>The file is decoded as UTF-8 when it starts with a UTF-8 byte order mark, and as
 * Latin otherwise.
 *
 * @throws IOException if reading the file or writing the response fails
 */
private void convertAndSend(File captionsFile, HttpServletResponse response) throws IOException {
    BOMInputStream bomInputStream = null;
    Reader reader = null;
    try {
        bomInputStream = new BOMInputStream(new FileInputStream(captionsFile));
        String encoding = ByteOrderMark.UTF_8.equals(bomInputStream.getBOM())
                ? StringUtil.ENCODING_UTF8
                : StringUtil.ENCODING_LATIN;

        reader = new InputStreamReader(bomInputStream, encoding);
        Writer writer = new OutputStreamWriter(response.getOutputStream(), StringUtil.ENCODING_UTF8);
        SrtToVtt.convert(reader, writer);

        // OutputStreamWriter buffers encoded bytes. SrtToVtt.convert is not visible here,
        // so flush explicitly to make sure everything reaches the response. The writer is
        // deliberately not closed, which leaves the response stream open.
        writer.flush();
    } finally {
        IOUtils.closeQuietly(bomInputStream);
        IOUtils.closeQuietly(reader);
    }
}
From source file:net.sourceforge.users.dragomerlin.vcs2icsCalendarConverter.ConvertSingleFile.java
private static BufferedReader detectEncodingAndOpenFile(File inFile) throws IOException { String encodingType = null;//from ww w.j a va2 s . c om BufferedReader input = null; BOMInputStream bomIn = null; // Detect file encoding encodingType = TestDetector.main(inFile.getAbsolutePath().toString()); // Entire file reading. FileReader always assumes default encoding is // OK! // We must check for BOM in UTF files and remove them with // org.apache.commons.io.input.BOMInputStream because // java doesn't do that automatically. See Oracle bug 4508058. if (encodingType == null) { // ASCII expected input = new BufferedReader(new InputStreamReader(new FileInputStream(inFile))); } else if (encodingType.startsWith("UTF-8")) { // UTF-8 requires an exclusive call to BOMInputStream bomIn = new BOMInputStream(new FileInputStream(inFile)); input = new BufferedReader(new InputStreamReader(bomIn, encodingType)); if (bomIn.hasBOM()) System.out.println("This file has UTF-8 BOM, removing it"); else System.out.println("This file has UTF-8 without BOM"); } else if (encodingType.startsWith("UTF-")) { // The other UTF cases except UTF-8 bomIn = new BOMInputStream(new FileInputStream(inFile), ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE); input = new BufferedReader(new InputStreamReader(bomIn, encodingType)); System.out.println("This file has " + bomIn.getBOMCharsetName() + " BOM, removing it"); } else { // Any other encoding input = new BufferedReader(new InputStreamReader(new FileInputStream(inFile), encodingType)); } return input; }