Example usage for org.apache.commons.io.input BOMInputStream BOMInputStream

Introduction

On this page you can find example usages of the org.apache.commons.io.input.BOMInputStream constructor BOMInputStream(InputStream).

Prototype

public BOMInputStream(InputStream delegate)

Source Link

Document

Constructs a new BOMInputStream that detects and excludes a UTF-8 byte order mark (ByteOrderMark.UTF_8).

Usage

From source file:de.uzk.hki.da.metadata.EadMetsMetadataStructure.java

/**
 * Replaces attribute values in an EAD file and writes the result back to the same file.
 * <p>
 * The file is resolved against {@code workPath}, parsed as UTF-8 (a leading UTF-8 BOM is skipped),
 * and every attribute selected by the XPath expression whose value is a key of
 * {@code eadReplacements} is set to the mapped value. The document is then pretty-printed over
 * the original file.
 *
 * @param eadFile         the EAD file, resolved against {@code workPath} for reading and writing
 * @param eadReplacements maps an existing attribute value to its replacement
 * @throws JDOMException if parsing the file or evaluating the XPath expression fails
 * @throws IOException   if the file cannot be read or written
 */
public void replaceMetsRefsInEad(File eadFile, HashMap<String, String> eadReplacements)
        throws JDOMException, IOException {

    SAXBuilder builder = XMLUtils.createNonvalidatingSaxBuilder();
    Document currentEadDoc;
    // try-with-resources releases the file handle even when parsing throws.
    try (FileInputStream fileInputStream = new FileInputStream(Path.makeFile(workPath, eadFile.getPath()));
            BOMInputStream bomInputStream = new BOMInputStream(fileInputStream);
            Reader reader = new InputStreamReader(bomInputStream, "UTF-8")) {
        InputSource is = new InputSource(reader);
        is.setEncoding("UTF-8");
        currentEadDoc = builder.build(is);
    }

    // NOTE(review): the namespace is read from the eadDoc field, not from the freshly parsed
    // currentEadDoc - confirm both always refer to the same EAD.
    String namespaceUri = eadDoc.getRootElement().getNamespace().getURI();
    XPath xPath = XPath.newInstance(C.EAD_XPATH_EXPRESSION);

    //      Case of new DDB EAD with namespace xmlns="urn:isbn:1-931666-22-9"
    if (!namespaceUri.equals("")) {
        xPath = XPath.newInstance("//isbn:daoloc/@href");
        xPath.addNamespace("isbn", eadDoc.getRootElement().getNamespace().getURI());
    }

    @SuppressWarnings("rawtypes")
    List allNodes = xPath.selectNodes(currentEadDoc);

    for (Object node : allNodes) {
        Attribute attr = (Attribute) node;
        String currentValue = attr.getValue();
        // A single lookup replaces the former key-set scan, which could chain one replacement
        // into another depending on hash iteration order.
        if (eadReplacements.containsKey(currentValue)) {
            attr.setValue(eadReplacements.get(currentValue));
        }
    }

    XMLOutputter outputter = new XMLOutputter();
    outputter.setFormat(Format.getPrettyFormat());
    // Writing to an OutputStream lets the outputter encode with the format's own encoding
    // (instead of the platform default charset), and the stream is closed reliably.
    try (java.io.OutputStream out = new java.io.FileOutputStream(Path.makeFile(workPath, eadFile.getPath()))) {
        outputter.output(currentEadDoc, out);
    }
}

From source file:com.github.anba.es6draft.util.Resources.java

/**
 * Reads the exclusion XML list of invalid test cases from the given {@link InputStream}.
 * <p>
 * The stream is wrapped in a {@link BOMInputStream}, decoded as UTF-8, and the {@code id}
 * attribute of every {@code test} element below the document element is collected.
 */
private static Set<String> readExcludeXML(InputStream is) throws IOException {
    BOMInputStream bomFreeInput = new BOMInputStream(is);
    Reader xmlReader = new InputStreamReader(bomFreeInput, StandardCharsets.UTF_8);
    Element root = xml(xmlReader).getDocumentElement();
    NodeList testNodes = root.getElementsByTagName("test");

    Set<String> excludedIds = new HashSet<>();
    final int count = testNodes.getLength();
    int index = 0;
    while (index < count) {
        Element testElement = (Element) testNodes.item(index);
        excludedIds.add(testElement.getAttribute("id"));
        index++;
    }
    return excludedIds;
}

From source file:crawlercommons.sitemaps.SiteMapParserSAX.java

/**
 * Parses a plain-text sitemap, i.e. one that lists URLs line by line without
 * priorities, last-modified dates or similar metadata.
 *
 * @param sitemapUrl
 *            URL to sitemap file
 * @param stream
 *            content stream
 * @return The site map
 * @throws IOException
 *             if there is an error reading in the site map content
 */
protected SiteMap processText(URL sitemapUrl, InputStream stream) throws IOException {
    LOG.debug("Processing textual Sitemap");

    SiteMap result = new SiteMap(sitemapUrl);
    result.setType(SitemapType.TEXT);

    @SuppressWarnings("resource")
    BufferedReader lines = new BufferedReader(new InputStreamReader(new BOMInputStream(stream), UTF_8));

    // Every line is read, but only non-empty lines within the MAX_URLS budget are added.
    int urlIndex = 1;
    for (String url = lines.readLine(); url != null; url = lines.readLine()) {
        if (url.isEmpty() || urlIndex > MAX_URLS) {
            continue;
        }
        addUrlIntoSitemap(url, result, null, null, null, urlIndex);
        urlIndex++;
    }

    result.setProcessed(true);
    return result;
}

From source file:com.hpe.application.automation.tools.run.RunLoadRunnerScript.java

/**
 * Renders a script's {@code Results.xml} as an HTML report using the XSLT stylesheet on the
 * node, then copies the report CSS into the script's directory.
 * <p>
 * Transformer and parser failures are written to {@code logger} and not rethrown.
 *
 * @param buildWorkDir directory containing one sub-directory per script
 * @param scriptName   name of the script sub-directory holding {@code Results.xml}
 * @param outputHTML   destination of the generated HTML
 * @param xsltOnNode   XSLT stylesheet used for the transformation
 * @throws IllegalArgumentException if {@code buildWorkDir} or {@code xsltOnNode} does not exist,
 *                                  or {@code scriptName} is empty
 * @throws IOException              if reading the inputs, writing the output or closing a stream fails
 */
private void createHtmlReports(FilePath buildWorkDir, String scriptName, FilePath outputHTML,
        FilePath xsltOnNode) throws IOException, InterruptedException, XMLStreamException {
    if (!buildWorkDir.exists()) {
        throw new IllegalArgumentException("Build worker doesn't exist");
    }
    if ("".equals(scriptName)) {
        throw new IllegalArgumentException("Script name is empty");
    }
    if (!xsltOnNode.exists()) {
        throw new IllegalArgumentException("LR Html report doesn't exist on the node");
    }
    try {
        TransformerFactory factory = TransformerFactory.newInstance();
        Transformer transformer;
        // The stylesheet is compiled by newTransformer, so its stream can be closed right away.
        try (java.io.InputStream xslInput = xsltOnNode.read()) {
            transformer = factory.newTransformer(new StreamSource(xslInput));
        }

        // Malformed UTF-8 sequences are replaced instead of aborting the transformation.
        CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder();
        decoder.onMalformedInput(CodingErrorAction.REPLACE);

        // Streams are opened in the original order and are now closed on every exit path.
        try (InputStreamReader inputStreamReader = new InputStreamReader(
                new BOMInputStream(buildWorkDir.child(scriptName).child("Results.xml").read()), decoder)) {
            StreamSource in = new StreamSource(new LrScriptResultsSanitizer(inputStreamReader));
            try (java.io.OutputStream htmlOutput = outputHTML.write()) {
                transformer.transform(in, new StreamResult(htmlOutput));
            }
        }

        final URL lrHtmlCSSPath = jenkinsInstance.pluginManager.uberClassLoader.getResource(LR_SCRIPT_HTML_CSS);
        if (lrHtmlCSSPath == null) {
            throw new LrScriptParserException(
                    "For some reason the jenkins instance is null - is it an improper set tests?");
        }

        FilePath lrScriptHtmlReportCss = buildWorkDir.child(scriptName).child(LR_SCRIPT_HTML_REPORT_CSS);
        lrScriptHtmlReportCss.copyFrom(lrHtmlCSSPath);

        logger.println("The generated HTML file is:" + outputHTML);
    } catch (TransformerConfigurationException e) {
        logger.println("TransformerConfigurationException");
        logger.println(e);
    } catch (TransformerException e) {
        logger.println("TransformerException");
        logger.println(e);
    } catch (LrScriptParserException e) {
        logger.println("General exception");
        logger.println(e);
    }
}

From source file:crawlercommons.sitemaps.SiteMapParserSAX.java

/**
 * Decompress the gzipped content and process the resulting XML Sitemap.
 * /* w  w  w. ja  v  a2  s  . c o m*/
 * @param url
 *            - URL of the gzipped content
 * @param response
 *            - Gzipped content
 * @return the site map
 * @throws UnknownFormatException
 *             if there is an error parsing the gzip
 * @throws IOException
 *             if there is an error reading in the gzip {@link java.net.URL}
 */
protected AbstractSiteMap processGzippedXML(URL url, byte[] response)
        throws IOException, UnknownFormatException {

    LOG.debug("Processing gzipped XML");

    InputStream is = new ByteArrayInputStream(response);

    // Remove .gz ending
    String xmlUrl = url.toString().replaceFirst("\\.gz$", "");
    LOG.debug("XML url = {}", xmlUrl);

    BOMInputStream decompressed = new BOMInputStream(new GZIPInputStream(is));
    InputSource in = new InputSource(decompressed);
    in.setSystemId(xmlUrl);
    return processXml(url, in);
}

From source file:ee.ria.xroad.common.message.SaxSoapParserImpl.java

/**
 * Returns the SOAP stream wrapped in a {@link BOMInputStream} when {@code hasUtf8Charset}
 * accepts the content type; otherwise returns the stream unchanged.
 */
private InputStream excludeUtf8Bom(String contentType, InputStream soapStream) {
    if (!hasUtf8Charset(contentType)) {
        return soapStream;
    }
    return new BOMInputStream(soapStream);
}

From source file:net.billylieurance.azuresearch.AbstractAzureSearchQuery.java

/**
 * Parses the XML held by the given stream into a DOM document, skipping a leading UTF-8 BOM.
 * <p>
 * In debug mode the whole stream is buffered first so the raw content can be printed if
 * parsing fails. Failures are printed and reported to the caller as {@code null}.
 *
 * @param is An InputStream holding some XML that needs parsing
 * @return a parsed Document from the XML in the stream, or {@code null} if reading or parsing failed
 */
public Document loadXMLFromStream(InputStream is) {
    String dumpable = "";
    try {
        // NOTE(review): the factory uses default settings; if this stream can carry untrusted
        // XML, consider disabling DTDs / external entities.
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        BOMInputStream bis = new BOMInputStream(is);

        if (!_debug) {
            return builder.parse(bis);
        }

        // Buffer the raw bytes and parse exactly those bytes. Decoding to a String with the
        // platform charset and re-encoding it could corrupt the XML before parsing.
        java.io.ByteArrayOutputStream raw = new java.io.ByteArrayOutputStream();
        byte[] chunk = new byte[8192];
        int read;
        while ((read = bis.read(chunk)) != -1) {
            raw.write(chunk, 0, read);
        }
        byte[] xmlBytes = raw.toByteArray();
        // Used only for the diagnostic dump below; assumes the payload is UTF-8 - TODO confirm.
        dumpable = new String(xmlBytes, java.nio.charset.StandardCharsets.UTF_8);
        return builder.parse(new java.io.ByteArrayInputStream(xmlBytes));
    } catch (IOException e) {
        e.printStackTrace();
    } catch (SAXException e) {
        if (e instanceof SAXParseException) {
            SAXParseException ex = (SAXParseException) e;
            System.out.println("Line: " + ex.getLineNumber());
            System.out.println("Col: " + ex.getColumnNumber());
            System.out.println("Data: " + dumpable);
        }
        e.printStackTrace();
    } catch (ParserConfigurationException e) {
        e.printStackTrace();
    }
    return null;
}

From source file:net.sourceforge.subsonic.controller.CaptionsController.java

/**
 * Streams a captions file to the HTTP response.
 * <p>
 * When {@code format} equals {@code CAPTION_FORMAT_VTT} the file is copied byte-for-byte.
 * Otherwise it is decoded (UTF-8 if it starts with a UTF-8 BOM, Latin otherwise) and
 * re-encoded as UTF-8 on the way out.
 *
 * @param captionsFile the captions file to send
 * @param response     the response whose output stream receives the captions
 * @param format       the captions format identifier
 * @throws IOException if reading the file or writing the response fails
 */
private void send(File captionsFile, HttpServletResponse response, String format) throws IOException {
    if (CAPTION_FORMAT_VTT.equals(format)) {
        Files.copy(captionsFile, response.getOutputStream());
        return;
    }

    BOMInputStream captionsStream = null;
    Reader captionsReader = null;
    try {
        captionsStream = new BOMInputStream(new FileInputStream(captionsFile));

        final String sourceEncoding;
        if (ByteOrderMark.UTF_8.equals(captionsStream.getBOM())) {
            sourceEncoding = StringUtil.ENCODING_UTF8;
        } else {
            sourceEncoding = StringUtil.ENCODING_LATIN;
        }

        captionsReader = new InputStreamReader(captionsStream, sourceEncoding);
        IOUtils.copy(captionsReader, response.getOutputStream(), StringUtil.ENCODING_UTF8);
    } finally {
        IOUtils.closeQuietly(captionsStream);
        IOUtils.closeQuietly(captionsReader);
    }
}

From source file:net.sourceforge.subsonic.controller.CaptionsController.java

/**
 * Converts a captions file with {@code SrtToVtt} and writes the result to the HTTP response
 * as UTF-8.
 * <p>
 * The source is decoded as UTF-8 if it starts with a UTF-8 BOM, and as Latin otherwise.
 *
 * @param captionsFile the captions file to convert
 * @param response     the response whose output stream receives the converted captions
 * @throws IOException if reading the file or writing the response fails
 */
private void convertAndSend(File captionsFile, HttpServletResponse response) throws IOException {
    BOMInputStream bomInputStream = null;
    Reader reader = null;
    try {
        bomInputStream = new BOMInputStream(new FileInputStream(captionsFile));
        String encoding = ByteOrderMark.UTF_8.equals(bomInputStream.getBOM()) ? StringUtil.ENCODING_UTF8
                : StringUtil.ENCODING_LATIN;

        reader = new InputStreamReader(bomInputStream, encoding);
        Writer writer = new OutputStreamWriter(response.getOutputStream(), StringUtil.ENCODING_UTF8);
        SrtToVtt.convert(reader, writer);
        // OutputStreamWriter buffers encoded bytes; flush so nothing is left behind in case
        // the converter does not flush itself. The response stream stays open for the container.
        writer.flush();
    } finally {
        IOUtils.closeQuietly(bomInputStream);
        IOUtils.closeQuietly(reader);
    }
}

From source file:net.sourceforge.users.dragomerlin.vcs2icsCalendarConverter.ConvertSingleFile.java

/**
 * Opens a file for reading with the encoding reported by {@code TestDetector}, stripping a
 * byte order mark for UTF encodings.
 * <p>
 * Java does not remove BOMs automatically (see Oracle bug 4508058), so UTF files are wrapped
 * in a {@link BOMInputStream}. The caller is responsible for closing the returned reader.
 *
 * @param inFile the file to open
 * @return a buffered reader over the file's decoded content
 * @throws IOException if the file cannot be opened or the detected encoding is not supported
 */
private static BufferedReader detectEncodingAndOpenFile(File inFile) throws IOException {
    // Detect file encoding
    String encodingType = TestDetector.main(inFile.getAbsolutePath());

    FileInputStream fileIn = new FileInputStream(inFile);
    boolean opened = false;
    try {
        BufferedReader input;
        if (encodingType == null) {
            // ASCII expected; decoded with the platform default charset as before
            input = new BufferedReader(new InputStreamReader(fileIn));
        } else if (encodingType.startsWith("UTF-8")) {
            // UTF-8 requires an exclusive call to BOMInputStream
            BOMInputStream bomIn = new BOMInputStream(fileIn);
            input = new BufferedReader(new InputStreamReader(bomIn, encodingType));
            if (bomIn.hasBOM()) {
                System.out.println("This file has UTF-8 BOM, removing it");
            } else {
                System.out.println("This file has UTF-8 without BOM");
            }
        } else if (encodingType.startsWith("UTF-")) {
            // The other UTF cases except UTF-8
            BOMInputStream bomIn = new BOMInputStream(fileIn, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
                    ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
            input = new BufferedReader(new InputStreamReader(bomIn, encodingType));
            // Only announce a BOM when one exists; previously this printed "null BOM".
            if (bomIn.hasBOM()) {
                System.out.println("This file has " + bomIn.getBOMCharsetName() + " BOM, removing it");
            } else {
                System.out.println("This file has " + encodingType + " without BOM");
            }
        } else {
            // Any other encoding
            input = new BufferedReader(new InputStreamReader(fileIn, encodingType));
        }
        opened = true;
        return input;
    } finally {
        // Release the file handle if wrapping failed, e.g. on an unsupported encoding name.
        if (!opened) {
            try {
                fileIn.close();
            } catch (IOException ignored) {
                // The original failure is the one worth reporting.
            }
        }
    }
}