Example usage for org.apache.commons.io.input BOMInputStream BOMInputStream

List of usage examples for org.apache.commons.io.input BOMInputStream BOMInputStream

Introduction

This page collects usage examples for the org.apache.commons.io.input.BOMInputStream constructor BOMInputStream(InputStream).

Prototype

public BOMInputStream(InputStream delegate) 

Source Link

Document

Constructs a new BOMInputStream that detects and excludes a UTF-8 byte order mark (ByteOrderMark.UTF_8) from the start of the delegate stream.

Usage

From source file:de.uzk.hki.da.metadata.EadMetsMetadataStructure.java

/**
 * Rewrites attribute values in an EAD file according to a replacement table and
 * writes the modified document back to the same location.
 *
 * <p>The file is resolved against {@code workPath}, read as UTF-8 with a leading
 * UTF-8 BOM stripped, and parsed. Every attribute selected by the XPath expression
 * whose value equals a key of {@code eadReplacements} is set to the mapped value.
 *
 * @param eadFile         the EAD file, resolved against {@code workPath}
 * @param eadReplacements maps current attribute values to their replacements
 * @throws JDOMException if the document cannot be parsed or the XPath cannot be evaluated
 * @throws IOException   if the file cannot be read or written
 */
public void replaceMetsRefsInEad(File eadFile, HashMap<String, String> eadReplacements)
        throws JDOMException, IOException {

    SAXBuilder builder = XMLUtils.createNonvalidatingSaxBuilder();

    // Parse inside try-with-resources so that all streams are released even when
    // parsing fails, and before the very same file is reopened for writing below.
    Document currentEadDoc;
    try (FileInputStream fileInputStream = new FileInputStream(Path.makeFile(workPath, eadFile.getPath()));
            BOMInputStream bomInputStream = new BOMInputStream(fileInputStream);
            Reader reader = new InputStreamReader(bomInputStream, "UTF-8")) {
        InputSource is = new InputSource(reader);
        is.setEncoding("UTF-8");
        currentEadDoc = builder.build(is);
    }

    // NOTE(review): the namespace is taken from eadDoc (declared outside this method),
    // not from the document parsed above - confirm that both always agree.
    String namespaceUri = eadDoc.getRootElement().getNamespace().getURI();
    XPath xPath = XPath.newInstance(C.EAD_XPATH_EXPRESSION);

    // Case of new DDB EAD with namespace xmlns="urn:isbn:1-931666-22-9"
    if (!namespaceUri.equals("")) {
        xPath = XPath.newInstance("//isbn:daoloc/@href");
        xPath.addNamespace("isbn", eadDoc.getRootElement().getNamespace().getURI());
    }

    @SuppressWarnings("rawtypes")
    List allNodes = xPath.selectNodes(currentEadDoc);

    for (Object node : allNodes) {
        Attribute attr = (Attribute) node;
        for (String replacement : eadReplacements.keySet()) {
            if (attr.getValue().equals(replacement)) {
                attr.setValue(eadReplacements.get(replacement));
            }
        }
    }

    XMLOutputter outputter = new XMLOutputter();
    outputter.setFormat(Format.getPrettyFormat());
    // Hand JDOM a byte stream instead of a FileWriter: the outputter then encodes
    // with the charset of its Format rather than the platform default, and the
    // stream is reliably flushed and closed.
    try (java.io.OutputStream out = new java.io.FileOutputStream(
            Path.makeFile(workPath, eadFile.getPath()))) {
        outputter.output(currentEadDoc, out);
    }
}

From source file:com.github.anba.es6draft.util.Resources.java

/**
 * Reads an XML exclusion list from the given stream and collects the {@code id}
 * attribute of every {@code test} element found in the document.
 *
 * <p>The stream is decoded as UTF-8 after passing through a {@link BOMInputStream}.
 *
 * @param is the stream holding the XML exclusion list
 * @return the set of collected {@code id} attribute values
 * @throws IOException declared for failures while reading the stream
 */
private static Set<String> readExcludeXML(InputStream is) throws IOException {
    Reader source = new InputStreamReader(new BOMInputStream(is), StandardCharsets.UTF_8);
    NodeList tests = xml(source).getDocumentElement().getElementsByTagName("test");

    Set<String> excluded = new HashSet<>();
    final int count = tests.getLength();
    int index = 0;
    while (index < count) {
        Element test = (Element) tests.item(index);
        excluded.add(test.getAttribute("id"));
        index++;
    }
    return excluded;
}

From source file:crawlercommons.sitemaps.SiteMapParserSAX.java

/**
 * Processes a plain-text sitemap, i.e. one that lists URLs line by line without
 * priorities, last-modified dates or similar attributes.
 *
 * <p>The content is read as UTF-8 through a {@link BOMInputStream}. Every non-empty
 * line is handed to {@code addUrlIntoSitemap} as long as the running counter has not
 * exceeded {@code MAX_URLS}; further lines are still read but ignored. The stream is
 * not closed by this method.
 *
 * @param sitemapUrl
 *            URL to sitemap file
 * @param stream
 *            content stream
 * @return The site map
 * @throws IOException
 *             if there is an error reading in the site map content
 */
protected SiteMap processText(URL sitemapUrl, InputStream stream) throws IOException {
    LOG.debug("Processing textual Sitemap");

    SiteMap result = new SiteMap(sitemapUrl);
    result.setType(SitemapType.TEXT);

    @SuppressWarnings("resource")
    BufferedReader lines = new BufferedReader(new InputStreamReader(new BOMInputStream(stream), UTF_8));

    int position = 1;
    for (String entry = lines.readLine(); entry != null; entry = lines.readLine()) {
        if (entry.length() == 0 || position > MAX_URLS) {
            continue;
        }
        addUrlIntoSitemap(entry, result, null, null, null, position++);
    }

    result.setProcessed(true);
    return result;
}

From source file:com.hpe.application.automation.tools.run.RunLoadRunnerScript.java

/**
 * Transforms a script's {@code Results.xml} into an HTML report using the given
 * XSLT stylesheet and copies the report CSS next to the results.
 *
 * <p>Transformer and parser failures are written to {@code logger} and not rethrown.
 *
 * @param buildWorkDir directory containing one sub-directory per script
 * @param scriptName   name of the script sub-directory holding {@code Results.xml}
 * @param outputHTML   destination of the generated HTML report
 * @param xsltOnNode   XSLT stylesheet used for the transformation
 * @throws IllegalArgumentException if {@code buildWorkDir} or {@code xsltOnNode} does not
 *                                  exist, or {@code scriptName} is empty
 * @throws IOException              if reading or writing one of the files fails
 * @throws InterruptedException     if a file operation is interrupted
 * @throws XMLStreamException       declared for failures while sanitizing the results stream
 */
private void createHtmlReports(FilePath buildWorkDir, String scriptName, FilePath outputHTML,
        FilePath xsltOnNode) throws IOException, InterruptedException, XMLStreamException {
    if (!buildWorkDir.exists()) {
        throw new IllegalArgumentException("Build worker doesn't exist");
    }
    if ("".equals(scriptName)) {
        throw new IllegalArgumentException("Script name is empty");
    }
    if (!xsltOnNode.exists()) {
        throw new IllegalArgumentException("LR Html report doesn't exist on the node");
    }
    try {
        TransformerFactory factory = TransformerFactory.newInstance();

        // The stylesheet is fully consumed by newTransformer, so its stream can be
        // released right away instead of being leaked.
        Transformer transformer;
        try (java.io.InputStream xslt = xsltOnNode.read()) {
            transformer = factory.newTransformer(new StreamSource(xslt));
        }

        // Replace malformed byte sequences instead of failing the whole transformation.
        CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder();
        decoder.onMalformedInput(CodingErrorAction.REPLACE);

        // Open results and output only after the transformer was built (same order as
        // before), and close both once the transformation is done or has failed.
        try (InputStreamReader inputStreamReader = new InputStreamReader(
                new BOMInputStream(buildWorkDir.child(scriptName).child("Results.xml").read()), decoder);
                java.io.OutputStream html = outputHTML.write()) {
            StreamSource in = new StreamSource(new LrScriptResultsSanitizer(inputStreamReader));
            StreamResult out = new StreamResult(html);
            transformer.transform(in, out);
        }

        final URL lrHtmlCSSPath = jenkinsInstance.pluginManager.uberClassLoader.getResource(LR_SCRIPT_HTML_CSS);
        if (lrHtmlCSSPath == null) {
            // NOTE(review): the condition checks the CSS resource lookup, while the
            // message talks about the jenkins instance - message kept unchanged.
            throw new LrScriptParserException(
                    "For some reason the jenkins instance is null - is it an improper set tests?");
        }

        FilePath lrScriptHtmlReportCss = buildWorkDir.child(scriptName).child(LR_SCRIPT_HTML_REPORT_CSS);
        lrScriptHtmlReportCss.copyFrom(lrHtmlCSSPath);

        logger.println("The generated HTML file is:" + outputHTML);
    } catch (TransformerConfigurationException e) {
        logger.println("TransformerConfigurationException");
        logger.println(e);
    } catch (TransformerException e) {
        logger.println("TransformerException");
        logger.println(e);
    } catch (LrScriptParserException e) {
        logger.println("General exception");
        logger.println(e);
    }
}

From source file:crawlercommons.sitemaps.SiteMapParserSAX.java

/**
 * Decompress the gzipped content and process the resulting XML Sitemap.
 * /* w  w  w. ja  v  a2  s  . c o m*/
 * @param url
 *            - URL of the gzipped content
 * @param response
 *            - Gzipped content
 * @return the site map
 * @throws UnknownFormatException
 *             if there is an error parsing the gzip
 * @throws IOException
 *             if there is an error reading in the gzip {@link java.net.URL}
 */
protected AbstractSiteMap processGzippedXML(URL url, byte[] response)
        throws IOException, UnknownFormatException {

    LOG.debug("Processing gzipped XML");

    InputStream is = new ByteArrayInputStream(response);

    // Remove .gz ending
    String xmlUrl = url.toString().replaceFirst("\\.gz$", "");
    LOG.debug("XML url = {}", xmlUrl);

    BOMInputStream decompressed = new BOMInputStream(new GZIPInputStream(is));
    InputSource in = new InputSource(decompressed);
    in.setSystemId(xmlUrl);
    return processXml(url, in);
}

From source file:ee.ria.xroad.common.message.SaxSoapParserImpl.java

/**
 * Returns the SOAP stream wrapped in a {@link BOMInputStream} when
 * {@code hasUtf8Charset(contentType)} holds, and the stream itself otherwise.
 *
 * @param contentType the content type passed to {@code hasUtf8Charset}
 * @param soapStream  the stream carrying the SOAP message
 * @return the stream to read the message from
 */
private InputStream excludeUtf8Bom(String contentType, InputStream soapStream) {
    if (!hasUtf8Charset(contentType)) {
        return soapStream;
    }
    return new BOMInputStream(soapStream);
}

From source file:net.billylieurance.azuresearch.AbstractAzureSearchQuery.java

/**
 * Parses the XML held by the given stream into a DOM document.
 *
 * <p>A leading UTF-8 BOM is skipped. In debug mode the payload is buffered first so
 * that it can be printed when parsing fails. Failures are reported on the console
 * and result in {@code null}.
 *
 * @param is An InputStream holding some XML that needs parsing
 * @return a parsed Document from the XML in the stream, or {@code null} if the
 *         stream could not be read or parsed
 */
public Document loadXMLFromStream(InputStream is) {
    String dumpable = "";
    try {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        BOMInputStream bis = new BOMInputStream(is);

        if (!_debug) {
            return builder.parse(bis);
        }

        // Buffer the raw bytes and parse exactly those. Decoding to a String and
        // re-encoding with the platform default charset (as done before) could alter
        // the payload, making debug runs behave differently from normal runs.
        java.io.ByteArrayOutputStream captured = new java.io.ByteArrayOutputStream();
        byte[] chunk = new byte[8192];
        for (int read = bis.read(chunk); read != -1; read = bis.read(chunk)) {
            captured.write(chunk, 0, read);
        }
        byte[] raw = captured.toByteArray();

        // Text form is for the diagnostic output below only.
        dumpable = new String(raw, java.nio.charset.StandardCharsets.UTF_8);
        return builder.parse(new java.io.ByteArrayInputStream(raw));
    } catch (IOException e) {
        e.printStackTrace();
    } catch (SAXException e) {
        if (e instanceof SAXParseException) {
            SAXParseException ex = (SAXParseException) e;
            System.out.println("Line: " + ex.getLineNumber());
            System.out.println("Col: " + ex.getColumnNumber());
            System.out.println("Data: " + dumpable);
        }
        e.printStackTrace();
    } catch (ParserConfigurationException e) {
        e.printStackTrace();
    }
    return null;
}

From source file:net.sourceforge.subsonic.controller.CaptionsController.java

/**
 * Writes a captions file to the HTTP response.
 *
 * <p>When {@code format} equals {@code CAPTION_FORMAT_VTT} the file is copied as is.
 * Otherwise it is decoded - as UTF-8 if it starts with a UTF-8 BOM, else as Latin -
 * and written to the response re-encoded as UTF-8.
 *
 * @param captionsFile the captions file to send
 * @param response     the response whose output stream receives the captions
 * @param format       the captions format
 * @throws IOException if reading the file or writing the response fails
 */
private void send(File captionsFile, HttpServletResponse response, String format) throws IOException {
    if (CAPTION_FORMAT_VTT.equals(format)) {
        Files.copy(captionsFile, response.getOutputStream());
        return;
    }

    BOMInputStream source = null;
    Reader decoded = null;
    try {
        source = new BOMInputStream(new FileInputStream(captionsFile));

        String charsetName;
        if (ByteOrderMark.UTF_8.equals(source.getBOM())) {
            charsetName = StringUtil.ENCODING_UTF8;
        } else {
            charsetName = StringUtil.ENCODING_LATIN;
        }

        decoded = new InputStreamReader(source, charsetName);
        IOUtils.copy(decoded, response.getOutputStream(), StringUtil.ENCODING_UTF8);
    } finally {
        IOUtils.closeQuietly(source);
        IOUtils.closeQuietly(decoded);
    }
}

From source file:net.sourceforge.subsonic.controller.CaptionsController.java

/**
 * Converts a captions file with {@code SrtToVtt} and writes the result to the HTTP
 * response encoded as UTF-8.
 *
 * <p>The file is decoded as UTF-8 if it starts with a UTF-8 BOM, else as Latin.
 *
 * @param captionsFile the captions file to convert
 * @param response     the response whose output stream receives the converted captions
 * @throws IOException if reading the file or writing the response fails
 */
private void convertAndSend(File captionsFile, HttpServletResponse response) throws IOException {
    BOMInputStream bomInputStream = null;
    Reader reader = null;
    try {
        bomInputStream = new BOMInputStream(new FileInputStream(captionsFile));
        String encoding = ByteOrderMark.UTF_8.equals(bomInputStream.getBOM()) ? StringUtil.ENCODING_UTF8
                : StringUtil.ENCODING_LATIN;

        reader = new InputStreamReader(bomInputStream, encoding);
        Writer writer = new OutputStreamWriter(response.getOutputStream(), StringUtil.ENCODING_UTF8);
        SrtToVtt.convert(reader, writer);
        // OutputStreamWriter buffers encoded bytes internally. Flush explicitly so the
        // tail of the captions reaches the response even if the converter does not
        // flush itself (not visible from here). The writer is deliberately not closed,
        // as that would close the response stream.
        writer.flush();
    } finally {
        IOUtils.closeQuietly(bomInputStream);
        IOUtils.closeQuietly(reader);
    }
}

From source file:net.sourceforge.users.dragomerlin.vcs2icsCalendarConverter.ConvertSingleFile.java

/**
 * Detects the encoding of a file and opens it with a matching reader.
 *
 * <p>For UTF encodings the stream is wrapped in a {@link BOMInputStream}, because
 * Java does not strip a byte order mark on its own (see Oracle bug 4508058), and a
 * message stating whether a BOM was found is printed. If no encoding is detected
 * the platform default charset is used (ASCII content expected).
 *
 * @param inFile the file to open
 * @return a reader over the file content; the caller is responsible for closing it
 * @throws IOException if the file cannot be opened, the detected encoding is not
 *                     supported, or probing for a BOM fails
 */
private static BufferedReader detectEncodingAndOpenFile(File inFile) throws IOException {
    // Detect file encoding
    String encodingType = TestDetector.main(inFile.getAbsolutePath());

    FileInputStream fileIn = new FileInputStream(inFile);
    try {
        if (encodingType == null) {
            // ASCII expected
            return new BufferedReader(new InputStreamReader(fileIn));
        }

        if (encodingType.startsWith("UTF-8")) {
            // UTF-8 requires an exclusive call to BOMInputStream
            BOMInputStream bomIn = new BOMInputStream(fileIn);
            BufferedReader input = new BufferedReader(new InputStreamReader(bomIn, encodingType));
            if (bomIn.hasBOM()) {
                System.out.println("This file has UTF-8 BOM, removing it");
            } else {
                System.out.println("This file has UTF-8 without BOM");
            }
            return input;
        }

        if (encodingType.startsWith("UTF-")) {
            // The other UTF cases except UTF-8
            BOMInputStream bomIn = new BOMInputStream(fileIn, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
                    ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
            BufferedReader input = new BufferedReader(new InputStreamReader(bomIn, encodingType));
            // Only claim a BOM was removed when one is actually present; without this
            // check the message read "This file has null BOM, removing it".
            if (bomIn.hasBOM()) {
                System.out.println("This file has " + bomIn.getBOMCharsetName() + " BOM, removing it");
            } else {
                System.out.println("This file has " + encodingType + " without BOM");
            }
            return input;
        }

        // Any other encoding
        return new BufferedReader(new InputStreamReader(fileIn, encodingType));
    } catch (IOException | RuntimeException e) {
        // No reader is handed out on failure, so release the file handle here.
        try {
            fileIn.close();
        } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}