Example usage for org.apache.commons.io ByteOrderMark UTF_16BE

List of usage examples for org.apache.commons.io ByteOrderMark UTF_16BE

Introduction

On this page you can find example usages of org.apache.commons.io ByteOrderMark UTF_16BE.

Prototype

ByteOrderMark UTF_16BE

To view the source code for org.apache.commons.io ByteOrderMark UTF_16BE, click the Source Link.

Document

UTF-16BE BOM (Big Endian)

Usage

From source file:org.apache.any23.util.StreamUtils.java

/**
 * Parses the XML content of the given stream into a DOM {@link Document}.
 *
 * <p>The stream is wrapped in a {@link BOMInputStream} that recognises UTF-8, UTF-16 (BE/LE) and
 * UTF-32 (BE/LE) byte order marks, so a leading BOM never reaches the XML parser.
 *
 * @param is the stream to read the XML document from
 * @return the parsed document, or {@code null} when the parser could not be created or the
 *         content could not be read or parsed (the failure is logged)
 * @throws MalformedByteSequenceException kept from the original signature; I/O and SAX failures
 *         raised while parsing are caught and logged here rather than propagated
 */
public static Document inputStreamToDocument(InputStream is) throws MalformedByteSequenceException {
    final DocumentBuilder builder;
    try {
        builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    } catch (ParserConfigurationException e) {
        logger.error("Error converting InputStream to Document: {}", e);
        // Without a builder nothing can be parsed; report failure the same way as a parse
        // error instead of dereferencing a null builder below.
        return null;
    }

    try {
        // This constructor does not set the "include" flag, so BOMInputStream itself hides a
        // detected BOM from readers. The stream must be handed to the parser untouched: an
        // extra read() here would consume the first byte of the real document.
        BOMInputStream bomIn = new BOMInputStream(is, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE,
                ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_32LE);
        return builder.parse(bomIn);
    } catch (SAXException | IOException e) {
        logger.error("Error converting InputStream to Document: {}", e);
        return null;
    }
}

From source file:org.hillview.storage.TextFileLoader.java

/**
 * Opens {@code this.filename} as a character stream.
 *
 * <p>The file is buffered, decompressed when {@code Utilities.isCompressed} reports it as
 * compressed, and wrapped in a BOM-aware stream. Every intermediate stream is stored in the
 * corresponding field of this object.
 *
 * @return a reader that decodes with the charset named by the detected BOM, or with UTF-8 when
 *         no BOM is present
 * @throws RuntimeException wrapping any {@code IOException} or {@code CompressorException}
 */
Reader getFileReader() {
    try {
        HillviewLogger.instance.info("Reading file", "{0}", this.filename);
        this.inputStream = new FileInputStream(this.filename);
        // The buffered input stream is needed by the CompressorStream
        // to detect the compression method at runtime.
        this.bufferedInputStream = new BufferedInputStream(this.inputStream);

        final InputStream source;
        if (Utilities.isCompressed(this.filename)) {
            this.compressedStream = new CompressorStreamFactory()
                    .createCompressorInputStream(this.bufferedInputStream);
            source = this.compressedStream;
        } else {
            source = this.bufferedInputStream;
        }

        this.bomStream = new BOMInputStream(source, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE,
                ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);

        // Fall back to UTF-8 when the file carries no byte order mark.
        final ByteOrderMark detected = this.bomStream.getBOM();
        final String charsetName;
        if (detected != null) {
            charsetName = detected.getCharsetName();
        } else {
            charsetName = "UTF-8";
        }
        return new InputStreamReader(this.bomStream, charsetName);
    } catch (IOException | CompressorException e) {
        throw new RuntimeException(e);
    }
}

From source file:org.languagetool.commandline.Main.java

/**
 * Creates a reader for the given input, which is either standard input or a file.
 *
 * <p>The charset is {@code encoding} when one is supplied, otherwise the platform default. For
 * files, a detected byte order mark overrides the platform default, but never an explicitly
 * supplied {@code encoding}. The BOM-aware stream is created with its include flag set to
 * {@code true}.
 *
 * @param filename the file to read, or a name that {@code isStdIn} recognises as standard input
 * @param encoding the charset name to use, or {@code null} to detect or default it
 * @return a buffered reader over the selected input
 * @throws IOException if the file cannot be opened or probed for a BOM
 */
private InputStreamReader getInputStreamReader(String filename, String encoding) throws IOException {
    String charsetName;
    if (encoding == null) {
        charsetName = Charset.defaultCharset().name();
    } else {
        charsetName = encoding;
    }

    final InputStream source;
    if (isStdIn(filename)) {
        source = System.in;
    } else {
        final BOMInputStream bomAware = new BOMInputStream(new FileInputStream(new File(filename)), true,
                ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE,
                ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_32LE);
        // The stream is always probed for a BOM, even when the caller fixed the encoding.
        final boolean bomPresent = bomAware.hasBOM();
        if (bomPresent && encoding == null) {
            charsetName = bomAware.getBOMCharsetName();
        }
        source = bomAware;
    }
    return new InputStreamReader(new BufferedInputStream(source), charsetName);
}

From source file:org.languagetool.gui.Main.java

/**
 * Loads the given file into the text area and records it as the current and most recent file.
 *
 * <p>The detected byte order mark (or {@code null} when there is none) is stored in the
 * enclosing {@code bom} variable; the BOM itself is excluded from the text that is read. Any
 * {@link IOException} is reported through {@code Tools.showError}.
 *
 * @param file the file to load
 */
private void loadFile(File file) {
    try (FileInputStream inputStream = new FileInputStream(file)) {
        final BOMInputStream bomAware = new BOMInputStream(inputStream, false, ByteOrderMark.UTF_8,
                ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_32LE);

        final String charsetName;
        if (!bomAware.hasBOM()) {
            // No BOM found
            bom = null;
            charsetName = null;
        } else {
            bom = bomAware.getBOM();
            charsetName = bom.getCharsetName();
        }

        textArea.setText(StringTools.readStream(bomAware, charsetName));
        currentFile = file;
        updateTitle();

        // Move the file to the end of the recent-files list, dropping any older entry first.
        final String absolutePath = file.getAbsolutePath();
        if (recentFiles.contains(absolutePath)) {
            recentFiles.remove(absolutePath);
        }
        recentFiles.add(absolutePath);
        localStorage.saveProperty("recentFiles", recentFiles);
        updateRecentFilesMenu();
    } catch (IOException e) {
        Tools.showError(e);
    }
}

From source file:org.omegat.util.TMXReaderTest.java

/**
 * Runs {@code testXml} against {@code build/testdata/test.xml} for each supported byte order
 * mark with an XML declaration that names no encoding, and then without a BOM for declarations
 * that name UTF-8 and ISO-8859-1 explicitly.
 */
public void testCharset() throws Exception {
    final File xml = new File("build/testdata/test.xml");
    xml.getParentFile().mkdirs();

    // BOM present, declaration without an encoding: one row per BOM and its expected charset.
    final ByteOrderMark[] boms = {ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
            ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE};
    final String[] expectedCharsets = {"UTF-8", "UTF-16LE", "UTF-16BE", "UTF-32LE", "UTF-32BE"};
    for (int i = 0; i < boms.length; i++) {
        testXml(xml, boms[i], "<?xml version=\"1.0\"?>", expectedCharsets[i]);
    }

    // No BOM: the encoding comes from the XML declaration.
    testXml(xml, null, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>", "UTF-8");
    testXml(xml, null, "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>", "ISO-8859-1");
}

From source file:org.sonar.api.batch.fs.internal.DefaultInputFile.java

/**
 * Opens the content of this input file as a byte stream.
 *
 * @return a stream over {@code contents} encoded with {@code charset()} when {@code contents}
 *         is set; otherwise a BOM-aware stream over the file at {@code path()}
 * @throws IOException if the file at {@code path()} cannot be opened
 */
@Override
public InputStream inputStream() throws IOException {
    if (contents != null) {
        return new ByteArrayInputStream(contents.getBytes(charset()));
    }
    final InputStream fileStream = Files.newInputStream(path());
    return new BOMInputStream(fileStream, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE,
            ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
}

From source file:org.sonar.batch.report.SourcePublisher.java

/**
 * Copies the source of every file component in {@code resourceCache} into the report.
 *
 * <p>Each file is read through a BOM-aware stream using the input file's charset and rewritten
 * as UTF-8 into the file returned by {@code writer.getSourceFile}. Lines are joined with
 * {@code "\n"}; a separator is written after a line only while the number of lines written so
 * far is below {@code inputFile.lines()}.
 *
 * @param writer the report writer that supplies the destination file for each component
 * @throws IllegalStateException if a source file cannot be read or written
 */
@Override
public void publish(BatchReportWriter writer) {
    for (final BatchComponent resource : resourceCache.all()) {
        if (resource.isFile()) {
            final DefaultInputFile inputFile = (DefaultInputFile) resource.inputComponent();
            final File target = writer.getSourceFile(resource.batchId());
            int written = 0;
            try (FileOutputStream output = new FileOutputStream(target);
                    BOMInputStream bomIn = new BOMInputStream(new FileInputStream(inputFile.file()),
                            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
                            ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
                    BufferedReader reader = new BufferedReader(
                            new InputStreamReader(bomIn, inputFile.charset()))) {
                for (String text = reader.readLine(); text != null; text = reader.readLine()) {
                    IOUtils.write(text, output, StandardCharsets.UTF_8);
                    written++;
                    if (written < inputFile.lines()) {
                        IOUtils.write("\n", output, StandardCharsets.UTF_8);
                    }
                }
            } catch (IOException e) {
                throw new IllegalStateException("Unable to store file source in the report", e);
            }
        }
    }
}

From source file:org.sonar.scanner.scan.filesystem.ByteCharsetDetectorTest.java

/**
 * Checks that {@code charsets.detectBOM} recognises the raw UTF-16BE BOM bytes and the BOM of
 * each fixture returned by {@code readFile}.
 */
@Test
public void detectBOM() throws URISyntaxException, IOException {
    // A buffer holding nothing but the BOM itself.
    final byte[] bomOnly = ByteOrderMark.UTF_16BE.getBytes();
    assertThat(charsets.detectBOM(bomOnly)).isEqualTo(ByteOrderMark.UTF_16BE);

    // One fixture per encoding, paired with the BOM it is expected to yield.
    final String[] fixtures = {"UTF-8", "UTF-16BE", "UTF-16LE", "UTF-32BE", "UTF-32LE"};
    final ByteOrderMark[] expected = {ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE,
            ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_32LE};
    for (int i = 0; i < fixtures.length; i++) {
        assertThat(charsets.detectBOM(readFile(fixtures[i]))).isEqualTo(expected[i]);
    }
}

From source file:org.sonarsource.sonarlint.core.container.analysis.filesystem.FileMetadata.java

/**
 * Opens the given file as a BOM-aware byte stream.
 *
 * @param file the file to open
 * @return a stream over the file that recognises UTF-8, UTF-16 and UTF-32 byte order marks
 * @throws IllegalStateException if the file cannot be opened
 */
private static InputStream streamFile(File file) {
    final FileInputStream raw;
    try {
        raw = new FileInputStream(file);
    } catch (FileNotFoundException e) {
        throw new IllegalStateException("File not found: " + file.getAbsolutePath(), e);
    }
    return new BOMInputStream(raw, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE,
            ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
}

From source file:org.w3.i18n.Utils.java

/**
 * Detects the byte order mark at the start of the given bytes.
 *
 * @param bytes the bytes to inspect; must not be {@code null}
 * @return the value of {@code BOMInputStream.getBOM()} for these bytes, probing for UTF-8,
 *         UTF-16 (BE/LE) and UTF-32 (BE/LE) marks
 * @throws NullPointerException if {@code bytes} is {@code null}
 * @throws RuntimeException wrapping any {@link IOException} raised while probing
 */
public static ByteOrderMark findByteOrderMark(byte[] bytes) {
    if (bytes == null) {
        throw new NullPointerException("bytes: " + bytes);
    }
    final BOMInputStream probe = new BOMInputStream(new ByteArrayInputStream(bytes), ByteOrderMark.UTF_8,
            ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_32LE);
    try {
        return probe.getBOM();
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
}