Example usage for the org.apache.commons.io.input.BOMInputStream constructor BOMInputStream(InputStream)

Introduction

On this page you can find example usages of the org.apache.commons.io.input.BOMInputStream constructor BOMInputStream(InputStream).

Prototype

public BOMInputStream(InputStream delegate)

Source Link

Document

Constructs a new BOMInputStream that detects a UTF-8 byte order mark (ByteOrderMark.UTF_8) at the start of the delegate stream and excludes it from the data that is read.

Usage

From source file:com.andyasprou.webcrawler.Utilities.GenericSiteMapParser.java

/**
 * Parse the given XML content./*from w ww. j  a  v  a 2 s .c  o m*/
 *
 * @param sitemapUrl URL to sitemap file
 * @param xmlContent the byte[] backing the sitemapUrl
 * @return The site map
 * @throws UnknownFormatException if there is an error parsing the sitemap
 */
protected AbstractSiteMap processXml(URL sitemapUrl, byte[] xmlContent) throws UnknownFormatException {

    BOMInputStream bomIs = new BOMInputStream(new ByteArrayInputStream(xmlContent));
    InputSource is = new InputSource();
    try {
        is.setCharacterStream(new BufferedReader(new InputStreamReader(bomIs, "UTF-8")));
    } catch (UnsupportedEncodingException e) {
        IOUtils.closeQuietly(bomIs);
        throw new RuntimeException("Impossible exception", e);
    }

    return processXml(sitemapUrl, is);
}

From source file:com.itemanalysis.jmetrik.file.JmetrikFileImporter.java

/**
 * Converts the delimited text file {@code dataFile} into {@code outputFile}.
 *
 * <p>The output begins with four comment-marked sections: VERSION (the record
 * {@code jmetrik1}), METADATA (the value of {@code nrow}), ATTRIBUTES (one
 * record per entry of {@code variableAttributeMap}) and DATA. The DATA section
 * holds one record per input record. Values that are not flagged as missing
 * are written as an int for INTEGER variables (parsed as a double, then
 * truncated), as a double for DOUBLE variables, and verbatim otherwise.
 * Missing values are written verbatim.
 *
 * <p>If the output file already exists and {@code overwrite} is false nothing
 * is written. An {@link IOException} raised while converting or while closing
 * a resource is not thrown; it is stored in {@code theException}.
 */
private void convertFile() {
    CSVParser parser = null;
    Reader reader = null;
    CSVPrinter printer = null;
    Writer writer = null;

    try {
        if (outputFile.exists()) {
            if (!overwrite) {
                theException = new IOException("File already exists and overwrite==false");
                return;
            }
        } else {
            outputFile.createNewFile();
        }

        // Write the header sections to the output file.
        // NOTE(review): this writer uses the platform default charset while the
        // input below is decoded as UTF-8 - confirm that this is intended.
        writer = new OutputStreamWriter(new FileOutputStream(outputFile));
        printer = new CSVPrinter(writer, CSVFormat.DEFAULT.withCommentMarker('#'));

        printer.printComment("VERSION");
        printer.printRecord(new String[] { "jmetrik1" });
        printer.printComment("METADATA");
        printer.printRecord(new String[] { Integer.valueOf(nrow).toString() });
        printer.printComment("ATTRIBUTES");
        for (VariableName v : variableAttributeMap.keySet()) {
            printer.printRecord(variableAttributeMap.get(v).getAttributeArray());
        }
        printer.printComment("DATA");

        // Read the data file, skipping a UTF-8 byte order mark if present.
        reader = new InputStreamReader(new BOMInputStream(new FileInputStream(dataFile)), "UTF-8");

        // Exactly one parser is created on the reader; an extra, discarded
        // parser would never be closed.
        if (hasHeader) {
            parser = new CSVParser(reader, dataFileFormat.withHeader(colNames).withSkipHeaderRecord(true));
        } else {
            parser = new CSVParser(reader, dataFileFormat.withHeader(colNames));
        }

        for (CSVRecord csvRecord : parser) {
            for (VariableName v : variableAttributeMap.keySet()) {
                String value = csvRecord.get(v.toString());
                VariableAttributes variableAttributes = variableAttributeMap.get(v);
                DataType dataType = variableAttributes.getDataType();

                if (variableAttributes.isMissing(value)) {
                    // Missing-value codes are copied through unchanged.
                    printer.print(value);
                } else if (DataType.INTEGER == dataType) {
                    printer.print(Double.valueOf(Double.parseDouble(value)).intValue());
                } else if (DataType.DOUBLE == dataType) {
                    printer.print(Double.parseDouble(value));
                } else {
                    printer.print(value);
                }
            }
            printer.println();
        }

    } catch (IOException ex) {
        theException = ex;
    } finally {
        // Each resource is closed on its own so that a failing close() cannot
        // leave the remaining resources open.
        closeAndRecord(parser);
        closeAndRecord(reader);
        closeAndRecord(printer);
        closeAndRecord(writer);
    }
}

/**
 * Closes {@code resource} if it is not null; a failure is stored in
 * {@code theException} and logged instead of being thrown.
 */
private void closeAndRecord(java.io.Closeable resource) {
    if (resource == null) {
        return;
    }
    try {
        resource.close();
    } catch (IOException ex) {
        theException = ex;
        logger.fatal(ex);
    }
}

From source file:com.andyasprou.webcrawler.Utilities.GenericSiteMapParser.java

/**
 * Process a text-based Sitemap. Text sitemaps only list URLs but no
 * priorities, last mods, etc./*  w w  w. j a  v  a2  s  .c om*/
 * @param sitemapUrl a string sitemap URL
 * @param sitemapUrl URL to sitemap file
 * @param content the byte[] backing the sitemapUrl
 * @return The site map
 * @throws IOException if there is an error reading in the site map String
 */
protected SiteMap processText(String sitemapUrl, byte[] content) throws IOException {
    SiteMap textSiteMap = new SiteMap(sitemapUrl);

    BOMInputStream bomIs = new BOMInputStream(new ByteArrayInputStream(content));
    @SuppressWarnings("resource")
    BufferedReader reader = new BufferedReader(new InputStreamReader(bomIs, "UTF-8"));

    String line;
    int i = 1;
    while ((line = reader.readLine()) != null) {
        if (line.length() > 0 && i <= MAX_URLS) {
            addUrlIntoSitemap(line, textSiteMap, null, null, null, i++);
        }
    }
    textSiteMap.setProcessed(true);

    return textSiteMap;
}

From source file:apiconnector.TestDataFunctionality.java

/**
 * Downloads the CSV export of five randomly chosen data sets tagged
 * {@code study_14} and checks that each export contains as many records as
 * the data set's {@code NumberOfInstances} quality, when that quality is
 * present. Only data sets with a file id and the format "arff" are sampled.
 *
 * @throws Exception if listing the data sets or downloading an export fails
 * @throws IllegalStateException if no data set is eligible for sampling
 */
@Ignore
@Test
public void testGetDataAsCsv() throws Exception {
    Random random = new Random();

    Map<String, String> filters = new TreeMap<String, String>();
    filters.put("tag", "study_14");

    DataSet[] all = client_read.dataList(filters).getData();

    // The random sampling below can only terminate when at least one data set
    // is eligible, so verify that before entering the loop.
    boolean anyEligible = false;
    for (DataSet candidate : all) {
        if (candidate.getFileId() != null && "arff".equalsIgnoreCase(candidate.getFormat())) {
            anyEligible = true;
            break;
        }
    }
    if (!anyEligible) {
        throw new IllegalStateException("No ARFF data set with a file id found for tag study_14");
    }

    int checked = 0;
    while (checked < 5) {
        DataSet current = all[random.nextInt(all.length)];

        // Ineligible picks do not count towards the five checked data sets.
        if (current.getFileId() == null || !"arff".equalsIgnoreCase(current.getFormat())) {
            continue;
        }

        String numInst = current.getQualityMap().get("NumberOfInstances");

        String fullUrl = url + "data/get_csv/" + current.getFileId() + "/" + current.getName() + ".csv";
        System.out.println(fullUrl);

        // Named csvUrl so that it does not shadow the url field used above.
        final URL csvUrl = new URL(fullUrl);
        final Reader reader = new InputStreamReader(new BOMInputStream(csvUrl.openStream()), "UTF-8");
        final CSVParser parser = new CSVParser(reader, CSVFormat.DEFAULT);
        try {
            if (numInst != null) {
                int numberOfInstances = (int) Double.parseDouble(numInst);
                // Expected value first, so a failure message reads correctly.
                assertEquals(numberOfInstances, parser.getRecords().size());
            }
        } finally {
            parser.close();
            reader.close();
        }

        checked++;
    }
}

From source file:de.uzk.hki.da.metadata.EadMetsMetadataStructure.java

/**
 * Parses the given METS file and returns the resulting document.
 *
 * <p>The file is read through a {@link BOMInputStream}, decoded as UTF-8 and
 * parsed with the builder obtained from
 * {@code XMLUtils.createNonvalidatingSaxBuilder()}. The parsed document is
 * also stored in the {@code eadDoc} field.
 * NOTE(review): storing a METS document in a field named {@code eadDoc} looks
 * unintended - confirm against the callers before changing it.
 *
 * @param metsFile the METS file to parse
 * @return the parsed document
 * @throws JDOMException if the builder reports a parsing error
 * @throws IOException if the file cannot be opened, read or closed
 */
private Document getMetsDocument(File metsFile) throws JDOMException, IOException {
    SAXBuilder builder = XMLUtils.createNonvalidatingSaxBuilder();
    FileInputStream fileInputStream = new FileInputStream(metsFile);
    try {
        BOMInputStream bomInputStream = new BOMInputStream(fileInputStream);
        Reader reader = new InputStreamReader(bomInputStream, "UTF-8");
        InputSource is = new InputSource(reader);
        is.setEncoding("UTF-8");
        eadDoc = builder.build(is);
        return eadDoc;
    } finally {
        // Release the file handle whether or not parsing succeeded.
        fileInputStream.close();
    }
}

From source file:crawlercommons.sitemaps.SiteMapParser.java

/**
 * Decompress the gzipped content and process the resulting XML Sitemap.
 *
 * <p>The decompressed stream is wrapped in a {@link BOMInputStream} and is
 * closed before this method returns, whether or not processing succeeded.
 *
 * @param url
 *            - URL of the gzipped content
 * @param response
 *            - Gzipped content
 * @return the site map
 * @throws MalformedURLException
 *             declared for callers; not raised directly by this method body
 * @throws IOException
 *             if the gzipped content cannot be read or the stream cannot be
 *             closed
 * @throws UnknownFormatException
 *             presumably propagated from {@code processXml} - confirm there
 */
protected AbstractSiteMap processGzip(URL url, byte[] response)
        throws MalformedURLException, IOException, UnknownFormatException {

    LOG.debug("Processing gzip");

    InputStream is = new ByteArrayInputStream(response);

    // Remove .gz ending
    String xmlUrl = url.toString().replaceFirst("\\.gz$", "");

    LOG.debug("XML url = {}", xmlUrl);

    BOMInputStream decompressed = new BOMInputStream(new GZIPInputStream(is));
    try {
        InputSource in = new InputSource(decompressed);
        in.setSystemId(xmlUrl);
        return processXml(url, in);
    } finally {
        // Also closes the wrapped GZIPInputStream when processXml throws.
        decompressed.close();
    }
}

From source file:com.hangum.tadpole.importdb.core.dialog.importdb.sql.SQLToDBImportDialog.java

/**
 * Reads the last of the receiver's target files as SQL text, splits it on the
 * separator entered by the user and runs the statements as a batch.
 *
 * <p>An error dialog is shown when there is no target file, and nothing is
 * done when the user declines the confirmation. The batch size is taken from
 * the batch-size text field and falls back to 1000 when that value cannot be
 * converted. Failures while reading or importing are logged and reported in
 * an error dialog.
 *
 * @throws IOException if closing the BOM detection stream fails
 */
private void insert() throws IOException {
    File[] uploadedFiles = receiver.getTargetFiles();
    if (uploadedFiles.length == 0) {
        MessageDialog.openError(null, Messages.CsvToRDBImportDialog_4, Messages.CsvToRDBImportDialog_21);
        return;
    }

    boolean confirmed = MessageDialog.openConfirm(null, Messages.CsvToRDBImportDialog_4,
            Messages.SQLToDBImportDialog_UploadQuestion);
    if (!confirmed) {
        return;
    }
    bufferBatchResult = new StringBuffer();

    try {
        batchSize = Integer.valueOf(textBatchSize.getText());
    } catch (Exception ignored) {
        // Any unusable batch-size input falls back to the default.
        batchSize = 1000;
    }

    File userUploadFile = uploadedFiles[uploadedFiles.length - 1];
    BOMInputStream bomInputStream = null;
    try {
        // Probe the file for a BOM to decide which charset to decode it with.
        // The single-argument constructor only looks for a UTF-8 BOM.
        bomInputStream = new BOMInputStream(FileUtils.openInputStream(FileUtils.getFile(userUploadFile)));

        final String sqlText;
        if (bomInputStream.getBOM() != null) {
            String bomCharset = bomInputStream.getBOMCharsetName();
            // The decoded text still starts with the BOM character; drop it.
            sqlText = FileUtils.readFileToString(userUploadFile, bomCharset).substring(1);
        } else {
            sqlText = FileUtils.readFileToString(userUploadFile, "utf-8");
        }

        String[] statements = StringUtils.split(sqlText, textSeprator.getText());
        int ret = runSQLExecuteBatch(Arrays.asList(statements));

        if (ret == 0) {
            MessageDialog.openInformation(null, "Confirm", Messages.SQLToDBImportDialog_StoreData); //$NON-NLS-1$
        }
    } catch (IOException e) {
        logger.error(Messages.SQLToDBImportDialog_ReadError, e);
        MessageDialog.openError(null, Messages.CsvToRDBImportDialog_4,
                Messages.SQLToDBImportDialog_LoadException + e.getMessage());
    } catch (Exception e) {
        logger.error(Messages.SQLToDBImportDialog_ImportException, e);
        MessageDialog.openError(null, Messages.CsvToRDBImportDialog_4,
                Messages.SQLToDBImportDialog_LoadException + e.getMessage());
    } finally {
        if (bomInputStream != null) {
            bomInputStream.close();
        }
    }
}

From source file:crawlercommons.sitemaps.SiteMapParserSAX.java

/**
 * Parses the given XML sitemap content.
 *
 * <p>The bytes are read through a {@link BOMInputStream}, decoded with the
 * {@code UTF_8} charset and handed to {@code processXml(URL, InputSource)}
 * as a character stream.
 *
 * @param sitemapUrl
 *            URL to sitemap file
 * @param xmlContent
 *            the byte[] backing the sitemapUrl
 * @return The site map
 * @throws UnknownFormatException
 *             if there is an error parsing the sitemap
 */
protected AbstractSiteMap processXml(URL sitemapUrl, byte[] xmlContent) throws UnknownFormatException {
    BOMInputStream bomFreeStream = new BOMInputStream(new ByteArrayInputStream(xmlContent));
    BufferedReader utf8Reader = new BufferedReader(new InputStreamReader(bomFreeStream, UTF_8));

    InputSource source = new InputSource();
    source.setCharacterStream(utf8Reader);

    return processXml(sitemapUrl, source);
}

From source file:com.hangum.tadpole.importexport.core.dialogs.SQLToDBImportDialog.java

/**
 * Reads the last of the receiver's target files as SQL text, splits it on the
 * separator entered by the user and runs the statements as a batch.
 *
 * <p>A warning dialog is shown when there is no target file, and nothing is
 * done when the user declines the confirmation. The batch size is taken from
 * the batch-size text field and falls back to 1000 when that value cannot be
 * converted. Failures while reading or importing are logged and reported in
 * an error dialog.
 *
 * @throws IOException if closing the BOM detection stream fails
 */
private void insert() throws IOException {
    File[] uploadedFiles = receiver.getTargetFiles();
    if (uploadedFiles.length == 0) {
        MessageDialog.openWarning(null, Messages.get().Warning, Messages.get().CsvToRDBImportDialog_21);
        return;
    }

    boolean confirmed = MessageDialog.openConfirm(null, Messages.get().Confirm,
            Messages.get().SQLToDBImportDialog_UploadQuestion);
    if (!confirmed) {
        return;
    }
    bufferBatchResult = new StringBuffer();

    try {
        batchSize = Integer.valueOf(textBatchSize.getText());
    } catch (Exception ignored) {
        // Any unusable batch-size input falls back to the default.
        batchSize = 1000;
    }

    File userUploadFile = uploadedFiles[uploadedFiles.length - 1];
    BOMInputStream bomInputStream = null;
    try {
        // Probe the file for a BOM to decide which charset to decode it with.
        // The single-argument constructor only looks for a UTF-8 BOM.
        bomInputStream = new BOMInputStream(FileUtils.openInputStream(FileUtils.getFile(userUploadFile)));

        final String sqlText;
        if (bomInputStream.getBOM() != null) {
            String bomCharset = bomInputStream.getBOMCharsetName();
            // The decoded text still starts with the BOM character; drop it.
            sqlText = FileUtils.readFileToString(userUploadFile, bomCharset).substring(1);
        } else {
            sqlText = FileUtils.readFileToString(userUploadFile, "utf-8"); //$NON-NLS-1$
        }

        String[] statements = StringUtils.split(sqlText, textSeprator.getText());
        int ret = runSQLExecuteBatch(Arrays.asList(statements));

        if (ret == 0) {
            MessageDialog.openInformation(null, Messages.get().Confirm,
                    Messages.get().SQLToDBImportDialog_StoreData);
        }
    } catch (IOException e) {
        logger.error(Messages.get().SQLToDBImportDialog_ReadError, e);
        MessageDialog.openError(null, Messages.get().Confirm,
                Messages.get().SQLToDBImportDialog_LoadException + e.getMessage());
    } catch (Exception e) {
        logger.error(Messages.get().SQLToDBImportDialog_ImportException, e);
        MessageDialog.openError(null, Messages.get().Confirm,
                Messages.get().SQLToDBImportDialog_LoadException + e.getMessage());
    } finally {
        if (bomInputStream != null) {
            bomInputStream.close();
        }
    }
}

From source file:com.andyasprou.webcrawler.Utilities.GenericSiteMapParser.java

/**
 * Decompress the gzipped content and process the resulting XML Sitemap.
 *
 * <p>The decompressed stream is wrapped in a {@link BOMInputStream} and is
 * closed before this method returns, whether or not processing succeeded.
 *
 * @param url
 *            - URL of the gzipped content
 * @param response
 *            - Gzipped content
 * @return the site map
 * @throws UnknownFormatException if there is an error parsing the gzip
 * @throws IOException if there is an error reading in the gzip {@link java.net.URL}
 */
protected AbstractSiteMap processGzip(URL url, byte[] response) throws IOException, UnknownFormatException {

    InputStream is = new ByteArrayInputStream(response);

    // Remove .gz ending
    String xmlUrl = url.toString().replaceFirst("\\.gz$", "");

    BOMInputStream decompressed = new BOMInputStream(new GZIPInputStream(is));
    try {
        InputSource in = new InputSource(decompressed);
        in.setSystemId(xmlUrl);
        return processXml(url, in);
    } finally {
        // Also closes the wrapped GZIPInputStream when processXml throws.
        decompressed.close();
    }
}