Example usage for org.apache.commons.io.input BOMInputStream BOMInputStream

List of usage examples for org.apache.commons.io.input BOMInputStream BOMInputStream

Introduction

On this page you can find example usages of the org.apache.commons.io.input.BOMInputStream constructor BOMInputStream(InputStream).

Prototype

public BOMInputStream(InputStream delegate) 

Source Link

Document

Constructs a new BOMInputStream that detects and excludes a UTF-8 byte order mark (ByteOrderMark.UTF_8) from the wrapped stream.

Usage

From source file:com.andyasprou.webcrawler.Utilities.GenericSiteMapParser.java

/**
 * Parse the given XML content./*from w ww. j  a  v  a 2 s .c  o m*/
 *
 * @param sitemapUrl URL to sitemap file
 * @param xmlContent the byte[] backing the sitemapUrl
 * @return The site map
 * @throws UnknownFormatException if there is an error parsing the sitemap
 */
protected AbstractSiteMap processXml(URL sitemapUrl, byte[] xmlContent) throws UnknownFormatException {

    BOMInputStream bomIs = new BOMInputStream(new ByteArrayInputStream(xmlContent));
    InputSource is = new InputSource();
    try {
        is.setCharacterStream(new BufferedReader(new InputStreamReader(bomIs, "UTF-8")));
    } catch (UnsupportedEncodingException e) {
        IOUtils.closeQuietly(bomIs);
        throw new RuntimeException("Impossible exception", e);
    }

    return processXml(sitemapUrl, is);
}

From source file:com.itemanalysis.jmetrik.file.JmetrikFileImporter.java

/**
 * Converts {@code dataFile} into {@code outputFile}.
 *
 * <p>The output starts with four comment-marked sections ({@code VERSION}, {@code METADATA},
 * {@code ATTRIBUTES}, {@code DATA}); the data section holds one record per input record, with
 * one value per entry of {@code variableAttributeMap}. Non-missing values are written as an
 * int or a double when the variable's data type is INTEGER or DOUBLE, and verbatim otherwise.
 *
 * <p>I/O failures are not thrown; they are stored in {@code theException}. If the output file
 * already exists and {@code overwrite} is false, nothing is written.
 *
 * <p>NOTE(review): the output writer uses the platform default charset while the input is
 * decoded as UTF-8 — confirm this asymmetry is intended.
 */
private void convertFile() {
    CSVParser parser = null;
    Reader reader = null;
    CSVPrinter printer = null;
    Writer writer = null;

    try {
        if (outputFile.exists()) {
            if (!overwrite) {
                theException = new IOException("File already exists and overwrite==false");
                return;
            }
        } else {
            outputFile.createNewFile();
        }

        // Write the header sections to the output file.
        writer = new OutputStreamWriter(new FileOutputStream(outputFile));
        printer = new CSVPrinter(writer, CSVFormat.DEFAULT.withCommentMarker('#'));

        printer.printComment("VERSION");
        printer.printRecord(new String[] { "jmetrik1" });
        printer.printComment("METADATA");
        printer.printRecord(new String[] { Integer.valueOf(nrow).toString() });
        printer.printComment("ATTRIBUTES");
        for (VariableName v : variableAttributeMap.keySet()) {
            printer.printRecord(variableAttributeMap.get(v).getAttributeArray());
        }
        printer.printComment("DATA");

        // Read the data file; the BOMInputStream drops a leading UTF-8 byte order mark.
        reader = new InputStreamReader(new BOMInputStream(new FileInputStream(dataFile)), "UTF-8");

        // Create exactly one parser on the reader. A second, discarded parser would never be
        // closed and could consume input that the real parser then never sees.
        if (hasHeader) {
            parser = new CSVParser(reader, dataFileFormat.withHeader(colNames).withSkipHeaderRecord(true));
        } else {
            parser = new CSVParser(reader, dataFileFormat.withHeader(colNames));
        }

        for (CSVRecord csvRecord : parser) {
            for (VariableName v : variableAttributeMap.keySet()) {
                String value = csvRecord.get(v.toString());
                VariableAttributes variableAttributes = variableAttributeMap.get(v);
                DataType dataType = variableAttributes.getDataType();

                if (variableAttributes.isMissing(value)) {
                    // Missing-value codes are copied through unchanged.
                    printer.print(value);
                } else if (DataType.INTEGER == dataType) {
                    // Parsed as a double first so that text such as "3.0" is accepted, then truncated.
                    printer.print((int) Double.parseDouble(value));
                } else if (DataType.DOUBLE == dataType) {
                    printer.print(Double.parseDouble(value));
                } else {
                    printer.print(value);
                }
            }
            printer.println();
        }

    } catch (IOException ex) {
        theException = ex;
    } finally {
        // Close every resource independently so a failing close() cannot leak the others.
        final java.io.Closeable[] resources = { parser, reader, printer, writer };
        for (java.io.Closeable resource : resources) {
            if (resource == null) {
                continue;
            }
            try {
                resource.close();
            } catch (IOException ex) {
                theException = ex;
                logger.fatal(ex);
            }
        }
    }
}

From source file:com.andyasprou.webcrawler.Utilities.GenericSiteMapParser.java

/**
 * Process a text-based Sitemap. Text sitemaps only list URLs but no
 * priorities, last mods, etc./*  w w  w. j a  v  a2  s  .c om*/
 * @param sitemapUrl a string sitemap URL
 * @param sitemapUrl URL to sitemap file
 * @param content the byte[] backing the sitemapUrl
 * @return The site map
 * @throws IOException if there is an error reading in the site map String
 */
protected SiteMap processText(String sitemapUrl, byte[] content) throws IOException {
    SiteMap textSiteMap = new SiteMap(sitemapUrl);

    BOMInputStream bomIs = new BOMInputStream(new ByteArrayInputStream(content));
    @SuppressWarnings("resource")
    BufferedReader reader = new BufferedReader(new InputStreamReader(bomIs, "UTF-8"));

    String line;
    int i = 1;
    while ((line = reader.readLine()) != null) {
        if (line.length() > 0 && i <= MAX_URLS) {
            addUrlIntoSitemap(line, textSiteMap, null, null, null, i++);
        }
    }
    textSiteMap.setProcessed(true);

    return textSiteMap;
}

From source file:apiconnector.TestDataFunctionality.java

/**
 * Downloads the CSV export of five randomly chosen ARFF data sets tagged {@code study_14}
 * and, when a data set reports a {@code NumberOfInstances} quality, checks that the CSV
 * contains that many records.
 *
 * <p>NOTE(review): the loop only advances on a qualifying data set, so it does not terminate
 * if the listing contains none (and fails on an empty listing) — confirm the tag always
 * yields ARFF data sets with a file id.
 */
@Ignore
@Test
public void testGetDataAsCsv() throws Exception {
    //client_read.setVerboseLevel(1);
    Random random = new Random();

    Map<String, String> filters = new TreeMap<String, String>();
    filters.put("tag", "study_14");

    DataSet[] all = client_read.dataList(filters).getData();

    for (int i = 0; i < 5;) {
        DataSet current = all[random.nextInt(all.length)];

        String numInst = current.getQualityMap().get("NumberOfInstances");

        // Skip data sets without an uploaded file or in a non-ARFF format; the comparison is
        // written constant-first so a null format is skipped instead of throwing.
        if (current.getFileId() == null || !"arff".equalsIgnoreCase(current.getFormat())) {
            continue;
        }

        // `url` here is the test fixture's base URL field; no local of the same name is declared.
        String fullUrl = url + "data/get_csv/" + current.getFileId() + "/" + current.getName() + ".csv";
        System.out.println(fullUrl);

        final Reader reader = new InputStreamReader(new BOMInputStream(new URL(fullUrl).openStream()),
                "UTF-8");
        try {
            final CSVParser parser = new CSVParser(reader, CSVFormat.DEFAULT);
            try {
                if (numInst != null) {
                    int numberOfInstances = (int) Double.parseDouble(numInst);
                    // Expected value first, so a failure message reads the right way round.
                    assertEquals(numberOfInstances, parser.getRecords().size());
                }
            } finally {
                parser.close();
            }
        } finally {
            // Runs even when the parser could not be constructed.
            reader.close();
        }

        // important: only a data set that was actually checked counts towards the five
        i += 1;
    }

}

From source file:de.uzk.hki.da.metadata.EadMetsMetadataStructure.java

/**
 * Parses the given METS file with a non-validating SAX builder.
 *
 * <p>The file is read through a {@code BOMInputStream} (which excludes a leading UTF-8 byte
 * order mark) and decoded as UTF-8. The parsed document is also stored in the {@code eadDoc}
 * field before being returned.
 *
 * @param metsFile the file to parse
 * @return the parsed document
 * @throws JDOMException declared by the builder's {@code build} call
 * @throws IOException if the file cannot be opened, read or closed
 */
private Document getMetsDocument(File metsFile) throws JDOMException, IOException {
    SAXBuilder builder = XMLUtils.createNonvalidatingSaxBuilder();
    Reader reader = new InputStreamReader(new BOMInputStream(new FileInputStream(metsFile)), "UTF-8");
    try {
        InputSource is = new InputSource(reader);
        is.setEncoding("UTF-8");
        eadDoc = builder.build(is);
        return eadDoc;
    } finally {
        // Release the file handle whether or not parsing succeeded.
        reader.close();
    }
}

From source file:crawlercommons.sitemaps.SiteMapParser.java

/**
 * Decompress the gzipped content and process the resulting XML Sitemap.
 *
 * <p>The decompressed stream is wrapped in a {@code BOMInputStream} (which excludes a leading
 * UTF-8 byte order mark) and is closed whether or not processing succeeds.
 *
 * @param url
 *            - URL of the gzipped content
 * @param response
 *            - Gzipped content
 * @return the site map
 * @throws MalformedURLException
 *             declared for callers; nothing visible in this method throws it
 *             directly (TODO confirm whether the delegated XML processing does)
 * @throws IOException
 *             if the content cannot be read as gzip or the stream cannot be closed
 * @throws UnknownFormatException
 *             propagated from the delegated XML processing
 */
protected AbstractSiteMap processGzip(URL url, byte[] response)
        throws MalformedURLException, IOException, UnknownFormatException {

    LOG.debug("Processing gzip");

    InputStream is = new ByteArrayInputStream(response);

    // Remove .gz ending
    String xmlUrl = url.toString().replaceFirst("\\.gz$", "");

    LOG.debug("XML url = {}", xmlUrl);

    BOMInputStream decompressed = new BOMInputStream(new GZIPInputStream(is));
    try {
        InputSource in = new InputSource(decompressed);
        // The system id is the URL without ".gz", i.e. the name of the XML inside the archive.
        in.setSystemId(xmlUrl);
        return processXml(url, in);
    } finally {
        // Always release the stream, including when processXml throws.
        decompressed.close();
    }
}

From source file:com.hangum.tadpole.importdb.core.dialog.importdb.sql.SQLToDBImportDialog.java

/**
 * Runs the SQL statements contained in the last of the uploaded target files.
 *
 * <p>Flow: abort with an error dialog when no file was uploaded, ask the user for
 * confirmation, read the batch size from the text field (1000 when it cannot be parsed),
 * read the file, split it on the separator entered in the dialog and execute the
 * statements as a batch. Read and execution failures are logged and shown in an error dialog.
 *
 * @throws IOException if closing the BOM-detection stream fails
 */
private void insert() throws IOException {
    final File[] targetFiles = receiver.getTargetFiles();
    if (targetFiles.length == 0) {
        MessageDialog.openError(null, Messages.CsvToRDBImportDialog_4, Messages.CsvToRDBImportDialog_21);
        return;
    }

    final boolean confirmed = MessageDialog.openConfirm(null, Messages.CsvToRDBImportDialog_4,
            Messages.SQLToDBImportDialog_UploadQuestion);
    if (!confirmed) {
        return;
    }
    bufferBatchResult = new StringBuffer();

    try {
        batchSize = Integer.valueOf(textBatchSize.getText());
    } catch (Exception e) {
        // Any unparsable input falls back to the default batch size.
        batchSize = 1000;
    }

    // Only the last entry of the uploaded files is imported.
    final File uploadedFile = targetFiles[targetFiles.length - 1];
    BOMInputStream bomStream = null;
    try {
        // This stream is used for BOM detection only; the content itself is read from the
        // file below. With this constructor only a UTF-8 BOM is recognised.
        bomStream = new BOMInputStream(FileUtils.openInputStream(FileUtils.getFile(uploadedFile)));

        final boolean hasBom = bomStream.getBOM() != null;
        final String charset = hasBom ? bomStream.getBOMCharsetName() : "utf-8";
        String sqlText = FileUtils.readFileToString(uploadedFile, charset);
        if (hasBom) {
            // Drop the first decoded character, which is the BOM itself.
            sqlText = sqlText.substring(1);
        }

        final String[] statements = StringUtils.split(sqlText, textSeprator.getText());
        final int ret = runSQLExecuteBatch(Arrays.asList(statements));
        if (ret == 0) {
            MessageDialog.openInformation(null, "Confirm", Messages.SQLToDBImportDialog_StoreData); //$NON-NLS-1$
        }
    } catch (IOException e) {
        logger.error(Messages.SQLToDBImportDialog_ReadError, e);
        MessageDialog.openError(null, Messages.CsvToRDBImportDialog_4,
                Messages.SQLToDBImportDialog_LoadException + e.getMessage());
    } catch (Exception e) {
        logger.error(Messages.SQLToDBImportDialog_ImportException, e);
        MessageDialog.openError(null, Messages.CsvToRDBImportDialog_4,
                Messages.SQLToDBImportDialog_LoadException + e.getMessage());
    } finally {
        if (bomStream != null) {
            bomStream.close();
        }
    }
}

From source file:crawlercommons.sitemaps.SiteMapParserSAX.java

/**
 * Parse the given XML content.
 *
 * <p>The raw bytes are read through a {@code BOMInputStream} (which excludes a leading UTF-8
 * byte order mark), decoded as UTF-8 and passed on to the {@code InputSource}-based overload.
 *
 * @param sitemapUrl
 *            URL to sitemap file
 * @param xmlContent
 *            the byte[] backing the sitemapUrl
 * @return The site map
 * @throws UnknownFormatException
 *             if there is an error parsing the sitemap
 */
protected AbstractSiteMap processXml(URL sitemapUrl, byte[] xmlContent) throws UnknownFormatException {
    final BOMInputStream bomFreeBytes = new BOMInputStream(new ByteArrayInputStream(xmlContent));
    final BufferedReader utf8Text = new BufferedReader(new InputStreamReader(bomFreeBytes, UTF_8));

    // Supplying a character stream means the parser reads decoded text, not raw bytes.
    final InputSource source = new InputSource(utf8Text);
    return processXml(sitemapUrl, source);
}

From source file:com.hangum.tadpole.importexport.core.dialogs.SQLToDBImportDialog.java

/**
 * Runs the SQL statements contained in the last of the uploaded target files.
 *
 * <p>Flow: abort with a warning dialog when no file was uploaded, ask the user for
 * confirmation, read the batch size from the text field (1000 when it cannot be parsed),
 * read the file, split it on the separator entered in the dialog and execute the
 * statements as a batch. Read and execution failures are logged and shown in an error dialog.
 *
 * @throws IOException if closing the BOM-detection stream fails
 */
private void insert() throws IOException {
    final File[] targetFiles = receiver.getTargetFiles();
    if (targetFiles.length == 0) {
        MessageDialog.openWarning(null, Messages.get().Warning, Messages.get().CsvToRDBImportDialog_21);
        return;
    }

    final boolean confirmed = MessageDialog.openConfirm(null, Messages.get().Confirm,
            Messages.get().SQLToDBImportDialog_UploadQuestion);
    if (!confirmed) {
        return;
    }
    bufferBatchResult = new StringBuffer();

    try {
        batchSize = Integer.valueOf(textBatchSize.getText());
    } catch (Exception e) {
        // Any unparsable input falls back to the default batch size.
        batchSize = 1000;
    }

    // Only the last entry of the uploaded files is imported.
    final File uploadedFile = targetFiles[targetFiles.length - 1];
    BOMInputStream bomStream = null;
    try {
        // This stream is used for BOM detection only; the content itself is read from the
        // file below. With this constructor only a UTF-8 BOM is recognised.
        bomStream = new BOMInputStream(FileUtils.openInputStream(FileUtils.getFile(uploadedFile)));

        final boolean hasBom = bomStream.getBOM() != null;
        final String charset = hasBom ? bomStream.getBOMCharsetName() : "utf-8"; //$NON-NLS-1$
        String sqlText = FileUtils.readFileToString(uploadedFile, charset);
        if (hasBom) {
            // Drop the first decoded character, which is the BOM itself.
            sqlText = sqlText.substring(1);
        }

        final String[] statements = StringUtils.split(sqlText, textSeprator.getText());
        final int ret = runSQLExecuteBatch(Arrays.asList(statements));
        if (ret == 0) {
            MessageDialog.openInformation(null, Messages.get().Confirm,
                    Messages.get().SQLToDBImportDialog_StoreData);
        }
    } catch (IOException e) {
        logger.error(Messages.get().SQLToDBImportDialog_ReadError, e);
        MessageDialog.openError(null, Messages.get().Confirm,
                Messages.get().SQLToDBImportDialog_LoadException + e.getMessage());
    } catch (Exception e) {
        logger.error(Messages.get().SQLToDBImportDialog_ImportException, e);
        MessageDialog.openError(null, Messages.get().Confirm,
                Messages.get().SQLToDBImportDialog_LoadException + e.getMessage());
    } finally {
        if (bomStream != null) {
            bomStream.close();
        }
    }
}

From source file:com.andyasprou.webcrawler.Utilities.GenericSiteMapParser.java

/**
 * Decompress the gzipped content and process the resulting XML Sitemap.
 *
 * <p>The decompressed stream is wrapped in a {@code BOMInputStream} (which excludes a leading
 * UTF-8 byte order mark) and is closed whether or not processing succeeds.
 *
 * @param url
 *            - URL of the gzipped content
 * @param response
 *            - Gzipped content
 * @return the site map
 * @throws UnknownFormatException if there is an error parsing the gzip
 * @throws IOException if there is an error reading in the gzip {@link java.net.URL}
 */
protected AbstractSiteMap processGzip(URL url, byte[] response) throws IOException, UnknownFormatException {

    InputStream is = new ByteArrayInputStream(response);

    // Remove .gz ending
    String xmlUrl = url.toString().replaceFirst("\\.gz$", "");

    BOMInputStream decompressed = new BOMInputStream(new GZIPInputStream(is));
    try {
        InputSource in = new InputSource(decompressed);
        // The system id is the URL without ".gz", i.e. the name of the XML inside the archive.
        in.setSystemId(xmlUrl);
        return processXml(url, in);
    } finally {
        // Always release the stream, including when processXml throws.
        decompressed.close();
    }
}