List of usage examples for the org.apache.commons.io.input.BOMInputStream constructor BOMInputStream
public BOMInputStream(InputStream delegate)
From source file:com.andyasprou.webcrawler.Utilities.GenericSiteMapParser.java
/** * Parse the given XML content./*from w ww. j a v a 2 s .c o m*/ * * @param sitemapUrl URL to sitemap file * @param xmlContent the byte[] backing the sitemapUrl * @return The site map * @throws UnknownFormatException if there is an error parsing the sitemap */ protected AbstractSiteMap processXml(URL sitemapUrl, byte[] xmlContent) throws UnknownFormatException { BOMInputStream bomIs = new BOMInputStream(new ByteArrayInputStream(xmlContent)); InputSource is = new InputSource(); try { is.setCharacterStream(new BufferedReader(new InputStreamReader(bomIs, "UTF-8"))); } catch (UnsupportedEncodingException e) { IOUtils.closeQuietly(bomIs); throw new RuntimeException("Impossible exception", e); } return processXml(sitemapUrl, is); }
From source file:com.itemanalysis.jmetrik.file.JmetrikFileImporter.java
private void convertFile() { CSVParser parser = null;// w ww .ja v a2s. com Reader reader = null; CSVPrinter printer = null; Writer writer = null; try { if (outputFile.exists()) { if (!overwrite) { theException = new IOException("File already exists and overwrite==false"); return; } } else { outputFile.createNewFile(); } //For debugging // System.out.println("CREATED: " + outputFile.getAbsolutePath()); //Writer header to file writer = new OutputStreamWriter(new FileOutputStream(outputFile)); printer = new CSVPrinter(writer, CSVFormat.DEFAULT.withCommentMarker('#')); printer.printComment("VERSION"); printer.printRecord(new String[] { "jmetrik1" }); printer.printComment("METADATA"); printer.printRecord(new String[] { Integer.valueOf(nrow).toString() }); printer.printComment("ATTRIBUTES"); for (VariableName v : variableAttributeMap.keySet()) { printer.printRecord(variableAttributeMap.get(v).getAttributeArray()); } printer.printComment("DATA"); //Write data to file reader = new InputStreamReader(new BOMInputStream(new FileInputStream(dataFile)), "UTF-8"); parser = new CSVParser(reader, dataFileFormat); if (hasHeader) { parser = new CSVParser(reader, dataFileFormat.withHeader(colNames).withSkipHeaderRecord(true)); } else { parser = new CSVParser(reader, dataFileFormat.withHeader(colNames)); } Iterator<CSVRecord> iter = parser.iterator(); CSVRecord csvRecord = null; VariableAttributes variableAttributes = null; DataType dataType = null; String temp = ""; while (iter.hasNext()) { csvRecord = iter.next(); for (VariableName v : variableAttributeMap.keySet()) { temp = csvRecord.get(v.toString()); variableAttributes = variableAttributeMap.get(v); dataType = variableAttributes.getDataType(); if (!variableAttributes.isMissing(temp)) { if (DataType.INTEGER == dataType) { printer.print(Double.valueOf(Double.parseDouble(temp)).intValue()); } else if (DataType.DOUBLE == dataType) { printer.print(Double.parseDouble(temp)); } else { printer.print(temp); } } else { 
printer.print(temp); } } printer.println(); } } catch (IOException ex) { theException = ex; } finally { try { if (parser != null) parser.close(); if (reader != null) reader.close(); if (printer != null) printer.close(); if (writer != null) writer.close(); } catch (IOException ex) { theException = ex; logger.fatal(ex); } } }
From source file:com.andyasprou.webcrawler.Utilities.GenericSiteMapParser.java
/** * Process a text-based Sitemap. Text sitemaps only list URLs but no * priorities, last mods, etc./* w w w. j a v a2 s .c om*/ * @param sitemapUrl a string sitemap URL * @param sitemapUrl URL to sitemap file * @param content the byte[] backing the sitemapUrl * @return The site map * @throws IOException if there is an error reading in the site map String */ protected SiteMap processText(String sitemapUrl, byte[] content) throws IOException { SiteMap textSiteMap = new SiteMap(sitemapUrl); BOMInputStream bomIs = new BOMInputStream(new ByteArrayInputStream(content)); @SuppressWarnings("resource") BufferedReader reader = new BufferedReader(new InputStreamReader(bomIs, "UTF-8")); String line; int i = 1; while ((line = reader.readLine()) != null) { if (line.length() > 0 && i <= MAX_URLS) { addUrlIntoSitemap(line, textSiteMap, null, null, null, i++); } } textSiteMap.setProcessed(true); return textSiteMap; }
From source file:apiconnector.TestDataFunctionality.java
@Ignore @Test//www . j a va 2 s. co m public void testGetDataAsCsv() throws Exception { //client_read.setVerboseLevel(1); Random random = new Random(); Map<String, String> filters = new TreeMap<String, String>(); filters.put("tag", "study_14"); DataSet[] all = client_read.dataList(filters).getData(); for (int i = 0; i < 5;) { DataSet current = all[random.nextInt(all.length)]; String numInst = current.getQualityMap().get("NumberOfInstances"); if (current.getFileId() == null || !current.getFormat().toLowerCase().equals("arff")) { continue; } String fullUrl = url + "data/get_csv/" + current.getFileId() + "/" + current.getName() + ".csv"; System.out.println(fullUrl); final URL url = new URL(fullUrl); final Reader reader = new InputStreamReader(new BOMInputStream(url.openStream()), "UTF-8"); final CSVParser parser = new CSVParser(reader, CSVFormat.DEFAULT); try { if (numInst != null) { int numberOfInstances = (int) Double.parseDouble(numInst); assertEquals(parser.getRecords().size(), numberOfInstances); } } finally { parser.close(); reader.close(); } // important i += 1; } }
From source file:de.uzk.hki.da.metadata.EadMetsMetadataStructure.java
/**
 * Parses the given METS file as UTF-8 XML with a non-validating builder.
 *
 * <p>The parsed document is stored in the {@code eadDoc} field and also
 * returned.
 *
 * @param metsFile the file to parse
 * @return the parsed document
 * @throws JDOMException if the builder fails to parse the content
 * @throws IOException if the file cannot be opened, read or closed
 */
private Document getMetsDocument(File metsFile) throws JDOMException, IOException {
    SAXBuilder builder = XMLUtils.createNonvalidatingSaxBuilder();
    FileInputStream fileInputStream = new FileInputStream(metsFile);
    try {
        BOMInputStream bomInputStream = new BOMInputStream(fileInputStream);
        Reader reader = new InputStreamReader(bomInputStream, "UTF-8");
        InputSource is = new InputSource(reader);
        is.setEncoding("UTF-8");
        eadDoc = builder.build(is);
        return eadDoc;
    } finally {
        // Release the file handle whether or not parsing succeeded.
        fileInputStream.close();
    }
}
From source file:crawlercommons.sitemaps.SiteMapParser.java
/** * Decompress the gzipped content and process the resulting XML Sitemap. * //from w ww. j ava2 s .co m * @param url * - URL of the gzipped content * @param response * - Gzipped content * @return the site map * @throws MalformedURLException * @throws IOException * @throws UnknownFormatException */ protected AbstractSiteMap processGzip(URL url, byte[] response) throws MalformedURLException, IOException, UnknownFormatException { LOG.debug("Processing gzip"); AbstractSiteMap smi; InputStream is = new ByteArrayInputStream(response); // Remove .gz ending String xmlUrl = url.toString().replaceFirst("\\.gz$", ""); LOG.debug("XML url = {}", xmlUrl); BOMInputStream decompressed = new BOMInputStream(new GZIPInputStream(is)); InputSource in = new InputSource(decompressed); in.setSystemId(xmlUrl); smi = processXml(url, in); decompressed.close(); return smi; }
From source file:com.hangum.tadpole.importdb.core.dialog.importdb.sql.SQLToDBImportDialog.java
private void insert() throws IOException { int ret;/*from w w w. ja va2 s . c om*/ BOMInputStream bomInputStream = null; File[] arryFiles = receiver.getTargetFiles(); if (arryFiles.length == 0) { MessageDialog.openError(null, Messages.CsvToRDBImportDialog_4, Messages.CsvToRDBImportDialog_21); return; } if (!MessageDialog.openConfirm(null, Messages.CsvToRDBImportDialog_4, Messages.SQLToDBImportDialog_UploadQuestion)) return; bufferBatchResult = new StringBuffer(); try { batchSize = Integer.valueOf(textBatchSize.getText()); } catch (Exception e) { batchSize = 1000; } File userUploadFile = arryFiles[arryFiles.length - 1]; try { // bom? charset? ? ?. bomInputStream = new BOMInputStream(FileUtils.openInputStream(FileUtils.getFile(userUploadFile)));//`, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE); String charsetName = "utf-8"; String strSQLData = ""; if (bomInputStream.getBOM() == null) { strSQLData = FileUtils.readFileToString(userUploadFile, charsetName); } else { charsetName = bomInputStream.getBOMCharsetName(); strSQLData = FileUtils.readFileToString(userUploadFile, charsetName).substring(1); } String[] strArrySQL = StringUtils.split(strSQLData, textSeprator.getText()); ret = runSQLExecuteBatch(Arrays.asList(strArrySQL)); if (ret == 0) MessageDialog.openInformation(null, "Confirm", Messages.SQLToDBImportDialog_StoreData); //$NON-NLS-1$ } catch (IOException e) { logger.error(Messages.SQLToDBImportDialog_ReadError, e); MessageDialog.openError(null, Messages.CsvToRDBImportDialog_4, Messages.SQLToDBImportDialog_LoadException + e.getMessage()); } catch (Exception e) { logger.error(Messages.SQLToDBImportDialog_ImportException, e); MessageDialog.openError(null, Messages.CsvToRDBImportDialog_4, Messages.SQLToDBImportDialog_LoadException + e.getMessage()); } finally { if (bomInputStream != null) bomInputStream.close(); } }
From source file:crawlercommons.sitemaps.SiteMapParserSAX.java
/**
 * Parse the given XML content.
 *
 * <p>Wraps the bytes in a {@link BOMInputStream}, decodes them with the
 * {@code UTF_8} charset and delegates to the {@code InputSource} overload.
 *
 * @param sitemapUrl
 *            URL to sitemap file
 * @param xmlContent
 *            the byte[] backing the sitemapUrl
 * @return The site map
 * @throws UnknownFormatException
 *             if there is an error parsing the sitemap
 */
protected AbstractSiteMap processXml(URL sitemapUrl, byte[] xmlContent) throws UnknownFormatException {
    final BOMInputStream rawXml = new BOMInputStream(new ByteArrayInputStream(xmlContent));
    final BufferedReader decodedXml = new BufferedReader(new InputStreamReader(rawXml, UTF_8));
    // The InputSource(Reader) constructor installs the reader as the character stream.
    final InputSource source = new InputSource(decodedXml);
    return processXml(sitemapUrl, source);
}
From source file:com.hangum.tadpole.importexport.core.dialogs.SQLToDBImportDialog.java
private void insert() throws IOException { int ret;//from w w w .jav a 2 s . c o m BOMInputStream bomInputStream = null; File[] arryFiles = receiver.getTargetFiles(); if (arryFiles.length == 0) { MessageDialog.openWarning(null, Messages.get().Warning, Messages.get().CsvToRDBImportDialog_21); return; } if (!MessageDialog.openConfirm(null, Messages.get().Confirm, Messages.get().SQLToDBImportDialog_UploadQuestion)) return; bufferBatchResult = new StringBuffer(); try { batchSize = Integer.valueOf(textBatchSize.getText()); } catch (Exception e) { batchSize = 1000; } File userUploadFile = arryFiles[arryFiles.length - 1]; try { // bom? charset? ? ?. bomInputStream = new BOMInputStream(FileUtils.openInputStream(FileUtils.getFile(userUploadFile)));//`, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE); String charsetName = "utf-8"; //$NON-NLS-1$ String strSQLData = ""; //$NON-NLS-1$ if (bomInputStream.getBOM() == null) { strSQLData = FileUtils.readFileToString(userUploadFile, charsetName); } else { charsetName = bomInputStream.getBOMCharsetName(); strSQLData = FileUtils.readFileToString(userUploadFile, charsetName).substring(1); } String[] strArrySQL = StringUtils.split(strSQLData, textSeprator.getText()); ret = runSQLExecuteBatch(Arrays.asList(strArrySQL)); if (ret == 0) MessageDialog.openInformation(null, Messages.get().Confirm, Messages.get().SQLToDBImportDialog_StoreData); //$NON-NLS-1$ } catch (IOException e) { logger.error(Messages.get().SQLToDBImportDialog_ReadError, e); MessageDialog.openError(null, Messages.get().Confirm, Messages.get().SQLToDBImportDialog_LoadException + e.getMessage()); } catch (Exception e) { logger.error(Messages.get().SQLToDBImportDialog_ImportException, e); MessageDialog.openError(null, Messages.get().Confirm, Messages.get().SQLToDBImportDialog_LoadException + e.getMessage()); } finally { if (bomInputStream != null) bomInputStream.close(); } }
From source file:com.andyasprou.webcrawler.Utilities.GenericSiteMapParser.java
/** * Decompress the gzipped content and process the resulting XML Sitemap. * * @param url//from www . ja v a 2 s . c om * - URL of the gzipped content * @param response * - Gzipped content * @return the site map * @throws UnknownFormatException if there is an error parsing the gzip * @throws IOException if there is an error reading in the gzip {@link java.net.URL} */ protected AbstractSiteMap processGzip(URL url, byte[] response) throws IOException, UnknownFormatException { AbstractSiteMap smi; InputStream is = new ByteArrayInputStream(response); // Remove .gz ending String xmlUrl = url.toString().replaceFirst("\\.gz$", ""); BOMInputStream decompressed = new BOMInputStream(new GZIPInputStream(is)); InputSource in = new InputSource(decompressed); in.setSystemId(xmlUrl); smi = processXml(url, in); decompressed.close(); return smi; }