Usage examples for the org.apache.commons.io.input.BOMInputStream constructor BOMInputStream(InputStream)
public BOMInputStream(InputStream delegate)
From source file:eu.apenet.dpt.utils.util.Ead2EdmInformation.java
/**
 * Parses the given file with a SAX parser, delivering the events to a new
 * {@code EadContentHandler}, and afterwards sets {@code roleType} to
 * {@code "UNSPECIFIED"} if it is still {@code null}.
 *
 * @param fileToRead the file to parse; it is read through a {@code BOMInputStream}
 * @throws IOException if the file cannot be opened or read
 * @throws SAXException if the parser reports an error
 * @throws ParserConfigurationException if no SAX parser can be created
 */
private void determineDaoInformation(File fileToRead)
        throws IOException, SAXException, ParserConfigurationException {
    SAXParserFactory spf = SAXParserFactory.newInstance();
    SAXParser sp = spf.newSAXParser();
    XMLReader xr = sp.getXMLReader();
    EadContentHandler myContentHandler = new EadContentHandler();
    xr.setContentHandler(myContentHandler);

    // Close the file even when parsing fails; previously the stream was never closed.
    // NOTE(review): the reader still decodes with the platform default charset, as before;
    // confirm whether the input is always UTF-8 and pass the charset explicitly if so.
    try (InputStreamReader reader =
            new InputStreamReader(new BOMInputStream(new FileInputStream(fileToRead)))) {
        xr.parse(new InputSource(reader));
    }

    if (this.roleType == null) {
        this.roleType = "UNSPECIFIED";
    }
}
From source file:de.uzk.hki.da.metadata.MetsMetadataStructure.java
/** * Append to each dmdSec in a Mets-File one accessCondition-Element and save it. * /* w ww . java2s . c om*/ * @param targetMetsFile * @param licenseHref * @param displayLabel * @param text * @throws IOException * @throws JDOMException */ public void appendAccessCondition(File targetMetsFile, String licenseHref, String displayLabel, String text) throws IOException, JDOMException { SAXBuilder builder = XMLUtils.createNonvalidatingSaxBuilder(); FileInputStream fileInputStream = new FileInputStream(Path.makeFile(workPath, targetMetsFile.getPath())); BOMInputStream bomInputStream = new BOMInputStream(fileInputStream); Reader reader = new InputStreamReader(bomInputStream, "UTF-8"); InputSource is = new InputSource(reader); is.setEncoding("UTF-8"); Document metsDoc = builder.build(is); List<Element> dmdSections = metsDoc.getRootElement().getChildren("dmdSec", C.METS_NS); for (int i = 0; i < dmdSections.size(); i++) { Element newAccessConditionE = generateAccessCondition(licenseHref, displayLabel, text); logger.debug("Append to Mets new LicenseElement: " + newAccessConditionE.toString()); Element dmdSecElement = (Element) dmdSections.get(i); Element modsXmlData = MetsParser.getModsXmlData(dmdSecElement); modsXmlData.addContent(newAccessConditionE); } fileInputStream.close(); bomInputStream.close(); reader.close(); writeDocumentToFile(metsDoc, Path.makeFile(workPath, targetMetsFile.getPath())); }
From source file:com.itemanalysis.jmetrik.file.JmetrikFileReader.java
/** * Opens a connection by instantiating the reader and CSVParser. It extracts the * header into the VariableAttributeMap and moves the cursor to the first row of data. * /*from w w w.ja va2 s .c om*/ * @throws IOException */ public void openConnection() throws IOException { //opens file and advances cursor to beginning of data dataReader = new InputStreamReader(new BOMInputStream(Files.newInputStream(f)), "UTF-8"); dataParser = new CSVParser(dataReader, CSVFormat.DEFAULT.withCommentMarker('#')); dataIterator = dataParser.iterator(); setVariableAttributes(dataIterator); //Get Column names String[] colNames = new String[variableAttributes.size()]; int index = 0; for (VariableName v : variableAttributes.keySet()) { colNames[index] = v.toString(); index++; } dataReader.close(); dataParser.close(); //Advance iterator to first data record (A little inefficient because loops over header a second time) //This inefficiency is because CSVReader only allows the header to be set in the constructor. dataReader = new InputStreamReader(new BOMInputStream(Files.newInputStream(f)), "UTF-8"); dataParser = new CSVParser(dataReader, CSVFormat.DEFAULT.withCommentMarker('#').withHeader(colNames)); dataIterator = dataParser.iterator(); advanceToFirstDataRecord(); }
From source file:com.itemanalysis.jmetrik.file.JmetrikFileImporter.java
/** * Create a header map to the CSV file, but imposes naming conventions on the column names. * *///from w ww. j a va2s . co m private void setVariableAttributes() { VariableAttributes variableAttributes = null; int position = 0; Reader reader = null; CSVParser parser = null; VariableName tempName = null; try { reader = new InputStreamReader(new BOMInputStream(new FileInputStream(dataFile)), "UTF-8"); parser = new CSVParser(reader, dataFileFormat.withHeader()); if (hasHeader) { Map<String, Integer> csvMap = parser.getHeaderMap(); for (String s : csvMap.keySet()) { variableAttributes = new VariableAttributes(new VariableName(s), new VariableLabel(""), DataType.INTEGER, position); variableAttributeMap.put(variableAttributes.getName(), variableAttributes); position++; } } else { Iterator<CSVRecord> iter = parser.iterator(); CSVRecord csvRecord = iter.next(); for (int i = 0; i < csvRecord.size(); i++) { variableAttributes = new VariableAttributes(new VariableName("v" + (i + 1)), new VariableLabel(""), DataType.INTEGER, position); variableAttributeMap.put(variableAttributes.getName(), variableAttributes); position++; } } } catch (IOException ex) { theException = ex; } finally { try { if (parser != null) parser.close(); if (reader != null) reader.close(); } catch (IOException ex) { theException = ex; } } }
From source file:com.github.anba.es6draft.chakra.ChakraTest.java
/**
 * Wraps the given stream in a {@code BOMInputStream} and returns a buffered reader
 * that decodes it with the charset chosen by {@code charsetFor}, to which UTF-8 is
 * passed as the second argument.
 *
 * @param is the stream to read from
 * @return a buffered reader over the wrapped stream
 * @throws IOException declared for the calls made while wrapping the stream
 */
private static BufferedReader bomReader(InputStream is) throws IOException {
    final BOMInputStream wrapped = new BOMInputStream(is);
    final Charset charset = charsetFor(wrapped, StandardCharsets.UTF_8);
    final InputStreamReader decoder = new InputStreamReader(wrapped, charset);
    return new BufferedReader(decoder);
}
From source file:de.uzk.hki.da.cb.CreateDCAction.java
/**
 * Creates the DC metadata file for the given audience: the metadata file of the
 * object's package type is run through the XSLT registered for that package type in
 * {@code getDcMappings()}, and the result is written to the
 * {@code METADATA_STREAM_ID_DC} metadata file using the charset named by
 * {@code C.ENCODING_UTF_8}.
 *
 * @param audience the audience passed to {@code wa.pipMetadataFile} to locate both files
 * @throws RuntimeException if no XSLT is registered for the package type, or if the
 *         DC file cannot be created
 */
private void copyDCdatastreamFromMetadata(String audience) {
    String xsltFile = getDcMappings().get(o.getPackage_type());
    if (xsltFile == null) {
        throw new RuntimeException("No conversion available for package type '" + o.getPackage_type()
                + "'. DC can not be created.");
    }

    // try-with-resources closes every stream even if closing an earlier one fails;
    // the old finally block stopped at the first close() that threw.
    try (FileInputStream inputStream = new FileInputStream(wa.pipMetadataFile(audience, o.getPackage_type()));
         BOMInputStream bomInputStream = new BOMInputStream(inputStream)) {
        XsltGenerator xsltGenerator = new XsltGenerator(xsltFile, bomInputStream);
        String result = xsltGenerator.generate();

        // The output file is opened only after the transformation has succeeded.
        File file = wa.pipMetadataFile(audience, METADATA_STREAM_ID_DC);
        try (FileOutputStream outputStream = new FileOutputStream(file)) {
            outputStream.write(result.getBytes(C.ENCODING_UTF_8.toLowerCase()));
            outputStream.flush();
        }
    } catch (Exception e) {
        throw new RuntimeException("Unable to create DC file.", e);
    }
}
From source file:gov.vha.isaac.rf2.filter.RF2Filter.java
private void handleFile(Path inputFile, Path outputFile) throws IOException { boolean justCopy = true; boolean justSkip = false; if (inputFile.toFile().getName().toLowerCase().endsWith(".txt")) { justCopy = false;/*w w w . j a v a 2 s .c o m*/ //Filter the file BufferedReader fileReader = new BufferedReader( new InputStreamReader(new BOMInputStream(new FileInputStream(inputFile.toFile())), "UTF-8")); BufferedWriter fileWriter = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(outputFile.toFile()), "UTF-8")); PipedOutputStream pos = new PipedOutputStream(); PipedInputStream pis = new PipedInputStream(pos); CSVReader csvReader = new CSVReader(new InputStreamReader(pis), '\t', CSVParser.NULL_CHARACTER); //don't look for quotes, the data is bad, and has floating instances of '"' all by itself boolean firstLine = true; String line = null; long kept = 0; long skipped = 0; long total = 0; int moduleColumn = -1; while ((line = fileReader.readLine()) != null) { total++; //Write this line into the CSV parser pos.write(line.getBytes()); pos.write("\r\n".getBytes()); pos.flush(); String[] fields = csvReader.readNext(); boolean correctModule = false; for (String ms : moduleStrings_) { if (moduleColumn >= 0 && fields[moduleColumn].equals(ms)) { correctModule = true; break; } } if (firstLine || correctModule) { kept++; fileWriter.write(line); fileWriter.write("\r\n"); } else { //don't write line skipped++; } if (firstLine) { log("Filtering file " + inputDirectory.toPath().relativize(inputFile).toString()); firstLine = false; if (fields.length < 2) { log("txt file doesn't look like a data file - abort and just copy."); justCopy = true; break; } for (int i = 0; i < fields.length; i++) { if (fields[i].equals("moduleId")) { moduleColumn = i; break; } } if (moduleColumn < 0) { log("No moduleId column found - skipping file"); justSkip = true; break; } } } fileReader.close(); csvReader.close(); fileWriter.close(); if (!justCopy) { log("Kept " + kept + " Skipped " + 
skipped + " out of " + total + " lines in " + inputDirectory.toPath().relativize(inputFile).toString()); } } if (justCopy) { //Just copy the file Files.copy(inputFile, outputFile, StandardCopyOption.REPLACE_EXISTING); log("Copied file " + inputDirectory.toPath().relativize(inputFile).toString()); } if (justSkip) { Files.delete(outputFile); log("Skipped file " + inputDirectory.toPath().relativize(inputFile).toString() + " because it doesn't contain a moduleId"); } }
From source file:com.medvision360.medrecord.spi.tck.RMTestBase.java
/**
 * Loads the classpath resource {@code "/" + archetypeId} and parses it with an
 * {@code ADLParser}, reading it through a {@code BOMInputStream}.
 *
 * @param archetypeId name of the resource, without the leading slash
 * @return the parsed archetype
 * @throws IllegalArgumentException if no such resource exists
 * @throws Exception if parsing fails
 */
protected Archetype loadArchetype(String archetypeId) throws Exception {
    // The resource stream is now closed once parsing finishes or fails.
    try (InputStream is = getClass().getResourceAsStream("/" + archetypeId)) {
        if (is == null) {
            // getResourceAsStream returns null for a missing resource; fail with a clear message.
            throw new IllegalArgumentException("Archetype resource not found: /" + archetypeId);
        }
        final ADLParser parser = new ADLParser(new BOMInputStream(is), false, false);
        return parser.parse();
    }
}
From source file:de.iteratec.iteraplan.businesslogic.exchange.nettoExport.NettoCSVTransformerTest.java
@Test public void testCSVTransformerForOverviewPage() { NettoTransformer csvTransformer = NettoCSVTransformer.newInstance(createSimpleOverviewPageTableStructure()); assertNotNull("Can't create netto transformer for overview page table structure for csv", csvTransformer); //Create output stream ByteArrayOutputStream out = new ByteArrayOutputStream(); csvTransformer.transform(sourceList, out, TypeOfBuildingBlock.INFORMATIONSYSTEMRELEASE); //assert that output stream InputStream in = new ByteArrayInputStream(out.toByteArray()); CSVReader reader;//w w w .java2 s .c o m try { //BOMInputStream is necessary, because of leading BOM in Stream //Otherwise assertion would fail BOMInputStream bOMInputStream = new BOMInputStream(in); ByteOrderMark bom = bOMInputStream.getBOM(); String charsetName = bom == null ? "UTF-8" : bom.getCharsetName(); reader = new CSVReader(new InputStreamReader(new BufferedInputStream(bOMInputStream), charsetName), ';'); List<String[]> allLines = reader.readAll(); int index = 0; SimpleDateFormat df = new SimpleDateFormat("EEE MMM dd HH:mm:ss z yyyy", Locale.US); for (String[] line : allLines) { String name = line[0]; String desc = line[1]; String start = line[2]; String end = line[3]; String status = line[4]; if (index == 0) { //assert the headers of CSV file assertEquals("Name und Version", name.trim()); assertEquals("Beschreibung", desc); assertEquals("von", start); assertEquals("bis", end); assertEquals("Status", status); } else { assertEquals(isArray[index - 1].getName(), name); assertEquals(isArray[index - 1].getDescription(), desc); assertEquals(df.format(isArray[index - 1].getRuntimePeriod().getStart()), start); assertEquals(df.format(isArray[index - 1].getRuntimePeriod().getEnd()), end); assertEquals(isArray[index - 1].getTypeOfStatus().toString(), status); } index++; } reader.close(); } catch (IOException e) { e.printStackTrace(); fail("Fail due to IO Exception"); } }
From source file:crawlercommons.sitemaps.SiteMapParser.java
/** * Process a text-based Sitemap. Text sitemaps only list URLs but no * priorities, last mods, etc./*from w w w. j a v a 2 s . c o m*/ * @param sitemapUrl * * @param content * @return The site map * @throws IOException */ protected SiteMap processText(String sitemapUrl, byte[] content) throws IOException { LOG.debug("Processing textual Sitemap"); SiteMap textSiteMap = new SiteMap(sitemapUrl); textSiteMap.setType(SitemapType.TEXT); BOMInputStream bomIs = new BOMInputStream(new ByteArrayInputStream(content)); @SuppressWarnings("resource") BufferedReader reader = new BufferedReader(new InputStreamReader(bomIs, "UTF-8")); String line; int i = 1; while ((line = reader.readLine()) != null) { if (line.length() > 0 && i <= MAX_URLS) { addUrlIntoSitemap(line, textSiteMap, null, null, null, i++); } } textSiteMap.setProcessed(true); return textSiteMap; }