Example usage of the org.apache.commons.io.input.BOMInputStream constructor BOMInputStream(InputStream)

Introduction

This page collects example usages of the org.apache.commons.io.input.BOMInputStream constructor BOMInputStream(InputStream).

Prototype

public BOMInputStream(InputStream delegate)

Source Link

Document

Constructs a new BOMInputStream that detects and excludes a UTF-8 byte order mark (ByteOrderMark.UTF_8) at the start of the delegate stream.

Usage

From source file:eu.apenet.dpt.utils.util.Ead2EdmInformation.java

/**
 * Parses the given file with a SAX parser, feeding its events to a new
 * {@code EadContentHandler}, and afterwards defaults {@code roleType} to
 * {@code "UNSPECIFIED"} if it is still {@code null}.
 *
 * @param fileToRead the XML file to parse
 * @throws IOException if the file cannot be opened or read
 * @throws SAXException if the parser reports an error
 * @throws ParserConfigurationException if no SAX parser can be created
 */
private void determineDaoInformation(File fileToRead)
        throws IOException, SAXException, ParserConfigurationException {
    SAXParserFactory spf = SAXParserFactory.newInstance();
    SAXParser sp = spf.newSAXParser();
    XMLReader xr = sp.getXMLReader();
    EadContentHandler myContentHandler = new EadContentHandler();
    xr.setContentHandler(myContentHandler);

    // Give the parser the raw bytes instead of a Reader: a Reader created without
    // an explicit charset decodes in the platform default and bypasses the parser's
    // own encoding detection (XML declaration). try-with-resources guarantees the
    // file handle is released even when parsing fails.
    try (BOMInputStream in = new BOMInputStream(new FileInputStream(fileToRead))) {
        xr.parse(new InputSource(in));
    }

    if (this.roleType == null) {
        this.roleType = "UNSPECIFIED";
    }
}

From source file:de.uzk.hki.da.metadata.MetsMetadataStructure.java

/**
 * Append to each dmdSec in a Mets-File one accessCondition-Element and save it.
 * /* w  ww  . java2s  .  c  om*/
 * @param targetMetsFile
 * @param licenseHref
 * @param displayLabel
 * @param text
 * @throws IOException
 * @throws JDOMException
 */
public void appendAccessCondition(File targetMetsFile, String licenseHref, String displayLabel, String text)
        throws IOException, JDOMException {
    SAXBuilder builder = XMLUtils.createNonvalidatingSaxBuilder();

    FileInputStream fileInputStream = new FileInputStream(Path.makeFile(workPath, targetMetsFile.getPath()));
    BOMInputStream bomInputStream = new BOMInputStream(fileInputStream);
    Reader reader = new InputStreamReader(bomInputStream, "UTF-8");
    InputSource is = new InputSource(reader);
    is.setEncoding("UTF-8");
    Document metsDoc = builder.build(is);

    List<Element> dmdSections = metsDoc.getRootElement().getChildren("dmdSec", C.METS_NS);

    for (int i = 0; i < dmdSections.size(); i++) {
        Element newAccessConditionE = generateAccessCondition(licenseHref, displayLabel, text);
        logger.debug("Append to Mets new LicenseElement: " + newAccessConditionE.toString());
        Element dmdSecElement = (Element) dmdSections.get(i);
        Element modsXmlData = MetsParser.getModsXmlData(dmdSecElement);
        modsXmlData.addContent(newAccessConditionE);
    }
    fileInputStream.close();
    bomInputStream.close();
    reader.close();

    writeDocumentToFile(metsDoc, Path.makeFile(workPath, targetMetsFile.getPath()));
}

From source file:com.itemanalysis.jmetrik.file.JmetrikFileReader.java

/**
 * Opens a connection by instantiating the reader and CSVParser. It extracts the
 * header into the VariableAttributeMap and moves the cursor to the first row of data.
 * /*from  w w w.ja va2 s .c  om*/
 * @throws IOException
 */
public void openConnection() throws IOException {
    //opens file and advances cursor to beginning of data
    dataReader = new InputStreamReader(new BOMInputStream(Files.newInputStream(f)), "UTF-8");
    dataParser = new CSVParser(dataReader, CSVFormat.DEFAULT.withCommentMarker('#'));
    dataIterator = dataParser.iterator();
    setVariableAttributes(dataIterator);

    //Get Column names
    String[] colNames = new String[variableAttributes.size()];
    int index = 0;
    for (VariableName v : variableAttributes.keySet()) {
        colNames[index] = v.toString();
        index++;
    }
    dataReader.close();
    dataParser.close();

    //Advance iterator to first data record (A little inefficient because loops over header a second time)
    //This inefficiency is because CSVReader only allows the header to be set in the constructor.
    dataReader = new InputStreamReader(new BOMInputStream(Files.newInputStream(f)), "UTF-8");
    dataParser = new CSVParser(dataReader, CSVFormat.DEFAULT.withCommentMarker('#').withHeader(colNames));
    dataIterator = dataParser.iterator();
    advanceToFirstDataRecord();
}

From source file:com.itemanalysis.jmetrik.file.JmetrikFileImporter.java

/**
 * Create a header map to the CSV file, but imposes naming conventions on the column names.
 * <p>
 * With {@code hasHeader} set, one variable is registered per header column, named
 * after the column. Otherwise the first record determines the column count and the
 * variables are named {@code v1}, {@code v2}, ... Every variable is registered as
 * {@code DataType.INTEGER} with an empty label. An {@link IOException} is not
 * thrown but stored in {@code theException}.
 */
private void setVariableAttributes() {
    int position = 0;

    // Only ask the parser to treat the first record as a header when the file
    // really has one; otherwise the first data row would be consumed (and
    // validated) as column names.
    CSVFormat format = hasHeader ? dataFileFormat.withHeader() : dataFileFormat;

    // try-with-resources closes the parser and the reader independently, so a
    // failing close of one can no longer skip the close of the other.
    try (Reader reader = new InputStreamReader(new BOMInputStream(new FileInputStream(dataFile)), "UTF-8");
            CSVParser parser = new CSVParser(reader, format)) {

        if (hasHeader) {
            Map<String, Integer> csvMap = parser.getHeaderMap();
            for (String s : csvMap.keySet()) {
                VariableAttributes variableAttributes = new VariableAttributes(new VariableName(s),
                        new VariableLabel(""), DataType.INTEGER, position);
                variableAttributeMap.put(variableAttributes.getName(), variableAttributes);
                position++;
            }
        } else {
            Iterator<CSVRecord> iter = parser.iterator();
            // An empty file has no first record; leave the map empty instead of failing.
            if (iter.hasNext()) {
                CSVRecord csvRecord = iter.next();

                for (int i = 0; i < csvRecord.size(); i++) {
                    VariableAttributes variableAttributes = new VariableAttributes(
                            new VariableName("v" + (i + 1)), new VariableLabel(""), DataType.INTEGER, position);
                    variableAttributeMap.put(variableAttributes.getName(), variableAttributes);
                    position++;
                }
            }
        }

    } catch (IOException ex) {
        theException = ex;
    }
}

From source file:com.github.anba.es6draft.chakra.ChakraTest.java

/**
 * Wraps the given stream in a {@code BOMInputStream} and returns a buffered
 * reader over it. The charset is whatever {@code charsetFor} returns for the
 * wrapped stream when given UTF-8 as its second argument.
 *
 * @param is the stream to read from
 * @return a buffered reader over the wrapped stream
 * @throws IOException if determining the charset fails
 */
private static BufferedReader bomReader(InputStream is) throws IOException {
    final BOMInputStream bomStream = new BOMInputStream(is);
    final InputStreamReader decoder = new InputStreamReader(bomStream,
            charsetFor(bomStream, StandardCharsets.UTF_8));
    return new BufferedReader(decoder);
}

From source file:de.uzk.hki.da.cb.CreateDCAction.java

/**
 * Creates the DC metadata file for the given audience by running the XSLT that
 * is mapped to the object's package type over the corresponding metadata file
 * (a leading UTF-8 BOM is skipped) and writing the result to the
 * {@code METADATA_STREAM_ID_DC} metadata file.
 *
 * @param audience the audience whose metadata file is read and whose DC file is written
 * @throws RuntimeException if no XSLT is mapped to the package type or the DC
 *         file cannot be created
 */
private void copyDCdatastreamFromMetadata(String audience) {

    String xsltFile = getDcMappings().get(o.getPackage_type());
    if (xsltFile == null) {
        throw new RuntimeException("No conversion available for package type '" + o.getPackage_type()
                + "'. DC can not be created.");
    }

    // try-with-resources closes every stream independently and keeps the primary
    // exception; the previous finally block could skip closes and mask the cause.
    try (FileInputStream inputStream = new FileInputStream(wa.pipMetadataFile(audience, o.getPackage_type()));
            BOMInputStream bomInputStream = new BOMInputStream(inputStream)) {

        XsltGenerator xsltGenerator = new XsltGenerator(xsltFile, bomInputStream);
        String result = xsltGenerator.generate();

        File file = wa.pipMetadataFile(audience, METADATA_STREAM_ID_DC);
        try (FileOutputStream outputStream = new FileOutputStream(file)) {
            outputStream.write(result.getBytes(C.ENCODING_UTF_8.toLowerCase()));
            outputStream.flush();
        }

    } catch (Exception e) {
        throw new RuntimeException("Unable to create DC file.", e);
    }

}

From source file:gov.vha.isaac.rf2.filter.RF2Filter.java

/**
 * Copies, filters or skips a single input file.
 * <ul>
 * <li>A file whose name does not end in ".txt" is copied unchanged.</li>
 * <li>A ".txt" file is read as tab-separated UTF-8 text. The first line and every
 * line whose {@code moduleId} column equals one of {@code moduleStrings_} are
 * written to the output file; all other lines are dropped.</li>
 * <li>A ".txt" file whose first line has fewer than two fields is copied unchanged.</li>
 * <li>A ".txt" file without a {@code moduleId} column produces no output file.</li>
 * </ul>
 *
 * @param inputFile the file to process
 * @param outputFile the location the copied or filtered file is written to
 * @throws IOException if reading, writing, copying or deleting fails
 */
private void handleFile(Path inputFile, Path outputFile) throws IOException {
    boolean justCopy = true;
    boolean justSkip = false;

    if (inputFile.toFile().getName().toLowerCase().endsWith(".txt")) {
        justCopy = false;
        //Filter the file
        long kept = 0;
        long skipped = 0;
        long total = 0;

        // Each line is parsed directly. Pushing lines through a piped stream pair on
        // a single thread blocks forever once a line exceeds the pipe's buffer, and
        // additionally round-trips the text through the platform default charset.
        //don't look for quotes, the data is bad, and has floating instances of '"' all by itself
        CSVParser lineParser = new CSVParser('\t', CSVParser.NULL_CHARACTER);

        // try-with-resources: both files are closed before the copy/delete below,
        // also when filtering fails half-way.
        try (BufferedReader fileReader = new BufferedReader(
                new InputStreamReader(new BOMInputStream(new FileInputStream(inputFile.toFile())), "UTF-8"));
                BufferedWriter fileWriter = new BufferedWriter(
                        new OutputStreamWriter(new FileOutputStream(outputFile.toFile()), "UTF-8"))) {

            boolean firstLine = true;
            int moduleColumn = -1;
            String line;

            while ((line = fileReader.readLine()) != null) {
                total++;
                String[] fields = lineParser.parseLine(line);

                boolean correctModule = false;
                for (String ms : moduleStrings_) {
                    if (moduleColumn >= 0 && fields[moduleColumn].equals(ms)) {
                        correctModule = true;
                        break;
                    }
                }

                // The header line is always kept.
                if (firstLine || correctModule) {
                    kept++;
                    fileWriter.write(line);
                    fileWriter.write("\r\n");
                } else {
                    //don't write line
                    skipped++;
                }

                if (firstLine) {

                    log("Filtering file " + inputDirectory.toPath().relativize(inputFile).toString());
                    firstLine = false;
                    if (fields.length < 2) {
                        log("txt file doesn't look like a data file - abort and just copy.");
                        justCopy = true;
                        break;
                    }
                    // Locate the column that the remaining lines are filtered on.
                    for (int i = 0; i < fields.length; i++) {
                        if (fields[i].equals("moduleId")) {
                            moduleColumn = i;
                            break;
                        }
                    }
                    if (moduleColumn < 0) {
                        log("No moduleId column found - skipping file");
                        justSkip = true;
                        break;
                    }
                }
            }
        }

        if (!justCopy) {
            log("Kept " + kept + " Skipped " + skipped + " out of " + total + " lines in "
                    + inputDirectory.toPath().relativize(inputFile).toString());
        }
    }

    if (justCopy) {
        //Just copy the file (replaces a partially written filter result, if any)
        Files.copy(inputFile, outputFile, StandardCopyOption.REPLACE_EXISTING);
        log("Copied file " + inputDirectory.toPath().relativize(inputFile).toString());
    }

    if (justSkip) {
        // Remove the output file that was created while inspecting the header.
        Files.delete(outputFile);
        log("Skipped file " + inputDirectory.toPath().relativize(inputFile).toString()
                + " because it doesn't contain a moduleId");
    }
}

From source file:com.medvision360.medrecord.spi.tck.RMTestBase.java

/**
 * Loads the classpath resource {@code "/" + archetypeId} and parses it with an
 * {@code ADLParser}; a leading UTF-8 BOM is skipped.
 *
 * @param archetypeId name of the resource, relative to the classpath root
 * @return the parsed archetype
 * @throws IllegalArgumentException if no such resource exists
 * @throws Exception if the resource cannot be read or parsed
 */
protected Archetype loadArchetype(String archetypeId) throws Exception {
    InputStream is = getClass().getResourceAsStream("/" + archetypeId);
    if (is == null) {
        // Fail with the resource name instead of an anonymous failure further down.
        throw new IllegalArgumentException("Archetype resource not found on classpath: /" + archetypeId);
    }

    // Release the resource stream once parsing has finished (or failed).
    try (BOMInputStream bomStream = new BOMInputStream(is)) {
        final ADLParser parser = new ADLParser(bomStream, false, false);
        return parser.parse();
    }
}

From source file:de.iteratec.iteraplan.businesslogic.exchange.nettoExport.NettoCSVTransformerTest.java

/**
 * Transforms {@code sourceList} to CSV with the simple overview page table
 * structure, reads the result back and checks the header row as well as one
 * row per entry of {@code isArray}.
 */
@Test
public void testCSVTransformerForOverviewPage() {

    NettoTransformer csvTransformer = NettoCSVTransformer.newInstance(createSimpleOverviewPageTableStructure());
    assertNotNull("Can't create netto transformer for overview page table structure for csv", csvTransformer);

    // Render into memory
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    csvTransformer.transform(sourceList, buffer, TypeOfBuildingBlock.INFORMATIONSYSTEMRELEASE);

    // Read the rendered bytes back
    InputStream rendered = new ByteArrayInputStream(buffer.toByteArray());
    try {
        // The output starts with a BOM; it has to be stripped, otherwise the
        // comparison of the first header cell would fail.
        BOMInputStream withoutBom = new BOMInputStream(rendered);
        ByteOrderMark detected = withoutBom.getBOM();
        String charsetName;
        if (detected == null) {
            charsetName = "UTF-8";
        } else {
            charsetName = detected.getCharsetName();
        }

        CSVReader reader = new CSVReader(
                new InputStreamReader(new BufferedInputStream(withoutBom), charsetName), ';');
        List<String[]> rows = reader.readAll();

        SimpleDateFormat dateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss z yyyy", Locale.US);

        for (int row = 0; row < rows.size(); row++) {
            String[] cells = rows.get(row);

            String nameCell = cells[0];
            String descriptionCell = cells[1];
            String startCell = cells[2];
            String endCell = cells[3];
            String statusCell = cells[4];

            if (row > 0) {
                // Data rows: row n corresponds to isArray[n - 1]
                assertEquals(isArray[row - 1].getName(), nameCell);
                assertEquals(isArray[row - 1].getDescription(), descriptionCell);
                assertEquals(dateFormat.format(isArray[row - 1].getRuntimePeriod().getStart()), startCell);
                assertEquals(dateFormat.format(isArray[row - 1].getRuntimePeriod().getEnd()), endCell);
                assertEquals(isArray[row - 1].getTypeOfStatus().toString(), statusCell);
            } else {
                // Header row of the CSV file
                assertEquals("Name und Version", nameCell.trim());
                assertEquals("Beschreibung", descriptionCell);
                assertEquals("von", startCell);
                assertEquals("bis", endCell);
                assertEquals("Status", statusCell);
            }
        }

        reader.close();

    } catch (IOException e) {
        e.printStackTrace();
        fail("Fail due to IO Exception");
    }

}

From source file:crawlercommons.sitemaps.SiteMapParser.java

/**
 * Process a text-based Sitemap. Text sitemaps only list URLs but no
 * priorities, last mods, etc./*from   w  w  w. j  a v a 2  s  .  c  o m*/
 * @param sitemapUrl 
 * 
 * @param content
 * @return The site map
 * @throws IOException
 */
protected SiteMap processText(String sitemapUrl, byte[] content) throws IOException {
    LOG.debug("Processing textual Sitemap");

    SiteMap textSiteMap = new SiteMap(sitemapUrl);
    textSiteMap.setType(SitemapType.TEXT);

    BOMInputStream bomIs = new BOMInputStream(new ByteArrayInputStream(content));
    @SuppressWarnings("resource")
    BufferedReader reader = new BufferedReader(new InputStreamReader(bomIs, "UTF-8"));

    String line;
    int i = 1;
    while ((line = reader.readLine()) != null) {
        if (line.length() > 0 && i <= MAX_URLS) {
            addUrlIntoSitemap(line, textSiteMap, null, null, null, i++);
        }
    }
    textSiteMap.setProcessed(true);

    return textSiteMap;
}