Example usage for the org.apache.commons.io.input.BOMInputStream constructor BOMInputStream(InputStream)

Introduction

On this page you can find example usages of the org.apache.commons.io.input.BOMInputStream constructor BOMInputStream(InputStream), drawn from open-source projects.

Prototype

public BOMInputStream(InputStream delegate) 

Document

Constructs a new BOM InputStream that excludes a ByteOrderMark.UTF_8 BOM.
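
Before the project examples below, here is a minimal, self-contained sketch of the constructor in isolation: it wraps an arbitrary InputStream so that a leading UTF-8 BOM, if present, is skipped transparently before the bytes are decoded. The file name "data.txt" is only an illustrative placeholder. Note that this single-argument constructor only handles a UTF-8 BOM; BOMs of other encodings require the overload that takes explicit ByteOrderMark arguments.

import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;

import org.apache.commons.io.input.BOMInputStream;

public class BOMInputStreamExample {
    public static void main(String[] args) throws Exception {
        // "data.txt" is an illustrative path; any InputStream can be wrapped.
        try (InputStream in = new FileInputStream("data.txt");
                // The single-argument constructor detects and excludes a UTF-8 BOM.
                Reader reader = new InputStreamReader(new BOMInputStream(in), StandardCharsets.UTF_8)) {
            int c;
            while ((c = reader.read()) != -1) {
                System.out.print((char) c);
            }
        }
    }
}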

Usage

From source file:org.apache.flex.compiler.internal.embedding.transcoders.XMLTranscoder.java

private String getXMLString(Collection<ICompilerProblem> problems) {
    InputStream strm = getDataStream(problems);
    if (strm == null)
        return "";

    Reader reader = null;
    BOMInputStream bomStream = null;
    StringBuilder str = new StringBuilder();
    try {
        bomStream = new BOMInputStream(strm);
        String bomCharsetName = bomStream.getBOMCharsetName();
        if (bomCharsetName == null) {
            if (encoding == null || encoding.length() == 0) {
                bomCharsetName = System.getProperty("file.encoding");
            } else {
                bomCharsetName = encoding;
            }
        }

        reader = new InputStreamReader(bomStream, bomCharsetName);
        char[] line = new char[2048];
        int count = 0;
        while ((count = reader.read(line, 0, line.length)) >= 0) {
            str.append(line, 0, count);
        }
    } catch (IOException e) {
        problems.add(new EmbedSourceAttributeCouldNotBeReadProblem(source));
    } finally {
        if (bomStream != null) {
            try {
                bomStream.close();
            } catch (IOException e) {
            }
        }

        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
            }
        }
    }

    return str.toString();
}

From source file:org.apache.nifi.csv.CSVHeaderSchemaStrategy.java

@Override
public RecordSchema getSchema(Map<String, String> variables, final InputStream contentStream,
        final RecordSchema readSchema) throws SchemaNotFoundException {
    if (this.context == null) {
        throw new SchemaNotFoundException(
                "Schema Access Strategy intended only for validation purposes and cannot obtain schema");
    }

    try {
        final CSVFormat csvFormat = CSVUtils.createCSVFormat(context).withFirstRecordAsHeader();
        try (final Reader reader = new InputStreamReader(new BOMInputStream(contentStream));
                final CSVParser csvParser = new CSVParser(reader, csvFormat)) {

            final List<RecordField> fields = new ArrayList<>();
            for (final String columnName : csvParser.getHeaderMap().keySet()) {
                fields.add(new RecordField(columnName, RecordFieldType.STRING.getDataType(), true));
            }

            return new SimpleRecordSchema(fields);
        }
    } catch (final Exception e) {
        throw new SchemaNotFoundException("Failed to read Header line from CSV", e);
    }
}

From source file:org.apache.nifi.csv.CSVRecordReader.java

public CSVRecordReader(final InputStream in, final ComponentLog logger, final RecordSchema schema,
        final CSVFormat csvFormat, final boolean hasHeader, final boolean ignoreHeader, final String dateFormat,
        final String timeFormat, final String timestampFormat, final String encoding) throws IOException {

    this.schema = schema;
    final DateFormat df = dateFormat == null ? null : DataTypeUtils.getDateFormat(dateFormat);
    final DateFormat tf = timeFormat == null ? null : DataTypeUtils.getDateFormat(timeFormat);
    final DateFormat tsf = timestampFormat == null ? null : DataTypeUtils.getDateFormat(timestampFormat);

    LAZY_DATE_FORMAT = () -> df;
    LAZY_TIME_FORMAT = () -> tf;
    LAZY_TIMESTAMP_FORMAT = () -> tsf;

    final Reader reader = new InputStreamReader(new BOMInputStream(in), encoding);

    CSVFormat withHeader;
    if (hasHeader) {
        withHeader = csvFormat.withSkipHeaderRecord();

        if (ignoreHeader) {
            withHeader = withHeader.withHeader(schema.getFieldNames().toArray(new String[0]));
        }
    } else {
        withHeader = csvFormat.withHeader(schema.getFieldNames().toArray(new String[0]));
    }

    csvParser = new CSVParser(reader, withHeader);
}

From source file:org.apache.nifi.csv.CSVRecordSource.java

public CSVRecordSource(final InputStream in, final PropertyContext context) throws IOException {
    final String charset = context.getProperty(CSVUtils.CHARSET).getValue();

    final Reader reader;
    try {
        reader = new InputStreamReader(new BOMInputStream(in), charset);
    } catch (UnsupportedEncodingException e) {
        throw new ProcessException(e);
    }

    final CSVFormat csvFormat = CSVUtils.createCSVFormat(context).withFirstRecordAsHeader().withTrim();
    final CSVParser csvParser = new CSVParser(reader, csvFormat);
    fieldNames = Collections.unmodifiableList(new ArrayList<>(csvParser.getHeaderMap().keySet()));

    csvRecordIterator = csvParser.iterator();
}

From source file:org.apache.nifi.csv.JacksonCSVRecordReader.java

public JacksonCSVRecordReader(final InputStream in, final ComponentLog logger, final RecordSchema schema,
        final CSVFormat csvFormat, final boolean hasHeader, final boolean ignoreHeader, final String dateFormat,
        final String timeFormat, final String timestampFormat, final String encoding) throws IOException {
    super(logger, schema, hasHeader, ignoreHeader, dateFormat, timeFormat, timestampFormat);

    final Reader reader = new InputStreamReader(new BOMInputStream(in));

    CsvSchema.Builder csvSchemaBuilder = CsvSchema.builder().setColumnSeparator(csvFormat.getDelimiter())
            .setLineSeparator(csvFormat.getRecordSeparator())
            // Can only use comments in Jackson CSV if the correct marker is set
            .setAllowComments("#".equals(CharUtils.toString(csvFormat.getCommentMarker())))
            // The call to setUseHeader(false) in all code paths is due to the way Jackson does data binding/mapping. Missing or extra columns may not
            // be handled correctly when using the header for mapping.
            .setUseHeader(false);

    csvSchemaBuilder = (csvFormat.getQuoteCharacter() == null) ? csvSchemaBuilder
            : csvSchemaBuilder.setQuoteChar(csvFormat.getQuoteCharacter());
    csvSchemaBuilder = (csvFormat.getEscapeCharacter() == null) ? csvSchemaBuilder
            : csvSchemaBuilder.setEscapeChar(csvFormat.getEscapeCharacter());

    if (hasHeader) {
        if (ignoreHeader) {
            csvSchemaBuilder = csvSchemaBuilder.setSkipFirstDataRow(true);
        }
    }

    CsvSchema csvSchema = csvSchemaBuilder.build();

    // Add remaining config options to the mapper
    List<CsvParser.Feature> features = new ArrayList<>();
    features.add(CsvParser.Feature.INSERT_NULLS_FOR_MISSING_COLUMNS);
    if (csvFormat.getIgnoreEmptyLines()) {
        features.add(CsvParser.Feature.SKIP_EMPTY_LINES);
    }
    if (csvFormat.getTrim()) {
        features.add(CsvParser.Feature.TRIM_SPACES);
    }

    ObjectReader objReader = mapper.readerFor(String[].class).with(csvSchema)
            .withFeatures(features.toArray(new CsvParser.Feature[features.size()]));

    recordStream = objReader.readValues(reader);
}

From source file:org.cast.cwm.service.UserSpreadsheetReader.java

/**
 * Read spreadsheet of user information and generate potential users.
 * Returns true if all was successful and users could be created as specified.
 *
 * This method does NOT modify the datastore.
 * 
 * @param stream the input stream of CSV data
 * @return true if no errors encountered.
 */
@Override
public boolean readInput(InputStream stream) {
    potentialUsers = new ArrayList<PotentialUserSave>();
    potentialSites = new HashMap<String, Site>();
    potentialPeriods = new HashMap<Site, Map<String, Period>>();

    CSVParser parser;
    try {
        parser = CSVFormat.EXCEL.withHeader().withIgnoreEmptyLines().withIgnoreSurroundingSpaces()
                .parse(new InputStreamReader(new BOMInputStream(stream), "UTF-8"));
    } catch (IOException e) {
        log.error(e.getMessage());
        globalError = e.getMessage();
        return false;
    }

    // Make our own secondary mapping of header names to fields, by
    // lowercasing and removing spaces from all header names
    headerMap = parser.getHeaderMap();
    for (String hdr : new HashSet<String>(headerMap.keySet())) {
        String normalized = hdr.toLowerCase().replaceAll("\\s", "");
        if (!normalized.equals(hdr)) {
            headerMap.put(normalized, headerMap.get(hdr));
        }
    }

    globalError = checkRequiredHeaders(headerMap);
    if (!Strings.isEmpty(globalError))
        return false;

    // Read the CSV file, create PotentialUserSave objects, record error messages, add to potentialUsers List
    try {
        boolean errors = false; // have errors been encountered?
        for (CSVRecord record : parser) {

            try {
                User user = createUserObject(record);
                String messages = populateUserObject(user, record);
                if (Strings.isEmpty(messages))
                    messages = validateUser(user);

                // Add a PotentialUserSave to the list.
                potentialUsers.add(new PotentialUserSave(modelProvider.modelOf(user), messages, record));
                if (!Strings.isEmpty(messages))
                    errors = true;

            } catch (ArrayIndexOutOfBoundsException e) {
                // This can happen if the last row is missing values; Excel doesn't fill them out to the last column
                log.error("Caught exception importing line {}: {}", parser.getCurrentLineNumber(),
                        e.getClass());
                potentialUsers.add(new PotentialUserSave(null, "Data missing from CSV.\n", record));
                errors = true;
            } catch (Exception e) {
                e.printStackTrace();
                log.error("Caught exception importing line {}: {}", parser.getCurrentLineNumber(),
                        e.getClass());
                potentialUsers.add(new PotentialUserSave(null, "Error: " + e, record));
                errors = true;
            }
        }

        // If CSV file has only one line, it is either empty or has unrecognized LF/CR values.
        if (parser.getCurrentLineNumber() == 1) {
            potentialUsers.add(
                    new PotentialUserSave(null, "Empty or Corrupted File.  Note: Save as Windows CSV.", null));
            globalError = "Empty or Corrupted File - LF/CR values may be invalid!";
            throw new CharacterCodingException();
        }
        return (!errors);

    } catch (CharacterCodingException e) {
        log.error("Empty or Corrupted File - only 1 line found - CR/LF issue?. {}", e.getClass());
        return false;
    }

}

From source file:org.codelibs.fess.crawler.helper.RobotsTxtHelper.java

public RobotsTxt parse(final InputStream stream, final String charsetName) {
    if (!enabled) {
        return null;
    }

    try {
        @SuppressWarnings("resource")
        final BufferedReader reader = new BufferedReader(
                new InputStreamReader(new BOMInputStream(stream), charsetName));

        String line;
        final RobotsTxt robotsTxt = new RobotsTxt();
        final List<Directive> currentDirectiveList = new ArrayList<Directive>();
        boolean isGroupRecodeStarted = false;
        while ((line = reader.readLine()) != null) {
            line = stripComment(line).trim();
            if (StringUtil.isEmpty(line)) {
                continue;
            }

            String value;
            if ((value = getValue(USER_AGENT_RECORD, line)) != null) {
                if (isGroupRecodeStarted) {
                    currentDirectiveList.clear();
                    isGroupRecodeStarted = false;
                }
                final String userAgent = value.toLowerCase(Locale.ENGLISH);
                Directive currentDirective = robotsTxt.getDirective(userAgent);
                if (currentDirective == null) {
                    currentDirective = new Directive(userAgent);
                    robotsTxt.addDirective(currentDirective);
                    currentDirectiveList.add(currentDirective);
                }
            } else {
                isGroupRecodeStarted = true;
                if ((value = getValue(DISALLOW_RECORD, line)) != null) {
                    if (!currentDirectiveList.isEmpty() && value.length() > 0) {
                        for (final Directive directive : currentDirectiveList) {
                            directive.addDisallow(value);
                        }
                    }
                } else if ((value = getValue(ALLOW_RECORD, line)) != null) {
                    if (!currentDirectiveList.isEmpty() && value.length() > 0) {
                        for (final Directive directive : currentDirectiveList) {
                            directive.addAllow(value);
                        }
                    }
                } else if ((value = getValue(CRAWL_DELAY_RECORD, line)) != null) {
                    if (!currentDirectiveList.isEmpty()) {
                        try {
                            final int crawlDelay = Integer.parseInt(value);
                            for (final Directive directive : currentDirectiveList) {
                                directive.setCrawlDelay(Math.max(0, crawlDelay));
                            }
                        } catch (final NumberFormatException e) {
                            // ignore
                        }
                    }
                } else if ((value = getValue(SITEMAP_RECORD, line)) != null) {
                    if (value.length() > 0) {
                        robotsTxt.addSitemap(value);
                    }
                }
            }
        }

        return robotsTxt;
    } catch (final Exception e) {
        throw new RobotsTxtException("Failed to parse robots.txt.", e);
    }
}

From source file:org.codelibs.robot.helper.RobotsTxtHelper.java

public RobotsTxt parse(final InputStream stream, final String charsetName) {
    if (!enabled) {
        return null;
    }

    try {
        @SuppressWarnings("resource")
        final BufferedReader reader = new BufferedReader(
                new InputStreamReader(new BOMInputStream(stream), charsetName));

        String line;
        final RobotsTxt robotsTxt = new RobotsTxt();
        final List<Directive> currentDirectiveList = new ArrayList<Directive>();
        boolean isGroupRecodeStarted = false;
        while ((line = reader.readLine()) != null) {
            line = stripComment(line).trim();
            if (StringUtil.isEmpty(line)) {
                continue;
            }

            String value;
            if ((value = getValue(USER_AGENT_RECORD, line)) != null) {
                if (isGroupRecodeStarted) {
                    currentDirectiveList.clear();
                    isGroupRecodeStarted = false;
                }
                final String userAgent = value.toLowerCase(Locale.ENGLISH);
                Directive currentDirective = robotsTxt.getDirective(userAgent);
                if (currentDirective == null) {
                    currentDirective = new Directive(userAgent);
                    robotsTxt.addDirective(currentDirective);
                    currentDirectiveList.add(currentDirective);
                }
            } else {
                isGroupRecodeStarted = true;
                if ((value = getValue(DISALLOW_RECORD, line)) != null) {
                    if (!currentDirectiveList.isEmpty() && value.length() > 0) {
                        for (final Directive directive : currentDirectiveList) {
                            directive.addDisallow(value);
                        }
                    }
                } else if ((value = getValue(ALLOW_RECORD, line)) != null) {
                    if (!currentDirectiveList.isEmpty() && value.length() > 0) {
                        for (final Directive directive : currentDirectiveList) {
                            directive.addAllow(value);
                        }
                    }
                } else if ((value = getValue(CRAWL_DELAY_RECORD, line)) != null) {
                    if (!currentDirectiveList.isEmpty()) {
                        try {
                            final int crawlDelay = Integer.parseInt(value);
                            for (final Directive directive : currentDirectiveList) {
                                directive.setCrawlDelay(Math.max(0, crawlDelay));
                            }
                        } catch (final NumberFormatException e) {
                            // ignore
                        }
                    }
                } else if ((value = getValue(SITEMAP_RECORD, line)) != null) {
                    if (value.length() > 0) {
                        robotsTxt.addSitemap(value);
                    }
                }
            }
        }

        return robotsTxt;
    } catch (final Exception e) {
        throw new RobotTxtException("Failed to parse robots.txt.", e);
    }
}

From source file:org.culturegraph.mf.io.FileOpener.java

@Override
public void process(final String file) {
    try {
        final InputStream fileStream = new FileInputStream(file);
        try {
            final InputStream decompressor = compression.createDecompressor(fileStream);
            try {

                final Reader reader = new InputStreamReader(new BOMInputStream(decompressor), encoding);
                getReceiver().process(reader);
            } catch (final IOException | MetafactureException e) {
                decompressor.close();
                throw e;
            }
        } catch (final IOException | MetafactureException e) {
            fileStream.close();
            throw e;
        }
    } catch (final IOException e) {
        throw new MetafactureException(e);
    }
}

From source file:org.culturegraph.mf.stream.source.FileOpener.java

@Override
public void process(final String file) {
    try {
        final InputStream fileStream = new FileInputStream(file);
        try {
            final InputStream decompressor = compression.createDecompressor(fileStream);
            try {

                final Reader reader = new InputStreamReader(new BOMInputStream(decompressor), encoding);
                getReceiver().process(reader);
            } catch (IOException e) {
                decompressor.close();
                throw e;
            } catch (MetafactureException e) {
                decompressor.close();
                throw e;
            }
        } catch (IOException e) {
            fileStream.close();
            throw e;
        } catch (MetafactureException e) {
            fileStream.close();
            throw e;
        }
    } catch (IOException e) {
        throw new MetafactureException(e);
    }
}