Example usage for com.fasterxml.jackson.dataformat.csv CsvSchema builder

List of usage examples for com.fasterxml.jackson.dataformat.csv CsvSchema builder

Introduction

On this page you can find example usage of com.fasterxml.jackson.dataformat.csv CsvSchema builder.

Prototype

public static Builder builder() 
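
A minimal sketch of the call chain behind this prototype, assuming illustrative column names and a made-up input line (nothing here is taken from the source files below):

import java.io.IOException;
import java.util.Map;

import com.fasterxml.jackson.dataformat.csv.CsvMapper;
import com.fasterxml.jackson.dataformat.csv.CsvSchema;

public class CsvSchemaBuilderSketch {
    public static void main(String[] args) throws IOException {
        // Build a two-column schema; the column names and the sample line below
        // are illustrative assumptions, not taken from the examples on this page.
        CsvSchema schema = CsvSchema.builder()
                .addColumn("id", CsvSchema.ColumnType.NUMBER)
                .addColumn("name", CsvSchema.ColumnType.STRING)
                .setColumnSeparator(',')
                .setUseHeader(false)
                .build();

        // Bind a single CSV line to a Map keyed by the schema's column names.
        CsvMapper mapper = new CsvMapper();
        Map<?, ?> row = mapper.readerFor(Map.class).with(schema).readValue("42,Helsinki");
        System.out.println(row); // e.g. {id=42, name=Helsinki}
    }
}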

Usage

From source file:com.marklogic.client.test.JacksonDatabindTest.java

/** Demonstrate using Jackson's CSV mapper directly to simplify reading in data, populating a
 * third-party POJO (one we cannot annotate), then writing it out
 * via JacksonDatabindHandle with configuration provided by mix-in annotations.
 **/
@Test
public void testDatabindingThirdPartyPojoWithMixinAnnotations() throws JsonProcessingException, IOException {
    CsvSchema schema = CsvSchema.builder().setColumnSeparator('\t').addColumn("geoNameId").addColumn("name")
            .addColumn("asciiName").addColumn("alternateNames")
            .addColumn("latitude", CsvSchema.ColumnType.NUMBER)
            .addColumn("longitude", CsvSchema.ColumnType.NUMBER).addColumn("featureClass")
            .addColumn("featureCode").addColumn("countryCode").addColumn("countryCode2").addColumn("adminCode1")
            .addColumn("adminCode2").addColumn("adminCode3").addColumn("adminCode4").addColumn("population")
            .addColumn("elevation", CsvSchema.ColumnType.NUMBER).addColumn("dem", CsvSchema.ColumnType.NUMBER)
            .addColumn("timezoneCode").addColumn("lastModified").build();
    CsvMapper mapper = new CsvMapper();
    mapper.addMixInAnnotations(Toponym.class, ToponymMixIn1.class);
    ObjectReader reader = mapper.reader(Toponym.class).with(schema);
    BufferedReader cityReader = new BufferedReader(Common.testFileToReader(CITIES_FILE));
    GenericDocumentManager docMgr = Common.client.newDocumentManager();
    DocumentWriteSet set = docMgr.newWriteSet();
    String line = null;
    for (int numWritten = 0; numWritten < MAX_TO_WRITE
            && (line = cityReader.readLine()) != null; numWritten++) {
        Toponym city = reader.readValue(line);
        JacksonDatabindHandle handle = new JacksonDatabindHandle(city);
        handle.getMapper().addMixInAnnotations(Toponym.class, ToponymMixIn2.class);
        set.add(DIRECTORY + "/thirdPartyJsonCities/" + city.getGeoNameId() + ".json", handle);
    }
    docMgr.write(set);
    cityReader.close();
    // We can add assertions later; for now this test just serves as example code
    // and ensures that no exceptions are thrown.
}
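
The mix-in step in the test above is Jackson's standard way to attach annotations to a class you cannot modify. A minimal, self-contained sketch of the same idea, written against a hypothetical third-party City class (not the MarkLogic/GeoNames Toponym used above) and a schema built with CsvSchema.builder():

import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.dataformat.csv.CsvMapper;
import com.fasterxml.jackson.dataformat.csv.CsvSchema;

public class MixInCsvSketch {
    // Hypothetical third-party class we cannot annotate directly.
    public static class City {
        public int geoNameId;
        public String name;
    }

    // Mix-in carrying the annotations Jackson should apply to City.
    abstract static class CityMixIn {
        @JsonProperty("id")
        public int geoNameId;
    }

    public static void main(String[] args) throws Exception {
        CsvMapper mapper = new CsvMapper();
        mapper.addMixIn(City.class, CityMixIn.class); // addMixInAnnotations is the older, deprecated name

        // Column names must match the (mixed-in) property names.
        CsvSchema schema = CsvSchema.builder()
                .addColumn("id")
                .addColumn("name")
                .build();

        City city = new City();
        city.geoNameId = 42;
        city.name = "Helsinki";
        System.out.println(mapper.writer(schema).writeValueAsString(city)); // e.g. 42,Helsinki
    }
}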

From source file:net.arp7.HdfsPerfTest.WriteFile.java

private static void writeCsvResult(final FileIoStats stats) {
    if (params.getResultCsvFile() == null) {
        return;
    }

    final Object[] results = new Object[] { new Date().toGMTString(), params.getNumFiles(),
            params.getNumThreads(), params.getReplication(), params.getBlockSize(), params.getIoSize(),
            stats.getFilesWritten(), stats.getBytesWritten(), stats.getMeanCreateTimeMs(),
            stats.getMeanWriteTimeMs(), stats.getMeanCloseTimeMs(), stats.getElapsedTimeMs(),
            (params.getFileSize() * 1000) / stats.getElapsedTimeMs(),
            (params.getNumFiles() * params.getFileSize() * 1000) / stats.getElapsedTimeMs(), params.getNote() };

    final CsvSchema schema = CsvSchema.builder().setColumnSeparator(';').setQuoteChar('"')
            .setUseHeader(!params.getResultCsvFile().exists())
            .addColumn("timestamp", CsvSchema.ColumnType.STRING)
            .addColumn("number of files", CsvSchema.ColumnType.NUMBER)
            .addColumn("number of threads", CsvSchema.ColumnType.NUMBER)
            .addColumn("replication factor", CsvSchema.ColumnType.NUMBER)
            .addColumn("block size", CsvSchema.ColumnType.NUMBER)
            .addColumn("io size", CsvSchema.ColumnType.NUMBER)
            .addColumn("total files written", CsvSchema.ColumnType.NUMBER)
            .addColumn("total bytes written", CsvSchema.ColumnType.NUMBER)
            .addColumn("mean time to create file in ms", CsvSchema.ColumnType.NUMBER)
            .addColumn("mean time to write file in ms", CsvSchema.ColumnType.NUMBER)
            .addColumn("mean time to close file in ms", CsvSchema.ColumnType.NUMBER)
            .addColumn("total ms", CsvSchema.ColumnType.NUMBER)
            .addColumn("mean throughput bytes per s", CsvSchema.ColumnType.NUMBER)
            .addColumn("aggregate throughput bytes per s", CsvSchema.ColumnType.NUMBER)
            .addColumn("note", CsvSchema.ColumnType.STRING).build();

    try (FileWriter fileWriter = new FileWriter(params.getResultCsvFile(), true)) {
        final CsvMapper mapper = new CsvMapper();
        final ObjectWriter writer = mapper.writer(schema);
        writer.writeValue(fileWriter, results);
    } catch (IOException e) {
        LOG.error("Could not write results to CSV file '{}': '{}'", params.getResultCsvFile().getPath(),
                e.getMessage());
    }
}

From source file:com.datafibers.kafka.connect.SchemaedFileSourceTask.java

private List<SourceRecord> pollFromFile() throws InterruptedException {
    log.trace("pollFromFile");
    CsvSchema bootstrapCsv;
    CsvMapper csvMapper = new CsvMapper();
    ObjectMapper jsonMapper = new ObjectMapper();
    MappingIterator<Map<?, ?>> mappingIterator;
    ArrayList<SourceRecord> records = null;
    long currentTime = System.currentTimeMillis();
    long recordsPerPoll;

    // TODO: Improve ExceptionOnEof logic.
    // The code below only works when each pass through
    // poll() reads all available records (not a given).
    if (config.getExceptionOnEof() && streamOffset != null) {
        throw new ConnectException("No more data available on FileInputStream");
    }

    // Initialize the bootstrapCsv schema if necessary
    if (recordSchema == null || inputType.equalsIgnoreCase("json")) {
        log.trace("Constructing csvSchema from emptySchema");
        bootstrapCsv = config.getCsvHeaders() ? CsvSchema.emptySchema().withHeader()
                : CsvSchema.emptySchema().withoutHeader();
    } else {
        // We've seen a schema, so we'll assume headers from the recordSchema
        log.trace("Constructing csvSchema from recordSchema");
        CsvSchema.Builder builder = new CsvSchema.Builder();
        builder.setUseHeader(false);
        builder.setColumnSeparator(',');
        for (Field f : recordSchema.fields()) {
            log.trace("adding column {}", f.name());
            builder.addColumn(f.name());
        }
        bootstrapCsv = builder.build();
    }
    try {
        if (stream == null)
            openFileStream();
        if (reader == null)
            reader = new BufferedReader(new InputStreamReader(stream));

        if (inputType.equalsIgnoreCase("json")) {
            mappingIterator = jsonMapper.readerFor(Map.class).readValues(reader);
        } else if (inputType.equalsIgnoreCase("csv")) {
            mappingIterator = csvMapper.readerWithSchemaFor(Map.class).with(bootstrapCsv).readValues(reader);
        } else {
            log.error("Unsupported file input type specified ({})", inputType);
            return null;
        }
    } catch (FileNotFoundException fnf) {
        log.warn("Couldn't find file {} for SchemaedFileSourceTask, sleeping to wait for it to be created",
                logFilename());
        synchronized (this) {
            this.wait(1000);
        }
        return null;
    } catch (IOException e) {
        // IOException thrown when no more records in stream
        log.warn("Processed all available data from {}; sleeping to wait additional records", logFilename());
        // Close reader and stream; swallowing exceptions ... we're about to throw a Retry
        try {
            reader.close();
        } catch (Exception nested) {
        } finally {
            reader = null;
        }

        if (stream != System.in) {
            try {
                stream.close();
            } catch (Exception nested) {
            } finally {
                stream = null;
            }
        }

        synchronized (this) {
            this.wait(1000);
        }
        return null;
    }
    log.debug("mappingIterator of type {} created; begin reading data file",
            mappingIterator.getClass().toString());

    // The csvMapper class is really screwy; can't figure out why it
    // won't return a rational Schema ... so we'll extract it from
    // the first object later.
    if (recordSchema == null && inputType.equalsIgnoreCase("csv") && csvMapper.schema().size() > 0) {
        recordSchema = ConvertMappingSchema(csvMapper.schemaWithHeader());
        log.trace("recordSchema created from csvMapper; type {}", recordSchema.type().toString());
    }
    try {
        FileInputStream fstream = (FileInputStream) stream;
        Long lastElementOffset = streamOffset;
        recordsPerPoll = 3;

        while (mappingIterator.hasNext()) {
            Map<?, ?> element = mappingIterator.next();
            Long elementOffset, iteratorOffset;
            recordCount++;
            recordsPerPoll--;

            iteratorOffset = mappingIterator.getCurrentLocation().getByteOffset(); // never works !!!
            if (iteratorOffset < 0) {
                // The stream channel will CLOSE on the last clean record
                // seen by the mapping iterator, so we have to be careful here.
                // Additionally, when parsing CSV files, there seem to be a
                // lot of Bad File Descriptor errors; ignore them.
                try {
                    elementOffset = fstream.getChannel().position();
                } catch (java.nio.channels.ClosedChannelException e) {
                    log.trace("getChannel.position threw {}", e.toString());
                    elementOffset = lastElementOffset;
                } catch (IOException e) {
                    log.trace("getChannel.position threw {}", e.toString());
                    elementOffset = lastElementOffset;
                }
            } else {
                log.trace("mappingIterator.getCurrentLocation() returns {}", iteratorOffset.toString());
                elementOffset = iteratorOffset;
            }
            log.trace("Next input record: {} (class {}) from file position {}", element.toString(),
                    element.getClass().toString(), elementOffset.toString());

            if (recordSchema == null) {
                recordSchema = ConvertMappingSchema(element.keySet());
                log.trace("recordSchema created from element; type {}", recordSchema.type().toString());
            }

            if (records == null)
                records = new ArrayList<>();
            records.add(new SourceRecord(offsetKey(filename), offsetValue(elementOffset), topic, recordSchema,
                    ConvertMappingElement(recordSchema, (HashMap<?, ?>) element)));
            streamOffset = lastElementOffset = elementOffset;
        }
    } catch (Exception e) {
        throw new ConnectException(e);
    }

    lastPollTime = currentTime;
    return records;
}

From source file:datadidit.helpful.hints.processors.csv.converter.ConvertCSVToJSON.java

private CsvSchema buildCsvSchema(String fieldNames, Boolean withHeader) {
    Builder build = CsvSchema.builder();
    for (String field : fieldNames.split(",")) {
        String[] fieldWithType = field.split("#");
        if (fieldWithType.length == 2) {
            getLogger().info("Field: " + fieldWithType[0]);
            getLogger().info("Type: " + fieldWithType[1]);
            build.addColumn(fieldWithType[0], CsvSchema.ColumnType.valueOf(fieldWithType[1]));
        } else {
            build.addColumn(field);
        }
    }
    if (withHeader) {
        return build.build().withHeader();
    }
    return build.build();
}

From source file:org.apache.nifi.csv.JacksonCSVRecordReader.java

public JacksonCSVRecordReader(final InputStream in, final ComponentLog logger, final RecordSchema schema,
        final CSVFormat csvFormat, final boolean hasHeader, final boolean ignoreHeader, final String dateFormat,
        final String timeFormat, final String timestampFormat, final String encoding) throws IOException {
    super(logger, schema, hasHeader, ignoreHeader, dateFormat, timeFormat, timestampFormat);

    final Reader reader = new InputStreamReader(new BOMInputStream(in));

    CsvSchema.Builder csvSchemaBuilder = CsvSchema.builder().setColumnSeparator(csvFormat.getDelimiter())
            .setLineSeparator(csvFormat.getRecordSeparator())
            // Can only use comments in Jackson CSV if the correct marker is set
            .setAllowComments("#".equals(CharUtils.toString(csvFormat.getCommentMarker())))
            // The call to setUseHeader(false) in all code paths is due to the way Jackson does data binding/mapping. Missing or extra columns may not
            // be handled correctly when using the header for mapping.
            .setUseHeader(false);

    csvSchemaBuilder = (csvFormat.getQuoteCharacter() == null) ? csvSchemaBuilder
            : csvSchemaBuilder.setQuoteChar(csvFormat.getQuoteCharacter());
    csvSchemaBuilder = (csvFormat.getEscapeCharacter() == null) ? csvSchemaBuilder
            : csvSchemaBuilder.setEscapeChar(csvFormat.getEscapeCharacter());

    if (hasHeader) {
        if (ignoreHeader) {
            csvSchemaBuilder = csvSchemaBuilder.setSkipFirstDataRow(true);
        }
    }

    CsvSchema csvSchema = csvSchemaBuilder.build();

    // Add remaining config options to the mapper
    List<CsvParser.Feature> features = new ArrayList<>();
    features.add(CsvParser.Feature.INSERT_NULLS_FOR_MISSING_COLUMNS);
    if (csvFormat.getIgnoreEmptyLines()) {
        features.add(CsvParser.Feature.SKIP_EMPTY_LINES);
    }
    if (csvFormat.getTrim()) {
        features.add(CsvParser.Feature.TRIM_SPACES);
    }

    ObjectReader objReader = mapper.readerFor(String[].class).with(csvSchema)
            .withFeatures(features.toArray(new CsvParser.Feature[features.size()]));

    recordStream = objReader.readValues(reader);
}