Example usage for org.apache.commons.csv CSVRecord size

Introduction

On this page you can find example usages of the size() method of org.apache.commons.csv.CSVRecord.

Prototype

public int size()

Source Link

Document

Returns the number of values in this record.

Usage

From source file:org.thegalactic.context.io.ContextSerializerCsv.java

/**
 * Read a context from a csv file./*  ww  w  .  j  ava 2s  . com*/
 *
 * The following format is respected:
 *
 * The first line contains the attribute names, the other lines contains the
 * observations identifier followed by boolean values
 *
 * ~~~
 * "",a,b,c,d,e
 * 1,1,0,1,0,0
 * 2,1,1,0,0,0
 * 3,0,1,0,1,1
 * 4,0,0,1,0,1
 * ~~~
 *
 * If the first attribute is the empty string, the first column corresponds
 * to the individual identifiers. In the other case, the individual
 * identifiers will be generated by successive integers.
 *
 * ~~~
 * a,b,c,d,e
 * 1,0,1,0,0
 * 1,1,0,0,0
 * 0,1,0,1,1
 * 0,0,1,0,1
 * ~~~
 *
 * @param context a context to read
 * @param file    a file
 *
 * @throws IOException When an IOException occurs
 */
public void read(Context context, BufferedReader file) throws IOException {
    // Parse the file
    CSVParser parser = CSVFormat.RFC4180.parse(file);

    // Get the records and record size
    List<CSVRecord> records = parser.getRecords();
    int length = records.size();

    // Verify length
    if (length == 0) {
        throw new IOException("CSV cannot be empty");
    }

    // Get the attributes and the attribute size
    CSVRecord attributes = records.get(0);
    int size = attributes.size();

    // Detect invalid attribute size
    if (size == 1 && attributes.get(0).equals("")) {
        throw new IOException("Attribute size cannot be 0");
    }

    // Index of the first attribute
    int first = 0;
    if (attributes.get(0).equals("")) {
        first = 1;
    }

    // Get the attributes
    for (int i = first; i < size; i++) {
        String attribute = attributes.get(i);

        // Detect duplicated attribute
        if (!context.addToAttributes(attribute)) {
            throw new IOException("Duplicated attribute");
        }

        // Detect empty attribute
        if ("".equals(attribute)) {
            throw new IOException("Empty attribute");
        }
    }

    // Get the data
    for (int j = 1; j < length; j++) {
        // Get the current record
        CSVRecord record = records.get(j);

        // Detect incorrect size
        if (record.size() != size) {
            throw new IOException("Line does not have the correct number of attributes");
        }

        // Get the observation identifier
        String identifier;
        if (first == 1) {
            identifier = record.get(0);
        } else {
            identifier = String.valueOf(j);
        }

        // Detect duplicated identifier
        if (!context.addToObservations(identifier)) {
            throw new IOException("Duplicated identifier");
        }

        // Add the extent/intent for the current identifier and current attribute
        for (int i = first; i < size; i++) {
            if (record.get(i).equals("1")) {
                context.addExtentIntent(identifier, attributes.get(i));
            }
        }
    }

    // Close the parser
    parser.close();
    context.setBitSets();
}

From source file:org.totschnig.myexpenses.task.CsvImportTask.java

/**
 * Returns the trimmed value stored at {@code index}, or the empty string
 * when the record has too few values to contain that index.
 */
private String saveGetFromRecord(CSVRecord record, int index) {
    if (index >= record.size()) {
        return "";
    }
    String rawValue = record.get(index);
    return rawValue.trim();
}

From source file:org.transitime.utils.csv.CsvBaseReader.java

/**
 * Parse the CSV file. Reads in the header info and then each line. Calls
 * the abstract handleRecord() method for each record. Adds each resulting
 * non-null CSV object to the gtfsObjects list.
 *
 * Records that raise a ParseException or NumberFormatException are logged
 * and skipped so that all errors of a file are reported in one pass. A
 * missing file is logged as an error only when the file is required.
 */
private void parse() {
    // Check the clock only every so many records, so that a system call
    // for the current time is not made for every line.
    final int LINES_TO_PROCESS_BEFORE_CHECKING_IF_SHOULD_LOG = 20000;
    // Minimum time between two progress messages
    final long SECONDS_ELAPSED_UNTIL_SHOULD_LOG = 5;
    // Optional Byte Order Mark that can start a UTF-8 file
    final int BOM_CHARACTER = 0xFEFF;

    CSVRecord record = null;
    try {
        IntervalTimer timer = new IntervalTimer();

        logger.debug("Parsing CSV file {} ...", fileName);

        // Open the file for reading. Use UTF-8 format since that will work
        // for both regular ASCII format and UTF-8 extended format files
        // since UTF-8 was designed to be backwards compatible with ASCII.
        // BufferedReader supports mark/reset, which is needed to peek at
        // the first character and discard it only if it is a BOM.
        // try-with-resources closes the file even if reading fails part
        // way through.
        try (Reader in = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), "UTF-8"))) {
            // Deal with the possible BOM character at the beginning of the file
            in.mark(1);
            int firstRead = in.read();
            if (firstRead != BOM_CHARACTER) {
                in.reset();
            }

            // Get ready to parse the CSV file.
            // Allow lines to be comments if they start with "-" so that can
            // easily comment out problems and also test what happens when
            // certain data is missing. Using the '-' character so can
            // comment out line that starts with "--", which is what is
            // used for SQL.
            CSVFormat formatter = CSVFormat.DEFAULT.withHeader().withCommentMarker('-');

            // Parse the file
            // NOTE(review): records appear to be read lazily during the
            // iteration below, so the timing logged here likely covers only
            // parser set-up - confirm against the Commons CSV documentation.
            Iterable<CSVRecord> records = formatter.parse(in);

            logger.debug("Finished CSV parsing of file {}. Took {} msec.", fileName, timer.elapsedMsec());

            int lineNumberWhenLogged = 0;
            timer = new IntervalTimer();
            IntervalTimer loggingTimer = new IntervalTimer();

            for (CSVRecord currentRecord : records) {
                // Remember the record so the total can be logged at the end
                record = currentRecord;

                // If blank line then skip it. This way avoid error messages since
                // expected data column won't exist
                if (record.size() == 0) {
                    continue;
                }

                // Process the record using appropriate handler
                // and create the corresponding CSV object
                T gtfsObject;
                try {
                    gtfsObject = handleRecord(record, supplemental);
                } catch (ParseException e) {
                    logger.error("ParseException occurred for record {} "
                            + "(comment lines not included when determing record #) for " + "filename {} . {}",
                            record.getRecordNumber(), fileName, e.getMessage());

                    // Continue even though there was an error so that all errors
                    // logged at once.
                    continue;
                } catch (NumberFormatException e) {
                    logger.error("NumberFormatException occurred for record {} "
                            + "(comment lines not included when determing record #) " + "for filename {} . {}",
                            record.getRecordNumber(), fileName, e.getMessage());

                    // Continue even though there was an error so that all errors
                    // logged at once.
                    continue;
                }

                // Add the newly created CSV object to the object list
                if (gtfsObject != null) {
                    gtfsObjects.add(gtfsObject);
                }

                // Log progress if it has been a while since the last message
                if (record.getRecordNumber() >= lineNumberWhenLogged
                        + LINES_TO_PROCESS_BEFORE_CHECKING_IF_SHOULD_LOG) {
                    lineNumberWhenLogged = (int) record.getRecordNumber();
                    if (loggingTimer.elapsedMsec() > SECONDS_ELAPSED_UNTIL_SHOULD_LOG * Time.MS_PER_SEC) {
                        logger.info("  Processed {} lines. Took {} msec...", lineNumberWhenLogged,
                                timer.elapsedMsec());
                        loggingTimer = new IntervalTimer();
                    }
                }
            } // End of iterating over records
        } // File reader is closed here

        // Determine number of records for logging message
        long numberRecords = 0;
        if (record != null) {
            numberRecords = record.getRecordNumber();
        }

        logger.info("Finished parsing {} records from file {} . Took {} msec.", numberRecords, fileName,
                timer.elapsedMsec());
    } catch (FileNotFoundException e) {
        if (required) {
            logger.error("Required CSV file {} not found.", fileName);
        } else {
            logger.info("CSV file {} not found but OK because this file " + "not required.", fileName);
        }
    } catch (IOException e) {
        logger.error("IOException occurred when reading in filename {}.", fileName, e);
    }
}

From source file:org.wheatinitiative.vitro.webapp.ontology.update.OntologyChangeParser.java

/**
 * Parses a tab-delimited PromptDiff resource into a list of atomic
 * ontology changes.
 *
 * Each row must have exactly 5 columns; other rows are logged as errors
 * and skipped. Columns 0, 1 and 4 supply the source URI, destination URI
 * and notes when non-empty. Column 2 equal to "Yes" marks a rename;
 * otherwise column 3 ("Delete" or "Add") selects the change type. A row
 * with an unrecognised change type is logged as an error but is still
 * added to the result without a change type.
 *
 * @param diffPath path of the resource, resolved through
 *                 {@code getResourceAsStream} on this object's class
 * @return the changes read from the resource, possibly empty
 * @throws IOException if the resource cannot be read
 */

@SuppressWarnings({ "unchecked", "null", "static-access" })
public ArrayList<AtomicOntologyChange> parseFile(String diffPath) throws IOException {
    ArrayList<AtomicOntologyChange> changeObjects = new ArrayList<AtomicOntologyChange>();

    // try-with-resources closes the resource stream once all rows are read
    try (InputStreamReader in = new InputStreamReader(this.getClass().getResourceAsStream(diffPath))) {
        // 1-based number of the row being processed
        int rowNum = 0;
        for (CSVRecord row : CSVFormat.TDF.parse(in)) {
            rowNum++;
            if (row.size() != 5) {
                log.error("Invalid PromptDiff data at row " + rowNum + ". Expected 5 columns; found "
                        + row.size());
                continue;
            }

            AtomicOntologyChange changeObj = new AtomicOntologyChange();
            if (row.get(0) != null && row.get(0).length() > 0) {
                changeObj.setSourceURI(row.get(0));
            }
            if (row.get(1) != null && row.get(1).length() > 0) {
                changeObj.setDestinationURI(row.get(1));
            }
            if (row.get(4) != null && row.get(4).length() > 0) {
                changeObj.setNotes(row.get(4));
            }
            if ("Yes".equals(row.get(2))) {
                changeObj.setAtomicChangeType(AtomicChangeType.RENAME);
            } else if ("Delete".equals(row.get(3))) {
                changeObj.setAtomicChangeType(AtomicChangeType.DELETE);
            } else if ("Add".equals(row.get(3))) {
                changeObj.setAtomicChangeType(AtomicChangeType.ADD);
            } else {
                log.error("Invalid rename or change type data: '" + row.get(2) + " " + row.get(3) + "'");
            }
            log.debug(changeObj);
            changeObjects.add(changeObj);
        }
    }

    if (changeObjects.size() == 0) {
        log.debug("No ABox updates are required.");
    }
    return changeObjects;
}

From source file:org.wso2.carbon.event.simulator.csvFeedSimulation.core.CSVFeedEventSimulator.java

/**
 * This method must be called within a synchronized block to avoid multiple file simulators from running
 * simultaneously.
 * Reads the values from the uploaded CSV file, converts each record into an event and sends those events to
 * the input handler.
 * <p>
 * The CSV file is read with the Commons CSV
 * <a href="https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVParser.html">CSVParser</a>.
 * </p>
 * <p>
 * The delimiters {@code ,} and {@code ;} and the two-character text {@code \t} select a predefined format;
 * any other value is used as a user defined delimiter (its first character).
 * </p>
 * <p>
 * Sending stops early when the simulator is stopped or when the sending thread is interrupted; in the
 * latter case the interrupt status of the thread is restored.
 * </p>
 *
 * @param executionPlanDto ExecutionPlanDto
 * @param csvFileConfig    CSVFileSimulationDto
 * @throws EventSimulationException if the file cannot be found, read or closed, or if a parameter is illegal
 */
private void sendEvent(ExecutionPlanDto executionPlanDto, CSVFileSimulationDto csvFileConfig) {

    /*
      Line number reached in the CSV file after the most recently read record
     */
    long noOfEvents = 0;
    int delay = csvFileConfig.getDelay();
    /*
    Reader for reading character streams from file
     */
    Reader in = null;
    /*
    CSVParser to read CSV Values
     */
    CSVParser csvParser = null;
    if (delay <= 0) {
        log.warn("Events will be sent continuously since the delay between events are set to " + delay
                + "milliseconds");
        delay = 0;
    }

    try {
        /*
        Initialize Reader
         */
        in = new FileReader(String.valueOf(Paths.get(System.getProperty("java.io.tmpdir"),
                csvFileConfig.getFileDto().getFileInfo().getFileName())));

        /*
        Initialize CSVParser with appropriate CSVFormat according to delimiter
         */

        switch (csvFileConfig.getDelimiter()) {
        case ",":
            csvParser = CSVParser.parse(in, CSVFormat.DEFAULT);
            break;
        case ";":
            // CSVFormat.EXCEL is comma-delimited by default, so the
            // semicolon has to be set explicitly
            csvParser = CSVParser.parse(in, CSVFormat.EXCEL.withDelimiter(';'));
            break;
        case "\\t":
            csvParser = CSVParser.parse(in, CSVFormat.TDF);
            break;
        default:
            csvParser = CSVParser.parse(in, CSVFormat.newFormat(csvFileConfig.getDelimiter().charAt(0)));
        }

        int attributeSize = executionPlanDto.getInputStreamDtoMap().get(csvFileConfig.getStreamName())
                .getStreamAttributeDtos().size();

        /*
        Iterate through the CSV file line by line
         */

        for (CSVRecord record : csvParser) {
            try {
                synchronized (this) {
                    if (isStopped) {
                        isStopped = false;
                        break;
                    }
                    if (isPaused) {
                        this.wait();
                    }
                }

                if (record.size() != attributeSize) {
                    // Parenthesised so the row number is added, not concatenated
                    log.warn("No of attribute is not equal to attribute size: " + attributeSize + " is needed"
                            + "in Row no:" + (noOfEvents + 1));
                }
                String[] attributes = new String[attributeSize];
                noOfEvents = csvParser.getCurrentLineNumber();

                // Never copy more values than the stream defines; a longer
                // record would otherwise overflow the array
                int valuesToCopy = Math.min(record.size(), attributeSize);
                for (int i = 0; i < valuesToCopy; i++) {
                    attributes[i] = record.get(i);
                }

                //convert Attribute values into event
                Event event = EventConverter.eventConverter(csvFileConfig.getStreamName(), attributes,
                        executionPlanDto);
                log.debug("Input Event " + Arrays.deepToString(event.getEventData()));

                //send the event to input handler
                send(csvFileConfig.getStreamName(), event);

                //delay between two events
                if (delay > 0) {
                    Thread.sleep(delay);
                }
            } catch (EventSimulationException e) {
                log.error("Event dropped due to Error occurred during generating an event" + e.getMessage());
            } catch (InterruptedException e) {
                log.error("Error occurred during send event" + e.getMessage());
                // Restore the interrupt status for the caller and stop sending
                Thread.currentThread().interrupt();
                break;
            }
        }

    } catch (IllegalArgumentException e) {
        // TODO: 02/12/16 proper error message
        throw new EventSimulationException("File Parameters are null" + e.getMessage());
    } catch (FileNotFoundException e) {
        throw new EventSimulationException(
                "File not found :" + csvFileConfig.getFileDto().getFileInfo().getFileName());
    } catch (IOException e) {
        throw new EventSimulationException("Error occurred while reading the file");
    } finally {
        try {
            // Each resource is closed independently: the parser is still
            // null when opening the file or selecting the format failed
            if (csvParser != null) {
                csvParser.close();
            }
            if (in != null) {
                in.close();
            }
        } catch (IOException e) {
            throw new EventSimulationException("Error occurred during closing the file");
        }
    }
}

From source file:org.wso2.carbon.event.simulator.randomdatafeedsimulation.bean.CustomBasedAttribute.java

/**
 * Method to split the data list into separated values and assign it to customDataList
 *
 * Every value of every record parsed from {@code customData} is appended, in order, to the resulting array.
 *
 * @param customData String that has data list values
 *                   Initial string format is ""CEP,Siddhi",ESB,DAS"
 *                   NOTE(review): the parser below is configured with '/' as the quote character, not
 *                   the double quote shown in this example - confirm which one is intended.
 * @throws EventSimulationException if the data cannot be parsed
 */
public void setCustomData(String customData) {
    List<String> dataList = new ArrayList<>();
    // try-with-resources closes the parser once all values are collected
    try (CSVParser csvParser = CSVParser.parse(customData, CSVFormat.newFormat(',').withQuote('/'))) {
        for (CSVRecord record : csvParser) {
            for (int i = 0; i < record.size(); i++) {
                dataList.add(record.get(i));
            }
        }
    } catch (IOException e) {
        throw new EventSimulationException("I/O error occurs :" + e.getMessage());
    } catch (IllegalArgumentException e) {
        throw new EventSimulationException("Data set is null :" + e.getMessage());
    }
    customDataList = dataList.toArray(new String[dataList.size()]);
}

From source file:org.zanata.adapter.glossary.GlossaryCSVReader.java

/**
 * Reads a glossary CSV and groups the resulting entries by translation
 * locale.
 *
 * The first record is the header. For every following record, one entry is
 * created per translation column (columns from index 1 that have a locale
 * in the header); each entry holds the source term from column 0 and the
 * translated term of that column. The reader is always closed.
 *
 * @param reader        source of the CSV content
 * @param qualifiedName qualified name assigned to every entry
 * @return the entries, keyed by translation locale
 * @throws IOException if the content cannot be read
 */
public Map<LocaleId, List<GlossaryEntry>> extractGlossary(Reader reader, String qualifiedName)
        throws IOException {
    try {
        Iterable<CSVRecord> parsed = CSVFormat.RFC4180.parse(reader);
        List<CSVRecord> records = Lists.newArrayList(parsed);

        validateCSVEntries(records);
        Map<String, Integer> descriptionMap = setupDescMap(records);
        Map<Integer, LocaleId> localeColMap = setupLocalesMap(records, descriptionMap);

        // Column 0 of the header names the source locale
        LocaleId srcLocale = localeColMap.get(0);
        if (!srcLang.equals(srcLocale)) {
            throw new RuntimeException("input source language '" + srcLang
                    + "' does not match source language in file '" + srcLocale + "'");
        }

        Map<LocaleId, List<GlossaryEntry>> entriesByLocale = Maps.newHashMap();

        // Record 0 is the header, data starts at record 1
        for (int rowIndex = 1; rowIndex < records.size(); rowIndex++) {
            CSVRecord row = records.get(rowIndex);

            for (int col = 1; col < row.size(); col++) {
                // Stop at the first column that is not a locale column
                if (!localeColMap.containsKey(col)) {
                    break;
                }

                GlossaryEntry entry = new GlossaryEntry();
                entry.setSrcLang(srcLocale);
                entry.setPos(row.get(descriptionMap.get(POS)));
                entry.setDescription(row.get(descriptionMap.get(DESC)));
                entry.setQualifiedName(new QualifiedName(qualifiedName));

                GlossaryTerm sourceTerm = new GlossaryTerm();
                sourceTerm.setLocale(srcLocale);
                sourceTerm.setContent(row.get(0));
                entry.getGlossaryTerms().add(sourceTerm);

                LocaleId targetLocale = localeColMap.get(col);
                GlossaryTerm targetTerm = new GlossaryTerm();
                targetTerm.setLocale(targetLocale);
                targetTerm.setContent(row.get(col));
                entry.getGlossaryTerms().add(targetTerm);

                // Create the per-locale bucket on first use
                List<GlossaryEntry> bucket = entriesByLocale.get(targetLocale);
                if (bucket == null) {
                    bucket = Lists.newArrayList();
                    entriesByLocale.put(targetLocale, bucket);
                }
                bucket.add(entry);
            }
        }
        return entriesByLocale;
    } finally {
        reader.close();
    }
}

From source file:org.zanata.adapter.glossary.GlossaryCSVReader.java

/**
 * Basic validation of the CSV file format:
 * - the file is not empty,
 * - it has at least 2 rows (a header and one entry),
 * - every row has the same column count as the header.
 *
 * @param records all records of the file, header first
 * @throws RuntimeException if one of the rules above is violated
 */
private void validateCSVEntries(@Nonnull List<CSVRecord> records) {
    final int rowCount = records.size();
    if (rowCount == 0) {
        throw new RuntimeException("Invalid CSV file - empty file");
    }
    if (rowCount < 2) {
        throw new RuntimeException("Invalid CSV file - no entries found");
    }

    // Every data row is compared against the header's column count
    final int headerColumns = records.get(0).size();
    for (CSVRecord dataRow : records.subList(1, rowCount)) {
        if (dataRow.size() != headerColumns) {
            throw new RuntimeException("Invalid CSV file - inconsistency of columns with header");
        }
    }
}

From source file:org.zanata.adapter.glossary.GlossaryCSVReader.java

/**
 * Builds the map from column index to locale by reading the header row
 * from its first column up to, but excluding, the first column index that
 * appears as a value in the description map.
 * Format of CSV: {source locale},{locale},{locale}...,pos,description
 *
 * @param records        all records of the file; the first one is the header
 * @param descriptionMap column indexes of the non-locale columns
 * @return the locale of each locale column, keyed by column index
 */
private Map<Integer, LocaleId> setupLocalesMap(List<CSVRecord> records, Map<String, Integer> descriptionMap) {
    Map<Integer, LocaleId> localeColMap = new HashMap<Integer, LocaleId>();
    CSVRecord headerRow = records.get(0);
    // Strictly below size(): the header has no value at index size(), so
    // the loop must end there even if no description column stops it first
    for (int column = 0; column < headerRow.size() && !descriptionMap.containsValue(column); column++) {
        LocaleId locale = new LocaleId(StringUtils.trim(headerRow.get(column)));
        localeColMap.put(column, locale);
    }
    return localeColMap;
}

From source file:org.zanata.adapter.glossary.GlossaryCSVReader.java

/**
 * Maps the POS and DESC keys to the indexes of the last two columns of
 * the header row, in the CSV format:
 * {source locale},{locale},{locale}...,pos,description
 *
 * @param records all records of the file; the first one is the header
 * @return a map with POS at the second-to-last column index and DESC at
 *         the last column index
 */
private Map<String, Integer> setupDescMap(List<CSVRecord> records) {
    final int columnCount = records.get(0).size();
    Map<String, Integer> columnByKey = new HashMap<String, Integer>();
    columnByKey.put(POS, columnCount - 2);
    columnByKey.put(DESC, columnCount - 1);
    return columnByKey;
}