Example usage for org.apache.commons.csv CSVRecord iterator

List of usage examples for org.apache.commons.csv CSVRecord iterator

Introduction

On this page you can find example usage for org.apache.commons.csv CSVRecord.iterator().

Prototype

@Override
public Iterator<String> iterator() 

Document

Returns an iterator over the values of this record.
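
Before the real-world examples below, here is a minimal, self-contained sketch of the iterator in action. The inline CSV data and the class name CsvRecordIteratorExample are illustrative assumptions, not taken from any project on this page.

import java.io.IOException;
import java.io.StringReader;
import java.util.Iterator;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class CsvRecordIteratorExample {
    public static void main(String[] args) throws IOException {
        // Parse two records from an inline CSV string.
        try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader("a,b,c\n1,2,3"))) {
            for (CSVRecord record : parser) {
                // iterator() yields the record's values in column order.
                Iterator<String> it = record.iterator();
                while (it.hasNext()) {
                    System.out.println(it.next());
                }
            }
        }
    }
}

As several of the examples below show, the same iterator can also be materialized into a list, e.g. with Guava's Lists.newArrayList(record.iterator()).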

Usage

From source file:com.stratio.decision.executables.DataFlowFromCsvMain.java

public static void main(String[] args) throws IOException, NumberFormatException, InterruptedException {
    if (args.length < 4) {
        log.info(
                "Usage: \n param 1 - path to file \n param 2 - stream name to send the data \n param 3 - time in ms to wait to send each data \n param 4 - broker list");
    } else {
        Producer<String, String> producer = new Producer<String, String>(createProducerConfig(args[3]));
        Gson gson = new Gson();

        Reader in = new FileReader(args[0]);
        CSVParser parser = CSVFormat.DEFAULT.parse(in);

        List<String> columnNames = new ArrayList<>();
        for (CSVRecord csvRecord : parser.getRecords()) {

            if (columnNames.size() == 0) {
                Iterator<String> iterator = csvRecord.iterator();
                while (iterator.hasNext()) {
                    columnNames.add(iterator.next());
                }
            } else {
                StratioStreamingMessage message = new StratioStreamingMessage();

                message.setOperation(STREAM_OPERATIONS.MANIPULATION.INSERT.toLowerCase());
                message.setStreamName(args[1]);
                message.setTimestamp(System.currentTimeMillis());
                message.setSession_id(String.valueOf(System.currentTimeMillis()));
                message.setRequest_id(String.valueOf(System.currentTimeMillis()));
                message.setRequest("dummy request");

                List<ColumnNameTypeValue> sensorData = new ArrayList<>();
                for (int i = 0; i < columnNames.size(); i++) {

                    // Workaround
                    Object value = null;
                    try {
                        value = Double.valueOf(csvRecord.get(i));
                    } catch (NumberFormatException e) {
                        value = csvRecord.get(i);
                    }
                    sensorData.add(new ColumnNameTypeValue(columnNames.get(i), null, value));
                }

                message.setColumns(sensorData);

                String json = gson.toJson(message);
                log.info("Sending data: {}", json);
                producer.send(new KeyedMessage<String, String>(InternalTopic.TOPIC_DATA.getTopicName(),
                        STREAM_OPERATIONS.MANIPULATION.INSERT, json));

                log.info("Sleeping {} ms...", args[2]);
                Thread.sleep(Long.valueOf(args[2]));
            }
        }
        log.info("Program completed.");
    }
}

From source file:edu.caltech.ipac.firefly.server.util.DsvToDataGroup.java

public static DataGroup parse(File inf, CSVFormat format) throws IOException {

    BufferedReader reader = new BufferedReader(new FileReader(inf), IpacTableUtil.FILE_IO_BUFFER_SIZE);

    List<DataType> columns = new ArrayList<DataType>();
    CSVParser parser = new CSVParser(reader, format);
    List<CSVRecord> records = parser.getRecords();
    if (records != null && records.size() > 0) {

        // parse the column info
        CSVRecord cols = records.get(0);
        for (Iterator<String> itr = cols.iterator(); itr.hasNext();) {
            String s = itr.next();
            if (!StringUtils.isEmpty(s)) {
                columns.add(new DataType(s, null)); // unknown type
            }
        }

        DataGroup dg = new DataGroup(null, columns);

        // parse the data
        for (int i = 1; i < records.size(); i++) {
            DataObject row = parseRow(dg, records.get(i));
            if (row != null) {
                dg.add(row);
            }
        }
        dg.shrinkToFitData();
        return dg;
    }
    return null;
}

From source file:edu.ucla.cs.scai.swim.qa.ontology.dbpedia.tipicality.DbpediaCategoryAttributeCounts.java

private static void processFile(File csvData, String category) throws IOException {
    BufferedReader in = new BufferedReader(
            new InputStreamReader(new GZIPInputStream(new FileInputStream(csvData))));
    //CSVParser parser = CSVParser.parse(csvData, Charset.defaultCharset(), CSVFormat.RFC4180);
    CSVParser parser = CSVFormat.EXCEL.parse(in);
    int r = 0;
    ArrayList<Integer> attributePositions = new ArrayList<>();
    ArrayList<String> attributeNames = new ArrayList<>();
    HashMap<String, Integer> thisCategoryAttributeCounts = new HashMap<>();
    categoryAttributeCount.put(category, thisCategoryAttributeCounts);
    for (CSVRecord csvRecord : parser) {
        if (r == 0) {
            Iterator<String> it = csvRecord.iterator();
            it.next(); //skip URI
            if (!it.hasNext()) { //it is an empty file
                return;
            }
            it.next(); //skip rdf-schema#label
            it.next(); //skip rdf-schema#comment
            int c = 2;
            for (; it.hasNext();) {
                c++;
                String attr = it.next();
                if (!attr.endsWith("_label")) {
                    attributePositions.add(c);
                }
            }
            categories.add(category);
        } else if (r == 1) {
            Iterator<String> it = csvRecord.iterator();
            it.next(); //skip uri
            it.next(); //skip rdf-schema#label
            it.next(); //skip rdf-schema#comment
            int c = 2;
            int i = 0;
            while (i < attributePositions.size()) {
                c++;
                String attr = it.next();
                if (attributePositions.get(i) == c) {
                    if (!stopAttributes.contains(attr)) {
                        attributes.add(attr);
                    }
                    attributeNames.add(attr);
                    i++;
                }

            }
        } else if (r > 3) {
            Iterator<String> it = csvRecord.iterator();
            String uri = it.next();
            /*if (entities.contains(uri)) {
             System.out.println(uri + " already processed");
             continue;
             }*/
            entities.add(uri);
            it.next(); //skip rdf-schema#label
            it.next(); //skip rdf-schema#comment
            int c = 2;
            int i = 0;
            while (i < attributePositions.size()) {
                c++;
                String val = it.next();
                if (attributePositions.get(i) == c) {
                    if (!val.equalsIgnoreCase("null")) {
                        String attribute = attributeNames.get(i);
                        if (!stopAttributes.contains(attribute)) {
                            Integer ac = attributeCount.get(attribute);
                            if (ac == null) {
                                attributeCount.put(attribute, 1);
                            } else {
                                attributeCount.put(attribute, ac + 1);
                            }

                            Integer tcac = thisCategoryAttributeCounts.get(attribute);
                            if (tcac == null) {
                                thisCategoryAttributeCounts.put(attribute, 1);
                            } else {
                                thisCategoryAttributeCounts.put(attribute, tcac + 1);
                            }

                            HashMap<String, Integer> thisAttributeCategoryCounts = attributeCategoryCount
                                    .get(attribute);
                            if (thisAttributeCategoryCounts == null) {
                                thisAttributeCategoryCounts = new HashMap<>();
                                attributeCategoryCount.put(attribute, thisAttributeCategoryCounts);
                            }
                            Integer tacc = thisAttributeCategoryCounts.get(category);
                            if (tacc == null) {
                                thisAttributeCategoryCounts.put(category, 1);
                            } else {
                                thisAttributeCategoryCounts.put(category, tacc + 1);
                            }
                        }
                    }
                    i++;
                }
            }
        }
        r++;
    }
    categoryCount.put(category, r - 3);
}

From source file:edu.ucla.cs.scai.swim.qa.ontology.dbpedia.tipicality.Test.java

private static ArrayList<HashSet<String>> extractEntities(File csvData, int nOfAttributes) throws IOException {
    CSVParser parser = CSVParser.parse(csvData, Charset.defaultCharset(), CSVFormat.RFC4180);
    int r = 0;
    ArrayList<Integer> attributePositions = new ArrayList<>();
    ArrayList<String> attributeNames = new ArrayList<>();
    ArrayList<HashSet<String>> res = new ArrayList<>();
    for (CSVRecord csvRecord : parser) {
        if (r == 0) {
            Iterator<String> it = csvRecord.iterator();
            it.next(); //skip URI
            if (!it.hasNext()) { //it is an empty file
                return res;
            }
            it.next(); //skip rdf-schema#label
            it.next(); //skip rdf-schema#comment
            int c = 2;
            for (; it.hasNext();) {
                c++;
                String attr = it.next();
                if (!attr.endsWith("_label")) {
                    attributePositions.add(c);
                }
            }
        } else if (r == 1) {
            Iterator<String> it = csvRecord.iterator();
            it.next(); //skip uri
            it.next(); //skip rdf-schema#label
            it.next(); //skip rdf-schema#comment
            int c = 2;
            int i = 0;
            while (i < attributePositions.size()) {
                c++;
                String attr = it.next();
                if (attributePositions.get(i) == c) {
                    if (!stopAttributes.contains(attr)) {
                        attributes.add(attr);
                    }
                    attributeNames.add(attr);
                    i++;
                }
            }
        } else if (r > 3) {
            ArrayList<String> attributesOfThisEntity = new ArrayList<>();
            Iterator<String> it = csvRecord.iterator();
            String uri = it.next();
            it.next(); //skip rdf-schema#label
            it.next(); //skip rdf-schema#comment
            int c = 2;
            int i = 0;
            while (i < attributePositions.size()) {
                c++;
                String val = it.next();
                if (attributePositions.get(i) == c) {
                    if (!val.equalsIgnoreCase("null")) {
                        String attribute = attributeNames.get(i);
                        if (!stopAttributes.contains(attribute)) {
                            attributesOfThisEntity.add(attribute);
                        }
                    }
                    i++;
                }
            }
            Collections.shuffle(attributesOfThisEntity);
            HashSet<String> s = new HashSet<>();
            for (int k = 0; k < Math.min(nOfAttributes, attributesOfThisEntity.size()); k++) {
                s.add(attributesOfThisEntity.get(k));
            }
            res.add(s);
        }
        r++;
    }
    return res;
}

From source file:cma.fa.tc.impl.utils.files.SimpleCsvReader.java

@Override
public List<List<String>> read() {
    List<List<String>> result = new ArrayList<>();

    try (BufferedReader in = new BufferedReader(
            new InputStreamReader(this.getClass().getResourceAsStream(this.path), this.charset))) {
        Iterable<CSVRecord> parser = CSVFormat.EXCEL.parse(in);
        for (CSVRecord record : parser) {
            result.add(Lists.newArrayList(record.iterator()));
        }

    } catch (Exception ex) {
        log.error("Cannot read {}", this.path, ex); // path fills the placeholder; the trailing exception keeps the stack trace
    }

    return result;
}

From source file:com.willwinder.universalgcodesender.utils.GrblLookups.java

public GrblLookups(String prefix) {
    String filename = prefix + "_" + Localization.loadedLocale() + ".csv";

    URL u = GrblLookups.class.getResource(pathFor(filename));
    if (u == null) {
        filename = prefix + "_en_US.csv";
    }

    try {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(GrblLookups.class.getResourceAsStream(pathFor(filename))))) {
            Iterable<CSVRecord> records = CSVFormat.RFC4180.withFirstRecordAsHeader().parse(reader);
            for (CSVRecord record : records) {
                List<String> list = Lists.newArrayList(record.iterator());
                lookups.put(record.get(0), list.toArray(new String[0]));
            }
        }
    } catch (IOException ex) {
        System.out.println("Unable to load GRBL resources.");
        ex.printStackTrace();
    }
}

From source file:citation_prediction.CitationCore.java

/**
 * Fix the citation data, which is in years, by translating the timestamps and citations to days.
 * @param record The citation history in years.
 * @param limitToRows Limit the rows being processed.
 * @return The citation history in days.
 */
private static double[][] fixData(CSVRecord record, int limitToRows) {

    double[][] r = null;
    int citationCount = 0;
    int numberOfRowsToProcess = 0;

    Iterator<String> record_iterator = record.iterator();
    record_iterator.next(); //move past the paper id
    record_iterator.next(); //move past the paper publish year

    if (limitToRows != 0) {
        numberOfRowsToProcess = limitToRows;
        r = new double[numberOfRowsToProcess + 1][2];
    } else {
        numberOfRowsToProcess = record.size() - 2;
        r = new double[record.size() - 2][2];
    }

    for (int rowIndex = 0; record_iterator.hasNext() && rowIndex < numberOfRowsToProcess; rowIndex++) {

        String citations_forthis_year = record_iterator.next();

        r[rowIndex][0] = Double.valueOf(rowIndex); //timestamp
        r[rowIndex][1] = Double.valueOf(citations_forthis_year); //citation

        citationCount += r[rowIndex][1];
    }

    return fixData(r, citationCount);

}

From source file:biz.ganttproject.impex.csv.RecordGroup.java

boolean isHeader(CSVRecord record) {
    Set<String> thoseFields = Sets.newHashSet();
    for (Iterator<String> it = record.iterator(); it.hasNext();) {
        thoseFields.add(it.next());
    }
    return thoseFields.containsAll(myMandatoryFields);
}

From source file:biz.ganttproject.impex.csv.RecordGroup.java

boolean process(CSVRecord record) {
    assert record.size() > 0;
    boolean allEmpty = true;
    for (Iterator<String> it = record.iterator(); it.hasNext();) {
        if (!Strings.isNullOrEmpty(it.next())) {
            allEmpty = false;
            break;
        }
    }
    if (allEmpty) {
        return false;
    }
    try {
        return doProcess(record);
    } catch (Throwable e) {
        GPLogger.getLogger(GanttCSVOpen.class).log(Level.WARNING,
                String.format("Failed to process record:\n%s", record), e);
        return false;
    }
}

From source file:com.marklogic.contentpump.DelimitedTextInputFormat.java

public List<InputSplit> getSplits(JobContext job) throws IOException {
    boolean delimSplit = isSplitInput(job.getConfiguration());
    //if delimSplit is true, size of each split is determined by 
    //Math.max(minSize, Math.min(maxSize, blockSize)) in FileInputFormat
    List<InputSplit> splits = super.getSplits(job);
    if (!delimSplit) {
        return splits;
    }

    if (splits.size() >= SPLIT_COUNT_LIMIT) {
        //if #splits > 1 million, there is enough parallelism
        //therefore no point to split
        LOG.warn("Exceeding SPLIT_COUNT_LIMIT, input_split is off: " + SPLIT_COUNT_LIMIT);
        DefaultStringifier.store(job.getConfiguration(), false, ConfigConstants.CONF_SPLIT_INPUT);
        return splits;
    }
    // add header info into splits
    List<InputSplit> populatedSplits = new ArrayList<InputSplit>();
    LOG.info(splits.size() + " DelimitedSplits generated");
    Configuration conf = job.getConfiguration();
    char delimiter = 0;
    ArrayList<Text> hlist = new ArrayList<Text>();
    for (InputSplit file : splits) {
        FileSplit fsplit = ((FileSplit) file);
        Path path = fsplit.getPath();
        FileSystem fs = path.getFileSystem(conf);

        if (fsplit.getStart() == 0) {
            // parse the inSplit, get the header
            FSDataInputStream fileIn = fs.open(path);

            String delimStr = conf.get(ConfigConstants.CONF_DELIMITER, ConfigConstants.DEFAULT_DELIMITER);
            if (delimStr.length() == 1) {
                delimiter = delimStr.charAt(0);
            } else {
                LOG.error("Incorrect delimitor: " + delimiter + ". Expects single character.");
            }
            String encoding = conf.get(MarkLogicConstants.OUTPUT_CONTENT_ENCODING,
                    MarkLogicConstants.DEFAULT_OUTPUT_CONTENT_ENCODING);
            InputStreamReader instream = new InputStreamReader(fileIn, encoding);
            CSVParser parser = new CSVParser(instream,
                    CSVParserFormatter.getFormat(delimiter, DelimitedTextReader.encapsulator, true, true));
            Iterator<CSVRecord> it = parser.iterator();

            String[] header = null;
            if (it.hasNext()) {
                CSVRecord record = (CSVRecord) it.next();
                Iterator<String> recordIterator = record.iterator();
                int recordSize = record.size();
                header = new String[recordSize];
                for (int i = 0; i < recordSize; i++) {
                    if (recordIterator.hasNext()) {
                        header[i] = (String) recordIterator.next();
                    } else {
                        throw new IOException("Record size doesn't match the real size");
                    }
                }

                EncodingUtil.handleBOMUTF8(header, 0);

                hlist.clear();
                for (String s : header) {
                    hlist.add(new Text(s));
                }
            }
            instream.close();
        }

        DelimitedSplit ds = new DelimitedSplit(new TextArrayWritable(hlist.toArray(new Text[hlist.size()])),
                path, fsplit.getStart(), fsplit.getLength(), fsplit.getLocations());
        populatedSplits.add(ds);
    }

    return populatedSplits;
}