List of usage examples for org.apache.commons.csv CSVRecord values
String[] values
To view the source code for org.apache.commons.csv CSVRecord values, click the Source link below each example.
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.ReasonUtils.java
/** * //from www.j av a 2 s .co m * @param file * @param writer * @param compressed * @return * @throws IOException */ public static int reason(String inFile, String outFile, boolean compressed, Map<Long, Set<Triple>> schemaTerms, Set<Long> productiveTerms) throws IOException { log.info("Reasoning for file: " + inFile + ", memory usage: " + Utils.getMemoryUsageInGB() + "GB"); int inferredTriples = 0; // loop through the instance triples probably stored in a file and generate all the triples matching the schema triples set try (BufferedReader reader = getQueryResultsReader(inFile, compressed); PrintWriter writer = new PrintWriter(new BufferedOutputStream( new GZIPOutputStream(new FileOutputStream(outFile), Constants.GZIP_BUF_SIZE), Constants.GZIP_BUF_SIZE));) { Iterable<CSVRecord> records; if (compressed) { // ignore first row subject,predicate,object records = CSVFormat.DEFAULT.withHeader().withSkipHeaderRecord().parse(reader); } else { records = CSVFormat.DEFAULT.parse(reader); } Long term; for (CSVRecord record : records) { ETriple instanceTriple = ETriple.fromCSV(record.values()); // TODO review for OWL ruleset if (SchemaURIType.RDF_TYPE.id == instanceTriple.getPredicate()) { term = instanceTriple.getObject(); // object } else { term = instanceTriple.getPredicate(); // predicate } Set<Triple> schemaTriples = schemaTerms.get(term); if ((schemaTriples != null) && !schemaTriples.isEmpty()) { productiveTerms.add(term); for (Triple schemaTriple : schemaTriples) { Rule rule = GenericRule.getRule(schemaTriple); Triple inferredTriple = rule.head(schemaTriple, instanceTriple); if (inferredTriple != null) { writer.println(inferredTriple.toCsv()); inferredTriples++; } } } } } return inferredTriples; }
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.ReasonUtils.java
/** * /*from ww w . j a va 2s.co m*/ * @param file * @param writer * @param compressed * @return * @throws IOException */ public static int reason(String inFile, String outFile, boolean compressed, Map<Long, Set<Triple>> schemaTerms, Set<Long> productiveTerms, DuplicatesBuster duplicatesBuster) throws IOException { log.info("Reasoning for file: " + inFile + ", memory usage: " + Utils.getMemoryUsageInGB() + "GB"); int inferredTriples = 0; // loop through the instance triples probably stored in a file and generate all the triples matching the schema triples set try (BufferedReader reader = getQueryResultsReader(inFile, compressed); PrintWriter writer = new PrintWriter(new BufferedOutputStream( new GZIPOutputStream(new FileOutputStream(outFile), Constants.GZIP_BUF_SIZE), Constants.GZIP_BUF_SIZE));) { Iterable<CSVRecord> records; if (compressed) { // ignore first row subject,predicate,object records = CSVFormat.DEFAULT.withHeader().withSkipHeaderRecord().parse(reader); } else { records = CSVFormat.DEFAULT.parse(reader); } Long term; for (CSVRecord record : records) { ETriple instanceTriple = ETriple.fromCSV(record.values()); // TODO review for OWL ruleset if (SchemaURIType.RDF_TYPE.id == instanceTriple.getPredicate()) { term = instanceTriple.getObject(); // object } else { term = instanceTriple.getPredicate(); // predicate } Set<Triple> schemaTriples = schemaTerms.get(term); if ((schemaTriples != null) && !schemaTriples.isEmpty()) { productiveTerms.add(term); for (Triple schemaTriple : schemaTriples) { Rule rule = GenericRule.getRule(schemaTriple); Triple inferredTriple = rule.head(schemaTriple, instanceTriple); if ((inferredTriple != null) && !duplicatesBuster.isDuplicate(inferredTriple)) { writer.println(inferredTriple.toCsv()); inferredTriples++; } } } } } return inferredTriples; }
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask8IntTest.java
@Test @Ignore//from w w w . j a v a 2 s . co m public void testCsvParser() throws FileNotFoundException, IOException { String filename = "/var/folders/3h/0whnrhjn1ddfb5p9pq_c6_mh0000gn/T//ecarf-evm-1_1456690870927_QueryResults_0"; int rows = 0; try (BufferedReader reader = new BufferedReader( new InputStreamReader(new GZIPInputStream(new FileInputStream(filename), Constants.GZIP_BUF_SIZE)), Constants.GZIP_BUF_SIZE);) { Iterable<CSVRecord> records = CSVFormat.DEFAULT.withHeader().withSkipHeaderRecord().parse(reader); for (CSVRecord record : records) { ETriple instanceTriple = ETriple.fromCSV(record.values()); rows++; } } assertEquals(8091263, rows); }
From source file:io.ecarf.core.utils.UsageParser.java
/**
 * Parses the configured usage CSV files and aggregates billing usage per measurement id.
 *
 * For each row: column 1 is the measurement id (MEASURE_PREFIX stripped), column 2 the
 * raw usage value in seconds, column 4 a VM identifier. Container-engine VM rows are
 * only echoed to stdout; every other row is accumulated into {@code this.usages}.
 * A summary of all measurements, VMs and aggregated usage is printed at the end.
 *
 * NOTE(review): column meanings above are inferred from how the indices are used here —
 * confirm against the actual export format.
 *
 * @throws FileNotFoundException if one of the input files does not exist
 * @throws IOException if reading an input file fails
 */
public void parse() throws FileNotFoundException, IOException {
    for (String file : files) {
        try (BufferedReader reader = new BufferedReader(new FileReader(file), Constants.GZIP_BUF_SIZE);) {

            // skip the header row of each usage export
            Iterable<CSVRecord> records = CSVFormat.DEFAULT.withHeader().withSkipHeaderRecord().parse(reader);

            for (CSVRecord record : records) {
                String[] values = record.values();
                String measurement = StringUtils.remove(values[1], MEASURE_PREFIX);
                this.measurementIds.add(measurement);

                if (!measurement.contains(CONTAINER_ENGINE_VM)) {

                    if (measurement.contains(VM)) {
                        this.numberOfVms++;
                        this.vms.add(values[4]);
                    }

                    // lazily create the per-measurement accumulator
                    Usage usage = this.usages.get(measurement);
                    if (usage == null) {
                        usage = new Usage();
                        this.usages.put(measurement, usage);
                    }

                    long value = Long.parseLong(values[2]);
                    usage.raw += value;

                    if (measurement.contains(VM)) {
                        long adjusted = value;

                        // minimum billable is 10 minutes for VMs
                        if (adjusted < MIN_BILL) {
                            adjusted = MIN_BILL;
                        }
                        // round up value to the nearest minute
                        adjusted = (long) (MINUTE * Math.ceil(adjusted / 60.0));
                        usage.value += adjusted;

                        // hourly based billing: at least one hour, then rounded up to whole hours
                        adjusted = value;
                        if (adjusted < HOUR) {
                            adjusted = HOUR;
                        } else {
                            adjusted = (long) (HOUR * Math.ceil(adjusted / 3600.0));
                        }
                        usage.adjusted += adjusted;
                    }

                } else {
                    // container engine vms: echoed only, not aggregated
                    System.out.println(StringUtils.join(values, ','));
                }
            }
        }
    }

    // summary output: every measurement id, VM counts, then per-measurement totals as CSV
    for (String measureId : this.measurementIds) {
        System.out.println(measureId);
    }

    System.out.println("Total number of VMs: " + this.numberOfVms);
    System.out.println(this.vms);
    System.out.println(this.vms.size());

    for (Entry<String, Usage> entry : this.usages.entrySet()) {
        Usage usage = entry.getValue();
        System.out.println(entry.getKey() + ',' + usage.raw + ',' + usage.value + ',' + usage.adjusted);
    }
}
From source file:io.ecarf.core.cloud.task.processor.reason.phase0.DoReasonTask3.java
/** * /*from w w w . j av a2s .c o m*/ * @param term * @param select * @param schemaTriples * @param rows * @param table * @param writer * @return * @throws IOException */ private int inferAndSaveTriplesToFile(Term term, List<String> select, Set<Triple> schemaTriples, BigInteger rows, String table, PrintWriter writer) throws IOException { int inferredTriples = 0; int failedTriples = 0; // loop through the instance triples probably stored in a file and generate all the triples matching the schema triples set try (BufferedReader r = new BufferedReader(new FileReader(term.getFilename()), Constants.GZIP_BUF_SIZE)) { Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(r); // records will contain lots of duplicates Set<String> inferredAlready = new HashSet<String>(); try { for (CSVRecord record : records) { String values = ((select.size() == 1) ? record.get(0) : StringUtils.join(record.values(), ',')); if (!inferredAlready.contains(values)) { inferredAlready.add(values); NTriple instanceTriple = new NTriple(); if (select.size() == 1) { instanceTriple.set(select.get(0), record.get(0)); } else { instanceTriple.set(select, record.values()); } for (Triple schemaTriple : schemaTriples) { Rule rule = GenericRule.getRule(schemaTriple); Triple inferredTriple = rule.head(schemaTriple, instanceTriple); writer.println(inferredTriple.toCsv()); inferredTriples++; } // this is just to avoid any memory issues if (inferredAlready.size() > MAX_CACHE) { inferredAlready.clear(); log.info("Cleared cache of inferred terms"); } } } } catch (Exception e) { log.error("Failed to parse selected terms", e); failedTriples++; } } //inferredFiles.add(inferredTriplesFile); log.info("\nSelect Triples: " + rows + ", Inferred: " + inferredTriples + ", Triples for term: " + term + ", Failed Triples: " + failedTriples); return inferredTriples; }
From source file:io.ecarf.core.cloud.task.processor.reason.phase0.DoReasonTask4.java
/** * /*from w w w .j a v a2 s . co m*/ * @param term * @param select * @param schemaTriples * @param rows * @param table * @param writer * @return * @throws IOException */ private int inferAndSaveTriplesToFile(Term term, List<String> select, Set<Triple> schemaTriples, BigInteger rows, String table, PrintWriter writer) throws IOException { int inferredTriples = 0; int failedTriples = 0; // loop through the instance triples probably stored in a file and generate all the triples matching the schema triples set try (BufferedReader r = new BufferedReader(new FileReader(term.getFilename()), Constants.GZIP_BUF_SIZE)) { Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(r); // records will contain lots of duplicates Set<String> inferredAlready = new HashSet<String>(); try { for (CSVRecord record : records) { String values = ((select.size() == 1) ? record.get(0) : StringUtils.join(record.values(), ',')); if (!inferredAlready.contains(values)) { inferredAlready.add(values); NTriple instanceTriple = new NTriple(); if (select.size() == 1) { instanceTriple.set(select.get(0), record.get(0)); } else { instanceTriple.set(select, record.values()); } for (Triple schemaTriple : schemaTriples) { Rule rule = GenericRule.getRule(schemaTriple); Triple inferredTriple = rule.head(schemaTriple, instanceTriple); writer.println(inferredTriple.toCsv()); inferredTriples++; } // this is just to avoid any memory issues if (inferredAlready.size() > MAX_CACHE) { inferredAlready.clear(); log.info("Cleared cache of inferred terms"); } } else { this.duplicates++; } } } catch (Exception e) { log.error("Failed to parse selected terms", e); failedTriples++; } } //inferredFiles.add(inferredTriplesFile); log.info("\nSelect Triples: " + rows + ", Inferred: " + inferredTriples + ", Triples for term: " + term + ", Failed Triples: " + failedTriples); return inferredTriples; }
From source file:io.ecarf.core.cloud.task.processor.reason.phase1.DoReasonTask5.java
/**
 * Reads the query-result rows downloaded for the given term and applies every schema
 * triple's rule to each distinct row, appending the inferred triples (CSV) to the writer.
 * Duplicate rows are skipped and counted in {@code this.duplicates}; null rule heads
 * are skipped silently.
 *
 * @param term          term whose downloaded results file ({@code term.getFilename()}) is read
 * @param select        selected columns; when it has exactly one entry only {@code record.get(0)} is used
 * @param schemaTriples schema triples whose rules are applied to every instance triple
 * @param rows          row count reported by the select query — used for logging only
 * @param table         not referenced in this method body; kept for signature compatibility
 * @param writer        destination for the inferred triples in CSV form
 * @return the number of inferred triples written
 * @throws IOException if the results file cannot be opened or read
 */
private int inferAndSaveTriplesToFile(Term term, List<String> select, Set<Triple> schemaTriples,
        BigInteger rows, String table, PrintWriter writer) throws IOException {

    int inferredTriples = 0;
    int failedTriples = 0;

    // loop through the instance triples probably stored in a file and generate all the
    // triples matching the schema triples set
    try (BufferedReader r = new BufferedReader(new FileReader(term.getFilename()), Constants.GZIP_BUF_SIZE)) {

        Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(r);

        // records will contain lots of duplicates — dedupe on the joined column values
        Set<String> inferredAlready = new HashSet<String>();

        try {
            for (CSVRecord record : records) {

                String values = ((select.size() == 1) ? record.get(0)
                        : StringUtils.join(record.values(), ','));

                if (!inferredAlready.contains(values)) {
                    inferredAlready.add(values);

                    NTriple instanceTriple = new NTriple();

                    if (select.size() == 1) {
                        instanceTriple.set(select.get(0), record.get(0));
                    } else {
                        instanceTriple.set(select, record.values());
                    }

                    for (Triple schemaTriple : schemaTriples) {
                        Rule rule = GenericRule.getRule(schemaTriple);
                        Triple inferredTriple = rule.head(schemaTriple, instanceTriple);

                        // rule.head may produce nothing for this combination — skip nulls
                        if (inferredTriple != null) {
                            writer.println(inferredTriple.toCsv());
                            inferredTriples++;
                        }
                    }

                    // this is just to avoid any memory issues
                    if (inferredAlready.size() > MAX_CACHE) {
                        inferredAlready.clear();
                        log.info("Cleared cache of inferred terms");
                    }
                } else {
                    this.duplicates++;
                }
            }
        } catch (Exception e) {
            // best-effort: a bad row aborts the loop but the method still returns the
            // triples inferred so far (note: failedTriples counts failures, not rows)
            log.error("Failed to parse selected terms", e);
            failedTriples++;
        }
    }

    //inferredFiles.add(inferredTriplesFile);
    log.info("\nSelect Triples: " + rows + ", Inferred: " + inferredTriples + ", Triples for term: " + term
            + ", Failed Triples: " + failedTriples);

    return inferredTriples;
}
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask6.java
/**
 * Performs one inference round over the query-result file: for every instance triple,
 * looks up schema triples by term (object for rdf:type statements, predicate otherwise)
 * and applies each matching rule, writing non-null inferred triples as CSV.
 *
 * Terms are full URI strings in this task version (later versions use encoded longs).
 *
 * @param queryResult     result descriptor; {@code getFilename()} locates the downloaded rows
 * @param productiveTerms out-parameter collecting every term that matched schema triples
 * @param table           not referenced in this method body; kept for signature compatibility
 * @param writer          destination for the inferred triples in CSV form
 * @return the number of inferred triples written
 * @throws IOException if the file cannot be read, or wrapping any parse failure
 */
protected int inferAndSaveTriplesToFile(QueryResult queryResult, Set<String> productiveTerms, String table,
        PrintWriter writer) throws IOException {

    log.info("********************** Starting Inference Round **********************");

    int inferredTriples = 0;

    // loop through the instance triples probably stored in a file and generate all the
    // triples matching the schema triples set
    try (BufferedReader r = new BufferedReader(new FileReader(queryResult.getFilename()),
            Constants.GZIP_BUF_SIZE)) {

        // records will contain lots of duplicates; the dedupe cache used by earlier task
        // versions (DoReasonTask3/4/5) was dropped here
        Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(r);

        try {
            String term;

            for (CSVRecord record : records) {

                NTriple instanceTriple = (NTriple) NTriple.fromCSV(record.values());

                // rdf:type statements are matched on their object, all others on the predicate
                // TODO review for OWL ruleset
                if (SchemaURIType.RDF_TYPE.getUri().equals(instanceTriple.getPredicate())) {
                    term = instanceTriple.getObject(); // object
                } else {
                    term = instanceTriple.getPredicate(); // predicate
                }

                Set<Triple> schemaTriples = schemaTerms.get(term);

                if ((schemaTriples != null) && !schemaTriples.isEmpty()) {
                    productiveTerms.add(term);

                    for (Triple schemaTriple : schemaTriples) {
                        Rule rule = GenericRule.getRule(schemaTriple);
                        Triple inferredTriple = rule.head(schemaTriple, instanceTriple);

                        // rule.head may produce nothing for this combination — skip nulls
                        if (inferredTriple != null) {
                            writer.println(inferredTriple.toCsv());
                            inferredTriples++;
                        }
                    }
                }
            }
        } catch (Exception e) {
            // unlike the phase-0/1 tasks, any parse failure aborts the whole round
            log.error("Failed to parse selected terms", e);
            throw new IOException(e);
        }
    }

    log.info("Total Rows: " + queryResult.getStats().getTotalRows() + ", Total Processed Bytes: "
            + queryResult.getStats().getTotalProcessedGBytes() + " GB" + ", Inferred: " + inferredTriples);

    log.info("********************** Completed Inference Round **********************");

    return inferredTriples;
}
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask7.java
/**
 * Performs one inference round over the query-result file: for every instance triple,
 * looks up schema triples by term (object for rdf:type statements, predicate otherwise)
 * and applies each matching rule, writing non-null inferred triples as CSV.
 *
 * Terms are dictionary-encoded longs in this task version (DoReasonTask6 used URI strings).
 *
 * @param queryResult     result descriptor; {@code getFilename()} locates the downloaded rows
 * @param productiveTerms out-parameter collecting every term that matched schema triples
 * @param table           not referenced in this method body; kept for signature compatibility
 * @param writer          destination for the inferred triples in CSV form
 * @return the number of inferred triples written
 * @throws IOException if the file cannot be read, or wrapping any parse failure
 */
protected int inferAndSaveTriplesToFile(QueryResult queryResult, Set<Long> productiveTerms, String table,
        PrintWriter writer) throws IOException {

    log.info("********************** Starting Inference Round **********************");

    int inferredTriples = 0;

    // loop through the instance triples probably stored in a file and generate all the
    // triples matching the schema triples set
    try (BufferedReader r = new BufferedReader(new FileReader(queryResult.getFilename()),
            Constants.GZIP_BUF_SIZE)) {

        // records will contain lots of duplicates; the dedupe cache used by earlier task
        // versions (DoReasonTask3/4/5) was dropped here
        Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(r);

        try {
            Long term;

            for (CSVRecord record : records) {

                ETriple instanceTriple = ETriple.fromCSV(record.values());

                // rdf:type statements are matched on their object, all others on the predicate
                // TODO review for OWL ruleset
                if (SchemaURIType.RDF_TYPE.id == instanceTriple.getPredicate()) {
                    term = instanceTriple.getObject(); // object
                } else {
                    term = instanceTriple.getPredicate(); // predicate
                }

                Set<Triple> schemaTriples = schemaTerms.get(term);

                if ((schemaTriples != null) && !schemaTriples.isEmpty()) {
                    productiveTerms.add(term);

                    for (Triple schemaTriple : schemaTriples) {
                        Rule rule = GenericRule.getRule(schemaTriple);
                        Triple inferredTriple = rule.head(schemaTriple, instanceTriple);

                        // rule.head may produce nothing for this combination — skip nulls
                        if (inferredTriple != null) {
                            writer.println(inferredTriple.toCsv());
                            inferredTriples++;
                        }
                    }
                }
            }
        } catch (Exception e) {
            // any parse failure aborts the whole round
            log.error("Failed to parse selected terms", e);
            throw new IOException(e);
        }
    }

    log.info("Total Rows: " + queryResult.getStats().getTotalRows() + ", Total Processed Bytes: "
            + queryResult.getStats().getTotalProcessedGBytes() + " GB" + ", Inferred: " + inferredTriples);

    log.info("********************** Completed Inference Round **********************");

    return inferredTriples;
}
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask8.java
/**
 * Performs one inference round over all output files of a query result. Chooses gzipped
 * parsing (with a subject,predicate,object header row to skip) when the result's row
 * count exceeds {@code this.ddLimit}; otherwise reads plain CSV. For every instance
 * triple, looks up schema triples by term (object for rdf:type statements, predicate
 * otherwise) and applies each matching rule, writing non-null inferred triples as CSV.
 *
 * @param queryResult     result descriptor providing row counts and output file paths
 * @param productiveTerms out-parameter collecting every encoded term that matched schema triples
 * @param writer          destination for the inferred triples in CSV form
 * @return the number of inferred triples written across all files
 * @throws IOException if a file cannot be read, or wrapping any parse failure
 */
protected int inferAndSaveTriplesToFile(QueryResult queryResult, Set<Long> productiveTerms, PrintWriter writer)
        throws IOException {

    log.info("********************** Starting Inference Round **********************");

    int inferredTriples = 0;

    // large results are exported gzipped with a header row; small ones as plain CSV
    boolean compressed = queryResult.getTotalRows() > this.ddLimit;

    List<String> files = queryResult.getStats().getOutputFiles();

    for (String file : files) {

        // loop through the instance triples probably stored in a file and generate all the
        // triples matching the schema triples set
        try (BufferedReader r = this.getQueryResultsReader(file, compressed);) {

            Iterable<CSVRecord> records;

            if (compressed) {
                // ignore first row subject,predicate,object
                records = CSVFormat.DEFAULT.withHeader().withSkipHeaderRecord().parse(r);
            } else {
                records = CSVFormat.DEFAULT.parse(r);
            }

            // records will contain lots of duplicates; the dedupe cache used by earlier
            // task versions (DoReasonTask3/4/5) was dropped here
            try {
                Long term;

                for (CSVRecord record : records) {

                    ETriple instanceTriple = ETriple.fromCSV(record.values());

                    // rdf:type statements are matched on their object, all others on the predicate
                    // TODO review for OWL ruleset
                    if (SchemaURIType.RDF_TYPE.id == instanceTriple.getPredicate()) {
                        term = instanceTriple.getObject(); // object
                    } else {
                        term = instanceTriple.getPredicate(); // predicate
                    }

                    Set<Triple> schemaTriples = schemaTerms.get(term);

                    if ((schemaTriples != null) && !schemaTriples.isEmpty()) {
                        productiveTerms.add(term);

                        for (Triple schemaTriple : schemaTriples) {
                            Rule rule = GenericRule.getRule(schemaTriple);
                            Triple inferredTriple = rule.head(schemaTriple, instanceTriple);

                            // rule.head may produce nothing for this combination — skip nulls
                            if (inferredTriple != null) {
                                writer.println(inferredTriple.toCsv());
                                inferredTriples++;
                            }
                        }
                    }
                }
            } catch (Exception e) {
                // any parse failure aborts the whole round
                log.error("Failed to parse selected terms", e);
                throw new IOException(e);
            }
        }
    }

    log.info("Total Rows: " + queryResult.getStats().getTotalRows() + ", Total Processed Bytes: "
            + queryResult.getStats().getTotalProcessedGBytes() + " GB" + ", Inferred: " + inferredTriples
            + ", compressed = " + compressed);

    log.info("********************** Completed Inference Round **********************");

    return inferredTriples;
}