List of usage examples for org.apache.commons.csv CSVRecord values
String[] values
To view the source code for org.apache.commons.csv CSVRecord values, click the Source link below each example.
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.ReasonUtils.java
/** * //from www.j av a 2 s .co m * @param file * @param writer * @param compressed * @return * @throws IOException */ public static int reason(String inFile, String outFile, boolean compressed, Map<Long, Set<Triple>> schemaTerms, Set<Long> productiveTerms) throws IOException { log.info("Reasoning for file: " + inFile + ", memory usage: " + Utils.getMemoryUsageInGB() + "GB"); int inferredTriples = 0; // loop through the instance triples probably stored in a file and generate all the triples matching the schema triples set try (BufferedReader reader = getQueryResultsReader(inFile, compressed); PrintWriter writer = new PrintWriter(new BufferedOutputStream( new GZIPOutputStream(new FileOutputStream(outFile), Constants.GZIP_BUF_SIZE), Constants.GZIP_BUF_SIZE));) { Iterable<CSVRecord> records; if (compressed) { // ignore first row subject,predicate,object records = CSVFormat.DEFAULT.withHeader().withSkipHeaderRecord().parse(reader); } else { records = CSVFormat.DEFAULT.parse(reader); } Long term; for (CSVRecord record : records) { ETriple instanceTriple = ETriple.fromCSV(record.values()); // TODO review for OWL ruleset if (SchemaURIType.RDF_TYPE.id == instanceTriple.getPredicate()) { term = instanceTriple.getObject(); // object } else { term = instanceTriple.getPredicate(); // predicate } Set<Triple> schemaTriples = schemaTerms.get(term); if ((schemaTriples != null) && !schemaTriples.isEmpty()) { productiveTerms.add(term); for (Triple schemaTriple : schemaTriples) { Rule rule = GenericRule.getRule(schemaTriple); Triple inferredTriple = rule.head(schemaTriple, instanceTriple); if (inferredTriple != null) { writer.println(inferredTriple.toCsv()); inferredTriples++; } } } } } return inferredTriples; }
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.ReasonUtils.java
/** * /*from ww w . j a va 2s.co m*/ * @param file * @param writer * @param compressed * @return * @throws IOException */ public static int reason(String inFile, String outFile, boolean compressed, Map<Long, Set<Triple>> schemaTerms, Set<Long> productiveTerms, DuplicatesBuster duplicatesBuster) throws IOException { log.info("Reasoning for file: " + inFile + ", memory usage: " + Utils.getMemoryUsageInGB() + "GB"); int inferredTriples = 0; // loop through the instance triples probably stored in a file and generate all the triples matching the schema triples set try (BufferedReader reader = getQueryResultsReader(inFile, compressed); PrintWriter writer = new PrintWriter(new BufferedOutputStream( new GZIPOutputStream(new FileOutputStream(outFile), Constants.GZIP_BUF_SIZE), Constants.GZIP_BUF_SIZE));) { Iterable<CSVRecord> records; if (compressed) { // ignore first row subject,predicate,object records = CSVFormat.DEFAULT.withHeader().withSkipHeaderRecord().parse(reader); } else { records = CSVFormat.DEFAULT.parse(reader); } Long term; for (CSVRecord record : records) { ETriple instanceTriple = ETriple.fromCSV(record.values()); // TODO review for OWL ruleset if (SchemaURIType.RDF_TYPE.id == instanceTriple.getPredicate()) { term = instanceTriple.getObject(); // object } else { term = instanceTriple.getPredicate(); // predicate } Set<Triple> schemaTriples = schemaTerms.get(term); if ((schemaTriples != null) && !schemaTriples.isEmpty()) { productiveTerms.add(term); for (Triple schemaTriple : schemaTriples) { Rule rule = GenericRule.getRule(schemaTriple); Triple inferredTriple = rule.head(schemaTriple, instanceTriple); if ((inferredTriple != null) && !duplicatesBuster.isDuplicate(inferredTriple)) { writer.println(inferredTriple.toCsv()); inferredTriples++; } } } } } return inferredTriples; }
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask8IntTest.java
@Test @Ignore//from w w w . j a v a 2 s . co m public void testCsvParser() throws FileNotFoundException, IOException { String filename = "/var/folders/3h/0whnrhjn1ddfb5p9pq_c6_mh0000gn/T//ecarf-evm-1_1456690870927_QueryResults_0"; int rows = 0; try (BufferedReader reader = new BufferedReader( new InputStreamReader(new GZIPInputStream(new FileInputStream(filename), Constants.GZIP_BUF_SIZE)), Constants.GZIP_BUF_SIZE);) { Iterable<CSVRecord> records = CSVFormat.DEFAULT.withHeader().withSkipHeaderRecord().parse(reader); for (CSVRecord record : records) { ETriple instanceTriple = ETriple.fromCSV(record.values()); rows++; } } assertEquals(8091263, rows); }
From source file:io.ecarf.core.utils.UsageParser.java
/**
 * Parses the configured usage CSV files and aggregates billing usage per measurement id.
 *
 * For each row: column 1 is the measurement id (MEASURE_PREFIX stripped), column 2 the
 * raw usage value in seconds, column 4 a VM identifier. Container-engine VM rows are
 * only echoed to stdout; every other row is accumulated into {@code this.usages}.
 * A summary of all measurements, VMs and aggregated usage is printed at the end.
 *
 * NOTE(review): column meanings above are inferred from how the indices are used here —
 * confirm against the actual export format.
 *
 * @throws FileNotFoundException if one of the input files does not exist
 * @throws IOException if reading an input file fails
 */
public void parse() throws FileNotFoundException, IOException {
    for (String file : files) {
        try (BufferedReader reader = new BufferedReader(new FileReader(file), Constants.GZIP_BUF_SIZE);) {

            // skip the header row of each usage export
            Iterable<CSVRecord> records = CSVFormat.DEFAULT.withHeader().withSkipHeaderRecord().parse(reader);

            for (CSVRecord record : records) {
                String[] values = record.values();
                String measurement = StringUtils.remove(values[1], MEASURE_PREFIX);
                this.measurementIds.add(measurement);

                if (!measurement.contains(CONTAINER_ENGINE_VM)) {

                    if (measurement.contains(VM)) {
                        this.numberOfVms++;
                        this.vms.add(values[4]);
                    }

                    // lazily create the per-measurement accumulator
                    Usage usage = this.usages.get(measurement);
                    if (usage == null) {
                        usage = new Usage();
                        this.usages.put(measurement, usage);
                    }

                    long value = Long.parseLong(values[2]);
                    usage.raw += value;

                    if (measurement.contains(VM)) {
                        long adjusted = value;

                        // minimum billable is 10 minutes for VMs
                        if (adjusted < MIN_BILL) {
                            adjusted = MIN_BILL;
                        }
                        // round up value to the nearest minute
                        adjusted = (long) (MINUTE * Math.ceil(adjusted / 60.0));
                        usage.value += adjusted;

                        // hourly based billing: at least one hour, then rounded up to whole hours
                        adjusted = value;
                        if (adjusted < HOUR) {
                            adjusted = HOUR;
                        } else {
                            adjusted = (long) (HOUR * Math.ceil(adjusted / 3600.0));
                        }
                        usage.adjusted += adjusted;
                    }

                } else {
                    // container engine vms: echoed only, not aggregated
                    System.out.println(StringUtils.join(values, ','));
                }
            }
        }
    }

    // summary output: every measurement id, VM counts, then per-measurement totals as CSV
    for (String measureId : this.measurementIds) {
        System.out.println(measureId);
    }

    System.out.println("Total number of VMs: " + this.numberOfVms);
    System.out.println(this.vms);
    System.out.println(this.vms.size());

    for (Entry<String, Usage> entry : this.usages.entrySet()) {
        Usage usage = entry.getValue();
        System.out.println(entry.getKey() + ',' + usage.raw + ',' + usage.value + ',' + usage.adjusted);
    }
}
From source file:io.ecarf.core.cloud.task.processor.reason.phase0.DoReasonTask3.java
/** * /*from w w w . j av a2s .c o m*/ * @param term * @param select * @param schemaTriples * @param rows * @param table * @param writer * @return * @throws IOException */ private int inferAndSaveTriplesToFile(Term term, List<String> select, Set<Triple> schemaTriples, BigInteger rows, String table, PrintWriter writer) throws IOException { int inferredTriples = 0; int failedTriples = 0; // loop through the instance triples probably stored in a file and generate all the triples matching the schema triples set try (BufferedReader r = new BufferedReader(new FileReader(term.getFilename()), Constants.GZIP_BUF_SIZE)) { Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(r); // records will contain lots of duplicates Set<String> inferredAlready = new HashSet<String>(); try { for (CSVRecord record : records) { String values = ((select.size() == 1) ? record.get(0) : StringUtils.join(record.values(), ',')); if (!inferredAlready.contains(values)) { inferredAlready.add(values); NTriple instanceTriple = new NTriple(); if (select.size() == 1) { instanceTriple.set(select.get(0), record.get(0)); } else { instanceTriple.set(select, record.values()); } for (Triple schemaTriple : schemaTriples) { Rule rule = GenericRule.getRule(schemaTriple); Triple inferredTriple = rule.head(schemaTriple, instanceTriple); writer.println(inferredTriple.toCsv()); inferredTriples++; } // this is just to avoid any memory issues if (inferredAlready.size() > MAX_CACHE) { inferredAlready.clear(); log.info("Cleared cache of inferred terms"); } } } } catch (Exception e) { log.error("Failed to parse selected terms", e); failedTriples++; } } //inferredFiles.add(inferredTriplesFile); log.info("\nSelect Triples: " + rows + ", Inferred: " + inferredTriples + ", Triples for term: " + term + ", Failed Triples: " + failedTriples); return inferredTriples; }
From source file:io.ecarf.core.cloud.task.processor.reason.phase0.DoReasonTask4.java
/** * /*from w w w .j a v a2 s . co m*/ * @param term * @param select * @param schemaTriples * @param rows * @param table * @param writer * @return * @throws IOException */ private int inferAndSaveTriplesToFile(Term term, List<String> select, Set<Triple> schemaTriples, BigInteger rows, String table, PrintWriter writer) throws IOException { int inferredTriples = 0; int failedTriples = 0; // loop through the instance triples probably stored in a file and generate all the triples matching the schema triples set try (BufferedReader r = new BufferedReader(new FileReader(term.getFilename()), Constants.GZIP_BUF_SIZE)) { Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(r); // records will contain lots of duplicates Set<String> inferredAlready = new HashSet<String>(); try { for (CSVRecord record : records) { String values = ((select.size() == 1) ? record.get(0) : StringUtils.join(record.values(), ',')); if (!inferredAlready.contains(values)) { inferredAlready.add(values); NTriple instanceTriple = new NTriple(); if (select.size() == 1) { instanceTriple.set(select.get(0), record.get(0)); } else { instanceTriple.set(select, record.values()); } for (Triple schemaTriple : schemaTriples) { Rule rule = GenericRule.getRule(schemaTriple); Triple inferredTriple = rule.head(schemaTriple, instanceTriple); writer.println(inferredTriple.toCsv()); inferredTriples++; } // this is just to avoid any memory issues if (inferredAlready.size() > MAX_CACHE) { inferredAlready.clear(); log.info("Cleared cache of inferred terms"); } } else { this.duplicates++; } } } catch (Exception e) { log.error("Failed to parse selected terms", e); failedTriples++; } } //inferredFiles.add(inferredTriplesFile); log.info("\nSelect Triples: " + rows + ", Inferred: " + inferredTriples + ", Triples for term: " + term + ", Failed Triples: " + failedTriples); return inferredTriples; }
From source file:io.ecarf.core.cloud.task.processor.reason.phase1.DoReasonTask5.java
/**
 * Reads the query-result rows downloaded for the given term and applies every schema
 * triple's rule to each distinct row, appending the inferred triples (CSV) to the writer.
 * Duplicate rows are skipped and counted in {@code this.duplicates}; null rule heads
 * are skipped silently.
 *
 * @param term          term whose downloaded results file ({@code term.getFilename()}) is read
 * @param select        selected columns; when it has exactly one entry only {@code record.get(0)} is used
 * @param schemaTriples schema triples whose rules are applied to every instance triple
 * @param rows          row count reported by the select query — used for logging only
 * @param table         not referenced in this method body; kept for signature compatibility
 * @param writer        destination for the inferred triples in CSV form
 * @return the number of inferred triples written
 * @throws IOException if the results file cannot be opened or read
 */
private int inferAndSaveTriplesToFile(Term term, List<String> select, Set<Triple> schemaTriples,
        BigInteger rows, String table, PrintWriter writer) throws IOException {

    int inferredTriples = 0;
    int failedTriples = 0;

    // loop through the instance triples probably stored in a file and generate all the
    // triples matching the schema triples set
    try (BufferedReader r = new BufferedReader(new FileReader(term.getFilename()), Constants.GZIP_BUF_SIZE)) {

        Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(r);

        // records will contain lots of duplicates — dedupe on the joined column values
        Set<String> inferredAlready = new HashSet<String>();

        try {
            for (CSVRecord record : records) {

                String values = ((select.size() == 1) ? record.get(0)
                        : StringUtils.join(record.values(), ','));

                if (!inferredAlready.contains(values)) {
                    inferredAlready.add(values);

                    NTriple instanceTriple = new NTriple();

                    if (select.size() == 1) {
                        instanceTriple.set(select.get(0), record.get(0));
                    } else {
                        instanceTriple.set(select, record.values());
                    }

                    for (Triple schemaTriple : schemaTriples) {
                        Rule rule = GenericRule.getRule(schemaTriple);
                        Triple inferredTriple = rule.head(schemaTriple, instanceTriple);

                        // rule.head may produce nothing for this combination — skip nulls
                        if (inferredTriple != null) {
                            writer.println(inferredTriple.toCsv());
                            inferredTriples++;
                        }
                    }

                    // this is just to avoid any memory issues
                    if (inferredAlready.size() > MAX_CACHE) {
                        inferredAlready.clear();
                        log.info("Cleared cache of inferred terms");
                    }
                } else {
                    this.duplicates++;
                }
            }
        } catch (Exception e) {
            // best-effort: a bad row aborts the loop but the method still returns the
            // triples inferred so far (note: failedTriples counts failures, not rows)
            log.error("Failed to parse selected terms", e);
            failedTriples++;
        }
    }

    //inferredFiles.add(inferredTriplesFile);
    log.info("\nSelect Triples: " + rows + ", Inferred: " + inferredTriples + ", Triples for term: " + term
            + ", Failed Triples: " + failedTriples);

    return inferredTriples;
}
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask6.java
/**
 * Performs one inference round over the query-result file: for every instance triple,
 * looks up schema triples by term (object for rdf:type statements, predicate otherwise)
 * and applies each matching rule, writing non-null inferred triples as CSV.
 *
 * Terms are full URI strings in this task version (later versions use encoded longs).
 *
 * @param queryResult     result descriptor; {@code getFilename()} locates the downloaded rows
 * @param productiveTerms out-parameter collecting every term that matched schema triples
 * @param table           not referenced in this method body; kept for signature compatibility
 * @param writer          destination for the inferred triples in CSV form
 * @return the number of inferred triples written
 * @throws IOException if the file cannot be read, or wrapping any parse failure
 */
protected int inferAndSaveTriplesToFile(QueryResult queryResult, Set<String> productiveTerms, String table,
        PrintWriter writer) throws IOException {

    log.info("********************** Starting Inference Round **********************");

    int inferredTriples = 0;

    // loop through the instance triples probably stored in a file and generate all the
    // triples matching the schema triples set
    try (BufferedReader r = new BufferedReader(new FileReader(queryResult.getFilename()),
            Constants.GZIP_BUF_SIZE)) {

        // records will contain lots of duplicates; the dedupe cache used by earlier task
        // versions (DoReasonTask3/4/5) was dropped here
        Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(r);

        try {
            String term;

            for (CSVRecord record : records) {

                NTriple instanceTriple = (NTriple) NTriple.fromCSV(record.values());

                // rdf:type statements are matched on their object, all others on the predicate
                // TODO review for OWL ruleset
                if (SchemaURIType.RDF_TYPE.getUri().equals(instanceTriple.getPredicate())) {
                    term = instanceTriple.getObject(); // object
                } else {
                    term = instanceTriple.getPredicate(); // predicate
                }

                Set<Triple> schemaTriples = schemaTerms.get(term);

                if ((schemaTriples != null) && !schemaTriples.isEmpty()) {
                    productiveTerms.add(term);

                    for (Triple schemaTriple : schemaTriples) {
                        Rule rule = GenericRule.getRule(schemaTriple);
                        Triple inferredTriple = rule.head(schemaTriple, instanceTriple);

                        // rule.head may produce nothing for this combination — skip nulls
                        if (inferredTriple != null) {
                            writer.println(inferredTriple.toCsv());
                            inferredTriples++;
                        }
                    }
                }
            }
        } catch (Exception e) {
            // unlike the phase-0/1 tasks, any parse failure aborts the whole round
            log.error("Failed to parse selected terms", e);
            throw new IOException(e);
        }
    }

    log.info("Total Rows: " + queryResult.getStats().getTotalRows() + ", Total Processed Bytes: "
            + queryResult.getStats().getTotalProcessedGBytes() + " GB" + ", Inferred: " + inferredTriples);

    log.info("********************** Completed Inference Round **********************");

    return inferredTriples;
}
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask7.java
/**
 * Performs one inference round over the query-result file: for every instance triple,
 * looks up schema triples by term (object for rdf:type statements, predicate otherwise)
 * and applies each matching rule, writing non-null inferred triples as CSV.
 *
 * Terms are dictionary-encoded longs in this task version (DoReasonTask6 used URI strings).
 *
 * @param queryResult     result descriptor; {@code getFilename()} locates the downloaded rows
 * @param productiveTerms out-parameter collecting every term that matched schema triples
 * @param table           not referenced in this method body; kept for signature compatibility
 * @param writer          destination for the inferred triples in CSV form
 * @return the number of inferred triples written
 * @throws IOException if the file cannot be read, or wrapping any parse failure
 */
protected int inferAndSaveTriplesToFile(QueryResult queryResult, Set<Long> productiveTerms, String table,
        PrintWriter writer) throws IOException {

    log.info("********************** Starting Inference Round **********************");

    int inferredTriples = 0;

    // loop through the instance triples probably stored in a file and generate all the
    // triples matching the schema triples set
    try (BufferedReader r = new BufferedReader(new FileReader(queryResult.getFilename()),
            Constants.GZIP_BUF_SIZE)) {

        // records will contain lots of duplicates; the dedupe cache used by earlier task
        // versions (DoReasonTask3/4/5) was dropped here
        Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(r);

        try {
            Long term;

            for (CSVRecord record : records) {

                ETriple instanceTriple = ETriple.fromCSV(record.values());

                // rdf:type statements are matched on their object, all others on the predicate
                // TODO review for OWL ruleset
                if (SchemaURIType.RDF_TYPE.id == instanceTriple.getPredicate()) {
                    term = instanceTriple.getObject(); // object
                } else {
                    term = instanceTriple.getPredicate(); // predicate
                }

                Set<Triple> schemaTriples = schemaTerms.get(term);

                if ((schemaTriples != null) && !schemaTriples.isEmpty()) {
                    productiveTerms.add(term);

                    for (Triple schemaTriple : schemaTriples) {
                        Rule rule = GenericRule.getRule(schemaTriple);
                        Triple inferredTriple = rule.head(schemaTriple, instanceTriple);

                        // rule.head may produce nothing for this combination — skip nulls
                        if (inferredTriple != null) {
                            writer.println(inferredTriple.toCsv());
                            inferredTriples++;
                        }
                    }
                }
            }
        } catch (Exception e) {
            // any parse failure aborts the whole round
            log.error("Failed to parse selected terms", e);
            throw new IOException(e);
        }
    }

    log.info("Total Rows: " + queryResult.getStats().getTotalRows() + ", Total Processed Bytes: "
            + queryResult.getStats().getTotalProcessedGBytes() + " GB" + ", Inferred: " + inferredTriples);

    log.info("********************** Completed Inference Round **********************");

    return inferredTriples;
}
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask8.java
/**
 * Performs one inference round over all output files of a query result. Chooses gzipped
 * parsing (with a subject,predicate,object header row to skip) when the result's row
 * count exceeds {@code this.ddLimit}; otherwise reads plain CSV. For every instance
 * triple, looks up schema triples by term (object for rdf:type statements, predicate
 * otherwise) and applies each matching rule, writing non-null inferred triples as CSV.
 *
 * @param queryResult     result descriptor providing row counts and output file paths
 * @param productiveTerms out-parameter collecting every encoded term that matched schema triples
 * @param writer          destination for the inferred triples in CSV form
 * @return the number of inferred triples written across all files
 * @throws IOException if a file cannot be read, or wrapping any parse failure
 */
protected int inferAndSaveTriplesToFile(QueryResult queryResult, Set<Long> productiveTerms, PrintWriter writer)
        throws IOException {

    log.info("********************** Starting Inference Round **********************");

    int inferredTriples = 0;

    // large results are exported gzipped with a header row; small ones as plain CSV
    boolean compressed = queryResult.getTotalRows() > this.ddLimit;

    List<String> files = queryResult.getStats().getOutputFiles();

    for (String file : files) {

        // loop through the instance triples probably stored in a file and generate all the
        // triples matching the schema triples set
        try (BufferedReader r = this.getQueryResultsReader(file, compressed);) {

            Iterable<CSVRecord> records;

            if (compressed) {
                // ignore first row subject,predicate,object
                records = CSVFormat.DEFAULT.withHeader().withSkipHeaderRecord().parse(r);
            } else {
                records = CSVFormat.DEFAULT.parse(r);
            }

            // records will contain lots of duplicates; the dedupe cache used by earlier
            // task versions (DoReasonTask3/4/5) was dropped here
            try {
                Long term;

                for (CSVRecord record : records) {

                    ETriple instanceTriple = ETriple.fromCSV(record.values());

                    // rdf:type statements are matched on their object, all others on the predicate
                    // TODO review for OWL ruleset
                    if (SchemaURIType.RDF_TYPE.id == instanceTriple.getPredicate()) {
                        term = instanceTriple.getObject(); // object
                    } else {
                        term = instanceTriple.getPredicate(); // predicate
                    }

                    Set<Triple> schemaTriples = schemaTerms.get(term);

                    if ((schemaTriples != null) && !schemaTriples.isEmpty()) {
                        productiveTerms.add(term);

                        for (Triple schemaTriple : schemaTriples) {
                            Rule rule = GenericRule.getRule(schemaTriple);
                            Triple inferredTriple = rule.head(schemaTriple, instanceTriple);

                            // rule.head may produce nothing for this combination — skip nulls
                            if (inferredTriple != null) {
                                writer.println(inferredTriple.toCsv());
                                inferredTriples++;
                            }
                        }
                    }
                }
            } catch (Exception e) {
                // any parse failure aborts the whole round
                log.error("Failed to parse selected terms", e);
                throw new IOException(e);
            }
        }
    }

    log.info("Total Rows: " + queryResult.getStats().getTotalRows() + ", Total Processed Bytes: "
            + queryResult.getStats().getTotalProcessedGBytes() + " GB" + ", Inferred: " + inferredTriples
            + ", compressed = " + compressed);

    log.info("********************** Completed Inference Round **********************");

    return inferredTriples;
}