List of usage examples for org.apache.commons.csv CSVFormat TDF
CSVFormat TDF
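CSVFormat.TDF is the predefined tab-delimited (TSV) format in Apache Commons CSV. Before the project-specific examples below, here is a minimal sketch of parsing tab-separated text with it; the class name and sample data are invented for illustration.

import java.io.StringReader;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class TdfExample {
    public static void main(String[] args) throws Exception {
        // Tab-separated input; the first record is treated as the header row.
        String tsv = "id\tname\n1\tAlice\n2\tBob\n";
        try (CSVParser parser = new CSVParser(new StringReader(tsv),
                CSVFormat.TDF.withFirstRecordAsHeader())) {
            for (CSVRecord record : parser) {
                System.out.println(record.get("id") + " -> " + record.get("name"));
            }
        }
    }
}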
From source file: ca.nrc.cadc.tap.db.AsciiTableData.java
/**
 * Constructor.
 *
 * @param in The data stream
 * @param contentType The content type of the data
 * @throws IOException If a data handling error occurs
 */
public AsciiTableData(InputStream in, String contentType) throws IOException {
    char delimiter = ',';
    if (contentType.equals(TableContentHandler.CONTENT_TYPE_TSV)) {
        delimiter = '\t';
    }
    InputStreamReader ir = new InputStreamReader(in);
    if (TableContentHandler.CONTENT_TYPE_TSV.equals(contentType)) {
        this.reader = new CSVParser(ir, CSVFormat.TDF.withFirstRecordAsHeader());
    } else if (TableContentHandler.CONTENT_TYPE_CSV.equals(contentType)) {
        this.reader = new CSVParser(ir, CSVFormat.DEFAULT.withFirstRecordAsHeader());
    } else {
        throw new UnsupportedOperationException("contentType: " + contentType);
    }
    this.rowIterator = reader.iterator();
    Map<String, Integer> header = reader.getHeaderMap();
    columnNames = new ArrayList<String>(header.size());
    for (String s : header.keySet()) {
        columnNames.add(s.trim());
        log.debug("found column: " + s);
    }
    if (columnNames.isEmpty()) {
        throw new IllegalArgumentException("No data columns.");
    }
}
From source file: co.cask.hydrator.plugin.CSVParser.java
@Override
public void initialize(TransformContext context) throws Exception {
    super.initialize(context);
    String csvFormatString = config.format.toLowerCase();
    switch (csvFormatString) {
        case "default":
            csvFormat = CSVFormat.DEFAULT;
            break;
        case "excel":
            csvFormat = CSVFormat.EXCEL;
            break;
        case "mysql":
            csvFormat = CSVFormat.MYSQL;
            break;
        case "rfc4180":
            csvFormat = CSVFormat.RFC4180;
            break;
        case "tdf":
            csvFormat = CSVFormat.TDF;
            break;
        case "pdl":
            csvFormat = PDL;
            break;
        default:
            throw new IllegalArgumentException(
                "Format specified is not one of the allowed formats. Allowed formats are "
                    + "DEFAULT, EXCEL, MYSQL, RFC4180, PDL and TDF");
    }

    try {
        outSchema = Schema.parseJson(config.schema);
        fields = outSchema.getFields();
    } catch (IOException e) {
        throw new IllegalArgumentException("Format of schema specified is invalid. Please check the format.");
    }
}
From source file: com.ibm.watson.developer_cloud.professor_languo.pipeline.QuestionSetManagerTest.java
private void set_of_duplicate_question_ids_is_built() throws PipelineException {
    // Compile a list of all duplicate thread QIDs from the TSV file
    this.duplicateQuestionIDs = new HashSet<>();
    try (CSVParser parser = CSVFormat.TDF.withHeader().parse(new FileReader(dupThreadTsvFile))) {
        for (CSVRecord record : parser.getRecords()) {
            duplicateQuestionIDs.add(record.get(CorpusBuilder.TSV_COL_HEADER_THREAD_ID));
        }
    } catch (IOException e) {
        throw new PipelineException(e);
    }
}
From source file: com.ggvaidya.scinames.ui.DatasetImporterController.java
private Dataset loadDataset() throws IOException {
    String format = fileFormatComboBox.getSelectionModel().getSelectedItem();
    CSVFormat csvFormat = null;

    if (format == null) {
        csvFormat = CSVFormat.DEFAULT;
    } else {
        switch (format) {
            case "List of names":
                return Checklist.fromListInFile(currentFile);
            case "Default CSV":
                csvFormat = CSVFormat.DEFAULT;
                break;
            case "Microsoft Excel CSV":
                csvFormat = CSVFormat.EXCEL;
                break;
            case "RFC 4180 CSV":
                csvFormat = CSVFormat.RFC4180;
                break;
            case "Oracle MySQL CSV":
                csvFormat = CSVFormat.MYSQL;
                break;
            case "Tab-delimited file":
                csvFormat = CSVFormat.TDF;
                break;
            case "TaxDiff file":
                return ChecklistDiff.fromTaxDiffFile(currentFile);
            case "Excel file":
                return new ExcelImporter(currentFile).asDataset(0);
        }
    }

    if (csvFormat == null) {
        LOGGER.info("Could not determine CSV format from format '" + format + "', using CSV default.");
        csvFormat = CSVFormat.DEFAULT;
    }

    return Dataset.fromCSV(csvFormat, currentFile);
}
From source file: com.ggvaidya.scinames.complexquery.ComplexQueryViewController.java
@FXML
private void copyToClipboard(ActionEvent evt) {
    try {
        StringWriter writer = new StringWriter();
        List<List<String>> dataAsTable = getDataAsTable();

        fillCSVFormat(CSVFormat.TDF, writer, getDataAsTable());

        Clipboard clipboard = Clipboard.getSystemClipboard();
        HashMap<DataFormat, Object> content = new HashMap<>();
        content.put(DataFormat.PLAIN_TEXT, writer.getBuffer().toString());
        clipboard.setContent(content);

        Alert window = new Alert(Alert.AlertType.CONFIRMATION,
            (dataAsTable.get(0).size() - 1) + " rows written to clipboard.");
        window.showAndWait();
    } catch (IOException e) {
        Alert window = new Alert(Alert.AlertType.ERROR, "Could not save CSV to the clipboard: " + e);
        window.showAndWait();
    }
}
From source file: edu.caltech.ipac.firefly.server.util.ipactable.DataGroupReader.java
public static Format guessFormat(File inf) throws IOException {
    String fileExt = FileUtil.getExtension(inf);
    if (fileExt != null) {
        if (fileExt.equalsIgnoreCase("tbl")) {
            return Format.IPACTABLE;
        } else if (fileExt.equalsIgnoreCase("csv")) {
            return Format.CSV;
        } else if (fileExt.equalsIgnoreCase("tsv")) {
            return Format.TSV;
        } else if (fileExt.equalsIgnoreCase("fits")) {
            return Format.FITS;
        } else if (fileExt.equalsIgnoreCase("json")) {
            return Format.JSON;
        }
    }

    int readAhead = 10;
    int row = 0;
    BufferedReader reader = new BufferedReader(new FileReader(inf), IpacTableUtil.FILE_IO_BUFFER_SIZE);
    try {
        String line = reader.readLine();
        if (line.startsWith("{")) {
            return Format.JSON;
        }
        int[][] counts = new int[readAhead][2];
        int csvIdx = 0, tsvIdx = 1;
        while (line != null && row < readAhead) {
            if (line.startsWith("|") || line.startsWith("\\")) {
                return Format.IPACTABLE;
            } else if (line.startsWith("COORD_SYSTEM: ") || line.startsWith("EQUINOX: ")
                    || line.startsWith("NAME-RESOLVER: ")) {
                // NOTE: a fixed targets file contains the following lines at the beginning:
                // COORD_SYSTEM: xxx
                // EQUINOX: xxx
                // NAME-RESOLVER: xxx
                return Format.FIXEDTARGETS;
            }
            counts[row][csvIdx] = CSVFormat.DEFAULT.parse(new StringReader(line)).iterator().next().size();
            counts[row][tsvIdx] = CSVFormat.TDF.parse(new StringReader(line)).iterator().next().size();
            row++;
            line = reader.readLine();
        }

        // check csv
        int c = counts[0][csvIdx];
        boolean cMatch = true;
        for (int i = 1; i < row; i++) {
            cMatch = cMatch && counts[i][csvIdx] == c;
        }
        // check tsv
        int t = counts[0][tsvIdx];
        boolean tMatch = true;
        for (int i = 1; i < row; i++) {
            tMatch = tMatch && counts[i][tsvIdx] == t;
        }

        if (cMatch && tMatch) {
            if (t > c) {
                return Format.TSV;
            } else {
                return Format.CSV;
            }
        } else {
            if (cMatch) {
                return Format.CSV;
            } else if (tMatch) {
                return Format.TSV;
            } else {
                return Format.UNKNOWN;
            }
        }
    } finally {
        try {
            reader.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
From source file: com.hurence.logisland.service.cache.CSVKeyValueCacheService.java
@Override
// @OnEnabled
public void init(ControllerServiceInitializationContext context) throws InitializationException {
    super.init(context);
    try {
        if (context.getPropertyValue(DATABASE_FILE_URI).isSet()) {
            dbUri = context.getPropertyValue(DATABASE_FILE_URI).asString();
        }
        if (context.getPropertyValue(DATABASE_FILE_PATH).isSet()) {
            dbPath = context.getPropertyValue(DATABASE_FILE_PATH).asString();
        }
        if ((dbUri == null) && (dbPath == null)) {
            throw new Exception(
                "You must declare " + DATABASE_FILE_URI.getName() + " or " + DATABASE_FILE_PATH.getName());
        }

        InputStream is = null;
        if (dbUri != null) {
            logger.info("opening csv database from hdfs : " + dbUri);
            is = initFromUri(dbUri);
        }
        if (dbPath != null) {
            logger.info("opening csv database from local fs : " + dbPath);
            is = initFromPath(context, dbPath);
        }
        if (is == null) {
            throw new InitializationException("Something went wrong while initializing csv db from "
                + DATABASE_FILE_URI.getName() + " or " + DATABASE_FILE_PATH.getName());
        }

        CSVFormat format = CSVFormat.DEFAULT;
        if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_EXCEL.getValue())) {
            format = CSVFormat.EXCEL;
        } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_EXCEL_FR.getValue())) {
            format = CSVFormat.EXCEL.withDelimiter(';');
        } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_MYSQL.getValue())) {
            format = CSVFormat.MYSQL;
        } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_RFC4180.getValue())) {
            format = CSVFormat.RFC4180;
        } else if (context.getPropertyValue(CSV_FORMAT).asString().equals(CSV_TDF.getValue())) {
            format = CSVFormat.TDF;
        }

        if (context.getPropertyValue(CSV_HEADER).isSet()) {
            String[] columnNames = context.getPropertyValue(CSV_HEADER).asString().split(",");
            for (String name : columnNames) {
                headers.get().put(name, "string");
            }
            format = format.withHeader(columnNames);
        } else if (context.getPropertyValue(FIRST_LINE_HEADER).isSet()) {
            format = format.withFirstRecordAsHeader();
        } else {
            throw new InitializationException("unable to get headers from somewhere");
        }

        Charset charset = Charset.forName("UTF-8");
        if (context.getPropertyValue(ENCODING_CHARSET).isSet()) {
            String encoding = context.getPropertyValue(ENCODING_CHARSET).asString();
            charset = Charset.forName(encoding);
        }

        rowKey = context.getPropertyValue(ROW_KEY).asString();
        CSVParser parser = CSVParser.parse(is, charset, format);

        long count = 0;
        try {
            final Set<String> columnNames = parser.getHeaderMap().keySet();
            for (final CSVRecord record : parser) {
                Record logislandRecord = new StandardRecord();
                for (final String column : columnNames) {
                    logislandRecord.setStringField(column, record.get(column));
                }
                set(logislandRecord.getField(rowKey).asString(), logislandRecord);
                count++;
            }
        } finally {
            logger.info("successfully loaded " + count + " records from CSV file");
            parser.close();
            is.close();
        }
    } catch (Exception e) {
        getLogger().error("Could not load database file: {}", new Object[] { e.getMessage() });
        throw new InitializationException(e);
    }
}
From source file: com.ggvaidya.scinames.complexquery.ComplexQueryViewController.java
@FXML
private void exportToCSV(ActionEvent evt) {
    FileChooser chooser = new FileChooser();
    chooser.getExtensionFilters().setAll(new FileChooser.ExtensionFilter("CSV file", "*.csv"),
        new FileChooser.ExtensionFilter("Tab-delimited file", "*.txt"));
    File file = chooser.showSaveDialog(scene.getWindow());
    if (file != null) {
        CSVFormat format = CSVFormat.RFC4180;

        String outputFormat = chooser.getSelectedExtensionFilter().getDescription();
        if (outputFormat.equalsIgnoreCase("Tab-delimited file"))
            format = CSVFormat.TDF;

        try {
            List<List<String>> dataAsTable = getDataAsTable();
            fillCSVFormat(format, new FileWriter(file), dataAsTable);

            Alert window = new Alert(Alert.AlertType.CONFIRMATION,
                "CSV file '" + file + "' saved with " + (dataAsTable.get(0).size() - 1) + " rows.");
            window.showAndWait();
        } catch (IOException e) {
            Alert window = new Alert(Alert.AlertType.ERROR, "Could not save CSV to '" + file + "': " + e);
            window.showAndWait();
        }
    }
}
From source file: com.ibm.watson.developer_cloud.professor_languo.pipeline.QuestionSetManager.java
/**
 * This function is responsible for parsing a duplicate Stack Exchange thread TSV file produced by
 * {@link StackExchangeThreadSerializer}, and partitioning each such thread into the training set,
 * test set, or validation set. In addition, the corresponding row of the TSV file will be written
 * out to a training-, test-, or validation-set-specific TSV file in the same directory as the
 * input TSV file.
 *
 * @param dupQuestionFile - A TSV file containing duplicate {@link StackExchangeThread} records
 * @param trainTestValidateCumulativeProbs - A CDF of the desired proportion of training, test,
 *        and validation set records
 * @throws PipelineException
 */
private void parseTsvAndPartitionRecords(File dupQuestionFile, double[] trainTestValidateCumulativeProbs)
        throws PipelineException {
    // Open the TSV file for parsing, and CSVPrinters for outputting train, test,
    // and validation set TSV files
    String baseName = FilenameUtils.removeExtension(dupQuestionFile.getAbsolutePath());
    String extension = FilenameUtils.getExtension(dupQuestionFile.getAbsolutePath());

    try (FileReader reader = new FileReader(dupQuestionFile);
            CSVPrinter trainSetPrinter = new CSVPrinter(
                new FileWriter(baseName + StackExchangeConstants.DUP_THREAD_TSV_TRAIN_FILE_SUFFIX
                    + FilenameUtils.EXTENSION_SEPARATOR + extension),
                CSVFormat.TDF.withHeader(CorpusBuilder.getTsvColumnHeaders()));
            CSVPrinter testSetPrinter = new CSVPrinter(
                new FileWriter(baseName + StackExchangeConstants.DUP_THREAD_TSV_TEST_FILE_SUFFIX
                    + FilenameUtils.EXTENSION_SEPARATOR + extension),
                CSVFormat.TDF.withHeader(CorpusBuilder.getTsvColumnHeaders()));
            CSVPrinter validationSetPrinter = new CSVPrinter(
                new FileWriter(baseName + StackExchangeConstants.DUP_THREAD_TSV_VALIDATE_FILE_SUFFIX
                    + FilenameUtils.EXTENSION_SEPARATOR + extension),
                CSVFormat.TDF.withHeader(CorpusBuilder.getTsvColumnHeaders()))) {

        // Parse the duplicate thread TSV file
        CSVParser parser = CSVFormat.TDF.withHeader().parse(reader);

        // Iterate over each CSV record, and place it into a desired partition
        // (train, test, or validation)
        Iterator<CSVRecord> recordIterator = parser.iterator();
        while (recordIterator.hasNext()) {
            CSVRecord record = recordIterator.next();

            // Get the StackExchangeThread associated with this record, and create a question from it
            StackExchangeThread duplicateThread = StackExchangeThreadSerializer.deserializeThreadFromBinFile(
                record.get(CorpusBuilder.TSV_COL_HEADER_SERIALIZED_FILE_PATH));
            StackExchangeQuestion duplicateQuestion = new StackExchangeQuestion(duplicateThread);
            String parentId = record.get(CorpusBuilder.TSV_COL_HEADER_PARENT_ID);

            // Now drop this question into a partition, and write it to a corresponding TSV file
            double p = rng.nextDouble(); // Random number determines partition for this record
            if (p <= trainTestValidateCumulativeProbs[0]) {
                // This record goes in the training set
                if (!addQuestionToSet(duplicateQuestion, parentId, this.trainingSet)) {
                    throw new PipelineException(MessageFormat.format(
                        Messages.getString("RetrieveAndRank.TRAINING_SET_FAILED_Q"), //$NON-NLS-1$
                        duplicateThread.getId()));
                }
                trainSetPrinter.printRecord((Object[]) convertRecordToArray(record));
            } else if (p <= trainTestValidateCumulativeProbs[1]) {
                // This record goes in the test set
                if (!addQuestionToSet(duplicateQuestion, parentId, this.testSet)) {
                    throw new PipelineException(MessageFormat.format(
                        Messages.getString("RetrieveAndRank.TEST_SET_FAILED_Q"), //$NON-NLS-1$
                        duplicateThread.getId()));
                }
                testSetPrinter.printRecord((Object[]) convertRecordToArray(record));
            } else {
                // This record goes in the validation set
                assert (p <= trainTestValidateCumulativeProbs[2]);
                if (!addQuestionToSet(duplicateQuestion, parentId, this.validationSet)) {
                    throw new PipelineException(MessageFormat.format(
                        Messages.getString("RetrieveAndRank.VALIDATION_SET_FAILED_Q"), //$NON-NLS-1$
                        duplicateThread.getId()));
                }
                validationSetPrinter.printRecord((Object[]) convertRecordToArray(record));
            }
        }

        // Flush all the printers prior to closing
        trainSetPrinter.flush();
        testSetPrinter.flush();
        validationSetPrinter.flush();
    } catch (IOException | IngestionException e) {
        throw new PipelineException(e);
    }
}
From source file: com.ibm.watson.developer_cloud.professor_languo.pipeline.QuestionSetManagerTest.java
private void subset_tsv_files_are_properly_created() throws PipelineException {
    // Confirm that TSV files of the same format as the duplicate thread TSV file are created
    // for each of the subsets that are generated by the QuestionSetManager
    for (File file : Arrays.asList(trainFile, testFile, validateFile)) {
        assertTrue("File " + file.getName() + " is missing", file.exists());

        QuestionAnswerSet set;
        if (file == trainFile)
            set = this.questionSetManager.getTrainingSet();
        else if (file == testFile)
            set = this.questionSetManager.getTestSet();
        else
            set = this.questionSetManager.getValidationSet();

        // Iterate through each line of the subset TSV file and verify that the records
        // it contains are in fact in the corresponding QuestionAnswerSet
        try (CSVParser parser = CSVFormat.TDF.withHeader().parse(new FileReader(file))) {
            for (CSVRecord record : parser.getRecords()) {
                assertTrue("Subset TSV file has erroneous QID",
                    set.getQuestionIds().contains(record.get(CorpusBuilder.TSV_COL_HEADER_THREAD_ID)));
            }
        } catch (IOException e) {
            throw new PipelineException(e);
        }
    }
}