Example usage for org.apache.commons.csv CSVFormat TDF

List of usage examples for org.apache.commons.csv CSVFormat TDF

Introduction

In this page you can find the example usage for org.apache.commons.csv CSVFormat TDF.

Prototype

CSVFormat TDF

Click the Source Link below to view the source code for org.apache.commons.csv CSVFormat TDF.

Click Source Link

Document

Tab-delimited format.

Usage

From source file:ca.nrc.cadc.tap.db.AsciiTableData.java

/**
 * Constructor. Wraps the input stream in a CSV or TSV parser (selected by
 * content type) and consumes the first record as the column header row.
 *
 * @param in The data stream
 * @param contentType The content type of the data (TSV or CSV)
 * @throws IOException If a data handling error occurs
 * @throws UnsupportedOperationException If the content type is neither TSV nor CSV
 * @throws IllegalArgumentException If the header row contains no columns
 */
public AsciiTableData(InputStream in, String contentType) throws IOException {
    // NOTE(review): uses the platform default charset; specify an explicit
    // charset (e.g. UTF-8) if the input encoding is known — TODO confirm.
    InputStreamReader ir = new InputStreamReader(in);

    // Fixed: the original also computed a 'delimiter' local from the content
    // type that was never used — the CSVFormat choice below already selects
    // tab-delimited (TDF) vs. comma-delimited (DEFAULT).
    if (TableContentHandler.CONTENT_TYPE_TSV.equals(contentType)) {
        this.reader = new CSVParser(ir, CSVFormat.TDF.withFirstRecordAsHeader());
    } else if (TableContentHandler.CONTENT_TYPE_CSV.equals(contentType)) {
        this.reader = new CSVParser(ir, CSVFormat.DEFAULT.withFirstRecordAsHeader());
    } else {
        throw new UnsupportedOperationException("contentType: " + contentType);
    }

    this.rowIterator = reader.iterator();
    // Header map preserves column order; trim names since TSV/CSV headers
    // may carry stray whitespace.
    Map<String, Integer> header = reader.getHeaderMap();
    columnNames = new ArrayList<String>(header.size());
    for (String s : header.keySet()) {
        columnNames.add(s.trim());
        log.debug("found column: " + s);
    }
    if (columnNames.isEmpty()) {
        throw new IllegalArgumentException("No data columns.");
    }
}

From source file:co.cask.hydrator.plugin.CSVParser.java

/**
 * Initializes the transform: resolves the configured CSV format name to a
 * {@link CSVFormat} constant and parses the configured output schema JSON.
 *
 * @param context the transform context
 * @throws Exception if superclass initialization fails
 * @throws IllegalArgumentException if the format name is unknown or the
 *         schema JSON cannot be parsed
 */
@Override
public void initialize(TransformContext context) throws Exception {
    super.initialize(context);

    // Format names are matched case-insensitively.
    String csvFormatString = config.format.toLowerCase();
    switch (csvFormatString) {
    case "default":
        csvFormat = CSVFormat.DEFAULT;
        break;

    case "excel":
        csvFormat = CSVFormat.EXCEL;
        break;

    case "mysql":
        csvFormat = CSVFormat.MYSQL;
        break;

    case "rfc4180":
        csvFormat = CSVFormat.RFC4180;
        break;

    case "tdf":
        csvFormat = CSVFormat.TDF;
        break;

    case "pdl":
        csvFormat = PDL;
        break;

    default:
        // Fixed: the original message contained an unsubstituted "{}"
        // placeholder and a missing space ("areDEFAULT").
        throw new IllegalArgumentException(
                "Format '" + csvFormatString + "' specified is not one of the allowed formats. "
                        + "Allowed formats are DEFAULT, EXCEL, MYSQL, RFC4180, PDL and TDF");
    }

    try {
        outSchema = Schema.parseJson(config.schema);
        fields = outSchema.getFields();
    } catch (IOException e) {
        // Chain the cause so the underlying parse error is not lost.
        throw new IllegalArgumentException(
                "Format of schema specified is invalid. Please check the format.", e);
    }
}

From source file:com.ibm.watson.developer_cloud.professor_languo.pipeline.QuestionSetManagerTest.java

/**
 * Builds the set of duplicate-question thread IDs by reading the thread-ID
 * column from every record of the duplicate-thread TSV file.
 *
 * @throws PipelineException if the TSV file cannot be read
 */
private void set_of_duplicate_question_ids_is_built() throws PipelineException {
    this.duplicateQuestionIDs = new HashSet<>();
    try (CSVParser parser = CSVFormat.TDF.withHeader().parse(new FileReader(dupThreadTsvFile))) {
        // Iterate the parser directly; each record contributes one thread ID.
        for (CSVRecord csvRecord : parser) {
            this.duplicateQuestionIDs.add(csvRecord.get(CorpusBuilder.TSV_COL_HEADER_THREAD_ID));
        }
    } catch (IOException ioe) {
        throw new PipelineException(ioe);
    }
}

From source file:com.ggvaidya.scinames.ui.DatasetImporterController.java

/**
 * Loads the current file as a Dataset, dispatching on the file format
 * selected in the UI. Some formats have dedicated loaders and return
 * directly; the rest map to a CSVFormat and go through Dataset.fromCSV.
 * An unrecognized selection falls back to the default CSV format.
 *
 * @return the loaded dataset
 * @throws IOException if the file cannot be read
 */
private Dataset loadDataset() throws IOException {
    String format = fileFormatComboBox.getSelectionModel().getSelectedItem();
    CSVFormat csvFormat = null;

    if (format == null) {
        csvFormat = CSVFormat.DEFAULT;
    } else if (format.equals("List of names")) {
        return Checklist.fromListInFile(currentFile);
    } else if (format.equals("TaxDiff file")) {
        return ChecklistDiff.fromTaxDiffFile(currentFile);
    } else if (format.equals("Excel file")) {
        return new ExcelImporter(currentFile).asDataset(0);
    } else if (format.equals("Default CSV")) {
        csvFormat = CSVFormat.DEFAULT;
    } else if (format.equals("Microsoft Excel CSV")) {
        csvFormat = CSVFormat.EXCEL;
    } else if (format.equals("RFC 4180 CSV")) {
        csvFormat = CSVFormat.RFC4180;
    } else if (format.equals("Oracle MySQL CSV")) {
        csvFormat = CSVFormat.MYSQL;
    } else if (format.equals("Tab-delimited file")) {
        csvFormat = CSVFormat.TDF;
    }

    // Unrecognized selection: log it and fall back to the default dialect.
    if (csvFormat == null) {
        LOGGER.info("Could not determine CSV format from format '" + format + "', using CSV default.");
        csvFormat = CSVFormat.DEFAULT;
    }

    return Dataset.fromCSV(csvFormat, currentFile);
}

From source file:com.ggvaidya.scinames.complexquery.ComplexQueryViewController.java

/**
 * Copies the current data table to the system clipboard as tab-delimited
 * plain text and reports the row count in a confirmation dialog.
 *
 * @param evt the triggering action event (unused)
 */
@FXML
private void copyToClipboard(ActionEvent evt) {
    try {
        StringWriter writer = new StringWriter();
        // Fixed: the original called getDataAsTable() twice — once for the
        // row count and again for the export — recomputing the table and
        // risking the two diverging. Compute once and reuse.
        List<List<String>> dataAsTable = getDataAsTable();

        fillCSVFormat(CSVFormat.TDF, writer, dataAsTable);

        Clipboard clipboard = Clipboard.getSystemClipboard();
        HashMap<DataFormat, Object> content = new HashMap<>();
        content.put(DataFormat.PLAIN_TEXT, writer.getBuffer().toString());
        clipboard.setContent(content);

        // First column holds the header plus data rows, hence the -1.
        Alert window = new Alert(Alert.AlertType.CONFIRMATION,
                (dataAsTable.get(0).size() - 1) + " rows written to clipboard.");
        window.showAndWait();
    } catch (IOException e) {
        Alert window = new Alert(Alert.AlertType.ERROR, "Could not save CSV to the clipboard: " + e);
        window.showAndWait();
    }
}

From source file:edu.caltech.ipac.firefly.server.util.ipactable.DataGroupReader.java

/**
 * Guesses the tabular format of a file, first by extension and then — for
 * unrecognized extensions — by sniffing up to the first 10 lines: IPAC and
 * fixed-target markers are detected by line prefixes, and CSV vs. TSV is
 * chosen by which delimiter yields a consistent column count (ties go to
 * whichever produces more columns).
 *
 * @param inf the file to inspect
 * @return the detected Format, or Format.UNKNOWN if none matches
 * @throws IOException if the file cannot be read
 */
public static Format guessFormat(File inf) throws IOException {

    String fileExt = FileUtil.getExtension(inf);
    if (fileExt != null) {
        if (fileExt.equalsIgnoreCase("tbl")) {
            return Format.IPACTABLE;
        } else if (fileExt.equalsIgnoreCase("csv")) {
            return Format.CSV;
        } else if (fileExt.equalsIgnoreCase("tsv")) {
            return Format.TSV;
        } else if (fileExt.equalsIgnoreCase("fits")) {
            return Format.FITS;
        } else if (fileExt.equalsIgnoreCase("json")) {
            return Format.JSON;
        }
    }

    int readAhead = 10;

    int row = 0;
    BufferedReader reader = new BufferedReader(new FileReader(inf), IpacTableUtil.FILE_IO_BUFFER_SIZE);
    try {
        String line = reader.readLine();
        // Fixed: the original dereferenced 'line' without a null check and
        // threw NullPointerException on an empty file.
        if (line == null) {
            return Format.UNKNOWN;
        }
        if (line.startsWith("{")) {
            return Format.JSON;
        }
        // counts[row][0] = column count parsing as CSV; [1] = as TSV.
        int[][] counts = new int[readAhead][2];
        int csvIdx = 0, tsvIdx = 1;
        while (line != null && row < readAhead) {
            if (line.startsWith("|") || line.startsWith("\\")) {
                return Format.IPACTABLE;
            } else if (line.startsWith("COORD_SYSTEM: ") || line.startsWith("EQUINOX: ")
                    || line.startsWith("NAME-RESOLVER: ")) {
                //NOTE: a fixed targets file contains the following lines at the beginning:
                //COORD_SYSTEM: xxx
                //EQUINOX: xxx
                //NAME-RESOLVER: xxx
                return Format.FIXEDTARGETS;
            }

            counts[row][csvIdx] = CSVFormat.DEFAULT.parse(new StringReader(line)).iterator().next().size();
            counts[row][tsvIdx] = CSVFormat.TDF.parse(new StringReader(line)).iterator().next().size();
            row++;
            line = reader.readLine();
        }
        // A delimiter "matches" when every sampled line parses to the same
        // number of columns as the first line.
        int c = counts[0][csvIdx];
        boolean cMatch = true;
        for (int i = 1; i < row; i++) {
            cMatch = cMatch && counts[i][csvIdx] == c;
        }
        int t = counts[0][tsvIdx];
        boolean tMatch = true;
        for (int i = 1; i < row; i++) {
            tMatch = tMatch && counts[i][tsvIdx] == t;
        }

        if (cMatch && tMatch) {
            // Both are consistent: prefer whichever splits into more columns.
            if (t > c) {
                return Format.TSV;
            } else {
                return Format.CSV;
            }
        } else {
            if (cMatch) {
                return Format.CSV;
            } else if (tMatch) {
                return Format.TSV;
            } else {
                return Format.UNKNOWN;
            }
        }
    } finally {
        try {
            reader.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}

From source file:com.hurence.logisland.service.cache.CSVKeyValueCacheService.java

/**
 * Initializes the cache from a CSV database file located either on HDFS
 * ({@code DATABASE_FILE_URI}) or on the local filesystem
 * ({@code DATABASE_FILE_PATH}). Each row is turned into a Record keyed by
 * the configured row-key column and stored via {@code set(...)}.
 *
 * @param context the controller service initialization context
 * @throws InitializationException if no source is configured, the stream
 *         cannot be opened, no header source is configured, or loading fails
 */
@Override
// @OnEnabled
public void init(ControllerServiceInitializationContext context) throws InitializationException {
    super.init(context);
    try {

        if (context.getPropertyValue(DATABASE_FILE_URI).isSet()) {
            dbUri = context.getPropertyValue(DATABASE_FILE_URI).asString();
        }

        if (context.getPropertyValue(DATABASE_FILE_PATH).isSet()) {
            dbPath = context.getPropertyValue(DATABASE_FILE_PATH).asString();
        }

        if ((dbUri == null) && (dbPath == null)) {
            throw new Exception(
                    "You must declare " + DATABASE_FILE_URI.getName() + " or " + DATABASE_FILE_PATH.getName());
        }

        InputStream is = null;
        if (dbUri != null) {
            logger.info("opening csv database from hdfs : " + dbUri);
            is = initFromUri(dbUri);
        }

        // When both are set, the local path wins (opened last).
        if (dbPath != null) {
            logger.info("opening csv database from local fs : " + dbPath);
            is = initFromPath(context, dbPath);
        }

        if (is == null) {
            throw new InitializationException("Something went wrong while initializing csv db from "
                    + DATABASE_FILE_URI.getName() + " or " + DATABASE_FILE_PATH.getName());
        }

        // Resolve the CSV dialect once rather than re-reading the property
        // for every comparison, as the original did.
        String csvFormatName = context.getPropertyValue(CSV_FORMAT).asString();
        CSVFormat format = CSVFormat.DEFAULT;
        if (csvFormatName.equals(CSV_EXCEL.getValue())) {
            format = CSVFormat.EXCEL;
        } else if (csvFormatName.equals(CSV_EXCEL_FR.getValue())) {
            format = CSVFormat.EXCEL.withDelimiter(';');
        } else if (csvFormatName.equals(CSV_MYSQL.getValue())) {
            format = CSVFormat.MYSQL;
        } else if (csvFormatName.equals(CSV_RFC4180.getValue())) {
            format = CSVFormat.RFC4180;
        } else if (csvFormatName.equals(CSV_TDF.getValue())) {
            format = CSVFormat.TDF;
        }

        // Headers come either from an explicit comma-separated list or from
        // the first record of the file; one of the two must be configured.
        if (context.getPropertyValue(CSV_HEADER).isSet()) {
            String[] columnNames = context.getPropertyValue(CSV_HEADER).asString().split(",");
            for (String name : columnNames) {
                headers.get().put(name, "string");
            }
            format = format.withHeader(columnNames);
        } else if (context.getPropertyValue(FIRST_LINE_HEADER).isSet()) {
            format = format.withFirstRecordAsHeader();
        } else {
            throw new InitializationException("unable to get headers from somewhere");
        }

        Charset charset = Charset.forName("UTF-8");
        if (context.getPropertyValue(ENCODING_CHARSET).isSet()) {
            String encoding = context.getPropertyValue(ENCODING_CHARSET).asString();
            charset = Charset.forName(encoding);
        }

        rowKey = context.getPropertyValue(ROW_KEY).asString();
        CSVParser parser = CSVParser.parse(is, charset, format);

        long count = 0;
        try {
            final Set<String> columnNames = parser.getHeaderMap().keySet();
            for (final CSVRecord record : parser) {

                Record logislandRecord = new StandardRecord();
                for (final String column : columnNames) {
                    logislandRecord.setStringField(column, record.get(column));
                }

                set(logislandRecord.getField(rowKey).asString(), logislandRecord);
                count++;
            }
            // Fixed: the original logged "successfully loaded" from the
            // finally block, so it also fired when loading failed part-way.
            logger.info("successfully loaded " + count + " records from CSV file");
        } finally {
            parser.close();
            is.close();
        }

    } catch (Exception e) {
        getLogger().error("Could not load database file: {}", new Object[] { e.getMessage() });
        throw new InitializationException(e);
    }
}

From source file:com.ggvaidya.scinames.complexquery.ComplexQueryViewController.java

/**
 * Prompts for an output file and exports the current data table as either
 * RFC 4180 CSV or a tab-delimited file, depending on the chosen extension
 * filter. Reports success or failure in a dialog.
 *
 * @param evt the triggering action event (unused)
 */
@FXML
private void exportToCSV(ActionEvent evt) {
    FileChooser chooser = new FileChooser();
    chooser.getExtensionFilters().setAll(new FileChooser.ExtensionFilter("CSV file", "*.csv"),
            new FileChooser.ExtensionFilter("Tab-delimited file", "*.txt"));
    File file = chooser.showSaveDialog(scene.getWindow());
    if (file != null) {
        CSVFormat format = CSVFormat.RFC4180;

        String outputFormat = chooser.getSelectedExtensionFilter().getDescription();
        if (outputFormat.equalsIgnoreCase("Tab-delimited file"))
            format = CSVFormat.TDF;

        // Fixed: the original never closed the FileWriter, leaking the
        // handle and risking unflushed output; use try-with-resources.
        try (FileWriter writer = new FileWriter(file)) {
            List<List<String>> dataAsTable = getDataAsTable();
            fillCSVFormat(format, writer, dataAsTable);

            // First column holds the header plus data rows, hence the -1.
            Alert window = new Alert(Alert.AlertType.CONFIRMATION,
                    "CSV file '" + file + "' saved with " + (dataAsTable.get(0).size() - 1) + " rows.");
            window.showAndWait();

        } catch (IOException e) {
            Alert window = new Alert(Alert.AlertType.ERROR, "Could not save CSV to '" + file + "': " + e);
            window.showAndWait();
        }
    }
}

From source file:com.ibm.watson.developer_cloud.professor_languo.pipeline.QuestionSetManager.java

/**
 * This function is responsible for parsing a duplicate Stack Exchange thread TSV file produced by
 * {@link StackExchangeThreadSerializer}, and partitioning each such thread into the training set,
 * test set, or validation set. In addition, the corresponding row of the TSV file will be written
 * out to a training-, test-, or validation-set-specific TSV file in the same directory as the
 * input TSV file.
 *
 * @param dupQuestionFile - A TSV file containing duplicate {@link StackExchangeThread} records
 * @param trainTestValidateCumulativeProbs - A CDF of the desired proportion of training, test,
 *        and validation set records (cumulative, so probs[0] &lt;= probs[1] &lt;= probs[2])
 * @throws PipelineException if a thread cannot be deserialized, a record cannot be added to its
 *         set, or an I/O error occurs
 */
private void parseTsvAndPartitionRecords(File dupQuestionFile, double[] trainTestValidateCumulativeProbs)
        throws PipelineException {
    // Derive the three output file names from the input file name
    // (base + subset suffix + original extension), and open one CSVPrinter
    // per subset; all four resources are closed by try-with-resources.
    String baseName = FilenameUtils.removeExtension(dupQuestionFile.getAbsolutePath());
    String extension = FilenameUtils.getExtension(dupQuestionFile.getAbsolutePath());
    try (FileReader reader = new FileReader(dupQuestionFile);
            CSVPrinter trainSetPrinter = new CSVPrinter(
                    new FileWriter(baseName + StackExchangeConstants.DUP_THREAD_TSV_TRAIN_FILE_SUFFIX
                            + FilenameUtils.EXTENSION_SEPARATOR + extension),
                    CSVFormat.TDF.withHeader(CorpusBuilder.getTsvColumnHeaders()));
            CSVPrinter testSetPrinter = new CSVPrinter(
                    new FileWriter(baseName + StackExchangeConstants.DUP_THREAD_TSV_TEST_FILE_SUFFIX
                            + FilenameUtils.EXTENSION_SEPARATOR + extension),
                    CSVFormat.TDF.withHeader(CorpusBuilder.getTsvColumnHeaders()));
            CSVPrinter validationSetPrinter = new CSVPrinter(
                    new FileWriter(baseName + StackExchangeConstants.DUP_THREAD_TSV_VALIDATE_FILE_SUFFIX
                            + FilenameUtils.EXTENSION_SEPARATOR + extension),
                    CSVFormat.TDF.withHeader(CorpusBuilder.getTsvColumnHeaders()))) {

        // Parse the duplicate thread TSV file, using its first record as the header
        CSVParser parser = CSVFormat.TDF.withHeader().parse(reader);

        // Iterate over each CSV record, and place it into one partition
        // (train, test, or validation)
        Iterator<CSVRecord> recordIterator = parser.iterator();
        while (recordIterator.hasNext()) {
            CSVRecord record = recordIterator.next();

            // Get the StackExchangeThread associated with this record, and
            // create a question from it
            StackExchangeThread duplicateThread = StackExchangeThreadSerializer.deserializeThreadFromBinFile(
                    record.get(CorpusBuilder.TSV_COL_HEADER_SERIALIZED_FILE_PATH));
            StackExchangeQuestion duplicateQuestion = new StackExchangeQuestion(duplicateThread);
            String parentId = record.get(CorpusBuilder.TSV_COL_HEADER_PARENT_ID);

            // A uniform random draw against the cumulative probabilities
            // selects the partition for this record; the record is both
            // added to the in-memory set and echoed to that subset's TSV.
            double p = rng.nextDouble();
            if (p <= trainTestValidateCumulativeProbs[0]) {
                // This record goes in the training set
                if (!addQuestionToSet(duplicateQuestion, parentId, this.trainingSet)) {
                    throw new PipelineException(
                            MessageFormat.format(Messages.getString("RetrieveAndRank.TRAINING_SET_FAILED_Q"), //$NON-NLS-1$
                                    duplicateThread.getId()));
                }
                trainSetPrinter.printRecord((Object[]) convertRecordToArray(record));
            } else if (p <= trainTestValidateCumulativeProbs[1]) {
                // This record goes in the test set
                if (!addQuestionToSet(duplicateQuestion, parentId, this.testSet)) {
                    throw new PipelineException(
                            MessageFormat.format(Messages.getString("RetrieveAndRank.TEST_SET_FAILED_Q"), //$NON-NLS-1$
                                    duplicateThread.getId()));
                }
                testSetPrinter.printRecord((Object[]) convertRecordToArray(record));
            } else {
                // This record goes in the validation set; the assert documents
                // that the CDF's last entry is expected to cover the remainder
                assert (p <= trainTestValidateCumulativeProbs[2]);
                if (!addQuestionToSet(duplicateQuestion, parentId, this.validationSet)) {
                    throw new PipelineException(
                            MessageFormat.format(Messages.getString("RetrieveAndRank.VALIDATION_SET_FAILED_Q"), //$NON-NLS-1$
                                    duplicateThread.getId()));
                }
                validationSetPrinter.printRecord((Object[]) convertRecordToArray(record));
            }
        }

        // Flush all the printers prior to closing
        trainSetPrinter.flush();
        testSetPrinter.flush();
        validationSetPrinter.flush();
    } catch (IOException | IngestionException e) {
        throw new PipelineException(e);
    }
}

From source file:com.ibm.watson.developer_cloud.professor_languo.pipeline.QuestionSetManagerTest.java

/**
 * Verifies that the QuestionSetManager produced a train, test, and validate
 * TSV file, and that every thread ID recorded in each file belongs to the
 * corresponding QuestionAnswerSet.
 *
 * @throws PipelineException if a subset TSV file cannot be read
 */
private void subset_tsv_files_are_properly_created() throws PipelineException {
    for (File file : Arrays.asList(trainFile, testFile, validateFile)) {
        assertTrue("File " + file.getName() + " is missing", file.exists());

        // Pick the in-memory set that corresponds to this subset file.
        QuestionAnswerSet set;
        if (file == trainFile) {
            set = this.questionSetManager.getTrainingSet();
        } else if (file == testFile) {
            set = this.questionSetManager.getTestSet();
        } else {
            set = this.questionSetManager.getValidationSet();
        }

        // Every QID in the file must also appear in the matching set.
        try (CSVParser parser = CSVFormat.TDF.withHeader().parse(new FileReader(file))) {
            for (CSVRecord csvRecord : parser.getRecords()) {
                assertTrue("Subset TSV file has erroneous QID",
                        set.getQuestionIds().contains(csvRecord.get(CorpusBuilder.TSV_COL_HEADER_THREAD_ID)));
            }
        } catch (IOException ioe) {
            throw new PipelineException(ioe);
        }
    }
}