Example usage for org.apache.commons.csv CSVStrategy CSVStrategy

List of usage examples for the org.apache.commons.csv.CSVStrategy constructor

Introduction

This page collects usage examples for the constructor of org.apache.commons.csv.CSVStrategy.

Prototype

public CSVStrategy(char delimiter, char encapsulator, char commentStart, char escape,
        boolean ignoreLeadingWhitespace, boolean ignoreTrailingWhitespace, boolean interpretUnicodeEscapes,
        boolean ignoreEmptyLines) 

Source Link

Document

Customized CSV strategy setter.

Usage

From source file:org.apache.ctakes.jdl.data.loader.CsvLoader.java

/**
 * Opens the given CSV file and prepares a parser for it using the delimiter
 * and encapsulator configured on {@code loader}. Comments and escapes are
 * disabled, leading/trailing whitespace and empty lines are ignored, and
 * unicode escapes are not interpreted. For every column that declares a
 * format class, an instance is created reflectively (via its
 * {@code String} constructor, passing the column pattern) and stored by
 * column name.
 *
 * @param loader
 *            the loader
 * @param file
 *            the file
 * @throws FileNotFoundException
 *             if the file cannot be opened for reading
 * @throws RuntimeException
 *             if a column format cannot be loaded or instantiated
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public CsvLoader(final CsvLoadType loader, final File file) throws FileNotFoundException {
    InputStream inputStream = new FileInputStream(file);
    // Tracks whether construction finished, so the file handle is released on any failure.
    boolean initialized = false;
    try {
        // NOTE(review): decodes with the platform default charset - confirm this is intended.
        Reader reader = new InputStreamReader(inputStream);
        char delimiter = CharUtils.toChar(loader.getDelimiter());
        // A missing or empty encapsulator setting turns encapsulation off entirely.
        char encapsulator = (loader.getEncapsulator() == null || loader.getEncapsulator().length() == 0)
                ? CSVStrategy.ENCAPSULATOR_DISABLED
                : CharUtils.toChar(loader.getEncapsulator());
        log.info(String.format("delimiter %d encapsulator %d", (int) delimiter, (int) encapsulator));
        CSVStrategy strategy = new CSVStrategy(delimiter, encapsulator, CSVStrategy.COMMENTS_DISABLED,
                CSVStrategy.ESCAPE_DISABLED, true, true, false, true);
        parser = new CSVParser(reader, strategy);
        this.loader = loader;
        formatMap = new HashMap<String, Format>();
        try {
            for (Column col : loader.getColumn()) {
                if (col.getFormat() != null && col.getFormat().length() > 0) {
                    // The format class is named in the configuration and must expose a (String) constructor.
                    Class cf = Class.forName(col.getFormat());
                    Constructor ccf = cf.getConstructor(String.class);
                    this.formatMap.put(col.getName(), (Format) ccf.newInstance(col.getPattern()));
                }
            }
        } catch (Exception e) {
            throw new RuntimeException("Unable to create column formats for CSV file " + file, e);
        }
        initialized = true;
    } finally {
        if (!initialized) {
            try {
                inputStream.close();
            } catch (java.io.IOException ignored) {
                // Best-effort cleanup: the failure that aborted construction is the one to report.
            }
        }
    }
}

From source file:org.apache.solr.handler.CSVRequestHandler.java

/**
 * Builds a loader from the request: sets up the add-command template, the
 * CSV parsing strategy (delimiter, encapsulator, escape) and the optional
 * list of field names, validating the related request parameters.
 *
 * @throws SolrException (BAD_REQUEST) if the separator, encapsulator or
 *         escape parameter is not exactly one character, or if neither
 *         field names nor a header are available
 */
CSVLoader(SolrQueryRequest req, UpdateRequestProcessor processor) {
    this.processor = processor;
    this.params = req.getParams();
    schema = req.getSchema();

    templateAdd = new AddUpdateCommand();
    // All three template flags follow the single "overwrite" parameter (default true).
    final boolean overwrite = params.getBool(OVERWRITE, true);
    templateAdd.allowDups = !overwrite;
    templateAdd.overwriteCommitted = overwrite;
    templateAdd.overwritePending = overwrite;

    templateAdd.commitWithin = params.getInt(UpdateParams.COMMIT_WITHIN, -1);

    // Defaults: comma delimiter, double-quote encapsulator, comments and escapes disabled.
    strategy = new CSVStrategy(',', '"', CSVStrategy.COMMENTS_DISABLED, CSVStrategy.ESCAPE_DISABLED, false,
            false, false, true);

    final String separatorParam = params.get(SEPARATOR);
    if (separatorParam != null) {
        if (separatorParam.length() != 1) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    "Invalid separator:'" + separatorParam + "'");
        }
        strategy.setDelimiter(separatorParam.charAt(0));
    }

    final String encapsulatorParam = params.get(ENCAPSULATOR);
    if (encapsulatorParam != null && encapsulatorParam.length() != 1) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                "Invalid encapsulator:'" + encapsulatorParam + "'");
    }

    final String escapeParam = params.get(ESCAPE);
    if (escapeParam != null && escapeParam.length() != 1) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                "Invalid escape:'" + escapeParam + "'");
    }

    if (escapeParam == null) {
        // No escape requested: only override the encapsulator when one was given.
        if (encapsulatorParam != null) {
            strategy.setEncapsulator(encapsulatorParam.charAt(0));
        }
    } else {
        final char escapeChar = escapeParam.charAt(0);
        if (encapsulatorParam == null) {
            // Only the escape was supplied, so the encapsulator mechanism is switched off.
            strategy.setEncapsulator(CSVStrategy.ENCAPSULATOR_DISABLED);
            strategy.setEscape(escapeChar);
        } else {
            strategy.setEncapsulator(encapsulatorParam.charAt(0));
            strategy.setEscape(escapeChar);
            if (escapeChar == '\\') {
                // With the standard backslash escape, unicode escapes are enabled too
                // (harmless, since 'u' would not otherwise be escaped).
                // NOTE(review): this only happens when an encapsulator is also supplied.
                strategy.setUnicodeEscapeInterpretation(true);
            }
        }
    }

    final String fieldNamesParam = params.get(FIELDNAMES);
    if (fieldNamesParam == null) {
        fieldnames = null;
    } else {
        fieldnames = commaSplit.split(fieldNamesParam, -1);
    }

    final Boolean headerParam = params.getBool(HEADER);

    skipLines = params.getInt(SKIPLINES, 0);

    if (fieldnames != null) {
        // Field names were supplied; if the file also carries a header line, skip over it.
        if (Boolean.TRUE.equals(headerParam)) {
            skipLines++;
        }
        prepareFields();
        return;
    }

    // No field names supplied: an unspecified header parameter is treated as
    // "the file has a header"; an explicit header=false leaves nothing to name the fields.
    if (Boolean.FALSE.equals(headerParam)) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                "CSVLoader: must specify fieldnames=<fields>* or header=true");
    }
}

From source file:org.apache.solr.handler.CSVRequestHandler.java

/**
 * Creates the FieldAdders that control how each field is indexed.
 *
 * For every entry in {@code fieldnames} this resolves the schema field and
 * builds a chain of adders from the per-field request parameters (keep-empty,
 * map rules, trim, split). Entries with an empty name, or whose name appears
 * in the skip list, are left {@code null} in both {@code fields} and
 * {@code adders}.
 *
 * @throws SolrException (BAD_REQUEST) if a map rule is not of the form 'from:to'
 */
void prepareFields() {
    // Possible future optimization: for really rapid incremental indexing
    // from a POST, one could cache all of this setup info based on the params.
    // The link from FieldAdder to this would need to be severed for that to happen.

    fields = new SchemaField[fieldnames.length];
    adders = new CSVLoader.FieldAdder[fieldnames.length];
    String skipStr = params.get(SKIP);
    List<String> skipFields = skipStr == null ? null : StrUtils.splitSmart(skipStr, ',');

    // The two base adders are shared by all fields; wrappers are created per field below.
    CSVLoader.FieldAdder adder = new CSVLoader.FieldAdder();
    CSVLoader.FieldAdder adderKeepEmpty = new CSVLoader.FieldAdderEmpty();

    for (int i = 0; i < fields.length; i++) {
        String fname = fieldnames[i];
        // to skip a field, leave the entries in fields and adders null
        if (fname.length() == 0 || (skipFields != null && skipFields.contains(fname))) {
            continue;
        }

        fields[i] = schema.getField(fname);
        boolean keepEmpty = params.getFieldBool(fname, EMPTY, false);
        adders[i] = keepEmpty ? adderKeepEmpty : adder;

        // Order that operations are applied: split -> trim -> map -> add
        // so create in reverse order.
        // Creation of FieldAdders could be optimized and shared among fields

        String[] fmap = params.getFieldParams(fname, MAP);
        if (fmap != null) {
            for (String mapRule : fmap) {
                String[] mapArgs = colonSplit.split(mapRule, -1);
                if (mapArgs.length != 2) {
                    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                            "Map rules must be of the form 'from:to' ,got '" + mapRule + "'");
                }
                adders[i] = new CSVLoader.FieldMapperSingle(mapArgs[0], mapArgs[1], adders[i]);
            }
        }

        if (params.getFieldBool(fname, TRIM, false)) {
            adders[i] = new CSVLoader.FieldTrimmer(adders[i]);
        }

        if (params.getFieldBool(fname, SPLIT, false)) {
            // Per-field sub-strategy used to split one CSV value into multiple values.
            // Missing or empty settings fall back to ',' as separator and to
            // disabled encapsulator/escape.
            String sepStr = params.getFieldParam(fname, SEPARATOR);
            char fsep = sepStr == null || sepStr.length() == 0 ? ',' : sepStr.charAt(0);
            String encStr = params.getFieldParam(fname, ENCAPSULATOR);
            char fenc = encStr == null || encStr.length() == 0 ? CSVStrategy.ENCAPSULATOR_DISABLED
                    : encStr.charAt(0);
            String escStr = params.getFieldParam(fname, ESCAPE);
            char fesc = escStr == null || escStr.length() == 0 ? CSVStrategy.ESCAPE_DISABLED : escStr.charAt(0);

            CSVStrategy fstrat = new CSVStrategy(fsep, fenc, CSVStrategy.COMMENTS_DISABLED, fesc, false, false,
                    false, false);
            adders[i] = new CSVLoader.FieldSplitter(fstrat, adders[i]);
        }
    }
}

From source file:org.apache.solr.response.CSVResponseWriter.java

/**
 * Writes the response as CSV.
 *
 * Steps, in order:
 * <ol>
 * <li>configure the main CSV strategy from the request parameters and create the printer;</li>
 * <li>configure the strategy used for multi-valued fields;</li>
 * <li>work out the list of fields to output;</li>
 * <li>create a {@code CSVField} per output field, with its own printer when
 * per-field ("f.&lt;field&gt;.") overrides are present;</li>
 * <li>optionally print the header row, then write the documents.</li>
 * </ol>
 *
 * @throws IOException propagated from the underlying printer / document writers
 * @throws SolrException (BAD_REQUEST) if a separator, encapsulator or escape
 *         parameter is not exactly one character, or the newline parameter is empty
 */
public void writeResponse() throws IOException {
    SolrParams params = req.getParams();

    // Main strategy defaults: comma delimiter, double-quote encapsulator,
    // comments and escapes disabled.
    strategy = new CSVStrategy(',', '"', CSVStrategy.COMMENTS_DISABLED, CSVStrategy.ESCAPE_DISABLED, false,
            false, false, true);
    // 'strat' is the strategy currently being configured (main first, then multi-valued).
    CSVStrategy strat = strategy;

    String sep = params.get(CSV_SEPARATOR);
    if (sep != null) {
        if (sep.length() != 1) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid separator:'" + sep + "'");
        }
        strat.setDelimiter(sep.charAt(0));
    }

    String nl = params.get(CSV_NEWLINE);
    if (nl != null) {
        if (nl.length() == 0) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid newline:'" + nl + "'");
        }
        strat.setPrinterNewline(nl);
    }

    String encapsulator = params.get(CSV_ENCAPSULATOR);
    String escape = params.get(CSV_ESCAPE);
    if (encapsulator != null) {
        if (encapsulator.length() != 1) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    "Invalid encapsulator:'" + encapsulator + "'");
        }
        strat.setEncapsulator(encapsulator.charAt(0));
    }

    if (escape != null) {
        if (escape.length() != 1) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid escape:'" + escape + "'");
        }
        strat.setEscape(escape.charAt(0));
        // An escape given without an encapsulator switches encapsulation off.
        if (encapsulator == null) {
            strat.setEncapsulator(CSVStrategy.ENCAPSULATOR_DISABLED);
        }
    }

    if (strat.getEscape() == '\\') {
        // If the escape is the standard backslash, then also enable
        // unicode escapes (it's harmless since 'u' would not otherwise
        // be escaped).
        strat.setUnicodeEscapeInterpretation(true);
    }

    printer = new CSVPrinter(writer, strategy);

    // Multi-valued strategy defaults: same delimiter as the main strategy,
    // no encapsulator, backslash escape.
    CSVStrategy mvStrategy = new CSVStrategy(strategy.getDelimiter(), CSVStrategy.ENCAPSULATOR_DISABLED,
            CSVStrategy.COMMENTS_DISABLED, '\\', false, false, false, false);
    strat = mvStrategy;

    sep = params.get(MV_SEPARATOR);
    if (sep != null) {
        if (sep.length() != 1) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid mv separator:'" + sep + "'");
        }
        strat.setDelimiter(sep.charAt(0));
    }

    encapsulator = params.get(MV_ENCAPSULATOR);
    escape = params.get(MV_ESCAPE);

    if (encapsulator != null) {
        if (encapsulator.length() != 1) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    "Invalid mv encapsulator:'" + encapsulator + "'");
        }
        strat.setEncapsulator(encapsulator.charAt(0));
        // An encapsulator given without an escape switches the default escape off.
        if (escape == null) {
            strat.setEscape(CSVStrategy.ESCAPE_DISABLED);
        }
    }

    if (escape != null) {
        if (escape.length() != 1) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid mv escape:'" + escape + "'");
        }
        strat.setEscape(escape.charAt(0));
        // encapsulator will already be disabled if it wasn't specified
    }

    returnScore = returnFields != null && returnFields.contains("score");
    // The field list must be discovered when none was requested, when only
    // "score" was requested, or when the wildcard was used.
    boolean needListOfFields = returnFields == null || returnFields.size() == 0
            || (returnFields.size() == 1 && returnScore) || returnFields.contains("*");
    Collection<String> fields = returnFields;

    Object responseObj = rsp.getValues().get("response");
    if (needListOfFields) {
        if (responseObj instanceof SolrDocumentList) {
            // get the list of fields from the SolrDocumentList
            fields = new LinkedHashSet<String>();
            for (SolrDocument sdoc : (SolrDocumentList) responseObj) {
                fields.addAll(sdoc.getFieldNames());
            }
        } else {
            // get the list of fields from the index; copy it because "score" is
            // added/removed below and the searcher's own collection must not be modified
            fields = new LinkedHashSet<String>(req.getSearcher().getFieldNames());
        }
        if (returnScore) {
            fields.add("score");
        } else {
            fields.remove("score");
        }
    }

    // Shared printer for multi-valued fields that have no per-field overrides.
    CSVSharedBufPrinter csvPrinterMV = new CSVSharedBufPrinter(mvWriter, mvStrategy);

    for (String field : fields) {
        if (field.equals("score")) {
            // "score" gets an entry with only a name (no schema field, no mv printer).
            CSVField csvField = new CSVField();
            csvField.name = "score";
            csvFields.put("score", csvField);
            continue;
        }

        SchemaField sf = schema.getFieldOrNull(field);
        if (sf == null) {
            // Field unknown to the schema: fall back to a plain string field.
            FieldType ft = new StrField();
            sf = new SchemaField(field, ft);
        }

        // if we got the list of fields from the index, only list stored fields
        if (returnFields == null && !sf.stored()) {
            continue;
        }

        // check for per-field overrides
        sep = params.get("f." + field + '.' + CSV_SEPARATOR);
        encapsulator = params.get("f." + field + '.' + CSV_ENCAPSULATOR);
        escape = params.get("f." + field + '.' + CSV_ESCAPE);

        CSVSharedBufPrinter csvPrinter = csvPrinterMV;
        if (sep != null || encapsulator != null || escape != null) {
            // create a new strategy + printer if there were any per-field overrides
            strat = (CSVStrategy) mvStrategy.clone();
            if (sep != null) {
                if (sep.length() != 1) {
                    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                            "Invalid mv separator:'" + sep + "'");
                }
                strat.setDelimiter(sep.charAt(0));
            }
            if (encapsulator != null) {
                if (encapsulator.length() != 1) {
                    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                            "Invalid mv encapsulator:'" + encapsulator + "'");
                }
                strat.setEncapsulator(encapsulator.charAt(0));
                if (escape == null) {
                    strat.setEscape(CSVStrategy.ESCAPE_DISABLED);
                }
            }
            if (escape != null) {
                if (escape.length() != 1) {
                    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                            "Invalid mv escape:'" + escape + "'");
                }
                strat.setEscape(escape.charAt(0));
                if (encapsulator == null) {
                    strat.setEncapsulator(CSVStrategy.ENCAPSULATOR_DISABLED);
                }
            }
            csvPrinter = new CSVSharedBufPrinter(mvWriter, strat);
        }

        CSVField csvField = new CSVField();
        csvField.name = field;
        csvField.sf = sf;
        csvField.mvPrinter = csvPrinter;
        csvFields.put(field, csvField);
    }

    NullValue = params.get(CSV_NULL, "");

    if (params.getBool(CSV_HEADER, true)) {
        for (CSVField csvField : csvFields.values()) {
            printer.print(csvField.name);
        }
        printer.println();
    }

    if (responseObj instanceof DocList) {
        writeDocList(null, (DocList) responseObj, null, null);
    } else if (responseObj instanceof SolrDocumentList) {
        writeSolrDocumentList(null, (SolrDocumentList) responseObj, null, null);
    }

}