List of usage examples for the org.apache.commons.csv.CSVStrategy constructor
public CSVStrategy(char delimiter, char encapsulator, char commentStart, char escape, boolean ignoreLeadingWhitespace, boolean ignoreTrailingWhitespace, boolean interpretUnicodeEscapes, boolean ignoreEmptyLines)
From source file:org.apache.ctakes.jdl.data.loader.CsvLoader.java
/**
 * Creates a loader that parses {@code file} as CSV using the delimiter and
 * encapsulator configured on {@code loader}, and prepares one {@link Format}
 * per column that declares a format class.
 *
 * <p>The CSV strategy is built with comments and escaping disabled, leading
 * and trailing whitespace ignored, unicode escapes not interpreted, and empty
 * lines ignored. A null or empty encapsulator setting disables encapsulation.
 *
 * <p>The file is decoded with the platform default charset. If anything fails
 * after the file has been opened, the file is closed again before the
 * exception propagates, so a failed construction does not leak the handle.
 *
 * @param loader
 *            the load configuration (delimiter, encapsulator, columns)
 * @param file
 *            the CSV file to read
 * @throws FileNotFoundException
 *             if {@code file} cannot be opened for reading
 * @throws RuntimeException
 *             if a column's format class cannot be loaded or instantiated
 *             with its pattern; the original failure is kept as the cause
 */
public CsvLoader(final CsvLoadType loader, final File file) throws FileNotFoundException {
    // Decodes with the platform default charset, as this constructor always has.
    final Reader reader = new InputStreamReader(new FileInputStream(file));
    boolean initialized = false;
    try {
        final char delimiter = CharUtils.toChar(loader.getDelimiter());
        final String encapsulatorSpec = loader.getEncapsulator();
        final char encapsulator = (encapsulatorSpec == null || encapsulatorSpec.length() == 0)
                ? CSVStrategy.ENCAPSULATOR_DISABLED
                : CharUtils.toChar(encapsulatorSpec);
        log.info(String.format("delimiter %d encapsulator %d", (int) delimiter, (int) encapsulator));

        final CSVStrategy strategy = new CSVStrategy(delimiter, encapsulator,
                CSVStrategy.COMMENTS_DISABLED, CSVStrategy.ESCAPE_DISABLED,
                true, true, false, true);
        parser = new CSVParser(reader, strategy);
        this.loader = loader;
        formatMap = new HashMap<String, Format>();

        // Remember which column is being processed so a failure can name it.
        String currentColumn = null;
        try {
            for (Column col : loader.getColumn()) {
                currentColumn = col.getName();
                final String formatClass = col.getFormat();
                if (formatClass != null && formatClass.length() > 0) {
                    // The format class must expose a public (String pattern) constructor.
                    final Constructor<?> ctor = Class.forName(formatClass).getConstructor(String.class);
                    this.formatMap.put(currentColumn, (Format) ctor.newInstance(col.getPattern()));
                }
            }
        } catch (Exception e) {
            throw new RuntimeException(
                    "Unable to create format for column '" + currentColumn + "' of " + file, e);
        }
        initialized = true;
    } finally {
        if (!initialized) {
            try {
                reader.close();
            } catch (java.io.IOException ignored) {
                // Best effort: the exception that aborted construction is the one worth reporting.
            }
        }
    }
}
From source file:org.apache.solr.handler.CSVRequestHandler.java
CSVLoader(SolrQueryRequest req, UpdateRequestProcessor processor) { this.processor = processor; this.params = req.getParams(); schema = req.getSchema();// ww w .j a v a2 s .com templateAdd = new AddUpdateCommand(); templateAdd.allowDups = false; templateAdd.overwriteCommitted = true; templateAdd.overwritePending = true; if (params.getBool(OVERWRITE, true)) { templateAdd.allowDups = false; templateAdd.overwriteCommitted = true; templateAdd.overwritePending = true; } else { templateAdd.allowDups = true; templateAdd.overwriteCommitted = false; templateAdd.overwritePending = false; } templateAdd.commitWithin = params.getInt(UpdateParams.COMMIT_WITHIN, -1); strategy = new CSVStrategy(',', '"', CSVStrategy.COMMENTS_DISABLED, CSVStrategy.ESCAPE_DISABLED, false, false, false, true); String sep = params.get(SEPARATOR); if (sep != null) { if (sep.length() != 1) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid separator:'" + sep + "'"); strategy.setDelimiter(sep.charAt(0)); } String encapsulator = params.get(ENCAPSULATOR); if (encapsulator != null) { if (encapsulator.length() != 1) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid encapsulator:'" + encapsulator + "'"); } String escape = params.get(ESCAPE); if (escape != null) { if (escape.length() != 1) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid escape:'" + escape + "'"); } // if only encapsulator or escape is set, disable the other escaping mechanism if (encapsulator == null && escape != null) { strategy.setEncapsulator(CSVStrategy.ENCAPSULATOR_DISABLED); strategy.setEscape(escape.charAt(0)); } else { if (encapsulator != null) { strategy.setEncapsulator(encapsulator.charAt(0)); } if (escape != null) { char ch = escape.charAt(0); strategy.setEscape(ch); if (ch == '\\') { // If the escape is the standard backslash, then also enable // unicode escapes (it's harmless since 'u' would not otherwise // be escaped. 
strategy.setUnicodeEscapeInterpretation(true); } } } String fn = params.get(FIELDNAMES); fieldnames = fn != null ? commaSplit.split(fn, -1) : null; Boolean hasHeader = params.getBool(HEADER); skipLines = params.getInt(SKIPLINES, 0); if (fieldnames == null) { if (null == hasHeader) { // assume the file has the headers if they aren't supplied in the args hasHeader = true; } else if (!hasHeader) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "CSVLoader: must specify fieldnames=<fields>* or header=true"); } } else { // if the fieldnames were supplied and the file has a header, we need to // skip over that header. if (hasHeader != null && hasHeader) skipLines++; prepareFields(); } }
From source file:org.apache.solr.handler.CSVRequestHandler.java
/** create the FieldAdders that control how each field is indexed */ void prepareFields() { // Possible future optimization: for really rapid incremental indexing // from a POST, one could cache all of this setup info based on the params. // The link from FieldAdder to this would need to be severed for that to happen. fields = new SchemaField[fieldnames.length]; adders = new CSVLoader.FieldAdder[fieldnames.length]; String skipStr = params.get(SKIP); List<String> skipFields = skipStr == null ? null : StrUtils.splitSmart(skipStr, ','); CSVLoader.FieldAdder adder = new CSVLoader.FieldAdder(); CSVLoader.FieldAdder adderKeepEmpty = new CSVLoader.FieldAdderEmpty(); for (int i = 0; i < fields.length; i++) { String fname = fieldnames[i]; // to skip a field, leave the entries in fields and addrs null if (fname.length() == 0 || (skipFields != null && skipFields.contains(fname))) continue; fields[i] = schema.getField(fname); boolean keepEmpty = params.getFieldBool(fname, EMPTY, false); adders[i] = keepEmpty ? adderKeepEmpty : adder; // Order that operations are applied: split -> trim -> map -> add // so create in reverse order. // Creation of FieldAdders could be optimized and shared among fields String[] fmap = params.getFieldParams(fname, MAP); if (fmap != null) { for (String mapRule : fmap) { String[] mapArgs = colonSplit.split(mapRule, -1); if (mapArgs.length != 2) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Map rules must be of the form 'from:to' ,got '" + mapRule + "'"); adders[i] = new CSVLoader.FieldMapperSingle(mapArgs[0], mapArgs[1], adders[i]); }/*from w w w. j av a2 s. co m*/ } if (params.getFieldBool(fname, TRIM, false)) { adders[i] = new CSVLoader.FieldTrimmer(adders[i]); } if (params.getFieldBool(fname, SPLIT, false)) { String sepStr = params.getFieldParam(fname, SEPARATOR); char fsep = sepStr == null || sepStr.length() == 0 ? 
',' : sepStr.charAt(0); String encStr = params.getFieldParam(fname, ENCAPSULATOR); char fenc = encStr == null || encStr.length() == 0 ? (char) -2 : encStr.charAt(0); String escStr = params.getFieldParam(fname, ESCAPE); char fesc = escStr == null || escStr.length() == 0 ? CSVStrategy.ESCAPE_DISABLED : escStr.charAt(0); CSVStrategy fstrat = new CSVStrategy(fsep, fenc, CSVStrategy.COMMENTS_DISABLED, fesc, false, false, false, false); adders[i] = new CSVLoader.FieldSplitter(fstrat, adders[i]); } } }
From source file:org.apache.solr.response.CSVResponseWriter.java
public void writeResponse() throws IOException { SolrParams params = req.getParams(); strategy = new CSVStrategy(',', '"', CSVStrategy.COMMENTS_DISABLED, CSVStrategy.ESCAPE_DISABLED, false, false, false, true);//ww w . ja va 2s .c o m CSVStrategy strat = strategy; String sep = params.get(CSV_SEPARATOR); if (sep != null) { if (sep.length() != 1) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid separator:'" + sep + "'"); strat.setDelimiter(sep.charAt(0)); } String nl = params.get(CSV_NEWLINE); if (nl != null) { if (nl.length() == 0) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid newline:'" + nl + "'"); strat.setPrinterNewline(nl); } String encapsulator = params.get(CSV_ENCAPSULATOR); String escape = params.get(CSV_ESCAPE); if (encapsulator != null) { if (encapsulator.length() != 1) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid encapsulator:'" + encapsulator + "'"); strat.setEncapsulator(encapsulator.charAt(0)); } if (escape != null) { if (escape.length() != 1) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid escape:'" + escape + "'"); strat.setEscape(escape.charAt(0)); if (encapsulator == null) { strat.setEncapsulator(CSVStrategy.ENCAPSULATOR_DISABLED); } } if (strat.getEscape() == '\\') { // If the escape is the standard backslash, then also enable // unicode escapes (it's harmless since 'u' would not otherwise // be escaped. 
strat.setUnicodeEscapeInterpretation(true); } printer = new CSVPrinter(writer, strategy); CSVStrategy mvStrategy = new CSVStrategy(strategy.getDelimiter(), CSVStrategy.ENCAPSULATOR_DISABLED, CSVStrategy.COMMENTS_DISABLED, '\\', false, false, false, false); strat = mvStrategy; sep = params.get(MV_SEPARATOR); if (sep != null) { if (sep.length() != 1) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid mv separator:'" + sep + "'"); strat.setDelimiter(sep.charAt(0)); } encapsulator = params.get(MV_ENCAPSULATOR); escape = params.get(MV_ESCAPE); if (encapsulator != null) { if (encapsulator.length() != 1) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid mv encapsulator:'" + encapsulator + "'"); strat.setEncapsulator(encapsulator.charAt(0)); if (escape == null) { strat.setEscape(CSVStrategy.ESCAPE_DISABLED); } } escape = params.get(MV_ESCAPE); if (escape != null) { if (escape.length() != 1) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid mv escape:'" + escape + "'"); strat.setEscape(escape.charAt(0)); // encapsulator will already be disabled if it wasn't specified } returnScore = returnFields != null && returnFields.contains("score"); boolean needListOfFields = returnFields == null || returnFields.size() == 0 || (returnFields.size() == 1 && returnScore) || returnFields.contains("*"); Collection<String> fields = returnFields; Object responseObj = rsp.getValues().get("response"); if (needListOfFields) { if (responseObj instanceof SolrDocumentList) { // get the list of fields from the SolrDocumentList fields = new LinkedHashSet<String>(); for (SolrDocument sdoc : (SolrDocumentList) responseObj) { fields.addAll(sdoc.getFieldNames()); } } else { // get the list of fields from the index fields = req.getSearcher().getFieldNames(); } if (returnScore) { fields.add("score"); } else { fields.remove("score"); } } CSVSharedBufPrinter csvPrinterMV = new CSVSharedBufPrinter(mvWriter, mvStrategy); for (String field : fields) { 
if (field.equals("score")) { CSVField csvField = new CSVField(); csvField.name = "score"; csvFields.put("score", csvField); continue; } SchemaField sf = schema.getFieldOrNull(field); if (sf == null) { FieldType ft = new StrField(); sf = new SchemaField(field, ft); } // if we got the list of fields from the index, only list stored fields if (returnFields == null && sf != null && !sf.stored()) { continue; } // check for per-field overrides sep = params.get("f." + field + '.' + CSV_SEPARATOR); encapsulator = params.get("f." + field + '.' + CSV_ENCAPSULATOR); escape = params.get("f." + field + '.' + CSV_ESCAPE); CSVSharedBufPrinter csvPrinter = csvPrinterMV; if (sep != null || encapsulator != null || escape != null) { // create a new strategy + printer if there were any per-field overrides strat = (CSVStrategy) mvStrategy.clone(); if (sep != null) { if (sep.length() != 1) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid mv separator:'" + sep + "'"); strat.setDelimiter(sep.charAt(0)); } if (encapsulator != null) { if (encapsulator.length() != 1) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid mv encapsulator:'" + encapsulator + "'"); strat.setEncapsulator(encapsulator.charAt(0)); if (escape == null) { strat.setEscape(CSVStrategy.ESCAPE_DISABLED); } } if (escape != null) { if (escape.length() != 1) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid mv escape:'" + escape + "'"); strat.setEscape(escape.charAt(0)); if (encapsulator == null) { strat.setEncapsulator(CSVStrategy.ENCAPSULATOR_DISABLED); } } csvPrinter = new CSVSharedBufPrinter(mvWriter, strat); } CSVField csvField = new CSVField(); csvField.name = field; csvField.sf = sf; csvField.mvPrinter = csvPrinter; csvFields.put(field, csvField); } NullValue = params.get(CSV_NULL, ""); if (params.getBool(CSV_HEADER, true)) { for (CSVField csvField : csvFields.values()) { printer.print(csvField.name); } printer.println(); } if (responseObj instanceof DocList) { 
writeDocList(null, (DocList) responseObj, null, null); } else if (responseObj instanceof SolrDocumentList) { writeSolrDocumentList(null, (SolrDocumentList) responseObj, null, null); } }