List of usage examples for org.apache.commons.lang StringUtils splitByWholeSeparatorPreserveAllTokens
public static String[] splitByWholeSeparatorPreserveAllTokens(String str, String separator)
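Splits the provided text into an array, using the specified separator string as a whole (not as a set of separator characters). Adjacent separators are treated as delimiting empty tokens, and those empty tokens are preserved in the result.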
From source file:com.spotify.hdfs2cass.CassandraPartitioner.java
@SuppressWarnings("unchecked") @Override//from w w w . ja v a2s .com public void configure(JobConf entries) { final String partitionerParam = entries.get(ClusterInfo.SPOTIFY_CASSANDRA_PARTITIONER_PARAM); if (partitionerParam == null) { throw new RuntimeException("Didn't get any cassandra partitioner information"); } try { partitioner = (AbstractPartitioner<BigIntegerToken>) Class.forName(partitionerParam).newInstance(); } catch (Exception ex) { throw new RuntimeException("Invalid partitioner class name: " + partitionerParam); } final String tokenNodesParam = entries.get(ClusterInfo.SPOTIFY_CASSANDRA_TOKENS_PARAM); if (tokenNodesParam == null) { throw new RuntimeException("Didn't get any cassandra information"); } final String[] parts = StringUtils.splitByWholeSeparatorPreserveAllTokens(tokenNodesParam, ","); if ((parts == null) || (parts.length == 0)) { throw new RuntimeException("Didn't get any valid cassandra nodes information"); } tokenNodes = new ArrayList<TokenNode>(); for (String part : parts) { tokenNodes.add(new TokenNode(part)); } Collections.sort(tokenNodes, new Comparator<TokenNode>() { @Override public int compare(TokenNode o1, TokenNode o2) { if (o1.equals(o2)) { return 0; } return o1.getStartToken().compareTo(o2.getStartToken()); } }); }
From source file:com.act.lcms.db.io.parser.ConstructAnalysisFileParser.java
public void parse(File inFile) throws IOException { try (BufferedReader reader = new BufferedReader(new FileReader(inFile))) { String line;/* w ww . j a va 2s .c o m*/ String constructId = null; List<ConstructAssociatedChemical> products = null; while ((line = reader.readLine()) != null) { Matcher matcher = CONSTRUCT_DESIGNATOR_PATTERN.matcher(line); if (matcher.matches()) { if (constructId != null) { handleConstructProductsList(constructId, products); } constructId = matcher.group(1).trim(); products = new ArrayList<>(); } else { if (constructId == null || products == null) { throw new RuntimeException( "Found construct product step line without a pre-defined construct"); } String[] fields = StringUtils.splitByWholeSeparatorPreserveAllTokens(line, PRODUCT_KIND_SEPARATOR); if (fields.length != 2) { System.err.format("Skipping line with unexpected number of fields (%d): %s\n", fields.length, line); continue; } String chemical = fields[0]; String kind = fields[1]; products.add(new ConstructAssociatedChemical(chemical, kind)); } } // Finish processing anything that's left over. if (constructId != null) { handleConstructProductsList(constructId, products); } } }
From source file:com.ibm.bi.dml.runtime.io.IOUtilFunctions.java
/** * Splits a string by a specified delimiter into all tokens, including empty. * NOTE: This method is meant as a faster drop-in replacement of the regular * string split.//from www . j a va2 s. c om * * @param str * @param delim * @return */ public static String[] split(String str, String delim) { //note: split via stringutils faster than precompiled pattern / guava splitter //split by whole separator required for multi-character delimiters, preserve //all tokens required for empty cells and in order to keep cell alignment return StringUtils.splitByWholeSeparatorPreserveAllTokens(str, delim); }
From source file:com.spotify.hdfs2cass.cassandra.utils.CassandraPartitioner.java
/**
 * Stores the configuration and initialises this partitioner from it.
 * <p>
 * Reads the partitioner class name, the token range per reducer, the
 * comma-separated list of reducer numbers and the "distribute randomly" flag,
 * then logs the resulting settings.
 *
 * @param conf the configuration to read the settings from
 * @throws RuntimeException if a required setting is missing, the reducer list
 *         is empty, or the partitioner class cannot be instantiated (the
 *         underlying failure is attached as the cause)
 * @throws NumberFormatException if the range or a reducer entry is not numeric
 */
@Override
public void setConf(Configuration conf) {
    this.conf = conf;

    final String partitionerParam = conf.get(CassandraParams.SCRUB_CASSANDRACLUSTER_PARTITIONER_CONFIG);
    logger.info(CassandraParams.SCRUB_CASSANDRACLUSTER_PARTITIONER_CONFIG + ": " + partitionerParam);
    if (partitionerParam == null) {
        throw new RuntimeException("Didn't get any cassandra partitioner information");
    }

    try {
        partitioner = (AbstractPartitioner) Class.forName(partitionerParam).newInstance();
    } catch (Exception ex) {
        // Keep the original failure attached so the real reason is not lost.
        throw new RuntimeException("Invalid partitioner class name: " + partitionerParam, ex);
    }

    final String rangePerReducerStr = conf.get(CassandraParams.SCRUB_CASSANDRACLUSTER_RANGE_PER_REDUCER_CONFIG);
    if (rangePerReducerStr == null) {
        throw new RuntimeException("Didn't get cassandra range per reducer");
    }
    rangePerReducer = new BigInteger(rangePerReducerStr);

    final String reducersStr = conf.get(CassandraParams.SCRUB_CASSANDRACLUSTER_REDUCERS_CONFIG);
    if (reducersStr == null) {
        throw new RuntimeException("Failed to get list of reducers");
    }

    // An empty parameter string yields an empty array, hence the length check.
    final String[] parts = StringUtils.splitByWholeSeparatorPreserveAllTokens(reducersStr, ",");
    if ((parts == null) || (parts.length == 0)) {
        throw new RuntimeException("Didn't get any valid list of reducers");
    }

    reducers = new ArrayList<>(parts.length);
    for (String part : parts) {
        reducers.add(Integer.parseInt(part));
    }

    distributeRandomly = conf.getBoolean(CassandraParams.SCRUB_CASSANDRACLUSTER_DISTRIBUTE_RANDOMLY_CONFIG, false);
    if (distributeRandomly) {
        // The random source is only created when random distribution is enabled.
        random = new Random();
    }

    logger.info("CP: range per reducer: {}, reducers: {}, distribute randomly: {}",
            new Object[] { rangePerReducerStr, Arrays.toString(reducers.toArray()), distributeRandomly });
}
From source file:com.floreantpos.license.FiveStarPOSLicenseManager.java
/**
 * Resolves the template file from the command line arguments.
 * <p>
 * The first argument containing {@code -t=} is used; the text following that
 * marker (up to any further {@code -t=}) is taken as the file location. When
 * no such argument exists, or the extracted value is blank, the default
 * {@code TEMPLATE_FILE} is returned.
 *
 * @param args the command line arguments, may be {@code null}
 * @return the requested template file, or the default template file
 */
private static File getTemplateFile(String[] args) {
    if (args == null) {
        return new File(TEMPLATE_FILE);
    }

    final String flag = "-t=";
    String requestedLocation = null;
    for (int i = 0; i < args.length; i++) {
        final String argument = args[i];
        if (!StringUtils.contains(argument, flag)) {
            continue;
        }
        // The argument contains the flag, so the split has at least two parts.
        requestedLocation = StringUtils.splitByWholeSeparatorPreserveAllTokens(argument, flag)[1];
        break;
    }

    return StringUtils.isNotBlank(requestedLocation)
            ? new File(requestedLocation)
            : new File(TEMPLATE_FILE);
}
From source file:com.opengamma.id.UniqueId.java
/** * Parses a {@code UniqueId} from a formatted scheme and value. * <p>//from w w w . j av a 2 s. co m * This parses the identifier from the form produced by {@code toString()} * which is {@code <SCHEME>~<VALUE>~<VERSION>}. * * @param str the unique identifier to parse, not null * @return the unique identifier, not null * @throws IllegalArgumentException if the identifier cannot be parsed */ @FromString public static UniqueId parse(String str) { ArgumentChecker.notEmpty(str, "str"); if (str.contains("~") == false) { str = StringUtils.replace(str, "::", "~"); // leniently parse old data } String[] split = StringUtils.splitByWholeSeparatorPreserveAllTokens(str, "~"); switch (split.length) { case 2: return UniqueId.of(split[0], split[1], null); case 3: return UniqueId.of(split[0], split[1], split[2]); } throw new IllegalArgumentException("Invalid identifier format: " + str); }
From source file:com.aipo.container.gadgets.uri.AipoProxyUriManager.java
/**
 * Parses an incoming proxy request URI into a {@code ProxyUri}.
 * <p>
 * Two syntaxes are recognised:
 * <ul>
 * <li>query-style: the container is given as a query parameter and the request
 * path equals the container's configured proxy path; the target URL is read
 * from the URL query parameter;</li>
 * <li>chained-style: the proxy parameters are embedded in the path between the
 * prefix and suffix of the configured proxy path, and the target URL is
 * everything after that suffix.</li>
 * </ul>
 * When {@code strictParsing} is off and no chained URL could be extracted, the
 * method falls back to the query-style URL parameter.
 *
 * @param uriIn the incoming request URI
 * @return the parsed proxy URI, carrying the validation status, the target
 *         resource URI and the URI holding the proxy query parameters
 * @throws GadgetException if the URL or container parameter is missing, the
 *         proxy host does not match under strict parsing, or the target URL
 *         cannot be parsed
 */
@SuppressWarnings("deprecation")
public ProxyUri process(Uri uriIn) throws GadgetException {
    UriStatus status = UriStatus.BAD_URI;
    Uri uri = null;

    // First determine if the URI is chained-syntax or query-style.
    String container = uriIn.getQueryParameter(Param.CONTAINER.getKey());
    if (container == null) {
        container = uriIn.getQueryParameter(Param.SYND.getKey());
    }
    String uriStr = null;
    Uri queryUri = null;
    if (container != null && config.getString(container, PROXY_PATH_PARAM) != null
            && config.getString(container, PROXY_PATH_PARAM).equalsIgnoreCase(uriIn.getPath())) {
        // Query-style. Has container param and path matches.
        uriStr = uriIn.getQueryParameter(Param.URL.getKey());
        queryUri = uriIn;
    } else {
        // Check for chained query string in the path.
        String containerStr = Param.CONTAINER.getKey() + '=';
        String path = uriIn.getPath();
        // The path may arrive URL-encoded ('=' as %3d), for example from
        // CssResponseRewriter, so decode it when the marker is not found as-is.
        boolean doDecode = (!path.contains(containerStr));
        if (doDecode) {
            path = Utf8UrlCoder.decode(path);
        }
        int start = path.indexOf(containerStr);
        if (start > 0) {
            start += containerStr.length();
            int end = path.indexOf('&', start);
            if (end < start) {
                end = path.indexOf('/', start);
            }
            if (end > start) {
                // Looks like chained proxy syntax. Pull out params.
                container = path.substring(start, end);
            }
            if (container != null) {
                String proxyPath = config.getString(container, PROXY_PATH_PARAM);
                if (proxyPath != null) {
                    String[] chainedChunks = StringUtils.splitByWholeSeparatorPreserveAllTokens(proxyPath,
                            CHAINED_PARAMS_TOKEN);

                    // Parse out the URI of the actual resource. This URI is found as the
                    // substring of the "full" URI, after the chained proxy prefix. We
                    // first search for the pre- and post-fixes of the original
                    // /pre/%chained_params%/post ContainerConfig value, and take the URI
                    // as everything beyond that point.
                    String startToken = chainedChunks[0];
                    String endToken = "/";
                    if (chainedChunks.length == 2 && chainedChunks[1].length() > 0) {
                        endToken = chainedChunks[1];
                    }

                    // Pull URI out of original inUri's full representation.
                    String fullProxyUri = uriIn.toString();
                    // Check the prefix lookup itself: adding the token length to a
                    // "not found" result (-1) usually still yields a positive index,
                    // which would otherwise pass the startIx check below and cut an
                    // arbitrary substring out of the URI.
                    int startTokenIx = fullProxyUri.indexOf(startToken);
                    int startIx = startTokenIx + startToken.length();
                    int endIx = (startTokenIx >= 0) ? fullProxyUri.indexOf(endToken, startIx) : -1;
                    if (startTokenIx >= 0 && startIx > 0 && endIx > 0) {
                        String chainedQuery = fullProxyUri.substring(startIx, endIx);
                        if (doDecode) {
                            chainedQuery = Utf8UrlCoder.decode(chainedQuery);
                        }
                        queryUri = new UriBuilder().setQuery(chainedQuery).toUri();
                        uriStr = fullProxyUri.substring(endIx + endToken.length());
                        // Drop any leading slashes left in front of the target URL.
                        while (uriStr.startsWith("/")) {
                            uriStr = uriStr.substring(1);
                        }
                    }
                }
            }
        }
    }

    if (!strictParsing && container != null && StringUtils.isEmpty(uriStr)) {
        // Query-style despite the container being configured for chained style.
        uriStr = uriIn.getQueryParameter(Param.URL.getKey());
        queryUri = uriIn;
    }

    // Parameter validation.
    if (StringUtils.isEmpty(uriStr) || StringUtils.isEmpty(container)) {
        throw new GadgetException(GadgetException.Code.MISSING_PARAMETER,
                "Missing required parameter(s):"
                        + (StringUtils.isEmpty(uriStr) ? ' ' + Param.URL.getKey() : "")
                        + (StringUtils.isEmpty(container) ? ' ' + Param.CONTAINER.getKey() : ""),
                HttpResponse.SC_BAD_REQUEST);
    }

    String queryHost = config.getString(container, PROXY_HOST_PARAM);
    if (strictParsing) {
        if (queryHost == null || !queryHost.equalsIgnoreCase(uriIn.getAuthority())) {
            throw new GadgetException(GadgetException.Code.INVALID_PATH, "Invalid proxy host",
                    HttpResponse.SC_BAD_REQUEST);
        }
    }

    try {
        uri = Uri.parse(uriStr);
    } catch (Exception e) {
        // NullPointerException or InvalidArgumentException.
        throw new GadgetException(GadgetException.Code.INVALID_PARAMETER,
                "Invalid " + Param.URL.getKey() + ": " + uriStr, HttpResponse.SC_BAD_REQUEST);
    }

    // URI is valid.
    status = UriStatus.VALID_UNVERSIONED;

    String version = queryUri.getQueryParameter(Param.VERSION.getKey());
    if (versioner != null && version != null) {
        status = versioner.validate(uri, container, version);
    }

    ProxyUri proxied = new ProxyUri(status, uri, queryUri);
    proxied.setHtmlTagContext(uriIn.getQueryParameter(Param.HTML_TAG_CONTEXT.getKey()));
    return proxied;
}
From source file:com.edgenius.wiki.render.filter.TableFilter.java
/** * @param line /* ww w . j a v a 2s .c o m*/ * @return */ private String buildRow(String line, boolean head) { String sep; String tag; if (head) { //header sep = "||"; tag = "th"; line = trimSurronding(line); } else { sep = "|"; tag = "td"; line = trimSurronding(line); } StringBuffer sb = new StringBuffer(); //split String[] columns = StringUtils.splitByWholeSeparatorPreserveAllTokens(line, sep); for (String col : columns) { sb.append("<" + tag + ">").append(col.trim().length() == 0 ? " " : col).append("</" + tag + ">"); } return sb.toString(); }
From source file:com.ailk.oci.ocnosql.tools.load.csvbulkload.PhoenixCsvToKeyValueMapper.java
/** * ?csv???rowkey(hash?++md5?)/* ww w .j av a2 s .c o m*/ */ private String generateRowKey(String lineStr) { String rowkey = ""; // 1.??(??) String[] lineArr = StringUtils.splitByWholeSeparatorPreserveAllTokens(lineStr, separator); rowGentemp.delete(0, rowGentemp.length()); for (int idx : rowPrefixColIdxs) { rowGentemp.append(lineArr[idx]); } rowkey += rowKeyGenerator.generatePrefix(rowGentemp.toString()); // 2.?(?) rowGentemp.delete(0, rowGentemp.length()); for (int idx : rowColIdxs) { rowGentemp.append(lineArr[idx]); } rowkey += rowGentemp.toString(); // 3.??(?,a.b.) if (null != unqIdxColIdxs) {// a. rowGentemp.delete(0, rowGentemp.length()); for (int idx : unqIdxColIdxs) { rowGentemp.append(lineArr[idx]); } rowkey += getUniquePostfix(rowGentemp.toString()); } else {// b., rowkey += getUniquePostfix(lineStr); } // 4. return rowkey; }
From source file:com.edgenius.wiki.render.impl.RichRenderEngineImpl.java
/**
 * Unescapes HTML entities and escapes wiki markup in the text nodes of a node
 * list, leaving the tag nodes in place.
 * <p>
 * Instead of converting text node by text node, the node list is flattened to
 * a single string in which every non-text node is replaced by a placeholder
 * key, the conversion is run once over that string, and the string is then
 * split on the placeholder to rebuild the node list. This is done twice: first
 * for the HTML unescape, then for the markup escape.
 *
 * @param htmlText the original HTML text; used only to obtain placeholder keys
 *        from {@code WikiUtil.findUniqueKey}
 * @param nodeContainer the parsed nodes of {@code htmlText}
 * @return a rebuilt node container with converted text nodes, or the input
 *         container when the flattened text was blank in both passes
 */
private HTMLNodeContainer escapeText(String htmlText, HTMLNodeContainer nodeContainer) {

    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // All text nodes need the HTML unescape and the markup escape conversion, e.g. an
    // entity such as &gt; back to >, and *bold* to \*bold\*.
    // Handling this text node by text node could run n * f times (n = number of text
    // nodes, f = number of filters), so instead:
    // 1. Convert the HTMLNode list to a string again, replacing every non-text node with
    //    a unique string.
    // 1.1 TinyMCE saves text as entity codes even when "entity_encoding" is set to "raw",
    //     so even text inside an OriginalTextRequest still needs unescapeHtml().
    // 1.2 But text surrounded by tags identified by OriginalTextRequest does not need
    //     escapeMarkupToSlash().
    // 2. Do the escape conversion.
    // 3. Replace the unique strings with the tag nodes again and reset all text nodes
    //    (their content may have changed through escaping).
    StringBuffer unescTextSb = new StringBuffer();
    String uniqueKey = WikiUtil.findUniqueKey(htmlText);

    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // Pass 1: HTML unescape of all text nodes.
    // Only a limited set of entities is escaped (see EscapeUtil.escapeHTML()), but it is
    // still safe to use this method.
    for (HTMLNode node : nodeContainer) {
        if (node.isTextNode()) {
            unescTextSb.append(node.getText());
        } else {
            unescTextSb.append(uniqueKey);
        }
    }
    String escText = unescTextSb.toString();
    if (!StringUtils.isBlank(escText)) {
        escText = StringEscapeUtil.unescapeHtml(escText);

        // Recover the HTMLNode list: reset all text nodes, as their content may have been
        // changed by the conversion. The split yields N+1 strings, N being the tag count.
        String[] textNodes = StringUtils.splitByWholeSeparatorPreserveAllTokens(escText, uniqueKey);
        HTMLNodeContainer newNodeList = new HTMLNodeContainer();
        Iterator<HTMLNode> iter = nodeContainer.iterator();
        for (int idx = 0; idx < textNodes.length; idx++) {
            String text = textNodes[idx];
            if (!"".equals(text))
                newNodeList.add(new HTMLNode(text, true));
            // After every text chunk except the last, advance the original iterator to
            // the next non-text node and re-insert it; text nodes met on the way are
            // dropped because they were replaced by the chunks above.
            for (; idx < textNodes.length - 1 && iter.hasNext();) {
                HTMLNode node = iter.next();
                if (!node.isTextNode()) {
                    newNodeList.add(node);
                    break;
                }
            }
        }
        nodeContainer = newNodeList;
    }

    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // Pass 2: markup escape, only for text that is not inside an OriginalTextRequest tag
    // (such as preview).
    MacroFilter macroFilter = (MacroFilter) filterProvider.getFilter(MacroFilter.class.getName());
    List<HTMLNode> orgTextMacroHTMLIds = macroFilter.getImmutableHTMLIdenifiers();

    unescTextSb = new StringBuffer();
    // While non-null, nodes are being skipped up to and including this closing node.
    HTMLNode preReqCloseNode = null;
    // This container holds all tags (or text) that will not be escaped. Normally each
    // uniqueKey maps to one HTMLNode, but an OriginalTextRequest macro may contain
    // multiple HTMLNodes, so the element type is List<HTMLNode>.
    List<List<HTMLNode>> tagContainer = new ArrayList<List<HTMLNode>>();
    // Placeholder keys wrapped in newlines, created lazily for block open / close tags.
    String multipleLineUniqueKeyS = null;
    String multipleLineUniqueKeyE = null;
    for (HTMLNode node : nodeContainer) {
        if (preReqCloseNode != null) {
            if (preReqCloseNode == node) {
                preReqCloseNode = null;
            }
            // Skip all tags between the preview open tag and its pair (inclusive); they
            // were already collected when the open tag was seen.
            continue;
        }
        if (!node.isTextNode()) {
            List<HTMLNode> nodes = new ArrayList<HTMLNode>();
            if (node.isIdentified(orgTextMacroHTMLIds) && node.getPair() != null) {
                // This is preview, code or some other tag whose macro implements
                // OriginalTextRequest: put all tags/text surrounded by this tag
                // (including the tag and its pair) into tagContainer as one group.
                preReqCloseNode = node.getPair();
                for (HTMLNode preNode = node; preNode != null && preNode != node.getPair();) {
                    nodes.add(preNode);
                    preNode = preNode.next();
                }
                nodes.add(node.getPair());
            } else {
                nodes.add(node);
            }
            if (RenderUtil.isBlockTag(node)) {
                // For example start<p>[test]</p>end. If <p> and </p> were simply replaced
                // with the normal key, [test] would be treated as surrounded by normal
                // characters, whereas it is actually surrounded by newlines; so use the
                // multiple-line key here.
                if (!node.isCloseTag()) {
                    if (multipleLineUniqueKeyS == null)
                        multipleLineUniqueKeyS = "\n" + WikiUtil.findUniqueKey(htmlText) + "\n";
                    unescTextSb.append(multipleLineUniqueKeyS);
                } else {
                    if (multipleLineUniqueKeyE == null)
                        multipleLineUniqueKeyE = "\n" + WikiUtil.findUniqueKey(htmlText) + "\n";
                    unescTextSb.append(multipleLineUniqueKeyE);
                }
            } else {
                unescTextSb.append(uniqueKey);
            }
            tagContainer.add(nodes);
        } else {
            unescTextSb.append(node.getText());
        }
    }
    escText = unescTextSb.toString();
    if (!StringUtils.isBlank(escText)) {
        // Do all markup filter escaping, e.g. *bold* to \*bold\*.
        escText = MarkupUtil.escapeMarkupToSlash(escText, uniqueKey);

        // Recover the HTMLNode list: reset all text nodes, as their content may have been
        // changed by the conversion. The split yields N+1 strings, N being the tag count.
        // The newline-wrapped block keys are first normalised to the plain key so that a
        // single split handles every placeholder.
        if (multipleLineUniqueKeyS != null) {
            escText = StringUtils.replace(escText, multipleLineUniqueKeyS, uniqueKey);
        }
        if (multipleLineUniqueKeyE != null) {
            escText = StringUtils.replace(escText, multipleLineUniqueKeyE, uniqueKey);
        }
        String[] textNodes = StringUtils.splitByWholeSeparatorPreserveAllTokens(escText, uniqueKey);
        HTMLNodeContainer newNodeList = new HTMLNodeContainer();
        Iterator<List<HTMLNode>> iter = tagContainer.iterator();
        for (int idx = 0; idx < textNodes.length; idx++) {
            String text = textNodes[idx];
            if (!"".equals(text))
                newNodeList.add(new HTMLNode(text, true));

            // After every text chunk except the last, re-insert the node group that the
            // placeholder stood for.
            if (idx < textNodes.length - 1 && iter.hasNext()) {
                List<HTMLNode> insertList = iter.next();
                for (HTMLNode insert : insertList) {
                    newNodeList.add(insert);
                }
            }
        }
        nodeContainer = newNodeList;
    }

    return nodeContainer;
}