Example usage for org.apache.commons.lang StringUtils splitByWholeSeparatorPreserveAllTokens

Introduction

On this page you can find example usages of org.apache.commons.lang StringUtils splitByWholeSeparatorPreserveAllTokens.

Prototype

public static String[] splitByWholeSeparatorPreserveAllTokens(String str, String separator)

Source Link

Document

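Splits the provided text into an array, using the given separator string as a whole (not as a set of individual characters). The separator is not included in the returned array, and adjacent separators produce empty tokens.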

Usage

From source file:com.spotify.hdfs2cass.CassandraPartitioner.java

/**
 * Configures this partitioner from the job configuration.
 * <p>
 * Reads the partitioner class name and the comma-separated token/node list,
 * instantiates the partitioner reflectively and builds {@code tokenNodes},
 * sorted by start token.
 *
 * @param entries the job configuration to read the parameters from
 * @throws RuntimeException if a required parameter is missing, the partitioner
 *         class cannot be loaded or instantiated, or the node list is empty
 */
@SuppressWarnings("unchecked")
@Override
public void configure(JobConf entries) {
    final String partitionerParam = entries.get(ClusterInfo.SPOTIFY_CASSANDRA_PARTITIONER_PARAM);
    if (partitionerParam == null) {
        throw new RuntimeException("Didn't get any cassandra partitioner information");
    }

    try {
        partitioner = (AbstractPartitioner<BigIntegerToken>) Class.forName(partitionerParam).newInstance();
    } catch (Exception ex) {
        // Chain the original failure so the real reason (class not found, no
        // accessible constructor, wrong type, ...) is not lost.
        throw new RuntimeException("Invalid partitioner class name: " + partitionerParam, ex);
    }

    final String tokenNodesParam = entries.get(ClusterInfo.SPOTIFY_CASSANDRA_TOKENS_PARAM);
    if (tokenNodesParam == null) {
        throw new RuntimeException("Didn't get any cassandra information");
    }

    final String[] parts = StringUtils.splitByWholeSeparatorPreserveAllTokens(tokenNodesParam, ",");
    if ((parts == null) || (parts.length == 0)) {
        throw new RuntimeException("Didn't get any valid cassandra nodes information");
    }

    tokenNodes = new ArrayList<TokenNode>();
    for (String part : parts) {
        tokenNodes.add(new TokenNode(part));
    }

    // Order the nodes by start token; equal nodes compare as 0 before tokens are consulted.
    Collections.sort(tokenNodes, new Comparator<TokenNode>() {
        @Override
        public int compare(TokenNode o1, TokenNode o2) {
            if (o1.equals(o2)) {
                return 0;
            }

            return o1.getStartToken().compareTo(o2.getStartToken());
        }
    });
}

From source file:com.act.lcms.db.io.parser.ConstructAnalysisFileParser.java

/**
 * Reads a construct analysis file line by line.
 * <p>
 * A line matching {@code CONSTRUCT_DESIGNATOR_PATTERN} starts a new construct;
 * every other line is split on {@code PRODUCT_KIND_SEPARATOR} into a
 * (chemical, kind) pair that is attached to the current construct. Each
 * completed construct is handed to {@code handleConstructProductsList}.
 *
 * @param inFile the file to parse
 * @throws IOException if reading the file fails
 * @throws RuntimeException if a product line appears before any construct line
 */
public void parse(File inFile) throws IOException {
    try (BufferedReader in = new BufferedReader(new FileReader(inFile))) {
        String currentConstruct = null;
        List<ConstructAssociatedChemical> chemicals = null;

        for (String row = in.readLine(); row != null; row = in.readLine()) {
            Matcher designator = CONSTRUCT_DESIGNATOR_PATTERN.matcher(row);
            if (designator.matches()) {
                // A new construct begins: flush the previous one first.
                if (currentConstruct != null) {
                    handleConstructProductsList(currentConstruct, chemicals);
                }
                currentConstruct = designator.group(1).trim();
                chemicals = new ArrayList<>();
                continue;
            }

            if (currentConstruct == null || chemicals == null) {
                throw new RuntimeException(
                        "Found construct product step line without a pre-defined construct");
            }

            String[] pieces = StringUtils.splitByWholeSeparatorPreserveAllTokens(row,
                    PRODUCT_KIND_SEPARATOR);
            if (pieces.length != 2) {
                System.err.format("Skipping line with unexpected number of fields (%d): %s\n",
                        pieces.length, row);
                continue;
            }
            chemicals.add(new ConstructAssociatedChemical(pieces[0], pieces[1]));
        }

        // Flush the construct that was still open when the file ended.
        if (currentConstruct != null) {
            handleConstructProductsList(currentConstruct, chemicals);
        }
    }
}

From source file:com.ibm.bi.dml.runtime.io.IOUtilFunctions.java

/**
 * Splits a string by a specified delimiter into all tokens, including empty.
 * NOTE: This method is meant as a faster drop-in replacement of the regular 
 * string split.
 * 
 * @param str the string to split
 * @param delim the delimiter; it is matched as a whole string, so it may be
 *        longer than one character
 * @return the tokens exactly as returned by
 *         {@code StringUtils.splitByWholeSeparatorPreserveAllTokens}, empty
 *         tokens included
 */
public static String[] split(String str, String delim) {
    //note: split via stringutils faster than precompiled pattern / guava splitter

    //split by whole separator required for multi-character delimiters, preserve
    //all tokens required for empty cells and in order to keep cell alignment
    return StringUtils.splitByWholeSeparatorPreserveAllTokens(str, delim);
}

From source file:com.spotify.hdfs2cass.cassandra.utils.CassandraPartitioner.java

/**
 * Stores the configuration and initialises this partitioner from it.
 * <p>
 * Reads the partitioner class name, the token range per reducer, the
 * comma-separated list of reducer numbers and the "distribute randomly" flag.
 *
 * @param conf the configuration to read the parameters from
 * @throws RuntimeException if a required parameter is missing, the partitioner
 *         class cannot be loaded or instantiated, or the reducer list is empty
 * @throws NumberFormatException if the range or a reducer entry is not a valid number
 */
@Override
public void setConf(Configuration conf) {
    this.conf = conf;

    final String partitionerParam = conf.get(CassandraParams.SCRUB_CASSANDRACLUSTER_PARTITIONER_CONFIG);
    logger.info(CassandraParams.SCRUB_CASSANDRACLUSTER_PARTITIONER_CONFIG + ": " + partitionerParam);
    if (partitionerParam == null) {
        throw new RuntimeException("Didn't get any cassandra partitioner information");
    }

    try {
        partitioner = (AbstractPartitioner) Class.forName(partitionerParam).newInstance();
    } catch (Exception ex) {
        // Chain the original failure so the real reason (class not found, no
        // accessible constructor, wrong type, ...) is not lost.
        throw new RuntimeException("Invalid partitioner class name: " + partitionerParam, ex);
    }

    final String rangePerReducerStr = conf.get(CassandraParams.SCRUB_CASSANDRACLUSTER_RANGE_PER_REDUCER_CONFIG);
    if (rangePerReducerStr == null) {
        throw new RuntimeException("Didn't get cassandra range per reducer");
    }

    rangePerReducer = new BigInteger(rangePerReducerStr);

    final String reducersStr = conf.get(CassandraParams.SCRUB_CASSANDRACLUSTER_REDUCERS_CONFIG);
    if (reducersStr == null) {
        throw new RuntimeException("Failed to get list of reducers");
    }

    final String[] parts = StringUtils.splitByWholeSeparatorPreserveAllTokens(reducersStr, ",");
    if ((parts == null) || (parts.length == 0)) {
        throw new RuntimeException("Didn't get any valid list of reducers");
    }

    reducers = new ArrayList<>(parts.length);
    for (String part : parts) {
        reducers.add(Integer.parseInt(part));
    }

    distributeRandomly = conf.getBoolean(CassandraParams.SCRUB_CASSANDRACLUSTER_DISTRIBUTE_RANDOMLY_CONFIG,
            false);
    if (distributeRandomly) {
        // The random source is only created when random distribution is enabled.
        random = new Random();
    }

    logger.info("CP: range per reducer: {}, reducers: {}, distribute randomly: {}",
            new Object[] { rangePerReducerStr, Arrays.toString(reducers.toArray()), distributeRandomly });
}

From source file:com.floreantpos.license.FiveStarPOSLicenseManager.java

/**
 * Gets the template file based on command line arguments.
 * <p>
 * The first argument containing {@code -t=} wins; everything after the first
 * occurrence of that flag is used as the file location, so a location that
 * itself contains {@code -t=} is kept intact. If no such argument exists, or
 * its value is blank, the default {@code TEMPLATE_FILE} is used.
 *
 * @param args the command line arguments, may be {@code null}
 * @return the template file named on the command line, or the default template file
 */
private static File getTemplateFile(String[] args) {
    final String flag = "-t=";
    if (args != null) {
        String templateFileUri = null;
        for (String arg : args) {
            // Null entries are skipped, as the null-safe contains() check used to do.
            final int flagAt = (arg == null) ? -1 : arg.indexOf(flag);
            if (flagAt >= 0) {
                templateFileUri = arg.substring(flagAt + flag.length());
                break;
            }
        }
        if (StringUtils.isNotBlank(templateFileUri)) {
            return new File(templateFileUri);
        }
    }
    return new File(TEMPLATE_FILE);
}

From source file:com.opengamma.id.UniqueId.java

/**
 * Parses a {@code UniqueId} from its formatted string form.
 * <p>
 * The accepted form is the one produced by {@code toString()}, which is
 * {@code <SCHEME>~<VALUE>~<VERSION>}; the version part may be omitted.
 * When the input contains no {@code ~} at all, {@code ::} is accepted as the
 * separator instead, to remain lenient towards old data.
 * 
 * @param str  the unique identifier to parse, not null
 * @return the unique identifier, not null
 * @throws IllegalArgumentException if the identifier cannot be parsed
 */
@FromString
public static UniqueId parse(String str) {
    ArgumentChecker.notEmpty(str, "str");
    if (!str.contains("~")) {
        str = StringUtils.replace(str, "::", "~"); // leniently parse old data
    }
    final String[] parts = StringUtils.splitByWholeSeparatorPreserveAllTokens(str, "~");
    if (parts.length == 2) {
        // scheme and value only, no version
        return UniqueId.of(parts[0], parts[1], null);
    }
    if (parts.length == 3) {
        return UniqueId.of(parts[0], parts[1], parts[2]);
    }
    throw new IllegalArgumentException("Invalid identifier format: " + str);
}

From source file:com.aipo.container.gadgets.uri.AipoProxyUriManager.java

/**
 * Parses an incoming proxy URI into a {@link ProxyUri}.
 * <p>
 * Two syntaxes are recognised: query-style, where the container and target URL
 * are query parameters and the path equals the container's configured proxy
 * path, and chained-style, where the parameters are embedded in the path and
 * the target URL follows them. When {@code strictParsing} is off and the
 * chained parse produced no URL, the query-style parameters are used instead.
 *
 * @param uriIn the incoming proxy URI
 * @return the parsed proxy URI, carrying its validation status
 * @throws GadgetException with {@code MISSING_PARAMETER} if the target URL or
 *         the container is missing, {@code INVALID_PATH} if strict parsing is
 *         on and the host does not match the configured proxy host, or
 *         {@code INVALID_PARAMETER} if the target URL cannot be parsed
 */
@SuppressWarnings("deprecation")
public ProxyUri process(Uri uriIn) throws GadgetException {
    UriStatus status = UriStatus.BAD_URI;
    Uri uri = null;

    // First determine if the URI is chained-syntax or query-style.
    String container = uriIn.getQueryParameter(Param.CONTAINER.getKey());
    if (container == null) {
        // Fall back to the SYND parameter when CONTAINER is absent.
        container = uriIn.getQueryParameter(Param.SYND.getKey());
    }
    String uriStr = null;
    Uri queryUri = null;
    if (container != null && config.getString(container, PROXY_PATH_PARAM) != null
            && config.getString(container, PROXY_PATH_PARAM).equalsIgnoreCase(uriIn.getPath())) {
        // Query-style. Has container param and path matches.
        uriStr = uriIn.getQueryParameter(Param.URL.getKey());
        queryUri = uriIn;
    } else {
        // Check for chained query string in the path.
        String containerStr = Param.CONTAINER.getKey() + '=';
        String path = uriIn.getPath();
        // It is possible to get decoded url ('=' converted to %3d)
        // for example from CssResponseRewriter, so we should support it
        boolean doDecode = (!path.contains(containerStr));
        if (doDecode) {
            path = Utf8UrlCoder.decode(path);
        }
        int start = path.indexOf(containerStr);
        if (start > 0) {
            start += containerStr.length();
            // The container value ends at the next '&', or failing that the next '/'.
            int end = path.indexOf('&', start);
            if (end < start) {
                end = path.indexOf('/', start);
            }
            if (end > start) {
                // Looks like chained proxy syntax. Pull out params.
                container = path.substring(start, end);
            }
            if (container != null) {
                String proxyPath = config.getString(container, PROXY_PATH_PARAM);
                if (proxyPath != null) {
                    String[] chainedChunks = StringUtils.splitByWholeSeparatorPreserveAllTokens(proxyPath,
                            CHAINED_PARAMS_TOKEN);

                    // Parse out the URI of the actual resource. This URI is found as
                    // the
                    // substring of the "full" URI, after the chained proxy prefix. We
                    // first search for the pre- and post-fixes of the original
                    // /pre/%chained_params%/post
                    // ContainerConfig value, and take the URI as everything beyond that
                    // point.
                    String startToken = chainedChunks[0];
                    String endToken = "/";
                    if (chainedChunks.length == 2 && chainedChunks[1].length() > 0) {
                        endToken = chainedChunks[1];
                    }

                    // Pull URI out of original inUri's full representation.
                    String fullProxyUri = uriIn.toString();
                    // NOTE(review): if startToken is not found, indexOf returns -1 and startIx
                    // becomes startToken.length() - 1, which still passes the "startIx > 0"
                    // check below for tokens longer than one character - confirm this is intended.
                    int startIx = fullProxyUri.indexOf(startToken) + startToken.length();
                    int endIx = fullProxyUri.indexOf(endToken, startIx);
                    if (startIx > 0 && endIx > 0) {
                        String chainedQuery = fullProxyUri.substring(startIx, endIx);
                        if (doDecode) {
                            chainedQuery = Utf8UrlCoder.decode(chainedQuery);
                        }
                        queryUri = new UriBuilder().setQuery(chainedQuery).toUri();
                        uriStr = fullProxyUri.substring(endIx + endToken.length());
                        // Drop any leading slashes from the target URL.
                        while (uriStr.startsWith("/")) {
                            uriStr = uriStr.substring(1);
                        }

                    }
                }
            }
        }
    }

    if (!strictParsing && container != null && StringUtils.isEmpty(uriStr)) {
        // Query-style despite the container being configured for chained style.
        uriStr = uriIn.getQueryParameter(Param.URL.getKey());
        queryUri = uriIn;
    }

    // Parameter validation.
    if (StringUtils.isEmpty(uriStr) || StringUtils.isEmpty(container)) {
        throw new GadgetException(GadgetException.Code.MISSING_PARAMETER,
                "Missing required parameter(s):" + (StringUtils.isEmpty(uriStr) ? ' ' + Param.URL.getKey() : "")
                        + (StringUtils.isEmpty(container) ? ' ' + Param.CONTAINER.getKey() : ""),
                HttpResponse.SC_BAD_REQUEST);
    }

    // In strict mode the request's authority must match the configured proxy host.
    String queryHost = config.getString(container, PROXY_HOST_PARAM);
    if (strictParsing) {
        if (queryHost == null || !queryHost.equalsIgnoreCase(uriIn.getAuthority())) {
            throw new GadgetException(GadgetException.Code.INVALID_PATH, "Invalid proxy host",
                    HttpResponse.SC_BAD_REQUEST);
        }
    }

    try {
        uri = Uri.parse(uriStr);
    } catch (Exception e) {
        // NullPointerException or InvalidArgumentException.
        throw new GadgetException(GadgetException.Code.INVALID_PARAMETER,
                "Invalid " + Param.URL.getKey() + ": " + uriStr, HttpResponse.SC_BAD_REQUEST);
    }

    // URI is valid.
    status = UriStatus.VALID_UNVERSIONED;

    // Only consult the versioner when one is configured and a version was supplied.
    String version = queryUri.getQueryParameter(Param.VERSION.getKey());
    if (versioner != null && version != null) {
        status = versioner.validate(uri, container, version);
    }

    ProxyUri proxied = new ProxyUri(status, uri, queryUri);
    proxied.setHtmlTagContext(uriIn.getQueryParameter(Param.HTML_TAG_CONTEXT.getKey()));
    return proxied;
}

From source file:com.edgenius.wiki.render.filter.TableFilter.java

/**
 * @param line /* ww w . j a  v a 2s .c o  m*/
 * @return
 */
private String buildRow(String line, boolean head) {
    String sep;
    String tag;
    if (head) {
        //header
        sep = "||";
        tag = "th";
        line = trimSurronding(line);
    } else {
        sep = "|";
        tag = "td";
        line = trimSurronding(line);
    }
    StringBuffer sb = new StringBuffer();
    //split
    String[] columns = StringUtils.splitByWholeSeparatorPreserveAllTokens(line, sep);
    for (String col : columns) {
        sb.append("<" + tag + ">").append(col.trim().length() == 0 ? "&nbsp;" : col).append("</" + tag + ">");
    }
    return sb.toString();
}

From source file:com.ailk.oci.ocnosql.tools.load.csvbulkload.PhoenixCsvToKeyValueMapper.java

/**
 * ?csv???rowkey(hash?++md5?)/*  ww w  .j  av  a2 s  .c o m*/
 */
private String generateRowKey(String lineStr) {
    String rowkey = "";
    // 1.??(??)
    String[] lineArr = StringUtils.splitByWholeSeparatorPreserveAllTokens(lineStr, separator);
    rowGentemp.delete(0, rowGentemp.length());
    for (int idx : rowPrefixColIdxs) {
        rowGentemp.append(lineArr[idx]);
    }
    rowkey += rowKeyGenerator.generatePrefix(rowGentemp.toString());
    // 2.?(?)
    rowGentemp.delete(0, rowGentemp.length());
    for (int idx : rowColIdxs) {
        rowGentemp.append(lineArr[idx]);
    }
    rowkey += rowGentemp.toString();
    // 3.??(?,a.b.)

    if (null != unqIdxColIdxs) {// a.
        rowGentemp.delete(0, rowGentemp.length());
        for (int idx : unqIdxColIdxs) {
            rowGentemp.append(lineArr[idx]);
        }
        rowkey += getUniquePostfix(rowGentemp.toString());
    } else {// b.,
        rowkey += getUniquePostfix(lineStr);
    }
    // 4.
    return rowkey;
}

From source file:com.edgenius.wiki.render.impl.RichRenderEngineImpl.java

/**
 * Applies HTML unescaping and markup escaping to the text nodes of a node list.
 * <p>
 * Rather than filtering every text node separately, the list is flattened into
 * one string in which each non-text node is replaced by a placeholder key; the
 * conversion runs once over that string, which is then split on the key to
 * rebuild the node list. This is done twice: first for HTML unescaping, then
 * for markup escaping.
 *
 * @param htmlText the original HTML text, used to find placeholder keys
 * @param nodeContainer the nodes to process
 * @return the rebuilt node container, or the given container if there was no text to convert
 */
private HTMLNodeContainer escapeText(String htmlText, HTMLNodeContainer nodeContainer) {
    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // all Text node need do HTMLEscape and markup escape conversion,e.g, &gt; to >, *bold* to \*bold\* etc.
    // if handle this textNode by textNode, it may run n * f times. n is text node number, f is filter number
    // so here do following: 
    // 1. convert HTMLNode list to string again but replace all non-text node to unique string
    // 1.1 As TinyMCE will save text into entity code even "entity_encoding" set to "raw", so even text inside
    // OriginalTextRequest, it still need do HTML unescapeHtml()
    // 1.2 But, the text which is surrounding by these tag which is identified by OriginalTextRequest, don't 
    // need do any escapeMarkupToSlash().
    // 2. do escape conversion
    // 3. replace back unique string pointer back to Tag node, reset all text node (it may has escape occur)

    StringBuffer unescTextSb = new StringBuffer();
    String uniqueKey = WikiUtil.findUniqueKey(htmlText);

    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // do all textnode HTML unescape
    //do HTML unescape,eg, &gt; to >; Although I only do limited entity escape(see EscapeUtil.escapeHTML()), 
    //but it is still safe use this method 
    for (HTMLNode node : nodeContainer) {
        if (node.isTextNode()) {
            unescTextSb.append(node.getText());
        } else {
            // every non-text node becomes one placeholder key
            unescTextSb.append(uniqueKey);
        }
    }
    String escText = unescTextSb.toString();
    if (!StringUtils.isBlank(escText)) {
        escText = StringEscapeUtil.unescapeHtml(escText);

        //recover HTMLNode list: reset all text node as their content may changed by conversion
        //this will split N+1 strings,N is tag number.
        String[] textNodes = StringUtils.splitByWholeSeparatorPreserveAllTokens(escText, uniqueKey);
        HTMLNodeContainer newNodeList = new HTMLNodeContainer();
        Iterator<HTMLNode> iter = nodeContainer.iterator();
        for (int idx = 0; idx < textNodes.length; idx++) {
            String text = textNodes[idx];
            if (!"".equals(text))
                newNodeList.add(new HTMLNode(text, true));
            // after every chunk except the last, advance to the next non-text node of the
            // original list (skipping old text nodes) and re-insert it
            for (; idx < textNodes.length - 1 && iter.hasNext();) {
                HTMLNode node = iter.next();
                if (!node.isTextNode()) {
                    newNodeList.add(node);
                    break;
                }
            }
        }

        nodeContainer = newNodeList;
    }

    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // do markup unescape for only text is not inside OriginalTextRequest tag, such preview
    MacroFilter macroFilter = (MacroFilter) filterProvider.getFilter(MacroFilter.class.getName());
    List<HTMLNode> orgTextMacroHTMLIds = macroFilter.getImmutableHTMLIdenifiers();

    unescTextSb = new StringBuffer();
    HTMLNode preReqCloseNode = null;
    //this container save all tag (or text) which won't do escape:
    //normally, each uniqueKey will mapping to one HTMLNode, but for OriginalTextRequest macro, which may contain multiple 
    //HTMLNodes, so that, the container use List<HTMLNode> as element.
    List<List<HTMLNode>> tagContainer = new ArrayList<List<HTMLNode>>();
    // block-tag placeholders (newline-wrapped keys) are created lazily, one for open tags and one for close tags
    String multipleLineUniqueKeyS = null;
    String multipleLineUniqueKeyE = null;
    for (HTMLNode node : nodeContainer) {
        if (preReqCloseNode != null) {
            if (preReqCloseNode == node) {
                // reached the closing pair: normal processing resumes with the next node
                preReqCloseNode = null;
            }
            //skip all tags between preview text
            continue;
        }
        if (!node.isTextNode()) {
            List<HTMLNode> nodes = new ArrayList<HTMLNode>();
            if (node.isIdentified(orgTextMacroHTMLIds) && node.getPair() != null) {
                //this is preview, code or some others tags which Macro implements OriginalTextRequest
                //the put all tags/text surrounded by this preview tag(include its self pair) into tagContainer
                preReqCloseNode = node.getPair();
                for (HTMLNode preNode = node; preNode != null && preNode != node.getPair();) {
                    nodes.add(preNode);
                    preNode = preNode.next();
                }
                nodes.add(node.getPair());
            } else {
                nodes.add(node);
            }
            if (RenderUtil.isBlockTag(node)) {
                //for example start<p>[test]</p>end. If simply replace <p> and </p> with normal key, this [test] will treat
                //as normal character surrounded, actually, it is newline surrounded, so use multiple lines key
                if (!node.isCloseTag()) {
                    if (multipleLineUniqueKeyS == null)
                        multipleLineUniqueKeyS = "\n" + WikiUtil.findUniqueKey(htmlText) + "\n";
                    unescTextSb.append(multipleLineUniqueKeyS);
                } else {
                    if (multipleLineUniqueKeyE == null)
                        multipleLineUniqueKeyE = "\n" + WikiUtil.findUniqueKey(htmlText) + "\n";
                    unescTextSb.append(multipleLineUniqueKeyE);
                }
            } else {
                unescTextSb.append(uniqueKey);
            }
            tagContainer.add(nodes);
        } else {
            unescTextSb.append(node.getText());
        }
    }
    escText = unescTextSb.toString();
    if (!StringUtils.isBlank(escText)) {

        //do all markup filter escape, eg, *bold* \*bold\*
        escText = MarkupUtil.escapeMarkupToSlash(escText, uniqueKey);

        //recover HTMLNode list: reset all text node as their content may changed by conversion
        //this will split N+1 strings,N is tag number.
        // first collapse the newline-wrapped block-tag keys back into the plain key so a single split suffices
        if (multipleLineUniqueKeyS != null) {
            escText = StringUtils.replace(escText, multipleLineUniqueKeyS, uniqueKey);
        }
        if (multipleLineUniqueKeyE != null) {
            escText = StringUtils.replace(escText, multipleLineUniqueKeyE, uniqueKey);
        }

        String[] textNodes = StringUtils.splitByWholeSeparatorPreserveAllTokens(escText, uniqueKey);
        HTMLNodeContainer newNodeList = new HTMLNodeContainer();
        Iterator<List<HTMLNode>> iter = tagContainer.iterator();
        for (int idx = 0; idx < textNodes.length; idx++) {
            String text = textNodes[idx];
            if (!"".equals(text))
                newNodeList.add(new HTMLNode(text, true));
            //find first available non-text node, the replace the tag-token...
            if (idx < textNodes.length - 1 && iter.hasNext()) {
                List<HTMLNode> insertList = iter.next();
                for (HTMLNode insert : insertList) {
                    newNodeList.add(insert);
                }

            }
        }
        nodeContainer = newNodeList;
    }
    return nodeContainer;
}