Example usage for org.apache.commons.lang StringUtils splitByWholeSeparatorPreserveAllTokens

List of usage examples for org.apache.commons.lang StringUtils splitByWholeSeparatorPreserveAllTokens

Introduction

On this page you can find example usages of org.apache.commons.lang StringUtils splitByWholeSeparatorPreserveAllTokens.

Prototype

public static String[] splitByWholeSeparatorPreserveAllTokens(String str, String separator) 

Source Link

Document

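Splits the provided text into an array of tokens, treating the given separator string as a single whole delimiter. The separator is not included in the returned array, and adjacent separators yield empty tokens, which are preserved.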

Usage

From source file:com.spotify.hdfs2cass.CassandraPartitioner.java

/**
 * Configures this partitioner from the job configuration.
 * <p>
 * Reads the partitioner class name from
 * {@code ClusterInfo.SPOTIFY_CASSANDRA_PARTITIONER_PARAM} and instantiates it
 * reflectively, then reads the comma-separated token/node list from
 * {@code ClusterInfo.SPOTIFY_CASSANDRA_TOKENS_PARAM}, turns every entry into a
 * {@code TokenNode} and keeps the resulting list sorted by start token.
 *
 * @param entries the job configuration to read both parameters from
 * @throws RuntimeException if either parameter is missing, if the partitioner
 *         class cannot be instantiated (the underlying failure is attached as
 *         the cause), or if the token list yields no entries
 */
@SuppressWarnings("unchecked")
@Override
public void configure(JobConf entries) {
    final String partitionerParam = entries.get(ClusterInfo.SPOTIFY_CASSANDRA_PARTITIONER_PARAM);
    if (partitionerParam == null) {
        throw new RuntimeException("Didn't get any cassandra partitioner information");
    }

    try {
        partitioner = (AbstractPartitioner<BigIntegerToken>) Class.forName(partitionerParam).newInstance();
    } catch (Exception ex) {
        // Attach the original exception: without it there is no way to tell a
        // missing class from an inaccessible constructor or a failed cast.
        throw new RuntimeException("Invalid partitioner class name: " + partitionerParam, ex);
    }

    final String tokenNodesParam = entries.get(ClusterInfo.SPOTIFY_CASSANDRA_TOKENS_PARAM);
    if (tokenNodesParam == null) {
        throw new RuntimeException("Didn't get any cassandra information");
    }

    // Empty entries are preserved by the splitter and are passed on to
    // the TokenNode constructor as-is.
    final String[] parts = StringUtils.splitByWholeSeparatorPreserveAllTokens(tokenNodesParam, ",");
    if ((parts == null) || (parts.length == 0)) {
        throw new RuntimeException("Didn't get any valid cassandra nodes information");
    }

    tokenNodes = new ArrayList<TokenNode>();
    for (String part : parts) {
        tokenNodes.add(new TokenNode(part));
    }

    Collections.sort(tokenNodes, new Comparator<TokenNode>() {
        @Override
        public int compare(TokenNode o1, TokenNode o2) {
            // Nodes that are equal compare as 0 regardless of their tokens.
            if (o1.equals(o2)) {
                return 0;
            }

            return o1.getStartToken().compareTo(o2.getStartToken());
        }
    });
}

From source file:com.act.lcms.db.io.parser.ConstructAnalysisFileParser.java

/**
 * Reads a construct analysis file and passes each construct's product list to
 * {@code handleConstructProductsList}.
 * <p>
 * A line matching {@code CONSTRUCT_DESIGNATOR_PATTERN} opens a new construct
 * whose id is the trimmed first capture group. Every other line is split on
 * {@code PRODUCT_KIND_SEPARATOR} into a chemical and a kind and added to the
 * construct currently open. Lines that do not split into exactly two fields
 * are reported on standard error and skipped.
 *
 * @param inFile the file to read
 * @throws IOException if the file cannot be opened or read
 * @throws RuntimeException if a product line appears before any construct
 *         designator line
 */
public void parse(File inFile) throws IOException {
    try (BufferedReader in = new BufferedReader(new FileReader(inFile))) {
        String currentConstruct = null;
        List<ConstructAssociatedChemical> currentProducts = null;

        for (String row = in.readLine(); row != null; row = in.readLine()) {
            Matcher designator = CONSTRUCT_DESIGNATOR_PATTERN.matcher(row);
            if (designator.matches()) {
                // A new designator closes the construct collected so far.
                if (currentConstruct != null) {
                    handleConstructProductsList(currentConstruct, currentProducts);
                }
                currentConstruct = designator.group(1).trim();
                currentProducts = new ArrayList<>();
                continue;
            }

            if (currentConstruct == null || currentProducts == null) {
                throw new RuntimeException(
                        "Found construct product step line without a pre-defined construct");
            }

            String[] pieces = StringUtils.splitByWholeSeparatorPreserveAllTokens(row,
                    PRODUCT_KIND_SEPARATOR);
            if (pieces.length == 2) {
                currentProducts.add(new ConstructAssociatedChemical(pieces[0], pieces[1]));
            } else {
                System.err.format("Skipping line with unexpected number of fields (%d): %s\n",
                        pieces.length, row);
            }
        }

        // The last construct has no following designator to flush it.
        if (currentConstruct != null) {
            handleConstructProductsList(currentConstruct, currentProducts);
        }
    }
}

From source file:com.ibm.bi.dml.runtime.io.IOUtilFunctions.java

/**
 * Tokenizes a string around every occurrence of a delimiter, keeping empty
 * tokens.
 * <p>
 * Intended as a faster drop-in replacement for the regular string split.
 *
 * @param str the string to tokenize
 * @param delim the delimiter, matched as a whole (may be longer than one character)
 * @return the tokens exactly as returned by
 *         {@code StringUtils.splitByWholeSeparatorPreserveAllTokens}
 */
public static String[] split(String str, String delim) {
    // StringUtils was measured by the original author to be faster than a
    // precompiled pattern or the guava splitter.
    //
    // Whole-separator matching is needed for multi-character delimiters;
    // preserving all tokens keeps empty cells and therefore cell alignment.
    final String[] tokens = StringUtils.splitByWholeSeparatorPreserveAllTokens(str, delim);
    return tokens;
}

From source file:com.spotify.hdfs2cass.cassandra.utils.CassandraPartitioner.java

/**
 * Stores the configuration and initialises this partitioner from it.
 * <p>
 * Reads, in order: the partitioner class name (instantiated reflectively),
 * the token range per reducer (parsed as a {@code BigInteger}), the
 * comma-separated list of reducer numbers (each parsed with
 * {@code Integer.parseInt}) and the "distribute randomly" flag, which
 * defaults to {@code false} and creates a {@code Random} when set.
 *
 * @param conf the configuration to read from
 * @throws RuntimeException if a required setting is missing or if the
 *         partitioner class cannot be instantiated (the underlying failure is
 *         attached as the cause)
 * @throws NumberFormatException if the range or a reducer entry is not a
 *         valid number
 */
@Override
public void setConf(Configuration conf) {
    this.conf = conf;

    final String partitionerParam = conf.get(CassandraParams.SCRUB_CASSANDRACLUSTER_PARTITIONER_CONFIG);
    logger.info(CassandraParams.SCRUB_CASSANDRACLUSTER_PARTITIONER_CONFIG + ": " + partitionerParam);
    if (partitionerParam == null) {
        throw new RuntimeException("Didn't get any cassandra partitioner information");
    }

    try {
        partitioner = (AbstractPartitioner) Class.forName(partitionerParam).newInstance();
    } catch (Exception ex) {
        // Attach the original exception: without it there is no way to tell a
        // missing class from an inaccessible constructor or a failed cast.
        throw new RuntimeException("Invalid partitioner class name: " + partitionerParam, ex);
    }

    final String rangePerReducerStr = conf.get(CassandraParams.SCRUB_CASSANDRACLUSTER_RANGE_PER_REDUCER_CONFIG);
    if (rangePerReducerStr == null) {
        throw new RuntimeException("Didn't get cassandra range per reducer");
    }

    rangePerReducer = new BigInteger(rangePerReducerStr);

    final String reducersStr = conf.get(CassandraParams.SCRUB_CASSANDRACLUSTER_REDUCERS_CONFIG);
    if (reducersStr == null) {
        throw new RuntimeException("Failed to get list of reducers");
    }

    final String[] parts = StringUtils.splitByWholeSeparatorPreserveAllTokens(reducersStr, ",");
    if ((parts == null) || (parts.length == 0)) {
        throw new RuntimeException("Didn't get any valid list of reducers");
    }

    // Empty entries are preserved by the splitter, so a stray comma surfaces
    // here as a NumberFormatException rather than being silently dropped.
    reducers = new ArrayList<>(parts.length);
    for (String part : parts) {
        reducers.add(Integer.parseInt(part));
    }

    distributeRandomly = conf.getBoolean(CassandraParams.SCRUB_CASSANDRACLUSTER_DISTRIBUTE_RANDOMLY_CONFIG,
            false);
    if (distributeRandomly) {
        random = new Random();
    }

    logger.info("CP: range per reducer: {}, reducers: {}, distribute randomly: {}",
            new Object[] { rangePerReducerStr, Arrays.toString(reducers.toArray()), distributeRandomly });
}

From source file:com.floreantpos.license.FiveStarPOSLicenseManager.java

/**
 * Resolves the template file from the command line arguments.
 * <p>
 * The first argument containing {@code -t=} is split on that marker and the
 * token following the first occurrence is used as the file location. When no
 * argument carries the marker, or the extracted value is blank, the file
 * named by {@code TEMPLATE_FILE} is returned instead.
 *
 * @param args the command line arguments, may be {@code null}
 * @return the template file, never {@code null}
 */
private static File getTemplateFile(String[] args) {
    if (args == null) {
        return new File(TEMPLATE_FILE);
    }

    String location = null;
    for (String arg : args) {
        if (!StringUtils.contains(arg, "-t=")) {
            continue;
        }
        // Only the first matching argument is considered.
        location = StringUtils.splitByWholeSeparatorPreserveAllTokens(arg, "-t=")[1];
        break;
    }

    return StringUtils.isNotBlank(location) ? new File(location) : new File(TEMPLATE_FILE);
}

From source file:com.opengamma.id.UniqueId.java

/**
 * Parses a {@code UniqueId} from its formatted string form.
 * <p>
 * The accepted form is the one produced by {@code toString()}, namely
 * {@code <SCHEME>~<VALUE>~<VERSION>}, with the version part optional.
 * For leniency towards old data, a string containing no {@code ~} has every
 * {@code ::} replaced by {@code ~} before it is split.
 *
 * @param str  the unique identifier to parse, not null
 * @return the unique identifier, not null
 * @throws IllegalArgumentException if the identifier cannot be parsed
 */
@FromString
public static UniqueId parse(String str) {
    ArgumentChecker.notEmpty(str, "str");

    // Old data used "::" as the separator; only rewrite when no "~" is present.
    final String normalized = str.contains("~") ? str : StringUtils.replace(str, "::", "~");

    final String[] parts = StringUtils.splitByWholeSeparatorPreserveAllTokens(normalized, "~");
    if (parts.length == 2) {
        return UniqueId.of(parts[0], parts[1], null);
    }
    if (parts.length == 3) {
        return UniqueId.of(parts[0], parts[1], parts[2]);
    }
    throw new IllegalArgumentException("Invalid identifier format: " + normalized);
}

From source file:com.aipo.container.gadgets.uri.AipoProxyUriManager.java

/**
 * Parses an incoming proxy request URI into a {@code ProxyUri}.
 * <p>
 * Two request shapes are recognised:
 * <ul>
 * <li>query-style: the container is given as a query parameter and the
 * request path equals (ignoring case) the proxy path configured for that
 * container; the target URL is then read from the URL query parameter;</li>
 * <li>chained-style: the container and the other parameters are embedded in
 * the path, and the target URL is whatever follows the chained section.</li>
 * </ul>
 * When {@code strictParsing} is off and the chained parse produced no target
 * URL, the query-style URL parameter is used as a fallback.
 *
 * @param uriIn the incoming request URI
 * @return the parsed proxy URI, carrying the validation status, the target
 *         URI and the URI the query parameters were read from
 * @throws GadgetException with {@code MISSING_PARAMETER} if no target URL or
 *         container could be determined, {@code INVALID_PATH} if strict
 *         parsing is on and the request authority does not match the
 *         configured proxy host, or {@code INVALID_PARAMETER} if the target
 *         URL cannot be parsed
 */
@SuppressWarnings("deprecation")
public ProxyUri process(Uri uriIn) throws GadgetException {
    UriStatus status = UriStatus.BAD_URI;
    Uri uri = null;

    // First determine if the URI is chained-syntax or query-style.
    String container = uriIn.getQueryParameter(Param.CONTAINER.getKey());
    if (container == null) {
        // Fall back to the SYND parameter when CONTAINER is absent.
        container = uriIn.getQueryParameter(Param.SYND.getKey());
    }
    String uriStr = null;
    Uri queryUri = null;
    if (container != null && config.getString(container, PROXY_PATH_PARAM) != null
            && config.getString(container, PROXY_PATH_PARAM).equalsIgnoreCase(uriIn.getPath())) {
        // Query-style. Has container param and path matches.
        uriStr = uriIn.getQueryParameter(Param.URL.getKey());
        queryUri = uriIn;
    } else {
        // Check for chained query string in the path.
        String containerStr = Param.CONTAINER.getKey() + '=';
        String path = uriIn.getPath();
        // The path may arrive percent-encoded ('=' converted to %3d), for
        // example from CssResponseRewriter, so decode it when the plain
        // "container=" marker is not found.
        boolean doDecode = (!path.contains(containerStr));
        if (doDecode) {
            path = Utf8UrlCoder.decode(path);
        }
        int start = path.indexOf(containerStr);
        if (start > 0) {
            start += containerStr.length();
            // The container value ends at the next '&', or failing that at the next '/'.
            int end = path.indexOf('&', start);
            if (end < start) {
                end = path.indexOf('/', start);
            }
            if (end > start) {
                // Looks like chained proxy syntax. Pull out params.
                container = path.substring(start, end);
            }
            if (container != null) {
                String proxyPath = config.getString(container, PROXY_PATH_PARAM);
                if (proxyPath != null) {
                    String[] chainedChunks = StringUtils.splitByWholeSeparatorPreserveAllTokens(proxyPath,
                            CHAINED_PARAMS_TOKEN);

                    // Parse out the URI of the actual resource. This URI is found as
                    // the substring of the "full" URI, after the chained proxy prefix.
                    // We first search for the pre- and post-fixes of the original
                    // /pre/%chained_params%/post ContainerConfig value, and take the
                    // URI as everything beyond that point.
                    String startToken = chainedChunks[0];
                    String endToken = "/";
                    if (chainedChunks.length == 2 && chainedChunks[1].length() > 0) {
                        endToken = chainedChunks[1];
                    }

                    // Pull URI out of original inUri's full representation.
                    String fullProxyUri = uriIn.toString();
                    // NOTE(review): when startToken is not found, indexOf returns -1 and
                    // startIx becomes startToken.length() - 1, which the "startIx > 0"
                    // check below does not reliably detect - confirm this is intended.
                    int startIx = fullProxyUri.indexOf(startToken) + startToken.length();
                    int endIx = fullProxyUri.indexOf(endToken, startIx);
                    if (startIx > 0 && endIx > 0) {
                        String chainedQuery = fullProxyUri.substring(startIx, endIx);
                        if (doDecode) {
                            chainedQuery = Utf8UrlCoder.decode(chainedQuery);
                        }
                        queryUri = new UriBuilder().setQuery(chainedQuery).toUri();
                        uriStr = fullProxyUri.substring(endIx + endToken.length());
                        // Drop any leading slashes left in front of the target URL.
                        while (uriStr.startsWith("/")) {
                            uriStr = uriStr.substring(1);
                        }

                    }
                }
            }
        }
    }

    if (!strictParsing && container != null && StringUtils.isEmpty(uriStr)) {
        // Query-style despite the container being configured for chained style.
        uriStr = uriIn.getQueryParameter(Param.URL.getKey());
        queryUri = uriIn;
    }

    // Parameter validation.
    if (StringUtils.isEmpty(uriStr) || StringUtils.isEmpty(container)) {
        throw new GadgetException(GadgetException.Code.MISSING_PARAMETER,
                "Missing required parameter(s):" + (StringUtils.isEmpty(uriStr) ? ' ' + Param.URL.getKey() : "")
                        + (StringUtils.isEmpty(container) ? ' ' + Param.CONTAINER.getKey() : ""),
                HttpResponse.SC_BAD_REQUEST);
    }

    // In strict mode the request authority must match the configured proxy host.
    String queryHost = config.getString(container, PROXY_HOST_PARAM);
    if (strictParsing) {
        if (queryHost == null || !queryHost.equalsIgnoreCase(uriIn.getAuthority())) {
            throw new GadgetException(GadgetException.Code.INVALID_PATH, "Invalid proxy host",
                    HttpResponse.SC_BAD_REQUEST);
        }
    }

    try {
        uri = Uri.parse(uriStr);
    } catch (Exception e) {
        // NullPointerException or InvalidArgumentException.
        throw new GadgetException(GadgetException.Code.INVALID_PARAMETER,
                "Invalid " + Param.URL.getKey() + ": " + uriStr, HttpResponse.SC_BAD_REQUEST);
    }

    // URI is valid.
    status = UriStatus.VALID_UNVERSIONED;

    // A version parameter, when present, is checked by the versioner (if one is set).
    String version = queryUri.getQueryParameter(Param.VERSION.getKey());
    if (versioner != null && version != null) {
        status = versioner.validate(uri, container, version);
    }

    ProxyUri proxied = new ProxyUri(status, uri, queryUri);
    proxied.setHtmlTagContext(uriIn.getQueryParameter(Param.HTML_TAG_CONTEXT.getKey()));
    return proxied;
}

From source file:com.edgenius.wiki.render.filter.TableFilter.java

/**
 * @param line /* ww w . j a  v a 2s .c o  m*/
 * @return
 */
private String buildRow(String line, boolean head) {
    String sep;
    String tag;
    if (head) {
        //header
        sep = "||";
        tag = "th";
        line = trimSurronding(line);
    } else {
        sep = "|";
        tag = "td";
        line = trimSurronding(line);
    }
    StringBuffer sb = new StringBuffer();
    //split
    String[] columns = StringUtils.splitByWholeSeparatorPreserveAllTokens(line, sep);
    for (String col : columns) {
        sb.append("<" + tag + ">").append(col.trim().length() == 0 ? "&nbsp;" : col).append("</" + tag + ">");
    }
    return sb.toString();
}

From source file:com.ailk.oci.ocnosql.tools.load.csvbulkload.PhoenixCsvToKeyValueMapper.java

/**
 * ?csv???rowkey(hash?++md5?)/*  ww w  .j  av  a2 s  .c o m*/
 */
private String generateRowKey(String lineStr) {
    String rowkey = "";
    // 1.??(??)
    String[] lineArr = StringUtils.splitByWholeSeparatorPreserveAllTokens(lineStr, separator);
    rowGentemp.delete(0, rowGentemp.length());
    for (int idx : rowPrefixColIdxs) {
        rowGentemp.append(lineArr[idx]);
    }
    rowkey += rowKeyGenerator.generatePrefix(rowGentemp.toString());
    // 2.?(?)
    rowGentemp.delete(0, rowGentemp.length());
    for (int idx : rowColIdxs) {
        rowGentemp.append(lineArr[idx]);
    }
    rowkey += rowGentemp.toString();
    // 3.??(?,a.b.)

    if (null != unqIdxColIdxs) {// a.
        rowGentemp.delete(0, rowGentemp.length());
        for (int idx : unqIdxColIdxs) {
            rowGentemp.append(lineArr[idx]);
        }
        rowkey += getUniquePostfix(rowGentemp.toString());
    } else {// b.,
        rowkey += getUniquePostfix(lineStr);
    }
    // 4.
    return rowkey;
}

From source file:com.edgenius.wiki.render.impl.RichRenderEngineImpl.java

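/**
 * Rewrites the text nodes of a parsed HTML node list: HTML entities are
 * unescaped and wiki markup characters are escaped, while tag nodes are kept.
 *
 * @param htmlText the HTML text, passed to {@code WikiUtil.findUniqueKey} to
 *        obtain the placeholder keys that stand in for tag nodes
 * @param nodeContainer the node list to process
 * @return the rebuilt node list, or the given one if there was nothing to process
 */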
private HTMLNodeContainer escapeText(String htmlText, HTMLNodeContainer nodeContainer) {
    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // All text nodes need HTML unescaping and markup escape conversion, e.g. &gt; to >, *bold* to \*bold\* etc.
    // Handling this text node by text node may run n * f times (n is the text node count, f the filter count),
    // so instead do the following:
    // 1. Convert the HTMLNode list to a string again, replacing every non-text node with a unique string.
    // 1.1 As TinyMCE will save text into entity code even when "entity_encoding" is set to "raw", even text inside
    // an OriginalTextRequest still needs HTML unescapeHtml().
    // 1.2 But the text surrounded by tags identified by OriginalTextRequest does not
    // need any escapeMarkupToSlash().
    // 2. Do the escape conversion.
    // 3. Replace the unique string pointers back with tag nodes and reset all text nodes (escaping may have changed them).

    StringBuffer unescTextSb = new StringBuffer();
    String uniqueKey = WikiUtil.findUniqueKey(htmlText);

    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // Pass 1: HTML-unescape all text nodes, e.g. &gt; to >.
    // Although only a limited entity escape is done (see EscapeUtil.escapeHTML()),
    // it is still safe to use this method.
    for (HTMLNode node : nodeContainer) {
        if (node.isTextNode()) {
            unescTextSb.append(node.getText());
        } else {
            // Every non-text node is represented by one placeholder key.
            unescTextSb.append(uniqueKey);
        }
    }
    String escText = unescTextSb.toString();
    if (!StringUtils.isBlank(escText)) {
        escText = StringEscapeUtil.unescapeHtml(escText);

        // Recover the HTMLNode list: reset all text nodes as their content may have been changed by the conversion.
        // This splits into N+1 strings, N being the tag count.
        String[] textNodes = StringUtils.splitByWholeSeparatorPreserveAllTokens(escText, uniqueKey);
        HTMLNodeContainer newNodeList = new HTMLNodeContainer();
        Iterator<HTMLNode> iter = nodeContainer.iterator();
        for (int idx = 0; idx < textNodes.length; idx++) {
            String text = textNodes[idx];
            if (!"".equals(text))
                newNodeList.add(new HTMLNode(text, true));
            // After every token except the last, advance through the original nodes
            // to the next non-text node and re-insert it; old text nodes are skipped.
            for (; idx < textNodes.length - 1 && iter.hasNext();) {
                HTMLNode node = iter.next();
                if (!node.isTextNode()) {
                    newNodeList.add(node);
                    break;
                }
            }
        }

        nodeContainer = newNodeList;
    }

    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    // Pass 2: markup escaping, only for text that is not inside an OriginalTextRequest tag, such as preview.
    MacroFilter macroFilter = (MacroFilter) filterProvider.getFilter(MacroFilter.class.getName());
    List<HTMLNode> orgTextMacroHTMLIds = macroFilter.getImmutableHTMLIdenifiers();

    unescTextSb = new StringBuffer();
    // While non-null, nodes are skipped until this closing node has been passed.
    HTMLNode preReqCloseNode = null;
    // This container saves all tags (or text) which won't be escaped:
    // normally each uniqueKey maps to one HTMLNode, but an OriginalTextRequest macro may contain multiple
    // HTMLNodes, so the container uses List<HTMLNode> as its element.
    List<List<HTMLNode>> tagContainer = new ArrayList<List<HTMLNode>>();
    // Placeholder keys for block-level open/close tags, created on first use.
    String multipleLineUniqueKeyS = null;
    String multipleLineUniqueKeyE = null;
    for (HTMLNode node : nodeContainer) {
        if (preReqCloseNode != null) {
            if (preReqCloseNode == node) {
                preReqCloseNode = null;
            }
            // Skip all tags between preview text (the closing node itself included).
            continue;
        }
        if (!node.isTextNode()) {
            List<HTMLNode> nodes = new ArrayList<HTMLNode>();
            if (node.isIdentified(orgTextMacroHTMLIds) && node.getPair() != null) {
                // This is preview, code or some other tag whose macro implements OriginalTextRequest:
                // put all tags/text surrounded by this tag (including the pair itself) into tagContainer.
                preReqCloseNode = node.getPair();
                for (HTMLNode preNode = node; preNode != null && preNode != node.getPair();) {
                    nodes.add(preNode);
                    preNode = preNode.next();
                }
                nodes.add(node.getPair());
            } else {
                nodes.add(node);
            }
            if (RenderUtil.isBlockTag(node)) {
                // For example start<p>[test]</p>end. If <p> and </p> were simply replaced with the normal key, [test]
                // would be treated as surrounded by normal characters; it is actually surrounded by newlines, so a
                // multiple-line key is used.
                if (!node.isCloseTag()) {
                    if (multipleLineUniqueKeyS == null)
                        multipleLineUniqueKeyS = "\n" + WikiUtil.findUniqueKey(htmlText) + "\n";
                    unescTextSb.append(multipleLineUniqueKeyS);
                } else {
                    if (multipleLineUniqueKeyE == null)
                        multipleLineUniqueKeyE = "\n" + WikiUtil.findUniqueKey(htmlText) + "\n";
                    unescTextSb.append(multipleLineUniqueKeyE);
                }
            } else {
                unescTextSb.append(uniqueKey);
            }
            tagContainer.add(nodes);
        } else {
            unescTextSb.append(node.getText());
        }
    }
    escText = unescTextSb.toString();
    if (!StringUtils.isBlank(escText)) {

        // Do all markup filter escapes, e.g. *bold* to \*bold\*.
        escText = MarkupUtil.escapeMarkupToSlash(escText, uniqueKey);

        // Recover the HTMLNode list: reset all text nodes as their content may have been changed by the conversion.
        // The multiple-line keys are first collapsed to the normal key so that a single split suffices;
        // this splits into N+1 strings, N being the tag count.
        if (multipleLineUniqueKeyS != null) {
            escText = StringUtils.replace(escText, multipleLineUniqueKeyS, uniqueKey);
        }
        if (multipleLineUniqueKeyE != null) {
            escText = StringUtils.replace(escText, multipleLineUniqueKeyE, uniqueKey);
        }

        String[] textNodes = StringUtils.splitByWholeSeparatorPreserveAllTokens(escText, uniqueKey);
        HTMLNodeContainer newNodeList = new HTMLNodeContainer();
        Iterator<List<HTMLNode>> iter = tagContainer.iterator();
        for (int idx = 0; idx < textNodes.length; idx++) {
            String text = textNodes[idx];
            if (!"".equals(text))
                newNodeList.add(new HTMLNode(text, true));
            // After every token except the last, re-insert the next saved group of nodes
            // in place of the placeholder key.
            if (idx < textNodes.length - 1 && iter.hasNext()) {
                List<HTMLNode> insertList = iter.next();
                for (HTMLNode insert : insertList) {
                    newNodeList.add(insert);
                }

            }
        }
        nodeContainer = newNodeList;
    }
    return nodeContainer;
}