Example usage for java.util.regex Matcher regionStart

List of usage examples for java.util.regex Matcher regionStart

Introduction

On this page you can find example usages of java.util.regex.Matcher.regionStart().

Prototype

public int regionStart() 

Source Link

Document

Reports the start index of this matcher's region.

Usage

From source file:com.application.utils.FastDateParser.java

/**
 * Initialize derived fields from defining fields.
 * This is called from constructor and from readObject (de-serialization)
 *
 * <p>The format {@code pattern} is consumed one field at a time with {@code formatPattern}.
 * For every field a {@code Strategy} is obtained from {@code getStrategy}; each strategy that
 * reports (via {@code addRegex}) that it contributed to the regular expression is collected into
 * {@code strategies}, and the accumulated expression is compiled into {@code parsePattern}.
 *
 * @param definingCalendar the {@link java.util.Calendar} instance used to initialize this FastDateParser
 * @throws IllegalArgumentException if the pattern does not start with a recognised field
 *         (including an empty pattern) or cannot be consumed up to its end
 */
private void init(Calendar definingCalendar) {

    final StringBuilder regex = new StringBuilder();
    final List<Strategy> collector = new ArrayList<Strategy>();

    final Matcher patternMatcher = formatPattern.matcher(pattern);
    if (!patternMatcher.lookingAt()) {
        final int failedAt = patternMatcher.regionStart();
        if (failedAt >= pattern.length()) {
            // There is no character to report (e.g. an empty pattern); calling charAt here
            // would raise StringIndexOutOfBoundsException and hide the real problem.
            throw new IllegalArgumentException("Illegal pattern \"" + pattern + "\"");
        }
        throw new IllegalArgumentException(
                "Illegal pattern character '" + pattern.charAt(failedAt) + "'");
    }

    currentFormatField = patternMatcher.group();
    Strategy currentStrategy = getStrategy(currentFormatField, definingCalendar);
    for (;;) {
        // Restrict the matcher to the still-unparsed tail of the pattern.
        patternMatcher.region(patternMatcher.end(), patternMatcher.regionEnd());
        if (!patternMatcher.lookingAt()) {
            nextStrategy = null;
            break;
        }
        final String nextFormatField = patternMatcher.group();
        // nextStrategy is assigned before addRegex runs on the current strategy;
        // keep this order, addRegex receives this parser and may consult it.
        nextStrategy = getStrategy(nextFormatField, definingCalendar);
        if (currentStrategy.addRegex(this, regex)) {
            collector.add(currentStrategy);
        }
        currentFormatField = nextFormatField;
        currentStrategy = nextStrategy;
    }
    // The loop stopped at the first position that did not match; anything left is unparseable.
    if (patternMatcher.regionStart() != patternMatcher.regionEnd()) {
        throw new IllegalArgumentException(
                "Failed to parse \"" + pattern + "\" ; gave up at index " + patternMatcher.regionStart());
    }
    // The last field has not been handed to its strategy yet.
    if (currentStrategy.addRegex(this, regex)) {
        collector.add(currentStrategy);
    }
    currentFormatField = null;
    strategies = collector.toArray(new Strategy[collector.size()]);
    parsePattern = Pattern.compile(regex.toString());
}

From source file:au.org.ala.biocache.dao.SearchDAOImpl.java

/**
 * Format the search input query for a full-text search.
 *
 * This includes constructing a user friendly version of the query to
 * be used for display purposes./*from   w ww . j a v a2  s.c om*/
 * 
 * TODO Fix this to use a state.  REVISE!!
 *
 * @param searchParams
 */
protected void formatSearchQuery(SpatialSearchRequestParams searchParams, boolean forceQueryFormat) {
    //Only format the query if it doesn't already supply a formattedQuery.
    if (forceQueryFormat || StringUtils.isEmpty(searchParams.getFormattedQuery())) {
        // set the query
        String query = searchParams.getQ();

        //cached query parameters are already formatted
        if (query.contains("qid:")) {
            Matcher matcher = qidPattern.matcher(query);
            long qid = 0;
            while (matcher.find()) {
                String value = matcher.group();
                try {
                    String qidValue = SearchUtils.stripEscapedQuotes(value.substring(4));
                    qid = Long.parseLong(qidValue);
                    ParamsCacheObject pco = ParamsCache.get(qid);
                    if (pco != null) {
                        searchParams.setQId(qid);
                        searchParams.setQ(pco.getQ());
                        //add the fqs from the params cache
                        if (pco.getFqs() != null) {
                            String[] currentFqs = searchParams.getFq();
                            if (currentFqs == null || (currentFqs.length == 1 && currentFqs[0].length() == 0)) {
                                searchParams.setFq(pco.getFqs());
                            } else {
                                //we need to add the current Fqs together
                                searchParams.setFq((String[]) ArrayUtils.addAll(currentFqs, pco.getFqs()));
                            }
                        }
                        String displayString = pco.getDisplayString();

                        if (StringUtils.isNotEmpty(pco.getWkt())) {
                            displayString = displayString + " within user defined polygon";
                        }
                        searchParams.setDisplayString(displayString);

                        if (searchParams instanceof SpatialSearchRequestParams) {
                            ((SpatialSearchRequestParams) searchParams).setWkt(pco.getWkt());
                        } else if (StringUtils.isNotEmpty(pco.getWkt())) {
                            String originalQ = searchParams.getQ();
                            searchParams.setQ(spatialField + ":\"Intersects(" + pco.getWkt() + ")");
                            if (StringUtils.isNotEmpty(originalQ))
                                searchParams.setQ(searchParams.getQ() + " AND " + originalQ);
                        }
                        searchParams.setFormattedQuery(searchParams.getQ());
                        return;
                    }
                } catch (NumberFormatException e) {
                } catch (ParamsCacheMissingException e) {
                }
            }
        }
        StringBuffer queryString = new StringBuffer();
        StringBuffer displaySb = new StringBuffer();
        String displayString = query;

        // look for field:term sub queries and catch fields: matched_name & matched_name_children
        if (query.contains(":")) {
            // will match foo:bar, foo:"bar bash" & foo:bar\ bash
            Matcher matcher = termPattern.matcher(query);
            queryString.setLength(0);

            while (matcher.find()) {
                String value = matcher.group();
                logger.debug("term query: " + value);
                logger.debug("groups: " + matcher.group(1) + "|" + matcher.group(2));

                if ("matched_name".equals(matcher.group(1))) {
                    // name -> accepted taxon name (taxon_name:)
                    String field = matcher.group(1);
                    String queryText = matcher.group(2);

                    if (queryText != null && !queryText.isEmpty()) {
                        String guid = speciesLookupService.getGuidForName(queryText.replaceAll("\"", "")); // strip any quotes
                        logger.info("GUID for " + queryText + " = " + guid);

                        if (guid != null && !guid.isEmpty()) {
                            String acceptedName = speciesLookupService.getAcceptedNameForGuid(guid); // strip any quotes
                            logger.info("acceptedName for " + queryText + " = " + acceptedName);

                            if (acceptedName != null && !acceptedName.isEmpty()) {
                                field = "taxon_name";
                                queryText = acceptedName;
                            }
                        } else {
                            field = "taxon_name";
                        }

                        // also change the display query
                        displayString = displayString.replaceAll("matched_name", "taxon_name");
                    }

                    if (StringUtils.containsAny(queryText, CHARS) && !queryText.startsWith("[")) {
                        // quote any text that has spaces or colons but not range queries
                        queryText = QUOTE + queryText + QUOTE;
                    }

                    logger.debug("queryText: " + queryText);

                    matcher.appendReplacement(queryString, matcher.quoteReplacement(field + ":" + queryText));

                } else if ("matched_name_children".equals(matcher.group(1))) {
                    String field = matcher.group(1);
                    String queryText = matcher.group(2);

                    if (queryText != null && !queryText.isEmpty()) {
                        String guid = speciesLookupService.getGuidForName(queryText.replaceAll("\"", "")); // strip any quotes
                        logger.info("GUID for " + queryText + " = " + guid);

                        if (guid != null && !guid.isEmpty()) {
                            field = "lsid";
                            queryText = guid;
                        } else {
                            field = "taxon_name";
                        }
                    }

                    if (StringUtils.containsAny(queryText, CHARS) && !queryText.startsWith("[")) {
                        // quote any text that has spaces or colons but not range queries
                        queryText = QUOTE + queryText + QUOTE;
                    }

                    matcher.appendReplacement(queryString, matcher.quoteReplacement(field + ":" + queryText));
                } else {
                    matcher.appendReplacement(queryString, matcher.quoteReplacement(value));
                }
            }
            matcher.appendTail(queryString);
            query = queryString.toString();
        }

        //if the query string contains lsid: we will need to replace it with the corresponding lft range
        int last = 0;
        if (query.contains("lsid:")) {
            Matcher matcher = lsidPattern.matcher(query);
            queryString.setLength(0);
            while (matcher.find()) {
                //only want to process the "lsid" if it does not represent taxon_concept_lsid etc...
                if ((matcher.start() > 0 && query.charAt(matcher.start() - 1) != '_') || matcher.start() == 0) {
                    String value = matcher.group();
                    logger.debug("preprocessing " + value);
                    String lsid = matcher.group(2);
                    if (lsid.contains("\"")) {
                        //remove surrounding quotes, if present
                        lsid = lsid.replaceAll("\"", "");
                    }
                    if (lsid.contains("\\")) {
                        //remove internal \ chars, if present
                        //noinspection MalformedRegex
                        lsid = lsid.replaceAll("\\\\", "");
                    }
                    logger.debug("lsid = " + lsid);
                    String[] values = searchUtils.getTaxonSearch(lsid);
                    String lsidHeader = matcher.group(1).length() > 0 ? matcher.group(1) : "";
                    matcher.appendReplacement(queryString, lsidHeader + values[0]);
                    displaySb.append(query.substring(last, matcher.start()));
                    if (!values[1].startsWith("taxon_concept_lsid:"))
                        displaySb.append(lsidHeader).append("<span class='lsid' id='").append(lsid).append("'>")
                                .append(values[1]).append("</span>");
                    else
                        displaySb.append(lsidHeader).append(values[1]);
                    last = matcher.end();
                    //matcher.appendReplacement(displayString, values[1]);
                }
            }
            matcher.appendTail(queryString);
            displaySb.append(query.substring(last, query.length()));

            query = queryString.toString();
            displayString = displaySb.toString();
        }

        if (query.contains("urn")) {
            //escape the URN strings before escaping the rest this avoids the issue with attempting to search on a urn field
            Matcher matcher = urnPattern.matcher(query);
            queryString.setLength(0);
            while (matcher.find()) {
                String value = matcher.group();

                logger.debug("escaping lsid urns  " + value);
                matcher.appendReplacement(queryString, prepareSolrStringForReplacement(value));
            }
            matcher.appendTail(queryString);
            query = queryString.toString();
        }

        if (query.contains("Intersects")) {
            Matcher matcher = spatialPattern.matcher(query);
            if (matcher.find()) {
                String spatial = matcher.group();
                SpatialSearchRequestParams subQuery = new SpatialSearchRequestParams();
                logger.debug("region Start : " + matcher.regionStart() + " start :  " + matcher.start()
                        + " spatial length " + spatial.length() + " query length " + query.length());
                //format the search query of the remaining text only
                subQuery.setQ(query.substring(matcher.start() + spatial.length(), query.length()));
                //format the remaining query
                formatSearchQuery(subQuery);

                //now append Q's together
                queryString.setLength(0);
                //need to include the prefix
                queryString.append(query.substring(0, matcher.start()));
                queryString.append(spatial);
                queryString.append(subQuery.getFormattedQuery());
                searchParams.setFormattedQuery(queryString.toString());
                //add the spatial information to the display string
                if (spatial.contains("circles")) {
                    String[] values = spatial.substring(spatial.indexOf("=") + 1, spatial.indexOf("}"))
                            .split(",");
                    if (values.length == 3) {
                        displaySb.setLength(0);
                        displaySb.append(subQuery.getDisplayString());
                        displaySb.append(" - within ").append(values[2]).append(" km of point(")
                                .append(values[0]).append(",").append(values[1]).append(")");
                        searchParams.setDisplayString(displaySb.toString());
                    }

                } else {
                    searchParams.setDisplayString(subQuery.getDisplayString() + " - within supplied region");
                }
            }
        } else {
            //escape reserved characters unless the colon represnts a field name colon
            queryString.setLength(0);

            Matcher matcher = spacesPattern.matcher(query);
            while (matcher.find()) {
                String value = matcher.group();

                //special cases to ignore from character escaping
                //if the value is a single - or * it means that we don't want to escape it as it is likely to have occurred in the following situation -(occurrence_date:[* TO *]) or *:*
                if (!value.equals("-")
                        && /*!value.equals("*")  && !value.equals("*:*") && */ !value.endsWith("*")) {

                    //split on the colon
                    String[] bits = StringUtils.split(value, ":", 2);
                    if (bits.length == 2) {
                        if (!bits[0].contains("urn") && !bits[1].contains("urn\\"))
                            matcher.appendReplacement(queryString,
                                    bits[0] + ":" + prepareSolrStringForReplacement(bits[1]));

                    } else if (!value.endsWith(":")) {
                        //need to ignore field names where the : is at the end because the pattern matching will return field_name: as a match when it has a double quoted value
                        //default behaviour is to escape all 
                        matcher.appendReplacement(queryString, prepareSolrStringForReplacement(value));
                    }
                }
            }
            matcher.appendTail(queryString);

            //substitute better display strings for collection/inst etc searches
            if (displayString.contains("_uid")) {
                displaySb.setLength(0);
                String normalised = displayString.replaceAll("\"", "");
                matcher = uidPattern.matcher(normalised);
                while (matcher.find()) {
                    String newVal = "<span>"
                            + searchUtils.getUidDisplayString(matcher.group(1), matcher.group(2)) + "</span>";
                    if (newVal != null)
                        matcher.appendReplacement(displaySb, newVal);
                }
                matcher.appendTail(displaySb);
                displayString = displaySb.toString();
            }
            if (searchParams.getQ().equals("*:*")) {
                displayString = "[all records]";
            }
            if (searchParams.getLat() != null && searchParams.getLon() != null
                    && searchParams.getRadius() != null) {
                displaySb.setLength(0);
                displaySb.append(displayString);
                displaySb.append(" - within ").append(searchParams.getRadius()).append(" km of point(")
                        .append(searchParams.getLat()).append(",").append(searchParams.getLon()).append(")");
                displayString = displaySb.toString();

            }

            // substitute i18n version of field name, if found in messages.properties
            displayString = formatDisplayStringWithI18n(displayString);

            searchParams.setFormattedQuery(queryString.toString());
            logger.debug("formattedQuery = " + queryString);
            logger.debug("displayString = " + displayString);
            searchParams.setDisplayString(displayString);
        }

        //format the fq's for facets that need ranges substituted
        for (int i = 0; i < searchParams.getFq().length; i++) {
            String fq = searchParams.getFq()[i];
            String[] parts = fq.split(":", 2);
            //check to see if the first part is a range based query and update if necessary
            Map<String, String> titleMap = RangeBasedFacets.getTitleMap(parts[0]);
            if (titleMap != null) {
                searchParams.getFq()[i] = titleMap.get(parts[1]);
            }
        }
    }
    searchParams.setDisplayString(formatDisplayStringWithI18n(searchParams.getDisplayString()));
}

From source file:org.sqlite.date.FastDateParser.java

/**
 * Initialize derived fields from defining fields.
 * This is called from constructor and from readObject (de-serialization)
 *
 * <p>Steps through {@code pattern} field by field using {@code formatPattern}, looks up a
 * {@code Strategy} for each field with {@code getStrategy}, keeps those strategies whose
 * {@code addRegex} call returns true in {@code strategies}, and compiles the regular expression
 * they built into {@code parsePattern}.
 *
 * @param definingCalendar the {@link java.util.Calendar} instance used to initialize this FastDateParser
 * @throws IllegalArgumentException if the pattern does not begin with a recognised field
 *         (an empty pattern included) or if part of it is left unparsed
 */
private void init(final Calendar definingCalendar) {

    final StringBuilder regex = new StringBuilder();
    final List<Strategy> collector = new ArrayList<Strategy>();

    final Matcher patternMatcher = formatPattern.matcher(pattern);
    if (!patternMatcher.lookingAt()) {
        final int failedAt = patternMatcher.regionStart();
        if (failedAt < pattern.length()) {
            throw new IllegalArgumentException(
                    "Illegal pattern character '" + pattern.charAt(failedAt) + "'");
        }
        // No offending character exists (e.g. the pattern is empty). Report that directly
        // instead of letting charAt fail with StringIndexOutOfBoundsException.
        throw new IllegalArgumentException("Illegal pattern \"" + pattern + "\"");
    }

    currentFormatField = patternMatcher.group();
    Strategy currentStrategy = getStrategy(currentFormatField, definingCalendar);
    for (;;) {
        // Continue matching right after the field that was just recognised.
        patternMatcher.region(patternMatcher.end(), patternMatcher.regionEnd());
        if (!patternMatcher.lookingAt()) {
            nextStrategy = null;
            break;
        }
        final String nextFormatField = patternMatcher.group();
        // Order matters: nextStrategy is set before the current strategy's addRegex is
        // invoked with this parser as its argument.
        nextStrategy = getStrategy(nextFormatField, definingCalendar);
        if (currentStrategy.addRegex(this, regex)) {
            collector.add(currentStrategy);
        }
        currentFormatField = nextFormatField;
        currentStrategy = nextStrategy;
    }
    // A non-empty remaining region means matching stopped before the end of the pattern.
    if (patternMatcher.regionStart() != patternMatcher.regionEnd()) {
        throw new IllegalArgumentException(
                "Failed to parse \"" + pattern + "\" ; gave up at index " + patternMatcher.regionStart());
    }
    // Flush the final field, which the loop left pending.
    if (currentStrategy.addRegex(this, regex)) {
        collector.add(currentStrategy);
    }
    currentFormatField = null;
    strategies = collector.toArray(new Strategy[collector.size()]);
    parsePattern = Pattern.compile(regex.toString());
}

From source file:org.xchain.framework.jsl.TemplateSourceBuilder.java

/**
 * Parses an attribute value template into fixed and dynamic parts.  This list will always start with a fixed part and
 * then include alternating dynamic and fixed parts.
 *
 * <p>Group 1 of {@code ATTRIBUTE_VALUE_TEMPLATE_PATTERN} is taken as a fixed part (with doubled
 * braces collapsed to single braces) and group 2 as a dynamic part.
 *
 * @param attributeValueTemplate the template text to split.
 * @return the parts in the order they were matched; an empty fixed part is inserted first when the
 *         first match has no fixed part.
 * @throws SAXException if some of the template is not covered by consecutive matches of the pattern.
 */
public static List<String> parseAttributeValueTemplate(String attributeValueTemplate) throws SAXException {
    // the result.
    List<String> result = new ArrayList<String>();

    // create the matcher.
    Matcher matcher = ATTRIBUTE_VALUE_TEMPLATE_PATTERN.matcher(attributeValueTemplate);

    // offset just past the last character that has been accepted so far.
    int consumed = 0;

    while (matcher.find()) {
        if (matcher.start() != consumed) {
            // find() moved past characters that no match covers; they would otherwise be dropped silently.
            throw new SAXException(
                    "The attribute value template '" + attributeValueTemplate + "' has an error between characters "
                            + consumed + " and " + matcher.start() + ".");
        }

        String fixedPart = matcher.group(1);
        String dynamicPart = matcher.group(2);

        if (result.isEmpty() && fixedPart == null) {
            result.add("");
        }

        if (fixedPart != null) {
            result.add(fixedPart.replaceAll("\\{\\{", "{").replaceAll("\\}\\}", "}"));
        }
        if (dynamicPart != null) {
            result.add(dynamicPart);
        }
        consumed = matcher.end();
    }

    // hitEnd() by itself is not a usable check here: a find() that fails after scanning the rest
    // of the input still reports that the end was hit.  Compare the consumed offset as well, and
    // report the unparsed span rather than the matcher's (never changed) region bounds.
    if (!matcher.hitEnd() || consumed != attributeValueTemplate.length()) {
        throw new SAXException(
                "The attribute value template '" + attributeValueTemplate + "' has an error between characters "
                        + consumed + " and " + attributeValueTemplate.length() + ".");
    }

    return result;
}

From source file:org.xchain.framework.util.AttributesUtil.java

/**
 * Parses an attribute value template into fixed and dynamic parts.  This list will always start with a fixed part and
 * then include alternating dynamic and fixed parts.
 *
 * <p>The template is consumed from left to right: {@code attributeValueTemplatePattern} is matched
 * at the start of the remaining region, group 1 is taken as a fixed part (with doubled braces
 * collapsed to single braces), group 2 as a dynamic part, and the region is then moved past the match.
 *
 * @param attributeValueTemplate the template text to split.
 * @return the parts in the order they were matched; an empty fixed part is inserted first when the
 *         first match has no fixed part.
 * @throws SAXException if matching stops before the whole template has been consumed.
 */
public static List<String> parseAttributeValueTemplate(String attributeValueTemplate) throws SAXException {
    // the result.
    List<String> result = new ArrayList<String>();

    // create the matcher.
    Matcher matcher = attributeValueTemplatePattern.matcher(attributeValueTemplate);

    while (matcher.lookingAt()) {
        String fixedPart = matcher.group(1);
        String dynamicPart = matcher.group(2);

        if (result.isEmpty() && fixedPart == null) {
            result.add("");
        }

        if (fixedPart != null) {
            result.add(fixedPart.replaceAll("\\{\\{", "{").replaceAll("\\}\\}", "}"));
        }
        if (dynamicPart != null) {
            result.add(dynamicPart);
        }

        if (matcher.end() == matcher.regionStart()) {
            // a zero-length match would leave the region where it is and repeat forever.
            break;
        }
        // lookingAt() matches from the region start, so end() is the first unparsed offset.
        matcher.region(matcher.end(), matcher.regionEnd());
    }

    // hitEnd() is also true when the last attempt ran into the end of the input while failing
    // (for example on an unterminated part), so additionally require that nothing is left over.
    if (!matcher.hitEnd() || matcher.regionStart() != matcher.regionEnd()) {
        throw new SAXException(
                "The attribute value template '" + attributeValueTemplate + "' has an error between characters "
                        + matcher.regionStart() + " and " + matcher.regionEnd() + ".");
    }

    return result;
}