Example usage for java.util.regex Matcher end

List of usage examples for java.util.regex Matcher end

Introduction

This page collects usage examples for java.util.regex.Matcher.end.

Prototype

public int end(String name) 

Source Link

Document

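Returns the offset after the last character of the subsequence captured by the given named-capturing group during the previous match operation. Note that the examples below call the int-indexed overload, end(int group), which returns the same offset for a group identified by its number.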

Usage

From source file:com.cyberway.issue.extractor.RegexpHTMLLinkExtractor.java

/**
 * Scans the attributes of a META tag and reacts to the two cases handled here:
 * a "robots" directive and an http-equiv "refresh".
 *
 * <p>For a robots tag whose content contains "nofollow" or "none" (case-insensitive),
 * and only when {@code getHonorRobots()} is true, further extraction is cancelled.
 * For a refresh tag, the text after the first '=' in the content is queued on
 * {@code next} as a link; a refresh without '=' carries no URL and is ignored.
 *
 * @param cs character sequence holding the attributes of the META tag
 */
protected void processMeta(CharSequence cs) {
    Matcher attr = TextUtils.getMatcher(EACH_ATTRIBUTE_EXTRACTOR, cs);

    String name = null;
    String httpEquiv = null;
    String content = null;

    while (attr.find()) {
        // The attribute value sits in whichever of groups 12, 13 or 14 took part in the
        // match (presumably one group per quoting style -- confirm against the pattern).
        int valueGroup = (attr.start(12) > -1) ? 12 : (attr.start(13) > -1) ? 13 : 14;
        CharSequence value = cs.subSequence(attr.start(valueGroup), attr.end(valueGroup));
        if (attr.group(1).equalsIgnoreCase("name")) {
            name = value.toString();
        } else if (attr.group(1).equalsIgnoreCase("http-equiv")) {
            httpEquiv = value.toString();
        } else if (attr.group(1).equalsIgnoreCase("content")) {
            content = value.toString();
        }
        // TODO: handle other stuff
    }
    TextUtils.recycleMatcher(attr);

    // Look for the 'robots' meta-tag
    if ("robots".equalsIgnoreCase(name) && content != null) {
        if (getHonorRobots()) {
            String contentLower = content.toLowerCase();
            if ((contentLower.indexOf("nofollow") >= 0 || contentLower.indexOf("none") >= 0)) {
                // if 'nofollow' or 'none' is specified and we
                // are honoring robots, end html extraction
                logger.fine("HTML extraction skipped due to robots meta-tag for: " + source);
                cancelFurtherExtraction();
                return;
            }
        }
    } else if ("refresh".equalsIgnoreCase(httpEquiv) && content != null) {
        // A refresh such as content="5" has no '=' and therefore no target URL.
        // Without this guard indexOf returns -1 and the whole content would be
        // queued as a bogus link.
        int urlIndex = content.indexOf("=") + 1;
        if (urlIndex > 0) {
            String refreshUri = content.substring(urlIndex);
            try {
                Link refreshLink = new Link(source, UURIFactory.getInstance(base, refreshUri),
                        Link.elementContext("meta", httpEquiv), Link.REFER_HOP);
                next.addLast(refreshLink);
            } catch (URIException e) {
                extractErrorListener.noteExtractError(e, source, refreshUri);
            }
        }
    }
}

From source file:com.smartitengineering.cms.ws.resources.content.ContentResource.java

/**
 * Builds field objects from multipart form body parts, one per field definition,
 * and appends them to {@code fields}.
 *
 * <p>Body parts are looked up by name. A composite field collects every part whose
 * name starts with {@code <fieldName>.} and recurses on the remainder of the name.
 * A collection of composites collects parts named {@code <fieldName>.<index>.<rest>},
 * groups them by index and recurses once per group. Other collections and plain
 * fields read the parts stored directly under the field name.
 *
 * <p>Nothing is added when {@code bodyParts} is null or empty. A collection field
 * with no resulting values, or a plain (non-OTHER) field whose first part is null
 * or blank, is skipped entirely.
 *
 * @param allDefs   field definitions keyed by field name
 * @param bodyParts form body parts keyed by form field name; may be null
 * @param fields    output collection receiving the fields that were formed
 */
protected static void formFields(final Map<String, FieldDef> allDefs,
        final Map<String, List<FormDataBodyPart>> bodyParts,
        final Collection<com.smartitengineering.cms.ws.common.domains.Field> fields) {
    // Loop-invariant: without body parts no definition can produce a field.
    if (bodyParts == null || bodyParts.isEmpty()) {
        return;
    }
    for (Entry<String, FieldDef> fieldDef : allDefs.entrySet()) {
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Creating field for " + fieldDef.getKey() + " with type "
                    + fieldDef.getValue().getValueDef().getType());
        }
        FieldImpl fieldImpl = new FieldImpl();
        fieldImpl.setName(fieldDef.getKey());
        final boolean containsKey = bodyParts.containsKey(fieldDef.getKey());
        switch (fieldDef.getValue().getValueDef().getType()) {
        case COMPOSITE: {
            boolean hasCompositeValue = false;
            Map<String, List<FormDataBodyPart>> composites = new LinkedHashMap<String, List<FormDataBodyPart>>();
            final String prefix = new StringBuilder(fieldDef.getKey()).append('.').toString();
            for (String key : bodyParts.keySet()) {
                if (key.startsWith(prefix)) {
                    hasCompositeValue = true;
                    // Strip "<fieldName>." so the nested call sees names relative to this composite.
                    composites.put(key.substring(prefix.length()), bodyParts.get(key));
                }
            }
            if (hasCompositeValue) {
                Collection<com.smartitengineering.cms.ws.common.domains.Field> composedFields = new ArrayList<com.smartitengineering.cms.ws.common.domains.Field>();
                CompositeDataType compositeDataType = (CompositeDataType) fieldDef.getValue().getValueDef();
                formFields(compositeDataType.getComposedFieldDefs(), composites, composedFields);
                CompositeFieldValueImpl valueImpl = new CompositeFieldValueImpl();
                valueImpl.setValuesAsCollection(composedFields);
                valueImpl.setType(FieldValueType.COMPOSITE.name());
                fieldImpl.setValue(valueImpl);
            }
            break;
        }
        case COLLECTION: {
            CollectionDataType collectionFieldDef = (CollectionDataType) fieldDef.getValue().getValueDef();
            CollectionFieldValueImpl fieldValueImpl = new CollectionFieldValueImpl();
            fieldValueImpl.setType(FieldValueType.COLLECTION.name());
            switch (collectionFieldDef.getItemDataType().getType()) {
            case COMPOSITE: {
                boolean hasCompositeValue = false;
                Map<String, Map<String, List<FormDataBodyPart>>> compositesCollection = new HashMap<String, Map<String, List<FormDataBodyPart>>>();
                // The field name is quoted so that regex metacharacters in it (a '.' in a
                // nested name, parentheses, ...) are matched literally and cannot shift
                // the numbering of the index group.
                final String prefixPattern = new StringBuilder(Pattern.quote(fieldDef.getKey()))
                        .append("\\.([0-9]+)\\..+").toString();
                Pattern pattern = Pattern.compile(prefixPattern);
                for (String key : bodyParts.keySet()) {
                    Matcher matcher = pattern.matcher(key);
                    if (matcher.matches()) {
                        hasCompositeValue = true;
                        final Map<String, List<FormDataBodyPart>> composites;
                        String groupKey = matcher.group(1);
                        if (compositesCollection.containsKey(groupKey)) {
                            composites = compositesCollection.get(groupKey);
                        } else {
                            composites = new LinkedHashMap<String, List<FormDataBodyPart>>();
                            compositesCollection.put(groupKey, composites);
                        }
                        // end(1) is the offset just past the index digits; +1 skips the '.' after them.
                        composites.put(key.substring(matcher.end(1) + 1), bodyParts.get(key));
                    }
                }
                if (hasCompositeValue) {
                    CompositeDataType compositeDataType = (CompositeDataType) collectionFieldDef
                            .getItemDataType();
                    for (Entry<String, Map<String, List<FormDataBodyPart>>> cols : compositesCollection
                            .entrySet()) {
                        Collection<com.smartitengineering.cms.ws.common.domains.Field> composedFields = new ArrayList<com.smartitengineering.cms.ws.common.domains.Field>();
                        formFields(compositeDataType.getComposedFieldDefs(), cols.getValue(),
                                composedFields);
                        CompositeFieldValueImpl valueImpl = new CompositeFieldValueImpl();
                        valueImpl.setValuesAsCollection(composedFields);
                        valueImpl.setType(FieldValueType.COMPOSITE.name());
                        fieldValueImpl.getValues().add(valueImpl);
                    }
                }
                break;
            }
            default:
                if (containsKey) {
                    for (FormDataBodyPart bodyPart : bodyParts.get(fieldDef.getKey())) {
                        if (bodyPart == null
                                || org.apache.commons.lang.StringUtils.isBlank(bodyPart.getValue())) {
                            continue;
                        }
                        FieldValueImpl valueImpl = addFieldFromBodyPart(bodyPart,
                                collectionFieldDef.getItemDataType());
                        if (valueImpl != null) {
                            fieldValueImpl.getValues().add(valueImpl);
                        }
                    }
                }
            }
            // An empty collection produces no field at all.
            if (fieldValueImpl.getValues().isEmpty()) {
                continue;
            }
            fieldImpl.setValue(fieldValueImpl);
            break;
        }

        case OTHER: {
            // Unlike the default case, the part is passed on without a null/blank check.
            if (containsKey) {
                final FormDataBodyPart singleBodyPart = bodyParts.get(fieldDef.getKey()).get(0);
                FieldValueImpl valueImpl = addFieldFromBodyPart(singleBodyPart,
                        fieldDef.getValue().getValueDef());
                fieldImpl.setValue(valueImpl);
            }
            break;
        }
        default: {
            if (containsKey) {
                final FormDataBodyPart singleBodyPart = bodyParts.get(fieldDef.getKey()).get(0);
                if (singleBodyPart == null
                        || org.apache.commons.lang.StringUtils.isBlank(singleBodyPart.getValue())) {
                    continue;
                }
                FieldValueImpl valueImpl = addFieldFromBodyPart(singleBodyPart,
                        fieldDef.getValue().getValueDef());
                fieldImpl.setValue(valueImpl);
            }
            break;
        }
        }
        fields.add(fieldImpl);
    }
}

From source file:Repackage.java

/**
 * Reads a Java source file, rewrites its package declaration according to the
 * configured from/to package lists, and writes the result under the target base.
 *
 * <p>When the first from-package that is a prefix of the declared package is found,
 * the declaration is replaced by the matching to-package plus the unmatched tail,
 * the target path is rebuilt to mirror the new package, and a changed directory is
 * recorded in {@code _movedDirs}. A file without a package declaration keeps its
 * name. The write is skipped (and {@code _skippedFiles} incremented) when the
 * source is older than the existing target.
 *
 * @param name path of the source file relative to the source base
 * @throws IOException declared for the file read/write helpers
 * @throws RuntimeException if two package declarations are found, or the declared
 *         package does not match the trailing directories of {@code name}
 */
public void repackageJavaFile(String name) throws IOException {
    File sourceFile = new File(_sourceBase, name);
    StringBuffer contents = readFile(sourceFile);

    Matcher pkgMatcher = _packagePattern.matcher(contents);

    if (pkgMatcher.find()) {
        // Capture the first declaration before probing for a second one.
        String declaredPkg = pkgMatcher.group(1);
        int declStart = pkgMatcher.start(1);
        int declEnd = pkgMatcher.end(1);

        if (pkgMatcher.find()) {
            throw new RuntimeException("Two package specifications found: " + name);
        }

        List pathParts = Repackager.splitPath(name, File.separatorChar);
        String originalDir = Repackager.dirForPath(name);

        // Bubble pass repeated until stable: an element moves ahead of its left
        // neighbour when its first ':' sits at a larger index. Elements without
        // a ':' (index -1 on both sides) are never swapped.
        boolean swapped;
        do {
            swapped = false;
            for (int pos = 1; pos < pathParts.size(); pos++) {
                String left = (String) pathParts.get(pos - 1);
                String right = (String) pathParts.get(pos);
                if (left.indexOf(':') < right.indexOf(':')) {
                    pathParts.set(pos - 1, right);
                    pathParts.set(pos, left);
                    swapped = true;
                }
            }
        } while (swapped);

        List pkgParts = Repackager.splitPath(declaredPkg, '.');

        // The directories directly above the file must spell out the package.
        int pathIdx = pathParts.size() - 2;
        if (pathIdx < 0 || (pathParts.size() - 1) < pkgParts.size()) {
            throw new RuntimeException("Package spec differs from file path: " + name);
        }
        for (int pkgIdx = pkgParts.size() - 1; pkgIdx >= 0; pkgIdx--, pathIdx--) {
            if (!pkgParts.get(pkgIdx).equals(pathParts.get(pathIdx))) {
                throw new RuntimeException("Package spec differs from file path: " + name);
            }
        }

        // Pick the first configured from-package that is a prefix of the declared one.
        List changeTo = null;
        List changeFrom = null;
        for (int spec = 0; spec < _fromPackages.size(); spec++) {
            List candidate = (List) _fromPackages.get(spec);
            if (candidate.size() > pkgParts.size()) {
                continue;
            }
            boolean isPrefix = true;
            for (int seg = 0; seg < candidate.size() && isPrefix; seg++) {
                isPrefix = candidate.get(seg).equals(pkgParts.get(seg));
            }
            if (isPrefix) {
                changeFrom = candidate;
                changeTo = (List) _toPackages.get(spec);
                break;
            }
        }

        if (changeTo != null) {
            StringBuilder pkgBuf = new StringBuilder();
            StringBuilder nameBuf = new StringBuilder();

            // Replacement package, as dotted name and as relative path.
            for (int seg = 0; seg < changeTo.size(); seg++) {
                if (seg > 0) {
                    pkgBuf.append(".");
                    nameBuf.append(File.separatorChar);
                }
                Object part = changeTo.get(seg);
                pkgBuf.append(part);
                nameBuf.append(part);
            }

            // Leading directories that precede the package part of the path.
            for (int lead = pathParts.size() - pkgParts.size() - 2; lead >= 0; lead--) {
                nameBuf.insert(0, File.separatorChar).insert(0, (String) pathParts.get(lead));
            }

            // Sub-package segments below the matched prefix are carried over unchanged.
            for (int seg = changeFrom.size(); seg < pkgParts.size(); seg++) {
                nameBuf.append(File.separatorChar).append((String) pkgParts.get(seg));
                pkgBuf.append('.').append((String) pkgParts.get(seg));
            }

            nameBuf.append(File.separatorChar).append((String) pathParts.get(pathParts.size() - 1));

            contents.replace(declStart, declEnd, pkgBuf.toString());

            name = nameBuf.toString();
            String relocatedDir = Repackager.dirForPath(name);

            if (!originalDir.equals(relocatedDir)) {
                _movedDirs.put(originalDir, relocatedDir);
            }
        }
    }

    File targetFile = new File(_targetBase, name); // possibly renamed above

    if (sourceFile.lastModified() < targetFile.lastModified()) {
        _skippedFiles++;
        return;
    }

    writeFile(targetFile, _repackager.repackage(contents));
}

From source file:cn.dreampie.resource.LessSource.java

/**
 * Returns this source's normalized content with the span of capture group 1 of
 * {@code importMatcher} replaced by the normalized content of the imported source.
 *
 * <p>When capture group 8 is non-empty, the imported content is wrapped as
 * {@code @media<group 8>{ ... }}.
 *
 * @param importedLessSource source whose normalized content is spliced in
 * @param importMatcher      matcher positioned on the import statement being replaced
 * @return the combined content
 */
private String includeImportedContent(LessSource importedLessSource, Matcher importMatcher) {
    StringBuilder merged = new StringBuilder();

    // Everything before the import statement.
    merged.append(normalizedContent.substring(0, importMatcher.start(1)));

    final String mediaQuery = importMatcher.group(8);
    final boolean wrapInMedia = mediaQuery != null && mediaQuery.length() > 0;

    if (wrapInMedia) {
        merged.append("@media").append(mediaQuery).append("{\n");
    }
    merged.append(importedLessSource.getNormalizedContent());
    if (wrapInMedia) {
        merged.append("}\n");
    }

    // Everything after the import statement.
    merged.append(normalizedContent.substring(importMatcher.end(1)));
    return merged.toString();
}

From source file:com.haulmont.cuba.gui.config.WindowConfig.java

/**
 * Looks up screen registration information by screen ID.
 *
 * <p>If nothing is registered under {@code id} and the ID matches
 * {@code ENTITY_SCREEN_PATTERN}, the entity name captured by group 1 is swapped for
 * the name of its original meta class (when one exists) and the lookup is retried.
 * Among the registrations found, one whose screen agent supports {@code deviceInfo}
 * is preferred; otherwise the first registration without a screen agent is used.
 * The whole lookup runs under the read lock.
 *
 * @param id         screen ID as set up in <code>screens.xml</code>
 * @param deviceInfo target device info
 * @return screen's registration information or null if not found
 */
@Nullable
public WindowInfo findWindowInfo(String id, @Nullable DeviceInfo deviceInfo) {
    lock.readLock().lock();
    try {
        checkInitialized();

        List<WindowInfo> candidates = screens.get(id);

        if (candidates == null) {
            Matcher entityScreen = ENTITY_SCREEN_PATTERN.matcher(id);
            if (entityScreen.matches()) {
                MetaClass metaClass = metadata.getClass(entityScreen.group(1));
                if (metaClass == null) {
                    return null;
                }

                MetaClass originalMetaClass = metadata.getExtendedEntities().getOriginalMetaClass(metaClass);
                if (originalMetaClass != null) {
                    // Substitute the original entity name in place inside the screen ID.
                    StringBuilder originalId = new StringBuilder(id);
                    originalId.replace(entityScreen.start(1), entityScreen.end(1), originalMetaClass.getName());
                    candidates = screens.get(originalId.toString());
                }
            }
        }

        if (candidates == null) {
            return null;
        }

        // Single registration with no agent: nothing to choose between.
        if (candidates.size() == 1 && candidates.get(0).getScreenAgent() == null) {
            return candidates.get(0);
        }

        // With a device, the first registration whose agent supports it wins.
        if (deviceInfo != null) {
            for (WindowInfo candidate : candidates) {
                if (candidate.getScreenAgent() != null && candidate.getScreenAgent().isSupported(deviceInfo)) {
                    return candidate;
                }
            }
        }

        // Otherwise fall back to the default screen: the first one without an agent.
        for (WindowInfo candidate : candidates) {
            if (candidate.getScreenAgent() == null) {
                return candidate;
            }
        }
        return null;
    } finally {
        lock.readLock().unlock();
    }
}

From source file:com.cyberway.issue.extractor.RegexpHTMLLinkExtractor.java

/**
 * Walks the attributes of a general (non-META) tag and dispatches each one by the
 * capture group of {@code EACH_ATTRIBUTE_EXTRACTOR} that matched its name: HREF,
 * ACTION, ON* handlers, SRC-like, CODEBASE, CLASSID/DATA, ARCHIVE, CODE and VALUE.
 * Resource-style attributes (CLASSID/DATA, ARCHIVE, CODE) are collected and, after
 * the scan, resolved against the CODEBASE (when one was given) and processed as embeds.
 *
 * <p>An HREF on a BASE element also replaces {@code base}.
 *
 * @param element name of the tag being processed
 * @param cs      character sequence holding the tag's attributes
 * @return true if {@code next} holds more entries on return than on entry
 *         (presumably meaning at least one link was queued by the process* helpers --
 *         confirm against their implementations)
 */
protected boolean processGeneralTag(CharSequence element, CharSequence cs) {

    Matcher attr = TextUtils.getMatcher(EACH_ATTRIBUTE_EXTRACTOR, cs);

    // Just in case it's an OBJECT or APPLET tag
    String codebase = null;
    ArrayList<String> resources = null;
    // Queue size on entry; compared with the size on exit to compute the return value.
    long tally = next.size();

    while (attr.find()) {
        // The attribute value sits in whichever of groups 12, 13 or 14 took part in the match.
        int valueGroup = (attr.start(12) > -1) ? 12 : (attr.start(13) > -1) ? 13 : 14;
        int start = attr.start(valueGroup);
        int end = attr.end(valueGroup);
        CharSequence value = cs.subSequence(start, end);
        if (attr.start(2) > -1) {
            // HREF
            CharSequence context = Link.elementContext(element, attr.group(2));
            if (element.toString().equalsIgnoreCase(LINK)) {
                // <LINK> elements treated as embeds (css, ico, etc)
                processEmbed(value, context);
            } else {
                if (element.toString().equalsIgnoreCase(BASE)) {
                    try {
                        base = UURIFactory.getInstance(value.toString());
                    } catch (URIException e) {
                        extractErrorListener.noteExtractError(e, source, value);
                    }
                }
                // other HREFs treated as links
                processLink(value, context);
            }
        } else if (attr.start(3) > -1) {
            // ACTION
            CharSequence context = Link.elementContext(element, attr.group(3));
            processLink(value, context);
        } else if (attr.start(4) > -1) {
            // ON____
            processScriptCode(value); // TODO: context?
        } else if (attr.start(5) > -1) {
            // SRC etc.
            CharSequence context = Link.elementContext(element, attr.group(5));
            processEmbed(value, context);
        } else if (attr.start(6) > -1) {
            // CODEBASE
            // TODO: more HTML deescaping?
            codebase = TextUtils.replaceAll(ESCAPED_AMP, value, AMP);
            CharSequence context = Link.elementContext(element, attr.group(6));
            processEmbed(codebase, context);
        } else if (attr.start(7) > -1) {
            // CLASSID, DATA
            if (resources == null) {
                resources = new ArrayList<String>();
            }
            resources.add(value.toString());
        } else if (attr.start(8) > -1) {
            // ARCHIVE
            if (resources == null) {
                resources = new ArrayList<String>();
            }
            String[] multi = TextUtils.split(WHITESPACE, value);
            for (int i = 0; i < multi.length; i++) {
                resources.add(multi[i]);
            }
        } else if (attr.start(9) > -1) {
            // CODE
            if (resources == null) {
                resources = new ArrayList<String>();
            }
            // If element is applet and code value does not end with
            // '.class' then append '.class' to the code value.
            if (element.toString().toLowerCase().equals(APPLET)
                    && !value.toString().toLowerCase().endsWith(CLASSEXT)) {
                resources.add(value.toString() + CLASSEXT);
            } else {
                resources.add(value.toString());
            }

        } else if (attr.start(10) > -1) {
            // VALUE
            if (TextUtils.matches(LIKELY_URI_PATH, value)) {
                CharSequence context = Link.elementContext(element, attr.group(10));
                processLink(value, context);
            }

        } else if (attr.start(11) > -1) {
            // any other attribute
            // ignore for now
            // could probe for path- or script-looking strings, but
            // those should be vanishingly rare in other attributes,
            // and/or symptomatic of page bugs
        }
    }
    TextUtils.recycleMatcher(attr);

    // handle codebase/resources
    if (resources == null) {
        // The original computed (tally - next.size()) > 0, which is true only when the
        // queue shrank; the comparison is written so that growth is reported.
        return next.size() > tally;
    }
    UURI codebaseURI = null;
    // Declared outside the try so the failing resource can be named in the log message.
    String res = null;
    try {
        if (codebase != null) {
            // TODO: Pass in the charset.
            codebaseURI = UURIFactory.getInstance(base, codebase);
        }
        for (String resource : resources) {
            // TODO: more HTML deescaping?
            res = TextUtils.replaceAll(ESCAPED_AMP, resource, AMP);
            if (codebaseURI != null) {
                res = codebaseURI.resolve(res).toString();
            }
            processEmbed(res, element); // TODO: include attribute too
        }
    } catch (URIException e) {
        extractErrorListener.noteExtractError(e, source, codebase);
    } catch (IllegalArgumentException e) {
        DevUtils.logger.log(Level.WARNING,
                "processGeneralTag()\n" + "codebase=" + codebase + " res=" + res + "\n" + DevUtils.extraInfo(),
                e);
    }
    return next.size() > tally;
}

From source file:biz.astute.test.simulator.rest.RequestContext.java

/**
 * Return path portion of URL. The url may be modified to extract variables.
 *
 * @param globalProperties global properties
 * @return path portion of url/*from  w  w w  . j a  va  2 s  .c  o m*/
 * @throws UnsupportedEncodingException exception
 */
public final String getResourcePath(final Properties globalProperties) throws UnsupportedEncodingException {

    uriProperties.clear();
    String requestURI = URLDecoder.decode(request.getRequestURI(), "utf-8");
    Pattern[] currentPatterns = getPatterns(globalProperties);
    if (currentPatterns.length < 1) {
        return requestURI;
    }

    StringBuilder resourceName = new StringBuilder(requestURI);
    resourceName.append('/'); // Remove this later - need for matcher

    for (Pattern pattern : currentPatterns) {
        Matcher matcher = pattern.matcher(resourceName);
        if (matcher.matches() && (matcher.groupCount() > 0)) {
            for (int index = 1; index <= matcher.groupCount(); index++) {
                String matched = matcher.group(index);
                uriProperties.add(matched);
            }
            // Do so in reverse order so as to not affect offset
            for (int index = matcher.groupCount(); index > 0; index--) {
                resourceName.replace(matcher.start(index), matcher.end(index), StringUtils.EMPTY);
            }
            break;
        }
    }

    // remove '/' appended earlier
    resourceName.setLength(resourceName.length() - 1);
    // Remove any // that result from pattern replacement
    return resourceName.toString().replaceAll("//", "/");
}

From source file:com.icesoft.faces.component.style.OutputStyleRenderer.java

/**
 * Renders the stylesheet element for an {@code OutputStyle} component and, for any
 * browser type other than {@code DEFAULT_TYPE}, tries to add a second,
 * browser-specific stylesheet element next to it.
 *
 * <p>Two href shapes are handled. An href ending in {@code CSS_EXTENTION} has the
 * browser extension applied to the part before that suffix via {@code useSpecific}.
 * Otherwise, an href matching the {@code javax.faces.resource/...css?ln=...} form has
 * the extension inserted after the resource base name, and the extra element is
 * added only if the resource handler can create that resource.
 *
 * <p>Any exception raised inside, including the "too short" RuntimeException thrown
 * below, is caught by the enclosing catch and logged rather than propagated.
 *
 * @param facesContext current faces context
 * @param uiComponent  the component being rendered; cast to {@code OutputStyle}
 * @throws IOException declared by the renderer contract
 */
public void encodeEnd(FacesContext facesContext, UIComponent uiComponent) throws IOException {
    validateParameters(facesContext, uiComponent, OutputStyle.class);
    try {
        DOMContext domContext = DOMContext.attachDOMContext(facesContext, uiComponent);
        if (!domContext.isInitialized()) {
            OutputStyle outputStyle = (OutputStyle) uiComponent;
            Element styleEle = buildCssElement(domContext);
            String href = outputStyle.getHref();
            styleEle.setAttribute(HTML.HREF_ATTR, getResourceURL(facesContext, href));
            domContext.setRootNode(styleEle);
            int browserType = browserType(facesContext, uiComponent);
            if (browserType != DEFAULT_TYPE) {
                if (href.endsWith(CSS_EXTENTION)) {
                    // The suffix being stripped is the trailing one. indexOf would stop at
                    // an earlier occurrence (e.g. "/a.css.d/x.css") and cut the path short.
                    int i = href.lastIndexOf(CSS_EXTENTION);
                    if (i > 0) {
                        String start = href.substring(0, i);
                        Element ieStyleEle = buildCssElement(domContext);
                        // Independent checks: the last matching browser type wins.
                        String extention = IE_EXTENTION;
                        if (browserType == SAFARI) {
                            extention = SAFARI_EXTENTION;
                        }
                        if (browserType == DT) {
                            extention = DT_EXTENTION;
                        }
                        if (browserType == IE_7) {
                            extention = IE_7_EXTENTION;
                        }
                        if (browserType == IE_8) {
                            extention = IE_8_EXTENSION;
                        }
                        if (browserType == SAFARI_MOBILE) {
                            extention = SAFARI_MOBILE_EXTENTION;
                        }
                        if (browserType == OPERA) {
                            extention = OPERA_EXTENTION;
                        }
                        if (browserType == OPERA_MOBILE) {
                            extention = OPERA_MOBILE_EXTENTION;
                        }
                        String browserSpecificFilename = useSpecific(facesContext, start, extention);
                        if (browserSpecificFilename != null) {
                            // W3C spec: To make a style sheet preferred, set the rel attribute to "stylesheet" and name the style sheet with the title attribute
                            ieStyleEle.setAttribute(HTML.TITLE_ATTR, extention);
                            String hrefURL = CoreUtils.resolveResourceURL(facesContext,
                                    browserSpecificFilename);
                            ieStyleEle.setAttribute(HTML.HREF_ATTR, hrefURL);
                            styleEle.getParentNode().appendChild(ieStyleEle);
                        }
                    } else {
                        throw new RuntimeException("OutputStyle file attribute is too short. "
                                + "Needs at least one character before .css. Current Value is [" + href + "]");
                    }
                } else {
                    Matcher matcher = Pattern
                            .compile(".*javax\\.faces\\.resource/((.*)\\.css)(\\..*)?\\?ln=([^&]*)(&.*|$)")
                            .matcher(href);
                    if (matcher.matches()) {
                        Element ieStyleEle = buildCssElement(domContext);
                        String extension = browserType >= 0 && browserType < extensions.length
                                ? extensions[browserType]
                                : IE_EXTENTION;
                        ieStyleEle.setAttribute(HTML.TITLE_ATTR, extension);
                        // group(0) is the whole href, so end(2) is directly the insert
                        // position just after the resource base name.
                        String hrefURL = new StringBuffer(matcher.group(0)).insert(matcher.end(2), extension)
                                .toString();
                        ieStyleEle.setAttribute(HTML.HREF_ATTR, hrefURL);
                        // group(1) starts where group(2) starts, so the length of group 2
                        // is the insert position relative to group(1).
                        String resourceName = new StringBuffer(matcher.group(1))
                                .insert(matcher.end(2) - matcher.start(2), extension).toString();
                        Resource resource = facesContext.getApplication().getResourceHandler()
                                .createResource(resourceName, matcher.group(4));
                        if (resource != null) {
                            styleEle.getParentNode().appendChild(ieStyleEle);
                        }
                    }
                }
            }

        }
        domContext.stepOver();
    } catch (Exception e) {
        log.error("Error in OutputStyleRenderer", e);
    }
}

From source file:com.cyberway.issue.crawler.extractor.ExtractorHTML.java

/**
 * Process metadata tags./*from  w  w w . ja  v a  2  s  . c  o m*/
 * @param curi CrawlURI we're processing.
 * @param cs Sequence from underlying ReplayCharSequence. This
 * is TRANSIENT data. Make a copy if you want the data to live outside
 * of this extractors' lifetime.
 * @return True robots exclusion metatag.
 */
protected boolean processMeta(CrawlURI curi, CharSequence cs) {
    Matcher attr = TextUtils.getMatcher(EACH_ATTRIBUTE_EXTRACTOR, cs);
    String name = null;
    String httpEquiv = null;
    String content = null;
    while (attr.find()) {
        int valueGroup = (attr.start(14) > -1) ? 14 : (attr.start(15) > -1) ? 15 : 16;
        CharSequence value = cs.subSequence(attr.start(valueGroup), attr.end(valueGroup));
        if (attr.group(1).equalsIgnoreCase("name")) {
            name = value.toString();
        } else if (attr.group(1).equalsIgnoreCase("http-equiv")) {
            httpEquiv = value.toString();
        } else if (attr.group(1).equalsIgnoreCase("content")) {
            content = value.toString();
        }
        // TODO: handle other stuff
    }
    TextUtils.recycleMatcher(attr);

    // Look for the 'robots' meta-tag
    if ("robots".equalsIgnoreCase(name) && content != null) {
        curi.putString(A_META_ROBOTS, content);
        RobotsHonoringPolicy policy = getSettingsHandler().getOrder().getRobotsHonoringPolicy();
        String contentLower = content.toLowerCase();
        if ((policy == null || (!policy.isType(curi, RobotsHonoringPolicy.IGNORE)
                && !policy.isType(curi, RobotsHonoringPolicy.CUSTOM)))
                && (contentLower.indexOf("nofollow") >= 0 || contentLower.indexOf("none") >= 0)) {
            // if 'nofollow' or 'none' is specified and the
            // honoring policy is not IGNORE or CUSTOM, end html extraction
            logger.fine("HTML extraction skipped due to robots meta-tag for: " + curi.toString());
            return true;
        }
    } else if ("refresh".equalsIgnoreCase(httpEquiv) && content != null) {
        int urlIndex = content.indexOf("=") + 1;
        if (urlIndex > 0) {
            String refreshUri = content.substring(urlIndex);
            try {
                curi.createAndAddLinkRelativeToBase(refreshUri, "meta", Link.REFER_HOP);
            } catch (URIException e) {
                if (getController() != null) {
                    getController().logUriError(e, curi.getUURI(), refreshUri);
                } else {
                    logger.info("Failed createAndAddLinkRelativeToBase " + curi + ", " + cs + ", " + refreshUri
                            + ": " + e);
                }
            }
        }
    }
    return false;
}

From source file:com.app.util.browser.BrowserSniffer.java

/**
 * Collects, for every match of {@code pat} in {@code str}, the text of groups
 * {@code 0 .. countGroups - 1} as a {@code String[]}.
 *
 * <p>A group that did not take part in the match, or that matched the empty string,
 * is stored as {@code null}. A match whose groups are all null/empty is dropped.
 * Any exception raised while scanning (for example asking for a group index the
 * pattern does not have) is logged and ends the scan; matches collected so far
 * are still returned.
 *
 * @param pat         compiled pattern to apply
 * @param str         input text
 * @param countGroups number of groups to read, starting at group 0 (the whole match)
 * @return list of {@code String[]}, one per retained match; never null
 */
private ArrayList getMatches(Pattern pat, String str, int countGroups) {
    ArrayList matches = new ArrayList();
    Matcher matcher = pat.matcher(str);
    try {
        while (matcher.find()) {
            ArrayList captured = new ArrayList();
            int populated = 0;
            for (int group = 0; group < countGroups; group++) {
                // group(n) is null for a non-participating group, which isNotEmpty rejects.
                String text = matcher.group(group);
                if (StringUtils.isNotEmpty(text)) {
                    captured.add(text);
                    populated++;
                } else {
                    captured.add(null);
                }
            }
            // Keep the match only if at least one group carried text.
            if (populated > 0) {
                matches.add(captured.toArray(new String[captured.size()]));
            }
        }
    } catch (Exception e) {
        log.error(e);
    }

    return matches;
}