Example usage for java.util.regex Pattern UNICODE_CASE

List of usage examples for java.util.regex Pattern UNICODE_CASE

Introduction

This page collects usage examples for java.util.regex.Pattern.UNICODE_CASE.

Prototype

int UNICODE_CASE

To view the source code for java.util.regex.Pattern.UNICODE_CASE, click the Source Link.

Document

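Enables Unicode-aware case folding. The flag only affects matching when it is combined with CASE_INSENSITIVE: case-insensitive matching is then done in a manner consistent with the Unicode Standard instead of being limited to US-ASCII characters.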

Usage

From source file:com.ikanow.infinit.e.harvest.enrichment.custom.UnstructuredAnalysisHarvester.java

/**
 * Compiles a regular expression, translating a string of single-character
 * flag codes into {@link Pattern} flag bits.
 * <p>
 * Recognised codes are {@code m} (MULTILINE), {@code i} (CASE_INSENSITIVE),
 * {@code d} (DOTALL), {@code u} (UNICODE_CASE) and {@code n} (UNIX_LINES).
 * Any other character is ignored.
 *
 * @param regEx the regular expression to compile
 * @param flags the flag codes, or {@code null} for no flags
 * @return the compiled pattern
 */
private static Pattern createRegex(String regEx, String flags) {
    int mask = 0;

    if (flags != null) {
        for (char code : flags.toCharArray()) {
            if (code == 'm') {
                mask |= Pattern.MULTILINE;
            } else if (code == 'i') {
                mask |= Pattern.CASE_INSENSITIVE;
            } else if (code == 'd') {
                mask |= Pattern.DOTALL;
            } else if (code == 'u') {
                mask |= Pattern.UNICODE_CASE;
            } else if (code == 'n') {
                mask |= Pattern.UNIX_LINES;
            }
            // unrecognised codes contribute nothing to the mask
        }
    }

    return Pattern.compile(regEx, mask);
}

From source file:org.etudes.mneme.impl.AttachmentServiceImpl.java

/**
 * Gathers the attachment references found in {@code src="..."} and {@code href="..."}
 * attributes of the given html data.<br />
 * A reference is kept when it contains {@code /access/content/} (unless it also contains
 * {@code /access/content/user/} or {@code /access/content/public/}) or when it contains
 * {@code /access/mneme/content/}.<br />
 * Only the part of the reference after the leading {@code /access} is stored.
 *
 * @param data
 *        The data string; {@code null} yields an empty set.
 * @param normalize
 *        if true, each stored reference is first passed through {@code decodeUrl}.
 * @param parentRef
 *        Reference string to the embedding (parent) resource - handed to
 *        {@code adjustRelativeReference} to resolve relative references.
 * @return The set of attachment references.
 */
protected Set<String> harvestAttachmentsReferenced(String data, boolean normalize, String parentRef) {
    Set<String> found = new HashSet<String>();
    if (data == null) {
        return found;
    }

    // groups: 0 = whole match, 1 = src|href, 2 = quoted text up to the terminator, 3 = terminator (# or ")
    Pattern attributePattern = Pattern.compile("(src|href)[\\s]*=[\\s]*\"([^#\"]*)([#\"])",
            Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);

    for (Matcher matcher = attributePattern.matcher(data); matcher.find();) {
        if (matcher.groupCount() != 3) {
            continue;
        }

        String target = matcher.group(2);
        if (target != null) {
            target = target.trim();
        }

        // turn a relative reference into a full one
        target = adjustRelativeReference(target, parentRef);

        // content hosting references qualify, except those under /user/ or /public/
        int start = target.indexOf("/access/content/");
        if (start != -1 && (target.indexOf("/access/content/user/") != -1
                || target.indexOf("/access/content/public/") != -1)) {
            start = -1;
        }

        // mneme docs references qualify as well
        if (start == -1) {
            start = target.indexOf("/access/mneme/content/");
        }

        // TODO: further filter to docs root and context (optional)
        if (start == -1) {
            continue;
        }

        // keep only what follows "/access" (7 characters)
        String reference = target.substring(start + 7);

        // optionally deal with %20 and other encoded URL stuff
        found.add(normalize ? decodeUrl(reference) : reference);
    }

    return found;
}

From source file:org.etudes.mneme.impl.AttachmentServiceImpl.java

/**
 * {@inheritDoc}
 */
protected String translateEmbeddedReferences(String data, Collection<Translation> translations,
        String parentRef) {
    // without data or translations there is nothing to rewrite
    if (data == null || translations == null) {
        return data;
    }

    // groups: 0 = whole match, 1 = src|href, 2 = quoted text up to the terminator, 3 = terminator (# or ")
    Pattern attributePattern = Pattern.compile("(src|href)[\\s]*=[\\s]*\"([^#\"]*)([#\"])",
            Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);

    Matcher matcher = attributePattern.matcher(data);
    StringBuffer result = new StringBuffer();

    // only text inside src= or href= attributes is considered, so look-alike strings elsewhere are untouched
    while (matcher.find()) {
        if (matcher.groupCount() != 3) {
            continue;
        }

        String target = matcher.group(2);
        String terminator = matcher.group(3);
        if (target != null) {
            target = target.trim();
        }

        // turn a relative reference into a full one
        target = adjustRelativeReference(target, parentRef);

        // content hosting references qualify, except those under /user/ or /public/
        int start = target.indexOf("/access/content/");
        if (start != -1 && (target.indexOf("/access/content/user/") != -1
                || target.indexOf("/access/content/public/") != -1)) {
            start = -1;
        }

        // mneme docs references qualify as well
        if (start == -1) {
            start = target.indexOf("/access/mneme/content/");
        }
        if (start == -1) {
            continue;
        }

        // the reference proper begins right after "/access" (7 characters)
        int referenceStart = start + 7;

        // deal with %20, &amp;, and other encoded URL stuff
        String normal = decodeUrl(target.substring(referenceStart));

        // run the normal form through every translation in order
        String translated = normal;
        for (Translation each : translations) {
            translated = each.translate(translated);
        }

        // URL encode the translated form
        String escaped = EscapeRefUrl.escapeUrl(translated);

        // leave the match alone unless a translation changed it
        if (normal.equals(translated)) {
            continue;
        }

        String replacement = matcher.group(1) + "=\"" + target.substring(0, referenceStart) + escaped
                + terminator;
        matcher.appendReplacement(result, Matcher.quoteReplacement(replacement));
    }

    matcher.appendTail(result);
    return result.toString();
}

From source file:org.sakaiproject.tool.assessment.services.GradingService.java

/**
 * Checks whether a fill-in-the-blank input matches an answer that may contain
 * {@code *} wildcards.
 * <p>
 * Each {@code *} in the answer matches one or more characters (regex {@code .+});
 * every other character, including {@code |} and regex metacharacters, is matched
 * literally. The whole input must match.
 *
 * @param answer the expected answer, possibly containing {@code *} wildcards
 * @param input the text to test against the answer
 * @param casesensitive if false, matching ignores case using Unicode-aware case folding
 * @return true if the input matches the answer; false otherwise, including when
 *         either string is {@code null}
 */
public boolean fibmatch(String answer, String input, boolean casesensitive) {
    // explicit null check instead of relying on a caught NullPointerException
    if (answer == null || input == null) {
        return false;
    }

    // Build the regex by scanning for '*' directly; splitting on a sentinel
    // character would lose any literal occurrence of that character in the answer.
    StringBuilder regex = new StringBuilder();
    int segmentStart = 0;
    for (int star = answer.indexOf('*'); star != -1; star = answer.indexOf('*', segmentStart)) {
        regex.append(Pattern.quote(answer.substring(segmentStart, star)));
        regex.append(".+");
        segmentStart = star + 1;
    }
    regex.append(Pattern.quote(answer.substring(segmentStart)));

    int flags = casesensitive ? 0 : (Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
    return Pattern.compile(regex.toString(), flags).matcher(input).matches();
}

From source file:lineage2.gameserver.Config.java

/**
 * Reads the abuse word file named by {@code ANUSEWORDS_CONFIG_FILE} (as UTF-8) and stores
 * one compiled pattern per non-empty line in {@code ABUSEWORD_LIST}.
 * <p>
 * Each line is wrapped as {@code .*line.*} and compiled with DOTALL, CASE_INSENSITIVE and
 * UNICODE_CASE. An {@link IOException} while reading is logged as a warning and leaves
 * {@code ABUSEWORD_LIST} unassigned by this call.
 */
public static void abuseLoad() {
    final int matchFlags = Pattern.DOTALL | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
    List<Pattern> patterns = new ArrayList<Pattern>();
    LineNumberReader reader = null;
    try {
        reader = new LineNumberReader(
                new InputStreamReader(new FileInputStream(ANUSEWORDS_CONFIG_FILE), "UTF-8"));
        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
            StringTokenizer tokens = new StringTokenizer(line, "\n\r");
            if (!tokens.hasMoreTokens()) {
                // empty line: nothing to compile
                continue;
            }
            patterns.add(Pattern.compile(".*" + tokens.nextToken() + ".*", matchFlags));
        }
        ABUSEWORD_LIST = patterns.toArray(new Pattern[patterns.size()]);
        patterns.clear();
        _log.info("Abuse: Loaded " + ABUSEWORD_LIST.length + " abuse words.");
    } catch (IOException e1) {
        _log.warn("Error reading abuse: " + e1);
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignored) {
                // closing is best effort; a failure here is deliberately not reported
            }
        }
    }
}

From source file:org.etudes.jforum.view.admin.ImportExportAction.java

/**
 * Rewrites the {@code src}/{@code href} attributes in the message whose value is
 * {@code /access} followed by the given reference, replacing each value with a
 * relative path for exported content.
 * <p>
 * Both {@code ref} and {@code parentPath} are first passed through
 * {@code Validator.escapeUrl}. For each match:
 * <ul>
 * <li>if the escaped parent path occurs in the matched URL, the URL is cut to the text
 * following that parent path;</li>
 * <li>otherwise the leading {@code /access/content/group/<siteId>/} is removed (site id
 * taken from the current tool placement), and when a non-blank parent path was given one
 * {@code ../} is prepended for each {@code /}-separated parent path segment except one.</li>
 * </ul>
 * The rewritten attribute is always emitted with double quotes.
 *
 * @param message
 *             - message text to process
 * @param ref
 *             - reference, matched literally after escaping
 * @param parentPath
 *             - parent path; may be null or blank
 * @return
 *             - modified content
 */
private String parseExportContentResourceUrls(String message, String ref, String parentPath) {
    // NOTE(review): presumably escapeUrl already encodes spaces as %20, so the file name
    // is not decoded separately - confirm against Validator.
    ref = Validator.escapeUrl(ref);
    parentPath = Validator.escapeUrl(parentPath);

    // The reference is quoted so characters such as '(', '+' or '.' in a path are matched
    // literally and cannot add capturing groups or otherwise alter the pattern.
    // String.valueOf keeps a null reference from throwing inside Pattern.quote.
    Pattern p = Pattern.compile(
            "(src|href)[\\s]*=[\\s]*[\\\"'](/access" + Pattern.quote(String.valueOf(ref)) + ")[\\\"']",
            Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.UNICODE_CASE);

    boolean hasParent = parentPath != null && parentPath.trim().length() != 0;

    StringBuffer sb = new StringBuffer();
    Matcher m = p.matcher(message);
    // groups: 1 = src|href, 2 = the matched /access... URL
    while (m.find()) {
        String refMatch = m.group(2);
        int parentIndex = hasParent ? refMatch.indexOf(parentPath) : -1;

        if (parentIndex != -1) {
            // resource lies under the parent path: keep only what follows it
            refMatch = refMatch.substring(parentIndex + parentPath.length() + 1);
        } else {
            // make the reference relative to the site's content root
            String siteId = ToolManager.getCurrentPlacement().getContext();
            refMatch = refMatch.substring(("/access/content/group/" + siteId).length() + 1);

            if (hasParent) {
                // climb out of the parent path, one "../" per segment except one
                String[] pathRef = parentPath.split("/");
                StringBuilder refPath = new StringBuilder();
                for (int i = 0; i < (pathRef.length - 1); i++) {
                    refPath.append("../");
                }
                refMatch = refPath.toString() + refMatch;
            }
        }

        m.appendReplacement(sb, Matcher.quoteReplacement(m.group(1) + "=\"" + refMatch + "\""));
    }
    m.appendTail(sb);

    return sb.toString();
}

From source file:org.etudes.jforum.view.admin.ImportExportAction.java

/**
 * Create the embedded reference detection pattern. It creates three groups: 0 - the entire matc, 1- src|href, 2-server url up to access/content/...., 3-siteid/refwithfolders.
 * /*from  ww  w.  ja  va2s . c  o  m*/
 * @return The Pattern.
 */
private Pattern getExportContentResourcePattern() {
    return Pattern.compile("(src|href)[\\s]*=[\\s]*[\\\"'](.*?)/access/content/group/([^\"]*)[\\\"']",
            Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
}

From source file:org.etudes.jforum.view.admin.ImportExportAction.java

/**
 * Build the detection pattern for embedded content-group references. Matching is
 * case-insensitive with Unicode-aware case folding. Groups: 0 - the entire match,
 * 1 - src|href, 2 - the text following embeded_jf_content/content/group up to the
 * closing quote.
 *
 * @return The Pattern.
 */
private Pattern getContentResourcePattern() {
    final int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
    final String expression = "(src|href)[\\s]*=[\\s]*[\\\"']embeded_jf_content/content/group([^\"]*)[\\\"']";
    return Pattern.compile(expression, flags);
}

From source file:org.etudes.jforum.view.admin.ImportExportAction.java

/**
 * Create the embedded reference detection pattern. It creates three groups: 0 - the entire match, 1- src|href, 2-the reference.
 * /*from  w  ww  .j a  v a 2  s.  c om*/
 * @return The Pattern.
 */
private Pattern getEmbeddedContentResourcePattern() {
    return Pattern.compile("(src|href)[\\s]*=[\\s]*[\\\"'](?!http|www|file)([^\"]*)[\\\"']",
            Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
}

From source file:org.etudes.jforum.view.admin.ImportExportAction.java

/**
 * Create the embedded reference detection pattern. It creates three groups: 0 - the entire matc, 1- src|href, 2-the reference.
 * /*from   w  w w. ja  v  a  2s . com*/
 * @return The Pattern.
 */
private Pattern getMeleteResourcePattern() {
    return Pattern.compile(
            "(src|href)[\\s]*=[\\s]*[\\\"']embeded_jf_content/meleteDocs/content/private/meleteDocs([^\"]*)[\\\"']",
            Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
}