Example usage for org.apache.commons.lang StringUtils isAlphanumeric

List of usage examples for org.apache.commons.lang StringUtils isAlphanumeric

Introduction

On this page you can find example usages of org.apache.commons.lang StringUtils isAlphanumeric.

Prototype

public static boolean isAlphanumeric(String str) 

Source Link

Document

Checks if the String contains only Unicode letters or digits.

Usage

From source file:gov.nih.nci.evs.browser.utils.DataUtils.java

/**
 * Escapes a small set of markup-significant characters in a term.
 *
 * <p>Character by character, {@code '<'} is replaced with {@code "&lt; "}
 * (including a trailing space), {@code '>'} with {@code "&gt;"}, {@code '&'}
 * with {@code "&amp;"} and a space with {@code "&#32;"}. All other characters
 * are copied through unchanged.
 *
 * @param s the term to encode; may be {@code null}
 * @return {@code null} when {@code s} is {@code null}; {@code s} itself when
 *         {@code StringUtils.isAlphanumeric(s)} is true; otherwise the escaped copy
 */
public static String encode_term(String s) {
    // Nothing to escape for null or purely alphanumeric input.
    if (s == null || StringUtils.isAlphanumeric(s)) {
        return s;
    }

    final int length = s.length();
    final StringBuilder encoded = new StringBuilder();
    for (int pos = 0; pos < length; pos++) {
        final char current = s.charAt(pos);
        switch (current) {
        case '<':
            // NOTE(review): the trailing space is emitted only for '<' -- confirm it is intended.
            encoded.append("&lt; ");
            break;
        case '>':
            encoded.append("&gt;");
            break;
        case '&':
            encoded.append("&amp;");
            break;
        case ' ':
            encoded.append("&#32;");
            break;
        default:
            encoded.append(current);
            break;
        }
    }
    return encoded.toString();
}

From source file:opennlp.tools.apps.object_dedup.SimilarityAccessorBase.java

/**
 * Returns the words of {@code list} that pass all of the following filters,
 * in their original order.
 *
 * <ul>
 * <li>the word does not match the dollar-amount pattern compiled below;</li>
 * <li>{@code StringUtils.isAlphanumeric(word)} is true;</li>
 * <li>the word is at least three characters long, or
 *     {@code StringUtils.isAlpha(word)} is false.</li>
 * </ul>
 *
 * @param list the words to filter; the list itself is not modified
 * @return a new list holding the accepted words
 */
protected List<String> removeDollarWordAndNonAlphaFromList(List<String> list) {
    final Pattern dollarAmount = Pattern.compile("^\\$(\\d{1,3}(\\,\\d{3})*|(\\d+))(\\.\\d{2})?$");
    final List<String> accepted = new ArrayList<String>();

    for (final String candidate : list) {
        if (dollarAmount.matcher(candidate).find()) {
            continue; // looks like a dollar amount
        }
        if (!StringUtils.isAlphanumeric(candidate)) {
            continue; // contains something other than letters/digits
        }
        if (candidate.length() < 3 && StringUtils.isAlpha(candidate)) {
            continue; // short, letters-only word
        }
        accepted.add(candidate);
    }
    return accepted;
}

From source file:opennlp.tools.apps.object_dedup.SimilarityAccessorBase.java

/**
 * Copies {@code toks} into a new list while dropping the "venue part" of the
 * token sequence and every token that starts with {@code "<punc"}.
 *
 * <p>A venue part begins at a token equal to {@code "at"} or {@code "@"} and
 * lasts until a later token that is not alphanumeric (per
 * {@code StringUtils.isAlphanumeric}) or that starts with {@code "<punc"}.
 * The opening token and the tokens inside the venue part are omitted; the
 * terminating token is kept unless it starts with {@code "<punc"}.
 *
 * <p>Fix: previously the end-of-venue test also ran on the opening token
 * itself. Because {@code "@"} is not alphanumeric, it switched the venue flag
 * straight back off, so {@code "@"} never opened a venue part. The end test is
 * now skipped for the opening token.
 *
 * @param toks the tokens to filter; not modified
 * @return a new list with the venue part and punctuation markers removed
 */
public List<String> removeVenuePart(ArrayList<String> toks) {
    List<String> results = new ArrayList<String>();
    boolean bVenuePart = false;
    for (String word : toks) {
        boolean isPuncMarker = word.startsWith("<punc");

        if (word.equals("at") || word.equals("@")) {
            // beginning of venue part
            bVenuePart = true;
        } else if (isPuncMarker || !StringUtils.isAlphanumeric(word)) {
            // end of venue part
            bVenuePart = false;
        }

        if (!bVenuePart && !isPuncMarker) {
            results.add(word);
        }
    }
    return results;
}

From source file:opennlp.tools.apps.object_dedup.SimilarityAccessorBase.java

/**
 * Walks the space-separated words of {@code lookup} and returns {@code true}.
 *
 * <p>The scan stops at the first word shorter than two characters, or once
 * more than three words have been counted. Words that are entirely lower
 * case, are not listed in {@code englishPrepositions}, are longer than three
 * characters and are alphanumeric are skipped without being counted (this
 * case used to return {@code false}). As written, no path returns anything
 * other than {@code true}.
 *
 * @param lookup the text to inspect; must not be {@code null}
 * @return always {@code true}
 */
protected boolean isCapitalized(String lookup) {
    final String[] tokens = lookup.split(" ");
    int counted = 0;
    for (int idx = 0; idx < tokens.length; idx++) {
        final String token = tokens[idx];
        if (token.length() < 2) { // '-', '|', ':'
            break;
        }

        final boolean lowerCaseContentWord = token.equals(token.toLowerCase())
                && !Arrays.asList(englishPrepositions).contains(token)
                && token.length() > 3
                && StringUtils.isAlphanumeric(token);
        if (lowerCaseContentWord) {
            continue; // was return false;
        }

        if (counted > 3) {
            break;
        }
        counted++;
    }
    return true;
}

From source file:opennlp.tools.parse_thicket.opinion_processor.StopList.java

/**
 * Filters a list of user "likes" and returns two lists: element 0 holds the
 * likes to use, element 1 holds potential categories.
 *
 * <p>Inputs with fewer than six entries are returned unfiltered. Otherwise
 * each like is lower-cased and dropped when it is not alphanumeric once
 * spaces are removed, when it is numeric, when it is shorter than four
 * characters, when it has no first name and consists only of common words in
 * fewer than three words, or when it is a single first name. Surviving likes
 * are rebuilt from their words, leaving out stop words and words shorter than
 * three characters, then de-duplicated through a {@code HashSet}. If at most
 * one like survives, the original input list is returned instead.
 *
 * @param userLikes the raw likes; not modified
 * @return a two-element list: the selected likes, then the potential categories
 */
public static List<List<String>> preFilterCommonEnglishExpressions(List<String> userLikes) {
    final List<List<String>> results = new ArrayList<List<String>>();
    final List<String> potentialCategs = new ArrayList<String>();

    // too short, do not filter
    if (userLikes.size() < 6) {
        results.add(userLikes);
        results.add(potentialCategs);
        return results;
    }

    final List<String> keptLikes = new ArrayList<String>();
    for (final String rawLike : userLikes) {
        final String like = rawLike.toLowerCase();

        if (!StringUtils.isAlphanumeric(like.replace(" ", ""))) {
            logger.info("removed isAlphanumeric " + like);
            continue;
        }
        if (StringUtils.isNumeric(like)) {
            logger.info("removed isNumericSpace " + like);
            continue;
        }
        if (like.length() < 4) {
            logger.info("removed too short likes " + like);
            continue;
        }

        final String[] comps = like.split(" ");
        final StringBuilder rebuilt = new StringBuilder();
        boolean hasFirstName = false;
        boolean allCommonEnglish = true;
        for (final String word : comps) {
            if (!isCommonWord(word)) {
                allCommonEnglish = false;
            }
            if (isFirstName(word)) {
                hasFirstName = true;
            }
            // the rebuilt like does not have to include stop words or very short words
            if (!isStopWord(word) && word.length() >= 3) {
                rebuilt.append(word).append(' ');
            }
        }

        final boolean commonWithoutName = !hasFirstName && allCommonEnglish;
        if (commonWithoutName && comps.length < 3) {
            logger.info("moved to category:  NoFirstName+AllCommonEng+ShorterThan3 " + like);
            continue;
        }
        // NOTE(review): unreachable -- a one-word like already satisfied the
        // "length < 3" test above, so potentialCategs is never filled. Confirm intent.
        if (commonWithoutName && comps.length == 1) {
            logger.info("moved to category: NoFirstName+AllCommonEng+Short1word " + like);
            potentialCategs.add(like);
            continue;
        }
        if (hasFirstName && comps.length == 1) {
            logger.info("removed : only first name, no last name " + like);
            continue;
        }

        keptLikes.add(rebuilt.toString().trim());
    }

    final List<String> distinctLikes = new ArrayList<String>(new HashSet<String>(keptLikes));
    // With one or zero survivors, do not do the reduction: fall back to the input.
    results.add(distinctLikes.size() > 1 ? distinctLikes : userLikes);
    results.add(potentialCategs);
    return results;
}

From source file:opennlp.tools.parse_thicket.opinion_processor.StopList.java

/**
 * Decides whether a single "like" is acceptable.
 *
 * <p>The like is lower-cased and rejected when it is not alphanumeric once
 * spaces are removed, when it is numeric, when it is shorter than four
 * characters, when it has no first name and consists only of common words in
 * fewer than three words, or when it is a single first name. Each rejection
 * is logged.
 *
 * @param like the like to check; must not be {@code null}
 * @return {@code true} when none of the rejection rules applies
 */
public static boolean isAcceptableIndividualLikes(String like) {
    final StopList finder = StopList.getInstance();
    final String normalized = like.toLowerCase();

    if (!StringUtils.isAlphanumeric(normalized.replace(" ", ""))) {
        logger.info("removed isAlphanumeric " + normalized);
        return false;
    }
    if (StringUtils.isNumeric(normalized)) {
        logger.info("removed isNumericSpace " + normalized);
        return false;
    }
    if (normalized.length() < 4) {
        logger.info("removed too short likes " + normalized);
        return false;
    }

    final String[] comps = normalized.split(" ");
    // NOTE(review): this rebuilt text is never read afterwards; kept so the
    // same lookups are performed as before.
    final StringBuilder rebuilt = new StringBuilder();
    boolean hasFirstName = false;
    boolean allCommonEnglish = true;
    for (final String word : comps) {
        if (!finder.isCommonWord(word)) {
            allCommonEnglish = false;
        }
        if (finder.isFirstName(word)) {
            hasFirstName = true;
        }
        if (!finder.isStopWord(word) && word.length() >= 3) {
            rebuilt.append(word).append(' ');
        }
    }

    final boolean commonWithoutName = !hasFirstName && allCommonEnglish;
    if (commonWithoutName && comps.length < 3) {
        logger.info("  NoFirstName+AllCommonEng+ShorterThan3 " + normalized);
        return false;
    }
    // NOTE(review): unreachable -- a one-word like is already rejected by the
    // "length < 3" test above.
    if (commonWithoutName && comps.length == 1) {
        logger.info(" NoFirstName+AllCommonEng+Short1word " + normalized);
        return false;
    }
    if (hasFirstName && comps.length == 1) {
        logger.info("removed : only first name, no last name " + normalized);
        return false;
    }

    return true;
}

From source file:opennlp.tools.similarity.apps.solr.WordDocBuilderEndNotes.java

/**
 * Builds a .docx document from search hits and writes it to disk.
 *
 * <p>The document starts with the upper-cased title. For every hit that has
 * at least one fragment it adds an optional subtitle, the processed paragraph
 * text, an endnote carrying the hit URL, a paragraph holding the matching
 * endnote reference, and (best effort) an image. A "REFERENCES" section
 * listing each hit's title and URL follows. The package is saved twice: under
 * {@code absPath + "written/"} and under {@code /var/www/wrt_latest/}.
 *
 * <p>Failures are printed via {@code printStackTrace()} and do not propagate;
 * the output path is returned even if saving failed.
 *
 * <p>Fix: the endnote id counter was advanced with
 * {@code refId.add(BigInteger.ONE)} whose result was discarded
 * ({@code BigInteger} is immutable), so every endnote and reference got id 1.
 * The counter is now reassigned, and the id used for a hit is captured before
 * the increment so the endnote and its reference keep the same id.
 *
 * @param content the hits to render
 * @param title   the document title; also used to derive the file name
 * @return the path of the document under {@code absPath + "written/"}
 */
public String buildWordDoc(List<HitBase> content, String title) {

    String outputDocFinename = absPath + "written/" + title.replace(' ', '_').replace('\"', ' ').trim()
            + ".docx";

    WordprocessingMLPackage wordMLPackage = null;

    List<String> imageURLs = getAllImageSearchResults(title);
    int count = 0;
    BigInteger refId = BigInteger.ONE;
    try {
        wordMLPackage = WordprocessingMLPackage.createPackage();

        CTEndnotes endnotes = null;
        try {
            EndnotesPart ep = new EndnotesPart();
            endnotes = Context.getWmlObjectFactory().createCTEndnotes();
            ep.setJaxbElement(endnotes);
            wordMLPackage.getMainDocumentPart().addTargetPart(ep);
        } catch (InvalidFormatException e1) {
            e1.printStackTrace();
        }

        wordMLPackage.getMainDocumentPart().addStyledParagraphOfText("Title", title.toUpperCase());
        for (HitBase para : content) {
            if (para.getFragments() == null || para.getFragments().size() < 1) { // no found content in this hit
                continue;
            }
            try {
                String processedParaTitle = processParagraphTitle(para.getTitle());

                if ((processedParaTitle != null && !processedParaTitle.endsWith(".."))
                        || StringUtils.isAlphanumeric(processedParaTitle)) {
                    wordMLPackage.getMainDocumentPart().addStyledParagraphOfText("Subtitle",
                            processedParaTitle);
                }
                String paraText = processParagraphText(para.getFragments().toString());
                wordMLPackage.getMainDocumentPart().addParagraphOfText(paraText);

                CTFtnEdn endnote = Context.getWmlObjectFactory().createCTFtnEdn();
                endnotes.getEndnote().add(endnote);

                // Pin this hit's id, then advance the counter for the next hit.
                BigInteger endnoteId = refId;
                endnote.setId(endnoteId);
                refId = refId.add(BigInteger.ONE);

                // NOTE(review): url is inserted into the XML unescaped; a URL containing
                // '&' or '<' would presumably make unmarshalString fail -- confirm.
                String url = para.getUrl();
                String endnoteBody = "<w:p xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" ><w:pPr><w:pStyle w:val=\"EndnoteText\"/></w:pPr><w:r><w:rPr>"
                        + "<w:rStyle w:val=\"EndnoteReference\"/></w:rPr><w:endnoteRef/></w:r><w:r><w:t xml:space=\"preserve\"> "
                        + url + "</w:t></w:r></w:p>";
                try {
                    endnote.getEGBlockLevelElts().add(XmlUtils.unmarshalString(endnoteBody));
                } catch (JAXBException e) {
                    e.printStackTrace();
                }

                // Add the body text referencing it
                String docBody = "<w:p xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" ><w:r><w:t>"
                        + "</w:t></w:r><w:r><w:rPr><w:rStyle w:val=\"EndnoteReference\"/></w:rPr><w:endnoteReference w:id=\""
                        + endnoteId.toString() + "\"/></w:r></w:p>";

                try {
                    wordMLPackage.getMainDocumentPart().addParagraph(docBody);
                } catch (JAXBException e) {
                    e.printStackTrace();
                }

                try {
                    addImageByImageURLToPackage(count, wordMLPackage, imageURLs);
                } catch (Exception ignored) {
                    // images are best effort; no need to report issues
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
            count++;
        }
        // now add URLs
        wordMLPackage.getMainDocumentPart().addStyledParagraphOfText("Subtitle", "REFERENCES");
        for (HitBase para : content) {
            if (para.getFragments() == null || para.getFragments().size() < 1) { // no found content in this hit
                continue;
            }
            try {
                wordMLPackage.getMainDocumentPart().addStyledParagraphOfText("Subtitle", para.getTitle());
                String paraText = para.getUrl();
                wordMLPackage.getMainDocumentPart().addParagraphOfText(paraText);

            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        try {
            wordMLPackage.save(new File(outputDocFinename));
            System.out.println("Finished creating docx =" + outputDocFinename);
        } catch (Exception e) {
            e.printStackTrace();
        }

        try {
            String fileNameToDownload = "/var/www/wrt_latest/"
                    + title.replace(' ', '_').replace('\"', ' ').trim() + ".docx";
            wordMLPackage.save(new File(fileNameToDownload));
            System.out.println("Wrote a doc for download :" + fileNameToDownload);
        } catch (Exception e) {
            e.printStackTrace();
        }

    } catch (Exception e) {
        e.printStackTrace();
    }

    return outputDocFinename;
}

From source file:org.apache.cloudstack.storage.datastore.driver.SolidfirePrimaryDataStoreDriver.java

/**
 * Derives a volume name from a CloudStack volume name by replacing every
 * character that is not a letter or digit with {@code '-'}.
 *
 * <p>Characters are tested one at a time with
 * {@link Character#isLetterOrDigit(char)}. This avoids allocating a
 * one-character substring per position just to call a String-level check,
 * and yields the same result for each character.
 *
 * @param strCloudStackVolumeName the source name; must not be {@code null}
 * @return a string of the same length containing only letters, digits and {@code '-'}
 */
private String getSolidFireVolumeName(String strCloudStackVolumeName) {
    final char specialChar = '-';
    final int length = strCloudStackVolumeName.length();

    StringBuilder strSolidFireVolumeName = new StringBuilder(length);

    for (int i = 0; i < length; i++) {
        char ch = strCloudStackVolumeName.charAt(i);

        strSolidFireVolumeName.append(Character.isLetterOrDigit(ch) ? ch : specialChar);
    }

    return strSolidFireVolumeName.toString();
}

From source file:org.apache.cloudstack.storage.datastore.util.SolidFireUtil.java

/**
 * Derives a volume name from a CloudStack volume name by replacing every
 * character that is not a letter or digit with {@code '-'}.
 *
 * <p>Characters are tested one at a time with
 * {@link Character#isLetterOrDigit(char)}. This avoids allocating a
 * one-character substring per position just to call a String-level check,
 * and yields the same result for each character.
 *
 * @param strCloudStackVolumeName the source name; must not be {@code null}
 * @return a string of the same length containing only letters, digits and {@code '-'}
 */
public static String getSolidFireVolumeName(String strCloudStackVolumeName) {
    final char specialChar = '-';
    final int length = strCloudStackVolumeName.length();

    StringBuilder strSolidFireVolumeName = new StringBuilder(length);

    for (int i = 0; i < length; i++) {
        char ch = strCloudStackVolumeName.charAt(i);

        strSolidFireVolumeName.append(Character.isLetterOrDigit(ch) ? ch : specialChar);
    }

    return strSolidFireVolumeName.toString();
}

From source file:org.apache.roller.weblogger.ui.rendering.model.UtilitiesModel.java

/**
 * Delegates to {@code StringUtils.isAlphanumeric}.
 *
 * @param str the string to check
 * @return the result of {@code StringUtils.isAlphanumeric(str)}
 */
public boolean isAlphanumeric(String str) {
    final boolean alphanumeric = StringUtils.isAlphanumeric(str);
    return alphanumeric;
}