Example usage for java.lang Character isUpperCase

List of usage examples for java.lang Character isUpperCase

Introduction

On this page you can find example usages of java.lang.Character.isUpperCase.

Prototype

public static boolean isUpperCase(int codePoint) 

Source Link

Document

Determines if the specified character (Unicode code point) is an uppercase character.

Usage

From source file:it.cnr.isti.hpc.dexter.disambiguation.TurkishEntityDisambiguator.java

/**
 * Scores every candidate entity of the given spots and returns, for each spot that has
 * candidates, the first candidate of its sorted entity list.
 *
 * <p>The method works in four passes over {@code sml.getEntities()}:
 * <ol>
 * <li>collect type/domain frequencies, shingle texts and description tokens of every distinct
 * candidate id;</li>
 * <li>compute the raw per-candidate feature scores (windowed pairwise similarities, type and
 * domain agreement, suffix, letter case, name distance, popularity, Lesk overlaps) and
 * register them in {@code entityScoreMap};</li>
 * <li>normalise the accumulated scores by the maxima observed in pass two;</li>
 * <li>combine the features into a total score (weighted sum, or {@code RankLib} when
 * {@code ranklib} is set), store it on each match and log one line per candidate.</li>
 * </ol>
 *
 * <p>Side effects: {@code entityScoreMap} and {@code selectedEntities} are re-created on every
 * call, and the score of every {@link EntityMatch} is overwritten.
 *
 * @param localParams request parameters; the entry {@code "text"} is read as the input text
 * @param sml the spots to disambiguate together with their candidate entities
 * @return one entity match per spot that has at least one candidate
 */
@Override
public EntityMatchList disambiguate(DexterLocalParams localParams, SpotMatchList sml) {
    entityScoreMap = new HashMap<String, EntityScores>();
    selectedEntities = new HashSet<String>();
    Multiset<String> entityFrequencyMultiset = HashMultiset.create();

    EntityMatchList entities = sml.getEntities();
    String inputText = localParams.getParams().get("text");

    // Null-safe on purpose: a missing "algorithm.ambigious" property behaves like "false"
    // instead of throwing a NullPointerException deep inside the scoring loop.
    boolean ambiguousMode = "true".equals(Property.getInstance().get("algorithm.ambigious"));

    List<Token> inputTokens = Zemberek.getInstance().disambiguateFindTokens(inputText, false, true);
    List<Double> documentVector = DescriptionEmbeddingAverage.getAverageVectorList(inputText);
    Multiset<String> inputTokensMultiset = HashMultiset.create();
    for (Token token : inputTokens) {
        inputTokensMultiset.add(token.getMorphText());
    }

    Multiset<String> domainMultiset = HashMultiset.create();
    Multiset<String> typeMultiset = HashMultiset.create();
    // Cache of pairwise similarities, keyed by "<feature prefix><id><id2>".
    HashMap<String, Double> entitySimMap = new HashMap<String, Double>();
    HashSet<String> words = new HashSet<String>();
    Multiset<String> leskWords = HashMultiset.create();

    // first pass for finding number of types and domains
    for (int i = 0; i < entities.size(); i++) {
        EntityMatch em = entities.get(i);
        String id = em.getId();
        boolean firstOccurrence = !entityFrequencyMultiset.contains(id);
        entityFrequencyMultiset.add(id);
        if (!firstOccurrence) {
            // Types, domains and description tokens are counted once per distinct id.
            continue;
        }

        Entity entity = em.getEntity();
        words.add(entity.getShingle().getText());
        String type = entity.getPage().getType();
        if (type != null && type.length() > 0) {
            typeMultiset.add(type);
        }
        String domain = entity.getPage().getDomain();
        if (domain != null && domain.length() > 0) {
            domainMultiset.add(domain);
        }

        // Guarded like the second pass: a page may have no description.
        String desc = entity.getPage().getDescription();
        if (desc != null) {
            List<Token> tokens = Zemberek.getInstance().disambiguateFindTokens(desc, false, true);
            for (Token token : tokens) {
                leskWords.add(token.getMorphText());
            }
        }
    }

    // The first element of the "highest count first" copy carries the maximum count.
    int maxDomainCount = 0;
    for (String domain : Multisets.copyHighestCountFirst(domainMultiset).elementSet()) {
        maxDomainCount = domainMultiset.count(domain);
        break;
    }
    int maxTypeCount = 0;
    for (String type : Multisets.copyHighestCountFirst(typeMultiset).elementSet()) {
        maxTypeCount = typeMultiset.count(type);
        break;
    }

    double maxSuffixScore = 0, maxLeskScore = 0, maxSimpleLeskScore = 0, maxLinkScore = 0,
            maxHashInfoboxScore = 0, maxwordvecDescriptionLocalScore = 0, maxHashDescriptionScore = 0,
            maxPopularityScore = 0, maxWordvectorAverage = 0, maxWordvecLinksScore = 0;

    // second pass compute similarities between entities in a window
    int currentSpotIndex = -1;
    SpotMatch currentSpot = null;
    for (int i = 0; i < entities.size(); i++) {
        EntityMatch em = entities.get(i);
        SpotMatch spot = em.getSpot();
        // Identity comparison: the spot index advances whenever the spot object changes.
        if (currentSpot == null || spot != currentSpot) {
            currentSpotIndex++;
            currentSpot = spot;
        }

        String id = em.getId();
        Entity entity = em.getEntity();
        EntityPage page = entity.getPage();
        String domain = page.getDomain();
        String type = page.getType();

        /* windowing algorithm starts */
        // Window of spots [left, right) around the current spot, shifted so that it stays
        // inside [0, sml.size()].
        int left = currentSpotIndex - window;
        int right = currentSpotIndex + window;
        if (left < 0) {
            right -= left;
            left = 0;
        }
        if (right > sml.size()) {
            left += (sml.size()) - right;
            right = sml.size();
            if (left < 0) {
                left = 0;
            }
        }

        double linkScore = 0, hashInfoboxScore = 0, wordvecDescriptionLocalScore = 0, hashDescriptionScore = 0,
                wordvecLinksScore = 0;
        for (int j = left; j < right; j++) {
            SpotMatch sm2 = sml.get(j);
            EntityMatchList entities2 = sm2.getEntities();

            // Outside ambiguous mode only spots with exactly one "w" candidate contribute.
            // The count depends on the spot only, so it is computed once per spot.
            if (!ambiguousMode) {
                int wikiCandidateCount = 0;
                for (EntityMatch candidate : entities2) {
                    if (candidate.getId().startsWith("w")) {
                        wikiCandidateCount++;
                    }
                }
                if (wikiCandidateCount != 1) {
                    continue;
                }
            }

            for (EntityMatch em2 : entities2) {
                String id2 = em2.getId();
                if (spot == em2.getSpot() || id.equals(id2)) {
                    continue;
                }
                EntityPage page2 = em2.getEntity().getPage();

                if (id.startsWith("w") && id2.startsWith("w")) {
                    // Link similarity (Jaccard over the space-separated link lists)
                    double linkSim = 0;
                    if (entitySimMap.containsKey("link" + id + id2)) {
                        linkSim = entitySimMap.get("link" + id + id2);
                    } else {
                        HashSet<String> set1 = Sets.newHashSet(page.getLinks().split(" "));
                        HashSet<String> set2 = Sets.newHashSet(page2.getLinks().split(" "));
                        linkSim = JaccardCalculator.calculateSimilarity(set1, set2);
                        entitySimMap.put("link" + id + id2, linkSim);
                    }
                    linkScore += linkSim;

                    // Entity embedding similarity
                    double eeSim = 0;
                    if (entitySimMap.containsKey("ee" + id + id2)) {
                        eeSim = entitySimMap.get("ee" + id + id2);
                    } else {
                        eeSim = EntityEmbeddingSimilarity.getInstance().getSimilarity(page, page2);
                        entitySimMap.put("ee" + id + id2, eeSim);
                    }
                    hashInfoboxScore += eeSim;

                    // word2vec similarity of the pages' getWord2vec() vectors
                    double w2veclinksSim = 0;
                    if (entitySimMap.containsKey("wl" + id + id2)) {
                        w2veclinksSim = entitySimMap.get("wl" + id + id2);
                    } else {
                        w2veclinksSim = AveragePooling.getInstance().getSimilarity(page.getWord2vec(),
                                page2.getWord2vec());
                        entitySimMap.put("wl" + id + id2, w2veclinksSim);
                    }
                    wordvecLinksScore += w2veclinksSim;
                }

                // Description word2vec similarity
                double word2vecSim = 0;
                if (entitySimMap.containsKey("w2v" + id + id2)) {
                    word2vecSim = entitySimMap.get("w2v" + id + id2);
                } else {
                    word2vecSim = AveragePooling.getInstance().getSimilarity(page2.getDword2vec(),
                            page.getDword2vec());
                    entitySimMap.put("w2v" + id + id2, word2vecSim);
                }
                wordvecDescriptionLocalScore += word2vecSim;

                // Description autoencoder similarity
                double autoVecSim = 0;
                if (entitySimMap.containsKey("a2v" + id + id2)) {
                    autoVecSim = entitySimMap.get("a2v" + id + id2);
                } else {
                    autoVecSim = AveragePooling.getInstance().getSimilarity(page2.getDautoencoder(),
                            page.getDautoencoder());
                    entitySimMap.put("a2v" + id + id2, autoVecSim);
                }
                hashDescriptionScore += autoVecSim;
            }
        }
        if (linkScore > maxLinkScore) {
            maxLinkScore = linkScore;
        }
        if (hashInfoboxScore > maxHashInfoboxScore) {
            maxHashInfoboxScore = hashInfoboxScore;
        }
        if (wordvecDescriptionLocalScore > maxwordvecDescriptionLocalScore) {
            maxwordvecDescriptionLocalScore = wordvecDescriptionLocalScore;
        }
        if (hashDescriptionScore > maxHashDescriptionScore) {
            maxHashDescriptionScore = hashDescriptionScore;
        }
        if (wordvecLinksScore > maxWordvecLinksScore) {
            maxWordvecLinksScore = wordvecLinksScore;
        }
        /* windowing algorithm ends */

        double domainScore = 0;
        if (domainMultiset.size() > 0 && maxDomainCount > 1 && domainMultiset.count(domain) > 1) {
            domainScore = (double) domainMultiset.count(domain) / maxDomainCount;
        }
        double typeScore = 0;
        if (typeMultiset.size() > 0 && maxTypeCount > 1 && typeMultiset.count(type) > 1) {
            typeScore = (double) typeMultiset.count(type) / maxTypeCount;
        }
        if (typeBlackList.contains(type)) {
            typeScore /= 10;
        }

        // The type may be null (see the guards in the first pass and below), so check it
        // before dereferencing.
        double typeContentScore = 0;
        if (type != null && type.length() > 0 && StringUtils.containsIgnoreCase(words.toString(), type)) {
            typeContentScore = 1;
        }

        double typeClassifierScore = TypeClassifier.getInstance().predict(page, page.getTitle(), page.getType(),
                entity.getShingle().getSentence());

        double wordvecDescriptionScore = AveragePooling.getInstance().getSimilarity(documentVector,
                page.getDword2vec());
        if (wordvecDescriptionScore > maxWordvectorAverage) {
            maxWordvectorAverage = wordvecDescriptionScore;
        }

        double suffixScore = 0;
        if (type != null && type.length() > 0) {
            // Distinct mentions of this entity, minus the bare title itself.
            Set<String> suffixes = new HashSet<String>();
            String t = entity.getTitle().toLowerCase(new Locale("tr", "TR"));

            for (int x = 0; x < entities.size(); x++) {
                EntityMatch e2 = entities.get(x);
                if (e2.getId().equals(entity.getId())) {
                    suffixes.add(e2.getMention());
                }
            }
            suffixes.remove(t);
            suffixes.remove(entity.getTitle());
            if (suffixes.size() >= minSuffix) {
                for (String suffix : suffixes) {
                    double sim = gd.calculateSimilarity(suffix, type);
                    suffixScore += sim;
                }
            }
        }
        if (suffixScore > maxSuffixScore) {
            maxSuffixScore = suffixScore;
        }

        // NOTE(review): the first branch is subsumed by the last one; kept as written.
        double letterCaseScore = 0;
        int lc = page.getLetterCase();
        if (StringUtils.isAllLowerCase(em.getMention()) && lc == 0 && id.startsWith("t")) {
            letterCaseScore = 1;
        } else if (StringUtils.isAllUpperCase(em.getMention()) && lc == 1 && id.startsWith("w")) {
            letterCaseScore = 1;
        } else if (Character.isUpperCase(em.getMention().charAt(0)) && lc == 2 && id.startsWith("w")) {
            letterCaseScore = 1;
        } else if (StringUtils.isAllLowerCase(em.getMention()) && id.startsWith("t")) {
            letterCaseScore = 1;
        }

        double nameScore = 1 - LevenshteinDistanceCalculator.calculateDistance(page.getTitle(),
                Zemberek.removeAfterSpostrophe(em.getMention()));

        double popularityScore = page.getRank();
        if (id.startsWith("w")) {
            popularityScore = Math.log10(popularityScore + 1);
            if (popularityScore > maxPopularityScore) {
                maxPopularityScore = popularityScore;
            }
        }

        double leskScore = 0, simpleLeskScore = 0;
        String desc = page.getDescription();
        if (desc != null) {
            List<Token> tokens = Zemberek.getInstance().disambiguateFindTokens(desc, false, true);
            for (Token token : tokens) {
                if (inputTokensMultiset.contains(token.getMorphText())
                        && !TurkishNLP.isStopWord(token.getMorphText())) {
                    simpleLeskScore += inputTokensMultiset.count(token.getMorphText());
                }
                if (leskWords.contains(token.getMorphText()) && !TurkishNLP.isStopWord(token.getMorphText())) {
                    leskScore += leskWords.count(token.getMorphText());
                }
            }
            leskScore /= Math.log(tokens.size() + 1);
            simpleLeskScore /= Math.log(tokens.size() + 1);
            if (leskScore > maxLeskScore) {
                maxLeskScore = leskScore;
            }
            if (simpleLeskScore > maxSimpleLeskScore) {
                maxSimpleLeskScore = simpleLeskScore;
            }
        }

        // Register the scores even when the page has no description; otherwise the later
        // passes would dereference a missing map entry.
        if (!entityScoreMap.containsKey(id)) {
            EntityScores scores = new EntityScores(em, id, popularityScore, nameScore, letterCaseScore,
                    suffixScore, wordvecDescriptionScore, typeContentScore, typeScore, domainScore,
                    hashDescriptionScore, wordvecDescriptionLocalScore, hashInfoboxScore, linkScore,
                    wordvecLinksScore, leskScore, simpleLeskScore, typeClassifierScore);
            entityScoreMap.put(id, scores);
        } else {
            // The same id was already scored for another spot: average the windowed scores.
            EntityScores entityScores = entityScoreMap.get(id);
            entityScores.setHashInfoboxScore((entityScores.getHashInfoboxScore() + hashInfoboxScore) / 2);
            // Average with the previous hash-description score (not the infobox score).
            entityScores.setHashDescriptionScore(
                    (entityScores.getHashDescriptionScore() + hashDescriptionScore) / 2);
            entityScores.setLinkScore((entityScores.getLinkScore() + linkScore) / 2);
            entityScores.setWordvecDescriptionLocalScore(
                    (entityScores.getWordvecDescriptionLocalScore() + wordvecDescriptionLocalScore) / 2);
            entityScores
                    .setWordvecLinksScore((entityScores.getWordvecLinksScore() + wordvecLinksScore) / 2);
            entityScores.setLeskScore((entityScores.getLeskScore() + leskScore) / 2);
        }
    }

    /* normalization and total score calculation starts */
    // Each distinct id is normalised exactly once.
    Set<String> set = new HashSet<String>();
    for (int i = 0; i < entities.size(); i++) {
        EntityMatch em = entities.get(i);
        String id = em.getId();
        EntityScores entityScores = entityScoreMap.get(id);
        if (set.contains(id)) {
            continue;
        }
        if (id.startsWith("w")) {
            if (maxLinkScore > 0 && entityScores.getLinkScore() > 0) {
                entityScores.setLinkScore(entityScores.getLinkScore() / maxLinkScore);
            }
            if (maxHashInfoboxScore > 0 && entityScores.getHashInfoboxScore() > 0) {
                entityScores.setHashInfoboxScore(entityScores.getHashInfoboxScore() / maxHashInfoboxScore);
            }
            if (maxWordvecLinksScore > 0 && entityScores.getWordvecLinksScore() > 0) {
                entityScores.setWordvecLinksScore(entityScores.getWordvecLinksScore() / maxWordvecLinksScore);
            }
            if (maxPopularityScore > 0 && entityScores.getPopularityScore() > 0) {
                entityScores.setPopularityScore(entityScores.getPopularityScore() / maxPopularityScore);
            }
        }
        if (maxwordvecDescriptionLocalScore > 0 && entityScores.getWordvecDescriptionLocalScore() > 0) {
            entityScores.setWordvecDescriptionLocalScore(
                    entityScores.getWordvecDescriptionLocalScore() / maxwordvecDescriptionLocalScore);
        }
        if (maxHashDescriptionScore > 0 && entityScores.getHashDescriptionScore() > 0) {
            entityScores
                    .setHashDescriptionScore(entityScores.getHashDescriptionScore() / maxHashDescriptionScore);
        }
        if (maxWordvectorAverage > 0 && entityScores.getWordvecDescriptionScore() > 0) {
            entityScores.setWordvecDescriptionScore(
                    entityScores.getWordvecDescriptionScore() / maxWordvectorAverage);
        }
        if (maxLeskScore > 0 && entityScores.getLeskScore() > 0) {
            entityScores.setLeskScore(entityScores.getLeskScore() / maxLeskScore);
        }
        if (maxSimpleLeskScore > 0 && entityScores.getSimpleLeskScore() > 0) {
            entityScores.setSimpleLeskScore(entityScores.getSimpleLeskScore() / maxSimpleLeskScore);
        }
        if (maxSuffixScore > 0 && entityScores.getSuffixScore() > 0) {
            entityScores.setSuffixScore(entityScores.getSuffixScore() / maxSuffixScore);
        }
        set.add(id);
    }

    LOGGER.info("\t"
            + "id\tTitle\tURL\tScore\tPopularity\tName\tLesk\tSimpeLesk\tCase\tNoun\tSuffix\tTypeContent\tType\tDomain\twordvecDescription\twordvecDescriptionLocal\thashDescription\thashInfobox\tword2vecLinks\tLink\t\ttypeClassifier\tDescription");
    for (int i = 0; i < entities.size(); i++) {
        EntityMatch em = entities.get(i);
        String id = em.getId();
        EntityScores e = entityScoreMap.get(id);

        // Bonus when the mention's initial letter case agrees with the id prefix:
        // upper-case initial for "w" ids, lower-case initial for "t" ids.
        double wikiScore = 0;
        if (id.startsWith("w") && Character.isUpperCase(em.getMention().charAt(0))) {
            wikiScore = wikiWeight;
        } else if (id.startsWith("t") && Character.isLowerCase(em.getMention().charAt(0))) {
            wikiScore = wikiWeight;
        }

        double totalScore = wikiScore + e.getPopularityScore() * popularityWeight
                + e.getNameScore() * nameWeight + e.getLeskScore() * leskWeight
                + e.getSimpleLeskScore() * simpleLeskWeight + e.getLetterCaseScore() * letterCaseWeight
                + e.getSuffixScore() * suffixWeight + e.getTypeContentScore() * typeContentWeight
                + e.getTypeScore() * typeWeight + e.getDomainScore() * domainWeight
                + e.getWordvecDescriptionScore() * wordvecDescriptionWeight
                + e.getWordvecDescriptionLocalScore() * wordvecDescriptionLocalWeight
                + e.getHashDescriptionScore() * hashDescriptionWeight
                + e.getHashInfoboxScore() * hashInfoboxWeight + e.getWordvecLinksScore() * word2vecLinksWeight
                + e.getLinkScore() * linkWeight + e.getTypeClassifierkScore() * typeClassifierkWeight;
        if (ranklib) {
            // The learned ranking model replaces the hand-weighted sum.
            totalScore = RankLib.getInstance().score(e);
        }

        // Titles whose URL form contains "(" are penalised by half.
        if (em.getEntity().getPage().getUrlTitle().contains("(")) {
            totalScore /= 2;
        }
        em.setScore(totalScore);
        e.setScore(totalScore);

        LOGGER.info("\t" + id + "\t" + em.getEntity().getPage().getTitle() + "\t"
                + em.getEntity().getPage().getUrlTitle() + "\t" + em.getScore() + "\t"
                + e.getPopularityScore() * popularityWeight + "\t" + e.getNameScore() * nameWeight + "\t"
                + e.getLeskScore() * leskWeight + "\t" + e.getSimpleLeskScore() * simpleLeskWeight + "\t"
                + e.getLetterCaseScore() * letterCaseWeight + "\t" + e.getSuffixScore() * suffixWeight + "\t"
                + e.getTypeContentScore() * typeContentWeight + "\t" + e.getTypeScore() * typeWeight + "\t"
                + e.getDomainScore() * domainWeight + "\t"
                + e.getWordvecDescriptionScore() * wordvecDescriptionWeight + "\t"
                + e.getWordvecDescriptionLocalScore() * wordvecDescriptionLocalWeight + "\t"
                + e.getHashDescriptionScore() * hashDescriptionWeight + "\t"
                + e.getHashInfoboxScore() * hashInfoboxWeight + "\t"
                + e.getWordvecLinksScore() * word2vecLinksWeight + "\t" + e.getLinkScore() * linkWeight + "\t"
                + e.getTypeClassifierkScore() * typeClassifierkWeight + "\t"
                + em.getEntity().getPage().getDescription());
    }

    // Pick the first candidate of every spot after sorting its candidate list
    // (presumably best score first; the ordering is defined by EntityMatchList.sort()).
    EntityMatchList eml = new EntityMatchList();
    for (SpotMatch match : sml) {
        EntityMatchList list = match.getEntities();
        if (!list.isEmpty()) {
            list.sort();
            eml.add(list.get(0));
            selectedEntities.add(list.get(0).getId());
        }
    }
    return eml;
}

From source file:io.manasobi.utils.StringUtils.java

/**
 * camel case ? ?? underscore ? ? <br><br>
 *
 * StringUtils.convertToUnderScore("anyframeJavaTest") = "anyframe_java_test"
 *
 * @param camelCase  camel case ? ?/*from   w  w  w .ja v a2s.co  m*/
 * @return underscore  ? ?
 */
public static String convertToUnderScore(String camelCase) {
    String result = "";
    for (int i = 0; i < camelCase.length(); i++) {
        char currentChar = camelCase.charAt(i);
        // This is starting at 1 so the result does not end up with an
        // underscore at the begin of the value
        if (i > 0 && Character.isUpperCase(currentChar)) {
            result = result.concat("_");
        }
        result = result.concat(Character.toString(currentChar).toLowerCase());
    }
    return result;
}

From source file:org.sd.token.StandardTokenizerOptions.java

/**
 * Tells whether the given code point counts as upper case for this tokenizer.
 *
 * A code point qualifies when {@link Character#isUpperCase(int)} accepts it or, failing that,
 * when it is contained in {@code symbolUppersCodePoints} (if that collection is set).
 *
 * @param codePoint the Unicode code point to test
 * @return {@code true} if the code point is treated as upper case
 */
public boolean isUpperCase(int codePoint) {
    if (Character.isUpperCase(codePoint)) {
        return true;
    }
    return symbolUppersCodePoints != null && symbolUppersCodePoints.contains(codePoint);
}

From source file:org.jboss.fuse.mqtt.interop.MqttTestClient.java

/**
 * Builds a readable test name from a class name.
 *
 * Only the part after the last {@code '$'} is used (the whole name when there is none).
 * The first upper-case character is copied unchanged; every later upper-case character is
 * replaced by a space followed by its lower-case form. All other characters are copied as-is.
 *
 * @param className the (possibly nested) class name
 * @return the derived test name
 */
private StringBuffer getTestName(String className) {
    String simpleName = className.substring(className.lastIndexOf('$') + 1);
    StringBuffer testName = new StringBuffer();
    boolean sawUpperCase = false;
    for (int i = 0; i < simpleName.length(); i++) {
        char ch = simpleName.charAt(i);
        if (!Character.isUpperCase(ch)) {
            testName.append(ch);
        } else if (sawUpperCase) {
            testName.append(' ').append(Character.toLowerCase(ch));
        } else {
            testName.append(ch);
            sawUpperCase = true;
        }
    }
    return testName;
}

From source file:opennlp.tools.textsimilarity.TextProcessor.java

/**
 * Builds a short summary of {@code txt} from its longer, sentence-like fragments.
 *
 * Sentences longer than 60 characters that start with an upper-case character and contain
 * none of "By", "Page" or "&gt;&gt;" are concatenated until the collected text exceeds
 * {@code numChars}. The result is then optionally truncated, stripped of a leading marker
 * (time-zone abbreviations, ":" and similar) found within its first 45 characters, and of
 * an unmatched leading ")" fragment. Whenever fewer than five characters remain, or any
 * exception occurs, the (pipe-cleaned) full text is used instead.
 *
 * @param txt the text to summarise
 * @param title the title passed on to {@code trimSentence}
 * @param numChars the length after which no further sentences are collected
 * @param truncateInSentence whether to truncate on a space and pad the end to three periods
 * @return the trimmed summary
 */
public static String generateSummary(String txt, String title, int numChars, boolean truncateInSentence) {
    String finalSummary = "";

    try {
        String[] leadMarkers = { ":", "--", "PM", "MST", "EST", "CST", "PST", "GMT", "AM", "  " };

        txt = txt.replace(" | ", " ").replace(" |", " ");
        ArrayList<String> sentences = TextProcessor.splitToSentences(txt);

        StringBuilder collected = new StringBuilder();
        int position = 0;
        int lastTaken = 0;
        for (String sentence : sentences) {
            position++;
            sentence = trimSentence(sentence, title);

            boolean usable = sentence.length() > 60
                    && !sentence.contains("By")
                    && !sentence.contains("Page")
                    && !sentence.contains(">>")
                    && Character.isUpperCase(sentence.charAt(0));
            if (usable) {
                // Sentences were skipped since the last accepted one: mark the gap with dots.
                boolean gapBefore = lastTaken != 0 && Math.abs(position - lastTaken) != 1;
                if (!gapBefore) {
                    collected.append(" ");
                } else if (collected.toString().endsWith(".")) {
                    collected.append("..");
                } else {
                    collected.append("...");
                }
                collected.append(sentence.trim());
                lastTaken = position;
            }

            if (collected.length() > numChars) {
                break;
            }
        }

        finalSummary = collected.toString().trim();

        if (truncateInSentence) {
            finalSummary = truncateTextOnSpace(finalSummary, numChars);
            int trailingPeriods = countTrailingPeriods(finalSummary);

            // Pad the end to exactly three periods.
            if (trailingPeriods < 3 && finalSummary.length() > 0) {
                StringBuilder padded = new StringBuilder(finalSummary);
                for (int missing = 3 - trailingPeriods; missing > 0; missing--) {
                    padded.append(".");
                }
                finalSummary = padded.toString();
            }
        } else if (finalSummary.endsWith("..")) {
            // Drop the trailing gap marker.
            finalSummary = finalSummary.substring(0, finalSummary.length() - 2);
        }

        // Nothing useful collected: fall back to the full content.
        if (finalSummary.trim().length() < 5) {
            finalSummary = txt;
        }

        // Look for a lead marker within the first 45 characters.
        // NOTE(review): the window end is derived from the untrimmed length but applied to
        // the trimmed text; an out-of-range index lands in the catch block below.
        int cutAt = -1;
        int windowEnd = Math.min(finalSummary.length() - 1, 45);
        String window = finalSummary.trim().substring(0, windowEnd);
        for (String marker : leadMarkers) {
            int found = window.lastIndexOf(marker);
            if (found > cutAt && found < 45) {
                cutAt = found + marker.length();
            }
        }
        if (cutAt > -1) {
            finalSummary = finalSummary.substring(cutAt);
        }

        // Remove a dangling ")..." prefix that has no opening parenthesis before it.
        int closeParenIdx = finalSummary.indexOf(")");
        int openParenIdx = finalSummary.indexOf("(");
        boolean danglingClose = closeParenIdx != -1 && closeParenIdx < 15
                && (openParenIdx == -1 || openParenIdx > closeParenIdx);
        if (danglingClose) {
            finalSummary = finalSummary.substring(closeParenIdx + 1).trim();
        }

        finalSummary = trimPunctuationFromStart(finalSummary);

        // Again fall back to the full content when too little is left.
        if (finalSummary.trim().length() < 5) {
            finalSummary = txt;
        }
    } catch (Exception e) {
        LOG.severe("Problem forming summary for: " + txt);
        LOG.severe("Using full text for the summary" + e);
        finalSummary = txt;
    }

    return finalSummary.trim();
}

From source file:org.romaframework.core.schema.reflection.SchemaClassReflection.java

/**
 * Tells whether a method name matches an ignore pattern.
 *
 * <p>Supported pattern forms, checked in this order:
 * <ul>
 * <li>{@code prefix^*} - the name starts with {@code prefix} and the character right after
 * the prefix is upper case (a name equal to the prefix does not match);</li>
 * <li>{@code prefix*} - the name starts with {@code prefix};</li>
 * <li>{@code *suffix} - the name ends with {@code suffix};</li>
 * <li>anything else - the name equals the pattern.</li>
 * </ul>
 *
 * @param iItem the ignore pattern
 * @param iMethodName the method name to test
 * @return {@code true} if the method name matches the pattern
 */
private static boolean ignoreMethod(String iItem, String iMethodName) {
    if (iItem.endsWith("^*")) {
        String trunk = iItem.substring(0, iItem.length() - 2);
        // The length check prevents charAt() from running past the end when the method
        // name is exactly the trunk.
        return iMethodName.length() > trunk.length() && iMethodName.startsWith(trunk)
                && Character.isUpperCase(iMethodName.charAt(trunk.length()));
    }
    if (iItem.endsWith("*")) {
        return iMethodName.startsWith(iItem.substring(0, iItem.length() - 1));
    }
    if (iItem.startsWith("*")) {
        return iMethodName.endsWith(iItem.substring(1));
    }
    return iMethodName.equals(iItem);
}

From source file:com.avricot.prediction.utils.Steemer.java

/**
  * Checks a term if it can be processed correctly.
  */* w  w  w.  j  a v a 2 s  . co  m*/
  * @return boolean - true if, and only if, the given term consists in letters.
  */
 private boolean isStemmable(String term) {
     boolean upper = false;
     int first = -1;
     for (int c = 0; c < term.length(); c++) {
         // Discard terms that contain non-letter characters.
         if (!Character.isLetter(term.charAt(c))) {
             return false;
         }
         // Discard terms that contain multiple uppercase letters.
         if (Character.isUpperCase(term.charAt(c))) {
             if (upper) {
                 return false;
             }
             // First encountered uppercase letter, set flag and save
             // position.
             else {
                 first = c;
                 upper = true;
             }
         }
     }
     // Discard the term if it contains a single uppercase letter that
     // is not starting the term.
     if (first > 0) {
         return false;
     }
     return true;
 }

From source file:org.codehaus.groovy.grails.commons.GrailsClassUtils.java

/**
 * Converts a property name into its natural language equivalent, e.g. 'firstName' becomes
 * 'First Name'.
 *
 * Lower-case letters and digits extend the current word (a lower-case letter that opens a
 * word is capitalised); an upper-case letter starts a new word unless the current word ends
 * in an upper-case letter. Characters that are neither letters nor digits are dropped.
 *
 * @param name The property name to convert
 * @return The converted property name, with words separated by single spaces
 *
 * @deprecated Use {@link grails.util.GrailsNameUtils#getNaturalName(String)} instead.
 */
@Deprecated
public static String getNaturalName(String name) {
    List<StringBuilder> parts = new ArrayList<StringBuilder>();
    StringBuilder current = null;

    for (char ch : name.toCharArray()) {
        if (current == null) {
            // The first word exists as soon as there is any character at all.
            current = new StringBuilder();
            parts.add(current);
        }

        boolean lower = Character.isLowerCase(ch);
        if (lower || Character.isDigit(ch)) {
            if (lower && current.length() == 0) {
                ch = Character.toUpperCase(ch);
            } else if (current.length() > 1
                    && Character.isUpperCase(current.charAt(current.length() - 1))) {
                // A run of capitals (an acronym) just ended: open a new word.
                current = new StringBuilder();
                parts.add(current);
            }
            current.append(ch);
        } else if (Character.isUpperCase(ch)) {
            boolean atVeryStart = parts.size() == 1 && current.length() == 0;
            if (atVeryStart || Character.isUpperCase(current.charAt(current.length() - 1))) {
                current.append(ch);
            } else {
                current = new StringBuilder().append(ch);
                parts.add(current);
            }
        }
    }

    StringBuilder joined = new StringBuilder();
    for (int k = 0; k < parts.size(); k++) {
        if (k > 0) {
            joined.append(' ');
        }
        joined.append(parts.get(k));
    }
    return joined.toString();
}

From source file:net.paoding.rose.web.impl.module.ModulesBuilderImpl.java

/**
 * Builds the sorted list of interceptor delegates for every {@code ControllerInterceptor} bean
 * name reported by {@code SpringUtils.getBeanNames} for the given context.
 *
 * <p>A bean is skipped when its user class is annotated with {@code Ignored}, when it is annotated
 * with {@code NotForSubModules} and the context's own bean factory holds no definition for it, or
 * when its simple class name does not end with {@code RoseConstants.INTERCEPTOR_SUFFIX} (this last
 * case is logged as an error rather than thrown).
 *
 * @param context the application context whose interceptor beans are inspected
 * @return the delegates, ordered by {@code Collections.sort}
 * @throws IllegalArgumentException if an interceptor whose class is outside
 *         {@code net.paoding.rose.} is named 'rose' or 'rose' followed by an uppercase letter
 */
private List<InterceptorDelegate> findInterceptors(XmlWebApplicationContext context) {
    final String reservedPrefix = "rose";
    String[] beanNames = SpringUtils.getBeanNames(context.getBeanFactory(), ControllerInterceptor.class);
    ArrayList<InterceptorDelegate> delegates = new ArrayList<InterceptorDelegate>(beanNames.length);

    for (String beanName : beanNames) {
        ControllerInterceptor interceptor = (ControllerInterceptor) context.getBean(beanName);
        Class<?> userClass = ClassUtils.getUserClass(interceptor);

        // Explicitly excluded interceptors.
        if (userClass.isAnnotationPresent(Ignored.class)) {
            if (logger.isDebugEnabled()) {
                logger.debug("Ignored interceptor (Ignored):" + interceptor);
            }
            continue;
        }

        // Interceptors not defined in this context's own bean factory and marked NotForSubModules.
        if (userClass.isAnnotationPresent(NotForSubModules.class)
                && !context.getBeanFactory().containsBeanDefinition(beanName)) {
            if (logger.isDebugEnabled()) {
                logger.debug("Ignored interceptor (NotForSubModules):" + interceptor);
            }
            continue;
        }

        // Naming convention violation: report it and move on instead of failing the whole scan.
        if (!userClass.getSimpleName().endsWith(RoseConstants.INTERCEPTOR_SUFFIX)) {
            logger.error("", new IllegalArgumentException("Interceptor must be end with '"
                    + RoseConstants.INTERCEPTOR_SUFFIX + "': " + userClass.getName()));
            continue;
        }

        InterceptorBuilder builder = new InterceptorBuilder(interceptor);
        Interceptor annotation = userClass.getAnnotation(Interceptor.class);
        if (annotation != null) {
            builder.oncePerRequest(annotation.oncePerRequest());
        }

        // Auto-named beans take their name from the class; others keep the bean name.
        String baseName = beanName.startsWith(AUTO_BEAN_NAME_PREFIX)
                ? StringUtils.uncapitalize(userClass.getSimpleName())
                : beanName;
        String interceptorName = StringUtils.removeEnd(baseName, RoseConstants.INTERCEPTOR_SUFFIX);

        // "rose" itself, or "rose" followed by an uppercase letter, is a reserved name.
        boolean usesReservedName = interceptorName.startsWith(reservedPrefix)
                && (interceptorName.length() == reservedPrefix.length()
                        || Character.isUpperCase(interceptorName.charAt(reservedPrefix.length())));
        if (usesReservedName && !userClass.getName().startsWith("net.paoding.rose.")) {
            throw new IllegalArgumentException("illegal interceptor name '" + interceptorName + "' for "
                    + userClass.getName() + ": don't starts with 'rose', it's reserved");
        }

        builder.name(interceptorName);
        InterceptorDelegate delegate = builder.build();
        delegates.add(delegate);

        if (logger.isDebugEnabled()) {
            int priority = (interceptor instanceof Ordered) ? ((Ordered) interceptor).getPriority() : 0;
            logger.debug("recognized interceptor[priority=" + priority + "]: "
                    + delegate.getName() + "=" + userClass.getName());
        }
    }

    Collections.sort(delegates);
    throwExceptionIfDuplicatedNames(delegates);
    return delegates;
}

From source file:com.laxser.blitz.web.impl.module.ModulesBuilderImpl.java

/**
 * Builds the sorted list of interceptor delegates for every {@code ControllerInterceptor} bean
 * name reported by {@code SpringUtils.getBeanNames} for the given context.
 *
 * <p>A bean is skipped when its user class is annotated with {@code Ignored}, when it is annotated
 * with {@code NotForSubModules} and the context's own bean factory holds no definition for it, or
 * when its simple class name does not end with {@code BlitzConstants.INTERCEPTOR_SUFFIX} (this last
 * case is logged as an error rather than thrown).
 *
 * @param context the application context whose interceptor beans are inspected
 * @return the delegates, ordered by {@code Collections.sort}
 * @throws IllegalArgumentException if an interceptor whose class is outside
 *         {@code net.paoding.rose.} is named 'rose' or 'rose' followed by an uppercase letter
 */
private List<InterceptorDelegate> findInterceptors(XmlWebApplicationContext context) {
    // NOTE(review): the reserved prefix and the exempt package below still refer to "rose" although
    // this class uses BlitzConstants -- confirm whether that is intended.
    final String reservedPrefix = "rose";
    String[] beanNames = SpringUtils.getBeanNames(context.getBeanFactory(), ControllerInterceptor.class);
    ArrayList<InterceptorDelegate> delegates = new ArrayList<InterceptorDelegate>(beanNames.length);

    for (String beanName : beanNames) {
        ControllerInterceptor interceptor = (ControllerInterceptor) context.getBean(beanName);
        Class<?> userClass = ClassUtils.getUserClass(interceptor);

        // Explicitly excluded interceptors.
        if (userClass.isAnnotationPresent(Ignored.class)) {
            if (logger.isDebugEnabled()) {
                logger.debug("Ignored interceptor (Ignored):" + interceptor);
            }
            continue;
        }

        // Interceptors not defined in this context's own bean factory and marked NotForSubModules.
        if (userClass.isAnnotationPresent(NotForSubModules.class)
                && !context.getBeanFactory().containsBeanDefinition(beanName)) {
            if (logger.isDebugEnabled()) {
                logger.debug("Ignored interceptor (NotForSubModules):" + interceptor);
            }
            continue;
        }

        // Naming convention violation: report it and move on instead of failing the whole scan.
        if (!userClass.getSimpleName().endsWith(BlitzConstants.INTERCEPTOR_SUFFIX)) {
            logger.error("", new IllegalArgumentException("Interceptor must be end with '"
                    + BlitzConstants.INTERCEPTOR_SUFFIX + "': " + userClass.getName()));
            continue;
        }

        InterceptorBuilder builder = new InterceptorBuilder(interceptor);
        Interceptor annotation = userClass.getAnnotation(Interceptor.class);
        if (annotation != null) {
            builder.oncePerRequest(annotation.oncePerRequest());
        }

        // Auto-named beans take their name from the class; others keep the bean name.
        String baseName = beanName.startsWith(AUTO_BEAN_NAME_PREFIX)
                ? StringUtils.uncapitalize(userClass.getSimpleName())
                : beanName;
        String interceptorName = StringUtils.removeEnd(baseName, BlitzConstants.INTERCEPTOR_SUFFIX);

        // "rose" itself, or "rose" followed by an uppercase letter, is a reserved name.
        boolean usesReservedName = interceptorName.startsWith(reservedPrefix)
                && (interceptorName.length() == reservedPrefix.length()
                        || Character.isUpperCase(interceptorName.charAt(reservedPrefix.length())));
        if (usesReservedName && !userClass.getName().startsWith("net.paoding.rose.")) {
            throw new IllegalArgumentException("illegal interceptor name '" + interceptorName + "' for "
                    + userClass.getName() + ": don't starts with 'rose', it's reserved");
        }

        builder.name(interceptorName);
        InterceptorDelegate delegate = builder.build();
        delegates.add(delegate);

        if (logger.isDebugEnabled()) {
            int priority = (interceptor instanceof Ordered) ? ((Ordered) interceptor).getPriority() : 0;
            logger.debug("recognized interceptor[priority=" + priority + "]: "
                    + delegate.getName() + "=" + userClass.getName());
        }
    }

    Collections.sort(delegates);
    throwExceptionIfDuplicatedNames(delegates);
    return delegates;
}