Example usage for org.apache.commons.lang3 StringUtils isAllLowerCase

List of usage examples for org.apache.commons.lang3 StringUtils isAllLowerCase

Introduction

On this page you can find example usages of org.apache.commons.lang3.StringUtils.isAllLowerCase.

Prototype

public static boolean isAllLowerCase(final CharSequence cs) 

Source Link

Document

Checks if the CharSequence contains only lowercase characters.

A null input returns false.

Usage

From source file:com.qq.tars.validate.SetAreaValidator.java

/**
 * Accepts a value only when it is non-blank, consists solely of letters and is
 * entirely lower case.
 *
 * @param value   candidate string to validate; may be {@code null}
 * @param context validator context handed in by the caller (not consulted here)
 * @return {@code true} when all three checks pass, {@code false} otherwise
 */
@Override
public boolean isValid(String value, ConstraintValidatorContext context) {
    // Reject early: each check below is only reached when the previous one passed.
    if (StringUtils.isBlank(value)) {
        return false;
    }
    if (!StringUtils.isAlpha(value)) {
        return false;
    }
    return StringUtils.isAllLowerCase(value);
}

From source file:com.nridge.core.io.csv.DataAnalyzerCSV.java

/**
 * This method will stream, analyze the CSV row data and return a
 * summary detail report table.  This method assumes that
 * the CSV file has a header identifying the name of columns.
 *
 * @param aPathFileName CSV file name to stream in (with a header).
 *
 * @return Detail table produced by the data analyzer after all rows were scanned.
 *
 * @throws IOException Thrown if an I/O issue is detected, if the file has no
 *                     header row, or if any other failure occurs while the
 *                     rows are processed (the original exception is attached
 *                     as the cause).
 */
public DataTable streamAnalyzeData(String aPathFileName) throws IOException {
    // NOTE(review): FileReader decodes with the default charset - confirm that is intended.
    // Both readers are managed by try-with-resources so the CSV reader itself is
    // closed as well, even when parsing fails part-way through the file.
    try (FileReader fileReader = new FileReader(aPathFileName);
            CsvListReader csvListReader = new CsvListReader(fileReader, CsvPreference.EXCEL_PREFERENCE)) {
        String[] columnHeaders = csvListReader.getHeader(true);
        if (columnHeaders == null) {
            throw new IOException(aPathFileName + ": Does not have a header row.");
        }

        // An all-lower-case header is used as the field name (its title is derived);
        // any other header is used as the title (its name is derived).
        DataBag columnBag = new DataBag("Data Analysis Table");
        for (String columnName : columnHeaders) {
            DataField columnField;
            if (StringUtils.isAllLowerCase(columnName)) {
                columnField = new DataTextField(columnName, Field.nameToTitle(columnName));
            } else {
                columnField = new DataTextField(Field.titleToName(columnName), columnName);
            }
            columnBag.add(columnField);
        }
        int columnCount = columnBag.count();
        DataAnalyzer dataAnalyzer = new DataAnalyzer(columnBag);

        List<String> rowCells = csvListReader.read();
        while (rowCells != null) {
            DataBag dataBag = new DataBag(columnBag);
            // Rows may be shorter or longer than the header; only the overlap is read.
            int adjCount = Math.min(rowCells.size(), columnCount);
            for (int col = 0; col < adjCount; col++) {
                String cellValue = rowCells.get(col);
                if (StringUtils.isNotEmpty(cellValue)) {
                    DataField dataField = dataBag.getByOffset(col);
                    if (dataField.isMultiValue()) {
                        // Split on the field's own delimiter when it has one, otherwise on the pipe character.
                        String mvDelimiter = dataField.getFeature(Field.FEATURE_MV_DELIMITER);
                        if (StringUtils.isNotEmpty(mvDelimiter)) {
                            dataField.setValues(StrUtl.expandToList(cellValue, mvDelimiter.charAt(0)));
                        } else {
                            dataField.setValues(StrUtl.expandToList(cellValue, StrUtl.CHAR_PIPE));
                        }
                    } else {
                        dataBag.setValueByName(dataField.getName(), cellValue);
                    }
                }
            }
            dataAnalyzer.scan(dataBag);
            rowCells = csvListReader.read();
        }

        return dataAnalyzer.getDetails();
    } catch (Exception e) {
        // Keep the original exception as the cause so its stack trace is not lost.
        throw new IOException(aPathFileName + ": " + e.getMessage(), e);
    }
}

From source file:chat.viska.xmpp.Connection.java

/**
 * Constructs a {@link Connection} with full server URI.
 *
 * <p>The scheme is stored in lower case. The conversion uses
 * {@link java.util.Locale#ROOT} so the result does not depend on the default
 * locale of the JVM (for example, an upper-case {@code I} must not become a
 * dotless {@code ı} under a Turkish locale).
 *
 * @param protocol Protocol of the connection; must not be {@code null} or {@link Protocol#TCP}.
 * @param scheme URI scheme; must not be blank.
 * @param domain Server domain; must not be blank.
 * @param port Use {@code -1} to indicate no port.
 * @param path URI path; a blank or {@code null} value is stored as an empty string.
 * @throws NullPointerException If {@code protocol} is {@code null}.
 * @throws IllegalArgumentException If {@link Protocol#TCP} is specified.
 */
public Connection(final Protocol protocol, final String scheme, final String domain, final int port,
        @Nullable final String path) {
    Objects.requireNonNull(protocol, "`protocol` is absent.");
    if (protocol == Protocol.TCP) {
        throw new IllegalArgumentException("TCP protocol is not suitable for this constructor.");
    }
    this.protocol = protocol;
    Validate.notBlank(scheme, "`scheme` is absent.");
    // Lower-casing an already lower-case scheme is a no-op, so no pre-check is needed.
    this.scheme = scheme.toLowerCase(java.util.Locale.ROOT);
    Validate.notBlank(domain, "`domain` is absent.");
    this.domain = domain;
    this.port = port;
    this.path = StringUtils.defaultIfBlank(path, "");
    this.tlsMethod = TlsMethod.NONE;
}

From source file:ita.parthenope.twitternlp.semantic.CloudOfWords.java

/**
 * Adjusts the letter case of the current keyword and returns the stored keyword.
 *
 * <ul>
 * <li>The exact keyword {@code "Brexit"} is stored in upper case.</li>
 * <li>A keyword made only of lower-case characters is stored as returned by
 *     {@code StopWordRemover.capitalizeFirstLetter}.</li>
 * <li>Any other keyword (including {@code null}) is left untouched.</li>
 * </ul>
 *
 * @return the value of the {@code parolaChiave} field after the adjustment
 */
private String analyzeParolaChiave() {
    final String keyword = getParolaChiave();
    // Constant-first equals keeps a null keyword from raising a NullPointerException.
    if ("Brexit".equals(keyword)) {
        // Locale.ROOT: upper-casing must not depend on the JVM's default locale
        // (a Turkish locale would otherwise turn "i" into a dotted capital "İ").
        this.parolaChiave = keyword.toUpperCase(java.util.Locale.ROOT);
    } else if (StringUtils.isAllLowerCase(keyword)) {
        StopWordRemover stopword = new StopWordRemover("resources/stopwords_en.txt");
        this.parolaChiave = stopword.capitalizeFirstLetter(keyword);
    }

    return parolaChiave;
}

From source file:it.cnr.isti.hpc.dexter.disambiguation.TurkishEntityDisambiguator.java

/**
 * Scores every candidate entity of the given spots and returns the top entity of each spot.
 *
 * <p>The method makes several passes over {@code sml.getEntities()}:
 * <ol>
 * <li>count the types and domains of the distinct entities and collect the tokens of
 *     their descriptions;</li>
 * <li>compute the per-entity feature scores (window based similarities, type/domain,
 *     suffix, letter case, name, popularity, Lesk overlap) while tracking the maximum of
 *     each feature;</li>
 * <li>normalize the stored scores by those maxima;</li>
 * <li>combine the features into a total score (weighted sum, or the RankLib score when
 *     {@code ranklib} is set), log it and store it on the match.</li>
 * </ol>
 * Finally the entity list of every spot is sorted and its first element is selected.
 *
 * <p>Side effects: {@code entityScoreMap} and {@code selectedEntities} are replaced by
 * fresh collections and filled during the call.
 *
 * @param localParams request parameters; the {@code "text"} parameter is read as the input text
 * @param sml         spots together with their candidate entities
 * @return a list holding the first entity (after sorting) of every spot that has candidates
 */
@Override
public EntityMatchList disambiguate(DexterLocalParams localParams, SpotMatchList sml) {
    entityScoreMap = new HashMap<String, EntityScores>();
    selectedEntities = new HashSet<String>();
    Multiset<String> entityFrequencyMultiset = HashMultiset.create();

    EntityMatchList entities = sml.getEntities();
    String inputText = localParams.getParams().get("text");
    String algorithm = Property.getInstance().get("algorithm");

    String ambigious = Property.getInstance().get("algorithm.ambigious");
    // Constant-first comparison: a missing property is treated as "false" instead of
    // raising a NullPointerException inside the window loop.
    boolean ambiguousAllowed = "true".equals(ambigious);

    List<Token> inputTokens = Zemberek.getInstance().disambiguateFindTokens(inputText, false, true);
    List<Double> documentVector = DescriptionEmbeddingAverage.getAverageVectorList(inputText);
    Multiset<String> inputTokensMultiset = HashMultiset.create();
    for (Token token : inputTokens) {
        inputTokensMultiset.add(token.getMorphText());
    }

    Multiset<String> domainMultiset = HashMultiset.create();
    Multiset<String> typeMultiset = HashMultiset.create();
    // Cache of pairwise similarities, keyed by "<kind>" + id + id2.
    HashMap<String, Double> entitySimMap = new HashMap<String, Double>();
    HashSet<String> words = new HashSet<String>();
    Multiset<String> leskWords = HashMultiset.create();

    // first pass for finding number of types and domains
    for (int i = 0; i < entities.size(); i++) {
        EntityMatch em = entities.get(i);
        String id = em.getId();
        if (!entityFrequencyMultiset.contains(id)) {
            // First occurrence of this entity: its type, domain and description are counted once.
            entityFrequencyMultiset.add(id);
            Entity entity = em.getEntity();
            words.add(entity.getShingle().getText());
            String type = entity.getPage().getType();
            if (type != null && type.length() > 0) {
                typeMultiset.add(type);
            }
            String domain = entity.getPage().getDomain();
            if (domain != null && domain.length() > 0) {
                domainMultiset.add(domain);
            }

            String desc = entity.getPage().getDescription();
            List<Token> tokens = Zemberek.getInstance().disambiguateFindTokens(desc, false, true);
            for (Token token : tokens) {
                leskWords.add(token.getMorphText());
            }

        } else {
            entityFrequencyMultiset.add(id);
        }
    }

    // Only the first (most frequent) element is needed, hence the immediate break.
    int maxDomainCount = 0;
    for (String domain : Multisets.copyHighestCountFirst(domainMultiset).elementSet()) {
        maxDomainCount = domainMultiset.count(domain);
        break;
    }
    int maxTypeCount = 0;
    for (String type : Multisets.copyHighestCountFirst(typeMultiset).elementSet()) {
        maxTypeCount = typeMultiset.count(type);
        break;
    }

    double maxSuffixScore = 0, maxLeskScore = 0, maxSimpleLeskScore = 0, maxLinkScore = 0,
            maxHashInfoboxScore = 0, maxwordvecDescriptionLocalScore = 0, maxHashDescriptionScore = 0,
            maxPopularityScore = 0, maxWordvectorAverage = 0, maxWordvecLinksScore = 0;
    // second pass compute similarities between entities in a window
    int currentSpotIndex = -1;
    SpotMatch currentSpot = null;
    for (int i = 0; i < entities.size(); i++) {
        EntityMatch em = entities.get(i);
        SpotMatch spot = em.getSpot();
        // Reference comparison: the spot index advances whenever the spot object changes.
        if (currentSpot == null || spot != currentSpot) {
            currentSpotIndex++;
            currentSpot = spot;
        }

        String id = em.getId();
        Entity entity = entities.get(i).getEntity();
        EntityPage page = entities.get(i).getEntity().getPage();
        String domain = page.getDomain();
        String type = page.getType();
        Shingle shingle = entity.getShingle();

        /* windowing algorithm starts */
        // The window [left, right) is centred on the current spot and shifted back
        // inside [0, sml.size()] when it overlaps either end.
        int left = currentSpotIndex - window;
        int right = currentSpotIndex + window;
        if (left < 0) {
            right -= left;
            left = 0;
        }
        if (right > sml.size()) {
            left += (sml.size()) - right;
            right = sml.size();
            if (left < 0) {
                left = 0;
            }
        }

        double linkScore = 0, hashInfoboxScore = 0, wordvecDescriptionLocalScore = 0, hashDescriptionScore = 0,
                wordvecLinksScore = 0;
        for (int j = left; j < right; j++) {
            SpotMatch sm2 = sml.get(j);
            EntityMatchList entities2 = sm2.getEntities();
            for (EntityMatch em2 : entities2) {
                String id2 = em2.getId();
                EntityPage page2 = em2.getEntity().getPage();
                // Number of candidates of the neighbouring spot whose id starts with "w";
                // only counted when ambiguous neighbours are not allowed.
                int counter = 0;
                if (!ambiguousAllowed) {
                    for (EntityMatch entityMatch : entities2) {
                        if (entityMatch.getId().startsWith("w")) {
                            counter++;
                        }
                    }
                }

                if ((ambiguousAllowed || counter == 1) && em.getSpot() != em2.getSpot()
                        && !id.equals(id2)) {
                    // Link Similarity calculation starts
                    double linkSim = 0;
                    if (id.startsWith("w") && id2.startsWith("w")) {
                        if (entitySimMap.containsKey("link" + id + id2)) {
                            linkSim = entitySimMap.get("link" + id + id2);
                        } else {
                            HashSet<String> set1 = Sets.newHashSet(page.getLinks().split(" "));
                            HashSet<String> set2 = Sets.newHashSet(page2.getLinks().split(" "));
                            linkSim = JaccardCalculator.calculateSimilarity(set1, set2);
                            entitySimMap.put("link" + id + id2, linkSim);
                        }
                        linkScore += linkSim;
                        // Link Similarity calculation ends
                    }
                    // Entity embedding similarity calculation starts
                    double eeSim = 0;
                    if (id.startsWith("w") && id2.startsWith("w")) {
                        if (entitySimMap.containsKey("ee" + id + id2)) {
                            eeSim = entitySimMap.get("ee" + id + id2);
                        } else {
                            eeSim = EntityEmbeddingSimilarity.getInstance().getSimilarity(page, page2);
                            entitySimMap.put("ee" + id + id2, eeSim);
                        }
                        hashInfoboxScore += eeSim;
                    }
                    double w2veclinksSim = 0;
                    if (id.startsWith("w") && id2.startsWith("w")) {
                        if (entitySimMap.containsKey("wl" + id + id2)) {
                            w2veclinksSim = entitySimMap.get("wl" + id + id2);
                        } else {
                            w2veclinksSim = AveragePooling.getInstance().getSimilarity(page.getWord2vec(),
                                    page2.getWord2vec());
                            entitySimMap.put("wl" + id + id2, w2veclinksSim);
                        }
                        wordvecLinksScore += w2veclinksSim;
                    }

                    // Entity embedding similarity calculation ends

                    // Description word2vec similarity calculation
                    // starts
                    double word2vecSim = 0;

                    if (entitySimMap.containsKey("w2v" + id + id2)) {
                        word2vecSim = entitySimMap.get("w2v" + id + id2);
                    } else {
                        word2vecSim = AveragePooling.getInstance().getSimilarity(page2.getDword2vec(),
                                page.getDword2vec());
                        entitySimMap.put("w2v" + id + id2, word2vecSim);
                    }
                    wordvecDescriptionLocalScore += word2vecSim;
                    // Description word2vec similarity calculation ends

                    // Description autoencoder similarity calculation
                    // starts
                    double autoVecSim = 0;

                    if (entitySimMap.containsKey("a2v" + id + id2)) {
                        autoVecSim = entitySimMap.get("a2v" + id + id2);
                    } else {
                        autoVecSim = AveragePooling.getInstance().getSimilarity(page2.getDautoencoder(),
                                page.getDautoencoder());
                        entitySimMap.put("a2v" + id + id2, autoVecSim);
                    }
                    hashDescriptionScore += autoVecSim;
                    // Description autoencoder similarity calculation
                    // ends

                }
            }
        }
        if (linkScore > maxLinkScore) {
            maxLinkScore = linkScore;
        }
        if (hashInfoboxScore > maxHashInfoboxScore) {
            maxHashInfoboxScore = hashInfoboxScore;
        }
        if (wordvecDescriptionLocalScore > maxwordvecDescriptionLocalScore) {
            maxwordvecDescriptionLocalScore = wordvecDescriptionLocalScore;
        }
        if (hashDescriptionScore > maxHashDescriptionScore) {
            maxHashDescriptionScore = hashDescriptionScore;
        }
        if (wordvecLinksScore > maxWordvecLinksScore) {
            maxWordvecLinksScore = wordvecLinksScore;
        }

        /* windowing algorithm ends */

        double domainScore = 0;
        if (domainMultiset.size() > 0 && maxDomainCount > 1 && domainMultiset.count(domain) > 1) {
            domainScore = (double) domainMultiset.count(domain) / maxDomainCount;
        }
        double typeScore = 0;
        if (typeMultiset.size() > 0 && maxTypeCount > 1 && typeMultiset.count(type) > 1) {
            typeScore = (double) typeMultiset.count(type) / maxTypeCount;
        }
        if (typeBlackList.contains(type)) {
            typeScore /= 10;
        }

        double typeContentScore = 0;
        // The type may be null (the other uses in this method guard against it as well).
        if (type != null && type.length() > 0 && StringUtils.containsIgnoreCase(words.toString(), type)) {
            typeContentScore = 1;
        }

        double typeClassifierScore = TypeClassifier.getInstance().predict(page, page.getTitle(), page.getType(),
                entity.getShingle().getSentence());

        double wordvecDescriptionScore = AveragePooling.getInstance().getSimilarity(documentVector,
                page.getDword2vec());
        if (wordvecDescriptionScore > maxWordvectorAverage) {
            maxWordvectorAverage = wordvecDescriptionScore;
        }

        double suffixScore = 0;

        if (type != null && type.length() > 0) {
            // Collect the distinct mentions of this entity, excluding the bare title
            // (both as written and lower-cased with the Turkish locale).
            Set<String> suffixes = new HashSet<String>();
            String t = entity.getTitle().toLowerCase(new Locale("tr", "TR"));

            for (int x = 0; x < entities.size(); x++) {
                EntityMatch e2 = entities.get(x);
                if (e2.getId().equals(entity.getId())) {
                    suffixes.add(e2.getMention());
                }
            }
            suffixes.remove(t);
            suffixes.remove(entity.getTitle());
            if (suffixes.size() >= minSuffix) {
                for (String suffix : suffixes) {
                    double sim = gd.calculateSimilarity(suffix, type);
                    suffixScore += sim;
                }
            }
        }

        if (suffixScore > maxSuffixScore) {
            maxSuffixScore = suffixScore;
        }

        double letterCaseScore = 0;
        int lc = page.getLetterCase();
        if (StringUtils.isAllLowerCase(em.getMention()) && lc == 0 && id.startsWith("t")) {
            letterCaseScore = 1;
        } else if (StringUtils.isAllUpperCase(em.getMention()) && lc == 1 && id.startsWith("w")) {
            letterCaseScore = 1;
        } else if (Character.isUpperCase(em.getMention().charAt(0)) && lc == 2 && id.startsWith("w")) {
            letterCaseScore = 1;
        } else if (StringUtils.isAllLowerCase(em.getMention()) && id.startsWith("t")) {
            letterCaseScore = 1;
        }

        double nameScore = 1 - LevenshteinDistanceCalculator.calculateDistance(page.getTitle(),
                Zemberek.removeAfterSpostrophe(em.getMention()));

        double popularityScore = page.getRank();
        if (id.startsWith("w")) {
            popularityScore = Math.log10(popularityScore + 1);
            if (popularityScore > maxPopularityScore) {
                maxPopularityScore = popularityScore;
            }
        }

        double leskScore = 0, simpleLeskScore = 0;

        // NOTE(review): score entries are only created when the description is non-null,
        // but the normalization and total-score passes below dereference the map entry
        // unconditionally - confirm that descriptions are never null here.
        String desc = em.getEntity().getPage().getDescription();
        if (desc != null) {
            List<Token> tokens = Zemberek.getInstance().disambiguateFindTokens(desc, false, true);
            for (Token token : tokens) {
                if (inputTokensMultiset.contains(token.getMorphText())
                        && !TurkishNLP.isStopWord(token.getMorphText())) {
                    simpleLeskScore += inputTokensMultiset.count(token.getMorphText());
                }
                if (leskWords.contains(token.getMorphText()) && !TurkishNLP.isStopWord(token.getMorphText())) {
                    leskScore += leskWords.count(token.getMorphText());
                }

            }
            leskScore /= Math.log(tokens.size() + 1);
            simpleLeskScore /= Math.log(tokens.size() + 1);
            if (leskScore > maxLeskScore) {
                maxLeskScore = leskScore;
            }
            if (simpleLeskScore > maxSimpleLeskScore) {
                maxSimpleLeskScore = simpleLeskScore;
            }

            if (!entityScoreMap.containsKey(id)) {
                EntityScores scores = new EntityScores(em, id, popularityScore, nameScore, letterCaseScore,
                        suffixScore, wordvecDescriptionScore, typeContentScore, typeScore, domainScore,
                        hashDescriptionScore, wordvecDescriptionLocalScore, hashInfoboxScore, linkScore,
                        wordvecLinksScore, leskScore, simpleLeskScore, typeClassifierScore);
                entityScoreMap.put(id, scores);
            } else {
                // Entity seen before: average the window based scores with the stored ones.
                EntityScores entityScores = entityScoreMap.get(id);
                entityScores.setHashInfoboxScore((entityScores.getHashInfoboxScore() + hashInfoboxScore) / 2);
                // Each running average must be based on its own previous value
                // (the hash-description score, not the hash-infobox score).
                entityScores.setHashDescriptionScore(
                        (entityScores.getHashDescriptionScore() + hashDescriptionScore) / 2);
                entityScores.setLinkScore((entityScores.getLinkScore() + linkScore) / 2);
                entityScores.setWordvecDescriptionLocalScore(
                        (entityScores.getWordvecDescriptionLocalScore() + wordvecDescriptionLocalScore) / 2);
                entityScores
                        .setWordvecLinksScore((entityScores.getWordvecLinksScore() + wordvecLinksScore) / 2);
                entityScores.setLeskScore((entityScores.getLeskScore() + leskScore) / 2);

            }

        }
    }
    /* normalization and total score calculation starts */
    // Every distinct id is normalized exactly once; "set" remembers the ids already handled.
    Set<String> set = new HashSet<String>();
    for (int i = 0; i < entities.size(); i++) {
        EntityMatch em = entities.get(i);
        String id = em.getId();
        EntityScores entityScores = entityScoreMap.get(id);
        if (set.contains(id)) {
            continue;
        }
        if (id.startsWith("w")) {
            if (maxLinkScore > 0 && entityScores.getLinkScore() > 0) {
                entityScores.setLinkScore(entityScores.getLinkScore() / maxLinkScore);
            }
            if (maxHashInfoboxScore > 0 && entityScores.getHashInfoboxScore() > 0) {
                entityScores.setHashInfoboxScore(entityScores.getHashInfoboxScore() / maxHashInfoboxScore);
            }
            if (maxWordvecLinksScore > 0 && entityScores.getWordvecLinksScore() > 0) {
                entityScores.setWordvecLinksScore(entityScores.getWordvecLinksScore() / maxWordvecLinksScore);
            }
            if (maxPopularityScore > 0 && entityScores.getPopularityScore() > 0) {
                entityScores.setPopularityScore(entityScores.getPopularityScore() / maxPopularityScore);
            }
        }
        if (maxwordvecDescriptionLocalScore > 0 && entityScores.getWordvecDescriptionLocalScore() > 0) {
            entityScores.setWordvecDescriptionLocalScore(
                    entityScores.getWordvecDescriptionLocalScore() / maxwordvecDescriptionLocalScore);
        }
        if (maxHashDescriptionScore > 0 && entityScores.getHashDescriptionScore() > 0) {
            entityScores
                    .setHashDescriptionScore(entityScores.getHashDescriptionScore() / maxHashDescriptionScore);
        }
        if (maxWordvectorAverage > 0 && entityScores.getWordvecDescriptionScore() > 0) {
            entityScores.setWordvecDescriptionScore(
                    entityScores.getWordvecDescriptionScore() / maxWordvectorAverage);
        }
        if (maxLeskScore > 0 && entityScores.getLeskScore() > 0) {
            entityScores.setLeskScore(entityScores.getLeskScore() / maxLeskScore);
        }
        if (maxSimpleLeskScore > 0 && entityScores.getSimpleLeskScore() > 0) {
            entityScores.setSimpleLeskScore(entityScores.getSimpleLeskScore() / maxSimpleLeskScore);
        }
        if (maxSuffixScore > 0 && entityScores.getSuffixScore() > 0) {
            entityScores.setSuffixScore(entityScores.getSuffixScore() / maxSuffixScore);
        }
        set.add(id);
    }

    LOGGER.info("\t"
            + "id\tTitle\tURL\tScore\tPopularity\tName\tLesk\tSimpeLesk\tCase\tNoun\tSuffix\tTypeContent\tType\tDomain\twordvecDescription\twordvecDescriptionLocal\thashDescription\thashInfobox\tword2vecLinks\tLink\t\ttypeClassifier\tDescription");
    for (int i = 0; i < entities.size(); i++) {
        EntityMatch em = entities.get(i);
        String id = em.getId();
        EntityScores e = entityScoreMap.get(id);
        // Bonus when the id prefix and the case of the mention's first letter agree
        // ("w" with upper case, "t" with lower case).
        double wikiScore = 0;
        if (id.startsWith("w") && Character.isUpperCase(em.getMention().charAt(0))) {
            wikiScore = wikiWeight;
        } else if (id.startsWith("t") && Character.isLowerCase(em.getMention().charAt(0))) {
            wikiScore = wikiWeight;
        }
        double totalScore = wikiScore + e.getPopularityScore() * popularityWeight
                + e.getNameScore() * nameWeight + e.getLeskScore() * leskWeight
                + e.getSimpleLeskScore() * simpleLeskWeight + e.getLetterCaseScore() * letterCaseWeight
                + e.getSuffixScore() * suffixWeight + e.getTypeContentScore() * typeContentWeight
                + e.getTypeScore() * typeWeight + e.getDomainScore() * domainWeight
                + e.getWordvecDescriptionScore() * wordvecDescriptionWeight
                + e.getWordvecDescriptionLocalScore() * wordvecDescriptionLocalWeight
                + e.getHashDescriptionScore() * hashDescriptionWeight
                + e.getHashInfoboxScore() * hashInfoboxWeight + e.getWordvecLinksScore() * word2vecLinksWeight
                + e.getLinkScore() * linkWeight + e.getTypeClassifierkScore() * typeClassifierkWeight;
        // When RankLib is enabled its score replaces the weighted sum entirely.
        if (ranklib == true) {
            totalScore = RankLib.getInstance().score(e);
        }

        // Pages whose URL title contains a parenthesis get half the score.
        if (em.getEntity().getPage().getUrlTitle().contains("(")) {
            totalScore /= 2;
        }
        em.setScore(totalScore);
        e.setScore(totalScore);

        LOGGER.info("\t" + id + "\t" + em.getEntity().getPage().getTitle() + "\t"
                + em.getEntity().getPage().getUrlTitle() + "\t" + em.getScore() + "\t"
                + e.getPopularityScore() * popularityWeight + "\t" + e.getNameScore() * nameWeight + "\t"
                + e.getLeskScore() * leskWeight + "\t" + e.getSimpleLeskScore() * simpleLeskWeight + "\t"
                + e.getLetterCaseScore() * letterCaseWeight + "\t" + e.getSuffixScore() * suffixWeight + "\t"
                + e.getTypeContentScore() * typeContentWeight + "\t" + e.getTypeScore() * typeWeight + "\t"
                + e.getDomainScore() * domainWeight + "\t"
                + e.getWordvecDescriptionScore() * wordvecDescriptionWeight + "\t"
                + e.getWordvecDescriptionLocalScore() * wordvecDescriptionLocalWeight + "\t"
                + e.getHashDescriptionScore() * hashDescriptionWeight + "\t"
                + e.getHashInfoboxScore() * hashInfoboxWeight + "\t"
                + e.getWordvecLinksScore() * word2vecLinksWeight + "\t" + e.getLinkScore() * linkWeight + "\t"
                + e.getTypeClassifierkScore() * typeClassifierkWeight + "\t"
                + em.getEntity().getPage().getDescription());
    }

    // Pick the first entity of every spot after sorting its candidates.
    EntityMatchList eml = new EntityMatchList();
    for (SpotMatch match : sml) {
        EntityMatchList list = match.getEntities();
        if (!list.isEmpty()) {
            list.sort();
            eml.add(list.get(0));
            selectedEntities.add(list.get(0).getId());
        }
    }
    return eml;
}

From source file:com.moviejukebox.reader.MovieNFOReader.java

/**
 * Copies the contents of an NFO file-info element onto the movie.
 *
 * <p>Processed in this order: the container, every "video" stream (codec, aspect
 * ratio, resolution), every "audio" stream (codec, language, channels), the combined
 * movie language built from the audio codecs, and finally the "subtitle" languages.
 * Nothing happens when {@code eFileInfo} is {@code null}; only the container is
 * handled when the element has no "streamdetails" child.
 *
 * @param movie     the movie to update
 * @param eFileInfo the file-info element, may be {@code null}
 */
private static void parseFileInfo(Movie movie, Element eFileInfo) {
    if (eFileInfo == null) {
        return;
    }

    if (OverrideTools.checkOverwriteContainer(movie, NFO_PLUGIN_ID)) {
        movie.setContainer(DOMHelper.getValueFromElement(eFileInfo, "container"), NFO_PLUGIN_ID);
    }

    final Element streamDetails = DOMHelper.getElementByName(eFileInfo, "streamdetails");
    if (streamDetails == null) {
        return;
    }

    // ---- Video streams ----
    final NodeList videoNodes = streamDetails.getElementsByTagName("video");
    for (int idx = 0; idx < videoNodes.getLength(); idx++) {
        final Node videoNode = videoNodes.item(idx);
        if (videoNode.getNodeType() != Node.ELEMENT_NODE) {
            continue;
        }
        final Element video = (Element) videoNode;

        // A codec entry is only recorded when the NFO actually names one.
        final String videoCodecName = DOMHelper.getValueFromElement(video, "codec");
        if (isValidString(videoCodecName)) {
            final Codec videoCodec = new Codec(CodecType.VIDEO);
            videoCodec.setCodecSource(CodecSource.NFO);
            videoCodec.setCodec(videoCodecName);
            movie.addCodec(videoCodec);
        }

        if (OverrideTools.checkOverwriteAspectRatio(movie, NFO_PLUGIN_ID)) {
            final String aspect = DOMHelper.getValueFromElement(video, "aspect");
            movie.setAspectRatio(ASPECT_TOOLS.cleanAspectRatio(aspect), NFO_PLUGIN_ID);
        }

        if (OverrideTools.checkOverwriteResolution(movie, NFO_PLUGIN_ID)) {
            final String width = DOMHelper.getValueFromElement(video, "width");
            final String height = DOMHelper.getValueFromElement(video, "height");
            movie.setResolution(width, height, NFO_PLUGIN_ID);
        }
    }

    // ---- Audio streams ----
    final NodeList audioNodes = streamDetails.getElementsByTagName("audio");
    for (int idx = 0; idx < audioNodes.getLength(); idx++) {
        final Node audioNode = audioNodes.item(idx);
        if (audioNode.getNodeType() != Node.ELEMENT_NODE) {
            continue;
        }
        final Element audio = (Element) audioNode;

        String codecName = DOMHelper.getValueFromElement(audio, "codec").trim();
        String language = DOMHelper.getValueFromElement(audio, "language");
        final String channels = DOMHelper.getValueFromElement(audio, "channels");

        // Only an entirely lower-case codec name is upper-cased; mixed case is kept as written.
        if (StringUtils.isAllLowerCase(codecName)) {
            codecName = codecName.toUpperCase();
        }

        if (StringTools.isValidString(language)) {
            language = MovieFilenameScanner.determineLanguage(language);
        }

        final Codec audioCodec = new Codec(CodecType.AUDIO, codecName);
        audioCodec.setCodecSource(CodecSource.NFO);
        audioCodec.setCodecLanguage(language);
        audioCodec.setCodecChannels(channels);
        movie.addCodec(audioCodec);
    }

    // ---- Movie language, derived from all audio codecs of the movie ----
    if (OverrideTools.checkOverwriteLanguage(movie, NFO_PLUGIN_ID)) {
        // A set removes duplicate languages.
        final Set<String> languages = new HashSet<>();
        for (Codec codec : movie.getCodecs()) {
            if (codec.getCodecType() == CodecType.AUDIO) {
                languages.add(codec.getCodecLanguage());
            }
        }

        if (languages.isEmpty()) {
            // No audio language at all: fall back to UNKNOWN.
            languages.add(Movie.UNKNOWN);
        } else if (languages.size() > 1 && languages.contains(Movie.UNKNOWN)) {
            // UNKNOWN is dropped as soon as at least one other language is present.
            languages.remove(Movie.UNKNOWN);
        }

        // Join the languages with the delimiter between non-empty prefixes.
        final StringBuilder joined = new StringBuilder();
        for (String language : languages) {
            if (joined.length() > 0) {
                joined.append(LANGUAGE_DELIMITER);
            }
            joined.append(language);
        }
        movie.setLanguage(joined.toString(), NFO_PLUGIN_ID);
    }

    // ---- Subtitles ----
    final List<String> subtitleLanguages = new ArrayList<>();
    final NodeList subtitleNodes = streamDetails.getElementsByTagName("subtitle");
    for (int idx = 0; idx < subtitleNodes.getLength(); idx++) {
        final Node subtitleNode = subtitleNodes.item(idx);
        if (subtitleNode.getNodeType() != Node.ELEMENT_NODE) {
            continue;
        }
        subtitleLanguages.add(DOMHelper.getValueFromElement((Element) subtitleNode, "language"));
    }
    SubtitleTools.setMovieSubtitles(movie, subtitleLanguages);
}

From source file:org.jsweet.input.typescriptdef.util.Util.java

/**
 * Converts an arbitrary name into a string intended for use as a Java identifier.
 *
 * <p>The rules are applied in order and the first one that matches decides the result:
 * <ol>
 * <li>a {@code null} or empty name is returned unchanged;</li>
 * <li>if {@code forceLowerCase} is set, the name is lower-cased before any other rule;</li>
 * <li>a name containing {@code '-'} or {@code '/'} has those characters replaced by
 * {@code '_'} (the digit and keyword rules below are not applied to such a name);</li>
 * <li>a name starting with a digit is prefixed with {@code '_'};</li>
 * <li>a name contained in {@code JSweetDefTranslatorConfig.JAVA_KEYWORDS} is prefixed with
 * {@code '_'} when {@code forceLowerCase} is set, and capitalized otherwise;</li>
 * <li>any other name is returned as is.</li>
 * </ol>
 *
 * @param name the name to convert, may be {@code null}
 * @param forceLowerCase whether the result must be lower case
 * @return the converted name, or {@code null} if {@code name} is {@code null}
 */
public static String toJavaName(String name, boolean forceLowerCase) {
    if (name == null || name.isEmpty()) {
        // Nothing to convert; returning early also keeps charAt(0) below from
        // throwing StringIndexOutOfBoundsException on an empty name.
        return name;
    }
    if (forceLowerCase) {
        // Lower-casing an already lower-case string is a no-op, so no pre-check is
        // needed. Locale.ROOT keeps the result independent of the default locale
        // (e.g. 'I' must become 'i', not the Turkish dotless 'ı').
        name = name.toLowerCase(java.util.Locale.ROOT);
    }
    if (name.contains("-") || name.contains("/")) {
        return name.replace('/', '_').replace('-', '_');
    }
    if (Character.isDigit(name.charAt(0))) {
        return "_" + name;
    }
    if (JSweetDefTranslatorConfig.JAVA_KEYWORDS.contains(name)) {
        // A keyword cannot be used verbatim: escape it with '_' when the result has
        // to stay lower case, otherwise change its case so it no longer matches.
        return forceLowerCase ? "_" + name : StringUtils.capitalize(name);
    }
    return name;
}

From source file:org.languagetool.rules.de.GermanSpellerRule.java

/**
 * Decides whether a compound is to be accepted because one of its parts is an ignored word.
 *
 * <p>A non-hyphenated word (e.g. "Feynmandiagramm") is accepted when it starts with an
 * ignored word, or with one of the geographical prefixes "nord", "west", "ost", "süd",
 * and the remainder is not reported as misspelled by the Hunspell dictionary, either as
 * written or capitalized. A hyphenated word (e.g. "Feynman-Diagramm") is accepted when at
 * least one of its parts is ignored and none of the parts that still need checking is
 * misspelled.
 *
 * @param word the word to check
 * @return {@code true} if the word is accepted as a compound containing an ignored word
 * @throws IOException propagated from the underlying ignore/dictionary lookups
 */
private boolean ignoreCompoundWithIgnoredWord(String word) throws IOException {
    // NOTE: "süd" is written with a Unicode escape in the literals below so that the
    // umlaut survives re-encoding of this file; losing it turns the prefix into "sd",
    // which no longer matches and is shorter than the prefix length of 3 used below.
    if (!StringTools.startsWithUppercase(word)
            && !StringUtils.startsWithAny(word, "nord", "west", "ost", "s\u00fcd")) {
        // otherwise stuff like "rumfangreichen" gets accepted
        return false;
    }
    String[] words = word.split("-");
    if (words.length < 2) {
        // non-hyphenated compound (e.g., "Feynmandiagramm"):
        // only search for compounds that start(!) with a word from spelling.txt
        int end = super.startsWithIgnoredWord(word, true);
        if (end < 3) {
            // support for geographical adjectives - although "süd/ost/west/nord" are not in spelling.txt
            // to accept sentences such as
            // "Der westperuanische Ferienort, das ostargentinische Städtchen, das südukrainische Brauchtum, der nordägyptische Staudamm."
            if (word.startsWith("ost") || word.startsWith("s\u00fcd")) {
                end = 3;
            } else if (word.startsWith("west") || word.startsWith("nord")) {
                end = 4;
            } else {
                return false;
            }
        }
        String ignoredWord = word.substring(0, end);
        String partialWord = word.substring(end);
        boolean isCandidateForNonHyphenatedCompound = !StringUtils.isAllUpperCase(ignoredWord)
                && (StringUtils.isAllLowerCase(partialWord) || ignoredWord.endsWith("-"));
        boolean needFugenS = isNeedingFugenS(ignoredWord);
        if (!isCandidateForNonHyphenatedCompound || partialWord.length() <= 2) {
            return false;
        }
        if (needFugenS && partialWord.startsWith("s")) {
            // drop the linking "s" (Fugen-s) before looking up the second part
            partialWord = partialWord.substring(1);
        }
        return !hunspellDict.misspelled(partialWord)
                || !hunspellDict.misspelled(StringUtils.capitalize(partialWord));
    }
    // hyphenated compound (e.g., "Feynman-Diagramm"):
    boolean hasIgnoredWord = false;
    List<String> toSpellCheck = new ArrayList<>(3);
    String firstPart = words[0];
    String lastPart = words[words.length - 1];
    String stripFirst = word.substring(firstPart.length() + 1); // everything after the first "-"
    String stripLast = word.substring(0, word.length() - lastPart.length() - 1); // everything up to the last "-"

    if (super.ignoreWord(stripFirst) || wordsToBeIgnoredInCompounds.contains(stripFirst)) { // e.g., "Senioren-Au-pair"
        hasIgnoredWord = true;
        if (!super.ignoreWord(firstPart)) {
            toSpellCheck.add(firstPart);
        }
    } else if (super.ignoreWord(stripLast) || wordsToBeIgnoredInCompounds.contains(stripLast)) { // e.g., "Au-pair-Agentur"
        hasIgnoredWord = true;
        if (!super.ignoreWord(lastPart)) {
            toSpellCheck.add(lastPart);
        }
    } else {
        // no multi-part ignored word at either end: classify every part on its own
        for (String part : words) {
            if (super.ignoreWord(part) || wordsToBeIgnoredInCompounds.contains(part)) {
                hasIgnoredWord = true;
            } else {
                toSpellCheck.add(part);
            }
        }
    }

    if (hasIgnoredWord) {
        // an ignored part only rescues the compound if all other parts are spelled correctly
        for (String w : toSpellCheck) {
            if (hunspellDict.misspelled(w)) {
                return false;
            }
        }
    }
    return hasIgnoredWord;
}

From source file:org.lockss.util.NumberUtil.java

/**
 * Construct a sequence of Roman numerals. If both start and end are lower
 * case, the resulting sequence is lower case.
 * @param start a String representing a Roman numeral
 * @param end a String representing a Roman numeral
 * @param delta the magnitude of the increment or decrement, expressed as a positive integer
 * @return/*from w  w w  .ja v a2s  . c  o m*/
 * @throws NumberFormatException if the start or end argument cannot be parsed as a Roman numeral
 */
public static List<String> constructRomanSequence(String start, String end, int delta)
        throws NumberFormatException {
    List<String> seq = new ArrayList<String>();
    // Try and maintain the case - if it is mixed we use the default upper
    // case as we cannot decide what is appropriate
    boolean lower = StringUtils.isAllLowerCase(start) && StringUtils.isAllLowerCase(end);
    int s = parseRomanNumber(start);
    int e = parseRomanNumber(end);
    // Construct an int sequence
    int[] intSeq = constructSequence(s, e, delta);
    // Convert the numbers back to appropriately-cased Roman, and add to result list
    for (int i : intSeq) {
        String rn = toRomanNumber(i);
        seq.add(lower ? StringUtils.lowerCase(rn) : rn);
    }
    return seq;
}

From source file:org.n52.oss.Identifiers.java

/**
 * Generates three identifiers and checks that they are pairwise different,
 * alphanumeric, and lower case once digits and dots are stripped.
 */
@Test
public void identifiersAreGenerated() {
    IdentifierGenerator gen = new ShortAlphanumericIdentifierGenerator();
    String[] ids = new String[3];
    for (int i = 0; i < ids.length; i++) {
        ids[i] = gen.generate();
        System.out.println("Generated id: " + ids[i]);
    }

    // every pair of generated identifiers must differ
    for (int i = 0; i < ids.length; i++) {
        for (int j = i + 1; j < ids.length; j++) {
            assertThat(ids[i], not(equalTo(ids[j])));
        }
    }

    for (String id : ids) {
        assertTrue(StringUtils.isAlphanumeric(id));
    }

    // NOTE(review): isAllLowerCase is false for an empty CharSequence, so an identifier made
    // up of digits only would fail here - confirm the generator always emits a letter.
    for (String id : ids) {
        String lettersOnly = id.replaceAll("[\\d.]", "");
        assertTrue(StringUtils.isAllLowerCase(lettersOnly));
    }
}