com.screenslicer.core.util.StringUtil.java Source code

Java tutorial

Introduction

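Here is the source code for com.screenslicer.core.util.StringUtil.java, a class of static helpers for comparing and merging collections of strings and for computing cached edit distances.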

Source

/* 
 * ScreenSlicer (TM) -- automatic, zero-config web scraping (TM)
 * Copyright (C) 2013-2015 Machine Publishers, LLC
 * ops@machinepublishers.com | screenslicer.com | machinepublishers.com
 * Cincinnati, Ohio, USA
 *
 * You can redistribute this program and/or modify it under the terms of the
 * GNU Affero General Public License version 3 as published by the Free
 * Software Foundation. Additional permissions or commercial licensing may be
 * available--see LICENSE file or contact Machine Publishers, LLC for details.
 * 
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License version 3
 * for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * version 3 along with this program. If not, see <http://www.gnu.org/licenses/>.
 * 
 * For general details about how to investigate and report license violations,
 * please see: https://www.gnu.org/licenses/gpl-violation.html
 * and email the author: ops@machinepublishers.com
 * Keep in mind that paying customers have more rights than the AGPL alone offers.
 */
package com.screenslicer.core.util;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;

public class StringUtil {
    private static final Map<String, Integer> distCache = new HashMap<String, Integer>();
    private static final int MAX_DIST_CACHE = 4000;

    /**
     * Counts the distinct strings that occur in exactly one of the two
     * collections, i.e. the size of their symmetric difference. A string that
     * is repeated within a collection is counted once.
     *
     * @param collectionA first collection; must not be null
     * @param collectionB second collection; must not be null
     * @return the number of distinct strings present in one collection but not
     *         in the other
     */
    public static int diff(Collection<String> collectionA, Collection<String> collectionB) {
        // Copy into hash sets so every membership test is constant time even
        // when the caller hands in lists.
        Collection<String> uniqueA = new HashSet<String>(collectionA);
        Collection<String> uniqueB = new HashSet<String>(collectionB);
        int diff = 0;
        for (String string : uniqueA) {
            if (!uniqueB.contains(string)) {
                diff++;
            }
        }
        for (String string : uniqueB) {
            if (!uniqueA.contains(string)) {
                diff++;
            }
        }
        return diff;
    }

    /**
     * Tells whether {@code listOfLists} holds a list that {@code isSame}
     * considers identical to {@code list}.
     *
     * @param list the list to look for
     * @param listOfLists the candidate lists, checked in iteration order
     * @return {@code true} as soon as one candidate matches, {@code false} if
     *         none does
     */
    public static boolean contains(List<String> list, List<List<String>> listOfLists) {
        boolean found = false;
        for (List<String> candidate : listOfLists) {
            found = isSame(list, candidate);
            if (found) {
                break;
            }
        }
        return found;
    }

    /**
     * Returns the Levenshtein distance between two strings as computed by
     * {@code StringUtils.getLevenshteinDistance}, memoizing results in
     * {@code distCache}. The cache is emptied whenever it holds more than
     * {@code MAX_DIST_CACHE} entries.
     *
     * NOTE(review): {@code distCache} is a plain {@code HashMap} that is read
     * and written here without synchronization -- confirm that callers never
     * invoke this method concurrently.
     *
     * @param str1 first string; must not be null
     * @param str2 second string; must not be null
     * @return the edit distance between {@code str1} and {@code str2}
     * @throws IllegalArgumentException if either argument is null
     */
    public static int dist(String str1, String str2) {
        if (str1 == null || str2 == null) {
            // Same exception type the underlying library raises for null input.
            throw new IllegalArgumentException("Strings must not be null");
        }
        if (distCache.size() > MAX_DIST_CACHE) {
            distCache.clear();
        }
        // Prefix the key with the length of str1 so the boundary between the
        // two strings is unambiguous. A plain separator can also occur inside
        // the inputs, which lets two different pairs share one key.
        String cacheKey = str1.length() + ":" + str1 + str2;
        Integer cached = distCache.get(cacheKey);
        if (cached != null) {
            return cached.intValue();
        }
        int dist = StringUtils.getLevenshteinDistance(str1, str2);
        distCache.put(cacheKey, Integer.valueOf(dist));
        return dist;
    }

    /**
     * Removes from {@code originals} every element whose associated length is
     * more than four standard deviations above the mean of all lengths.
     * {@code stringLengths[i]} is taken to be the length belonging to
     * {@code originals.get(i)}; the array itself is not modified.
     *
     * When all lengths are equal the standard deviation is zero, the ratio is
     * not a number, and nothing is removed.
     *
     * @param stringLengths the length associated with each element of
     *        {@code originals}, by index
     * @param originals the list to prune in place; it must support removal
     */
    public static void trimLargeItems(int[] stringLengths, List<? extends Object> originals) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        for (int i = 0; i < stringLengths.length; i++) {
            stats.addValue(stringLengths[i]);
        }
        double stdDev = stats.getStandardDeviation();
        double mean = stats.getMean();
        // Remove by position, highest index first: this deletes exactly the
        // outlier entries (not merely the first element that equals one) and
        // never shifts a position that has yet to be examined.
        for (int i = stringLengths.length - 1; i >= 0; i--) {
            double diff = stringLengths[i] - mean;
            if (diff / stdDev > 4d) {
                originals.remove(i);
            }
        }
    }

    /**
     * Compares two lists element by element, in order.
     *
     * Two null lists are considered the same; a null list never matches a
     * non-null one. Elements are compared with {@code equals}, and two null
     * elements at the same position are treated as equal.
     *
     * @param lhs first list, may be null
     * @param rhs second list, may be null
     * @return {@code true} if both lists are null, or both have the same size
     *         and equal elements at every index
     */
    public static boolean isSame(List<String> lhs, List<String> rhs) {
        if (lhs == null || rhs == null) {
            // Only "both null" counts as the same.
            return lhs == rhs;
        }
        if (lhs.size() != rhs.size()) {
            return false;
        }
        for (int i = 0; i < lhs.size(); i++) {
            String left = lhs.get(i);
            String right = rhs.get(i);
            // Null-safe comparison: a null element only matches another null.
            if (left == null ? right != null : !left.equals(right)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Concatenates two lists into a new list with duplicates dropped. Each
     * distinct string is kept at the position of its first occurrence, with
     * the elements of {@code list1} preceding those of {@code list2}. Neither
     * input list is modified.
     *
     * @param list1 first list; must not be null
     * @param list2 second list; must not be null
     * @return a new, modifiable list of the distinct strings in encounter order
     */
    public static List<String> join(List<String> list1, List<String> list2) {
        List<String> list = new ArrayList<String>();
        // Track what was already emitted in a hash set so that each duplicate
        // check is constant time instead of a scan of the result list.
        Collection<String> seen = new HashSet<String>();
        for (String cur : list1) {
            if (seen.add(cur)) {
                list.add(cur);
            }
        }
        for (String cur : list2) {
            if (seen.add(cur)) {
                list.add(cur);
            }
        }
        return list;
    }
}