Example usage for org.apache.commons.lang StringUtils substringsBetween

List of usage examples for org.apache.commons.lang StringUtils substringsBetween

Introduction

On this page you can find example usages of org.apache.commons.lang.StringUtils.substringsBetween.

Prototype

public static String[] substringsBetween(String str, String open, String close) 

Source Link

Document

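Searches a String for substrings delimited by a start and end tag, returning all matching substrings in an array. Returns null if the input String is null or if no match is found, so callers should check the result before iterating over it.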

Usage

From source file:adalid.commons.util.StrUtils.java

/**
 * Extracts the pieces of {@code string} that are enclosed in curly braces.
 *
 * @param string the text to scan
 * @return whatever {@code StringUtils.substringsBetween} yields for the
 *         delimiters "{" and "}" (per the Commons Lang documentation this is
 *         {@code null} when nothing matches)
 */
public static String[] getParametros(String string) {
    final String open = "{";
    final String close = "}";
    String[] parametros = StringUtils.substringsBetween(string, open, close);
    return parametros;
}

From source file:opennlp.tools.doc_classifier.DocClassifierTrainingSetMultilingualExtender.java

/**
 * Collects the distinct texts found between "[[" and "]]" in the given file.
 *
 * <p>Entries are dropped when they start with "Kategorie", "Category", "d:",
 * "User" or "Portal", or when they contain a colon. For the remaining entries
 * only the part before the first '|' is kept.
 *
 * @param filename path of the file to read
 * @return the de-duplicated entries in unspecified order; an empty list when the
 *         file cannot be read or contains no "[[...]]" pair
 */
public List<String> extractEntriesFromSpecial_Export(String filename) {
    List<String> filteredEntries = new ArrayList<String>();
    String content = null;
    try {
        content = FileUtils.readFileToString(new File(filename));
    } catch (IOException e) {
        // Best effort: report the failure and fall through with null content,
        // which produces an empty result below.
        e.printStackTrace();
    }

    // substringsBetween returns null for null input and when nothing matches;
    // iterating over that would throw a NullPointerException.
    String[] entries = StringUtils.substringsBetween(content, "[[", "]]");
    if (entries == null) {
        return filteredEntries;
    }

    for (String entry : entries) {
        if (entry.startsWith("Kategorie") || entry.startsWith("Category") || entry.startsWith("d:")
                || entry.startsWith("User") || entry.startsWith("Portal")) {
            continue;
        }
        if (entry.indexOf(':') > -1) {
            continue;
        }
        // keep only the text before the first '|', if there is one
        int endOfEntry = entry.indexOf('|');
        filteredEntries.add(endOfEntry > -1 ? entry.substring(0, endOfEntry) : entry);
    }

    // round-trip through a HashSet to remove duplicates
    return new ArrayList<String>(new HashSet<String>(filteredEntries));
}

From source file:opennlp.tools.parse_thicket.kernel_interface.style_classif.TSNE_ImporterProcessor.java

/**
 * Splits the import file into one text file per {@code <text .../text>} portion.
 *
 * <p>The "txt" directory under {@code resourceWorkDir} is cleaned first. Each
 * portion's {@code id="..."} value is used as its label; the first four
 * characters of the label name the sub-directory, and the file is named
 * {@code <index><label with '/' replaced by '_'>.txt}. I/O failures are printed
 * and processing continues with the next step.
 */
public void importFileCreatClassifDirs() {
    // NOTE(review): this path is built with "/txt" while the writes below use
    // "txt/" - confirm whether resourceWorkDir ends with a separator.
    try {
        FileUtils.cleanDirectory(new File(resourceWorkDir + "/txt"));
    } catch (IOException e2) {
        e2.printStackTrace();
    }

    String text = null;
    try {
        text = FileUtils.readFileToString(new File(resourceWorkDir + importFilePath),
                Charset.defaultCharset().toString());
    } catch (IOException e) {
        e.printStackTrace();
    }

    // null when the file could not be read or holds no "<text ... /text>" portion
    String[] portions = StringUtils.substringsBetween(text, "<text ", "/text>");
    if (portions == null) {
        return;
    }

    for (int i = 0; i < portions.length; i++) {
        String label = StringUtils.substringBetween(portions[i], "id=\"", "\">");
        if (label == null) {
            // no id attribute found: there is nothing to name the file after
            continue;
        }
        String po = StringUtils.substringBetween(portions[i], "\">", "<");

        // labels shorter than four characters are used in full as the directory name
        String localDirName = label.substring(0, Math.min(4, label.length()));
        File localDir = new File(resourceWorkDir + "txt/" + localDirName);
        if (!localDir.exists()) {
            try {
                FileUtils.forceMkdir(localDir);
            } catch (IOException e1) {
                e1.printStackTrace();
            }
        }

        try {
            String fileLabel = label.replace('/', '_');
            String fullPath = resourceWorkDir + "txt/" + localDirName + "/" + i + fileLabel + ".txt";
            FileUtils.writeStringToFile(new File(fullPath), po);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

From source file:opennlp.tools.similarity.apps.GoogleAutoCompleteQueryRunner.java

/**
 * Queries the auto-complete service for {@code rawExpr} and derives a word list
 * from the returned suggestions.
 *
 * <p>A space is inserted at camel-case boundaries, the expression is URL-encoded
 * and appended to the request URL. Each {@code <CompleteSuggestion>} element
 * contributes the space-separated words of its first double-quoted value.
 *
 * @param rawExpr the expression to complete
 * @return the words shared by the first two suggestions if there are any,
 *         otherwise the words of the first suggestion; {@code null} when the
 *         response holds no usable suggestion
 */
public List<String> getAutoCompleteExpression(String rawExpr) {
    // insert spaces into camel cases
    rawExpr = rawExpr.replaceAll("([a-z][a-z])([A-Z][a-z])", "$1 $2");
    String query = rawExpr.replace(' ', '+');
    try {
        query = URLEncoder.encode(query, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        // falls back to the unencoded query
        e.printStackTrace();
    }

    String pageOrigHTML = pageFetcher.fetchOrigHTML(searchRequest + query + suffix);
    String[] results = StringUtils.substringsBetween(pageOrigHTML, "<CompleteSuggestion>",
            "</CompleteSuggestion>");
    if (results == null) {
        return null;
    }

    List<List<String>> accum = new ArrayList<List<String>>();
    for (String wrapped : results) {
        // substringBetween returns null when the suggestion has no quoted value;
        // check for it explicitly instead of catching the resulting exception
        String quoted = StringUtils.substringBetween(wrapped, "\"");
        if (quoted == null) {
            continue;
        }
        String[] words = quoted.split(" ");
        if (words.length < 1) {
            continue;
        }
        accum.add(Arrays.asList(words));
    }

    //TODO make more noise-resistant algo
    if (accum.size() > 1) {
        List<String> first = new ArrayList<String>(accum.get(0));
        List<String> second = new ArrayList<String>(accum.get(1));

        // keep only the words both suggestions have in common
        first.retainAll(second);
        if (first.size() > 0) {
            return first;
        }
        return accum.get(0);
    }

    if (accum.size() == 1) {
        return accum.get(0);
    }

    return null;
}

From source file:opennlp.tools.similarity.apps.StoryDiscourseNavigator.java

/**
 * Builds search phrases for {@code entity} from a fetched result page.
 *
 * <p>Runs a search for the entity, takes the first hit whose URL contains
 * "wikipedia." (or the last hit when none does), fetches that page and extracts
 * the texts between {@code "><a href="#} and {@code "><span}. Texts containing
 * "cite_note" are skipped; in the rest, '_' and '.' are replaced by spaces and
 * the entity is prefixed.
 *
 * @param entity the entity to look up
 * @return the generated phrases; an empty array when the search returns no hit,
 *         the page cannot be fetched, or no matching text is found
 */
private String[] obtainKeywordsForAnEntityFromWikipedia(String entity) {
    // NOTE(review): credential hard-coded in source - consider moving it to configuration.
    yrunner.setKey("xdnRVcVf9m4vDvW1SkTAz5kS5DFYa19CrPYGelGJxnc");
    List<HitBase> resultList = yrunner.runSearch(entity, 20);
    if (resultList == null || resultList.isEmpty()) {
        return new String[0];
    }

    // stop at the first wikipedia hit; otherwise h ends up as the last hit
    HitBase h = null;
    for (HitBase candidate : resultList) {
        h = candidate;
        if (h.getUrl().indexOf("wikipedia.") > -1) {
            break;
        }
    }

    String content = pFetcher.fetchOrigHTML(h.getUrl());
    if (content == null) {
        return new String[0];
    }

    // swap the opening delimiter for a marker before extracting the portions
    content = content.replace("\"><a href=\"#", "&_&_&_&");
    String[] portions = StringUtils.substringsBetween(content, "&_&_&_&", "\"><span");
    if (portions == null) {
        // substringsBetween returns null when nothing matches
        return new String[0];
    }

    List<String> results = new ArrayList<String>();
    for (String portion : portions) {
        if (portion.indexOf("cite_note") > -1) {
            continue;
        }
        results.add(entity + " " + portion.replace('_', ' ').replace('.', ' '));
    }
    return results.toArray(new String[0]);
}

From source file:opennlp.tools.similarity.apps.utils.Utils.java

/**
 * Removes every {@code <...>} sequence from the given string.
 *
 * @param inputStr the text to clean
 * @return {@code inputStr} with each "<" + content + ">" occurrence removed, or
 *         {@code inputStr} itself when no such sequence is found
 */
public static String removeHTMLTagsFromStr(String inputStr) {
    String[] tagBodies = StringUtils.substringsBetween(inputStr, "<", ">");
    if (tagBodies == null || tagBodies.length == 0) {
        return inputStr;
    }

    String stripped = inputStr;
    for (int i = 0; i < tagBodies.length; i++) {
        // rebuild the full tag from its inner text and delete all occurrences
        String fullTag = "<" + tagBodies[i] + ">";
        stripped = StringUtils.remove(stripped, fullTag);
    }
    return stripped;
}

From source file:org.apache.forrest.conf.AntProperties.java

/**
 * Stores a property unless the key is already present, expanding
 * <code>${name}</code> references in the value first.
 *
 * <p>As in Ant, properties defined first take precedence, so an existing key is
 * never overwritten. Each <code>${name}</code> found in the value's string form
 * is replaced by the string form of the property already stored under
 * {@code name}; references to unknown properties are left untouched.
 *
 * @param name  the property key
 * @param value the property value, possibly containing <code>${name}</code> references
 * @return the result of the superclass {@code put}, or {@code null} when the key
 *         was already present and nothing was stored
 */
public synchronized Object put(Object name, Object value) {
    //if the property is already there don't overwrite, as in Ant
    //properties defined first take precedence
    if (super.containsKey(name)) {
        return null;
    }

    String[] names = StringUtils.substringsBetween(value.toString(), "${", "}");
    if (names != null) {
        String expanded = value.toString();
        for (String currentName : names) {
            // Stored values are not guaranteed to be Strings (a value without
            // references is stored as passed in), so convert instead of casting.
            Object known = super.get(currentName);
            String valueToReplaceWith = known == null ? null : known.toString();
            // StringUtils.replace leaves the text unchanged for a null replacement
            expanded = StringUtils.replace(expanded, "${" + currentName + "}", valueToReplaceWith);
        }
        value = expanded;
    }
    return super.put(name, value);
}

From source file:org.apache.nutch.crawl.SeedGenerator.java

/**
 * Generates a seed URL list from a URL format containing a
 * <code>{{{min,max}}}</code> placeholder and writes it to a file under /tmp.
 *
 * <p>One URL per integer in the inclusive range is produced, each on its own
 * line. The file name is derived from the part of the top-level domain before
 * its first dot.
 *
 * @param args unused
 * @throws Exception if number parsing or writing the file fails
 */
public static void main(String[] args) throws Exception {
    final String urlFormat = "http://oumen.com/detail.php?atid={{{1000,4460}}}";
    final String[] placeholders = StringUtils.substringsBetween(urlFormat, "{{{", "}}}");
    final String[] urlParts = urlFormat.split("\\{\\{\\{\\d+\\,\\d+\\}\\}\\}");

    // parse every "min,max" placeholder into a two-element list
    ArrayList<ArrayList<Integer>> ranges = Lists.newArrayList();
    for (String placeholder : placeholders) {
        int lower = Integer.parseInt(StringUtils.substringBefore(placeholder, ","));
        int upper = Integer.parseInt(StringUtils.substringAfter(placeholder, ","));
        ranges.add(Lists.newArrayList(lower, upper));
    }

    // we can support only one placeholder right now
    final ArrayList<Integer> firstRange = ranges.get(0);
    final String prefix = urlParts[0];
    final String tail = urlParts.length > 1 ? urlParts[1] : "";

    StringBuilder content = new StringBuilder();
    for (int id = firstRange.get(0); id <= firstRange.get(1); ++id) {
        content.append(prefix).append(id).append(tail).append("\n");
    }

    String tidyDomain = NetUtil.getTopLevelDomain(urlFormat);
    String baseName = StringUtils.substringBefore(tidyDomain, ".").toLowerCase().replaceAll("[^a-z]", "_");
    String file = "/tmp/" + baseName + ".txt";
    FileUtils.writeStringToFile(new File(file), content.toString(), "utf-8");

    System.out.println("url seed results are saved in : " + file);
}

From source file:org.apache.uima.alchemy.utils.Alchemy2TypeSystemMapper.java

/**
 * Maps the bracketed annotations in the results' annotated text to
 * {@code AlchemyAnnotation}s in the given CAS.
 *
 * <p>Annotations are expected in the form {@code [TYPE[TEXT]]}. For each one an
 * annotation with the type and offsets is added to the indexes, and the markup
 * in the working copy of the text is replaced by the plain TEXT so that later
 * offsets refer to the progressively cleaned text.
 *
 * @param results the results whose annotated text is parsed
 * @param aJCas   the CAS that receives the annotations
 */
public static void mapAnnotatedEntities(AnnotatedResults results, JCas aJCas) {
    setLanaguage(results, aJCas);
    String annotatedText = results.getAnnotatedText();

    // find strings of pattern 'TYPE[TEXT'
    String[] ants = StringUtils.substringsBetween(annotatedText, "[", "]");
    if (ants == null) {
        // null text or no bracketed annotation: nothing to map
        return;
    }

    // map the ants to UIMA CAS
    for (String ant : ants) {
        int typeEnd = ant.indexOf("[");
        if (typeEnd > 0) {
            AlchemyAnnotation alchemyAnnotation = new AlchemyAnnotation(aJCas);

            // the match sits one character after the opening '[' of the markup
            int indexOfAnt = annotatedText.indexOf(ant);
            alchemyAnnotation.setBegin(indexOfAnt - 1);

            String antText = ant.substring(typeEnd + 1);
            alchemyAnnotation.setEnd(indexOfAnt + antText.length() - 1);

            String antType = ant.substring(0, typeEnd);
            alchemyAnnotation.setAlchemyType(antType);
            alchemyAnnotation.addToIndexes();

            // Replace the literal "[TYPE[TEXT]]" with TEXT. Both sides are quoted
            // so regex metacharacters in the annotation and '$' or '\' in the
            // text are taken literally.
            annotatedText = annotatedText.replaceFirst(
                    java.util.regex.Pattern.quote("[" + ant + "]]"),
                    java.util.regex.Matcher.quoteReplacement(antText));
        }
    }

}

From source file:org.b3log.solo.processor.SkinRenderer.java

/**
 * Renders the given FreeMarker template, returning only the PJAX container
 * sections when the request is a PJAX request.
 *
 * <p>The data model always receives a "pjax" flag. Non-PJAX requests are
 * delegated to the superclass. For PJAX requests the template is rendered in
 * full, the parts between the start and end markers for the container named by
 * the "X-PJAX-Container" header are concatenated, and a generation-time comment
 * is appended. If no marker pair is found, the full HTML is returned with that
 * comment.
 *
 * @param request   the specified request
 * @param dataModel the specified data model
 * @param template  the specified FreeMarker template
 * @return generated HTML
 * @throws Exception exception
 */
@Override
protected String genHTML(final HttpServletRequest request, final Map<String, Object> dataModel,
        final Template template) throws Exception {
    final boolean pjaxRequest = isPJAX(request);
    dataModel.put("pjax", pjaxRequest);
    if (!pjaxRequest) {
        return super.genHTML(request, dataModel, template);
    }

    // render the complete page into memory
    final StringWriter rendered = new StringWriter();
    template.setOutputEncoding("UTF-8");
    template.process(dataModel, rendered);

    // build the trailing comment with the elapsed time and the current timestamp
    final long finishedAt = System.currentTimeMillis();
    final String timestamp = DateFormatUtils.format(finishedAt, "yyyy/MM/dd HH:mm:ss");
    final long startedAt = (Long) request.getAttribute(Keys.HttpRequest.START_TIME_MILLIS);
    final String footer = String.format(
            "\n<!-- Generated by Latke (https://github.com/b3log/latke) in %1$dms, %2$s -->",
            finishedAt - startedAt, timestamp);

    final String containerName = request.getHeader("X-PJAX-Container");
    final String startMarker = "<!---- pjax {" + containerName + "} start ---->";
    final String endMarker = "<!---- pjax {" + containerName + "} end ---->";

    final String page = rendered.toString();
    final String[] sections = StringUtils.substringsBetween(page, startMarker, endMarker);
    final String body = null == sections ? page : String.join("", sections);
    return body + footer;
}