Example usage for org.apache.commons.lang StringUtils substringBetween

Introduction

On this page you can find example usages of org.apache.commons.lang StringUtils substringBetween.

Prototype

public static String substringBetween(String str, String open, String close)

Source Link

Document

Gets the String that is nested in between two Strings.

Usage

From source file:opennlp.tools.parse_thicket.external_rst.RstNode.java

/**
 * Builds a node from a single line of RST parser output.
 *
 * <p>A line whose trimmed form starts with ")" is ignored and leaves the node
 * untouched. Otherwise the line is consumed from left to right:
 * <ul>
 *   <li>the index of the first "(" is stored in {@code level};</li>
 *   <li>{@code isNucleus} is set when the text before the next "(" contains "Nucleus";</li>
 *   <li>a "span" entry fills {@code span} with two integers, a "leaf" entry
 *       fills {@code leaf} with one integer; anything else is reported on stderr;</li>
 *   <li>the trimmed text between "rel2par" and the following ")" is stored in {@code rel2par};</li>
 *   <li>the text enclosed by "_!" and "_!)" is stored in {@code text}.</li>
 * </ul>
 *
 * @param line one line of RST output
 */
public RstNode(String line) {
    // A line that merely closes a bracket describes no node.
    String trimmed = StringUtils.trim(line);
    if (trimmed.startsWith(")")) {
        return;
    }

    int firstParen = line.indexOf("(");
    level = firstParen;
    // Drop everything up to and including the "(" and the character after it.
    line = line.substring(firstParen + 2);

    int nextParen = line.indexOf("(");
    String header = line.substring(0, nextParen);
    isNucleus = header.contains("Nucleus");
    line = line.substring(nextParen + 1);

    boolean isSpan = line.startsWith("span");
    boolean isLeaf = !isSpan && line.startsWith("leaf");

    if (isSpan) {
        // Skip the "span" keyword and its separator.
        line = line.substring(5);
        try {
            span = new Pair<Integer, Integer>();
            String[] bounds = line.substring(0, line.indexOf(")")).split(" ");
            span.setFirst(Integer.parseInt(bounds[0]));
            span.setSecond(Integer.parseInt(bounds[1]));
        } catch (Exception e) {
            e.printStackTrace();
        }
    } else if (isLeaf) {
        try {
            // The number sits between the "leaf" keyword (plus separator) and ")".
            leaf = Integer.parseInt(line.substring(5, line.indexOf(")")));
        } catch (Exception e) {
            e.printStackTrace();
        }
    } else {
        System.err.println("Problem parsing RST results: '" + line);
    }

    // Continue after "rel2par" and the single character that follows it.
    int relationStart = line.indexOf("rel2par") + 8;
    line = line.substring(relationStart);
    rel2par = line.substring(0, line.indexOf(")")).trim();

    text = StringUtils.substringBetween(line, "_!", "_!)");
}

From source file:opennlp.tools.parse_thicket.kernel_interface.style_classif.TSNE_ImporterProcessor.java

/**
 * Splits the import file into one text file per {@code <text ...>} section.
 *
 * <p>The "txt" working directory is emptied first. Each section found between
 * {@code "<text "} and {@code "/text>"} is then written to
 * {@code txt/<dir>/<index><label>.txt}, where {@code label} is the section's
 * {@code id} attribute (with '/' replaced by '_' in the file name) and
 * {@code dir} is the first four characters of the unmodified label.
 *
 * <p>I/O failures are printed and do not abort the remaining sections. If the
 * import file cannot be read or contains no sections, nothing is written.
 */
public void importFileCreatClassifDirs() {
    // NOTE(review): this path uses "/txt" while the paths below use "txt/";
    // they only agree if resourceWorkDir ends with a separator - confirm.
    try {
        FileUtils.cleanDirectory(new File(resourceWorkDir + "/txt"));
    } catch (IOException e2) {
        e2.printStackTrace();
    }

    String text;
    try {
        text = FileUtils.readFileToString(new File(resourceWorkDir + importFilePath),
                Charset.defaultCharset().toString());
    } catch (IOException e) {
        e.printStackTrace();
        // Without the file content there is nothing to split.
        return;
    }

    // substringsBetween returns null (not an empty array) when nothing matches.
    String[] portions = StringUtils.substringsBetween(text, "<text ", "/text>");
    if (portions == null) {
        return;
    }

    for (int i = 0; i < portions.length; i++) {
        String label = StringUtils.substringBetween(portions[i], "id=\"", "\">");
        if (label == null) {
            // The label names both the directory and the file; skip sections without one.
            System.err.println("Skipping text section " + i + ": no id attribute found");
            continue;
        }
        String po = StringUtils.substringBetween(portions[i], "\">", "<");

        // Labels shorter than four characters are used whole as the directory name.
        String localDirName = label.substring(0, Math.min(4, label.length()));
        File localDir = new File(resourceWorkDir + "txt/" + localDirName);
        if (!localDir.exists()) {
            try {
                FileUtils.forceMkdir(localDir);
            } catch (IOException e1) {
                e1.printStackTrace();
            }
        }

        try {
            String fileName = i + label.replace('/', '_') + ".txt";
            FileUtils.writeStringToFile(new File(localDir, fileName), po);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

From source file:opennlp.tools.parse_thicket.opinion_processor.YouTubeMiner.java

/**
 * Fetches a YouTube page and extracts the subscriber count and, for
 * non-channel pages, the view count.
 *
 * <p>The fetched HTML is also written to a file named after the URL (with
 * ':' and '/' replaced by '_'). A value that cannot be located in the page is
 * reported on stderr and left unset in the result; a value that is located
 * but is not a valid integer produces a printed stack trace.
 *
 * @param url page address; a URL containing "channel" is treated as a channel page
 * @return the extracted values; fields that could not be extracted keep their defaults
 */
public YouTubeMinerResult getData(String url) {
    YouTubeMinerResult result = new YouTubeMinerResult();
    String content = fetcher.fetchOrigHTML(url);
    try {
        FileUtils.writeStringToFile(new File(url.replace(':', '_').replace('/', '_')), content);
    } catch (IOException e1) {
        e1.printStackTrace();
    }

    // Expected markup: subscriber-count" title="30" ... tabindex
    // substringBetween is null-safe, so a missing outer match simply yields null here.
    String subscribersStr = StringUtils.substringBetween(content, "subscriber-count", "tabindex");
    String dirtyNumber = StringUtils.substringBetween(subscribersStr, "title=\"", "\"");

    if (url.indexOf("channel") > -1) {
        try {
            // Check for null BEFORE cleaning the value; cleaning a null used to throw.
            if (dirtyNumber != null) {
                // Channel pages: drop every non-ASCII character from the number.
                String cleanNumber = dirtyNumber.replaceAll("[^\\x00-\\x7F]", "");
                result.subscribers = Integer.parseInt(cleanNumber);
            } else {
                System.err.println("Not found data for 'subscriber-count', 'tabindex'");
            }
        } catch (NumberFormatException e) {
            e.printStackTrace();
        }
    } else {
        try {
            if (dirtyNumber != null) {
                // Other pages: drop the spaces used as digit separators.
                result.subscribers = Integer.parseInt(dirtyNumber.replace(" ", ""));
            } else {
                System.err.println("Not found data for 'subscriber-count', 'tabindex'");
            }

            // Expected markup: view-count">12 ?</div>
            String viewsStrDirty = StringUtils.substringBetween(content, "view-count", "<div>");
            String viewsStr = StringUtils.substringBetween(viewsStrDirty, ">", " ");
            if (viewsStr != null) {
                result.views = Integer.parseInt(viewsStr);
            } else {
                System.err.println("Not found data for 'view-count','<div>'");
            }
        } catch (NumberFormatException e) {
            e.printStackTrace();
        }
    }

    return result;
}

From source file:opennlp.tools.similarity.apps.solr.IterativeQueryComponent.java

/**
 * Rewrites the field of the request's "q" parameter to {@code newFieldName}
 * and re-prepares the response builder (query, sort spec, parser, filters)
 * for the rewritten query.
 *
 * <p>The current field is taken to be the text before the first ':' of the
 * query. Only occurrences of that text directly followed by ':' are replaced,
 * so search terms that happen to contain the field name are left alone.
 *
 * @param rb           response builder whose request is rewritten in place
 * @param newFieldName field to search instead; {@code null} leaves the query unchanged
 * @return the same {@code rb} instance
 */
private ResponseBuilder substituteField(ResponseBuilder rb, String newFieldName) {
    SolrParams params = rb.req.getParams();
    String query = params.get("q");
    String currField = StringUtils.substringBetween(" " + query, " ", ":");
    if (currField != null && newFieldName != null) {
        // Anchor on the ':' so that only field prefixes are rewritten, not term text.
        query = query.replace(currField + ":", newFieldName + ":");
    }
    NamedList values = params.toNamedList();
    values.remove("q");
    values.add("q", query);
    params = SolrParams.toSolrParams(values);
    rb.req.setParams(params);
    rb.setQueryString(query);

    String defType = params.get(QueryParsing.DEFTYPE, QParserPlugin.DEFAULT_QTYPE);

    // get it from the response builder to give a different component a chance
    // to set it.
    String queryString = rb.getQueryString();
    if (queryString == null) {
        // this is the normal way it's set.
        queryString = params.get(CommonParams.Q);
        rb.setQueryString(queryString);
    }

    QParser parser = null;
    try {
        parser = QParser.getParser(rb.getQueryString(), defType, rb.req);
    } catch (Exception e) {
        e.printStackTrace();
    }

    // A parser that could not be created is skipped explicitly instead of
    // being dereferenced and having the resulting NPE caught.
    Query q = null;
    if (parser != null) {
        try {
            q = parser.getQuery();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    if (q == null) {
        // normalize a null query to a query that matches nothing
        q = new BooleanQuery();
    }
    rb.setQuery(q);

    if (parser != null) {
        try {
            rb.setSortSpec(parser.getSort(true));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    rb.setQparser(parser);

    String[] fqs = rb.req.getParams().getParams(CommonParams.FQ);
    if (fqs != null && fqs.length != 0) {
        List<Query> filters = rb.getFilters();
        if (filters == null) {
            filters = new ArrayList<Query>(fqs.length);
        }
        for (String fq : fqs) {
            if (fq == null || fq.trim().length() == 0) {
                continue;
            }
            try {
                QParser fqp = QParser.getParser(fq, null, rb.req);
                Query filter = fqp.getQuery();
                // Never hand a null filter to the searcher.
                if (filter != null) {
                    filters.add(filter);
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        // only set the filters if they are not empty otherwise
        // fq=&someotherParam= will trigger all docs filter for every request
        // if filter cache is disabled
        if (!filters.isEmpty()) {
            rb.setFilters(filters);
        }
    }

    return rb;
}

From source file:opennlp.tools.similarity.apps.solr.IterativeSearchRequestHandler.java

/**
 * Rewrites the field of the request's "q" parameter to {@code newFieldName}.
 *
 * <p>The current field is taken to be the text before the first ':' of the
 * query. Only occurrences of that text directly followed by ':' are replaced,
 * so search terms that happen to contain the field name are left alone.
 *
 * @param req          request whose parameters are replaced in place
 * @param newFieldName field to search instead; {@code null} leaves the query unchanged
 * @return the same {@code req} instance
 */
public static SolrQueryRequest substituteField(SolrQueryRequest req, String newFieldName) {
    SolrParams params = req.getParams();
    String query = params.get("q");
    String currField = StringUtils.substringBetween(" " + query, " ", ":");
    if (currField != null && newFieldName != null) {
        // Anchor on the ':' so that only field prefixes are rewritten, not term text.
        query = query.replace(currField + ":", newFieldName + ":");
    }
    NamedList values = params.toNamedList();
    values.remove("q");
    values.add("q", query);
    params = SolrParams.toSolrParams(values);
    req.setParams(params);
    return req;
}

From source file:opennlp.tools.similarity.apps.solr.IterativeSearchRequestHandler.java

/**
 * Re-scores the documents of {@code docList} by syntactic match between the
 * query expression and the text of the queried field, and returns them as a
 * new slice ordered by {@code PairComparable}.
 *
 * <p>The original list is returned unchanged when it is empty, when no field
 * name can be extracted from the "q" parameter, or when the query expression
 * is shorter than five characters or has fewer than three space-separated
 * tokens. Documents without a stored value for the queried field are left out
 * of the result.
 *
 * @param docList hits to re-score
 * @param req     current request; supplies the raw parameter string and the searcher
 * @param params  not used by this method
 * @return the re-scored slice, or {@code docList} itself when re-scoring is skipped
 */
public DocList filterResultsBySyntMatchReduceDocSet(DocList docList, SolrQueryRequest req, SolrParams params) {
    int len = docList.size();
    if (len < 1) { // do nothing
        return docList;
    }
    ParserChunker2MatcherProcessor pos = ParserChunker2MatcherProcessor.getInstance();

    // Reduce the raw parameter string to its "q=..." part (the last one wins).
    String requestExpression = req.getParamString();
    for (String part : requestExpression.split("&")) {
        if (part.startsWith("q=")) {
            requestExpression = part;
        }
    }

    String fieldNameQuery = StringUtils.substringBetween(requestExpression, "=", ":");
    if (fieldNameQuery == null) {
        // Checked before the field name is used to rewrite the expression.
        return docList;
    }

    // extract phrase query (in double-quotes)
    String[] queryParts = requestExpression.split("\"");
    if (queryParts.length >= 2 && queryParts[1].length() > 5) {
        requestExpression = queryParts[1].replace('+', ' ');
    } else if (requestExpression.indexOf(":") > -1) { // still field-based expression
        // Literal replace: the field name comes from the request and must not be
        // interpreted as a regular expression (e.g. "+title" is an invalid pattern).
        requestExpression = requestExpression.replace(fieldNameQuery + ":", "").replace('+', ' ')
                .replaceAll("  ", " ").replace("q=", "");
    }

    if (requestExpression.length() < 5 || requestExpression.split(" ").length < 3) {
        return docList;
    }

    IndexReader indexReader = req.getSearcher().getIndexReader();
    List<Pair<Integer, Float>> docIdsScores = new ArrayList<Pair<Integer, Float>>();
    DocIterator iter = docList.iterator();
    try {
        for (int i = 0; i < len; ++i) {
            int docId = iter.nextDoc();
            Document doc = indexReader.document(docId);

            // get text for event
            String answerText = doc.get(fieldNameQuery);
            if (answerText == null) {
                continue;
            }
            SentencePairMatchResult matchResult = pos.assessRelevance(requestExpression, answerText);
            float syntMatchScore = Double
                    .valueOf(parseTreeChunkListScorer.getParseTreeChunkListScore(matchResult.getMatchResult()))
                    .floatValue();
            System.out.println(" Matched query = '" + requestExpression + "' with answer = '" + answerText
                    + "' | doc_id = '" + docId);
            System.out.println(" Match result = '" + matchResult.getMatchResult() + "' with score = '"
                    + syntMatchScore + "';");
            docIdsScores.add(new Pair<Integer, Float>(docId, syntMatchScore));
        }
    } catch (IOException e1) {
        // Also covers CorruptIndexException; the documents scored so far are still returned.
        e1.printStackTrace();
    }

    Collections.sort(docIdsScores, new PairComparable());

    // Unzip the sorted pairs into the parallel id/score sequences DocSlice expects.
    List<Integer> bestMatchesDocIds = new ArrayList<Integer>(docIdsScores.size());
    List<Float> bestMatchesScore = new ArrayList<Float>(docIdsScores.size());
    for (Pair<Integer, Float> idAndScore : docIdsScores) {
        bestMatchesDocIds.add(idAndScore.getFirst());
        bestMatchesScore.add(idAndScore.getSecond());
    }
    System.out.println(bestMatchesScore);

    float maxScore = docList.maxScore(); // do not change
    int limit = docIdsScores.size();
    int start = 0;

    return new DocSlice(start, limit, ArrayUtils.toPrimitive(bestMatchesDocIds.toArray(new Integer[0])),
            ArrayUtils.toPrimitive(bestMatchesScore.toArray(new Float[0])), bestMatchesDocIds.size(), maxScore);
}

From source file:opennlp.tools.similarity.apps.utils.Utils.java

/**
 * Turns a possibly relative URL into an absolute one, using {@code siteUrl}
 * as the base.
 *
 * <ul>
 *   <li>A URL that already starts with "http://" or "https://" is returned unchanged.</li>
 *   <li>A URL starting with "/" is appended to the scheme and host of {@code siteUrl}.</li>
 *   <li>Any other URL replaces the part of {@code siteUrl} after its last "/".</li>
 *   <li>If no host can be extracted from {@code siteUrl} (no "/" after the
 *       scheme), the URL is appended to {@code siteUrl} with a single "/" between them.</li>
 * </ul>
 *
 * @param siteUrl address of the page the URL was found on
 * @param url     absolute or relative URL
 * @return the absolute URL, or {@code null} when {@code url} is null or empty
 */
public static String addDomainToUrl(String siteUrl, String url) {
    if (StringUtils.isEmpty(url)) {
        return null; // NOTE(review): original author asked whether siteUrl should be returned here
    }
    // Absolute URLs, secure or not, need no base.
    if (url.startsWith("http://") || url.startsWith("https://")) {
        return url;
    }

    // Resolve against the scheme the site itself uses.
    String scheme = (siteUrl != null && siteUrl.startsWith("https://")) ? "https://" : "http://";
    String domain = StringUtils.substringBetween(siteUrl, scheme, "/");
    if (domain == null) {
        // siteUrl has no path component: append directly.
        return siteUrl + (url.startsWith("/") ? "" : "/") + url;
    }
    if (url.startsWith("/")) {
        // Host-relative: keep only scheme and host of the site.
        return scheme + domain + url;
    }
    // Document-relative: replace the last path segment of the site URL.
    int lastIndex = StringUtils.lastIndexOf(siteUrl, "/");
    return siteUrl.substring(0, lastIndex) + "/" + url;
}

From source file:opennlp.tools.similarity.apps.WebSearchEngineResultsScraper.java

/**
 * Collects the contents of the {@code <cite>} elements found in the results
 * area of a search page.
 *
 * <p>The results area is the text between {@code ">Advanced</a></div>"} and
 * {@code "<input type=\"text\" value="}. {@code <strong>} markup is stripped
 * from each entry, and entries equal to {@code domain} are left out.
 *
 * @param content page HTML, may be {@code null}
 * @param domain  entry to exclude from the result
 * @return the extracted entries; empty when the page or its results area is missing
 */
private static List<String> extractURLsFromPage(String content, String domain) {
    List<String> found = new ArrayList<String>();
    if (content == null) {
        return found;
    }

    String resultsArea = StringUtils.substringBetween(content, ">Advanced</a></div>",
            "<input type=\"text\" value=");
    if (resultsArea == null) {
        return found;
    }

    for (String chunk : resultsArea.split("<cite>")) {
        int citeEnd = chunk.indexOf("</cite>");
        if (citeEnd <= 0) {
            // No closing tag, or nothing in front of it.
            continue;
        }
        String candidate = chunk.substring(0, citeEnd);
        candidate = candidate.replace("</strong>", "");
        candidate = candidate.replace("<strong>", "");
        if (!candidate.equals(domain)) {
            found.add(candidate);
        }
    }
    return found;
}

From source file:opennlp.tools.similarity.apps.WebSearchEngineResultsScraper.java

/**
 * Parses the hits out of a search-engine results page.
 *
 * <p>The results area is the text between {@code "<div id=\"results"} and
 * {@code "class=\"pagination"}; it is split on {@code "</p>"} and each piece
 * is scanned for a URL, a title and an abstract. Pieces that are too short,
 * have no title, or otherwise fail to parse are skipped.
 *
 * @param content page HTML, may be {@code null}
 * @return the parsed hits; empty when the page or its results area is missing
 */
private static List<HitBase> extractSearchResultFromPage(String content) {
    List<HitBase> results = new ArrayList<HitBase>();
    if (content == null) {
        return results;
    }
    content = StringUtils.substringBetween(content, "<div id=\"results", "class=\"pagination");
    if (content == null) {
        return results;
    }

    for (String piece : content.split("</p>")) {
        // Each piece is expected to start with a five-character prefix that is dropped.
        if (piece.length() < 5) {
            continue;
        }
        try {
            String u = piece.substring(5);

            String url = StringUtils.substringBetween(u, "class=\"url", "</span>");
            if (url != null) {
                url = url.substring(2);
            }

            String title = StringUtils.substringBetween(u, "\">", "</a><br />");
            if (title == null) {
                // A piece without a title is not a search result.
                continue;
            }
            // Strip a leading nested tag if there is one; without this check a
            // title with no nested tag lost its first character.
            int nestedTagEnd = title.indexOf("\">");
            if (nestedTagEnd >= 0) {
                title = title.substring(nestedTagEnd + 2);
            }

            String abstr = StringUtils.substringBetween(u, "\"body\">", "</span><br /");

            HitBase hit = new HitBase();
            hit.setUrl(url);
            hit.setAbstractText(abstr);
            hit.setTitle(title);
            results.add(hit);
        } catch (Exception e) {
            //problem parsing SERP page; source - specific problem so we swallow exceptions here
        }
    }

    return results;
}

From source file:org.alfresco.repo.security.authentication.ResetPasswordServiceImplTest.java

/**
 * Extracts the workflow id and the reset key from a reset-password URL.
 *
 * <p>Example URL:
 * {@code http://localhost:8081/share/page/reset-password?key=164e37bf-2590-414e-94db-8b8cfe5be790&id=activiti$156}
 *
 * @param url reset-password URL; asserted to be non-null
 * @return a pair whose first element is the text after "id=" (trimmed, or
 *         {@code null} if blank) and whose second element is the text
 *         between "key=" and "&id="
 */
public static Pair<String, String> getWorkflowIdAndKeyFromUrl(String url) {
    assertNotNull(url);

    final String key = StringUtils.substringBetween(url, "key=", "&id=");
    final String rawId = StringUtils.substringAfter(url, "id=");

    return new Pair<>(StringUtils.trimToNull(rawId), key);
}