List of usage examples for org.apache.commons.lang.StringUtils.substringBetween
public static String substringBetween(String str, String open, String close)
Gets the String that is nested in between two Strings.
From source file:opennlp.tools.parse_thicket.external_rst.RstNode.java
public RstNode(String line) { if (StringUtils.trim(line).startsWith(")")) return;/*from w w w. j av a2 s . c o m*/ level = line.indexOf("("); line = line.substring(line.indexOf("(") + 2); isNucleus = line.substring(0, line.indexOf("(")).indexOf("Nucleus") > -1; line = line.substring(line.indexOf("(") + 1); if (line.startsWith("span")) { line = line.substring(5); try { span = new Pair<Integer, Integer>(); String[] spanStr = line.substring(0, line.indexOf(")")).split(" "); span.setFirst(Integer.parseInt(spanStr[0])); span.setSecond(Integer.parseInt(spanStr[1])); } catch (Exception e) { e.printStackTrace(); } } else if (line.startsWith("leaf")) { try { String leafStr = line.substring(5, line.indexOf(")")); leaf = Integer.parseInt(leafStr); } catch (Exception e) { e.printStackTrace(); } } else System.err.println("Problem parsing RST results: '" + line); line = line.substring(line.indexOf("rel2par") + 8); rel2par = line.substring(0, line.indexOf(")")).trim(); text = StringUtils.substringBetween(line, "_!", "_!)"); }
From source file:opennlp.tools.parse_thicket.kernel_interface.style_classif.TSNE_ImporterProcessor.java
public void importFileCreatClassifDirs() { Map<Integer, String> id_Text = new HashMap<Integer, String>(); Map<Integer, String> id_Label = new HashMap<Integer, String>(); try {//from ww w. ja v a 2 s .com FileUtils.cleanDirectory(new File(resourceWorkDir + "/txt")); } catch (IOException e2) { e2.printStackTrace(); } String text = null; try { text = FileUtils.readFileToString(new File(resourceWorkDir + importFilePath), Charset.defaultCharset().toString()); } catch (IOException e) { e.printStackTrace(); } String[] portions = StringUtils.substringsBetween(text, "<text ", "/text>"); for (int i = 0; i < portions.length; i++) { String label = StringUtils.substringBetween(portions[i], "id=\"", "\">"); String po = StringUtils.substringBetween(portions[i], "\">", "<"); id_Text.put(i, po); id_Label.put(i, label); if (true) { String localDirName = label.substring(0, 4); if (!new File(resourceWorkDir + "txt/" + localDirName).exists()) try { FileUtils.forceMkdir(new File(resourceWorkDir + "txt/" + localDirName)); } catch (IOException e1) { e1.printStackTrace(); } try { label = label.replace('/', '_'); String fullPath = resourceWorkDir + "txt/" + localDirName + "/" + i + label + ".txt"; FileUtils.writeStringToFile(new File(fullPath), po); } catch (IOException e) { e.printStackTrace(); } } } }
From source file:opennlp.tools.parse_thicket.opinion_processor.YouTubeMiner.java
public YouTubeMinerResult getData(String url) { YouTubeMinerResult result = new YouTubeMinerResult(); String content = fetcher.fetchOrigHTML(url); try {// w w w. ja v a 2 s. co m FileUtils.writeStringToFile(new File(url.replace(':', '_').replace('/', '_')), content); } catch (IOException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } if (url.indexOf("channel") > -1) { try { // subscriber-count" title="30" String subscribersStr = StringUtils.substringBetween(content, "subscriber-count", "tabindex"); String dirtyNumber = StringUtils.substringBetween(subscribersStr, "title=\"", "\""); String cleanNumber = dirtyNumber.replaceAll("[^\\x00-\\x7F]", ""); if (cleanNumber != null) { int subscribers = Integer.parseInt(cleanNumber); result.subscribers = subscribers; } else { System.err.println("Not found data for 'subscriber-count', 'tabindex'"); } } catch (NumberFormatException e) { // TODO Auto-generated catch block e.printStackTrace(); } } else { try { String subscribersStr = StringUtils.substringBetween(content, "subscriber-count", "tabindex"); String dirtyNumber = StringUtils.substringBetween(subscribersStr, "title=\"", "\"").replace(" ", ""); if (dirtyNumber != null) { int subscribers = Integer.parseInt(dirtyNumber); result.subscribers = subscribers; } else { System.err.println("Not found data for 'subscriber-count', 'tabindex'"); } String viewsStrDirty = StringUtils.substringBetween(content, // "div class=\"watch-view-count\">"," views</div>"); // view-count">12 ?</div> "view-count", "<div>"); String viewsStr = StringUtils.substringBetween(viewsStrDirty, ">", " "); if (viewsStr != null) { int views = Integer.parseInt(viewsStr); result.views = views; } else { System.err.println("Not found data for 'view-count','<div>'"); } } catch (NumberFormatException e) { // TODO Auto-generated catch block e.printStackTrace(); } } return result; }
From source file:opennlp.tools.similarity.apps.solr.IterativeQueryComponent.java
private ResponseBuilder substituteField(ResponseBuilder rb, String newFieldName) { SolrParams params = rb.req.getParams(); String query = params.get("q"); String currField = StringUtils.substringBetween(" " + query, " ", ":"); if (currField != null && newFieldName != null) query = query.replace(currField, newFieldName); NamedList values = params.toNamedList(); values.remove("q"); values.add("q", query); params = SolrParams.toSolrParams(values); rb.req.setParams(params);/* ww w.j a va 2 s . c o m*/ rb.setQueryString(query); String defType = params.get(QueryParsing.DEFTYPE, QParserPlugin.DEFAULT_QTYPE); // get it from the response builder to give a different component a chance // to set it. String queryString = rb.getQueryString(); if (queryString == null) { // this is the normal way it's set. queryString = params.get(CommonParams.Q); rb.setQueryString(queryString); } QParser parser = null; try { parser = QParser.getParser(rb.getQueryString(), defType, rb.req); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } Query q = null; try { q = parser.getQuery(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } if (q == null) { // normalize a null query to a query that matches nothing q = new BooleanQuery(); } rb.setQuery(q); try { rb.setSortSpec(parser.getSort(true)); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } rb.setQparser(parser); /* try { rb.setScoreDoc(parser.getPaging()); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } */ String[] fqs = rb.req.getParams().getParams(CommonParams.FQ); if (fqs != null && fqs.length != 0) { List<Query> filters = rb.getFilters(); if (filters == null) { filters = new ArrayList<Query>(fqs.length); } for (String fq : fqs) { if (fq != null && fq.trim().length() != 0) { QParser fqp = null; try { fqp = QParser.getParser(fq, null, rb.req); } catch (Exception e) { // TODO Auto-generated catch block 
e.printStackTrace(); } try { filters.add(fqp.getQuery()); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } } // only set the filters if they are not empty otherwise // fq=&someotherParam= will trigger all docs filter for every request // if filter cache is disabled if (!filters.isEmpty()) { rb.setFilters(filters); } } return rb; }
From source file:opennlp.tools.similarity.apps.solr.IterativeSearchRequestHandler.java
public static SolrQueryRequest substituteField(SolrQueryRequest req, String newFieldName) { SolrParams params = req.getParams(); String query = params.get("q"); String currField = StringUtils.substringBetween(" " + query, " ", ":"); if (currField != null && newFieldName != null) query = query.replace(currField, newFieldName); NamedList values = params.toNamedList(); values.remove("q"); values.add("q", query); params = SolrParams.toSolrParams(values); req.setParams(params);// ww w.j av a 2 s . co m return req; }
From source file:opennlp.tools.similarity.apps.solr.IterativeSearchRequestHandler.java
public DocList filterResultsBySyntMatchReduceDocSet(DocList docList, SolrQueryRequest req, SolrParams params) { //if (!docList.hasScores()) // return docList; int len = docList.size(); if (len < 1) // do nothing return docList; ParserChunker2MatcherProcessor pos = ParserChunker2MatcherProcessor.getInstance(); DocIterator iter = docList.iterator(); float[] syntMatchScoreArr = new float[len]; String requestExpression = req.getParamString(); String[] exprParts = requestExpression.split("&"); for (String part : exprParts) { if (part.startsWith("q=")) requestExpression = part;//from w w w . java 2s .com } String fieldNameQuery = StringUtils.substringBetween(requestExpression, "=", ":"); // extract phrase query (in double-quotes) String[] queryParts = requestExpression.split("\""); if (queryParts.length >= 2 && queryParts[1].length() > 5) requestExpression = queryParts[1].replace('+', ' '); else if (requestExpression.indexOf(":") > -1) {// still field-based expression requestExpression = requestExpression.replaceAll(fieldNameQuery + ":", "").replace('+', ' ') .replaceAll(" ", " ").replace("q=", ""); } if (fieldNameQuery == null) return docList; if (requestExpression == null || requestExpression.length() < 5 || requestExpression.split(" ").length < 3) return docList; int[] docIDsHits = new int[len]; IndexReader indexReader = req.getSearcher().getIndexReader(); List<Integer> bestMatchesDocIds = new ArrayList<Integer>(); List<Float> bestMatchesScore = new ArrayList<Float>(); List<Pair<Integer, Float>> docIdsScores = new ArrayList<Pair<Integer, Float>>(); try { for (int i = 0; i < docList.size(); ++i) { int docId = iter.nextDoc(); docIDsHits[i] = docId; Document doc = indexReader.document(docId); // get text for event String answerText = doc.get(fieldNameQuery); if (answerText == null) continue; SentencePairMatchResult matchResult = pos.assessRelevance(requestExpression, answerText); float syntMatchScore = new Double( 
parseTreeChunkListScorer.getParseTreeChunkListScore(matchResult.getMatchResult())) .floatValue(); bestMatchesDocIds.add(docId); bestMatchesScore.add(syntMatchScore); syntMatchScoreArr[i] = (float) syntMatchScore; //*iter.score(); System.out.println(" Matched query = '" + requestExpression + "' with answer = '" + answerText + "' | doc_id = '" + docId); System.out.println(" Match result = '" + matchResult.getMatchResult() + "' with score = '" + syntMatchScore + "';"); docIdsScores.add(new Pair(docId, syntMatchScore)); } } catch (CorruptIndexException e1) { // TODO Auto-generated catch block e1.printStackTrace(); //log.severe("Corrupt index"+e1); } catch (IOException e1) { // TODO Auto-generated catch block e1.printStackTrace(); //log.severe("File read IO / index"+e1); } Collections.sort(docIdsScores, new PairComparable()); for (int i = 0; i < docIdsScores.size(); i++) { bestMatchesDocIds.set(i, docIdsScores.get(i).getFirst()); bestMatchesScore.set(i, docIdsScores.get(i).getSecond()); } System.out.println(bestMatchesScore); float maxScore = docList.maxScore(); // do not change int limit = docIdsScores.size(); int start = 0; DocSlice ds = null; ds = new DocSlice(start, limit, ArrayUtils.toPrimitive(bestMatchesDocIds.toArray(new Integer[0])), ArrayUtils.toPrimitive(bestMatchesScore.toArray(new Float[0])), bestMatchesDocIds.size(), maxScore); return ds; }
From source file:opennlp.tools.similarity.apps.utils.Utils.java
public static String addDomainToUrl(String siteUrl, String url) { if (StringUtils.isEmpty(url)) { return null; // should we return siteUrl here ?? }/* www .j a v a2s . c o m*/ if (!url.startsWith("http://")) { String domain = StringUtils.substringBetween(siteUrl, "http://", "/"); if (domain == null) { url = siteUrl + (url.startsWith("/") ? "" : "/") + url; } else { if (!url.startsWith("/")) { int lastIndex = StringUtils.lastIndexOf(siteUrl, "/"); url = siteUrl.substring(0, lastIndex) + "/" + url; } else { url = "http://" + domain + url; } } } return url; }
From source file:opennlp.tools.similarity.apps.WebSearchEngineResultsScraper.java
/**
 * Collects the {@code <cite>} entries of a search results page.
 *
 * <p>Only the section between ">Advanced</a></div>" and the following text input is scanned.
 * {@code <strong>} tags are stripped from each entry, and entries equal to {@code domain} are
 * left out.
 *
 * @param content page HTML; may be {@code null}
 * @param domain  entry to exclude from the result
 * @return the entries found, in page order; empty when the page or the section is missing
 */
private static List<String> extractURLsFromPage(String content, String domain) {
    final List<String> found = new ArrayList<String>();
    if (content == null) {
        return found;
    }

    final String resultsArea = StringUtils.substringBetween(content, ">Advanced</a></div>",
            "<input type=\"text\" value=");
    if (resultsArea == null) {
        return found;
    }

    for (String fragment : resultsArea.split("<cite>")) {
        final int citeEnd = fragment.indexOf("</cite>");
        if (citeEnd <= 0) {
            continue;
        }
        final String candidate = fragment.substring(0, citeEnd)
                .replace("</strong>", "")
                .replace("<strong>", "");
        if (!candidate.equals(domain)) {
            found.add(candidate);
        }
    }
    return found;
}
From source file:opennlp.tools.similarity.apps.WebSearchEngineResultsScraper.java
private static List<HitBase> extractSearchResultFromPage(String content) { List<HitBase> results = new ArrayList<HitBase>(); if (content == null) return results; content = StringUtils.substringBetween(content, "<div id=\"results", "class=\"pagination"); if (content == null) return results; String[] srchResArea = content.split("</p>"); if (srchResArea == null) return results; for (String u : srchResArea) { try {//from ww w . j av a2s. co m u = u.substring(5); HitBase hit = new HitBase(); String url = StringUtils.substringBetween(u, "class=\"url", "</span>"); if (url != null) url = url.substring(2); String title = StringUtils.substringBetween(u, "\">", "</a><br />"); title = title.substring(title.indexOf("\">") + 2); String abstr = StringUtils.substringBetween(u, "\"body\">", "</span><br /"); hit.setUrl(url); hit.setAbstractText(abstr); hit.setTitle(title); results.add(hit); } catch (Exception e) { //problem parsing SERP page; source - specific problem so we swallow exceptions here } } return results; }
From source file:org.alfresco.repo.security.authentication.ResetPasswordServiceImplTest.java
public static Pair<String, String> getWorkflowIdAndKeyFromUrl(String url) { //url example: http://localhost:8081/share/page/reset-password?key=164e37bf-2590-414e-94db-8b8cfe5be790&id=activiti$156 assertNotNull(url);// w ww. j a v a 2 s . c o m String id = StringUtils.trimToNull(StringUtils.substringAfter(url, "id=")); String key = StringUtils.substringBetween(url, "key=", "&id="); Pair<String, String> pair = new Pair<>(id, key); return pair; }