List of usage examples for org.jsoup.nodes.TextNode.getWholeText()
public String getWholeText()
From source file:Main.java
/**
 * Appends the content of {@code textNode} to {@code accum}.
 *
 * <p>When {@code preserveWhitespace} accepts the node's parent, the whole text is
 * appended unchanged; otherwise it is passed through
 * {@code StringUtil.appendNormalisedWhitespace}, together with the result of
 * {@code lastCharIsWhitespace(accum)}.
 *
 * @param accum    buffer receiving the text
 * @param textNode node whose whole text is appended
 */
private static void appendNormalisedText(StringBuilder accum, TextNode textNode) {
    final String wholeText = textNode.getWholeText();

    if (!preserveWhitespace(textNode.parentNode())) {
        StringUtil.appendNormalisedWhitespace(accum, wholeText, lastCharIsWhitespace(accum));
        return;
    }

    accum.append(wholeText);
}
From source file:by.heap.remark.convert.TextCleaner.java
/**
 * Reads the text of {@code tn} and trims its leading and/or trailing side
 * depending on the neighbouring nodes.
 *
 * <p>A side is trimmed when the sibling on that side is a block (per
 * {@code isBlock}), when there is no sibling on that side and the parent is a
 * block, or - only if {@code normalText} is set - when the sibling is a text node
 * whose text matches {@code EMPTY_MATCHER}. The trailing side is only examined if
 * the text is non-empty after the leading side has been handled.
 *
 * @param tn         the text node to read
 * @param normalText {@code true} to start from {@code tn.text()}, {@code false}
 *                   to start from {@code tn.getWholeText()}
 * @return the possibly trimmed text
 */
private String getTextNodeText(TextNode tn, boolean normalText) {
    String result;
    if (normalText) {
        result = tn.text();
    } else {
        result = tn.getWholeText();
    }

    final Node before = tn.previousSibling();
    final Node after = tn.nextSibling();
    final boolean insideBlock = isBlock(tn.parent());

    // Leading side.
    final boolean trimLeft;
    if (isBlock(before) || (before == null && insideBlock)) {
        trimLeft = true;
    } else if (normalText && before instanceof TextNode) {
        trimLeft = EMPTY_MATCHER.matcher(((TextNode) before).text()).matches();
    } else {
        trimLeft = false;
    }
    if (trimLeft) {
        result = ltrim(result);
    }

    // Nothing left to trim on the trailing side.
    if (result.length() == 0) {
        return result;
    }

    // Trailing side.
    final boolean trimRight;
    if (isBlock(after) || (after == null && insideBlock)) {
        trimRight = true;
    } else if (normalText && after instanceof TextNode) {
        trimRight = EMPTY_MATCHER.matcher(((TextNode) after).text()).matches();
    } else {
        trimRight = false;
    }
    if (trimRight) {
        result = rtrim(result);
    }

    return result;
}
From source file:jodtemplate.pptx.style.HtmlStylizer.java
/**
 * Builds an {@code R_ELEMENT} element that wraps the text of {@code textNode}.
 *
 * <p>The result of {@code applyFormatting(tags, arPr, slide)} is added as the
 * first child only when it carries attributes or content. A {@code T_ELEMENT}
 * child holding the node's whole text is always added.
 *
 * @param tags     HTML elements passed on to {@code applyFormatting}
 * @param arPr     properties element passed on to {@code applyFormatting}
 * @param textNode source of the text content
 * @param slide    slide passed on to {@code applyFormatting}
 * @return the newly created element
 */
private Element createTextElement(final List<org.jsoup.nodes.Element> tags, final Element arPr,
        final TextNode textNode, final Slide slide) {
    final Element run = new Element(PPTXDocument.R_ELEMENT, getDrawingmlNamespace());

    final Element runProperties = applyFormatting(tags, arPr, slide);
    final boolean carriesFormatting =
            runProperties.hasAttributes() || runProperties.getContentSize() != 0;
    if (carriesFormatting) {
        run.addContent(runProperties);
    }

    final Element textElement = new Element(PPTXDocument.T_ELEMENT, getDrawingmlNamespace());
    textElement.setText(textNode.getWholeText());
    run.addContent(textElement);

    return run;
}
From source file:mml.handler.post.MMLPostHTMLHandler.java
/**
 * Parses a paragraph-like element ("p" or "hN", often carrying a class) and
 * records it as a range over the text appended to {@code sb}.
 *
 * <p>The range is named after the element's {@code class} attribute, or
 * {@code defaultName} when that attribute is empty. Child {@code span} elements
 * go to {@code parseSpan}, other child elements to {@code parseOtherElement},
 * and text nodes are appended verbatim via {@code getWholeText()}.
 *
 * @param p the paragraph/heading element from the document fragment
 * @param defaultName the default name for the property
 * @throws JSONException declared for the parsing helpers invoked here
 */
private void parsePara(Element p, String defaultName) throws JSONException {
    final List<Node> kids = p.childNodes();

    String name = p.attr("class");
    if (name == null || name.isEmpty()) {
        name = defaultName;
    }

    if (isLineFormat(name) || prevWasMilestone) {
        ensure(1, false);
    } else {
        ensure(2, true);
    }

    final int start = sb.length();
    final Range range = new Range(name, start, 0);
    stil.add(range);

    for (final Node kid : kids) {
        if (kid instanceof Element) {
            final Element element = (Element) kid;
            final String tag = kid.nodeName().toLowerCase();
            if ("span".equals(tag)) {
                parseSpan(element);
            } else {
                parseOtherElement(element);
            }
            continue;
        }
        if (kid instanceof TextNode) {
            sb.append(((TextNode) kid).getWholeText());
        }
    }

    ensure(isLineFormat(name) ? 1 : 2, true);

    // The range was registered with length 0; set it to the amount of text appended since.
    this.stil.updateLen(range, sb.length() - start);
    prevWasMilestone = false;
}
From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java
@Override public List<SearchField> getSearchFields() throws IOException, JSONException { if (!initialised) { start();//from w ww . j a v a 2 s . c om } List<SearchField> fields = new ArrayList<>(); // Read branches and media types List<NameValuePair> nameValuePairs = new ArrayList<>(2); nameValuePairs.add(new BasicNameValuePair("link_profis.x", "0")); nameValuePairs.add(new BasicNameValuePair("link_profis.y", "1")); String html = httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(nameValuePairs), getDefaultEncoding()); Document doc = Jsoup.parse(html); Elements fieldElems = doc.select(".suchfeldinhalt"); for (Element fieldElem : fieldElems) { String name = fieldElem.select(".suchfeld_inhalt_titel label").text(); String hint = ""; if (fieldElem.select(".suchfeld_inhalt_input").size() > 0) { List<TextNode> textNodes = fieldElem.select(".suchfeld_inhalt_input").first().textNodes(); if (textNodes.size() > 0) { for (TextNode node : textNodes) { String text = node.getWholeText().replace("\n", ""); if (!text.equals("")) { hint = node.getWholeText().replace("\n", ""); break; } } } } Elements inputs = fieldElem .select(".suchfeld_inhalt_input input[type=text], " + ".suchfeld_inhalt_input select"); if (inputs.size() == 1) { fields.add(createSearchField(name, hint, inputs.get(0))); } else if (inputs.size() == 2 && inputs.select("input[type=text]").size() == 2) { // Two text fields, e.g. year from/to or two keywords fields.add(createSearchField(name, hint, inputs.get(0))); TextSearchField secondField = (TextSearchField) createSearchField(name, hint, inputs.get(1)); secondField.setHalfWidth(true); fields.add(secondField); } else if (inputs.size() == 2 && inputs.get(0).tagName().equals("select") && inputs.get(1).tagName().equals("input") && inputs.get(0).attr("name").equals("feld1")) { // A dropdown to select from different search field types. // Break it down into single text fields. 
for (Element option : inputs.get(0).select("option")) { TextSearchField field = new TextSearchField(); field.setHint(hint); field.setDisplayName(option.text()); field.setId(inputs.get(1).attr("name") + "$" + option.attr("value")); JSONObject data = new JSONObject(); JSONObject params = new JSONObject(); params.put(inputs.get(0).attr("name"), option.attr("value")); data.put("additional_params", params); field.setData(data); fields.add(field); } } } DropdownSearchField orderField = new DropdownSearchField("orderselect", stringProvider.getString(StringProvider.ORDER), false, null); orderField.addDropdownValue("1", stringProvider.getString(StringProvider.ORDER_DEFAULT)); orderField.addDropdownValue("2:desc", stringProvider.getString(StringProvider.ORDER_YEAR_DESC)); orderField.addDropdownValue("2:asc", stringProvider.getString(StringProvider.ORDER_YEAR_ASC)); orderField.addDropdownValue("3:desc", stringProvider.getString(StringProvider.ORDER_CATEGORY_DESC)); orderField.addDropdownValue("3:asc", stringProvider.getString(StringProvider.ORDER_CATEGORY_ASC)); orderField.setMeaning(Meaning.ORDER); fields.add(orderField); return fields; }
From source file:org.apache.eagle.security.hive.jobrunning.HiveJobFetchSpout.java
/**
 * Fetches the job-history configuration page of every job in {@code mrJobs},
 * collects the rows of the {@code table[id=conf]} table into a key/value map and
 * emits that map when it contains {@code Constants.HIVE_QUERY_STRING}.
 *
 * <p>For the Hive query row the value is taken from the cell's direct text
 * nodes via {@code getWholeText()} and stripped; if the cell has several text
 * nodes the last one wins. For all other rows the cell's {@code text()} is used.
 *
 * @param appInfo application whose user is attached to each emitted tuple
 * @param mrJobs  jobs whose configuration is fetched
 * @return {@code true} if every job was processed; {@code false} as soon as
 *         fetching or parsing one job fails (remaining jobs are skipped)
 */
private boolean fetchFinishedConfig(AppInfo appInfo, List<MRJob> mrJobs) {
    for (MRJob mrJob : mrJobs) {
        String urlString = crawlConfig.endPointConfig.HSBasePath + "jobhistory/conf/" + mrJob.getId() + "?"
                + Constants.ANONYMOUS_PARAMETER;
        // Scoped per job so a failure to open this job's stream never re-closes
        // the (already closed) stream of a previous job.
        InputStream is = null;
        try {
            LOG.info("fetch job conf from {}", urlString);
            is = InputStreamUtils.getInputStream(urlString, null, Constants.CompressionType.NONE);
            final org.jsoup.nodes.Document doc = Jsoup.parse(is, "UTF-8", urlString);
            doc.outputSettings().prettyPrint(false);
            org.jsoup.select.Elements rows = doc.select("table[id=conf]").select("tbody").select("tr");

            Map<String, String> hiveQueryLog = new HashMap<>();
            for (org.jsoup.nodes.Element row : rows) {
                org.jsoup.select.Elements tds = row.children();
                String key = tds.get(0).text();
                org.jsoup.nodes.Element valueElement = tds.get(1);
                String value = "";
                if (Constants.HIVE_QUERY_STRING.equals(key)) {
                    // Use the whole text of the text nodes rather than text() for the query.
                    for (org.jsoup.nodes.Node child : valueElement.childNodes()) {
                        if (child instanceof TextNode) {
                            value = StringUtils.strip(((TextNode) child).getWholeText());
                        }
                    }
                } else {
                    value = valueElement.text();
                }
                hiveQueryLog.put(key, value);
            }

            if (hiveQueryLog.containsKey(Constants.HIVE_QUERY_STRING)) {
                collector.emit(new ValuesArray(appInfo.getUser(), mrJob.getId(),
                        Constants.ResourceType.JOB_CONFIGURATION, hiveQueryLog), mrJob.getId());
            }
        } catch (Exception e) {
            // NOTE(review): assumes LOG is an SLF4J-style logger, where a trailing
            // Throwable without a placeholder is logged with its stack trace.
            // This replaces the former e.printStackTrace() to stderr.
            LOG.warn("fetch job conf from {} failed", urlString, e);
            return false;
        } finally {
            Utils.closeInputStream(is);
        }
    }
    return true;
}
From source file:org.apache.james.jmap.utils.JsoupHtmlTextExtractor.java
private String convertNodeToText(HTMLNode htmlNode) { Node node = htmlNode.underlyingNode; if (node instanceof TextNode) { TextNode textNode = (TextNode) node; return textNode.getWholeText(); }//from w w w . ja v a 2s.c o m if (node instanceof Element) { Element element = (Element) node; if (element.tagName().equals(BR_TAG)) { return "\n"; } if (isList(element)) { return convertListElement(htmlNode.listNestedLevel); } if (element.tagName().equals(OL_TAG)) { return "\n\n"; } if (element.tagName().equals(LI_TAG)) { return "\n" + StringUtils.repeat(" ", htmlNode.listNestedLevel) + "- "; } if (element.tagName().equals(P_TAG)) { return "\n\n"; } if (element.tagName().equals(IMG_TAG)) { return generateImageAlternativeText(element); } } return ""; }
From source file:org.b3log.symphony.util.Markdowns.java
/** * Converts the specified markdown text to HTML. * * @param markdownText the specified markdown text * @return converted HTML, returns an empty string "" if the specified markdown text is "" or {@code null}, returns * 'markdownErrorLabel' if exception/*from www .ja va 2 s.c o m*/ */ public static String toHTML(final String markdownText) { if (Strings.isEmptyOrNull(markdownText)) { return ""; } final String cachedHTML = getHTML(markdownText); if (null != cachedHTML) { return cachedHTML; } final ExecutorService pool = Executors.newSingleThreadExecutor(); final long[] threadId = new long[1]; final Callable<String> call = () -> { threadId[0] = Thread.currentThread().getId(); String html = LANG_PROPS_SERVICE.get("contentRenderFailedLabel"); if (MARKED_AVAILABLE) { html = toHtmlByMarked(markdownText); if (!StringUtils.startsWith(html, "<p>")) { html = "<p>" + html + "</p>"; } } else { com.vladsch.flexmark.ast.Node document = PARSER.parse(markdownText); html = RENDERER.render(document); if (!StringUtils.startsWith(html, "<p>")) { html = "<p>" + html + "</p>"; } } final Document doc = Jsoup.parse(html); final List<org.jsoup.nodes.Node> toRemove = new ArrayList<>(); doc.traverse(new NodeVisitor() { @Override public void head(final org.jsoup.nodes.Node node, int depth) { if (node instanceof org.jsoup.nodes.TextNode) { final org.jsoup.nodes.TextNode textNode = (org.jsoup.nodes.TextNode) node; final org.jsoup.nodes.Node parent = textNode.parent(); if (parent instanceof Element) { final Element parentElem = (Element) parent; if (!parentElem.tagName().equals("code")) { String text = textNode.getWholeText(); boolean nextIsBr = false; final org.jsoup.nodes.Node nextSibling = textNode.nextSibling(); if (nextSibling instanceof Element) { nextIsBr = "br".equalsIgnoreCase(((Element) nextSibling).tagName()); } if (null != userQueryService) { try { final Set<String> userNames = userQueryService.getUserNames(text); for (final String userName : userNames) { text = text.replace('@' + 
userName + (nextIsBr ? "" : " "), "@<a href='" + Latkes.getServePath() + "/member/" + userName + "'>" + userName + "</a> "); } text = text.replace("@participants ", "@<a href='https://hacpai.com/article/1458053458339' class='ft-red'>participants</a> "); } finally { JdbcRepository.dispose(); } } if (text.contains("@<a href=")) { final List<org.jsoup.nodes.Node> nodes = Parser.parseFragment(text, parentElem, ""); final int index = textNode.siblingIndex(); parentElem.insertChildren(index, nodes); toRemove.add(node); } else { textNode.text(Pangu.spacingText(text)); } } } } } @Override public void tail(org.jsoup.nodes.Node node, int depth) { } }); toRemove.forEach(node -> node.remove()); doc.select("pre>code").addClass("hljs"); doc.select("a").forEach(a -> { String src = a.attr("href"); if (!StringUtils.startsWithIgnoreCase(src, Latkes.getServePath())) { try { src = URLEncoder.encode(src, "UTF-8"); } catch (final Exception e) { } a.attr("href", Latkes.getServePath() + "/forward?goto=" + src); a.attr("target", "_blank"); } }); doc.outputSettings().prettyPrint(false); String ret = doc.select("body").html(); ret = StringUtils.trim(ret); // cache it putHTML(markdownText, ret); return ret; }; Stopwatchs.start("Md to HTML"); try { final Future<String> future = pool.submit(call); return future.get(MD_TIMEOUT, TimeUnit.MILLISECONDS); } catch (final TimeoutException e) { LOGGER.log(Level.ERROR, "Markdown timeout [md=" + markdownText + "]"); Callstacks.printCallstack(Level.ERROR, new String[] { "org.b3log" }, null); final Set<Thread> threads = Thread.getAllStackTraces().keySet(); for (final Thread thread : threads) { if (thread.getId() == threadId[0]) { thread.stop(); break; } } } catch (final Exception e) { LOGGER.log(Level.ERROR, "Markdown failed [md=" + markdownText + "]", e); } finally { pool.shutdownNow(); Stopwatchs.end(); } return LANG_PROPS_SERVICE.get("contentRenderFailedLabel"); }
From source file:org.dswarm.xmlenhancer.XMLEnhancer.java
/**
 * Prints {@code node} to {@code out} with entities unescaped.
 *
 * <p>The owner document's output settings are first switched to xhtml escape
 * mode, XML syntax and no pretty printing. A text node is printed via its whole
 * text; any other node is serialised with {@code toString()} and run through
 * {@code Parser.unescapeEntities(..., true)}.
 *
 * @param out  writer receiving the output
 * @param node node to print
 */
private static void unescapeEntity(final PrintWriter out, final Node node) {
    node.ownerDocument().outputSettings()
            .escapeMode(Entities.EscapeMode.xhtml)
            .syntax(Document.OutputSettings.Syntax.xml)
            .prettyPrint(false);

    if (!(node instanceof TextNode)) {
        final String serialised = node.toString();
        out.print(Parser.unescapeEntities(serialised, true));
        return;
    }

    out.print(((TextNode) node).getWholeText());
}
From source file:org.dswarm.xmlenhancer.XMLEnhancer.java
private static void enhanceTextNode(final Node node) { final TextNode textNode = (TextNode) node; final String wholeText = textNode.getWholeText(); final String text = node.toString(); if (text.trim().isEmpty()) { return;// ww w .j av a2s.c o m } if (wholeText.startsWith(START_CDATA)) { // do not add CDATA multiple times return; } final String alignedText = alignTextWithWholeText(wholeText, text); final String unescapeEntities = String.format("%s%s%s", START_CDATA, alignedText, END_CDATA); textNode.text(unescapeEntities); }