Example usage for org.jsoup.nodes TextNode getWholeText

List of usage examples for org.jsoup.nodes TextNode getWholeText

Introduction

On this page you can find example usages of org.jsoup.nodes.TextNode.getWholeText().

Prototype

public String getWholeText() 

Source Link

Document

Get the (unencoded) text of this text node, including any newlines and spaces present in the original.

Usage

From source file:Main.java

/**
 * Appends the text of {@code textNode} to {@code accum}.
 *
 * <p>The whole text is appended verbatim when {@code preserveWhitespace} accepts the node's
 * parent; otherwise it is appended through {@code StringUtil.appendNormalisedWhitespace}.
 *
 * @param accum buffer receiving the text
 * @param textNode node whose whole text is appended
 */
private static void appendNormalisedText(StringBuilder accum, TextNode textNode) {
    final String wholeText = textNode.getWholeText();
    final boolean keepVerbatim = preserveWhitespace(textNode.parentNode());

    if (!keepVerbatim) {
        StringUtil.appendNormalisedWhitespace(accum, wholeText, lastCharIsWhitespace(accum));
        return;
    }
    accum.append(wholeText);
}

From source file:by.heap.remark.convert.TextCleaner.java

/**
 * Returns the text of {@code tn}, trimmed on the left and/or right depending on its
 * neighbours.
 *
 * <p>A side is trimmed when the sibling on that side is a block, when there is no sibling on
 * that side and the parent is a block, or (only for {@code normalText}) when the sibling is a
 * text node whose text matches {@code EMPTY_MATCHER}. Right trimming is skipped when the text
 * is already empty after the left-hand step.
 *
 * @param tn the text node to read
 * @param normalText {@code true} to start from {@code tn.text()}, {@code false} to start
 *        from {@code tn.getWholeText()}
 * @return the possibly trimmed text
 */
private String getTextNodeText(TextNode tn, boolean normalText) {
    String result = normalText ? tn.text() : tn.getWholeText();
    final Node before = tn.previousSibling();
    final Node after = tn.nextSibling();
    final boolean parentIsBlock = isBlock(tn.parent());

    final boolean trimLeft = isBlock(before)
            || (before == null && parentIsBlock)
            || (normalText && before instanceof TextNode
                    && EMPTY_MATCHER.matcher(((TextNode) before).text()).matches());
    if (trimLeft) {
        result = ltrim(result);
    }

    // Nothing left to trim on the right-hand side.
    if (result.length() == 0) {
        return result;
    }

    final boolean trimRight = isBlock(after)
            || (after == null && parentIsBlock)
            || (normalText && after instanceof TextNode
                    && EMPTY_MATCHER.matcher(((TextNode) after).text()).matches());
    if (trimRight) {
        result = rtrim(result);
    }
    return result;
}

From source file:jodtemplate.pptx.style.HtmlStylizer.java

/**
 * Builds a {@code PPTXDocument.R_ELEMENT} element that wraps the whole text of
 * {@code textNode} in a {@code PPTXDocument.T_ELEMENT} child.
 *
 * <p>The element returned by {@code applyFormatting} is added in front of the text element,
 * but only when it has attributes or content.
 *
 * @param tags HTML tags passed on to {@code applyFormatting}
 * @param arPr properties element passed on to {@code applyFormatting}
 * @param textNode source of the text
 * @param slide slide passed on to {@code applyFormatting}
 * @return the newly created element
 */
private Element createTextElement(final List<org.jsoup.nodes.Element> tags, final Element arPr,
        final TextNode textNode, final Slide slide) {
    final Element run = new Element(PPTXDocument.R_ELEMENT, getDrawingmlNamespace());

    final Element properties = applyFormatting(tags, arPr, slide);
    final boolean propertiesCarryData = properties.hasAttributes() || properties.getContentSize() != 0;
    if (propertiesCarryData) {
        run.addContent(properties);
    }

    final Element textElement = new Element(PPTXDocument.T_ELEMENT, getDrawingmlNamespace());
    textElement.setText(textNode.getWholeText());
    run.addContent(textElement);

    return run;
}

From source file:mml.handler.post.MMLPostHTMLHandler.java

/**
 * Parse a paragraph. These may be "p" or "hN" elements, often with classes.
 *
 * <p>A range named after the element's class (or {@code defaultName} when the class is
 * empty) is registered at the current end of the text buffer. The element's children are
 * then appended, and the range length is updated to cover what was appended.
 *
 * @param p the paragraph/heading element from the document fragment
 * @param defaultName the default name for the property
 * @throws JSONException declared for the nested element handlers
 */
private void parsePara(Element p, String defaultName) throws JSONException {
    final List<Node> kids = p.childNodes();

    String name = p.attr("class");
    if (name == null || name.isEmpty()) {
        name = defaultName;
    }

    final boolean singleLeadingBreak = isLineFormat(name) || prevWasMilestone;
    if (singleLeadingBreak) {
        ensure(1, false);
    } else {
        ensure(2, true);
    }

    final int start = sb.length();
    final Range range = new Range(name, start, 0);
    stil.add(range);

    for (Node kid : kids) {
        if (kid instanceof Element) {
            final Element elem = (Element) kid;
            if ("span".equals(kid.nodeName().toLowerCase())) {
                parseSpan(elem);
            } else {
                parseOtherElement(elem);
            }
            continue;
        }
        if (kid instanceof TextNode) {
            sb.append(((TextNode) kid).getWholeText());
        }
    }

    ensure(isLineFormat(name) ? 1 : 2, true);

    this.stil.updateLen(range, sb.length() - start);
    prevWasMilestone = false;
}

From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java

/**
 * Builds the list of search fields by posting to {@code index.asp} and parsing every
 * {@code .suchfeldinhalt} element of the response.
 *
 * <p>Depending on the inputs found in a field element, it yields one field, two text fields
 * (the second at half width), or one text field per option of a {@code feld1} dropdown. A
 * fixed "orderselect" dropdown is appended at the end.
 *
 * @return the search fields in page order, followed by the order dropdown
 * @throws IOException declared for the HTTP request
 * @throws JSONException declared for building the per-field JSON data
 */
@Override
public List<SearchField> getSearchFields() throws IOException, JSONException {
    if (!initialised) {
        start();
    }

    final List<SearchField> result = new ArrayList<>();

    // Read branches and media types
    final List<NameValuePair> formParams = new ArrayList<>(2);
    formParams.add(new BasicNameValuePair("link_profis.x", "0"));
    formParams.add(new BasicNameValuePair("link_profis.y", "1"));
    final String html = httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(formParams),
            getDefaultEncoding());
    final Document doc = Jsoup.parse(html);

    for (Element fieldElem : doc.select(".suchfeldinhalt")) {
        final String name = fieldElem.select(".suchfeld_inhalt_titel label").text();

        // The hint is the first direct text node that is non-empty once newlines are removed.
        String hint = "";
        final Elements inputContainers = fieldElem.select(".suchfeld_inhalt_input");
        if (inputContainers.size() > 0) {
            for (TextNode node : inputContainers.first().textNodes()) {
                final String candidate = node.getWholeText().replace("\n", "");
                if (!candidate.equals("")) {
                    hint = candidate;
                    break;
                }
            }
        }

        final Elements inputs = fieldElem
                .select(".suchfeld_inhalt_input input[type=text], " + ".suchfeld_inhalt_input select");
        final int inputCount = inputs.size();

        if (inputCount == 1) {
            result.add(createSearchField(name, hint, inputs.get(0)));
        } else if (inputCount == 2) {
            if (inputs.select("input[type=text]").size() == 2) {
                // Two text fields, e.g. year from/to or two keywords
                result.add(createSearchField(name, hint, inputs.get(0)));
                final TextSearchField second = (TextSearchField) createSearchField(name, hint, inputs.get(1));
                second.setHalfWidth(true);
                result.add(second);
            } else if (inputs.get(0).tagName().equals("select") && inputs.get(1).tagName().equals("input")
                    && inputs.get(0).attr("name").equals("feld1")) {
                // A dropdown to select from different search field types.
                // Break it down into single text fields.
                final Element dropdown = inputs.get(0);
                final Element textInput = inputs.get(1);
                for (Element option : dropdown.select("option")) {
                    final TextSearchField field = new TextSearchField();
                    field.setHint(hint);
                    field.setDisplayName(option.text());
                    field.setId(textInput.attr("name") + "$" + option.attr("value"));

                    final JSONObject params = new JSONObject();
                    params.put(dropdown.attr("name"), option.attr("value"));
                    final JSONObject data = new JSONObject();
                    data.put("additional_params", params);
                    field.setData(data);

                    result.add(field);
                }
            }
        }
    }

    final DropdownSearchField order = new DropdownSearchField("orderselect",
            stringProvider.getString(StringProvider.ORDER), false, null);
    order.addDropdownValue("1", stringProvider.getString(StringProvider.ORDER_DEFAULT));
    order.addDropdownValue("2:desc", stringProvider.getString(StringProvider.ORDER_YEAR_DESC));
    order.addDropdownValue("2:asc", stringProvider.getString(StringProvider.ORDER_YEAR_ASC));
    order.addDropdownValue("3:desc", stringProvider.getString(StringProvider.ORDER_CATEGORY_DESC));
    order.addDropdownValue("3:asc", stringProvider.getString(StringProvider.ORDER_CATEGORY_ASC));
    order.setMeaning(Meaning.ORDER);
    result.add(order);

    return result;
}

From source file:org.apache.eagle.security.hive.jobrunning.HiveJobFetchSpout.java

/**
 * Fetches the {@code jobhistory/conf/<jobId>} page for each given job, reads the rows of its
 * {@code table[id=conf]} table into a key/value map and emits that map when it contains
 * {@code Constants.HIVE_QUERY_STRING}.
 *
 * @param appInfo application whose user is attached to each emitted tuple
 * @param mrJobs jobs whose configuration pages are fetched
 * @return {@code true} when every job was processed; {@code false} as soon as fetching or
 *         parsing one of them throws (remaining jobs are skipped)
 */
private boolean fetchFinishedConfig(AppInfo appInfo, List<MRJob> mrJobs) {
    for (MRJob mrJob : mrJobs) {
        String urlString = crawlConfig.endPointConfig.HSBasePath + "jobhistory/conf/" + mrJob.getId() + "?"
                + Constants.ANONYMOUS_PARAMETER;
        // Scoped to one iteration so the finally block only ever closes this job's stream.
        InputStream is = null;
        try {
            LOG.info("fetch job conf from {}", urlString);
            is = InputStreamUtils.getInputStream(urlString, null, Constants.CompressionType.NONE);
            final org.jsoup.nodes.Document doc = Jsoup.parse(is, "UTF-8", urlString);
            doc.outputSettings().prettyPrint(false);
            org.jsoup.select.Elements rows = doc.select("table[id=conf]").select("tbody").select("tr");
            Map<String, String> hiveQueryLog = new HashMap<>();
            for (org.jsoup.nodes.Element row : rows) {
                org.jsoup.select.Elements tds = row.children();
                String key = tds.get(0).text();
                String value = "";
                org.jsoup.nodes.Element valueElement = tds.get(1);
                if (Constants.HIVE_QUERY_STRING.equals(key)) {
                    // Use the raw text of the direct text children; if there are several,
                    // the last one wins.
                    for (org.jsoup.nodes.Node child : valueElement.childNodes()) {
                        if (child instanceof TextNode) {
                            TextNode valueTextNode = (TextNode) child;
                            value = valueTextNode.getWholeText();
                            value = StringUtils.strip(value);
                        }
                    }
                } else {
                    value = valueElement.text();
                }
                hiveQueryLog.put(key, value);
            }
            if (hiveQueryLog.containsKey(Constants.HIVE_QUERY_STRING)) {
                collector.emit(new ValuesArray(appInfo.getUser(), mrJob.getId(),
                        Constants.ResourceType.JOB_CONFIGURATION, hiveQueryLog), mrJob.getId());
            }
        } catch (Exception e) {
            // The exception is the trailing argument without a placeholder, so the logger
            // records its stack trace; nothing is printed to stderr.
            LOG.warn("fetch job conf from {} failed", urlString, e);
            return false;
        } finally {
            Utils.closeInputStream(is);
        }
    }
    return true;
}

From source file:org.apache.james.jmap.utils.JsoupHtmlTextExtractor.java

/**
 * Returns the plain-text contribution of a single HTML node.
 *
 * <p>Text nodes yield their whole text. Elements yield a fixed separator chosen by tag name
 * (or by {@code isList}), an image yields its alternative text, and anything else yields the
 * empty string.
 *
 * @param htmlNode wrapper holding the jsoup node and its list nesting level
 * @return the text for this node, never {@code null} for the fixed-string branches
 */
private String convertNodeToText(HTMLNode htmlNode) {
    final Node underlying = htmlNode.underlyingNode;

    if (underlying instanceof TextNode) {
        return ((TextNode) underlying).getWholeText();
    }
    if (!(underlying instanceof Element)) {
        return "";
    }

    final Element element = (Element) underlying;
    final String tag = element.tagName();

    // The order of these checks matters: the list check runs before the OL/LI checks.
    if (tag.equals(BR_TAG)) {
        return "\n";
    }
    if (isList(element)) {
        return convertListElement(htmlNode.listNestedLevel);
    }
    if (tag.equals(OL_TAG)) {
        return "\n\n";
    }
    if (tag.equals(LI_TAG)) {
        return "\n" + StringUtils.repeat(" ", htmlNode.listNestedLevel) + "- ";
    }
    if (tag.equals(P_TAG)) {
        return "\n\n";
    }
    if (tag.equals(IMG_TAG)) {
        return generateImageAlternativeText(element);
    }
    return "";
}

From source file:org.b3log.symphony.util.Markdowns.java

/**
 * Converts the specified markdown text to HTML.
 *
 * @param markdownText the specified markdown text
 * @return converted HTML, returns an empty string "" if the specified markdown text is "" or {@code null}, returns
 * the 'contentRenderFailedLabel' language property value if an exception or timeout occurs
 */
public static String toHTML(final String markdownText) {
    if (Strings.isEmptyOrNull(markdownText)) {
        return "";
    }

    // Return the previously stored result for this markdown text, if there is one.
    final String cachedHTML = getHTML(markdownText);
    if (null != cachedHTML) {
        return cachedHTML;
    }

    // The conversion runs on its own single-thread executor so that the wait below can be
    // bounded by MD_TIMEOUT.
    final ExecutorService pool = Executors.newSingleThreadExecutor();
    // One-element holder: written by the task with its thread id, read by the timeout handler.
    final long[] threadId = new long[1];

    final Callable<String> call = () -> {
        threadId[0] = Thread.currentThread().getId();

        String html = LANG_PROPS_SERVICE.get("contentRenderFailedLabel");

        // Both branches render the markdown and wrap the result in <p>...</p> when it does not
        // already start with "<p>".
        if (MARKED_AVAILABLE) {
            html = toHtmlByMarked(markdownText);
            if (!StringUtils.startsWith(html, "<p>")) {
                html = "<p>" + html + "</p>";
            }
        } else {
            com.vladsch.flexmark.ast.Node document = PARSER.parse(markdownText);
            html = RENDERER.render(document);
            if (!StringUtils.startsWith(html, "<p>")) {
                html = "<p>" + html + "</p>";
            }
        }

        final Document doc = Jsoup.parse(html);
        // Text nodes that get replaced by parsed fragments; removed only after the traversal.
        final List<org.jsoup.nodes.Node> toRemove = new ArrayList<>();
        doc.traverse(new NodeVisitor() {
            @Override
            public void head(final org.jsoup.nodes.Node node, int depth) {
                if (node instanceof org.jsoup.nodes.TextNode) {
                    final org.jsoup.nodes.TextNode textNode = (org.jsoup.nodes.TextNode) node;
                    final org.jsoup.nodes.Node parent = textNode.parent();

                    if (parent instanceof Element) {
                        final Element parentElem = (Element) parent;

                        // Text directly inside <code> is left untouched.
                        if (!parentElem.tagName().equals("code")) {
                            String text = textNode.getWholeText();
                            boolean nextIsBr = false;
                            final org.jsoup.nodes.Node nextSibling = textNode.nextSibling();
                            if (nextSibling instanceof Element) {
                                nextIsBr = "br".equalsIgnoreCase(((Element) nextSibling).tagName());
                            }

                            if (null != userQueryService) {
                                try {
                                    // Replace "@name " (or "@name" when a <br> follows) with a link
                                    // to the member page.
                                    final Set<String> userNames = userQueryService.getUserNames(text);
                                    for (final String userName : userNames) {
                                        text = text.replace('@' + userName + (nextIsBr ? "" : " "),
                                                "@<a href='" + Latkes.getServePath() + "/member/" + userName
                                                        + "'>" + userName + "</a> ");
                                    }
                                    text = text.replace("@participants ",
                                            "@<a href='https://hacpai.com/article/1458053458339' class='ft-red'>participants</a> ");
                                } finally {
                                    JdbcRepository.dispose();
                                }
                            }

                            if (text.contains("@<a href=")) {
                                // The text now contains markup: parse it and insert the resulting
                                // nodes at the text node's position; the old node is removed later.
                                final List<org.jsoup.nodes.Node> nodes = Parser.parseFragment(text, parentElem,
                                        "");
                                final int index = textNode.siblingIndex();

                                parentElem.insertChildren(index, nodes);
                                toRemove.add(node);
                            } else {
                                textNode.text(Pangu.spacingText(text));
                            }
                        }
                    }
                }
            }

            @Override
            public void tail(org.jsoup.nodes.Node node, int depth) {
            }
        });

        toRemove.forEach(node -> node.remove());

        doc.select("pre>code").addClass("hljs");
        // Links whose href does not start with the serve path are routed through /forward?goto=
        // and opened in a new tab.
        doc.select("a").forEach(a -> {
            String src = a.attr("href");
            if (!StringUtils.startsWithIgnoreCase(src, Latkes.getServePath())) {
                try {
                    src = URLEncoder.encode(src, "UTF-8");
                } catch (final Exception e) {
                    // NOTE(review): encoding failures are swallowed; src is then used unencoded.
                }
                a.attr("href", Latkes.getServePath() + "/forward?goto=" + src);
                a.attr("target", "_blank");
            }
        });
        doc.outputSettings().prettyPrint(false);

        String ret = doc.select("body").html();
        ret = StringUtils.trim(ret);

        // cache it
        putHTML(markdownText, ret);

        return ret;
    };

    Stopwatchs.start("Md to HTML");
    try {
        final Future<String> future = pool.submit(call);

        return future.get(MD_TIMEOUT, TimeUnit.MILLISECONDS);
    } catch (final TimeoutException e) {
        LOGGER.log(Level.ERROR, "Markdown timeout [md=" + markdownText + "]");
        Callstacks.printCallstack(Level.ERROR, new String[] { "org.b3log" }, null);

        // Find the worker thread by the id recorded in the task and stop it.
        // NOTE(review): Thread.stop() is deprecated; confirm this is still acceptable on the
        // target JDK.
        final Set<Thread> threads = Thread.getAllStackTraces().keySet();
        for (final Thread thread : threads) {
            if (thread.getId() == threadId[0]) {
                thread.stop();

                break;
            }
        }
    } catch (final Exception e) {
        LOGGER.log(Level.ERROR, "Markdown failed [md=" + markdownText + "]", e);
    } finally {
        pool.shutdownNow();

        Stopwatchs.end();
    }

    // Reached only after a timeout or another failure above.
    return LANG_PROPS_SERVICE.get("contentRenderFailedLabel");
}

From source file:org.dswarm.xmlenhancer.XMLEnhancer.java

/**
 * Prints {@code node} to {@code out} without entity escaping.
 *
 * <p>The owner document's output settings are first switched to the xhtml escape mode, XML
 * syntax and no pretty printing. A text node is printed as its whole text; any other node is
 * printed as its string form passed through {@code Parser.unescapeEntities}.
 *
 * @param out writer receiving the output
 * @param node node to print
 */
private static void unescapeEntity(final PrintWriter out, final Node node) {

    final Document.OutputSettings settings = node.ownerDocument().outputSettings();
    settings.escapeMode(Entities.EscapeMode.xhtml);
    settings.syntax(Document.OutputSettings.Syntax.xml);
    settings.prettyPrint(false);

    final String rendered;
    if (node instanceof TextNode) {
        rendered = ((TextNode) node).getWholeText();
    } else {
        rendered = Parser.unescapeEntities(node.toString(), true);
    }

    out.print(rendered);
}

From source file:org.dswarm.xmlenhancer.XMLEnhancer.java

/**
 * Wraps the content of a text node in {@code START_CDATA} / {@code END_CDATA}.
 *
 * <p>Nothing happens when the node's string form is blank, or when its whole text already
 * starts with {@code START_CDATA}.
 *
 * @param node the node to enhance; it is cast to {@code TextNode} unconditionally
 */
private static void enhanceTextNode(final Node node) {

    final TextNode target = (TextNode) node;
    final String rawText = target.getWholeText();
    final String rendered = node.toString();

    final boolean blank = rendered.trim().isEmpty();
    // do not add CDATA multiple times
    if (blank || rawText.startsWith(START_CDATA)) {
        return;
    }

    final String aligned = alignTextWithWholeText(rawText, rendered);

    target.text(String.format("%s%s%s", START_CDATA, aligned, END_CDATA));
}