Example usage for org.jsoup.nodes Document traverse

List of usage examples for org.jsoup.nodes Document traverse

Introduction

In this page you can find the example usage for org.jsoup.nodes Document traverse.

Prototype

public Node traverse(NodeVisitor nodeVisitor) 

Source Link

Document

Perform a depth-first traversal through this node and its descendants.

Usage

From source file:com.screenslicer.common.CommonUtil.java

private static Element sanitize(Document doc, final boolean ascii) {
    if (ascii) {/* ww  w .  j av a  2s  .co  m*/
        doc.outputSettings().charset("ascii");
    } else {
        doc.outputSettings().charset("utf-8");
    }
    doc.traverse(new NodeVisitor() {
        @Override
        public void tail(Node n, int d) {
        }

        @Override
        public void head(Node n, int d) {
            try {
                if (n.nodeName().equals("#text") && !CommonUtil.isEmpty(n.outerHtml())) {
                    ((TextNode) n).text(HtmlCoder.decode(n.toString()));
                }
            } catch (Throwable t) {
                Log.exception(t);
            }
        }
    });
    return doc;
}

From source file:org.b3log.symphony.util.Markdowns.java

/**
 * Converts the specified markdown text to HTML.
 *
 * @param markdownText the specified markdown text
 * @return converted HTML, returns an empty string "" if the specified markdown text is "" or {@code null}, returns
 * 'markdownErrorLabel' if exception/*  w  w  w  . j  av a  2s  .co  m*/
 */
public static String toHTML(final String markdownText) {
    if (Strings.isEmptyOrNull(markdownText)) {
        return "";
    }

    final String cachedHTML = getHTML(markdownText);
    if (null != cachedHTML) {
        return cachedHTML;
    }

    final ExecutorService pool = Executors.newSingleThreadExecutor();
    final long[] threadId = new long[1];

    final Callable<String> call = () -> {
        threadId[0] = Thread.currentThread().getId();

        String html = LANG_PROPS_SERVICE.get("contentRenderFailedLabel");

        if (MARKED_AVAILABLE) {
            html = toHtmlByMarked(markdownText);
            if (!StringUtils.startsWith(html, "<p>")) {
                html = "<p>" + html + "</p>";
            }
        } else {
            com.vladsch.flexmark.ast.Node document = PARSER.parse(markdownText);
            html = RENDERER.render(document);
            if (!StringUtils.startsWith(html, "<p>")) {
                html = "<p>" + html + "</p>";
            }
        }

        final Document doc = Jsoup.parse(html);
        final List<org.jsoup.nodes.Node> toRemove = new ArrayList<>();
        doc.traverse(new NodeVisitor() {
            @Override
            public void head(final org.jsoup.nodes.Node node, int depth) {
                if (node instanceof org.jsoup.nodes.TextNode) {
                    final org.jsoup.nodes.TextNode textNode = (org.jsoup.nodes.TextNode) node;
                    final org.jsoup.nodes.Node parent = textNode.parent();

                    if (parent instanceof Element) {
                        final Element parentElem = (Element) parent;

                        if (!parentElem.tagName().equals("code")) {
                            String text = textNode.getWholeText();
                            boolean nextIsBr = false;
                            final org.jsoup.nodes.Node nextSibling = textNode.nextSibling();
                            if (nextSibling instanceof Element) {
                                nextIsBr = "br".equalsIgnoreCase(((Element) nextSibling).tagName());
                            }

                            if (null != userQueryService) {
                                try {
                                    final Set<String> userNames = userQueryService.getUserNames(text);
                                    for (final String userName : userNames) {
                                        text = text.replace('@' + userName + (nextIsBr ? "" : " "),
                                                "@<a href='" + Latkes.getServePath() + "/member/" + userName
                                                        + "'>" + userName + "</a> ");
                                    }
                                    text = text.replace("@participants ",
                                            "@<a href='https://hacpai.com/article/1458053458339' class='ft-red'>participants</a> ");
                                } finally {
                                    JdbcRepository.dispose();
                                }
                            }

                            if (text.contains("@<a href=")) {
                                final List<org.jsoup.nodes.Node> nodes = Parser.parseFragment(text, parentElem,
                                        "");
                                final int index = textNode.siblingIndex();

                                parentElem.insertChildren(index, nodes);
                                toRemove.add(node);
                            } else {
                                textNode.text(Pangu.spacingText(text));
                            }
                        }
                    }
                }
            }

            @Override
            public void tail(org.jsoup.nodes.Node node, int depth) {
            }
        });

        toRemove.forEach(node -> node.remove());

        doc.select("pre>code").addClass("hljs");
        doc.select("a").forEach(a -> {
            String src = a.attr("href");
            if (!StringUtils.startsWithIgnoreCase(src, Latkes.getServePath())) {
                try {
                    src = URLEncoder.encode(src, "UTF-8");
                } catch (final Exception e) {
                }
                a.attr("href", Latkes.getServePath() + "/forward?goto=" + src);
                a.attr("target", "_blank");
            }
        });
        doc.outputSettings().prettyPrint(false);

        String ret = doc.select("body").html();
        ret = StringUtils.trim(ret);

        // cache it
        putHTML(markdownText, ret);

        return ret;
    };

    Stopwatchs.start("Md to HTML");
    try {
        final Future<String> future = pool.submit(call);

        return future.get(MD_TIMEOUT, TimeUnit.MILLISECONDS);
    } catch (final TimeoutException e) {
        LOGGER.log(Level.ERROR, "Markdown timeout [md=" + markdownText + "]");
        Callstacks.printCallstack(Level.ERROR, new String[] { "org.b3log" }, null);

        final Set<Thread> threads = Thread.getAllStackTraces().keySet();
        for (final Thread thread : threads) {
            if (thread.getId() == threadId[0]) {
                thread.stop();

                break;
            }
        }
    } catch (final Exception e) {
        LOGGER.log(Level.ERROR, "Markdown failed [md=" + markdownText + "]", e);
    } finally {
        pool.shutdownNow();

        Stopwatchs.end();
    }

    return LANG_PROPS_SERVICE.get("contentRenderFailedLabel");
}