Example usage for org.jsoup.select NodeTraversor traverse

List of usage examples for org.jsoup.select NodeTraversor traverse

Introduction

On this page you can find example usages of org.jsoup.select NodeTraversor traverse.

Prototype

public void traverse(Node root) 

Source Link

Document

Start a depth-first traverse of the root and all of its descendants.

Usage

From source file:com.trackplus.track.util.html.Html2LaTeX.java

/**
 * Format an Element to LaTeX/*from w ww .  j a  va 2 s  .  com*/
 *
 * @param element
 *            the root element to format
 * @return formatted text
 */
public static String getLatexText(Element element) {
    FormattingVisitor formatter = new FormattingVisitor();
    NodeTraversor traversor = new NodeTraversor(formatter);
    traversor.traverse(element); // walk the DOM, and call .head() and
    // .tail() for each node

    return formatter.toString();
}

From source file:com.twentyn.patentExtractor.PatentDocument.java

/**
 * Gathers the text segments found in each node of {@code textNodes}.
 *
 * @param docBuilder builder handed to {@code Util.nodeToDocument} for each node
 * @param textNodes  nodes whose text should be extracted; may be {@code null}
 * @return the text segments of all nodes in iteration order; empty when
 *         {@code textNodes} is {@code null} or has no items
 */
private static List<String> extractTextFromHTML(DocumentBuilder docBuilder, NodeList textNodes)
        throws ParserConfigurationException, TransformerConfigurationException, TransformerException,
        XPathExpressionException {
    List<String> collected = new ArrayList<>(0);
    if (textNodes == null) {
        return collected;
    }
    // The length is re-read on every iteration on purpose: a NodeList may be live.
    for (int idx = 0; idx < textNodes.getLength(); idx++) {
        /* This roundabout approach to handling text content is due to the mix of HTML and XML in the
         * patent body.  Each node is wrapped in a "body" document and serialized to a string, then Jsoup
         * parses the HTML entities found in it, and its NodeVisitor API is used to recursively traverse
         * the document and extract the text content in reasonable chunks.
         */
        Document wrapped = Util.nodeToDocument(docBuilder, "body", textNodes.item(idx));
        // With help from http://stackoverflow.com/questions/832620/stripping-html-tags-in-java
        org.jsoup.nodes.Document parsed = Jsoup.parse(Util.documentToString(wrapped));
        HtmlVisitor textCollector = new HtmlVisitor();
        new NodeTraversor(textCollector).traverse(parsed);
        collected.addAll(textCollector.getTextContent());
    }
    return collected;
}

From source file:no.kantega.publishing.admin.content.htmlfilter.RemoveNestedSpanTagsFilter.java

/**
 * Traverses {@code source} with a {@code CleaningVisitor} that was constructed
 * around {@code dest}.
 *
 * NOTE(review): presumably the visitor copies the nodes it considers safe into
 * {@code dest} - confirm against CleaningVisitor.
 *
 * @param source root element to walk
 * @param dest   element passed to the cleaning visitor
 */
private void copySafeNodes(Element source, Element dest) {
    new NodeTraversor(new CleaningVisitor(dest)).traverse(source);
}

From source file:org.jsweet.input.typescriptdef.visitor.DocFiller.java

/**
 * Attempts to fill in documentation for a type declaration that has none.
 *
 * Only types whose current module is the configured LANG or DOM package are
 * handled. For those, the cached "mdn" content for the type is parsed as HTML
 * and three grabber visitors are run, one after another, over the document
 * body. Any failure while parsing or traversing is reported through the
 * context rather than propagated.
 *
 * @param typeDeclaration the type declaration being visited
 */
@Override
public void visitTypeDeclaration(TypeDeclaration typeDeclaration) {
    if (typeDeclaration.getDocumentation() != null) {
        return; // already documented, nothing to fill in
    }

    String moduleName = getCurrentModuleName();
    boolean handledModule = JSweetDefTranslatorConfig.LANG_PACKAGE.equals(moduleName)
            || JSweetDefTranslatorConfig.DOM_PACKAGE.equals(moduleName);
    if (!handledModule) {
        return;
    }
    this.currentModule = moduleName;

    String content = getTypeContent(context.cacheDir, "mdn", moduleName, typeDeclaration.getName());
    if (content == null) {
        return;
    }

    try {
        // The content is already a decoded String, so no charset is needed.
        // The second argument of Jsoup.parse(String, String) is a base URI,
        // not an encoding; passing "UTF-8" there set a bogus base URI.
        Document doc = Jsoup.parse(content);

        // Each grabber is created only after the previous traversal finished,
        // so later grabbers can observe what earlier ones recorded.
        NodeTraversor traversor = new NodeTraversor(new MdnTableFormatGrabber(this, typeDeclaration));
        traversor.traverse(doc.body());
        traversor = new NodeTraversor(new MdnDefinitionListFormatGrabber(this, typeDeclaration));
        traversor.traverse(doc.body());
        traversor = new NodeTraversor(new MdnMainDescriptionGrabber(this, typeDeclaration));
        traversor.traverse(doc.body());
    } catch (Throwable t) {
        // Best effort: a documentation failure is reported, never rethrown.
        context.reportError("cannot fill documentation for " + context.getTypeName(typeDeclaration),
                typeDeclaration.getToken(), t);
    }
}

From source file:org.jsweet.input.typescriptdef.visitor.DocFiller.java

/**
 * Parses {@code html}, walks its body with a {@code TagRemover} configured
 * with {@code tagsToBeRemoved}, and returns the text the remover wrote, with
 * every empty {@code <p></p>} pair stripped out.
 *
 * @param html            the HTML text to process
 * @param tagsToBeRemoved tag names handed to the {@code TagRemover}
 * @return the remover's output without empty paragraph pairs
 */
public static String removeTags(String html, String[] tagsToBeRemoved) {
    StringBuilder output = new StringBuilder();
    TagRemover remover = new TagRemover(output, tagsToBeRemoved);
    new NodeTraversor(remover).traverse(Jsoup.parse(html).body());
    String rendered = output.toString();
    return rendered.replace("<p></p>", "");
}