Example usage for org.jsoup.nodes Document clone

Introduction

In this page you can find the example usage for org.jsoup.nodes Document clone.

Prototype

@Override
    public Document clone()

Source Link

Usage

From source file:net.slkdev.swagger.confluence.service.impl.XHtmlToConfluenceServiceImpl.java

private static List<ConfluencePage> handlePagination() {
    final List<ConfluencePage> confluencePages = new ArrayList<>();
    final SwaggerConfluenceConfig swaggerConfluenceConfig = SWAGGER_CONFLUENCE_CONFIG.get();

    final PaginationMode paginationMode = swaggerConfluenceConfig.getPaginationMode();

    final Document originalDocument = SWAGGER_DOCUMENT.get();
    final Document transformedDocument = originalDocument.clone();

    final Elements categoryElements = transformedDocument.select(".sect1");

    // Remove ToC form the transformed document
    final Elements toc = transformedDocument.select(".toc");
    toc.html("");
    toc.unwrap();//from  w  w  w .ja va 2 s  .co m

    // For Single Page Mode, the incoming XHTML can be used directly.
    if (paginationMode == SINGLE_PAGE) {
        final ConfluencePage confluencePage = ConfluencePageBuilder.aConfluencePage()
                .withPageType(PageType.ROOT).withOriginalTitle(swaggerConfluenceConfig.getTitle())
                .withConfluenceTitle(buildConfluenceTitle(swaggerConfluenceConfig.getTitle(), null, null))
                .build();

        if (swaggerConfluenceConfig.isIncludeTableOfContentsOnSinglePage()) {
            confluencePage.setXhtml(originalDocument.html());
        } else {
            confluencePage.setXhtml(transformedDocument.html());
        }

        confluencePages.add(confluencePage);

        return confluencePages;
    }

    // Before beginning further processing, we need to know if we're in individual
    // page mode or not, as that will effect how we split the DOM. If we're in this
    // mode then the category pages will contain inner table of contents.
    final boolean individualPages = (paginationMode == INDIVIDUAL_PAGES);

    // From here on, if we're still proceeding then we know the meat of the document
    // will go in sub-pages. So for the master page, we will use the table of contents
    final Elements tocElements = originalDocument.select(".toc");

    final List<String> innerTocXHtmlList = new ArrayList<>();
    final Elements innerTocElements = originalDocument.select(".sectlevel2");

    for (final Element innerTocElement : innerTocElements) {
        // If we're in individual page mode, then we collect the inner ToCs
        if (individualPages) {
            final StringBuilder tocHtml = new StringBuilder();
            tocHtml.append("<div id=\"toc\" class=\"toc\">");
            tocHtml.append("<h4 id=\"toctitle\">Table of Contents</h4>");
            tocHtml.append("<div><ul class=\"sectlevel1\">");
            tocHtml.append(innerTocElement.html());
            tocHtml.append("</ul></div></div>");
            innerTocXHtmlList.add(tocHtml.toString());
        }
        // If we're in category page mode, then we strip out the inner table of contents.
        else {
            innerTocElement.html("");
            innerTocElement.unwrap();
        }
    }

    // Build the Root Page w/ the Appropriate Level of Table of Contents
    final ConfluencePage rootConfluencePage = ConfluencePageBuilder.aConfluencePage()
            .withPageType(PageType.ROOT).withOriginalTitle(swaggerConfluenceConfig.getTitle())
            .withConfluenceTitle(buildConfluenceTitle(swaggerConfluenceConfig.getTitle(), null, null))
            .withXhtml(tocElements.html()).build();
    confluencePages.add(rootConfluencePage);

    int category = 1;

    // Now we process the category pages
    for (final Element categoryElement : categoryElements) {
        // Fetch the title from the first child, which is the header element
        final String categoryTitle = categoryElement.children().first().text();

        // If we're in individual mode then we need these to be sub table of contents
        if (individualPages) {

            final ConfluencePage categoryConfluencePage = ConfluencePageBuilder.aConfluencePage()
                    .withPageType(PageType.CATEGORY).withOriginalTitle(categoryTitle)
                    .withConfluenceTitle(buildConfluenceTitle(categoryTitle, category, null))
                    .withXhtml(innerTocXHtmlList.get(category - 1)).build();
            confluencePages.add(categoryConfluencePage);

            final Elements individualElements = categoryElement.getElementsByClass("sect2");

            int individual = 1;

            for (final Element individualElement : individualElements) {
                final String individualTitle = individualElement.children().first().text();
                final ConfluencePage individualConfluencePage = ConfluencePageBuilder.aConfluencePage()
                        .withPageType(INDIVIDUAL).withOriginalTitle(individualTitle)
                        .withConfluenceTitle(buildConfluenceTitle(individualTitle, category, individual))
                        .withXhtml(individualElement.html()).build();
                confluencePages.add(individualConfluencePage);

                individual++;
            }

            category++;
            continue;
        }

        // If we're in category mode, we use the remaining page data
        final ConfluencePage categoryConfluencePage = ConfluencePageBuilder.aConfluencePage()
                .withPageType(PageType.CATEGORY).withOriginalTitle(categoryTitle)
                .withConfluenceTitle(buildConfluenceTitle(categoryTitle, category, null))
                .withXhtml(categoryElement.html()).build();
        confluencePages.add(categoryConfluencePage);

        category++;
    }

    return confluencePages;
}

From source file:org.symphonyoss.client.util.MlMessageParser.java

public void parseMessage(String message) throws SymException {

    Document doc = Jsoup.parse(message);
    originalDoc = doc.clone();
    Element elementErrors = doc.body().getElementsByTag("errors").first();

    if (elementErrors != null) {
        if (elementErrors.outerHtml() != null)
            logger.debug("Errors found in message: {}", elementErrors.outerHtml());
    }/*  w ww  .  j a v a  2  s .co m*/
    //Lets remove the errors elements
    doc.select("errors").remove();

    elementMessageML = doc.select("messageML").first();

    if (elementMessageML == null)
        elementMessageML = doc.select("div").first();

    if (elementMessageML != null) {
        if (elementMessageML.outerHtml() != null)
            logger.debug("Doc parsed: {}", elementMessageML.outerHtml());
    } else {

        logger.error("Could not parse document for message {}", message);
        throw new SymException("Malformed message");
    }

    textDoc = new StringBuilder();
    stripTags(textDoc, elementMessageML.childNodes());

    textChunks = textDoc.toString().split("\\s+");

}