Example usage for org.jsoup.nodes Element unwrap

List of usage examples for org.jsoup.nodes Element unwrap

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.unwrap().

Prototype

public Node unwrap() 

Source Link

Documentation

Removes this node from the DOM, and moves its children up into the node's parent.

Usage

From source file:com.maxl.java.aips2xml.Aips2Xml.java

/**
 * Wraps an XML fragment in a {@code <kompendium>} root element and prepends a
 * {@code <date>} and a {@code <lang>} header element to it.
 *
 * <p>The header elements are created directly on the root element. Earlier code
 * re-queried the whole document with {@code select("date")}, which also attached a
 * {@code <lang>} element after every {@code <date>} already present in the input.
 *
 * <p>After the header is added, {@code <tbody>} elements are unwrapped, {@code <img>}
 * elements without a {@code src} attribute are unwrapped, and the remaining
 * {@code <img>} elements are renamed to {@code <image>}.
 *
 * @param xml_str the XML fragment to wrap; it is concatenated into the markup that is
 *                parsed
 * @return the outer HTML of the {@code <kompendium>} element, pretty-printed with an
 *         indent of 4 and XHTML escaping
 */
static String addHeaderToXml(String xml_str) {
    Document mDoc = Jsoup.parse("<kompendium>\n" + xml_str + "</kompendium>");
    mDoc.outputSettings().escapeMode(EscapeMode.xhtml);
    mDoc.outputSettings().prettyPrint(true);
    mDoc.outputSettings().indentAmount(4);

    Element root = mDoc.select("kompendium").first();

    // Build the header on the root itself. Prepending <lang> first and <date> second
    // leaves them in the order <date>, <lang> at the very top of the root element.
    Element langElem = root.prependElement("lang");
    Element dateElem = root.prependElement("date");

    // Add date
    dateElem.text(new Date().toString());

    // Add language; <lang> stays empty for any other (or missing) DB_LANGUAGE value
    if ("de".equals(DB_LANGUAGE)) {
        langElem.text("DE");
    } else if ("fr".equals(DB_LANGUAGE)) {
        langElem.text("FR");
    }

    // Drop <tbody> wrappers while keeping their rows
    // (presumably these are inserted by jsoup's HTML parser - TODO confirm)
    mDoc.select("tbody").unwrap();

    // Images without a source are removed (their children, if any, are kept)
    for (Element img_e : mDoc.select("img")) {
        if (!img_e.hasAttr("src")) {
            img_e.unwrap();
        }
    }
    // Every remaining <img> is renamed to <image>
    mDoc.select("img").tagName("image");

    return root.outerHtml();
}

From source file:net.slkdev.swagger.confluence.service.impl.XHtmlToConfluenceServiceImpl.java

/**
 * Parses the given XHTML, swaps mapped anchors for Confluence link markup, applies the
 * heading, break and spacing reformatting helpers, and returns the resulting markup.
 *
 * <p>For every {@code <a>} element whose {@code href} value is a key of
 * {@code confluenceLinkMap}, the mapped Confluence markup is inserted in front of the
 * anchor, the anchor's content is cleared and the anchor itself is removed. Anchors
 * without a mapping are left untouched and only logged.
 *
 * @param inputXhtml        the XHTML to transform
 * @param confluenceLinkMap lookup from original {@code href} value to its Confluence link
 * @return the transformed document serialized via {@code Document.html()}
 */
private static String reformatXHtml(final String inputXhtml,
        final Map<String, ConfluenceLink> confluenceLinkMap) {
    // NOTE(review): in jsoup's parse(String, String, Parser) overload the second
    // argument is the base URI, not a charset - confirm that "utf-8" is intended here.
    final Document xhtmlDoc = Jsoup.parse(inputXhtml, "utf-8", Parser.xmlParser());
    xhtmlDoc.outputSettings()
            .prettyPrint(false)
            .escapeMode(xhtml)
            .charset("UTF-8");

    for (final Element anchor : xhtmlDoc.select("a")) {
        final String href = anchor.attr("href");
        final ConfluenceLink mappedLink = confluenceLinkMap.get(href);

        if (mappedLink != null) {
            final String markup = mappedLink.getConfluenceLinkMarkup();

            LOG.debug("LINK CONVERSION: {} -> {}", href, markup);

            // Put the replacement in front of the anchor, then empty the anchor and
            // unwrap it so that nothing of the original link remains.
            anchor.before(markup);
            anchor.html("");
            anchor.unwrap();
        } else {
            LOG.debug("NO LINK MAPPING FOUND TO COVERT LINK: {}", href);
        }
    }

    for (final String headingSelector : new String[] {"h2", "h3", "#toctitle"}) {
        reformatXHtmlHeadings(xhtmlDoc, headingSelector);
    }

    final SwaggerConfluenceConfig config = SWAGGER_CONFLUENCE_CONFIG.get();
    final boolean singlePage = config.getPaginationMode() == PaginationMode.SINGLE_PAGE;

    // Breaks are only inserted in single page mode; the one after the table of
    // contents additionally requires the table of contents to be included.
    if (singlePage && config.isIncludeTableOfContentsOnSinglePage()) {
        reformatXHtmlBreakAfterElements(xhtmlDoc, "#toc");
    }
    if (singlePage) {
        reformatXHtmlBreakAfterElements(xhtmlDoc, ".sect1");
    }

    for (final String sectionSelector : new String[] {".sect2", ".sect3"}) {
        reformatXHtmlSpacing(xhtmlDoc.select(sectionSelector));
    }

    return xhtmlDoc.html();
}

From source file:net.slkdev.swagger.confluence.service.impl.XHtmlToConfluenceServiceImpl.java

/**
 * Builds the list of Confluence pages for the current Swagger document according to
 * the configured pagination mode.
 *
 * <ul>
 *   <li>{@code SINGLE_PAGE}: one root page holding either the original document or a
 *       clone with its {@code .toc} elements removed, depending on
 *       {@code isIncludeTableOfContentsOnSinglePage()}.</li>
 *   <li>{@code INDIVIDUAL_PAGES}: a root page with the table of contents, one page per
 *       {@code .sect1} element holding the matching inner table of contents, and one
 *       page per {@code sect2} element inside it.</li>
 *   <li>Any other mode: a root page with the table of contents (inner
 *       {@code .sectlevel2} lists stripped) and one page per {@code .sect1} element
 *       holding that element's markup.</li>
 * </ul>
 *
 * <p>Note that outside single page mode with category pages, the {@code .sectlevel2}
 * elements are removed from the document returned by {@code SWAGGER_DOCUMENT.get()}
 * itself, not from a copy.
 *
 * @return the pages in creation order: root first, then categories (each followed by
 *         its individual pages when in individual page mode)
 */
private static List<ConfluencePage> handlePagination() {
    final SwaggerConfluenceConfig config = SWAGGER_CONFLUENCE_CONFIG.get();
    final PaginationMode mode = config.getPaginationMode();
    final List<ConfluencePage> pages = new ArrayList<>();

    final Document sourceDoc = SWAGGER_DOCUMENT.get();
    final Document tocFreeDoc = sourceDoc.clone();

    // The category sections are always taken from the clone
    final Elements categorySections = tocFreeDoc.select(".sect1");

    // Remove the ToC from the cloned document
    final Elements clonedToc = tocFreeDoc.select(".toc");
    clonedToc.html("");
    clonedToc.unwrap();

    // In single page mode the incoming XHTML can be used directly.
    if (mode == SINGLE_PAGE) {
        final ConfluencePage singlePage = ConfluencePageBuilder.aConfluencePage()
                .withPageType(PageType.ROOT)
                .withOriginalTitle(config.getTitle())
                .withConfluenceTitle(buildConfluenceTitle(config.getTitle(), null, null))
                .build();

        final Document pageSource = config.isIncludeTableOfContentsOnSinglePage()
                ? sourceDoc
                : tocFreeDoc;
        singlePage.setXhtml(pageSource.html());

        pages.add(singlePage);
        return pages;
    }

    // Whether we are in individual page mode affects how the DOM is split: in that
    // mode the category pages carry an inner table of contents.
    final boolean perIndividualPage = (mode == INDIVIDUAL_PAGES);

    // From here on the body of the document goes into sub-pages, so the root page
    // consists of the table of contents only.
    final Elements rootToc = sourceDoc.select(".toc");

    final List<String> categoryTocXhtml = new ArrayList<>();

    for (final Element innerToc : sourceDoc.select(".sectlevel2")) {
        if (!perIndividualPage) {
            // Category page mode: strip the inner table of contents.
            innerToc.html("");
            innerToc.unwrap();
            continue;
        }

        // Individual page mode: keep the inner ToC as a stand-alone ToC block.
        categoryTocXhtml.add("<div id=\"toc\" class=\"toc\">"
                + "<h4 id=\"toctitle\">Table of Contents</h4>"
                + "<div><ul class=\"sectlevel1\">"
                + innerToc.html()
                + "</ul></div></div>");
    }

    // Root page with the appropriate level of table of contents
    pages.add(ConfluencePageBuilder.aConfluencePage()
            .withPageType(PageType.ROOT)
            .withOriginalTitle(config.getTitle())
            .withConfluenceTitle(buildConfluenceTitle(config.getTitle(), null, null))
            .withXhtml(rootToc.html())
            .build());

    int categoryNumber = 1;

    for (final Element section : categorySections) {
        // The first child of a section is its header element, which holds the title
        final String sectionTitle = section.children().first().text();

        // Individual mode: the category page shows its inner table of contents.
        // Category mode: the category page holds the section's own markup.
        pages.add(ConfluencePageBuilder.aConfluencePage()
                .withPageType(PageType.CATEGORY)
                .withOriginalTitle(sectionTitle)
                .withConfluenceTitle(buildConfluenceTitle(sectionTitle, categoryNumber, null))
                .withXhtml(perIndividualPage
                        ? categoryTocXhtml.get(categoryNumber - 1)
                        : section.html())
                .build());

        if (perIndividualPage) {
            final Elements subSections = section.getElementsByClass("sect2");

            for (int i = 0; i < subSections.size(); i++) {
                final Element subSection = subSections.get(i);
                final String subSectionTitle = subSection.children().first().text();

                // Individual pages are numbered from 1 within their category
                pages.add(ConfluencePageBuilder.aConfluencePage()
                        .withPageType(INDIVIDUAL)
                        .withOriginalTitle(subSectionTitle)
                        .withConfluenceTitle(
                                buildConfluenceTitle(subSectionTitle, categoryNumber, i + 1))
                        .withXhtml(subSection.html())
                        .build());
            }
        }

        categoryNumber++;
    }

    return pages;
}

From source file:org.structr.web.common.microformat.MicroformatParser.java

/**
 * Unwraps every element below the given element for which {@code isBlock()} is true
 * and whose class name set is empty after {@code removeEmpty} has been applied.
 *
 * <p>Matching elements are collected during the traversal and only unwrapped once the
 * traversal has finished, so the tree is not modified while it is being visited.
 *
 * @param element the element whose subtree is traversed
 */
private void unwrap(final Element element) {

    // Insertion-ordered, so elements are unwrapped in the order they were visited
    final Set<Element> classlessBlocks = new LinkedHashSet<>();

    element.traverse(new NodeVisitor() {

        @Override
        public void head(Node node, int depth) {

            if (!(node instanceof Element)) {
                return;
            }

            final Element candidate = (Element) node;

            if (!candidate.isBlock()) {
                return;
            }

            // presumably removeEmpty drops blank entries from the set - verify helper
            final Set<String> classNames = candidate.classNames();
            removeEmpty(classNames);

            if (classNames.isEmpty()) {
                classlessBlocks.add(candidate);
            }
        }

        @Override
        public void tail(Node node, int depth) {
            // nothing to do when leaving a node
        }
    });

    for (final Element block : classlessBlocks) {
        block.unwrap();
    }
}