List of usage examples for org.jsoup.nodes.Element.unwrap()
public Node unwrap()
From source file:com.maxl.java.aips2xml.Aips2Xml.java
static String addHeaderToXml(String xml_str) { Document mDoc = Jsoup.parse("<kompendium>\n" + xml_str + "</kompendium>"); mDoc.outputSettings().escapeMode(EscapeMode.xhtml); mDoc.outputSettings().prettyPrint(true); mDoc.outputSettings().indentAmount(4); // Add date//from ww w .j a v a 2 s . c o m Date df = new Date(); String date_str = df.toString(); mDoc.select("kompendium").first().prependElement("date"); mDoc.select("date").first().text(date_str); // Add language mDoc.select("date").after("<lang></lang>"); if (DB_LANGUAGE.equals("de")) mDoc.select("lang").first().text("DE"); else if (DB_LANGUAGE.equals("fr")) mDoc.select("lang").first().text("FR"); // Fool jsoup.parse which seems to have its own "life" mDoc.select("tbody").unwrap(); Elements img_elems = mDoc.select("img"); for (Element img_e : img_elems) { if (!img_e.hasAttr("src")) img_e.unwrap(); } mDoc.select("img").tagName("image"); String final_xml_str = mDoc.select("kompendium").first().outerHtml(); return final_xml_str; }
From source file:net.slkdev.swagger.confluence.service.impl.XHtmlToConfluenceServiceImpl.java
private static String reformatXHtml(final String inputXhtml, final Map<String, ConfluenceLink> confluenceLinkMap) { final Document document = Jsoup.parse(inputXhtml, "utf-8", Parser.xmlParser()); document.outputSettings().prettyPrint(false); document.outputSettings().escapeMode(xhtml); document.outputSettings().charset("UTF-8"); final Elements linkElements = document.select("a"); for (final Element linkElement : linkElements) { final String originalHref = linkElement.attr("href"); final ConfluenceLink confluenceLink = confluenceLinkMap.get(originalHref); if (confluenceLink == null) { LOG.debug("NO LINK MAPPING FOUND TO COVERT LINK: {}", originalHref); continue; }// www .j a va 2 s . c om final String confluenceLinkMarkup = confluenceLink.getConfluenceLinkMarkup(); LOG.debug("LINK CONVERSION: {} -> {}", originalHref, confluenceLinkMarkup); linkElement.before(confluenceLinkMarkup); linkElement.html(""); linkElement.unwrap(); } reformatXHtmlHeadings(document, "h2"); reformatXHtmlHeadings(document, "h3"); reformatXHtmlHeadings(document, "#toctitle"); final SwaggerConfluenceConfig swaggerConfluenceConfig = SWAGGER_CONFLUENCE_CONFIG.get(); if (swaggerConfluenceConfig.getPaginationMode() == PaginationMode.SINGLE_PAGE) { if (swaggerConfluenceConfig.isIncludeTableOfContentsOnSinglePage()) { reformatXHtmlBreakAfterElements(document, "#toc"); } reformatXHtmlBreakAfterElements(document, ".sect1"); } reformatXHtmlSpacing(document.select(".sect2")); reformatXHtmlSpacing(document.select(".sect3")); return document.html(); }
From source file:net.slkdev.swagger.confluence.service.impl.XHtmlToConfluenceServiceImpl.java
private static List<ConfluencePage> handlePagination() { final List<ConfluencePage> confluencePages = new ArrayList<>(); final SwaggerConfluenceConfig swaggerConfluenceConfig = SWAGGER_CONFLUENCE_CONFIG.get(); final PaginationMode paginationMode = swaggerConfluenceConfig.getPaginationMode(); final Document originalDocument = SWAGGER_DOCUMENT.get(); final Document transformedDocument = originalDocument.clone(); final Elements categoryElements = transformedDocument.select(".sect1"); // Remove ToC form the transformed document final Elements toc = transformedDocument.select(".toc"); toc.html(""); toc.unwrap();/* www .ja v a 2 s. c o m*/ // For Single Page Mode, the incoming XHTML can be used directly. if (paginationMode == SINGLE_PAGE) { final ConfluencePage confluencePage = ConfluencePageBuilder.aConfluencePage() .withPageType(PageType.ROOT).withOriginalTitle(swaggerConfluenceConfig.getTitle()) .withConfluenceTitle(buildConfluenceTitle(swaggerConfluenceConfig.getTitle(), null, null)) .build(); if (swaggerConfluenceConfig.isIncludeTableOfContentsOnSinglePage()) { confluencePage.setXhtml(originalDocument.html()); } else { confluencePage.setXhtml(transformedDocument.html()); } confluencePages.add(confluencePage); return confluencePages; } // Before beginning further processing, we need to know if we're in individual // page mode or not, as that will effect how we split the DOM. If we're in this // mode then the category pages will contain inner table of contents. final boolean individualPages = (paginationMode == INDIVIDUAL_PAGES); // From here on, if we're still proceeding then we know the meat of the document // will go in sub-pages. 
So for the master page, we will use the table of contents final Elements tocElements = originalDocument.select(".toc"); final List<String> innerTocXHtmlList = new ArrayList<>(); final Elements innerTocElements = originalDocument.select(".sectlevel2"); for (final Element innerTocElement : innerTocElements) { // If we're in individual page mode, then we collect the inner ToCs if (individualPages) { final StringBuilder tocHtml = new StringBuilder(); tocHtml.append("<div id=\"toc\" class=\"toc\">"); tocHtml.append("<h4 id=\"toctitle\">Table of Contents</h4>"); tocHtml.append("<div><ul class=\"sectlevel1\">"); tocHtml.append(innerTocElement.html()); tocHtml.append("</ul></div></div>"); innerTocXHtmlList.add(tocHtml.toString()); } // If we're in category page mode, then we strip out the inner table of contents. else { innerTocElement.html(""); innerTocElement.unwrap(); } } // Build the Root Page w/ the Appropriate Level of Table of Contents final ConfluencePage rootConfluencePage = ConfluencePageBuilder.aConfluencePage() .withPageType(PageType.ROOT).withOriginalTitle(swaggerConfluenceConfig.getTitle()) .withConfluenceTitle(buildConfluenceTitle(swaggerConfluenceConfig.getTitle(), null, null)) .withXhtml(tocElements.html()).build(); confluencePages.add(rootConfluencePage); int category = 1; // Now we process the category pages for (final Element categoryElement : categoryElements) { // Fetch the title from the first child, which is the header element final String categoryTitle = categoryElement.children().first().text(); // If we're in individual mode then we need these to be sub table of contents if (individualPages) { final ConfluencePage categoryConfluencePage = ConfluencePageBuilder.aConfluencePage() .withPageType(PageType.CATEGORY).withOriginalTitle(categoryTitle) .withConfluenceTitle(buildConfluenceTitle(categoryTitle, category, null)) .withXhtml(innerTocXHtmlList.get(category - 1)).build(); confluencePages.add(categoryConfluencePage); final Elements 
individualElements = categoryElement.getElementsByClass("sect2"); int individual = 1; for (final Element individualElement : individualElements) { final String individualTitle = individualElement.children().first().text(); final ConfluencePage individualConfluencePage = ConfluencePageBuilder.aConfluencePage() .withPageType(INDIVIDUAL).withOriginalTitle(individualTitle) .withConfluenceTitle(buildConfluenceTitle(individualTitle, category, individual)) .withXhtml(individualElement.html()).build(); confluencePages.add(individualConfluencePage); individual++; } category++; continue; } // If we're in category mode, we use the remaining page data final ConfluencePage categoryConfluencePage = ConfluencePageBuilder.aConfluencePage() .withPageType(PageType.CATEGORY).withOriginalTitle(categoryTitle) .withConfluenceTitle(buildConfluenceTitle(categoryTitle, category, null)) .withXhtml(categoryElement.html()).build(); confluencePages.add(categoryConfluencePage); category++; } return confluencePages; }
From source file:org.structr.web.common.microformat.MicroformatParser.java
private void unwrap(final Element element) { final Set<Element> elementsToUnwrap = new LinkedHashSet<>(); element.traverse(new NodeVisitor() { @Override//ww w .ja va 2 s . co m public void head(Node node, int depth) { if (node instanceof Element) { final Element element = (Element) node; if (element.isBlock()) { final Set<String> classes = element.classNames(); removeEmpty(classes); if (classes.isEmpty()) { elementsToUnwrap.add(element); } } } } @Override public void tail(Node node, int depth) { } }); for (final Element unwrap : elementsToUnwrap) { unwrap.unwrap(); } }