List of usage examples for org.jsoup.nodes.Element.outerHtml()
public String outerHtml()
From source file:com.jimplush.goose.ContentExtractor.java
/** * adds any siblings that may have a decent score to this node * * @param node/*ww w. ja va2s .c om*/ * @return */ private Element addSiblings(Element node) { if (logger.isDebugEnabled()) { logger.debug("Starting to add siblings"); } int baselineScoreForSiblingParagraphs = getBaselineScoreForSiblings(node); Element currentSibling = node.previousElementSibling(); while (currentSibling != null) { if (logger.isDebugEnabled()) { logger.debug("SIBLINGCHECK: " + debugNode(currentSibling)); } if (currentSibling.tagName().equals("p")) { node.child(0).before(currentSibling.outerHtml()); currentSibling = currentSibling.previousElementSibling(); continue; } // check for a paraph embedded in a containing element int insertedSiblings = 0; Elements potentialParagraphs = currentSibling.getElementsByTag("p"); if (potentialParagraphs.first() == null) { currentSibling = currentSibling.previousElementSibling(); continue; } for (Element firstParagraph : potentialParagraphs) { WordStats wordStats = StopWords.getStopWordCount(firstParagraph.text()); int paragraphScore = wordStats.getStopWordCount(); if ((float) (baselineScoreForSiblingParagraphs * .30) < paragraphScore) { if (logger.isDebugEnabled()) { logger.debug("This node looks like a good sibling, adding it"); } node.child(insertedSiblings).before("<p>" + firstParagraph.text() + "<p>"); insertedSiblings++; } } currentSibling = currentSibling.previousElementSibling(); } return node; }
From source file:com.astamuse.asta4d.web.form.field.impl.AbstractRadioAndCheckboxPrepareRenderer.java
/**
 * Builds the pre-render {@link Renderer} for a radio/checkbox field.
 *
 * <p>On top of the renderer returned by {@code super.preRender}, this method registers, in order:
 * <ol>
 * <li>when an option map is set and no duplicate selector is given: a transformer that wraps the
 * single input element in a temporary group node, followed by renderers that detach the matching
 * label(s) and re-attach them inside that wrapper;</li>
 * <li>when an option map and a duplicate selector are both set: a setter that only records the
 * input element id and the label selector;</li>
 * <li>a renderer that either tags static inputs/labels with reference attributes (no option map)
 * or repeats the wrapper/duplicator once per option, assigning value, id, label text and reference
 * attributes (option map present);</li>
 * <li>a renderer that flags the temporary wrapper group node as fake.</li>
 * </ol>
 * Finally the option map is stored to the context for the edit and display selectors.
 *
 * @param editSelector selector of the input element in edit mode
 * @param displaySelector selector used for display mode; passed on to {@code super.preRender}
 *        and to {@code PrepareRenderingDataUtil.storeDataToContextBySelector}
 * @return the assembled renderer, with the missing-selector warning re-enabled
 * @throws IllegalArgumentException if both {@code duplicateSelector} and
 *         {@code labelWrapperIndicatorAttr} are configured
 */
@Override
public Renderer preRender(final String editSelector, final String displaySelector) {

    // duplicateSelector and labelWrapperIndicatorAttr are mutually exclusive
    if (duplicateSelector != null && labelWrapperIndicatorAttr != null) {
        String msg = "duplicateSelector (%s) and labelWrapperIndicatorAttr (%s) cannot be specified at same time.";
        throw new IllegalArgumentException(String.format(msg, duplicateSelector, labelWrapperIndicatorAttr));
    }

    Renderer renderer = super.preRender(editSelector, displaySelector);

    // selectors registered below may legitimately match nothing; the warning is re-enabled on return
    renderer.disableMissingSelectorWarning();

    // create wrapper for input element
    // (shared mutable holder: filled by the first renderers, read by the ones registered later)
    final WrapperIdHolder wrapperIdHolder = new WrapperIdHolder();

    if (duplicateSelector == null && optionMap != null) {

        renderer.add(new Renderer(editSelector, new ElementTransformer(null) {
            @Override
            public Element invoke(Element elem) {

                // wrapperId is set at the end of the first invocation, so a second call means a second match
                if (wrapperIdHolder.wrapperId != null) {
                    throw new RuntimeException("The target of selector[" + editSelector
                            + "] must be unique but over than 1 target was found."
                            + "Perhaps you have specified an option value map on a group of elements "
                            + "which is intented to be treated as predefined static options by html directly.");
                }

                String id = elem.id();
                if (StringUtils.isEmpty(id)) {
                    String msg = "A %s input element must have id value being configured:%s";
                    throw new RuntimeException(String.format(msg, getTypeString(), elem.outerHtml()));
                }

                GroupNode wrapper = new GroupNode();

                // cheating the rendering engine for not skipping the rendering on group node
                wrapper.attr(ExtNodeConstants.GROUP_NODE_ATTR_TYPE, ExtNodeConstants.GROUP_NODE_ATTR_TYPE_USERDEFINE);

                // put the input element under the wrapper node
                wrapper.appendChild(elem.clone());

                String wrapperId = IdGenerator.createId();
                wrapper.attr("id", wrapperId);

                wrapperIdHolder.inputId = id;
                wrapperIdHolder.wrapperId = wrapperId;

                // record the selector used to find the label that belongs to this input
                if (labelWrapperIndicatorAttr == null) {
                    wrapperIdHolder.labelSelector = SelectorUtil.attr("label", "for", wrapperIdHolder.inputId);
                } else {
                    wrapperIdHolder.labelSelector = SelectorUtil.attr(labelWrapperIndicatorAttr, wrapperIdHolder.inputId);
                }

                // the wrapper (holding a clone of the input) replaces the original input element
                return wrapper;
            }
        }));

        renderer.add(":root", new Renderable() {
            @Override
            public Renderer render() {
                if (wrapperIdHolder.wrapperId == null) {
                    // for display mode?
                    return Renderer.create();
                }

                // remove the label element and cache it in wrapperIdHolder, we will relocate it later
                // (since we have to duplicate the input and label pair by the given option value map,
                // we have to make sure that the input and label elements are in the same parent node,
                // which can be duplicated)
                Renderer renderer = Renderer.create().disableMissingSelectorWarning();
                renderer.add(new Renderer(wrapperIdHolder.labelSelector, new ElementTransformer(null) {
                    @Override
                    public Element invoke(Element elem) {
                        wrapperIdHolder.relocatingLabels.add(elem.clone());
                        // an empty group node takes the place of the detached label
                        return new GroupNode();
                    }
                }));
                return renderer.enableMissingSelectorWarning();
            }
        });

        renderer.add(":root", new Renderable() {
            @Override
            public Renderer render() {
                if (wrapperIdHolder.wrapperId == null) {
                    // for display mode?
                    return Renderer.create();
                }

                String selector = SelectorUtil.id(wrapperIdHolder.wrapperId);

                // relocate the label element to the wrapper node
                return Renderer.create(selector, new ElementSetter() {
                    @Override
                    public void set(Element elem) {
                        if (wrapperIdHolder.relocatingLabels.isEmpty()) {
                            // no existing label found: create an empty one bound to the input id
                            Element label = new Element(Tag.valueOf("label"), "");
                            label.attr("for", wrapperIdHolder.inputId);
                            elem.appendChild(label);
                        } else {
                            for (Element label : wrapperIdHolder.relocatingLabels) {
                                elem.appendChild(label);
                            }
                        }
                    }
                });
            }
        });

    } else {
        if (duplicateSelector != null && optionMap != null) {
            // if duplicateSelector is specified, we just only need to store the input element id
            renderer.add(editSelector, new ElementSetter() {
                @Override
                public void set(Element elem) {
                    // inputId is assigned below, so a second call means the selector matched twice
                    if (wrapperIdHolder.inputId != null) {
                        String msg = "The target of selector[%s] (inside duplicator:%s) must be unique but over than 1 target was found.";
                        throw new RuntimeException(String.format(msg, editSelector, duplicateSelector));
                    }
                    String id = elem.id();
                    if (StringUtils.isEmpty(id)) {
                        String msg = "A %s input element (inside duplicator:%s) must have id value being configured:%s";
                        throw new RuntimeException(
                                String.format(msg, getTypeString(), duplicateSelector, elem.outerHtml()));
                    }
                    wrapperIdHolder.inputId = id;

                    // record the selector used to find the label that belongs to this input.
                    // labelWrapperIndicatorAttr is always null here: duplicateSelector is non-null in this
                    // branch and the check at the entry of this method rejects having both configured.
                    wrapperIdHolder.labelSelector = SelectorUtil.attr("label", "for", wrapperIdHolder.inputId);
                }
            });
        }
    }

    // here we finished restructuring the input element and its related label element, and now we begin to
    // manufacture all the input/label pairs for the option list
    renderer.add(":root", new Renderable() {
        @Override
        public Renderer render() {
            if (optionMap == null) {
                // for static options
                Renderer renderer = Renderer.create();

                // ids of all inputs matched by editSelector, collected first and consumed by the next renderer
                final List<String> inputIdList = new LinkedList<>();
                renderer.add(editSelector, new ElementSetter() {
                    @Override
                    public void set(Element elem) {
                        inputIdList.add(elem.id());
                    }
                });

                renderer.add(":root", new Renderable() {
                    @Override
                    public Renderer render() {
                        Renderer render = Renderer.create().disableMissingSelectorWarning();
                        for (String id : inputIdList) {
                            // NOTE(review): labelWrapperIndicatorAttr may be null here; how SelectorUtil.attr
                            // handles a null attribute name is not visible from this method - confirm.
                            render.add(SelectorUtil.attr(labelWrapperIndicatorAttr, id), LABEL_REF_ATTR, id);
                            render.add(SelectorUtil.attr("label", "for", id), LABEL_REF_ATTR, id);
                        }
                        return render.enableMissingSelectorWarning();
                    }
                });

                if (duplicateSelector != null) {
                    renderer.add(duplicateSelector, new Renderable() {
                        @Override
                        public Renderer render() {
                            // one fresh reference id per duplicator element, stamped on it, its inputs and its labels
                            String duplicatorRef = IdGenerator.createId();
                            Renderer render = Renderer.create(":root", DUPLICATOR_REF_ID_ATTR, duplicatorRef);
                            render.add("input", DUPLICATOR_REF_ATTR, duplicatorRef);
                            String labelSelector;
                            if (labelWrapperIndicatorAttr == null) {
                                labelSelector = SelectorUtil.tag("label");
                            } else {
                                labelSelector = SelectorUtil.attr(labelWrapperIndicatorAttr);
                            }
                            render.add(labelSelector, DUPLICATOR_REF_ATTR, duplicatorRef);
                            return render;
                        }
                    });
                }
                return renderer;
            } else {
                if (wrapperIdHolder.wrapperId == null && duplicateSelector == null) {
                    // for display mode?
                    return Renderer.create();
                }
                if (wrapperIdHolder.inputId == null) {
                    // target input element not found
                    return Renderer.create();
                }

                // repeat either the generated wrapper or the user-specified duplicator, once per option
                String selector = duplicateSelector == null ? SelectorUtil.id(wrapperIdHolder.wrapperId) : duplicateSelector;
                return Renderer.create(selector, optionMap.getOptionList(), row -> {

                    Renderer renderer = Renderer.create().disableMissingSelectorWarning();

                    String inputSelector = SelectorUtil.id("input", wrapperIdHolder.inputId);
                    renderer.add(inputSelector, "value", row.getValue());

                    // we have to generate a new id for the input element to make sure its id stays unique
                    // even though we duplicated it.
                    String newInputId = inputIdByValue ? row.getValue() : IdGenerator.createId();

                    // make the generated id more understandable by prefixing with original id
                    newInputId = wrapperIdHolder.inputId + "-" + newInputId;

                    // stays null when no duplicateSelector is configured
                    String duplicatorRef = null;
                    if (duplicateSelector != null) {
                        duplicatorRef = IdGenerator.createId();
                    }

                    renderer.add(":root", DUPLICATOR_REF_ID_ATTR, duplicatorRef);

                    renderer.add(inputSelector, DUPLICATOR_REF_ATTR, duplicatorRef);
                    renderer.add(inputSelector, "id", newInputId);

                    // may be a wrapper container of label
                    renderer.add(wrapperIdHolder.labelSelector, LABEL_REF_ATTR, newInputId);
                    if (labelWrapperIndicatorAttr != null) {
                        renderer.add(wrapperIdHolder.labelSelector, labelWrapperIndicatorAttr, newInputId);
                    }
                    renderer.add(wrapperIdHolder.labelSelector, DUPLICATOR_REF_ATTR, duplicatorRef);

                    renderer.add("label", "for", newInputId);
                    renderer.add("label", row.getDisplayText());
                    return renderer.enableMissingSelectorWarning();
                });
            }
        }
    });

    // since we cheated the rendering engine, we should set the type of group node created to faked for fast clean up
    renderer.add(":root", new Renderable() {
        @Override
        public Renderer render() {
            if (wrapperIdHolder.wrapperId == null) {
                // for display mode?
                return Renderer.create();
            }
            String selector = SelectorUtil.id(wrapperIdHolder.wrapperId);
            return Renderer.create(selector, new ElementSetter() {
                @Override
                public void set(Element elem) {
                    elem.attr(ExtNodeConstants.GROUP_NODE_ATTR_TYPE, ExtNodeConstants.GROUP_NODE_ATTR_TYPE_FAKE);
                }
            });
        }
    });

    PrepareRenderingDataUtil.storeDataToContextBySelector(editSelector, displaySelector, optionMap);

    return renderer.enableMissingSelectorWarning();
}
From source file:com.screenslicer.core.util.BrowserUtil.java
/**
 * Parses the page currently loaded in {@code browser} and returns its {@code <body>} element with
 * visibility and URL-filter marks applied.
 *
 * <p>When {@code init} is set, a script is first run in the browser that strips the hidden/filtered
 * marker classes from every element, then tags each element that lacks one with a
 * {@code NODE_MARKER<startId>_<index>} class and adds the hidden marker to elements the script's
 * {@code isVisible} function rejects. The page source is then parsed, every non-text, non-empty
 * node is marked visible, and, if at least one of {@code whitelist}, {@code patterns} or
 * {@code urlNodes} is non-empty, nodes whose URL is rejected by {@code UrlUtil.isUrlFiltered} are
 * marked filtered. With {@code WebApp.DEBUG} on, the resulting HTML is also dumped to a
 * timestamped file in the working directory on a best-effort basis.
 *
 * @param browser the browser holding the page
 * @param init whether to (re)run the class-marking script in the browser first
 * @param whitelist URL whitelist forwarded to {@code UrlUtil.isUrlFiltered}; may be null
 * @param patterns URL patterns forwarded to {@code UrlUtil.isUrlFiltered}; may be null
 * @param urlNodes URL nodes forwarded to {@code UrlUtil.isUrlFiltered}; may be null
 * @param transforms URL transforms forwarded to {@code UrlUtil.isUrlFiltered}
 * @return the parsed and marked body element
 * @throws ActionFailed wrapping any throwable other than {@code Browser.Retry} and
 *         {@code Browser.Fatal}, which are rethrown unchanged
 */
public static Element openElement(final Browser browser, boolean init, final String[] whitelist,
        final String[] patterns, final HtmlNode[] urlNodes, final UrlTransform[] transforms)
        throws ActionFailed {
    try {
        if (init) {
            final int markerSeed;
            synchronized (startIdLock) {
                // wrap around instead of overflowing
                startId = startId == Integer.MAX_VALUE ? 0 : startId + 1;
                markerSeed = startId;
            }
            final String markingScript = " var all = document.body.getElementsByTagName('*');"
                    + "for(var i = 0; i < all.length; i++){"
                    + " if(all[i].className && typeof all[i].className == 'string'){"
                    + " all[i].className=all[i].className.replace(/" + HIDDEN_MARKER + "/g,'').replace(/"
                    + FILTERED_MARKER + "/g,'').replace(/" + FILTERED_LENIENT_MARKER
                    + "/g,'').replace(/\\s+/g,' ').trim();"
                    + " }"
                    + "}"
                    + isVisible
                    + "for(var j = 0; j < all.length; j++){"
                    + " if(!all[j].className.match(/" + NODE_MARKER + "\\d+_\\d+/g)){"
                    + " all[j].className += ' " + NODE_MARKER + markerSeed + "_'+j+' ';"
                    + " }"
                    + " if(!isVisible(all[j])){"
                    + " all[j].className += ' " + HIDDEN_MARKER + " ';"
                    + " }"
                    + "}";
            browser.executeScript(markingScript);
        }

        final String currentUrl = browser.getCurrentUrl();
        // constructed only for validation: a malformed URL ends up in the generic catch below
        new URL(currentUrl);

        final Element body = CommonUtil.parse(browser.getPageSource(), currentUrl, false).body();

        body.traverse(new NodeVisitor() {
            @Override
            public void head(Node node, int depth) {
                if (node.nodeName().equals("#text") || NodeUtil.isEmpty(node)) {
                    return;
                }
                NodeUtil.markVisible(node);
            }

            @Override
            public void tail(Node node, int depth) {
            }
        });

        final boolean hasWhitelist = whitelist != null && whitelist.length > 0;
        final boolean hasPatterns = patterns != null && patterns.length > 0;
        final boolean hasUrlNodes = urlNodes != null && urlNodes.length > 0;
        if (hasWhitelist || hasPatterns || hasUrlNodes) {
            body.traverse(new NodeVisitor() {
                @Override
                public void head(Node node, int depth) {
                    if (node.nodeName().equals("a")) {
                        // anchors are judged by their href
                        final boolean rejected = UrlUtil.isUrlFiltered(browser.getCurrentUrl(), node.attr("href"),
                                node, whitelist, patterns, urlNodes, transforms);
                        if (rejected) {
                            NodeUtil.markFiltered(node, false);
                        }
                        return;
                    }
                    // any other node is judged by whatever URL attribute it carries, if any
                    final String urlAttr = UrlUtil.urlFromAttr(node);
                    if (CommonUtil.isEmpty(urlAttr)) {
                        return;
                    }
                    if (UrlUtil.isUrlFiltered(browser.getCurrentUrl(), urlAttr, node, whitelist, patterns,
                            urlNodes, transforms)) {
                        NodeUtil.markFiltered(node, true);
                    }
                }

                @Override
                public void tail(Node node, int depth) {
                }
            });
        }

        if (WebApp.DEBUG) {
            final File dump = new File("./" + System.currentTimeMillis() + ".log.scrape");
            try {
                FileUtils.writeStringToFile(dump, body.outerHtml(), "utf-8");
            } catch (IOException ignored) {
                // the debug dump is best-effort only
            }
        }
        return body;
    } catch (Browser.Retry r) {
        throw r;
    } catch (Browser.Fatal f) {
        throw f;
    } catch (Throwable t) {
        throw new ActionFailed(t);
    }
}
From source file:com.near.chimerarevo.fragments.PostFragment.java
/**
 * Renders the {@code <li>} items of the given lists as bullet-prefixed HTML lines and passes the
 * combined markup to {@code addText}.
 *
 * <p>Before an item is rendered, the {@code href} attribute is removed from every link inside it
 * whose target contains {@code '#'}. Each item is emitted as a bullet character, the item's outer
 * HTML and a {@code <br />}.
 *
 * @param itms the list elements whose {@code <li>} items are rendered
 */
private void parseBulletedLists(Elements itms) {
    // StringBuilder avoids re-copying the accumulated markup on every item
    StringBuilder bld = new StringBuilder();
    for (Element itm : itms) {
        for (Element item : itm.getElementsByTag("li")) {
            if (item.children().size() >= 1) {
                for (Element link : item.getElementsByTag("a")) {
                    if (link.attr("href").contains("#")) {
                        link.removeAttr("href");
                    }
                }
            }
            bld.append("\u2022 ").append(item.outerHtml()).append("<br />");
        }
    }
    addText(bld.toString(), true, Typeface.DEFAULT);
}
From source file:com.near.chimerarevo.fragments.PostFragment.java
/**
 * Renders the {@code <li>} items of the given lists as numbered HTML lines and passes the combined
 * markup to {@code addText}.
 *
 * <p>Numbering restarts at 1 for every element of {@code itms} and follows the order in which
 * {@code getElementsByTag("li")} returns the items. Each item is emitted as a bold
 * {@code "n)"} label followed by the item's outer HTML in italics and a {@code <br />}.
 *
 * @param itms the list elements whose {@code <li>} items are rendered
 */
private void parseOrderedLists(Elements itms) {
    // StringBuilder avoids re-copying the accumulated markup on every item
    StringBuilder bld = new StringBuilder();
    for (Element itm : itms) {
        Elements items = itm.getElementsByTag("li");
        for (int j = 0; j < items.size(); j++) {
            bld.append("<b>").append(j + 1).append(")</b> <i>")
                    .append(items.get(j).outerHtml())
                    .append("</i><br />");
        }
    }
    addText(bld.toString(), true, Typeface.DEFAULT);
}
From source file:org.asqatasun.processing.ProcessRemarkServiceImpl.java
/** * /* w ww .j a v a 2 s . c o m*/ * @param element * @return */ public String getSnippetFromElement(Element element) { String elementHtml = StringEscapeUtils.escapeHtml4(StringUtil.normaliseWhitespace(element.outerHtml())) .trim(); if (element.children().isEmpty() || elementHtml.length() <= SNIPPET_MAX_LENGTH) { return elementHtml; } return properlyCloseSnippet(element, elementHtml, elementHtml.substring(0, SNIPPET_MAX_LENGTH)); }
From source file:org.mar9000.space2latex.WikiPage.java
public static void downloadWikiPageImages(WikiPage page) throws MalformedURLException { String pageUrl = page.json.getJSONObject(JSON_LINKS_ATTR).getString(JSON_SELF_ATTR); Document document = Jsoup.parseBodyFragment(page.storage); document.outputSettings().prettyPrint(false); Elements images = document.select("ac|image"); if (images.size() > 0) LOGGER.info(" Download images:"); for (Element element : images) { String downloadURL = null; String imageKey = null;//from www.jav a 2s. co m // Attachment? Elements refs = element.select("ri|attachment"); WikiImage image = new WikiImage(); image.pageId = page.id; image.acImage = element.outerHtml(); // if (refs.size() > 0) { // Attachment. Element riAttachment = refs.get(0); imageKey = riAttachment.attr("ri:filename"); Elements riPages = riAttachment.select("ri|page"); // Thumbnails are not found with "child/attachment" URL schema. boolean isThumbnail = "true".equals(element.attr("ac:thumbnail")); String queryURL = null; if (!isThumbnail) { queryURL = pageUrl + "/child/attachment?filename=" + URLEncoder.encode(imageKey); } else { // For thumbnail we construct directly the downloadURL without queryURL. /* Some pages have thumbnail images for better online reading. * Here we download always the attached file to embed readable imagesinto the pdf. downloadURL = pageUrl.substring(0, pageUrl.indexOf("/rest/api")) + "/download/thumbnails/" + page.id + "/" + URLEncoder.encode(imageKey); */ downloadURL = pageUrl.substring(0, pageUrl.indexOf("/rest/api")) + "/download/attachments/" + page.id + "/" + URLEncoder.encode(imageKey); } if (riPages.size() > 0) { // The attachment is related with another page. 
Element riPage = riPages.get(0); String space = riPage.attr("ri:space-key"); String contentTitle = riPage.attr("ri:content-title").replaceAll(" ", "%20"); String self = page.json.getJSONObject(JSON_LINKS_ATTR).getString(JSON_SELF_ATTR); String newQueryURL = self.substring(0, self.lastIndexOf('/')) + "?title=" + contentTitle + "&spaceKey=" + space; JSONObject jsonNewQuery = ConfluenceRESTUtils.getURLResponse(newQueryURL); if (jsonNewQuery.getInt(JSON_SIZE_ATTR) == 0) throw new RuntimeException( "Page \"" + contentTitle + "\" in space " + space + " not found."); JSONObject jsonNewPage = (JSONObject) jsonNewQuery.getJSONArray(JSON_RESULTS_ATTR).get(0); image.pageId = jsonNewPage.getString(JSON_ID_ATTR); // Overwrite queryURL. String newPageUrl = jsonNewPage.getJSONObject(JSON_LINKS_ATTR).getString(JSON_SELF_ATTR); queryURL = newPageUrl + "/child/attachment?filename=" + URLEncoder.encode(imageKey); } if (!isThumbnail) downloadURL = getAttachmentDownloadURL(queryURL); } else { refs = element.select("ri|url"); if (refs.size() > 0) { // URL. downloadURL = refs.get(0).attr("ri:value"); URL tempURL = new URL(downloadURL); String urlPath = tempURL.getPath(); imageKey = urlPath.substring(urlPath.lastIndexOf('/') + 1); } else { throw new RuntimeException("Image format unknown: " + element.toString()); } } // Download the image data. image.filename = imageKey.replace(' ', '_'); // Space are not handled by LaTeX. if (downloadURL != null) { LOGGER.info(" about to download image {}/{}", new Object[] { image.pageId, image.filename }); image.data = IOUtils.getImageFromURL(downloadURL); } else { LOGGER.info(" NULL download URL for page/image: {}/{}", new Object[] { image.pageId, image.filename }); } page.images.put(imageKey, image); } }
From source file:org.silverpeas.mobile.server.servlets.PublicationContentServlet.java
private void displayWysiwyg(String html, HttpServletRequest request, HttpServletResponse response, String instanceId) throws IOException { html = "<html><body>" + html + "</body></html>"; Document doc = Jsoup.parse(html); Elements body = doc.getElementsByTag("body"); if (!body.isEmpty()) { html = body.first().html();/*from w w w .ja va 2 s.c o m*/ } Elements images = doc.getElementsByTag("img"); for (Element img : images) { String source = img.attr("src"); String newSource = source; if (source.contains("/silverpeas")) { // need to convert in dataurl newSource = convertSpImageUrlToDataUrl(source); } img.attr("src", newSource); } Elements embeds = doc.getElementsByTag("embed"); for (Element embed : embeds) { String htmlPart = embed.outerHtml(); if (htmlPart.contains("flash")) { String attachmentId = htmlPart .substring(htmlPart.indexOf("attachmentId/") + "attachmentId/".length()); attachmentId = attachmentId.substring(0, attachmentId.indexOf("/")); SimpleDocument attachment = AttachmentServiceProvider.getAttachmentService().searchDocumentById( new SimpleDocumentPK(attachmentId), getUserInSession(request).getUserPreferences().getLanguage()); String type = attachment.getContentType(); String url = getServletContext().getContextPath() + "/services/spmobile/Attachment"; url = url + "?id=" + attachmentId + "&instanceId=" + instanceId + "&lang=" + getUserInSession(request).getUserPreferences().getLanguage() + "&userId=" + getUserInSession(request).getId(); if (type.equals("audio/mpeg") || type.equals("audio/ogg") || type.equals("audio/wav")) { embed.parent().append("<audio controls><source src='" + url + "' type='" + type + "'></audio>"); embed.remove(); } else if (type.equals("video/mp4") || type.equals("video/ogg") || type.equals("video/webm")) { embed.parent() .append("<video controls='controls'><source src='" + url + "' type='" + type + "' />"); embed.remove(); } } } html = doc.outerHtml(); OutputStreamWriter out = new OutputStreamWriter(response.getOutputStream(), 
"UTF-8"); writeContainer(out, html); out.flush(); }
From source file:org.structr.web.importer.Importer.java
private String nodeToString(Node node) { if (node instanceof TextNode) { return ((TextNode) node).getWholeText(); } else if (node instanceof Element) { final Element el = (Element) node; final boolean prettyPrintBackup = el.ownerDocument().outputSettings().prettyPrint(); el.ownerDocument().outputSettings().prettyPrint(false); final String result = el.outerHtml(); el.ownerDocument().outputSettings().prettyPrint(prettyPrintBackup); return result; } else {// ww w . ja v a 2 s.co m return node.toString(); } }
From source file:org.symphonyoss.client.util.MlMessageParser.java
public void parseMessage(String message) throws SymException { Document doc = Jsoup.parse(message); originalDoc = doc.clone();//w w w . j a v a2 s . c o m Element elementErrors = doc.body().getElementsByTag("errors").first(); if (elementErrors != null) { if (elementErrors.outerHtml() != null) logger.debug("Errors found in message: {}", elementErrors.outerHtml()); } //Lets remove the errors elements doc.select("errors").remove(); elementMessageML = doc.select("messageML").first(); if (elementMessageML == null) elementMessageML = doc.select("div").first(); if (elementMessageML != null) { if (elementMessageML.outerHtml() != null) logger.debug("Doc parsed: {}", elementMessageML.outerHtml()); } else { logger.error("Could not parse document for message {}", message); throw new SymException("Malformed message"); } textDoc = new StringBuilder(); stripTags(textDoc, elementMessageML.childNodes()); textChunks = textDoc.toString().split("\\s+"); }