Example usage for org.jsoup.nodes Element outerHtml

List of usage examples for org.jsoup.nodes Element outerHtml

Introduction

On this page you can find example usages of the outerHtml() method of org.jsoup.nodes.Element.

Prototype

public String outerHtml() 

Source Link

Document

Get the outer HTML of this node.

Usage

From source file:com.jimplush.goose.ContentExtractor.java

/**
 * adds any siblings that may have a decent score to this node
 *
 * @param node/*ww w. ja va2s  .c om*/
 * @return
 */
private Element addSiblings(Element node) {
    if (logger.isDebugEnabled()) {
        logger.debug("Starting to add siblings");
    }
    int baselineScoreForSiblingParagraphs = getBaselineScoreForSiblings(node);

    Element currentSibling = node.previousElementSibling();
    while (currentSibling != null) {
        if (logger.isDebugEnabled()) {
            logger.debug("SIBLINGCHECK: " + debugNode(currentSibling));
        }

        if (currentSibling.tagName().equals("p")) {

            node.child(0).before(currentSibling.outerHtml());
            currentSibling = currentSibling.previousElementSibling();
            continue;
        }

        // check for a paraph embedded in a containing element
        int insertedSiblings = 0;
        Elements potentialParagraphs = currentSibling.getElementsByTag("p");
        if (potentialParagraphs.first() == null) {
            currentSibling = currentSibling.previousElementSibling();
            continue;
        }
        for (Element firstParagraph : potentialParagraphs) {
            WordStats wordStats = StopWords.getStopWordCount(firstParagraph.text());

            int paragraphScore = wordStats.getStopWordCount();

            if ((float) (baselineScoreForSiblingParagraphs * .30) < paragraphScore) {
                if (logger.isDebugEnabled()) {
                    logger.debug("This node looks like a good sibling, adding it");
                }
                node.child(insertedSiblings).before("<p>" + firstParagraph.text() + "<p>");
                insertedSiblings++;
            }

        }

        currentSibling = currentSibling.previousElementSibling();
    }
    return node;

}

From source file:com.astamuse.asta4d.web.form.field.impl.AbstractRadioAndCheckboxPrepareRenderer.java

/**
 * Builds the pre-rendering pipeline for a radio/checkbox input and its related label(s).
 *
 * <p>Starting from the renderer returned by {@code super.preRender}, the following steps are
 * appended in order:
 * <ol>
 * <li>option map set, no duplicate selector: wrap the input matched by {@code editSelector} in a
 * {@code GroupNode} with a generated id, remove the matching label(s) from their current place
 * (keeping clones), then append those labels (or a newly created one) to the wrapper;</li>
 * <li>option map set, duplicate selector set: only record the input id and the label selector;</li>
 * <li>no option map: tag the existing inputs/labels with reference attributes; otherwise repeat
 * the wrapper (or the duplicate-selector target) once per option and rewrite value, id and
 * label of each copy;</li>
 * <li>switch the wrapper's group-node type attribute to the "fake" type.</li>
 * </ol>
 *
 * @param editSelector selector matching the input element(s) to prepare
 * @param displaySelector passed through to {@code super.preRender} and
 *        {@code PrepareRenderingDataUtil.storeDataToContextBySelector}
 * @return the assembled renderer, with the missing-selector warning re-enabled
 * @throws IllegalArgumentException if both {@code duplicateSelector} and
 *         {@code labelWrapperIndicatorAttr} are set
 */
@Override
public Renderer preRender(final String editSelector, final String displaySelector) {

    // the two ways of locating labels are mutually exclusive
    if (duplicateSelector != null && labelWrapperIndicatorAttr != null) {
        String msg = "duplicateSelector (%s) and labelWrapperIndicatorAttr (%s) cannot be specified at same time.";
        throw new IllegalArgumentException(String.format(msg, duplicateSelector, labelWrapperIndicatorAttr));
    }

    Renderer renderer = super.preRender(editSelector, displaySelector);

    renderer.disableMissingSelectorWarning();

    // create wrapper for input element
    // (shared mutable holder: filled by the first renderers below and read by the later ones)
    final WrapperIdHolder wrapperIdHolder = new WrapperIdHolder();

    if (duplicateSelector == null && optionMap != null) {

        // step 1a: replace the input element by a group node that contains a clone of it
        renderer.add(new Renderer(editSelector, new ElementTransformer(null) {
            @Override
            public Element invoke(Element elem) {

                // a second invocation means the selector matched more than one element
                if (wrapperIdHolder.wrapperId != null) {
                    throw new RuntimeException("The target of selector[" + editSelector
                            + "] must be unique but over than 1 target was found."
                            + "Perhaps you have specified an option value map on a group of elements "
                            + "which is intented to be treated as predefined static options by html directly.");
                }

                String id = elem.id();
                if (StringUtils.isEmpty(id)) {
                    String msg = "A %s input element must have id value being configured:%s";
                    throw new RuntimeException(String.format(msg, getTypeString(), elem.outerHtml()));
                }

                GroupNode wrapper = new GroupNode();

                // cheating the rendering engine for not skipping the rendering on group node
                wrapper.attr(ExtNodeConstants.GROUP_NODE_ATTR_TYPE,
                        ExtNodeConstants.GROUP_NODE_ATTR_TYPE_USERDEFINE);

                // put the input element under the wrapper node
                wrapper.appendChild(elem.clone());

                String wrapperId = IdGenerator.createId();
                wrapper.attr("id", wrapperId);

                wrapperIdHolder.inputId = id;
                wrapperIdHolder.wrapperId = wrapperId;

                // record the selector used to find the label belonging to this input
                if (labelWrapperIndicatorAttr == null) {
                    wrapperIdHolder.labelSelector = SelectorUtil.attr("label", "for", wrapperIdHolder.inputId);
                } else {
                    wrapperIdHolder.labelSelector = SelectorUtil.attr(labelWrapperIndicatorAttr,
                            wrapperIdHolder.inputId);
                }

                return wrapper;
            }
        }));

        // step 1b: detach the label(s), keeping clones for relocation
        renderer.add(":root", new Renderable() {
            @Override
            public Renderer render() {
                if (wrapperIdHolder.wrapperId == null) {
                    // for display mode?
                    return Renderer.create();
                }

                // Remove the label element and cache it in wrapperIdHolder; it is relocated later.
                // Since the input and label pair has to be duplicated per entry of the given option
                // value map, the input and label elements must live in the same parent node, which
                // can then be duplicated.
                Renderer renderer = Renderer.create().disableMissingSelectorWarning();
                renderer.add(new Renderer(wrapperIdHolder.labelSelector, new ElementTransformer(null) {
                    @Override
                    public Element invoke(Element elem) {
                        wrapperIdHolder.relocatingLabels.add(elem.clone());
                        // the label is replaced by an empty group node at its original position
                        return new GroupNode();
                    }

                }));

                return renderer.enableMissingSelectorWarning();
            }
        });

        // step 1c: put the cached label(s) into the wrapper created in step 1a
        renderer.add(":root", new Renderable() {
            @Override
            public Renderer render() {

                if (wrapperIdHolder.wrapperId == null) {
                    // for display mode?
                    return Renderer.create();
                }

                String selector = SelectorUtil.id(wrapperIdHolder.wrapperId);

                // relocate the label element to the wrapper node
                return Renderer.create(selector, new ElementSetter() {
                    @Override
                    public void set(Element elem) {
                        if (wrapperIdHolder.relocatingLabels.isEmpty()) {// no existing label found
                            Element label = new Element(Tag.valueOf("label"), "");
                            label.attr("for", wrapperIdHolder.inputId);
                            elem.appendChild(label);
                        } else {
                            for (Element label : wrapperIdHolder.relocatingLabels) {
                                elem.appendChild(label);
                            }
                        }
                    }
                });

            }
        });

    } else {
        if (duplicateSelector != null && optionMap != null) {
            // if duplicateSelector is specified, we just only need to store the input element id
            renderer.add(editSelector, new ElementSetter() {
                @Override
                public void set(Element elem) {
                    // a second invocation means the selector matched more than one element
                    if (wrapperIdHolder.inputId != null) {
                        String msg = "The target of selector[%s] (inside duplicator:%s) must be unique but over than 1 target was found.";
                        throw new RuntimeException(String.format(msg, editSelector, duplicateSelector));
                    }
                    String id = elem.id();
                    if (StringUtils.isEmpty(id)) {
                        String msg = "A %s input element (inside duplicator:%s) must have id value being configured:%s";
                        throw new RuntimeException(
                                String.format(msg, getTypeString(), duplicateSelector, elem.outerHtml()));
                    }
                    wrapperIdHolder.inputId = id;

                    // record the selector used to find the label belonging to this input;
                    // labelWrapperIndicatorAttr is necessarily null here (duplicateSelector is set and
                    // the check at the entry of this method rejects both being set), so the plain
                    // label[for] selector is used.
                    wrapperIdHolder.labelSelector = SelectorUtil.attr("label", "for", wrapperIdHolder.inputId);
                }
            });
        }
    }

    // here we finished restructuring the input element and its related label element and then we begin to manufacture all the
    // input/label pairs for the option list

    renderer.add(":root", new Renderable() {
        @Override
        public Renderer render() {

            if (optionMap == null) {
                // for static options
                Renderer renderer = Renderer.create();
                // ids of all inputs matched by editSelector, collected first and consumed by the next step
                final List<String> inputIdList = new LinkedList<>();
                renderer.add(editSelector, new ElementSetter() {
                    @Override
                    public void set(Element elem) {
                        inputIdList.add(elem.id());
                    }
                });
                renderer.add(":root", new Renderable() {
                    @Override
                    public Renderer render() {
                        Renderer render = Renderer.create().disableMissingSelectorWarning();
                        for (String id : inputIdList) {
                            // NOTE(review): labelWrapperIndicatorAttr may be null on this path - confirm
                            // that SelectorUtil.attr tolerates a null attribute name.
                            render.add(SelectorUtil.attr(labelWrapperIndicatorAttr, id), LABEL_REF_ATTR, id);
                            render.add(SelectorUtil.attr("label", "for", id), LABEL_REF_ATTR, id);
                        }
                        return render.enableMissingSelectorWarning();
                    }
                });

                if (duplicateSelector != null) {
                    // tag each duplicated container, its inputs and its labels with one shared reference id
                    renderer.add(duplicateSelector, new Renderable() {
                        @Override
                        public Renderer render() {
                            String duplicatorRef = IdGenerator.createId();
                            Renderer render = Renderer.create(":root", DUPLICATOR_REF_ID_ATTR, duplicatorRef);
                            render.add("input", DUPLICATOR_REF_ATTR, duplicatorRef);
                            String labelSelector;
                            if (labelWrapperIndicatorAttr == null) {
                                labelSelector = SelectorUtil.tag("label");
                            } else {
                                labelSelector = SelectorUtil.attr(labelWrapperIndicatorAttr);
                            }
                            render.add(labelSelector, DUPLICATOR_REF_ATTR, duplicatorRef);
                            return render;
                        }
                    });
                }
                return renderer;
            } else {
                if (wrapperIdHolder.wrapperId == null && duplicateSelector == null) {
                    // for display mode?
                    return Renderer.create();
                }
                if (wrapperIdHolder.inputId == null) {
                    // target input element not found
                    return Renderer.create();
                }
                // the element repeated per option: the generated wrapper, or the user supplied duplicator
                String selector = duplicateSelector == null ? SelectorUtil.id(wrapperIdHolder.wrapperId)
                        : duplicateSelector;
                return Renderer.create(selector, optionMap.getOptionList(), row -> {

                    Renderer renderer = Renderer.create().disableMissingSelectorWarning();

                    String inputSelector = SelectorUtil.id("input", wrapperIdHolder.inputId);
                    renderer.add(inputSelector, "value", row.getValue());

                    // we have to generate a new uuid for the input element to make sure its id is unique even we duplicated it.
                    String newInputId = inputIdByValue ? row.getValue() : IdGenerator.createId();

                    // make the generated id more understandable by prefixing with original id
                    newInputId = wrapperIdHolder.inputId + "-" + newInputId;

                    // stays null when no duplicateSelector is configured
                    String duplicatorRef = null;

                    if (duplicateSelector != null) {
                        duplicatorRef = IdGenerator.createId();
                    }

                    renderer.add(":root", DUPLICATOR_REF_ID_ATTR, duplicatorRef);

                    renderer.add(inputSelector, DUPLICATOR_REF_ATTR, duplicatorRef);
                    renderer.add(inputSelector, "id", newInputId);

                    // may be a wrapper container of label
                    renderer.add(wrapperIdHolder.labelSelector, LABEL_REF_ATTR, newInputId);
                    if (labelWrapperIndicatorAttr != null) {
                        renderer.add(wrapperIdHolder.labelSelector, labelWrapperIndicatorAttr, newInputId);
                    }
                    renderer.add(wrapperIdHolder.labelSelector, DUPLICATOR_REF_ATTR, duplicatorRef);

                    renderer.add("label", "for", newInputId);
                    renderer.add("label", row.getDisplayText());

                    return renderer.enableMissingSelectorWarning();
                });
            }
        }
    });

    // since we cheated the rendering engine, we should set the type of group node created to faked for fast clean up
    renderer.add(":root", new Renderable() {
        @Override
        public Renderer render() {
            if (wrapperIdHolder.wrapperId == null) {
                // for display mode?
                return Renderer.create();
            }
            String selector = SelectorUtil.id(wrapperIdHolder.wrapperId);
            return Renderer.create(selector, new ElementSetter() {
                @Override
                public void set(Element elem) {
                    elem.attr(ExtNodeConstants.GROUP_NODE_ATTR_TYPE,
                            ExtNodeConstants.GROUP_NODE_ATTR_TYPE_FAKE);
                }
            });
        }
    });

    PrepareRenderingDataUtil.storeDataToContextBySelector(editSelector, displaySelector, optionMap);

    return renderer.enableMissingSelectorWarning();
}

From source file:com.screenslicer.core.util.BrowserUtil.java

/**
 * Parses the browser's current page into a jsoup body element and annotates its nodes.
 *
 * <p>When {@code init} is set, a script is first executed in the browser that strips previously
 * added marker class names, tags every element with a unique node marker class and flags
 * elements the page-side {@code isVisible} function reports as hidden. The page source is then
 * parsed, every non-text, non-empty node is marked visible, and - if any of {@code whitelist},
 * {@code patterns} or {@code urlNodes} is non-empty - nodes whose URL is rejected by
 * {@code UrlUtil.isUrlFiltered} are marked as filtered.
 *
 * @param browser the browser holding the page to read
 * @param init whether to run the marker script in the browser before parsing
 * @param whitelist URL whitelist handed to the URL filter, may be null
 * @param patterns URL patterns handed to the URL filter, may be null
 * @param urlNodes URL nodes handed to the URL filter, may be null
 * @param transforms URL transforms handed to the URL filter
 * @return the annotated body element of the parsed page
 * @throws ActionFailed wrapping any throwable other than {@code Browser.Retry} and
 *         {@code Browser.Fatal}, which are rethrown unchanged
 */
public static Element openElement(final Browser browser, boolean init, final String[] whitelist,
        final String[] patterns, final HtmlNode[] urlNodes, final UrlTransform[] transforms)
        throws ActionFailed {
    try {
        if (init) {
            // Reserve a fresh id under the lock so marker class names differ between invocations.
            final int markerId;
            synchronized (startIdLock) {
                startId = startId == Integer.MAX_VALUE ? 0 : startId + 1;
                markerId = startId;
            }
            final String markerScript = "      var all = document.body.getElementsByTagName('*');"
                    + "for(var i = 0; i < all.length; i++){"
                    + "  if(all[i].className && typeof all[i].className == 'string'){"
                    + "    all[i].className=all[i].className.replace(/" + HIDDEN_MARKER + "/g,'').replace(/"
                    + FILTERED_MARKER + "/g,'').replace(/" + FILTERED_LENIENT_MARKER
                    + "/g,'').replace(/\\s+/g,' ').trim();"
                    + "  }"
                    + "}"
                    + isVisible
                    + "for(var j = 0; j < all.length; j++){"
                    + "  if(!all[j].className.match(/" + NODE_MARKER + "\\d+_\\d+/g)){"
                    + "    all[j].className += ' " + NODE_MARKER + markerId + "_'+j+' ';"
                    + "  }"
                    + "  if(!isVisible(all[j])){"
                    + "    all[j].className += ' " + HIDDEN_MARKER + " ';"
                    + "  }"
                    + "}";
            browser.executeScript(markerScript);
        }

        final String currentUrl = browser.getCurrentUrl();
        // Constructed only for validation: a malformed URL throws and ends up as ActionFailed.
        new URL(currentUrl);
        final Element body = CommonUtil.parse(browser.getPageSource(), currentUrl, false).body();

        // Pass 1: mark every non-text, non-empty node as visible.
        final NodeVisitor visibilityMarker = new NodeVisitor() {
            @Override
            public void head(Node node, int depth) {
                if (node.nodeName().equals("#text")) {
                    return;
                }
                if (!NodeUtil.isEmpty(node)) {
                    NodeUtil.markVisible(node);
                }
            }

            @Override
            public void tail(Node node, int depth) {
            }
        };
        body.traverse(visibilityMarker);

        // Pass 2 (only when some URL filter is configured): mark nodes with a filtered URL.
        final boolean hasWhitelist = whitelist != null && whitelist.length > 0;
        final boolean hasPatterns = patterns != null && patterns.length > 0;
        final boolean hasUrlNodes = urlNodes != null && urlNodes.length > 0;
        if (hasWhitelist || hasPatterns || hasUrlNodes) {
            final NodeVisitor filterMarker = new NodeVisitor() {
                @Override
                public void head(Node node, int depth) {
                    if (node.nodeName().equals("a")) {
                        if (UrlUtil.isUrlFiltered(browser.getCurrentUrl(), node.attr("href"), node, whitelist,
                                patterns, urlNodes, transforms)) {
                            NodeUtil.markFiltered(node, false);
                        }
                        return;
                    }
                    final String urlAttr = UrlUtil.urlFromAttr(node);
                    if (!CommonUtil.isEmpty(urlAttr) && UrlUtil.isUrlFiltered(browser.getCurrentUrl(),
                            urlAttr, node, whitelist, patterns, urlNodes, transforms)) {
                        NodeUtil.markFiltered(node, true);
                    }
                }

                @Override
                public void tail(Node node, int depth) {
                }
            };
            body.traverse(filterMarker);
        }

        if (WebApp.DEBUG) {
            try {
                final File dump = new File("./" + System.currentTimeMillis() + ".log.scrape");
                FileUtils.writeStringToFile(dump, body.outerHtml(), "utf-8");
            } catch (IOException ignored) {
                // Debug dump is best-effort; a failed write must not fail the action.
            }
        }
        return body;
    } catch (Browser.Retry retry) {
        throw retry;
    } catch (Browser.Fatal fatal) {
        throw fatal;
    } catch (Throwable failure) {
        throw new ActionFailed(failure);
    }
}

From source file:com.near.chimerarevo.fragments.PostFragment.java

/**
 * Renders every {@code <li>} found under the given elements as one bulleted HTML fragment and
 * passes it to {@code addText}.
 *
 * <p>Anchors inside a list item whose {@code href} contains {@code '#'} lose their
 * {@code href} attribute before the item is serialised. Each item is emitted as the bullet
 * character U+2022, a space, the item's outer HTML and a {@code <br />}.
 *
 * @param itms the list container elements to scan for {@code <li>} descendants
 */
private void parseBulletedLists(Elements itms) {
    // StringBuilder instead of String += : the fragment grows inside nested loops.
    final StringBuilder bld = new StringBuilder();
    for (Element itm : itms) {
        Elements str = itm.getElementsByTag("li");
        for (Element itm2 : str) {
            if (itm2.children().size() >= 1) {
                Elements ch = itm2.getElementsByTag("a");
                for (Element c : ch) {
                    if (c.attr("href").contains("#")) {
                        c.removeAttr("href");
                    }
                }
            }
            bld.append("\u2022 ").append(itm2.outerHtml()).append("<br />");
        }
    }
    addText(bld.toString(), true, Typeface.DEFAULT);
}

From source file:com.near.chimerarevo.fragments.PostFragment.java

/**
 * Renders every {@code <li>} found under the given elements as one numbered HTML fragment and
 * passes it to {@code addText}.
 *
 * <p>Numbering restarts at 1 for each element of {@code itms}. Each item is emitted as a bold
 * {@code "n)"} prefix followed by the item's outer HTML in italics and a {@code <br />}.
 *
 * @param itms the list container elements to scan for {@code <li>} descendants
 */
private void parseOrderedLists(Elements itms) {
    // StringBuilder instead of String += : the fragment grows inside nested loops.
    final StringBuilder bld = new StringBuilder();
    for (Element itm : itms) {
        Elements str = itm.getElementsByTag("li");
        for (int j = 0; j < str.size(); j++) {
            Element itm2 = str.get(j);
            bld.append("<b>").append(j + 1).append(")</b> <i>").append(itm2.outerHtml()).append("</i><br />");
        }
    }
    addText(bld.toString(), true, Typeface.DEFAULT);
}

From source file:org.asqatasun.processing.ProcessRemarkServiceImpl.java

/**
 * /*  w  ww .j a v  a  2 s .  c o m*/
 * @param element
 * @return 
 */
public String getSnippetFromElement(Element element) {
    String elementHtml = StringEscapeUtils.escapeHtml4(StringUtil.normaliseWhitespace(element.outerHtml()))
            .trim();
    if (element.children().isEmpty() || elementHtml.length() <= SNIPPET_MAX_LENGTH) {
        return elementHtml;
    }
    return properlyCloseSnippet(element, elementHtml, elementHtml.substring(0, SNIPPET_MAX_LENGTH));
}

From source file:org.mar9000.space2latex.WikiPage.java

/**
 * Downloads every {@code ac:image} referenced in the page's storage markup and registers the
 * result in {@code page.images}, keyed by the image key (attachment file name or last URL path
 * segment).
 *
 * <p>Two kinds of image reference are handled:
 * <ul>
 * <li>{@code ri:attachment} - the download URL is resolved through the "child/attachment" query
 * of the owning page (this page, or the page named by a nested {@code ri:page}); for images
 * flagged {@code ac:thumbnail="true"} the attachment download URL is built directly
 * instead;</li>
 * <li>{@code ri:url} - the given URL is used as-is.</li>
 * </ul>
 *
 * @param page the wiki page whose images are downloaded; its {@code images} map is filled
 * @throws MalformedURLException if an {@code ri:url} value is not a valid URL
 * @throws RuntimeException if an image has neither reference kind, or a referenced page is not
 *         found
 */
public static void downloadWikiPageImages(WikiPage page) throws MalformedURLException {
    final String pageUrl = page.json.getJSONObject(JSON_LINKS_ATTR).getString(JSON_SELF_ATTR);
    final Document storageDoc = Jsoup.parseBodyFragment(page.storage);
    storageDoc.outputSettings().prettyPrint(false);
    final Elements acImages = storageDoc.select("ac|image");
    if (!acImages.isEmpty()) {
        LOGGER.info("  Download images:");
    }

    for (Element acImage : acImages) {
        final Elements attachmentRefs = acImage.select("ri|attachment");
        final WikiImage image = new WikiImage();
        image.pageId = page.id;
        image.acImage = acImage.outerHtml();

        final String imageKey;
        String downloadURL = null;
        if (!attachmentRefs.isEmpty()) {
            // Image stored as an attachment.
            final Element riAttachment = attachmentRefs.get(0);
            imageKey = riAttachment.attr("ri:filename");
            final Elements riPages = riAttachment.select("ri|page");
            // Thumbnails are not found with the "child/attachment" URL schema.
            final boolean isThumbnail = "true".equals(acImage.attr("ac:thumbnail"));
            String queryURL = null;
            // NOTE(review): URLEncoder.encode(String) is deprecated and uses the platform default
            // encoding - consider the two-argument overload.
            if (isThumbnail) {
                // For a thumbnail the download URL is built directly, without a query URL.
                // Some pages use thumbnail images for better online reading; the full attached
                // file is always downloaded here so the PDF embeds a readable image, i.e.
                // ".../download/attachments/..." is used rather than ".../download/thumbnails/...".
                final String baseUrl = pageUrl.substring(0, pageUrl.indexOf("/rest/api"));
                downloadURL = baseUrl + "/download/attachments/" + page.id + "/"
                        + URLEncoder.encode(imageKey);
            } else {
                queryURL = pageUrl + "/child/attachment?filename=" + URLEncoder.encode(imageKey);
            }

            if (!riPages.isEmpty()) {
                // The attachment belongs to another page: look that page up by title and space.
                final Element riPage = riPages.get(0);
                final String space = riPage.attr("ri:space-key");
                final String contentTitle = riPage.attr("ri:content-title").replaceAll(" ", "%20");
                final String self = page.json.getJSONObject(JSON_LINKS_ATTR).getString(JSON_SELF_ATTR);
                final String lookupURL = self.substring(0, self.lastIndexOf('/')) + "?title=" + contentTitle
                        + "&spaceKey=" + space;
                final JSONObject lookupResult = ConfluenceRESTUtils.getURLResponse(lookupURL);
                if (lookupResult.getInt(JSON_SIZE_ATTR) == 0) {
                    throw new RuntimeException(
                            "Page \"" + contentTitle + "\" in space " + space + " not found.");
                }
                final JSONObject otherPage = (JSONObject) lookupResult.getJSONArray(JSON_RESULTS_ATTR).get(0);
                image.pageId = otherPage.getString(JSON_ID_ATTR);
                // The query URL now points at the other page.
                final String otherPageUrl = otherPage.getJSONObject(JSON_LINKS_ATTR).getString(JSON_SELF_ATTR);
                queryURL = otherPageUrl + "/child/attachment?filename=" + URLEncoder.encode(imageKey);
            }

            if (!isThumbnail) {
                downloadURL = getAttachmentDownloadURL(queryURL);
            }
        } else {
            // Image referenced by plain URL.
            final Elements urlRefs = acImage.select("ri|url");
            if (urlRefs.isEmpty()) {
                throw new RuntimeException("Image format unknown: " + acImage.toString());
            }
            downloadURL = urlRefs.get(0).attr("ri:value");
            final String urlPath = new URL(downloadURL).getPath();
            imageKey = urlPath.substring(urlPath.lastIndexOf('/') + 1);
        }

        // Download the image data.
        image.filename = imageKey.replace(' ', '_'); // Spaces are not handled by LaTeX.
        if (downloadURL == null) {
            LOGGER.info("    NULL download URL for page/image: {}/{}",
                    new Object[] { image.pageId, image.filename });
        } else {
            LOGGER.info("    about to download image {}/{}", new Object[] { image.pageId, image.filename });
            image.data = IOUtils.getImageFromURL(downloadURL);
        }
        page.images.put(imageKey, image);
    }
}

From source file:org.silverpeas.mobile.server.servlets.PublicationContentServlet.java

/**
 * Rewrites a WYSIWYG HTML fragment for mobile display and writes it to the response.
 *
 * <p>The fragment is wrapped in {@code <html><body>} and parsed. Every {@code <img>} whose
 * source contains {@code "/silverpeas"} gets its source replaced by the result of
 * {@code convertSpImageUrlToDataUrl}. Every {@code <embed>} whose markup contains
 * {@code "flash"} and an {@code "attachmentId/<id>/"} path segment is replaced by an HTML5
 * {@code <audio>} or {@code <video>} element pointing at the mobile attachment service, when
 * the attachment's content type is one of the supported audio/video types. Embeds without a
 * recognisable attachment id are left untouched.
 *
 * <p>NOTE(review): the replacement media element is appended at the end of the embed's parent,
 * not at the embed's original position - confirm this is intended.
 *
 * @param html the WYSIWYG HTML fragment
 * @param request the current request, used to resolve the user in session
 * @param response the response the resulting document is written to (UTF-8)
 * @param instanceId component instance id added to the attachment URL
 * @throws IOException if writing to the response fails
 */
private void displayWysiwyg(String html, HttpServletRequest request, HttpServletResponse response,
        String instanceId) throws IOException {
    html = "<html><body>" + html + "</body></html>";
    Document doc = Jsoup.parse(html);

    Elements images = doc.getElementsByTag("img");
    for (Element img : images) {
        String source = img.attr("src");
        String newSource = source;
        if (source.contains("/silverpeas")) {
            // need to convert in dataurl
            newSource = convertSpImageUrlToDataUrl(source);
        }
        img.attr("src", newSource);
    }

    final String idMarker = "attachmentId/";
    Elements embeds = doc.getElementsByTag("embed");
    for (Element embed : embeds) {
        String htmlPart = embed.outerHtml();
        if (!htmlPart.contains("flash")) {
            continue;
        }
        // Extract the id between "attachmentId/" and the next '/'; skip the embed when the
        // markup does not have that shape instead of slicing at a bogus offset.
        int markerPos = htmlPart.indexOf(idMarker);
        if (markerPos < 0) {
            continue;
        }
        String attachmentId = htmlPart.substring(markerPos + idMarker.length());
        int idEnd = attachmentId.indexOf("/");
        if (idEnd < 0) {
            continue;
        }
        attachmentId = attachmentId.substring(0, idEnd);

        SimpleDocument attachment = AttachmentServiceProvider.getAttachmentService().searchDocumentById(
                new SimpleDocumentPK(attachmentId),
                getUserInSession(request).getUserPreferences().getLanguage());
        String type = attachment.getContentType();
        String url = getServletContext().getContextPath() + "/services/spmobile/Attachment";
        url = url + "?id=" + attachmentId + "&instanceId=" + instanceId + "&lang="
                + getUserInSession(request).getUserPreferences().getLanguage() + "&userId="
                + getUserInSession(request).getId();
        if (type.equals("audio/mpeg") || type.equals("audio/ogg") || type.equals("audio/wav")) {
            embed.parent().append("<audio controls><source src='" + url + "' type='" + type + "'></audio>");
            embed.remove();
        } else if (type.equals("video/mp4") || type.equals("video/ogg") || type.equals("video/webm")) {
            embed.parent()
                    .append("<video controls='controls'><source src='" + url + "' type='" + type + "' />");
            embed.remove();
        }
    }

    html = doc.outerHtml();
    OutputStreamWriter out = new OutputStreamWriter(response.getOutputStream(), "UTF-8");
    writeContainer(out, html);
    out.flush();
}

From source file:org.structr.web.importer.Importer.java

/**
 * Serialises a jsoup node to a string.
 *
 * <ul>
 * <li>A {@code TextNode} yields its whole, un-normalised text.</li>
 * <li>An {@code Element} yields its outer HTML, rendered with pretty-printing temporarily
 * switched off on the owning document; the previous setting is restored afterwards, even if
 * rendering fails. An element that has no owning document is rendered with whatever output
 * settings jsoup applies to detached nodes.</li>
 * <li>Any other node yields {@code toString()}.</li>
 * </ul>
 *
 * @param node the node to serialise
 * @return the textual representation of {@code node}
 */
private String nodeToString(Node node) {

    if (node instanceof TextNode) {

        return ((TextNode) node).getWholeText();

    } else if (node instanceof Element) {

        final Element el = (Element) node;

        // A detached element has no document whose settings could be toggled.
        if (el.ownerDocument() == null) {

            return el.outerHtml();
        }

        final boolean prettyPrintBackup = el.ownerDocument().outputSettings().prettyPrint();

        el.ownerDocument().outputSettings().prettyPrint(false);

        try {

            return el.outerHtml();

        } finally {

            // The output settings are shared by the whole document, so always put them back.
            el.ownerDocument().outputSettings().prettyPrint(prettyPrintBackup);
        }

    } else {

        return node.toString();

    }

}

From source file:org.symphonyoss.client.util.MlMessageParser.java

/**
 * Parses a MessageML string and populates this parser's state from it.
 *
 * <p>A clone of the parsed document is kept in {@code originalDoc}. All {@code errors}
 * elements are removed, then the first {@code messageML} element - or, failing that, the first
 * {@code div} - becomes {@code elementMessageML}. Its child nodes are run through
 * {@code stripTags} into {@code textDoc}, and the resulting text is split on whitespace into
 * {@code textChunks}.
 *
 * @param message the raw message markup
 * @throws SymException if neither a {@code messageML} nor a {@code div} element is present
 */
public void parseMessage(String message) throws SymException {

    final Document parsed = Jsoup.parse(message);
    originalDoc = parsed.clone();

    final Element errors = parsed.body().getElementsByTag("errors").first();
    if (errors != null) {
        final String errorsHtml = errors.outerHtml();
        if (errorsHtml != null) {
            logger.debug("Errors found in message: {}", errorsHtml);
        }
    }

    // Drop the errors elements before locating the message root.
    parsed.select("errors").remove();

    elementMessageML = parsed.select("messageML").first();
    if (elementMessageML == null) {
        elementMessageML = parsed.select("div").first();
    }

    if (elementMessageML == null) {
        logger.error("Could not parse document for message {}", message);
        throw new SymException("Malformed message");
    }

    final String rootHtml = elementMessageML.outerHtml();
    if (rootHtml != null) {
        logger.debug("Doc parsed: {}", rootHtml);
    }

    textDoc = new StringBuilder();
    stripTags(textDoc, elementMessageML.childNodes());

    textChunks = textDoc.toString().split("\\s+");

}