Example usage for org.jsoup.nodes Element replaceWith

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.replaceWith.

Prototype

public void replaceWith(Node in)

Source Link

Documentation

Replace this node in the DOM with the supplied node.

Usage

From source file:com.jimplush.goose.outputformatters.DefaultOutputFormatter.java

/**
 * Swaps common inline formatting elements for plain text nodes so the extracted
 * content carries no stray formatting. The tags {@code <strong>}, {@code <b>} and
 * {@code <i>} are handled, in that order; each element is replaced by a text node
 * holding its own text.
 */
private void replaceTagsWithText() {
    // Tags are processed one name at a time; each lookup runs only after the
    // replacements for the previous tag name have been applied.
    final String[] inlineTags = { "strong", "b", "i" };
    for (String tagName : inlineTags) {
        Elements matches = topNode.getElementsByTag(tagName);
        for (Element match : matches) {
            match.replaceWith(new TextNode(match.text(), topNode.baseUri()));
        }
    }
}

From source file:com.jimplush.goose.outputformatters.DefaultOutputFormatter.java

/**
 * Turns anchor elements under the top node into plain text nodes. An anchor that
 * contains at least one {@code <img>} element is left untouched.
 */
private void convertLinksToText() {
    if (logger.isDebugEnabled()) {
        logger.debug("Turning links to text");
    }
    for (Element anchor : topNode.getElementsByTag("a")) {
        boolean wrapsImage = !anchor.getElementsByTag("img").isEmpty();
        if (wrapsImage) {
            // keep image links as they are
            continue;
        }
        anchor.replaceWith(new TextNode(anchor.text(), topNode.baseUri()));
    }
}

From source file:com.aurel.track.exchange.docx.exporter.PreprocessImage.java

/**
 * Removes every HTML5 {@code <figure>} element, leaving its {@code <img>} in its place, and
 * saves the {@code <figcaption>} text in the image's "alt" attribute for later use.
 * <p>
 * The caption is stored regardless of whether the {@code <figcaption>} comes before or after
 * the {@code <img>} inside the figure. A figure without any {@code <img>} is left unchanged.
 * When a figure holds several images or captions, the last one of each is used.
 *
 * @param htmlContent the HTML fragment to preprocess
 * @return the parsed document with the figures replaced by their images
 */
private Document removeFigureSaveFigcaption(String htmlContent) {
    Document doc = Jsoup.parseBodyFragment(htmlContent);
    // figure is an HTML5 tag not accepted by Tidy, so it is replaced by its <img> content,
    // and the figcaption is saved in the "alt" attribute
    for (Element figureElement : doc.select("figure")) {
        // Both are tracked per figure so a caption can never leak from one figure to the next.
        Element imageNode = null;
        Element figcaptionNode = null;
        for (Element figureChild : figureElement.getAllElements()) {
            String nodeName = figureChild.nodeName();
            if ("img".equals(nodeName)) {
                imageNode = figureChild;
            } else if ("figcaption".equals(nodeName)) {
                figcaptionNode = figureChild;
            }
        }
        if (imageNode == null) {
            // nothing to put in place of the figure
            continue;
        }
        // Set the caption only after the whole figure has been scanned, so the order of
        // <img> and <figcaption> inside the figure does not matter.
        if (figcaptionNode != null) {
            imageNode.attr("alt", figcaptionNode.text());
        }
        figureElement.replaceWith(imageNode);
    }
    return doc;
}

From source file:com.astamuse.asta4d.render.RenderUtil.java

/**
 * Resolves every message node selected beneath {@code target}: the message is looked up
 * through the configured i18n helper and the node is replaced by the resulting text or
 * HTML node.
 *
 * @param target the element whose message nodes are to be rendered
 */
public final static void applyMessages(Element target) {
    Context context = Context.getCurrentThreadContext();
    List<Element> messageNodes = target.select(ExtNodeConstants.MSG_NODE_TAG_SELECTOR);
    for (final Element messageNode : messageNodes) {
        Attributes attrs = messageNode.attributes();
        String msgKey = attrs.get(ExtNodeConstants.MSG_NODE_ATTR_KEY);

        // Default message: the node's own inner HTML behind the HTML prefix. It is only
        // built when toString() is actually invoked on this object.
        Object fallback = new Object() {
            @Override
            public String toString() {
                return ExtNodeConstants.MSG_NODE_ATTRVALUE_HTML_PREFIX + messageNode.html();
            }
        };

        Locale locale = LocalizeUtil.getLocale(attrs.get(ExtNodeConstants.MSG_NODE_ATTR_LOCALE));

        // Publish the template path in the context for the duration of this node's lookup.
        String templatePath = attrs.get(ExtNodeConstants.ATTR_TEMPLATE_PATH);
        if (!StringUtils.isEmpty(templatePath)) {
            context.setData(TRACE_VAR_TEMPLATE_PATH, templatePath);
        } else {
            logger.warn("There is a msg tag which does not hold corresponding template file path:{}",
                    messageNode.outerHtml());
        }

        final Map<String, Object> params = getMessageParams(attrs, locale, msgKey);
        String rendered;
        switch (I18nMessageHelperTypeAssistant.configuredHelperType()) {
        case Mapped:
            rendered = I18nMessageHelperTypeAssistant.getConfiguredMappedHelper()
                    .getMessageWithDefault(locale, msgKey, fallback, params);
            break;
        case Ordered:
        default:
            // Collect the consecutively numbered parameters (prefix + 0, prefix + 1, ...)
            // until the first missing index.
            List<Object> orderedParams = new ArrayList<>();
            int position = 0;
            while (params.containsKey(ExtNodeConstants.MSG_NODE_ATTR_PARAM_PREFIX + position)) {
                orderedParams.add(params.get(ExtNodeConstants.MSG_NODE_ATTR_PARAM_PREFIX + position));
                position++;
            }
            rendered = I18nMessageHelperTypeAssistant.getConfiguredOrderedHelper()
                    .getMessageWithDefault(locale, msgKey, fallback, orderedParams.toArray());
        }

        // A leading prefix decides whether the message is plain text or HTML;
        // a message without a prefix is handled as plain text.
        String textPrefix = ExtNodeConstants.MSG_NODE_ATTRVALUE_TEXT_PREFIX;
        String htmlPrefix = ExtNodeConstants.MSG_NODE_ATTRVALUE_HTML_PREFIX;
        Node replacement;
        if (rendered.startsWith(textPrefix)) {
            replacement = ElementUtil.text(rendered.substring(textPrefix.length()));
        } else if (rendered.startsWith(htmlPrefix)) {
            replacement = ElementUtil.parseAsSingle(rendered.substring(htmlPrefix.length()));
        } else {
            replacement = ElementUtil.text(rendered);
        }
        messageNode.replaceWith(replacement);

        context.setData(TRACE_VAR_TEMPLATE_PATH, null);
    }
}

From source file:org.niord.core.publication.PublicationUtils.java

/**
 * Updates the message publications from the publication, parameters and link.
 * <p>
 * For every matching message description, the HTML computed for the publication either
 * replaces the existing {@code <a>}/{@code <span>} element carrying the same
 * {@code publication} attribute, or is appended when no such element exists. If the computed
 * HTML contains no element to substitute, an existing element is left untouched.
 *
 * @param message the message
 * @param publication the publication to extract
 * @param parameters the optional parameters
 * @param link the optional link
 * @param lang either a specific language or null for all languages
 * @return the updated message, or null if the message or the publication is null
 */
public static MessageVo updateMessagePublications(MessageVo message, SystemPublicationVo publication,
        String parameters, String link, String lang) {
    // Sanity check
    if (message == null || publication == null) {
        return null;
    }

    boolean internal = publication.getMessagePublication() == MessagePublication.INTERNAL;

    message.getDescs().stream().filter(msgDesc -> lang == null || lang.equals(msgDesc.getLang()))
            .forEach(msgDesc -> {

                // Normalise a null/blank result to "" so it is neither parsed as null
                // nor appended as the literal text "null" below.
                String updatedPubHtml = StringUtils.defaultIfBlank(
                        computeMessagePublication(publication, parameters, link, msgDesc.getLang()), "");

                String pubHtml = internal ? msgDesc.getInternalPublication() : msgDesc.getPublication();
                pubHtml = StringUtils.defaultIfBlank(pubHtml, "");

                Document doc = Jsoup.parseBodyFragment(pubHtml);
                String pubAttr = "[publication=" + publication.getPublicationId() + "]";
                Element e = doc.select("a" + pubAttr + ",span" + pubAttr).first();
                if (e != null) {
                    // Only substitute when the computed HTML really yields an element;
                    // child(0) on a body without child elements would throw.
                    Element parsedBody = Jsoup.parse(updatedPubHtml).body();
                    if (!parsedBody.children().isEmpty()) {
                        e.replaceWith(parsedBody.child(0));
                        pubHtml = doc.body().html();
                    }
                } else {
                    pubHtml += " " + updatedPubHtml;
                }
                // Lastly, clean up html for artifacts often added by TinyMCE
                if (StringUtils.isNotBlank(pubHtml)) {
                    pubHtml = pubHtml.replace("<p>", "").replace("</p>", "").trim();
                    if (internal) {
                        msgDesc.setInternalPublication(pubHtml);
                    } else {
                        msgDesc.setPublication(pubHtml);
                    }
                }
            });

    return message;
}

From source file:us.colloquy.sandbox.TestExtractor.java

/**
 * Exploratory test: parses a sample HTML file with jsoup, builds one {@code Letter} per
 * element with class "section" (recipient, date/place, notes and content), and prints each
 * letter as pretty-printed JSON. It makes no assertions.
 */
@Test
public void useJsoup() {

    String homeDir = System.getProperty("user.home");

    System.out.println(homeDir);

    //JSOUP API allows to extract all  elements of letters in files

    // The input path is relative to the working directory the test is started from.
    File input = new File("samples/pisma-1904/OEBPS/Text/single_doc.html");

    try {
        Document doc = Jsoup.parse(input, "UTF-8");

        List<Letter> letters = new ArrayList<>(); //our model contains only a subset of fields

        // Last year seen in a parsed letter date; passed to the date parser for later letters.
        String previousYear = "";

        for (Element element : doc.getElementsByClass("section")) {
            Letter letter = new Letter();

            StringBuilder content = new StringBuilder();

            for (Element child : element.children()) {

                // Debug output: dump every attribute of the child element.
                for (Attribute att : child.attributes()) {
                    System.out.println(att.getKey() + " " + att.getValue());
                }

                if ("center".equalsIgnoreCase(child.className())) {
                    // Recipient line: prefer the text inside <strong>, else the whole element text.
                    String toWhom = child.getElementsByTag("strong").text();

                    if (StringUtils.isEmpty(toWhom)) {
                        toWhom = child.text();
                    }

                    // Recipients are separated by two whitespace characters or by a comma.
                    String[] toWhomArray = toWhom.split("(\\s\\s)|(,)");

                    for (String to : toWhomArray) {
                        RussianDate.parseToWhom(letter, to); //here we need to recognize a russian name and store that but for now we store the content
                    }

                    //check if there is anything else here and find date and place - it will be replaced if exists below

                    String entireText = child.text();

                    String tail = entireText.replace(toWhom, "");

                    if (StringUtils.isNotEmpty(tail)) {
                        RussianDate.parseDateAndPlace(letter, tail, previousYear); //a parser that figures out date and place if they are present
                    }

                } else if ("Data".equalsIgnoreCase(child.className())) {

                    // NOTE(review): getElementsByTag appears never to return null in jsoup,
                    // so the null check looks redundant - confirm.
                    if (child.getElementsByTag("em") != null
                            && StringUtils.isNotEmpty(child.getElementsByTag("em").text())) {
                        RussianDate.parseDateAndPlace(letter, child.getElementsByTag("em").text(),
                                previousYear); //most often date and place are enclosed in em tag

                        if (letter.getDate() != null) {
                            // Remember the year of this letter for the letters that follow.
                            LocalDate localDate = letter.getDate().toInstant().atZone(ZoneId.systemDefault())
                                    .toLocalDate();
                            int year = localDate.getYear();
                            previousYear = year + "";
                        }
                    }

                } else if ("petit".equalsIgnoreCase(child.className())
                        || "Textpetit_otstup".equalsIgnoreCase(child.className())) {
                    letter.getNotes().add(child.text());

                } else {
                    // Any other child is treated as letter content.

                    Elements elements = child.getElementsByTag("sup");

                    // Replace each <sup> element with a text node "[<its text>]".
                    for (Element e : elements) {
                        String value = e.text();

                        e.replaceWith(new TextNode("[" + value + "]", null));
                    }

                    // NOTE(review): getAllElements() presumably returns child itself plus all of
                    // its descendants, and text() includes descendant text, so nested text may be
                    // appended more than once - confirm this is intended.
                    // NOTE(review): <sup> elements were replaced by text nodes just above, so the
                    // "sup" branch below looks reachable only in rare cases - confirm.
                    for (Element el : child.getAllElements()) {
                        if ("sup".equalsIgnoreCase(el.tagName())) {
                            content.append(" [" + el.text() + "] ");
                        } else {
                            content.append(el.text());
                        }

                    }

                    content.append("\n");

                }

            }

            letter.setContent(content.toString());
            letters.add(letter);
        }

        ObjectWriter ow = new com.fasterxml.jackson.databind.ObjectMapper().writer().withDefaultPrettyPrinter();

        // Print every collected letter as JSON.
        for (Letter letter : letters) {
            String json = ow.writeValueAsString(letter);

            System.out.println(json);

        }

    } catch (IOException e) {
        // NOTE(review): the exception is only printed, so this test still passes when the
        // sample file is missing or cannot be read.
        e.printStackTrace();
    }

}

From source file:us.colloquy.util.DiaryParser.java

/**
 * Replaces each {@code <sup>} element returned by {@code child.getElementsByTag("sup")}
 * with a text node holding the element's text wrapped in square brackets.
 *
 * @param child the element to search for {@code <sup>} elements
 */
private static void replaceSupTag(Element child) {
    for (Element sup : child.getElementsByTag("sup")) {
        String bracketed = "[" + sup.text() + "]";
        sup.replaceWith(new TextNode(bracketed, null));
    }
}