Example usage for org.jsoup.nodes Document html

List of usage examples for org.jsoup.nodes Document html

Introduction

This page collects example usages of org.jsoup.nodes.Document.html().

Prototype

public String html() 

Source Link

Document

Retrieves the element's inner HTML.

Usage

From source file:com.example.app.ui.DemoUserProfileViewer.java

/**
 * Builds the read-only view of a demo user profile.
 *
 * <p>Loads the profile through {@code getDemoUserProfile()}, creates the components for the
 * name, postal address, phone, email, social links, "about me" prose/video and picture, and
 * adds them to this container grouped into sections. A section is only added when it has
 * content to show. The "about me" prose comes from outside, so it is sanitized with a jsoup
 * whitelist before it is rendered.
 */
@Override
public void init() {
    // Make sure you call super.init() at the top of this method.
    /// See the Javadoc for #init() for more information about what it does.
    super.init();

    // Set HTML element type and class names for presentation use on this Container component.
    withHTMLElement(HTMLElement.section);
    addClassName("user-profile-viewer");
    // property_viewer is a standard class name.
    addClassName("property-viewer");
    // Add microdata for programmatic / SEO use
    /// OR use RDFa support
    /// You typically only do this in viewers - not editors.
    setAttribute("itemscope", "");
    setAttribute("itemtype", "http://schema.org/Person");
    // setAttribute allows you to set any attribute as long as it will not interfere with a component's
    /// native HTML. For example, you cannot set the "value" attribute on a Field since
    /// it uses that attribute.

    // It's a good idea to *not* mark variables final that you don't want in the scope of event listeners.
    /// Hibernate/JPA entities are a great example of this pattern. You always need to re-attach
    /// entities before using them, so we should always call getUserProfile() in the context
    /// of handling an event. Note: our getUserProfile() method re-attaches the entity.
    DemoUserProfile demoUserProfile = getDemoUserProfile();

    Name name = demoUserProfile.getName();
    // You can use a Field for displaying non-internationalized content.
    /// It is desirable to do this since you don't need to create a LocalizedText.
    /// However, you cannot change the HTMLElement of a Field at this time,
    /// so some of the following code uses a Label which does allow
    /// specification of the HTMLElement.
    final Field namePrefix = new Field(name.getFormOfAddress(), false);
    final Field nameGiven = new Field(name.getFirst(), false);
    final Field nameFamily = new Field(name.getLast(), false);
    final Field nameSuffix = new Field(name.getSuffix(), false);
    // Sometimes it is easier and less error prone to make a component non-visible
    /// than checking for null on each use. Use this pattern with care. You don't
    /// want to consume a lot of resource unnecessarily.
    if (isEmptyString(namePrefix.getText()))
        namePrefix.setVisible(false);
    if (isEmptyString(nameSuffix.getText()))
        nameSuffix.setVisible(false);

    // Address
    Address address = demoUserProfile.getPostalAddress();
    // Address lines are always on their own line so we make sure they are enclosed by a block element like a DIV.
    final Label addressLine1 = new Label();
    addressLine1.withHTMLElement(HTMLElement.div).addClassName("prop").addClassName("address-line");
    final Label addressLine2 = new Label();
    addressLine2.withHTMLElement(HTMLElement.div).addClassName("prop").addClassName("address-line");
    // NOTE(review): getAddressLine(...) is called with 1 and 2 here, which presumes a 1-based
    /// accessor - confirm against the Address API.
    if (address.getAddressLineList().size() > 0)
        addressLine1.setText(createText(address.getAddressLine(1)));
    if (address.getAddressLineList().size() > 1)
        addressLine2.setText(createText(address.getAddressLine(2)));
    final HTMLComponent city = new HTMLComponent();
    // The "prop" class name is part of the standard HTML structure. It is always a good idea to also
    /// add a specific class name like "city" in this example. Please be consistent when using class names.
    /// For example, if everyone else is using "city", please use "city" too. Don't come up with another class name
    /// that means something similar like "town" or "locality". Consistency has a big impact on
    /// the time required to style HTML as well as the ability to reuse CSS.
    city.withHTMLElement(HTMLElement.span).addClassName("prop").addClassName("city");
    if (!isEmptyString(address.getCity())) {
        // Our microdata for the city shouldn't include the comma, so this is a bit more complicated than the other examples.
        // NOTE(review): the city value is concatenated into markup without escaping - confirm it is
        /// sanitized before it is persisted, or escape it here.
        city.setText(createText("<span itemprop=\"addressLocality\">" + address.getCity()
                + "</span><span class=\"delimiter\">,</span>"));
    } else
        city.setVisible(false);
    final Label state = new Label(createText(address.getState()));
    state.addClassName("prop").addClassName("state");
    final Label postalCode = new Label(createText(address.getPostalCode()));
    postalCode.addClassName("prop").addClassName("postal_code");

    // Other Contact
    final Field phoneNumber = new Field(demoUserProfile.getPhoneNumber(), false);
    final Field emailAddress = new Field(demoUserProfile.getEmailAddress(), false);

    // Social Contact
    final URILink twitterLink = demoUserProfile.getTwitterLink() != null
            ? new URILink(_demoUserProfileDAO.toURI(demoUserProfile.getTwitterLink(), null))
            : null;
    final URILink facebookLink = demoUserProfile.getFacebookLink() != null
            ? new URILink(_demoUserProfileDAO.toURI(demoUserProfile.getFacebookLink(), null))
            : null;
    final URILink linkedInLink = demoUserProfile.getLinkedInLink() != null
            ? new URILink(_demoUserProfileDAO.toURI(demoUserProfile.getLinkedInLink(), null))
            : null;

    // We are going to output HTML received from the outside, so we need to sanitize it first for security reasons.
    /// Sometimes you'll do this sanitation prior to persisting the data. It depends on whether or not you need to
    /// keep the original unsanitized HTML around.
    String processedHTML = demoUserProfile.getAboutMeProse();
    if (!isEmptyString(processedHTML)) {
        // Process the HTML converting links as necessary (adding JSESSIONID(s)
        /// for URL based session tracking, converting resource links to increase concurrent loading limit,
        /// CMS link externalization, etc).
        /// This is *not* sanitation and should always be done before rendering - never before persisting.
        /// We are doing this before sanitizing the HTML to avoid having to whitelist internal URL protocols, etc.
        try {
            processedHTML = XMLRenderer.parseWithRoot(processedHTML, Event.getRequest(), Event.getResponse());
        } catch (IOException e) {
            // Link processing failed; the unprocessed HTML still goes through the sanitizer below.
            _logger.error("Unable to accept HTML: " + processedHTML, e);
        }

        // We don't trust the input, so we sanitize it with a whitelist of allowed HTML.
        Document dirty = Jsoup.parseBodyFragment(processedHTML, "");
        Whitelist whitelist = Whitelist.relaxed();
        // Don't allow users to use our website as a link farm
        whitelist.addEnforcedAttribute("a", "rel", "nofollow");
        Cleaner cleaner = new Cleaner(whitelist);
        Document clean = cleaner.clean(dirty);
        processedHTML = clean.html();
    }
    final HTMLComponent aboutMeProse = new HTMLComponent(processedHTML);
    Component aboutMeVideo = null;
    URL videoLink = demoUserProfile.getAboutMeVideoLink();
    if (videoLink != null) {
        // There are several ways to link to media (Youtube video URL, Vimeo video URL, Flickr URL,
        // internally hosted media file, etc).
        /// You can link to it.
        /// You can embed it. See http://oembed.com/ for a common protocol for doing this.
        /// If the link is to the media itself, you can create a player for it.
        /// Below is an example of creating a link to the video as well as a player.
        final URI videoLinkURI = _demoUserProfileDAO.toURI(videoLink, null);
        URILink videoLinkComponent = new URILink(videoLinkURI, createText("My Video"));
        videoLinkComponent.setTarget("_blank");
        aboutMeVideo = getAboutMe(videoLink, videoLinkURI, videoLinkComponent);
        if (aboutMeVideo == null) {
            // We could check for oEmbed support in case link was to youtube, vimeo, etc - http://oembed.com/
            // Since this is an example, we'll just output the link.
            aboutMeVideo = videoLinkComponent;
        }
    }
    ImageComponent picture = null;
    final FileEntity userProfilePicture = demoUserProfile.getPicture();
    if (userProfilePicture != null) {
        picture = new ImageComponent(new Image(userProfilePicture));
        // Only enable image caching when the picture was last modified more than 60 minutes ago.
        picture.setImageCaching(userProfilePicture.getLastModifiedTime()
                .before(new Date(System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(60))));
    }

    // Now that we've initialized most of the content, we'll add all the components to this View
    /// using the standard HTML structure for a property viewer.
    add(of(HTMLElement.section, "prop-group name",
            new Label(createText("Name")).withHTMLElement(HTMLElement.h1),
            namePrefix.setAttribute("itemprop", "honorificPrefix").addClassName("prop").addClassName("prefix"),
            nameGiven.setAttribute("itemprop", "givenName").addClassName("prop").addClassName("given"),
            nameFamily.setAttribute("itemprop", "familyName").addClassName("prop").addClassName("family"),
            nameSuffix.setAttribute("itemprop", "honorificSuffix").addClassName("prop")
                    .addClassName("suffix")));

    // Add wrapping DIV to group address lines if necessary.
    Component streetAddress = (!isEmptyString(addressLine1.getText()) && !isEmptyString(addressLine2.getText())
            ? of(HTMLElement.div, "address-lines", addressLine1, addressLine2)
            : (isEmptyString(addressLine1.getText()) ? addressLine2 : addressLine1)
                    .withHTMLElement(HTMLElement.div));
    streetAddress.setAttribute("itemprop", "streetAddress");
    boolean hasAddress = (!isEmptyString(addressLine1.getText()) || !isEmptyString(addressLine2.getText())
            || !isEmptyString(city.getText()) || !isEmptyString(state.getText())
            || !isEmptyString(postalCode.getText()));
    boolean hasPhone = !isEmptyString(phoneNumber.getText());
    boolean hasEmail = !isEmptyString(emailAddress.getText());
    // We only want to output the enclosing HTML if we have content to display.
    if (hasAddress || hasPhone || hasEmail) {
        Container contactContainer = of(HTMLElement.section, "contact",
                new Label(createText("Contact Information")).withHTMLElement(HTMLElement.h1));
        add(contactContainer);
        if (hasAddress) {
            contactContainer.add(of(HTMLElement.div, "prop-group address",
                    // We are using an H2 here because our immediate ancestor is a DIV. If it was a SECTION,
                    /// then we would use an H1. See the UserProfileViewer for a comparison.
                    new Label(createText("Address")).withHTMLElement(HTMLElement.h2), streetAddress,
                    of(HTMLElement.div, "place", city, state.setAttribute("itemprop", "addressRegion"),
                            postalCode.setAttribute("itemprop", "postalCode")))
                                    .setAttribute("itemprop", "address").setAttribute("itemscope", "")
                                    .setAttribute("itemtype", "http://schema.org/PostalAddress"));
        }
        if (hasPhone) {
            contactContainer.add(of(HTMLElement.div, "prop phone",
                    new Label(createText("Phone")).withHTMLElement(HTMLElement.h2),
                    phoneNumber.setAttribute("itemprop", "telephone")));
        }
        if (hasEmail) {
            contactContainer.add(of(HTMLElement.div, "prop email",
                    new Label(createText("Email")).withHTMLElement(HTMLElement.h2),
                    emailAddress.setAttribute("itemprop", "email")));
        }
    }

    if (twitterLink != null || facebookLink != null || linkedInLink != null) {
        Container social = of(HTMLElement.section, "social",
                new Label(createText("Social Media Links")).withHTMLElement(HTMLElement.h1));
        add(social);
        if (twitterLink != null) {
            twitterLink.setTarget("_blank");
            twitterLink.setText(createText("Twitter Link"));
            social.add(of(HTMLElement.div, "prop twitter", createText("Twitter"), twitterLink));
        }
        if (facebookLink != null) {
            facebookLink.setTarget("_blank");
            facebookLink.setText(createText("Facebook Link"));
            social.add(of(HTMLElement.div, "prop facebook", createText("Facebook"), facebookLink));
        }
        if (linkedInLink != null) {
            linkedInLink.setTarget("_blank");
            linkedInLink.setText(createText("LinkedIn Link"));
            social.add(of(HTMLElement.div, "prop linkedin", createText("LinkedIn"), linkedInLink));
        }
    }

    // True when there is sanitized "about me" prose to show. The flag used to hold the result of
    /// isEmptyString(...) directly, which added the prose row only when the prose was empty.
    final boolean hasAboutMeProse = !isEmptyString(aboutMeProse.getText());
    if (hasAboutMeProse || aboutMeVideo != null) {
        Container aboutMe = of(HTMLElement.section, "about-me",
                new Label(createText("About Me")).withHTMLElement(HTMLElement.h1));
        add(aboutMe);
        // The picture is only rendered as part of this section, i.e. when prose or a video exists.
        if (picture != null) {
            aboutMe.add(of(HTMLElement.div, "prop picture", createText("Picture"), picture));
        }
        if (hasAboutMeProse) {
            aboutMe.add(of(HTMLElement.div, "prop prose",
                    createText("Professional Information, Hobbies, Interests..."), aboutMeProse));
        }
        if (aboutMeVideo != null) {
            Label label = new Label(createText("Video")).withHTMLElement(HTMLElement.label);
            label.addClassName("vl");
            aboutMe.add(of(HTMLElement.div, "prop video", label, aboutMeVideo));
        }

    }
}

From source file:com.example.app.ui.UserProfileViewer.java

/**
 * Builds the read-only view of a user profile.
 *
 * <p>Loads the profile through {@code getUserProfile()}, creates the components for the name,
 * postal address, phone, email, social links, "about me" prose/video and picture, and adds
 * them to this container grouped into sections. A section is only added when it has content
 * to show. The "about me" prose comes from outside, so it is sanitized with a jsoup whitelist
 * before it is rendered. When the media utility reports an H264 video stream for the video
 * link, a media player is used; otherwise a plain link is shown.
 */
@Override
public void init() {
    // Make sure you call super.init() at the top of this method.
    /// See the Javadoc for #init() for more information about what it does.
    super.init();

    // Set HTML element type and class names for presentation use on this Container component.
    setHTMLElement(HTMLElement.section);
    addClassName("user-profile-viewer");
    // property_viewer is a standard class name.
    addClassName("property-viewer");
    // Add microdata for programmatic / SEO use
    /// OR use RDFa support
    /// You typically only do this in viewers - not editors.
    setAttribute("itemscope", "");
    setAttribute("itemtype", "http://schema.org/Person");
    // setAttribute allows you to set any attribute as long as it will not interfere with a component's
    /// native HTML. For example, you cannot set the "value" attribute on a Field since
    /// it uses that attribute.

    // It's a good idea to *not* mark variables final that you don't want in the scope of event listeners.
    /// Hibernate/JPA entities are a great example of this pattern. You always need to re-attach
    /// entities before using them, so we should always call getUserProfile() in the context
    /// of handling an event. Note: our getUserProfile() method re-attaches the entity.
    UserProfile userProfile = getUserProfile();

    Name name = userProfile.getName();
    // You can use a Field for displaying non-internationalized content.
    /// It is desirable to do this since you don't need to create a LocalizedText.
    /// However, you cannot change the HTMLElement of a Field at this time,
    /// so some of the following code uses a Label which does allow
    /// specification of the HTMLElement.
    final Field namePrefix = new Field(name.getFormOfAddress(), false);
    final Field nameGiven = new Field(name.getFirst(), false);
    final Field nameFamily = new Field(name.getLast(), false);
    final Field nameSuffix = new Field(name.getSuffix(), false);
    // Sometimes it is easier and less error prone to make a component non-visible
    /// than checking for null on each use. Use this pattern with care. You don't
    /// want to consume a lot of resource unnecessarily.
    if (StringFactory.isEmptyString(namePrefix.getText()))
        namePrefix.setVisible(false);
    if (StringFactory.isEmptyString(nameSuffix.getText()))
        nameSuffix.setVisible(false);

    // Address
    Address address = userProfile.getPostalAddress();
    // Address lines are always on their own line so we make sure they are enclosed by a block element like a DIV.
    final Label addressLine1 = new Label();
    addressLine1.setHTMLElement(HTMLElement.div).addClassName("prop").addClassName("address_line");
    final Label addressLine2 = new Label();
    addressLine2.setHTMLElement(HTMLElement.div).addClassName("prop").addClassName("address_line");
    if (address.getAddressLines().length > 0)
        addressLine1.setText(TextSources.create(address.getAddressLines()[0]));
    if (address.getAddressLines().length > 1)
        addressLine2.setText(TextSources.create(address.getAddressLines()[1]));
    final HTMLComponent city = new HTMLComponent();
    // The "prop" class name is part of the standard HTML structure. It is always a good idea to also
    /// add a specific class name like "city" in this example. Please be consistent when using class names.
    /// For example, if everyone else is using "city", please use "city" too. Don't come up with another class name
    /// that means something similar like "town" or "locality". Consistency has a big impact on
    /// the time required to style HTML as well as the ability to reuse CSS.
    city.setHTMLElement(HTMLElement.span).addClassName("prop").addClassName("city");
    if (!StringFactory.isEmptyString(address.getCity())) {
        // Our microdata for the city shouldn't include the comma, so this is a bit more complicated than the other examples.
        // NOTE(review): the city value is concatenated into markup without escaping - confirm it is
        /// sanitized before it is persisted, or escape it here.
        city.setText(TextSources.create("<span itemprop=\"addressLocality\">" + address.getCity()
                + "</span><span class=\"delimiter\">,</span>"));
    } else
        city.setVisible(false);
    final Label state = new Label(TextSources.create(address.getState()));
    state.addClassName("prop").addClassName("state");
    final Label postalCode = new Label(TextSources.create(address.getPostalCode()));
    postalCode.addClassName("prop").addClassName("postal_code");

    // Other Contact
    final Field phoneNumber = new Field(userProfile.getPhoneNumber(), false);
    final Field emailAddress = new Field(userProfile.getEmailAddress(), false);

    // Social Contact
    final URILink twitterLink = userProfile.getTwitterLink() != null
            ? new URILink(_userProfileDAO.toURI(userProfile.getTwitterLink(), null))
            : null;
    final URILink facebookLink = userProfile.getFacebookLink() != null
            ? new URILink(_userProfileDAO.toURI(userProfile.getFacebookLink(), null))
            : null;
    final URILink linkedInLink = userProfile.getLinkedInLink() != null
            ? new URILink(_userProfileDAO.toURI(userProfile.getLinkedInLink(), null))
            : null;

    // We are going to output HTML received from the outside, so we need to sanitize it first for security reasons.
    /// Sometimes you'll do this sanitation prior to persisting the data. It depends on whether or not you need to
    /// keep the original unsanitized HTML around.
    String processedHTML = userProfile.getAboutMeProse();
    if (!StringFactory.isEmptyString(processedHTML)) {
        // Process the HTML converting links as necessary (adding JSESSIONID(s)
        /// for URL based session tracking, converting resource links to increase concurrent loading limit,
        /// CMS link externalization, etc).
        /// This is *not* sanitation and should always be done before rendering - never before persisting.
        /// We are doing this before sanitizing the HTML to avoid having to whitelist internal URL protocols, etc.
        try {
            processedHTML = XMLRenderer.parseWithRoot(processedHTML, Event.getRequest(), Event.getResponse());
        } catch (IOException e) {
            // Link processing failed; the unprocessed HTML still goes through the sanitizer below.
            _logger.error("Unable to accept HTML: " + processedHTML, e);
        }

        // We don't trust the input, so we sanitize it with a whitelist of allowed HTML.
        Document dirty = Jsoup.parseBodyFragment(processedHTML, "");
        Whitelist whitelist = Whitelist.relaxed();
        // Don't allow users to use our website as a link farm
        whitelist.addEnforcedAttribute("a", "rel", "nofollow");
        Cleaner cleaner = new Cleaner(whitelist);
        Document clean = cleaner.clean(dirty);
        processedHTML = clean.html();
    }
    final HTMLComponent aboutMeProse = new HTMLComponent(processedHTML);
    Component aboutMeVideo = null;
    URL videoLink = userProfile.getAboutMeVideoLink();
    if (videoLink != null) {
        // There are several ways to link to media (Youtube video URL, Vimeo video URL, Flickr URL, internally hosted media file, etc).
        /// You can link to it.
        /// You can embed it. See http://oembed.com/ for a common protocol for doing this.
        /// If the link is to the media itself, you can create a player for it.
        /// Below is an example of creating a link to the video as well as a player.
        final URI videoLinkURI = _userProfileDAO.toURI(videoLink, null);
        URILink videoLinkComponent = new URILink(videoLinkURI, TextSources.create("My Video"));
        videoLinkComponent.setTarget("_blank");
        IMediaUtility util = MediaUtilityFactory.getUtility();
        try {
            // Check if we can parse the media and it has a stream we like.
            /// In our made up example, we're only accepting H.264 video. We don't care about the audio in this example.
            IMediaMetaData mmd;
            if (util.isEnabled() && videoLinkURI != null
                    && (mmd = util.getMetaData(videoLinkURI.toString())).getStreams().length > 0) {
                int width = 853, height = 480; // 480p default
                boolean hasVideo = false;
                for (IMediaStream stream : mmd.getStreams()) {
                    if (stream.getCodec().getType() == ICodec.Type.video
                            && "H264".equals(stream.getCodec().name())) {
                        hasVideo = true;
                        // Use the stream's own dimensions when it reports a positive width.
                        if (stream.getWidth() > 0) {
                            width = stream.getWidth();
                            height = stream.getHeight();
                        }
                        break;
                    }
                }
                if (hasVideo) {
                    Media component = new Media();
                    component.setMediaType(Media.MediaType.video);
                    component.addSource(new MediaSource(videoLinkURI));
                    component.setFallbackContent(videoLinkComponent);
                    component.setSize(new PixelMetric(width), new PixelMetric(height));
                    aboutMeVideo = component;
                }
            }
        } catch (IllegalArgumentException | RemoteException e) {
            _logger.error("Unable to get media information for " + videoLink, e);
        }
        if (aboutMeVideo == null) {
            // We could check for oEmbed support in case link was to youtube, vimeo, etc - http://oembed.com/
            // Since this is an example, we'll just output the link.
            aboutMeVideo = videoLinkComponent;
        }
    }
    ImageComponent picture = null;
    final FileEntity userProfilePicture = userProfile.getPicture();
    if (userProfilePicture != null) {
        picture = new ImageComponent(new Image(userProfilePicture));
        // Only enable image caching when the picture was last modified more than 60 minutes ago.
        picture.setImageCaching(userProfilePicture.getLastModifiedTime()
                .before(new Date(System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(60))));
    }

    // Now that we've initialized most of the content, we'll add all the components to this View
    /// using the standard HTML structure for a property viewer.
    add(of(HTMLElement.section, "prop-group name",
            new Label(TextSources.create("Name")).setHTMLElement(HTMLElement.h1),
            namePrefix.setAttribute("itemprop", "honorificPrefix").addClassName("prop").addClassName("prefix"),
            nameGiven.setAttribute("itemprop", "givenName").addClassName("prop").addClassName("given"),
            nameFamily.setAttribute("itemprop", "familyName").addClassName("prop").addClassName("family"),
            nameSuffix.setAttribute("itemprop", "honorificSuffix").addClassName("prop")
                    .addClassName("suffix")));

    // Add wrapping DIV to group address lines if necessary.
    Component streetAddress = (!StringFactory.isEmptyString(addressLine1.getText())
            && !StringFactory.isEmptyString(addressLine2.getText())
                    ? of(HTMLElement.div, "address_lines", addressLine1, addressLine2)
                    : (StringFactory.isEmptyString(addressLine1.getText()) ? addressLine2 : addressLine1)
                            .setHTMLElement(HTMLElement.div));
    streetAddress.setAttribute("itemprop", "streetAddress");
    boolean hasAddress = (!StringFactory.isEmptyString(addressLine1.getText())
            || !StringFactory.isEmptyString(addressLine2.getText())
            || !StringFactory.isEmptyString(city.getText()) || !StringFactory.isEmptyString(state.getText())
            || !StringFactory.isEmptyString(postalCode.getText()));
    boolean hasPhone = !StringFactory.isEmptyString(phoneNumber.getText());
    boolean hasEmail = !StringFactory.isEmptyString(emailAddress.getText());
    // We only want to output the enclosing HTML if we have content to display.
    if (hasAddress || hasPhone || hasEmail) {
        Container contactContainer = of(HTMLElement.section, "contact",
                new Label(TextSources.create("Contact Information")).setHTMLElement(HTMLElement.h1));
        add(contactContainer);
        if (hasAddress) {
            contactContainer.add(of(HTMLElement.div, "prop-group address",
                    // We are using an H2 here because our immediate ancestor is a DIV. If it was a SECTION,
                    /// then we would use an H1. See the UserProfileViewer for a comparison.
                    new Label(TextSources.create("Address")).setHTMLElement(HTMLElement.h2), streetAddress,
                    of(HTMLElement.div, "place", city, state.setAttribute("itemprop", "addressRegion"),
                            postalCode.setAttribute("itemprop", "postalCode")))
                                    .setAttribute("itemprop", "address").setAttribute("itemscope", "")
                                    .setAttribute("itemtype", "http://schema.org/PostalAddress"));
        }
        if (hasPhone) {
            contactContainer.add(of(HTMLElement.div, "prop phone",
                    new Label(TextSources.create("Phone")).setHTMLElement(HTMLElement.h2),
                    phoneNumber.setAttribute("itemprop", "telephone")));
        }
        if (hasEmail) {
            contactContainer.add(of(HTMLElement.div, "prop email",
                    new Label(TextSources.create("Email")).setHTMLElement(HTMLElement.h2),
                    emailAddress.setAttribute("itemprop", "email")));
        }
    }

    if (twitterLink != null || facebookLink != null || linkedInLink != null) {
        Container social = of(HTMLElement.section, "social",
                new Label(TextSources.create("Social Media Links")).setHTMLElement(HTMLElement.h1));
        add(social);
        if (twitterLink != null) {
            twitterLink.setTarget("_blank");
            twitterLink.setText(TextSources.create("Twitter Link"));
            social.add(of(HTMLElement.div, "prop twitter", TextSources.create("Twitter"), twitterLink));
        }
        if (facebookLink != null) {
            facebookLink.setTarget("_blank");
            facebookLink.setText(TextSources.create("Facebook Link"));
            social.add(of(HTMLElement.div, "prop facebook", TextSources.create("Facebook"), facebookLink));
        }
        if (linkedInLink != null) {
            linkedInLink.setTarget("_blank");
            linkedInLink.setText(TextSources.create("LinkedIn Link"));
            social.add(of(HTMLElement.div, "prop linkedin", TextSources.create("LinkedIn"), linkedInLink));
        }
    }

    // True when there is sanitized "about me" prose to show. The flag used to hold the result of
    /// isEmptyString(...) directly, which added the prose row only when the prose was empty.
    final boolean hasAboutMeProse = !StringFactory.isEmptyString(aboutMeProse.getText());
    if (hasAboutMeProse || aboutMeVideo != null) {
        Container aboutMe = of(HTMLElement.section, "about_me",
                new Label(TextSources.create("About Me")).setHTMLElement(HTMLElement.h1));
        add(aboutMe);
        // The picture is only rendered as part of this section, i.e. when prose or a video exists.
        if (picture != null) {
            aboutMe.add(of(HTMLElement.div, "prop picture", TextSources.create("Picture"), picture));
        }
        if (hasAboutMeProse) {
            aboutMe.add(of(HTMLElement.div, "prop prose",
                    TextSources.create("Professional Information, Hobbies, Interests..."), aboutMeProse));
        }
        if (aboutMeVideo != null) {
            Label label = new Label(TextSources.create("Video")).setHTMLElement(HTMLElement.label);
            label.addClassName("vl");
            aboutMe.add(of(HTMLElement.div, "prop video", label, aboutMeVideo));
        }
    }
}

From source file:io.gravitee.management.service.impl.EmailServiceImpl.java

/**
 * Embeds the template images referenced by an HTML mail body as inline attachments.
 *
 * <p>Every {@code img} whose {@code src} is a local template path is rewritten to
 * {@code cid:<src>}, the resulting HTML is set as the message text, and the matching file
 * under {@code templatesPath} is attached inline under the same content id. Sources that are
 * empty or already addressable by the mail client (remote, protocol-relative, {@code data:}
 * or {@code cid:} references) are left untouched. Each distinct source is attached once.
 *
 * @param mailMessage helper of the message that receives the HTML text and inline resources
 * @param htmlText    HTML body of the mail
 * @return the HTML that was set on the message, with local image sources rewritten
 * @throws IllegalArgumentException if an image source resolves outside {@code templatesPath}
 * @throws Exception if the message cannot be updated or a path cannot be resolved
 */
private String addResourcesInMessage(final MimeMessageHelper mailMessage, final String htmlText)
        throws Exception {
    final Document document = Jsoup.parse(htmlText);
    final Elements imageElements = document.getElementsByTag("img");

    // Distinct local sources, in document order. attr("src") is empty when the attribute is absent.
    final List<String> resources = imageElements.stream()
            .map(imageElement -> imageElement.attr("src"))
            .filter(src -> isLocalImageSource(src))
            .distinct()
            .collect(Collectors.toList());

    // Validate before touching the document or the message: a source such as "../../secret"
    // must not cause a file outside the templates directory to be attached.
    final File templatesDir = new File(templatesPath, "").getCanonicalFile();
    for (final String res : resources) {
        final File resourceFile = new File(templatesPath, res).getCanonicalFile();
        if (!resourceFile.toPath().startsWith(templatesDir.toPath())) {
            throw new IllegalArgumentException(
                    "Inline resource is outside the templates directory: " + res);
        }
    }

    imageElements.forEach(imageElement -> {
        final String src = imageElement.attr("src");
        if (resources.contains(src)) {
            imageElement.attr("src", "cid:" + src);
        }
    });

    // The text is set before the inline parts are added so the cid references can be resolved.
    final String html = document.html();
    mailMessage.setText(html, true);

    for (final String res : resources) {
        final FileSystemResource templateResource = new FileSystemResource(new File(templatesPath, res));
        mailMessage.addInline(res, templateResource,
                MimetypesFileTypeMap.getDefaultFileTypeMap().getContentType(res));
    }

    return html;
}

/** Tells whether an image source is a non-empty path that has to be embedded from the templates directory. */
private static boolean isLocalImageSource(final String src) {
    if (src.isEmpty()) {
        return false;
    }
    for (final String prefix : new String[] { "http://", "https://", "//", "data:", "cid:" }) {
        // Case-insensitive prefix match; URI schemes are not case sensitive.
        if (src.regionMatches(true, 0, prefix, 0, prefix.length())) {
            return false;
        }
    }
    return true;
}

From source file:me.vertretungsplan.parser.UntisMonitorParser.java

/**
 * Runs the login handler, loads every configured URL and builds the substitution
 * schedule from the loaded documents.
 * <p>
 * For each document, either every element matching the embedded-content selector is
 * parsed as a day, or, if no such selector is configured, the whole document is parsed
 * as one day when its title contains "Untis" or its HTML contains the commented-out
 * Untis title. Documents matching neither are ignored. If a last-change selector is
 * configured and matches, the first match supplies the last-change value.
 *
 * @return the assembled schedule, including website, classes and teachers
 * @throws IOException                propagated from login or page loading
 * @throws JSONException              if a required configuration entry is missing or has the wrong type
 * @throws CredentialInvalidException propagated from the login handler
 */
public SubstitutionSchedule getSubstitutionSchedule()
        throws IOException, JSONException, CredentialInvalidException {
    loginResponse = new LoginHandler(scheduleData, credential, cookieProvider)
            .handleLoginWithResponse(executor, cookieStore);

    SubstitutionSchedule schedule = SubstitutionSchedule.fromData(scheduleData);

    JSONObject config = scheduleData.getData();
    JSONArray urls = config.getJSONArray(PARAM_URLS);
    String encoding = config.optString(PARAM_ENCODING, null);

    List<Document> docs = new ArrayList<>();
    for (int i = 0; i < urls.length(); i++) {
        JSONObject urlEntry = urls.getJSONObject(i);
        for (String dateUrl : ParserUtils.handleUrlWithDateFormat(urlEntry.getString(SUBPARAM_URL))) {
            loadUrl(dateUrl, encoding, urlEntry.getBoolean(SUBPARAM_FOLLOWING), docs);
        }
    }

    // Matches a stamp shaped like "01.02.2015, 12:34" (the comma is optional).
    Pattern lastChangePattern = Pattern.compile("\\d\\d\\.\\d\\d\\.\\d\\d\\d\\d,? \\d\\d:\\d\\d");

    for (Document doc : docs) {
        if (config.has(PARAM_EMBEDDED_CONTENT_SELECTOR)) {
            for (Element part : doc.select(config.getString(PARAM_EMBEDDED_CONTENT_SELECTOR))) {
                schedule.addDay(parseMonitorDay(part, config));
            }
        } else if (doc.title().contains("Untis") || doc.html().contains("<!--<title>Untis")) {
            schedule.addDay(parseMonitorDay(doc, config));
        }
        // any other document is silently skipped

        if (config.has(PARAM_LAST_CHANGE_SELECTOR)) {
            // first() yields null when the selector matches nothing
            Element lastChangeElement = doc.select(config.getString(PARAM_LAST_CHANGE_SELECTOR)).first();
            if (lastChangeElement != null) {
                String text = lastChangeElement.text();
                Matcher matcher = lastChangePattern.matcher(text);
                // Fall back to the raw element text when no stamp is found in it.
                String lastChange = matcher.find() ? matcher.group() : text;
                schedule.setLastChangeString(lastChange);
                schedule.setLastChange(ParserUtils.parseDateTime(lastChange));
            }
        }
    }

    if (config.has(PARAM_WEBSITE)) {
        schedule.setWebsite(config.getString(PARAM_WEBSITE));
    } else if (urls.length() == 1) {
        schedule.setWebsite(urls.getJSONObject(0).getString("url"));
    }

    schedule.setClasses(getAllClasses());
    schedule.setTeachers(getAllTeachers());

    return schedule;
}

From source file:org.confab.PhpBB3Parser.java

/**
 * Parses the forums listed in a phpBB3 page.
 * <p>
 * Every {@code li.row} inside a {@code ul[class=topiclist forums]} becomes one
 * {@link Forum}. The first {@code a.forumtitle} link of the row supplies the forum's
 * url and title; every further {@code a.forumtitle} link in the same row is recorded
 * as a sub-forum with its own url and title.
 *
 * @param root   parsed page to read the forum list from
 * @param parent bulletin board passed to the constructor of every created forum
 * @return the forums found, in document order
 */
public List<Forum> parseForums(Document root, BulletinBoard parent) {
    Utilities.debug("parseForums");

    List<Forum> ret = new ArrayList<Forum>();

    // get table
    Elements forum_tables = root.select("ul[class=topiclist forums]");
    assert !forum_tables.isEmpty() : root.html();

    for (Element forum_table : forum_tables) {
        Elements els_li = forum_table.select("li.row");
        assert !els_li.isEmpty();
        for (Element el_li : els_li) {
            Forum new_forum = new Forum(parent);

            // Get the forum url
            Elements els_a = el_li.select("a.forumtitle");
            Element el_a = els_a.first();
            assert el_a != null;
            new_forum.url = el_a.attr("href");
            assert new_forum.url != null;
            Utilities.debug("new_forum.url : " + new_forum.url);

            // Get the title text
            new_forum.title = el_a.text();
            assert new_forum.title != null;
            Utilities.debug("new_forum.title : " + new_forum.title);

            // Every link after the first one is a subforum. Start at index 1 rather than
            // removing the first link from the selection, and read each subforum's OWN
            // link (previously the parent link was read, so all subforums were copies
            // of the parent forum).
            for (int i = 1; i < els_a.size(); i++) {
                Element el_sub_a = els_a.get(i);
                Forum sub_forum = new Forum(parent);
                sub_forum.url = el_sub_a.attr("href");
                assert sub_forum.url != null;
                sub_forum.title = el_sub_a.text();
                assert sub_forum.title != null;
                new_forum.subForums.add(sub_forum);
                Utilities.debug("added subForum: " + sub_forum.title);
            }

            // The description is the text of the element that contains the title link
            String el_description = el_a.parent().text();
            new_forum.description = (el_description != null) ? el_description : "";
            Utilities.debug("new_forum.description : " + new_forum.description);

            Utilities.debug("new_forum.parent.url : " + new_forum.parent.url);

            ret.add(new_forum);
            Utilities.debug("-----");
        }
    }
    Utilities.debug("end parseForums");
    return ret;
}

From source file:net.slkdev.swagger.confluence.service.impl.XHtmlToConfluenceServiceImpl.java

/**
 * Splits the current Swagger document into Confluence pages according to the
 * configured pagination mode.
 * <ul>
 * <li>Single page mode: one root page holding either the original document or a copy
 * whose {@code .toc} element has been removed, depending on the configuration.</li>
 * <li>Individual pages mode: a root page with the table of contents, one category page
 * per {@code .sect1} element carrying that category's inner table of contents, and one
 * page per {@code sect2} element inside it.</li>
 * <li>Otherwise: a root page with the table of contents stripped of its
 * {@code .sectlevel2} lists, and one category page per {@code .sect1} element holding
 * that element's HTML.</li>
 * </ul>
 *
 * @return the generated pages, root page first
 */
private static List<ConfluencePage> handlePagination() {
    final List<ConfluencePage> pages = new ArrayList<>();
    final SwaggerConfluenceConfig config = SWAGGER_CONFLUENCE_CONFIG.get();
    final PaginationMode paginationMode = config.getPaginationMode();

    final Document originalDocument = SWAGGER_DOCUMENT.get();
    final Document transformedDocument = originalDocument.clone();

    final Elements categoryElements = transformedDocument.select(".sect1");

    // Drop the table of contents from the working copy only.
    final Elements transformedToc = transformedDocument.select(".toc");
    transformedToc.html("");
    transformedToc.unwrap();

    // Single page mode can use the incoming XHTML as it is.
    if (paginationMode == SINGLE_PAGE) {
        final ConfluencePage singlePage = ConfluencePageBuilder.aConfluencePage()
                .withPageType(PageType.ROOT).withOriginalTitle(config.getTitle())
                .withConfluenceTitle(buildConfluenceTitle(config.getTitle(), null, null))
                .build();

        final Document xhtmlSource = config.isIncludeTableOfContentsOnSinglePage()
                ? originalDocument
                : transformedDocument;
        singlePage.setXhtml(xhtmlSource.html());

        pages.add(singlePage);
        return pages;
    }

    // Individual page mode decides how the DOM is split: its category pages carry
    // an inner table of contents instead of the category content.
    final boolean individualPages = (paginationMode == INDIVIDUAL_PAGES);

    // The content goes to sub-pages, so the root page shows the table of contents.
    // It is selected here but rendered only after the loop below has run, so the
    // stripping done in category mode is reflected in the root page.
    final Elements tocElements = originalDocument.select(".toc");

    final List<String> innerTocXHtmlList = new ArrayList<>();
    for (final Element innerToc : originalDocument.select(".sectlevel2")) {
        if (individualPages) {
            // Keep each inner table of contents for its category page.
            innerTocXHtmlList.add("<div id=\"toc\" class=\"toc\">"
                    + "<h4 id=\"toctitle\">Table of Contents</h4>"
                    + "<div><ul class=\"sectlevel1\">"
                    + innerToc.html()
                    + "</ul></div></div>");
        } else {
            // Category mode: remove the inner table of contents from the root ToC.
            innerToc.html("");
            innerToc.unwrap();
        }
    }

    // Root page with the appropriate level of table of contents.
    pages.add(ConfluencePageBuilder.aConfluencePage()
            .withPageType(PageType.ROOT).withOriginalTitle(config.getTitle())
            .withConfluenceTitle(buildConfluenceTitle(config.getTitle(), null, null))
            .withXhtml(tocElements.html()).build());

    int category = 1;
    for (final Element categoryElement : categoryElements) {
        // The first child of a category element is its header.
        final String categoryTitle = categoryElement.children().first().text();

        // Category page: inner ToC in individual mode, the category content otherwise.
        pages.add(ConfluencePageBuilder.aConfluencePage()
                .withPageType(PageType.CATEGORY).withOriginalTitle(categoryTitle)
                .withConfluenceTitle(buildConfluenceTitle(categoryTitle, category, null))
                .withXhtml(individualPages ? innerTocXHtmlList.get(category - 1) : categoryElement.html())
                .build());

        if (individualPages) {
            int individual = 1;
            for (final Element individualElement : categoryElement.getElementsByClass("sect2")) {
                final String individualTitle = individualElement.children().first().text();
                pages.add(ConfluencePageBuilder.aConfluencePage()
                        .withPageType(INDIVIDUAL).withOriginalTitle(individualTitle)
                        .withConfluenceTitle(buildConfluenceTitle(individualTitle, category, individual))
                        .withXhtml(individualElement.html()).build());
                individual++;
            }
        }

        category++;
    }

    return pages;
}

From source file:info.smartkit.hairy_batman.query.SogouSearchQuery.java

/**
 * Queries the Sogou WeChat search page for the subscribe id of {@code wxFoo} and, if
 * {@code wxFoo} has no open id yet, extracts one from the {@code href} of the matching
 * search result, stores it, writes the report file, inserts a row into the basic table
 * and continues with {@code parseSogouJsonSite}.
 * <p>
 * A result (element with class {@code wx-rb}) only counts as a match when the HTML of
 * its {@code <em>} elements contains the subscribe id. If nothing matches, or the href
 * does not contain the open id keyword, nothing is stored.
 * <p>
 * I/O and data access failures are logged and not propagated.
 */
public void parseWxOpenId() {
    try {
        // need http protocol
        Document doc = Jsoup.connect("http://weixin.sogou.com/weixin?type=1&query=" + wxFoo.getSubscribeId()
                + "&fr=sgsearch&ie=utf8&_ast=1423915648&_asf=null&w=01019900&cid=null&sut=19381").get();

        LOG.debug("openID html INFO:" + doc.html());
        LOG.debug("title : " + doc.title());

        // Remember a result only when it really matches. Previously the last result
        // inspected was used even if none of them contained the subscribe id.
        Element matched = null;
        for (Element candidate : doc.getElementsByClass("wx-rb")) {
            LOG.debug("openID html INFO:" + candidate.html());
            if (candidate.getElementsByTag("em").html().indexOf(wxFoo.getSubscribeId()) != -1) {
                matched = candidate;
                break;
            }
        }
        String openIdLinkHref = (matched != null) ? matched.attr("href") : "";
        LOG.debug("openIdLinkHref:" + openIdLinkHref);

        if (this.wxFoo.getOpenId() != null || openIdLinkHref.length() == 0) {
            return;
        }

        // The open id is whatever follows the keyword in the href; without the keyword
        // there is no second part to read.
        String[] hrefParts = openIdLinkHref.split(GlobalConsts.SOGOU_SEARCH_WX_OPEN_ID_KEYWORDS);
        if (hrefParts.length < 2) {
            LOG.warn("openId keyword not found in href: " + openIdLinkHref);
            return;
        }

        this.wxFoo.setOpenId(hrefParts[1]);
        LOG.info("saved wxOpenId value: " + this.wxFoo.getOpenId());
        GlobalVariables.wxFooListWithOpenId.add(this.wxFoo);
        // File reporting
        new FileReporter(GlobalConsts.REPORT_FILE_OUTPUT_OPENID, GlobalVariables.wxFooListWithOpenId,
                FileReporter.REPORTER_TYPE.R_T_OPENID, FileReporter.REPORTER_FILE_TYPE.EXCEL).write();

        // Then,OpenID JSON site parse
        try {
            if (this.wxFoo.getOpenId() != null) {
                // Save openId to DB.
                insertBasicRow(this.wxFoo.getOpenId());
            } else {
                // NOTE(review): this branch looks unreachable as long as setOpenId()
                // simply stores the non-null value it was just given; kept unchanged.
                LOG.warn("SogouSearchQuery getOpenId Failure! site info:" + wxFoo.getCode());
                // TODO write those info to File or DB for collect which
                // agency not open weixin service
                insertBasicRow("");
                LOG.warn("Can not get subsriber info: " + this.wxFoo.getCode());
            }
            this.parseSogouJsonSite(this.wxFoo.getOpenId());
        } catch (DataAccessException e) {
            LOG.error(e.toString(), e);
        }
    } catch (IOException e) {
        LOG.error(e.toString(), e);
    }
}

/** Inserts the current {@code wxFoo} into the basic table with the given open id value. */
private void insertBasicRow(String openId) {
    GlobalVariables.jdbcTempate.update("insert into " + GlobalConsts.QUERY_TABLE_NAME_BASIC
            + "(id,store,agency,unit,subscribeId,onSubscribe,code,openId) values(?,?,?,?,?,?,?,?)",
            new Object[] { this.wxFoo.getId(), this.wxFoo.getStore(), this.wxFoo.getAgency(),
                    this.wxFoo.getUnit(), this.wxFoo.getSubscribeId(),
                    this.wxFoo.getOnSubscribe(), this.wxFoo.getCode(), openId },
            new int[] { java.sql.Types.INTEGER, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR,
                    java.sql.Types.VARCHAR, java.sql.Types.VARCHAR, java.sql.Types.VARCHAR,
                    java.sql.Types.VARCHAR, java.sql.Types.VARCHAR });
}

From source file:com.maxl.java.aips2sqlite.PseudoExpertInfo.java

/**
 * Extracts all the important information from the pseudo "Fachinfo" file
 * @param pseudo_info_file/*from  w  w w . j av a 2s  .c om*/
 */
public boolean extractInfo(int idx, FileInputStream pseudo_info_file) {
    mMedi = new MedicalInformations.MedicalInformation();

    mSectionContent = new ArrayList<String>();
    mSectionTitles = new ArrayList<String>();
    mBarCodes = new ArrayList<String>();
    m_list_of_packages = new ArrayList<String>();

    String mediTitle = "";
    String mediAuthor = "";
    String mediPseudoTag = "";
    String mediHtmlContent = "";

    StringBuilder content = new StringBuilder();

    try {
        // Read in docx file
        XWPFDocument docx = new XWPFDocument(pseudo_info_file);
        // Get iterator through all paragraphs
        Iterator<XWPFParagraph> para = docx.getParagraphsIterator();

        // Pre-process input stream to extract paragraph titles
        boolean goodToGo = false;
        while (para.hasNext()) {
            List<XWPFRun> runs = para.next().getRuns();
            if (!runs.isEmpty()) {
                for (XWPFRun r : runs) {
                    // bold and italics identifies section title!
                    if (r.isBold()) { // && r.isItalic()) {
                        String pText = r.getParagraph().getText();
                        // These are the first chapter titles (DE and FR)
                        if (pText.equals("Zusammensetzung") || pText.equals("Composition"))
                            goodToGo = true;
                        if (goodToGo == true)
                            mSectionTitles.add(pText);
                    }
                }
            }
        }
        // Add "nil" at the end
        mSectionTitles.add("nil");

        if (mLanguage.equals("de") && !mSectionTitles.get(0).equals("Zusammensetzung"))
            return false;
        if (mLanguage.equals("fr") && !mSectionTitles.get(0).equals("Composition"))
            return false;

        // Reset iterator
        para = docx.getParagraphsIterator();

        // Init list for section content 
        for (int i = 0; i < mSectionTitles.size(); ++i)
            mSectionContent.add(i, "");

        // Get title
        if (para.hasNext())
            mediTitle = para.next().getParagraphText();
        // Get author while using "Medizinprodukt" as tag
        String prevParaText = "";
        while (para.hasNext()) {
            String paraText = para.next().getParagraphText();
            // If this word is not found, then no pseudo FI will be produced
            if (paraText.equals("Medizinprodukt") || paraText.equals("Dispositif mdical")) {
                mediPseudoTag = paraText;
                mediAuthor = prevParaText;
                break;
            }
            prevParaText = paraText;
        }

        // Get section titles + sections + ean codes
        boolean isSectionPackungen = false;
        int numSection = 0;
        // Init with section1 and title
        String sectionId_str = "";
        String sectionTitle_str = "";
        mEanCodes_str = "";
        mSectionIds_str = "section1,";
        mSectionTitles_str = mediTitle + ",";
        m_pack_info_str = "";
        // This is the EAN code pattern
        Pattern pattern = Pattern.compile("^[0-9]{13}");
        // Loop through it, identifying medication title, author, section titles and corresponding titles
        while (para.hasNext()) {
            String paraText = para.next().getParagraphText();
            if (paraText.equals(mSectionTitles.get(numSection))) {
                // ->> Get section title
                isSectionPackungen = false;
                // Get section title
                if (numSection < mSectionTitles.size())
                    numSection++;
                // Section "Packungen" is special
                if (paraText.equals("Packungen") || paraText.equals("Prsentation")) {
                    isSectionPackungen = true;
                }
                // Close previous div
                if (numSection > 1)
                    content.append("</div>");
                // Create html
                sectionId_str = "section" + (numSection + 1); // section1 is reserved for the MonTitle
                sectionTitle_str = mSectionTitles.get(numSection - 1);
                content.append("<div class=\"paragraph\" id=\"" + sectionId_str + "\">");
                content.append("<div class=\"absTitle\">" + sectionTitle_str + "</div>");
                // Generate section id string
                mSectionIds_str += (sectionId_str + ",");
                // Generate titles string
                mSectionTitles_str += (sectionTitle_str + ";");
            } else {
                // ->> Get section content
                String s = mSectionContent.get(numSection - 1);
                mSectionContent.set(numSection - 1, s + paraText + " ");
                // Create html
                content.append("<p class=\"spacing1\">" + paraText + "</p>");
                // Extract EAN codes and start positions
                Matcher matcher = pattern.matcher(paraText);
                while (matcher.find()) {
                    String eanCode = matcher.group();
                    mEanCodes_str += (eanCode + ", ");
                    if (!eanCode.isEmpty()) {
                        String pup = "";
                        String efp = "";
                        String fep = "";
                        String fap = "";
                        String vat = "";
                        String size = "";
                        String units = "";
                        String swissmedic_cat = "";
                        String pharma_code = "";
                        int visible = 0xff;
                        int has_free_samples = 0x00; // by default no free samples
                        // Exctract fep and fap pricing information
                        // FAP = Fabrikabgabepreis = EFP?
                        // FEP = Fachhandelseinkaufspreis
                        // EFP = FAP < FEP < PUP                     
                        if (m_map_products != null && eanCode != null && m_map_products.containsKey(eanCode)) {
                            Product product = m_map_products.get(eanCode);
                            if (product.efp > 0.0f)
                                efp = String.format("CHF %.2f", product.efp);
                            if (product.pp > 0.0f)
                                pup = String.format("CHF %.2f", product.pp);
                            if (product.fap > 0.0f)
                                fap = String.format("CHF %.2f", product.fap);
                            if (product.fep > 0.0f)
                                fep = String.format("CHF %.2f", product.fep);
                            if (product.vat > 0.0f)
                                vat = String.format("%.2f", product.vat);
                            if (product.size != null && !product.size.isEmpty())
                                size = product.size;
                            if (product.units != null && product.units.length > 0)
                                units = product.units[0];
                            if (product.swissmedic_cat != null && !product.swissmedic_cat.isEmpty())
                                swissmedic_cat = product.swissmedic_cat;
                            if (product.pharmacode != null && !product.pharmacode.isEmpty())
                                pharma_code = product.pharmacode;
                            visible = product.visible;
                            has_free_samples = product.free_sample;
                        }
                        m_list_of_packages.add(mediTitle.toUpperCase() + ", " + units + ", " + size + "|" + size
                                + "|" + units + "|" + efp + "|" + pup + "|" + fap + "|" + fep + "|" + vat + "|"
                                + swissmedic_cat + ",,|" + eanCode + "|" + pharma_code + "|" + visible + "|"
                                + has_free_samples + "\n");
                        // Generate bar codes
                        BarCode bc = new BarCode();
                        String barcodeImg64 = bc.encode(eanCode);
                        mBarCodes.add("<p class=\"spacing1\">" + barcodeImg64 + "</p>");
                        content.append(barcodeImg64);
                    }
                }
                // Generate section Packungen for search result
                if (isSectionPackungen)
                    m_pack_info_str += (paraText + "\n");
            }
        }
        /*
        // Add chapter "Barcodes"
        content.append("<p class=\"paragraph\"></p><div class=\"absTitle\">" + "Barcodes" + "</div>");
        for (String bcode : mBarCodes)
           content.append(bcode);
        */
        // Remove last comma from mEanCodes_str
        if (!mEanCodes_str.isEmpty())
            mEanCodes_str = mEanCodes_str.substring(0, mEanCodes_str.length() - 2);
        // Remove last \n from mSectionPackungen_str
        if (!m_pack_info_str.isEmpty())
            m_pack_info_str = m_pack_info_str.substring(0, m_pack_info_str.length() - 1);

        // Set title, autor
        mMedi.setTitle(mediTitle);
        mMedi.setAuthHolder(mediAuthor);
        mMedi.setAtcCode("PSEUDO");
        mMedi.setSubstances(mediTitle);

        System.out.println(idx + " - " + mediTitle + ": " + mEanCodes_str);

        // Close previous div + monographie div
        content.append("</div></div>");
        String title = "<div class=\"MonTitle\" id=\"section1\">" + mediTitle + "</div>";
        String author = "<div class=\"ownerCompany\"><div style=\"text-align: right;\">" + mediAuthor
                + "</div></div>";
        // Set "Medizinprodukt" label
        String pseudo = "<p class=\"spacing1\">" + mediPseudoTag + "</p>";
        // Set medi content         
        mediHtmlContent = "<html><head></head><body><div id=\"monographie\">" + title + author + pseudo
                + content.toString() + "</div></body></html>";

        // Generate clean html file
        Document doc = Jsoup.parse(mediHtmlContent);
        doc.outputSettings().escapeMode(EscapeMode.xhtml);
        doc.outputSettings().charset("UTF-8");
        doc.outputSettings().prettyPrint(true);
        doc.outputSettings().indentAmount(1);
        mediHtmlContent = doc.html();

        // Set html content
        mMedi.setContent(mediHtmlContent);

        // Add to DB
        addToDB();

        return true;
    } catch (IOException e) {
        e.printStackTrace();
        return false;
    }
}

From source file:gui.InboxPanel.java

/**
 * Replaces the body of the HTML currently held by {@code BodyTextPane} with the given
 * markup and writes the resulting document back to the pane.
 *
 * @param sbody HTML to set as the new body content
 */
private void setTextBody(String sbody) {
    Document doc = Jsoup.parseBodyFragment(BodyTextPane.getText());
    // sbody is inserted as HTML, not as escaped text
    doc.body().html(sbody);
    BodyTextPane.setText(doc.html());
}

From source file:org.kitesdk.spring.hbase.example.service.WebPageSnapshotService.java

/**
 * Fetch the web page from the URL, parse the HTML to populate the metadata
 * required by WebPageSnapshotModel, and return the constructed
 * WebPageSnapshotModel./* www .j  a va  2 s .  c  o m*/
 *
 * @param url The URL to fetch the web page from
 * @return The WebPageSnapshotModel
 * @throws IOException Thrown if there's an issue fetching the web page.
 */
private WebPageSnapshotModel fetchWebPage(String url, String contentKey) throws IOException {
    long fetchTime = System.currentTimeMillis();
    Connection connection = Jsoup.connect(url);
    Response response = connection.execute();
    long postFetchTime = System.currentTimeMillis();
    int timeToFetch = (int) (postFetchTime - fetchTime);

    Document doc = response.parse();
    String destinationUrl = response.url().toString();
    String title = doc.title();
    String description = getDescriptionFromDocument(doc);
    List<String> keywords = getKeywordsFromDocument(doc);
    List<String> outlinks = getOutlinksFromDocument(doc);

    return WebPageSnapshotModel.newBuilder().setUrl(destinationUrl)
            .setFetchedAtRevTs(Long.MAX_VALUE - fetchTime).setSize(doc.html().length()).setFetchedAt(fetchTime)
            .setFetchTimeMs(timeToFetch).setTitle(title).setDescription(description).setKeywords(keywords)
            .setOutlinks(outlinks).setContentKey(contentKey).setContent(ImmutableMap.of(contentKey, doc.html()))
            .build();
}