Example usage for org.jsoup.nodes Document outputSettings

List of usage examples for org.jsoup.nodes Document outputSettings

Introduction

In this page you can find the example usage for org.jsoup.nodes Document outputSettings.

Prototype

OutputSettings outputSettings

To view the source code for org.jsoup.nodes Document outputSettings, click the Source Link.

Usage

From source file:com.maxl.java.aips2xml.Aips2Xml.java

/**
 * Rebuilds the "Packungen" (packages) section of an HTML document from the package table.
 *
 * <p>For every registration number in {@code regnr_str}, the 1000 candidate keys
 * "registration number + three-digit suffix" are probed. A candidate is processed when it is a
 * key of {@code pack_info} and {@code package_info} holds a row for it. Each processed row yields
 * one {@code <p class="spacing1">} paragraph. The paragraphs replace the content of the element
 * with id {@code Section7800}, or {@code section18} if the former does not exist.
 *
 * <p>Side effect: {@code mPackSection_str} is set to a tag-free, newline-separated version of the
 * generated paragraphs.
 *
 * @param title       document title; only used in the error message
 * @param pack_info   map whose key set decides which candidate keys are processed
 * @param regnr_str   comma-separated registration numbers
 * @param content_str HTML content to update
 * @param tIndex_list output list; exactly two entries are appended (values 9 and 6 of the first
 *                    processed row, or two empty strings when no row was processed)
 * @return the HTML of the updated document
 */
static String updateSectionPackungen(String title, Map<String, ArrayList<String>> pack_info, String regnr_str,
        String content_str, List<String> tIndex_list) {
    // NOTE(review): the second argument of Jsoup.parse(String, String) is the base URI, not a
    // charset -- confirm that "UTF-16" is really intended here.
    Document doc = Jsoup.parse(content_str, "UTF-16");
    List<String> pinfo_str = new ArrayList<String>();
    boolean tindex_added = false;

    // Extract swissmedicno5 registration numbers
    List<String> swissmedicno5_list = Arrays.asList(regnr_str.split("\\s*,\\s*"));
    for (String s : swissmedicno5_list) {
        // Extract original / generika info + Selbstbehalt info from "add_info_map"
        String orggen_str = "";
        String flagsb_str = "";
        String addinfo_str = add_info_map.get(s);
        if (addinfo_str != null) {
            // split() drops trailing empty fields, so fewer than two entries may come back;
            // check the size before indexing to avoid an IndexOutOfBoundsException.
            List<String> ai_list = Arrays.asList(addinfo_str.split("\\s*;\\s*"));
            if (ai_list.size() > 0 && !ai_list.get(0).isEmpty()) {
                orggen_str = ", " + ai_list.get(0);
            }
            if (ai_list.size() > 1 && !ai_list.get(1).isEmpty()) {
                flagsb_str = ", " + ai_list.get(1);
            }
        }
        // Generate all candidate swissmedicno8 keys = swissmedicno5 + zero-padded 3-digit suffix
        for (int n = 0; n < 1000; ++n) {
            String swissmedicno8_key = s + String.format("%03d", n);
            if (!pack_info.containsKey(swissmedicno8_key)) {
                continue;
            }
            ArrayList<String> pi_row = package_info.get(swissmedicno8_key);
            if (pi_row == null) {
                continue;
            }
            // --> Add "ausser Handel" information
            String withdrawn_str = "";
            if (pi_row.get(10).length() > 0) {
                withdrawn_str = ", " + pi_row.get(10);
            }
            // Collapse whitespace runs in the title, then strip "[...?] " markers
            String medtitle = capitalizeFully(pi_row.get(1).replaceAll("\\s+", " "), 1);
            medtitle = medtitle.replaceAll("\\[(.*?)\\?\\] ", "");
            // --> Public price, or a language-specific "not available" label.
            // No paragraph is generated when the price is missing and the language is neither de nor fr.
            String price_str = null;
            if (pi_row.get(7).length() > 0) {
                price_str = pi_row.get(7);
            } else if (DB_LANGUAGE.equals("de")) {
                price_str = "k.A.";
            } else if (DB_LANGUAGE.equals("fr")) {
                price_str = "prix n.s.";
            }
            if (price_str != null) {
                pinfo_str.add("<p class=\"spacing1\">" + medtitle + ", " + price_str + withdrawn_str
                        + " [" + pi_row.get(5) + pi_row.get(11) + pi_row.get(12) + flagsb_str
                        + orggen_str + "]</p>");
            }
            // --> Add "tindex_str" and "application_str" (see SqlDatabase.java), first row only
            if (!tindex_added) {
                tIndex_list.add(pi_row.get(9)); // therapeutic index
                tIndex_list.add(pi_row.get(6)); // application area
                tindex_added = true;
            }
        }
    }
    // In case nothing was found
    if (!tindex_added) {
        tIndex_list.add("");
        tIndex_list.add("");
    }

    // Concatenate the generated paragraphs
    StringBuilder p_builder = new StringBuilder();
    for (String p : pinfo_str) {
        p_builder.append(p);
    }
    String p_str = p_builder.toString();

    // Generate a html-deprived string: drop opening <p ...> tags, turn </p> into line breaks
    mPackSection_str = p_str.replaceAll("\\<p.*?\\>", "").replaceAll("<\\/p\\>", "\n");
    // Remove last \n
    if (mPackSection_str.length() > 0) {
        mPackSection_str = mPackSection_str.substring(0, mPackSection_str.length() - 1);
    }

    // Replace original package information with the generated paragraphs
    doc.outputSettings().escapeMode(EscapeMode.xhtml);
    Element section = doc.select("[id=Section7800]").first();
    if (section == null) {
        section = doc.select("[id=section18]").first();
    }
    if (section != null) {
        section.html("<div class=\"absTitle\">Packungen</div>" + p_str);
    } else if (SHOW_ERRORS) {
        System.err.println(">> ERROR: elem is null, sections 18/7800 does not exist: " + title);
    }

    return doc.html();
}

From source file:com.blackducksoftware.tools.nrt.generator.NRTReportGenerator.java

/**
 * Prints the license texts of one component to the given stream.
 *
 * <p>A "License texts (n)" header is always printed. The individual license sections are only
 * printed when {@code nrtConfig.isTextFileOutput()} is true. Any exception is logged and not
 * propagated.
 *
 * @param componentName  key of the component in {@code componentMap}
 * @param outputTextFile stream the text is written to
 */
private void writeOutLicenseText(String componentName, PrintStream outputTextFile) {
    try {
        outputTextFile.println();

        // Header with the number of licenses ("0" when the component reports none)
        Object licenseCount;
        if (componentMap.get(componentName).getLicenses() != null) {
            licenseCount = componentMap.get(componentName).getLicenses().size();
        } else {
            licenseCount = "0";
        }
        outputTextFile.println("License texts (" + licenseCount + ")");

        // Nothing more to print when no licenses exist
        if (componentMap.get(componentName).getLicenses() == null) {
            return;
        }

        int licenseCounter = 0;
        for (LicenseModel license : componentMap.get(componentName).getLicenseModels()) {
            String licenseName;
            if (license.getName() != null) {
                licenseName = license.getName() + "(Taken from KnowledgeBase)";
            } else {
                licenseName = "license_" + licenseCounter + "(Taken from scanned file)";
            }
            // The counter advances for every license, printed or not
            licenseCounter++;

            if (!nrtConfig.isTextFileOutput()) {
                continue;
            }

            outputTextFile.println();
            outputTextFile.println(
                    "==========================================================================");
            outputTextFile.println(licenseName);
            // Strip all markup from the license text, then decode the HTML entities
            String strippedText = Jsoup.clean(license.getText(), "", Whitelist.none(),
                    new Document.OutputSettings().prettyPrint(false));
            outputTextFile.print(StringEscapeUtils.unescapeHtml(strippedText));
        }
    } catch (Exception e) {
        log.error("Error writing out licenses", e);
    }
}

From source file:com.maxl.java.aips2sqlite.RealExpertInfo.java

/**
 * Rebuilds the package section (and, where available, the Swiss DRG subsection) of an HTML
 * document from the package table.
 *
 * <p>For every registration number in {@code regnr_str}, the 1000 candidate keys produced by
 * {@code getSwissmedicNo8} are probed. A candidate is processed when it is a key of
 * {@code pack_info} and {@code m_package_info} holds a row for it. Each processed row yields one
 * package paragraph. Paragraphs are sorted with {@code AlphanumComp} and ordered originals first,
 * then generics, then the rest.
 *
 * <p>Side effects visible in this method:
 * <ul>
 * <li>entries are appended to {@code m_list_of_packages} and {@code m_list_of_eancodes},
 *     and {@code m_list_of_packages} is sorted;</li>
 * <li>entry 10 of processed rows may be rewritten ("a.H." / "p.c.");</li>
 * <li>{@code m_pack_info_str} is set when {@code CmlOptions.PLAIN} is false.</li>
 * </ul>
 *
 * @param title       document title; used for console output only
 * @param atc_code    ATC code used to look up Swiss DRG data; may be {@code null}
 * @param pack_info   map whose key set decides which candidate keys are processed
 * @param regnr_str   comma-separated registration numbers
 * @param content_str HTML content to update
 * @param tIndex_list output list; exactly two entries are appended (values 9 and 6 of the first
 *                    processed row, or two empty strings when no row was processed)
 * @return the HTML of the (possibly updated) document
 */
private String updateSectionPackungen(String title, String atc_code, Map<String, ArrayList<String>> pack_info,
        String regnr_str, String content_str, List<String> tIndex_list) {
    // NOTE(review): the second argument of Jsoup.parse(String, String) is the base URI, not a
    // charset -- confirm that "UTF-16" is really intended here.
    Document doc = Jsoup.parse(content_str, "UTF-16");
    // package info strings for originals
    List<String> pinfo_originals_str = new ArrayList<String>();
    // package info strings for generics
    List<String> pinfo_generics_str = new ArrayList<String>();
    // package info strings for the rest
    List<String> pinfo_str = new ArrayList<String>();

    boolean tindex_added = false;

    // Extract swissmedicno5 registration numbers
    List<String> swissmedicno5_list = Arrays.asList(regnr_str.split("\\s*,\\s*"));
    for (String smno5 : swissmedicno5_list) {
        // Extract original / generika info + Selbstbehalt info from "m_add_info_map"
        String orggen_str = ""; // O=Original, G=Generika
        String flagsb_str = ""; // SB=Selbstbehalt
        String addinfo_str = m_add_info_map.get(smno5);
        if (addinfo_str != null) {
            // split() drops trailing empty fields, so fewer than two entries may come back;
            // check the size before indexing to avoid an IndexOutOfBoundsException.
            List<String> ai_list = Arrays.asList(addinfo_str.split("\\s*;\\s*"));
            if (ai_list.size() > 0 && !ai_list.get(0).isEmpty()) {
                orggen_str = ", " + ai_list.get(0); // O + G
            }
            if (ai_list.size() > 1 && !ai_list.get(1).isEmpty()) {
                flagsb_str = ", " + ai_list.get(1); // SB
            }
        }
        // Generate all candidate swissmedicno8 keys, check if they're keys and retrieve package info
        for (int n = 0; n < 1000; ++n) {
            String swissmedicno8_key = getSwissmedicNo8(smno5, n);
            if (!pack_info.containsKey(swissmedicno8_key)) {
                continue;
            }
            ArrayList<String> pi_row = m_package_info.get(swissmedicno8_key);
            if (pi_row == null) {
                continue;
            }
            // The "shopping cart" string built below contains:
            // Präparatname | Package size | Package unit | Exfactory price | Public price
            // | FAP | FEP | VAT | Spezialitätenliste, Swissmedic Kategorie, Limitations
            // | EAN code | Pharma code | visibility | free samples
            String barcode_html = "";
            String pup = pi_row.get(7); // public price
            String efp = pi_row.get(8); // exfactory price
            String fep = "";
            String fap = "";
            String vat = "";
            String eancode = pi_row.get(14);
            int visible = 0xff; // by default visible to all!
            int has_free_samples = 0x00; // by default no free samples
            // Extract fep and fap pricing information
            // FAP = Fabrikabgabepreis = EFP?
            // FEP = Fachhandelseinkaufspreis
            // EFP = FAP < FEP < PUP
            if (m_map_products != null && eancode != null && m_map_products.containsKey(eancode)) {
                Product product = m_map_products.get(eancode);
                // Correct these prices, if necessary... the m_map_products info comes from the owner directly!
                // @maxl: Added on 30.08.2015
                if (product.efp > 0.0f) {
                    efp = String.format("CHF %.2f", product.efp);
                }
                if (product.pp > 0.0f) {
                    pup = String.format("CHF %.2f", product.pp);
                }
                if (product.fap > 0.0f) {
                    fap = String.format("CHF %.2f", product.fap);
                }
                if (product.fep > 0.0f) {
                    fep = String.format("CHF %.2f", product.fep);
                }
                if (product.vat > 0.0f) {
                    vat = String.format("%.2f", product.vat);
                }
                visible = product.visible;
                has_free_samples = product.free_sample;
            }

            // Some articles are listed in swissmedic_packages file but are not in the refdata file
            if (pi_row.get(10).equals("a.H.")) {
                pi_row.set(10, "ev.nn.i.H.");
            }
            if (pi_row.get(10).equals("p.c.")) {
                pi_row.set(10, "ev.ep.e.c.");
            }

            // Add only if medication is "in Handel" -> check pi_row.get(10)
            if (pi_row.get(10).isEmpty() || pi_row.get(10).equals("ev.nn.i.H.")
                    || pi_row.get(10).equals("ev.ep.e.c.")) {
                // --> Extract EAN-13 or EAN-12 and generate barcodes
                try {
                    // eancode is treated as nullable above, so guard here as well
                    if (eancode != null && !eancode.isEmpty()) {
                        BarCode bc = new BarCode();
                        if (eancode.length() == 12) {
                            // Append the checksum digit to get a 13-digit code
                            int cs = bc.getChecksum(eancode);
                            eancode += cs;
                        }
                        String barcodeImg64 = bc.encode(eancode);
                        barcode_html = "<p class=\"barcode\">" + barcodeImg64 + "</p>";
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
                m_list_of_packages.add(pi_row.get(1) + "|" + pi_row.get(3) + "|" + pi_row.get(4) + "|"
                        + efp + "|" + pup + "|" + fap + "|" + fep + "|" + vat + "|" + pi_row.get(5)
                        + ", " + pi_row.get(11) + ", " + pi_row.get(12) + "|" + eancode + "|"
                        + pi_row.get(15) + "|" + visible + "|" + has_free_samples + "\n");
                m_list_of_eancodes.add(eancode);
            }

            // Collapse whitespace runs in the title and capitalize, then strip "[...?] " markers
            String medtitle = capitalizeFully(pi_row.get(1).replaceAll("\\s+", " "), 1);
            medtitle = medtitle.replaceAll("\\[(.*?)\\?\\] ", "");
            // --> Add "ausser Handel" information
            String withdrawn_str = "";
            if (pi_row.get(10).length() > 0) {
                withdrawn_str = ", " + pi_row.get(10);
            }
            // --> Add ex factory price information (falls back to FEP when EFP is missing)
            String price_efp = !efp.isEmpty() ? "EFP" + efp.replace("CHF", "")
                    : "FEP" + fep.replace("CHF", "");
            String price_pp = !pup.isEmpty() ? ", PP" + pup.replace("CHF", "") : "";
            if (efp.length() > 0 || fep.length() > 0) {
                // The rest of the package information
                String append_str = ", " + price_efp + price_pp + withdrawn_str + " [" + pi_row.get(5)
                        + pi_row.get(11) + pi_row.get(12) + flagsb_str + orggen_str + "]";
                String package_html = "<p class=\"spacing1\">" + medtitle + append_str + "</p>" + barcode_html;
                // File the paragraph under originals, generics or the rest
                if (orggen_str.equals(", O")) {
                    pinfo_originals_str.add(package_html);
                } else if (orggen_str.equals(", G")) {
                    pinfo_generics_str.add(package_html);
                } else {
                    pinfo_str.add(package_html);
                }
            } else {
                //
                // @maxl (10.01.2014): Price for swissmedicNo8 pack is not listed in bag_preparations.xml!!
                //
                pinfo_str.add("<p class=\"spacing1\">" + medtitle + withdrawn_str + " [" + pi_row.get(5)
                        + "]</p>" + barcode_html);
            }

            // --> Add "tindex_str" and "application_str" (see SqlDatabase.java), first row only
            if (!tindex_added) {
                tIndex_list.add(pi_row.get(9)); // therapeutic index
                tIndex_list.add(pi_row.get(6)); // application area
                tindex_added = true;
            }
        }
    }
    // Re-order the strings alphabetically
    if (!m_list_of_packages.isEmpty()) {
        Collections.sort(m_list_of_packages, new AlphanumComp());
    }
    if (!pinfo_originals_str.isEmpty()) {
        Collections.sort(pinfo_originals_str, new AlphanumComp());
    }
    if (!pinfo_generics_str.isEmpty()) {
        Collections.sort(pinfo_generics_str, new AlphanumComp());
    }
    if (!pinfo_str.isEmpty()) {
        Collections.sort(pinfo_str, new AlphanumComp());
    }
    // Concatenate lists: originals, then generics, then the rest
    pinfo_originals_str.addAll(pinfo_generics_str);
    pinfo_originals_str.addAll(pinfo_str);
    pinfo_str = pinfo_originals_str;

    // In case nothing was found
    if (!tindex_added) {
        tIndex_list.add("");
        tIndex_list.add("");
    }

    /*
    * Replace package information
    */
    if (!CmlOptions.PLAIN) {
        // Concatenate the generated paragraphs
        StringBuilder p_builder = new StringBuilder();
        for (String p : pinfo_str) {
            p_builder.append(p);
        }
        String p_str = p_builder.toString();

        // Generate a html-deprived string: drop image-only paragraphs, drop opening <p ...> tags,
        // turn </p> into line breaks
        m_pack_info_str = p_str.replaceAll("<p class=\"spacing1\">[<](/)?img[^>]*[>]</p>", "");
        m_pack_info_str = m_pack_info_str.replaceAll("<p class=\"barcode\">[<](/)?img[^>]*[>]</p>", "");
        m_pack_info_str = m_pack_info_str.replaceAll("\\<p.*?\\>", "");
        m_pack_info_str = m_pack_info_str.replaceAll("<\\/p\\>", "\n");

        // Remove last \n
        if (m_pack_info_str.length() > 0) {
            m_pack_info_str = m_pack_info_str.substring(0, m_pack_info_str.length() - 1);
        }

        doc.outputSettings().escapeMode(EscapeMode.xhtml);
        Element div7800 = doc.select("[id=Section7800]").first();

        // Initialize section titles.
        // The non-ASCII characters of the German/French literals below had been lost
        // ("Prsentation", "Spitler mssen", ...) and are restored here.
        String packages_title = "Packungen";
        String swiss_drg_title = "Swiss DRG";
        if (CmlOptions.DB_LANGUAGE.equals("fr")) {
            packages_title = "Présentation";
            swiss_drg_title = "Swiss DRG";
        }

        // Generate html for chapter "Packungen" and subchapter "Swiss DRGs"
        // ** Chapter "Packungen"
        StringBuilder section_html = new StringBuilder();
        section_html.append("<div class=\"absTitle\">").append(packages_title).append("</div>").append(p_str);
        // ** Subchapter "Swiss DRGs"
        // Loop through list of dosages for a particular atc code and format appropriately
        if (atc_code != null) {
            // Update DRG footnote super scripts
            String footnotes = "1";
            String fn = m_swiss_drg_footnote.get(atc_code);
            if (fn != null) {
                footnotes += (", " + fn);
            }
            ArrayList<String> dosages = m_swiss_drg_info.get(atc_code);
            // For most atc codes, there are NO special DRG sanctioned dosages...
            if (dosages != null) {
                System.out.println(title + " (DRG)");
                // Generate Swiss DRG string
                StringBuilder drg_str = new StringBuilder();
                for (String drg : dosages) {
                    drg_str.append("<p class=\"spacing1\">").append(drg).append("</p>");
                }
                if (drg_str.length() > 0) {
                    section_html.append("<p class=\"paragraph\"></p><div class=\"absTitle\">")
                            .append(swiss_drg_title).append("<sup>").append(footnotes).append("</sup></div>")
                            .append(drg_str);
                }

                section_html.append("<p class=\"noSpacing\"></p>");
                if (CmlOptions.DB_LANGUAGE.equals("de")) {
                    section_html.append("<p class=\"spacing1\"><sup>1</sup> Alle Spitäler müssen im Rahmen der jährlichen Datenerhebung (Detaillieferung) die SwissDRG AG zwingend über die Höhe der in Rechnung gestellten Zusatzentgelte informieren.</p>");
                    section_html.append("<p class=\"spacing1\"><sup>2</sup> Eine zusätzliche Abrechnung ist im Zusammenhang mit einer Fallpauschale der Basis-DRGs L60 oder L71 nicht möglich.</p>");
                    section_html.append("<p class=\"spacing1\"><sup>3</sup> Eine Abrechnung des Zusatzentgeltes ist nur über die in der Anlage zum Fallpauschalenkatalog aufgeführten Dosisklassen möglich.</p>");
                    section_html.append("<p class=\"spacing1\"><sup>4</sup> Dieses Zusatzentgelt ist nur abrechenbar für Patienten mit einem Alter < 15 Jahre.</p>");
                    section_html.append("<p class=\"spacing1\"><sup>5</sup> Dieses Zusatzentgelt darf nicht zusätzlich zur DRG A91Z abgerechnet werden, da in dieser DRG Apheresen die Hauptleistung darstellen. "
                            + "Die Verfahrenskosten der  Apheresen sind in dieser DRG bereits vollumfänglich enthalten.</p>");
                } else if (CmlOptions.DB_LANGUAGE.equals("fr")) {
                    section_html.append("<p class=\"spacing1\"><sup>1</sup> Tous les hôpitaux doivent impérativement informer SwissDRG SA lors du relevé (relevé détaillé) sur le montant des rémunérations supplémentaires facturées.</p>");
                    section_html.append("<p class=\"spacing1\"><sup>2</sup> Une facturation supplémentaire aux forfaits par cas des DRG de base L60 ou L71 n'est pas possible.</p>");
                    section_html.append("<p class=\"spacing1\"><sup>3</sup> Une facturation des rémunération supplémentaires n'est possible que pour les classes de dosage définies dans cette annexe.</p>");
                    section_html.append("<p class=\"spacing1\"><sup>4</sup> Cette rémunération supplémentaire n'est facturable que pour les patients âgés de moins de 15 ans.</p>");
                    section_html.append("<p class=\"spacing1\"><sup>5</sup> Cette rémunération supplémentaire ne peut pas être facturée en plus du DRG A91Z, la prestation principale de ce DRG étant l'aphérèse. "
                            + "Les coûts du traitement par aphérèse sont déjà intégralement compris dans le DRG.</p>");
                }
            }
        }

        // Write the section into Section7800, falling back to section18
        Element section = div7800;
        if (section == null) {
            section = doc.select("[id=section18]").first();
        }
        if (section != null) {
            section.html(section_html.toString());
        } else if (CmlOptions.SHOW_ERRORS) {
            System.err.println(">> ERROR: elem is null, sections 18/7800 does not exist: " + title);
        }
    }

    return doc.html();
}

From source file:org.asqatasun.rules.doc.utils.rga33.extractor.Rgaa3Extractor.java

/**
 * Rewrites the {@code .test-detail} element of every HTML testcase file with the raw rule HTML.
 *
 * <p>Each sub-directory of {@code RGAA3_TESTCASE_PATH} is expected to be named after a rule class
 * ({@code Rgaa30Rule} followed by three two-digit numbers: theme, criterion, test). For every
 * file in it whose name contains "html", the {@code .test-detail} element is turned into an empty
 * {@code div}, gets {@code lang="fr"} when it has no {@code lang} attribute, and receives the raw
 * HTML of the matching {@code RGAA3} entry. Zero-padded dotted keys (e.g. "01.02.03") in the
 * output are replaced by the entry's dotted rule key. The file is then overwritten.
 *
 * @throws IOException if the testcase directory cannot be listed or a file cannot be read or written
 */
private static void createTestcaseFiles() throws IOException {
    File srcDir = new File(RGAA3_TESTCASE_PATH);
    // listFiles() returns null when the path is not a directory or cannot be read
    File[] ruleDirs = srcDir.listFiles();
    if (ruleDirs == null) {
        throw new IOException("Cannot list testcase directory: " + srcDir);
    }
    for (File file : ruleDirs) {
        // Skip plain files (and unreadable directories) lying next to the rule directories
        File[] testcases = file.listFiles();
        if (testcases == null) {
            continue;
        }
        String fileName = file.getName().replace("Rgaa30Rule", "").replace(".java", "");
        String theme = fileName.substring(0, 2);
        String crit = fileName.substring(2, 4);
        String test = fileName.substring(4, 6);
        // Key without leading zeros, e.g. "1-2-3"
        String testKey = Integer.parseInt(theme) + "-" + Integer.parseInt(crit) + "-" + Integer.parseInt(test);
        // Zero-padded dotted form, e.g. "01.02.03"
        String wrongKey = theme + "." + crit + "." + test;
        for (File testcase : testcases) {
            if (!testcase.isFile() || !testcase.getName().contains("html")) {
                continue;
            }
            Document doc = Jsoup.parse(FileUtils.readFileToString(testcase));
            Element detail = doc.select(".test-detail").first();
            if (detail == null) {
                System.out.println(doc.outerHtml());
                continue;
            }
            detail.tagName("div");
            // text("") clears all existing content (text and child elements)
            detail.text("");
            if (!detail.hasAttr("lang")) {
                detail.attr("lang", "fr");
            }
            detail.append("\n" + RGAA3.get(testKey).ruleRawHtml + "\n");
            doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml).outline(false).indentAmount(4);
            String outputHtml = doc.outerHtml();
            if (outputHtml.contains(wrongKey)) {
                // Literal replacement: replaceAll() would treat the dots in wrongKey as regex
                // wildcards and '$' or '\' in the replacement as group references.
                outputHtml = outputHtml.replace(wrongKey, RGAA3.get(testKey).getRuleDot());
            }
            FileUtils.writeStringToFile(testcase, outputHtml);
        }
    }
}

From source file:org.b3log.solo.plugin.list.ListHandler.java

/**
 * Prepends a linked table of contents to the article content carried by the event.
 *
 * <p>Every {@code h1}-{@code h5} heading of the article gets an anchor {@code <span>} inserted
 * before it. A {@code <ul class='b3-solo-list'>} with one link per heading, preceded by the
 * plugin's stylesheet link, is put in front of the article body. The result is written back to
 * {@code Article.ARTICLE_CONTENT}.
 *
 * @param event the event whose data holds the article
 * @throws EventException declared by the handler contract; not thrown by this method directly
 */
@Override
public void action(final Event<JSONObject> event) throws EventException {
    final JSONObject data = event.getData();
    final JSONObject article = data.optJSONObject(Article.ARTICLE);

    String content = article.optString(Article.ARTICLE_CONTENT);

    final Document doc = Jsoup.parse(content, StringUtils.EMPTY, Parser.htmlParser());
    doc.outputSettings().prettyPrint(false);

    final StringBuilder listBuilder = new StringBuilder();

    listBuilder.append("<link rel=\"stylesheet\" type=\"text/css\" href=\"").append(Latkes.getStaticServePath())
            .append("/plugins/list/style.css\" />");

    final Elements hs = doc.select("h1, h2, h3, h4, h5");

    listBuilder.append("<ul class='b3-solo-list'>");
    for (int i = 0; i < hs.size(); i++) {
        final Element element = hs.get(i);
        final String tagName = element.tagName().toLowerCase();
        // text() returns the heading text with entities decoded, so it has to be escaped again
        // before it is concatenated into markup; otherwise '<' or '&' in a heading breaks the list.
        final String text = escapeHeadingText(element.text());
        final String id = "b3_solo_" + tagName + "_" + i;

        // Anchor the list entry links to
        element.before("<span id='" + id + "'></span>");

        listBuilder.append("<li class='b3-solo-list-").append(tagName).append("'><a href='#").append(id)
                .append("'>").append(text).append("</a></li>");
    }
    listBuilder.append("</ul>");

    final Element body = doc.getElementsByTag("body").get(0);

    content = listBuilder.toString() + body.html();

    article.put(Article.ARTICLE_CONTENT, content);
}

/**
 * Escapes the characters that are significant in HTML element content.
 *
 * @param raw plain text
 * @return {@code raw} with {@code &}, {@code <} and {@code >} replaced by their entities
 */
private static String escapeHeadingText(final String raw) {
    // '&' must be handled first so the entities produced below are not escaped again
    return raw.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;");
}

From source file:org.b3log.symphony.service.ArticleQueryService.java

/**
 * Markdowns the specified article content.
 *
 * <ul>/*  w  ww.  j a  v a2s  .  c  o  m*/
 * <li>Markdowns article content/reward content</li>
 * <li>Generates secured article content/reward content</li>
 * </ul>
 *
 * @param article the specified article content
 */
public void markdown(final JSONObject article) {
    String content = article.optString(Article.ARTICLE_CONTENT);

    final int articleType = article.optInt(Article.ARTICLE_TYPE);
    if (Article.ARTICLE_TYPE_C_THOUGHT != articleType) {
        content = Markdowns.toHTML(content);
        content = Markdowns.clean(content,
                Latkes.getServePath() + article.optString(Article.ARTICLE_PERMALINK));
    } else {
        final Document.OutputSettings outputSettings = new Document.OutputSettings();
        outputSettings.prettyPrint(false);

        content = Jsoup.clean(content, Latkes.getServePath() + article.optString(Article.ARTICLE_PERMALINK),
                Whitelist.relaxed().addAttributes(":all", "id", "target", "class").addTags("span", "hr")
                        .addAttributes("iframe", "src", "width", "height")
                        .addAttributes("audio", "controls", "src"),
                outputSettings);

        content = content.replace("\n", "\\n").replace("'", "\\'").replace("\"", "\\\"");
    }

    article.put(Article.ARTICLE_CONTENT, content);

    if (article.optInt(Article.ARTICLE_REWARD_POINT) > 0) {
        String rewardContent = article.optString(Article.ARTICLE_REWARD_CONTENT);
        rewardContent = Markdowns.toHTML(rewardContent);
        rewardContent = Markdowns.clean(rewardContent,
                Latkes.getServePath() + article.optString(Article.ARTICLE_PERMALINK));
        article.put(Article.ARTICLE_REWARD_CONTENT, rewardContent);
    }
}

From source file:org.b3log.symphony.util.Markdowns.java

/**
 * Converts the specified markdown text to HTML.
 *
 * @param markdownText the specified markdown text
 * @return converted HTML, returns an empty string "" if the specified markdown text is "" or {@code null}, returns
 * 'markdownErrorLabel' if exception/* ww  w.jav a  2  s.com*/
 */
public static String toHTML(final String markdownText) {
    if (Strings.isEmptyOrNull(markdownText)) {
        return "";
    }

    final String cachedHTML = getHTML(markdownText);
    if (null != cachedHTML) {
        return cachedHTML;
    }

    final ExecutorService pool = Executors.newSingleThreadExecutor();
    final long[] threadId = new long[1];

    final Callable<String> call = () -> {
        threadId[0] = Thread.currentThread().getId();

        String html = LANG_PROPS_SERVICE.get("contentRenderFailedLabel");

        if (MARKED_AVAILABLE) {
            html = toHtmlByMarked(markdownText);
            if (!StringUtils.startsWith(html, "<p>")) {
                html = "<p>" + html + "</p>";
            }
        } else {
            com.vladsch.flexmark.ast.Node document = PARSER.parse(markdownText);
            html = RENDERER.render(document);
            if (!StringUtils.startsWith(html, "<p>")) {
                html = "<p>" + html + "</p>";
            }
        }

        final Document doc = Jsoup.parse(html);
        final List<org.jsoup.nodes.Node> toRemove = new ArrayList<>();
        doc.traverse(new NodeVisitor() {
            @Override
            public void head(final org.jsoup.nodes.Node node, int depth) {
                if (node instanceof org.jsoup.nodes.TextNode) {
                    final org.jsoup.nodes.TextNode textNode = (org.jsoup.nodes.TextNode) node;
                    final org.jsoup.nodes.Node parent = textNode.parent();

                    if (parent instanceof Element) {
                        final Element parentElem = (Element) parent;

                        if (!parentElem.tagName().equals("code")) {
                            String text = textNode.getWholeText();
                            boolean nextIsBr = false;
                            final org.jsoup.nodes.Node nextSibling = textNode.nextSibling();
                            if (nextSibling instanceof Element) {
                                nextIsBr = "br".equalsIgnoreCase(((Element) nextSibling).tagName());
                            }

                            if (null != userQueryService) {
                                try {
                                    final Set<String> userNames = userQueryService.getUserNames(text);
                                    for (final String userName : userNames) {
                                        text = text.replace('@' + userName + (nextIsBr ? "" : " "),
                                                "@<a href='" + Latkes.getServePath() + "/member/" + userName
                                                        + "'>" + userName + "</a> ");
                                    }
                                    text = text.replace("@participants ",
                                            "@<a href='https://hacpai.com/article/1458053458339' class='ft-red'>participants</a> ");
                                } finally {
                                    JdbcRepository.dispose();
                                }
                            }

                            if (text.contains("@<a href=")) {
                                final List<org.jsoup.nodes.Node> nodes = Parser.parseFragment(text, parentElem,
                                        "");
                                final int index = textNode.siblingIndex();

                                parentElem.insertChildren(index, nodes);
                                toRemove.add(node);
                            } else {
                                textNode.text(Pangu.spacingText(text));
                            }
                        }
                    }
                }
            }

            @Override
            public void tail(org.jsoup.nodes.Node node, int depth) {
            }
        });

        toRemove.forEach(node -> node.remove());

        doc.select("pre>code").addClass("hljs");
        doc.select("a").forEach(a -> {
            String src = a.attr("href");
            if (!StringUtils.startsWithIgnoreCase(src, Latkes.getServePath())) {
                try {
                    src = URLEncoder.encode(src, "UTF-8");
                } catch (final Exception e) {
                }
                a.attr("href", Latkes.getServePath() + "/forward?goto=" + src);
                a.attr("target", "_blank");
            }
        });
        doc.outputSettings().prettyPrint(false);

        String ret = doc.select("body").html();
        ret = StringUtils.trim(ret);

        // cache it
        putHTML(markdownText, ret);

        return ret;
    };

    Stopwatchs.start("Md to HTML");
    try {
        final Future<String> future = pool.submit(call);

        return future.get(MD_TIMEOUT, TimeUnit.MILLISECONDS);
    } catch (final TimeoutException e) {
        LOGGER.log(Level.ERROR, "Markdown timeout [md=" + markdownText + "]");
        Callstacks.printCallstack(Level.ERROR, new String[] { "org.b3log" }, null);

        final Set<Thread> threads = Thread.getAllStackTraces().keySet();
        for (final Thread thread : threads) {
            if (thread.getId() == threadId[0]) {
                thread.stop();

                break;
            }
        }
    } catch (final Exception e) {
        LOGGER.log(Level.ERROR, "Markdown failed [md=" + markdownText + "]", e);
    } finally {
        pool.shutdownNow();

        Stopwatchs.end();
    }

    return LANG_PROPS_SERVICE.get("contentRenderFailedLabel");
}

From source file:org.b3log.symphony.util.Markdowns.java

/**
 * Gets the safe HTML content of the specified content.
 *
 * @param content the specified content
 * @param baseURI the specified base URI, the relative path value of href will starts with this URL
 * @return safe HTML content
 */
public static String clean(final String content, final String baseURI) {
    final Document.OutputSettings outputSettings = new Document.OutputSettings();
    outputSettings.prettyPrint(false);

    final String tmp = Jsoup.clean(content, baseURI,
            Whitelist.relaxed().addAttributes(":all", "id", "target", "class")
                    .addTags("span", "hr", "kbd", "samp", "tt", "del", "s", "strike", "u")
                    .addAttributes("iframe", "src", "width", "height", "border", "marginwidth", "marginheight")
                    .addAttributes("audio", "controls", "src")
                    .addAttributes("video", "controls", "src", "width", "height")
                    .addAttributes("source", "src", "media", "type")
                    .addAttributes("object", "width", "height", "data", "type")
                    .addAttributes("param", "name", "value")
                    .addAttributes("input", "type", "disabled", "checked").addAttributes("embed", "src", "type",
                            "width", "height", "wmode", "allowNetworking"),
            outputSettings);
    final Document doc = Jsoup.parse(tmp, baseURI, Parser.htmlParser());

    final Elements ps = doc.getElementsByTag("p");
    for (final Element p : ps) {
        p.removeAttr("style");
    }

    final Elements iframes = doc.getElementsByTag("iframe");
    for (final Element iframe : iframes) {
        final String src = StringUtils.deleteWhitespace(iframe.attr("src"));
        if (StringUtils.startsWithIgnoreCase(src, "javascript")
                || StringUtils.startsWithIgnoreCase(src, "data:")) {
            iframe.remove();
        }
    }

    final Elements objs = doc.getElementsByTag("object");
    for (final Element obj : objs) {
        final String data = StringUtils.deleteWhitespace(obj.attr("data"));
        if (StringUtils.startsWithIgnoreCase(data, "data:")
                || StringUtils.startsWithIgnoreCase(data, "javascript")) {
            obj.remove();

            continue;
        }

        final String type = StringUtils.deleteWhitespace(obj.attr("type"));
        if (StringUtils.containsIgnoreCase(type, "script")) {
            obj.remove();
        }
    }

    final Elements embeds = doc.getElementsByTag("embed");
    for (final Element embed : embeds) {
        final String data = StringUtils.deleteWhitespace(embed.attr("src"));
        if (StringUtils.startsWithIgnoreCase(data, "data:")
                || StringUtils.startsWithIgnoreCase(data, "javascript")) {
            embed.remove();

            continue;
        }
    }

    final Elements as = doc.getElementsByTag("a");
    for (final Element a : as) {
        a.attr("rel", "nofollow");

        final String href = a.attr("href");
        if (href.startsWith(Latkes.getServePath())) {
            continue;
        }

        a.attr("target", "_blank");
    }

    final Elements audios = doc.getElementsByTag("audio");
    for (final Element audio : audios) {
        audio.attr("preload", "none");
    }

    final Elements videos = doc.getElementsByTag("video");
    for (final Element video : videos) {
        video.attr("preload", "none");
    }

    String ret = doc.body().html();
    ret = ret.replaceAll("(</?br\\s*/?>\\s*)+", "<br>"); // patch for Jsoup issue

    return ret;
}

From source file: org.dswarm.xmlenhancer.XMLEnhancer.java

/**
 * Prints the given node to {@code out} with its entities unescaped.
 *
 * <p>Before anything is printed, the output settings of the node's owner document are switched
 * to the xhtml escape mode, xml syntax and no pretty-printing. A {@link TextNode} is printed via
 * {@code getWholeText()}; any other node is serialised with {@code toString()} and then passed
 * through {@code Parser.unescapeEntities(..., true)}.
 *
 * @param out  the writer that receives the output
 * @param node the node to print
 */
private static void unescapeEntity(final PrintWriter out, final Node node) {

    final Document.OutputSettings settings = node.ownerDocument().outputSettings();
    settings.escapeMode(Entities.EscapeMode.xhtml);
    settings.syntax(Document.OutputSettings.Syntax.xml);
    settings.prettyPrint(false);

    final String output;

    if (node instanceof TextNode) {
        output = ((TextNode) node).getWholeText();
    } else {
        output = Parser.unescapeEntities(node.toString(), true);
    }

    out.print(output);
}