Example usage for org.jsoup.nodes Document outputSettings

List of usage examples for org.jsoup.nodes Document outputSettings

Introduction

On this page you can find example usages of org.jsoup.nodes Document outputSettings.

Prototype

OutputSettings outputSettings

To view the source code for org.jsoup.nodes Document outputSettings, click Source Link.

Usage

From source file:com.switchfly.inputvalidation.sanitizer.StripHtmlSanitizer.java

/**
 * Reduces HTML content to its plain text.
 * <p>
 * Blank input is handed back unchanged. Otherwise the content is parsed with jsoup, every
 * {@code script}, {@code link}, {@code iframe} and {@code style} element is dropped, and the
 * text of what remains is returned.
 *
 * @param content the markup to strip; may be blank
 * @return {@code content} itself when it is blank, otherwise the text of the parsed document
 */
@Override
public String execute(String content) {
    if (!StringUtils.isBlank(content)) {
        Document parsed = Jsoup.parse(content);
        parsed.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
        // Detach every matched element from the tree in a single call.
        parsed.select("script,link,iframe,style").remove();
        return parsed.text();
    }
    return content;
}

From source file:com.betel.flowers.pdf.util.XMLtoHtml.java

/**
 * Parses an HTML string, serialises it again using XML output syntax and unescapes the
 * HTML4 entities in the result.
 *
 * @param htmlString the markup to normalise
 * @return the re-serialised document with HTML4 entities unescaped
 * @throws IOException declared by the signature; nothing in this body throws it explicitly
 */
public String checkHTML(String htmlString) throws IOException {
    Document parsed = Jsoup.parse(htmlString);
    parsed.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
    // Any runtime exception propagates to the caller unchanged.
    return StringEscapeUtils.unescapeHtml4(parsed.html());
}

From source file:com.maxl.java.aips2xml.Aips2Xml.java

/**
 * Cleans and sanitises the HTML content of one medical information record and pairs it with
 * the registration number(s) extracted from it.
 * <p>
 * Side effects on {@code m}: its content is replaced by the processed HTML, its auth holder has
 * {@code &#038;} replaced by {@code &}, and for the German database its style has
 * {@code untertitel} renamed to {@code untertitle}.
 *
 * @param m the record to process; it is modified in place
 * @return a two-element array: index 0 holds the registration number string, index 1 the
 *         processed HTML content
 */
static String[] extractHtmlSection(MedicalInformations.MedicalInformation m) {
    String original_html = m.getContent();

    // Clean html code
    HtmlUtils html_utils = new HtmlUtils(original_html);
    html_utils.clean();

    // Extract registration number (swissmedic no5)
    String regnr_str = "";
    if (DB_LANGUAGE.equals("de")) {
        regnr_str = html_utils.extractRegNrDE(m.getTitle());
    } else if (DB_LANGUAGE.equals("fr")) {
        regnr_str = html_utils.extractRegNrFR(m.getTitle());
    }

    // Sanitize html
    String html_sanitized;
    // Only the first 20 characters are inspected for an xml marker; shorter content is
    // inspected in full instead of failing with StringIndexOutOfBoundsException.
    String content_head = original_html.substring(0, Math.min(20, original_html.length()));
    // First check for bad boys (version=1! but actually version>1!)
    if (!m.getVersion().equals("1") || content_head.contains("xml")) {
        StringBuilder sections = new StringBuilder("<div id=\"monographie\">");
        for (int i = 1; i < 22; ++i) {
            sections.append(html_utils.sanitizeSection(i, m.getTitle(), DB_LANGUAGE));
        }
        html_sanitized = sections.append("</div>").toString();
    } else {
        html_sanitized = original_html;
    }

    // Update "Packungen" section and extract therapeutisches index
    List<String> mTyIndex_list = new ArrayList<String>();
    String mContent_str = updateSectionPackungen(m.getTitle(), package_info, regnr_str, html_sanitized,
            mTyIndex_list);

    // Add meta-tag and link
    mContent_str = mContent_str.replaceAll("<head>",
            "<head>" + "<link href=\"amiko_stylesheet.css\" rel=\"stylesheet\" type=\"text/css\"></>"
                    + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">");

    m.setContent(mContent_str);

    // Fix problem with wrong div class in original Swissmedic file.
    // A single replacement suffices: it also rewrites the "untertitel" prefix of "untertitel1",
    // so a separate pass for "untertitel1" could never match anything.
    if (DB_LANGUAGE.equals("de")) {
        m.setStyle(m.getStyle().replaceAll("untertitel", "untertitle"));
    }

    // Correct formatting error introduced by Swissmedic
    m.setAuthHolder(m.getAuthHolder().replaceAll("&#038;", "&"));

    // All registration numbers are returned, not just the first one
    return new String[] { regnr_str, mContent_str };
}

From source file:com.maxl.java.aips2xml.Aips2Xml.java

/**
 * Wraps the given markup in a {@code <kompendium>} element and prepends a {@code <date>} and a
 * {@code <lang>} element to it.
 * <p>
 * The date is the current time rendered with {@link Date#toString()}. The language element is
 * filled with {@code DE} or {@code FR} depending on {@code DB_LANGUAGE} and is left empty for
 * any other value. {@code tbody} elements are unwrapped, {@code img} elements without a
 * {@code src} attribute are unwrapped, and the remaining {@code img} elements are renamed to
 * {@code image}.
 *
 * @param xml_str the markup to wrap
 * @return the outer HTML of the first {@code kompendium} element
 */
static String addHeaderToXml(String xml_str) {
    Document wrapped = Jsoup.parse("<kompendium>\n" + xml_str + "</kompendium>");
    wrapped.outputSettings().escapeMode(EscapeMode.xhtml).prettyPrint(true).indentAmount(4);

    // Add date
    String timestamp = new Date().toString();
    wrapped.select("kompendium").first().prependElement("date");
    wrapped.select("date").first().text(timestamp);

    // Add language
    wrapped.select("date").after("<lang></lang>");
    String lang_code = null;
    if (DB_LANGUAGE.equals("de")) {
        lang_code = "DE";
    } else if (DB_LANGUAGE.equals("fr")) {
        lang_code = "FR";
    }
    if (lang_code != null) {
        wrapped.select("lang").first().text(lang_code);
    }

    // Work around jsoup.parse, which seems to have a "life" of its own
    wrapped.select("tbody").unwrap();
    for (Element img : wrapped.select("img")) {
        if (img.hasAttr("src")) {
            continue;
        }
        img.unwrap();
    }
    wrapped.select("img").tagName("image");

    return wrapped.select("kompendium").first().outerHtml();
}

From source file:com.maxl.java.aips2xml.Aips2Xml.java

/**
 * Converts the HTML of one monograph into the tag vocabulary used by the XML export.
 * <p>
 * The document is parsed with jsoup, known {@code div} classes are renamed to dedicated tags
 * ({@code fi}, {@code title}, {@code owner}, {@code paragraph}, ...), presentation attributes
 * are stripped, a {@code swissmedicno5} element is inserted after {@code owner}, and the body
 * markup is post-processed line by line.
 *
 * @param med_title the title to write into the {@code title} element; a mismatch with the title
 *                  found in the document is reported on stderr when {@code SHOW_ERRORS} is set
 * @param html_str  the HTML to convert
 * @param regnr_str the registration number text written into {@code swissmedicno5}
 * @return the converted markup; the kept lines are concatenated without line separators
 */
static String convertHtmlToXml(String med_title, String html_str, String regnr_str) {
    Document mDoc = Jsoup.parse(html_str);
    mDoc.outputSettings().escapeMode(EscapeMode.xhtml);
    mDoc.outputSettings().prettyPrint(true);
    mDoc.outputSettings().indentAmount(4);

    // <div id="monographie"> -> <fi>
    mDoc.select("div[id=monographie]").tagName("fi").removeAttr("id");
    // <div class="MonTitle"> -> <title>
    mDoc.select("div[class=MonTitle]").tagName("title").removeAttr("class").removeAttr("id");
    // Beautify the title to the best of my possibilities ... still not good enough!
    String title_str = mDoc.select("title").text().trim().replaceAll("<br />", "").replaceAll("(\\t|\\r?\\n)+",
            "");
    if (!title_str.equals(med_title)) {
        if (SHOW_ERRORS) {
            System.err.println(med_title + " differs from " + title_str);
        }
    }
    // Fallback solution: use title from the header AIPS.xml file - the titles look all pretty good!
    mDoc.select("title").first().text(med_title);
    // <div class="ownerCompany"> -> <owner>
    Element owner_elem = mDoc.select("div[class=ownerCompany]").first();
    if (owner_elem != null) {
        owner_elem.tagName("owner").removeAttr("class");
        // Re-setting the text flattens any nested markup inside <owner>
        String owner_str = mDoc.select("owner").text();
        mDoc.select("owner").first().text(owner_str);
    } else {
        // No owner in the document: insert a placeholder element after the title
        mDoc.select("title").after("<owner></owner>");
        if (DB_LANGUAGE.equals("de")) {
            mDoc.select("owner").first().text("k.A.");
        } else if (DB_LANGUAGE.equals("fr")) {
            mDoc.select("owner").first().text("n.s.");
        }
    }

    // <div class="paragraph"> -> <paragraph>
    mDoc.select("div[class=paragraph]").tagName("paragraph").removeAttr("class").removeAttr("id");
    // <div class="absTitle"> -> <paragraphtitle>
    mDoc.select("div[class=absTitle]").tagName("paragraphtitle").removeAttr("class");
    // <div class="untertitle1"> -> <paragraphsubtitle>
    mDoc.select("div[class=untertitle1]").tagName("paragraphsubtitle").removeAttr("class");
    // <div class="untertitle"> -> <paragraphsubtitle>
    mDoc.select("div[class=untertitle]").tagName("paragraphsubtitle").removeAttr("class");
    // <div class="shortCharacteristic"> -> <characteristic>
    mDoc.select("div[class=shortCharacteristic]").tagName("characteristic").removeAttr("class");
    // <div class="image"> -> <image>
    mDoc.select("div[class=image]").tagName("image").removeAttr("class");

    // <p class="spacing1"> -> <p> / <p class="noSpacing"> -> <p>
    mDoc.select("p[class]").tagName("p").removeAttr("class");
    // Every <span> becomes <i>, whatever its style
    mDoc.select("span").tagName("i").removeAttr("style");
    // <i class="indention1"> -> <i> / <i class="indention2"> -> <i>
    mDoc.select("i[class=indention1]").tagName("i").removeAttr("class");
    mDoc.select("i[class=indention2]").tagName("i").removeAttr("class");
    // Flatten non-empty paragraph titles and subtitles to plain text
    Elements elems = mDoc.select("paragraphtitle");
    for (Element e : elems) {
        if (!e.text().isEmpty()) {
            e.text(e.text());
        }
    }
    elems = mDoc.select("paragraphsubtitle");
    for (Element e : elems) {
        if (!e.text().isEmpty()) {
            e.text(e.text());
        }
    }

    // Here we take care of tables
    // <table class="s21"> -> <table>
    mDoc.select("table[class]").removeAttr("class");
    mDoc.select("table").removeAttr("cellspacing").removeAttr("cellpadding").removeAttr("border");
    mDoc.select("colgroup").remove();
    mDoc.select("td").removeAttr("class").removeAttr("colspan").removeAttr("rowspan");
    mDoc.select("tr").removeAttr("class");
    // Drop the remaining classed divs that carry no text
    elems = mDoc.select("div[class]");
    for (Element e : elems) {
        if (e.text().isEmpty()) {
            e.remove();
        }
    }

    mDoc.select("tbody").unwrap();
    // Remove nested table (a nasty table-in-a-table)
    Elements nested_table = mDoc.select("table").select("tr").select("td").select("table");
    if (!nested_table.isEmpty()) {
        nested_table.select("table").unwrap();
    }

    // Here we take care of the images
    mDoc.select("img").removeAttr("style").removeAttr("align").removeAttr("border");

    // Subs and sups
    mDoc.select("sub[class]").tagName("sub").removeAttr("class");
    mDoc.select("sup[class]").tagName("sup").removeAttr("class");
    mDoc.select("td").select("sub").tagName("td-sub");
    mDoc.select("td").select("sup").tagName("td-sup");
    // Inside <p>, turn <td-sup>/<td-sub> back into plain <sup>/<sub>
    mDoc.select("p").select("td-sup").tagName("sup");
    mDoc.select("p").select("td-sub").tagName("sub");

    // Box
    mDoc.select("div[class=box]").tagName("box").removeAttr("class");

    // Insert swissmedicno5 after <owner> tag (the closing tag is now well-formed)
    mDoc.select("owner").after("<swissmedicno5></swissmedicno5>");
    mDoc.select("swissmedicno5").first().text(regnr_str);

    // Remove html, head and body tags
    String xml_str = mDoc.select("body").first().html();

    xml_str = xml_str.replaceAll("<sup> </sup>", "");
    xml_str = xml_str.replaceAll("<sub> </sub>", "");
    xml_str = xml_str.replaceAll("<p> <i>", "<p><i>");
    xml_str = xml_str.replaceAll("</p> </td>", "</p></td>");
    xml_str = xml_str.replaceAll("<p> </p>", "<p></p>"); // MUST be improved, the space is not a real space!!
    // NOTE(review): the pattern literal below is empty as it stands, which makes replaceAll
    // insert "- " at every position of the string. Presumably it once held a bullet-like
    // character that was lost in transcoding - confirm against the upstream source and restore it.
    xml_str = xml_str.replaceAll("", "- ");
    xml_str = xml_str.replaceAll("<br />", "");
    // Drop lines that contain only spaces or tabs
    xml_str = xml_str.replaceAll("(?m)^[ \t]*\r?\n", "");

    // Remove multiple instances of <p></p>: at most two consecutive empty paragraphs are kept
    Scanner scanner = new Scanner(xml_str);
    StringBuilder new_xml = new StringBuilder(xml_str.length());
    int counter = 0;
    while (scanner.hasNextLine()) {
        String line = scanner.nextLine();
        if (line.trim().equals("<p></p>")) {
            counter++;
        } else {
            counter = 0;
        }
        if (counter < 3) {
            new_xml.append(line);
        }
    }
    scanner.close();

    return new_xml.toString();
}

From source file:me.rkfg.xmpp.bot.plugins.CoolStoryPlugin.java

/**
 * Downloads a story from the given website and reduces it to plain text.
 * <p>
 * When {@code CONFIG_REROLL_LONG_STORIES} is set, a story that exceeds
 * {@code CONFIG_MAX_STORY_LENGTH} characters or {@code CONFIG_MAX_STORY_LINES} lines is fetched
 * again as long as the attempt count has not passed {@code CONFIG_MAX_ROLLS}.
 *
 * @param website supplies the URL to fetch and the CSS query that selects the story element
 * @return the story text, or {@code ERROR_COULD_NOT_PARSE} when the query matches nothing
 * @throws IOException if fetching the page fails
 */
private String fetchStory(Website website) throws IOException {
    int attempt = 0;

    while (true) {
        attempt++;

        final Document page = Jsoup.connect(website.getUrlString()).userAgent(DEFAULT_UA).get();
        page.outputSettings(new Document.OutputSettings().prettyPrint(false));
        logger.info("Fetched a story from {}", page.location());

        final Element story = page.select(website.getCssQuery()).first();
        if (story == null) {
            return ERROR_COULD_NOT_PARSE;
        }

        story.select("div").remove();
        // Images are replaced by their source URL as plain text
        for (Element img : story.select("img")) {
            img.replaceWith(new TextNode(img.attr("src"), ""));
        }
        // Literal backslash-n markers are planted next to <br> and <p> and turned into real
        // line breaks once the markup has been serialised.
        story.select("br").after("\\n");
        story.select("p").before("\\n\\n");
        final String markup = story.html().replaceAll("\\\\n", "\n");

        final Document.OutputSettings flatOutput = new Document.OutputSettings().prettyPrint(false);
        final String text = Jsoup.clean(markup, "", Whitelist.none(), flatOutput).trim();
        final int textLength = text.length();
        final int textLines = countLines(text);

        final boolean oversized = textLength > CONFIG_MAX_STORY_LENGTH || textLines > CONFIG_MAX_STORY_LINES;
        //noinspection ConstantConditions
        if (!(CONFIG_REROLL_LONG_STORIES && oversized && attempt <= CONFIG_MAX_ROLLS)) {
            return text;
        }
    }
}

From source file:ac.simons.oembed.Oembed.java

/**
 * Transforms the given HTML via {@code transformDocument} and returns the inner HTML of the
 * resulting body, serialised without pretty printing and with the xhtml escape mode.
 *
 * @param documentHtml the HTML to transform
 * @return the inner HTML of the transformed document's body
 */
public String transformDocumentString(final String documentHtml) {
    final Document transformed = transformDocument(documentHtml);
    final Document.OutputSettings settings = transformed.outputSettings();
    settings.prettyPrint(false);
    settings.escapeMode(EscapeMode.xhtml);
    return transformed.body().html();
}

From source file:com.isomorphic.maven.packaging.Downloads.java

/**
 * Requests the distribution's remote index and collects the {@code href} attribute of every
 * element selected by the distribution's {@link Distribution#getRemoteIndexFilter() filter}.
 * A warning containing the server response is logged when nothing is selected.
 *
 * @param dist the build in which some files should exist
 * @return a String array of html href attributes, empty when nothing matched
 * @throws MojoExecutionException if the GET request fails or the response cannot be read and parsed
 */
private String[] list(Distribution dist) throws MojoExecutionException {

    HttpGet request = new HttpGet(dist.getRemoteIndex());

    HttpResponse response;
    try {
        LOGGER.debug("Requesting list of files from {}{}", DOMAIN, dist.getRemoteIndex());
        response = httpClient.execute(host, request);
    } catch (Exception e) {
        throw new MojoExecutionException("Error issuing GET request for bundle at '" + request + "'", e);
    }

    Document page;
    try {
        page = Jsoup.parse(EntityUtils.toString(response.getEntity()));
        page.outputSettings().prettyPrint(true);
    } catch (Exception e) {
        throw new MojoExecutionException("Error processing response from '" + request + "'", e);
    }

    Elements anchors = page.select(dist.getRemoteIndexFilter());

    // One slot per selected element; attr() yields "" when the attribute is absent
    String[] hrefs = new String[anchors.size()];
    for (int i = 0; i < hrefs.length; i++) {
        hrefs[i] = anchors.get(i).attr("href");
    }

    if (hrefs.length == 0) {
        LOGGER.warn(String.format("No downloads found at '%s%s'.  Response from server: \n\n%s\n", DOMAIN,
                dist.getRemoteIndex(), page.html()));
    }

    return hrefs;
}

From source file:com.maxl.java.aips2sqlite.PseudoExpertInfo.java

/**
 * Extracts all the important information from the pseudo "Fachinfo" file
 * @param pseudo_info_file/*from  w ww. j a  va 2  s. c o m*/
 */
public boolean extractInfo(int idx, FileInputStream pseudo_info_file) {
    mMedi = new MedicalInformations.MedicalInformation();

    mSectionContent = new ArrayList<String>();
    mSectionTitles = new ArrayList<String>();
    mBarCodes = new ArrayList<String>();
    m_list_of_packages = new ArrayList<String>();

    String mediTitle = "";
    String mediAuthor = "";
    String mediPseudoTag = "";
    String mediHtmlContent = "";

    StringBuilder content = new StringBuilder();

    try {
        // Read in docx file
        XWPFDocument docx = new XWPFDocument(pseudo_info_file);
        // Get iterator through all paragraphs
        Iterator<XWPFParagraph> para = docx.getParagraphsIterator();

        // Pre-process input stream to extract paragraph titles
        boolean goodToGo = false;
        while (para.hasNext()) {
            List<XWPFRun> runs = para.next().getRuns();
            if (!runs.isEmpty()) {
                for (XWPFRun r : runs) {
                    // bold and italics identifies section title!
                    if (r.isBold()) { // && r.isItalic()) {
                        String pText = r.getParagraph().getText();
                        // These are the first chapter titles (DE and FR)
                        if (pText.equals("Zusammensetzung") || pText.equals("Composition"))
                            goodToGo = true;
                        if (goodToGo == true)
                            mSectionTitles.add(pText);
                    }
                }
            }
        }
        // Add "nil" at the end
        mSectionTitles.add("nil");

        if (mLanguage.equals("de") && !mSectionTitles.get(0).equals("Zusammensetzung"))
            return false;
        if (mLanguage.equals("fr") && !mSectionTitles.get(0).equals("Composition"))
            return false;

        // Reset iterator
        para = docx.getParagraphsIterator();

        // Init list for section content 
        for (int i = 0; i < mSectionTitles.size(); ++i)
            mSectionContent.add(i, "");

        // Get title
        if (para.hasNext())
            mediTitle = para.next().getParagraphText();
        // Get author while using "Medizinprodukt" as tag
        String prevParaText = "";
        while (para.hasNext()) {
            String paraText = para.next().getParagraphText();
            // If this word is not found, then no pseudo FI will be produced
            if (paraText.equals("Medizinprodukt") || paraText.equals("Dispositif mdical")) {
                mediPseudoTag = paraText;
                mediAuthor = prevParaText;
                break;
            }
            prevParaText = paraText;
        }

        // Get section titles + sections + ean codes
        boolean isSectionPackungen = false;
        int numSection = 0;
        // Init with section1 and title
        String sectionId_str = "";
        String sectionTitle_str = "";
        mEanCodes_str = "";
        mSectionIds_str = "section1,";
        mSectionTitles_str = mediTitle + ",";
        m_pack_info_str = "";
        // This is the EAN code pattern
        Pattern pattern = Pattern.compile("^[0-9]{13}");
        // Loop through it, identifying medication title, author, section titles and corresponding titles
        while (para.hasNext()) {
            String paraText = para.next().getParagraphText();
            if (paraText.equals(mSectionTitles.get(numSection))) {
                // ->> Get section title
                isSectionPackungen = false;
                // Get section title
                if (numSection < mSectionTitles.size())
                    numSection++;
                // Section "Packungen" is special
                if (paraText.equals("Packungen") || paraText.equals("Prsentation")) {
                    isSectionPackungen = true;
                }
                // Close previous div
                if (numSection > 1)
                    content.append("</div>");
                // Create html
                sectionId_str = "section" + (numSection + 1); // section1 is reserved for the MonTitle
                sectionTitle_str = mSectionTitles.get(numSection - 1);
                content.append("<div class=\"paragraph\" id=\"" + sectionId_str + "\">");
                content.append("<div class=\"absTitle\">" + sectionTitle_str + "</div>");
                // Generate section id string
                mSectionIds_str += (sectionId_str + ",");
                // Generate titles string
                mSectionTitles_str += (sectionTitle_str + ";");
            } else {
                // ->> Get section content
                String s = mSectionContent.get(numSection - 1);
                mSectionContent.set(numSection - 1, s + paraText + " ");
                // Create html
                content.append("<p class=\"spacing1\">" + paraText + "</p>");
                // Extract EAN codes and start positions
                Matcher matcher = pattern.matcher(paraText);
                while (matcher.find()) {
                    String eanCode = matcher.group();
                    mEanCodes_str += (eanCode + ", ");
                    if (!eanCode.isEmpty()) {
                        String pup = "";
                        String efp = "";
                        String fep = "";
                        String fap = "";
                        String vat = "";
                        String size = "";
                        String units = "";
                        String swissmedic_cat = "";
                        String pharma_code = "";
                        int visible = 0xff;
                        int has_free_samples = 0x00; // by default no free samples
                        // Exctract fep and fap pricing information
                        // FAP = Fabrikabgabepreis = EFP?
                        // FEP = Fachhandelseinkaufspreis
                        // EFP = FAP < FEP < PUP                     
                        if (m_map_products != null && eanCode != null && m_map_products.containsKey(eanCode)) {
                            Product product = m_map_products.get(eanCode);
                            if (product.efp > 0.0f)
                                efp = String.format("CHF %.2f", product.efp);
                            if (product.pp > 0.0f)
                                pup = String.format("CHF %.2f", product.pp);
                            if (product.fap > 0.0f)
                                fap = String.format("CHF %.2f", product.fap);
                            if (product.fep > 0.0f)
                                fep = String.format("CHF %.2f", product.fep);
                            if (product.vat > 0.0f)
                                vat = String.format("%.2f", product.vat);
                            if (product.size != null && !product.size.isEmpty())
                                size = product.size;
                            if (product.units != null && product.units.length > 0)
                                units = product.units[0];
                            if (product.swissmedic_cat != null && !product.swissmedic_cat.isEmpty())
                                swissmedic_cat = product.swissmedic_cat;
                            if (product.pharmacode != null && !product.pharmacode.isEmpty())
                                pharma_code = product.pharmacode;
                            visible = product.visible;
                            has_free_samples = product.free_sample;
                        }
                        m_list_of_packages.add(mediTitle.toUpperCase() + ", " + units + ", " + size + "|" + size
                                + "|" + units + "|" + efp + "|" + pup + "|" + fap + "|" + fep + "|" + vat + "|"
                                + swissmedic_cat + ",,|" + eanCode + "|" + pharma_code + "|" + visible + "|"
                                + has_free_samples + "\n");
                        // Generate bar codes
                        BarCode bc = new BarCode();
                        String barcodeImg64 = bc.encode(eanCode);
                        mBarCodes.add("<p class=\"spacing1\">" + barcodeImg64 + "</p>");
                        content.append(barcodeImg64);
                    }
                }
                // Generate section Packungen for search result
                if (isSectionPackungen)
                    m_pack_info_str += (paraText + "\n");
            }
        }
        /*
        // Add chapter "Barcodes"
        content.append("<p class=\"paragraph\"></p><div class=\"absTitle\">" + "Barcodes" + "</div>");
        for (String bcode : mBarCodes)
           content.append(bcode);
        */
        // Remove last comma from mEanCodes_str
        if (!mEanCodes_str.isEmpty())
            mEanCodes_str = mEanCodes_str.substring(0, mEanCodes_str.length() - 2);
        // Remove last \n from mSectionPackungen_str
        if (!m_pack_info_str.isEmpty())
            m_pack_info_str = m_pack_info_str.substring(0, m_pack_info_str.length() - 1);

        // Set title, autor
        mMedi.setTitle(mediTitle);
        mMedi.setAuthHolder(mediAuthor);
        mMedi.setAtcCode("PSEUDO");
        mMedi.setSubstances(mediTitle);

        System.out.println(idx + " - " + mediTitle + ": " + mEanCodes_str);

        // Close previous div + monographie div
        content.append("</div></div>");
        String title = "<div class=\"MonTitle\" id=\"section1\">" + mediTitle + "</div>";
        String author = "<div class=\"ownerCompany\"><div style=\"text-align: right;\">" + mediAuthor
                + "</div></div>";
        // Set "Medizinprodukt" label
        String pseudo = "<p class=\"spacing1\">" + mediPseudoTag + "</p>";
        // Set medi content         
        mediHtmlContent = "<html><head></head><body><div id=\"monographie\">" + title + author + pseudo
                + content.toString() + "</div></body></html>";

        // Generate clean html file
        Document doc = Jsoup.parse(mediHtmlContent);
        doc.outputSettings().escapeMode(EscapeMode.xhtml);
        doc.outputSettings().charset("UTF-8");
        doc.outputSettings().prettyPrint(true);
        doc.outputSettings().indentAmount(1);
        mediHtmlContent = doc.html();

        // Set html content
        mMedi.setContent(mediHtmlContent);

        // Add to DB
        addToDB();

        return true;
    } catch (IOException e) {
        e.printStackTrace();
        return false;
    }
}

From source file:com.maxl.java.aips2sqlite.RealExpertInfo.java

public void process() {

    // Get stop words first
    getStopWords();/*from w w w . j  av  a  2 s .c  o  m*/

    // Extract EPha SwissmedicNo5 to ATC map
    extractSwissmedicNo5ToAtcMap();

    // Extract package information (this is the heavy-duty bit)
    extractPackageInfo();

    // Extract Swiss DRG information
    extractSwissDRGInfo();

    try {
        // Load CSS file: used only for self-contained xml files
        String amiko_style_v1_str = FileOps.readCSSfromFile(Constants.FILE_STYLE_CSS_BASE + "v1.css");

        // Create error report file
        ParseReport parse_errors = null;
        if (CmlOptions.GENERATE_REPORTS == true) {
            parse_errors = new ParseReport(Constants.FILE_PARSE_REPORT, CmlOptions.DB_LANGUAGE, "html");
            if (CmlOptions.DB_LANGUAGE.equals("de"))
                parse_errors.addHtmlHeader("Schweizer Arzneimittel-Kompendium", Constants.FI_DB_VERSION);
            else if (CmlOptions.DB_LANGUAGE.equals("fr"))
                parse_errors.addHtmlHeader("Compendium des Mdicaments Suisse", Constants.FI_DB_VERSION);
        }

        // Create indications report file
        BufferedWriter bw_indications = null;
        Map<String, String> tm_indications = new TreeMap<String, String>();
        if (CmlOptions.INDICATIONS_REPORT == true) {
            ParseReport indications_report = new ParseReport(Constants.FILE_INDICATIONS_REPORT,
                    CmlOptions.DB_LANGUAGE, "txt");
            bw_indications = indications_report.getBWriter();
        }

        /*
         * Add pseudo Fachinfos to SQLite database
         */
        int tot_pseudo_counter = 0;
        if (CmlOptions.ADD_PSEUDO_FI == true) {
            PseudoExpertInfo pseudo_fi = new PseudoExpertInfo(m_sql_db, CmlOptions.DB_LANGUAGE, m_map_products);
            // Process
            tot_pseudo_counter = pseudo_fi.process();
            System.out.println("");
        }

        /*
         * Add real Fachinfos to SQLite database
         */
        // Initialize counters for different languages
        int med_counter = 0;
        int tot_med_counter = 0;
        int missing_regnr_str = 0;
        int missing_pack_info = 0;
        int missing_atc_code = 0;
        int errors = 0;
        String fi_complete_xml = "";

        // First pass is always with DB_LANGUAGE set to German! (most complete information)
        // The file dumped in ./reports is fed to AllDown.java to generate a multilingual ATC code / ATC class file, e.g. German - French
        Set<String> atccode_set = new TreeSet<String>();

        // Treemap for owner error report (sorted by key)
        TreeMap<String, ArrayList<String>> tm_owner_error = new TreeMap<String, ArrayList<String>>();

        HtmlUtils html_utils = null;

        System.out.println("Processing real Fachinfos...");

        for (MedicalInformations.MedicalInformation m : m_med_list) {
            // --> Read FACHINFOS! <--            
            if (m.getLang().equals(CmlOptions.DB_LANGUAGE) && m.getType().equals("fi")) {
                // Database contains less than 5000 medis - this is a safe upperbound!
                if (tot_med_counter < 5000) {
                    // Trim titles of leading and trailing spaces
                    m.setTitle(m.getTitle().trim());
                    // Extract section titles and section ids
                    MedicalInformations.MedicalInformation.Sections med_sections = m.getSections();
                    List<MedicalInformations.MedicalInformation.Sections.Section> med_section_list = med_sections
                            .getSection();
                    String ids_str = "";
                    String titles_str = "";
                    for (MedicalInformations.MedicalInformation.Sections.Section s : med_section_list) {
                        ids_str += (s.getId() + ",");
                        titles_str += (s.getTitle() + ";");
                    }

                    Document doc = Jsoup.parse(m.getContent());
                    doc.outputSettings().escapeMode(EscapeMode.xhtml);

                    html_utils = new HtmlUtils(m.getContent());
                    html_utils.setLanguage(CmlOptions.DB_LANGUAGE);
                    html_utils.clean();

                    // Extract registration number (swissmedic no5)
                    String regnr_str = "";
                    if (CmlOptions.DB_LANGUAGE.equals("de"))
                        regnr_str = html_utils.extractRegNrDE(m.getTitle());
                    else if (CmlOptions.DB_LANGUAGE.equals("fr"))
                        regnr_str = html_utils.extractRegNrFR(m.getTitle());

                    // Pattern matcher for regnr command line option, (?s) searches across multiple lines
                    Pattern regnr_pattern = Pattern.compile("(?s).*\\b" + CmlOptions.OPT_MED_REGNR);

                    if (m.getTitle().toLowerCase().startsWith(CmlOptions.OPT_MED_TITLE.toLowerCase())
                            && regnr_pattern.matcher(regnr_str).find() && m.getAuthHolder().toLowerCase()
                                    .startsWith(CmlOptions.OPT_MED_OWNER.toLowerCase())) {

                        System.out.println(tot_med_counter + " - " + m.getTitle() + ": " + regnr_str);

                        if (regnr_str.isEmpty()) {
                            errors++;
                            if (CmlOptions.GENERATE_REPORTS == true) {
                                parse_errors.append("<p style=\"color:#ff0099\">ERROR " + errors
                                        + ": reg. nr. could not be parsed in AIPS.xml (swissmedic) - "
                                        + m.getTitle() + " (" + regnr_str + ")</p>");
                                // Add to owner errors
                                ArrayList<String> error = tm_owner_error.get(m.getAuthHolder());
                                if (error == null)
                                    error = new ArrayList<String>();
                                error.add(m.getTitle() + ";regnr");
                                tm_owner_error.put(m.getAuthHolder(), error);
                            }
                            missing_regnr_str++;
                            regnr_str = "";
                        }

                        // Associate ATC classes and subclasses (atc_map)               
                        String atc_class_str = "";
                        String atc_description_str = "";
                        // This bit is necessary because the ATC Code in the AIPS DB is broken sometimes 
                        String atc_code_str = "";

                        boolean atc_error_found = false;

                        // Use EPha ATC Codes, AIPS is fallback solution
                        String authNrs = m.getAuthNrs();
                        if (authNrs != null) {
                            // Deal with multi-swissmedic no5 case
                            String regnrs[] = authNrs.split(",");
                            // Use set to avoid duplicate ATC codes
                            Set<String> regnrs_set = new LinkedHashSet<>();
                            // Loop through EPha ATC codes
                            for (String r : regnrs) {
                                regnrs_set.add(m_smn5_atc_map.get(r.trim()));
                            }
                            // Iterate through set and format nicely
                            for (String r : regnrs_set) {
                                if (atc_code_str == null || atc_code_str.isEmpty())
                                    atc_code_str = r;
                                else
                                    atc_code_str += "," + r;
                            }
                        } else
                            atc_error_found = true;

                        // Notify any other problem with the EPha ATC codes
                        if (atc_code_str == null || atc_code_str.isEmpty())
                            atc_error_found = true;

                        // Fallback solution 
                        if (atc_error_found == true) {
                            if (m.getAtcCode() != null && !m.getAtcCode().equals("n.a.")
                                    && m.getAtcCode().length() > 1) {
                                atc_code_str = m.getAtcCode();
                                atc_code_str = atc_code_str.replaceAll("&ndash;", "(");
                                atc_code_str = atc_code_str.replaceAll("Code", "").replaceAll("ATC", "")
                                        .replaceAll("&nbsp", "").replaceAll("\\(.*", "").replaceAll("/", ",")
                                        .replaceAll("[^A-Za-z0-9,]", "");
                                if (atc_code_str.charAt(1) == 'O') {
                                    // E.g. Ascosal Brausetabletten
                                    atc_code_str = atc_code_str.substring(0, 1) + '0'
                                            + atc_code_str.substring(2);
                                }
                                if (atc_code_str.length() > 7) {
                                    if (atc_code_str.charAt(7) != ',' || atc_code_str.length() != 15)
                                        atc_code_str = atc_code_str.substring(0, 7);
                                }
                            } else {
                                // Work backwards using m_atc_map and m.getSubstances()
                                String substances = m.getSubstances();
                                if (substances != null) {
                                    if (m_atc_map.containsValue(substances)) {
                                        for (Map.Entry<String, String> entry : m_atc_map.entrySet()) {
                                            if (entry.getValue().equals(substances)) {
                                                atc_code_str = entry.getKey();
                                            }
                                        }
                                    }
                                }
                            }
                            atc_error_found = false;
                        }

                        // Now let's clean the m.getSubstances()
                        String substances = m.getSubstances();
                        if ((substances == null || substances.length() < 3) && atc_code_str != null) {
                            substances = m_atc_map.get(atc_code_str);
                        }

                        // Set clean substances
                        m.setSubstances(substances);
                        // Set clean ATC Code
                        m.setAtcCode(atc_code_str);

                        // System.out.println("ATC -> " + atc_code_str + ": " + substances);

                        if (atc_code_str != null) {
                            // \\s -> whitespace character, short for [ \t\n\x0b\r\f]
                            // atc_code_str = atc_code_str.replaceAll("\\s","");
                            // Take the "leaf" of the tree (most precise classification)
                            String a = m_atc_map.get(atc_code_str);
                            if (a != null) {
                                atc_description_str = a;
                                atccode_set.add(atc_code_str + ": " + a);
                            } else {
                                // Case: ATC1,ATC2
                                if (atc_code_str.length() == 15) {
                                    String[] codes = atc_code_str.split(",");
                                    if (codes.length > 1) {
                                        String a1 = m_atc_map.get(codes[0]);
                                        if (a1 == null) {
                                            atc_error_found = true;
                                            a1 = "k.A.";
                                        }
                                        String a2 = m_atc_map.get(codes[1]);
                                        if (a2 == null) {
                                            atc_error_found = true;
                                            a2 = "k.A.";
                                        }
                                        atc_description_str = a1 + "," + a2;
                                    }
                                } else if (m.getSubstances() != null) {
                                    // Fallback in case nothing else works
                                    atc_description_str = m.getSubstances();
                                    // Work backwards using m_atc_map and m.getSubstances(), change ATC code
                                    if (atc_description_str != null) {
                                        if (m_atc_map.containsValue(atc_description_str)) {
                                            for (Map.Entry<String, String> entry : m_atc_map.entrySet()) {
                                                if (entry.getValue().equals(atc_description_str)) {
                                                    m.setAtcCode(entry.getKey());
                                                }
                                            }
                                        }
                                    }
                                } else {
                                    atc_error_found = true;
                                    if (CmlOptions.DB_LANGUAGE.equals("de"))
                                        atc_description_str = "k.A.";
                                    else if (CmlOptions.DB_LANGUAGE.equals("fr"))
                                        atc_description_str = "n.s.";
                                }
                            }

                            // Read out four levels (L1, L3, L4, L5), i.e. code prefixes of length 1, 3, 4 and 5 (length 2 is skipped)
                            for (int i = 1; i < 6; i++) {
                                if (i != 2) {
                                    String atc_key = "";
                                    if (i <= atc_code_str.length())
                                        atc_key = atc_code_str.substring(0, i);
                                    char sep = (i >= 4) ? '#' : ';'; // #-separator between L4 and L5                              
                                    if (atc_key != null) {
                                        String c = m_atc_map.get(atc_key);
                                        if (c != null) {
                                            atccode_set.add(atc_key + ": " + c);
                                            atc_class_str += (c + sep);
                                        } else {
                                            atc_class_str += sep;
                                        }
                                    } else {
                                        atc_class_str += sep;
                                    }
                                }
                            }

                            // System.out.println("atc class = " + atc_class_str);

                            // If DRG medication, add to atc_description_str
                            ArrayList<String> drg = m_swiss_drg_info.get(atc_code_str);
                            if (drg != null) {
                                atc_description_str += (";DRG");
                            }
                        }

                        if (atc_error_found) {
                            errors++;
                            if (CmlOptions.GENERATE_REPORTS) {
                                parse_errors.append("<p style=\"color:#0000bb\">ERROR " + errors
                                        + ": Broken or missing ATC-Code-Tag in AIPS.xml (Swissmedic) or ATC index (Wido) - "
                                        + m.getTitle() + " (" + regnr_str + ")</p>");
                                // Add to owner errors
                                ArrayList<String> error = tm_owner_error.get(m.getAuthHolder());
                                if (error == null)
                                    error = new ArrayList<String>();
                                error.add(m.getTitle() + ";atccode");
                                tm_owner_error.put(m.getAuthHolder(), error);
                            }
                            System.err.println(">> ERROR: " + tot_med_counter
                                    + " - no ATC-Code found in the XML-Tag \"atcCode\" - (" + regnr_str + ") "
                                    + m.getTitle());
                            missing_atc_code++;
                        }

                        // Additional info stored in add_info_map
                        String add_info_str = ";";
                        List<String> rnr_list = Arrays.asList(regnr_str.split("\\s*, \\s*"));
                        if (rnr_list.size() > 0)
                            add_info_str = m_add_info_map.get(rnr_list.get(0));

                        // Sanitize html
                        String html_sanitized = "";
                        // First check for bad boys (version=1! but actually version>1!)
                        if (!m.getVersion().equals("1") || m.getContent().substring(0, 20).contains("xml")) {
                            for (int i = 1; i < 22; ++i) {
                                html_sanitized += html_utils.sanitizeSection(i, m.getTitle(), m.getAuthHolder(),
                                        CmlOptions.DB_LANGUAGE);
                            }
                            html_sanitized = "<div id=\"monographie\">" + html_sanitized + "</div>";
                        } else {
                            html_sanitized = m.getContent();
                        }

                        // Add the authorization number(s) (m.getAuthNrs()) as name attribute of the monographie div
                        html_sanitized = html_sanitized.replaceAll("<div id=\"monographie\">",
                                "<div id=\"monographie\" name=\"" + m.getAuthNrs() + "\">");

                        // Add Footer, timestamp in RFC822 format                     
                        DateFormat dateFormat = new SimpleDateFormat("EEE', 'dd' 'MMM' 'yyyy' 'HH:mm:ss' 'Z",
                                Locale.getDefault());
                        Date date = new Date();
                        String footer_str = "<p class=\"footer\">Auto-generated by <a href=\"https://github.com/zdavatz/aips2sqlite\">aips2sqlite</a> on "
                                + dateFormat.format(date) + "</p>";

                        // html_sanitized += footer_str;
                        html_sanitized = html_sanitized.replaceAll("</div>$", footer_str + "</div>");

                        // Extract section indications
                        String section_indications = "";
                        if (CmlOptions.DB_LANGUAGE.equals("de")) {
                            String sstr1 = "Indikationen/Anwendungsmglichkeiten";
                            String sstr2 = "Dosierung/Anwendung";
                            if (html_sanitized.contains(sstr1) && html_sanitized.contains(sstr2)) {
                                int idx1 = html_sanitized.indexOf(sstr1) + sstr1.length();
                                int idx2 = html_sanitized.substring(idx1, html_sanitized.length())
                                        .indexOf(sstr2);
                                try {
                                    section_indications = html_sanitized.substring(idx1, idx1 + idx2);
                                } catch (StringIndexOutOfBoundsException e) {
                                    e.printStackTrace();
                                }
                            }
                        } else if (CmlOptions.DB_LANGUAGE.equals("fr")) {
                            String sstr1 = "Indications/Possibilits demploi";
                            String sstr2 = "Posologie/Mode demploi";

                            html_sanitized = html_sanitized.replaceAll("Indications/Possibilits d&apos;emploi",
                                    sstr1);
                            html_sanitized = html_sanitized.replaceAll("Posologie/Mode d&apos;emploi", sstr2);
                            html_sanitized = html_sanitized.replaceAll("Indications/possibilits demploi",
                                    sstr1);
                            html_sanitized = html_sanitized.replaceAll("Posologie/mode demploi", sstr2);

                            if (html_sanitized.contains(sstr1) && html_sanitized.contains(sstr2)) {
                                int idx1 = html_sanitized.indexOf(sstr1) + sstr1.length();
                                int idx2 = html_sanitized.substring(idx1, html_sanitized.length())
                                        .indexOf(sstr2);
                                try {
                                    section_indications = html_sanitized.substring(idx1, idx1 + idx2);
                                } catch (StringIndexOutOfBoundsException e) {
                                    e.printStackTrace();
                                }
                            }
                        }

                        // Remove all p's, div's, span's and sup's
                        section_indications = section_indications.replaceAll("\\<p.*?\\>", "")
                                .replaceAll("</p>", "");
                        section_indications = section_indications.replaceAll("\\<div.*?\\>", "")
                                .replaceAll("</div>", "");
                        section_indications = section_indications.replaceAll("\\<span.*?\\>", "")
                                .replaceAll("</span>", "");
                        section_indications = section_indications.replaceAll("\\<sup.*?\\>", "")
                                .replaceAll("</sup>", "");

                        // System.out.println(section_indications);

                        if (CmlOptions.DB_LANGUAGE.equals("fr")) {
                            // Remove apostrophes
                            section_indications = section_indications.replaceAll("l&apos;", "")
                                    .replaceAll("d&apos;", "");
                            section_indications = section_indications.replaceAll("l", "").replaceAll("d", "");
                        }
                        // Remove all URLs
                        section_indications = section_indications.replaceAll(
                                "\\b(http|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]", "");
                        // Remove list of type a) b) c) ... 1) 2) ...
                        section_indications = section_indications.replaceAll("^\\w\\)", "");
                        // Remove numbers, commas, semicolons, parentheses, etc.                        
                        section_indications = section_indications.replaceAll("[^A-Za-z\\xC0-\\xFF- ]", "");
                        // Generate long list of keywords
                        LinkedList<String> wordsAsList = new LinkedList<String>(
                                Arrays.asList(section_indications.split("\\s+")));
                        // Remove stop words
                        Iterator<String> wordIterator = wordsAsList.iterator();
                        while (wordIterator.hasNext()) {
                            // Note: This assumes there are no null entries in the list and all stopwords are stored in lower case
                            String word = wordIterator.next().trim().toLowerCase();
                            if (word.length() < 3 || m.getTitle().toLowerCase().contains(word)
                                    || m_stop_words_hash.contains(word))
                                wordIterator.remove();
                        }
                        section_indications = "";
                        for (String w : wordsAsList) {
                            // Remove any leading dash or hyphen
                            if (w.startsWith("-"))
                                w = w.substring(1);
                            section_indications += (w + ";");
                            if (CmlOptions.INDICATIONS_REPORT == true) {
                                // Add to map (key->value), word = key, value = comma-separated titles of the entries using it
                                // Is word w already stored in treemap?
                                String t_str = tm_indications.get(w);
                                if (t_str == null) {
                                    t_str = m.getTitle();
                                    tm_indications.put(w, t_str);
                                } else {
                                    t_str += (", " + m.getTitle());
                                    tm_indications.put(w, t_str);
                                }
                            }
                        }

                        /*
                         * Update section "Packungen", generate packungen string for shopping cart, and extract therapeutisches index
                         */
                        List<String> mTyIndex_list = new ArrayList<String>();
                        m_list_of_packages.clear();
                        m_list_of_eancodes.clear();
                        String mContent_str = updateSectionPackungen(m.getTitle(), m.getAtcCode(),
                                m_package_info, regnr_str, html_sanitized, mTyIndex_list);

                        m.setContent(mContent_str);

                        // Check if m_pack_info_str is empty AND command line option PLAIN is not active
                        if (CmlOptions.PLAIN == false && m_pack_info_str.isEmpty()) {
                            errors++;
                            if (CmlOptions.GENERATE_REPORTS) {
                                parse_errors.append("<p style=\"color:#bb0000\">ERROR " + errors
                                        + ": SwissmedicNo5 not found in Packungen.xls (Swissmedic) - "
                                        + m.getTitle() + " (" + regnr_str + ")</p>");
                                // Add to owner errors
                                ArrayList<String> error = tm_owner_error.get(m.getAuthHolder());
                                if (error == null)
                                    error = new ArrayList<String>();
                                error.add(m.getTitle() + ";swissmedic5");
                                tm_owner_error.put(m.getAuthHolder(), error);
                            }
                            System.err.println(">> ERROR: " + tot_med_counter
                                    + " - SwissmedicNo5 not found in Swissmedic Packungen.xls - (" + regnr_str
                                    + ") " + m.getTitle());
                            missing_pack_info++;
                        }

                        // Fix problem with wrong div class in original Swissmedic file
                        if (CmlOptions.DB_LANGUAGE.equals("de")) {
                            m.setStyle(m.getStyle().replaceAll("untertitel", "untertitle"));
                            m.setStyle(m.getStyle().replaceAll("untertitel1", "untertitle1"));
                        }

                        // Correct formatting error introduced by Swissmedic
                        m.setAuthHolder(m.getAuthHolder().replaceAll("&#038;", "&"));

                        // Check if substances str has a '$a' and change it to '&alpha'
                        if (m.getSubstances() != null)
                            m.setSubstances(m.getSubstances().replaceAll("\\$a", "&alpha;"));

                        if (CmlOptions.XML_FILE == true) {
                            if (!regnr_str.isEmpty()) {
                                // Generate and add hash code 
                                String html_str_no_timestamp = mContent_str
                                        .replaceAll("<p class=\"footer\">.*?</p>", "");
                                String hash_code = html_utils.calcHashCode(html_str_no_timestamp);

                                // Add header to html file
                                mContent_str = mContent_str.replaceAll("<head>", "<head>"
                                        + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" name=\"fi_"
                                        + hash_code + "\"/>" + "<style>" + amiko_style_v1_str + "</style>");

                                // Note: the following line is not necessary!
                                // m.setContent(mContent_str);

                                // Add header to xml file
                                String xml_str = html_utils.convertHtmlToXml("fi", m.getTitle(), mContent_str,
                                        regnr_str);
                                xml_str = html_utils.addHeaderToXml("singlefi", xml_str);
                                fi_complete_xml += (xml_str + "\n");

                                // Write html and xml files to disk
                                String name = m.getTitle();
                                // Replace all "Sonderzeichen"
                                name = name.replaceAll("[^a-zA-Z0-9]+", "_");
                                if (CmlOptions.DB_LANGUAGE.equals("de")) {
                                    FileOps.writeToFile(mContent_str,
                                            Constants.FI_FILE_XML_BASE + "fi_de_html/", name + "_fi_de.html");
                                    FileOps.writeToFile(xml_str, Constants.FI_FILE_XML_BASE + "fi_de_xml/",
                                            name + "_fi_de.xml");
                                } else if (CmlOptions.DB_LANGUAGE.equals("fr")) {
                                    FileOps.writeToFile(mContent_str,
                                            Constants.FI_FILE_XML_BASE + "fi_fr_html/", name + "_fi_fr.html");
                                    FileOps.writeToFile(xml_str, Constants.FI_FILE_XML_BASE + "fi_fr_xml/",
                                            name + "_fi_fr.xml");
                                }
                            }
                        }

                        int customer_id = 0;
                        // Is the customer paying? If yes add customer id
                        // str1.toLowerCase().contains(str2.toLowerCase())
                        if (m.getAuthHolder().toLowerCase().contains("desitin"))
                            customer_id = 1;
                        /*
                        / HERE GO THE OTHER PAYING CUSTOMERS (increment customer_id respectively)
                        */

                        // Extract (O)riginal / (G)enerika info
                        String orggen_str = "";
                        if (add_info_str != null) {
                            List<String> ai_list = Arrays.asList(add_info_str.split("\\s*;\\s*"));
                            if (ai_list != null) {
                                if (!ai_list.get(0).isEmpty())
                                    orggen_str = ai_list.get(0);
                            }
                        }

                        // @maxl: 25.04.2015 -> set orggen_str to nil (we are using add_info_str for group names now...)
                        orggen_str = "";

                        /*
                         * Add medis, titles and ids to database
                         */
                        String packages_str = "";
                        for (String s : m_list_of_packages)
                            packages_str += s;
                        String eancodes_str = "";
                        for (String e : m_list_of_eancodes)
                            eancodes_str += (e + ", ");
                        if (!eancodes_str.isEmpty() && eancodes_str.length() > 2)
                            eancodes_str = eancodes_str.substring(0, eancodes_str.length() - 2);

                        m_sql_db.addExpertDB(m, packages_str, regnr_str, ids_str, titles_str,
                                atc_description_str, atc_class_str, m_pack_info_str, orggen_str, customer_id,
                                mTyIndex_list, section_indications);
                        m_sql_db.addProductDB(m, packages_str, eancodes_str, m_pack_info_str);

                        med_counter++;
                    }
                }
                tot_med_counter++;
            }
        }
        System.out.println();
        System.out.println("--------------------------------------------");
        System.out.println("Total number of real Fachinfos: " + m_med_list.size());
        System.out.println("Number of FI with package information: " + tot_med_counter);
        System.out.println("Number of FI in generated database: " + med_counter);
        System.out.println("Number of errors in db: " + errors);
        System.out.println("Number of missing reg. nr. (min): " + missing_regnr_str);
        System.out.println("Number of missing pack info: " + missing_pack_info);
        System.out.println("Number of missing atc codes: " + missing_atc_code);
        System.out.println("--------------------------------------------");
        System.out.println("Total number of pseudo Fachinfos: " + tot_pseudo_counter);
        System.out.println("--------------------------------------------");

        if (CmlOptions.XML_FILE == true) {
            fi_complete_xml = html_utils.addHeaderToXml("kompendium", fi_complete_xml);
            // Write kompendium xml file to disk
            if (CmlOptions.DB_LANGUAGE.equals("de")) {
                FileOps.writeToFile(fi_complete_xml, Constants.FI_FILE_XML_BASE, "fi_de.xml");
                if (CmlOptions.ZIP_BIG_FILES)
                    FileOps.zipToFile(Constants.FI_FILE_XML_BASE, "fi_de.xml");
            } else if (CmlOptions.DB_LANGUAGE.equals("fr")) {
                FileOps.writeToFile(fi_complete_xml, Constants.FI_FILE_XML_BASE, "fi_fr.xml");
                if (CmlOptions.ZIP_BIG_FILES)
                    FileOps.zipToFile(Constants.FI_FILE_XML_BASE, "fi_fr.xml");
            }
            // Copy stylesheet file to ./fis/ folders
            try {
                File src = new File(Constants.FILE_STYLE_CSS_BASE + "v1.css");
                File dst_de = new File(Constants.FI_FILE_XML_BASE + "fi_de_html/");
                File dst_fr = new File(Constants.FI_FILE_XML_BASE + "fi_fr_html/");
                if (src.exists()) {
                    if (dst_de.exists())
                        FileUtils.copyFileToDirectory(src, dst_de);
                    if (dst_fr.exists())
                        FileUtils.copyFileToDirectory(src, dst_fr);
                }
            } catch (IOException e) {
                // TODO: Unhandled!
            }
        }

        if (CmlOptions.GENERATE_REPORTS == true) {
            parse_errors.append("<br/>");
            parse_errors
                    .append("<p>Number of medications with package information: " + tot_med_counter + "</p>");
            parse_errors.append("<p>Number of medications in generated database: " + med_counter + "</p>");
            parse_errors.append("<p>Number of errors in database: " + errors + "</p>");
            parse_errors.append("<p>Number of missing registration number: " + missing_regnr_str + "</p>");
            parse_errors.append("<p>Number of missing package info: " + missing_pack_info + "</p>");
            parse_errors.append("<p>Number of missing atc codes: " + missing_atc_code + "</p>");
            parse_errors.append("<br/>");
            // Write and close report file
            parse_errors.writeHtmlToFile();
            parse_errors.getBWriter().close();

            // Write owner error report to file
            ParseReport owner_errors = new ParseReport(Constants.FILE_OWNER_REPORT, CmlOptions.DB_LANGUAGE,
                    "html");
            String report_style_str = FileOps.readCSSfromFile(Constants.FILE_REPORT_CSS_BASE + ".css");
            owner_errors.addStyleSheet(report_style_str);
            if (CmlOptions.DB_LANGUAGE.equals("de"))
                owner_errors.addHtmlHeader("Schweizer Arzneimittel-Kompendium", Constants.FI_DB_VERSION);
            else if (CmlOptions.DB_LANGUAGE.equals("fr"))
                owner_errors.addHtmlHeader("Compendium des Mdicaments Suisse", Constants.FI_DB_VERSION);
            owner_errors.append(owner_errors.treemapToHtmlTable(tm_owner_error));
            owner_errors.writeHtmlToFile();
            owner_errors.getBWriter().close();
            // Dump to console...
            /*
            for (Map.Entry<String, ArrayList<String>> entry : tm_owner_error.entrySet()) {
               String author = entry.getKey();
               ArrayList<String> list = entry.getValue();
               for (String error : list)
                  System.out.println(author + " -> " + error);
            }
            */
        }

        if (CmlOptions.INDICATIONS_REPORT == true) {
            // Dump everything to file
            bw_indications.write("Total number of words: " + tm_indications.size() + "\n\n");
            for (Map.Entry<String, String> entry : tm_indications.entrySet()) {
                String key = entry.getKey();
                String value = entry.getValue();
                bw_indications.write(key + " [" + value + "]\n");
            }
            bw_indications.close();
        }

        if (CmlOptions.DB_LANGUAGE.equals("de")) {
            // Dump set to file, currently we do this only for German
            File atccodes_file = new File("./output/atc_codes_used_set.txt");
            if (!atccodes_file.exists()) {
                atccodes_file.getParentFile().mkdirs();
                atccodes_file.createNewFile();
            }
            FileWriter fwriter = new FileWriter(atccodes_file.getAbsoluteFile());
            BufferedWriter bwriter = new BufferedWriter(fwriter);

            Iterator<String> set_iterator = atccode_set.iterator();
            while (set_iterator.hasNext()) {
                bwriter.write(set_iterator.next() + "\n");
            }
            bwriter.close();
        }

        System.out.println("");

    } catch (IOException e) {
        e.printStackTrace();
    }
}