Example usage for org.jsoup.nodes Element text

List of usage examples for org.jsoup.nodes Element text

Introduction

On this page you can find example usages of the org.jsoup.nodes.Element method text(String).

Prototype

public Element text(String text) 

Source Link

Document

Set the text of this element.

Usage

From source file:DataCrawler.OpenAIRE.XMLGenerator.java

/**
 * Generates one XML file per publication record by filling in a jsoup-parsed XML template.
 *
 * <p>Arguments: {@code template_file csv_file output_dir log_file [start_id]}. Each line of
 * {@code csv_file} is split on {@code '!'} and every field has its surrounding {@code '#'}
 * characters stripped. When {@code start_id} is given, records are still applied to the
 * template but no file is written until a record whose first field equals {@code start_id}
 * has been seen.
 *
 * <p>Fields read from each record, by index: 0 dri:objIdentifier text, 9 title text,
 * 12 publisher text, 18 dateofacceptance text, 19 bestlicense {@code classname} attribute,
 * 21 resulttype {@code classname} attribute, 26 originalId text (up to two space-separated
 * values, each optionally carrying a "null" prefix).
 *
 * <p>The output path is {@code output_dir} concatenated directly with the record id (with
 * {@code ':'} replaced by {@code '#'}) and {@code ".xml"}, so {@code output_dir} has to end
 * with a path separator.
 *
 * @param args command-line arguments as described above
 */
public static void main(String[] args) {
    if (args.length < 4) {
        System.out.println("<command> template_file csv_file output_dir log_file [start_id]");
        // The four mandatory arguments are indexed unconditionally below, so stop here.
        return;
    }

    // Index 26 (originalId) is the highest field read, so a complete record has >= 27 fields.
    final int requiredFieldCount = 27;

    try {
        String content = new String(Files.readAllBytes(Paths.get(args[0])));
        // NOTE(review): in this jsoup overload the second argument is the base URI, not a
        // charset; it is kept unchanged here - confirm whether "" was intended.
        Document doc = Jsoup.parse(content, "UTF-8", Parser.xmlParser());
        String outputDirectory = args[2];

        // Only text and attribute values are changed below, never the element structure,
        // so every target element can be looked up once.
        Element objectId = doc.getElementsByTag("dri:objIdentifier").first();
        Element title = doc.getElementsByTag("title").first();
        Element publisher = doc.getElementsByTag("publisher").first();
        Element dateofacceptance = doc.getElementsByTag("dateofacceptance").first();
        Element bestlicense = doc.getElementsByTag("bestlicense").first();
        Element resulttype = doc.getElementsByTag("resulttype").first();
        Elements originalIds = doc.getElementsByTag("originalId");
        Element originalId = originalIds.size() >= 1 ? originalIds.get(0) : null;
        Element originalId2 = originalIds.size() >= 2 ? originalIds.get(1) : null;

        boolean start = true;
        String startID = "";
        if (args.length >= 5) {
            start = false;
            startID = args[4];
        }

        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(args[1]), Charset.forName("UTF-8")));
                PrintWriter logWriter = new PrintWriter(new FileOutputStream(args[3], false))) {
            String previousText = "";
            String text;
            while ((text = br.readLine()) != null) {
                if (!previousText.isEmpty()) {
                    // The previous line was an incomplete record: glue this line onto it.
                    text = previousText + text;
                    // NOTE(review): this also enables output before start_id has been
                    // reached; behaviour kept from the original - confirm it is intended.
                    start = true;
                    previousText = "";
                }

                String[] items = text.split("!");
                for (int i = 0; i < items.length; ++i) {
                    items[i] = StringUtils.strip(items[i], "#");
                }

                // A line with too few fields is treated as the first part of a record that
                // continues on the next line. This check runs before any field is indexed.
                if (items.length < requiredFieldCount) {
                    previousText = text;
                    continue;
                }

                objectId.text(items[0]);
                if (!start && items[0].equals(startID)) {
                    start = true;
                }

                title.text(items[9]);
                publisher.text(items[12]);
                dateofacceptance.text(items[18]);
                bestlicense.attr("classname", items[19]);
                resulttype.attr("classname", items[21]);

                // NOTE(review): when a record carries fewer originalId values than the
                // template has elements, the element keeps the previous record's text.
                String[] context = items[26].split(" ");
                if (originalId != null && context.length >= 1) {
                    originalId.text(originalIdValue(context[0]));
                }
                if (originalId2 != null && context.length >= 2) {
                    originalId2.text(originalIdValue(context[1]));
                }

                if (start) {
                    String filePath = outputDirectory + items[0].replace(":", "#") + ".xml";
                    // Written as UTF-8 so the bytes match the XML declaration emitted below.
                    try (PrintWriter writer = new PrintWriter(filePath, "UTF-8")) {
                        logWriter.write(filePath + " > Start" + System.lineSeparator());
                        writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + System.lineSeparator());
                        writer.write(doc.getElementsByTag("response").first().toString());
                    }
                    logWriter.write(filePath + " > OK" + System.lineSeparator());
                    logWriter.flush();
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

/**
 * Extracts one originalId value from a raw token.
 *
 * <p>The token is trimmed. If it contains "null", the result is whatever follows "null" plus
 * one more character (presumably a separator - confirm against the data), or the empty string
 * when nothing follows. Without "null" the trimmed token is returned unchanged.
 */
private static String originalIdValue(String token) {
    String trimmed = token.trim();
    int indexOfNull = trimmed.indexOf("null");
    if (indexOfNull == -1) {
        return trimmed;
    }
    int valueStart = indexOfNull + 5;
    return trimmed.length() >= valueStart ? trimmed.substring(valueStart) : "";
}

From source file:com.astamuse.asta4d.web.form.field.SimpleFormFieldValueRenderer.java

/**
 * Builds a detached {@code <span>} element whose text content is the given string.
 *
 * @param nonNullString the text to place inside the span
 * @return the newly created span element
 */
protected Element createAlternativeDisplayElement(String nonNullString) {
    // text(String) returns the element it was called on, so the calls can be chained.
    return new Element(Tag.valueOf("span"), "").text(nonNullString);
}

From source file:blackman.matt.board.Post.java

/**
 * Formats the HTML of the post text so it displays correctly on the post.
 *
 * <p>Elements with class "heading" and "quote" are wrapped in coloured {@code <font>} tags,
 * site-relative board links and reply links are made absolute, the target post id of every
 * reply link is added to {@code repliedTo}, and the text of "toolong" elements is cleared.
 *
 * @param post The unformatted text of the post.
 * @return A formatted version of the post.
 */
private String formatPostBody(String post) {
    Document formattedText = Jsoup.parse(post);
    Pattern p = Pattern.compile("^/.*/index\\.html");

    // Red Text
    Elements redTexts = formattedText.getElementsByClass("heading");
    for (Element text : redTexts) {
        text.wrap("<font color=\"#AF0A0F\"><strong></strong></font>");
    }

    // Green text
    Elements greenTexts = formattedText.getElementsByClass("quote");
    for (Element text : greenTexts) {
        text.wrap("<font color=\"#789922\"></font>");
    }

    // Board Links
    Elements boardLinks = formattedText.select("a");
    for (Element link : boardLinks) {
        String url = link.attr("href");
        Matcher m = p.matcher(url);
        if (m.matches()) {
            link.attr("href", "http://8chan.co" + url);
        }
    }

    // Reply links (the attribute selector needs its closing bracket to be well-formed)
    Elements replyLinks = formattedText.select("a[onclick^=highlightReply]");
    for (Element reply : replyLinks) {
        String href = reply.attr("href");
        String[] hrefParts = href.split("#");
        // Only record a replied-to id when the link actually carries a "#<id>" fragment.
        if (hrefParts.length > 1) {
            repliedTo.add(hrefParts[1]);
        }
        // Rewrite this reply link only; writing through boardLinks would overwrite the
        // href of every anchor in the post.
        reply.attr("href", "http://8chan.co" + href);
    }

    // Post too long text removal
    Elements tooLongs = formattedText.getElementsByClass("toolong");
    for (Element text : tooLongs) {
        text.text("");
    }

    return formattedText.toString();
}

From source file:com.semfapp.adamdilger.semf.NonConformanceActivity.java

/**
 * Fills the "nonConformance.html" asset with the current form data, embeds it in the PDF
 * template's "main" element, writes the PDF to a file and stores that file in
 * {@code pdfAttatchment}.
 *
 * <p>If filling the template fails, the error is printed and the PDF is still created from
 * whatever the template document contains at that point.
 */
public void createPdf() {
    Document document = Pdf.getTemplate(getApplicationContext(), data.getJobNumber());

    try {
        Document body = Jsoup.parse(getAssets().open("nonConformance.html"), "utf-8", "http://www.example.com");

        Element site = body.getElementById("site");
        Element siteLocation = body.getElementById("site_location");
        Element recipient = body.getElementById("recipient");
        Element recipientEmail = body.getElementById("recipient_email");
        Element description = body.getElementById("description_list");
        Element actions = body.getElementById("actions_list");

        String[] descriptionArray = data.getDescription().split(System.lineSeparator());
        String[] actionsArray = data.getActions().split(System.lineSeparator());

        site.text(data.getSite());
        siteLocation.text(data.getLocation());
        // NOTE(review): these values are inserted as raw HTML; escape them if they can
        // contain markup.
        recipient.html("<p>" + data.getRecipient() + "</p>");
        recipientEmail.html("<p>" + data.getRecipientEmail() + "</p>");

        // Add each bullet as a <p>. An empty array leaves the template content untouched.
        if (descriptionArray.length > 0) {
            description.html(paragraphsOf(descriptionArray));
        }
        if (actionsArray.length > 0) {
            actions.html(paragraphsOf(actionsArray));
        }

        document.getElementById("main").html(body.html());

    } catch (Exception e) {
        System.out.println("ERROR: " + e.toString());
    }

    String filePath = MainActivity.pdf.createFilePath(this, "Non Conformance");

    MainActivity.pdf.createPdfToFile(this, document.html(), filePath, null);

    pdfAttatchment = new File(filePath);
}

/** Wraps every entry of {@code bullets} in its own {@code <p>} element and joins them. */
private static String paragraphsOf(String[] bullets) {
    StringBuilder html = new StringBuilder();
    for (String bullet : bullets) {
        html.append("<p>").append(bullet).append("</p>");
    }
    return html.toString();
}

From source file:com.semfapp.adamdilger.semf.SiteInstructionActivity.java

/**
 * Fills the "siteInstruction.html" asset with the current form data, embeds it in the PDF
 * template's "main" element, writes the PDF (with the attached images) to a file and stores
 * that file in {@code pdfAttatchment}.
 *
 * <p>If filling the template fails, the error is printed and the PDF is still created from
 * whatever the template document contains at that point.
 */
public void createPdf() {
    Document documentTemplate = Pdf.getTemplate(getApplicationContext(), data.getJobNumber());
    try {
        Document body = Jsoup.parse(getAssets().open("siteInstruction.html"), "utf-8",
                "http://www.example.com");

        Element site = body.getElementById("site");
        Element siteLocation = body.getElementById("site_location");
        Element recipient = body.getElementById("recipient");
        Element recipientEmail = body.getElementById("recipient_email");
        Element description = body.getElementById("description_list");

        String[] descriptionArray = data.getDescription().split(System.lineSeparator());

        site.text(data.getSite());
        siteLocation.text(data.getLocation());
        // NOTE(review): these values are inserted as raw HTML; escape them if they can
        // contain markup.
        recipient.html("<p>" + data.getRecipient() + "</p>");
        recipientEmail.html("<p>" + data.getRecipientEmail() + "</p>");

        // Add each bullet as a <p>. An empty array leaves the template content untouched.
        if (descriptionArray.length > 0) {
            StringBuilder paragraphs = new StringBuilder();
            for (String bullet : descriptionArray) {
                paragraphs.append("<p>").append(bullet).append("</p>");
            }
            description.html(paragraphs.toString());
        }

        documentTemplate.getElementById("main").html(body.html());
    } catch (Exception e) {
        System.out.println("ERROR: " + e.toString());
    }

    name = Emailer.getSubject(Emailer.SITE_INSTRUCTION_CODE, data.getJobNumber());
    String filePath = MainActivity.pdf.createFilePath(this, name);

    MainActivity.pdf.createPdfToFile(this, documentTemplate.html(), filePath, data.getImageArray());

    pdfAttatchment = new File(filePath);
}

From source file:org.asqatasun.rules.doc.utils.rga33.extractor.Rgaa3Extractor.java

/**
 * Rewrites the HTML test-case files found below {@code RGAA3_TESTCASE_PATH}.
 *
 * <p>Each sub-directory name is expected to contain a six-digit theme/criterion/test code
 * once "Rgaa30Rule" and ".java" are removed. For every file in it whose name contains "html",
 * the first {@code .test-detail} element is turned into a {@code <div>}, emptied, given
 * {@code lang="fr"} if it has no {@code lang}, and filled with the rule's raw HTML. Literal
 * occurrences of the zero-padded dotted key are then replaced by the rule's dotted key and
 * the file is written back in place. Documents without a {@code .test-detail} element are
 * printed to standard output and left untouched.
 *
 * @throws IOException if the test-case directory cannot be listed or a file cannot be
 *         read or written
 */
private static void createTestcaseFiles() throws IOException {
    File srcDir = new File(RGAA3_TESTCASE_PATH);
    File[] ruleDirs = srcDir.listFiles();
    if (ruleDirs == null) {
        // listFiles() returns null when the path is not a readable directory.
        throw new IOException("Cannot list test-case directory " + srcDir);
    }
    for (File file : ruleDirs) {
        File[] testcases = file.listFiles();
        if (testcases == null) {
            // Not a directory (or unreadable): there are no test cases to rewrite here.
            continue;
        }
        String fileName = file.getName().replace("Rgaa30Rule", "").replace(".java", "");
        String theme = fileName.substring(0, 2);
        String crit = fileName.substring(2, 4);
        String test = fileName.substring(4, 6);
        String testKey = Integer.valueOf(theme).toString() + "-" + Integer.valueOf(crit).toString() + "-"
                + Integer.valueOf(test).toString();
        String wrongKey = theme + "." + crit + "." + test;
        for (File testcase : testcases) {
            if (testcase.isFile() && testcase.getName().contains("html")) {
                Document doc = Jsoup.parse(FileUtils.readFileToString(testcase));
                Element detail = doc.select(".test-detail").first();
                if (detail == null) {
                    System.out.println(doc.outerHtml());
                } else {
                    detail.tagName("div");
                    // Setting the text clears all existing child nodes, so no separate
                    // child-removal pass is needed.
                    detail.text("");
                    if (!detail.hasAttr("lang")) {
                        detail.attr("lang", "fr");
                    }
                    detail.append("\n" + RGAA3.get(testKey).ruleRawHtml + "\n");
                    doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
                    doc.outputSettings().outline(false);
                    doc.outputSettings().indentAmount(4);
                    String outputHtml = doc.outerHtml();
                    if (outputHtml.contains(wrongKey)) {
                        // Literal replacement: the dots in wrongKey must not act as regex
                        // wildcards, and the replacement must not be parsed for '$' or '\'.
                        outputHtml = outputHtml.replace(wrongKey, RGAA3.get(testKey).getRuleDot());
                    }
                    FileUtils.writeStringToFile(testcase, outputHtml);
                }
            }
        }
    }
}

From source file:org.craftercms.social.migration.controllers.MainController.java

/**
 * Renders the user log entries of {@code logTable} into the HTML logger template and writes
 * the result to {@code writer}.
 *
 * <p>The template is the classpath resource named by the "crafter.migration.loggerTemplate"
 * system property. One {@code <tr>} (source, date, message) is appended to the template's
 * {@code #logs} element for every {@code UserLogEntry} item. The writer is always closed
 * before this method returns, whether it succeeds or fails.
 *
 * @param writer destination for the generated HTML; closed by this method
 * @throws TransformerException declared for callers; nothing in this method throws it
 * @throws IOException if the template resource or its {@code #logs} element is missing, or
 *         if reading the template or writing the output fails
 */
protected void getHtml(final FileWriter writer) throws TransformerException, IOException {
    try {
        final String templatePath = MigrationTool.systemProperties.getString("crafter.migration.loggerTemplate");
        final URL in = getClass().getResource(templatePath);
        if (in == null) {
            log.error("Unable to find {}", templatePath);
            // Fail explicitly instead of running into a NullPointerException below.
            throw new IOException("Unable to find logger template " + templatePath);
        }
        final Document loggingDoc = Jsoup.parse(IOUtils.toString(in));
        final Element logs = loggingDoc.getElementById("logs");
        if (logs == null) {
            throw new IOException("Logger template " + templatePath + " has no element with id 'logs'");
        }
        // NOTE(review): "hh" is the 12-hour clock and the pattern has no am/pm marker;
        // kept unchanged - confirm whether "HH" was intended.
        final SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyy MM dd hh:mm:ss zzz");
        for (Object o : logTable.getItems()) {
            if (o instanceof UserLogEntry) {
                UserLogEntry userLogEntry = (UserLogEntry) o;
                final Element tr = loggingDoc.createElement("tr");
                tr.attr("class", userLogEntry.getLevel().getCssClass());
                final Element tmigrator = loggingDoc.createElement("td");
                final Element tdate = loggingDoc.createElement("td");
                final Element tmessage = loggingDoc.createElement("td");
                tmessage.attr("class", "text-center");
                tmessage.text(userLogEntry.getMessage());
                tdate.text(timestampFormat.format(userLogEntry.getDate()));
                tmigrator.text(userLogEntry.getSource());
                tr.appendChild(tmigrator);
                tr.appendChild(tdate);
                tr.appendChild(tmessage);
                logs.appendChild(tr);
            }
        }
        IOUtils.write(loggingDoc.toString(), writer);
        writer.flush();
    } finally {
        writer.close();
    }
}