List of usage examples for org.jsoup.nodes.Element.text
public Element text(String text)
From source file:DataCrawler.OpenAIRE.XMLGenerator.java
public static void main(String[] args) { String text = ""; try {/*from ww w .j a v a 2 s . c om*/ if (args.length < 4) { System.out.println("<command> template_file csv_file output_dir log_file [start_id]"); } // InputStream fis = new FileInputStream("E:/Downloads/result-r-00000"); InputStream fis = new FileInputStream(args[1]); BufferedReader br = new BufferedReader(new InputStreamReader(fis, Charset.forName("UTF-8"))); // String content = new String(Files.readAllBytes(Paths.get("publications_template.xml"))); String content = new String(Files.readAllBytes(Paths.get(args[0]))); Document doc = Jsoup.parse(content, "UTF-8", Parser.xmlParser()); // String outputDirectory = "G:/"; String outputDirectory = args[2]; // PrintWriter logWriter = new PrintWriter(new FileOutputStream("publication.log",false)); PrintWriter logWriter = new PrintWriter(new FileOutputStream(args[3], false)); Element objectId = null, title = null, publisher = null, dateofacceptance = null, bestlicense = null, resulttype = null, originalId = null, originalId2 = null; boolean start = true; // String startID = "dedup_wf_001::207a098867b64f3b5af505fa3aeecd24"; String startID = ""; if (args.length >= 5) { start = false; startID = args[4]; } String previousText = ""; while ((text = br.readLine()) != null) { /* For publications: 0. dri:objIdentifier context 9. title context 12. publisher context 18. dateofacceptance 19. bestlicense @classname 21. resulttype @classname 26. 
originalId context (Notice that the prefix is null and will use space to separate two different "originalId") */ if (!previousText.isEmpty()) { text = previousText + text; start = true; previousText = ""; } String[] items = text.split("!"); for (int i = 0; i < items.length; ++i) { items[i] = StringUtils.strip(items[i], "#"); } if (objectId == null) objectId = doc.getElementsByTag("dri:objIdentifier").first(); objectId.text(items[0]); if (!start && items[0].equals(startID)) { start = true; } if (title == null) title = doc.getElementsByTag("title").first(); title.text(items[9]); if (publisher == null) publisher = doc.getElementsByTag("publisher").first(); if (items.length < 12) { previousText = text; continue; } publisher.text(items[12]); if (dateofacceptance == null) dateofacceptance = doc.getElementsByTag("dateofacceptance").first(); dateofacceptance.text(items[18]); if (bestlicense == null) bestlicense = doc.getElementsByTag("bestlicense").first(); bestlicense.attr("classname", items[19]); if (resulttype == null) resulttype = doc.getElementsByTag("resulttype").first(); resulttype.attr("classname", items[21]); if (originalId == null || originalId2 == null) { Elements elements = doc.getElementsByTag("originalId"); String[] context = items[26].split(" "); if (elements.size() > 0) { if (elements.size() >= 1) { originalId = elements.get(0); if (context.length >= 1) { int indexOfnull = context[0].trim().indexOf("null"); String value = ""; if (indexOfnull != -1) { if (context[0].trim().length() >= (indexOfnull + 5)) value = context[0].trim().substring(indexOfnull + 5); } else { value = context[0].trim(); } originalId.text(value); } } if (elements.size() >= 2) { originalId2 = elements.get(1); if (context.length >= 2) { int indexOfnull = context[1].trim().indexOf("null"); String value = ""; if (indexOfnull != -1) { if (context[1].trim().length() >= (indexOfnull + 5)) value = context[1].trim().substring(indexOfnull + 5); } else { value = context[1].trim(); } 
originalId2.text(value); } } } } else { String[] context = items[26].split(" "); if (context.length >= 1) { int indexOfnull = context[0].trim().indexOf("null"); String value = ""; if (indexOfnull != -1) { if (context[0].trim().length() >= (indexOfnull + 5)) value = context[0].trim().substring(indexOfnull + 5); } else { value = context[0].trim(); } originalId.text(value); } if (context.length >= 2) { int indexOfnull = context[1].trim().indexOf("null"); String value = ""; if (indexOfnull != -1) { if (context[1].trim().length() >= (indexOfnull + 5)) value = context[1].trim().substring(indexOfnull + 5); } else { value = context[1].trim(); } originalId2.text(value); } } if (start) { String filePath = outputDirectory + items[0].replace(":", "#") + ".xml"; PrintWriter writer = new PrintWriter(new FileOutputStream(filePath, false)); logWriter.write(filePath + " > Start" + System.lineSeparator()); writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + System.lineSeparator()); writer.write(doc.getElementsByTag("response").first().toString()); writer.close(); logWriter.write(filePath + " > OK" + System.lineSeparator()); logWriter.flush(); } } logWriter.close(); } catch (Exception e) { e.printStackTrace(); } }
From source file:com.astamuse.asta4d.web.form.field.SimpleFormFieldValueRenderer.java
protected Element createAlternativeDisplayElement(String nonNullString) { Element span = new Element(Tag.valueOf("span"), ""); span.text(nonNullString); return span;//from w ww .ja v a 2 s .co m }
From source file:blackman.matt.board.Post.java
/** * Formats the HTML on the post text to accurately display it on the post. * * @param post The unformatted text of the post. * @return A formatted version of the post. *///from w w w . java 2 s. c o m private String formatPostBody(String post) { Document formattedText = Jsoup.parse(post); Pattern p = Pattern.compile("^/.*/index\\.html"); // Red Text Elements redTexts = formattedText.getElementsByClass("heading"); for (Element text : redTexts) { text.wrap("<font color=\"#AF0A0F\"><strong></strong></font>"); } // Green text Elements greenTexts = formattedText.getElementsByClass("quote"); for (Element text : greenTexts) { text.wrap("<font color=\"#789922\"></font>"); } // Board Links Elements boardLinks = formattedText.select("a"); for (Element link : boardLinks) { String url = link.attr("href"); Matcher m = p.matcher(url); if (m.matches()) { link.attr("href", "http://8chan.co" + url); } } // Reply links Elements replyLinks = formattedText.select("a[onclick^=highlightReply"); for (Element reply : replyLinks) { repliedTo.add(reply.attr("href").split("#")[1]); boardLinks.attr("href", "http://8chan.co" + reply.attr("href")); } // Post too long text removal Elements tooLongs = formattedText.getElementsByClass("toolong"); for (Element text : tooLongs) { text.text(""); } return formattedText.toString(); }
From source file:com.semfapp.adamdilger.semf.NonConformanceActivity.java
public void createPdf() { Document document = Pdf.getTemplate(getApplicationContext(), data.getJobNumber()); try {/*w w w .j a v a 2 s . com*/ Document body = Jsoup.parse(getAssets().open("nonConformance.html"), "utf-8", "http://www.example.com"); Element site = body.getElementById("site"); Element siteLocation = body.getElementById("site_location"); Element recipient = body.getElementById("recipient"); Element recipientEmail = body.getElementById("recipient_email"); Element description = body.getElementById("description_list"); Element actions = body.getElementById("actions_list"); String[] descriptionArray, actionsArray; descriptionArray = data.getDescription().split(System.lineSeparator()); actionsArray = data.getActions().split(System.lineSeparator()); site.text(data.getSite()); siteLocation.text(data.getLocation()); recipient.html("<p>" + data.getRecipient() + "</p>"); recipientEmail.html("<p>" + data.getRecipientEmail() + "</p>"); //add each bullet from arrays as a <p> for (int x = 0; x < descriptionArray.length; x++) { String f = ""; for (String bullet : descriptionArray) { f += "<p>" + bullet + "</p>"; } description.html(f); } for (int x = 0; x < actionsArray.length; x++) { String f = ""; for (String bullet : actionsArray) { f += "<p>" + bullet + "</p>"; } actions.html(f); } document.getElementById("main").html(body.html()); } catch (Exception e) { System.out.println("ERROR: " + e.toString()); } String filePath = MainActivity.pdf.createFilePath(this, "Non Conformance"); MainActivity.pdf.createPdfToFile(this, document.html(), filePath, null); pdfAttatchment = new File(filePath); }
From source file:com.semfapp.adamdilger.semf.SiteInstructionActivity.java
public void createPdf() { Document documentTemplate = Pdf.getTemplate(getApplicationContext(), data.getJobNumber()); try {/*from www .j a v a2 s . com*/ Document body = Jsoup.parse(getAssets().open("siteInstruction.html"), "utf-8", "http://www.example.com"); Element site = body.getElementById("site"); Element siteLocation = body.getElementById("site_location"); Element recipient = body.getElementById("recipient"); Element recipientEmail = body.getElementById("recipient_email"); Element description = body.getElementById("description_list"); String[] descriptionArray; descriptionArray = data.getDescription().split(System.lineSeparator()); site.text(data.getSite()); siteLocation.text(data.getLocation()); recipient.html("<p>" + data.getRecipient() + "</p>"); recipientEmail.html("<p>" + data.getRecipientEmail() + "</p>"); //add each bullet from arrays as a <p> for (int x = 0; x < descriptionArray.length; x++) { String f = ""; for (String bullet : descriptionArray) { f += "<p>" + bullet + "</p>"; } description.html(f); } documentTemplate.getElementById("main").html(body.html()); } catch (Exception e) { System.out.println("ERROR: " + e.toString()); } name = Emailer.getSubject(Emailer.SITE_INSTRUCTION_CODE, data.getJobNumber()); String filePath = MainActivity.pdf.createFilePath(this, name); MainActivity.pdf.createPdfToFile(this, documentTemplate.html(), filePath, data.getImageArray()); pdfAttatchment = new File(filePath); }
From source file:org.asqatasun.rules.doc.utils.rga33.extractor.Rgaa3Extractor.java
private static void createTestcaseFiles() throws IOException { File srcDir = new File(RGAA3_TESTCASE_PATH); for (File file : srcDir.listFiles()) { String fileName = file.getName().replace("Rgaa30Rule", "").replace(".java", ""); String theme = fileName.substring(0, 2); String crit = fileName.substring(2, 4); String test = fileName.substring(4, 6); String testKey = Integer.valueOf(theme).toString() + "-" + Integer.valueOf(crit).toString() + "-" + Integer.valueOf(test).toString(); String wrongKey = theme + "." + crit + "." + test; for (File testcase : file.listFiles()) { if (testcase.isFile() && testcase.getName().contains("html")) { Document doc = Jsoup.parse(FileUtils.readFileToString(testcase)); Element detail = doc.select(".test-detail").first(); if (detail == null) { System.out.println(doc.outerHtml()); } else { detail.tagName("div"); detail.text(""); for (Element el : detail.children()) { el.remove();// ww w. j a va 2 s. com } if (!detail.hasAttr("lang")) { detail.attr("lang", "fr"); } detail.append("\n" + RGAA3.get(testKey).ruleRawHtml + "\n"); doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml); doc.outputSettings().outline(false); doc.outputSettings().indentAmount(4); String outputHtml = doc.outerHtml(); if (outputHtml.contains(wrongKey)) { outputHtml = outputHtml.replaceAll(wrongKey, RGAA3.get(testKey).getRuleDot()); } FileUtils.writeStringToFile(testcase, outputHtml); } } } } }
From source file:org.craftercms.social.migration.controllers.MainController.java
protected void getHtml(final FileWriter writer) throws TransformerException, IOException { final URL in = getClass().getResource( MigrationTool.systemProperties.getString("crafter" + ".migration" + "" + ".loggerTemplate")); if (in == null) { log.error("Unable to find {} " + MigrationTool.systemProperties.getString("crafter" + ".migration" + "" + ".loggerTemplate")); }/*from w ww . java 2 s . c o m*/ final Document loggingDoc = Jsoup.parse(IOUtils.toString(in)); final Element logs = loggingDoc.getElementById("logs"); for (Object o : logTable.getItems()) { if (o instanceof UserLogEntry) { UserLogEntry userLogEntry = (UserLogEntry) o; String dateFormat = new SimpleDateFormat("yyyy MM dd hh:mm:ss zzz").format(userLogEntry.getDate()); final Element tr = loggingDoc.createElement("tr"); tr.attr("class", userLogEntry.getLevel().getCssClass()); final Element tmigrator = loggingDoc.createElement("td"); final Element tdate = loggingDoc.createElement("td"); final Element tmessage = loggingDoc.createElement("td"); tmessage.attr("class", "text-center"); tmessage.text(userLogEntry.getMessage()); tdate.text(dateFormat); tmigrator.text(userLogEntry.getSource()); tr.appendChild(tmigrator); tr.appendChild(tdate); tr.appendChild(tmessage); logs.appendChild(tr); } } IOUtils.write(loggingDoc.toString(), writer); // Transformer transformer = TransformerFactory.newInstance().newTransformer(); // transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no"); // transformer.setOutputProperty(OutputKeys.METHOD, "xml"); // transformer.setOutputProperty(OutputKeys.INDENT, "yes"); // transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); // transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4"); // transformer.transform(new DOMSource(loggingDoc), new StreamResult(writer)); writer.flush(); writer.close(); }