List of usage examples for org.jsoup.nodes Element remove
public void remove()
From source file:org.brnvrn.Main.java
/** * Parse a tr HTML element describing the tool * @param tool is to be updated//from ww w. j a v a 2 s . c om * @param tr brings the data * @return true if successful */ private static boolean parseTrTool(Tool tool, Element tr) { boolean success = true; Element nameLink = tr.select("td:eq(0)").first(); if (nameLink == null) return false; tool.setName(nameLink.text()); tool.setUrl(nameLink.getElementsByTag("a").attr("href")); tool.setLicense(tr.select("td:eq(2)").first().text()); tool.setCompatibility(tr.select("td:eq(3)").first().text()); // More complicated: We will extract and remove known nodes, the rest will be description Element tdDescription = tr.select("td:eq(1)").first(); Elements smalls = tdDescription.getElementsByTag("small"); for (Element small : smalls) { Element author = small.getElementsContainingText("Author").first(); if (author != null) { String authorsString = author.text(); authorsString = authorsString.substring(authorsString.indexOf(":") + 1); tool.addAuthor(authorsString.split(",")); small.remove(); } Element sourceCode = small.getElementsContainingText("ource").last(); if (sourceCode != null) { tool.setUrl_src(sourceCode.attr("href")); small.remove(); } } tdDescription.getElementsByTag("br").remove(); tool.setDescription(Jsoup.clean(tdDescription.html(), Whitelist.relaxed())); // ownText will miss the contained links in the description tool.setDescriptionText(tdDescription.text()); bestEffortThemeLanguage(tool); return success; }
From source file:com.switchfly.inputvalidation.sanitizer.StripHtmlSanitizer.java
/**
 * Reduces HTML input to its plain text.
 *
 * <p>Blank input (as judged by {@code StringUtils.isBlank}) is handed back unchanged. Otherwise
 * the content is parsed, every {@code script}, {@code link}, {@code iframe} and {@code style}
 * element is dropped from the tree, and the text of what is left is returned.
 *
 * @param content the raw, possibly HTML-bearing input
 * @return {@code content} itself when blank, otherwise the text of the parsed document
 */
@Override
public String execute(String content) {
    if (StringUtils.isBlank(content)) {
        return content;
    }

    Document parsed = Jsoup.parse(content);
    parsed.outputSettings().escapeMode(Entities.EscapeMode.xhtml);

    // Detach all matched elements from the tree in one go.
    parsed.select("script,link,iframe,style").remove();

    return parsed.text();
}
From source file:com.astamuse.asta4d.render.RenderUtil.java
/** * Find out all the snippet in the passed Document and execute them. The Containing embed tag of the passed Document will be exactly * mixed in here too. <br>//from www . ja v a 2s . c o m * Recursively contained snippets will be executed from outside to inside, thus the inner snippets will not be executed until all of * their outer snippets are finished. Also, the dynamically created snippets and embed tags will comply with this rule too. * * @param doc * the Document to apply snippets * @throws SnippetNotResovlableException * @throws SnippetInvokeException * @throws TemplateException */ public final static void applySnippets(Document doc) throws SnippetNotResovlableException, SnippetInvokeException, TemplateException, TemplateNotFoundException { if (doc == null) { return; } applyClearAction(doc, false); // retrieve ready snippets String selector = SelectorUtil.attr(ExtNodeConstants.SNIPPET_NODE_TAG_SELECTOR, ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS, ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_READY); List<Element> snippetList = new ArrayList<>(doc.select(selector)); int readySnippetCount = snippetList.size(); int blockedSnippetCount = 0; for (int i = readySnippetCount - 1; i >= 0; i--) { // if parent snippet has not been executed, the current snippet will // not be executed too. if (isBlockedByParentSnippet(doc, snippetList.get(i))) { snippetList.remove(i); blockedSnippetCount++; } } readySnippetCount = readySnippetCount - blockedSnippetCount; String renderDeclaration; Renderer renderer; Context context = Context.getCurrentThreadContext(); Configuration conf = Configuration.getConfiguration(); final SnippetInvoker invoker = conf.getSnippetInvoker(); String refId; String currentTemplatePath; Element renderTarget; for (Element element : snippetList) { if (!conf.isSkipSnippetExecution()) { // for a faked snippet node which is created by template // analyzing process, the render target element should be its // child. 
if (element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE) .equals(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE_FAKE)) { renderTarget = element.children().first(); // the hosting element of this faked snippet has been removed by outer a snippet if (renderTarget == null) { element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS, ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_FINISHED); continue; } } else { renderTarget = element; } // we have to reset the ref of current snippet at every time to make sure the ref is always unique(duplicated snippet ref // could be created by list rendering) TemplateUtil.resetSnippetRefs(element); context.setCurrentRenderingElement(renderTarget); renderDeclaration = element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_RENDER); refId = element.attr(ExtNodeConstants.ATTR_SNIPPET_REF); currentTemplatePath = element.attr(ExtNodeConstants.ATTR_TEMPLATE_PATH); context.setCurrentRenderingElement(renderTarget); context.setData(TRACE_VAR_TEMPLATE_PATH, currentTemplatePath); try { if (element.hasAttr(ExtNodeConstants.SNIPPET_NODE_ATTR_PARALLEL)) { ConcurrentRenderHelper crHelper = ConcurrentRenderHelper.getInstance(context, doc); final Context newContext = context.clone(); final String declaration = renderDeclaration; crHelper.submitWithContext(newContext, declaration, refId, new Callable<Renderer>() { @Override public Renderer call() throws Exception { return invoker.invoke(declaration); } }); element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS, ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_WAITING); } else { renderer = invoker.invoke(renderDeclaration); applySnippetResultToElement(doc, refId, element, renderTarget, renderer); } } catch (SnippetNotResovlableException | SnippetInvokeException e) { throw e; } catch (Exception e) { SnippetInvokeException se = new SnippetInvokeException( "Error occured when executing rendering on [" + renderDeclaration + "]:" + e.getMessage(), e); throw se; } context.setData(TRACE_VAR_TEMPLATE_PATH, null); 
context.setCurrentRenderingElement(null); } else {// if skip snippet element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS, ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_FINISHED); } } // load embed nodes which blocking parents has finished List<Element> embedNodeList = doc.select(ExtNodeConstants.EMBED_NODE_TAG_SELECTOR); int embedNodeListCount = embedNodeList.size(); Iterator<Element> embedNodeIterator = embedNodeList.iterator(); Element embed; Element embedContent; while (embedNodeIterator.hasNext()) { embed = embedNodeIterator.next(); if (isBlockedByParentSnippet(doc, embed)) { embedNodeListCount--; continue; } embedContent = TemplateUtil.getEmbedNodeContent(embed); TemplateUtil.mergeBlock(doc, embedContent); embed.before(embedContent); embed.remove(); } if ((readySnippetCount + embedNodeListCount) > 0) { TemplateUtil.regulateElement(null, doc); applySnippets(doc); } else { ConcurrentRenderHelper crHelper = ConcurrentRenderHelper.getInstance(context, doc); String delcaration = null; if (crHelper.hasUnCompletedTask()) { delcaration = null; try { FutureRendererHolder holder = crHelper.take(); delcaration = holder.getRenderDeclaration(); String ref = holder.getSnippetRefId(); String reSelector = SelectorUtil.attr(ExtNodeConstants.SNIPPET_NODE_TAG_SELECTOR, ExtNodeConstants.ATTR_SNIPPET_REF, ref); Element element = doc.select(reSelector).get(0);// must have Element target; if (element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE) .equals(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE_FAKE)) { target = element.children().first(); } else { target = element; } applySnippetResultToElement(doc, ref, element, target, holder.getRenderer()); applySnippets(doc); } catch (InterruptedException | ExecutionException e) { throw new SnippetInvokeException("Concurrent snippet invocation failed" + (delcaration == null ? "" : " on [" + delcaration + "]"), e); } } } }
From source file:com.aestasit.markdown.slidery.converters.TextTemplateConverter.java
protected void transformDocument(final Document slidesDocument, final Configuration config) { if (!config.notesIncluded()) { for (Element notesElement : slidesDocument.select("aside")) { notesElement.remove(); }//from w ww .j av a2s . co m } if ("true".equals(config.getOption("renderSyntaxHighlighting"))) { renderSyntaxHighlightingHtml(slidesDocument, config); } }
From source file:com.jimplush.goose.outputformatters.DefaultOutputFormatter.java
/**
 * Removes every element inside the top node whose {@code gravityScore} attribute is below 1
 * (that is, zero or negative).
 *
 * <p>Elements whose {@code gravityScore} attribute is not a parseable integer are left in
 * place rather than aborting the whole pass.
 */
private void removeNodesWithNegativeScores() {
    Elements gravityItems = this.topNode.select("*[gravityScore]");
    for (Element item : gravityItems) {
        final int score;
        try {
            score = Integer.parseInt(item.attr("gravityScore").trim());
        } catch (NumberFormatException ignored) {
            // The selector only guarantees that the attribute exists, not that it is numeric;
            // without a usable score there is no basis for removing the element.
            continue;
        }
        if (score < 1) {
            item.remove();
        }
    }
}
From source file:com.blackducksoftware.tools.nrt.generator.NRTReportGenerator.java
/** * Copies the HTML template into the finalHtmlOutput then injects the * generates JSON data into the specific div location and writes it out. * // w ww . jav a2 s . c o m * @param expectedFile */ public void generateHTMLFromTemplate(File finalHtmlOutput) { log.info("Writing to report: " + finalHtmlOutput); String jsonComponentList = generateJSONFromObject(componentMap); String jsonPropertyList = generateJSONFromObject(nrtConfig.getOptionsForExport()); // Construct a variable out of it jsonComponentList = "var compList=[" + jsonComponentList + "]"; jsonPropertyList = "var propList=[" + jsonPropertyList + "]"; PrintWriter writer = null; try { // Read the template Document doc = Jsoup.parse(finalHtmlOutput, "UTF-8"); // Inject the JSON Elements jsonElementDivBlock = doc.getElementsByClass(NRTConstants.HTML_JSON_DATA_BLOCK); // This will be empty, but it should exist Element jsonDivElement = jsonElementDivBlock.get(0); if (jsonDivElement != null) { // Remove any script tags from it, in case the user populated // the template incorrectly with data if (jsonDivElement.children().size() > 0) { Elements children = jsonDivElement.children(); for (int i = 0; i < children.size(); i++) { Element el = children.get(i); el.remove(); } } addNewScriptElementWithJson(jsonDivElement, jsonComponentList); addNewScriptElementWithJson(jsonDivElement, jsonPropertyList); } else { log.error("Unable to find a valid critical DIV inside HTML template: " + NRTConstants.HTML_JSON_DATA_BLOCK); } writer = new PrintWriter(finalHtmlOutput, "UTF-8"); // Write out the file writer.write(doc.html()); writer.flush(); writer.close(); } catch (Exception e) { log.error("Unable to write out final report file!", e); } finally { writer.close(); } }
From source file:de.tudarmstadt.ukp.dkpro.c4corpus.boilerplate.impl.JusTextBoilerplateRemoval.java
/**
 * Strips unwanted parts from a jsoup document, in place.
 *
 * <p>Everything matched by the selectors {@code head}, {@code script}, {@code .hidden} and
 * {@code embedded} is removed from the tree.
 *
 * @param jsoupDoc the document to clean
 * @return the same document instance that was passed in
 */
private Document cleanDom(Document jsoupDoc) {
    final String[] unwantedSelectors = { "head", "script", ".hidden", "embedded" };
    for (int i = 0; i < unwantedSelectors.length; i++) {
        // detach every match of the current selector from the tree
        jsoupDoc.select(unwantedSelectors[i]).remove();
    }
    return jsoupDoc;
}
From source file:by.heap.remark.convert.TextCleaner.java
private void fixLineBreaks(Element el) { for (final Element e : el.children()) { if (e.tagName().equals("br")) { e.before("\n"); e.remove(); } else {//from w ww. j a v a2s .c o m fixLineBreaks(e); } } }
From source file:com.jimplush.goose.outputformatters.DefaultOutputFormatter.java
/** * remove paragraphs that have less than x number of words, would indicate that it's some sort of link *///ww w. j ava 2s . c o m private void removeParagraphsWithFewWords() { if (logger.isDebugEnabled()) { logger.debug("removeParagraphsWithFewWords starting..."); } Elements allNodes = this.topNode.getAllElements(); for (Element el : allNodes) { try { // get stop words that appear in each node WordStats stopWords = StopWords.getStopWordCount(el.text()); if (stopWords.getStopWordCount() < 5 && el.getElementsByTag("object").size() == 0 && el.getElementsByTag("embed").size() == 0) { el.remove(); } } catch (IllegalArgumentException e) { logger.error(e.getMessage()); } //} } }
From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.helpers.boilerplateremoval.impl.JusTextBoilerplateRemoval.java
/** * remove unwanted parts from a jsoup doc * * @param jsoupDoc/*ww w . j a va2s . c om*/ * @return */ public Document cleanDom(Document jsoupDoc) { String[] tagsToRemove = { "head", "script", ".hidden", "embedded" }; for (String tag : tagsToRemove) { Elements selectedTags = jsoupDoc.select(tag); for (Element element : selectedTags) { element.remove(); } } //remove comments (might be slow) for (Element element : jsoupDoc.getAllElements()) { for (Node n : element.childNodes()) { NodeHelper.removeComments(n); } } return jsoupDoc; }