Example usage for org.jsoup.nodes Element remove

List of usage examples for org.jsoup.nodes Element remove

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.remove().

Prototype

public void remove() 

Source Link

Document

Remove (delete) this node from the DOM tree.

Usage

From source file:org.brnvrn.Main.java

/**
 * Parse a tr HTML element describing the tool.
 *
 * <p>Cells are read by position: name and link (0), description (1), license (2) and
 * compatibility (3). Inside the description cell, {@code <small>} elements mentioning
 * "Author" or "ource" are harvested for the author list and the source-code URL and then
 * detached, and {@code <br>} elements are dropped; whatever is left becomes the description.
 *
 * @param tool is to be updated
 * @param tr   brings the data
 * @return true if successful, false if any of the four expected cells is missing (in which
 *         case the tool is left untouched)
 */
private static boolean parseTrTool(Tool tool, Element tr) {
    Element nameLink = tr.select("td:eq(0)").first();
    Element tdDescription = tr.select("td:eq(1)").first();
    Element tdLicense = tr.select("td:eq(2)").first();
    Element tdCompatibility = tr.select("td:eq(3)").first();
    // first() yields null on an empty selection; bail out before touching the tool
    // instead of failing with a NullPointerException halfway through.
    if (nameLink == null || tdDescription == null || tdLicense == null || tdCompatibility == null) {
        return false;
    }

    tool.setName(nameLink.text());
    tool.setUrl(nameLink.getElementsByTag("a").attr("href"));

    tool.setLicense(tdLicense.text());

    tool.setCompatibility(tdCompatibility.text());

    // More complicated: We will extract and remove known nodes, the rest will be description
    for (Element small : tdDescription.getElementsByTag("small")) {
        boolean consumed = false;

        Element author = small.getElementsContainingText("Author").first();
        if (author != null) {
            String authorsString = author.text();
            // Keep what follows the first ':'; without a colon indexOf gives -1 and the whole text is kept.
            authorsString = authorsString.substring(authorsString.indexOf(":") + 1);
            tool.addAuthor(authorsString.split(","));
            consumed = true;
        }

        Element sourceCode = small.getElementsContainingText("ource").last();
        if (sourceCode != null) {
            tool.setUrl_src(sourceCode.attr("href"));
            consumed = true;
        }

        // Detach at most once: a <small> may match both checks, and removing an
        // already-detached node is rejected by jsoup versions that validate the parent.
        if (consumed) {
            small.remove();
        }
    }
    tdDescription.getElementsByTag("br").remove();
    tool.setDescription(Jsoup.clean(tdDescription.html(), Whitelist.relaxed())); // ownText will miss the contained links in the description
    tool.setDescriptionText(tdDescription.text());

    bestEffortThemeLanguage(tool);

    return true;
}

From source file:com.switchfly.inputvalidation.sanitizer.StripHtmlSanitizer.java

/**
 * Reduces the given content to its plain text.
 *
 * <p>Blank input (as judged by {@code StringUtils.isBlank}) is returned unchanged. Otherwise
 * the content is parsed as HTML, every {@code script}, {@code link}, {@code iframe} and
 * {@code style} element is detached, and the text of what remains is returned.
 *
 * @param content the raw, possibly HTML-bearing input
 * @return the input itself when blank, otherwise the text of the parsed document
 */
@Override
public String execute(String content) {
    if (StringUtils.isBlank(content)) {
        return content;
    }
    final Document parsed = Jsoup.parse(content);
    parsed.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
    // Detach all matched elements in one go so their content does not show up in text().
    parsed.select("script,link,iframe,style").remove();
    return parsed.text();
}

From source file:com.astamuse.asta4d.render.RenderUtil.java

/**
 * Find out all the snippet in the passed Document and execute them. The Containing embed tag of the passed Document will be exactly
 * mixed in here too. <br>
 * Recursively contained snippets will be executed from outside to inside, thus the inner snippets will not be executed until all of
 * their outer snippets are finished. Also, the dynamically created snippets and embed tags will comply with this rule too.
 * <br>
 * The method calls itself again after every pass in which at least one snippet was ready or one embed node was merged, and after
 * applying the result of a finished parallel snippet.
 *
 * @param doc
 *            the Document to apply snippets; when null the method returns immediately
 * @throws SnippetNotResovlableException
 * @throws SnippetInvokeException
 *             also used to wrap any other exception raised while invoking a snippet, and failures while waiting for a parallel
 *             snippet
 * @throws TemplateException
 * @throws TemplateNotFoundException
 */
public final static void applySnippets(Document doc) throws SnippetNotResovlableException,
        SnippetInvokeException, TemplateException, TemplateNotFoundException {
    if (doc == null) {
        return;
    }

    applyClearAction(doc, false);

    // retrieve ready snippets
    String selector = SelectorUtil.attr(ExtNodeConstants.SNIPPET_NODE_TAG_SELECTOR,
            ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS, ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_READY);
    List<Element> snippetList = new ArrayList<>(doc.select(selector));
    int readySnippetCount = snippetList.size();
    int blockedSnippetCount = 0;
    // walk backwards so that removing by index does not shift the entries still to be visited
    for (int i = readySnippetCount - 1; i >= 0; i--) {
        // if parent snippet has not been executed, the current snippet will
        // not be executed too.
        if (isBlockedByParentSnippet(doc, snippetList.get(i))) {
            snippetList.remove(i);
            blockedSnippetCount++;
        }
    }
    // from here on the count only covers the snippets that stay in snippetList
    readySnippetCount = readySnippetCount - blockedSnippetCount;

    String renderDeclaration;
    Renderer renderer;
    Context context = Context.getCurrentThreadContext();
    Configuration conf = Configuration.getConfiguration();
    final SnippetInvoker invoker = conf.getSnippetInvoker();

    String refId;
    String currentTemplatePath;
    Element renderTarget;
    for (Element element : snippetList) {
        if (!conf.isSkipSnippetExecution()) {
            // for a faked snippet node which is created by template
            // analyzing process, the render target element should be its
            // child.
            if (element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE)
                    .equals(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE_FAKE)) {
                renderTarget = element.children().first();
                // the hosting element of this faked snippet has been removed by outer a snippet
                if (renderTarget == null) {
                    // nothing to render into: mark the snippet as finished and move on
                    element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS,
                            ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_FINISHED);
                    continue;
                }
            } else {
                renderTarget = element;
            }

            // we have to reset the ref of current snippet at every time to make sure the ref is always unique(duplicated snippet ref
            // could be created by list rendering)
            TemplateUtil.resetSnippetRefs(element);

            context.setCurrentRenderingElement(renderTarget);
            renderDeclaration = element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_RENDER);

            // read the ref only after resetSnippetRefs above
            refId = element.attr(ExtNodeConstants.ATTR_SNIPPET_REF);
            currentTemplatePath = element.attr(ExtNodeConstants.ATTR_TEMPLATE_PATH);

            // NOTE(review): repeats the setCurrentRenderingElement call made a few lines above with the same argument
            context.setCurrentRenderingElement(renderTarget);
            context.setData(TRACE_VAR_TEMPLATE_PATH, currentTemplatePath);

            try {
                if (element.hasAttr(ExtNodeConstants.SNIPPET_NODE_ATTR_PARALLEL)) {
                    // parallel snippet: submit the invocation with a cloned context and mark the node as waiting;
                    // its result is applied later, in the final branch of this method
                    ConcurrentRenderHelper crHelper = ConcurrentRenderHelper.getInstance(context, doc);
                    final Context newContext = context.clone();
                    final String declaration = renderDeclaration;
                    crHelper.submitWithContext(newContext, declaration, refId, new Callable<Renderer>() {
                        @Override
                        public Renderer call() throws Exception {
                            return invoker.invoke(declaration);
                        }
                    });
                    element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS,
                            ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_WAITING);
                } else {
                    // ordinary snippet: invoke and apply the result right away
                    renderer = invoker.invoke(renderDeclaration);
                    applySnippetResultToElement(doc, refId, element, renderTarget, renderer);
                }
            } catch (SnippetNotResovlableException | SnippetInvokeException e) {
                // already of a declared type: pass through unchanged
                throw e;
            } catch (Exception e) {
                // anything else is wrapped, keeping the original exception as the cause
                SnippetInvokeException se = new SnippetInvokeException(
                        "Error occured when executing rendering on [" + renderDeclaration + "]:"
                                + e.getMessage(),
                        e);
                throw se;
            }

            // NOTE(review): these resets are skipped when an exception is thrown above or when the faked-snippet
            // branch continues early
            context.setData(TRACE_VAR_TEMPLATE_PATH, null);
            context.setCurrentRenderingElement(null);
        } else {// if skip snippet
            element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS,
                    ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_FINISHED);
        }
    }

    // load embed nodes which blocking parents has finished
    List<Element> embedNodeList = doc.select(ExtNodeConstants.EMBED_NODE_TAG_SELECTOR);
    int embedNodeListCount = embedNodeList.size();
    Iterator<Element> embedNodeIterator = embedNodeList.iterator();
    Element embed;
    Element embedContent;
    while (embedNodeIterator.hasNext()) {
        embed = embedNodeIterator.next();
        if (isBlockedByParentSnippet(doc, embed)) {
            // still blocked: leave it in the document and do not count it as processed
            embedNodeListCount--;
            continue;
        }
        // replace the embed node by its content: insert the content before it, then detach the node itself
        embedContent = TemplateUtil.getEmbedNodeContent(embed);
        TemplateUtil.mergeBlock(doc, embedContent);
        embed.before(embedContent);
        embed.remove();
    }

    if ((readySnippetCount + embedNodeListCount) > 0) {
        // at least one snippet was ready or one embed node was merged in this pass: run another pass
        TemplateUtil.regulateElement(null, doc);
        applySnippets(doc);
    } else {
        // nothing was ready in this pass: take one result from the parallel tasks (if any are outstanding),
        // apply it and run another pass
        ConcurrentRenderHelper crHelper = ConcurrentRenderHelper.getInstance(context, doc);
        String delcaration = null;
        if (crHelper.hasUnCompletedTask()) {
            // redundant with the initializer above; kept as-is
            delcaration = null;
            try {
                FutureRendererHolder holder = crHelper.take();
                delcaration = holder.getRenderDeclaration();
                String ref = holder.getSnippetRefId();
                String reSelector = SelectorUtil.attr(ExtNodeConstants.SNIPPET_NODE_TAG_SELECTOR,
                        ExtNodeConstants.ATTR_SNIPPET_REF, ref);
                Element element = doc.select(reSelector).get(0);// must have
                Element target;
                if (element.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE)
                        .equals(ExtNodeConstants.SNIPPET_NODE_ATTR_TYPE_FAKE)) {
                    // NOTE(review): unlike the loop above, a null first child is not checked here
                    target = element.children().first();
                } else {
                    target = element;
                }
                applySnippetResultToElement(doc, ref, element, target, holder.getRenderer());
                applySnippets(doc);
            } catch (InterruptedException | ExecutionException e) {
                // NOTE(review): InterruptedException is wrapped without restoring the thread's interrupt flag
                throw new SnippetInvokeException("Concurrent snippet invocation failed"
                        + (delcaration == null ? "" : " on [" + delcaration + "]"), e);
            }
        }
    }
}

From source file:com.aestasit.markdown.slidery.converters.TextTemplateConverter.java

/**
 * Post-processes the slides document according to the configuration.
 *
 * <p>All {@code aside} elements are detached unless {@code config.notesIncluded()} is true,
 * and syntax highlighting is rendered when the {@code renderSyntaxHighlighting} option is
 * exactly {@code "true"}.
 *
 * @param slidesDocument the document to modify in place
 * @param config         the configuration consulted for both decisions
 */
protected void transformDocument(final Document slidesDocument, final Configuration config) {
    final boolean keepNotes = config.notesIncluded();
    if (!keepNotes) {
        slidesDocument.select("aside").remove();
    }

    final boolean highlight = "true".equals(config.getOption("renderSyntaxHighlighting"));
    if (highlight) {
        renderSyntaxHighlightingHtml(slidesDocument, config);
    }
}

From source file:com.jimplush.goose.outputformatters.DefaultOutputFormatter.java

/**
 * Detaches every element under the top node that carries a {@code gravityScore} attribute
 * whose integer value is below 1 (so a score of zero is removed as well as negative ones).
 * The attribute value is parsed with {@code Integer.parseInt}.
 */
private void removeNodesWithNegativeScores() {
    for (Element scored : this.topNode.select("*[gravityScore]")) {
        final String rawScore = scored.attr("gravityScore");
        if (Integer.parseInt(rawScore) >= 1) {
            continue;
        }
        scored.remove();
    }
}

From source file:com.blackducksoftware.tools.nrt.generator.NRTReportGenerator.java

/**
 * Reads the HTML template stored in {@code finalHtmlOutput}, injects the generated JSON data
 * into the designated div and writes the resulting document back to the same file.
 *
 * <p>When the div cannot be found an error is logged and the parsed document is still
 * written back. Any exception raised while reading or writing is logged, not propagated.
 *
 * @param finalHtmlOutput the report file; it is parsed as the template and then overwritten
 */
public void generateHTMLFromTemplate(File finalHtmlOutput) {

    log.info("Writing to report: " + finalHtmlOutput);
    String jsonComponentList = generateJSONFromObject(componentMap);
    String jsonPropertyList = generateJSONFromObject(nrtConfig.getOptionsForExport());
    // Construct a variable out of it
    jsonComponentList = "var compList=[" + jsonComponentList + "]";
    jsonPropertyList = "var propList=[" + jsonPropertyList + "]";

    PrintWriter writer = null;
    try {
        // Read the template
        Document doc = Jsoup.parse(finalHtmlOutput, "UTF-8");

        // Locate the div that receives the JSON. first() gives null for an empty result,
        // whereas get(0) would throw and make the error branch below unreachable.
        Element jsonDivElement = doc.getElementsByClass(NRTConstants.HTML_JSON_DATA_BLOCK).first();

        if (jsonDivElement != null) {
            // Remove any child elements from it, in case the user populated
            // the template incorrectly with data
            for (Element child : jsonDivElement.children()) {
                child.remove();
            }

            addNewScriptElementWithJson(jsonDivElement, jsonComponentList);
            addNewScriptElementWithJson(jsonDivElement, jsonPropertyList);
        } else {
            log.error("Unable to find a valid critical DIV inside HTML template: "
                    + NRTConstants.HTML_JSON_DATA_BLOCK);
        }

        // Write out the file
        writer = new PrintWriter(finalHtmlOutput, "UTF-8");
        writer.write(doc.html());
        writer.flush();
    } catch (Exception e) {
        log.error("Unable to write out final report file!", e);
    } finally {
        // The writer is still null when parsing failed before it was created.
        if (writer != null) {
            writer.close();
        }
    }

}

From source file:de.tudarmstadt.ukp.dkpro.c4corpus.boilerplate.impl.JusTextBoilerplateRemoval.java

/**
 * Detaches unwanted parts from a jsoup document: everything matched by the selectors
 * {@code head}, {@code script}, {@code .hidden} and {@code embedded}, applied in that order.
 *
 * @param jsoupDoc the document to clean in place
 * @return the same document instance
 */
private Document cleanDom(Document jsoupDoc) {
    final String[] unwantedSelectors = { "head", "script", ".hidden", "embedded" };

    for (int i = 0; i < unwantedSelectors.length; i++) {
        jsoupDoc.select(unwantedSelectors[i]).remove();
    }

    return jsoupDoc;
}

From source file:by.heap.remark.convert.TextCleaner.java

/**
 * Walks the element tree below {@code el} and swaps every {@code br} element for a
 * {@code "\n"} inserted in front of it; all other children are processed recursively.
 *
 * @param el the element whose descendants are rewritten in place
 */
private void fixLineBreaks(Element el) {
    for (final Element child : el.children()) {
        if (!"br".equals(child.tagName())) {
            fixLineBreaks(child);
            continue;
        }
        child.before("\n");
        child.remove();
    }
}

From source file:com.jimplush.goose.outputformatters.DefaultOutputFormatter.java

/**
 * Detaches elements under the top node whose text holds fewer than five stop words, on the
 * theory that such a node is some sort of link rather than content. Elements that contain an
 * {@code object} or {@code embed} tag are kept regardless. An
 * {@code IllegalArgumentException} raised while handling one element is logged and the scan
 * continues with the next element.
 */
private void removeParagraphsWithFewWords() {
    if (logger.isDebugEnabled()) {
        logger.debug("removeParagraphsWithFewWords starting...");
    }

    for (Element candidate : this.topNode.getAllElements()) {
        try {
            // count the stop words that appear in this node's text
            final WordStats stats = StopWords.getStopWordCount(candidate.text());
            if (stats.getStopWordCount() >= 5) {
                continue;
            }
            if (!candidate.getElementsByTag("object").isEmpty()) {
                continue;
            }
            if (!candidate.getElementsByTag("embed").isEmpty()) {
                continue;
            }
            candidate.remove();
        } catch (IllegalArgumentException e) {
            logger.error(e.getMessage());
        }
    }
}

From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.helpers.boilerplateremoval.impl.JusTextBoilerplateRemoval.java

/**
 * remove unwanted parts from a jsoup doc
 *
 * @param jsoupDoc/*ww  w  . j a  va2s  .  c om*/
 * @return
 */
public Document cleanDom(Document jsoupDoc) {
    String[] tagsToRemove = { "head", "script", ".hidden", "embedded" };

    for (String tag : tagsToRemove) {
        Elements selectedTags = jsoupDoc.select(tag);
        for (Element element : selectedTags) {
            element.remove();
        }
    }
    //remove comments (might be slow)
    for (Element element : jsoupDoc.getAllElements()) {
        for (Node n : element.childNodes()) {
            NodeHelper.removeComments(n);
        }
    }
    return jsoupDoc;

}