Example usage for org.jsoup.nodes Element parent

List of usage examples for org.jsoup.nodes Element parent

Introduction

On this page you can find usage examples for the parent() method of org.jsoup.nodes.Element.

Prototype

@Override
    public final Element parent() 

Source Link

Usage

From source file:org.opens.tanaguru.rules.rgaa30.Rgaa30Rule110102.java

/**
 * This method linked each label which have an input child on a page to its
 * form in a map./* w  w w.ja  v  a  2s. co m*/
 */
private void putLabelElementHandlerIntoTheMap() {
    for (Element el : labelElementHandler.get()) {
        Element tmpElement = el.parent();
        while (StringUtils.isNotBlank(tmpElement.tagName())) {
            if (tmpElement.tagName().equals(FORM_TAG)) {
                if (labelFormMap.containsKey(tmpElement)) {
                    Elements els = el.select(FORM_ELEMENT_WITH_ID_CSS_LIKE_QUERY);
                    if (!els.isEmpty()) {
                        labelFormMap.get(tmpElement).add(el);
                    }
                } else {
                    Elements els = el.select(FORM_ELEMENT_WITH_ID_CSS_LIKE_QUERY);
                    if (!els.isEmpty()) {
                        ElementHandler<Element> labelElement = new ElementHandlerImpl();
                        labelElement.add(el);
                        labelFormMap.put(tmpElement, labelElement);
                    }
                }
                break;
            }
            tmpElement = tmpElement.parent();
        }
    }
}

From source file:org.opens.tanaguru.rules.rgaa30.Rgaa30Rule110103.java

/**
 * Links each collected input to its enclosing form in {@code inputFormMap}.
 * <p>
 * For every element returned by {@code inputElementHandler}, the ancestor chain is walked
 * upwards until an element whose tag name equals {@code FORM_TAG} is found; the input is
 * then added to the handler stored for that form (a new handler is created for the
 * first input of a form). Inputs that are not nested in any form are ignored.
 */
private void putInputElementHandlerIntoTheMap() {
    for (Element el : inputElementHandler.get()) {
        Element tmpElement = el.parent();
        // parent() returns null once the top of the tree has been passed; without the
        // null check an input located outside of any form ended in a NullPointerException.
        while (tmpElement != null && StringUtils.isNotBlank(tmpElement.tagName())) {
            if (tmpElement.tagName().equals(FORM_TAG)) {
                if (inputFormMap.containsKey(tmpElement)) {
                    inputFormMap.get(tmpElement).add(el);
                } else {
                    ElementHandler<Element> inputElement = new ElementHandlerImpl();
                    inputElement.add(el);
                    inputFormMap.put(tmpElement, inputElement);
                }
                // The nearest enclosing form wins; stop climbing.
                break;
            }
            tmpElement = tmpElement.parent();
        }
    }
}

From source file:org.silverpeas.mobile.server.servlets.PublicationContentServlet.java

/**
 * Rewrites a WYSIWYG HTML fragment and writes the result to the servlet response.
 * <p>
 * The fragment is wrapped in {@code <html><body>}, parsed with jsoup, and then:
 * <ul>
 * <li>every {@code img} whose {@code src} contains {@code /silverpeas} gets its source
 * replaced by the result of {@code convertSpImageUrlToDataUrl};</li>
 * <li>every {@code embed} whose markup contains {@code flash} and references an
 * {@code attachmentId/<id>/} path is replaced by an {@code <audio>} or {@code <video>}
 * element pointing at the {@code /services/spmobile/Attachment} URL, when the
 * attachment content type is one of the audio/video types checked below.</li>
 * </ul>
 * Embeds without a parsable attachment id, or whose attachment cannot be found, are left
 * untouched instead of aborting the whole rendering.
 *
 * @param html       the WYSIWYG fragment to display
 * @param request    current request, used to resolve the user in session
 * @param response   response whose output stream receives the UTF-8 encoded page
 * @param instanceId component instance id placed in the generated attachment URLs
 * @throws IOException if writing to the response fails
 */
private void displayWysiwyg(String html, HttpServletRequest request, HttpServletResponse response,
        String instanceId) throws IOException {
    html = "<html><body>" + html + "</body></html>";
    Document doc = Jsoup.parse(html);
    // The former extraction of the body's inner HTML was a dead store (it was overwritten
    // by doc.outerHtml() before being read) and has been dropped.

    Elements images = doc.getElementsByTag("img");
    for (Element img : images) {
        String source = img.attr("src");
        String newSource = source;
        if (source.contains("/silverpeas")) {
            // need to convert in dataurl
            newSource = convertSpImageUrlToDataUrl(source);
        }
        img.attr("src", newSource);
    }

    final String idMarker = "attachmentId/";
    Elements embeds = doc.getElementsByTag("embed");
    for (Element embed : embeds) {
        String htmlPart = embed.outerHtml();
        if (!htmlPart.contains("flash")) {
            continue;
        }
        // Locate ".../attachmentId/<id>/..." defensively: indexOf() returns -1 when the
        // marker or the closing slash is missing, which previously produced either a
        // StringIndexOutOfBoundsException or a lookup with a garbage id.
        int idStart = htmlPart.indexOf(idMarker);
        if (idStart < 0) {
            continue;
        }
        String attachmentId = htmlPart.substring(idStart + idMarker.length());
        int idEnd = attachmentId.indexOf("/");
        if (idEnd < 0) {
            continue;
        }
        attachmentId = attachmentId.substring(0, idEnd);

        SimpleDocument attachment = AttachmentServiceProvider.getAttachmentService().searchDocumentById(
                new SimpleDocumentPK(attachmentId),
                getUserInSession(request).getUserPreferences().getLanguage());
        if (attachment == null) {
            // Unknown attachment: keep the original embed rather than failing.
            continue;
        }
        String type = attachment.getContentType();
        String url = getServletContext().getContextPath() + "/services/spmobile/Attachment";
        url = url + "?id=" + attachmentId + "&instanceId=" + instanceId + "&lang="
                + getUserInSession(request).getUserPreferences().getLanguage() + "&userId="
                + getUserInSession(request).getId();
        // Constant-first equals() so that a missing content type simply matches nothing.
        if ("audio/mpeg".equals(type) || "audio/ogg".equals(type) || "audio/wav".equals(type)) {
            embed.parent().append("<audio controls><source src='" + url + "' type='" + type + "'></audio>");
            embed.remove();
        } else if ("video/mp4".equals(type) || "video/ogg".equals(type) || "video/webm".equals(type)) {
            embed.parent()
                    .append("<video controls='controls'><source src='" + url + "' type='" + type + "' />");
            embed.remove();
        }
    }

    html = doc.outerHtml();
    OutputStreamWriter out = new OutputStreamWriter(response.getOutputStream(), "UTF-8");
    writeContainer(out, html);
    out.flush();
}

From source file:org.silverpeas.mobile.server.servlets.PublicationContentServlet.java

/**
 * Renders the template-based (form) view of a publication and writes it through
 * {@code writeContainer} to {@code out}.
 * <p>
 * The form HTML is first generated into an in-memory buffer, then parsed with jsoup and
 * rewritten: preview images are removed, attached-file image sources are converted with
 * {@code convertImageAttachmentUrl}, attached-file links are redirected to the
 * {@code /services/spmobile/Attachment} URL, "player" links are replaced by a
 * {@code <video>} element (or styled with an inline placeholder image when the content
 * type is not mp4/ogg/webm), and all {@code script} elements are removed.
 *
 * @param out  writer receiving the final HTML
 * @param pub  publication whose instance id, info id and id select the template record
 * @param user user whose language preference and id are used for the form context and URLs
 * @param ua   not referenced in this method body
 * @throws Exception propagated from template lookup, reflection or attachment access
 */
private void displayFormView(Writer out, PublicationDetail pub, UserDetail user, String ua) throws Exception {

    PublicationTemplate pubTemplate = PublicationTemplateManager.getInstance()
            .getPublicationTemplate(pub.getInstanceId() + ":" + pub.getInfoId());
    DataRecord xmlData = pubTemplate.getRecordSet().getRecord(pub.getId());

    PagesContext xmlContext = new PagesContext("myForm", "0", user.getUserPreferences().getLanguage(), false,
            pub.getInstanceId(), "useless");
    xmlContext.setObjectId(pub.getId());
    xmlContext.setDesignMode(false);
    xmlContext.setBorderPrinted(false);
    xmlContext.setContentLanguage(user.getUserPreferences().getLanguage());
    xmlContext.setCreation(false);

    // The form is rendered into a buffer so that the markup can be post-processed below.
    StringWriter generatedHtml = new StringWriter();
    PrintWriter outTmp = new PrintWriter(generatedHtml);

    Form xmlForm = pubTemplate.getViewForm();
    if (xmlForm instanceof XmlForm) {
        // display(PrintWriter, PagesContext, DataRecord) is looked up and invoked reflectively
        // with setAccessible(true) - presumably because it is not publicly accessible; confirm.
        Method m = XmlForm.class.getDeclaredMethod("display",
                new Class[] { PrintWriter.class, PagesContext.class, DataRecord.class });
        m.setAccessible(true);
        m.invoke(xmlForm, outTmp, xmlContext, xmlData);
        outTmp.flush();
    } else if (xmlForm instanceof HtmlForm) {
        String html = ((HtmlForm) xmlForm).toString(xmlContext, xmlData);
        outTmp.write(html);
        outTmp.flush();
    }
    // If the form is neither an XmlForm nor an HtmlForm, the buffer stays empty.
    String html = generatedHtml.toString();

    Document doc = Jsoup.parse(html);
    Elements images = doc.getElementsByTag("img");
    for (Element img : images) {
        if (img.attr("class").equals("preview-file")) {
            // remove preview for files
            img.remove();
        } else if (img.attr("src").startsWith("/silverpeas/attached_file/componentId/")) {
            // convert url to dataurl
            String data = img.attr("src");
            data = convertImageAttachmentUrl(data, data);
            img.attr("src", data);
        }
    }
    Elements links = doc.getElementsByTag("a");
    for (Element link : links) {
        if (link.attr("href").startsWith("/silverpeas/attached_file/componentId/")) {
            // link to file
            String url = link.attr("href");
            // NOTE(review): both indexOf() results are used unchecked; a href without an
            // "attachmentId/<id>/" segment would throw or yield a wrong id here.
            String attachmentId = url.substring(url.indexOf("attachmentId/") + "attachmentId/".length());
            attachmentId = attachmentId.substring(0, attachmentId.indexOf("/"));
            url = getServletContext().getContextPath() + "/services/spmobile/Attachment";
            url = url + "?id=" + attachmentId + "&instanceId=" + pub.getInstanceId() + "&lang="
                    + user.getUserPreferences().getLanguage() + "&userId=" + user.getId();
            link.attr("href", url);
            link.attr("target", "_self");

            // Links whose id starts with "player" are treated as media player placeholders.
            if (link.attr("id").startsWith("player")) {

                boolean playable = false;

                // NOTE(review): the attachment is dereferenced without a null check.
                SimpleDocument attachment = AttachmentServiceProvider.getAttachmentService().searchDocumentById(
                        new SimpleDocumentPK(attachmentId), user.getUserPreferences().getLanguage());
                String type = attachment.getContentType();
                if (type.contains("mp4") || type.contains("ogg") || type.contains("webm")) {
                    playable = true;
                }

                if (playable) {
                    // Width and height are cut out of the inline style: the text after
                    // "width"/"height" plus one separator character, up to the next "px".
                    // NOTE(review): assumes the style has the form "width:<n>px; height:<n>px".
                    String style = link.attr("style");
                    String width = style.substring(style.indexOf("width") + "width".length() + 1);
                    width = width.substring(0, width.indexOf("px"));
                    String height = style.substring(style.indexOf("height") + "height".length() + 1);
                    height = height.substring(0, height.indexOf("px"));
                    link.parent().append("<video width='" + width + "' height='" + height
                            + "' controls='controls'><source src='" + url + "' type='" + type + "' />");
                    link.remove();
                } else {
                    // display image instead of video player
                    // (the image is inlined as a base64 JPEG data URL in the style attribute)
                    String style = "display:block; width:150px; height:98px; background-repeat: no-repeat; ";
                    style += "background-image: url(data:image/jpeg;base64," + "/9j/4AAQSkZJRgABAQEBLAEsAAD"
                            + "/4QYfRXhpZgAATU0AKgAAAAgAAAAAAA4AAgIBAAQAAAABAAAALAICAAQAAAABAAAF6wAAAAD/2P"
                            + "/gABBKRklGAAEBAAABAAEAAP/bAEMACAYGBwYFCAcHBwkJCAoMFA0MCwsMGRITDxQdGh8eHRocHCAkLicgIiwjHBwoNyksMDE0NDQfJzk9ODI8LjM0Mv/AAAsIAEAAYgEBEQD/xAAfAAABBQEBAQEBAQAAAAAAAAAAAQIDBAUGBwgJCgv/xAC1EAACAQMDAgQDBQUEBAAAAX0BAgMABBEFEiExQQYTUWEHInEUMoGRoQgjQrHBFVLR8CQzYnKCCQoWFxgZGiUmJygpKjQ1Njc4OTpDREVGR0hJSlNUVVZXWFlaY2RlZmdoaWpzdHV2d3h5eoOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4eLj5OXm5+jp6vHy8/T19vf4+fr/2gAIAQEAAD8A98orN1zXdO8OaVLqWqXS29tEOWPUnsAO5PoK8X1H9ouR7t00Xw600APEk8pDMP8AdUcfma3PCXx60nWr6Ow1qzbSp5CFWUvviLehOAV/Hj3r1me4htraS4mlSOGNS7yMcKqjnJPpXjPiL9oSxtL17XQNKfUQpI8+VyiN/uqASR+VQaJ+0TBJeLBr+iSWcZODPA5fb9UIB/In6V7TYX1tqVlDeWc6T20yho5EOQw9qs0UUUUV89/HO6udZ8f6D4WExitWWN+vG+Ryu78AP1Ne2eHvDGkeGNLisdLs4oY0UAuFG9z6sepNec/HHwVpVz4RuPEMFtFb6jZsrGSNQvmqWCkN6nnIPtXB6/4z1K7+Aei2ryuXuLp7SaXJy8cfIBP4r/3zXrnws8F6V4e8IabeR20cmoXlvHcTXLAFsuobaD2AzjitLx34K0nxb4fuoru2iF0kbNBchQHjYDI59OORXnf7OWs3NxpesaPM7NBaPHLDk527924fmoP4mvcKKKKKK8e+N3gXUNZis/EmixvLf2C7JI4/vtGCWDL6lSTx71m+Hf2hbKLT0tvEem3S3sQ2vLbKCrkdyCQVP51zfjH4iat8VpofDPhnS7hLSSQNIH5eTB4LY4VR1/AV6Br3wn+0fCO18OWbq+o2H+kxv0EsvJYfQ5IH0FcZ4K+MVz4KsV8N+K9Muz9i/dRugxLGo6KysRkDsc9KseMfjmdf06TRvCmnXgmvFMRmlUb8HghFUnk9M/pXefB3wNP4N8NSyagmzUr9lkmTr5ajO1frySfrXo1FFFFFcl8RPGsfgXwwdTNv9omklEEEZOAXIJyT6YBr5y1T4qPrFyZ7/wAJ+HZ5ScmRrZtx+pDc1e0v43arosBh0zQNCs4z1EFuy5+uG5q//wANE+Kv+fDTP+/b/wDxVZWrfGS+10D+1fDegXhHAaa2YsPod2abpPxeutDfdpfhjw/aueN8Vswb/vrdmve/hh4//wCE90S4nmtVtry0kCTIhJU5BIYZ+h49qf4n+KfhnwprEWl31xJJdMR5iQJv8oHpu9Pp1rtI5FljWRTlWGR9KdRRRXj37Rn/ACIun/8AYRX/ANFyV8yUUUUV23hD4iXngzw3q9jpkeL7UHj23B5EKqGBIHduePSuRE0lxfiaaRpJZJNzuxyWJPJJr7tsv+PGD/rmv8qnooorx79oz/kRdP8A+wkv/ot6+ZKKKKKKkt/+PiL/AHh/OvvGy/48YP8Armv8qnooorx/9osf8ULp/wD2El/9FyV8xmiiiivSvh58O4vHnhbXWhk8rVLSSI2zsflbIbKt9cDntXCXOm3elaw1jfQPBcwy7JI3GCpBr7msv+PG3/65r/Kp6KKKwPGHhLT/ABpoL6TqO9ULB45Iz80bjOGH5n868nP7Ndpk48STY/69R/8AFUf8M12n/QyTf+Ao/wDiqP8Ahmu0/wChkm/8BR/8VR/wzXaf9DJN/wCAo/8AiqP+Ga7T/oZJv/AUf/FV6f4H8Dab4E0d7GwZ5XlfzJp5PvSN0HA6ADt
V3UvCHh7WNUg1PUNJtri8gxsldMnjpn1x71tgADAGBRRX/9n/2wBDAAMCAgMCAgMDAwMEAwMEBQgFBQQEBQoHBwYIDAoMDAsKCwsNDhIQDQ4RDgsLEBYQERMUFRUVDA8XGBYUGBIUFRT/wgALCABiAJYBAREA/8QAHQABAAICAwEBAAAAAAAAAAAAAAcIAgYEBQkDAf/aAAgBAQAAAAG0o4cWdnJf71sXbBIOQB1VCZO6Dl2uolLekbxbQAgPVbTKAyDu9hvzzq9DeSAi6sV3+iorZmJrm6bSn0PyAK6V+59q5RrDB/eWzkAAUe0sDfrrxVt2zgw8t/kBz7fU2u/NoMPLf5AZMbyTYDDy3+QAvJNgMPLf5AcnYdVvJNgMfLrjAdn6GUKuNMAEL6mBukwfL6gAAB//xAAoEAABAwQBBAIBBQAAAAAAAAAFAwQGAAECByAWFzAyEzYQERIxNED/2gAIAQEAAQUC4OXaLJB1tWPNsxmxgBTO1/1tT4i2GIONsR5DMTPgZlTxEyKIlgcPE50XHaWeLIyDU5MQhrieLCXkgNox4S7elp6ZZ6ScqJSXWJKPIayni2Lrw7kdZJRnSo1G+P42QOSGS7ZJRZ1E9MjUkgdZWte0qaYg5Y2U+Zv4NjAszsX11MMIsRZv25FE9J2EcbOVnc4lGy4vdeJ6ymyUeVQcpOkpNMR8YbCmTmaynHH9uPhmWqkyiysMkY1QdrmQFlodBWkTTva17SzUnzrZxCRsMxeszxRSJQ9nE2nik2zipB/1yfrrk/XXJ+uuT9dcn665P11yfqJbKKNClSjYY6MLx6QN5KN5ZeufvzY/3dgbIxD2UUyWz1D9Q5Zeufvzte+N73vf8ah+ocsvXP38WofqHLL1z9+aCfzLSGOu40RrUP1Dlf8AhbG6avMbhkqQksaaSgcfj7qNkNWM12UT5yLVww887Js67Js67Js67Js67Js67Js67Js6jeshsed1mlgr/o//xABAEAACAQICAg0JBwQDAAAAAAABAgMABBESITEFEBMiMDI1QUJRYXGSFCBSc5GTocHRFSQzYnKx8AYjQFOBguH/2gAIAQEABj8C8xpriVIIl1vIcAKyi5eftiiOHxoIl8IZDqWdcnx1VjzbRmup47eIdKQ4VlWeWb80cRw+NCOC+VZTqjmBQ/Hg57y4bLDCuZqVcHkLthBax6l/nXQe8v47Zz0I03TD/nGnntpF2QiXSwjXB/DUWx19LnsJDkQufwT9Knvp9KxjQo1seYUBg1zcOd5EnEjHyFY3WyccUnoxxZx7cRTXCMt9bLxmiGDL2lai2I2QlMkUm9t5X1q3o93BQRLoEtwA3cAT9K2QvmAMwIhX8o1n+dm3eJCMkb5Zco5sRpr+mcxP3iLdpO1gi/U1c3uAM8spTN1KMNHx2tOmr6K13ghmzR4dHnFRSaiyg8DMkS5p4G3dB14Y4j2E1Ilzj5DcYCQjoEamoS206XEZ6cbYimlvJ1U9GIHF27hTMq/eLuTBV9Ec3sFWvkyljsaBgOfc8MD+wPtqWxvWyWUzZxJ/rf6fShLBIs0Z1NGcQaZriVXuMN5bq2/Y/LvpQ++e5lMkzDorjvqwGrgpLzYtlguX0vA/EY9nUaI+zrsHrgGYe1a02b24OuW6OX/2jIG8ovnGDzkauxeqsDqp7nYZkTHSbV9A/wCpor9nXqHn3JSR8KG6W3kcZ1yXJw+GuikH924f8W4YaW7OwcHILG5aztFYiMRaCw6ya5WuvHXK11465WuvHXK11465WuvHXK11465WuvHUEV/cNd2kjBG3TjL2g7Qt3z3Nz0o4cN530l7ah1jJylZBpB8803fwEH61/en2P2McSX2qSUaRF2fq/amd2LuxxLMcSaX17/Lzz3Ue/gMQcCK0nHaX17/Lzz3Ue/g19e/y8891Hv4BE1ZiFxp7S6TfDSrjiuvWNpfXv8uAdWGDAkEHm4C2RRmZpFAA76a2uRlYaY5RrjP85qa1vI8
rDiuOK46xUQniaIvIzqG9E8/ANdq8tlPIcX3PDKx68OuuU5/diuU5/diuU5/diuU5/diuU5/diuU5/diuU5/dilu80l3cJxDLhlTtA2hnRXw0jMP8j//EACkQAQABAgQFBQADAQAAAAAAAAERACExQVGhMGFxgcEQkbHw8SBA0eH/2gAIAQEAAT8h/gaMJJ65aQQ5hV90J7U7aYSl3eVAARVxMPTGLtFf9eVIIG38EUizoQToQBeQtTwpnFsYvI5tqGV8eQythOvVgVdRQs7kF+k9aLL8wM1xT2V5UgkaWK4Q+1MscmpJLprmw9Vqx5C25PIDFe7NEIvmcHd8FFc+XAmOg5i84pi4DU5RuawMxt04LavP4HPc9lM0dXFDet7vTGj7wTDcwjrL3qVZAjkA+2dSJrYXCjuU+3orABIRp09vYjpO1KwSedQeDFroZeIfQTFTExBy5KzLoxk5xCV65BbVBERI3J+TDmVPHGe5hjoN3k0AWlm4HvkjkUh5lCtAEvMBOkKJTci9MlSnchnJEZNVvhWj06LpcoLHagIwBAacJi+NsdirO9nlV9gwubuNBJywDvd2GsOBUoZlk3dqRBKIRMaVmWsRu1OjY1oksYkXe3enxWZZ+b2qaCHYG0fB78KdcCg98VdLRJuZV+9r97X6Wv3tfpa/W1+tqFk8hFYzlpLOPpnXvDOU1IXTHtikAIAHEjE66/z278VvHAcXY/yVckjtTa/TSlpwlA1XgTb58VvXAMIRImJSqom6vBm3z4rev6U2+fFb1wLv78IUh4sJyTfYbcGYyzUincGghUonAQFioFhV9IA+tPNJZgzGr8x2zvQpH6hYIh24C+wR0rGTcMV9x819x819x819x819x819x819x81LC0mCZAzNV9MYAsDDrWP9f//aAAgBAQAAABD8n/2U/wDyv/jH/uJ/4A//AP7/AL+X9/7+/wDf3+f8AP8Af/8A/8QAKBABAAIBAwMDBAMBAAAAAAAAAREhADFBYTBRcYGR8BAgocFAULHx/9oACAEBAAE/EPsQZM4/dUDxOO04Bp4IYQnEC1xJ7sAHSQtzMpHj6D+Gvm7E+wJwytc9+JPOWr4DzmYMlMkzBMHsXJ6nSchaXAKLdEHLi1Qt26BKRJXtJKAAaWmxxgfCDCFvdmMJBzDhMkIgeoNREEgwA3V5EFhXuQnYl2yaqUpIzBX/AFSTCClI3ezO4U9fU1QTXthgloQKzfy2JZQBhOl1+pvDo9yZDvkGsBClU3Jhr8mNxNxZOICEk5yNzIAHmFBMDYGExTJbxX5/dlUU3J0HUlBvHYfRHpiJEdRO2BxOJ71kePjwxUqD7LB6T0VXX3gAcpDhiXWmTB2SIbaBsMJC1ETypj2ybGUw8sXyeWEAO0Q5MeVTUTXNbmzloALE73AY05c5CSOtsFqDVz3NNiIx9HAWJN+gBU9hBsLE5woWZa7KTk74VtQSABABsRXRgNt5xXmVTJRiwqjcya8iaMGB4VzQKH58xLMEhiQit4lFmYJWAISLoAOybl5YWYJMstHYXYGIvWB69d4d3VDSdUkt5ws0MCCZC2Gw72lyeigBpqOBGfMFFaRuggTpnw79ZB878ZP8D8Z8O/WfEP1nyT9ZdPzPGBSPwdgNJozAE1v6CZSm0pKPVEuBlsY2IVuWWKhZ95OAbNN9BdRRc5J3o0JGsFd7bdeGjYSTZUKquq4JRtLf4EionmLogyImjjlCUFVdV/pVwKrgVOD2QSEAx64IcV9IY3oWk1sWdFfx0sWuMwJAdxOgJ8H1VEGTG1LCaJ9R0kh01hy2yMwfSnxKVUMb5SjLMsHRNu/QlEFAlKHSWwLt0O3bt27du2tci4ZBW2iE2jIO3OHkiM4WSU846pue/Qgutdeeh/37/wD/2Q==);";
                    link.attr("style", style);
                }
            }
        }
    }

    // remove all scripts
    Elements scripts = doc.getElementsByTag("script");
    for (Element script : scripts) {
        script.remove();
    }
    html = doc.outerHtml();
    writeContainer(out, html);
    out.flush();
}

From source file:org.tinymediamanager.scraper.imdb.ImdbParser.java

protected MediaMetadata parseReferencePage(Document doc, MediaScrapeOptions options, MediaMetadata md) {
    /*/*from   ww  w . ja v  a 2s  .c o  m*/
     * title and year have the following structure
     * 
     * <div id="tn15title"><h1>Merida - Legende der Highlands <span>(<a href="/year/2012/">2012</a>) <span class="pro-link">...</span> <span
     * class="title-extra">Brave <i>(original title)</i></span> </span></h1> </div>
     */

    // title
    Element title = doc.getElementsByAttributeValue("name", "title").first();
    if (title != null) {
        String movieTitle = cleanString(title.attr("content"));
        int yearStart = movieTitle.lastIndexOf("(");
        if (yearStart > 0) {
            movieTitle = movieTitle.substring(0, yearStart - 1).trim();
            md.setTitle(movieTitle);
        }
    }

    // original title and year
    Element originalTitleYear = doc.getElementsByAttributeValue("property", "og:title").first();
    if (originalTitleYear != null) {
        String content = originalTitleYear.attr("content");
        int startOfYear = content.lastIndexOf("(");
        if (startOfYear > 0) {
            // noo - this is NOT the original title!!! (seems always english?) parse from AKAs page...
            // String originalTitle = content.substring(0, startOfYear - 1).trim();
            // md.setOriginalTitle(originalTitle);

            String yearText = content.substring(startOfYear);

            // search year
            Pattern yearPattern = Pattern.compile("[1-2][0-9]{3}");
            Matcher matcher = yearPattern.matcher(yearText);
            while (matcher.find()) {
                if (matcher.group(0) != null) {
                    String movieYear = matcher.group(0);
                    try {
                        md.setYear(Integer.parseInt(movieYear));
                        break;
                    } catch (Exception ignored) {
                    }
                }
            }
        }
    }

    // poster
    Element poster = doc.getElementsByAttributeValue("property", "og:image").first();
    if (poster != null) {
        String posterUrl = poster.attr("content");

        int fileStart = posterUrl.lastIndexOf("/");
        if (fileStart > 0) {
            int parameterStart = posterUrl.indexOf("_", fileStart);
            if (parameterStart > 0) {
                int startOfExtension = posterUrl.lastIndexOf(".");
                if (startOfExtension > parameterStart) {
                    posterUrl = posterUrl.substring(0, parameterStart) + posterUrl.substring(startOfExtension);

                }
            }
        }
        processMediaArt(md, MediaArtwork.MediaArtworkType.POSTER, posterUrl);
    }

    /*
     * <div class="starbar-meta"> <b>7.4/10</b> &nbsp;&nbsp;<a href="ratings" class="tn15more">52,871 votes</a>&nbsp;&raquo; </div>
     */

    // rating and rating count
    Element ratingElement = doc.getElementsByClass("ipl-rating-star__rating").first();
    if (ratingElement != null) {
        String ratingAsString = ratingElement.ownText().replace(",", ".");
        try {
            md.setRating(Float.valueOf(ratingAsString));
        } catch (Exception ignored) {
        }

        Element votesElement = doc.getElementsByClass("ipl-rating-star__total-votes").first();
        if (votesElement != null) {
            String countAsString = votesElement.ownText().replaceAll("[.,()\\u00a0]", "").trim();
            try {
                md.setVoteCount(Integer.parseInt(countAsString));
            } catch (Exception ignored) {
            }
        }
    }
    // top250
    Element topRatedElement = doc.getElementsByAttributeValue("href", "/chart/top").first();
    if (topRatedElement != null) {
        Pattern topPattern = Pattern.compile("Top Rated Movies: #([0-9]{1,3})");
        Matcher matcher = topPattern.matcher(topRatedElement.ownText());
        while (matcher.find()) {
            if (matcher.group(1) != null) {
                try {
                    String top250Text = matcher.group(1);
                    md.setTop250(Integer.parseInt(top250Text));
                } catch (Exception ignored) {
                }
            }
        }
    }

    // releasedate
    Element releaseDateElement = doc
            .getElementsByAttributeValue("href", "/title/" + options.getImdbId().toLowerCase() + "/releaseinfo")
            .first();
    if (releaseDateElement != null) {
        String releaseDateText = releaseDateElement.ownText();
        int startOfCountry = releaseDateText.indexOf("(");
        if (startOfCountry > 0) {
            releaseDateText = releaseDateText.substring(0, startOfCountry - 1).trim();
        }
        try {
            SimpleDateFormat sdf = new SimpleDateFormat("d MMMM yyyy", Locale.US);
            Date parsedDate = sdf.parse(releaseDateText);
            md.setReleaseDate(parsedDate);
        } catch (ParseException otherformat) {
            try {
                SimpleDateFormat sdf = new SimpleDateFormat("MMMM yyyy", Locale.US);
                Date parsedDate = sdf.parse(releaseDateText);
                md.setReleaseDate(parsedDate);
            } catch (ParseException ignored) {
            }
        }
    }

    Elements elements = doc.getElementsByClass("ipl-zebra-list__label");
    for (Element element : elements) {
        // only parse tds
        if (!"td".equals(element.tag().getName())) {
            continue;
        }

        String elementText = element.ownText();

        if (elementText.equals("Taglines")) {
            if (!ImdbMetadataProvider.providerInfo.getConfig().getValueAsBool("useTmdb")) {
                Element taglineElement = element.nextElementSibling();
                if (taglineElement != null) {
                    String tagline = cleanString(taglineElement.ownText().replaceAll("", ""));
                    md.setTagline(tagline);
                }
            }
        }

        if (elementText.equals("Genres")) {
            Element nextElement = element.nextElementSibling();
            if (nextElement != null) {
                Elements genreElements = nextElement.getElementsByAttributeValueStarting("href", "/genre/");

                for (Element genreElement : genreElements) {
                    String genreText = genreElement.ownText();
                    md.addGenre(getTmmGenre(genreText));
                }
            }
        }

        /*
         * Old HTML, but maybe the same content formart <div class="info"><h5>Runtime:</h5><div class="info-content">162 min | 171 min (special edition)
         * | 178 min (extended cut)</div></div>
         */
        if (elementText.equals("Runtime")) {
            Element nextElement = element.nextElementSibling();
            if (nextElement != null) {
                Element runtimeElement = nextElement.getElementsByClass("ipl-inline-list__item").first();
                if (runtimeElement != null) {
                    String first = runtimeElement.ownText().split("\\|")[0];
                    String runtimeAsString = cleanString(first.replaceAll("min", ""));
                    int runtime = 0;
                    try {
                        runtime = Integer.parseInt(runtimeAsString);
                    } catch (Exception e) {
                        // try to filter out the first number we find
                        Pattern runtimePattern = Pattern.compile("([0-9]{2,3})");
                        Matcher matcher = runtimePattern.matcher(runtimeAsString);
                        if (matcher.find()) {
                            runtime = Integer.parseInt(matcher.group(0));
                        }
                    }
                    md.setRuntime(runtime);
                }
            }
        }

        if (elementText.equals("Country")) {
            Element nextElement = element.nextElementSibling();
            if (nextElement != null) {
                Elements countryElements = nextElement.getElementsByAttributeValueStarting("href", "/country/");
                Pattern pattern = Pattern.compile("/country/(.*)");

                for (Element countryElement : countryElements) {
                    Matcher matcher = pattern.matcher(countryElement.attr("href"));
                    if (matcher.matches()) {
                        if (ImdbMetadataProvider.providerInfo.getConfig()
                                .getValueAsBool("scrapeLanguageNames")) {
                            md.addCountry(LanguageUtils.getLocalizedCountryForLanguage(
                                    options.getLanguage().getLanguage(), countryElement.text(),
                                    matcher.group(1)));
                        } else {
                            md.addCountry(matcher.group(1));
                        }
                    }
                }
            }
        }

        if (elementText.equals("Language")) {
            Element nextElement = element.nextElementSibling();
            if (nextElement != null) {
                Elements languageElements = nextElement.getElementsByAttributeValueStarting("href",
                        "/language/");
                Pattern pattern = Pattern.compile("/language/(.*)");

                for (Element languageElement : languageElements) {
                    Matcher matcher = pattern.matcher(languageElement.attr("href"));
                    if (matcher.matches()) {
                        if (ImdbMetadataProvider.providerInfo.getConfig()
                                .getValueAsBool("scrapeLanguageNames")) {
                            md.addSpokenLanguage(LanguageUtils.getLocalizedLanguageNameFromLocalizedString(
                                    options.getLanguage(), languageElement.text(), matcher.group(1)));
                        } else {
                            md.addSpokenLanguage(matcher.group(1));
                        }
                    }
                }
            }
        }

        if (elementText.equals("Certification")) {
            Element nextElement = element.nextElementSibling();
            if (nextElement != null) {
                String languageCode = options.getCountry().getAlpha2();
                Elements certificationElements = nextElement.getElementsByAttributeValueStarting("href",
                        "/search/title?certificates=" + languageCode);
                boolean done = false;
                for (Element certificationElement : certificationElements) {
                    String certText = certificationElement.ownText();
                    int startOfCert = certText.indexOf(":");
                    if (startOfCert > 0 && certText.length() > startOfCert + 1) {
                        certText = certText.substring(startOfCert + 1);
                    }

                    Certification certification = Certification.getCertification(options.getCountry(),
                            certText);
                    if (certification != null) {
                        md.addCertification(certification);
                        done = true;
                        break;
                    }
                }

                if (!done && languageCode.equals("DE")) {
                    certificationElements = nextElement.getElementsByAttributeValueStarting("href",
                            "/search/title?certificates=XWG");
                    for (Element certificationElement : certificationElements) {
                        String certText = certificationElement.ownText();
                        int startOfCert = certText.indexOf(":");
                        if (startOfCert > 0 && certText.length() > startOfCert + 1) {
                            certText = certText.substring(startOfCert + 1);
                        }

                        Certification certification = Certification.getCertification(options.getCountry(),
                                certText);
                        if (certification != null) {
                            md.addCertification(certification);
                            break;
                        }
                    }
                }

            }
        }
    }

    // director
    Element directorsElement = doc.getElementById("directors");
    while (directorsElement != null && directorsElement.tag().getName() != "header") {
        directorsElement = directorsElement.parent();
    }
    if (directorsElement != null) {
        directorsElement = directorsElement.nextElementSibling();
    }
    if (directorsElement != null) {
        for (Element directorElement : directorsElement.getElementsByClass("name")) {
            String director = directorElement.text().trim();

            MediaCastMember cm = new MediaCastMember(MediaCastMember.CastType.DIRECTOR);
            cm.setName(director);
            md.addCastMember(cm);
        }
    }

    // actors
    Element castTableElement = doc.getElementsByClass("cast_list").first();
    if (castTableElement != null) {
        Elements tr = castTableElement.getElementsByTag("tr");
        for (Element row : tr) {
            MediaCastMember cm = parseCastMember(row);
            if (cm != null && StringUtils.isNotEmpty(cm.getName())
                    && StringUtils.isNotEmpty(cm.getCharacter())) {
                cm.setType(MediaCastMember.CastType.ACTOR);
                md.addCastMember(cm);
            }
        }
    }

    // writers
    Element writersElement = doc.getElementById("writers");
    while (writersElement != null && writersElement.tag().getName() != "header") {
        writersElement = writersElement.parent();
    }
    if (writersElement != null) {
        writersElement = writersElement.nextElementSibling();
    }
    if (writersElement != null) {
        Elements writersElements = writersElement.getElementsByAttributeValueStarting("href", "/name/");

        for (Element writerElement : writersElements) {
            String writer = cleanString(writerElement.ownText());
            MediaCastMember cm = new MediaCastMember(MediaCastMember.CastType.WRITER);
            cm.setName(writer);
            md.addCastMember(cm);
        }
    }

    // producers
    Element producersElement = doc.getElementById("producers");
    while (producersElement != null && producersElement.tag().getName() != "header") {
        producersElement = producersElement.parent();
    }
    if (producersElement != null) {
        producersElement = producersElement.nextElementSibling();
    }
    if (producersElement != null) {
        Elements producersElements = producersElement.getElementsByAttributeValueStarting("href", "/name/");

        for (Element producerElement : producersElements) {
            String producer = cleanString(producerElement.ownText());
            MediaCastMember cm = new MediaCastMember(MediaCastMember.CastType.PRODUCER);
            cm.setName(producer);
            md.addCastMember(cm);
        }
    }

    // producers
    Elements prodCompHeaderElements = doc.getElementsByClass("ipl-list-title");
    Element prodCompHeaderElement = null;

    for (Element possibleProdCompHeaderEl : prodCompHeaderElements) {
        if (possibleProdCompHeaderEl.ownText().equals("Production Companies")) {
            prodCompHeaderElement = possibleProdCompHeaderEl;
            break;
        }
    }

    while (prodCompHeaderElement != null && prodCompHeaderElement.tag().getName() != "header") {
        prodCompHeaderElement = prodCompHeaderElement.parent();
    }
    if (prodCompHeaderElement != null) {
        prodCompHeaderElement = prodCompHeaderElement.nextElementSibling();
    }
    if (prodCompHeaderElement != null) {
        Elements prodCompElements = prodCompHeaderElement.getElementsByAttributeValueStarting("href",
                "/company/");

        for (Element prodCompElement : prodCompElements) {
            String prodComp = prodCompElement.ownText();
            md.addProductionCompany(prodComp);
        }
    }

    return md;
}

From source file:org.tinymediamanager.scraper.ofdb.OfdbMetadataProvider.java

/**
 * Extracts cast members from the table row that follows the given anchor elements and adds
 * them to {@code md}.
 * <p>
 * The last element of {@code el} that is not an {@code option} is used as anchor. From there
 * the method climbs to the enclosing {@code tr} and reads the next row: every element with
 * {@code valign="middle"} whose markup carries a non-empty {@code alt="..."} becomes one
 * cast member (name, optional id and image URL, character).
 *
 * @param el   candidate anchor elements; {@code null} or empty means nothing to do
 * @param type cast type assigned to every member found
 * @param md   metadata object receiving the cast members
 */
private void parseCast(Elements el, MediaCastMember.CastType type, MediaMetadata md) {
    if (el == null || el.isEmpty()) {
        return;
    }

    Element castEl = null;
    for (Element element : el) {
        if (!element.tagName().equals("option")) { // we get more, just do not take the optionbox
            castEl = element;
        }
    }
    if (castEl == null) {
        LOGGER.debug("meh, no " + type.name() + " found");
        return;
    }

    // walk up to table TR...
    while (castEl != null && !castEl.tagName().equalsIgnoreCase("tr")) {
        castEl = castEl.parent();
    }
    if (castEl == null) {
        // The anchor is not inside a table row. The loop above explicitly allows this outcome,
        // but the original code then dereferenced the null reference.
        LOGGER.debug("meh, no " + type.name() + " found");
        return;
    }

    // ... and take the next table row ^^
    Element tr = castEl.nextElementSibling();
    if (tr == null) {
        return;
    }

    for (Element a : tr.getElementsByAttributeValue("valign", "middle")) {
        String act = a.toString();
        String aname = StrgUtils.substr(act, "alt=\"(.*?)\"");
        if (aname.isEmpty()) {
            continue;
        }

        MediaCastMember cm = new MediaCastMember();
        cm.setName(aname);
        String id = StrgUtils.substr(act, "id=(.*?)[^\"]\">");
        if (!id.isEmpty()) {
            cm.setId(id);
            // thumb
            // http://www.ofdb.de/thumbnail.php?cover=images%2Fperson%2F7%2F7689.jpg&size=6
            // fullsize ;) http://www.ofdb.de/images/person/7/7689.jpg
            try {
                String imgurl = URLDecoder
                        .decode(StrgUtils.substr(act, "images%2Fperson%2F(.*?)&amp;size"), "UTF-8");
                if (!imgurl.isEmpty()) {
                    imgurl = BASE_URL + "/images/person/" + imgurl;
                }
                cm.setImageUrl(imgurl);
            } catch (Exception e) {
                // best effort: the member is still added, just without an image
                LOGGER.debug("could not decode image url for " + aname + ": " + e.getMessage());
            }
        }
        String arole = StrgUtils.substr(act, "\\.\\.\\. (.*?)</font>").replaceAll("<[^>]*>", "");
        cm.setCharacter(arole);
        cm.setType(type);
        md.addCastMember(cm);
    }
}

From source file:org.xwiki.validator.HTML5DutchWebGuidelinesValidator.java

/**
 * Guideline RPD3/4: paragraphs must be marked up with the p (paragraph) element, not
 * separated by br (linebreak) elements.
 * <p>
 * For every br element, the markup of its parent is searched for two br tags that are
 * separated only by whitespace; each occurrence is reported as an error under the key
 * {@code rpd3s4.linebreaks}.
 */
public void validateRpd3s4() {
    final Pattern consecutiveBreaks = Pattern.compile("<br(\\s)*(/)?>(\\s)*<br(\\s)*(/)?>");
    for (Element lineBreak : getElements(ELEM_BR)) {
        String parentMarkup = lineBreak.parent().html();
        boolean hasConsecutiveBreaks = consecutiveBreaks.matcher(parentMarkup).find();
        assertFalse(Type.ERROR, "rpd3s4.linebreaks", hasConsecutiveBreaks);
    }
}

From source file:solarrecorder.SolarRecorder.java

/**
 * Fetches the page at {@code http://envoy} and records the "Number of Microinverters Online"
 * row of the "System Statistics" table in {@code envoyData}.
 * <p>
 * The table is looked up below the parent of the {@code h2} whose text is
 * "System Statistics". In each body row the first cell is taken as the name and the last
 * cell as the value. Missing tables, bodies or cells are skipped instead of raising a
 * {@link NullPointerException}.
 *
 * @throws IOException if the page cannot be fetched
 */
private void getSysData() throws IOException {
    org.jsoup.nodes.Document doc = Jsoup.connect("http://envoy").get();

    for (Element h2 : doc.getElementsByTag("h2")) {
        if (!h2.text().equals("System Statistics")) {
            continue;
        }
        Element section = h2.parent();
        if (section == null) {
            continue;
        }
        // Elements.first() returns null for an empty selection, so each step is checked.
        Element table = section.getElementsByTag("table").first();
        if (table == null) {
            continue;
        }
        Element tbody = table.getElementsByTag("tbody").first();
        if (tbody == null) {
            continue;
        }
        for (Element tr : tbody.getElementsByTag("tr")) {
            Elements alltd = tr.getElementsByTag("td");
            if (alltd.isEmpty()) {
                // e.g. a row that only contains header cells
                continue;
            }
            String name = alltd.first().text();
            String value = alltd.last().text();
            if (name.equals("Number of Microinverters Online")) {
                envoyData.add(new EnvoyData(name, value));
            }
        }
    }
}