List of usage examples for org.jsoup.nodes.Element.parent()
@Override public final Element parent()
From source file:org.opens.tanaguru.rules.rgaa30.Rgaa30Rule110102.java
/** * This method linked each label which have an input child on a page to its * form in a map./* w w w.ja v a 2s. co m*/ */ private void putLabelElementHandlerIntoTheMap() { for (Element el : labelElementHandler.get()) { Element tmpElement = el.parent(); while (StringUtils.isNotBlank(tmpElement.tagName())) { if (tmpElement.tagName().equals(FORM_TAG)) { if (labelFormMap.containsKey(tmpElement)) { Elements els = el.select(FORM_ELEMENT_WITH_ID_CSS_LIKE_QUERY); if (!els.isEmpty()) { labelFormMap.get(tmpElement).add(el); } } else { Elements els = el.select(FORM_ELEMENT_WITH_ID_CSS_LIKE_QUERY); if (!els.isEmpty()) { ElementHandler<Element> labelElement = new ElementHandlerImpl(); labelElement.add(el); labelFormMap.put(tmpElement, labelElement); } } break; } tmpElement = tmpElement.parent(); } } }
From source file:org.opens.tanaguru.rules.rgaa30.Rgaa30Rule110103.java
/**
 * Links each input on a page to its enclosing form.
 *
 * <p>For every element in {@code inputElementHandler}, the ancestors are walked
 * upwards until an element whose tag name equals {@code FORM_TAG} is found; the
 * input is then added to the handler stored for that form in
 * {@code inputFormMap} (the handler is created on first use). Inputs that are
 * not nested in a form are ignored.
 */
private void putInputElementHandlerIntoTheMap() {
    for (Element el : inputElementHandler.get()) {
        Element tmpElement = el.parent();
        // parent() returns null once the top of the tree has been passed, so the
        // null check is required for inputs that are not nested in any form.
        while (tmpElement != null && StringUtils.isNotBlank(tmpElement.tagName())) {
            if (tmpElement.tagName().equals(FORM_TAG)) {
                if (inputFormMap.containsKey(tmpElement)) {
                    inputFormMap.get(tmpElement).add(el);
                } else {
                    ElementHandler<Element> inputElement = new ElementHandlerImpl();
                    inputElement.add(el);
                    inputFormMap.put(tmpElement, inputElement);
                }
                // The nearest enclosing form wins; stop climbing.
                break;
            }
            tmpElement = tmpElement.parent();
        }
    }
}
From source file:org.silverpeas.mobile.server.servlets.PublicationContentServlet.java
private void displayWysiwyg(String html, HttpServletRequest request, HttpServletResponse response, String instanceId) throws IOException { html = "<html><body>" + html + "</body></html>"; Document doc = Jsoup.parse(html); Elements body = doc.getElementsByTag("body"); if (!body.isEmpty()) { html = body.first().html();//from w w w. ja v a 2 s.c om } Elements images = doc.getElementsByTag("img"); for (Element img : images) { String source = img.attr("src"); String newSource = source; if (source.contains("/silverpeas")) { // need to convert in dataurl newSource = convertSpImageUrlToDataUrl(source); } img.attr("src", newSource); } Elements embeds = doc.getElementsByTag("embed"); for (Element embed : embeds) { String htmlPart = embed.outerHtml(); if (htmlPart.contains("flash")) { String attachmentId = htmlPart .substring(htmlPart.indexOf("attachmentId/") + "attachmentId/".length()); attachmentId = attachmentId.substring(0, attachmentId.indexOf("/")); SimpleDocument attachment = AttachmentServiceProvider.getAttachmentService().searchDocumentById( new SimpleDocumentPK(attachmentId), getUserInSession(request).getUserPreferences().getLanguage()); String type = attachment.getContentType(); String url = getServletContext().getContextPath() + "/services/spmobile/Attachment"; url = url + "?id=" + attachmentId + "&instanceId=" + instanceId + "&lang=" + getUserInSession(request).getUserPreferences().getLanguage() + "&userId=" + getUserInSession(request).getId(); if (type.equals("audio/mpeg") || type.equals("audio/ogg") || type.equals("audio/wav")) { embed.parent().append("<audio controls><source src='" + url + "' type='" + type + "'></audio>"); embed.remove(); } else if (type.equals("video/mp4") || type.equals("video/ogg") || type.equals("video/webm")) { embed.parent() .append("<video controls='controls'><source src='" + url + "' type='" + type + "' />"); embed.remove(); } } } html = doc.outerHtml(); OutputStreamWriter out = new OutputStreamWriter(response.getOutputStream(), 
"UTF-8"); writeContainer(out, html); out.flush(); }
From source file:org.silverpeas.mobile.server.servlets.PublicationContentServlet.java
/**
 * Renders the view form of a publication to HTML, adapts that HTML and writes it out.
 *
 * <p>The form obtained from the publication template is rendered into an in-memory
 * buffer, parsed with jsoup and then rewritten:
 * <ul>
 *   <li>{@code img} elements with class {@code preview-file} are removed; images whose
 *       {@code src} starts with {@code /silverpeas/attached_file/componentId/} get the
 *       result of {@code convertImageAttachmentUrl} as new source;</li>
 *   <li>links to attached files are redirected to the {@code /services/spmobile/Attachment}
 *       service; links whose id starts with {@code player} are either replaced by a
 *       {@code <video>} element (content type containing mp4/ogg/webm) or restyled
 *       with an inline background image;</li>
 *   <li>all {@code script} elements are removed.</li>
 * </ul>
 *
 * @param out  the writer that receives the final HTML
 * @param pub  the publication whose form is displayed
 * @param user the user whose language preference and id are used
 * @param ua   not read by this method
 * @throws Exception if the template lookup, the reflective call or the output fails
 */
private void displayFormView(Writer out, PublicationDetail pub, UserDetail user, String ua) throws Exception {
    PublicationTemplate pubTemplate = PublicationTemplateManager.getInstance()
            .getPublicationTemplate(pub.getInstanceId() + ":" + pub.getInfoId());
    DataRecord xmlData = pubTemplate.getRecordSet().getRecord(pub.getId());
    PagesContext xmlContext = new PagesContext("myForm", "0", user.getUserPreferences().getLanguage(), false,
            pub.getInstanceId(), "useless");
    xmlContext.setObjectId(pub.getId());
    xmlContext.setDesignMode(false);
    xmlContext.setBorderPrinted(false);
    xmlContext.setContentLanguage(user.getUserPreferences().getLanguage());
    xmlContext.setCreation(false);

    // Render the form into a string buffer so it can be post-processed.
    StringWriter generatedHtml = new StringWriter();
    PrintWriter outTmp = new PrintWriter(generatedHtml);
    Form xmlForm = pubTemplate.getViewForm();
    if (xmlForm instanceof XmlForm) {
        // NOTE(review): display(PrintWriter, PagesContext, DataRecord) is invoked
        // reflectively with setAccessible(true), presumably because it is not
        // public on XmlForm - confirm.
        Method m = XmlForm.class.getDeclaredMethod("display",
                new Class[] { PrintWriter.class, PagesContext.class, DataRecord.class });
        m.setAccessible(true);
        m.invoke(xmlForm, outTmp, xmlContext, xmlData);
        outTmp.flush();
    } else if (xmlForm instanceof HtmlForm) {
        String html = ((HtmlForm) xmlForm).toString(xmlContext, xmlData);
        outTmp.write(html);
        outTmp.flush();
    }
    // If the form is neither an XmlForm nor an HtmlForm the buffer stays empty.
    String html = generatedHtml.toString();
    Document doc = Jsoup.parse(html);

    Elements images = doc.getElementsByTag("img");
    for (Element img : images) {
        if (img.attr("class").equals("preview-file")) {
            // remove preview for files
            img.remove();
        } else if (img.attr("src").startsWith("/silverpeas/attached_file/componentId/")) {
            // convert url to dataurl
            String data = img.attr("src");
            data = convertImageAttachmentUrl(data, data);
            img.attr("src", data);
        }
    }

    Elements links = doc.getElementsByTag("a");
    for (Element link : links) {
        if (link.attr("href").startsWith("/silverpeas/attached_file/componentId/")) {
            // link to file
            String url = link.attr("href");
            // The id is the path segment that follows "attachmentId/".
            // NOTE(review): a href without "attachmentId/" or without a following "/"
            // makes these substring calls misbehave or throw - confirm inputs.
            String attachmentId = url.substring(url.indexOf("attachmentId/") + "attachmentId/".length());
            attachmentId = attachmentId.substring(0, attachmentId.indexOf("/"));
            url = getServletContext().getContextPath() + "/services/spmobile/Attachment";
            url = url + "?id=" + attachmentId + "&instanceId=" + pub.getInstanceId() + "&lang="
                    + user.getUserPreferences().getLanguage() + "&userId=" + user.getId();
            link.attr("href", url);
            link.attr("target", "_self");
            if (link.attr("id").startsWith("player")) {
                boolean playable = false;
                SimpleDocument attachment = AttachmentServiceProvider.getAttachmentService().searchDocumentById(
                        new SimpleDocumentPK(attachmentId), user.getUserPreferences().getLanguage());
                String type = attachment.getContentType();
                if (type.contains("mp4") || type.contains("ogg") || type.contains("webm")) {
                    playable = true;
                }
                if (playable) {
                    // Width and height are read from the link's inline style: the text
                    // between "width"/"height" (plus one separator character) and "px".
                    String style = link.attr("style");
                    String width = style.substring(style.indexOf("width") + "width".length() + 1);
                    width = width.substring(0, width.indexOf("px"));
                    String height = style.substring(style.indexOf("height") + "height".length() + 1);
                    height = height.substring(0, height.indexOf("px"));
                    // The video is appended to the end of the link's parent, then the link is dropped.
                    link.parent().append("<video width='" + width + "' height='" + height
                            + "' controls='controls'><source src='" + url + "' type='" + type + "' />");
                    link.remove();
                } else {
                    // display image instead of video player
                    String style = "display:block; width:150px; height:98px; background-repeat: no-repeat; ";
                    // Inline JPEG (base64 data URL) used as the link's background image.
                    style += "background-image: url(data:image/jpeg;base64," + "/9j/4AAQSkZJRgABAQEBLAEsAAD" + "/4QYfRXhpZgAATU0AKgAAAAgAAAAAAA4AAgIBAAQAAAABAAAALAICAAQAAAABAAAF6wAAAAD/2P" + 
"/gABBKRklGAAEBAAABAAEAAP/bAEMACAYGBwYFCAcHBwkJCAoMFA0MCwsMGRITDxQdGh8eHRocHCAkLicgIiwjHBwoNyksMDE0NDQfJzk9ODI8LjM0Mv/AAAsIAEAAYgEBEQD/xAAfAAABBQEBAQEBAQAAAAAAAAAAAQIDBAUGBwgJCgv/xAC1EAACAQMDAgQDBQUEBAAAAX0BAgMABBEFEiExQQYTUWEHInEUMoGRoQgjQrHBFVLR8CQzYnKCCQoWFxgZGiUmJygpKjQ1Njc4OTpDREVGR0hJSlNUVVZXWFlaY2RlZmdoaWpzdHV2d3h5eoOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4eLj5OXm5+jp6vHy8/T19vf4+fr/2gAIAQEAAD8A98orN1zXdO8OaVLqWqXS29tEOWPUnsAO5PoK8X1H9ouR7t00Xw600APEk8pDMP8AdUcfma3PCXx60nWr6Ow1qzbSp5CFWUvviLehOAV/Hj3r1me4htraS4mlSOGNS7yMcKqjnJPpXjPiL9oSxtL17XQNKfUQpI8+VyiN/uqASR+VQaJ+0TBJeLBr+iSWcZODPA5fb9UIB/In6V7TYX1tqVlDeWc6T20yho5EOQw9qs0UUUUV89/HO6udZ8f6D4WExitWWN+vG+Ryu78AP1Ne2eHvDGkeGNLisdLs4oY0UAuFG9z6sepNec/HHwVpVz4RuPEMFtFb6jZsrGSNQvmqWCkN6nnIPtXB6/4z1K7+Aei2ryuXuLp7SaXJy8cfIBP4r/3zXrnws8F6V4e8IabeR20cmoXlvHcTXLAFsuobaD2AzjitLx34K0nxb4fuoru2iF0kbNBchQHjYDI59OORXnf7OWs3NxpesaPM7NBaPHLDk527924fmoP4mvcKKKKKK8e+N3gXUNZis/EmixvLf2C7JI4/vtGCWDL6lSTx71m+Hf2hbKLT0tvEem3S3sQ2vLbKCrkdyCQVP51zfjH4iat8VpofDPhnS7hLSSQNIH5eTB4LY4VR1/AV6Br3wn+0fCO18OWbq+o2H+kxv0EsvJYfQ5IH0FcZ4K+MVz4KsV8N+K9Muz9i/dRugxLGo6KysRkDsc9KseMfjmdf06TRvCmnXgmvFMRmlUb8HghFUnk9M/pXefB3wNP4N8NSyagmzUr9lkmTr5ajO1frySfrXo1FFFFFcl8RPGsfgXwwdTNv9omklEEEZOAXIJyT6YBr5y1T4qPrFyZ7/wAJ+HZ5ScmRrZtx+pDc1e0v43arosBh0zQNCs4z1EFuy5+uG5q//wANE+Kv+fDTP+/b/wDxVZWrfGS+10D+1fDegXhHAaa2YsPod2abpPxeutDfdpfhjw/aueN8Vswb/vrdmve/hh4//wCE90S4nmtVtry0kCTIhJU5BIYZ+h49qf4n+KfhnwprEWl31xJJdMR5iQJv8oHpu9Pp1rtI5FljWRTlWGR9KdRRRXj37Rn/ACIun/8AYRX/ANFyV8yUUUUV23hD4iXngzw3q9jpkeL7UHj23B5EKqGBIHduePSuRE0lxfiaaRpJZJNzuxyWJPJJr7tsv+PGD/rmv8qnooorx79oz/kRdP8A+wkv/ot6+ZKKKKKKkt/+PiL/AHh/OvvGy/48YP8Armv8qnooorx/9osf8ULp/wD2El/9FyV8xmiiiivSvh58O4vHnhbXWhk8rVLSSI2zsflbIbKt9cDntXCXOm3elaw1jfQPBcwy7JI3GCpBr7msv+PG3/65r/Kp6KKKwPGHhLT/ABpoL6TqO9ULB45Iz80bjOGH5n868nP7Ndpk48STY/69R/8AFUf8M12n/QyTf+Ao/wDiqP8Ahmu0/wChkm/8BR/8VR/wzXaf9DJN/wCAo/8AiqP+Ga7T/oZJv/AUf/FV6f4H8Dab4E0d7GwZ5XlfzJp5PvSN0HA6ADtV3UvCHh7WNUg1PUNJtri8gxsldMnjp
n1x71tgADAGBRRX/9n/2wBDAAMCAgMCAgMDAwMEAwMEBQgFBQQEBQoHBwYIDAoMDAsKCwsNDhIQDQ4RDgsLEBYQERMUFRUVDA8XGBYUGBIUFRT/wgALCABiAJYBAREA/8QAHQABAAICAwEBAAAAAAAAAAAAAAcIAgYEBQkDAf/aAAgBAQAAAAG0o4cWdnJf71sXbBIOQB1VCZO6Dl2uolLekbxbQAgPVbTKAyDu9hvzzq9DeSAi6sV3+iorZmJrm6bSn0PyAK6V+59q5RrDB/eWzkAAUe0sDfrrxVt2zgw8t/kBz7fU2u/NoMPLf5AZMbyTYDDy3+QAvJNgMPLf5AcnYdVvJNgMfLrjAdn6GUKuNMAEL6mBukwfL6gAAB//xAAoEAABAwQBBAIBBQAAAAAAAAAFAwQGAAECByAWFzAyEzYQERIxNED/2gAIAQEAAQUC4OXaLJB1tWPNsxmxgBTO1/1tT4i2GIONsR5DMTPgZlTxEyKIlgcPE50XHaWeLIyDU5MQhrieLCXkgNox4S7elp6ZZ6ScqJSXWJKPIayni2Lrw7kdZJRnSo1G+P42QOSGS7ZJRZ1E9MjUkgdZWte0qaYg5Y2U+Zv4NjAszsX11MMIsRZv25FE9J2EcbOVnc4lGy4vdeJ6ymyUeVQcpOkpNMR8YbCmTmaynHH9uPhmWqkyiysMkY1QdrmQFlodBWkTTva17SzUnzrZxCRsMxeszxRSJQ9nE2nik2zipB/1yfrrk/XXJ+uuT9dcn665P11yfqJbKKNClSjYY6MLx6QN5KN5ZeufvzY/3dgbIxD2UUyWz1D9Q5Zeufvzte+N73vf8ah+ocsvXP38WofqHLL1z9+aCfzLSGOu40RrUP1Dlf8AhbG6avMbhkqQksaaSgcfj7qNkNWM12UT5yLVww887Js67Js67Js67Js67Js67Js67Js6jeshsed1mlgr/o//xABAEAACAQICAg0JBwQDAAAAAAABAgMABBESITEFEBMiMDI1QUJRYXGSFCBSc5GTocHRFSQzYnKx8AYjQFOBguH/2gAIAQEABj8C8xpriVIIl1vIcAKyi5eftiiOHxoIl8IZDqWdcnx1VjzbRmup47eIdKQ4VlWeWb80cRw+NCOC+VZTqjmBQ/Hg57y4bLDCuZqVcHkLthBax6l/nXQe8v47Zz0I03TD/nGnntpF2QiXSwjXB/DUWx19LnsJDkQufwT9Knvp9KxjQo1seYUBg1zcOd5EnEjHyFY3WyccUnoxxZx7cRTXCMt9bLxmiGDL2lai2I2QlMkUm9t5X1q3o93BQRLoEtwA3cAT9K2QvmAMwIhX8o1n+dm3eJCMkb5Zco5sRpr+mcxP3iLdpO1gi/U1c3uAM8spTN1KMNHx2tOmr6K13ghmzR4dHnFRSaiyg8DMkS5p4G3dB14Y4j2E1Ilzj5DcYCQjoEamoS206XEZ6cbYimlvJ1U9GIHF27hTMq/eLuTBV9Ec3sFWvkyljsaBgOfc8MD+wPtqWxvWyWUzZxJ/rf6fShLBIs0Z1NGcQaZriVXuMN5bq2/Y/LvpQ++e5lMkzDorjvqwGrgpLzYtlguX0vA/EY9nUaI+zrsHrgGYe1a02b24OuW6OX/2jIG8ovnGDzkauxeqsDqp7nYZkTHSbV9A/wCpor9nXqHn3JSR8KG6W3kcZ1yXJw+GuikH924f8W4YaW7OwcHILG5aztFYiMRaCw6ya5WuvHXK11465WuvHXK11465WuvHXK11465WuvHUEV/cNd2kjBG3TjL2g7Qt3z3Nz0o4cN530l7ah1jJylZBpB8803fwEH61/en2P2McSX2qSUaRF2fq/amd2LuxxLMcSaX17/Lzz3Ue/gMQcCK0nHaX17/Lzz3Ue/g19e/y8891Hv4BE1ZiFxp7S6TfDSrjiuvWNpfXv8uAdWGDAkEHm4C2RRmZpFAA76a2uRlYaY5RrjP85qa1vI8rDiuOK46xUQniaIvIzqG9E8/ANdq8t
lPIcX3PDKx68OuuU5/diuU5/diuU5/diuU5/diuU5/diuU5/diuU5/dilu80l3cJxDLhlTtA2hnRXw0jMP8j//EACkQAQABAgQFBQADAQAAAAAAAAERACExQVGhMGFxgcEQkbHw8SBA0eH/2gAIAQEAAT8h/gaMJJ65aQQ5hV90J7U7aYSl3eVAARVxMPTGLtFf9eVIIG38EUizoQToQBeQtTwpnFsYvI5tqGV8eQythOvVgVdRQs7kF+k9aLL8wM1xT2V5UgkaWK4Q+1MscmpJLprmw9Vqx5C25PIDFe7NEIvmcHd8FFc+XAmOg5i84pi4DU5RuawMxt04LavP4HPc9lM0dXFDet7vTGj7wTDcwjrL3qVZAjkA+2dSJrYXCjuU+3orABIRp09vYjpO1KwSedQeDFroZeIfQTFTExBy5KzLoxk5xCV65BbVBERI3J+TDmVPHGe5hjoN3k0AWlm4HvkjkUh5lCtAEvMBOkKJTci9MlSnchnJEZNVvhWj06LpcoLHagIwBAacJi+NsdirO9nlV9gwubuNBJywDvd2GsOBUoZlk3dqRBKIRMaVmWsRu1OjY1oksYkXe3enxWZZ+b2qaCHYG0fB78KdcCg98VdLRJuZV+9r97X6Wv3tfpa/W1+tqFk8hFYzlpLOPpnXvDOU1IXTHtikAIAHEjE66/z278VvHAcXY/yVckjtTa/TSlpwlA1XgTb58VvXAMIRImJSqom6vBm3z4rev6U2+fFb1wLv78IUh4sJyTfYbcGYyzUincGghUonAQFioFhV9IA+tPNJZgzGr8x2zvQpH6hYIh24C+wR0rGTcMV9x819x819x819x819x819x819x81LC0mCZAzNV9MYAsDDrWP9f//aAAgBAQAAABD8n/2U/wDyv/jH/uJ/4A//AP7/AL+X9/7+/wDf3+f8AP8Af/8A/8QAKBABAAIBAwMDBAMBAAAAAAAAAREhADFBYTBRcYGR8BAgocFAULHx/9oACAEBAAE/EPsQZM4/dUDxOO04Bp4IYQnEC1xJ7sAHSQtzMpHj6D+Gvm7E+wJwytc9+JPOWr4DzmYMlMkzBMHsXJ6nSchaXAKLdEHLi1Qt26BKRJXtJKAAaWmxxgfCDCFvdmMJBzDhMkIgeoNREEgwA3V5EFhXuQnYl2yaqUpIzBX/AFSTCClI3ezO4U9fU1QTXthgloQKzfy2JZQBhOl1+pvDo9yZDvkGsBClU3Jhr8mNxNxZOICEk5yNzIAHmFBMDYGExTJbxX5/dlUU3J0HUlBvHYfRHpiJEdRO2BxOJ71kePjwxUqD7LB6T0VXX3gAcpDhiXWmTB2SIbaBsMJC1ETypj2ybGUw8sXyeWEAO0Q5MeVTUTXNbmzloALE73AY05c5CSOtsFqDVz3NNiIx9HAWJN+gBU9hBsLE5woWZa7KTk74VtQSABABsRXRgNt5xXmVTJRiwqjcya8iaMGB4VzQKH58xLMEhiQit4lFmYJWAISLoAOybl5YWYJMstHYXYGIvWB69d4d3VDSdUkt5ws0MCCZC2Gw72lyeigBpqOBGfMFFaRuggTpnw79ZB878ZP8D8Z8O/WfEP1nyT9ZdPzPGBSPwdgNJozAE1v6CZSm0pKPVEuBlsY2IVuWWKhZ95OAbNN9BdRRc5J3o0JGsFd7bdeGjYSTZUKquq4JRtLf4EionmLogyImjjlCUFVdV/pVwKrgVOD2QSEAx64IcV9IY3oWk1sWdFfx0sWuMwJAdxOgJ8H1VEGTG1LCaJ9R0kh01hy2yMwfSnxKVUMb5SjLMsHRNu/QlEFAlKHSWwLt0O3bt27du2tci4ZBW2iE2jIO3OHkiM4WSU846pue/Qgutdeeh/37/wD/2Q==);";
                    link.attr("style", style);
                }
            }
        }
    }

    // remove all scripts
    Elements scripts = doc.getElementsByTag("script");
    for (Element script : scripts) {
        script.remove();
    }
    html = doc.outerHtml();
    writeContainer(out, html);
    out.flush();
}
From source file:org.tinymediamanager.scraper.imdb.ImdbParser.java
protected MediaMetadata parseReferencePage(Document doc, MediaScrapeOptions options, MediaMetadata md) { /*/*from ww w . ja v a 2s .c o m*/ * title and year have the following structure * * <div id="tn15title"><h1>Merida - Legende der Highlands <span>(<a href="/year/2012/">2012</a>) <span class="pro-link">...</span> <span * class="title-extra">Brave <i>(original title)</i></span> </span></h1> </div> */ // title Element title = doc.getElementsByAttributeValue("name", "title").first(); if (title != null) { String movieTitle = cleanString(title.attr("content")); int yearStart = movieTitle.lastIndexOf("("); if (yearStart > 0) { movieTitle = movieTitle.substring(0, yearStart - 1).trim(); md.setTitle(movieTitle); } } // original title and year Element originalTitleYear = doc.getElementsByAttributeValue("property", "og:title").first(); if (originalTitleYear != null) { String content = originalTitleYear.attr("content"); int startOfYear = content.lastIndexOf("("); if (startOfYear > 0) { // noo - this is NOT the original title!!! (seems always english?) parse from AKAs page... 
// String originalTitle = content.substring(0, startOfYear - 1).trim(); // md.setOriginalTitle(originalTitle); String yearText = content.substring(startOfYear); // search year Pattern yearPattern = Pattern.compile("[1-2][0-9]{3}"); Matcher matcher = yearPattern.matcher(yearText); while (matcher.find()) { if (matcher.group(0) != null) { String movieYear = matcher.group(0); try { md.setYear(Integer.parseInt(movieYear)); break; } catch (Exception ignored) { } } } } } // poster Element poster = doc.getElementsByAttributeValue("property", "og:image").first(); if (poster != null) { String posterUrl = poster.attr("content"); int fileStart = posterUrl.lastIndexOf("/"); if (fileStart > 0) { int parameterStart = posterUrl.indexOf("_", fileStart); if (parameterStart > 0) { int startOfExtension = posterUrl.lastIndexOf("."); if (startOfExtension > parameterStart) { posterUrl = posterUrl.substring(0, parameterStart) + posterUrl.substring(startOfExtension); } } } processMediaArt(md, MediaArtwork.MediaArtworkType.POSTER, posterUrl); } /* * <div class="starbar-meta"> <b>7.4/10</b> <a href="ratings" class="tn15more">52,871 votes</a> » </div> */ // rating and rating count Element ratingElement = doc.getElementsByClass("ipl-rating-star__rating").first(); if (ratingElement != null) { String ratingAsString = ratingElement.ownText().replace(",", "."); try { md.setRating(Float.valueOf(ratingAsString)); } catch (Exception ignored) { } Element votesElement = doc.getElementsByClass("ipl-rating-star__total-votes").first(); if (votesElement != null) { String countAsString = votesElement.ownText().replaceAll("[.,()\\u00a0]", "").trim(); try { md.setVoteCount(Integer.parseInt(countAsString)); } catch (Exception ignored) { } } } // top250 Element topRatedElement = doc.getElementsByAttributeValue("href", "/chart/top").first(); if (topRatedElement != null) { Pattern topPattern = Pattern.compile("Top Rated Movies: #([0-9]{1,3})"); Matcher matcher = topPattern.matcher(topRatedElement.ownText()); 
while (matcher.find()) { if (matcher.group(1) != null) { try { String top250Text = matcher.group(1); md.setTop250(Integer.parseInt(top250Text)); } catch (Exception ignored) { } } } } // releasedate Element releaseDateElement = doc .getElementsByAttributeValue("href", "/title/" + options.getImdbId().toLowerCase() + "/releaseinfo") .first(); if (releaseDateElement != null) { String releaseDateText = releaseDateElement.ownText(); int startOfCountry = releaseDateText.indexOf("("); if (startOfCountry > 0) { releaseDateText = releaseDateText.substring(0, startOfCountry - 1).trim(); } try { SimpleDateFormat sdf = new SimpleDateFormat("d MMMM yyyy", Locale.US); Date parsedDate = sdf.parse(releaseDateText); md.setReleaseDate(parsedDate); } catch (ParseException otherformat) { try { SimpleDateFormat sdf = new SimpleDateFormat("MMMM yyyy", Locale.US); Date parsedDate = sdf.parse(releaseDateText); md.setReleaseDate(parsedDate); } catch (ParseException ignored) { } } } Elements elements = doc.getElementsByClass("ipl-zebra-list__label"); for (Element element : elements) { // only parse tds if (!"td".equals(element.tag().getName())) { continue; } String elementText = element.ownText(); if (elementText.equals("Taglines")) { if (!ImdbMetadataProvider.providerInfo.getConfig().getValueAsBool("useTmdb")) { Element taglineElement = element.nextElementSibling(); if (taglineElement != null) { String tagline = cleanString(taglineElement.ownText().replaceAll("", "")); md.setTagline(tagline); } } } if (elementText.equals("Genres")) { Element nextElement = element.nextElementSibling(); if (nextElement != null) { Elements genreElements = nextElement.getElementsByAttributeValueStarting("href", "/genre/"); for (Element genreElement : genreElements) { String genreText = genreElement.ownText(); md.addGenre(getTmmGenre(genreText)); } } } /* * Old HTML, but maybe the same content formart <div class="info"><h5>Runtime:</h5><div class="info-content">162 min | 171 min (special edition) * | 178 min 
(extended cut)</div></div> */ if (elementText.equals("Runtime")) { Element nextElement = element.nextElementSibling(); if (nextElement != null) { Element runtimeElement = nextElement.getElementsByClass("ipl-inline-list__item").first(); if (runtimeElement != null) { String first = runtimeElement.ownText().split("\\|")[0]; String runtimeAsString = cleanString(first.replaceAll("min", "")); int runtime = 0; try { runtime = Integer.parseInt(runtimeAsString); } catch (Exception e) { // try to filter out the first number we find Pattern runtimePattern = Pattern.compile("([0-9]{2,3})"); Matcher matcher = runtimePattern.matcher(runtimeAsString); if (matcher.find()) { runtime = Integer.parseInt(matcher.group(0)); } } md.setRuntime(runtime); } } } if (elementText.equals("Country")) { Element nextElement = element.nextElementSibling(); if (nextElement != null) { Elements countryElements = nextElement.getElementsByAttributeValueStarting("href", "/country/"); Pattern pattern = Pattern.compile("/country/(.*)"); for (Element countryElement : countryElements) { Matcher matcher = pattern.matcher(countryElement.attr("href")); if (matcher.matches()) { if (ImdbMetadataProvider.providerInfo.getConfig() .getValueAsBool("scrapeLanguageNames")) { md.addCountry(LanguageUtils.getLocalizedCountryForLanguage( options.getLanguage().getLanguage(), countryElement.text(), matcher.group(1))); } else { md.addCountry(matcher.group(1)); } } } } } if (elementText.equals("Language")) { Element nextElement = element.nextElementSibling(); if (nextElement != null) { Elements languageElements = nextElement.getElementsByAttributeValueStarting("href", "/language/"); Pattern pattern = Pattern.compile("/language/(.*)"); for (Element languageElement : languageElements) { Matcher matcher = pattern.matcher(languageElement.attr("href")); if (matcher.matches()) { if (ImdbMetadataProvider.providerInfo.getConfig() .getValueAsBool("scrapeLanguageNames")) { 
md.addSpokenLanguage(LanguageUtils.getLocalizedLanguageNameFromLocalizedString( options.getLanguage(), languageElement.text(), matcher.group(1))); } else { md.addSpokenLanguage(matcher.group(1)); } } } } } if (elementText.equals("Certification")) { Element nextElement = element.nextElementSibling(); if (nextElement != null) { String languageCode = options.getCountry().getAlpha2(); Elements certificationElements = nextElement.getElementsByAttributeValueStarting("href", "/search/title?certificates=" + languageCode); boolean done = false; for (Element certificationElement : certificationElements) { String certText = certificationElement.ownText(); int startOfCert = certText.indexOf(":"); if (startOfCert > 0 && certText.length() > startOfCert + 1) { certText = certText.substring(startOfCert + 1); } Certification certification = Certification.getCertification(options.getCountry(), certText); if (certification != null) { md.addCertification(certification); done = true; break; } } if (!done && languageCode.equals("DE")) { certificationElements = nextElement.getElementsByAttributeValueStarting("href", "/search/title?certificates=XWG"); for (Element certificationElement : certificationElements) { String certText = certificationElement.ownText(); int startOfCert = certText.indexOf(":"); if (startOfCert > 0 && certText.length() > startOfCert + 1) { certText = certText.substring(startOfCert + 1); } Certification certification = Certification.getCertification(options.getCountry(), certText); if (certification != null) { md.addCertification(certification); break; } } } } } } // director Element directorsElement = doc.getElementById("directors"); while (directorsElement != null && directorsElement.tag().getName() != "header") { directorsElement = directorsElement.parent(); } if (directorsElement != null) { directorsElement = directorsElement.nextElementSibling(); } if (directorsElement != null) { for (Element directorElement : directorsElement.getElementsByClass("name")) { String 
director = directorElement.text().trim(); MediaCastMember cm = new MediaCastMember(MediaCastMember.CastType.DIRECTOR); cm.setName(director); md.addCastMember(cm); } } // actors Element castTableElement = doc.getElementsByClass("cast_list").first(); if (castTableElement != null) { Elements tr = castTableElement.getElementsByTag("tr"); for (Element row : tr) { MediaCastMember cm = parseCastMember(row); if (cm != null && StringUtils.isNotEmpty(cm.getName()) && StringUtils.isNotEmpty(cm.getCharacter())) { cm.setType(MediaCastMember.CastType.ACTOR); md.addCastMember(cm); } } } // writers Element writersElement = doc.getElementById("writers"); while (writersElement != null && writersElement.tag().getName() != "header") { writersElement = writersElement.parent(); } if (writersElement != null) { writersElement = writersElement.nextElementSibling(); } if (writersElement != null) { Elements writersElements = writersElement.getElementsByAttributeValueStarting("href", "/name/"); for (Element writerElement : writersElements) { String writer = cleanString(writerElement.ownText()); MediaCastMember cm = new MediaCastMember(MediaCastMember.CastType.WRITER); cm.setName(writer); md.addCastMember(cm); } } // producers Element producersElement = doc.getElementById("producers"); while (producersElement != null && producersElement.tag().getName() != "header") { producersElement = producersElement.parent(); } if (producersElement != null) { producersElement = producersElement.nextElementSibling(); } if (producersElement != null) { Elements producersElements = producersElement.getElementsByAttributeValueStarting("href", "/name/"); for (Element producerElement : producersElements) { String producer = cleanString(producerElement.ownText()); MediaCastMember cm = new MediaCastMember(MediaCastMember.CastType.PRODUCER); cm.setName(producer); md.addCastMember(cm); } } // producers Elements prodCompHeaderElements = doc.getElementsByClass("ipl-list-title"); Element prodCompHeaderElement = null; for 
(Element possibleProdCompHeaderEl : prodCompHeaderElements) { if (possibleProdCompHeaderEl.ownText().equals("Production Companies")) { prodCompHeaderElement = possibleProdCompHeaderEl; break; } } while (prodCompHeaderElement != null && prodCompHeaderElement.tag().getName() != "header") { prodCompHeaderElement = prodCompHeaderElement.parent(); } if (prodCompHeaderElement != null) { prodCompHeaderElement = prodCompHeaderElement.nextElementSibling(); } if (prodCompHeaderElement != null) { Elements prodCompElements = prodCompHeaderElement.getElementsByAttributeValueStarting("href", "/company/"); for (Element prodCompElement : prodCompElements) { String prodComp = prodCompElement.ownText(); md.addProductionCompany(prodComp); } } return md; }
From source file:org.tinymediamanager.scraper.ofdb.OfdbMetadataProvider.java
private void parseCast(Elements el, MediaCastMember.CastType type, MediaMetadata md) { if (el != null && !el.isEmpty()) { Element castEl = null; for (Element element : el) { if (!element.tagName().equals("option")) { // we get more, just do not take the optionbox castEl = element;/*from ww w. j a v a 2s . com*/ } } if (castEl == null) { LOGGER.debug("meh, no " + type.name() + " found"); return; } // walk up to table TR... while (!((castEl == null) || (castEl.tagName().equalsIgnoreCase("tr")))) { castEl = castEl.parent(); } // ... and take the next table row ^^ Element tr = castEl.nextElementSibling(); if (tr != null) { for (Element a : tr.getElementsByAttributeValue("valign", "middle")) { String act = a.toString(); String aname = StrgUtils.substr(act, "alt=\"(.*?)\""); if (!aname.isEmpty()) { MediaCastMember cm = new MediaCastMember(); cm.setName(aname); String id = StrgUtils.substr(act, "id=(.*?)[^\"]\">"); if (!id.isEmpty()) { cm.setId(id); // thumb // http://www.ofdb.de/thumbnail.php?cover=images%2Fperson%2F7%2F7689.jpg&size=6 // fullsize ;) http://www.ofdb.de/images/person/7/7689.jpg try { String imgurl = URLDecoder .decode(StrgUtils.substr(act, "images%2Fperson%2F(.*?)&size"), "UTF-8"); if (!imgurl.isEmpty()) { imgurl = BASE_URL + "/images/person/" + imgurl; } cm.setImageUrl(imgurl); } catch (Exception e) { } } String arole = StrgUtils.substr(act, "\\.\\.\\. (.*?)</font>").replaceAll("<[^>]*>", ""); cm.setCharacter(arole); cm.setType(type); md.addCastMember(cm); } } } } }
From source file:org.xwiki.validator.HTML5DutchWebGuidelinesValidator.java
/**
 * RPD 3/4: paragraphs must be marked up with the p (paragraph) element, not
 * separated by br (linebreak) elements.
 *
 * <p>For every br element, the inner HTML of its parent is searched for two
 * consecutive br tags (optionally self-closed, optionally separated by
 * whitespace); an error is reported when such a pair is found.
 */
public void validateRpd3s4() {
    final Pattern consecutiveBreaks = Pattern.compile("<br(\\s)*(/)?>(\\s)*<br(\\s)*(/)?>");

    for (Element lineBreak : getElements(ELEM_BR)) {
        String parentMarkup = lineBreak.parent().html();
        boolean hasDoubleBreak = consecutiveBreaks.matcher(parentMarkup).find();
        assertFalse(Type.ERROR, "rpd3s4.linebreaks", hasDoubleBreak);
    }
}
From source file:solarrecorder.SolarRecorder.java
private void getSysData() throws IOException { org.jsoup.nodes.Document doc = Jsoup.connect("http://envoy").get(); Elements allh2 = doc.getElementsByTag("h2"); for (Element h2 : allh2) { if (h2.text().equals("System Statistics")) { Elements tables = h2.parent().getElementsByTag("table"); Elements alltr = tables.first().getElementsByTag("tbody").first().getElementsByTag("tr"); for (Element tr : alltr) { Elements alltd = tr.getElementsByTag("td"); String name = alltd.first().text(); String value = alltd.last().text(); if (name.equals("Number of Microinverters Online")) { envoyData.add(new EnvoyData(name, value)); }//from w w w. jav a2s. c om } } } }