List of usage examples for org.jsoup.safety.Whitelist.none()
public static Whitelist none()
From source file:com.geecko.QuickLyric.lyrics.Genius.java
public static Lyrics fromURL(String url, String artist, String title) { Document lyricsPage;// www. j av a 2s . co m String text; try { lyricsPage = Jsoup.connect(url).get(); Elements lyricsDiv = lyricsPage.select("div.lyrics"); if (lyricsDiv.isEmpty()) throw new StringIndexOutOfBoundsException(); else text = Jsoup.clean(lyricsDiv.html(), Whitelist.none().addTags("br")).trim(); } catch (HttpStatusException e) { return new Lyrics(Lyrics.NO_RESULT); } catch (IOException | StringIndexOutOfBoundsException e) { e.printStackTrace(); return new Lyrics(Lyrics.ERROR); } if (artist == null) { title = lyricsPage.getElementsByClass("text_title").get(0).text(); artist = lyricsPage.getElementsByClass("text_artist").get(0).text(); } Lyrics result = new Lyrics(Lyrics.POSITIVE_RESULT); if ("[Instrumental]".equals(text)) result = new Lyrics(Lyrics.NEGATIVE_RESULT); result.setArtist(artist); result.setTitle(title); result.setText(text); result.setURL(url); result.setSource("Genius"); return result; }
From source file:com.elevenpaths.googleindexretriever.GoogleSearch.java
/** * Strips any potential XSS threats out of the value * @param value// w w w .jav a2s . c om * @return */ public String stripXSS(String value) { if (value == null) return null; // Avoid null characters value = value.replaceAll("\0", ""); // Clean out HTML value = Jsoup.clean(value, Whitelist.none()); return value; }
From source file:ch.dbs.actions.bestellung.EZBVascoda.java
/** * This class uses the EZB API from/* w w w. ja va 2 s .c o m*/ * http://ezb.uni-regensburg.de/ezeit/vascoda/openURL?pid=format%3Dxml. This * API differs from the EZB/ZDB API (http://services.dnb.de). It brings back * no print information and other information for electronic holdings. It * seems to be more stable. */ public EZBForm read(final String content) { final EZBForm ezbform = new EZBForm(); try { if (content != null) { final DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance(); domFactory.setNamespaceAware(true); final DocumentBuilder builder = domFactory.newDocumentBuilder(); final Document doc = builder.parse(new InputSource(new StringReader(content))); final XPathFactory factory = XPathFactory.newInstance(); final XPath xpath = factory.newXPath(); // issns final XPathExpression exprRefE = xpath.compile("//OpenURLResponse"); final NodeList resultListRefE = (NodeList) exprRefE.evaluate(doc, XPathConstants.NODESET); String title = null; String levelAvailable = null; for (int i = 0; i < resultListRefE.getLength(); i++) { final Node firstResultNode = resultListRefE.item(i); final Element result = (Element) firstResultNode; // First ISSN // final String issn = getValue(result.getElementsByTagName("issn")); // System.out.println(issn); // title // unfortunately this will bring back the title sent by OpenURL, unless if not // specified in the OpenURL request. It then brings back the title form the EZB...! 
title = getValue(result.getElementsByTagName("title")); if (title != null) { title = Jsoup.clean(title, Whitelist.none()); title = Jsoup.parse(title).text(); } // this is the overall level of the best match and not the level of each individual result final NodeList levelNode = result.getElementsByTagName("available"); final Element levelElement = (Element) levelNode.item(0); if (levelElement != null) { levelAvailable = levelElement.getAttribute("level"); } } // electronic data final XPathExpression exprE = xpath.compile("//OpenURLResponse/OpenURLResult/Resultlist/Result"); final NodeList resultListE = (NodeList) exprE.evaluate(doc, XPathConstants.NODESET); for (int i = 0; i < resultListE.getLength(); i++) { final Node firstResultNode = resultListE.item(i); final Element result = (Element) firstResultNode; final NodeList state = result.getElementsByTagName("access"); final Element stateElement = (Element) state.item(0); int color = 0; if (stateElement != null) { color = Integer.valueOf(stateElement.getAttribute("color")); } final EZBDataOnline online = new EZBDataOnline(); // state // 1 free accessible if (color == EZBState.FREE.getValue()) { online.setAmpel("green"); online.setComment("availresult.free"); online.setState(JOPState.FREE.getValue()); // translate state to EZB/ZDB-API // 2 licensed ; 3 partially licensed } else if (color == EZBState.LICENSED.getValue() || color == EZBState.LICENSED_PARTIALLY.getValue()) { online.setAmpel("yellow"); online.setComment("availresult.abonniert"); online.setState(JOPState.LICENSED.getValue()); // translate state to EZB/ZDB-API // not licensed } else if (color == EZBState.NOT_LICENSED.getValue()) { online.setAmpel("red"); online.setComment("availresult.not_licensed"); online.setState(JOPState.NOT_LICENSED.getValue()); // translate state to EZB/ZDB-API } else { online.setAmpel("red"); online.setComment("availresult.not_licensed"); online.setState(JOPState.NOT_LICENSED.getValue()); // translate state to EZB/ZDB-API } // 
LinkToArticle not always present String url = getValue(result.getElementsByTagName("LinkToArticle")); // LinkToJournal always present if (url == null) { url = getValue(result.getElementsByTagName("LinkToJournal")); } online.setUrl(url); // try to get level from link String levelLinkToArticle = null; final NodeList levelNode = result.getElementsByTagName("LinkToArticle"); final Element levelElement = (Element) levelNode.item(0); if (levelElement != null) { levelLinkToArticle = levelElement.getAttribute("level"); } if (levelLinkToArticle != null) { online.setLevel(levelLinkToArticle); // specific level of each result } else { online.setLevel(levelAvailable); // overall level of best match } if (title != null) { online.setTitle(title); } else { online.setTitle(url); } online.setReadme(getValue(result.getElementsByTagName("LinkToReadme"))); ezbform.getOnline().add(online); } // Title not found if (resultListE.getLength() == 0) { final EZBDataOnline online = new EZBDataOnline(); online.setAmpel("red"); online.setComment("availresult.nohits"); online.setState(JOPState.NO_HITS.getValue()); // translate state to EZB/ZDB-API ezbform.getOnline().add(online); } } } catch (final XPathExpressionException e) { LOG.error(e.toString()); } catch (final SAXParseException e) { LOG.error(e.toString()); } catch (final SAXException e) { LOG.error(e.toString()); } catch (final IOException e) { LOG.error(e.toString()); } catch (final ParserConfigurationException e) { LOG.error(e.toString()); } catch (final Exception e) { LOG.error(e.toString()); } return ezbform; }
From source file:ch.dbs.actions.bestellung.EZBJOP.java
/** * This class uses the official EZB/ZDB API from * http://services.dnb.de/fize-service/gvr/full.xml. *//*from w ww . j a v a2s . c om*/ public EZBForm read(final String content) { final EZBForm ezbform = new EZBForm(); try { if (content != null) { final DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance(); domFactory.setNamespaceAware(true); final DocumentBuilder builder = domFactory.newDocumentBuilder(); final Document doc = builder.parse(new InputSource(new StringReader(content))); final XPathFactory factory = XPathFactory.newInstance(); final XPath xpath = factory.newXPath(); // references electronic data // final XPathExpression exprRefE = xpath.compile("//ElectronicData/References/Reference"); // final NodeList resultListRefE = (NodeList) exprRefE.evaluate(doc, XPathConstants.NODESET); // // for (int i = 0; i < resultListRefE.getLength(); i++) { // final Node firstResultNode = resultListRefE.item(i); // final Element result = (Element) firstResultNode; // // final EZBReference ref = new EZBReference(); // // // EZB URLs // final String url = getValue(result.getElementsByTagName("URL")); // ref.setUrl(url); // // // Label for URLs // final String label = getValue(result.getElementsByTagName("Label")); // ref.setLabel(label); // // ezbform.getReferencesonline().add(ref); // } // electronic data final XPathExpression exprE = xpath.compile("//ElectronicData/ResultList/Result"); final NodeList resultListE = (NodeList) exprE.evaluate(doc, XPathConstants.NODESET); for (int i = 0; i < resultListE.getLength(); i++) { final Node firstResultNode = resultListE.item(i); final Element result = (Element) firstResultNode; final EZBDataOnline online = new EZBDataOnline(); // state online.setState(Integer.valueOf(result.getAttribute("state"))); // 0 free accessible if (online.getState() == JOPState.FREE.getValue()) { online.setAmpel("green"); online.setComment("availresult.free"); // 1 partially free accesible } else if (online.getState() == 
JOPState.FREE_PARTIALLY.getValue()) { online.setAmpel("green"); online.setComment("availresult.partially_free"); // 2 licensed ; 3 partially licensed } else if (online.getState() == JOPState.LICENSED.getValue() || online.getState() == JOPState.LICENSED_PARTIALLY.getValue()) { online.setAmpel("yellow"); online.setComment("availresult.abonniert"); // journal not online for periode } else if (online.getState() == JOPState.OUTSIDE_PERIOD.getValue()) { online.setAmpel("red"); online.setComment("availresult.timeperiode"); // not indexed } else if (online.getState() == JOPState.NO_HITS.getValue()) { online.setAmpel("red"); online.setComment("availresult.nohits"); } else { online.setAmpel("red"); online.setComment("availresult.not_licensed"); } // title String title = getValue(result.getElementsByTagName("Title")); if (title != null) { title = Jsoup.clean(title, Whitelist.none()); online.setTitle(Jsoup.parse(title).text()); } online.setUrl(getValue(result.getElementsByTagName("AccessURL"))); online.setLevel(getValue(result.getElementsByTagName("AccessLevel"))); online.setReadme(getValue(result.getElementsByTagName("ReadmeURL"))); // National licenses etc. 
online.setAdditional(getValue(result.getElementsByTagName("Additional"))); ezbform.getOnline().add(online); } // Title not found if (resultListE.getLength() == 0) { final EZBDataOnline online = new EZBDataOnline(); online.setAmpel("red"); online.setComment("availresult.nohits"); online.setState(JOPState.NO_HITS.getValue()); ezbform.getOnline().add(online); } // references print data final XPathExpression exprRefP = xpath.compile("//PrintData/References/Reference"); final NodeList resultListRefP = (NodeList) exprRefP.evaluate(doc, XPathConstants.NODESET); final EZBReference ref = new EZBReference(); for (int i = 0; i < resultListRefP.getLength(); i++) { final Node firstResultNode = resultListRefP.item(i); final Element result = (Element) firstResultNode; // EZB URLs ref.setUrl(getValue(result.getElementsByTagName("URL"))); // Label for URLs // final String label = getValue(result.getElementsByTagName("Label")); ref.setLabel("availresult.link_title_print"); ezbform.getReferencesprint().add(ref); } // print data final XPathExpression exprP = xpath.compile("//PrintData/ResultList/Result"); final NodeList resultListP = (NodeList) exprP.evaluate(doc, XPathConstants.NODESET); for (int i = 0; i < resultListP.getLength(); i++) { final Node firstResultNode = resultListP.item(i); final Element result = (Element) firstResultNode; final EZBDataPrint print = new EZBDataPrint(); // state print.setState(Integer.valueOf(result.getAttribute("state"))); // title String title = getValue(result.getElementsByTagName("Title")); if (title != null) { title = Jsoup.clean(title, Whitelist.none()); print.setTitle(Jsoup.parse(title).text()); } print.setLocation(getValue(result.getElementsByTagName("Location"))); print.setCallnr(getValue(result.getElementsByTagName("Signature"))); print.setCoverage(getValue(result.getElementsByTagName("Period"))); // set previous extracted URL and label print.setInfo(ref); // in stock ; partially in stock if (print.getState() == JOPState.LICENSED.getValue() || 
print.getState() == JOPState.LICENSED_PARTIALLY.getValue()) { print.setAmpel("yellow"); print.setComment("availresult.print"); // only return if existing in Print ezbform.getPrint().add(print); } } } } catch (final XPathExpressionException e) { LOG.error(e.toString()); } catch (final SAXParseException e) { LOG.error(e.toString()); } catch (final SAXException e) { LOG.error(e.toString()); } catch (final IOException e) { LOG.error(e.toString()); } catch (final ParserConfigurationException e) { LOG.error(e.toString()); } catch (final Exception e) { LOG.error(e.toString()); } return ezbform; }
From source file:me.rkfg.xmpp.bot.plugins.CoolStoryPlugin.java
private String fetchStory(Website website) throws IOException { int roll = 0; String result;/* w ww. ja v a 2 s. c om*/ int resultLength; int resultLines; //noinspection ConstantConditions do { roll++; final Document doc = Jsoup.connect(website.getUrlString()).userAgent(DEFAULT_UA).get(); doc.outputSettings(new Document.OutputSettings().prettyPrint(false)); logger.info("Fetched a story from {}", doc.location()); final Element story = doc.select(website.getCssQuery()).first(); if (story == null) { return ERROR_COULD_NOT_PARSE; } story.select("div").remove(); story.select("img").forEach(img -> img.replaceWith(new TextNode(img.attr("src"), ""))); story.select("br").after("\\n"); story.select("p").before("\\n\\n"); final String storyHtml = story.html().replaceAll("\\\\n", "\n"); result = Jsoup.clean(storyHtml, "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)) .trim(); resultLength = result.length(); resultLines = countLines(result); } while (CONFIG_REROLL_LONG_STORIES && (resultLength > CONFIG_MAX_STORY_LENGTH || resultLines > CONFIG_MAX_STORY_LINES) && roll <= CONFIG_MAX_ROLLS); return result; }
From source file:com.metadot.book.connectr.server.domain.StreamItem.java
private String buildDescrSummary() { String cleaned = Jsoup.clean(description.getValue(), Whitelist.none()); if (cleaned.length() > SUMMARY_LENGTH) { this.descrSummary = cleaned.substring(0, SUMMARY_LENGTH - 1) + "..."; } else {//from ww w.j ava2 s .co m this.descrSummary = cleaned; } return this.descrSummary; }
From source file:co.dilaver.quoter.fragments.QODFragment.java
private void parseQodResponse(JSONObject response) throws JSONException { JSONObject parse = response.getJSONObject("parse"); JSONObject text = parse.getJSONObject("text"); String content = text.getString("*"); Document doc = Jsoup.parse(content); Elements table = doc.select("table[style=\"text-align:center; width:100%\"]"); Elements rows = table.select("tr"); Elements qod = rows.get(0).select("td"); Elements author = rows.get(1).select("td"); Whitelist whitelist = Whitelist.none(); String newQuote = Html.fromHtml(Jsoup.clean(qod.toString(), whitelist)).toString(); String newAuthor = Html.fromHtml(Jsoup.clean(author.toString(), whitelist).replace("~", "")).toString(); if (!qodString.equals("") && !authorString.equals("")) { if (!qodString.equals(newQuote) || !authorString.equals(newAuthor)) { Snackbar.make(rootLayout, getString(R.string.str_Refreshing), Snackbar.LENGTH_SHORT).show(); }// w w w .ja v a 2 s.c om } qodString = newQuote; authorString = newAuthor; sharedPrefStorage.setQodText(qodString); sharedPrefStorage.setQodAuthor(authorString); Log.e(TAG, "quote: " + qodString); Log.e(TAG, "author: " + authorString); qodText.setText(getString(R.string.str_WithinQuotation, qodString)); qodAuthor.setText(authorString); }
From source file:ch.dbs.actions.bestellung.EZBXML.java
public List<JournalDetails> searchByJourids(final List<String> jourids, final String bibid) { final List<JournalDetails> list = new ArrayList<JournalDetails>(); final Http http = new Http(); final StringBuffer link = new StringBuffer( "http://rzblx1.uni-regensburg.de/ezeit/detail.phtml?xmloutput=1&colors=7&lang=de&bibid="); link.append(bibid);//from w w w. j av a 2s .c om link.append("&jour_id="); final StringBuffer infoLink = new StringBuffer( "http://ezb.uni-regensburg.de/ezeit/detail.phtml?colors=7&lang=de&bibid="); infoLink.append(bibid); infoLink.append("&jour_id="); try { for (final String jourid : jourids) { final JournalDetails jd = new JournalDetails(); final String content = http.getContent(link.toString() + jourid, Connect.TIMEOUT_1.getValue(), Connect.TRIES_1.getValue(), null); if (content != null) { final DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance(); domFactory.setNamespaceAware(true); final DocumentBuilder builder = domFactory.newDocumentBuilder(); final Document doc = builder.parse(new InputSource(new StringReader(content))); final XPathFactory factory = XPathFactory.newInstance(); final XPath xpath = factory.newXPath(); final XPathExpression exprJournal = xpath.compile("//journal"); final XPathExpression exprPissns = xpath.compile("//journal/detail/P_ISSNs"); final XPathExpression exprEissns = xpath.compile("//journal/detail/E_ISSNs"); final NodeList resultJournal = (NodeList) exprJournal.evaluate(doc, XPathConstants.NODESET); for (int i = 0; i < resultJournal.getLength(); i++) { final Node firstResultNode = resultJournal.item(i); final Element journal = (Element) firstResultNode; // Title String title = getValue(journal.getElementsByTagName("title")); if (title != null) { title = Jsoup.clean(title, Whitelist.none()); title = Jsoup.parse(title).text(); } jd.setZeitschriftentitel(title); // P-ISSNs final NodeList resultPissns = (NodeList) exprPissns.evaluate(doc, XPathConstants.NODESET); // get first pissn for (int z = 0; 
z < resultPissns.getLength(); z++) { final Node firstPissnsNode = resultPissns.item(i); final Element pissnElement = (Element) firstPissnsNode; final String pissn = getValue(pissnElement.getElementsByTagName("P_ISSN")); jd.setIssn(pissn); } // try to get Eissn if we have no Pissn if (jd.getIssn() == null) { // E-ISSNs final NodeList resultEissns = (NodeList) exprEissns.evaluate(doc, XPathConstants.NODESET); // get first eissn for (int z = 0; z < resultEissns.getLength(); z++) { final Node firstEissnsNode = resultEissns.item(i); final Element eissnElement = (Element) firstEissnsNode; final String eissn = getValue(eissnElement.getElementsByTagName("E_ISSN")); jd.setIssn(eissn); } } // add info link jd.setLink(infoLink.toString() + jourid); list.add(jd); } } } } catch (final XPathExpressionException e) { LOG.error(e.toString()); } catch (final SAXParseException e) { LOG.error(e.toString()); } catch (final SAXException e) { LOG.error(e.toString()); } catch (final IOException e) { LOG.error(e.toString()); } catch (final ParserConfigurationException e) { LOG.error(e.toString()); } catch (final Exception e) { LOG.error(e.toString()); } return list; }
From source file:com.blackducksoftware.tools.nrt.generator.NRTReportGenerator.java
private void writeOutLicenseText(String componentName, PrintStream outputTextFile) { try {/* w w w.ja v a 2s .c o m*/ outputTextFile.println(); outputTextFile.println("License texts (" + (componentMap.get(componentName).getLicenses() != null ? componentMap.get(componentName).getLicenses().size() : "0") + ")"); int licenseCounter = 0; if (componentMap.get(componentName).getLicenses() != null) { for (LicenseModel license : componentMap.get(componentName).getLicenseModels()) { String licenseName = license.getName() != null ? license.getName() + "(Taken from KnowledgeBase)" : "license_" + licenseCounter + "(Taken from scanned file)"; if (nrtConfig.isTextFileOutput()) { outputTextFile.println(); outputTextFile.println( "=========================================================================="); outputTextFile.println(licenseName); outputTextFile.print(StringEscapeUtils.unescapeHtml(Jsoup.clean(license.getText(), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)))); } licenseCounter++; } // for all licenses } // if licenses exist } catch (Exception e) { log.error("Error writing out licenses", e); } }
From source file:com.ah.ui.actions.admin.LicenseMgrAction.java
public boolean installOrderKey() { // check the order key String checkName = checkKeyExistsIgnoreDomain(primaryOrderKey); if (null != checkName) { addActionError(checkName);/*from w ww . j a v a 2 s .c om*/ return false; } String hmId = licenseInfo.getSystemId(); HmDomain domain = getDomain(); try { if (!Jsoup.isValid(primaryOrderKey, Whitelist.none())) { addActionError(MgrUtil.getUserMessage("error.license.orderkey.activate.Failed", new String[] { StringEscapeUtils.escapeHtml4(primaryOrderKey) })); return false; } OrderKeyManagement.activateOrderKey(primaryOrderKey, domain.getDomainName(), hmId); doAfterInstallNewLicense(); addActionMessage(MgrUtil.getUserMessage("info.license.orderKeyActivated", primaryOrderKey)); generateAuditLog(HmAuditLog.STATUS_SUCCESS, MgrUtil.getUserMessage("hm.audit.log.entitlement.key.enter", primaryOrderKey)); primaryOrderKey = ""; // remove the expired info in session MgrUtil.removeSessionAttribute(LICENSE_INFO_IN_TITLE_AREA); return true; } catch (Exception e) { addActionError(MgrUtil.getUserMessage("error.license.orderkey.activate.Failed", new String[] { primaryOrderKey }) + "<br>" + e.getMessage()); generateAuditLog(HmAuditLog.STATUS_FAILURE, MgrUtil.getUserMessage("hm.audit.log.entitlement.key.enter", primaryOrderKey)); } return false; }