List of usage examples for org.jsoup.parser Parser unescapeEntities
public static String unescapeEntities(String string, boolean inAttribute)
From source file:com.none.tom.simplerssreader.feed.CurrentFeed.java
/**
 * Returns the author of a feed entry with character entities decoded.
 *
 * @param entry the feed entry whose author is read
 * @return the entry's author after {@code Parser.unescapeEntities} has been applied
 *         (not in attribute mode)
 */
public static String getAuthorForEntry(final SyndEntry entry) {
    final String rawAuthor = entry.getAuthor();
    return Parser.unescapeEntities(rawAuthor, false);
}
From source file:com.none.tom.simplerssreader.feed.CurrentFeed.java
/**
 * Returns the title of a feed entry with character entities decoded.
 *
 * @param entry the feed entry whose title is read
 * @return the entry's title after {@code Parser.unescapeEntities} has been applied
 *         (not in attribute mode)
 */
public static String getTitleForEntry(final SyndEntry entry) {
    final String rawTitle = entry.getTitle();
    return Parser.unescapeEntities(rawTitle, false);
}
From source file:com.none.tom.simplerssreader.feed.CurrentFeed.java
public static CharSequence getDescriptionForEntry(final SyndEntry entry) { final SyndContent itemDescription = entry.getDescription(); if (itemDescription != null) { return Jsoup.parse(Parser.unescapeEntities(itemDescription.getValue(), false)).text(); }//from w w w.j a v a 2 s . c om return null; }
From source file:com.none.tom.simplerssreader.feed.CurrentFeed.java
/**
 * Returns the first content element of a feed entry, entity-decoded.
 *
 * <p>When the content type is {@code "html"} or {@code "xhtml"} the decoded value is
 * passed through {@code HtmlUtils.fromHtml}; for any other type (including a missing
 * type) the decoded string is returned as is.
 *
 * @param entry the feed entry whose contents are read
 * @return the converted first content element, or {@code null} when the entry has no
 *         contents or the first content element carries no value
 */
public static CharSequence getContentForEntry(final SyndEntry entry) {
    final List<SyndContent> entryContents = entry.getContents();
    if (entryContents == null || entryContents.isEmpty()) {
        return null;
    }

    final SyndContent entryContent = entryContents.get(0);
    final String rawValue = entryContent.getValue();
    if (rawValue == null) {
        // Nothing to decode; behave as if the entry had no content.
        return null;
    }

    final String unescaped = Parser.unescapeEntities(rawValue, false);

    // Constant-first comparison: the content type may be absent, which must not throw.
    final String type = entryContent.getType();
    if ("html".equals(type) || "xhtml".equals(type)) {
        return HtmlUtils.fromHtml(unescaped);
    }
    return unescaped;
}
From source file:ru.xxlabaza.popa.pack.PackingService.java
/**
 * Serializes the document to HTML, strips comments from it and decodes character entities.
 *
 * @param document the document to process
 * @return the document's HTML after {@code commentRemoveService.removeComments} and
 *         {@code Parser.unescapeEntities} (not in attribute mode) have been applied
 */
private String processHtml(Document document) {
    final String rawHtml = document.html();
    final String withoutComments = commentRemoveService.removeComments(rawHtml, HTML);
    return Parser.unescapeEntities(withoutComments, false);
}
From source file:free.org.murottal.businessobjects.VideoStream.ParseStreamMetaData.java
/** * Returns a list of video/stream meta-data that is supported by this app. * * @return List of {@link StreamMetaData}. *//* w ww . j a v a 2 s.co m*/ public StreamMetaDataList getStreamMetaDataList() throws Exception { StreamMetaDataList streamMetaDataList = new StreamMetaDataList(); String encodedUrlMap = playerArgs.getString("url_encoded_fmt_stream_map"); StreamMetaData streamMetaData; for (String url_data_str : encodedUrlMap.split(",")) { Map<String, String> tags = new HashMap<>(); for (String raw_tag : Parser.unescapeEntities(url_data_str, true).split("&")) { String[] split_tag = raw_tag.split("="); tags.put(split_tag[0], split_tag[1]); } int itag = Integer.parseInt(tags.get("itag")); String streamUrl = URLDecoder.decode(tags.get("url"), "UTF-8"); // if video has a signature: decrypt it and add it to the url if (tags.get("s") != null) { streamUrl = streamUrl + "&signature=" + decryptSignature(tags.get("s"), decryptionCode); } // contruct the meta-data of the video and add it to the list if it is supported streamMetaData = new StreamMetaData(streamUrl, itag); if (streamMetaData.getFormat() != MediaFormat.UNKNOWN) { streamMetaDataList.add(streamMetaData); } } return streamMetaDataList; }
From source file:com.geecko.QuickLyric.lyrics.LyricWiki.java
public static Lyrics fromURL(String url, String artist, String song) { if (url.endsWith("action=edit")) { return new Lyrics(NO_RESULT); }/*from www . j a v a2 s . com*/ String text; try { //url = URLDecoder.decode(url, "utf-8"); Document lyricsPage = Jsoup.connect(url).get(); Element lyricbox = lyricsPage.select("div.lyricBox").get(0); lyricbox.after(lyricbox.childNode(0)); String lyricsHtml = lyricbox.html(); text = lyricsHtml.substring(0, lyricsHtml.indexOf("<!--")).replaceAll("<.*?>", "").replaceAll("\n", "<br />"); if (text.contains("&#")) text = Parser.unescapeEntities(text, true); } catch (IndexOutOfBoundsException | IOException e) { e.printStackTrace(); return new Lyrics(ERROR); } if (artist == null) artist = url.substring(24).replace("Gracenote:", "").split(":", 2)[0].replace('_', ' '); if (song == null) song = url.substring(24).replace("Gracenote:", "").split(":", 2)[1].replace('_', ' '); try { artist = URLDecoder.decode(artist, "UTF-8"); song = URLDecoder.decode(song, "UTF-8"); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } if (text.contains( "Unfortunately, we are not licensed to display the full lyrics for this song at the moment.") || text.equals("Instrumental <br />")) { Lyrics result = new Lyrics(NEGATIVE_RESULT); result.setArtist(artist); result.setTitle(song); return result; } else if (text.equals("") || text.length() < 3) return new Lyrics(NO_RESULT); else { Lyrics lyrics = new Lyrics(POSITIVE_RESULT); lyrics.setArtist(artist); lyrics.setTitle(song); lyrics.setText(text); lyrics.setSource("LyricsWiki"); lyrics.setURL(url); return lyrics; } }
From source file:com.pemikir.youtubeplus.youtube.YoutubeExtractor.java
@Override public VideoInfo getVideoInfo(String siteUrl) { String site = Downloader.download(siteUrl); VideoInfo videoInfo = new VideoInfo(); Document doc = Jsoup.parse(site, siteUrl); try {/*from w ww . j a v a 2s . co m*/ Pattern p = Pattern.compile("v=([0-9a-zA-Z]*)"); Matcher m = p.matcher(siteUrl); m.find(); videoInfo.id = m.group(1); } catch (Exception e) { e.printStackTrace(); } videoInfo.age_limit = 0; videoInfo.webpage_url = siteUrl; //------------------------------------- // extracting form player args //------------------------------------- JSONObject playerArgs = null; JSONObject ytAssets = null; String dashManifest = ""; { Pattern p = Pattern.compile("ytplayer.config\\s*=\\s*(\\{.*?\\});"); Matcher m = p.matcher(site); m.find(); try { playerArgs = (new JSONObject(m.group(1))).getJSONObject("args"); ytAssets = (new JSONObject(m.group(1))).getJSONObject("assets"); } catch (Exception e) { e.printStackTrace(); // If we fail in this part the video is most likely not available. // Determining why is done later. 
videoInfo.videoAvailableStatus = VideoInfo.VIDEO_UNAVAILABLE; } } try { videoInfo.uploader = playerArgs.getString("author"); videoInfo.title = playerArgs.getString("title"); //first attempt gating a small image version //in the html extracting part we try to get a thumbnail with a higher resolution videoInfo.thumbnail_url = playerArgs.getString("thumbnail_url"); videoInfo.duration = playerArgs.getInt("length_seconds"); videoInfo.average_rating = playerArgs.getString("avg_rating"); // View Count will be extracted from html dashManifest = playerArgs.getString("dashmpd"); String playerUrl = ytAssets.getString("js"); if (playerUrl.startsWith("//")) { playerUrl = "https:" + playerUrl; } if (decryptoinCode.isEmpty()) { decryptoinCode = loadDecryptioinCode(playerUrl); } // extract audio videoInfo.audioStreams = parseDashManifest(dashManifest, decryptoinCode); //------------------------------------ // extract video stream url //------------------------------------ String encoded_url_map = playerArgs.getString("url_encoded_fmt_stream_map"); Vector<VideoInfo.VideoStream> videoStreams = new Vector<>(); for (String url_data_str : encoded_url_map.split(",")) { Map<String, String> tags = new HashMap<>(); for (String raw_tag : Parser.unescapeEntities(url_data_str, true).split("&")) { String[] split_tag = raw_tag.split("="); tags.put(split_tag[0], split_tag[1]); } int itag = Integer.parseInt(tags.get("itag")); String streamUrl = terrible_unescape_workaround_fuck(tags.get("url")); // if video has a signature: decrypt it and add it to the url if (tags.get("s") != null) { if (decryptoinCode.isEmpty()) { decryptoinCode = loadDecryptioinCode(playerUrl); } streamUrl = streamUrl + "&signature=" + decryptSignature(tags.get("s"), decryptoinCode); } if (resolveFormat(itag) != -1) { videoStreams.add(new VideoInfo.VideoStream(streamUrl, resolveFormat(itag), resolveResolutionString(itag))); } } videoInfo.videoStreams = new VideoInfo.VideoStream[videoStreams.size()]; for (int i = 0; i < 
videoStreams.size(); i++) { videoInfo.videoStreams[i] = videoStreams.get(i); } } catch (Exception e) { e.printStackTrace(); } //------------------------------- // extrating from html page //------------------------------- // Determine what went wrong when the Video is not available if (videoInfo.videoAvailableStatus == VideoInfo.VIDEO_UNAVAILABLE) { if (doc.select("h1[id=\"unavailable-message\"]").first().text().contains("GEMA")) { videoInfo.videoAvailableStatus = VideoInfo.VIDEO_UNAVAILABLE_GEMA; } } // Try to get high resolution thumbnail if it fails use low res from the player instead try { videoInfo.thumbnail_url = doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href"); } catch (Exception e) { Log.i(TAG, "Could not find high res Thumbnail. Use low res instead"); } // upload date videoInfo.upload_date = doc.select("strong[class=\"watch-time-text\"").first().text(); // Try to only use date not the text around it try { Pattern p = Pattern.compile("([0-9.]*$)"); Matcher m = p.matcher(videoInfo.upload_date); m.find(); videoInfo.upload_date = m.group(1); } catch (Exception e) { e.printStackTrace(); } // description videoInfo.description = doc.select("p[id=\"eow-description\"]").first().html(); try { // likes videoInfo.like_count = doc.select("span[class=\"like-button-renderer \"]").first().getAllElements() .select("button").select("span").get(0).text(); // dislikes videoInfo.dislike_count = doc.select("span[class=\"like-button-renderer \"]").first().getAllElements() .select("button").select("span").get(2).text(); } catch (Exception e) { // if it fails we know that the video does not offer dislikes. 
videoInfo.like_count = "0"; videoInfo.dislike_count = "0"; } // uploader thumbnail videoInfo.uploader_thumbnail_url = doc.select("a[class*=\"yt-user-photo\"]").first().select("img").first() .attr("abs:data-thumb"); // view count videoInfo.view_count = doc.select("div[class=\"watch-view-count\"]").first().text(); /* todo finish this code // next video videoInfo.nextVideo = extractVideoInfoItem(doc.select("div[class=\"watch-sidebar-section\"]").first() .select("li").first()); int i = 0; // related videos for(Element li : doc.select("ul[id=\"watch-related\"]").first().children()) { // first check if we have a playlist. If so leave them out if(li.select("a[class*=\"content-link\"]").first() != null) { //videoInfo.relatedVideos.add(extractVideoInfoItem(li)); //i++; //Log.d(TAG, Integer.toString(i)); } } */ return videoInfo; }
From source file:org.dswarm.xmlenhancer.XMLEnhancer.java
private static void unescapeEntity(final PrintWriter out, final Node node) { node.ownerDocument().outputSettings().escapeMode(Entities.EscapeMode.xhtml) .syntax(Document.OutputSettings.Syntax.xml).prettyPrint(false); if (node instanceof TextNode) { final TextNode textNode = (TextNode) node; final String wholeText = textNode.getWholeText(); out.print(wholeText);//from w w w . j av a 2s . c o m return; } final String nodeString = node.toString(); final String unescapedNodeString = Parser.unescapeEntities(nodeString, true); out.print(unescapedNodeString); }
From source file:org.schabi.newpipe.services.youtube.YoutubeVideoExtractor.java
@Override public VideoInfo.VideoStream[] getVideoStreams() { try {// w w w . j av a2s .c o m //------------------------------------ // extract video stream url //------------------------------------ String encoded_url_map = playerArgs.getString("url_encoded_fmt_stream_map"); Vector<VideoInfo.VideoStream> videoStreams = new Vector<>(); for (String url_data_str : encoded_url_map.split(",")) { Map<String, String> tags = new HashMap<>(); for (String raw_tag : Parser.unescapeEntities(url_data_str, true).split("&")) { String[] split_tag = raw_tag.split("="); tags.put(split_tag[0], split_tag[1]); } int itag = Integer.parseInt(tags.get("itag")); String streamUrl = URLDecoder.decode(tags.get("url"), "UTF-8"); // if video has a signature: decrypt it and add it to the url if (tags.get("s") != null) { streamUrl = streamUrl + "&signature=" + decryptSignature(tags.get("s"), decryptionCode); } if (resolveFormat(itag) != -1) { videoStreams.add(new VideoInfo.VideoStream(streamUrl, resolveFormat(itag), resolveResolutionString(itag))); } } return videoStreams.toArray(new VideoInfo.VideoStream[videoStreams.size()]); } catch (Exception e) { Log.e(TAG, "Failed to get video stream"); e.printStackTrace(); return new VideoInfo.VideoStream[0]; } }