Example usage for org.jsoup.parser Parser unescapeEntities

Introduction

This page collects example usages of org.jsoup.parser.Parser.unescapeEntities taken from open-source projects.

Prototype

public static String unescapeEntities(String string, boolean inAttribute)

Source Link

Document

Utility method to unescape HTML entities from a string

Usage

From source file:com.none.tom.simplerssreader.feed.CurrentFeed.java

/**
 * Returns the author of a feed entry with HTML entities unescaped.
 *
 * @param entry the feed entry whose author is read
 * @return {@code entry.getAuthor()} passed through {@code Parser.unescapeEntities}
 *         in non-attribute mode
 */
public static String getAuthorForEntry(final SyndEntry entry) {
    final String rawAuthor = entry.getAuthor();
    return Parser.unescapeEntities(rawAuthor, false);
}

From source file:com.none.tom.simplerssreader.feed.CurrentFeed.java

/**
 * Returns the title of a feed entry with HTML entities unescaped.
 *
 * @param entry the feed entry whose title is read
 * @return {@code entry.getTitle()} passed through {@code Parser.unescapeEntities}
 *         in non-attribute mode
 */
public static String getTitleForEntry(final SyndEntry entry) {
    final String rawTitle = entry.getTitle();
    return Parser.unescapeEntities(rawTitle, false);
}

From source file:com.none.tom.simplerssreader.feed.CurrentFeed.java

/**
 * Returns the description of a feed entry as plain text.
 *
 * @param entry the feed entry whose description is read
 * @return the text content of the description after unescaping entities and parsing
 *         the result as HTML, or {@code null} when the entry has no description
 */
public static CharSequence getDescriptionForEntry(final SyndEntry entry) {
    final SyndContent description = entry.getDescription();
    if (description == null) {
        return null;
    }

    // Unescape first so that escaped markup is parsed as markup, then keep only its text.
    final String unescaped = Parser.unescapeEntities(description.getValue(), false);
    return Jsoup.parse(unescaped).text();
}

From source file:com.none.tom.simplerssreader.feed.CurrentFeed.java

/**
 * Returns the first content element of a feed entry, ready for display.
 *
 * <p>Entities in the content value are unescaped. When the content type is {@code "html"}
 * or {@code "xhtml"} the unescaped value is additionally converted with
 * {@code HtmlUtils.fromHtml}; any other type (including a missing type) is returned as the
 * unescaped string.
 *
 * @param entry the feed entry whose contents are read
 * @return the converted first content element, or {@code null} when the entry has no contents
 */
public static CharSequence getContentForEntry(final SyndEntry entry) {
    final List<SyndContent> entryContents = entry.getContents();

    if (entryContents.isEmpty()) {
        return null;
    }

    final SyndContent entryContent = entryContents.get(0);
    final String type = entryContent.getType();
    final String unescaped = Parser.unescapeEntities(entryContent.getValue(), false);

    // Constant-first equals: a content element without a type must not cause an NPE here,
    // it is simply treated as non-HTML.
    if ("html".equals(type) || "xhtml".equals(type)) {
        return HtmlUtils.fromHtml(unescaped);
    }

    return unescaped;
}

From source file:ru.xxlabaza.popa.pack.PackingService.java

/**
 * Serializes the document to HTML, strips comments from it and unescapes HTML entities.
 *
 * @param document the document to serialize
 * @return the comment-free HTML with entities unescaped (non-attribute mode)
 */
private String processHtml(Document document) {
    final String html = document.html();
    final String withoutComments = commentRemoveService.removeComments(html, HTML);
    return Parser.unescapeEntities(withoutComments, false);
}

From source file:free.org.murottal.businessobjects.VideoStream.ParseStreamMetaData.java

/**
 * Returns a list of video/stream meta-data that is supported by this app.
 *
 * <p>The {@code url_encoded_fmt_stream_map} player argument is split on commas into stream
 * entries; each entry is entity-unescaped and split on {@code &} into {@code key=value} tags.
 * Streams whose format resolves to {@link MediaFormat#UNKNOWN} are left out.
 *
 * @return List of {@link StreamMetaData}.
 * @throws Exception any failure raised while reading the player arguments or while parsing
 *                   a stream entry (for example a missing or non-numeric {@code itag}) is
 *                   propagated to the caller
 */
public StreamMetaDataList getStreamMetaDataList() throws Exception {
    StreamMetaDataList streamMetaDataList = new StreamMetaDataList();
    String encodedUrlMap = playerArgs.getString("url_encoded_fmt_stream_map");

    for (String urlDataStr : encodedUrlMap.split(",")) {
        Map<String, String> tags = new HashMap<>();

        for (String rawTag : Parser.unescapeEntities(urlDataStr, true).split("&")) {
            // Limit 2 keeps '=' characters inside the value and keeps an empty value
            // ("key=") as "", instead of failing on a missing second array element.
            String[] keyAndValue = rawTag.split("=", 2);
            if (keyAndValue.length == 2) {
                tags.put(keyAndValue[0], keyAndValue[1]);
            }
        }

        int itag = Integer.parseInt(tags.get("itag"));
        String streamUrl = URLDecoder.decode(tags.get("url"), "UTF-8");

        // if video has a signature: decrypt it and add it to the url
        if (tags.get("s") != null) {
            streamUrl = streamUrl + "&signature=" + decryptSignature(tags.get("s"), decryptionCode);
        }

        // construct the meta-data of the video and add it to the list if it is supported
        StreamMetaData streamMetaData = new StreamMetaData(streamUrl, itag);
        if (streamMetaData.getFormat() != MediaFormat.UNKNOWN) {
            streamMetaDataList.add(streamMetaData);
        }
    }

    return streamMetaDataList;
}

From source file:com.geecko.QuickLyric.lyrics.LyricWiki.java

/**
 * Downloads a lyrics page and converts it into a {@link Lyrics} result.
 *
 * <p>The lyrics are taken from the first {@code div.lyricBox} element: everything before
 * the first HTML comment is kept, tags are stripped, newlines become {@code <br />} and
 * numeric entities are unescaped.
 *
 * @param url    address of the lyrics page; URLs ending in {@code action=edit} yield
 *               {@code NO_RESULT} without any download
 * @param artist artist name, or {@code null} to derive it from {@code url}
 * @param song   song title, or {@code null} to derive it from {@code url}
 * @return a {@code POSITIVE_RESULT} with text and metadata, a {@code NEGATIVE_RESULT} for
 *         unlicensed or instrumental songs, {@code NO_RESULT} for (almost) empty text, or
 *         {@code ERROR} when the download or the page extraction fails
 */
public static Lyrics fromURL(String url, String artist, String song) {
    if (url.endsWith("action=edit")) {
        return new Lyrics(NO_RESULT);
    }

    String text;
    try {
        Document page = Jsoup.connect(url).get();
        Element box = page.select("div.lyricBox").get(0);
        box.after(box.childNode(0));

        String boxHtml = box.html();
        // A page without "<!--" makes substring throw; that is handled as ERROR below.
        String beforeComment = boxHtml.substring(0, boxHtml.indexOf("<!--"));
        String withoutTags = beforeComment.replaceAll("<.*?>", "");
        text = withoutTags.replaceAll("\n", "<br />");

        if (text.contains("&#")) {
            text = Parser.unescapeEntities(text, true);
        }
    } catch (IndexOutOfBoundsException | IOException e) {
        e.printStackTrace();
        return new Lyrics(ERROR);
    }

    // Missing names are derived from the URL part after its first 24 characters,
    // which is expected to look like "Artist:Song".
    if (artist == null) {
        String pageName = url.substring(24).replace("Gracenote:", "");
        artist = pageName.split(":", 2)[0].replace('_', ' ');
    }
    if (song == null) {
        String pageName = url.substring(24).replace("Gracenote:", "");
        song = pageName.split(":", 2)[1].replace('_', ' ');
    }

    try {
        artist = URLDecoder.decode(artist, "UTF-8");
        song = URLDecoder.decode(song, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
    }

    boolean unlicensed = text.contains(
            "Unfortunately, we are not licensed to display the full lyrics for this song at the moment.");
    if (unlicensed || text.equals("Instrumental <br />")) {
        Lyrics negative = new Lyrics(NEGATIVE_RESULT);
        negative.setArtist(artist);
        negative.setTitle(song);
        return negative;
    }

    // Covers the empty string as well as one- and two-character leftovers.
    if (text.length() < 3) {
        return new Lyrics(NO_RESULT);
    }

    Lyrics positive = new Lyrics(POSITIVE_RESULT);
    positive.setArtist(artist);
    positive.setTitle(song);
    positive.setText(text);
    positive.setSource("LyricsWiki");
    positive.setURL(url);
    return positive;
}

From source file:com.pemikir.youtubeplus.youtube.YoutubeExtractor.java

/**
 * Builds a {@link VideoInfo} for the watch page at {@code siteUrl}.
 *
 * <p>The page is downloaded once and read in two passes: first the JSON object assigned to
 * {@code ytplayer.config} in the page source (uploader, title, duration, audio and video
 * streams), then the parsed HTML document (thumbnail, upload date, description, like and
 * dislike counts, uploader thumbnail, view count).
 *
 * <p>If the player config cannot be read, the video is marked as
 * {@link VideoInfo#VIDEO_UNAVAILABLE} and the HTML is checked for a GEMA notice.
 *
 * <p>NOTE(review): several HTML lookups below call {@code first()} outside a try block;
 * jsoup returns {@code null} from {@code first()} when nothing matches, so a page without
 * these elements makes this method throw a NullPointerException.
 *
 * @param siteUrl URL of the watch page; also used as the base URI for {@code abs:} attributes
 * @return the info object filled with everything that could be extracted
 */
@Override
public VideoInfo getVideoInfo(String siteUrl) {
    String site = Downloader.download(siteUrl);
    VideoInfo videoInfo = new VideoInfo();

    Document doc = Jsoup.parse(site, siteUrl);

    // The id is the run of letters and digits following the first "v=" in the URL.
    try {
        Pattern p = Pattern.compile("v=([0-9a-zA-Z]*)");
        Matcher m = p.matcher(siteUrl);
        m.find();
        videoInfo.id = m.group(1);
    } catch (Exception e) {
        e.printStackTrace();
    }

    videoInfo.age_limit = 0;
    videoInfo.webpage_url = siteUrl;

    //-------------------------------------
    // extracting from player args
    //-------------------------------------
    JSONObject playerArgs = null;
    JSONObject ytAssets = null;
    String dashManifest = "";
    {
        Pattern p = Pattern.compile("ytplayer.config\\s*=\\s*(\\{.*?\\});");
        Matcher m = p.matcher(site);
        m.find();

        try {
            // Parse the config once. If the pattern did not match, m.group(1) throws and
            // ends up in the catch below exactly like a JSON error would.
            JSONObject playerConfig = new JSONObject(m.group(1));
            playerArgs = playerConfig.getJSONObject("args");
            ytAssets = playerConfig.getJSONObject("assets");
        } catch (Exception e) {
            e.printStackTrace();
            // If we fail in this part the video is most likely not available.
            // Determining why is done later.
            videoInfo.videoAvailableStatus = VideoInfo.VIDEO_UNAVAILABLE;
        }
    }

    try {
        videoInfo.uploader = playerArgs.getString("author");
        videoInfo.title = playerArgs.getString("title");
        // first attempt: get a small image version;
        // in the html extracting part we try to get a thumbnail with a higher resolution
        videoInfo.thumbnail_url = playerArgs.getString("thumbnail_url");
        videoInfo.duration = playerArgs.getInt("length_seconds");
        videoInfo.average_rating = playerArgs.getString("avg_rating");
        // View Count will be extracted from html
        dashManifest = playerArgs.getString("dashmpd");
        String playerUrl = ytAssets.getString("js");
        if (playerUrl.startsWith("//")) {
            playerUrl = "https:" + playerUrl;
        }
        if (decryptoinCode.isEmpty()) {
            decryptoinCode = loadDecryptioinCode(playerUrl);
        }

        // extract audio
        videoInfo.audioStreams = parseDashManifest(dashManifest, decryptoinCode);

        //------------------------------------
        // extract video stream url
        //------------------------------------
        String encodedUrlMap = playerArgs.getString("url_encoded_fmt_stream_map");
        Vector<VideoInfo.VideoStream> videoStreams = new Vector<>();
        for (String urlDataStr : encodedUrlMap.split(",")) {
            Map<String, String> tags = new HashMap<>();
            for (String rawTag : Parser.unescapeEntities(urlDataStr, true).split("&")) {
                // Limit 2 keeps '=' characters inside the value and keeps an empty value
                // ("key=") as "", instead of failing on a missing second array element
                // and thereby losing every stream.
                String[] keyAndValue = rawTag.split("=", 2);
                if (keyAndValue.length == 2) {
                    tags.put(keyAndValue[0], keyAndValue[1]);
                }
            }

            int itag = Integer.parseInt(tags.get("itag"));
            String streamUrl = terrible_unescape_workaround_fuck(tags.get("url"));

            // if video has a signature: decrypt it and add it to the url
            if (tags.get("s") != null) {
                if (decryptoinCode.isEmpty()) {
                    decryptoinCode = loadDecryptioinCode(playerUrl);
                }
                streamUrl = streamUrl + "&signature=" + decryptSignature(tags.get("s"), decryptoinCode);
            }

            if (resolveFormat(itag) != -1) {
                videoStreams.add(new VideoInfo.VideoStream(streamUrl, resolveFormat(itag),
                        resolveResolutionString(itag)));
            }
        }
        videoInfo.videoStreams = videoStreams.toArray(new VideoInfo.VideoStream[videoStreams.size()]);

    } catch (Exception e) {
        e.printStackTrace();
    }

    //-------------------------------
    // extracting from html page
    //-------------------------------

    // Determine what went wrong when the Video is not available
    if (videoInfo.videoAvailableStatus == VideoInfo.VIDEO_UNAVAILABLE) {
        if (doc.select("h1[id=\"unavailable-message\"]").first().text().contains("GEMA")) {
            videoInfo.videoAvailableStatus = VideoInfo.VIDEO_UNAVAILABLE_GEMA;
        }
    }

    // Try to get high resolution thumbnail if it fails use low res from the player instead
    try {
        videoInfo.thumbnail_url = doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href");
    } catch (Exception e) {
        Log.i(TAG, "Could not find high res Thumbnail. Use low res instead");
    }

    // upload date (the attribute selector needs its closing ']' to be a valid query)
    videoInfo.upload_date = doc.select("strong[class=\"watch-time-text\"]").first().text();
    // Try to only use date not the text around it
    try {
        Pattern p = Pattern.compile("([0-9.]*$)");
        Matcher m = p.matcher(videoInfo.upload_date);
        m.find();
        videoInfo.upload_date = m.group(1);
    } catch (Exception e) {
        e.printStackTrace();
    }

    // description
    videoInfo.description = doc.select("p[id=\"eow-description\"]").first().html();

    try {
        // likes
        videoInfo.like_count = doc.select("span[class=\"like-button-renderer \"]").first().getAllElements()
                .select("button").select("span").get(0).text();

        // dislikes
        videoInfo.dislike_count = doc.select("span[class=\"like-button-renderer \"]").first().getAllElements()
                .select("button").select("span").get(2).text();
    } catch (Exception e) {
        // if it fails we know that the video does not offer dislikes.
        videoInfo.like_count = "0";
        videoInfo.dislike_count = "0";
    }

    // uploader thumbnail
    videoInfo.uploader_thumbnail_url = doc.select("a[class*=\"yt-user-photo\"]").first().select("img").first()
            .attr("abs:data-thumb");

    // view count
    videoInfo.view_count = doc.select("div[class=\"watch-view-count\"]").first().text();

    /* todo finish this code

    // next video
    videoInfo.nextVideo = extractVideoInfoItem(doc.select("div[class=\"watch-sidebar-section\"]").first()
        .select("li").first());

    int i = 0;
    // related videos
    for(Element li : doc.select("ul[id=\"watch-related\"]").first().children()) {
    // first check if we have a playlist. If so leave them out
    if(li.select("a[class*=\"content-link\"]").first() != null) {
        //videoInfo.relatedVideos.add(extractVideoInfoItem(li));
        //i++;
        //Log.d(TAG, Integer.toString(i));
    }
    }

    */

    return videoInfo;
}

From source file:org.dswarm.xmlenhancer.XMLEnhancer.java

/**
 * Prints a node to {@code out} with entities unescaped.
 *
 * <p>The owning document's output settings are first switched to XHTML escape mode, XML
 * syntax and no pretty-printing. Text nodes are then printed via their whole text; every
 * other node is serialized with {@code toString()} and passed through
 * {@code Parser.unescapeEntities} in attribute mode.
 *
 * @param out  the writer that receives the output
 * @param node the node to print
 */
private static void unescapeEntity(final PrintWriter out, final Node node) {

    final Document.OutputSettings settings = node.ownerDocument().outputSettings();
    settings.escapeMode(Entities.EscapeMode.xhtml);
    settings.syntax(Document.OutputSettings.Syntax.xml);
    settings.prettyPrint(false);

    if (!(node instanceof TextNode)) {
        final String serialized = node.toString();
        out.print(Parser.unescapeEntities(serialized, true));
        return;
    }

    out.print(((TextNode) node).getWholeText());
}

From source file:org.schabi.newpipe.services.youtube.YoutubeVideoExtractor.java

/**
 * Extracts the video streams listed in the {@code url_encoded_fmt_stream_map} player argument.
 *
 * <p>The argument is split on commas into stream entries; each entry is entity-unescaped and
 * split on {@code &} into {@code key=value} tags. Entries whose {@code itag} does not resolve
 * to a known format are left out.
 *
 * @return the supported streams, or an empty array when extraction fails for any reason
 */
@Override
public VideoInfo.VideoStream[] getVideoStreams() {
    try {
        //------------------------------------
        // extract video stream url
        //------------------------------------
        String encodedUrlMap = playerArgs.getString("url_encoded_fmt_stream_map");
        Vector<VideoInfo.VideoStream> videoStreams = new Vector<>();
        for (String urlDataStr : encodedUrlMap.split(",")) {
            Map<String, String> tags = new HashMap<>();
            for (String rawTag : Parser.unescapeEntities(urlDataStr, true).split("&")) {
                // Limit 2 keeps '=' characters inside the value and keeps an empty value
                // ("key=") as "", instead of failing on a missing second array element
                // and thereby discarding every stream.
                String[] keyAndValue = rawTag.split("=", 2);
                if (keyAndValue.length == 2) {
                    tags.put(keyAndValue[0], keyAndValue[1]);
                }
            }

            int itag = Integer.parseInt(tags.get("itag"));
            String streamUrl = URLDecoder.decode(tags.get("url"), "UTF-8");

            // if video has a signature: decrypt it and add it to the url
            if (tags.get("s") != null) {
                streamUrl = streamUrl + "&signature=" + decryptSignature(tags.get("s"), decryptionCode);
            }

            if (resolveFormat(itag) != -1) {
                videoStreams.add(new VideoInfo.VideoStream(streamUrl, resolveFormat(itag),
                        resolveResolutionString(itag)));
            }
        }
        return videoStreams.toArray(new VideoInfo.VideoStream[videoStreams.size()]);

    } catch (Exception e) {
        Log.e(TAG, "Failed to get video stream");
        e.printStackTrace();
        return new VideoInfo.VideoStream[0];
    }
}