Java HTML Parse Jsoup extractRssUrl(String html, URI base)

Here you can find the source of extractRssUrl(String html, URI base)

Description

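Finds the first <link rel="alternate"> element of type RSS or Atom in the given HTML, skipping feeds whose href or title mentions "comment", and returns its href resolved against the base URI, or null if no such link exists.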

License

Open Source License

Declaration

public static String extractRssUrl(String html, URI base) 

Method Source Code


//package com.java2s;
import java.net.URI;

import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import org.jsoup.select.Elements;

public class Main {
    final static String HREF = "href";
    final static String LINK = "link";
    final static String RSS = "application/rss+xml";
    final static String ATOM = "application/atom+xml";
    final static String TITLE = "title";
    final static String TYPE = "type";
    final static String REL = "rel";
    final static String ALTERNATE = "alternate";
    // Matches the word "comment" anywhere (case-insensitive); used to skip comment feeds.
    final static Pattern comment = Pattern.compile("comment", Pattern.CASE_INSENSITIVE);

    /**
     * Extracts the URL of the first RSS or Atom feed advertised by an HTML page.
     *
     * <p>A feed is advertised by a {@code <link>} element whose {@code rel} contains the
     * {@code alternate} token and whose {@code type} is {@code application/rss+xml} or
     * {@code application/atom+xml}. Links whose {@code href} or {@code title} contains
     * "comment" (case-insensitive) are skipped, as are links with an empty or malformed
     * {@code href}.
     *
     * @param html the HTML markup to scan; {@code null} yields {@code null}
     * @param base the URI against which the link's {@code href} is resolved; must not be
     *             {@code null}
     * @return the resolved URL of the first matching feed link, or {@code null} if the page
     *         advertises no usable feed
     */
    public static String extractRssUrl(String html, URI base) {
        if (html == null) {
            return null;
        }

        Document d = Jsoup.parse(html);
        Elements links = d.getElementsByTag(LINK);

        for (Element link : links) {
            if (!hasAlternateRel(link.attr(REL))) {
                continue;
            }

            String type = link.attr(TYPE).trim();
            if (!RSS.equalsIgnoreCase(type) && !ATOM.equalsIgnoreCase(type)) {
                continue;
            }

            // Element.attr returns "" (never null) for a missing attribute, so an absent
            // href must be detected by emptiness; resolving "" would yield a non-feed URL.
            String href = link.attr(HREF).trim();
            if (href.isEmpty()) {
                continue;
            }

            // ignore comment feeds
            String title = link.attr(TITLE);
            if (comment.matcher(href).find() || comment.matcher(title).find()) {
                continue;
            }

            try {
                // return the first usable one
                return base.resolve(href).toString();
            } catch (IllegalArgumentException malformedHref) {
                // href is not a syntactically valid URI reference; try the next candidate
                // instead of failing the whole extraction.
            }
        }
        return null;
    }

    /**
     * Tells whether a {@code rel} attribute value contains the {@code alternate} token.
     * The value is treated as a whitespace-separated, case-insensitive token list.
     */
    private static boolean hasAlternateRel(String rel) {
        for (String token : rel.trim().split("\\s+")) {
            if (ALTERNATE.equalsIgnoreCase(token)) {
                return true;
            }
        }
        return false;
    }
}

Related

  1. cleanHtmlFromString(String stringToClean)
  2. cleanHTMLTags(String str)
  3. cleanupHtmlDoc(String s)
  4. clearBody(String html)
  5. coverTag(String html, String... tagNames)
  6. filter(String html)
  7. fixHtml(String htmlContent, String outputFile, String contentFile)
  8. getContentFromHTML(String html)
  9. getDistinctImageUrls(String htmlContent)