Android Open Source - CATaZine-Live Html Utils






From Project

Back to project page CATaZine-Live.

License

The source code is released under:

GNU General Public License

If you think the Android project CATaZine-Live listed on this page is inappropriate, for example because it contains malicious code or tools or violates copyright, please email info at java2s dot com. Thanks.

Java Source Code

package com.melegy.catazine.utils;
/* from www.java2s.com */
import android.content.Context;
import android.content.Intent;
import android.text.TextUtils;


import org.jsoup.Jsoup;
import org.jsoup.safety.Whitelist;

import com.melegy.catazine.Constants;
import com.melegy.catazine.MainApplication;
import com.melegy.catazine.service.FetcherService;

import java.io.File;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers for cleaning feed HTML and for locating / rewriting the image URLs it contains.
 */
public class HtmlUtils {

    /**
     * Jsoup whitelist used by {@link #improveHtmlContent(String, String)}: the relaxed whitelist
     * extended with embedded media tags (iframe, video, audio, source, track) and the attributes
     * listed below for each of them.
     */
    private static final Whitelist JSOUP_WHITELIST = Whitelist.relaxed().addTags("iframe", "video", "audio", "source", "track")
            .addAttributes("iframe", "src", "frameborder", "height", "width")
            .addAttributes("video", "src", "controls", "height", "width", "poster")
            .addAttributes("audio", "src", "controls")
            .addAttributes("source", "src", "type")
            .addAttributes("track", "src", "kind", "srclang", "label");

    // Group 1 captures the value of the src attribute; the value may be wrapped in either
    // single or double quotes, and whitespace is tolerated after "src=".
    private static final Pattern IMG_PATTERN = Pattern.compile("<img\\s+[^>]*src=\\s*['\"]([^'\"]+)['\"][^>]*>", Pattern.CASE_INSENSITIVE);
    private static final String URL_SPACE = "%20";

    // The patterns below are compiled once instead of on every improveHtmlContent() call.
    // Matches an "mf-viral" div followed by a table, up to the end of that line.
    private static final Pattern AD_PATTERN = Pattern.compile("(?i)<div class=('|\")mf-viral('|\")><table border=('|\")0('|\")>.*");
    // Matches a placeholder src attribute followed by an "original-src"/"originalsrc" attribute opener.
    private static final Pattern LAZY_LOADING_PATTERN = Pattern.compile("(?i)\\s+src=[^>]+\\s+original[-]*src=(\"|')");
    // Matches href/src attributes whose value starts with "//" (no scheme).
    private static final Pattern SCHEMELESS_URL_PATTERN = Pattern.compile("(?i)\\s+(href|src)=(\"|')//");

    /**
     * Cleans up a piece of HTML: strips a known ad block, turns lazy-loading image attributes into
     * a plain {@code src}, prefixes scheme-less ({@code //host/...}) href/src values with
     * {@code http:}, and finally sanitizes the result with Jsoup using {@link #JSOUP_WHITELIST}.
     *
     * @param content the HTML to clean; may be {@code null}
     * @param baseUri the base URI handed to {@code Jsoup.clean}
     * @return the cleaned HTML, or {@code null} if {@code content} was {@code null}
     */
    public static String improveHtmlContent(String content, String baseUri) {
        if (content != null) {
            // remove some ads
            content = AD_PATTERN.matcher(content).replaceAll("");
            // remove lazy loading images stuff: keep the real URL as the src attribute
            content = LAZY_LOADING_PATTERN.matcher(content).replaceAll(" src=$1");
            // remove bad image paths
            content = SCHEMELESS_URL_PATTERN.matcher(content).replaceAll(" $1=$2http://");
            // clean by jsoup
            content = Jsoup.clean(content, baseUri, JSOUP_WHITELIST);
        }

        return content;
    }

    /**
     * Collects the {@code src} value of every {@code <img>} tag found in the given HTML.
     *
     * @param content the HTML to scan; may be {@code null} or empty
     * @return the image URLs in document order, with spaces replaced by {@code %20};
     *         an empty list when nothing matches
     */
    public static ArrayList<String> getImageURLs(String content) {
        ArrayList<String> images = new ArrayList<String>();

        if (!TextUtils.isEmpty(content)) {
            Matcher matcher = IMG_PATTERN.matcher(content);

            while (matcher.find()) {
                images.add(matcher.group(1).replace(" ", URL_SPACE));
            }
        }

        return images;
    }

    /**
     * Rewrites the image URLs of the given HTML so that they point to the local path returned by
     * {@code NetworkUtils.getDownloadedImagePath}. Images whose local file does not exist yet are
     * handed to {@code FetcherService} for download from a background thread.
     * <p>
     * Every occurrence of an image URL in {@code content} is replaced, not only the one inside the
     * {@code <img>} tag.
     *
     * @param content the HTML to rewrite; may be {@code null} or empty
     * @param entryId the id of the entry the content belongs to
     * @return the rewritten HTML; {@code content} itself when it is {@code null} or empty
     */
    public static String replaceImageURLs(String content, final long entryId) {

        if (!TextUtils.isEmpty(content)) {
            // The matcher keeps scanning the original string even though "content" is reassigned below.
            Matcher matcher = IMG_PATTERN.matcher(content);

            final ArrayList<String> imagesToDl = new ArrayList<String>();

            while (matcher.find()) {
                // rawUrl is the text as it appears in the HTML; match is the escaped form used
                // for the local path lookup and for the download queue.
                String rawUrl = matcher.group(1);
                String match = rawUrl.replace(" ", URL_SPACE);

                if (!match.startsWith(Constants.FILE_SCHEME)) { // Just for legacy, could be removed later
                    String imgPath = NetworkUtils.getDownloadedImagePath(entryId, match);
                    // Replace the raw text: the escaped form is absent from the HTML whenever
                    // the URL contains a space, so replacing it would silently do nothing.
                    content = content.replace(rawUrl, Constants.FILE_SCHEME + imgPath);

                    // Queue each missing image only once, even if it is referenced several times.
                    if (!new File(imgPath).exists() && !imagesToDl.contains(match)) {
                        imagesToDl.add(match);
                    }
                }
            }

            // Download the images if needed
            if (!imagesToDl.isEmpty()) {
                new Thread(new Runnable() {
                    @Override
                    public void run() {
                        FetcherService.addImagesToDownload(String.valueOf(entryId), imagesToDl);
                        Context context = MainApplication.getContext();
                        context.startService(new Intent(context, FetcherService.class).setAction(FetcherService.ACTION_DOWNLOAD_IMAGES));
                    }
                }).start();
            }
        }

        return content;
    }
}




Java Source Code List

com.melegy.catazine.Constants.java
com.melegy.catazine.MainApplication.java
com.melegy.catazine.activity.AboutActivity.java
com.melegy.catazine.activity.BaseActivity.java
com.melegy.catazine.activity.EntryActivity.java
com.melegy.catazine.activity.GeneralPrefsActivity.java
com.melegy.catazine.activity.HomeActivity.java
com.melegy.catazine.adapter.CursorLoaderExpandableListAdapter.java
com.melegy.catazine.adapter.DrawerAdapter.java
com.melegy.catazine.adapter.EntriesCursorAdapter.java
com.melegy.catazine.adapter.FeedsCursorAdapter.java
com.melegy.catazine.fragment.EntriesListFragment.java
com.melegy.catazine.fragment.EntryFragment.java
com.melegy.catazine.loader.BaseLoader.java
com.melegy.catazine.parser.OPML.java
com.melegy.catazine.parser.RssAtomParser.java
com.melegy.catazine.provider.DatabaseHelper.java
com.melegy.catazine.provider.FeedDataContentProvider.java
com.melegy.catazine.provider.FeedData.java
com.melegy.catazine.receiver.BootCompletedBroadcastReceiver.java
com.melegy.catazine.receiver.ConnectionChangeReceiver.java
com.melegy.catazine.service.FetcherService.java
com.melegy.catazine.service.RefreshService.java
com.melegy.catazine.utils.ArticleTextExtractor.java
com.melegy.catazine.utils.HtmlUtils.java
com.melegy.catazine.utils.NetworkUtils.java
com.melegy.catazine.utils.PrefUtils.java
com.melegy.catazine.utils.StringUtils.java
com.melegy.catazine.utils.ThrottledContentObserver.java
com.melegy.catazine.utils.UiUtils.java
com.melegy.catazine.view.DragNDropExpandableListView.java
com.melegy.catazine.view.DragNDropListener.java
com.melegy.catazine.view.EntryView.java
com.melegy.catazine.widget.ColorPickerDialogPreference.java
com.melegy.catazine.widget.TickerWidgetProvider.java
com.melegy.catazine.widget.WidgetConfigActivity.java
com.melegy.catazine.widget.WidgetProvider.java
com.melegy.catazine.widget.WidgetService.java