com.tinspx.util.net.RefreshRedirect.java Source code

Java tutorial

Introduction

Here is the source code for com.tinspx.util.net.RefreshRedirect.java

Source

/* Copyright (C) 2013-2014 Ian Teune <ian.teune@gmail.com>
 * 
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 * 
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
package com.tinspx.util.net;

import com.google.common.base.Strings;
import com.google.common.net.HttpHeaders;
import com.tinspx.util.base.BasicError;
import com.tinspx.util.base.Errors;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;
import lombok.Value;
import lombok.experimental.Accessors;

/**
 * A redirect handler that will follow
 * <a href="http://en.wikipedia.org/wiki/Meta_refresh">mete refresh</a>
 * redirects. Note that the response must be decoded as a
 * {@link Response#asCharSequence() CharSequence} by calling
 * {@link Request#asCharSequence(boolean)} in order for this class to work.
 * <p>
 * To add refresh redirection to the
 * {@link Requests#defaultRedirectHandler() default} redirect handler, use 
 * {@code Requests.chain(Requests.defaultRedirectHandler(), RefreshRedirect.instance())}.
 * 
 * @author Ian
 */
@Value
@Accessors(fluent = true)
@lombok.experimental.Builder(builderClassName = "Builder")
public class RefreshRedirect implements RedirectHandler {

    private static final RefreshRedirect INSTANCE = builder().build();

    /**
     * Returns a singleton instance with the default settings. Both header
     * and meta tag refresh redirects will be followed, but the referer will
     * not be included and delays are not honored.
     */
    public static RefreshRedirect instance() {
        return INSTANCE;
    }

    /**
     * If {@code true}, the referer from the request that caused the redirect
     * will be preserved as the referer of the redirect (if it exists). Defaults
     * to {@code false}.
     */
    boolean preserveReferer;
    /**
     * If {@code true}, an referer will always be included in the redirect. If
     * the referer is not {@link #preserveReferer() preserved}, then it will be
     * set to the {@code URI} of the response that is being redirected. If
     * false, then the referer will be removed unless it is being
     * {@link #preserveReferer() preserved}. Defaults to {@code false}.
     */
    boolean includeReferer;
    /**
     * If {@code true}, the meta http-equiv refresh tag will be processed to
     * determine the redirect location, if possible. Defaults to {@code true}.
     * Set to {@code false} to prevent the html meta tags from being processed
     * and only use the http {@code Refresh} header.
     */
    boolean allowTag;
    /**
     * If {@code true}, the http {@code Refresh} header will be processed to
     * determine the redirect location, if possible. Defaults to {@code true}.
     * Set to {@code false} to prevent the {@code Refresh} header from being
     * processed and only use the html meta tags.
     */
    boolean allowHeader;
    /**
     * If {@code true}, the refresh delay will be honored. Defaults to
     * {@code false}, causing redirects to be executed immediately regardless of
     * the delay. Set to {@code true} to have the redirect honor the refresh
     * delay.
     */
    boolean honorDelay;

    public static Builder builder() {
        return new Builder().allowHeader(true).allowTag(true);
    }

    @AllArgsConstructor
    @EqualsAndHashCode
    static class Refresh {
        double delay;
        String location;

        @Override
        public String toString() {
            return delay + (Strings.isNullOrEmpty(location) ? "" : "; url=" + location);
        }
    }

    //(?i)<\s*meta.+?http-equiv\s*=\s*['"]?refresh.*?>
    private static final Pattern EXTRACT = Pattern.compile("(?i)<\\s*meta.+?http-equiv\\s*=\\s*['\"]?refresh.*?>");

    private static final Pattern CONTENT_QUOTED = Pattern.compile(
            //(?i)content\s*=\s*(['"])(.+?)\1
            "(?i)content\\s*=\\s*(['\"])(.+?)\\1");

    private static final Pattern CONTENT_UNQUOTED = Pattern.compile(
            //(?i)content\s*=\s*([^'">][^\s>]*)
            "(?i)content\\s*=\\s*([^'\">][^\\s>]*)");

    private static final Pattern DELAY_URL = Pattern.compile(
            //(?i)\s*([0-9\.]*);?\s*(?:url\s*=\s*)?(.+)
            "(?i)\\s*([0-9\\.]*);?\\s*(?:url\\s*=\\s*)?(.+)");

    public Builder toBuilder() {
        return builder().allowHeader(allowHeader).allowTag(allowTag).includeReferer(includeReferer)
                .preserveReferer(preserveReferer).honorDelay(honorDelay);
    }

    @Override
    public boolean shouldRedirect(Response response) {
        if (response == null) {
            return false;
        }
        if (allowHeader && fromHeaders(response.headers(), response) != null) {
            return true;
        }
        return allowTag && response.hasCharSequence() && fromContent(response.asCharSequence(), response) != null;
    }

    @Override
    public Request apply(Request request) {
        if (request == null) {
            return null;
        }
        final Response cause = request.cause().get();
        if (allowHeader) {
            Refresh refresh = fromHeaders(cause.headers(), cause);
            if (refresh != null) {
                Request r = doApply(cause, request, refresh);
                //could have an invalid url
                if (r != null) {
                    return r;
                }
            }
        }
        if (allowTag && cause.hasCharSequence()) {
            Refresh refresh = fromContent(cause.asCharSequence(), cause);
            if (refresh != null) {
                return doApply(cause, request, refresh);
            }
        }
        return null;
    }

    static @Nullable Refresh fromContent(CharSequence content, BasicError.Listener listener) {
        Matcher m = EXTRACT.matcher(content);
        if (!m.find()) {
            return null;
        }
        final String equiv = m.group();
        m = CONTENT_QUOTED.matcher(equiv);
        if (m.find()) {
            Refresh r = fromHeader(m.group(2), listener);
            if (r != null) {
                return r;
            }
        }
        m = CONTENT_UNQUOTED.matcher(equiv);
        if (m.find()) {
            return fromHeader(m.group(1), listener);
        }
        listener.onError(Errors.message("no content (%s)", equiv));
        return null;
    }

    static @Nullable Refresh fromHeaders(Headers headers, BasicError.Listener listener) {
        String value = headers.last(HttpHeaders.REFRESH);
        if (!Strings.isNullOrEmpty(value)) {
            return fromHeader(value, listener);
        }
        return null;
    }

    static @Nullable Refresh fromHeader(CharSequence header, BasicError.Listener listener) {
        Matcher m = DELAY_URL.matcher(header);
        if (m.find()) {
            double delay = 0;
            try {
                delay = Double.parseDouble(m.group(1));
            } catch (NumberFormatException ex) {
                listener.onError(Errors.create(ex, "invalid refresh delay: %s", header));
            }
            return new Refresh(delay, Strings.nullToEmpty(m.group(2)).trim());
        }
        listener.onError(Errors.message("invalid refresh: %s", header));
        return null;
    }

    private @Nullable Request doApply(Response cause, Request request, Refresh refresh) {
        if (Strings.isNullOrEmpty(refresh.location)) {
            request.uri(cause.uri());
        } else {
            final URI uri;
            try {
                uri = new URI(refresh.location.trim());
            } catch (URISyntaxException ex) {
                request.onError(Errors.create(this, ex, "invalid refresh uri: %s", refresh.location));
                return null;
            }
            request.uri(cause.uri().resolve(uri));
        }
        boolean refererSet = false;
        if (preserveReferer && cause.request().headers().contains(HttpHeaders.REFERER)) {
            refererSet = true;
            request.headers().set(HttpHeaders.REFERER, cause.request().headers().last(HttpHeaders.REFERER));
        }
        if (!refererSet) {
            if (includeReferer) {
                request.headers().set(HttpHeaders.REFERER, cause.uri().toASCIIString());
            } else {
                request.headers().removeAll(HttpHeaders.REFERER);
            }
        }
        if (honorDelay && refresh.delay > 0) {
            long millis = (long) (refresh.delay * 1000);
            if (millis > 0) {
                request.properties().put(Request.REDIRECT_DELAY_MILLIS, millis);
            }
        }
        return request;
    }
}