io.github.carlomicieli.footballdb.starter.documents.WebDocumentDownloader.java Source code

Java tutorial

Introduction

Here is the source code for io.github.carlomicieli.footballdb.starter.documents.WebDocumentDownloader.java.

Source

/*
 * Copyright 2014 the original author or authors.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package io.github.carlomicieli.footballdb.starter.documents;

import io.github.carlomicieli.footballdb.starter.App;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.springframework.stereotype.Component;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Optional;

/**
 * @author Carlo Micieli
 */
@Component("documentDownloader")
public class WebDocumentDownloader implements DocumentDownloader {
    private static final String NFL_ROOT_URL = "http://www.nfl.com";
    private static final String CHROME_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) "
            + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/34.0.1847.116 Safari/537.36";

    @Override
    public Optional<Document> from(String uri) {
        return Optional.ofNullable(downloadFromURL(uri));
    }

    private static Document downloadFromURL(String url) {
        validateUrl(url);

        try {
            Document doc = Jsoup.connect(url).userAgent(CHROME_USER_AGENT).get();

            App.log().info("Downloading '{}'...", doc.title());
            return doc;
        } catch (IOException e) {
            App.log().error("Error for '{}': {}", url, e);
            return null;
        }
    }

    private static void validateUrl(String url) {
        try {
            URL u = new URL(url);
            u.toURI();
        } catch (MalformedURLException | URISyntaxException e) {
            throw new IllegalArgumentException("Invalid url value: " + url);
        }
    }

    public static String nfl(String path) {
        return NFL_ROOT_URL + path;
    }
}