com.gistlabs.mechanize.document.html.HtmlDocument.java Source code

Java tutorial

Introduction

Here is the source code for the class com.gistlabs.mechanize.document.html.HtmlDocument (file HtmlDocument.java).

Source

/**
 * Copyright (C) 2012-2014 Gist Labs, LLC. (http://gistlabs.com)
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
package com.gistlabs.mechanize.document.html;

import java.util.Arrays;
import java.util.Collection;
import java.util.List;

import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpRequestBase;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import com.gistlabs.mechanize.Mechanize;
import com.gistlabs.mechanize.document.AbstractDocument;
import com.gistlabs.mechanize.document.html.form.Forms;
import com.gistlabs.mechanize.document.html.image.Images;
import com.gistlabs.mechanize.document.link.Links;
import com.gistlabs.mechanize.document.node.Node;
import com.gistlabs.mechanize.exceptions.MechanizeExceptionFactory;
import com.gistlabs.mechanize.util.apache.ContentType;

/**
 * Document implementation that parses its content with jsoup and exposes the result
 * through {@link HtmlElements}.
 *
 * <p>All element access goes through {@link #htmlElements()}, which calls
 * {@link #loadPage()} on demand when the parsed elements are not available yet.
 *
 * @author Martin Kersten <Martin.Kersten.mk@gmail.com>
 */
public class HtmlDocument extends AbstractDocument {
    /**
     * MIME types associated with this document class: HTML, Atom XML, XHTML and generic XML.
     *
     * <p>NOTE(review): this field is public and non-final, so it can be reassigned from
     * outside; its consumers are not visible in this file.
     */
    public static Collection<String> CONTENT_MATCHERS = Arrays.asList(ContentType.TEXT_HTML.getMimeType(),
            ContentType.APPLICATION_ATOM_XML.getMimeType(), ContentType.APPLICATION_XHTML_XML.getMimeType(),
            ContentType.APPLICATION_XML.getMimeType());

    /** Parsed elements of the page; {@code null} until {@link #loadPage()} has run. */
    private HtmlElements htmlElements;

    /**
     * Base URI taken from the parsed document's head, stored only when it differs from
     * the URI otherwise reported by {@link #getUri()}; {@code null} when there is no override.
     */
    private String baseUri;

    /**
     * Creates the document by delegating to the superclass constructor.
     *
     * @param agent the agent, passed through to the superclass
     * @param request the request, passed through to the superclass
     * @param response the response, passed through to the superclass
     */
    public HtmlDocument(final Mechanize agent, final HttpRequestBase request, final HttpResponse response) {
        super(agent, request, response);
    }

    /**
     * Returns the root element of the parsed page.
     *
     * @return the root as reported by {@link HtmlElements#getRoot()}
     */
    @Override
    public HtmlElement getRoot() {
        return htmlElements().getRoot();
    }

    /**
     * Delegates to the superclass lookup and narrows the result to {@link HtmlElement}.
     *
     * @param csss the selector string handed to the superclass
     * @return the superclass result cast to {@link HtmlElement}
     */
    @Override
    public HtmlElement find(String csss) {
        return (HtmlElement) super.find(csss);
    }

    /**
     * Delegates to the superclass lookup and narrows the result list to {@link HtmlElement}s.
     *
     * @param csss the selector string handed to the superclass
     * @return the superclass result cast to a list of {@link HtmlElement}s
     */
    @SuppressWarnings("unchecked")
    @Override
    public List<? extends HtmlElement> findAll(String csss) {
        return (List<? extends HtmlElement>) super.findAll(csss);
    }

    /**
     * Parses the document's input stream with jsoup, records the base URI found on the
     * parsed head element, and builds the {@link HtmlElements} wrapper.
     *
     * @throws Exception if reading or parsing the content fails
     */
    @Override
    protected void loadPage() throws Exception {
        Document jsoup = Jsoup.parse(getInputStream(), getContentEncoding(response), getUri());
        setBaseUri(jsoup.head().baseUri());
        this.htmlElements = new HtmlElements(this, jsoup);
    }

    /**
     * Stores the given base URI as an override, but only when it differs from what
     * {@link #getUri()} currently returns.
     *
     * @param baseUri the base URI reported by the parsed document
     */
    private void setBaseUri(final String baseUri) {
        if (!this.getUri().equals(baseUri)) {
            this.baseUri = baseUri;
        }
    }

    /**
     * Returns the overriding base URI when one was recorded, otherwise the superclass URI.
     *
     * @return the effective URI of this document
     */
    @Override
    public String getUri() {
        return this.baseUri == null ? super.getUri() : this.baseUri;
    }

    /**
     * Collects every {@code a} element of the page into a {@link Links} instance.
     *
     * @return the links built from the matching elements
     */
    @Override
    protected Links loadLinks() {
        List<? extends Node> links = htmlElements().findAll("a");
        return new Links(this, links);
    }

    /**
     * Collects every {@code form} element of the page into a {@link Forms} instance.
     *
     * @return the forms built from the matching elements
     */
    @Override
    protected Forms loadForms() {
        List<? extends Node> forms = htmlElements().findAll("form");
        return new Forms(this, forms);
    }

    /**
     * Collects every {@code img} element of the page into an {@link Images} instance.
     *
     * @return the images built from the matching elements
     */
    @Override
    protected Images loadImages() {
        List<HtmlElement> images = htmlElements().findAll("img");
        return new Images(this, images);
    }

    /**
     * Returns the parsed elements, calling {@link #loadPage()} first when they are not
     * available yet. Any exception raised while loading is converted through
     * {@link MechanizeExceptionFactory#newException(Exception)} and rethrown.
     *
     * @return the parsed elements of this page
     */
    public HtmlElements htmlElements() {
        if (htmlElements == null) {
            try {
                loadPage();
            } catch (Exception e) {
                throw MechanizeExceptionFactory.newException(e);
            }
        }
        return htmlElements;
    }

    /**
     * Returns the title of the page or null.
     *
     * @return the text of the first {@code title} element, or {@code null} when none is found
     */
    @Override
    public String getTitle() {
        HtmlElement title = htmlElements().find("title");
        return title != null ? title.getText() : null;
    }

    /**
     * Serialize the contents of this page into a string.
     *
     * @return the string form of the parsed elements
     */
    @Override
    public String asString() {
        // Use the null-guarded accessor like every other method here; reading the field
        // directly would throw a NullPointerException if the page has not been loaded.
        return htmlElements().toString();
    }

}