org.jsoup.parser.Tag.java Source code

Java tutorial

Introduction

Here is the source code for org.jsoup.parser.Tag.java

Source

package org.jsoup.parser;

import org.jsoup.helper.Validate;
import org.jsoup.internal.Normalizer;

import java.util.HashMap;
import java.util.Map;

/**
 * HTML Tag capabilities.
 *
 * @author Jonathan Hedley, jonathan@hedley.net
 */
public class Tag {
    private static final Map<String, Tag> tags = new HashMap<>(); // map of known tags

    private String tagName;
    private String normalName; // always the lower case version of this tag, regardless of case preservation mode
    private boolean isBlock = true; // block or inline
    private boolean formatAsBlock = true; // should be formatted as a block
    private boolean canContainInline = true; // only pcdata if not
    private boolean empty = false; // can hold nothing; e.g. img
    private boolean selfClosing = false; // can self close (<foo />). used for unknown tags that self close, without forcing them as empty.
    private boolean preserveWhitespace = false; // for pre, textarea, script etc
    private boolean formList = false; // a control that appears in forms: input, textarea, output etc
    private boolean formSubmit = false; // a control that can be submitted in a form: input etc

    private Tag(String tagName) {
        this.tagName = tagName;
        normalName = Normalizer.lowerCase(tagName);
    }

    /**
     * Get this tag's name.
     *
     * @return the tag's name
     */
    public String getName() {
        return tagName;
    }

    /**
     * Get this tag's normalized (lowercased) name.
     * @return the tag's normal name.
     */
    public String normalName() {
        return normalName;
    }

    /**
     * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
     * <p>
     * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
     * </p>
     * 
     * @param tagName Name of tag, e.g. "p". Case insensitive.
     * @param settings used to control tag name sensitivity
     * @return The tag, either defined or new generic.
     */
    public static Tag valueOf(String tagName, ParseSettings settings) {
        Validate.notNull(tagName);
        Tag tag = tags.get(tagName);

        if (tag == null) {
            tagName = settings.normalizeTag(tagName);
            Validate.notEmpty(tagName);
            tag = tags.get(tagName);

            if (tag == null) {
                // not defined: create default; go anywhere, do anything! (incl be inside a <p>)
                tag = new Tag(tagName);
                tag.isBlock = false;
            }
        }
        return tag;
    }

    /**
     * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
     * <p>
     * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
     * </p>
     *
     * @param tagName Name of tag, e.g. "p". <b>Case sensitive</b>.
     * @return The tag, either defined or new generic.
     */
    public static Tag valueOf(String tagName) {
        return valueOf(tagName, ParseSettings.preserveCase);
    }

    /**
     * Gets if this is a block tag.
     *
     * @return if block tag
     */
    public boolean isBlock() {
        return isBlock;
    }

    /**
     * Gets if this tag should be formatted as a block (or as inline)
     *
     * @return if should be formatted as block or inline
     */
    public boolean formatAsBlock() {
        return formatAsBlock;
    }

    /**
     * Gets if this tag can contain block tags.
     *
     * @return if tag can contain block tags
     * @deprecated No longer used, and no different result than {{@link #isBlock()}}
     */
    public boolean canContainBlock() {
        return isBlock;
    }

    /**
     * Gets if this tag is an inline tag.
     *
     * @return if this tag is an inline tag.
     */
    public boolean isInline() {
        return !isBlock;
    }

    /**
     * Gets if this tag is a data only tag.
     *
     * @return if this tag is a data only tag
     */
    public boolean isData() {
        return !canContainInline && !isEmpty();
    }

    /**
     * Get if this is an empty tag
     *
     * @return if this is an empty tag
     */
    public boolean isEmpty() {
        return empty;
    }

    /**
     * Get if this tag is self closing.
     *
     * @return if this tag should be output as self closing.
     */
    public boolean isSelfClosing() {
        return empty || selfClosing;
    }

    /**
     * Get if this is a pre-defined tag, or was auto created on parsing.
     *
     * @return if a known tag
     */
    public boolean isKnownTag() {
        return tags.containsKey(tagName);
    }

    /**
     * Check if this tagname is a known tag.
     *
     * @param tagName name of tag
     * @return if known HTML tag
     */
    public static boolean isKnownTag(String tagName) {
        return tags.containsKey(tagName);
    }

    /**
     * Get if this tag should preserve whitespace within child text nodes.
     *
     * @return if preserve whitespace
     */
    public boolean preserveWhitespace() {
        return preserveWhitespace;
    }

    /**
     * Get if this tag represents a control associated with a form. E.g. input, textarea, output
     * @return if associated with a form
     */
    public boolean isFormListed() {
        return formList;
    }

    /**
     * Get if this tag represents an element that should be submitted with a form. E.g. input, option
     * @return if submittable with a form
     */
    public boolean isFormSubmittable() {
        return formSubmit;
    }

    Tag setSelfClosing() {
        selfClosing = true;
        return this;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o)
            return true;
        if (!(o instanceof Tag))
            return false;

        Tag tag = (Tag) o;

        if (!tagName.equals(tag.tagName))
            return false;
        if (canContainInline != tag.canContainInline)
            return false;
        if (empty != tag.empty)
            return false;
        if (formatAsBlock != tag.formatAsBlock)
            return false;
        if (isBlock != tag.isBlock)
            return false;
        if (preserveWhitespace != tag.preserveWhitespace)
            return false;
        if (selfClosing != tag.selfClosing)
            return false;
        if (formList != tag.formList)
            return false;
        return formSubmit == tag.formSubmit;
    }

    @Override
    public int hashCode() {
        int result = tagName.hashCode();
        result = 31 * result + (isBlock ? 1 : 0);
        result = 31 * result + (formatAsBlock ? 1 : 0);
        result = 31 * result + (canContainInline ? 1 : 0);
        result = 31 * result + (empty ? 1 : 0);
        result = 31 * result + (selfClosing ? 1 : 0);
        result = 31 * result + (preserveWhitespace ? 1 : 0);
        result = 31 * result + (formList ? 1 : 0);
        result = 31 * result + (formSubmit ? 1 : 0);
        return result;
    }

    @Override
    public String toString() {
        return tagName;
    }

    // internal static initialisers:
    // prepped from http://www.w3.org/TR/REC-html40/sgml/dtd.html and other sources
    private static final String[] blockTags = { "html", "head", "body", "frameset", "script", "noscript", "style",
            "meta", "link", "title", "frame", "noframes", "section", "nav", "aside", "hgroup", "header", "footer",
            "p", "h1", "h2", "h3", "h4", "h5", "h6", "ul", "ol", "pre", "div", "blockquote", "hr", "address",
            "figure", "figcaption", "form", "fieldset", "ins", "del", "dl", "dt", "dd", "li", "table", "caption",
            "thead", "tfoot", "tbody", "colgroup", "col", "tr", "th", "td", "video", "audio", "canvas", "details",
            "menu", "plaintext", "template", "article", "main", "svg", "math", "center" };
    private static final String[] inlineTags = { "object", "base", "font", "tt", "i", "b", "u", "big", "small",
            "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abbr", "time", "acronym", "mark", "ruby",
            "rt", "rp", "a", "img", "br", "wbr", "map", "q", "sub", "sup", "bdo", "iframe", "embed", "span",
            "input", "select", "textarea", "label", "button", "optgroup", "option", "legend", "datalist", "keygen",
            "output", "progress", "meter", "area", "param", "source", "track", "summary", "command", "device",
            "area", "basefont", "bgsound", "menuitem", "param", "source", "track", "data", "bdi", "s" };
    private static final String[] emptyTags = { "meta", "link", "base", "frame", "img", "br", "wbr", "embed", "hr",
            "input", "keygen", "col", "command", "device", "area", "basefont", "bgsound", "menuitem", "param",
            "source", "track" };
    private static final String[] formatAsInlineTags = { "title", "a", "p", "h1", "h2", "h3", "h4", "h5", "h6",
            "pre", "address", "li", "th", "td", "script", "style", "ins", "del", "s" };
    private static final String[] preserveWhitespaceTags = { "pre", "plaintext", "title", "textarea"
            // script is not here as it is a data node, which always preserve whitespace
    };
    // todo: I think we just need submit tags, and can scrub listed
    private static final String[] formListedTags = { "button", "fieldset", "input", "keygen", "object", "output",
            "select", "textarea" };
    private static final String[] formSubmitTags = { "input", "keygen", "object", "select", "textarea" };

    static {
        // creates
        for (String tagName : blockTags) {
            Tag tag = new Tag(tagName);
            register(tag);
        }
        for (String tagName : inlineTags) {
            Tag tag = new Tag(tagName);
            tag.isBlock = false;
            tag.formatAsBlock = false;
            register(tag);
        }

        // mods:
        for (String tagName : emptyTags) {
            Tag tag = tags.get(tagName);
            Validate.notNull(tag);
            tag.canContainInline = false;
            tag.empty = true;
        }

        for (String tagName : formatAsInlineTags) {
            Tag tag = tags.get(tagName);
            Validate.notNull(tag);
            tag.formatAsBlock = false;
        }

        for (String tagName : preserveWhitespaceTags) {
            Tag tag = tags.get(tagName);
            Validate.notNull(tag);
            tag.preserveWhitespace = true;
        }

        for (String tagName : formListedTags) {
            Tag tag = tags.get(tagName);
            Validate.notNull(tag);
            tag.formList = true;
        }

        for (String tagName : formSubmitTags) {
            Tag tag = tags.get(tagName);
            Validate.notNull(tag);
            tag.formSubmit = true;
        }
    }

    private static void register(Tag tag) {
        tags.put(tag.tagName, tag);
    }
}