Retrieve the name of the first tag in the XML document specified by the given Reader, without parsing the full file/string. - Java XML

Java examples for XML:XML String

Description

Retrieve the name of the first tag in the XML document specified by the given Reader, without parsing the full file/string.

Demo Code

/*
// Licensed to Julian Hyde under one or more contributor license
// agreements. See the NOTICE file distributed with this work for
// additional information regarding copyright ownership.
//
// Julian Hyde licenses this file to you under the Apache License,
// Version 2.0 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
 */
//package com.java2s;
import java.io.IOException;

import java.io.Reader;

public class Main {
    /** Scanner states used while looking for the first element tag. */
    private enum State {
        /** Between markup constructs; only whitespace or '<' may appear. */
        OUTSIDE,
        /** Just after '<'; the kind of construct is not yet known. */
        BRACKET,
        /** Inside a comment, waiting for the closing "-->". */
        COMMENT,
        /** Inside a construct that is skipped up to the next '>'. */
        IGNORE,
        /** Accumulating the characters of the first tag name. */
        TAG
    }

    /** Characters that must directly follow '<' to open a comment. */
    private static final String COMMENT_OPEN = "!--";

    /** Marker meaning the current bracket can no longer be a comment. */
    private static final int NOT_A_COMMENT = -1;

    /**
     * Retrieve the name of the first tag in the XML document specified by the
     * given Reader, without parsing the full file/string.  This function is
     * useful to identify the DocType of an XML document before parsing,
     * possibly to send the document off to different pieces of code.
     * For performance reasons, the function reads only as many characters
     * as it needs to decide on the first tag.
     *
     * <p>Leading whitespace and leading comments are skipped.  A comment is
     * consumed in full up to its closing {@code -->}, so it may contain
     * {@code '>'} characters and may be empty ({@code <!---->}).
     * Constructs starting with {@code <?} or {@code <!} (other than comments)
     * are skipped only up to the next {@code '>'}; a declaration that
     * contains a nested {@code '>'} is therefore not skipped correctly.
     *
     * <p>The reader is read one character at a time and is not closed.
     *
     * @param xml a Reader containing an XML document; must not be null.
     * @return the first tag name, as a String, or null if no first tag
     * can be found: end of input is reached first, non-whitespace text
     * appears outside of any markup, or reading fails with an IOException.
     */
    public static String getFirstTagName(Reader xml) {
        State state = State.OUTSIDE;
        // Number of COMMENT_OPEN characters matched since the last '<',
        // or NOT_A_COMMENT once a mismatch rules a comment out.
        int commentOpenMatched = 0;
        // Length of the run of '-' seen immediately before the current
        // character while inside a comment.
        int trailingDashes = 0;
        boolean sawBang = false;
        // TAG is entered at most once (it only exits by returning), so a
        // single builder is sufficient.
        StringBuilder tagName = new StringBuilder();

        try {
            int c = xml.read();
            // Hitting EOF before a tag name is complete means "no tag found".
            while (c != -1) {
                switch (state) {
                case OUTSIDE:
                    if (c == '<') {
                        // Start of any sort of markup.
                        state = State.BRACKET;
                        commentOpenMatched = 0;
                        sawBang = false;
                    } else if (!Character.isWhitespace((char) c)) {
                        // Text outside of any tag: give up.
                        return null;
                    }
                    c = xml.read();
                    break;

                case BRACKET:
                    // Track progress towards "<!--".
                    if (commentOpenMatched != NOT_A_COMMENT) {
                        if (c == COMMENT_OPEN.charAt(commentOpenMatched)) {
                            commentOpenMatched++;
                            if (commentOpenMatched == COMMENT_OPEN.length()) {
                                // Full comment opener seen.  Leave BRACKET
                                // handling right away so that the first
                                // character of the comment body is examined
                                // by the COMMENT state and nothing else.
                                state = State.COMMENT;
                                trailingDashes = 0;
                                c = xml.read();
                                break;
                            }
                            // Partial match: the character is still
                            // processed by the switch below.
                        } else {
                            commentOpenMatched = NOT_A_COMMENT;
                        }
                    }

                    // Whitespace right after '<' is ignored.
                    if (Character.isWhitespace((char) c)) {
                        c = xml.read();
                        break;
                    }

                    switch (c) {
                    case '?':
                        state = State.IGNORE;
                        c = xml.read();
                        break;
                    case '!':
                        // Ignored section unless this may still be a comment.
                        sawBang = true;
                        if (commentOpenMatched == NOT_A_COMMENT) {
                            state = State.IGNORE;
                        }
                        c = xml.read();
                        break;
                    case '-':
                        // Ignored section unless this may still be a comment.
                        if (commentOpenMatched == NOT_A_COMMENT) {
                            state = State.IGNORE;
                        }
                        c = xml.read();
                        break;
                    case '>':
                        // Empty bracket: back to OUTSIDE immediately.
                        state = State.OUTSIDE;
                        c = xml.read();
                        break;
                    default:
                        // After "<!" this is a declaration to skip; otherwise
                        // it is the first character of the tag name.  The
                        // character is deliberately not consumed here so the
                        // next state sees it.
                        state = sawBang ? State.IGNORE : State.TAG;
                        break;
                    }
                    break;

                case COMMENT:
                    // The comment ends at a '>' directly preceded by at
                    // least two dashes.
                    if (c == '-') {
                        trailingDashes++;
                    } else {
                        if (c == '>' && trailingDashes >= 2) {
                            state = State.OUTSIDE;
                        }
                        trailingDashes = 0;
                    }
                    c = xml.read();
                    break;

                case IGNORE:
                    // Drop out on a closing '>'; skip everything else.
                    if (c == '>') {
                        state = State.OUTSIDE;
                    }
                    c = xml.read();
                    break;

                case TAG:
                    // The name ends at whitespace, '>' or '/'.
                    if (Character.isWhitespace((char) c) || c == '>'
                            || c == '/') {
                        return tagName.toString();
                    }
                    tagName.append((char) c);
                    c = xml.read();
                    break;
                }
            }
            return null;
        } catch (IOException ex) {
            // Best effort: if the input cannot be read, the first tag cannot
            // be determined, which is reported the same way as "not found".
            return null;
        }
    }
}

Related Tutorials