Java XML Element Text getContent(Element e)

Here you can find the source of getContent(Element e)

Description

Return the content of the given element.

License

Open Source License

Declaration

public static String getContent(Element e) 

Method Source Code

//package com.java2s;
/*//from www  .j  a  v  a 2 s .  co  m
 * ====================
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2008-2009 Sun Microsystems, Inc. All rights reserved.
 *
 * The contents of this file are subject to the terms of the Common Development
 * and Distribution License("CDDL") (the "License").  You may not use this file
 * except in compliance with the License.
 *
 * You can obtain a copy of the License at
 * http://opensource.org/licenses/cddl1.php
 * See the License for the specific language governing permissions and limitations
 * under the License.
 *
 * When distributing the Covered Code, include this CDDL Header Notice in each file
 * and include the License file at http://opensource.org/licenses/cddl1.php.
 * If applicable, add the following below this CDDL Header, with the fields
 * enclosed by brackets [] replaced by your own identifying information:
 * "Portions Copyrighted [year] [name of copyright owner]"
 * ====================
 */

import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.Text;

public class Main {
    /**
     * Returns the textual content of the given element.
     *
     * <p>The element's subtree is searched depth-first, to any depth, for the
     * first text or CDATA node. The data of that node is then concatenated
     * with the data of every text or CDATA node that immediately follows it
     * as a sibling; concatenation stops at the first sibling of any other
     * type.
     *
     * <p>Merging adjacent nodes matters because a parser may split what was
     * originally a single run of character data into several adjacent text
     * nodes (the original author observed Xerces doing so around a '$'
     * character; whether that is specified or parser-specific behavior is
     * unknown).
     *
     * <p>Text nodes consisting solely of whitespace are NOT skipped.
     *
     * @param e the element to read; may be {@code null}
     * @return the merged text, or {@code null} if {@code e} is {@code null}
     *         or no text/CDATA node exists below it
     */
    public static String getContent(Element e) {
        if (e == null) {
            return null;
        }

        // Whitespace-only nodes count as content, hence ignoreEmpty == false.
        Text current = findText(e, false);
        if (current == null) {
            return null;
        }

        // Glue together the run of adjacent text/CDATA siblings.
        StringBuilder merged = new StringBuilder();
        do {
            merged.append(current.getData());
            Node next = current.getNextSibling();
            current = isTextual(next) ? (Text) next : null;
        } while (current != null);

        return merged.toString();
    }

    /**
     * Finds the first text or CDATA node at or below the given node, visiting
     * nodes depth-first in document order.
     *
     * <p>For extracting element content, {@link #getContent(Element)} is
     * usually the better choice, since a parser may have split the character
     * data across several adjacent text nodes and this method returns only
     * the first of them.
     *
     * @param node        the node at which to start searching; may be
     *                    {@code null}
     * @param ignoreEmpty if {@code true}, text nodes whose data is empty after
     *                    trimming are skipped
     * @return the first matching node, or {@code null} if none is found
     */
    private static Text findText(Node node, boolean ignoreEmpty) {
        if (node == null) {
            return null;
        }

        if (isTextual(node)) {
            Text candidate = (Text) node;
            if (!ignoreEmpty || candidate.getData().trim().length() > 0) {
                return candidate;
            }
        }

        // Not a usable text node itself: try each child subtree in order.
        Node child = node.getFirstChild();
        while (child != null) {
            Text hit = findText(child, ignoreEmpty);
            if (hit != null) {
                return hit;
            }
            child = child.getNextSibling();
        }

        return null;
    }

    /**
     * Tells whether the given node is a text node or a CDATA section.
     *
     * @param node the node to test; may be {@code null}
     * @return {@code true} if the node type is {@code TEXT_NODE} or
     *         {@code CDATA_SECTION_NODE}, {@code false} otherwise (including
     *         for {@code null})
     */
    private static boolean isTextual(Node node) {
        if (node == null) {
            return false;
        }
        short type = node.getNodeType();
        return type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE;
    }
}

Related

  1. getContent(Element element)
  2. getContent(Element element)
  3. getContent(Element element)
  4. getContentFromElement(Element element, String namespaceURI, String localName)