Java XML Node Text Value getTextContent(Node node, String path)

Description

Get the text content of an element identified by a path, where the path elements can include an index.

License

Open Source License

Parameter

Parameter	Description
node	the starting node for the path. The first path element must match the name of this node.
path	the path to the target node.

Return

the full text value of the target node (including all descendent text nodes), or the empty string if the target is not a descendent of the starting node.

Declaration

public static String getTextContent(Node node, String path)

Method Source Code

//package com.java2s;
/*---------------------------------------------------------------
*  Copyright 2005 by the Radiological Society of North America
*
*  This source software is released under the terms of the
*  RSNA Public License (http://mirc.rsna.org/rsnapubliclicense)
*----------------------------------------------------------------*/

import org.w3c.dom.Document;
import org.w3c.dom.Element;

import org.w3c.dom.Node;

public class Main {
    /**/* w  w w  . ja  va2  s. c om*/
     * Get the text content of an element identified by a path,
     * where the path elements can include an index. The first
     * path element must not have an index, and its name must
     * match the name of the starting node. If the starting
     * node is a Document, the root Element of the document is
     * used as the starting point. Path elements must be separated
     * by the slash character. If the path starts with a slash,
     * the slash is ignored. If the element or attribute identified
     * by the path is not present as a child of the starting node,
     * the empty string is returned. If a path element identifies
     * an attribute, any subsequent path elements are ignored.
     * A path is in the form: /e1/e2/e3/... or /e1/e2/@attr
     * Note the slash preceding the attribute's @-sign.
     * @param node the starting node for the path. The first path
     * element must match the name of this node.
     * @param path the path to the target node.
     * @return the full text value of the target node (including all
     * descendent text nodes), or the empty string if the target is
     * not a descendent of the starting node.
     */
    public static String getTextContent(Node node, String path) {
        if (node instanceof Document)
            node = ((Document) node).getDocumentElement();
        if (!(node instanceof Element))
            return "";
        Element el = (Element) node;
        path = path.replaceAll("\\s", "");
        if (path.startsWith("/"))
            path = path.substring(1);
        String[] pathElements = path.split("/");
        if (!pathElements[0].equals(el.getTagName()))
            return "";
        for (int i = 1; i < pathElements.length; i++) {
            String pe = pathElements[i];
            if (pe.startsWith("@")) {
                //If this path element identifies an attribute, return it
                //and ignore any further path elements.
                return el.getAttribute(pe.substring(1));
            } else {
                //This path element identifies an Element. It may have an index.
                //Get the index, if present, and get the element name.
                int n = 0;
                int k = pe.indexOf("[");
                int kk = pe.lastIndexOf("]");
                if ((k != -1) && (k < kk)) {
                    try {
                        n = Integer.parseInt(pe.substring(k + 1, kk));
                    } catch (Exception ex) {
                        return "";
                    }
                    pe = pe.substring(0, k);
                } else if (k != kk)
                    return "";
                //We now have the element name and the index.
                //Find the identified Element. We have to count
                //matching elements to find the one identified
                //by the index.
                int nn = 0;
                Node child = el.getFirstChild();
                while (child != null) {
                    if ((child.getNodeType() == Node.ELEMENT_NODE) && child.getNodeName().equals(pe)) {
                        if (n == nn)
                            break;
                        nn++;
                    }
                    child = child.getNextSibling();
                }
                //If the child is null, we didn't find the identified Element.
                if (child == null)
                    return "";
                //If we get here, we found it, now look for the next one.
                el = (Element) child;
            }
        }
        //Okay, we must be at the end of the path, and it must be an Element.
        //Return the text content of the element.
        return el.getTextContent();
    }
}