Example usage for org.w3c.dom Text getData

Introduction

This page collects example usages of the org.w3c.dom.Text.getData() method.

Prototype

public String getData() throws DOMException;

Source Link

Documentation

The character data of the node that implements this interface.

Usage

From source file:Main.java

/**
 * Collects the text that sits directly inside an element (for example text
 * interspersed between child elements).
 *
 * <p>Only direct {@link Text} children are considered. A text child is skipped
 * when it is flagged as element-content whitespace or when it consists solely
 * of XML white space (space, tab, line feed, carriage return); every other
 * text child is appended unchanged, in document order.
 *
 * @param el the element to read; may be {@code null}
 * @return the concatenated text, or {@code ""} if there is none or if
 *         {@code el} is {@code null}; if reading the DOM throws, the failure
 *         is reported on standard output and whatever was collected so far
 *         is returned
 */
public static String getText(Element el) {
    if (el == null) {
        return "";
    }
    StringBuilder res = new StringBuilder();
    try {
        // Merge adjacent text nodes so each run of text is inspected as a whole.
        el.normalize();
        NodeList nodes = el.getChildNodes();
        for (int i = 0; i < nodes.getLength(); i++) {
            if (!(nodes.item(i) instanceof Text)) {
                continue;
            }
            Text text = (Text) nodes.item(i);
            if (text.isElementContentWhitespace()) {
                continue;
            }
            String tData = text.getData();
            // Carriage returns count as white space too; otherwise a "\r\n"
            // separator between child elements would leak into the result.
            if (!isXmlWhitespaceOnly(tData)) {
                res.append(tData);
            }
        }
    } catch (Exception e) {
        // Best effort: report the problem and return what was gathered.
        System.out.println("Text failure: " + e.getMessage());
    }
    return res.toString();
}

/** Returns true when {@code s} is empty or holds only space, tab, LF or CR characters. */
private static boolean isXmlWhitespaceOnly(String s) {
    for (int i = 0; i < s.length(); i++) {
        char c = s.charAt(i);
        if (c != ' ' && c != '\t' && c != '\n' && c != '\r') {
            return false;
        }
    }
    return true;
}

From source file:Main.java

/**
 * Returns the character data of the text node that {@code getTextNode}
 * finds for the given element.
 *
 * @param parentNode the element whose text should be read
 * @return the data of the located text node, or {@code null} when
 *         {@code getTextNode} finds none
 */
public static String getText(Element parentNode) {
    Text textNode = getTextNode(parentNode);
    return (textNode != null) ? textNode.getData() : null;
}

From source file:Main.java

/**
 * Locate the first text node at any level below the given node. If the
 * ignoreEmpty flag is true, we will ignore text nodes that contain only
 * whitespace characteres./*from  ww  w . ja v a 2  s .c  o  m*/
 * <p/>
 * Note that if you're trying to extract element content, you probably don't
 * want this since parser's can break up pcdata into multiple adjacent text
 * nodes. See getContent() for a more useful method.
 */
private static Text findText(Node node, boolean ignoreEmpty) {

    Text found = null;

    if (node != null) {

        if (node.getNodeType() == Node.TEXT_NODE || node.getNodeType() == Node.CDATA_SECTION_NODE) {

            Text t = (Text) node;
            if (!ignoreEmpty) {
                found = t;
            } else {
                String s = t.getData().trim();
                if (s.length() > 0) {
                    found = t;
                }
            }
        }

        if (found == null) {

            for (Node child = node.getFirstChild(); child != null
                    && found == null; child = child.getNextSibling()) {

                found = findText(child, ignoreEmpty);
            }
        }
    }

    return found;
}

From source file:fr.aliasource.webmail.server.proxy.client.http.DOMUtils.java

/**
 * Returns, for every descendant of {@code root} with the given tag name, the
 * data of that element's first child when the first child is a text node.
 *
 * @param root        the element whose descendants are searched
 * @param elementName the tag name to match
 * @return one entry per matching element, in the order returned by
 *         {@code getElementsByTagName}; an entry is {@code ""} when the
 *         element has no children or its first child is not a text node
 */
public static String[] getTexts(Element root, String elementName) {
    NodeList list = root.getElementsByTagName(elementName);
    String[] ret = new String[list.getLength()];
    for (int i = 0; i < ret.length; i++) {
        org.w3c.dom.Node first = list.item(i).getFirstChild();
        // Check the type before casting: the first child may be an element,
        // comment, etc., and a blind cast would throw ClassCastException.
        if (first instanceof Text) {
            ret[i] = ((Text) first).getData();
        } else {
            ret[i] = ""; //$NON-NLS-1$
        }
    }
    return ret;
}

From source file:fr.aliasource.webmail.server.proxy.client.http.DOMUtils.java

/**
 * Returns the data of the element's first child when that child is a text node.
 *
 * @param node the element to read
 * @return the first child's character data, or {@code null} when the element
 *         has no children or its first child is not a text node
 */
public static String getElementText(Element node) {
    org.w3c.dom.Node first = node.getFirstChild();
    // instanceof is false for null, so this also covers the childless case and
    // avoids a ClassCastException when the first child is not text.
    if (first instanceof Text) {
        return ((Text) first).getData();
    }
    return null;
}

From source file:Main.java

/**
 * Return the content of the given element.
 * <p/>/*from ww  w .  j  a v  a2  s  . co m*/
 * We will descend to an arbitrary depth looking for the first text node.
 * <p/>
 * Note that the parser may break what was originally a single string of
 * pcdata into multiple adjacent text nodes. Xerces appears to do this when
 * it encounters a '$' in the text, not sure if there is specified behavior,
 * or if its parser specific.
 * <p/>
 * Here, we will congeal adjacent text nodes.
 * <p/>
 * We will NOT ignore text nodes that have only whitespace.
 */
public static String getContent(Element e) {

    String content = null;

    if (e != null) {

        // find the first inner text node,
        Text t = findText(e, false);
        if (t != null) {
            // we have at least some text
            StringBuilder b = new StringBuilder();
            while (t != null) {
                b.append(t.getData());
                Node n = t.getNextSibling();

                t = null;
                if (n != null && ((n.getNodeType() == Node.TEXT_NODE)
                        || (n.getNodeType() == Node.CDATA_SECTION_NODE))) {
                    t = (Text) n;
                }
            }
            content = b.toString();
        }
    }

    return content;
}

From source file:DomUtil.java

/**
 * Returns the character data of the first direct child of a DOM
 * <code>Element</code> that is a text node.
 *
 * @param element The <code>Element</code> to analyze.
 * @return the data of the first text child, or <code>null</code> if the
 *         element has no text child
 */
public static String getElementText(Element element) {
    NodeList kids = element.getChildNodes();
    for (int i = 0, total = kids.getLength(); i < total; i++) {
        if (!(kids.item(i) instanceof Text)) {
            continue;
        }
        return ((Text) kids.item(i)).getData();
    }
    return null;
}

From source file:XMLUtils.java

/**
 * Returns the text value of an element: the data of all its direct text
 * children concatenated in order, with leading and trailing spaces, tabs,
 * carriage returns and line feeds removed.
 *
 * @param el the element to read
 * @return the trimmed text; {@code ""} when the element has no text children
 *         or their combined content is only white space
 */
public static String getTextValue(Element el) {
    StringBuilder b = new StringBuilder();
    // Gather every direct text child.
    NodeList nl = el.getChildNodes();
    int len = nl.getLength();
    for (int i = 0; i < len; i++) {
        Node n = nl.item(i);
        if (n instanceof Text) {
            b.append(((Text) n).getData());
        }
    }

    // Narrow [start, end) from both sides. Bounding the second loop by
    // start keeps the range valid when the content is all white space
    // (previously this produced substring(length, 0) and threw).
    int start = 0;
    int end = b.length();
    while (start < end && isTrimmedChar(b.charAt(start))) {
        start++;
    }
    while (end > start && isTrimmedChar(b.charAt(end - 1))) {
        end--;
    }
    return b.substring(start, end);
}

/** Returns true for the characters stripped from both ends: space, CR, LF and tab. */
private static boolean isTrimmedChar(char c) {
    return c == ' ' || c == '\r' || c == '\n' || c == '\t';
}

From source file:edu.lternet.pasta.portal.search.BrowseGroup.java

/**
 * Builds the "Controlled Vocabulary" browse group.
 *
 * <p>The group first receives the result of {@code generateLterSiteCache()}.
 * Then the XML returned by
 * {@code ControlledVocabularyClient.webServiceFetchTopTerms()} is parsed and
 * one child group is added per {@code term} element, using the trimmed text
 * of its {@code string} child as the group value and of its {@code term_id}
 * child as the term id.
 *
 * @return the populated group, or {@code null} if any exception occurred
 *         while fetching or processing the top terms
 */
public static BrowseGroup generateKeywordCache() {
    BrowseGroup controlledVocabulary = new BrowseGroup("Controlled Vocabulary");
    controlledVocabulary.addBrowseGroup(generateLterSiteCache());

    try {
        String topTermsXML = ControlledVocabularyClient.webServiceFetchTopTerms();
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        InputStream xmlStream = IOUtils.toInputStream(topTermsXML, "UTF-8");
        NodeList terms = builder.parse(xmlStream).getDocumentElement().getElementsByTagName("term");

        for (int t = 0; t < terms.getLength(); t++) {
            String termId = null;
            String value = null;

            // Pick the id and display string out of this term's child elements.
            NodeList fields = terms.item(t).getChildNodes();
            for (int f = 0; f < fields.getLength(); f++) {
                Node field = fields.item(f);
                if (!(field instanceof Element)) {
                    continue;
                }
                Element fieldElement = (Element) field;
                String tag = fieldElement.getTagName();
                if (tag.equals("term_id")) {
                    termId = ((Text) fieldElement.getFirstChild()).getData().trim();
                } else if (tag.equals("string")) {
                    value = ((Text) fieldElement.getFirstChild()).getData().trim();
                }
            }

            BrowseGroup topTerm = new BrowseGroup(value);
            controlledVocabulary.addBrowseGroup(topTerm);
            topTerm.setTermId(termId);
            topTerm.setHasMoreDown("1");
            topTerm.addFetchDownElements();
        }
    } catch (Exception e) {
        logger.error("Exception:\n" + e.getMessage());
        e.printStackTrace();
        // Returning null signals to callers that refreshing the browse cache
        // failed, so they know not to overwrite the previous results.
        controlledVocabulary = null;
    }

    return controlledVocabulary;
}

From source file:XMLDocumentWriter.java

/**
 * Prints the given DOM node, and recursively its children, to {@code out}.
 *
 * <p>Element children are printed with four extra spaces of indentation.
 * Text is trimmed and skipped when empty; text and attribute values are
 * passed through {@code fixup} before printing. Node types not handled
 * below are reported on standard error and otherwise ignored.
 *
 * @param node   the node to print
 * @param indent the string printed in front of each line for this node
 *               (the DOCTYPE line is printed without it)
 */
public void write(Node node, String indent) {
    switch (node.getNodeType()) {
    case Node.DOCUMENT_NODE: {
        // Whole document: XML declaration first, then each top-level node.
        Document doc = (Document) node;
        out.println(indent + "<?xml version='1.0'?>");
        for (Node kid = doc.getFirstChild(); kid != null; kid = kid.getNextSibling()) {
            write(kid, indent);
        }
        break;
    }
    case Node.DOCUMENT_TYPE_NODE: {
        // Only the doctype name is written; public/system ids are omitted,
        // so this is not a complete <!DOCTYPE> declaration.
        DocumentType doctype = (DocumentType) node;
        out.println("<!DOCTYPE " + doctype.getName() + ">");
        break;
    }
    case Node.ELEMENT_NODE: {
        Element elt = (Element) node;

        // Start tag with all attributes, values single-quoted.
        out.print(indent + "<" + elt.getTagName());
        NamedNodeMap attrs = elt.getAttributes();
        for (int i = 0; i < attrs.getLength(); i++) {
            Node attr = attrs.item(i);
            out.print(" " + attr.getNodeName() + "='" + fixup(attr.getNodeValue()) + "'");
        }
        out.println(">");

        // Children, one indentation level deeper.
        String newindent = indent + "    ";
        for (Node kid = elt.getFirstChild(); kid != null; kid = kid.getNextSibling()) {
            write(kid, newindent);
        }

        // End tag.
        out.println(indent + "</" + elt.getTagName() + ">");
        break;
    }
    case Node.TEXT_NODE: {
        // Surrounding white space is dropped; blank text prints nothing.
        String trimmed = ((Text) node).getData().trim();
        if (trimmed.length() > 0) {
            out.println(indent + fixup(trimmed));
        }
        break;
    }
    case Node.PROCESSING_INSTRUCTION_NODE: {
        ProcessingInstruction pi = (ProcessingInstruction) node;
        out.println(indent + "<?" + pi.getTarget() + " " + pi.getData() + "?>");
        break;
    }
    case Node.ENTITY_REFERENCE_NODE: {
        out.println(indent + "&" + node.getNodeName() + ";");
        break;
    }
    case Node.CDATA_SECTION_NODE: {
        CDATASection cdata = (CDATASection) node;
        // The delimiters are deliberately split so that no literal CDATA
        // marker appears in this source file.
        out.println(indent + "<" + "![CDATA[" + cdata.getData() + "]]" + ">");
        break;
    }
    case Node.COMMENT_NODE: {
        Comment c = (Comment) node;
        out.println(indent + "<!--" + c.getData() + "-->");
        break;
    }
    default:
        // Any other node type is reported and skipped.
        System.err.println("Ignoring node: " + node.getClass().getName());
        break;
    }
}