Example usage for org.w3c.dom Text getData

List of usage examples for org.w3c.dom Text getData

Introduction

On this page you can find example usages of the org.w3c.dom Text getData method.

Prototype

public String getData() throws DOMException;

Source Link

Documentation

The character data of the node that implements this interface.

Usage

From source file:Main.java

/**
 * Returns the text found directly inside an element (e.g. text interspersed
 * between child elements), or "" if there is none or if the element is null.
 * <p>
 * The element is normalized first (which modifies it in place), then the data
 * of every direct {@code Text} child that contains at least one character other
 * than space, tab, line feed or carriage return is appended, untrimmed, in
 * document order. Text children consisting only of those white space characters
 * are skipped.
 * <p>
 * Any exception raised while reading the DOM is reported on standard output and
 * the text collected so far is returned.
 *
 * @param el the element whose direct text is wanted; may be null
 * @return the concatenated non-blank text children, never null
 */
public static String getText(Element el) {
    if (el == null) {
        return "";
    }
    StringBuilder res = new StringBuilder();
    try {
        el.normalize(); // merges adjacent text nodes so each run of text is tested as a whole
        NodeList nodes = el.getChildNodes();
        for (int i = 0; i < nodes.getLength(); i++) {
            if (!(nodes.item(i) instanceof Text)) {
                continue;
            }
            Text text = (Text) nodes.item(i);
            if (text.isElementContentWhitespace()) {
                continue;
            }
            String tData = text.getData();
            // All four XML white space characters are delimiters, so a node made up
            // only of white space (including "\r\n") yields no tokens at all.
            StringTokenizer nonWhiteSpace = new StringTokenizer(tData, "\n\r \t");
            if (nonWhiteSpace.hasMoreTokens()) {
                res.append(tData);
            }
        }
    } catch (Exception e) {
        // Best effort: keep whatever text was gathered before the failure.
        System.out.println("Text failure: " + e.getMessage());
    }
    return res.toString();
}

From source file:Main.java

/**
 * Returns the character data of the text node that {@code getTextNode} locates
 * for the given element.
 *
 * @param parentNode the element containing the data that we'd like to get.
 * @return the data of the located text node, or null if no text node was found.
 */
public static String getText(Element parentNode) {
    final Text textNode = getTextNode(parentNode);
    return (textNode != null) ? textNode.getData() : null;
}

From source file:Main.java

/**
 * Locate the first text node at any level below the given node. If the
 * ignoreEmpty flag is true, we will ignore text nodes that contain only
 * whitespace characteres./*from  ww  w . ja v a 2  s .c  o  m*/
 * <p/>
 * Note that if you're trying to extract element content, you probably don't
 * want this since parser's can break up pcdata into multiple adjacent text
 * nodes. See getContent() for a more useful method.
 */
private static Text findText(Node node, boolean ignoreEmpty) {

    Text found = null;

    if (node != null) {

        if (node.getNodeType() == Node.TEXT_NODE || node.getNodeType() == Node.CDATA_SECTION_NODE) {

            Text t = (Text) node;
            if (!ignoreEmpty) {
                found = t;
            } else {
                String s = t.getData().trim();
                if (s.length() > 0) {
                    found = t;
                }
            }
        }

        if (found == null) {

            for (Node child = node.getFirstChild(); child != null
                    && found == null; child = child.getNextSibling()) {

                found = findText(child, ignoreEmpty);
            }
        }
    }

    return found;
}

From source file:fr.aliasource.webmail.server.proxy.client.http.DOMUtils.java

/**
 * Collects the leading text of every descendant element with the given tag name.
 * <p>
 * For each matching element, the result holds the data of its first child when
 * that child is a text node, and "" otherwise (no children, or a first child of
 * another kind such as an element or a comment).
 *
 * @param root the element whose descendants are searched
 * @param elementName the tag name to match
 * @return one entry per matching element, in document order; never null and
 *         never containing null
 */
public static String[] getTexts(Element root, String elementName) {
    NodeList list = root.getElementsByTagName(elementName);
    String[] ret = new String[list.getLength()];
    for (int i = 0; i < list.getLength(); i++) {
        org.w3c.dom.Node firstChild = list.item(i).getFirstChild();
        // Check the type before casting: a non-text first child must not raise
        // a ClassCastException.
        if (firstChild instanceof Text) {
            ret[i] = ((Text) firstChild).getData();
        } else {
            ret[i] = ""; //$NON-NLS-1$
        }
    }
    return ret;
}

From source file:fr.aliasource.webmail.server.proxy.client.http.DOMUtils.java

/**
 * Returns the data of the element's first child when that child is a text node.
 *
 * @param node the element to read
 * @return the first child's character data, or null if the element has no
 *         children or its first child is not a text node
 */
public static String getElementText(Element node) {
    org.w3c.dom.Node firstChild = node.getFirstChild();
    // instanceof is false for null, so this covers both "no child" and
    // "first child is not text" without risking a ClassCastException.
    if (!(firstChild instanceof Text)) {
        return null;
    }
    return ((Text) firstChild).getData();
}

From source file:Main.java

/**
 * Return the content of the given element.
 * <p/>/*from ww  w .  j  a v  a2  s  . co m*/
 * We will descend to an arbitrary depth looking for the first text node.
 * <p/>
 * Note that the parser may break what was originally a single string of
 * pcdata into multiple adjacent text nodes. Xerces appears to do this when
 * it encounters a '$' in the text, not sure if there is specified behavior,
 * or if its parser specific.
 * <p/>
 * Here, we will congeal adjacent text nodes.
 * <p/>
 * We will NOT ignore text nodes that have only whitespace.
 */
public static String getContent(Element e) {

    String content = null;

    if (e != null) {

        // find the first inner text node,
        Text t = findText(e, false);
        if (t != null) {
            // we have at least some text
            StringBuilder b = new StringBuilder();
            while (t != null) {
                b.append(t.getData());
                Node n = t.getNextSibling();

                t = null;
                if (n != null && ((n.getNodeType() == Node.TEXT_NODE)
                        || (n.getNodeType() == Node.CDATA_SECTION_NODE))) {
                    t = (Text) n;
                }
            }
            content = b.toString();
        }
    }

    return content;
}

From source file:DomUtil.java

/**
 * Returns the data of the first text node found among the direct children of a
 * DOM <code>Element</code>.
 *
 * @param element The <code>Element</code> to analyze.
 * @return the character data of the first <code>Text</code> child, or null if
 *         the element has no <code>Text</code> child.
 */
public static String getElementText(Element element) {
    final NodeList kids = element.getChildNodes();
    for (int i = 0, total = kids.getLength(); i < total; ++i) {
        if (!(kids.item(i) instanceof Text)) {
            continue;
        }
        return ((Text) kids.item(i)).getData();
    }
    return null;
}

From source file:XMLUtils.java

/**
 * Returns the text value of an element: the data of all its direct text
 * children joined in document order, with leading and trailing spaces, tabs,
 * carriage returns and line feeds removed.
 *
 * @param el the element to read
 * @return the trimmed text; "" when the element has no text children or their
 *         combined data is only white space
 */
public static String getTextValue(Element el) {
    StringBuilder b = new StringBuilder();
    // gather the data of every direct text child
    NodeList nl = el.getChildNodes();
    int len = nl.getLength();
    for (int i = 0; i < len; i++) {
        Node n = nl.item(i);
        if (n instanceof Text) {
            b.append(((Text) n).getData());
        }
    }

    final String trimmable = " \r\n\t";
    // Shrink [start, end) from both sides. Both loops are bounded by the other
    // index, so start never passes end, even when the whole buffer is white
    // space, and substring always receives a valid range.
    int start = 0;
    int end = b.length();
    while (start < end && trimmable.indexOf(b.charAt(start)) >= 0) {
        start++;
    }
    while (end > start && trimmable.indexOf(b.charAt(end - 1)) >= 0) {
        end--;
    }
    return b.substring(start, end);
}

From source file:edu.lternet.pasta.portal.search.BrowseGroup.java

/**
 * Builds the "Controlled Vocabulary" browse group.
 * <p>
 * The group first receives the result of {@code generateLterSiteCache()}. The
 * top-terms XML fetched through {@code ControlledVocabularyClient} is then
 * parsed, and one child group is added per {@code term} element, named after
 * the trimmed text of its {@code string} child and carrying the trimmed text of
 * its {@code term_id} child as term id.
 *
 * @return the populated browse group, or null if any exception occurred while
 *         fetching or processing the top terms
 */
public static BrowseGroup generateKeywordCache() {
    BrowseGroup controlledVocabulary = new BrowseGroup("Controlled Vocabulary");
    controlledVocabulary.addBrowseGroup(generateLterSiteCache());

    try {
        final String topTermsXML = ControlledVocabularyClient.webServiceFetchTopTerms();
        final DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        final InputStream xmlStream = IOUtils.toInputStream(topTermsXML, "UTF-8");
        final Document parsed = parser.parse(xmlStream);
        final NodeList terms = parsed.getDocumentElement().getElementsByTagName("term");

        for (int termIndex = 0; termIndex < terms.getLength(); termIndex++) {
            final NodeList fields = terms.item(termIndex).getChildNodes();
            String termId = null;
            String value = null;

            for (int fieldIndex = 0; fieldIndex < fields.getLength(); fieldIndex++) {
                final Node field = fields.item(fieldIndex);
                if (!(field instanceof Element)) {
                    continue;
                }
                final Element fieldElement = (Element) field;
                final boolean isTermId = fieldElement.getTagName().equals("term_id");
                final boolean isString = !isTermId && fieldElement.getTagName().equals("string");
                if (!isTermId && !isString) {
                    continue;
                }
                // Both fields are read the same way: the trimmed data of the first child.
                final Text fieldText = (Text) fieldElement.getFirstChild();
                final String trimmed = fieldText.getData().trim();
                if (isTermId) {
                    termId = trimmed;
                } else {
                    value = trimmed;
                }
            }

            final BrowseGroup topTerm = new BrowseGroup(value);
            controlledVocabulary.addBrowseGroup(topTerm);
            topTerm.setTermId(termId);
            topTerm.setHasMoreDown("1");
            topTerm.addFetchDownElements();
        }
    } catch (Exception e) {
        logger.error("Exception:\n" + e.getMessage());
        e.printStackTrace();
        /*
         * Returning null tells callers that refreshing the browse cache
         * failed, so they know not to overwrite the previous results.
         */
        controlledVocabulary = null;
    }

    return controlledVocabulary;
}

From source file:XMLDocumentWriter.java

/**
 * Prints the given DOM node, and recursively its children, to {@code out},
 * prefixing lines with the supplied indentation string. Children of an element
 * are indented four further spaces.
 * <p>
 * Attribute values and text are passed through {@code fixup} before printing.
 * Text nodes are trimmed and skipped when nothing remains. Node types not
 * handled here are reported on standard error and otherwise ignored.
 *
 * @param node the node to print
 * @param indent the string printed in front of this node's lines
 */
public void write(Node node, String indent) {
    final short kind = node.getNodeType();
    switch (kind) {
    case Node.DOCUMENT_NODE: {
        final Document document = (Document) node;
        out.println(indent + "<?xml version='1.0'?>"); // XML header
        for (Node kid = document.getFirstChild(); kid != null; kid = kid.getNextSibling()) {
            write(kid, indent);
        }
        break;
    }
    case Node.DOCUMENT_TYPE_NODE: {
        final DocumentType docType = (DocumentType) node;
        // DOM Level 1 exposes neither the public nor the system id of the
        // doctype, so only the name can be printed here. Level 2 would allow
        // a complete <!DOCTYPE> tag.
        out.println("<!DOCTYPE " + docType.getName() + ">");
        break;
    }
    case Node.ELEMENT_NODE: {
        final Element element = (Element) node;

        // Start tag with its attributes.
        out.print(indent + "<" + element.getTagName());
        final NamedNodeMap attributes = element.getAttributes();
        for (int index = 0; index < attributes.getLength(); index++) {
            final Node attribute = attributes.item(index);
            out.print(" " + attribute.getNodeName() + "='" + fixup(attribute.getNodeValue()) + "'");
        }
        out.println(">");

        // Children, one level deeper.
        final String childIndent = indent + "    ";
        for (Node kid = element.getFirstChild(); kid != null; kid = kid.getNextSibling()) {
            write(kid, childIndent);
        }

        // End tag.
        out.println(indent + "</" + element.getTagName() + ">");
        break;
    }
    case Node.TEXT_NODE: {
        final String trimmed = ((Text) node).getData().trim();
        if (trimmed.length() > 0) { // print only non-empty text
            out.println(indent + fixup(trimmed));
        }
        break;
    }
    case Node.PROCESSING_INSTRUCTION_NODE: {
        final ProcessingInstruction instruction = (ProcessingInstruction) node;
        out.println(indent + "<?" + instruction.getTarget() + " " + instruction.getData() + "?>");
        break;
    }
    case Node.ENTITY_REFERENCE_NODE: {
        out.println(indent + "&" + node.getNodeName() + ";");
        break;
    }
    case Node.CDATA_SECTION_NODE: {
        final CDATASection section = (CDATASection) node;
        // The delimiters are split across literals on purpose so that no CDATA
        // marker appears verbatim in this source file.
        out.println(indent + "<" + "![CDATA[" + section.getData() + "]]" + ">");
        break;
    }
    case Node.COMMENT_NODE: {
        final Comment comment = (Comment) node;
        out.println(indent + "<!--" + comment.getData() + "-->");
        break;
    }
    default: // Any other node type is reported and skipped.
        System.err.println("Ignoring node: " + node.getClass().getName());
        break;
    }
}