List of usage examples for org.w3c.dom Text getData
public String getData() throws DOMException;
From source file:Main.java
/** * get the text string in an element (eg interspersed between child elements), * or "" if there is none or if the Element is null. * Tries to ignore white space text; but does not succeed. *//*from ww w .ja v a 2 s . c o m*/ public static String getText(Element el) { String res = ""; if (el != null) try { el.normalize(); // does not help recognise white space NodeList nodes = el.getChildNodes(); for (int i = 0; i < nodes.getLength(); i++) if (nodes.item(i) instanceof Text) { Text text = (Text) nodes.item(i); // this filter seems to make no difference if (!text.isElementContentWhitespace()) { String tData = text.getData(); // this seems to be an effective way to catch pure white space StringTokenizer nonWhiteSpace = new StringTokenizer(tData, "\n \t"); if (nonWhiteSpace.countTokens() > 0) res = res + tData; } } } catch (Exception e) { System.out.println("Text failure: " + e.getMessage()); } return res; }
From source file:Main.java
/** * Extracts the String content of a TXT element. * * @param parentNode the node containing the data that we'd like to get. * @return the string contained by the node or null if none existed. *///w w w . ja va 2s. c o m public static String getText(Element parentNode) { Text text = getTextNode(parentNode); if (text == null) { return null; } else { return text.getData(); } }
From source file:Main.java
/** * Locate the first text node at any level below the given node. If the * ignoreEmpty flag is true, we will ignore text nodes that contain only * whitespace characteres./*from ww w . ja v a 2 s .c o m*/ * <p/> * Note that if you're trying to extract element content, you probably don't * want this since parser's can break up pcdata into multiple adjacent text * nodes. See getContent() for a more useful method. */ private static Text findText(Node node, boolean ignoreEmpty) { Text found = null; if (node != null) { if (node.getNodeType() == Node.TEXT_NODE || node.getNodeType() == Node.CDATA_SECTION_NODE) { Text t = (Text) node; if (!ignoreEmpty) { found = t; } else { String s = t.getData().trim(); if (s.length() > 0) { found = t; } } } if (found == null) { for (Node child = node.getFirstChild(); child != null && found == null; child = child.getNextSibling()) { found = findText(child, ignoreEmpty); } } } return found; }
From source file:fr.aliasource.webmail.server.proxy.client.http.DOMUtils.java
public static String[] getTexts(Element root, String elementName) { NodeList list = root.getElementsByTagName(elementName); String[] ret = new String[list.getLength()]; for (int i = 0; i < list.getLength(); i++) { Text txt = (Text) list.item(i).getFirstChild(); if (txt != null) { ret[i] = txt.getData(); } else {//from w w w . jav a2 s . com ret[i] = ""; //$NON-NLS-1$ } } return ret; }
From source file:fr.aliasource.webmail.server.proxy.client.http.DOMUtils.java
/**
 * Returns the data of an element's first child when that child is a text
 * node.
 * <p>
 * If the element has no children, or its first child is something other than
 * a text node (for example a nested element or a comment), null is returned
 * rather than failing with a ClassCastException.
 *
 * @param node the element to read
 * @return the first child's text data, or null if the first child is absent
 *         or is not a text node
 */
public static String getElementText(Element node) {
    // instanceof is false for null, so this also covers childless elements.
    if (node.getFirstChild() instanceof Text) {
        return ((Text) node.getFirstChild()).getData();
    }
    return null;
}
From source file:Main.java
/** * Return the content of the given element. * <p/>/*from ww w . j a v a2 s . co m*/ * We will descend to an arbitrary depth looking for the first text node. * <p/> * Note that the parser may break what was originally a single string of * pcdata into multiple adjacent text nodes. Xerces appears to do this when * it encounters a '$' in the text, not sure if there is specified behavior, * or if its parser specific. * <p/> * Here, we will congeal adjacent text nodes. * <p/> * We will NOT ignore text nodes that have only whitespace. */ public static String getContent(Element e) { String content = null; if (e != null) { // find the first inner text node, Text t = findText(e, false); if (t != null) { // we have at least some text StringBuilder b = new StringBuilder(); while (t != null) { b.append(t.getData()); Node n = t.getNextSibling(); t = null; if (n != null && ((n.getNodeType() == Node.TEXT_NODE) || (n.getNodeType() == Node.CDATA_SECTION_NODE))) { t = (Text) n; } } content = b.toString(); } } return content; }
From source file:DomUtil.java
/**
 * Returns the data of the first text node found among the direct children
 * of a DOM <code>Element</code>.
 *
 * @param element The <code>Element</code> to analyze.
 * @return the data of the first child text node, or <code>null</code> if
 *         the element has no text node among its direct children.
 */
public static String getElementText(Element element) {
    NodeList kids = element.getChildNodes();
    final int total = kids.getLength();
    int position = 0;
    while (position < total) {
        if (kids.item(position) instanceof Text) {
            return ((Text) kids.item(position)).getData();
        }
        position++;
    }
    return null;
}
From source file:XMLUtils.java
/** * Returns the text value of an element. * @param el// w ww . ja v a2 s .c om * @return */ public static String getTextValue(Element el) { StringBuffer b = new StringBuffer(); // retrieve the text node child NodeList nl = el.getChildNodes(); int len = nl.getLength(); for (int i = 0; i < len; i++) { Node n = nl.item(i); if (n instanceof Text) { Text t = (Text) n; b.append(t.getData()); } } // trim the result, ignoring the first spaces and cariage return int iFirst = 0; for (; iFirst < b.length(); iFirst++) { char c = b.charAt(iFirst); if (c != ' ' && c != '\r' && c != '\n' && c != '\t') { break; } } // start by the end as well int iLast = b.length() - 1; for (; iLast >= 0; iLast--) { char c = b.charAt(iLast); if (c != ' ' && c != '\r' && c != '\n' && c != '\t') { break; } } return b.substring(iFirst, iLast + 1); }
From source file:edu.lternet.pasta.portal.search.BrowseGroup.java
/**
 * Builds the "Controlled Vocabulary" browse group.
 * <p>
 * The group first receives the result of generateLterSiteCache(). The XML
 * returned by ControlledVocabularyClient.webServiceFetchTopTerms() is then
 * parsed, and for every "term" element a child BrowseGroup is added, named
 * after the trimmed text of its "string" child and given the trimmed text of
 * its "term_id" child as term id.
 *
 * @return the populated browse group, or null if anything went wrong while
 *         fetching or parsing the terms, so that callers know not to
 *         overwrite previous results
 */
public static BrowseGroup generateKeywordCache() {
    BrowseGroup vocabularyGroup = new BrowseGroup("Controlled Vocabulary");
    vocabularyGroup.addBrowseGroup(generateLterSiteCache());

    try {
        String topTermsXML = ControlledVocabularyClient.webServiceFetchTopTerms();
        Document parsed = DocumentBuilderFactory.newInstance()
                .newDocumentBuilder()
                .parse(IOUtils.toInputStream(topTermsXML, "UTF-8"));
        NodeList terms = parsed.getDocumentElement().getElementsByTagName("term");

        for (int termIndex = 0; termIndex < terms.getLength(); termIndex++) {
            NodeList fields = terms.item(termIndex).getChildNodes();
            String termId = null;
            String label = null;

            for (int fieldIndex = 0; fieldIndex < fields.getLength(); fieldIndex++) {
                Node field = fields.item(fieldIndex);
                if (!(field instanceof Element)) {
                    continue;
                }
                Element fieldElement = (Element) field;
                String tag = fieldElement.getTagName();
                if (tag.equals("term_id")) {
                    termId = ((Text) fieldElement.getFirstChild()).getData().trim();
                } else if (tag.equals("string")) {
                    label = ((Text) fieldElement.getFirstChild()).getData().trim();
                }
            }

            BrowseGroup termGroup = new BrowseGroup(label);
            vocabularyGroup.addBrowseGroup(termGroup);
            termGroup.setTermId(termId);
            termGroup.setHasMoreDown("1");
            termGroup.addFetchDownElements();
        }
    } catch (Exception e) {
        logger.error("Exception:\n" + e.getMessage());
        e.printStackTrace();
        /*
         * A null return signals that refreshing the browse cache failed,
         * so callers know to keep the previous results.
         */
        vocabularyGroup = null;
    }

    return vocabularyGroup;
}
From source file:XMLDocumentWriter.java
/** * Output the specified DOM Node object, printing it using the specified * indentation string//from ww w .j a v a 2 s. com */ public void write(Node node, String indent) { // The output depends on the type of the node switch (node.getNodeType()) { case Node.DOCUMENT_NODE: { // If its a Document node Document doc = (Document) node; out.println(indent + "<?xml version='1.0'?>"); // Output header Node child = doc.getFirstChild(); // Get the first node while (child != null) { // Loop 'till no more nodes write(child, indent); // Output node child = child.getNextSibling(); // Get next node } break; } case Node.DOCUMENT_TYPE_NODE: { // It is a <!DOCTYPE> tag DocumentType doctype = (DocumentType) node; // Note that the DOM Level 1 does not give us information about // the the public or system ids of the doctype, so we can't output // a complete <!DOCTYPE> tag here. We can do better with Level 2. out.println("<!DOCTYPE " + doctype.getName() + ">"); break; } case Node.ELEMENT_NODE: { // Most nodes are Elements Element elt = (Element) node; out.print(indent + "<" + elt.getTagName()); // Begin start tag NamedNodeMap attrs = elt.getAttributes(); // Get attributes for (int i = 0; i < attrs.getLength(); i++) { // Loop through them Node a = attrs.item(i); out.print(" " + a.getNodeName() + "='" + // Print attr. name fixup(a.getNodeValue()) + "'"); // Print attr. 
value } out.println(">"); // Finish start tag String newindent = indent + " "; // Increase indent Node child = elt.getFirstChild(); // Get child while (child != null) { // Loop write(child, newindent); // Output child child = child.getNextSibling(); // Get next child } out.println(indent + "</" + // Output end tag elt.getTagName() + ">"); break; } case Node.TEXT_NODE: { // Plain text node Text textNode = (Text) node; String text = textNode.getData().trim(); // Strip off space if ((text != null) && text.length() > 0) // If non-empty out.println(indent + fixup(text)); // print text break; } case Node.PROCESSING_INSTRUCTION_NODE: { // Handle PI nodes ProcessingInstruction pi = (ProcessingInstruction) node; out.println(indent + "<?" + pi.getTarget() + " " + pi.getData() + "?>"); break; } case Node.ENTITY_REFERENCE_NODE: { // Handle entities out.println(indent + "&" + node.getNodeName() + ";"); break; } case Node.CDATA_SECTION_NODE: { // Output CDATA sections CDATASection cdata = (CDATASection) node; // Careful! Don't put a CDATA section in the program itself! out.println(indent + "<" + "![CDATA[" + cdata.getData() + "]]" + ">"); break; } case Node.COMMENT_NODE: { // Comments Comment c = (Comment) node; out.println(indent + "<!--" + c.getData() + "-->"); break; } default: // Hopefully, this won't happen too much! System.err.println("Ignoring node: " + node.getClass().getName()); break; } }