Listing the Contents of Parse Tree Nodes: Using the DOM Parser to Extract XML Document Data : DOM Parser « XML « Java Tutorial





/*
Code revised from
Java, XML, and JAXP by Arthur Griffith John Wiley & Sons 2002

*/


import java.io.IOException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Attr;
import org.w3c.dom.CDATASection;
import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;
import org.w3c.dom.Entity;
import org.w3c.dom.EntityReference;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Notation;
import org.w3c.dom.ProcessingInstruction;
import org.w3c.dom.Text;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

/**
 * Parses an XML document with a validating, namespace-aware DOM parser and
 * prints an indented listing of every node in the resulting parse tree.
 *
 * <p>Usage: {@code java DOMDump [file]}. When no file is given the document
 * named by {@link #DEFAULT_INPUT} is parsed. The process exits with status 1
 * if the document cannot be read or parsed.
 */
public class DOMDump {
  /** Document parsed when no path is supplied on the command line. */
  private static final String DEFAULT_INPUT = "personWithDTD.xml";

  /** Extra indentation applied to each level of child nodes. */
  private static final String CHILD_INDENT = "   ";

  /** Extra indentation applied to attributes, entities and notations. */
  private static final String DETAIL_INDENT = "  ";

  /**
   * Program entry point.
   *
   * @param arg optional command-line arguments; {@code arg[0]}, when present,
   *            is the system identifier (path or URI) of the document to dump
   */
  static public void main(String[] arg) {
    String input = (arg != null && arg.length > 0) ? arg[0] : DEFAULT_INPUT;

    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    dbf.setValidating(true);
    dbf.setNamespaceAware(true);
    dbf.setIgnoringElementContentWhitespace(true);

    // Parse the input to produce a parse tree with its root
    // in the form of a Document object
    Document doc = null;
    try {
      DocumentBuilder builder = dbf.newDocumentBuilder();
      builder.setErrorHandler(new MyErrorHandler());
      doc = builder.parse(new InputSource(input));
    } catch (SAXException e) {
      // Parse problems have already been displayed by the error handler
      // before it rethrew them; any other SAX failure would otherwise
      // terminate the program without a single word of explanation.
      if (!(e instanceof SAXParseException)) {
        System.err.println(e);
      }
      System.exit(1);
    } catch (ParserConfigurationException e) {
      System.err.println(e);
      System.exit(1);
    } catch (IOException e) {
      System.err.println(e);
      System.exit(1);
    }
    dump(doc);
  }

  /**
   * Prints the whole parse tree, starting at the document node with no
   * indentation.
   *
   * @param doc root of the parse tree
   */
  private static void dump(Document doc) {
    dumpLoop(doc, "");
  }

  /**
   * Prints one node using the routine that matches its node type, then
   * recurses into each of its children one indentation level deeper.
   *
   * @param node   node to print
   * @param indent prefix written in front of every line printed for the node
   */
  private static void dumpLoop(Node node, String indent) {
    switch (node.getNodeType()) {
    case Node.ATTRIBUTE_NODE:
      dumpAttributeNode((Attr) node, indent);
      break;
    case Node.CDATA_SECTION_NODE:
      dumpCDATASectionNode((CDATASection) node, indent);
      break;
    case Node.COMMENT_NODE:
      dumpCommentNode((Comment) node, indent);
      break;
    case Node.DOCUMENT_NODE:
      dumpDocument((Document) node, indent);
      break;
    case Node.DOCUMENT_FRAGMENT_NODE:
      dumpDocumentFragment((DocumentFragment) node, indent);
      break;
    case Node.DOCUMENT_TYPE_NODE:
      dumpDocumentType((DocumentType) node, indent);
      break;
    case Node.ELEMENT_NODE:
      dumpElement((Element) node, indent);
      break;
    case Node.ENTITY_NODE:
      dumpEntityNode((Entity) node, indent);
      break;
    case Node.ENTITY_REFERENCE_NODE:
      dumpEntityReferenceNode((EntityReference) node, indent);
      break;
    case Node.NOTATION_NODE:
      dumpNotationNode((Notation) node, indent);
      break;
    case Node.PROCESSING_INSTRUCTION_NODE:
      dumpProcessingInstructionNode((ProcessingInstruction) node, indent);
      break;
    case Node.TEXT_NODE:
      dumpTextNode((Text) node, indent);
      break;
    default:
      System.out.println(indent + "Unknown node");
      break;
    }

    NodeList children = node.getChildNodes();
    for (int i = 0; i < children.getLength(); i++) {
      dumpLoop(children.item(i), indent + CHILD_INDENT);
    }
  }

  /**
   * Runs {@link #dumpLoop} over every entry of a named node map.
   *
   * @param nodes  map to walk; a {@code null} map is treated as empty
   * @param indent prefix handed to {@code dumpLoop} for each entry
   */
  private static void dumpNamedNodes(NamedNodeMap nodes, String indent) {
    if (nodes == null) {
      return;
    }
    for (int i = 0; i < nodes.getLength(); i++) {
      dumpLoop(nodes.item(i), indent);
    }
  }

  /* Display the contents of a ATTRIBUTE_NODE */
  private static void dumpAttributeNode(Attr node, String indent) {
    System.out.println(indent + "ATTRIBUTE " + node.getName() + "=\"" + node.getValue() + "\"");
  }

  /* Display the contents of a CDATA_SECTION_NODE */
  private static void dumpCDATASectionNode(CDATASection node, String indent) {
    System.out.println(indent + "CDATA SECTION length=" + node.getLength());
    System.out.println(indent + "\"" + node.getData() + "\"");
  }

  /* Display the contents of a COMMENT_NODE */
  private static void dumpCommentNode(Comment node, String indent) {
    System.out.println(indent + "COMMENT length=" + node.getLength());
    System.out.println(indent + "  " + node.getData());
  }

  /* Display the contents of a DOCUMENT_NODE */
  private static void dumpDocument(Document node, String indent) {
    System.out.println(indent + "DOCUMENT");
  }

  /* Display the contents of a DOCUMENT_FRAGMENT_NODE */
  private static void dumpDocumentFragment(DocumentFragment node, String indent) {
    System.out.println(indent + "DOCUMENT FRAGMENT");
  }

  /*
   * Display the contents of a DOCUMENT_TYPE_NODE: its name, its public and
   * system identifiers when present, then its entities and notations.
   */
  private static void dumpDocumentType(DocumentType node, String indent) {
    System.out.println(indent + "DOCUMENT_TYPE: " + node.getName());
    if (node.getPublicId() != null) {
      System.out.println(indent + " Public ID: " + node.getPublicId());
    }
    if (node.getSystemId() != null) {
      System.out.println(indent + " System ID: " + node.getSystemId());
    }
    dumpNamedNodes(node.getEntities(), indent + DETAIL_INDENT);
    dumpNamedNodes(node.getNotations(), indent + DETAIL_INDENT);
  }

  /* Display the contents of a ELEMENT_NODE, followed by its attributes */
  private static void dumpElement(Element node, String indent) {
    System.out.println(indent + "ELEMENT: " + node.getTagName());
    dumpNamedNodes(node.getAttributes(), indent + DETAIL_INDENT);
  }

  /* Display the contents of a ENTITY_NODE */
  private static void dumpEntityNode(Entity node, String indent) {
    System.out.println(indent + "ENTITY: " + node.getNodeName());
  }

  /* Display the contents of a ENTITY_REFERENCE_NODE */
  private static void dumpEntityReferenceNode(EntityReference node, String indent) {
    System.out.println(indent + "ENTITY REFERENCE: " + node.getNodeName());
  }

  /*
   * Display the contents of a NOTATION_NODE: its name followed by the public
   * identifier, or by the system identifier when there is no public one.
   */
  private static void dumpNotationNode(Notation node, String indent) {
    System.out.println(indent + "NOTATION");
    System.out.print(indent + "  " + node.getNodeName() + "=");
    if (node.getPublicId() != null) {
      System.out.println(node.getPublicId());
    } else {
      System.out.println(node.getSystemId());
    }
  }

  /* Display the contents of a PROCESSING_INSTRUCTION_NODE */
  private static void dumpProcessingInstructionNode(ProcessingInstruction node, String indent) {
    System.out.println(indent + "PI: target=" + node.getTarget());
    System.out.println(indent + "  " + node.getData());
  }

  /* Display the contents of a TEXT_NODE */
  private static void dumpTextNode(Text node, String indent) {
    System.out.println(indent + "TEXT length=" + node.getLength());
    System.out.println(indent + "  " + node.getData());
  }
}

/**
 * SAX error handler that treats every reported condition as terminal: the
 * problem is described on standard output and the same exception is then
 * thrown back to the parser, whatever its severity.
 */
class MyErrorHandler implements ErrorHandler {
  /** Describes a parser warning on standard output and rethrows it. */
  @Override
  public void warning(SAXParseException e) throws SAXException {
    throw report("Warning", e);
  }

  /** Describes a recoverable parser error on standard output and rethrows it. */
  @Override
  public void error(SAXParseException e) throws SAXException {
    throw report("Error", e);
  }

  /** Describes a fatal parser error on standard output and rethrows it. */
  @Override
  public void fatalError(SAXParseException e) throws SAXException {
    throw report("Fatal Error", e);
  }

  /**
   * Writes three lines to standard output: the severity label with the
   * exception message, the line and column of the problem, and the system
   * identifier of the document being parsed.
   *
   * @param severity label placed in front of the message
   * @param problem  exception supplied by the parser
   * @return {@code problem} itself, so the caller can throw it directly
   */
  private SAXParseException report(String severity, SAXParseException problem) {
    String headline = severity + ": " + problem.getMessage();
    String position =
        "Line " + problem.getLineNumber() + " Column " + problem.getColumnNumber();
    String origin = "System ID: " + problem.getSystemId();

    System.out.println(headline);
    System.out.println(position);
    System.out.println(origin);
    return problem;
  }
}
//File: personWithDTD.xml
<?xml version="1.0" standalone="yes"?>

<!-- This document is both well formed and valid -->

<!DOCTYPE folks [
<!ELEMENT folks (person)*>
<!ELEMENT person (name, phone, email)>
<!ELEMENT name (#PCDATA)>
<!ELEMENT phone (#PCDATA)>
<!ELEMENT email (#PCDATA)>
]>

<folks>
    <person>
        <name>B D</name>
        <phone>999 555-8888</phone>
        <email>b@xyz.net</email>
    </person>
</folks>
DOCUMENT
COMMENT length=45
   This document is both well formed and valid 
DOCUMENT_TYPE: folks
ELEMENT: folks
   ELEMENT: person
      ELEMENT: name
         TEXT length=3
           B D
      ELEMENT: phone
         TEXT length=12
           999 555-8888
      ELEMENT: email
         TEXT length=9
           b@xyz.net










33.2.DOM Parser
33.2.1.DOM Objects That Make Up the Parse Tree
33.2.2.A DOM Error Checker: Using DOM for Syntax Checking
33.2.3.A DOM Parse Tree Lister
33.2.4.Listing the Contents of Parse Tree Nodes: Using the DOM Parser to Extract XML Document Data
33.2.5.Ignorable Whitespace and Element Content
33.2.6.Remove the element from parent
33.2.7.Visiting All the Elements in a DOM Document
33.2.8.Getting the Root Element in a DOM Document
33.2.9.Getting a Node Relative to Another Node in a DOM Document
33.2.10.Getting the Notations in a DOM Document
33.2.11.Getting the Declared Entities in a DOM Document
33.2.12.Getting the Value of an Entity Reference in a DOM Document
33.2.13.Getting a DOM Element by Id
33.2.14.Converting an XML Fragment into a DOM Fragment
33.2.15.Parse an XML string: Using DOM and a StringReader.
33.2.16.Use DOM L3 DOMBuilder, DOMBuilderFilter DOMWriter and other DOM L3 functionality to preparse, revalidate and safe document.
33.2.17.Read XML as DOM
33.2.18.Create DOM Document out of string
33.2.19.Source To InputSource