Java HTML / XML How to - Extract XML table data using XPath








Question

We would like to know how to extract XML table data using XPath.

Answer

import java.io.File;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
/* w w  w .j a  v a2s.c  om*/
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Dumps the tables stored in {@code Table.xml} (resolved against the working directory) to
 * standard output using XPath.
 *
 * <p>For every {@code /tables/table} element the program prints, one value per line:
 * <ol>
 *   <li>the table's {@code title} attribute (an empty line when the attribute is absent),</li>
 *   <li>each distinct {@code headings/heading} text, trimmed, in document order,</li>
 *   <li>for each {@code tablebody/tablerow}, every {@code tablecell/item} text, trimmed,
 *       in document order.</li>
 * </ol>
 */
public class Main {
  /**
   * Parses {@code Table.xml} and prints its tables as described on the class.
   *
   * @param args ignored
   * @throws Exception if the file cannot be read or parsed, or an XPath expression fails
   */
  public static void main(String[] args) throws Exception {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    DocumentBuilder builder = factory.newDocumentBuilder();
    Document doc = builder.parse(new File("Table.xml"));

    // The expressions never change, so compile each one once instead of once per
    // table/row iteration. The relative ones are evaluated against a context node.
    XPath path = XPathFactory.newInstance().newXPath();
    XPathExpression tablesExp = path.compile("/tables/table");
    XPathExpression headingsExp = path.compile("headings/heading");
    XPathExpression rowsExp = path.compile("tablebody/tablerow");
    XPathExpression itemsExp = path.compile("tablecell/item");

    NodeList nlTables = (NodeList) tablesExp.evaluate(doc, XPathConstants.NODESET);
    for (int tblIndex = 0; tblIndex < nlTables.getLength(); tblIndex++) {
      Node table = nlTables.item(tblIndex);

      // getNamedItem returns null for a missing attribute; print an empty title
      // rather than failing with a NullPointerException.
      Node nAtt = table.getAttributes().getNamedItem("title");
      System.out.println(nAtt == null ? "" : nAtt.getTextContent());

      // LinkedHashSet keeps the de-duplication of the headings but iterates in
      // insertion (document) order; a plain HashSet gives no ordering guarantee.
      NodeList nlHeaders = (NodeList) headingsExp.evaluate(table,
          XPathConstants.NODESET);
      Set<String> headers = new LinkedHashSet<String>(25);
      for (int index = 0; index < nlHeaders.getLength(); index++) {
        headers.add(nlHeaders.item(index).getTextContent().trim());
      }
      for (String header : headers) {
        System.out.println(header);
      }

      NodeList nlRows = (NodeList) rowsExp.evaluate(table, XPathConstants.NODESET);
      for (int index = 0; index < nlRows.getLength(); index++) {
        Node rowNode = nlRows.item(index);
        NodeList nlValues = (NodeList) itemsExp.evaluate(rowNode,
            XPathConstants.NODESET);
        List<String> values = new ArrayList<String>(25);
        for (int valueIndex = 0; valueIndex < nlValues.getLength(); valueIndex++) {
          values.add(nlValues.item(valueIndex).getTextContent().trim());
        }
        for (String value : values) {
          System.out.println(value);
        }
      }
    }
  }
}

Table.xml

<tables>
    <table index="1" title="Record" ref="results">
        <headings>
            <heading>A</heading>
            <heading>B</heading>
            <heading>C</heading>
            <heading>D</heading>
            <heading>E</heading>
            <heading>F</heading>
        </headings>
        <tablebody>
            <tablerow>
                <tablecell>
                    <item>10</item>
                </tablecell>
                <tablecell>
                    <item>12</item>
                </tablecell>
                <tablecell>
                    <item>13</item>
                </tablecell>
                <tablecell>
                    <item>15</item>
                </tablecell>
                <tablecell>
                    <item>B</item>
                </tablecell>
                <tablecell>
                    <item>B</item>
                </tablecell>
            </tablerow>
        </tablebody>
    </table>
    <table index="2" title="Emp" ref="Results">
        <headings>
            <heading>A</heading>
            <heading>B</heading>
            <heading>C</heading>
            <heading>D</heading>
            <heading>E</heading>
            <heading>F</heading>
        </headings>
        <tablebody>
            <tablerow>
                <tablecell>
                    <item>15</item>
                </tablecell>
                <tablecell>
                    <item>15</item>
                </tablecell>
                <tablecell>
                    <item>15</item>
                </tablecell>
                <tablecell>
                    <item>14</item>
                </tablecell>
                <tablecell>
                    <item>A</item>
                </tablecell>
                <tablecell>
                    <item>A</item>
                </tablecell>
            </tablerow>
            <tablerow>
                <tablecell>
                    <item>10</item>
                </tablecell>
                <tablecell>
                    <item>5</item>
                </tablecell>
                <tablecell>
                    <item>9</item>
                </tablecell>
                <tablecell>
                    <item>11</item>
                </tablecell>
                <tablecell>
                    <item>C</item>
                </tablecell>
                <tablecell>
                    <item>C</item>
                </tablecell>
            </tablerow>
        </tablebody>
    </table>
</tables>