1   // ========================================================================
2   // Copyright 2004-2005 Mort Bay Consulting Pty. Ltd.
3   // ------------------------------------------------------------------------
4   // Licensed under the Apache License, Version 2.0 (the "License");
5   // you may not use this file except in compliance with the License.
6   // You may obtain a copy of the License at 
7   // http://www.apache.org/licenses/LICENSE-2.0
8   // Unless required by applicable law or agreed to in writing, software
9   // distributed under the License is distributed on an "AS IS" BASIS,
10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  // See the License for the specific language governing permissions and
12  // limitations under the License.
13  // ========================================================================
14  
15  package org.mortbay.xml;
16  
17  import java.io.File;
18  import java.io.IOException;
19  import java.io.InputStream;
20  import java.net.URL;
21  import java.util.AbstractList;
22  import java.util.ArrayList;
23  import java.util.HashMap;
24  import java.util.Iterator;
25  import java.util.Map;
26  import java.util.NoSuchElementException;
27  import java.util.Stack;
28  import java.util.StringTokenizer;
29  
30  import javax.xml.parsers.SAXParser;
31  import javax.xml.parsers.SAXParserFactory;
32  
33  import org.mortbay.log.Log;
34  import org.mortbay.util.LazyList;
35  import org.xml.sax.Attributes;
36  import org.xml.sax.ContentHandler;
37  import org.xml.sax.InputSource;
38  import org.xml.sax.SAXException;
39  import org.xml.sax.SAXParseException;
40  import org.xml.sax.XMLReader;
41  import org.xml.sax.helpers.DefaultHandler;
42  
43  /*--------------------------------------------------------------*/
44  /**
 * XML Parser wrapper. This class wraps any standard JAXP1.1 parser with convenient error and
46   * entity handlers and a mini dom-like document tree.
47   * <P>
48   * By default, the parser is created as a validating parser only if xerces is present. This can be 
49   * configured by setting the "org.mortbay.xml.XmlParser.Validating" system property.
50   * 
51   * @author Greg Wilkins (gregw)
52   */
53  public class XmlParser
54  {
55      private Map _redirectMap = new HashMap();
56      private SAXParser _parser;
57      private Map _observerMap;
58      private Stack _observers = new Stack();
59      private String _xpath;
60      private Object _xpaths;
61      private String _dtd;
62  
63      /* ------------------------------------------------------------ */
64      /**
65       * Construct
66       */
67      public XmlParser()
68      {
69          SAXParserFactory factory = SAXParserFactory.newInstance();
70          boolean validating_dft = factory.getClass().toString().startsWith("org.apache.xerces.");
71          String validating_prop = System.getProperty("org.mortbay.xml.XmlParser.Validating", validating_dft ? "true" : "false");
72          boolean notValidating = Boolean.getBoolean("org.mortbay.xml.XmlParser.NotValidating"); // deprecated!
73          boolean validating = !notValidating && Boolean.valueOf(validating_prop).booleanValue();
74  
75          setValidating(validating);
76      }
77  
78      /* ------------------------------------------------------------ */
79      /**
80       * Constructor.
81       */
82      public XmlParser(boolean validating)
83      {
84          setValidating(validating);
85      }
86      
87      /* ------------------------------------------------------------ */
88      public void setValidating(boolean validating)
89      {
90          try
91          {
92              SAXParserFactory factory = SAXParserFactory.newInstance();
93              factory.setValidating(validating);
94              _parser = factory.newSAXParser();
95              
96              try
97              {
98                  if (validating)
99                      _parser.getXMLReader().setFeature("http://apache.org/xml/features/validation/schema", validating);
100             }
101             catch (Exception e)
102             {
103                 if (validating)
104                     Log.warn("Schema validation may not be supported: ", e);
105                 else
106                     Log.ignore(e);
107             }
108 
109             _parser.getXMLReader().setFeature("http://xml.org/sax/features/validation", validating);
110             _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespaces", validating);
111             _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes", validating);  }
112         catch (Exception e)
113         {
114             Log.warn(Log.EXCEPTION, e);
115             throw new Error(e.toString());
116         }
117     }
118     
119     /* ------------------------------------------------------------ */
120     /**
121      * @param name
122      * @param entity
123      */
124     public synchronized void redirectEntity(String name, URL entity)
125     {
126         if (entity != null)
127             _redirectMap.put(name, entity);
128     }
129 
130     /* ------------------------------------------------------------ */
131     /**
132      * 
133      * @return Returns the xpath.
134      */
135     public String getXpath()
136     {
137         return _xpath;
138     }
139 
140     /* ------------------------------------------------------------ */
141     /**
142      * Set an XPath A very simple subset of xpath is supported to select a partial tree. Currently
143      * only path like "/node1/nodeA | /node1/nodeB" are supported.
144      * 
145      * @param xpath The xpath to set.
146      */
147     public void setXpath(String xpath)
148     {
149         _xpath = xpath;
150         StringTokenizer tok = new StringTokenizer(xpath, "| ");
151         while (tok.hasMoreTokens())
152             _xpaths = LazyList.add(_xpaths, tok.nextToken());
153     }
154 
155     /* ------------------------------------------------------------ */
156     public String getDTD()
157     {
158         return _dtd;
159     }
160 
161     /* ------------------------------------------------------------ */
162     /**
163      * Add a ContentHandler. Add an additional _content handler that is triggered on a tag name. SAX
164      * events are passed to the ContentHandler provided from a matching start element to the
165      * corresponding end element. Only a single _content handler can be registered against each tag.
166      * 
167      * @param trigger Tag local or q name.
168      * @param observer SAX ContentHandler
169      */
170     public synchronized void addContentHandler(String trigger, ContentHandler observer)
171     {
172         if (_observerMap == null)
173             _observerMap = new HashMap();
174         _observerMap.put(trigger, observer);
175     }
176 
177     /* ------------------------------------------------------------ */
178     public synchronized Node parse(InputSource source) throws IOException, SAXException
179     {
180         _dtd=null;
181         Handler handler = new Handler();
182         XMLReader reader = _parser.getXMLReader();
183         reader.setContentHandler(handler);
184         reader.setErrorHandler(handler);
185         reader.setEntityResolver(handler);
186         if (Log.isDebugEnabled())
187             Log.debug("parsing: sid=" + source.getSystemId() + ",pid=" + source.getPublicId());
188         _parser.parse(source, handler);
189         if (handler._error != null)
190             throw handler._error;
191         Node doc = (Node) handler._top.get(0);
192         handler.clear();
193         return doc;
194     }
195 
196     /* ------------------------------------------------------------ */
197     /**
198      * Parse String URL.
199      */
200     public synchronized Node parse(String url) throws IOException, SAXException
201     {
202         if (Log.isDebugEnabled())
203             Log.debug("parse: " + url);
204         return parse(new InputSource(url));
205     }
206 
207     /* ------------------------------------------------------------ */
208     /**
209      * Parse File.
210      */
211     public synchronized Node parse(File file) throws IOException, SAXException
212     {
213         if (Log.isDebugEnabled())
214             Log.debug("parse: " + file);
215         return parse(new InputSource(file.toURL().toString()));
216     }
217 
218     /* ------------------------------------------------------------ */
219     /**
220      * Parse InputStream.
221      */
222     public synchronized Node parse(InputStream in) throws IOException, SAXException
223     {
224         _dtd=null;
225         Handler handler = new Handler();
226         XMLReader reader = _parser.getXMLReader();
227         reader.setContentHandler(handler);
228         reader.setErrorHandler(handler);
229         reader.setEntityResolver(handler);
230         _parser.parse(new InputSource(in), handler);
231         if (handler._error != null)
232             throw handler._error;
233         Node doc = (Node) handler._top.get(0);
234         handler.clear();
235         return doc;
236     }
237 
238     /* ------------------------------------------------------------ */
239     /* ------------------------------------------------------------ */
240     private class NoopHandler extends DefaultHandler
241     {
242         Handler _next;
243         int _depth;
244 
245         NoopHandler(Handler next)
246         {
247             this._next = next;
248         }
249 
250         /* ------------------------------------------------------------ */
251         public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException
252         {
253             _depth++;
254         }
255 
256         /* ------------------------------------------------------------ */
257         public void endElement(String uri, String localName, String qName) throws SAXException
258         {
259             if (_depth == 0)
260                 _parser.getXMLReader().setContentHandler(_next);
261             else
262                 _depth--;
263         }
264     }
265     
266     /* ------------------------------------------------------------ */
267     /* ------------------------------------------------------------ */
268     private class Handler extends DefaultHandler
269     {
270         Node _top = new Node(null, null, null);
271         SAXParseException _error;
272         private Node _context = _top;
273         private NoopHandler _noop;
274 
275         Handler()
276         {
277             _noop = new NoopHandler(this);
278         }
279 
280         /* ------------------------------------------------------------ */
281         void clear()
282         {
283             _top = null;
284             _error = null;
285             _context = null;
286         }
287 
288         /* ------------------------------------------------------------ */
289         public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException
290         {
291             String name = (uri == null || uri.equals("")) ? qName : localName;
292             Node node = new Node(_context, name, attrs);
293             
294 
295             // check if the node matches any xpaths set?
296             if (_xpaths != null)
297             {
298                 String path = node.getPath();
299                 boolean match = false;
300                 for (int i = LazyList.size(_xpaths); !match && i-- > 0;)
301                 {
302                     String xpath = (String) LazyList.get(_xpaths, i);
303 
304                     match = path.equals(xpath) || xpath.startsWith(path) && xpath.length() > path.length() && xpath.charAt(path.length()) == '/';
305                 }
306 
307                 if (match)
308                 {
309                     _context.add(node);
310                     _context = node;
311                 }
312                 else
313                 {
314                     _parser.getXMLReader().setContentHandler(_noop);
315                 }
316             }
317             else
318             {
319                 _context.add(node);
320                 _context = node;
321             }
322 
323             ContentHandler observer = null;
324             if (_observerMap != null)
325                 observer = (ContentHandler) _observerMap.get(name);
326             _observers.push(observer);
327 
328             for (int i = 0; i < _observers.size(); i++)
329                 if (_observers.get(i) != null)
330                     ((ContentHandler) _observers.get(i)).startElement(uri, localName, qName, attrs);
331         }
332 
333         /* ------------------------------------------------------------ */
334         public void endElement(String uri, String localName, String qName) throws SAXException
335         {
336             _context = _context._parent;
337             for (int i = 0; i < _observers.size(); i++)
338                 if (_observers.get(i) != null)
339                     ((ContentHandler) _observers.get(i)).endElement(uri, localName, qName);
340             _observers.pop();
341         }
342 
343         /* ------------------------------------------------------------ */
344         public void ignorableWhitespace(char buf[], int offset, int len) throws SAXException
345         {
346             for (int i = 0; i < _observers.size(); i++)
347                 if (_observers.get(i) != null)
348                     ((ContentHandler) _observers.get(i)).ignorableWhitespace(buf, offset, len);
349         }
350 
351         /* ------------------------------------------------------------ */
352         public void characters(char buf[], int offset, int len) throws SAXException
353         {
354             _context.add(new String(buf, offset, len));
355             for (int i = 0; i < _observers.size(); i++)
356                 if (_observers.get(i) != null)
357                     ((ContentHandler) _observers.get(i)).characters(buf, offset, len);
358         }
359 
360         /* ------------------------------------------------------------ */
361         public void warning(SAXParseException ex)
362         {
363             Log.debug(Log.EXCEPTION, ex);
364             Log.warn("WARNING@" + getLocationString(ex) + " : " + ex.toString());
365         }
366 
367         /* ------------------------------------------------------------ */
368         public void error(SAXParseException ex) throws SAXException
369         {
370             // Save error and continue to report other errors
371             if (_error == null)
372                 _error = ex;
373             Log.debug(Log.EXCEPTION, ex);
374             Log.warn("ERROR@" + getLocationString(ex) + " : " + ex.toString());
375         }
376 
377         /* ------------------------------------------------------------ */
378         public void fatalError(SAXParseException ex) throws SAXException
379         {
380             _error = ex;
381             Log.debug(Log.EXCEPTION, ex);
382             Log.warn("FATAL@" + getLocationString(ex) + " : " + ex.toString());
383             throw ex;
384         }
385 
386         /* ------------------------------------------------------------ */
387         private String getLocationString(SAXParseException ex)
388         {
389             return ex.getSystemId() + " line:" + ex.getLineNumber() + " col:" + ex.getColumnNumber();
390         }
391 
392         /* ------------------------------------------------------------ */
393         public InputSource resolveEntity(String pid, String sid)
394         {
395             if (Log.isDebugEnabled())
396                 Log.debug("resolveEntity(" + pid + ", " + sid + ")");
397 
398             if (sid!=null && sid.endsWith(".dtd"))
399                 _dtd=sid;
400             
401             URL entity = null;
402             if (pid != null)
403                 entity = (URL) _redirectMap.get(pid);
404             if (entity == null)
405                 entity = (URL) _redirectMap.get(sid);
406             if (entity == null)
407             {
408                 String dtd = sid;
409                 if (dtd.lastIndexOf('/') >= 0)
410                     dtd = dtd.substring(dtd.lastIndexOf('/') + 1);
411 
412                 if (Log.isDebugEnabled())
413                     Log.debug("Can't exact match entity in redirect map, trying " + dtd);
414                 entity = (URL) _redirectMap.get(dtd);
415             }
416 
417             if (entity != null)
418             {
419                 try
420                 {
421                     InputStream in = entity.openStream();
422                     if (Log.isDebugEnabled())
423                         Log.debug("Redirected entity " + sid + " --> " + entity);
424                     InputSource is = new InputSource(in);
425                     is.setSystemId(sid);
426                     return is;
427                 }
428                 catch (IOException e)
429                 {
430                     Log.ignore(e);
431                 }
432             }
433             return null;
434         }
435     }
436 
437     /* ------------------------------------------------------------ */
438     /* ------------------------------------------------------------ */
439     /**
440      * XML Attribute.
441      */
442     public static class Attribute
443     {
444         private String _name;
445         private String _value;
446 
447         Attribute(String n, String v)
448         {
449             _name = n;
450             _value = v;
451         }
452 
453         public String getName()
454         {
455             return _name;
456         }
457 
458         public String getValue()
459         {
460             return _value;
461         }
462     }
463 
464     /* ------------------------------------------------------------ */
465     /* ------------------------------------------------------------ */
466     /**
467      * XML Node. Represents an XML element with optional attributes and ordered content.
468      */
469     public static class Node extends AbstractList
470     {
471         Node _parent;
472         private ArrayList _list;
473         private String _tag;
474         private Attribute[] _attrs;
475         private boolean _lastString = false;
476         private String _path;
477 
478         /* ------------------------------------------------------------ */
479         Node(Node parent, String tag, Attributes attrs)
480         {
481             _parent = parent;
482             _tag = tag;
483 
484             if (attrs != null)
485             {
486                 _attrs = new Attribute[attrs.getLength()];
487                 for (int i = 0; i < attrs.getLength(); i++)
488                 {
489                     String name = attrs.getLocalName(i);
490                     if (name == null || name.equals(""))
491                         name = attrs.getQName(i);
492                     _attrs[i] = new Attribute(name, attrs.getValue(i));
493                 }
494             }
495         }
496 
497         /* ------------------------------------------------------------ */
498         public Node getParent()
499         {
500             return _parent;
501         }
502 
503         /* ------------------------------------------------------------ */
504         public String getTag()
505         {
506             return _tag;
507         }
508 
509         /* ------------------------------------------------------------ */
510         public String getPath()
511         {
512             if (_path == null)
513             {
514                 if (getParent() != null && getParent().getTag() != null)
515                     _path = getParent().getPath() + "/" + _tag;
516                 else
517                     _path = "/" + _tag;
518             }
519             return _path;
520         }
521 
522         /* ------------------------------------------------------------ */
523         /**
524          * Get an array of element attributes.
525          */
526         public Attribute[] getAttributes()
527         {
528             return _attrs;
529         }
530 
531         /* ------------------------------------------------------------ */
532         /**
533          * Get an element attribute.
534          * 
535          * @return attribute or null.
536          */
537         public String getAttribute(String name)
538         {
539             return getAttribute(name, null);
540         }
541 
542         /* ------------------------------------------------------------ */
543         /**
544          * Get an element attribute.
545          * 
546          * @return attribute or null.
547          */
548         public String getAttribute(String name, String dft)
549         {
550             if (_attrs == null || name == null)
551                 return dft;
552             for (int i = 0; i < _attrs.length; i++)
553                 if (name.equals(_attrs[i].getName()))
554                     return _attrs[i].getValue();
555             return dft;
556         }
557 
558         /* ------------------------------------------------------------ */
559         /**
560          * Get the number of children nodes.
561          */
562         public int size()
563         {
564             if (_list != null)
565                 return _list.size();
566             return 0;
567         }
568 
569         /* ------------------------------------------------------------ */
570         /**
571          * Get the ith child node or content.
572          * 
573          * @return Node or String.
574          */
575         public Object get(int i)
576         {
577             if (_list != null)
578                 return _list.get(i);
579             return null;
580         }
581 
582         /* ------------------------------------------------------------ */
583         /**
584          * Get the first child node with the tag.
585          * 
586          * @param tag
587          * @return Node or null.
588          */
589         public Node get(String tag)
590         {
591             if (_list != null)
592             {
593                 for (int i = 0; i < _list.size(); i++)
594                 {
595                     Object o = _list.get(i);
596                     if (o instanceof Node)
597                     {
598                         Node n = (Node) o;
599                         if (tag.equals(n._tag))
600                             return n;
601                     }
602                 }
603             }
604             return null;
605         }
606 
607         /* ------------------------------------------------------------ */
608         public void add(int i, Object o)
609         {
610             if (_list == null)
611                 _list = new ArrayList();
612             if (o instanceof String)
613             {
614                 if (_lastString)
615                 {
616                     int last = _list.size() - 1;
617                     _list.set(last, (String) _list.get(last) + o);
618                 }
619                 else
620                     _list.add(i, o);
621                 _lastString = true;
622             }
623             else
624             {
625                 _lastString = false;
626                 _list.add(i, o);
627             }
628         }
629 
630         /* ------------------------------------------------------------ */
631         public void clear()
632         {
633             if (_list != null)
634                 _list.clear();
635             _list = null;
636         }
637 
638         /* ------------------------------------------------------------ */
639         /**
640          * Get a tag as a string.
641          * 
642          * @param tag The tag to get
643          * @param tags IF true, tags are included in the value.
644          * @param trim If true, trim the value.
645          * @return results of get(tag).toString(tags).
646          */
647         public String getString(String tag, boolean tags, boolean trim)
648         {
649             Node node = get(tag);
650             if (node == null)
651                 return null;
652             String s = node.toString(tags);
653             if (s != null && trim)
654                 s = s.trim();
655             return s;
656         }
657 
658         /* ------------------------------------------------------------ */
659         public synchronized String toString()
660         {
661             return toString(true);
662         }
663 
664         /* ------------------------------------------------------------ */
665         /**
666          * Convert to a string.
667          * 
668          * @param tag If false, only _content is shown.
669          */
670         public synchronized String toString(boolean tag)
671         {
672             StringBuilder buf = new StringBuilder();
673             toString(buf, tag);
674             return buf.toString();
675         }
676 
677         /* ------------------------------------------------------------ */
678         /**
679          * Convert to a string.
680          * 
681          * @param tag If false, only _content is shown.
682          */
683         public synchronized String toString(boolean tag, boolean trim)
684         {
685             String s = toString(tag);
686             if (s != null && trim)
687                 s = s.trim();
688             return s;
689         }
690 
691         /* ------------------------------------------------------------ */
692         private synchronized void toString(StringBuilder buf, boolean tag)
693         {
694             if (tag)
695             {
696                 buf.append("<");
697                 buf.append(_tag);
698 
699                 if (_attrs != null)
700                 {
701                     for (int i = 0; i < _attrs.length; i++)
702                     {
703                         buf.append(' ');
704                         buf.append(_attrs[i].getName());
705                         buf.append("=\"");
706                         buf.append(_attrs[i].getValue());
707                         buf.append("\"");
708                     }
709                 }
710             }
711 
712             if (_list != null)
713             {
714                 if (tag)
715                     buf.append(">");
716                 for (int i = 0; i < _list.size(); i++)
717                 {
718                     Object o = _list.get(i);
719                     if (o == null)
720                         continue;
721                     if (o instanceof Node)
722                         ((Node) o).toString(buf, tag);
723                     else
724                         buf.append(o.toString());
725                 }
726                 if (tag)
727                 {
728                     buf.append("</");
729                     buf.append(_tag);
730                     buf.append(">");
731                 }
732             }
733             else if (tag)
734                 buf.append("/>");
735         }
736 
737         /* ------------------------------------------------------------ */
738         /**
739          * Iterator over named child nodes.
740          * 
741          * @param tag The tag of the nodes.
742          * @return Iterator over all child nodes with the specified tag.
743          */
744         public Iterator iterator(final String tag)
745         {
746             return new Iterator()
747             {
748                 int c = 0;
749                 Node _node;
750 
751                 /* -------------------------------------------------- */
752                 public boolean hasNext()
753                 {
754                     if (_node != null)
755                         return true;
756                     while (_list != null && c < _list.size())
757                     {
758                         Object o = _list.get(c);
759                         if (o instanceof Node)
760                         {
761                             Node n = (Node) o;
762                             if (tag.equals(n._tag))
763                             {
764                                 _node = n;
765                                 return true;
766                             }
767                         }
768                         c++;
769                     }
770                     return false;
771                 }
772 
773                 /* -------------------------------------------------- */
774                 public Object next()
775                 {
776                     try
777                     {
778                         if (hasNext())
779                             return _node;
780                         throw new NoSuchElementException();
781                     }
782                     finally
783                     {
784                         _node = null;
785                         c++;
786                     }
787                 }
788 
789                 /* -------------------------------------------------- */
790                 public void remove()
791                 {
792                     throw new UnsupportedOperationException("Not supported");
793                 }
794             };
795         }
796     }
797 }