# Content handler for element starting and ending : sax « XML « Python Tutorial






from xml.sax import parse
from xml.sax.handler import ContentHandler
from xml.sax.saxutils import escape, quoteattr

class PageMaker(ContentHandler):
    """SAX handler that writes each ``<page>`` element to its own HTML file.

    A ``<page name=... title=...>`` start tag opens ``<name>.html`` and
    writes an HTML header that uses ``title``.  While inside the page,
    nested start tags, end tags and character data are copied to that
    file.  The matching ``</page>`` end tag writes the footer and closes
    the file.  Events that arrive outside a ``<page>`` element are ignored.

    The parser delivers attribute values and text with entities already
    decoded, so both are escaped again on output; otherwise a literal
    ``&``, ``<`` or ``"`` in the input would corrupt the generated markup.
    """

    # True while inside a <page> element, i.e. while ``self.out`` is open.
    passthrough = False

    def startElement(self, name, attrs):
        """Open a new page file, or copy a nested start tag into it.

        Args:
            name: Element name.
            attrs: Mapping of attribute names to values.  A ``page``
                element must provide ``name`` and ``title``.

        Raises:
            KeyError: If a ``page`` element lacks ``name`` or ``title``.
        """
        if name == 'page':
            self.passthrough = True
            # NOTE(review): the file name is taken verbatim from the
            # document and the platform default encoding is used; validate
            # the name / pin an encoding if the input is not trusted.
            self.out = open(attrs['name'] + '.html', 'w')
            self.out.write('<html><head>\n')
            self.out.write('<title>%s</title>\n' % escape(attrs['title']))
            self.out.write('</head><body>\n')
        elif self.passthrough:
            # quoteattr() escapes the value and adds the surrounding quotes.
            rendered = ''.join(
                ' %s=%s' % (key, quoteattr(val)) for key, val in attrs.items()
            )
            self.out.write('<%s%s>' % (name, rendered))

    def endElement(self, name):
        """Close the current page file, or copy a nested end tag into it.

        Args:
            name: Element name.
        """
        if name == 'page':
            self.passthrough = False
            self.out.write('\n</body></html>\n')
            self.out.close()
        elif self.passthrough:
            self.out.write('</%s>' % name)

    def characters(self, chars):
        """Copy character data into the current page, escaping ``&<>``.

        Args:
            chars: Text chunk reported by the parser.
        """
        if self.passthrough:
            self.out.write(escape(chars))

# Feed the site description through the handler; every <page> element in
# 'website.xml' is written out as its own .html file.
parse('website.xml', PageMaker())








# 20.1. sax
# 20.1.1. Demonstrating SAX-based parsing.
# 20.1.2. Content handler for element starting and ending