Extracting Text from XML Documents : expat « XML « Python Tutorial






from xml.parsers import expat

#Path of the XML document that is handed to xmlText.Parse below
xmlFile = "emails.xml"

#Define a class that will store the character data
class xmlText(object):
    """Collect the character data (text nodes) of an XML document.

    Every non-blank chunk of text reported by the expat parser is stripped
    of surrounding whitespace and appended to ``textBuff`` followed by a
    newline.
    """

    def __init__(self):
        # Accumulated text: one stripped chunk per line.
        self.textBuff = ""

    def CharacterData(self, data):
        """Handle one chunk of character data delivered by expat.

        data -- the text chunk; chunks that are empty after stripping
                whitespace are ignored.
        """
        data = data.strip()
        if data:
            # Keep the text exactly as the parser delivered it. Forcing it
            # through encode('ascii') raised UnicodeEncodeError on any
            # non-ASCII character, and on Python 3 produced bytes that
            # cannot be concatenated with the str buffer.
            self.textBuff += data + "\n"

    def Parse(self, fName):
        """Parse the XML file at path fName, filling ``textBuff``.

        fName -- path of the XML file to read.

        Raises IOError/OSError if the file cannot be opened and
        expat.ExpatError if the document is not well-formed.
        """
        xmlParser = expat.ParserCreate()
        xmlParser.CharacterDataHandler = self.CharacterData
        # Binary mode lets expat honour the document's own encoding
        # declaration; the with-block guarantees the handle is closed even
        # when parsing fails.
        with open(fName, "rb") as xmlStream:
            xmlParser.ParseFile(xmlStream)

#Extract the text of the XML file and print it below a header line
xText = xmlText()
xText.Parse(xmlFile)
# A single parenthesised argument prints identically on Python 2 (print
# statement) and Python 3 (print function); the bare print statement form
# is a SyntaxError on Python 3.
print("Text from %s\n=" % xmlFile)
print(xText.textBuff)








20.3.expat
20.3.1.Extracting Text from XML Documents
20.3.2.Parsing XML Tags