Package deefuzzer :: Package tools :: Module xmltodict2
[hide private]
[frames | no frames]

Source Code for Module deefuzzer.tools.xmltodict2

  1  # -*- coding: utf-8 -*- 
  2  """ xmltodict(): convert xml into tree of Python dicts. 
  3   
  4  This was copied and modified from John Bair's recipe at aspn.activestate.com: 
  5      http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/149368 
  6  """ 
  7  import os 
  8  import string 
  9  import locale 
 10  from xml.parsers import expat 
 11   
 12  """ """ 
 13   
 14  ''' 
 15  # If we're in Dabo, get the default encoding. 
 16  # import dabo 
 17  # import dabo.lib.DesignerUtils as desUtil 
 18  # from dabo.dLocalize import _ 
 19  # from dabo.lib.utils import resolvePath 
 20  # app = dabo.dAppRef 
 21  # if app is not None: 
 22      # default_encoding = app.Encoding 
 23  # else: 
 24          # enc = locale.getlocale()[1] 
 25          # if enc is None: 
 26              # enc = dabo.defaultEncoding 
 27          # default_encoding = enc 
 28  ''' 
 29   
# Python seems to need "\n" as the line separator when compiling code,
# so that separator is kept available independently of the platform one.
code_linesep = "\n"
# Platform line separator: dicttoxml() terminates emitted lines with it and
# xmltodict() checks for it to tell raw XML text apart from a file path.
eol = os.linesep
 33   
 34   
def _to_native_str(text):
    """Return *text* as the interpreter's native ``str`` type.

    On Python 2 expat hands over ``unicode`` objects; they are encoded as
    UTF-8 so that non-ASCII names and text no longer raise
    UnicodeEncodeError (a bare ``.encode()`` uses the ASCII codec there).
    Pure-ASCII input yields exactly the same byte string as before.
    On Python 3 the text is already ``str`` and is returned unchanged.
    """
    if isinstance(text, str):
        return text
    return text.encode("utf-8")


class Xml2Obj:
    """Expat-driven builder that turns an XML document into nested dicts.

    Every ordinary element becomes a dict with a "name" key and, when
    applicable, "attributes" (the attribute mapping supplied by expat, minus
    any names listed in ``attsToSkip``), "children" (a list of child element
    dicts) and "cdata" (the concatenated non-blank character data).

    Two element names are handled specially and never become nodes:

    * ``<code>``: each child element is stored in the parent's "code" dict
      as ``{method name: source text}``.
    * ``<properties>``: each child element is stored in the parent's
      "properties" dict as ``{property name: {attribute name: text}}``.
    """

    def __init__(self):
        # Result tree and the stack of elements that are currently open.
        self.root = None
        self.nodeStack = []
        # Attribute names removed from every element's attribute mapping.
        self.attsToSkip = []
        # State used while inside a <code> section.
        self._inCode = False
        self._mthdName = ""
        self._mthdCode = ""
        self._codeDict = None
        # State used while inside a <properties> section.
        self._inProp = False
        self._propName = ""
        self._propData = ""
        self._propDict = None
        self._currPropAtt = ""
        self._currPropDict = None

    def StartElement(self, name, attributes):
        """SAX start element event handler."""
        if name == "code":
            # This is code for the parent element.
            self._inCode = True
            parent = self.nodeStack[-1]
            self._codeDict = parent.setdefault("code", {})
            return

        if name == "properties":
            # These are the custom property definitions of the parent.
            self._inProp = True
            self._propName = ""
            self._propData = ""
            parent = self.nodeStack[-1]
            self._propDict = parent.setdefault("properties", {})
            return

        if self._inCode:
            # Inside <code> every element is a method; its name is the tag.
            self._mthdName = _to_native_str(name)
            return

        if self._inProp:
            if self._propName:
                # In the middle of a prop definition: this tag is one of
                # the property's attributes.
                self._currPropAtt = _to_native_str(name)
            else:
                # First tag below <properties> starts a new property.
                self._propName = _to_native_str(name)
                self._currPropDict = {}
                self._currPropAtt = ""
            return

        element = {"name": _to_native_str(name)}
        if attributes:
            for att in self.attsToSkip:
                attributes.pop(att, None)
            # Stored even if skipping emptied it, as the original did.
            element["attributes"] = attributes

        # Push element onto the stack and make it a child of its parent.
        if self.nodeStack:
            self.nodeStack[-1].setdefault("children", []).append(element)
        else:
            self.root = element
        self.nodeStack.append(element)

    def EndElement(self, name):
        """SAX end element event handler."""
        if self._inCode:
            if name == "code":
                self._inCode = False
                self._codeDict = None
            else:
                # End of an individual method: store its source text with
                # exactly one trailing newline.
                mth = self._mthdCode.strip()
                if not mth.endswith("\n"):
                    mth += "\n"
                self._codeDict[self._mthdName] = mth
                self._mthdName = ""
                self._mthdCode = ""
        elif self._inProp:
            if name == "properties":
                self._inProp = False
                self._propDict = None
            elif name == self._propName:
                # End of an individual prop definition.
                self._propDict[self._propName] = self._currPropDict
                self._propName = ""
            else:
                # End of a property attribute.
                self._currPropDict[self._currPropAtt] = self._propData
                self._propData = self._currPropAtt = ""
        elif self.nodeStack:
            self.nodeStack.pop()

    def CharacterData(self, data):
        """SAX character data event handler.

        Whitespace-only chunks are dropped, except inside <code> where the
        layout of the method source must be preserved.
        """
        if not (self._inCode or data.strip()):
            return
        data = _to_native_str(data.replace("&lt;", "<"))
        if self._inCode:
            self._mthdCode += data
        elif self._inProp:
            self._propData += data
        else:
            element = self.nodeStack[-1]
            element["cdata"] = element.get("cdata", "") + data

    def Parse(self, xml):
        """Parse the XML document *xml* and return the root element dict."""
        # Create a SAX-style expat parser and hook up the event handlers.
        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElement
        parser.EndElementHandler = self.EndElement
        parser.CharacterDataHandler = self.CharacterData
        # The final flag of 1 tells expat this is the whole document.
        parser.Parse(xml, 1)
        return self.root

    def ParseFromFile(self, filename):
        """Parse the XML file *filename* and return the root element dict."""
        # Read raw bytes so expat applies the encoding declared in the
        # document itself; the context manager closes the file promptly.
        with open(filename, "rb") as xmlFile:
            return self.Parse(xmlFile.read())
161 162
class XmlException(Exception):
    """Raised by xmltodict() when its input cannot be located or parsed."""


def xmltodict(xml, attsToSkip=None, addCodeFile=False):
    """Given an xml string or file, return a Python dictionary.

    *xml* is treated as a file path when it names an existing path and does
    not contain the platform line separator. Otherwise it is parsed as raw
    XML, which must start (ignoring surrounding whitespace) with ``<?xml ``.

    *attsToSkip* is an optional list of attribute names to drop from every
    element. When *addCodeFile* is true and *xml* was a path, a sibling
    ``<base>-code.py`` file is merged into the result on a best-effort
    basis; a failure there is printed and does not abort the call.

    Raises XmlException when the file is not well-formed, when the argument
    is neither an existing file nor an XML string, or when the XML string
    is invalid. (Previously these paths died with NameError because the
    Dabo names they relied on are not imported in this module.)
    """
    parser = Xml2Obj()
    parser.attsToSkip = attsToSkip or []
    isPath = os.path.exists(xml)

    if eol not in xml and isPath:
        # Argument was a file.
        try:
            ret = parser.ParseFromFile(xml)
        except expat.ExpatError as e:
            raise XmlException(
                "The XML in '%s' is not well-formed and cannot be parsed: %s"
                % (xml, e))
    elif not xml.strip().startswith("<?xml "):
        # Not raw XML either, so it must have been a bad file name.
        raise XmlException("The file '%s' could not be found" % xml)
    else:
        # Argument must have been raw xml.
        try:
            ret = parser.Parse(xml)
        except expat.ExpatError:
            raise XmlException("An invalid XML string was encountered")

    if addCodeFile and isPath:
        # Get the associated code file, if any.
        codePth = "%s-code.py" % os.path.splitext(xml)[0]
        if os.path.exists(codePth):
            try:
                with open(codePth) as codeFile:
                    codeText = codeFile.read()
                # NOTE(review): desUtil (Dabo's DesignerUtils) is not
                # imported in this module, so unless it is supplied some
                # other way this lookup fails and is reported below.
                codeDict = desUtil.parseCodeFile(codeText)
                desUtil.addCodeToClassDict(ret, codeDict)
            except Exception as e:
                # Deliberately best-effort: report and keep the parsed XML.
                print("Failed to parse code file: %s" % e)
    return ret
200 201
def _escquote_encoding():
    """Return the encoding used to decode byte strings given to escQuote().

    A module-level ``default_encoding`` wins when one has been provided;
    otherwise the locale's encoding is used, falling back to UTF-8.
    """
    configured = globals().get("default_encoding")
    if configured:
        return configured
    try:
        detected = locale.getlocale()[1]
    except ValueError:
        # Raised for locale settings Python cannot interpret.
        detected = None
    return detected or "utf-8"


def escQuote(val, noEscape=False, noQuote=False):
    """Add surrounding quotes to the string, and escape
    any illegal XML characters.

    *val* may be text, a byte string (decoded with the default encoding)
    or any other object (converted with ``str()``). With *noEscape* the
    ampersand, quote and high-order character escaping is skipped; angle
    brackets are always converted to numeric entities. With *noQuote* the
    surrounding double quotes are omitted. Returns text.
    """
    text_type = type(u"")  # unicode on Python 2, str on Python 3
    if not isinstance(val, (bytes, text_type)):
        val = str(val)
    if not isinstance(val, text_type):
        val = val.decode(_escquote_encoding())

    qt = "" if noQuote else '"'

    if not noEscape:
        # First escape internal ampersands. We need to double them up due to
        # a quirk in wxPython and the way it displays this character.
        val = val.replace("&", "&amp;&amp;")
        # Escape any internal quotes.
        val = val.replace('"', "&quot;").replace("'", "&apos;")
        # Escape any high-order characters as numeric entities.
        val = "".join(
            char if ord(char) <= 127 else "&#%s;" % ord(char)
            for char in val
        )
    val = val.replace("<", "&#060;").replace(">", "&#062;")
    return "%s%s%s" % (qt, val, qt)
232 233
def _dicttoxml_encoding():
    """Return the encoding name written into dicttoxml()'s default header.

    A module-level ``default_encoding`` wins when one has been provided;
    otherwise the locale's encoding is used, falling back to UTF-8.
    """
    configured = globals().get("default_encoding")
    if configured:
        return configured
    try:
        detected = locale.getlocale()[1]
    except ValueError:
        # Raised for locale settings Python cannot interpret.
        detected = None
    return detected or "utf-8"


def dicttoxml(dct, level=0, header=None, linesep=None):
    """Given a Python dictionary, return an xml string.

    The dictionary must be in the format returned by xmltodict(), with keys
    on "attributes", "code", "cdata", "name", "properties" and "children".

    *level* is the nesting depth; it controls the tab indentation and is
    increased by one for each recursive call on a child.

    Send your own XML header, otherwise a default one will be used. The
    header is only prepended at level 0.

    The linesep argument is a dictionary, with keys on levels, allowing the
    developer to add extra whitespace depending on the level.
    """
    att = ""
    if "attributes" in dct:
        for key, val in dct["attributes"].items():
            # Some keys are already handled.
            noEscape = key in ("sizerInfo",)
            att += " %s=%s" % (key, escQuote(val, noEscape))
    ret = "%s<%s%s" % ("\t" * level, dct["name"], att)

    hasBody = ("cdata" in dct or "children" in dct
               or "code" in dct or "properties" in dct)
    if not hasBody:
        # Nothing inside the element: emit the self-closing form.
        ret += " />%s" % eol
    else:
        ret += ">"
        if "cdata" in dct:
            ret += "%s" % dct["cdata"].replace("<", "&lt;")

        if dct.get("code"):
            innerTab = "\t" * (level + 1)
            methodTab = "\t" * (level + 2)
            ret += "%s%s<code>%s" % (eol, innerTab, eol)
            for mthd, cd in dct["code"].items():
                # Convert \n's in the code to eol.
                cd = eol.join(cd.splitlines())
                # Make sure that the code ends with a linefeed.
                if not cd.endswith(eol):
                    cd += eol
                ret += "%s<%s><![CDATA[%s%s]]>%s%s</%s>%s" % (
                    methodTab, mthd, eol,
                    cd, eol,
                    methodTab, mthd, eol,
                )
            ret += "%s</code>%s" % (innerTab, eol)

        if dct.get("properties"):
            innerTab = "\t" * (level + 1)
            currTab = "\t" * (level + 2)
            itmTab = "\t" * (level + 3)
            ret += "%s%s<properties>%s" % (eol, innerTab, eol)
            for prop, val in dct["properties"].items():
                ret += "%s<%s>%s" % (currTab, prop, eol)
                for propItm, itmVal in val.items():
                    ret += "%s<%s>%s</%s>%s" % (
                        itmTab, propItm, itmVal, propItm, eol)
                ret += "%s</%s>%s" % (currTab, prop, eol)
            ret += "%s</properties>%s" % (innerTab, eol)

        if dct.get("children"):
            ret += eol
            for child in dct["children"]:
                ret += dicttoxml(child, level + 1, linesep=linesep)

        # Indent the closing tag only when it starts on a fresh line.
        indnt = "\t" * level if ret.endswith(eol) else ""
        ret += "%s</%s>%s" % (indnt, dct["name"], eol)

        if linesep:
            ret += linesep.get(level, "")

    if level == 0:
        if header is None:
            header = '<?xml version="1.0" encoding="%s" standalone="no"?>%s' % (
                _dicttoxml_encoding(), eol)
        ret = header + ret

    return ret
315 316
def flattenClassDict(cd, retDict=None):
    """Flatten a nested class dict into a single dict keyed by classID.

    *cd* is a dict of nested objects such as would be created by restoring
    from a cdxml file. The result maps every classID found to a dict with
    "attributes", "code" and "properties" keys, so that all objects end up
    at the top level. Information already recorded in *retDict* for a
    classID takes precedence over what is found here. *retDict* is updated
    in place and returned; a new dict is created when it is omitted.

    NOTE(review): resolvePath is not defined or imported in the visible part
    of this module (the Dabo import that would supply it is commented out
    near the top of the file), so as written the first resolvePath() call
    raises NameError - confirm how it is meant to be provided.
    """
    flat = {} if retDict is None else retDict

    ownAtts = cd.get("attributes", {})
    ownProps = cd.get("properties", {})
    ownKids = cd.get("children", [])
    ownCode = cd.get("code", {})

    classID = ownAtts.get("classID", "")
    classFile = resolvePath(ownAtts.get("designerClass", ""))
    superFile = resolvePath(ownAtts.get("superclass", ""))
    superclassID = ownAtts.get("superclassID", "")

    if superclassID and os.path.exists(superFile):
        # Pull in everything the superclass file defines first.
        flattenClassDict(xmltodict(superFile, addCodeFile=True), flat)

    if classID:
        if os.path.exists(classFile):
            # The object is defined in its own class file: use that info.
            classCD = xmltodict(classFile, addCodeFile=True)
            entry = {
                "attributes": classCD.get("attributes", {}),
                "code": classCD.get("code", {}),
                "properties": classCD.get("properties", {}),
            }
            fileKids = classCD.get("children", [])
        else:
            # Not a file; most likely just a component in another class.
            entry = {
                "attributes": ownAtts,
                "code": ownCode,
                "properties": ownProps,
            }
            fileKids = []
        # Whatever was recorded earlier for this classID wins.
        entry.update(flat.get(classID, {}))
        flat[classID] = entry
        # Now add the child objects found in the class file.
        for kid in fileKids:
            flattenClassDict(kid, flat)

    for kid in ownKids or []:
        flattenClassDict(kid, flat)
    return flat
370 371
def addInheritedInfo(src, super, updateCode=False):
    """Called recursively on the class container structure, modifying
    the attributes to incorporate superclass information. When the
    'updateCode' parameter is True, superclass code is added to the
    object's code.

    *src* is an element dict and is modified in place. *super* maps
    classIDs to dicts with "attributes", "code" and "properties" keys; any
    of those keys may be missing and is then treated as empty. (The
    parameter name shadows the builtin but is kept for callers that pass it
    by keyword.) For an object with a classID, the superclass values are
    used as defaults and the object's own values override them. Returns
    None.
    """
    atts = src.get("attributes", {})
    classID = atts.get("classID", "")
    if classID:
        superInfo = super.get(classID, {})

        merged = superInfo.get("attributes", {}).copy()
        merged.update(atts)
        src["attributes"] = merged

        merged = superInfo.get("properties", {}).copy()
        merged.update(src.get("properties", {}))
        src["properties"] = merged

        if updateCode:
            merged = superInfo.get("code", {}).copy()
            merged.update(src.get("code", {}))
            src["code"] = merged

    for kid in src.get("children", []) or []:
        addInheritedInfo(kid, super, updateCode)
395 396 ''' 397 # if __name__ == "__main__": 398 # test_dict = {"name": "test", "attributes":{"path": "c:\\temp\\name", 399 # "problemChars": "Welcome to <Jos\xc3\xa9's \ Stuff!>\xc2\xae".decode("latin-1")}} 400 # print "test_dict:", test_dict 401 # xml = dicttoxml(test_dict) 402 # print "xml:", xml 403 # test_dict2 = xmltodict(xml) 404 # print "test_dict2:", test_dict2 405 # print "same?:", test_dict == test_dict2 406 ''' 407