1
2 """ xmltodict(): convert xml into tree of Python dicts.
3
4 This was copied and modified from John Bair's recipe at aspn.activestate.com:
5 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/149368
6 """
7 import os
8 import string
9 import locale
10 from xml.parsers import expat
11
12 """ """
13
14 '''
15 # If we're in Dabo, get the default encoding.
16 # import dabo
17 # import dabo.lib.DesignerUtils as desUtil
18 # from dabo.dLocalize import _
19 # from dabo.lib.utils import resolvePath
20 # app = dabo.dAppRef
21 # if app is not None:
22 # default_encoding = app.Encoding
23 # else:
24 # enc = locale.getlocale()[1]
25 # if enc is None:
26 # enc = dabo.defaultEncoding
27 # default_encoding = enc
28 '''
29
30
# Newline constant; not referenced elsewhere in this module.
code_linesep = "\n"
# Platform line separator used when generating XML text.
eol = os.linesep


def _detect_default_encoding():
    """Return the locale's encoding, or "utf-8" when it cannot be determined."""
    try:
        encoding = locale.getlocale()[1]
    except (ValueError, TypeError):
        # getlocale() can fail on unrecognised locale settings.
        encoding = None
    return encoding or "utf-8"


# Encoding used to decode byte strings in escQuote() and named in the XML
# header written by dicttoxml().  The Dabo-specific lookup of this value is
# disabled in this copy of the module, so derive it from the locale instead.
default_encoding = _detect_default_encoding()
33
34
class Xml2Obj(object):
    """Expat-driven parser that converts an XML document into nested dicts.

    Every ordinary element becomes a dict with a "name" key and, when
    present, "attributes", "cdata" and "children" keys.  Two element names
    are treated specially:

    * ``<code>``: each child element is stored in the parent's "code" dict
      as ``element name -> text``.
    * ``<properties>``: each child element is stored in the parent's
      "properties" dict as ``element name -> {sub-element name: text}``.

    Set ``attsToSkip`` to a list of attribute names to drop while parsing.
    """

    def __init__(self):
        self.root = None
        self.nodeStack = []
        self.attsToSkip = []
        # State while inside a <code> element.
        self._inCode = False
        self._mthdName = ""
        self._mthdCode = ""
        self._codeDict = None
        # State while inside a <properties> element.
        self._inProp = False
        self._propName = ""
        self._propData = ""
        self._propDict = None
        self._currPropAtt = ""
        self._currPropDict = None

    @staticmethod
    def _native(text):
        """Return *text* as the interpreter's native ``str`` type.

        On Python 2 the text delivered by expat is encoded to a byte
        ``str``; on Python 3 it is already ``str`` and is returned unchanged
        (encoding it there would produce ``bytes`` and break the name
        comparisons and string concatenations below).
        """
        if str is bytes:
            return text.encode()
        return text

    def StartElement(self, name, attributes):
        """SAX start element event handler"""
        if name == "code":
            # Method definitions follow; collect them on the enclosing element.
            self._inCode = True
            parent = self.nodeStack[-1]
            self._codeDict = parent.setdefault("code", {})
        elif name == "properties":
            # Property definitions follow; collect them on the enclosing element.
            self._inProp = True
            self._propName = ""
            self._propData = ""
            parent = self.nodeStack[-1]
            self._propDict = parent.setdefault("properties", {})
        elif self._inCode:
            self._mthdName = self._native(name)
        elif self._inProp:
            if self._propName:
                # Already inside a property: this tag names one of its items.
                self._currPropAtt = self._native(name)
            else:
                # First tag below <properties>: start a new property.
                self._propName = self._native(name)
                self._currPropDict = {}
                self._currPropAtt = ""
        else:
            element = {"name": self._native(name)}
            if attributes:
                for att in self.attsToSkip:
                    attributes.pop(att, None)
                element["attributes"] = attributes
            # Attach to the current parent (or make it the root), then
            # make it the current element.
            if self.nodeStack:
                self.nodeStack[-1].setdefault("children", []).append(element)
            else:
                self.root = element
            self.nodeStack.append(element)

    def EndElement(self, name):
        """SAX end element event handler"""
        if self._inCode:
            if name == "code":
                self._inCode = False
                self._codeDict = None
            else:
                # Store the method body with surrounding whitespace removed
                # and exactly one trailing newline.
                self._codeDict[self._mthdName] = self._mthdCode.strip() + "\n"
                self._mthdName = ""
                self._mthdCode = ""
        elif self._inProp:
            if name == "properties":
                self._inProp = False
                self._propDict = None
            elif name == self._propName:
                # End of a property: store the collected items.
                self._propDict[self._propName] = self._currPropDict
                self._propName = ""
            else:
                # End of one item of the current property.
                self._currPropDict[self._currPropAtt] = self._propData
                self._propData = self._currPropAtt = ""
        else:
            self.nodeStack.pop()

    def CharacterData(self, data):
        """SAX character data event handler"""
        # Whitespace-only chunks are kept only inside code, where they are
        # part of the source text.
        if not (self._inCode or data.strip()):
            return
        # NOTE(review): this copy of the file had the no-op
        # replace("<", "<") here; it is restored as the inverse of the
        # "&lt;" escaping that dicttoxml() applies -- confirm.
        data = self._native(data.replace("&lt;", "<"))
        if self._inCode:
            self._mthdCode += data
        elif self._inProp:
            self._propData += data
        else:
            element = self.nodeStack[-1]
            element["cdata"] = element.get("cdata", "") + data

    def Parse(self, xml):
        """Parse the XML document *xml* and return the root element dict."""
        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElement
        parser.EndElementHandler = self.EndElement
        parser.CharacterDataHandler = self.CharacterData
        # Second argument marks this as the final (only) chunk of input.
        parser.Parse(xml, True)
        return self.root

    def ParseFromFile(self, filename):
        """Parse the XML file at *filename* and return the root element dict."""
        # Read raw bytes so expat applies the encoding declared in the XML
        # itself, and close the file even if reading fails.
        with open(filename, "rb") as xml_file:
            content = xml_file.read()
        return self.Parse(content)
161
162
def xmltodict(xml, attsToSkip=None, addCodeFile=False):
    """Given an xml string or file, return a Python dictionary.

    *xml* is treated as a file path when it names an existing file and does
    not contain the platform line separator; otherwise it is parsed as XML
    text, which must start with an ``<?xml `` declaration.

    attsToSkip -- attribute names to drop from every element.
    addCodeFile -- when True and *xml* is a path, also merge the companion
        ``<base>-code.py`` file (if it exists) into the result.

    Raises dabo.dException.XmlException when the file or string cannot be
    parsed, or when a string is neither an existing path nor XML text.

    NOTE(review): ``_``, ``dabo`` and ``desUtil`` are expected at module
    scope; their imports are disabled in this copy of the file, so the
    error paths and the code-file merge need them restored -- confirm.
    """
    parser = Xml2Obj()
    parser.attsToSkip = attsToSkip or []
    isPath = os.path.exists(xml)
    errmsg = ""
    ret = None
    if eol not in xml and isPath:
        # An existing file name.
        try:
            ret = parser.ParseFromFile(xml)
        except expat.ExpatError as e:
            errmsg = _("The XML in '%s' is not well-formed and cannot be parsed: %s") % (xml, e)
    elif not xml.strip().startswith("<?xml "):
        # Not XML text either, so it was most likely meant as a file name.
        errmsg = _("The file '%s' could not be found") % xml
    else:
        try:
            ret = parser.Parse(xml)
        except expat.ExpatError:
            errmsg = _("An invalid XML string was encountered")
    if errmsg:
        raise dabo.dException.XmlException(errmsg)
    if addCodeFile and isPath:
        # Look for an associated code file next to the XML file.
        codePth = "%s-code.py" % os.path.splitext(xml)[0]
        if os.path.exists(codePth):
            try:
                with open(codePth) as code_file:
                    codeDict = desUtil.parseCodeFile(code_file.read())
                desUtil.addCodeToClassDict(ret, codeDict)
            except Exception as e:
                # Best effort: a broken code file must not prevent
                # returning the parsed XML.
                print("Failed to parse code file: %s" % (e,))
    return ret
200
201
def escQuote(val, noEscape=False, noQuote=False):
    """Add surrounding quotes to the string, and escape
    any illegal XML characters.

    val -- value to convert; non-string values are passed through str(),
        and byte strings are decoded with the module's default_encoding.
    noEscape -- when True, ampersands, quotes and non-ASCII characters are
        left untouched ("<" and ">" are still escaped, since they are never
        valid inside an attribute value).
    noQuote -- when True, the surrounding double quotes are omitted.

    Returns the resulting text.
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3
    if not isinstance(val, (bytes, text_type)):
        val = str(val)
    if not isinstance(val, text_type):
        val = val.decode(default_encoding)
    qt = "" if noQuote else '"'

    if not noEscape:
        # Ampersands go first so the entities added below are not
        # re-escaped.  Each "&" is written as two escaped ampersands,
        # keeping the doubling present in the existing replacement text.
        # NOTE(review): the doubling presumably serves a consumer that
        # treats a lone "&" specially -- confirm it is still wanted.
        val = val.replace("&", "&amp;&amp;")
        # Escape any internal quotes.
        val = val.replace('"', "&quot;").replace("'", "&apos;")
        # Replace non-ASCII characters with numeric character references.
        val = "".join(
            char if ord(char) <= 127 else "&#%s;" % ord(char) for char in val
        )
    val = val.replace("<", "&lt;").replace(">", "&gt;")
    return "%s%s%s" % (qt, val, qt)
232
233
def dicttoxml(dct, level=0, header=None, linesep=None):
    """Given a Python dictionary, return an xml string.

    The dictionary must be in the format returned by xmltodict(), with keys
    on "attributes", "code", "cdata", "name", "properties" and "children".

    level -- nesting depth of this element; controls the tab indentation
        and is incremented for each recursive call on a child.

    Send your own XML header, otherwise a default one will be used. The
    header is only added at level 0.

    The linesep argument is a dictionary, with keys on levels, allowing the
    developer to add extra whitespace depending on the level.
    """
    indent = "\t" * level
    childIndent = "\t" * (level + 1)

    att = ""
    if "attributes" in dct:
        for key, val in dct["attributes"].items():
            # "sizerInfo" values are written without entity escaping.
            noEscape = key in ("sizerInfo",)
            att += " %s=%s" % (key, escQuote(val, noEscape))
    ret = "%s<%s%s" % (indent, dct["name"], att)

    hasBody = ("cdata" in dct or "children" in dct
               or "code" in dct or "properties" in dct)
    if not hasBody:
        ret += " />%s" % eol
    else:
        ret += ">"
        if "cdata" in dct:
            # "<" would otherwise start a new tag inside the text.
            ret += dct["cdata"].replace("<", "&lt;")

        code = dct.get("code")
        if code:
            ret += "%s%s<code>%s" % (eol, childIndent, eol)
            methodTab = "\t" * (level + 2)
            for mthd, cd in code.items():
                # Normalise the line endings in the code to eol.
                cd = eol.join(cd.splitlines())
                # Make sure that the code ends with a line break.
                if not cd.endswith(eol):
                    cd += eol
                ret += "%s<%s><![CDATA[%s%s]]>%s%s</%s>%s" % (
                    methodTab, mthd, eol,
                    cd, eol,
                    methodTab, mthd, eol,
                )
            ret += "%s</code>%s" % (childIndent, eol)

        properties = dct.get("properties")
        if properties:
            ret += "%s%s<properties>%s" % (eol, childIndent, eol)
            currTab = "\t" * (level + 2)
            itmTab = "\t" * (level + 3)
            for prop, val in properties.items():
                ret += "%s<%s>%s" % (currTab, prop, eol)
                for propItm, itmVal in val.items():
                    ret += "%s<%s>%s</%s>%s" % (itmTab, propItm, itmVal, propItm, eol)
                ret += "%s</%s>%s" % (currTab, prop, eol)
            ret += "%s</properties>%s" % (childIndent, eol)

        children = dct.get("children")
        if children:
            ret += eol
            ret += "".join(
                dicttoxml(child, level + 1, linesep=linesep) for child in children
            )

        # Indent the closing tag only when it starts on its own line.
        closingIndent = indent if ret.endswith(eol) else ""
        ret += "%s</%s>%s" % (closingIndent, dct["name"], eol)

        # NOTE(review): indentation was lost in this copy of the file; the
        # extra per-level whitespace is applied here, i.e. only after
        # elements that have a closing tag -- confirm.
        if linesep:
            ret += linesep.get(level, "")

    if level == 0:
        if header is None:
            header = '<?xml version="1.0" encoding="%s" standalone="no"?>%s' % (default_encoding, eol)
        ret = header + ret

    return ret
315
316
def flattenClassDict(cd, retDict=None):
    """Given a dict containing a series of nested objects such as would
    be created by restoring from a cdxml file, returns a dict with all classIDs
    as keys, and a dict as the corresponding value. The dict value will have
    keys for the attributes and/or code, depending on what was in the original
    dict. The end result is to take a nested dict structure and return a flattened
    dict with all objects at the top level.

    cd -- element dict in the format produced by xmltodict().
    retDict -- dict to add the flattened entries to; a new one is created
        when omitted. The same dict is returned.
    """
    if retDict is None:
        retDict = {}
    atts = cd.get("attributes", {})
    props = cd.get("properties", {})
    kids = cd.get("children", [])
    code = cd.get("code", {})
    classID = atts.get("classID", "")
    classFile = resolvePath(atts.get("designerClass", ""))
    superclass = resolvePath(atts.get("superclass", ""))
    superclassID = atts.get("superclassID", "")

    if superclassID and os.path.exists(superclass):
        # Flatten the superclass definition first.
        superCD = xmltodict(superclass, addCodeFile=True)
        flattenClassDict(superCD, retDict)

    if classID:
        if os.path.exists(classFile):
            # The class is defined in its own file: take its info from there.
            classCD = xmltodict(classFile, addCodeFile=True)
            entry = {
                "attributes": classCD.get("attributes", {}),
                "code": classCD.get("code", {}),
                "properties": classCD.get("properties", {}),
            }
            classKids = classCD.get("children", [])
        else:
            # No class file; use the information carried by this element.
            entry = {
                "attributes": atts,
                "code": code,
                "properties": props,
            }
            classKids = []
        # Values already recorded for this classID take precedence.
        entry.update(retDict.get(classID, {}))
        retDict[classID] = entry
        for kid in classKids:
            flattenClassDict(kid, retDict)

    for kid in kids or ():
        flattenClassDict(kid, retDict)
    return retDict
370
371
def addInheritedInfo(src, super, updateCode=False):
    """Called recursively on the class container structure, modifying
    the attributes to incorporate superclass information. When the
    'updateCode' parameter is True, superclass code is added to the
    object's code

    src -- element dict to update in place; its children are processed
        recursively.
    super -- dict mapping classID to a dict with "attributes", "code" and
        "properties" entries (the name shadows the builtin but is kept
        for callers that pass it by keyword).

    For every element that has a classID, the inherited values are used as
    the base and the element's own values override them.
    """
    atts = src.get("attributes", {})
    props = src.get("properties", {})
    kids = src.get("children", [])
    code = src.get("code", {})
    classID = atts.get("classID", "")
    if classID:
        superInfo = super.get(classID, {})
        # Use .get() for every section so that an entry lacking one of
        # them is treated as empty instead of raising KeyError.
        mergedAtts = superInfo.get("attributes", {}).copy()
        mergedAtts.update(atts)
        src["attributes"] = mergedAtts
        mergedProps = superInfo.get("properties", {}).copy()
        mergedProps.update(props)
        src["properties"] = mergedProps
        if updateCode:
            mergedCode = superInfo.get("code", {}).copy()
            mergedCode.update(code)
            src["code"] = mergedCode
    for kid in kids or ():
        addInheritedInfo(kid, super, updateCode)
395
396 '''
397 # if __name__ == "__main__":
398 # test_dict = {"name": "test", "attributes":{"path": "c:\\temp\\name",
399 # "problemChars": "Welcome to <Jos\xc3\xa9's \ Stuff!>\xc2\xae".decode("latin-1")}}
400 # print "test_dict:", test_dict
401 # xml = dicttoxml(test_dict)
402 # print "xml:", xml
403 # test_dict2 = xmltodict(xml)
404 # print "test_dict2:", test_dict2
405 # print "same?:", test_dict == test_dict2
406 '''
407