Module PyRSS2Gen'
[hide private]
[frames] | no frames]

Source Code for Module PyRSS2Gen'

  1  """PyRSS2Gen - A Python library for generating RSS 2.0 feeds.""" 
  2   
  3  __name__ = "PyRSS2Gen" 
  4  __version__ = (1, 0, 0) 
  5  __author__ = "Andrew Dalke <dalke@dalkescientific.com>" 
  6   
  7  _generator_name = __name__ + "-" + ".".join(map(str, __version__)) 
  8   
  9  import datetime 
 10   
 11   
 12  # Could make this the base class; will need to add 'publish' 
class WriteXmlMixin:
    """Mixin that serializes an object to XML via its ``publish`` method.

    The class using this mixin must define ``publish(handler)``; it is
    called with an ``xml.sax.saxutils.XMLGenerator`` and is expected to
    emit the object's elements on it.
    """

    def __init__(self):
        pass

    def write_xml(self, outfile, encoding="iso-8859-1"):
        """Write the complete XML document to ``outfile``.

        outfile  -- file-like object handed to ``XMLGenerator``
        encoding -- encoding passed to ``XMLGenerator``
                    (default "iso-8859-1")
        """
        from xml.sax import saxutils

        handler = saxutils.XMLGenerator(outfile, encoding)
        handler.startDocument()
        self.publish(handler)
        handler.endDocument()

    def to_xml(self, encoding="iso-8859-1"):
        """Return the complete XML document as a string.

        The document is produced by ``write_xml`` into an in-memory
        buffer and the buffer's contents are returned.
        """
        # Prefer the Python 2 buffers, exactly as before.  Python 3 has
        # neither cStringIO nor StringIO, so fall back to io.StringIO
        # there instead of failing with ImportError.
        try:
            from cStringIO import StringIO
        except ImportError:
            try:
                from StringIO import StringIO
            except ImportError:
                from io import StringIO
        f = StringIO()
        self.write_xml(f, encoding)
        return f.getvalue()
33 34
def _element(handler, name, obj, d=None):
    """Emit one XML element on ``handler``.

    handler -- SAX-style handler providing startElement, characters
               and endElement
    name    -- tag name used when ``obj`` is a string or None
    obj     -- a string (emitted as the element's text), None (the
               element is emitted with no text), or any other object,
               whose ``publish(handler)`` method is called instead
    d       -- optional dict of attributes for the element; ignored
               when ``obj`` publishes itself
    """
    # 'basestring' only exists on Python 2; on Python 3 plain 'str'
    # plays the same role.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if not d:
        d = {}
    if obj is None or isinstance(obj, string_types):
        # Special-case handling to make the API easier
        # to use for the common case.
        handler.startElement(name, d)
        if obj is not None:
            handler.characters(obj)
        handler.endElement(name)
    else:
        # It better know how to emit the correct XML.
        obj.publish(handler)
48 49
def _opt_element(handler, name, obj):
    """Emit ``obj`` through ``_element`` unless it is None.

    When ``obj`` is None nothing is emitted and the handler is not
    touched.
    """
    if obj is not None:
        _element(handler, name, obj)
54 55
def _format_date(dt):
    """Format a datetime as an RFC 822 date string.

    The result looks like ``Sat, 07 Sep 2002 00:00:01 GMT``.  The text
    "GMT" is appended unconditionally, so the input date must already
    be in GMT.
    """
    # Fixed English names are used instead of strftime because
    # strftime is locale dependent.
    day_names = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    month_names = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
    fields = (
        day_names[dt.weekday()],
        dt.day,
        month_names[dt.month - 1],
        dt.year,
        dt.hour,
        dt.minute,
        dt.second,
    )
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % fields
##
# A couple of simple wrapper objects for the fields which
# take a simple value other than a string.
class IntElement:
    """Publish an integer as the text of a named element.

    Constructed with the tag name and the value.  The value is turned
    into text with str(), so anything with a suitable str() form can
    be published this way.
    """
    element_attrs = {}

    def __init__(self, name, val):
        self.name, self.val = name, val

    def publish(self, handler):
        """Emit <name>str(val)</name> on ``handler``."""
        tag = self.name
        handler.startElement(tag, self.element_attrs)
        handler.characters(str(self.val))
        handler.endElement(tag)
97 98
class DateElement:
    """Publish a datetime.datetime as the text of a named element.

    Constructed with the tag name and the datetime.  The datetime is
    rendered by ``_format_date`` (a timestamp with a 4-digit year).
    """

    def __init__(self, name, dt):
        self.name, self.dt = name, dt

    def publish(self, handler):
        """Emit the formatted date inside an element called ``name``."""
        text = _format_date(self.dt)
        _element(handler, self.name, text)
113 114 115 #### 116
class Category:
    """Publish a <category> element, optionally with a domain attribute."""

    def __init__(self, category, domain=None):
        self.category, self.domain = category, domain

    def publish(self, handler):
        """Emit <category>; the domain attribute is added only when set."""
        if self.domain is None:
            attrs = {}
        else:
            attrs = {"domain": self.domain}
        _element(handler, "category", self.category, attrs)
129 130
class Cloud:
    """Publish a <cloud> element; all of its data goes into attributes."""

    def __init__(self, domain, port, path,
                 registerProcedure, protocol):
        self.domain, self.port, self.path = domain, port, path
        self.registerProcedure = registerProcedure
        self.protocol = protocol

    def publish(self, handler):
        """Emit an empty <cloud> element carrying the five attributes."""
        attrs = {}
        attrs["domain"] = self.domain
        # The port is converted to text with str().
        attrs["port"] = str(self.port)
        attrs["path"] = self.path
        attrs["registerProcedure"] = self.registerProcedure
        attrs["protocol"] = self.protocol
        _element(handler, "cloud", None, attrs)
149 150
class Image:
    """Publish a channel <image> element."""
    element_attrs = {}

    def __init__(self, url, title, link,
                 width=None, height=None, description=None):
        self.url, self.title, self.link = url, title, link
        self.width, self.height = width, height
        self.description = description

    def publish(self, handler):
        """Emit <image> with url, title and link, then the optional parts.

        width and height given as ints are wrapped in IntElement;
        width, height and description are skipped when None.
        """
        handler.startElement("image", self.element_attrs)

        # Always emitted.
        for tag in ("url", "title", "link"):
            _element(handler, tag, getattr(self, tag))

        # Optional dimensions.
        for tag in ("width", "height"):
            value = getattr(self, tag)
            if isinstance(value, int):
                value = IntElement(tag, value)
            _opt_element(handler, tag, value)

        _opt_element(handler, "description", self.description)

        handler.endElement("image")
184 185
class Guid:
    """Publish a <guid> element.

    The guid is a permalink by default, which is also what is assumed
    when the attribute is omitted.  Hence plain strings are always
    permalinks.
    """

    def __init__(self, guid, isPermaLink=1):
        self.guid, self.isPermaLink = guid, isPermaLink

    def publish(self, handler):
        """Emit <guid> with isPermaLink set to "true" or "false"."""
        if not self.isPermaLink:
            flag = "false"
        else:
            flag = "true"
        _element(handler, "guid", self.guid, {"isPermaLink": flag})
204 205
class TextInput:
    """Publish a <textInput> element.

    Apparently this is rarely used.
    """
    element_attrs = {}

    def __init__(self, title, description, name, link):
        self.title, self.description = title, description
        self.name, self.link = name, link

    def publish(self, handler):
        """Emit <textInput> wrapping title, description, name and link."""
        handler.startElement("textInput", self.element_attrs)
        for tag in ("title", "description", "name", "link"):
            _element(handler, tag, getattr(self, tag))
        handler.endElement("textInput")
226 227
class Enclosure:
    """Publish an <enclosure> element; its data goes into attributes."""

    def __init__(self, url, length, type):
        self.url, self.length, self.type = url, length, type

    def publish(self, handler):
        """Emit an empty <enclosure> with url, length and type attributes."""
        attrs = {}
        attrs["url"] = self.url
        # The length is converted to text with str().
        attrs["length"] = str(self.length)
        attrs["type"] = self.type
        _element(handler, "enclosure", None, attrs)
242 243
class Source:
    """Publish the item's original source, used by aggregators."""

    def __init__(self, name, url):
        self.name, self.url = name, url

    def publish(self, handler):
        """Emit <source url="...">name</source>."""
        attrs = {"url": self.url}
        _element(handler, "source", self.name, attrs)
253 254
class SkipHours:
    """Publish the <skipHours> element.

    This takes a list of hours, as integers.
    """
    element_attrs = {}

    def __init__(self, hours):
        self.hours = hours

    def publish(self, handler):
        """Emit <skipHours> with one <hour> child per entry.

        Nothing is emitted when the list of hours is empty.
        """
        if not self.hours:
            return
        handler.startElement("skipHours", self.element_attrs)
        for value in self.hours:
            _element(handler, "hour", str(value))
        handler.endElement("skipHours")
271 272
class SkipDays:
    """Publish the <skipDays> element.

    This takes a list of days as strings.
    """
    element_attrs = {}

    def __init__(self, days):
        self.days = days

    def publish(self, handler):
        """Emit <skipDays> with one <day> child per entry.

        Nothing is emitted when the list of days is empty.
        """
        if not self.days:
            return
        handler.startElement("skipDays", self.element_attrs)
        for name in self.days:
            _element(handler, "day", name)
        handler.endElement("skipDays")
289 290
class RSS2(WriteXmlMixin):
    """The main RSS class.

    Stores the channel attributes, with the "category" elements under
    ".categories" and the RSS items under ".items".  ``publish`` emits
    the whole <rss><channel>...</channel></rss> tree on a SAX-style
    handler.
    """

    rss_attrs = {"version": "2.0"}
    element_attrs = {}

    def __init__(self,
                 title,
                 link,
                 description,

                 language=None,
                 copyright=None,
                 managingEditor=None,
                 webMaster=None,
                 pubDate=None,  # a datetime, *in* *GMT*
                 lastBuildDate=None,  # a datetime

                 categories=None,  # list of strings or Category
                 generator=_generator_name,
                 docs="http://blogs.law.harvard.edu/tech/rss",
                 cloud=None,  # a Cloud
                 ttl=None,  # integer number of minutes

                 image=None,  # an Image
                 rating=None,  # a string; I don't know how it's used
                 textInput=None,  # a TextInput
                 skipHours=None,  # a SkipHours with a list of integers
                 skipDays=None,  # a SkipDays with a list of strings
                 items=None  # list of RSSItems
                 ):
        """Store the channel fields.

        title, link and description are required.  Every other field
        is optional and is left out of the output when None.
        categories and items default to fresh empty lists.
        """
        self.title = title
        self.link = link
        self.description = description
        self.language = language
        self.copyright = copyright
        self.managingEditor = managingEditor

        self.webMaster = webMaster
        self.pubDate = pubDate
        self.lastBuildDate = lastBuildDate

        if categories is None:
            categories = []
        self.categories = categories
        self.generator = generator
        self.docs = docs
        self.cloud = cloud
        self.ttl = ttl
        self.image = image
        self.rating = rating
        self.textInput = textInput
        self.skipHours = skipHours
        self.skipDays = skipDays

        if items is None:
            items = []
        self.items = items

    def publish(self, handler):
        """Emit the <rss> document body on ``handler``.

        The three required elements come first, then whatever
        ``publish_extensions`` adds, then the optional channel
        elements, and finally every item.
        """
        # 'basestring' only exists on Python 2; on Python 3 plain 'str'
        # plays the same role.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        handler.startElement("rss", self.rss_attrs)
        handler.startElement("channel", self.element_attrs)
        _element(handler, "title", self.title)
        _element(handler, "link", self.link)
        _element(handler, "description", self.description)

        self.publish_extensions(handler)

        _opt_element(handler, "language", self.language)
        _opt_element(handler, "copyright", self.copyright)
        _opt_element(handler, "managingEditor", self.managingEditor)
        _opt_element(handler, "webMaster", self.webMaster)

        # Dates given as datetimes are wrapped so they get formatted;
        # any other non-None value is passed through unchanged.
        pubDate = self.pubDate
        if isinstance(pubDate, datetime.datetime):
            pubDate = DateElement("pubDate", pubDate)
        _opt_element(handler, "pubDate", pubDate)

        lastBuildDate = self.lastBuildDate
        if isinstance(lastBuildDate, datetime.datetime):
            lastBuildDate = DateElement("lastBuildDate", lastBuildDate)
        _opt_element(handler, "lastBuildDate", lastBuildDate)

        # Plain strings are promoted to Category objects.
        for category in self.categories:
            if isinstance(category, string_types):
                category = Category(category)
            category.publish(handler)

        _opt_element(handler, "generator", self.generator)
        _opt_element(handler, "docs", self.docs)

        if self.cloud is not None:
            self.cloud.publish(handler)

        ttl = self.ttl
        if isinstance(ttl, int):
            ttl = IntElement("ttl", ttl)
        # When ttl is a string, _element uses this tag name for the
        # element, so it has to be the real element name "ttl".
        _opt_element(handler, "ttl", ttl)

        if self.image is not None:
            self.image.publish(handler)

        _opt_element(handler, "rating", self.rating)
        if self.textInput is not None:
            self.textInput.publish(handler)
        if self.skipHours is not None:
            self.skipHours.publish(handler)
        if self.skipDays is not None:
            self.skipDays.publish(handler)

        for item in self.items:
            item.publish(handler)

        handler.endElement("channel")
        handler.endElement("rss")

    def publish_extensions(self, handler):
        """Hook for derived classes; does nothing here.

        Called right after the three required fields have been
        emitted, so a subclass can insert its own output at that point.
        """
        pass
416 417
class RSSItem(WriteXmlMixin):
    """Publish an RSS <item>."""
    element_attrs = {}

    def __init__(self,
                 title=None,  # string
                 link=None,  # url as string
                 description=None,  # string
                 author=None,  # email address as string
                 categories=None,  # list of string or Category
                 comments=None,  # url as string
                 enclosure=None,  # an Enclosure
                 guid=None,  # a unique string
                 pubDate=None,  # a datetime
                 source=None  # a Source
                 ):
        """Store the item fields.

        Raises TypeError when both title and description are None.
        categories defaults to a fresh empty list.
        """
        if title is None and description is None:
            raise TypeError(
                "must define at least one of 'title' or 'description'")
        self.title = title
        self.link = link
        self.description = description
        self.author = author
        if categories is None:
            categories = []
        self.categories = categories
        self.comments = comments
        self.enclosure = enclosure
        self.guid = guid
        self.pubDate = pubDate
        self.source = source
        # It sure does get tedious typing these names three times...

    def publish(self, handler):
        """Emit the <item> element and its children on ``handler``.

        Fields that are None are left out.  ``publish_extensions`` is
        called after the title and link have been emitted.
        """
        # 'basestring' only exists on Python 2; on Python 3 plain 'str'
        # plays the same role.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        handler.startElement("item", self.element_attrs)
        _opt_element(handler, "title", self.title)
        _opt_element(handler, "link", self.link)
        self.publish_extensions(handler)
        _opt_element(handler, "description", self.description)
        _opt_element(handler, "author", self.author)

        # Plain strings are promoted to Category objects.
        for category in self.categories:
            if isinstance(category, string_types):
                category = Category(category)
            category.publish(handler)

        _opt_element(handler, "comments", self.comments)
        if self.enclosure is not None:
            self.enclosure.publish(handler)
        _opt_element(handler, "guid", self.guid)

        # A datetime is wrapped so it gets formatted; any other
        # non-None value is passed through unchanged.
        pubDate = self.pubDate
        if isinstance(pubDate, datetime.datetime):
            pubDate = DateElement("pubDate", pubDate)
        _opt_element(handler, "pubDate", pubDate)

        if self.source is not None:
            self.source.publish(handler)

        handler.endElement("item")

    def publish_extensions(self, handler):
        """Hook for derived classes; does nothing here.

        Called after the title and link elements have been emitted,
        so a subclass can insert its own output at that point.
        """
        pass
485