telemeta.util.url module
# -*- coding: utf-8 -*- import os, re, urllib class URLMediaParser(object): formats = ['mp3', 'wav', 'ogg', 'flac', 'aac', 'mp4', 'webm'] def __init__(self, url): self.url = url self.formats = self.get_formats() self.urls = [] if self.url[-1] != '/': self.url += '/' def get_formats(self): formats = [] for f in self.formats: formats.append(f.upper()) formats.append(f.lower()) formats.append(f.capitalize()) return formats def get_urls(self): data = urllib.urlopen(self.url).read() for line in data.split("\012"): s = re.compile('href=".*\.*"').search(line,1) if s: filename = line[s.start():s.end()].split('"')[1] name, ext = os.path.splitext(filename) if ext[1:] in self.formats: self.urls.append(self.url + filename) return self.urls if __name__ == "__main__": import sys parser = URLMediaParser(sys.argv[-1]) urls = parser.get_urls() print urls print len(urls)
Classes
class URLMediaParser
class URLMediaParser(object): formats = ['mp3', 'wav', 'ogg', 'flac', 'aac', 'mp4', 'webm'] def __init__(self, url): self.url = url self.formats = self.get_formats() self.urls = [] if self.url[-1] != '/': self.url += '/' def get_formats(self): formats = [] for f in self.formats: formats.append(f.upper()) formats.append(f.lower()) formats.append(f.capitalize()) return formats def get_urls(self): data = urllib.urlopen(self.url).read() for line in data.split("\012"): s = re.compile('href=".*\.*"').search(line,1) if s: filename = line[s.start():s.end()].split('"')[1] name, ext = os.path.splitext(filename) if ext[1:] in self.formats: self.urls.append(self.url + filename) return self.urls
Ancestors (in MRO)
- URLMediaParser
- __builtin__.object
Class variables
var formats
Instance variables
var formats
var url
var urls
Methods
def __init__(
self, url)
def __init__(self, url): self.url = url self.formats = self.get_formats() self.urls = [] if self.url[-1] != '/': self.url += '/'
def get_formats(
self)
def get_formats(self): formats = [] for f in self.formats: formats.append(f.upper()) formats.append(f.lower()) formats.append(f.capitalize()) return formats
def get_urls(
self)
def get_urls(self): data = urllib.urlopen(self.url).read() for line in data.split("\012"): s = re.compile('href=".*\.*"').search(line,1) if s: filename = line[s.start():s.end()].split('"')[1] name, ext = os.path.splitext(filename) if ext[1:] in self.formats: self.urls.append(self.url + filename) return self.urls