telemeta.management.commands.telemeta-import-collections-from-crem module
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2010 Guillaume Pellerin
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://svn.parisson.org/telemeta/TelemetaLicense.
#
# Author: Guillaume Pellerin <yomguy@parisson.com>
#

import logging
import codecs
import os
import sys
import csv
import datetime
from optparse import make_option
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from django.contrib.auth.models import User
from django.core.files.base import ContentFile
from django.contrib.sites.models import Site
from django.template.defaultfilters import slugify
from telemeta.models import *
from telemeta.util.unaccent import unaccent


class Logger(object):
    """File logger used by the import command.

    Wraps the shared ``'myapp'`` logger and writes
    ``<time> <level> <message>`` lines to the given file. ``info`` and
    ``error`` accept a prefix and a message that are either UTF-8 byte
    strings or already-decoded text.
    """

    def __init__(self, file):
        """Attach a UTF-8 file handler for *file* to the ``'myapp'`` logger.

        The ``'myapp'`` logger is process-wide, so a handler that already
        targets the same file is reused instead of stacking a second one
        (which would write every message twice).
        """
        self.logger = logging.getLogger('myapp')
        target = os.path.abspath(file)
        self.hdlr = None
        for handler in self.logger.handlers:
            if getattr(handler, 'baseFilename', None) == target:
                self.hdlr = handler
                break
        if self.hdlr is None:
            self.hdlr = logging.FileHandler(file, encoding='utf-8')
            self.logger.addHandler(self.hdlr)
        self.formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        self.hdlr.setFormatter(self.formatter)
        self.logger.setLevel(logging.INFO)

    @staticmethod
    def _to_text(value):
        """Decode UTF-8 byte strings; return text unchanged."""
        if isinstance(value, bytes):
            return value.decode('utf8')
        return value

    def info(self, prefix, message):
        """Log ``' <prefix> : <message>'`` at INFO level."""
        self.logger.info(' ' + self._to_text(prefix) + ' : ' + self._to_text(message))

    def error(self, prefix, message):
        """Log ``'<prefix> : <message>'`` at ERROR level."""
        self.logger.error(self._to_text(prefix) + ' : ' + self._to_text(message))


class Command(BaseCommand):
    """Import CREM collections from collection directories containing media
    files and, optionally, a ``<collection>.csv`` file (semicolon-delimited,
    old code then new code) relating old codes to new codes.
    """

    help = "import CREM collections (special usecase)"
    admin_email = 'webmaster@parisson.com'
    media_root = settings.MEDIA_ROOT

    option_list = BaseCommand.option_list + (
        make_option('-d', '--dry-run',
                    action='store_true',
                    dest='dry-run',
                    help='Do NOT write anything'),
        make_option('-f', '--force',
                    action='store_true',
                    dest='force',
                    help='Force overwrite data'),
        make_option('-s', '--source',
                    dest='source_dir',
                    help='define the source directory'),
        make_option('-l', '--log',
                    dest='log',
                    help='define log file'),
        make_option('-p', '--pattern',
                    dest='pattern',
                    help='define the pattern'),
        make_option('-m', '--domain',
                    dest='domain',
                    help='define site domain'),
    )

    def write_file(self, item, media):
        """Attach the audio file *media* to *item* and save the item.

        Reads ``self.media_root``, ``self.source_dir``, ``self.dry_run``,
        ``self.force``, ``self.user`` and ``self.logger``.

        - A missing file is logged as an error and nothing else happens.
        - An item that already has a file is left alone unless ``force``
          is set.
        - When the source directory is outside MEDIA_ROOT the file content
          is copied into the item's file field; otherwise the field is
          pointed at the path relative to MEDIA_ROOT.
        - In dry-run mode nothing is written: no copy, no save, no revision.
        """
        filename = os.path.basename(media)
        print('importing ' + filename)

        if not os.path.exists(media):
            msg = item.code + ' : fichier audio ' + filename + ' inexistant dans le dossier !'
            self.logger.error('item', msg)
            return

        if item.file and not self.force:
            msg = item.code + ' : fichier ' + item.file.name + ' deja inscrit dans la base de donnees et pas de forcage !'
            self.logger.info('item', msg)
            return

        if self.media_root not in self.source_dir:
            print("file not in MEDIA_ROOT, copying...")
            if not self.dry_run:
                # Audio data is binary: read it as bytes and always close it.
                with open(media, 'rb') as source:
                    item.file.save(filename, ContentFile(source.read()))
        else:
            print("file in MEDIA_ROOT, linking...")
            if not self.dry_run:
                item.file = media.replace(self.media_root, '')

        if self.dry_run:
            return
        item.save()
        if self.user:
            item.set_revision(self.user)

    def handle(self, *args, **kwargs):
        """Import the collections found under the source directory.

        Options read from *kwargs*: ``log`` (log file, required),
        ``source_dir`` (required), ``pattern`` (substring a collection path
        must contain; everything matches when omitted), ``dry-run``,
        ``force`` and ``domain``.

        First pass: every matching sub-directory of the source directory is
        looked up as a ``MediaCollection`` by code. A missing collection
        that ships a ``<name>.csv`` file aborts the command; a missing
        collection without one is created (unless dry-run).

        Second pass: for each selected collection the optional CSV file is
        read into a {new code: old code} map, the WAV files are listed and,
        depending on how the number of files compares with the number of
        items already in the collection, items are matched or created and
        handed to ``write_file``.

        Raises:
            CommandError: the log file or source directory option is missing.
            SystemExit: a collection with a CSV file is absent from the
                database.
        """
        log_file = kwargs.get('log')
        source_dir = kwargs.get('source_dir')
        if not log_file:
            raise CommandError('no log file given (use -l/--log)')
        if not source_dir:
            raise CommandError('no source directory given (use -s/--source)')

        self.logger = Logger(log_file)
        # A missing pattern used to fail on ``None in str``; '' matches all.
        self.pattern = kwargs.get('pattern') or ''
        self.source_dir = os.path.abspath(source_dir)
        self.dry_run = kwargs.get('dry-run')
        self.force = kwargs.get('force')
        self.domain = kwargs.get('domain')

        site = Site.objects.all()[0]
        if self.domain:
            site.domain = self.domain
            site.name = self.domain
            if not self.dry_run:
                site.save()
        else:
            self.domain = site.domain

        # write_file() already copes with a missing user, so do not crash here.
        admins = User.objects.filter(username='admin')
        self.user = admins[0] if admins else None
        self.collections = os.listdir(self.source_dir)

        collections = []
        for collection in self.collections:
            collection_dir = self.source_dir + os.sep + collection
            if '/.' in collection_dir or self.pattern not in collection_dir:
                continue
            if not os.path.isdir(collection_dir):
                # Plain files next to the collection directories are ignored.
                continue
            collection_files = os.listdir(collection_dir)
            collection_name = collection.split(os.sep)[-1]
            collections.append(collection_name)
            c = MediaCollection.objects.filter(code=collection_name)

            if not c and collection + '.csv' in collection_files:
                msg = collection + ' collection NON présente dans la base de données, SORTIE '
                self.logger.error(collection, msg)
                sys.exit(msg)
            elif not c:
                msg = 'collection NON présente dans la base de données, CREATION '
                self.logger.info(collection, msg)
                if not self.dry_run:
                    c = MediaCollection(code=collection_name, title=collection_name)
                    c.save()
                    if self.user:
                        c.set_revision(self.user)
            else:
                msg = 'collection présente dans la base de données, SELECTION'
                self.logger.info(collection, msg)

        for collection in collections:
            collection_dir = self.source_dir + os.sep + collection
            collection_name = collection
            collection_files = os.listdir(collection_dir)
            msg = '************************ ' + collection + ' ******************************'
            self.logger.info(collection, msg[:70])
            rows = {}

            if collection + '.csv' in collection_files:
                csv_file = self.source_dir + os.sep + collection + os.sep + collection + '.csv'
                with open(csv_file) as csv_handle:
                    for row in csv.reader(csv_handle, delimiter=';'):
                        if len(row) < 2:
                            # Blank or malformed line: no code pair to record.
                            continue
                        rows[row[1].strip()] = row[0].strip()
                msg = collection + ' import du fichier CSV de la collection'
                self.logger.info(collection, msg[:70])
            else:
                msg = collection + ' pas de fichier CSV dans la collection'
                self.logger.info(collection, msg[:70])

            found = MediaCollection.objects.filter(code=collection_name)
            if found:
                c = found[0]
                msg = ' id = '+str(c.id)
                self.logger.info(c.code, msg)
            else:
                if not self.dry_run:
                    c = MediaCollection(code=collection_name)
                    c.save()
                msg = ' collection NON présente dans la BDD, CREATION '
                self.logger.info(collection_name, msg)
                if self.dry_run:
                    # Nothing was created, so there is nothing to attach items to.
                    continue

            audio_files = sorted(
                name for name in collection_files
                if name.split('.')[-1] in ('WAV', 'wav') and name[0] != '.')
            nb_items = c.items.count()

            for counter, audio_name in enumerate(audio_files):
                code = audio_name.split('.')[0]
                wav_file = self.source_dir + os.sep + collection + os.sep + audio_name

                if len(audio_files) <= nb_items:
                    items = MediaItem.objects.filter(code=code)

                    old_ref = ''
                    if code in rows and not items:
                        old_ref = rows[code]
                        items = MediaItem.objects.filter(old_code=old_ref)

                    if items:
                        item = items[0]
                        if item.code:
                            msg = code + ' : ' + item.code + ' : Cas 1 ou 2 : id = ' + str(item.id)
                        elif item.old_code:
                            msg = code + ' : ' + item.old_code + ' : Cas 1 ou 2 : id = ' + str(item.id)
                        else:
                            msg = code + ' : ' + ' Cas 1 ou 2 : id = ' + str(item.id)
                        self.logger.info('item', msg)
                        item.code = code
                    else:
                        item = MediaItem(code=code, collection=c)
                        msg = code + ' : ' + old_ref + ' : Cas 1 ou 2 : item NON présent dans la base de données, CREATION'
                        self.logger.info('item', msg)

                    self.write_file(item, wav_file)

                elif nb_items == 1 and len(audio_files) > 1:
                    if counter == 0:
                        msg = code + ' : Cas 3a : item n°01 présent dans la base de données, PASSE'
                        self.logger.info('item', msg)
                    else:
                        item = MediaItem(code=code, collection=c)
                        msg = code + ' : Cas 3a : item NON présent dans la base de données, CREATION'
                        self.logger.info('item', msg)
                        self.write_file(item, wav_file)

                elif nb_items > 1 and nb_items < len(audio_files):
                    msg = code + ' : Cas 3b : nb items < nb de fichiers audio, PAS de creation'
                    self.logger.info('item', msg)

        msg = 'Liste des URLs des collections importées :'
        self.logger.info('INFO', msg)
        for collection in collections:
            msg = 'http://'+self.domain+'/archives/collections/'+collection
            self.logger.info(collection, msg)
Module variables
var ITEM_PUBLIC_ACCESS_CHOICES
var ITEM_TRANSODING_STATUS
var PUBLIC_ACCESS_CHOICES
var SCOPE_CHOICES
var TYPE_CHOICES
var app_name
var code_linesep
var collection_code_regex
var collection_published_code_regex
var collection_unpublished_code_regex
var default_decoding
var default_encoding
var engine
var eol
var ext
var item_code_regex
var item_published_code_regex
var item_unpublished_code_regex
var mime_type
var private_extra_types
var public_extra_types
var resource_code_regex
var strict_code
Classes
class Command
Import CREM collections from collection directories containing media files and, optionally, a CSV file relating old codes to new codes
class Command(BaseCommand):
    """Import CREM collections from collection directories containing media
    files and, optionally, a ``<collection>.csv`` file (semicolon-delimited,
    old code then new code) relating old codes to new codes.
    """

    help = "import CREM collections (special usecase)"
    admin_email = 'webmaster@parisson.com'
    media_root = settings.MEDIA_ROOT

    option_list = BaseCommand.option_list + (
        make_option('-d', '--dry-run',
                    action='store_true',
                    dest='dry-run',
                    help='Do NOT write anything'),
        make_option('-f', '--force',
                    action='store_true',
                    dest='force',
                    help='Force overwrite data'),
        make_option('-s', '--source',
                    dest='source_dir',
                    help='define the source directory'),
        make_option('-l', '--log',
                    dest='log',
                    help='define log file'),
        make_option('-p', '--pattern',
                    dest='pattern',
                    help='define the pattern'),
        make_option('-m', '--domain',
                    dest='domain',
                    help='define site domain'),
    )

    def write_file(self, item, media):
        """Attach the audio file *media* to *item* and save the item.

        Reads ``self.media_root``, ``self.source_dir``, ``self.dry_run``,
        ``self.force``, ``self.user`` and ``self.logger``.

        - A missing file is logged as an error and nothing else happens.
        - An item that already has a file is left alone unless ``force``
          is set.
        - When the source directory is outside MEDIA_ROOT the file content
          is copied into the item's file field; otherwise the field is
          pointed at the path relative to MEDIA_ROOT.
        - In dry-run mode nothing is written: no copy, no save, no revision.
        """
        filename = os.path.basename(media)
        print('importing ' + filename)

        if not os.path.exists(media):
            msg = item.code + ' : fichier audio ' + filename + ' inexistant dans le dossier !'
            self.logger.error('item', msg)
            return

        if item.file and not self.force:
            msg = item.code + ' : fichier ' + item.file.name + ' deja inscrit dans la base de donnees et pas de forcage !'
            self.logger.info('item', msg)
            return

        if self.media_root not in self.source_dir:
            print("file not in MEDIA_ROOT, copying...")
            if not self.dry_run:
                # Audio data is binary: read it as bytes and always close it.
                with open(media, 'rb') as source:
                    item.file.save(filename, ContentFile(source.read()))
        else:
            print("file in MEDIA_ROOT, linking...")
            if not self.dry_run:
                item.file = media.replace(self.media_root, '')

        if self.dry_run:
            return
        item.save()
        if self.user:
            item.set_revision(self.user)

    def handle(self, *args, **kwargs):
        """Import the collections found under the source directory.

        Options read from *kwargs*: ``log`` (log file, required),
        ``source_dir`` (required), ``pattern`` (substring a collection path
        must contain; everything matches when omitted), ``dry-run``,
        ``force`` and ``domain``.

        First pass: every matching sub-directory of the source directory is
        looked up as a ``MediaCollection`` by code. A missing collection
        that ships a ``<name>.csv`` file aborts the command; a missing
        collection without one is created (unless dry-run).

        Second pass: for each selected collection the optional CSV file is
        read into a {new code: old code} map, the WAV files are listed and,
        depending on how the number of files compares with the number of
        items already in the collection, items are matched or created and
        handed to ``write_file``.

        Raises:
            CommandError: the log file or source directory option is missing.
            SystemExit: a collection with a CSV file is absent from the
                database.
        """
        log_file = kwargs.get('log')
        source_dir = kwargs.get('source_dir')
        if not log_file:
            raise CommandError('no log file given (use -l/--log)')
        if not source_dir:
            raise CommandError('no source directory given (use -s/--source)')

        self.logger = Logger(log_file)
        # A missing pattern used to fail on ``None in str``; '' matches all.
        self.pattern = kwargs.get('pattern') or ''
        self.source_dir = os.path.abspath(source_dir)
        self.dry_run = kwargs.get('dry-run')
        self.force = kwargs.get('force')
        self.domain = kwargs.get('domain')

        site = Site.objects.all()[0]
        if self.domain:
            site.domain = self.domain
            site.name = self.domain
            if not self.dry_run:
                site.save()
        else:
            self.domain = site.domain

        # write_file() already copes with a missing user, so do not crash here.
        admins = User.objects.filter(username='admin')
        self.user = admins[0] if admins else None
        self.collections = os.listdir(self.source_dir)

        collections = []
        for collection in self.collections:
            collection_dir = self.source_dir + os.sep + collection
            if '/.' in collection_dir or self.pattern not in collection_dir:
                continue
            if not os.path.isdir(collection_dir):
                # Plain files next to the collection directories are ignored.
                continue
            collection_files = os.listdir(collection_dir)
            collection_name = collection.split(os.sep)[-1]
            collections.append(collection_name)
            c = MediaCollection.objects.filter(code=collection_name)

            if not c and collection + '.csv' in collection_files:
                msg = collection + ' collection NON présente dans la base de données, SORTIE '
                self.logger.error(collection, msg)
                sys.exit(msg)
            elif not c:
                msg = 'collection NON présente dans la base de données, CREATION '
                self.logger.info(collection, msg)
                if not self.dry_run:
                    c = MediaCollection(code=collection_name, title=collection_name)
                    c.save()
                    if self.user:
                        c.set_revision(self.user)
            else:
                msg = 'collection présente dans la base de données, SELECTION'
                self.logger.info(collection, msg)

        for collection in collections:
            collection_dir = self.source_dir + os.sep + collection
            collection_name = collection
            collection_files = os.listdir(collection_dir)
            msg = '************************ ' + collection + ' ******************************'
            self.logger.info(collection, msg[:70])
            rows = {}

            if collection + '.csv' in collection_files:
                csv_file = self.source_dir + os.sep + collection + os.sep + collection + '.csv'
                with open(csv_file) as csv_handle:
                    for row in csv.reader(csv_handle, delimiter=';'):
                        if len(row) < 2:
                            # Blank or malformed line: no code pair to record.
                            continue
                        rows[row[1].strip()] = row[0].strip()
                msg = collection + ' import du fichier CSV de la collection'
                self.logger.info(collection, msg[:70])
            else:
                msg = collection + ' pas de fichier CSV dans la collection'
                self.logger.info(collection, msg[:70])

            found = MediaCollection.objects.filter(code=collection_name)
            if found:
                c = found[0]
                msg = ' id = '+str(c.id)
                self.logger.info(c.code, msg)
            else:
                if not self.dry_run:
                    c = MediaCollection(code=collection_name)
                    c.save()
                msg = ' collection NON présente dans la BDD, CREATION '
                self.logger.info(collection_name, msg)
                if self.dry_run:
                    # Nothing was created, so there is nothing to attach items to.
                    continue

            audio_files = sorted(
                name for name in collection_files
                if name.split('.')[-1] in ('WAV', 'wav') and name[0] != '.')
            nb_items = c.items.count()

            for counter, audio_name in enumerate(audio_files):
                code = audio_name.split('.')[0]
                wav_file = self.source_dir + os.sep + collection + os.sep + audio_name

                if len(audio_files) <= nb_items:
                    items = MediaItem.objects.filter(code=code)

                    old_ref = ''
                    if code in rows and not items:
                        old_ref = rows[code]
                        items = MediaItem.objects.filter(old_code=old_ref)

                    if items:
                        item = items[0]
                        if item.code:
                            msg = code + ' : ' + item.code + ' : Cas 1 ou 2 : id = ' + str(item.id)
                        elif item.old_code:
                            msg = code + ' : ' + item.old_code + ' : Cas 1 ou 2 : id = ' + str(item.id)
                        else:
                            msg = code + ' : ' + ' Cas 1 ou 2 : id = ' + str(item.id)
                        self.logger.info('item', msg)
                        item.code = code
                    else:
                        item = MediaItem(code=code, collection=c)
                        msg = code + ' : ' + old_ref + ' : Cas 1 ou 2 : item NON présent dans la base de données, CREATION'
                        self.logger.info('item', msg)

                    self.write_file(item, wav_file)

                elif nb_items == 1 and len(audio_files) > 1:
                    if counter == 0:
                        msg = code + ' : Cas 3a : item n°01 présent dans la base de données, PASSE'
                        self.logger.info('item', msg)
                    else:
                        item = MediaItem(code=code, collection=c)
                        msg = code + ' : Cas 3a : item NON présent dans la base de données, CREATION'
                        self.logger.info('item', msg)
                        self.write_file(item, wav_file)

                elif nb_items > 1 and nb_items < len(audio_files):
                    msg = code + ' : Cas 3b : nb items < nb de fichiers audio, PAS de creation'
                    self.logger.info('item', msg)

        msg = 'Liste des URLs des collections importées :'
        self.logger.info('INFO', msg)
        for collection in collections:
            msg = 'http://'+self.domain+'/archives/collections/'+collection
            self.logger.info(collection, msg)
Ancestors (in MRO)
- Command
- django.core.management.base.BaseCommand
- __builtin__.object
Class variables
var admin_email
var args
var can_import_settings
var help
var leave_locale_alone
var media_root
var option_list
var output_transaction
var requires_model_validation
Methods
def __init__(
self)
def __init__(self):
    """Store the palette returned by ``color_style()`` as ``self.style``."""
    palette = color_style()
    self.style = palette
def create_parser(
self, prog_name, subcommand)
Create and return the OptionParser
which will be used to
parse the arguments to this command.
def create_parser(self, prog_name, subcommand):
    """
    Build the ``OptionParser`` that parses this command's arguments.

    The parser is named *prog_name*; its usage text, version string and
    options come from ``self.usage(subcommand)``, ``self.get_version()``
    and ``self.option_list``.
    """
    parser_settings = dict(
        prog=prog_name,
        usage=self.usage(subcommand),
        version=self.get_version(),
        option_list=self.option_list,
    )
    return OptionParser(**parser_settings)
def execute(
self, *args, **options)
Try to execute this command, performing model validation if
needed (as controlled by the attribute
self.requires_model_validation
, except if force-skipped).
def execute(self, *args, **options):
    """
    Try to execute this command, performing model validation if
    needed (as controlled by the attribute
    ``self.requires_model_validation``, except if force-skipped).

    Wraps ``options['stdout']`` / ``options['stderr']`` (defaulting to the
    ``sys`` streams) in ``OutputWrapper`` objects, optionally forces the
    'en-us' locale while ``self.handle()`` runs, writes whatever
    ``handle()`` returns to ``self.stdout`` and finally restores the
    previous locale.

    Raises ``CommandError`` when ``leave_locale_alone`` is false but
    ``can_import_settings`` is false as well.
    """
    self.stdout = OutputWrapper(options.get('stdout', sys.stdout))
    self.stderr = OutputWrapper(options.get('stderr', sys.stderr), self.style.ERROR)

    if self.can_import_settings:
        # The imported name is not used below in this method.
        from django.conf import settings

    # Stays None unless the locale is switched below; the ``finally``
    # clause uses that to decide whether there is anything to restore.
    saved_locale = None
    if not self.leave_locale_alone:
        # Only mess with locales if we can assume we have a working
        # settings file, because django.utils.translation requires settings
        # (The final saying about whether the i18n machinery is active will be
        # found in the value of the USE_I18N setting)
        if not self.can_import_settings:
            raise CommandError("Incompatible values of 'leave_locale_alone' "
                               "(%s) and 'can_import_settings' (%s) command "
                               "options." % (self.leave_locale_alone,
                                             self.can_import_settings))
        # Switch to US English, because django-admin.py creates database
        # content like permissions, and those shouldn't contain any
        # translations.
        from django.utils import translation
        saved_locale = translation.get_language()
        translation.activate('en-us')

    try:
        if self.requires_model_validation and not options.get('skip_validation'):
            self.validate()
        output = self.handle(*args, **options)
        if output:
            if self.output_transaction:
                # This needs to be imported here, because it relies on
                # settings.
                from django.db import connections, DEFAULT_DB_ALIAS
                connection = connections[options.get('database', DEFAULT_DB_ALIAS)]
                if connection.ops.start_transaction_sql():
                    self.stdout.write(self.style.SQL_KEYWORD(connection.ops.start_transaction_sql()))
            self.stdout.write(output)
            if self.output_transaction:
                # Close the transaction block opened above.
                self.stdout.write('\n' + self.style.SQL_KEYWORD("COMMIT;"))
    finally:
        # Restore the caller's locale even when handle() raised.
        if saved_locale is not None:
            translation.activate(saved_locale)
def get_version(
self)
Return the Django version, which should be correct for all built-in Django commands. User-supplied commands should override this method.
def get_version(self):
    """
    Report the Django version.

    That is the right answer for Django's built-in commands; commands
    supplied by users should override this method.
    """
    version = django.get_version()
    return version
def handle(
self, *args, **kwargs)
def handle(self, *args, **kwargs):
    """Import the collections found under the source directory.

    Options read from *kwargs*: ``log`` (log file, required),
    ``source_dir`` (required), ``pattern`` (substring a collection path
    must contain; everything matches when omitted), ``dry-run``,
    ``force`` and ``domain``.

    First pass: every matching sub-directory of the source directory is
    looked up as a ``MediaCollection`` by code. A missing collection
    that ships a ``<name>.csv`` file aborts the command; a missing
    collection without one is created (unless dry-run).

    Second pass: for each selected collection the optional CSV file is
    read into a {new code: old code} map, the WAV files are listed and,
    depending on how the number of files compares with the number of
    items already in the collection, items are matched or created and
    handed to ``write_file``.

    Raises:
        CommandError: the log file or source directory option is missing.
        SystemExit: a collection with a CSV file is absent from the
            database.
    """
    log_file = kwargs.get('log')
    source_dir = kwargs.get('source_dir')
    if not log_file:
        raise CommandError('no log file given (use -l/--log)')
    if not source_dir:
        raise CommandError('no source directory given (use -s/--source)')

    self.logger = Logger(log_file)
    # A missing pattern used to fail on ``None in str``; '' matches all.
    self.pattern = kwargs.get('pattern') or ''
    self.source_dir = os.path.abspath(source_dir)
    self.dry_run = kwargs.get('dry-run')
    self.force = kwargs.get('force')
    self.domain = kwargs.get('domain')

    site = Site.objects.all()[0]
    if self.domain:
        site.domain = self.domain
        site.name = self.domain
        if not self.dry_run:
            site.save()
    else:
        self.domain = site.domain

    # write_file() already copes with a missing user, so do not crash here.
    admins = User.objects.filter(username='admin')
    self.user = admins[0] if admins else None
    self.collections = os.listdir(self.source_dir)

    collections = []
    for collection in self.collections:
        collection_dir = self.source_dir + os.sep + collection
        if '/.' in collection_dir or self.pattern not in collection_dir:
            continue
        if not os.path.isdir(collection_dir):
            # Plain files next to the collection directories are ignored.
            continue
        collection_files = os.listdir(collection_dir)
        collection_name = collection.split(os.sep)[-1]
        collections.append(collection_name)
        c = MediaCollection.objects.filter(code=collection_name)

        if not c and collection + '.csv' in collection_files:
            msg = collection + ' collection NON présente dans la base de données, SORTIE '
            self.logger.error(collection, msg)
            sys.exit(msg)
        elif not c:
            msg = 'collection NON présente dans la base de données, CREATION '
            self.logger.info(collection, msg)
            if not self.dry_run:
                c = MediaCollection(code=collection_name, title=collection_name)
                c.save()
                if self.user:
                    c.set_revision(self.user)
        else:
            msg = 'collection présente dans la base de données, SELECTION'
            self.logger.info(collection, msg)

    for collection in collections:
        collection_dir = self.source_dir + os.sep + collection
        collection_name = collection
        collection_files = os.listdir(collection_dir)
        msg = '************************ ' + collection + ' ******************************'
        self.logger.info(collection, msg[:70])
        rows = {}

        if collection + '.csv' in collection_files:
            csv_file = self.source_dir + os.sep + collection + os.sep + collection + '.csv'
            with open(csv_file) as csv_handle:
                for row in csv.reader(csv_handle, delimiter=';'):
                    if len(row) < 2:
                        # Blank or malformed line: no code pair to record.
                        continue
                    rows[row[1].strip()] = row[0].strip()
            msg = collection + ' import du fichier CSV de la collection'
            self.logger.info(collection, msg[:70])
        else:
            msg = collection + ' pas de fichier CSV dans la collection'
            self.logger.info(collection, msg[:70])

        found = MediaCollection.objects.filter(code=collection_name)
        if found:
            c = found[0]
            msg = ' id = '+str(c.id)
            self.logger.info(c.code, msg)
        else:
            if not self.dry_run:
                c = MediaCollection(code=collection_name)
                c.save()
            msg = ' collection NON présente dans la BDD, CREATION '
            self.logger.info(collection_name, msg)
            if self.dry_run:
                # Nothing was created, so there is nothing to attach items to.
                continue

        audio_files = sorted(
            name for name in collection_files
            if name.split('.')[-1] in ('WAV', 'wav') and name[0] != '.')
        nb_items = c.items.count()

        for counter, audio_name in enumerate(audio_files):
            code = audio_name.split('.')[0]
            wav_file = self.source_dir + os.sep + collection + os.sep + audio_name

            if len(audio_files) <= nb_items:
                items = MediaItem.objects.filter(code=code)

                old_ref = ''
                if code in rows and not items:
                    old_ref = rows[code]
                    items = MediaItem.objects.filter(old_code=old_ref)

                if items:
                    item = items[0]
                    if item.code:
                        msg = code + ' : ' + item.code + ' : Cas 1 ou 2 : id = ' + str(item.id)
                    elif item.old_code:
                        msg = code + ' : ' + item.old_code + ' : Cas 1 ou 2 : id = ' + str(item.id)
                    else:
                        msg = code + ' : ' + ' Cas 1 ou 2 : id = ' + str(item.id)
                    self.logger.info('item', msg)
                    item.code = code
                else:
                    item = MediaItem(code=code, collection=c)
                    msg = code + ' : ' + old_ref + ' : Cas 1 ou 2 : item NON présent dans la base de données, CREATION'
                    self.logger.info('item', msg)

                self.write_file(item, wav_file)

            elif nb_items == 1 and len(audio_files) > 1:
                if counter == 0:
                    msg = code + ' : Cas 3a : item n°01 présent dans la base de données, PASSE'
                    self.logger.info('item', msg)
                else:
                    item = MediaItem(code=code, collection=c)
                    msg = code + ' : Cas 3a : item NON présent dans la base de données, CREATION'
                    self.logger.info('item', msg)
                    self.write_file(item, wav_file)

            elif nb_items > 1 and nb_items < len(audio_files):
                msg = code + ' : Cas 3b : nb items < nb de fichiers audio, PAS de creation'
                self.logger.info('item', msg)

    msg = 'Liste des URLs des collections importées :'
    self.logger.info('INFO', msg)
    for collection in collections:
        msg = 'http://'+self.domain+'/archives/collections/'+collection
        self.logger.info(collection, msg)
def print_help(
self, prog_name, subcommand)
Print the help message for this command, derived from
self.usage()
.
def print_help(self, prog_name, subcommand):
    """
    Show this command's help text, as produced by the parser that
    ``self.create_parser()`` builds (and therefore by ``self.usage()``).
    """
    self.create_parser(prog_name, subcommand).print_help()
def run_from_argv(
self, argv)
Set up any environment changes requested (e.g., Python path
and Django settings), then run this command. If the
command raises a CommandError
, intercept it and print it sensibly
to stderr. If the --traceback
option is present or the raised
Exception
is not CommandError
, raise it.
def run_from_argv(self, argv):
    """
    Parse *argv* and run the command.

    ``argv[0]`` and ``argv[1]`` name the program and the subcommand; the
    remaining entries are parsed as options and arguments, passed through
    ``handle_default_options()`` and handed to ``self.execute()``.

    A ``CommandError`` is reported on stderr as ``<class name>: <message>``
    and the process exits with status 1. Any other exception, or any
    exception at all when the ``traceback`` option is set, is re-raised.
    """
    option_parser = self.create_parser(argv[0], argv[1])
    options, args = option_parser.parse_args(argv[2:])
    handle_default_options(options)
    try:
        self.execute(*args, **options.__dict__)
    except Exception as exc:
        wants_traceback = options.traceback
        if wants_traceback or not isinstance(exc, CommandError):
            raise

        # execute() may have failed before it assigned self.stderr, so
        # keep a wrapper around sys.stderr ready as the fallback.
        fallback = OutputWrapper(sys.stderr, self.style.ERROR)
        err_stream = getattr(self, 'stderr', fallback)
        err_stream.write('%s: %s' % (exc.__class__.__name__, exc))
        sys.exit(1)
def usage(
self, subcommand)
Return a brief description of how to use this command, by
default from the attribute self.help
.
def usage(self, subcommand):
    """
    Build the short usage text for this command.

    The synopsis line is ``%prog <subcommand> [options] <self.args>``;
    when ``self.help`` is set it is appended after a blank line.
    """
    synopsis = '%%prog %s [options] %s' % (subcommand, self.args)
    if not self.help:
        return synopsis
    return '%s\n\n%s' % (synopsis, self.help)
def validate(
self, app=None, display_num_errors=False)
Validates the given app, raising CommandError for any errors.
If app is None, then this will validate all installed apps.
def validate(self, app=None, display_num_errors=False):
    """
    Run model validation for *app*, or for every installed app when
    *app* is None.

    Raises CommandError carrying the collected error text when any error
    is found. With *display_num_errors* set, the error count is written
    to ``self.stdout`` on success.
    """
    from django.core.management.validation import get_validation_errors
    report = StringIO()
    error_count = get_validation_errors(report, app)
    if error_count:
        raise CommandError("One or more models did not validate:\n%s" % report.getvalue())
    if not display_num_errors:
        return
    plural = '' if error_count == 1 else 's'
    self.stdout.write("%s error%s found" % (error_count, plural))
def write_file(
self, item, media)
def write_file(self, item, media):
    """Attach the audio file *media* to *item* and save the item.

    Reads ``self.media_root``, ``self.source_dir``, ``self.dry_run``,
    ``self.force``, ``self.user`` and ``self.logger``.

    - A missing file is logged as an error and nothing else happens.
    - An item that already has a file is left alone unless ``force``
      is set.
    - When the source directory is outside MEDIA_ROOT the file content
      is copied into the item's file field; otherwise the field is
      pointed at the path relative to MEDIA_ROOT.
    - In dry-run mode nothing is written: no copy, no save, no revision.
    """
    filename = os.path.basename(media)
    print('importing ' + filename)

    if not os.path.exists(media):
        msg = item.code + ' : fichier audio ' + filename + ' inexistant dans le dossier !'
        self.logger.error('item', msg)
        return

    if item.file and not self.force:
        msg = item.code + ' : fichier ' + item.file.name + ' deja inscrit dans la base de donnees et pas de forcage !'
        self.logger.info('item', msg)
        return

    if self.media_root not in self.source_dir:
        print("file not in MEDIA_ROOT, copying...")
        if not self.dry_run:
            # Audio data is binary: read it as bytes and always close it.
            with open(media, 'rb') as source:
                item.file.save(filename, ContentFile(source.read()))
    else:
        print("file in MEDIA_ROOT, linking...")
        if not self.dry_run:
            item.file = media.replace(self.media_root, '')

    if self.dry_run:
        return
    item.save()
    if self.user:
        item.set_revision(self.user)
class Logger
class Logger(object):
    """File logger used by the import command.

    Wraps the shared ``'myapp'`` logger and writes
    ``<time> <level> <message>`` lines to the given file. ``info`` and
    ``error`` accept a prefix and a message that are either UTF-8 byte
    strings or already-decoded text.
    """

    def __init__(self, file):
        """Attach a UTF-8 file handler for *file* to the ``'myapp'`` logger.

        The ``'myapp'`` logger is process-wide, so a handler that already
        targets the same file is reused instead of stacking a second one
        (which would write every message twice).
        """
        self.logger = logging.getLogger('myapp')
        target = os.path.abspath(file)
        self.hdlr = None
        for handler in self.logger.handlers:
            if getattr(handler, 'baseFilename', None) == target:
                self.hdlr = handler
                break
        if self.hdlr is None:
            self.hdlr = logging.FileHandler(file, encoding='utf-8')
            self.logger.addHandler(self.hdlr)
        self.formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        self.hdlr.setFormatter(self.formatter)
        self.logger.setLevel(logging.INFO)

    @staticmethod
    def _to_text(value):
        """Decode UTF-8 byte strings; return text unchanged."""
        if isinstance(value, bytes):
            return value.decode('utf8')
        return value

    def info(self, prefix, message):
        """Log ``' <prefix> : <message>'`` at INFO level."""
        self.logger.info(' ' + self._to_text(prefix) + ' : ' + self._to_text(message))

    def error(self, prefix, message):
        """Log ``'<prefix> : <message>'`` at ERROR level."""
        self.logger.error(self._to_text(prefix) + ' : ' + self._to_text(message))
Ancestors (in MRO)
Instance variables
var formatter
var hdlr
var logger
Methods
def __init__(
self, file)
def __init__(self, file): self.logger = logging.getLogger('myapp') self.hdlr = logging.FileHandler(file) self.formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') self.hdlr.setFormatter(self.formatter) self.logger.addHandler(self.hdlr) self.logger.setLevel(logging.INFO)
def error(
self, prefix, message)
def error(self, prefix, message): self.logger.error(prefix + ' : ' + message.decode('utf8'))
def info(
self, prefix, message)
def info(self, prefix, message): self.logger.info(' ' + prefix + ' : ' + message.decode('utf8'))