Top

telemeta.management.commands.telemeta-geocode module

from optparse import make_option
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from telemeta.models import Location
from telemeta.util.unaccent import unaccent
import logging
import codecs

class Command(BaseCommand):
    """Management command that geocodes country locations from a Geonames dump.

    The single positional argument is the path to a tab-separated Geonames
    file (``allCountries.txt``).  Rows whose feature code starts with ``PCL``
    are compared against the current ``Location`` objects of type
    ``Location.COUNTRY``; matching locations get their ``latitude`` and
    ``longitude`` set and are saved.
    """
    help = "Geocode Telemeta countries from a local Geonames data file"
    args = "path to geoname's allCountries.txt"

    def handle(self, datafile=None, *args, **options):
        """
        Geocode every current country location found in ``datafile``.

        A location is matched first by its own (unaccented, lower-cased)
        name.  Locations only matched through one of their aliases are
        remembered and saved after the whole file has been scanned, so a
        name match found later in the file always takes precedence.

        :param datafile: path to the Geonames ``allCountries.txt`` file.
        :raises CommandError: if no path is given or the file cannot be
            opened.
        """
        if not datafile:
            raise CommandError("Please provide the %s" % self.args)

        try:
            stream = codecs.open(datafile, 'r', 'utf-8')
        except IOError:
            raise CommandError("Unable to open %s" % datafile)

        # try/finally guarantees the file is closed even when a database
        # save or a float conversion fails half-way through the dump.
        try:
            # Normalise each country name and its aliases once, instead of
            # once per matching Geonames row.
            pending = {}
            countries = Location.objects.all().current().filter(type=Location.COUNTRY)
            for location in countries:
                alias_keys = [unaccent(a.alias).lower() for a in location.aliases.all()]
                pending[location] = (unaccent(location.name).lower(), alias_keys)

            parsed = 0
            geocoded = 0
            total = len(pending)
            found_by_alias = {}
            for line in stream:
                # Only the line terminator is removed: a plain strip() would
                # also eat trailing empty columns and shift the field count.
                fields = line.rstrip("\r\n").split("\t")

                # Columns used: 2 asciiname, 3 alternatenames, 4 latitude,
                # 5 longitude, 7 feature_code.  Short or blank lines are
                # skipped (but still counted) rather than aborting the run.
                if len(fields) > 7 and fields[7].startswith('PCL'):
                    asciiname, alternatenames, latitude, longitude = fields[2:6]
                    names = set([asciiname.lower()])
                    if alternatenames:
                        names.update(unaccent(n).lower()
                                     for n in alternatenames.split(','))

                    matched = []
                    for location, (name_key, alias_keys) in pending.items():
                        if name_key in names:
                            location.latitude = float(latitude)
                            location.longitude = float(longitude)
                            location.save()
                            geocoded += 1
                            matched.append(location)
                        elif any(key in names for key in alias_keys):
                            # Keep as a fallback; only applied if no name
                            # match turns up for this location.
                            found_by_alias[location] = float(latitude), float(longitude)

                    # Removal is deferred so the dict is not mutated while
                    # it is being iterated.
                    for location in matched:
                        del pending[location]

                parsed += 1

                if parsed % 200000 == 0:
                    print("Geocoded %d (%d by alias) out of %d countries (parsed %d geonames)" % (geocoded, len(found_by_alias), total, parsed))

                # Every country has been matched by name: nothing left to find.
                if total == geocoded:
                    break

            # Apply alias matches to the locations no row matched by name.
            for location in pending:
                if location in found_by_alias:
                    location.latitude, location.longitude = found_by_alias[location]
                    location.save()
                    geocoded += 1

            print("Done. Geocoded %d out of %d countries (parsed %d geonames)" % (geocoded, total, parsed))
        finally:
            stream.close()

Classes

class Command

class Command(BaseCommand):
    """Management command that geocodes country locations from a Geonames dump.

    The single positional argument is the path to a tab-separated Geonames
    file (``allCountries.txt``).  Rows whose feature code starts with ``PCL``
    are compared against the current ``Location`` objects of type
    ``Location.COUNTRY``; matching locations get their ``latitude`` and
    ``longitude`` set and are saved.
    """
    help = "Geocode Telemeta countries from a local Geonames data file"
    args = "path to geoname's allCountries.txt"

    def handle(self, datafile=None, *args, **options):
        """
        Geocode every current country location found in ``datafile``.

        A location is matched first by its own (unaccented, lower-cased)
        name.  Locations only matched through one of their aliases are
        remembered and saved after the whole file has been scanned, so a
        name match found later in the file always takes precedence.

        :param datafile: path to the Geonames ``allCountries.txt`` file.
        :raises CommandError: if no path is given or the file cannot be
            opened.
        """
        if not datafile:
            raise CommandError("Please provide the %s" % self.args)

        try:
            stream = codecs.open(datafile, 'r', 'utf-8')
        except IOError:
            raise CommandError("Unable to open %s" % datafile)

        # try/finally guarantees the file is closed even when a database
        # save or a float conversion fails half-way through the dump.
        try:
            # Normalise each country name and its aliases once, instead of
            # once per matching Geonames row.
            pending = {}
            countries = Location.objects.all().current().filter(type=Location.COUNTRY)
            for location in countries:
                alias_keys = [unaccent(a.alias).lower() for a in location.aliases.all()]
                pending[location] = (unaccent(location.name).lower(), alias_keys)

            parsed = 0
            geocoded = 0
            total = len(pending)
            found_by_alias = {}
            for line in stream:
                # Only the line terminator is removed: a plain strip() would
                # also eat trailing empty columns and shift the field count.
                fields = line.rstrip("\r\n").split("\t")

                # Columns used: 2 asciiname, 3 alternatenames, 4 latitude,
                # 5 longitude, 7 feature_code.  Short or blank lines are
                # skipped (but still counted) rather than aborting the run.
                if len(fields) > 7 and fields[7].startswith('PCL'):
                    asciiname, alternatenames, latitude, longitude = fields[2:6]
                    names = set([asciiname.lower()])
                    if alternatenames:
                        names.update(unaccent(n).lower()
                                     for n in alternatenames.split(','))

                    matched = []
                    for location, (name_key, alias_keys) in pending.items():
                        if name_key in names:
                            location.latitude = float(latitude)
                            location.longitude = float(longitude)
                            location.save()
                            geocoded += 1
                            matched.append(location)
                        elif any(key in names for key in alias_keys):
                            # Keep as a fallback; only applied if no name
                            # match turns up for this location.
                            found_by_alias[location] = float(latitude), float(longitude)

                    # Removal is deferred so the dict is not mutated while
                    # it is being iterated.
                    for location in matched:
                        del pending[location]

                parsed += 1

                if parsed % 200000 == 0:
                    print("Geocoded %d (%d by alias) out of %d countries (parsed %d geonames)" % (geocoded, len(found_by_alias), total, parsed))

                # Every country has been matched by name: nothing left to find.
                if total == geocoded:
                    break

            # Apply alias matches to the locations no row matched by name.
            for location in pending:
                if location in found_by_alias:
                    location.latitude, location.longitude = found_by_alias[location]
                    location.save()
                    geocoded += 1

            print("Done. Geocoded %d out of %d countries (parsed %d geonames)" % (geocoded, total, parsed))
        finally:
            stream.close()

Ancestors (in MRO)

  • Command
  • django.core.management.base.BaseCommand
  • __builtin__.object

Class variables

var args

var can_import_settings

var help

var leave_locale_alone

var option_list

var output_transaction

var requires_model_validation

Methods

def __init__(

self)

def __init__(self):
    """Store the result of ``color_style()`` as ``self.style``."""
    style = color_style()
    self.style = style

def create_parser(

self, prog_name, subcommand)

Create and return the OptionParser which will be used to parse the arguments to this command.

def create_parser(self, prog_name, subcommand):
    """
    Build the ``OptionParser`` used to parse this command's arguments.

    The parser is configured with ``prog_name`` as program name, the text
    from ``self.usage(subcommand)``, the version from
    ``self.get_version()`` and the options in ``self.option_list``.
    """
    parser_settings = dict(
        prog=prog_name,
        usage=self.usage(subcommand),
        version=self.get_version(),
        option_list=self.option_list,
    )
    return OptionParser(**parser_settings)

def execute(

self, *args, **options)

Try to execute this command, performing model validation if needed (as controlled by the attribute self.requires_model_validation, except if force-skipped).

def execute(self, *args, **options):
    """
    Try to execute this command, performing model validation if
    needed (as controlled by the attribute
    ``self.requires_model_validation``, except if force-skipped).

    ``args`` and ``options`` are forwarded unchanged to ``self.handle()``.
    The option keys read by this method itself are ``stdout``, ``stderr``,
    ``skip_validation`` and ``database``.

    If ``handle()`` returns a truthy value it is written to
    ``self.stdout``.  When ``self.output_transaction`` is set, that output
    is preceded by the backend's transaction-start SQL (if the backend
    returns any) and followed by ``COMMIT;``.

    Raises ``CommandError`` when ``leave_locale_alone`` is false while
    ``can_import_settings`` is also false.
    """
    # Wrap the output streams; the 'stdout' / 'stderr' options override
    # the process-wide sys.stdout / sys.stderr.
    self.stdout = OutputWrapper(options.get('stdout', sys.stdout))
    self.stderr = OutputWrapper(options.get('stderr', sys.stderr), self.style.ERROR)
    if self.can_import_settings:
        # NOTE(review): the imported name is not used below; presumably the
        # import is done for its side effects -- confirm.
        from django.conf import settings
    # Stays None when the locale is left untouched, which tells the
    # ``finally`` clause below that there is nothing to restore.
    saved_locale = None
    if not self.leave_locale_alone:
        # Only mess with locales if we can assume we have a working
        # settings file, because django.utils.translation requires settings
        # (The final saying about whether the i18n machinery is active will be
        # found in the value of the USE_I18N setting)
        if not self.can_import_settings:
            raise CommandError("Incompatible values of 'leave_locale_alone' "
                               "(%s) and 'can_import_settings' (%s) command "
                               "options." % (self.leave_locale_alone,
                                             self.can_import_settings))
        # Switch to US English, because django-admin.py creates database
        # content like permissions, and those shouldn't contain any
        # translations.
        from django.utils import translation
        saved_locale = translation.get_language()
        translation.activate('en-us')
    try:
        # Validation can be force-skipped with the 'skip_validation' option.
        if self.requires_model_validation and not options.get('skip_validation'):
            self.validate()
        output = self.handle(*args, **options)
        if output:
            if self.output_transaction:
                # This needs to be imported here, because it relies on
                # settings.
                from django.db import connections, DEFAULT_DB_ALIAS
                connection = connections[options.get('database', DEFAULT_DB_ALIAS)]
                # Emit the start-of-transaction statement only when the
                # backend returns a non-empty one.
                if connection.ops.start_transaction_sql():
                    self.stdout.write(self.style.SQL_KEYWORD(connection.ops.start_transaction_sql()))
            self.stdout.write(output)
            if self.output_transaction:
                self.stdout.write('\n' + self.style.SQL_KEYWORD("COMMIT;"))
    finally:
        # Re-activate the locale that was active before the switch to
        # 'en-us', whether or not the command succeeded.
        if saved_locale is not None:
            translation.activate(saved_locale)

def get_version(

self)

Return the Django version, which should be correct for all built-in Django commands. User-supplied commands should override this method.

def get_version(self):
    """
    Report the version of this command.

    The default implementation returns ``django.get_version()``, which is
    right for Django's built-in commands; user-supplied commands should
    override this method.
    """
    version = django.get_version()
    return version

def handle(

self, datafile=None, *args, **options)

def handle(self, datafile=None, *args, **options):
    """
    Geocode every current country location found in ``datafile``.

    ``datafile`` is the path to a tab-separated Geonames dump.  Rows whose
    feature code starts with ``PCL`` are compared against the current
    ``Location`` objects of type ``Location.COUNTRY``.  A location is
    matched first by its own (unaccented, lower-cased) name.  Locations
    only matched through one of their aliases are remembered and saved
    after the whole file has been scanned, so a name match found later in
    the file always takes precedence.

    :param datafile: path to the Geonames ``allCountries.txt`` file.
    :raises CommandError: if no path is given or the file cannot be opened.
    """
    if not datafile:
        raise CommandError("Please provide the %s" % self.args)
    try:
        stream = codecs.open(datafile, 'r', 'utf-8')
    except IOError:
        raise CommandError("Unable to open %s" % datafile)

    # try/finally guarantees the file is closed even when a database save
    # or a float conversion fails half-way through the dump.
    try:
        # Normalise each country name and its aliases once, instead of once
        # per matching Geonames row.
        pending = {}
        countries = Location.objects.all().current().filter(type=Location.COUNTRY)
        for location in countries:
            alias_keys = [unaccent(a.alias).lower() for a in location.aliases.all()]
            pending[location] = (unaccent(location.name).lower(), alias_keys)

        parsed = 0
        geocoded = 0
        total = len(pending)
        found_by_alias = {}
        for line in stream:
            # Only the line terminator is removed: a plain strip() would
            # also eat trailing empty columns and shift the field count.
            fields = line.rstrip("\r\n").split("\t")

            # Columns used: 2 asciiname, 3 alternatenames, 4 latitude,
            # 5 longitude, 7 feature_code.  Short or blank lines are
            # skipped (but still counted) rather than aborting the run.
            if len(fields) > 7 and fields[7].startswith('PCL'):
                asciiname, alternatenames, latitude, longitude = fields[2:6]
                names = set([asciiname.lower()])
                if alternatenames:
                    names.update(unaccent(n).lower()
                                 for n in alternatenames.split(','))

                matched = []
                for location, (name_key, alias_keys) in pending.items():
                    if name_key in names:
                        location.latitude = float(latitude)
                        location.longitude = float(longitude)
                        location.save()
                        geocoded += 1
                        matched.append(location)
                    elif any(key in names for key in alias_keys):
                        # Keep as a fallback; only applied if no name match
                        # turns up for this location.
                        found_by_alias[location] = float(latitude), float(longitude)

                # Removal is deferred so the dict is not mutated while it
                # is being iterated.
                for location in matched:
                    del pending[location]

            parsed += 1
            if parsed % 200000 == 0:
                print("Geocoded %d (%d by alias) out of %d countries (parsed %d geonames)" % (geocoded, len(found_by_alias), total, parsed))
            # Every country has been matched by name: nothing left to find.
            if total == geocoded:
                break

        # Apply alias matches to the locations no row matched by name.
        for location in pending:
            if location in found_by_alias:
                location.latitude, location.longitude = found_by_alias[location]
                location.save()
                geocoded += 1
        print("Done. Geocoded %d out of %d countries (parsed %d geonames)" % (geocoded, total, parsed))
    finally:
        stream.close()

def print_help(

self, prog_name, subcommand)

Print the help message for this command, derived from self.usage().

def print_help(self, prog_name, subcommand):
    """
    Print this command's help message.

    The message comes from the parser built by ``self.create_parser()``
    for ``prog_name`` and ``subcommand``.
    """
    self.create_parser(prog_name, subcommand).print_help()

def run_from_argv(

self, argv)

Set up any environment changes requested (e.g., Python path and Django settings), then run this command. If the command raises a CommandError, intercept it and print it sensibly to stderr. If the --traceback option is present or the raised Exception is not CommandError, raise it.

def run_from_argv(self, argv):
    """
    Parse ``argv`` and run the command.

    ``argv[0]`` and ``argv[1]`` are handed to ``create_parser()``; the
    remaining items are parsed as options and arguments, passed through
    ``handle_default_options()`` and then to ``execute()``.

    A ``CommandError`` raised by ``execute()`` is written to stderr and the
    process exits with status 1.  Any other exception, or any exception
    at all when the ``traceback`` option is set, is re-raised.
    """
    option_parser = self.create_parser(argv[0], argv[1])
    parsed = option_parser.parse_args(argv[2:])
    options, args = parsed
    handle_default_options(options)
    try:
        self.execute(*args, **options.__dict__)
    except Exception as exc:
        report_only = not options.traceback and isinstance(exc, CommandError)
        if not report_only:
            raise
        # self.stderr is not guaranteed to be set here, so a wrapper around
        # sys.stderr is prepared as the fallback.
        fallback = OutputWrapper(sys.stderr, self.style.ERROR)
        err_stream = getattr(self, 'stderr', fallback)
        err_stream.write('%s: %s' % (exc.__class__.__name__, exc))
        sys.exit(1)

def usage(

self, subcommand)

Return a brief description of how to use this command, by default from the attribute self.help.

def usage(self, subcommand):
    """
    Build the short usage text for ``subcommand``.

    The synopsis line is ``%prog <subcommand> [options] <self.args>``.
    When ``self.help`` is truthy it is appended after a blank line.
    """
    synopsis = '%%prog %s [options] %s' % (subcommand, self.args)
    if not self.help:
        return synopsis
    return '%s\n\n%s' % (synopsis, self.help)

def validate(

self, app=None, display_num_errors=False)

Validates the given app, raising CommandError for any errors.

If app is None, then this will validate all installed apps.

def validate(self, app=None, display_num_errors=False):
    """
    Validate ``app`` and raise ``CommandError`` if any errors are reported.

    With ``app`` left as None, all installed apps are validated.  When
    ``display_num_errors`` is true and validation passed, the error count
    is written to ``self.stdout``.
    """
    from django.core.management.validation import get_validation_errors
    report = StringIO()
    error_count = get_validation_errors(report, app)
    if error_count:
        # Rewind so the whole report collected so far can be read back.
        report.seek(0)
        raise CommandError("One or more models did not validate:\n%s" % report.read())
    if display_num_errors:
        plural = '' if error_count == 1 else 's'
        self.stdout.write("%s error%s found" % (error_count, plural))