import csv
import os
import zipfile
from pathlib import Path

import requests
from django.core.management.base import BaseCommand, CommandError
from django.db import connection


class Command(BaseCommand):
    """Create the ``geonames_city`` table and load GeoNames city rows into it."""

    help = 'Create and populate geonames_city table with GeoNames data'

    # Directory (relative to the current working directory) for downloaded files.
    DATA_DIR = Path('utils/geonames_data')
    # cities500.zip: cities with population > 500.
    DOWNLOAD_URL = 'https://download.geonames.org/export/dump/cities500.zip'
    # Seconds to wait for the server before giving up instead of hanging forever.
    DOWNLOAD_TIMEOUT = 60
    # Number of tab-separated columns this command reads from every data row.
    EXPECTED_FIELDS = 19
    # Number of rows sent to the database per ``executemany`` call.
    BATCH_SIZE = 1000

    def add_arguments(self, parser):
        """Register the ``--force`` and ``--skip-download`` command-line flags."""
        parser.add_argument(
            '--force',
            action='store_true',
            help='Force recreation of table even if it exists',
        )
        parser.add_argument(
            '--skip-download',
            action='store_true',
            help='Skip downloading data, use existing files',
        )

    def handle(self, *args, **options):
        """Create the table and its indexes, then download and import the data.

        With ``--force`` the table is dropped first. With ``--skip-download``
        only the table and indexes are created.
        """
        self.stdout.write('Creating geonames_city table...')

        with connection.cursor() as cursor:
            if options['force']:
                cursor.execute('DROP TABLE IF EXISTS geonames_city')

            cursor.execute('''
                CREATE TABLE IF NOT EXISTS geonames_city (
                    id SERIAL PRIMARY KEY,
                    geonameid INTEGER,
                    name VARCHAR(200),
                    asciiname VARCHAR(200),
                    alternatenames TEXT,
                    latitude DECIMAL(10, 7),
                    longitude DECIMAL(10, 7),
                    feature_class CHAR(1),
                    feature_code VARCHAR(10),
                    country_code CHAR(2),
                    cc2 VARCHAR(200),
                    admin1_code VARCHAR(20),
                    admin2_code VARCHAR(80),
                    admin3_code VARCHAR(20),
                    admin4_code VARCHAR(20),
                    population BIGINT,
                    elevation INTEGER,
                    dem INTEGER,
                    timezone VARCHAR(40),
                    modification_date DATE
                )
            ''')

            # Create indexes for better performance
            cursor.execute('CREATE INDEX IF NOT EXISTS idx_geonames_city_coords ON geonames_city (latitude, longitude)')
            cursor.execute('CREATE INDEX IF NOT EXISTS idx_geonames_city_country ON geonames_city (country_code)')
            cursor.execute('CREATE INDEX IF NOT EXISTS idx_geonames_city_feature ON geonames_city (feature_class)')
            cursor.execute('CREATE INDEX IF NOT EXISTS idx_geonames_city_population ON geonames_city (population)')

        self.stdout.write(self.style.SUCCESS('Table created successfully'))

        if not options['skip_download']:
            self.download_and_import_data()
        else:
            # NOTE(review): this branch skips the import as well as the
            # download, although the flag's help text says "use existing
            # files". Confirm whether an import from the existing
            # cities500.txt is intended here.
            self.stdout.write('Skipping download, using existing data...')

    def download_and_import_data(self):
        """Download cities500.zip, extract it, and import cities500.txt.

        Raises:
            CommandError: if the download, extraction, or import fails. The
                original exception is attached as ``__cause__``.
        """
        self.stdout.write('Downloading GeoNames cities data...')

        data_dir = self.DATA_DIR
        # parents=True so a missing ``utils/`` directory does not abort the command.
        data_dir.mkdir(parents=True, exist_ok=True)
        zip_path = data_dir / 'cities500.zip'

        try:
            # The context manager releases the streamed connection even when
            # writing the file fails part-way through.
            with requests.get(
                self.DOWNLOAD_URL,
                stream=True,
                timeout=self.DOWNLOAD_TIMEOUT,
            ) as response:
                response.raise_for_status()
                with open(zip_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)

            self.stdout.write('Download completed')

            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(data_dir)

            self.import_cities_data(data_dir / 'cities500.txt')
        except Exception as e:
            # Top-level boundary: surface any failure as a CommandError while
            # keeping the original exception chained for debugging.
            raise CommandError(f'Failed to download/import data: {e}') from e

    def import_cities_data(self, txt_file):
        """Read a tab-separated GeoNames file and insert its rows in batches.

        Args:
            txt_file: ``pathlib.Path`` of the extracted GeoNames text file.

        Raises:
            CommandError: if ``txt_file`` does not exist.
        """
        self.stdout.write(f'Importing data from {txt_file}...')

        if not txt_file.exists():
            raise CommandError(f'File {txt_file} does not exist')

        batch = []
        skipped = 0

        with open(txt_file, 'r', encoding='utf-8') as f:
            for line_num, line in enumerate(f, 1):
                if line_num % 10000 == 0:
                    self.stdout.write(f'Processing line {line_num}...')

                # Strip only the line terminator: a plain strip() would also
                # remove trailing tabs, so rows whose last columns are empty
                # would look too short and be dropped.
                fields = line.rstrip('\r\n').split('\t')
                if len(fields) < self.EXPECTED_FIELDS:
                    if line.strip():
                        skipped += 1
                    continue

                try:
                    row = self._parse_city_row(fields)
                except (ValueError, IndexError) as e:
                    self.stdout.write(self.style.WARNING(f'Error parsing line {line_num}: {e}'))
                    continue

                batch.append(row)
                if len(batch) >= self.BATCH_SIZE:
                    self.insert_batch(batch)
                    batch = []

        # Insert remaining records
        if batch:
            self.insert_batch(batch)

        if skipped:
            self.stdout.write(self.style.WARNING(
                f'Skipped {skipped} line(s) with fewer than {self.EXPECTED_FIELDS} fields'
            ))

        self.stdout.write(self.style.SUCCESS('Data import completed'))

    @staticmethod
    def _parse_city_row(fields):
        """Convert one split GeoNames line into a tuple in INSERT column order.

        Args:
            fields: list of at least 19 strings, one per tab-separated column.

        Returns:
            A 19-tuple matching the column list used by ``insert_batch``.

        Raises:
            ValueError: if a numeric column cannot be converted.
        """
        return (
            int(fields[0]),                          # geonameid
            fields[1][:200],                         # name, capped to VARCHAR(200)
            fields[2][:200],                         # asciiname, capped to VARCHAR(200)
            fields[3],                               # alternatenames
            float(fields[4]),                        # latitude
            float(fields[5]),                        # longitude
            fields[6],                               # feature_class
            fields[7],                               # feature_code
            fields[8][:2],                           # country_code, capped to CHAR(2)
            fields[9],                               # cc2
            fields[10],                              # admin1_code
            fields[11],                              # admin2_code
            fields[12],                              # admin3_code
            fields[13],                              # admin4_code
            int(fields[14]) if fields[14] else 0,    # population, empty -> 0
            int(fields[15]) if fields[15] else None, # elevation, empty -> NULL
            int(fields[16]) if fields[16] else None, # dem, empty -> NULL
            fields[17],                              # timezone
            fields[18].strip() or None,              # modification_date, empty -> NULL
        )

    def insert_batch(self, batch):
        """Insert a batch of row tuples into ``geonames_city``.

        Args:
            batch: sequence of 19-tuples in the column order of the INSERT below.
        """
        if not batch:
            return

        with connection.cursor() as cursor:
            cursor.executemany('''
                INSERT INTO geonames_city (
                    geonameid, name, asciiname, alternatenames, latitude, longitude,
                    feature_class, feature_code, country_code, cc2, admin1_code,
                    admin2_code, admin3_code, admin4_code, population, elevation,
                    dem, timezone, modification_date
                ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            ''', batch)