You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

175 lines
7.0 KiB

import os
import csv
import zipfile
import requests
from pathlib import Path
from django.core.management.base import BaseCommand, CommandError
from django.db import connection
class Command(BaseCommand):
    """Create the ``geonames_city`` table and load GeoNames city rows into it.

    The command always (re)creates the table and its indexes. It then either
    downloads ``cities500.zip`` and imports the extracted dump, or (with
    ``--skip-download``) imports a previously extracted dump from the local
    data directory.

    Rows are appended on every import; nothing de-duplicates them. Use
    ``--force`` to drop the table first when a clean reload is wanted.
    """

    help = 'Create and populate geonames_city table with GeoNames data'

    # Download location and file names. DATA_DIR is relative to the current
    # working directory of the process running the command.
    DATA_DIR = Path('utils/geonames_data')
    CITIES_URL = 'https://download.geonames.org/export/dump/cities500.zip'
    CITIES_ZIP = 'cities500.zip'
    CITIES_TXT = 'cities500.txt'

    # Seconds passed to ``requests`` as the timeout, so a stalled server
    # cannot hang the command forever.
    DOWNLOAD_TIMEOUT = 60

    # Number of rows sent to the database per ``executemany`` call.
    BATCH_SIZE = 1000

    # Number of tab-separated columns the importer reads from each line.
    FIELD_COUNT = 19

    def add_arguments(self, parser):
        """Register the ``--force`` and ``--skip-download`` flags."""
        parser.add_argument(
            '--force',
            action='store_true',
            help='Force recreation of table even if it exists',
        )
        parser.add_argument(
            '--skip-download',
            action='store_true',
            help='Skip downloading data, use existing files',
        )

    def handle(self, *args, **options):
        """Create the table and indexes, then populate the table.

        With ``--force`` the table is dropped before being created. Without
        ``--skip-download`` the data is downloaded and imported. With
        ``--skip-download`` the already-extracted dump in ``DATA_DIR`` is
        imported if it exists; otherwise a warning is printed and nothing is
        imported.
        """
        self.stdout.write('Creating geonames_city table...')

        with connection.cursor() as cursor:
            if options['force']:
                cursor.execute('DROP TABLE IF EXISTS geonames_city')

            cursor.execute('''
                CREATE TABLE IF NOT EXISTS geonames_city (
                    id SERIAL PRIMARY KEY,
                    geonameid INTEGER,
                    name VARCHAR(200),
                    asciiname VARCHAR(200),
                    alternatenames TEXT,
                    latitude DECIMAL(10, 7),
                    longitude DECIMAL(10, 7),
                    feature_class CHAR(1),
                    feature_code VARCHAR(10),
                    country_code CHAR(2),
                    cc2 VARCHAR(200),
                    admin1_code VARCHAR(20),
                    admin2_code VARCHAR(80),
                    admin3_code VARCHAR(20),
                    admin4_code VARCHAR(20),
                    population BIGINT,
                    elevation INTEGER,
                    dem INTEGER,
                    timezone VARCHAR(40),
                    modification_date DATE
                )
            ''')

            # Create indexes for better performance
            cursor.execute('CREATE INDEX IF NOT EXISTS idx_geonames_city_coords ON geonames_city (latitude, longitude)')
            cursor.execute('CREATE INDEX IF NOT EXISTS idx_geonames_city_country ON geonames_city (country_code)')
            cursor.execute('CREATE INDEX IF NOT EXISTS idx_geonames_city_feature ON geonames_city (feature_class)')
            cursor.execute('CREATE INDEX IF NOT EXISTS idx_geonames_city_population ON geonames_city (population)')

        self.stdout.write(self.style.SUCCESS('Table created successfully'))

        if not options['skip_download']:
            self.download_and_import_data()
            return

        self.stdout.write('Skipping download, using existing data...')
        # Honor the flag's help text ("use existing files"): load the dump
        # left behind by an earlier run instead of leaving the table as-is.
        txt_file = self.DATA_DIR / self.CITIES_TXT
        if txt_file.exists():
            self.import_cities_data(txt_file)
        else:
            self.stdout.write(self.style.WARNING(
                f'No existing data file at {txt_file}; nothing imported'
            ))

    def download_and_import_data(self):
        """Download ``cities500.zip``, extract it, and import the dump.

        Raises:
            CommandError: if the download, extraction, or import fails.
        """
        self.stdout.write('Downloading GeoNames cities data...')

        # parents=True so a missing ``utils`` directory is created as well.
        data_dir = self.DATA_DIR
        data_dir.mkdir(parents=True, exist_ok=True)

        # Download cities500.zip (cities with population > 500)
        zip_path = data_dir / self.CITIES_ZIP

        try:
            # The context manager releases the streamed connection even when
            # writing the file fails part-way through.
            with requests.get(
                self.CITIES_URL, stream=True, timeout=self.DOWNLOAD_TIMEOUT
            ) as response:
                response.raise_for_status()
                with open(zip_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)

            self.stdout.write('Download completed')

            # Extract zip file
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(data_dir)

            # Import data
            self.import_cities_data(data_dir / self.CITIES_TXT)
        except CommandError:
            # Already a user-facing error; do not wrap it a second time.
            raise
        except Exception as e:
            # Top-level boundary of the command: report any failure as a
            # CommandError while keeping the original traceback chained.
            raise CommandError(f'Failed to download/import data: {e}') from e

    def import_cities_data(self, txt_file):
        """Import city rows from a tab-separated GeoNames text file.

        Lines with fewer than ``FIELD_COUNT`` columns, or with numeric
        columns that cannot be parsed, are skipped; the number skipped is
        reported at the end. Rows are inserted in batches of ``BATCH_SIZE``.

        Args:
            txt_file: path of the extracted dump (``Path`` or ``str``).

        Raises:
            CommandError: if ``txt_file`` does not exist.
        """
        txt_file = Path(txt_file)
        self.stdout.write(f'Importing data from {txt_file}...')

        if not txt_file.exists():
            raise CommandError(f'File {txt_file} does not exist')

        batch = []
        skipped = 0

        with open(txt_file, 'r', encoding='utf-8') as f:
            for line_num, line in enumerate(f, 1):
                if line_num % 10000 == 0:
                    self.stdout.write(f'Processing line {line_num}...')

                # Strip only the line terminator: a plain strip() would also
                # eat trailing tabs and so drop empty trailing columns.
                fields = line.rstrip('\r\n').split('\t')
                if len(fields) < self.FIELD_COUNT:
                    if line.strip():  # blank lines are not worth reporting
                        skipped += 1
                    continue

                # Only parsing is guarded, so database errors raised by
                # insert_batch are never mistaken for a bad input line.
                try:
                    row = self._parse_city_row(fields)
                except ValueError as e:
                    self.stdout.write(self.style.WARNING(f'Error parsing line {line_num}: {e}'))
                    skipped += 1
                    continue

                batch.append(row)
                if len(batch) >= self.BATCH_SIZE:
                    self.insert_batch(batch)
                    batch = []

        # Insert remaining records
        if batch:
            self.insert_batch(batch)

        if skipped:
            self.stdout.write(self.style.WARNING(f'Skipped {skipped} malformed line(s)'))
        self.stdout.write(self.style.SUCCESS('Data import completed'))

    @staticmethod
    def _parse_city_row(fields):
        """Convert one split line into the tuple expected by ``insert_batch``.

        ``fields`` must hold at least 19 strings. Empty population becomes 0;
        empty elevation, dem and modification date become ``None``.

        Raises:
            ValueError: if a numeric column cannot be converted.
        """
        return (
            int(fields[0]),                          # geonameid
            fields[1][:200],                         # name
            fields[2][:200],                         # asciiname
            fields[3],                               # alternatenames
            float(fields[4]),                        # latitude
            float(fields[5]),                        # longitude
            fields[6],                               # feature_class
            fields[7],                               # feature_code
            fields[8][:2],                           # country_code
            fields[9],                               # cc2
            fields[10],                              # admin1_code
            fields[11],                              # admin2_code
            fields[12],                              # admin3_code
            fields[13],                              # admin4_code
            int(fields[14]) if fields[14] else 0,    # population
            int(fields[15]) if fields[15] else None,  # elevation
            int(fields[16]) if fields[16] else None,  # dem
            fields[17],                              # timezone
            fields[18] or None,                      # modification_date
        )

    def insert_batch(self, batch):
        """Insert a batch of row tuples into ``geonames_city``.

        Each tuple must hold the 19 values in the column order listed in the
        INSERT statement below.
        """
        with connection.cursor() as cursor:
            cursor.executemany('''
                INSERT INTO geonames_city (
                    geonameid, name, asciiname, alternatenames, latitude, longitude,
                    feature_class, feature_code, country_code, cc2, admin1_code,
                    admin2_code, admin3_code, admin4_code, population, elevation,
                    dem, timezone, modification_date
                ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            ''', batch)