You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
175 lines
7.0 KiB
175 lines
7.0 KiB
import os
|
|
import csv
|
|
import zipfile
|
|
import requests
|
|
from pathlib import Path
|
|
from django.core.management.base import BaseCommand, CommandError
|
|
from django.db import connection
|
|
|
|
|
|
class Command(BaseCommand):
    """Create the ``geonames_city`` table and load GeoNames city rows into it.

    The command works in two stages:

    1. ``handle`` creates the table and its indexes with raw SQL
       (optionally dropping the table first when ``--force`` is given).
    2. Unless ``--skip-download`` is given, the ``cities500.zip`` dump is
       downloaded, extracted and imported.  With ``--skip-download`` a
       previously extracted ``cities500.txt`` is imported when it exists.

    NOTE(review): the DDL uses ``SERIAL``, which suggests the target database
    is PostgreSQL -- confirm before running against another backend.
    NOTE(review): ``geonameid`` has no unique constraint, so importing twice
    without ``--force`` inserts every row twice -- confirm whether intended.
    """

    help = 'Create and populate geonames_city table with GeoNames data'

    # Where the dump is stored locally and where it is fetched from.
    DATA_DIR = Path('utils/geonames_data')
    DOWNLOAD_URL = 'https://download.geonames.org/export/dump/cities500.zip'
    ZIP_NAME = 'cities500.zip'
    TXT_NAME = 'cities500.txt'

    # Seconds to wait for the server to connect / send data before giving up.
    DOWNLOAD_TIMEOUT = 60

    # Number of tab-separated columns every data row must provide.
    EXPECTED_FIELDS = 19

    # Number of parsed rows collected before they are flushed to the database.
    BATCH_SIZE = 1000

    def add_arguments(self, parser):
        """Register the ``--force`` and ``--skip-download`` flags."""
        parser.add_argument(
            '--force',
            action='store_true',
            help='Force recreation of table even if it exists',
        )
        parser.add_argument(
            '--skip-download',
            action='store_true',
            help='Skip downloading data, use existing files',
        )

    def handle(self, *args, **options):
        """Create the table and indexes, then load the city data.

        Raises:
            CommandError: if downloading, extracting or importing fails.
        """
        self.stdout.write('Creating geonames_city table...')

        index_statements = (
            'CREATE INDEX IF NOT EXISTS idx_geonames_city_coords ON geonames_city (latitude, longitude)',
            'CREATE INDEX IF NOT EXISTS idx_geonames_city_country ON geonames_city (country_code)',
            'CREATE INDEX IF NOT EXISTS idx_geonames_city_feature ON geonames_city (feature_class)',
            'CREATE INDEX IF NOT EXISTS idx_geonames_city_population ON geonames_city (population)',
        )

        with connection.cursor() as cursor:
            if options['force']:
                cursor.execute('DROP TABLE IF EXISTS geonames_city')

            cursor.execute('''
                CREATE TABLE IF NOT EXISTS geonames_city (
                    id SERIAL PRIMARY KEY,
                    geonameid INTEGER,
                    name VARCHAR(200),
                    asciiname VARCHAR(200),
                    alternatenames TEXT,
                    latitude DECIMAL(10, 7),
                    longitude DECIMAL(10, 7),
                    feature_class CHAR(1),
                    feature_code VARCHAR(10),
                    country_code CHAR(2),
                    cc2 VARCHAR(200),
                    admin1_code VARCHAR(20),
                    admin2_code VARCHAR(80),
                    admin3_code VARCHAR(20),
                    admin4_code VARCHAR(20),
                    population BIGINT,
                    elevation INTEGER,
                    dem INTEGER,
                    timezone VARCHAR(40),
                    modification_date DATE
                )
            ''')

            # Create indexes for better performance
            for statement in index_statements:
                cursor.execute(statement)

        self.stdout.write(self.style.SUCCESS('Table created successfully'))

        if not options['skip_download']:
            self.download_and_import_data()
            return

        self.stdout.write('Skipping download, using existing data...')

        # "--skip-download" is documented as "use existing files", so import
        # the previously extracted dump when it is there.  A missing file is
        # only a warning so that table-only runs keep succeeding.
        existing_file = self.DATA_DIR / self.TXT_NAME
        if existing_file.exists():
            self.import_cities_data(existing_file)
        else:
            self.stdout.write(self.style.WARNING(
                f'No existing data file found at {existing_file}; nothing was imported'
            ))

    def download_and_import_data(self):
        """Download the GeoNames cities archive, extract it and import it.

        Raises:
            CommandError: wrapping any error raised while downloading,
                extracting or importing the data.
        """
        self.stdout.write('Downloading GeoNames cities data...')

        # Create data directory (including missing parents such as "utils").
        data_dir = self.DATA_DIR
        data_dir.mkdir(parents=True, exist_ok=True)

        # cities500.zip (cities with population > 500)
        zip_path = data_dir / self.ZIP_NAME

        try:
            # The timeout keeps an unresponsive server from hanging the
            # command forever; the "with" releases the streamed connection.
            with requests.get(
                self.DOWNLOAD_URL,
                stream=True,
                timeout=self.DOWNLOAD_TIMEOUT,
            ) as response:
                response.raise_for_status()

                with open(zip_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)

            self.stdout.write('Download completed')

            # Extract zip file
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(data_dir)

            # Import data
            self.import_cities_data(data_dir / self.TXT_NAME)

        except Exception as e:
            # Top-level boundary of the command: report every failure as a
            # CommandError while keeping the original exception as the cause.
            raise CommandError(f'Failed to download/import data: {e}') from e

    def import_cities_data(self, txt_file):
        """Import cities data from a GeoNames tab-separated text file.

        Args:
            txt_file: path (``Path`` or ``str``) of the extracted dump.

        Raises:
            CommandError: if ``txt_file`` does not exist.
        """
        txt_file = Path(txt_file)
        self.stdout.write(f'Importing data from {txt_file}...')

        if not txt_file.exists():
            raise CommandError(f'File {txt_file} does not exist')

        batch = []

        with open(txt_file, 'r', encoding='utf-8') as f:
            for line_num, line in enumerate(f, 1):
                if line_num % 10000 == 0:
                    self.stdout.write(f'Processing line {line_num}...')

                # Strip only the line terminator: a plain strip() would also
                # remove trailing tabs and thereby drop empty final columns.
                fields = line.rstrip('\r\n').split('\t')
                if len(fields) < self.EXPECTED_FIELDS:
                    if line.strip():
                        self.stdout.write(self.style.WARNING(
                            f'Skipping line {line_num}: expected '
                            f'{self.EXPECTED_FIELDS} fields, got {len(fields)}'
                        ))
                    continue

                # Only the parsing is guarded, so that a database error is
                # never mistaken for a malformed input line.
                try:
                    record = self._parse_fields(fields)
                except (ValueError, IndexError) as e:
                    self.stdout.write(self.style.WARNING(f'Error parsing line {line_num}: {e}'))
                    continue

                batch.append(record)
                if len(batch) >= self.BATCH_SIZE:
                    self.insert_batch(batch)
                    batch = []

        # Insert remaining records
        if batch:
            self.insert_batch(batch)

        self.stdout.write(self.style.SUCCESS('Data import completed'))

    @staticmethod
    def _parse_fields(fields):
        """Convert one split GeoNames row into the tuple ``insert_batch`` expects.

        Args:
            fields: list of at least 19 column strings, in file order.

        Returns:
            A 19-tuple ordered like the column list of the INSERT statement.

        Raises:
            ValueError: if a numeric column cannot be converted.
            IndexError: if fewer than 19 columns are supplied.
        """
        return (
            int(fields[0]),                             # geonameid
            fields[1][:200],                            # name (column is VARCHAR(200))
            fields[2][:200],                            # asciiname (VARCHAR(200))
            fields[3],                                  # alternatenames
            float(fields[4]),                           # latitude
            float(fields[5]),                           # longitude
            fields[6],                                  # feature_class
            fields[7],                                  # feature_code
            fields[8][:2],                              # country_code (CHAR(2))
            fields[9],                                  # cc2
            fields[10],                                 # admin1_code
            fields[11],                                 # admin2_code
            fields[12],                                 # admin3_code
            fields[13],                                 # admin4_code
            int(fields[14]) if fields[14] else 0,       # population, empty -> 0
            int(fields[15]) if fields[15] else None,    # elevation, empty -> NULL
            int(fields[16]) if fields[16] else None,    # dem, empty -> NULL
            fields[17],                                 # timezone
            fields[18] or None,                         # modification_date, empty -> NULL
        )

    def insert_batch(self, batch):
        """Insert a batch of records into the database.

        Args:
            batch: sequence of 19-tuples ordered like the column list below.
        """
        if not batch:
            return

        with connection.cursor() as cursor:
            cursor.executemany('''
                INSERT INTO geonames_city (
                    geonameid, name, asciiname, alternatenames, latitude, longitude,
                    feature_class, feature_code, country_code, cc2, admin1_code,
                    admin2_code, admin3_code, admin4_code, population, elevation,
                    dem, timezone, modification_date
                ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            ''', batch)