import os import sqlite3 import django import time import sys from pathlib import Path # Setup Django environment os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.develop') django.setup() def print_flush(msg): print(msg) sys.stdout.flush() from apps.geolocation_package.models.geoNames import GeoNamesCity from django.db import transaction, connection # Constants SQLITE_DB_PATH = Path("../geolocation_package/data/geonames_city.sqlite") BATCH_SIZE = 25000 def migrate_data(): if not SQLITE_DB_PATH.exists(): print(f"Error: SQLite file not found at {SQLITE_DB_PATH}") return print_flush(f"\n{'='*60}") print_flush(f"GEONAMES DATA MIGRATION: SQLITE -> POSTGRESQL") print_flush(f"{'='*60}") # 1. Connect to SQLite print_flush(f"Connecting to SQLite: {SQLITE_DB_PATH}...") sqlite_conn = sqlite3.connect(SQLITE_DB_PATH) sqlite_curr = sqlite_conn.cursor() # 2. Get total count sqlite_curr.execute("SELECT COUNT(*) FROM geonames_city") total_records = sqlite_curr.fetchone()[0] print_flush(f"Found {total_records:,} records in SQLite.") # 3. Clear PostgreSQL table print_flush("\nEmptying existing PostgreSQL table (geonames_city)...") with connection.cursor() as cursor: cursor.execute("TRUNCATE TABLE geonames_city RESTART IDENTITY CASCADE;") print_flush(" [OK] Table truncated.") # 4. Migrate data in batches print_flush(f"\nStarting migration in batches of {BATCH_SIZE:,}...") start_time = time.time() sqlite_curr.execute("SELECT id, name, country_code, latitude, longitude, feature_class, population FROM geonames_city") processed = 0 while True: rows = sqlite_curr.fetchmany(BATCH_SIZE) if not rows: break objs = [ GeoNamesCity( id=row[0], name=row[1], country_code=row[2], latitude=row[3], longitude=row[4], feature_class=row[5], population=row[6] ) for row in rows ] try: with transaction.atomic(): GeoNamesCity.objects.bulk_create(objs) processed += len(objs) elapsed = time.time() - start_time avg_speed = processed / elapsed if elapsed > 0 else 0 remaining = (total_records - processed) / avg_speed if avg_speed > 0 else 0 print_flush(f" [{processed:,}/{total_records:,}] ({processed/total_records*100:.1f}%) - Speed: {avg_speed:.0f} rows/s - ETA: {remaining/60:.1f} min") except Exception as e: print_flush(f" [Error] In batch starting at {processed}: {str(e)}") break total_time = time.time() - start_time print_flush(f"\n{'='*60}") print_flush(f"MIGRATION COMPLETED!") print_flush(f"Total time: {total_time/60:.2f} minutes") print_flush(f"Final Count in PostgreSQL: {GeoNamesCity.objects.count():,}") print_flush(f"{'='*60}\n") sqlite_conn.close() if __name__ == "__main__": migrate_data()