You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

98 lines
3.1 KiB

import os
import sqlite3
import django
import time
import sys
from pathlib import Path
# Setup Django environment
# setdefault() only sets DJANGO_SETTINGS_MODULE when it is not already present
# in the environment, so an externally supplied value takes precedence.
# django.setup() has to run before any model module is imported, which is why
# the model import appears further down instead of with the other imports.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.develop')
django.setup()
def print_flush(msg):
    """Print *msg* to stdout and flush immediately.

    Flushing after every message keeps progress output visible in real time
    even when stdout is buffered (e.g. piped to a file or another process).
    """
    print(msg, flush=True)
from apps.geolocation_package.models.geoNames import GeoNamesCity
from django.db import transaction, connection
# Constants
# Location of the source SQLite database. The path is relative, so it is
# resolved against the current working directory at the time it is used.
SQLITE_DB_PATH = Path("../geolocation_package/data/geonames_city.sqlite")
# Number of rows fetched from SQLite and inserted per bulk_create() call.
BATCH_SIZE = 25000
def migrate_data():
    """Copy every row of the SQLite ``geonames_city`` table into PostgreSQL.

    The PostgreSQL ``geonames_city`` table is truncated first. The SQLite rows
    are then streamed in batches of ``BATCH_SIZE`` and inserted through
    ``GeoNamesCity.objects.bulk_create``, one transaction per batch. Progress,
    throughput and an ETA are printed after every batch, followed by a
    summary with the final row count.

    Returns:
        None. A missing SQLite file or a failed batch is reported on stdout
        instead of being raised; a failed batch stops the migration and the
        summary is labelled as aborted. Errors from the SQLite queries or
        from the TRUNCATE statement propagate to the caller.

    NOTE: the TRUNCATE runs outside the per-batch transactions (and is
    committed right away under Django's default autocommit), so a failure
    part-way through leaves the target table only partially populated.
    """
    # sqlite3.connect() silently creates an empty database for a missing
    # path, so check up front instead of failing later on a missing table.
    if not SQLITE_DB_PATH.exists():
        print_flush(f"Error: SQLite file not found at {SQLITE_DB_PATH}")
        return

    banner = "=" * 60
    print_flush(f"\n{banner}")
    print_flush("GEONAMES DATA MIGRATION: SQLITE -> POSTGRESQL")
    print_flush(banner)

    # Single source of truth for the copied columns: used both to build the
    # SELECT and as keyword names for the model constructor, so the two
    # cannot drift apart.
    columns = (
        "id",
        "name",
        "country_code",
        "latitude",
        "longitude",
        "feature_class",
        "population",
    )

    # 1. Connect to SQLite
    print_flush(f"Connecting to SQLite: {SQLITE_DB_PATH}...")
    sqlite_conn = sqlite3.connect(SQLITE_DB_PATH)
    try:
        sqlite_curr = sqlite_conn.cursor()

        # 2. Get total count
        sqlite_curr.execute("SELECT COUNT(*) FROM geonames_city")
        total_records = sqlite_curr.fetchone()[0]
        print_flush(f"Found {total_records:,} records in SQLite.")

        # 3. Clear PostgreSQL table
        print_flush("\nEmptying existing PostgreSQL table (geonames_city)...")
        with connection.cursor() as cursor:
            cursor.execute("TRUNCATE TABLE geonames_city RESTART IDENTITY CASCADE;")
        print_flush(" [OK] Table truncated.")

        # 4. Migrate data in batches
        print_flush(f"\nStarting migration in batches of {BATCH_SIZE:,}...")
        start_time = time.time()
        sqlite_curr.execute(f"SELECT {', '.join(columns)} FROM geonames_city")

        processed = 0
        failed = False
        while True:
            rows = sqlite_curr.fetchmany(BATCH_SIZE)
            if not rows:
                break

            objs = [GeoNamesCity(**dict(zip(columns, row))) for row in rows]

            # Only the insert is guarded; a failure rolls back this batch
            # alone and stops the run, earlier batches stay committed.
            try:
                with transaction.atomic():
                    GeoNamesCity.objects.bulk_create(objs)
            except Exception as e:
                print_flush(f" [Error] In batch starting at {processed}: {e}")
                failed = True
                break

            processed += len(objs)
            elapsed = time.time() - start_time
            avg_speed = processed / elapsed if elapsed > 0 else 0
            remaining = (total_records - processed) / avg_speed if avg_speed > 0 else 0
            percent = processed / total_records * 100 if total_records else 0.0
            print_flush(
                f" [{processed:,}/{total_records:,}] ({percent:.1f}%)"
                f" - Speed: {avg_speed:.0f} rows/s - ETA: {remaining/60:.1f} min"
            )

        total_time = time.time() - start_time
        print_flush(f"\n{banner}")
        if failed:
            # Do not claim success after a batch error stopped the loop.
            print_flush(
                f"MIGRATION ABORTED: {processed:,} of {total_records:,} rows copied"
            )
        else:
            print_flush("MIGRATION COMPLETED!")
        print_flush(f"Total time: {total_time/60:.2f} minutes")
        print_flush(f"Final Count in PostgreSQL: {GeoNamesCity.objects.count():,}")
        print_flush(f"{banner}\n")
    finally:
        # Release the SQLite handle on every exit path, including errors.
        sqlite_conn.close()
# Run the migration only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    migrate_data()