You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
98 lines
3.1 KiB
98 lines
3.1 KiB
import os
|
|
import sqlite3
|
|
import django
|
|
import time
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Setup Django environment
|
|
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.develop')
|
|
django.setup()
|
|
|
|
def print_flush(msg):
|
|
print(msg)
|
|
sys.stdout.flush()
|
|
|
|
from apps.geolocation_package.models.geoNames import GeoNamesCity
|
|
from django.db import transaction, connection
|
|
|
|
# Constants
|
|
SQLITE_DB_PATH = Path("../geolocation_package/data/geonames_city.sqlite")
|
|
BATCH_SIZE = 25000
|
|
|
|
def migrate_data():
|
|
if not SQLITE_DB_PATH.exists():
|
|
print(f"Error: SQLite file not found at {SQLITE_DB_PATH}")
|
|
return
|
|
|
|
print_flush(f"\n{'='*60}")
|
|
print_flush(f"GEONAMES DATA MIGRATION: SQLITE -> POSTGRESQL")
|
|
print_flush(f"{'='*60}")
|
|
|
|
# 1. Connect to SQLite
|
|
print_flush(f"Connecting to SQLite: {SQLITE_DB_PATH}...")
|
|
sqlite_conn = sqlite3.connect(SQLITE_DB_PATH)
|
|
sqlite_curr = sqlite_conn.cursor()
|
|
|
|
# 2. Get total count
|
|
sqlite_curr.execute("SELECT COUNT(*) FROM geonames_city")
|
|
total_records = sqlite_curr.fetchone()[0]
|
|
print_flush(f"Found {total_records:,} records in SQLite.")
|
|
|
|
# 3. Clear PostgreSQL table
|
|
print_flush("\nEmptying existing PostgreSQL table (geonames_city)...")
|
|
with connection.cursor() as cursor:
|
|
cursor.execute("TRUNCATE TABLE geonames_city RESTART IDENTITY CASCADE;")
|
|
print_flush(" [OK] Table truncated.")
|
|
|
|
# 4. Migrate data in batches
|
|
print_flush(f"\nStarting migration in batches of {BATCH_SIZE:,}...")
|
|
start_time = time.time()
|
|
|
|
sqlite_curr.execute("SELECT id, name, country_code, latitude, longitude, feature_class, population FROM geonames_city")
|
|
|
|
processed = 0
|
|
while True:
|
|
rows = sqlite_curr.fetchmany(BATCH_SIZE)
|
|
if not rows:
|
|
break
|
|
|
|
objs = [
|
|
GeoNamesCity(
|
|
id=row[0],
|
|
name=row[1],
|
|
country_code=row[2],
|
|
latitude=row[3],
|
|
longitude=row[4],
|
|
feature_class=row[5],
|
|
population=row[6]
|
|
)
|
|
for row in rows
|
|
]
|
|
|
|
try:
|
|
with transaction.atomic():
|
|
GeoNamesCity.objects.bulk_create(objs)
|
|
|
|
processed += len(objs)
|
|
elapsed = time.time() - start_time
|
|
avg_speed = processed / elapsed if elapsed > 0 else 0
|
|
remaining = (total_records - processed) / avg_speed if avg_speed > 0 else 0
|
|
|
|
print_flush(f" [{processed:,}/{total_records:,}] ({processed/total_records*100:.1f}%) - Speed: {avg_speed:.0f} rows/s - ETA: {remaining/60:.1f} min")
|
|
|
|
except Exception as e:
|
|
print_flush(f" [Error] In batch starting at {processed}: {str(e)}")
|
|
break
|
|
|
|
total_time = time.time() - start_time
|
|
print_flush(f"\n{'='*60}")
|
|
print_flush(f"MIGRATION COMPLETED!")
|
|
print_flush(f"Total time: {total_time/60:.2f} minutes")
|
|
print_flush(f"Final Count in PostgreSQL: {GeoNamesCity.objects.count():,}")
|
|
print_flush(f"{'='*60}\n")
|
|
|
|
sqlite_conn.close()
|
|
|
|
if __name__ == "__main__":
|
|
migrate_data()
|