From afaaaab485c5d9240a79051163b1902df1e5409b Mon Sep 17 00:00:00 2001 From: mohsentaba Date: Mon, 13 Apr 2026 10:09:21 +0330 Subject: [PATCH] test files and md guide for it added for geolocation coordinates. --- GEOLOCATION_TESTING.md | 66 +++++++++++++++++++++++++ migrate_geo_data.py | 98 ++++++++++++++++++++++++++++++++++++++ test_geo_app_flow.py | 64 +++++++++++++++++++++++++ test_geo_raw_sql.py | 83 ++++++++++++++++++++++++++++++++ test_geo_search_integer.py | 65 +++++++++++++++++++++++++ 5 files changed, 376 insertions(+) create mode 100644 GEOLOCATION_TESTING.md create mode 100644 migrate_geo_data.py create mode 100644 test_geo_app_flow.py create mode 100644 test_geo_raw_sql.py create mode 100644 test_geo_search_integer.py diff --git a/GEOLOCATION_TESTING.md b/GEOLOCATION_TESTING.md new file mode 100644 index 0000000..5f64c62 --- /dev/null +++ b/GEOLOCATION_TESTING.md @@ -0,0 +1,66 @@ +# Geolocation Diagnostic & Testing Tools + +This guide explains how to use the custom scripts created to monitor, debug, and manage the geonames city database. + +## 🚀 Migration Tool + +### `migrate_geo_data.py` +Used to migrate 5.1 million city records from the reference SQLite file to your primary PostgreSQL database. + +**Usage:** +```bash +python migrate_geo_data.py +``` +*Note: This script uses high-speed `TRUNCATE` and batch processing (25k rows/batch) to ensure performance.* + +--- + +## 🔍 Diagnostic Scripts + +### 1. `test_geo_app_flow.py` +Monitors the **logical flow** of a reverse geolocation request. It helps you see if a coordinate is hitting a "Special Case" (placeholder), the cache, or the database. + +**Usage:** +```bash +python test_geo_app_flow.py +``` +**Example Output:** +- `[STEP 1] Checking Special Coordinates...` +- `[STEP 2] Checking Cache...` +- `[STEP 3] Executing Application Logic...` + +### 2. `test_geo_raw_sql.py` +Dives into the **PostgreSQL database** to show exactly which cities are being considered as candidates and how they are scored. 
def print_flush(msg):
    """Print *msg* and flush stdout at once so progress is visible in real time."""
    print(msg, flush=True)


def migrate_data():
    """Copy every row of the SQLite ``geonames_city`` table into PostgreSQL.

    Steps, in order:

    1. Abort early (nothing is touched) if ``SQLITE_DB_PATH`` does not exist.
    2. Count the source rows, for progress reporting only.
    3. ``TRUNCATE ... RESTART IDENTITY CASCADE`` the PostgreSQL table. This is
       destructive: ``CASCADE`` also empties any table that has a foreign key
       to ``geonames_city``.
    4. Stream the source rows in chunks of ``BATCH_SIZE`` and insert each
       chunk with ``bulk_create`` inside its own transaction.

    Each chunk commits independently, so a failure part-way through leaves
    the rows of all earlier chunks in PostgreSQL. The final banner says
    whether the run completed or was aborted.

    Returns:
        None. All reporting goes to stdout via ``print_flush``.
    """
    if not SQLITE_DB_PATH.exists():
        print_flush(f"Error: SQLite file not found at {SQLITE_DB_PATH}")
        return

    banner = "=" * 60
    print_flush(f"\n{banner}")
    print_flush("GEONAMES DATA MIGRATION: SQLITE -> POSTGRESQL")
    print_flush(banner)

    # 1. Connect to SQLite
    print_flush(f"Connecting to SQLite: {SQLITE_DB_PATH}...")
    sqlite_conn = sqlite3.connect(SQLITE_DB_PATH)
    try:
        sqlite_curr = sqlite_conn.cursor()

        # 2. Get total count (used only for the percentage / ETA display)
        sqlite_curr.execute("SELECT COUNT(*) FROM geonames_city")
        total_records = sqlite_curr.fetchone()[0]
        print_flush(f"Found {total_records:,} records in SQLite.")

        # 3. Clear PostgreSQL table
        print_flush("\nEmptying existing PostgreSQL table (geonames_city)...")
        with connection.cursor() as cursor:
            cursor.execute("TRUNCATE TABLE geonames_city RESTART IDENTITY CASCADE;")
        print_flush(" [OK] Table truncated.")

        # 4. Migrate data in batches
        print_flush(f"\nStarting migration in batches of {BATCH_SIZE:,}...")
        start_time = time.time()

        sqlite_curr.execute(
            "SELECT id, name, country_code, latitude, longitude, feature_class, population FROM geonames_city"
        )

        processed = 0
        failed = False
        while True:
            rows = sqlite_curr.fetchmany(BATCH_SIZE)
            if not rows:
                break

            objs = [
                GeoNamesCity(
                    id=pk,
                    name=name,
                    country_code=country_code,
                    latitude=latitude,
                    longitude=longitude,
                    feature_class=feature_class,
                    population=population,
                )
                for pk, name, country_code, latitude, longitude, feature_class, population in rows
            ]

            # Only the insert is guarded: this is the script's top-level
            # boundary, so any database error is reported and the run stops.
            try:
                with transaction.atomic():
                    GeoNamesCity.objects.bulk_create(objs)
            except Exception as e:
                print_flush(f" [Error] In batch starting at {processed}: {e}")
                failed = True
                break

            processed += len(objs)
            elapsed = time.time() - start_time
            avg_speed = processed / elapsed if elapsed > 0 else 0
            remaining = (total_records - processed) / avg_speed if avg_speed > 0 else 0
            # Guard the percentage in case the source count was 0 when sampled.
            percent = processed / total_records * 100 if total_records else 0.0

            print_flush(
                f" [{processed:,}/{total_records:,}] ({percent:.1f}%) - "
                f"Speed: {avg_speed:.0f} rows/s - ETA: {remaining/60:.1f} min"
            )

        total_time = time.time() - start_time
        print_flush(f"\n{banner}")
        if failed:
            # Earlier batches were committed one by one, so the table is partial.
            print_flush("MIGRATION ABORTED: a batch failed, PostgreSQL holds only partial data.")
        else:
            print_flush("MIGRATION COMPLETED!")
        print_flush(f"Total time: {total_time/60:.2f} minutes")
        print_flush(f"Final Count in PostgreSQL: {GeoNamesCity.objects.count():,}")
        print_flush(f"{banner}\n")
    finally:
        # Release the SQLite handle even when a step above raises.
        sqlite_conn.close()
def test_flow(lat, lon):
    """Trace a reverse-geolocation lookup for ``(lat, lon)`` step by step.

    This is a diagnostic: every step is printed and nothing is returned.

    1. Compare the point with each pair in ``SPECIAL_COORDINATES`` using a
       0.001-degree tolerance on both axes.
    2. Look the point up in the Django cache under
       ``geo_<lat rounded to 2>_<lon rounded to 2>``.
    3. Call ``get_location_by_coordinates(lat, lon)`` with the
       ``city_detection_ip`` logger set to INFO.
    4. Print the resulting city and country code, or a failure notice.

    All four steps always run; a special-coordinate match or a cache hit is
    reported but does not stop the trace.

    Args:
        lat: Latitude in decimal degrees.
        lon: Longitude in decimal degrees.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print(f"DEBUGGING REVERSE GEOFLOW FOR: ({lat}, {lon})")
    print(banner)

    # Step 1: Special Coordinates Check
    print("\n[STEP 1] Checking Special Coordinates...")
    for s_lat, s_lon in SPECIAL_COORDINATES:
        if abs(lat - s_lat) < 0.001 and abs(lon - s_lon) < 0.001:
            print(f" MATCH FOUND! Coordinate ({lat}, {lon}) is a static placeholder.")
            print(" Logic should skip DB lookup and use IP Detection instead.")
            break
    else:
        # Runs only when the loop finished without a match.
        print(" āœ… Not a special coordinate. Proceeding to cache.")

    # Step 2: Cache Check
    print("\n[STEP 2] Checking Cache...")
    # NOTE(review): presumably the same key format the application builds
    # internally; verify against city_detection_ip.
    cache_key = f'geo_{round(lat, 2)}_{round(lon, 2)}'
    cached_result = cache.get(cache_key)
    # cache.get() returns None on a miss; a stored-but-falsy value (e.g. an
    # empty dict) is still a hit, so compare against None explicitly.
    if cached_result is not None:
        print(f" āœ… CACHE HIT! key: {cache_key}")
        print(f" Result: {cached_result}")
    else:
        print(f" āŒ CACHE MISS. key: {cache_key}. Proceeding to Database.")

    # Step 3: Application Logic Execution
    print("\n[STEP 3] Executing Application Logic (city_detection_ip.py)...")
    # Raise the app logger to INFO so its internal log lines show in the trace.
    logging.getLogger('city_detection_ip').setLevel(logging.INFO)

    result = get_location_by_coordinates(lat, lon)

    print("\n[STEP 4] Final Result:")
    if result:
        print(f" SUCCESS: {result.get('city')}, {result.get('countryCode')}")
    else:
        print(" FAILED: No location found.")
    print(f"{banner}\n")
+ (6371 * acos(least(1, greatest(-1, + cos(radians(%s)) * cos(radians(latitude)) * + cos(radians(longitude) - radians(%s)) + + sin(radians(%s)) * sin(radians(latitude)) + )))) AS distance + FROM bounded_cities + ) + SELECT name, country_code, population, distance, + CASE + WHEN distance <= 30 THEN population / (distance + 1) + ELSE population / POWER(distance, 2) + END AS calculated_score + FROM distance_calc + WHERE distance <= 150 + ORDER BY calculated_score DESC + LIMIT 15; + """ + + cursor.execute(query, [lat_min, lat_max, lon_min, lon_max, lat, lon, lat]) + results = cursor.fetchall() + + if not results: + print("\nāŒ No results found within range.") + return + + print(f"\n{'Name':<25} | {'CC':<2} | {'Population':<12} | {'Distance':<8} | {'Score':<15}") + print("-" * 80) + + for name, cc, pop, dist, score in results: + dist_str = f"{dist:.2f}km" + pop_str = f"{pop:,}" + score_str = f"{score:.2f}" + print(f"{name:<25} | {cc:<2} | {pop_str:<12} | {dist_str:<8} | {score_str:<15}") + + print(f"\nWINNER: {results[0][0]} ({results[0][1]})") + print(f"{'='*80}\n") + +if __name__ == "__main__": + import sys + if len(sys.argv) == 3: + try: + l1, l2 = float(sys.argv[1]), float(sys.argv[2]) + test_sql_logic(l1, l2) + except ValueError: + print("Please provide valid numbers.") + else: + # Tehran coordinates as example + test_sql_logic(35.689, 51.389) diff --git a/test_geo_search_integer.py b/test_geo_search_integer.py new file mode 100644 index 0000000..a069db8 --- /dev/null +++ b/test_geo_search_integer.py @@ -0,0 +1,65 @@ +import os +import django +import argparse +from django.db import connection + +# Setup Django environment +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.develop') +django.setup() + +def search_cities(target_lat=None, target_lon=None): + print(f"\n{'='*80}") + print(f"SEARCHING CITIES BY INTEGER COORDINATE PART") + if target_lat is not None: print(f"Target Latitude Integer: {int(target_lat)}") + if target_lon is not None: 
print(f"Target Longitude Integer: {int(target_lon)}") + print(f"{'='*80}") + + where_clauses = [] + params = [] + + if target_lat is not None: + # TRUNC removes decimals. TRUNC(25.123) = 25. TRUNC(-25.123) = -25. + where_clauses.append("TRUNC(latitude) = %s") + params.append(int(target_lat)) + + if target_lon is not None: + where_clauses.append("TRUNC(longitude) = %s") + params.append(int(target_lon)) + + if not where_clauses: + print("Please provide at least --lat or --lon") + return + + query = f""" + SELECT name, country_code, latitude, longitude, population + FROM geonames_city + WHERE {" AND ".join(where_clauses)} + ORDER BY population DESC NULLS LAST + LIMIT 50; + """ + + with connection.cursor() as cursor: + cursor.execute(query, params) + results = cursor.fetchall() + + if not results: + print("\nāŒ No records found with those integer coordinates.") + return + + print(f"\n{'Name':<30} | {'CC':<2} | {'Latitude':<12} | {'Longitude':<12} | {'Population':<10}") + print("-" * 80) + + for name, cc, lat, lon, pop in results: + pop_str = f"{pop:,}" if pop else "N/A" + print(f"{name:<30} | {cc:<2} | {lat:<12.6f} | {lon:<12.6f} | {pop_str:<10}") + + print(f"\nFound {len(results)} records (limited to top 50 by population).") + print(f"{'='*80}\n") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Search cities by integer part of coordinates.') + parser.add_argument('--lat', type=float, help='Integer part of latitude to search for') + parser.add_argument('--lon', type=float, help='Integer part of longitude to search for') + + args = parser.parse_args() + search_cities(args.lat, args.lon)