5 changed files with 376 additions and 0 deletions
-
66GEOLOCATION_TESTING.md
-
98migrate_geo_data.py
-
64test_geo_app_flow.py
-
83test_geo_raw_sql.py
-
65test_geo_search_integer.py
@ -0,0 +1,66 @@ |
|||
# Geolocation Diagnostic & Testing Tools |
|||
|
|||
This guide explains how to use the custom scripts created to monitor, debug, and manage the geonames city database. |
|||
|
|||
## 🚀 Migration Tool |
|||
|
|||
### `migrate_geo_data.py` |
|||
Used to migrate 5.1 million city records from the reference SQLite file to your primary PostgreSQL database. |
|||
|
|||
**Usage:** |
|||
```bash |
|||
python migrate_geo_data.py |
|||
``` |
|||
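*Note: This script first empties the destination table with `TRUNCATE ... RESTART IDENTITY CASCADE` — all existing rows, and rows in any tables that reference it, are deleted — and then inserts the data in batches of 25,000 rows for speed.* |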
|||
|
|||
--- |
|||
|
|||
## 🔍 Diagnostic Scripts |
|||
|
|||
### 1. `test_geo_app_flow.py` |
|||
Monitors the **logical flow** of a reverse geolocation request. It helps you see if a coordinate is hitting a "Special Case" (placeholder), the cache, or the database. |
|||
|
|||
**Usage:** |
|||
```bash |
|||
python test_geo_app_flow.py <latitude> <longitude> |
|||
``` |
|||
**Example Output:** |
|||
- `[STEP 1] Checking Special Coordinates...` |
|||
- `[STEP 2] Checking Cache...` |
|||
- `[STEP 3] Executing Application Logic...` |
|||
|
|||
### 2. `test_geo_raw_sql.py` |
|||
Dives into the **PostgreSQL database** to show exactly which cities are being considered as candidates and how they are scored. |
|||
|
|||
**Usage:** |
|||
```bash |
|||
python test_geo_raw_sql.py <latitude> <longitude> |
|||
``` |
|||
**What to look for:** |
|||
- **Distance**: How far the city is from the coordinates. |
|||
- **Population**: Used to weight the results. |
|||
- **Score**: The final value used to determine the winner (Higher is better). |
|||
|
|||
### 3. `test_geo_search_integer.py` |
|||
A search tool to look up records by the **integer part** of their coordinates. Useful for checking whether data exists in a specific region without knowing the exact decimals. |
|||
|
|||
**Usage:** |
|||
```bash |
|||
# Search by Latitude integer |
|||
python test_geo_search_integer.py --lat 25 |
|||
|
|||
# Search by Longitude integer |
|||
python test_geo_search_integer.py --lon 59 |
|||
|
|||
# Combined search |
|||
python test_geo_search_integer.py --lat 25 --lon 55 |
|||
``` |
|||
|
|||
--- |
|||
|
|||
## 🛠 Troubleshooting |
|||
If you receive a `ModuleNotFoundError`: |
|||
Ensure your virtual environment is active before running the scripts: |
|||
```powershell |
|||
.venv\Scripts\activate |
|||
``` |
|||
@ -0,0 +1,98 @@ |
|||
import os |
|||
import sqlite3 |
|||
import django |
|||
import time |
|||
import sys |
|||
from pathlib import Path |
|||
|
|||
# Bootstrap Django before any model import: respect a DJANGO_SETTINGS_MODULE
# that is already exported, otherwise fall back to the development settings.
if 'DJANGO_SETTINGS_MODULE' not in os.environ:
    os.environ['DJANGO_SETTINGS_MODULE'] = 'config.settings.develop'
django.setup()
|||
|
|||
def print_flush(msg):
    """Print *msg* to stdout and flush the stream immediately.

    Flushing after every message keeps progress output visible in real time
    when stdout is block-buffered (e.g. piped to a file or another process).
    """
    print(msg, flush=True)
|||
|
|||
from apps.geolocation_package.models.geoNames import GeoNamesCity |
|||
from django.db import transaction, connection |
|||
|
|||
# Source SQLite database. The path is relative, so it is resolved against the
# current working directory at the time the script runs.
SQLITE_DB_PATH = Path("..") / "geolocation_package" / "data" / "geonames_city.sqlite"

# Number of rows fetched from SQLite and inserted into PostgreSQL per batch.
BATCH_SIZE = 25_000
|||
|
|||
def migrate_data():
    """Copy every row of the SQLite ``geonames_city`` table into PostgreSQL.

    The destination table is truncated first. Rows are then streamed from
    SQLite and inserted with ``bulk_create`` in batches of ``BATCH_SIZE``,
    each batch inside its own transaction, with a progress line per batch.

    Returns:
        bool: ``True`` when all batches were inserted. ``False`` when the
        SQLite file is missing or a batch failed; in the latter case the
        batches committed before the failure remain in PostgreSQL, so the
        destination table is left only partially filled.
    """
    if not SQLITE_DB_PATH.exists():
        print_flush(f"Error: SQLite file not found at {SQLITE_DB_PATH}")
        return False

    banner = '=' * 60
    print_flush(f"\n{banner}")
    print_flush("GEONAMES DATA MIGRATION: SQLITE -> POSTGRESQL")
    print_flush(banner)

    # 1. Connect to SQLite
    print_flush(f"Connecting to SQLite: {SQLITE_DB_PATH}...")
    sqlite_conn = sqlite3.connect(SQLITE_DB_PATH)
    # try/finally guarantees the SQLite handle is released even if a query
    # or the PostgreSQL side raises.
    try:
        sqlite_curr = sqlite_conn.cursor()

        # 2. Get total count (used only for progress/ETA reporting)
        sqlite_curr.execute("SELECT COUNT(*) FROM geonames_city")
        total_records = sqlite_curr.fetchone()[0]
        print_flush(f"Found {total_records:,} records in SQLite.")

        # 3. Clear PostgreSQL table
        print_flush("\nEmptying existing PostgreSQL table (geonames_city)...")
        with connection.cursor() as cursor:
            cursor.execute("TRUNCATE TABLE geonames_city RESTART IDENTITY CASCADE;")
        print_flush(" [OK] Table truncated.")

        # 4. Migrate data in batches
        print_flush(f"\nStarting migration in batches of {BATCH_SIZE:,}...")
        start_time = time.time()

        sqlite_curr.execute("SELECT id, name, country_code, latitude, longitude, feature_class, population FROM geonames_city")

        processed = 0
        failed = False
        while True:
            rows = sqlite_curr.fetchmany(BATCH_SIZE)
            if not rows:
                break

            objs = [
                GeoNamesCity(
                    id=row[0],
                    name=row[1],
                    country_code=row[2],
                    latitude=row[3],
                    longitude=row[4],
                    feature_class=row[5],
                    population=row[6],
                )
                for row in rows
            ]

            # Only the insert itself is guarded; this is the script's
            # top-level boundary, so any failure is reported and stops the run.
            try:
                with transaction.atomic():
                    GeoNamesCity.objects.bulk_create(objs)
            except Exception as e:
                print_flush(f" [Error] In batch starting at {processed}: {str(e)}")
                failed = True
                break

            processed += len(objs)
            elapsed = time.time() - start_time
            avg_speed = processed / elapsed if elapsed > 0 else 0
            remaining = (total_records - processed) / avg_speed if avg_speed > 0 else 0

            print_flush(f" [{processed:,}/{total_records:,}] ({processed/total_records*100:.1f}%) - Speed: {avg_speed:.0f} rows/s - ETA: {remaining/60:.1f} min")

        total_time = time.time() - start_time
        print_flush(f"\n{banner}")
        if failed:
            # Do not claim success after an aborted run: the table was
            # truncated and holds only the batches inserted before the error.
            print_flush("MIGRATION FAILED! PostgreSQL table is only partially populated.")
        else:
            print_flush("MIGRATION COMPLETED!")
        print_flush(f"Total time: {total_time/60:.2f} minutes")
        print_flush(f"Final Count in PostgreSQL: {GeoNamesCity.objects.count():,}")
        print_flush(f"{banner}\n")
        return not failed
    finally:
        sqlite_conn.close()


if __name__ == "__main__":
    # A non-zero exit status lets shells and CI detect a failed migration.
    sys.exit(0 if migrate_data() else 1)
|||
@ -0,0 +1,64 @@ |
|||
import os |
|||
import django |
|||
import logging |
|||
from pathlib import Path |
|||
|
|||
# Bootstrap Django before importing cache/application modules: keep an
# already-exported DJANGO_SETTINGS_MODULE, else use the development settings.
if 'DJANGO_SETTINGS_MODULE' not in os.environ:
    os.environ['DJANGO_SETTINGS_MODULE'] = 'config.settings.develop'
django.setup()
|||
|
|||
from django.core.cache import cache |
|||
from city_detection_ip import get_location_by_coordinates, SPECIAL_COORDINATES |
|||
|
|||
def test_flow(lat, lon):
    """Trace a reverse-geolocation lookup for ``(lat, lon)`` step by step.

    Prints, in order: whether the point lies within 0.001 degrees of an entry
    in ``SPECIAL_COORDINATES``; whether a value is cached under the key built
    from the coordinates rounded to two decimals; and the result returned by
    ``get_location_by_coordinates``. All steps always run; the earlier checks
    are purely informational and never short-circuit the later ones.
    """
    rule = '=' * 60
    print(f"\n{rule}")
    print(f"DEBUGGING REVERSE GEOFLOW FOR: ({lat}, {lon})")
    print(rule)

    # Step 1: Special Coordinates Check
    print("\n[STEP 1] Checking Special Coordinates...")
    hits_placeholder = any(
        abs(lat - special_lat) < 0.001 and abs(lon - special_lon) < 0.001
        for special_lat, special_lon in SPECIAL_COORDINATES
    )
    if hits_placeholder:
        print(f" MATCH FOUND! Coordinate ({lat}, {lon}) is a static placeholder.")
        print(" Logic should skip DB lookup and use IP Detection instead.")
    else:
        print(" ✅ Not a special coordinate. Proceeding to cache.")

    # Step 2: Cache Check
    print("\n[STEP 2] Checking Cache...")
    cache_key = 'geo_{}_{}'.format(round(lat, 2), round(lon, 2))
    cached_result = cache.get(cache_key)
    if not cached_result:
        print(f" ❌ CACHE MISS. key: {cache_key}. Proceeding to Database.")
    else:
        print(f" ✅ CACHE HIT! key: {cache_key}")
        print(f" Result: {cached_result}")

    # Step 3: Application Logic Execution
    print("\n[STEP 3] Executing Application Logic (city_detection_ip.py)...")
    # Lower the application logger's threshold to INFO so its internal
    # messages are not filtered out at the logger level.
    app_logger = logging.getLogger('city_detection_ip')
    app_logger.setLevel(logging.INFO)

    result = get_location_by_coordinates(lat, lon)

    print("\n[STEP 4] Final Result:")
    if not result:
        print(" FAILED: No location found.")
    else:
        print(f" SUCCESS: {result.get('city')}, {result.get('countryCode')}")
    print(f"{rule}\n")
|||
|
|||
if __name__ == "__main__":
    import sys

    cli_args = sys.argv[1:]
    if len(cli_args) == 2:
        # Guard only the number parsing: a ValueError raised inside
        # test_flow() must surface as a real error, not as "invalid input".
        try:
            lat_arg, lon_arg = float(cli_args[0]), float(cli_args[1])
        except ValueError:
            print("Please provide valid numbers for lat and lon.")
        else:
            test_flow(lat_arg, lon_arg)
    else:
        # Default test (Karbala coordinates)
        test_flow(32.616, 44.034)
|||
@ -0,0 +1,83 @@ |
|||
import os |
|||
import django |
|||
import math |
|||
from django.db import connection |
|||
|
|||
# Bootstrap Django so the configured database connection can be used: keep an
# already-exported DJANGO_SETTINGS_MODULE, else use the development settings.
if 'DJANGO_SETTINGS_MODULE' not in os.environ:
    os.environ['DJANGO_SETTINGS_MODULE'] = 'config.settings.develop'
django.setup()
|||
|
|||
def test_sql_logic(lat, lon):
    """Print the candidate cities around ``(lat, lon)`` with their scores.

    Runs one raw SQL query against ``geonames_city`` that:
      1. keeps populated places (``feature_class = 'P'``, population > 0)
         inside a +/-3 degree bounding box around the point;
      2. computes the great-circle distance in km (spherical law of cosines,
         Earth radius 6371 km, cosine clamped to [-1, 1]);
      3. scores each city as ``population / (distance + 1)`` up to 30 km and
         ``population / distance^2`` beyond, keeping cities within 150 km.

    The 15 best-scoring rows are printed as a table, followed by the winner.
    """
    rule = '=' * 80
    print(f"\n{rule}")
    print(f"POSTGRESQL RAW DATA & SCORING FOR: ({lat}, {lon})")
    print(rule)

    # Bounding Box Range used in city_detection_ip.py
    lat_half_span = 3.0
    lon_half_span = 3.0
    lat_min = lat - lat_half_span
    lat_max = lat + lat_half_span
    lon_min = lon - lon_half_span
    lon_max = lon + lon_half_span

    print(f"Range: Lat [{lat_min:.2f} to {lat_max:.2f}], Lon [{lon_min:.2f} to {lon_max:.2f}]")

    query = """
        WITH bounded_cities AS (
            SELECT name, country_code, latitude, longitude, population
            FROM geonames_city
            WHERE feature_class = 'P'
              AND latitude BETWEEN %s AND %s
              AND longitude BETWEEN %s AND %s
              AND population IS NOT NULL
              AND population > 0
        ),
        distance_calc AS (
            SELECT name, country_code, population, latitude, longitude,
                   (6371 * acos(least(1, greatest(-1,
                       cos(radians(%s)) * cos(radians(latitude)) *
                       cos(radians(longitude) - radians(%s)) +
                       sin(radians(%s)) * sin(radians(latitude))
                   )))) AS distance
            FROM bounded_cities
        )
        SELECT name, country_code, population, distance,
               CASE
                   WHEN distance <= 30 THEN population / (distance + 1)
                   ELSE population / POWER(distance, 2)
               END AS calculated_score
        FROM distance_calc
        WHERE distance <= 150
        ORDER BY calculated_score DESC
        LIMIT 15;
    """
    # Placeholder order: bounding box (4 values), then lat, lon, lat for the
    # distance formula.
    bind_values = [lat_min, lat_max, lon_min, lon_max, lat, lon, lat]

    with connection.cursor() as cursor:
        cursor.execute(query, bind_values)
        results = cursor.fetchall()

    if not results:
        print("\n❌ No results found within range.")
        return

    print(f"\n{'Name':<25} | {'CC':<2} | {'Population':<12} | {'Distance':<8} | {'Score':<15}")
    print("-" * 80)

    for name, cc, pop, dist, score in results:
        pop_str = format(pop, ',')
        dist_str = format(dist, '.2f') + "km"
        score_str = format(score, '.2f')
        print(f"{name:<25} | {cc:<2} | {pop_str:<12} | {dist_str:<8} | {score_str:<15}")

    # Rows are ordered by score descending, so the first row is the winner.
    winner_name, winner_cc = results[0][:2]
    print(f"\nWINNER: {winner_name} ({winner_cc})")
    print(f"{rule}\n")
|||
|
|||
if __name__ == "__main__":
    import sys

    cli_args = sys.argv[1:]
    if len(cli_args) == 2:
        # Guard only the number parsing: a ValueError raised inside
        # test_sql_logic() must surface as a real error, not as "invalid
        # input".
        try:
            lat_arg, lon_arg = float(cli_args[0]), float(cli_args[1])
        except ValueError:
            print("Please provide valid numbers.")
        else:
            test_sql_logic(lat_arg, lon_arg)
    else:
        # Tehran coordinates as example
        test_sql_logic(35.689, 51.389)
|||
@ -0,0 +1,65 @@ |
|||
import os |
|||
import django |
|||
import argparse |
|||
from django.db import connection |
|||
|
|||
# Bootstrap Django so the configured database connection can be used: keep an
# already-exported DJANGO_SETTINGS_MODULE, else use the development settings.
if 'DJANGO_SETTINGS_MODULE' not in os.environ:
    os.environ['DJANGO_SETTINGS_MODULE'] = 'config.settings.develop'
django.setup()
|||
|
|||
def search_cities(target_lat=None, target_lon=None):
    """List cities whose coordinates have the given integer part(s).

    Args:
        target_lat: Latitude whose integer part (``int(target_lat)``) must
            equal ``TRUNC(latitude)``; ``None`` to not filter on latitude.
        target_lon: Same for longitude; ``None`` to not filter on longitude.

    Prints up to 50 matching rows from ``geonames_city`` ordered by population
    (largest first, NULLs last). If neither argument is given, prints a hint
    and returns without querying.
    """
    rule = '=' * 80
    print(f"\n{rule}")
    print("SEARCHING CITIES BY INTEGER COORDINATE PART")
    if target_lat is not None:
        print(f"Target Latitude Integer: {int(target_lat)}")
    if target_lon is not None:
        print(f"Target Longitude Integer: {int(target_lon)}")
    print(rule)

    # One (column, integer part) pair per axis the caller supplied. Column
    # names are fixed literals; only the integer values are bound parameters.
    requested = [
        (column, int(value))
        for column, value in (("latitude", target_lat), ("longitude", target_lon))
        if value is not None
    ]
    if not requested:
        print("Please provide at least --lat or --lon")
        return

    # TRUNC removes decimals. TRUNC(25.123) = 25. TRUNC(-25.123) = -25.
    where_sql = " AND ".join(f"TRUNC({column}) = %s" for column, _ in requested)
    params = [whole_part for _, whole_part in requested]

    query = f"""
        SELECT name, country_code, latitude, longitude, population
        FROM geonames_city
        WHERE {where_sql}
        ORDER BY population DESC NULLS LAST
        LIMIT 50;
    """

    with connection.cursor() as cursor:
        cursor.execute(query, params)
        results = cursor.fetchall()

    if not results:
        print("\n❌ No records found with those integer coordinates.")
        return

    print(f"\n{'Name':<30} | {'CC':<2} | {'Latitude':<12} | {'Longitude':<12} | {'Population':<10}")
    print("-" * 80)

    for name, cc, lat, lon, pop in results:
        # A NULL or zero population is shown as N/A.
        pop_str = "N/A" if not pop else f"{pop:,}"
        print(f"{name:<30} | {cc:<2} | {lat:<12.6f} | {lon:<12.6f} | {pop_str:<10}")

    print(f"\nFound {len(results)} records (limited to top 50 by population).")
    print(f"{rule}\n")
|||
|
|||
if __name__ == "__main__":
    # Both options are optional floats; search_cities() uses only their
    # integer part and reports when neither is supplied.
    cli = argparse.ArgumentParser(description='Search cities by integer part of coordinates.')
    for flag, axis in (('--lat', 'latitude'), ('--lon', 'longitude')):
        cli.add_argument(flag, type=float, help=f'Integer part of {axis} to search for')

    options = cli.parse_args()
    search_cities(options.lat, options.lon)
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue