5 changed files with 376 additions and 0 deletions
-
66GEOLOCATION_TESTING.md
-
98migrate_geo_data.py
-
64test_geo_app_flow.py
-
83test_geo_raw_sql.py
-
65test_geo_search_integer.py
@ -0,0 +1,66 @@ |
|||||
|
# Geolocation Diagnostic & Testing Tools |
||||
|
|
||||
|
This guide explains how to use the custom scripts created to monitor, debug, and manage the geonames city database. |
||||
|
|
||||
|
## 🚀 Migration Tool |
||||
|
|
||||
|
### `migrate_geo_data.py` |
||||
|
Used to migrate 5.1 million city records from the reference SQLite file to your primary PostgreSQL database. |
||||
|
|
||||
|
**Usage:** |
||||
|
```bash |
||||
|
python migrate_geo_data.py |
||||
|
``` |
||||
|
*Note: This script first empties the target `geonames_city` table with `TRUNCATE ... CASCADE` (all existing rows are deleted), then inserts the data in batches of 25,000 rows for performance.* |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 🔍 Diagnostic Scripts |
||||
|
|
||||
|
### 1. `test_geo_app_flow.py` |
||||
|
Monitors the **logical flow** of a reverse geolocation request. It helps you see if a coordinate is hitting a "Special Case" (placeholder), the cache, or the database. |
||||
|
|
||||
|
**Usage:** |
||||
|
```bash |
||||
|
python test_geo_app_flow.py <latitude> <longitude> |
||||
|
``` |
||||
|
**Example Output:** |
||||
|
- `[STEP 1] Checking Special Coordinates...` |
||||
|
- `[STEP 2] Checking Cache...` |
||||
|
- `[STEP 3] Executing Application Logic...` |
||||
|
|
||||
|
### 2. `test_geo_raw_sql.py` |
||||
|
Dives into the **PostgreSQL database** to show exactly which cities are being considered as candidates and how they are scored. |
||||
|
|
||||
|
**Usage:** |
||||
|
```bash |
||||
|
python test_geo_raw_sql.py <latitude> <longitude> |
||||
|
``` |
||||
|
**What to look for:** |
||||
|
- **Distance**: How far the city is from the coordinates. |
||||
|
- **Population**: Used to weight the results. |
||||
|
- **Score**: The final value used to determine the winner (Higher is better). |
||||
|
|
||||
|
### 3. `test_geo_search_integer.py` |
||||
|
A search tool to look up records by the **integer part** of their latitude and/or longitude. Useful for checking whether data exists in a specific region without knowing the exact decimals. |
||||
|
|
||||
|
**Usage:** |
||||
|
```bash |
||||
|
# Search by Latitude integer |
||||
|
python test_geo_search_integer.py --lat 25 |
||||
|
|
||||
|
# Search by Longitude integer |
||||
|
python test_geo_search_integer.py --lon 59 |
||||
|
|
||||
|
# Combined search |
||||
|
python test_geo_search_integer.py --lat 25 --lon 55 |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 🛠 Troubleshooting |
||||
|
If you receive a `ModuleNotFoundError`: |
||||
|
Ensure your virtual environment is active before running the scripts: |
||||
|
```powershell |
||||
|
.venv\Scripts\activate |
||||
|
``` |
||||
@ -0,0 +1,98 @@ |
|||||
|
import os |
||||
|
import sqlite3 |
||||
|
import django |
||||
|
import time |
||||
|
import sys |
||||
|
from pathlib import Path |
||||
|
|
||||
|
# Setup Django environment |
||||
|
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.develop') |
||||
|
django.setup() |
||||
|
|
||||
|
def print_flush(msg):
    """Print *msg* to standard output and flush the stream immediately.

    Flushing after every message keeps progress lines visible in real time
    when stdout is buffered (for example when redirected to a file or pipe).
    """
    print(msg, flush=True)
||||
|
|
||||
|
from apps.geolocation_package.models.geoNames import GeoNamesCity |
||||
|
from django.db import transaction, connection |
||||
|
|
||||
|
# Constants |
||||
|
SQLITE_DB_PATH = Path("../geolocation_package/data/geonames_city.sqlite") |
||||
|
BATCH_SIZE = 25000 |
||||
|
|
||||
|
def migrate_data():
    """Copy every row of the SQLite ``geonames_city`` table into PostgreSQL.

    Steps:
      1. Open the SQLite file at ``SQLITE_DB_PATH`` and count its rows.
      2. Empty the PostgreSQL ``geonames_city`` table with
         ``TRUNCATE ... RESTART IDENTITY CASCADE`` (destructive).
      3. Stream the SQLite rows with ``fetchmany(BATCH_SIZE)`` and insert
         each batch through ``GeoNamesCity.objects.bulk_create`` inside its
         own ``transaction.atomic()`` block.
      4. Print a summary with the elapsed time and the final row count.

    Progress, throughput and an ETA are printed after every batch.

    If the SQLite file does not exist, an error is printed and the function
    returns without touching PostgreSQL. If a batch insert raises, the error
    is printed, the migration stops, and the summary reports the run as
    failed; batches committed before the failure remain in the table.

    The SQLite connection is always closed, even when an error propagates.

    Returns:
        None
    """
    if not SQLITE_DB_PATH.exists():
        print_flush(f"Error: SQLite file not found at {SQLITE_DB_PATH}")
        return

    print_flush(f"\n{'='*60}")
    print_flush("GEONAMES DATA MIGRATION: SQLITE -> POSTGRESQL")
    print_flush(f"{'='*60}")

    # 1. Connect to SQLite
    print_flush(f"Connecting to SQLite: {SQLITE_DB_PATH}...")
    sqlite_conn = sqlite3.connect(SQLITE_DB_PATH)
    try:
        sqlite_curr = sqlite_conn.cursor()

        # 2. Get total count
        sqlite_curr.execute("SELECT COUNT(*) FROM geonames_city")
        total_records = sqlite_curr.fetchone()[0]
        print_flush(f"Found {total_records:,} records in SQLite.")

        # 3. Clear PostgreSQL table
        print_flush("\nEmptying existing PostgreSQL table (geonames_city)...")
        with connection.cursor() as cursor:
            cursor.execute("TRUNCATE TABLE geonames_city RESTART IDENTITY CASCADE;")
        print_flush(" [OK] Table truncated.")

        # 4. Migrate data in batches
        print_flush(f"\nStarting migration in batches of {BATCH_SIZE:,}...")
        start_time = time.time()

        sqlite_curr.execute("SELECT id, name, country_code, latitude, longitude, feature_class, population FROM geonames_city")

        processed = 0
        failed = False
        while True:
            rows = sqlite_curr.fetchmany(BATCH_SIZE)
            if not rows:
                break

            objs = [
                GeoNamesCity(
                    id=row[0],
                    name=row[1],
                    country_code=row[2],
                    latitude=row[3],
                    longitude=row[4],
                    feature_class=row[5],
                    population=row[6],
                )
                for row in rows
            ]

            # Only the insert is guarded, so a failure here is really a
            # database error and not a bug in the progress arithmetic below.
            try:
                with transaction.atomic():
                    GeoNamesCity.objects.bulk_create(objs)
            except Exception as e:
                print_flush(f" [Error] In batch starting at {processed}: {str(e)}")
                failed = True
                break

            processed += len(objs)
            elapsed = time.time() - start_time
            avg_speed = processed / elapsed if elapsed > 0 else 0
            remaining = (total_records - processed) / avg_speed if avg_speed > 0 else 0

            print_flush(f" [{processed:,}/{total_records:,}] ({processed/total_records*100:.1f}%) - Speed: {avg_speed:.0f} rows/s - ETA: {remaining/60:.1f} min")

        total_time = time.time() - start_time
        print_flush(f"\n{'='*60}")
        if failed:
            # Do not claim success when the loop stopped on an error.
            print_flush(f"MIGRATION FAILED after {processed:,} of {total_records:,} rows!")
        else:
            print_flush("MIGRATION COMPLETED!")
        print_flush(f"Total time: {total_time/60:.2f} minutes")
        print_flush(f"Final Count in PostgreSQL: {GeoNamesCity.objects.count():,}")
        print_flush(f"{'='*60}\n")
    finally:
        # Release the SQLite handle on every exit path.
        sqlite_conn.close()
||||
|
|
||||
|
if __name__ == "__main__":
    # Run the SQLite -> PostgreSQL migration when executed as a script.
    migrate_data()
||||
@ -0,0 +1,64 @@ |
|||||
|
import os |
||||
|
import django |
||||
|
import logging |
||||
|
from pathlib import Path |
||||
|
|
||||
|
# Setup Django environment |
||||
|
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.develop') |
||||
|
django.setup() |
||||
|
|
||||
|
from django.core.cache import cache |
||||
|
from city_detection_ip import get_location_by_coordinates, SPECIAL_COORDINATES |
||||
|
|
||||
|
def test_flow(lat, lon):
    """Trace a reverse-geolocation lookup for ``(lat, lon)`` and print each stage.

    Stages printed:
      1. Whether the point lies within 0.001 degrees (on both axes) of any
         entry in ``SPECIAL_COORDINATES``.
      2. Whether the Django cache holds a value under the key
         ``geo_<lat rounded to 2>_<lon rounded to 2>``.
      3. The call to ``get_location_by_coordinates(lat, lon)``.
      4. The city and country code of the result, or a failure message.

    All stages always run; stages 1 and 2 are informational only and do not
    short-circuit the later ones.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print(f"DEBUGGING REVERSE GEOFLOW FOR: ({lat}, {lon})")
    print(banner)

    # Step 1: Special Coordinates Check
    print("\n[STEP 1] Checking Special Coordinates...")
    matches_placeholder = any(
        abs(lat - ref_lat) < 0.001 and abs(lon - ref_lon) < 0.001
        for ref_lat, ref_lon in SPECIAL_COORDINATES
    )
    if matches_placeholder:
        print(f" MATCH FOUND! Coordinate ({lat}, {lon}) is a static placeholder.")
        print(" Logic should skip DB lookup and use IP Detection instead.")
    else:
        print(" ✅ Not a special coordinate. Proceeding to cache.")

    # Step 2: Cache Check
    print("\n[STEP 2] Checking Cache...")
    cache_key = f'geo_{round(lat, 2)}_{round(lon, 2)}'
    cached_result = cache.get(cache_key)
    if not cached_result:
        print(f" ❌ CACHE MISS. key: {cache_key}. Proceeding to Database.")
    else:
        print(f" ✅ CACHE HIT! key: {cache_key}")
        print(f" Result: {cached_result}")

    # Step 3: Application Logic Execution
    print("\n[STEP 3] Executing Application Logic (city_detection_ip.py)...")
    # Raise the app logger to INFO so its internal messages can be emitted.
    app_logger = logging.getLogger('city_detection_ip')
    app_logger.setLevel(logging.INFO)

    result = get_location_by_coordinates(lat, lon)

    print("\n[STEP 4] Final Result:")
    if not result:
        print(" FAILED: No location found.")
    else:
        print(f" SUCCESS: {result.get('city')}, {result.get('countryCode')}")
    print(f"{banner}\n")
||||
|
|
||||
|
if __name__ == "__main__":
    import sys

    # Usage: python test_geo_app_flow.py <latitude> <longitude>
    if len(sys.argv) == 3:
        # Only the argument parsing is guarded: a ValueError raised inside
        # test_flow() must not be misreported as bad command-line input.
        try:
            cli_lat, cli_lon = float(sys.argv[1]), float(sys.argv[2])
        except ValueError:
            print("Please provide valid numbers for lat and lon.")
        else:
            test_flow(cli_lat, cli_lon)
    else:
        # Default test (Karbala coordinates)
        test_flow(32.616, 44.034)
||||
@ -0,0 +1,83 @@ |
|||||
|
import os |
||||
|
import django |
||||
|
import math |
||||
|
from django.db import connection |
||||
|
|
||||
|
# Setup Django environment |
||||
|
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.develop') |
||||
|
django.setup() |
||||
|
|
||||
|
def test_sql_logic(lat, lon):
    """Print the top candidate cities and their scores for ``(lat, lon)``.

    Runs a raw SQL query against ``geonames_city`` that:
      * keeps rows with ``feature_class = 'P'`` and a positive population
        inside a +/-3 degree latitude/longitude box around the point;
      * computes a spherical-law-of-cosines distance using a 6371 radius
        (kilometres, as the output labels it);
      * scores each row as ``population / (distance + 1)`` up to 30 km and
        ``population / distance^2`` beyond that;
      * keeps rows within 150 km and returns the 15 highest scores.

    The rows are printed as a table followed by the top-scoring "winner".
    Prints a notice and returns early when nothing matches.
    """
    rule = "=" * 80
    print(f"\n{rule}")
    print(f"POSTGRESQL RAW DATA & SCORING FOR: ({lat}, {lon})")
    print(rule)

    # Bounding Box Range used in city_detection_ip.py
    lat_range = 3.0
    lon_range = 3.0
    lat_min = lat - lat_range
    lat_max = lat + lat_range
    lon_min = lon - lon_range
    lon_max = lon + lon_range

    print(f"Range: Lat [{lat_min:.2f} to {lat_max:.2f}], Lon [{lon_min:.2f} to {lon_max:.2f}]")

    query = """
        WITH bounded_cities AS (
            SELECT name, country_code, latitude, longitude, population
            FROM geonames_city
            WHERE feature_class = 'P'
            AND latitude BETWEEN %s AND %s
            AND longitude BETWEEN %s AND %s
            AND population IS NOT NULL
            AND population > 0
        ),
        distance_calc AS (
            SELECT name, country_code, population, latitude, longitude,
            (6371 * acos(least(1, greatest(-1,
            cos(radians(%s)) * cos(radians(latitude)) *
            cos(radians(longitude) - radians(%s)) +
            sin(radians(%s)) * sin(radians(latitude))
            )))) AS distance
            FROM bounded_cities
        )
        SELECT name, country_code, population, distance,
        CASE
        WHEN distance <= 30 THEN population / (distance + 1)
        ELSE population / POWER(distance, 2)
        END AS calculated_score
        FROM distance_calc
        WHERE distance <= 150
        ORDER BY calculated_score DESC
        LIMIT 15;
    """
    # Placeholder order: the four box bounds, then lat, lon, lat for the
    # distance formula.
    bind_values = [lat_min, lat_max, lon_min, lon_max, lat, lon, lat]

    with connection.cursor() as cursor:
        cursor.execute(query, bind_values)
        results = cursor.fetchall()

    if not results:
        print("\n❌ No results found within range.")
        return

    print(f"\n{'Name':<25} | {'CC':<2} | {'Population':<12} | {'Distance':<8} | {'Score':<15}")
    print("-" * 80)

    for name, cc, pop, dist, score in results:
        cells = (
            f"{name:<25}",
            f"{cc:<2}",
            f"{format(pop, ','):<12}",
            f"{format(dist, '.2f') + 'km':<8}",
            f"{format(score, '.2f'):<15}",
        )
        print(" | ".join(cells))

    # Rows arrive ordered by score, so the first one is the winner.
    winner_name, winner_cc = results[0][0], results[0][1]
    print(f"\nWINNER: {winner_name} ({winner_cc})")
    print(f"{rule}\n")
||||
|
|
||||
|
if __name__ == "__main__":
    import sys

    # Usage: python test_geo_raw_sql.py <latitude> <longitude>
    if len(sys.argv) == 3:
        # Only the argument parsing is guarded: a ValueError raised inside
        # test_sql_logic() must not be misreported as bad command-line input.
        try:
            cli_lat, cli_lon = float(sys.argv[1]), float(sys.argv[2])
        except ValueError:
            print("Please provide valid numbers.")
        else:
            test_sql_logic(cli_lat, cli_lon)
    else:
        # Tehran coordinates as example
        test_sql_logic(35.689, 51.389)
||||
@ -0,0 +1,65 @@ |
|||||
|
import os |
||||
|
import django |
||||
|
import argparse |
||||
|
from django.db import connection |
||||
|
|
||||
|
# Setup Django environment |
||||
|
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.develop') |
||||
|
django.setup() |
||||
|
|
||||
|
def search_cities(target_lat=None, target_lon=None):
    """List cities whose latitude and/or longitude has a given integer part.

    Args:
        target_lat: Latitude whose integer part (via ``int()``) must equal
            ``TRUNC(latitude)``; ``None`` skips the latitude filter.
        target_lon: Same for longitude.

    Prints up to 50 matching rows from ``geonames_city`` ordered by
    population (largest first, NULLs last). When both arguments are
    ``None`` a usage hint is printed and no query is run.
    """
    rule = "=" * 80
    print(f"\n{rule}")
    print("SEARCHING CITIES BY INTEGER COORDINATE PART")
    if target_lat is not None:
        print(f"Target Latitude Integer: {int(target_lat)}")
    if target_lon is not None:
        print(f"Target Longitude Integer: {int(target_lon)}")
    print(rule)

    # TRUNC removes decimals. TRUNC(25.123) = 25. TRUNC(-25.123) = -25.
    filters = [
        (column, int(value))
        for column, value in (("latitude", target_lat), ("longitude", target_lon))
        if value is not None
    ]

    if not filters:
        print("Please provide at least --lat or --lon")
        return

    # Only fixed column names are interpolated; the values are bound as
    # query parameters.
    where_sql = " AND ".join(f"TRUNC({column}) = %s" for column, _ in filters)
    params = [value for _, value in filters]

    query = f"""
        SELECT name, country_code, latitude, longitude, population
        FROM geonames_city
        WHERE {where_sql}
        ORDER BY population DESC NULLS LAST
        LIMIT 50;
    """

    with connection.cursor() as cursor:
        cursor.execute(query, params)
        results = cursor.fetchall()

    if not results:
        print("\n❌ No records found with those integer coordinates.")
        return

    print(f"\n{'Name':<30} | {'CC':<2} | {'Latitude':<12} | {'Longitude':<12} | {'Population':<10}")
    print("-" * 80)

    for name, cc, lat, lon, pop in results:
        # A missing (or zero) population is shown as "N/A".
        pop_str = "N/A" if not pop else f"{pop:,}"
        print(f"{name:<30} | {cc:<2} | {lat:<12.6f} | {lon:<12.6f} | {pop_str:<10}")

    print(f"\nFound {len(results)} records (limited to top 50 by population).")
    print(f"{rule}\n")
||||
|
|
||||
|
if __name__ == "__main__":
    # Both flags are optional floats; search_cities() truncates them to
    # their integer part and rejects the case where neither is given.
    cli = argparse.ArgumentParser(description='Search cities by integer part of coordinates.')
    for flag, axis in (('--lat', 'latitude'), ('--lon', 'longitude')):
        cli.add_argument(flag, type=float, help=f'Integer part of {axis} to search for')

    options = cli.parse_args()
    search_cities(options.lat, options.lon)
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue