Browse Source

test files and md guide for it added for geolocation coordinates.

master
Mohsen Taba 1 month ago
parent
commit
afaaaab485
  1. 66
      GEOLOCATION_TESTING.md
  2. 98
      migrate_geo_data.py
  3. 64
      test_geo_app_flow.py
  4. 83
      test_geo_raw_sql.py
  5. 65
      test_geo_search_integer.py

66
GEOLOCATION_TESTING.md

@ -0,0 +1,66 @@
# Geolocation Diagnostic & Testing Tools
This guide explains how to use the custom scripts created to monitor, debug, and manage the geonames city database.
## 🚀 Migration Tool
### `migrate_geo_data.py`
Used to migrate 5.1 million city records from the reference SQLite file to your primary PostgreSQL database.
**Usage:**
```bash
python migrate_geo_data.py
```
*Note: This script first empties the existing `geonames_city` table in PostgreSQL with `TRUNCATE` (all current rows are deleted), then inserts the records in batches of 25,000 rows for performance.*
---
## 🔍 Diagnostic Scripts
### 1. `test_geo_app_flow.py`
Monitors the **logical flow** of a reverse geolocation request. It helps you see if a coordinate is hitting a "Special Case" (placeholder), the cache, or the database.
**Usage:**
```bash
python test_geo_app_flow.py <latitude> <longitude>
```
**Example Output:**
- `[STEP 1] Checking Special Coordinates...`
- `[STEP 2] Checking Cache...`
- `[STEP 3] Executing Application Logic...`
- `[STEP 4] Final Result:`
### 2. `test_geo_raw_sql.py`
Dives into the **PostgreSQL database** to show exactly which cities are being considered as candidates and how they are scored.
**Usage:**
```bash
python test_geo_raw_sql.py <latitude> <longitude>
```
**What to look for:**
- **Distance**: How far the city is from the coordinates.
- **Population**: Used to weight the results.
- **Score**: The final value used to determine the winner (Higher is better).
### 3. `test_geo_search_integer.py`
A search tool to look up records by the **integer part** of their coordinates. Useful for checking whether data exists in a specific region without knowing the exact decimals.
**Usage:**
```bash
# Search by Latitude integer
python test_geo_search_integer.py --lat 25
# Search by Longitude integer
python test_geo_search_integer.py --lon 59
# Combined search
python test_geo_search_integer.py --lat 25 --lon 55
```
---
## 🛠 Troubleshooting
If you receive a `ModuleNotFoundError`:
Ensure your virtual environment is active before running the scripts:
```powershell
.venv\Scripts\activate
```

98
migrate_geo_data.py

@ -0,0 +1,98 @@
import os
import sqlite3
import django
import time
import sys
from pathlib import Path
# Setup Django environment
# setdefault() leaves an already-set DJANGO_SETTINGS_MODULE untouched, so the
# develop settings are only a fallback. django.setup() has to run before the
# model import further down in this script.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.develop')
django.setup()
def print_flush(msg):
    """Print *msg* on standard output and flush the stream right away.

    Flushing after every message keeps progress lines visible immediately,
    even when standard output is buffered (for example when it is piped).
    """
    print(msg, flush=True)
from apps.geolocation_package.models.geoNames import GeoNamesCity
from django.db import transaction, connection
# Constants
# Relative path: it is resolved against the current working directory, so the
# script has to be launched from a directory where this path is valid.
SQLITE_DB_PATH = Path("../geolocation_package/data/geonames_city.sqlite")
# Number of rows fetched from SQLite and inserted into PostgreSQL per batch.
BATCH_SIZE = 25000
def migrate_data():
    """Copy every row of the SQLite ``geonames_city`` table into PostgreSQL.

    The PostgreSQL ``geonames_city`` table is truncated first. Rows are then
    streamed from SQLite and inserted with ``GeoNamesCity.objects.bulk_create``
    in batches of ``BATCH_SIZE``, each batch inside its own transaction.
    Progress (rows done, speed, ETA) is printed after every batch.

    If a batch fails, the run stops at that batch: batches committed earlier
    stay in the table, and the summary reports a failure instead of a
    completed migration. The SQLite connection is always closed, whether the
    run succeeds or not.

    Returns:
        None. Returns early, before touching PostgreSQL, when the SQLite file
        does not exist.
    """
    if not SQLITE_DB_PATH.exists():
        print_flush(f"Error: SQLite file not found at {SQLITE_DB_PATH}")
        return

    banner = "=" * 60
    print_flush(f"\n{banner}")
    print_flush("GEONAMES DATA MIGRATION: SQLITE -> POSTGRESQL")
    print_flush(banner)

    # 1. Connect to SQLite
    print_flush(f"Connecting to SQLite: {SQLITE_DB_PATH}...")
    sqlite_conn = sqlite3.connect(SQLITE_DB_PATH)
    try:
        sqlite_curr = sqlite_conn.cursor()

        # 2. Get total count
        sqlite_curr.execute("SELECT COUNT(*) FROM geonames_city")
        total_records = sqlite_curr.fetchone()[0]
        print_flush(f"Found {total_records:,} records in SQLite.")

        # 3. Clear PostgreSQL table
        print_flush("\nEmptying existing PostgreSQL table (geonames_city)...")
        with connection.cursor() as cursor:
            cursor.execute("TRUNCATE TABLE geonames_city RESTART IDENTITY CASCADE;")
        print_flush(" [OK] Table truncated.")

        # 4. Migrate data in batches
        print_flush(f"\nStarting migration in batches of {BATCH_SIZE:,}...")
        start_time = time.time()
        sqlite_curr.execute(
            "SELECT id, name, country_code, latitude, longitude, feature_class, population FROM geonames_city"
        )

        processed = 0
        failed = False
        while True:
            rows = sqlite_curr.fetchmany(BATCH_SIZE)
            if not rows:
                break

            objs = [
                GeoNamesCity(
                    id=row[0],
                    name=row[1],
                    country_code=row[2],
                    latitude=row[3],
                    longitude=row[4],
                    feature_class=row[5],
                    population=row[6],
                )
                for row in rows
            ]

            try:
                with transaction.atomic():
                    GeoNamesCity.objects.bulk_create(objs)
            except Exception as e:
                # Stop at the first failing batch and remember the failure so
                # the summary below does not claim the migration completed.
                print_flush(f" [Error] In batch starting at {processed}: {str(e)}")
                failed = True
                break

            processed += len(objs)
            elapsed = time.time() - start_time
            avg_speed = processed / elapsed if elapsed > 0 else 0
            remaining = (total_records - processed) / avg_speed if avg_speed > 0 else 0
            print_flush(
                f" [{processed:,}/{total_records:,}] ({processed/total_records*100:.1f}%)"
                f" - Speed: {avg_speed:.0f} rows/s - ETA: {remaining/60:.1f} min"
            )

        total_time = time.time() - start_time
        print_flush(f"\n{banner}")
        print_flush("MIGRATION FAILED!" if failed else "MIGRATION COMPLETED!")
        print_flush(f"Total time: {total_time/60:.2f} minutes")
        print_flush(f"Final Count in PostgreSQL: {GeoNamesCity.objects.count():,}")
        print_flush(f"{banner}\n")
    finally:
        # Release the SQLite handle even when counting, truncating or
        # inserting raised.
        sqlite_conn.close()
# Run the migration only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    migrate_data()

64
test_geo_app_flow.py

@ -0,0 +1,64 @@
import os
import django
import logging
from pathlib import Path
# Setup Django environment
# setdefault() leaves an already-set DJANGO_SETTINGS_MODULE untouched, so the
# develop settings are only a fallback.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.develop')
django.setup()
from django.core.cache import cache
from city_detection_ip import get_location_by_coordinates, SPECIAL_COORDINATES
def test_flow(lat, lon):
    """Trace a reverse-geolocation lookup for one coordinate, step by step.

    Prints, in order: whether the coordinate lies within 0.001 degrees (on
    both axes) of an entry in ``SPECIAL_COORDINATES``; whether the cache holds
    a truthy value under the key built from both values rounded to two
    decimals; and the result of ``get_location_by_coordinates(lat, lon)``.
    All steps always run; a special-coordinate match is only reported.

    Args:
        lat: Latitude as a number.
        lon: Longitude as a number.
    """
    divider = '=' * 60
    print(f"\n{divider}")
    print(f"DEBUGGING REVERSE GEOFLOW FOR: ({lat}, {lon})")
    print(divider)

    # Step 1: Special Coordinates Check
    print("\n[STEP 1] Checking Special Coordinates...")
    matches_placeholder = any(
        abs(lat - s_lat) < 0.001 and abs(lon - s_lon) < 0.001
        for s_lat, s_lon in SPECIAL_COORDINATES
    )
    if matches_placeholder:
        print(f" MATCH FOUND! Coordinate ({lat}, {lon}) is a static placeholder.")
        print(" Logic should skip DB lookup and use IP Detection instead.")
    else:
        print(" ✅ Not a special coordinate. Proceeding to cache.")

    # Step 2: Cache Check
    # NOTE(review): the key format is this script's own construction; confirm
    # it matches the key used by the application code.
    print("\n[STEP 2] Checking Cache...")
    cache_key = f'geo_{round(lat, 2)}_{round(lon, 2)}'
    cached_result = cache.get(cache_key)
    if not cached_result:
        print(f" ❌ CACHE MISS. key: {cache_key}. Proceeding to Database.")
    else:
        print(f" ✅ CACHE HIT! key: {cache_key}")
        print(f" Result: {cached_result}")

    # Step 3: Application Logic Execution
    print("\n[STEP 3] Executing Application Logic (city_detection_ip.py)...")
    # Note: ensure logging is at INFO level to see app's internal logs
    app_logger = logging.getLogger('city_detection_ip')
    app_logger.setLevel(logging.INFO)
    result = get_location_by_coordinates(lat, lon)

    print("\n[STEP 4] Final Result:")
    if not result:
        print(" FAILED: No location found.")
    else:
        print(f" SUCCESS: {result.get('city')}, {result.get('countryCode')}")
    print(f"{divider}\n")
# Script entry point: with exactly two arguments they are parsed as latitude
# and longitude; with any other argument count the built-in default is used.
if __name__ == "__main__":
    import sys

    if len(sys.argv) == 3:
        # Only the argument parsing is guarded, so a ValueError raised inside
        # test_flow() is not misreported as invalid command-line input.
        try:
            l1, l2 = float(sys.argv[1]), float(sys.argv[2])
        except ValueError:
            print("Please provide valid numbers for lat and lon.")
        else:
            test_flow(l1, l2)
    else:
        # Default test (Karbala coordinates)
        test_flow(32.616, 44.034)

83
test_geo_raw_sql.py

@ -0,0 +1,83 @@
import os
import django
import math
from django.db import connection
# Setup Django environment
# setdefault() leaves an already-set DJANGO_SETTINGS_MODULE untouched, so the
# develop settings are only a fallback.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.develop')
django.setup()
def test_sql_logic(lat, lon):
    """Print the candidate cities and their scores for one coordinate.

    Runs a raw SQL query against ``geonames_city`` that keeps rows with
    ``feature_class = 'P'`` and a positive population inside a box of
    +/- 3 degrees around the point, computes a great-circle distance in
    kilometres, drops rows farther than 150 km and ranks the rest by score:
    ``population / (distance + 1)`` up to 30 km, ``population / distance^2``
    beyond that. The top 15 rows are printed and the first one is named as
    the winner.

    Args:
        lat: Latitude in degrees.
        lon: Longitude in degrees.
    """
    rule = '=' * 80
    print(f"\n{rule}")
    print(f"POSTGRESQL RAW DATA & SCORING FOR: ({lat}, {lon})")
    print(rule)

    # Bounding Box Range used in city_detection_ip.py
    lat_range = 3.0
    lon_range = 3.0
    lat_min = lat - lat_range
    lat_max = lat + lat_range
    lon_min = lon - lon_range
    lon_max = lon + lon_range
    print(f"Range: Lat [{lat_min:.2f} to {lat_max:.2f}], Lon [{lon_min:.2f} to {lon_max:.2f}]")

    query = """
WITH bounded_cities AS (
SELECT name, country_code, latitude, longitude, population
FROM geonames_city
WHERE feature_class = 'P'
AND latitude BETWEEN %s AND %s
AND longitude BETWEEN %s AND %s
AND population IS NOT NULL
AND population > 0
),
distance_calc AS (
SELECT name, country_code, population, latitude, longitude,
(6371 * acos(least(1, greatest(-1,
cos(radians(%s)) * cos(radians(latitude)) *
cos(radians(longitude) - radians(%s)) +
sin(radians(%s)) * sin(radians(latitude))
)))) AS distance
FROM bounded_cities
)
SELECT name, country_code, population, distance,
CASE
WHEN distance <= 30 THEN population / (distance + 1)
ELSE population / POWER(distance, 2)
END AS calculated_score
FROM distance_calc
WHERE distance <= 150
ORDER BY calculated_score DESC
LIMIT 15;
"""
    # Placeholder order: the four box bounds, then lat, lon, lat for the
    # distance formula.
    bound_values = [lat_min, lat_max, lon_min, lon_max, lat, lon, lat]
    with connection.cursor() as cursor:
        cursor.execute(query, bound_values)
        results = cursor.fetchall()

    if not results:
        print("\n❌ No results found within range.")
        return

    print(f"\n{'Name':<25} | {'CC':<2} | {'Population':<12} | {'Distance':<8} | {'Score':<15}")
    print("-" * 80)
    for city, country, population, distance_km, score in results:
        dist_str = f"{distance_km:.2f}km"
        pop_str = f"{population:,}"
        score_str = f"{score:.2f}"
        print(f"{city:<25} | {country:<2} | {pop_str:<12} | {dist_str:<8} | {score_str:<15}")

    # Rows are ordered by score, so the first row is the winner.
    best = results[0]
    print(f"\nWINNER: {best[0]} ({best[1]})")
    print(f"{rule}\n")
# Script entry point: with exactly two arguments they are parsed as latitude
# and longitude; with any other argument count the built-in default is used.
if __name__ == "__main__":
    import sys

    if len(sys.argv) == 3:
        # Only the argument parsing is guarded, so a ValueError raised inside
        # test_sql_logic() is not misreported as invalid command-line input.
        try:
            l1, l2 = float(sys.argv[1]), float(sys.argv[2])
        except ValueError:
            print("Please provide valid numbers.")
        else:
            test_sql_logic(l1, l2)
    else:
        # Tehran coordinates as example
        test_sql_logic(35.689, 51.389)

65
test_geo_search_integer.py

@ -0,0 +1,65 @@
import os
import django
import argparse
from django.db import connection
# Setup Django environment
# setdefault() leaves an already-set DJANGO_SETTINGS_MODULE untouched, so the
# develop settings are only a fallback.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.develop')
django.setup()
def search_cities(target_lat=None, target_lon=None):
    """List cities whose coordinates have the given integer part.

    Each supplied value is truncated with ``int()`` and compared against
    ``TRUNC(latitude)`` / ``TRUNC(longitude)`` in ``geonames_city``. Up to 50
    matches are printed, ordered by population (largest first, NULLs last).

    Args:
        target_lat: Latitude whose integer part is searched, or None to skip.
        target_lon: Longitude whose integer part is searched, or None to skip.

    Returns:
        None. Prints a hint and returns without querying when neither
        argument is given.
    """
    divider = '=' * 80
    print(f"\n{divider}")
    print("SEARCHING CITIES BY INTEGER COORDINATE PART")
    if target_lat is not None:
        print(f"Target Latitude Integer: {int(target_lat)}")
    if target_lon is not None:
        print(f"Target Longitude Integer: {int(target_lon)}")
    print(divider)

    # Maps each SQL condition to its bound value; insertion order keeps the
    # latitude condition first.
    conditions = {}
    if target_lat is not None:
        # TRUNC removes decimals. TRUNC(25.123) = 25. TRUNC(-25.123) = -25.
        conditions["TRUNC(latitude) = %s"] = int(target_lat)
    if target_lon is not None:
        conditions["TRUNC(longitude) = %s"] = int(target_lon)

    if not conditions:
        print("Please provide at least --lat or --lon")
        return

    where_sql = " AND ".join(conditions)
    query = f"""
SELECT name, country_code, latitude, longitude, population
FROM geonames_city
WHERE {where_sql}
ORDER BY population DESC NULLS LAST
LIMIT 50;
"""
    with connection.cursor() as cursor:
        cursor.execute(query, list(conditions.values()))
        results = cursor.fetchall()

    if not results:
        print("\n❌ No records found with those integer coordinates.")
        return

    print(f"\n{'Name':<30} | {'CC':<2} | {'Latitude':<12} | {'Longitude':<12} | {'Population':<10}")
    print("-" * 80)
    for city, country, city_lat, city_lon, population in results:
        # A NULL or zero population is shown as N/A.
        pop_str = "N/A" if not population else f"{population:,}"
        print(f"{city:<30} | {country:<2} | {city_lat:<12.6f} | {city_lon:<12.6f} | {pop_str:<10}")

    print(f"\nFound {len(results)} records (limited to top 50 by population).")
    print(f"{divider}\n")
# Script entry point: read the optional --lat / --lon values and run the
# search with them (an omitted flag is passed on as None).
if __name__ == "__main__":
    cli = argparse.ArgumentParser(
        description='Search cities by integer part of coordinates.'
    )
    cli.add_argument('--lat', type=float, help='Integer part of latitude to search for')
    cli.add_argument('--lon', type=float, help='Integer part of longitude to search for')
    options = cli.parse_args()
    search_cities(options.lat, options.lon)
Loading…
Cancel
Save