Browse Source

Implement reverse geolocation API and optimize location history updates with batch processing

master
mortezaei 5 months ago
parent
commit
f35a117318
  1. 15
      apps/account/serializers/location_history.py
  2. 1
      apps/account/urls.py
  3. 160
      apps/account/views/location_history.py
  4. 291
      city_detection_ip.py

15
apps/account/serializers/location_history.py

@ -8,3 +8,18 @@ class LocationHistorySerializer(serializers.ModelSerializer):
class Meta:
model = LocationHistory
exclude = ('at_time',)
class ReverseGeolocationSerializer(serializers.Serializer):
    """Validate the ``lat``/``lon`` query parameters of a reverse geolocation request.

    Both fields are mandatory floats; values outside the declared bounds are
    rejected by the fields' ``min_value``/``max_value`` limits.
    """

    # Latitude in decimal degrees.
    lat = serializers.FloatField(
        help_text="Latitude coordinate (-90 to 90)",
        min_value=-90.0,
        max_value=90.0,
        required=True,
    )

    # Longitude in decimal degrees.
    lon = serializers.FloatField(
        help_text="Longitude coordinate (-180 to 180)",
        min_value=-180.0,
        max_value=180.0,
        required=True,
    )

1
apps/account/urls.py

@ -20,6 +20,7 @@ urlpatterns = [
path('location-update/', views.LocationHistoryView.as_view(), name='user-location-history'),
path('location-info/', views.RegionInfoView.as_view(), name='region-info'),
path('geolocation/coordinates/', views.ReverseGeolocationAPIView.as_view(), name='geolocation-by-coordinates'),

160
apps/account/views/location_history.py

@ -6,12 +6,15 @@ from rest_framework.mixins import CreateModelMixin
from rest_framework.permissions import IsAuthenticated
from rest_framework.generics import GenericAPIView
from rest_framework.response import Response
from rest_framework.views import APIView
from rest_framework import status
from apps.account.models import LocationHistory
from apps.account.serializers import LocationHistorySerializer
from apps.account.serializers import LocationHistorySerializer, ReverseGeolocationSerializer
import geoip2.database
import geoip2.errors
from city_detection_ip import get_location_by_coordinates, get_location_by_ip, SPECIAL_COORDINATES
from drf_yasg.utils import swagger_auto_schema
from drf_yasg import openapi
class LocationHistoryView(GenericAPIView, CreateModelMixin):
permission_classes = [IsAuthenticated]
@ -181,9 +184,24 @@ class RegionInfoView(GenericAPIView):
with geoip2.database.Reader(CITY_DB_PATH) as reader:
response = reader.city(ip)
# Extract city name with validation
city_name = None
if response.city and response.city.name:
# Check if city name is actually a subdivision (region)
# This is a known issue in GeoIP2 where subdivision names appear as city names
subdivision_names = [s.name for s in response.subdivisions] if response.subdivisions else []
if response.city.name not in subdivision_names:
# City name is valid - not a subdivision
city_name = response.city.name
else:
# City name matches a subdivision - this is a region, not a city
logger.warning(f"IP {ip}: City name '{response.city.name}' matches subdivision - treating as region")
city_name = None # Don't return region as city
location_data = {
'ip': ip,
'city': response.city.name if response.city else None,
'city': city_name,
'country': response.country.name if response.country else None,
'country_code': response.country.iso_code if response.country else None,
'latitude': response.location.latitude if response.location else None,
@ -201,4 +219,140 @@ class RegionInfoView(GenericAPIView):
return None
except Exception as e:
logger.error(f"Error getting location from IP {ip}: {str(e)}")
return None
return None
class ReverseGeolocationAPIView(APIView):
    """
    API endpoint to get location information from geographic coordinates.

    ``GET`` validates the ``lat``/``lon`` query parameters with
    ``ReverseGeolocationSerializer``, resolves them through
    ``get_location_by_coordinates`` and returns the matched city and country
    code. A city name that looks like an administrative subdivision is
    reported as ``None``.
    """

    # No permission classes are configured for this view.
    permission_classes = []

    # Whole words (compared case-insensitively) that mark a place name as an
    # administrative subdivision rather than a city.
    SUBDIVISION_KEYWORDS = (
        'Province', 'Region', 'Oblast', 'Governorate',
        'District', 'County', 'State', 'Territory',
        'استان', 'منطقه', 'ولایت', 'محافظه',
    )

    def validate_city_name_from_coordinates(self, lat, lon, city_name):
        """
        Validate that the city name is not actually a subdivision (region).

        Uses a keyword heuristic: the name is split into words and rejected
        when one of them equals an entry of ``SUBDIVISION_KEYWORDS``. Matching
        whole words (instead of substrings) keeps real cities such as
        "Hoffman Estates" or "Provincetown" from being discarded.

        Args:
            lat: Latitude coordinate (only used in log messages)
            lon: Longitude coordinate (only used in log messages)
            city_name: City name to validate

        Returns:
            The unchanged city name, or None when it is empty or looks like a
            subdivision. If validation itself fails, the name is returned as-is.
        """
        if not city_name:
            return None

        try:
            # Every non-alphanumeric character is treated as a separator so
            # punctuation ("Tehran-Province", "Region,") cannot hide a keyword.
            words = set(
                ''.join(ch if ch.isalnum() else ' ' for ch in city_name.lower()).split()
            )

            for keyword in self.SUBDIVISION_KEYWORDS:
                if keyword.lower() in words:
                    logger.warning(
                        f"⚠️ City name '{city_name}' at ({lat}, {lon}) "
                        f"contains subdivision keyword '{keyword}' - treating as region (returning None)"
                    )
                    return None

            logger.debug(f"✅ City name '{city_name}' validated for ({lat}, {lon})")
            return city_name
        except Exception as e:
            # Best effort: a validation failure must not hide an otherwise usable name.
            logger.error(f"❌ Error validating city name for coordinates ({lat}, {lon}): {str(e)}")
            return city_name  # Return as-is on error

    @swagger_auto_schema(
        operation_description="Get location information (city, country) based on geographic coordinates using reverse geocoding",
        manual_parameters=[
            openapi.Parameter(
                'lat',
                openapi.IN_QUERY,
                description="Latitude coordinate (-90 to 90)",
                type=openapi.TYPE_NUMBER,
                required=True
            ),
            openapi.Parameter(
                'lon',
                openapi.IN_QUERY,
                description="Longitude coordinate (-180 to 180)",
                type=openapi.TYPE_NUMBER,
                required=True
            ),
        ],
        responses={
            200: openapi.Response(
                description="Location information",
                # Described inline: the previously referenced
                # ReverseGeolocationResponseSerializer is neither imported nor
                # defined, which raised NameError while this class was created.
                schema=openapi.Schema(
                    type=openapi.TYPE_OBJECT,
                    properties={
                        'latitude': openapi.Schema(type=openapi.TYPE_NUMBER),
                        'longitude': openapi.Schema(type=openapi.TYPE_NUMBER),
                        'city': openapi.Schema(
                            type=openapi.TYPE_STRING,
                            description="City name; null when the match looks like a subdivision"
                        ),
                        'country': openapi.Schema(
                            type=openapi.TYPE_STRING,
                            description="Currently always null"
                        ),
                        'country_code': openapi.Schema(type=openapi.TYPE_STRING),
                        'accuracy_radius': openapi.Schema(
                            type=openapi.TYPE_INTEGER,
                            description="Currently always null"
                        ),
                        'time_zone': openapi.Schema(
                            type=openapi.TYPE_STRING,
                            description="Currently always null"
                        ),
                        'postal_code': openapi.Schema(
                            type=openapi.TYPE_STRING,
                            description="Currently always null"
                        ),
                    },
                )
            ),
            400: openapi.Response(
                description="Invalid or missing coordinates"
            ),
            404: openapi.Response(
                description="No location found for the given coordinates"
            ),
            500: openapi.Response(
                description="Internal server error"
            )
        },
        tags=['account']
    )
    def get(self, request):
        """
        Get location info from coordinates.

        Responds with 400 when ``lat``/``lon`` fail validation, with 404 when
        the lookup returns nothing or a non-success status, and otherwise with
        200 and the formatted location payload.
        """
        # Validate query parameters
        serializer = ReverseGeolocationSerializer(data=request.query_params)
        if not serializer.is_valid():
            return Response(
                {
                    'error': 'Invalid coordinates',
                    'details': serializer.errors
                },
                status=status.HTTP_400_BAD_REQUEST
            )

        lat = serializer.validated_data['lat']
        lon = serializer.validated_data['lon']

        # Log the coordinates for debugging
        logger.info(f"Reverse geocoding for coordinates: ({lat}, {lon})")

        # Get location data using the existing function from city_detection_ip.py
        location_data = get_location_by_coordinates(lat, lon)
        if not location_data or location_data.get('status') != 'success':
            return Response(
                {
                    'error': 'Could not find location data for these coordinates',
                    'latitude': lat,
                    'longitude': lon
                },
                status=status.HTTP_404_NOT_FOUND
            )

        # Validate city name to ensure it's not a subdivision (region)
        city_name = location_data.get('city')
        validated_city = self.validate_city_name_from_coordinates(lat, lon, city_name)

        # Format response; fields the lookup does not provide stay None so the
        # payload keeps a fixed set of keys.
        response_data = {
            'latitude': lat,
            'longitude': lon,
            'city': validated_city,
            'country': None,  # GeoNames only returns country_code
            'country_code': location_data.get('countryCode'),
            'accuracy_radius': None,
            'time_zone': None,
            'postal_code': None,
        }
        logger.info(f"Successfully found location for coordinates ({lat}, {lon}): {response_data.get('city')}, {response_data.get('country_code')}")
        return Response(response_data, status=status.HTTP_200_OK)

291
city_detection_ip.py

@ -1,9 +1,21 @@
import os
import time
import geoip2.database
from pathlib import Path
from django.db import connection
from django.db.models import Q
from django.core.cache import cache
from django.db import transaction
import logging
# Select the development settings unless DJANGO_SETTINGS_MODULE is already
# set in the environment (setdefault never overrides an existing value).
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.develop')
from django.core.wsgi import get_wsgi_application
# NOTE(review): presumably called so Django is initialised before the model
# imports below when this file runs as a standalone script — confirm. It also
# executes whenever this module is imported from elsewhere.
application = get_wsgi_application()
# Model imports are deliberately placed after the setup call above.
from apps.account.models import LoginHistory
from apps.account.models.geoNames import GeoNamesCity
# Configure logging
logger = logging.getLogger(__name__)
@ -59,18 +71,19 @@ def get_location_by_coordinates_optimized(lat, lon):
lon_min = lon - lon_range
lon_max = lon + lon_range
# Simplified query - remove population filter for better accuracy
# Query with population weighting to prefer larger cities
with connection.cursor() as cursor:
# First, let's get debug information about nearby cities
cursor.execute("""
WITH bounded_cities AS (
SELECT name, country_code, latitude, longitude
SELECT name, country_code, latitude, longitude, population
FROM geonames_city
WHERE feature_class = 'P'
AND latitude BETWEEN %s AND %s
AND longitude BETWEEN %s AND %s
),
distance_calc AS (
SELECT name, country_code,
SELECT name, country_code, population,
(6371 * acos(least(1, greatest(-1,
cos(radians(%s)) * cos(radians(latitude)) *
cos(radians(longitude) - radians(%s)) +
@ -78,10 +91,55 @@ def get_location_by_coordinates_optimized(lat, lon):
)))) AS distance
FROM bounded_cities
)
SELECT name, country_code
SELECT name, country_code, population, distance
FROM distance_calc
WHERE distance <= 300
WHERE distance <= 100
ORDER BY distance
LIMIT 10
""", [lat_min, lat_max, lon_min, lon_max, lat, lon, lat])
debug_results = cursor.fetchall()
if debug_results:
logger.info(f"🔍 Top 10 nearby cities for coordinates ({lat}, {lon}):")
for name, cc, pop, dist in debug_results:
logger.info(f" 📍 {name} ({cc}): population={pop:,}, distance={dist:.2f}km")
# Now get the best city using a weighted approach
# Prefer cities with larger population within reasonable distance
cursor.execute("""
WITH bounded_cities AS (
SELECT name, country_code, latitude, longitude, population
FROM geonames_city
WHERE feature_class = 'P'
AND latitude BETWEEN %s AND %s
AND longitude BETWEEN %s AND %s
AND population IS NOT NULL
AND population > 0
),
distance_calc AS (
SELECT name, country_code, population,
(6371 * acos(least(1, greatest(-1,
cos(radians(%s)) * cos(radians(latitude)) *
cos(radians(longitude) - radians(%s)) +
sin(radians(%s)) * sin(radians(latitude))
)))) AS distance
FROM bounded_cities
),
scored_cities AS (
SELECT name, country_code, distance, population,
-- Score: prefer closer cities, but weight population heavily
-- Cities within 30km: prioritize by population
-- Cities beyond 30km: balance distance and population
CASE
WHEN distance <= 30 THEN population / (distance + 1)
ELSE population / POWER(distance, 2)
END AS score
FROM distance_calc
WHERE distance <= 100
)
SELECT name, country_code
FROM scored_cities
ORDER BY score DESC
LIMIT 1
""", [lat_min, lat_max, lon_min, lon_max, lat, lon, lat])
@ -89,6 +147,7 @@ def get_location_by_coordinates_optimized(lat, lon):
if result:
name, country_code = result
logger.info(f"✅ Selected city: {name} ({country_code}) for coordinates ({lat}, {lon})")
response = {
'status': 'success',
'city': name,
@ -99,6 +158,7 @@ def get_location_by_coordinates_optimized(lat, lon):
cache.set(cache_key, response, 86400)
return response
else:
logger.warning(f"⚠️ No city found within 100km for coordinates ({lat}, {lon})")
# Cache None for 1 hour
cache.set(cache_key, None, 3600)
return None
@ -121,7 +181,7 @@ def get_location_by_coordinates_original(lat, lon):
FROM geonames_city
WHERE feature_class = 'P'
)
SELECT name, country_code, distance
SELECT name, country_code
FROM distance_calc
WHERE distance <= 300
ORDER BY distance
@ -131,7 +191,8 @@ def get_location_by_coordinates_original(lat, lon):
result = cursor.fetchone()
if result:
name, country_code, distance = result
name, country_code = result
logger.info(f"🔄 Fallback method selected city: {name} ({country_code}) for coordinates ({lat}, {lon})")
return {
'status': 'success',
'city': name,
@ -139,7 +200,8 @@ def get_location_by_coordinates_original(lat, lon):
}
return None
except Exception:
except Exception as e:
logger.error(f"❌ Error in fallback method for coordinates ({lat}, {lon}): {str(e)}")
return None
@ -165,15 +227,224 @@ def get_location_by_ip(ip):
with geoip2.database.Reader(CITY_DB_PATH) as reader:
response = reader.city(ip)
if response and response.city and response.country:
if response and response.country:
# Validate city name - check if it's not a subdivision
city_name = None
if response.city and response.city.name:
subdivision_names = [s.name for s in response.subdivisions] if response.subdivisions else []
if response.city.name not in subdivision_names:
# City name is valid - not a subdivision
city_name = response.city.name
else:
# City name matches a subdivision - this is a region, not a city
logger.warning(f"IP {ip}: City name '{response.city.name}' matches subdivision - treating as region")
city_name = None
return {
'status': 'success',
'countryCode': response.country.iso_code,
'city': response.city.name
'city': city_name
}
return None
except Exception:
return None
def _flush_login_batch(batch, kind, final=False):
    """Persist *batch* with a single ``bulk_update`` and empty the list.

    A failing bulk update is logged once as a batch failure and the batch is
    dropped, so it is neither blamed on an unrelated record nor retried on
    every following iteration. The rows are left unchanged in the database.

    Args:
        batch: Mutable list of modified ``LoginHistory`` instances; always
            emptied before returning.
        kind: Label used in log messages ("special" or "normal").
        final: True for the flush after the loop (only changes the log text).
    """
    if not batch:
        return
    try:
        with transaction.atomic():
            LoginHistory.objects.bulk_update(
                batch,
                ['country', 'city', 'location_method']
            )
        prefix = "final " if final else ""
        logger.info(f"Updated {prefix}{len(batch)} {kind} coordinate records")
    except Exception as e:
        logger.error(f"Bulk update of {len(batch)} {kind} coordinate records failed: {e}")
    finally:
        batch.clear()


def _apply_login_locations(records, lookup, location_method, kind, batch_size):
    """Resolve a location for each record and save the changes in batches.

    Args:
        records: Iterable of ``LoginHistory`` instances to process.
        lookup: Callable taking a record and returning a location dict
            (``status``, ``countryCode``, ``city``) or a falsy value.
        location_method: Value stored in ``location_method`` on success.
        kind: Label used in log messages ("special" or "normal").
        batch_size: Number of modified records that triggers a bulk update.

    Returns:
        Number of records for which a location was resolved.
    """
    pending = []
    matched = 0
    for login in records:
        try:
            location_data = lookup(login)
            if not location_data or location_data['status'] != 'success':
                continue
            login.country = location_data['countryCode']
            login.city = location_data['city']
            login.location_method = location_method
        except Exception as e:
            # One bad record must not stop the rest of the run.
            logger.error(f"Error processing {kind} record {login.id}: {e}")
            continue

        pending.append(login)
        matched += 1
        if len(pending) >= batch_size:
            _flush_login_batch(pending, kind)

    # Persist whatever is left after the loop.
    _flush_login_batch(pending, kind, final=True)
    return matched


def update_login_history_optimized():
    """
    Optimized version with batch processing and better error handling.

    Fills ``country``, ``city`` and ``location_method`` on login-history rows
    that have coordinates and are not yet marked ``IP_DETECTION``. Rows whose
    coordinates appear in ``SPECIAL_COORDINATES`` are resolved from the login
    IP (batches of 50); all other rows are resolved from their coordinates
    (batches of 20, smaller because of the geo queries). At most 1000 rows of
    each kind are handled per call.
    """
    logger.info("Starting optimized login history update...")

    # Build the coordinate lists once; both queries below need them.
    special_lats = [lat for lat, _ in SPECIAL_COORDINATES]
    special_lons = [lon for _, lon in SPECIAL_COORDINATES]

    # Query for login histories that need updating
    candidates = (
        LoginHistory.objects
        .exclude(location_method="IP_DETECTION")
        .exclude(lat__isnull=True)
        .exclude(lon__isnull=True)
    )
    # NOTE(review): lat and lon are matched independently, so a latitude from
    # one special pair combined with a longitude from another also counts as
    # special — confirm whether exact pairs were intended.
    special_records = candidates.filter(
        lat__in=special_lats, lon__in=special_lons
    )[:1000]  # Limit batch size
    # NOTE(review): rows already marked 'COORDINATES' are not excluded here,
    # so they are selected again on the next run — confirm this is intended.
    normal_records = candidates.exclude(
        lat__in=special_lats, lon__in=special_lons
    )[:1000]  # Limit batch size

    # Process special coordinates records (with IP) in batches
    _apply_login_locations(
        special_records,
        lambda login: get_location_by_ip(login.ip),
        'IP_DETECTION',
        'special',
        50,
    )

    # Process normal coordinates records (with GeoNames) in batches
    processed_normal = _apply_login_locations(
        normal_records,
        lambda login: get_location_by_coordinates(login.lat, login.lon),
        'COORDINATES',
        'normal',
        20,
    )

    logger.info(f"Completed login history update. Processed {processed_normal} normal records.")
def update_login_history():
    """Legacy entry point kept for existing callers; delegates to the optimized implementation."""
    result = update_login_history_optimized()
    return result
def update_location_history_records_optimized():
    """
    Optimized version with batch processing and progress tracking.

    Updates location history records with city and country information
    resolved by ``get_location_by_coordinates``. Only records that have
    coordinates but a missing/empty city or country are selected, at most
    1000 per call. Changes are written with ``bulk_update`` in batches of 20.

    Returns:
        None
    """
    from apps.account.models import LocationHistory

    def flush(batch, final=False):
        """Write *batch* in one bulk update and empty it, logging any failure."""
        if not batch:
            return
        try:
            with transaction.atomic():
                LocationHistory.objects.bulk_update(
                    batch,
                    ['city', 'country']
                )
            if final:
                logger.info(f"Final bulk update of {len(batch)} location history records")
            else:
                logger.info(f"Bulk updated {len(batch)} location history records")
        except Exception as e:
            # Report the failure at batch level and drop the batch so it is
            # not retried on every following record; the rows stay unchanged.
            logger.error(f"Bulk update of {len(batch)} location history records failed: {e}")
        finally:
            batch.clear()

    logger.info("Starting optimized location history update...")

    # Find records that need updating (limit to manageable batch size).
    # The slice is materialized once so the reported total always matches the
    # rows that are iterated, and no separate COUNT query is issued.
    records = list(
        LocationHistory.objects.filter(
            Q(city__isnull=True) | Q(city='') | Q(country__isnull=True) | Q(country=''),
            lat__isnull=False,
            lon__isnull=False
        )[:1000]  # Process in batches of 1000
    )
    total_records = len(records)
    logger.info(f"Found {total_records} location history records to update")

    if total_records == 0:
        logger.info("No records to update")
        return

    updated_count = 0
    batch_updates = []

    for i, record in enumerate(records, 1):
        try:
            # Get location data based on coordinates
            location_data = get_location_by_coordinates(record.lat, record.lon)
            if location_data and location_data['status'] == 'success':
                record.city = location_data['city']
                record.country = location_data['countryCode']
                batch_updates.append(record)
                updated_count += 1
        except Exception as e:
            # One bad record must not stop the rest of the run.
            logger.error(f"Error processing location history record {record.id}: {e}")

        # Progress logging every 50 records
        if i % 50 == 0:
            logger.info(f"Processed {i}/{total_records} records ({updated_count} updated)")

        # Batch update every 20 records
        if len(batch_updates) >= 20:
            flush(batch_updates)

    # Final batch update for remaining records
    flush(batch_updates, final=True)

    logger.info(f"Completed location history update. Updated {updated_count}/{total_records} records.")
def update_location_history_records():
    """Legacy entry point kept for existing callers; delegates to the optimized implementation."""
    result = update_location_history_records_optimized()
    return result
if __name__ == "__main__":
    # Script mode: send log output to both a file and the console.
    log_handlers = [
        logging.FileHandler('geo_optimization.log'),
        logging.StreamHandler(),
    ]
    logging.basicConfig(
        handlers=log_handlers,
        format='%(asctime)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )

    logger.info("Starting optimized geo location processing...")
    started_at = time.time()

    try:
        # Run both maintenance jobs in sequence and report the elapsed time.
        update_login_history()
        update_location_history_records()
        elapsed = time.time() - started_at
        logger.info(f"Completed all geo location processing in {elapsed:.2f} seconds")
    except Exception as e:
        # Log the failure, then re-raise so the error still propagates.
        logger.error(f"Error in main execution: {e}")
        raise
Loading…
Cancel
Save