class ReverseGeolocationSerializer(serializers.Serializer):
    """Validate the ``lat``/``lon`` query parameters of a reverse-geolocation request.

    Both fields are required floats restricted to the usual geographic
    ranges. In addition, each value must be a finite number: ``FloatField``
    parses the string ``"nan"`` into ``float("nan")``, and NaN slips past the
    ``min_value``/``max_value`` validators because every comparison with NaN
    is false. The field-level hooks below reject it explicitly so a request
    such as ``?lat=nan`` yields a validation error instead of propagating NaN
    into the lookup and the response payload.
    """
    lat = serializers.FloatField(
        required=True,
        min_value=-90.0,
        max_value=90.0,
        help_text="Latitude coordinate (-90 to 90)"
    )
    lon = serializers.FloatField(
        required=True,
        min_value=-180.0,
        max_value=180.0,
        help_text="Longitude coordinate (-180 to 180)"
    )

    @staticmethod
    def _require_finite(value):
        """Return ``value`` unchanged, or raise ``ValidationError`` if it is NaN/inf."""
        # Local import: this block cannot see (or safely edit) the module's
        # top-level import section.
        import math

        if not math.isfinite(value):
            raise serializers.ValidationError("A finite number is required.")
        return value

    def validate_lat(self, value):
        """Field-level hook: reject non-finite latitude values."""
        return self._require_finite(value)

    def validate_lon(self, value):
        """Field-level hook: reject non-finite longitude values."""
        return self._require_finite(value)
rest_framework.mixins import CreateModelMixin from rest_framework.permissions import IsAuthenticated from rest_framework.generics import GenericAPIView from rest_framework.response import Response +from rest_framework.views import APIView from rest_framework import status from apps.account.models import LocationHistory -from apps.account.serializers import LocationHistorySerializer +from apps.account.serializers import LocationHistorySerializer, ReverseGeolocationSerializer import geoip2.database import geoip2.errors from city_detection_ip import get_location_by_coordinates, get_location_by_ip, SPECIAL_COORDINATES +from drf_yasg.utils import swagger_auto_schema +from drf_yasg import openapi class LocationHistoryView(GenericAPIView, CreateModelMixin): permission_classes = [IsAuthenticated] @@ -181,9 +184,24 @@ class RegionInfoView(GenericAPIView): with geoip2.database.Reader(CITY_DB_PATH) as reader: response = reader.city(ip) + # Extract city name with validation + city_name = None + if response.city and response.city.name: + # Check if city name is actually a subdivision (region) + # This is a known issue in GeoIP2 where subdivision names appear as city names + subdivision_names = [s.name for s in response.subdivisions] if response.subdivisions else [] + + if response.city.name not in subdivision_names: + # City name is valid - not a subdivision + city_name = response.city.name + else: + # City name matches a subdivision - this is a region, not a city + logger.warning(f"IP {ip}: City name '{response.city.name}' matches subdivision - treating as region") + city_name = None # Don't return region as city + location_data = { 'ip': ip, - 'city': response.city.name if response.city else None, + 'city': city_name, 'country': response.country.name if response.country else None, 'country_code': response.country.iso_code if response.country else None, 'latitude': response.location.latitude if response.location else None, @@ -201,4 +219,140 @@ class 
class ReverseGeolocationAPIView(APIView):
    """
    API endpoint to get location information from geographic coordinates.

    ``GET`` expects ``lat`` and ``lon`` query parameters and responds with the
    city and country code resolved by ``get_location_by_coordinates``.
    ``permission_classes`` is empty, so no permission check is applied.
    """
    permission_classes = []

    # Words that suggest a place name denotes an administrative subdivision
    # rather than a city. Matching is a case-insensitive substring test.
    SUBDIVISION_KEYWORDS = (
        'Province', 'Region', 'Oblast', 'Governorate',
        'District', 'County', 'State', 'Territory',
        'استان', 'منطقه', 'ولایت', 'محافظه',
    )

    def validate_city_name_from_coordinates(self, lat, lon, city_name):
        """
        Validate that the city name is not actually a subdivision (region).
        Uses a keyword-based heuristic to detect subdivision names.

        Args:
            lat: Latitude coordinate (used for log messages only)
            lon: Longitude coordinate (used for log messages only)
            city_name: City name to validate

        Returns:
            ``None`` when ``city_name`` is empty or contains a subdivision
            keyword; otherwise ``city_name`` unchanged.
        """
        if not city_name:
            return None

        if not isinstance(city_name, str):
            # The heuristic only applies to text; hand anything else back
            # untouched rather than failing the request.
            return city_name

        lowered_name = city_name.lower()
        for keyword in self.SUBDIVISION_KEYWORDS:
            if keyword.lower() in lowered_name:
                logger.warning(
                    f"⚠️ City name '{city_name}' at ({lat}, {lon}) "
                    f"contains subdivision keyword '{keyword}' - treating as region (returning None)"
                )
                return None

        logger.debug(f"✅ City name '{city_name}' validated for ({lat}, {lon})")
        return city_name

    @swagger_auto_schema(
        operation_description="Get location information (city, country) based on geographic coordinates using reverse geocoding",
        manual_parameters=[
            openapi.Parameter(
                'lat',
                openapi.IN_QUERY,
                description="Latitude coordinate (-90 to 90)",
                type=openapi.TYPE_NUMBER,
                required=True
            ),
            openapi.Parameter(
                'lon',
                openapi.IN_QUERY,
                description="Longitude coordinate (-180 to 180)",
                type=openapi.TYPE_NUMBER,
                required=True
            ),
        ],
        responses={
            200: openapi.Response(
                description="Location information",
                # Described inline: no response serializer class is defined
                # or imported for this endpoint, and referencing a missing
                # one here would raise NameError when the module is imported.
                schema=openapi.Schema(
                    type=openapi.TYPE_OBJECT,
                    properties={
                        'latitude': openapi.Schema(type=openapi.TYPE_NUMBER),
                        'longitude': openapi.Schema(type=openapi.TYPE_NUMBER),
                        'city': openapi.Schema(
                            type=openapi.TYPE_STRING,
                            description="City name, or null when it looks like a region"
                        ),
                        'country': openapi.Schema(
                            type=openapi.TYPE_STRING,
                            description="Always null for this endpoint"
                        ),
                        'country_code': openapi.Schema(type=openapi.TYPE_STRING),
                        'accuracy_radius': openapi.Schema(
                            type=openapi.TYPE_INTEGER,
                            description="Always null for this endpoint"
                        ),
                        'time_zone': openapi.Schema(
                            type=openapi.TYPE_STRING,
                            description="Always null for this endpoint"
                        ),
                        'postal_code': openapi.Schema(
                            type=openapi.TYPE_STRING,
                            description="Always null for this endpoint"
                        ),
                    }
                )
            ),
            400: openapi.Response(
                description="Invalid or missing coordinates"
            ),
            404: openapi.Response(
                description="No location found for the given coordinates"
            ),
            500: openapi.Response(
                description="Internal server error"
            )
        },
        tags=['account']
    )
    def get(self, request):
        """Get location info from coordinates.

        Returns 400 when the query parameters fail validation, 404 when the
        lookup yields nothing (or a non-success status), and 200 with the
        location payload otherwise.
        """
        # Validate query parameters
        serializer = ReverseGeolocationSerializer(data=request.query_params)

        if not serializer.is_valid():
            return Response(
                {
                    'error': 'Invalid coordinates',
                    'details': serializer.errors
                },
                status=status.HTTP_400_BAD_REQUEST
            )

        lat = serializer.validated_data['lat']
        lon = serializer.validated_data['lon']

        # Log the coordinates for debugging
        logger.info(f"Reverse geocoding for coordinates: ({lat}, {lon})")

        # Get location data using the existing function from city_detection_ip.py
        location_data = get_location_by_coordinates(lat, lon)

        if not location_data or location_data.get('status') != 'success':
            return Response(
                {
                    'error': 'Could not find location data for these coordinates',
                    'latitude': lat,
                    'longitude': lon
                },
                status=status.HTTP_404_NOT_FOUND
            )

        # Validate city name to ensure it's not a subdivision (region)
        city_name = location_data.get('city')
        validated_city = self.validate_city_name_from_coordinates(lat, lon, city_name)

        # Format response
        response_data = {
            'latitude': lat,
            'longitude': lon,
            'city': validated_city,
            'country': None,  # GeoNames only returns country_code
            'country_code': location_data.get('countryCode'),
            'accuracy_radius': None,
            'time_zone': None,
            'postal_code': None,
        }

        logger.info(f"Successfully found location for coordinates ({lat}, {lon}): {response_data.get('city')}, {response_data.get('country_code')}")

        return Response(response_data, status=status.HTTP_200_OK)
lon_min, lon_max, lat, lon, lat]) + + debug_results = cursor.fetchall() + if debug_results: + logger.info(f"🔍 Top 10 nearby cities for coordinates ({lat}, {lon}):") + for name, cc, pop, dist in debug_results: + logger.info(f" 📍 {name} ({cc}): population={pop:,}, distance={dist:.2f}km") + + # Now get the best city using a weighted approach + # Prefer cities with larger population within reasonable distance + cursor.execute(""" + WITH bounded_cities AS ( + SELECT name, country_code, latitude, longitude, population + FROM geonames_city + WHERE feature_class = 'P' + AND latitude BETWEEN %s AND %s + AND longitude BETWEEN %s AND %s + AND population IS NOT NULL + AND population > 0 + ), + distance_calc AS ( + SELECT name, country_code, population, + (6371 * acos(least(1, greatest(-1, + cos(radians(%s)) * cos(radians(latitude)) * + cos(radians(longitude) - radians(%s)) + + sin(radians(%s)) * sin(radians(latitude)) + )))) AS distance + FROM bounded_cities + ), + scored_cities AS ( + SELECT name, country_code, distance, population, + -- Score: prefer closer cities, but weight population heavily + -- Cities within 30km: prioritize by population + -- Cities beyond 30km: balance distance and population + CASE + WHEN distance <= 30 THEN population / (distance + 1) + ELSE population / POWER(distance, 2) + END AS score + FROM distance_calc + WHERE distance <= 100 + ) + SELECT name, country_code + FROM scored_cities + ORDER BY score DESC LIMIT 1 """, [lat_min, lat_max, lon_min, lon_max, lat, lon, lat]) @@ -89,6 +147,7 @@ def get_location_by_coordinates_optimized(lat, lon): if result: name, country_code = result + logger.info(f"✅ Selected city: {name} ({country_code}) for coordinates ({lat}, {lon})") response = { 'status': 'success', 'city': name, @@ -99,6 +158,7 @@ def get_location_by_coordinates_optimized(lat, lon): cache.set(cache_key, response, 86400) return response else: + logger.warning(f"⚠️ No city found within 100km for coordinates ({lat}, {lon})") # Cache None for 1 hour 
def _flush_batch(model, batch, fields):
    """Write ``batch`` with a single ``bulk_update`` and empty the list in place.

    Args:
        model: Model class whose default manager performs the update.
        batch: List of model instances to persist; cleared before returning.
        fields: Names of the fields to write.

    Returns:
        Number of instances that were in ``batch``.

    Raises:
        Whatever ``bulk_update`` raises. The list is cleared even then, so a
        failing batch is not retried on every later record of the same run.
    """
    count = len(batch)
    try:
        with transaction.atomic():
            model.objects.bulk_update(batch, fields)
    finally:
        batch.clear()
    return count


def _update_login_records(records, lookup, method, batch_size, label):
    """Resolve and store country/city for each login record in ``records``.

    Args:
        records: Iterable of ``LoginHistory`` instances.
        lookup: Callable taking a record and returning a location dict
            (``status``, ``countryCode``, ``city``) or ``None``.
        method: Value stored in ``location_method`` for resolved records.
        batch_size: Number of resolved records written per ``bulk_update``.
        label: Word used in log messages (``"special"`` or ``"normal"``).

    Returns:
        Number of records for which a location was resolved.
    """
    fields = ['country', 'city', 'location_method']
    pending = []
    resolved = 0

    for login in records:
        try:
            location_data = lookup(login)
            if not location_data or location_data['status'] != 'success':
                continue
            country = location_data['countryCode']
            city = location_data['city']
        except Exception as e:
            logger.error(f"Error processing {label} record {login.id}: {e}")
            continue

        login.country = country
        login.city = city
        login.location_method = method
        pending.append(login)
        resolved += 1

        if len(pending) >= batch_size:
            # Handled separately from the lookup so a database failure is
            # not reported as a problem with whichever record came last.
            try:
                written = _flush_batch(LoginHistory, pending, fields)
            except Exception as e:
                logger.error(f"Error writing batch of {label} records: {e}")
            else:
                logger.info(f"Updated {written} {label} coordinate records")

    # Final batch update for the remaining records; errors propagate to the caller.
    if pending:
        written = _flush_batch(LoginHistory, pending, fields)
        logger.info(f"Updated final {written} {label} coordinate records")

    return resolved


def update_login_history_optimized():
    """
    Fill in country/city on login history records using batch updates.

    Records whose (lat, lon) equals one of the ``SPECIAL_COORDINATES`` pairs
    are resolved from the login IP; all other records with coordinates are
    resolved from the coordinates. At most 1000 records of each kind are
    handled per call.
    """
    logger.info("Starting optimized login history update...")

    # Match exact (lat, lon) pairs. Separate ``lat__in`` / ``lon__in`` filters
    # would also match a latitude from one special pair combined with the
    # longitude of another. ``Q(pk__in=[])`` matches nothing and serves as
    # the neutral starting point for the OR chain.
    special_pairs = Q(pk__in=[])
    for lat, lon in SPECIAL_COORDINATES:
        special_pairs |= Q(lat=lat, lon=lon)

    # Query for login histories that need updating
    candidates = (
        LoginHistory.objects
        .exclude(location_method="IP_DETECTION")
        .exclude(lat__isnull=True)
        .exclude(lon__isnull=True)
    )
    special_records = candidates.filter(special_pairs)[:1000]  # Limit batch size
    # NOTE(review): records already marked 'COORDINATES' are not excluded, so
    # repeated runs may select the same rows again - confirm this is intended.
    normal_records = candidates.exclude(special_pairs)[:1000]  # Limit batch size

    # Special coordinates are resolved from the IP address
    _update_login_records(
        special_records,
        lambda login: get_location_by_ip(login.ip),
        'IP_DETECTION',
        50,
        'special',
    )

    # Normal coordinates are resolved via GeoNames (smaller batch for geo queries)
    processed_normal = _update_login_records(
        normal_records,
        lambda login: get_location_by_coordinates(login.lat, login.lon),
        'COORDINATES',
        20,
        'normal',
    )

    logger.info(f"Completed login history update. Processed {processed_normal} normal records.")


def update_login_history():
    """Backward compatibility wrapper"""
    return update_login_history_optimized()


def update_location_history_records_optimized():
    """
    Fill in city/country on location history records using batch updates.

    Selects up to 1000 records that have coordinates but a missing or empty
    city or country, resolves each via ``get_location_by_coordinates`` and
    writes the results in batches of 20.
    """
    from apps.account.models import LocationHistory

    logger.info("Starting optimized location history update...")

    # Evaluate the sliced queryset once, so the reported total and the rows
    # actually processed come from the same query.
    records = list(
        LocationHistory.objects.filter(
            Q(city__isnull=True) | Q(city='') | Q(country__isnull=True) | Q(country=''),
            lat__isnull=False,
            lon__isnull=False
        )[:1000]  # Process in batches of 1000
    )

    total_records = len(records)
    logger.info(f"Found {total_records} location history records to update")

    if total_records == 0:
        logger.info("No records to update")
        return

    fields = ['city', 'country']
    updated_count = 0
    batch_updates = []

    for i, record in enumerate(records, 1):
        try:
            # Get location data based on coordinates
            location_data = get_location_by_coordinates(record.lat, record.lon)

            if location_data and location_data['status'] == 'success':
                record.city = location_data['city']
                record.country = location_data['countryCode']
                batch_updates.append(record)
                updated_count += 1
        except Exception as e:
            logger.error(f"Error processing location history record {record.id}: {e}")
            continue

        # Progress logging every 50 records
        if i % 50 == 0:
            logger.info(f"Processed {i}/{total_records} records ({updated_count} updated)")

        # Batch update every 20 records
        if len(batch_updates) >= 20:
            try:
                written = _flush_batch(LocationHistory, batch_updates, fields)
            except Exception as e:
                logger.error(f"Error writing batch of location history records: {e}")
            else:
                logger.info(f"Bulk updated {written} location history records")

    # Final batch update for remaining records; errors propagate to the caller.
    if batch_updates:
        written = _flush_batch(LocationHistory, batch_updates, fields)
        logger.info(f"Final bulk update of {written} location history records")

    logger.info(f"Completed location history update. Updated {updated_count}/{total_records} records.")


def update_location_history_records():
    """Backward compatibility wrapper"""
    return update_location_history_records_optimized()


if __name__ == "__main__":
    # Configure logging for script execution
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler('geo_optimization.log'),
            logging.StreamHandler()
        ]
    )

    logger.info("Starting optimized geo location processing...")
    start_time = time.time()

    try:
        update_login_history()
        update_location_history_records()

        total_time = time.time() - start_time
        logger.info(f"Completed all geo location processing in {total_time:.2f} seconds")
    except Exception as e:
        logger.error(f"Error in main execution: {e}")
        raise