def fix_existing_data(self):
    """Repair empty and duplicate slugs left over from earlier seeding runs.

    This is "Phase 0" of the command: the caller invokes it before opening
    the main ``transaction.atomic()`` block, so every repair made here is
    saved independently of whether the later phases succeed.

    Steps, in order:

    1. ``NarratorLayer`` rows whose slug is NULL, ``''`` or ``' '`` get a
       slug derived from the first entry of ``name`` (fallback:
       ``layer-<number>``).
    2. ``TransmitterReliability`` rows with an empty slug get one derived
       from ``title`` (fallback: ``reliability-<timestamp>``); afterwards
       rows sharing a slug are renamed so that only one row keeps it.
    3. The same two repairs for ``OpinionStatus`` (fallback prefix
       ``opinion``).

    Every generated slug is checked against the table and suffixed with
    ``-<counter>`` until no other row uses it.
    """
    from datetime import datetime

    from django.db.models import Count, Q
    from django.utils.text import slugify

    # A slug counts as "missing" when it is NULL, empty or a single space.
    missing_slug = Q(slug__isnull=True) | Q(slug='') | Q(slug=' ')

    def timestamp_slug(prefix):
        """Return ``<prefix>-<microsecond timestamp>`` as a last-resort slug."""
        return f"{prefix}-{datetime.now().strftime('%Y%m%d%H%M%S%f')}"

    def derive_slug(translations, fallback):
        """Slugify the first translation's text, or return *fallback*.

        *translations* is expected to look like
        ``[{'language_code': ..., 'text': ...}, ...]``; anything else
        (None, empty list, wrong element type) yields *fallback*.
        """
        candidate = ''
        try:
            if translations and isinstance(translations, list):
                text = translations[0].get('text', '').strip()
                if text:
                    candidate = slugify(text)
        except (IndexError, KeyError, AttributeError, TypeError):
            candidate = ''
        # slugify() drops every non-ASCII character by default, so text that
        # is entirely non-Latin (e.g. Cyrillic) comes back as ''. Without
        # this fallback the row would be saved with an empty slug again.
        return candidate or fallback

    def make_unique(model, base_slug, pk):
        """Append ``-1``, ``-2``, ... to *base_slug* until no other row has it."""
        candidate = base_slug
        counter = 1
        while model.objects.filter(slug=candidate).exclude(pk=pk).exists():
            candidate = f"{base_slug}-{counter}"
            counter += 1
        return candidate

    def fix_empty(model, records, text_attr, fallback_for, message_for):
        """Assign a fresh unique slug to each record in *records*."""
        for record in records:
            base_slug = derive_slug(getattr(record, text_attr), fallback_for(record))
            record.slug = make_unique(model, base_slug, record.pk)
            record.save(update_fields=['slug'])
            self.stdout.write(message_for(record))

    def fix_duplicates(model, label):
        """Rename all but one of the rows that share the same slug."""
        duplicated = (
            model.objects.values('slug')
            .annotate(count=Count('id'))
            .filter(count__gt=1)
            # Clear any default ordering so it cannot leak into GROUP BY
            # and split the groups (an issue on older Django versions).
            .order_by()
        )
        # Materialise the slug list first: rows are modified inside the loop.
        for slug_value in [row['slug'] for row in duplicated]:
            # Ordering by pk makes the surviving row deterministic (lowest pk).
            records = list(model.objects.filter(slug=slug_value).order_by('pk'))
            for index, record in enumerate(records[1:], start=1):
                base_slug = f"{slug_value}-{index}-{datetime.now().strftime('%H%M%S%f')}"
                record.slug = make_unique(model, base_slug, record.pk)
                record.save(update_fields=['slug'])
                self.stdout.write(f' Fixed duplicate {label}: "{record.slug}"')

    # --- NarratorLayer: empty slugs only -------------------------------
    self.stdout.write('Fixing NarratorLayer empty slugs...')
    empty_layers = list(NarratorLayer.objects.filter(missing_slug))
    self.stdout.write(f'Found {len(empty_layers)} layers with empty slugs')
    fix_empty(
        NarratorLayer,
        empty_layers,
        'name',
        lambda layer: f"layer-{layer.number}",
        lambda layer: f' Fixed layer {layer.pk} (number={layer.number}): "{layer.slug}"',
    )

    # --- TransmitterReliability: empty slugs, then duplicates ----------
    self.stdout.write('Fixing TransmitterReliability duplicates...')
    fix_empty(
        TransmitterReliability,
        list(TransmitterReliability.objects.filter(missing_slug)),
        'title',
        lambda record: timestamp_slug('reliability'),
        lambda record: f' Fixed reliability {record.pk}: "{record.slug}"',
    )
    fix_duplicates(TransmitterReliability, 'reliability')

    # --- OpinionStatus: empty slugs, then duplicates -------------------
    self.stdout.write('Fixing OpinionStatus duplicates...')
    fix_empty(
        OpinionStatus,
        list(OpinionStatus.objects.filter(missing_slug)),
        'title',
        lambda record: timestamp_slug('opinion'),
        lambda record: f' Fixed opinion status {record.pk}: "{record.slug}"',
    )
    fix_duplicates(OpinionStatus, 'opinion status')

    self.stdout.write(self.style.SUCCESS('Data integrity fixes complete!'))
duplicates + """Create narrator layers (slugs already fixed in Phase 0)""" + # Create or get narrator layers - use filter().first() to avoid duplicates for layer_data in RUSSIAN_NARRATOR_LAYERS: try: # Try to find existing layer by number @@ -360,7 +464,6 @@ class Command(BaseCommand): if layer: # Layer already exists - created = False self.stdout.write(f"Using existing layer {layer_data['number']}") else: # Create new layer @@ -369,7 +472,6 @@ class Command(BaseCommand): name=[{'language_code': 'ru', 'text': layer_data['name']}], description=[{'language_code': 'ru', 'text': layer_data['description']}] ) - created = True self.stdout.write(f"Created new layer {layer_data['number']}") self.narrator_layers.append(layer) @@ -388,93 +490,13 @@ class Command(BaseCommand): self.created_counts['narrator_layers'] = len(self.narrator_layers) def create_reliability_statuses(self): - """Create transmitter reliability statuses - fixes duplicates first""" - from django.utils.text import slugify - from django.db.models import Count, Q - - # Fix records with empty/null slugs first - empty_slug_records = TransmitterReliability.objects.filter(Q(slug__isnull=True) | Q(slug='') | Q(slug=' ')) - self.stdout.write(f"Found {empty_slug_records.count()} reliability records with empty slugs") - - for record in empty_slug_records: - try: - if record.title and isinstance(record.title, list) and len(record.title) > 0: - text = record.title[0].get('text', '').strip() - if text: - new_slug = slugify(text) - else: - from datetime import datetime - new_slug = f"reliability-{datetime.now().strftime('%Y%m%d%H%M%S%f')}" - else: - from datetime import datetime - new_slug = f"reliability-{datetime.now().strftime('%Y%m%d%H%M%S%f')}" - except: - from datetime import datetime - new_slug = f"reliability-{datetime.now().strftime('%Y%m%d%H%M%S%f')}" - - # Ensure uniqueness - counter = 1 - original_slug = new_slug - while TransmitterReliability.objects.filter(slug=new_slug).exclude(pk=record.pk).exists(): - 
new_slug = f"{original_slug}-{counter}" - counter += 1 - - record.slug = new_slug - record.save(update_fields=['slug']) - self.stdout.write(f"Fixed empty reliability slug: '{new_slug}'") - - # Find and fix duplicate slugs - duplicates = TransmitterReliability.objects.values('slug').annotate( - count=Count('id') - ).filter(count__gt=1) - - self.stdout.write(f"Found {duplicates.count()} duplicate reliability slugs") - - for dup in duplicates: - slug_value = dup['slug'] - # Get all records with this slug - records = list(TransmitterReliability.objects.filter(slug=slug_value)) - self.stdout.write(f"Processing {len(records)} records with slug '{slug_value}'") - - # Keep the first one, update others - for i, record in enumerate(records): - if i == 0: - continue # Keep first record as is - else: - # Update slug to make it unique - try: - if record.title and isinstance(record.title, list) and len(record.title) > 0: - text = record.title[0].get('text', '').strip() - if text: - new_slug = slugify(text) - else: - from datetime import datetime - new_slug = f"reliability-{datetime.now().strftime('%Y%m%d%H%M%S%f')}-{i}" - else: - from datetime import datetime - new_slug = f"reliability-{datetime.now().strftime('%Y%m%d%H%M%S%f')}-{i}" - except: - from datetime import datetime - new_slug = f"reliability-{datetime.now().strftime('%Y%m%d%H%M%S%f')}-{i}" - - # Ensure uniqueness - counter = 1 - original_slug = new_slug - while TransmitterReliability.objects.filter(slug=new_slug).exclude(pk=record.pk).exists(): - new_slug = f"{original_slug}-{counter}" - counter += 1 - - record.slug = new_slug - record.save(update_fields=['slug']) - self.stdout.write(f"Fixed duplicate reliability slug: '{new_slug}'") - - # Now create or get reliability statuses - using filter().first() to avoid MultipleObjectsReturned + """Create transmitter reliability statuses (duplicates already fixed in Phase 0)""" + # Create or get reliability statuses - using filter().first() to avoid MultipleObjectsReturned for 
reliability_data in RUSSIAN_RELIABILITY_LEVELS: try: # Try to get by slug first reliability = TransmitterReliability.objects.filter(slug=reliability_data['slug']).first() if reliability: - created = False self.stdout.write(f"Using existing reliability: {reliability_data['slug']}") else: # Create new one @@ -483,7 +505,6 @@ class Command(BaseCommand): title=[{'language_code': 'ru', 'text': reliability_data['title']}], color=reliability_data['color'] ) - created = True self.stdout.write(f"Created new reliability: {reliability_data['slug']}") self.reliability_statuses.append(reliability) @@ -495,93 +516,13 @@ class Command(BaseCommand): self.created_counts['reliability_statuses'] = len(self.reliability_statuses) def create_opinion_statuses(self): - """Create opinion statuses - fixes duplicates first""" - from django.utils.text import slugify - from django.db.models import Count, Q - - # Fix records with empty/null slugs first - empty_slug_records = OpinionStatus.objects.filter(Q(slug__isnull=True) | Q(slug='') | Q(slug=' ')) - self.stdout.write(f"Found {empty_slug_records.count()} opinion status records with empty slugs") - - for record in empty_slug_records: - try: - if record.title and isinstance(record.title, list) and len(record.title) > 0: - text = record.title[0].get('text', '').strip() - if text: - new_slug = slugify(text) - else: - from datetime import datetime - new_slug = f"opinion-{datetime.now().strftime('%Y%m%d%H%M%S%f')}" - else: - from datetime import datetime - new_slug = f"opinion-{datetime.now().strftime('%Y%m%d%H%M%S%f')}" - except: - from datetime import datetime - new_slug = f"opinion-{datetime.now().strftime('%Y%m%d%H%M%S%f')}" - - # Ensure uniqueness - counter = 1 - original_slug = new_slug - while OpinionStatus.objects.filter(slug=new_slug).exclude(pk=record.pk).exists(): - new_slug = f"{original_slug}-{counter}" - counter += 1 - - record.slug = new_slug - record.save(update_fields=['slug']) - self.stdout.write(f"Fixed empty opinion status 
slug: '{new_slug}'") - - # Find and fix duplicate slugs - duplicates = OpinionStatus.objects.values('slug').annotate( - count=Count('id') - ).filter(count__gt=1) - - self.stdout.write(f"Found {duplicates.count()} duplicate opinion status slugs") - - for dup in duplicates: - slug_value = dup['slug'] - # Get all records with this slug - records = list(OpinionStatus.objects.filter(slug=slug_value)) - self.stdout.write(f"Processing {len(records)} records with slug '{slug_value}'") - - # Keep the first one, update others - for i, record in enumerate(records): - if i == 0: - continue # Keep first record as is - else: - # Update slug to make it unique - try: - if record.title and isinstance(record.title, list) and len(record.title) > 0: - text = record.title[0].get('text', '').strip() - if text: - new_slug = slugify(text) - else: - from datetime import datetime - new_slug = f"opinion-{datetime.now().strftime('%Y%m%d%H%M%S%f')}-{i}" - else: - from datetime import datetime - new_slug = f"opinion-{datetime.now().strftime('%Y%m%d%H%M%S%f')}-{i}" - except: - from datetime import datetime - new_slug = f"opinion-{datetime.now().strftime('%Y%m%d%H%M%S%f')}-{i}" - - # Ensure uniqueness - counter = 1 - original_slug = new_slug - while OpinionStatus.objects.filter(slug=new_slug).exclude(pk=record.pk).exists(): - new_slug = f"{original_slug}-{counter}" - counter += 1 - - record.slug = new_slug - record.save(update_fields=['slug']) - self.stdout.write(f"Fixed duplicate opinion status slug: '{new_slug}'") - - # Now create or get opinion statuses - using filter().first() to avoid MultipleObjectsReturned + """Create opinion statuses (duplicates already fixed in Phase 0)""" + # Create or get opinion statuses - using filter().first() to avoid MultipleObjectsReturned for opinion_data in RUSSIAN_OPINION_STATUSES: try: # Try to get by slug first opinion_status = OpinionStatus.objects.filter(slug=opinion_data['slug']).first() if opinion_status: - created = False self.stdout.write(f"Using 
existing opinion status: {opinion_data['slug']}") else: # Create new one @@ -590,7 +531,6 @@ class Command(BaseCommand): title=[{'language_code': 'ru', 'text': opinion_data['title']}], color=opinion_data['color'] ) - created = True self.stdout.write(f"Created new opinion status: {opinion_data['slug']}") self.opinion_statuses.append(opinion_status)