Browse Source

Implement data integrity fixes in hadis seeding command

- Introduced a new phase to the seeding process to address existing data integrity issues before creating new records.
- Added functionality to fix empty and duplicate slugs for NarratorLayer, TransmitterReliability, and OpinionStatus models, ensuring unique identifiers.
- Enhanced logging to provide detailed feedback on the fixes applied during the seeding process, improving overall data quality.
master
mortezaei 4 months ago
parent
commit
3facc32de8
  1. 344
      apps/hadis/management/commands/seed_complete_hadis_data.py

344
apps/hadis/management/commands/seed_complete_hadis_data.py

@ -214,6 +214,10 @@ class Command(BaseCommand):
self.stdout.write(self.style.SUCCESS('Starting comprehensive Hadis data generation...'))
try:
# Phase 0: Fix existing data integrity issues BEFORE starting transaction
self.stdout.write('Phase 0: Fixing existing data integrity issues...')
self.fix_existing_data()
with transaction.atomic():
# Phase 1: Foundation
self.stdout.write('Phase 1: Creating foundation data...')
@ -259,6 +263,138 @@ class Command(BaseCommand):
except Exception as e:
self.stdout.write(self.style.ERROR(f'Error: {str(e)}'))
raise
def fix_existing_data(self):
    """Repair empty and duplicate slugs before the main seeding transaction.

    Called as "Phase 0", outside ``transaction.atomic()``, so the fixes are
    saved independently of the seeding phases that follow.

    Steps:
      1. ``NarratorLayer`` rows whose slug is NULL, ``''`` or ``' '`` get a
         slug built from the first entry of ``name``; the fallback is
         ``layer-<number>``.
      2. ``TransmitterReliability`` and ``OpinionStatus`` rows with such
         slugs get one built from the first entry of ``title``; the
         fallback is ``<prefix>-<timestamp>``.
      3. For both status models, every record sharing a slug with another
         record (except the one with the lowest pk) is given a new slug
         derived from the shared one.

    Every generated slug is checked against the table and receives a
    numeric suffix (``-1``, ``-2``, ...) until it is unique. Each change is
    reported on ``self.stdout``.
    """
    from datetime import datetime

    from django.db.models import Count, Q
    from django.utils.text import slugify

    # Same "slug is effectively missing" condition for all three models.
    empty_slug = Q(slug__isnull=True) | Q(slug='') | Q(slug=' ')

    def timestamp(fmt):
        """Return the current local time formatted with *fmt*."""
        return datetime.now().strftime(fmt)

    def slug_from_translations(translations, fallback):
        """Slugify the first translation's ``text`` or return *fallback*."""
        if not translations or not isinstance(translations, list):
            return fallback
        try:
            text = translations[0].get('text', '').strip()
        except (IndexError, KeyError, AttributeError, TypeError):
            # Malformed entry (not a dict, or 'text' is not a string).
            return fallback
        # slugify() drops non-ASCII characters by default, so text written
        # only in e.g. Cyrillic produces ''. Fall back instead of saving an
        # empty slug again.
        return slugify(text) or fallback

    def unique_slug(model, base, pk):
        """Return *base*, suffixed with a counter until no other row uses it."""
        candidate = base
        counter = 1
        while model.objects.filter(slug=candidate).exclude(pk=pk).exists():
            candidate = f"{base}-{counter}"
            counter += 1
        return candidate

    def fix_status_model(model, prefix, label):
        """Fix empty, then duplicate, slugs of a status-like model."""
        for record in model.objects.filter(empty_slug):
            base = slug_from_translations(
                record.title,
                f"{prefix}-{timestamp('%Y%m%d%H%M%S%f')}",
            )
            record.slug = unique_slug(model, base, record.pk)
            record.save(update_fields=['slug'])
            self.stdout.write(f' Fixed {label} {record.pk}: "{record.slug}"')

        # order_by() clears any default ordering so it cannot leak into the
        # GROUP BY (older Django versions added Meta.ordering fields there).
        duplicates = (
            model.objects.values('slug')
            .annotate(count=Count('id'))
            .order_by()
            .filter(count__gt=1)
        )
        for dup in duplicates:
            slug_value = dup['slug']
            # Ordering by pk makes the record that keeps its slug
            # deterministic: the one with the lowest pk.
            records = list(model.objects.filter(slug=slug_value).order_by('pk'))
            for i, record in enumerate(records[1:], start=1):
                base = f"{slug_value}-{i}-{timestamp('%H%M%S%f')}"
                record.slug = unique_slug(model, base, record.pk)
                record.save(update_fields=['slug'])
                self.stdout.write(f' Fixed duplicate {label}: "{record.slug}"')

    self.stdout.write('Fixing NarratorLayer empty slugs...')
    empty_layers = NarratorLayer.objects.filter(empty_slug)
    self.stdout.write(f'Found {empty_layers.count()} layers with empty slugs')
    for layer in empty_layers:
        base = slug_from_translations(layer.name, f"layer-{layer.number}")
        layer.slug = unique_slug(NarratorLayer, base, layer.pk)
        layer.save(update_fields=['slug'])
        self.stdout.write(
            f' Fixed layer {layer.pk} (number={layer.number}): "{layer.slug}"'
        )

    self.stdout.write('Fixing TransmitterReliability duplicates...')
    fix_status_model(TransmitterReliability, 'reliability', 'reliability')

    self.stdout.write('Fixing OpinionStatus duplicates...')
    fix_status_model(OpinionStatus, 'opinion', 'opinion status')

    self.stdout.write(self.style.SUCCESS('Data integrity fixes complete!'))
def create_sects_and_categories(self):
"""Create or get existing sects and categories"""
@ -319,40 +455,8 @@ class Command(BaseCommand):
self.created_counts['tags'] = len(self.tags)
def create_narrator_layers(self):
"""Create narrator layers - fixes empty slugs first"""
from django.utils.text import slugify
from django.db.models import Q
# Fix ALL existing layers with empty, null, or invalid slugs
existing_layers = NarratorLayer.objects.filter(Q(slug__isnull=True) | Q(slug='') | Q(slug=' '))
self.stdout.write(f"Found {existing_layers.count()} layers with empty slugs to fix")
for layer in existing_layers:
# Generate proper slug
try:
if layer.name and isinstance(layer.name, list) and len(layer.name) > 0:
text = layer.name[0].get('text', '').strip()
if text:
new_slug = slugify(text)
else:
new_slug = f"layer-{layer.number}"
else:
new_slug = f"layer-{layer.number}"
except (IndexError, KeyError, AttributeError, TypeError):
new_slug = f"layer-{layer.number}"
# Ensure uniqueness
counter = 1
original_slug = new_slug
while NarratorLayer.objects.filter(slug=new_slug).exclude(pk=layer.pk).exists():
new_slug = f"{original_slug}-{counter}"
counter += 1
layer.slug = new_slug
layer.save(update_fields=['slug'])
self.stdout.write(f"Fixed slug for layer {layer.pk} (number={layer.number}): '{new_slug}'")
# Now create or get narrator layers - use filter().first() to avoid duplicates
"""Create narrator layers (slugs already fixed in Phase 0)"""
# Create or get narrator layers - use filter().first() to avoid duplicates
for layer_data in RUSSIAN_NARRATOR_LAYERS:
try:
# Try to find existing layer by number
@ -360,7 +464,6 @@ class Command(BaseCommand):
if layer:
# Layer already exists
created = False
self.stdout.write(f"Using existing layer {layer_data['number']}")
else:
# Create new layer
@ -369,7 +472,6 @@ class Command(BaseCommand):
name=[{'language_code': 'ru', 'text': layer_data['name']}],
description=[{'language_code': 'ru', 'text': layer_data['description']}]
)
created = True
self.stdout.write(f"Created new layer {layer_data['number']}")
self.narrator_layers.append(layer)
@ -388,93 +490,13 @@ class Command(BaseCommand):
self.created_counts['narrator_layers'] = len(self.narrator_layers)
def create_reliability_statuses(self):
"""Create transmitter reliability statuses - fixes duplicates first"""
from django.utils.text import slugify
from django.db.models import Count, Q
# Fix records with empty/null slugs first
empty_slug_records = TransmitterReliability.objects.filter(Q(slug__isnull=True) | Q(slug='') | Q(slug=' '))
self.stdout.write(f"Found {empty_slug_records.count()} reliability records with empty slugs")
for record in empty_slug_records:
try:
if record.title and isinstance(record.title, list) and len(record.title) > 0:
text = record.title[0].get('text', '').strip()
if text:
new_slug = slugify(text)
else:
from datetime import datetime
new_slug = f"reliability-{datetime.now().strftime('%Y%m%d%H%M%S%f')}"
else:
from datetime import datetime
new_slug = f"reliability-{datetime.now().strftime('%Y%m%d%H%M%S%f')}"
except:
from datetime import datetime
new_slug = f"reliability-{datetime.now().strftime('%Y%m%d%H%M%S%f')}"
# Ensure uniqueness
counter = 1
original_slug = new_slug
while TransmitterReliability.objects.filter(slug=new_slug).exclude(pk=record.pk).exists():
new_slug = f"{original_slug}-{counter}"
counter += 1
record.slug = new_slug
record.save(update_fields=['slug'])
self.stdout.write(f"Fixed empty reliability slug: '{new_slug}'")
# Find and fix duplicate slugs
duplicates = TransmitterReliability.objects.values('slug').annotate(
count=Count('id')
).filter(count__gt=1)
self.stdout.write(f"Found {duplicates.count()} duplicate reliability slugs")
for dup in duplicates:
slug_value = dup['slug']
# Get all records with this slug
records = list(TransmitterReliability.objects.filter(slug=slug_value))
self.stdout.write(f"Processing {len(records)} records with slug '{slug_value}'")
# Keep the first one, update others
for i, record in enumerate(records):
if i == 0:
continue # Keep first record as is
else:
# Update slug to make it unique
try:
if record.title and isinstance(record.title, list) and len(record.title) > 0:
text = record.title[0].get('text', '').strip()
if text:
new_slug = slugify(text)
else:
from datetime import datetime
new_slug = f"reliability-{datetime.now().strftime('%Y%m%d%H%M%S%f')}-{i}"
else:
from datetime import datetime
new_slug = f"reliability-{datetime.now().strftime('%Y%m%d%H%M%S%f')}-{i}"
except:
from datetime import datetime
new_slug = f"reliability-{datetime.now().strftime('%Y%m%d%H%M%S%f')}-{i}"
# Ensure uniqueness
counter = 1
original_slug = new_slug
while TransmitterReliability.objects.filter(slug=new_slug).exclude(pk=record.pk).exists():
new_slug = f"{original_slug}-{counter}"
counter += 1
record.slug = new_slug
record.save(update_fields=['slug'])
self.stdout.write(f"Fixed duplicate reliability slug: '{new_slug}'")
# Now create or get reliability statuses - using filter().first() to avoid MultipleObjectsReturned
"""Create transmitter reliability statuses (duplicates already fixed in Phase 0)"""
# Create or get reliability statuses - using filter().first() to avoid MultipleObjectsReturned
for reliability_data in RUSSIAN_RELIABILITY_LEVELS:
try:
# Try to get by slug first
reliability = TransmitterReliability.objects.filter(slug=reliability_data['slug']).first()
if reliability:
created = False
self.stdout.write(f"Using existing reliability: {reliability_data['slug']}")
else:
# Create new one
@ -483,7 +505,6 @@ class Command(BaseCommand):
title=[{'language_code': 'ru', 'text': reliability_data['title']}],
color=reliability_data['color']
)
created = True
self.stdout.write(f"Created new reliability: {reliability_data['slug']}")
self.reliability_statuses.append(reliability)
@ -495,93 +516,13 @@ class Command(BaseCommand):
self.created_counts['reliability_statuses'] = len(self.reliability_statuses)
def create_opinion_statuses(self):
"""Create opinion statuses - fixes duplicates first"""
from django.utils.text import slugify
from django.db.models import Count, Q
# Fix records with empty/null slugs first
empty_slug_records = OpinionStatus.objects.filter(Q(slug__isnull=True) | Q(slug='') | Q(slug=' '))
self.stdout.write(f"Found {empty_slug_records.count()} opinion status records with empty slugs")
for record in empty_slug_records:
try:
if record.title and isinstance(record.title, list) and len(record.title) > 0:
text = record.title[0].get('text', '').strip()
if text:
new_slug = slugify(text)
else:
from datetime import datetime
new_slug = f"opinion-{datetime.now().strftime('%Y%m%d%H%M%S%f')}"
else:
from datetime import datetime
new_slug = f"opinion-{datetime.now().strftime('%Y%m%d%H%M%S%f')}"
except:
from datetime import datetime
new_slug = f"opinion-{datetime.now().strftime('%Y%m%d%H%M%S%f')}"
# Ensure uniqueness
counter = 1
original_slug = new_slug
while OpinionStatus.objects.filter(slug=new_slug).exclude(pk=record.pk).exists():
new_slug = f"{original_slug}-{counter}"
counter += 1
record.slug = new_slug
record.save(update_fields=['slug'])
self.stdout.write(f"Fixed empty opinion status slug: '{new_slug}'")
# Find and fix duplicate slugs
duplicates = OpinionStatus.objects.values('slug').annotate(
count=Count('id')
).filter(count__gt=1)
self.stdout.write(f"Found {duplicates.count()} duplicate opinion status slugs")
for dup in duplicates:
slug_value = dup['slug']
# Get all records with this slug
records = list(OpinionStatus.objects.filter(slug=slug_value))
self.stdout.write(f"Processing {len(records)} records with slug '{slug_value}'")
# Keep the first one, update others
for i, record in enumerate(records):
if i == 0:
continue # Keep first record as is
else:
# Update slug to make it unique
try:
if record.title and isinstance(record.title, list) and len(record.title) > 0:
text = record.title[0].get('text', '').strip()
if text:
new_slug = slugify(text)
else:
from datetime import datetime
new_slug = f"opinion-{datetime.now().strftime('%Y%m%d%H%M%S%f')}-{i}"
else:
from datetime import datetime
new_slug = f"opinion-{datetime.now().strftime('%Y%m%d%H%M%S%f')}-{i}"
except:
from datetime import datetime
new_slug = f"opinion-{datetime.now().strftime('%Y%m%d%H%M%S%f')}-{i}"
# Ensure uniqueness
counter = 1
original_slug = new_slug
while OpinionStatus.objects.filter(slug=new_slug).exclude(pk=record.pk).exists():
new_slug = f"{original_slug}-{counter}"
counter += 1
record.slug = new_slug
record.save(update_fields=['slug'])
self.stdout.write(f"Fixed duplicate opinion status slug: '{new_slug}'")
# Now create or get opinion statuses - using filter().first() to avoid MultipleObjectsReturned
"""Create opinion statuses (duplicates already fixed in Phase 0)"""
# Create or get opinion statuses - using filter().first() to avoid MultipleObjectsReturned
for opinion_data in RUSSIAN_OPINION_STATUSES:
try:
# Try to get by slug first
opinion_status = OpinionStatus.objects.filter(slug=opinion_data['slug']).first()
if opinion_status:
created = False
self.stdout.write(f"Using existing opinion status: {opinion_data['slug']}")
else:
# Create new one
@ -590,7 +531,6 @@ class Command(BaseCommand):
title=[{'language_code': 'ru', 'text': opinion_data['title']}],
color=opinion_data['color']
)
created = True
self.stdout.write(f"Created new opinion status: {opinion_data['slug']}")
self.opinion_statuses.append(opinion_status)

Loading…
Cancel
Save