diff --git a/apps/hadis/management/commands/seed_russian_data.py b/apps/hadis/management/commands/seed_russian_data.py index b890c61..a2dcbeb 100644 --- a/apps/hadis/management/commands/seed_russian_data.py +++ b/apps/hadis/management/commands/seed_russian_data.py @@ -1,7 +1,7 @@ import random import uuid from django.core.management.base import BaseCommand -from django.db import transaction +from django.db import transaction, connection from apps.hadis.models import Hadis, HadisCategory, HadisSect @@ -68,6 +68,24 @@ RUSSIAN_HADIS_ENDINGS = [ "", ] +RUSSIAN_HADIS_TITLES = [ + "О терпении и награде", + "О знании и мудрости", + "О молитве и поклонении", + "О справедливости и праведности", + "О семье и воспитании", + "О доброте и милосердии", + "О вере и благочестии", + "О покаянии и прощении", + "О благодарности Аллаху", + "О смирении и скромности", + "О правде и честности", + "О соседях и обществе", + "О торговле и справедливости", + "О чистоте и гигиене", + "О намерении и искренности", +] + RUSSIAN_SECT_DATA = { 'shia': { 'title': 'Шиизм', @@ -118,34 +136,13 @@ def generate_hadis_text(): return f"{opening} {body} {ending}".strip() -def generate_hadis_title(): - """Generate a random hadis title.""" - topics = [ - "О терпении и награде", - "О знании и мудрости", - "О молитве и поклонении", - "О справедливости и праведности", - "О семье и воспитании", - "О доброте и милосердии", - "О вере и благочестии", - "О покаянии и прощении", - "О благодарности Аллаху", - "О смирении и скромности", - "О правде и честности", - "О соседях и обществе", - "О торговле и справедливости", - "О чистоте и гигиене", - "О намерении и искренности", - ] - return random.choice(topics) - - class Command(BaseCommand): help = 'Seed Russian language data for HadisSect, HadisCategory (3 levels), and Hadis (up to 500 records)' # Configuration constants MAX_HADIS_RECORDS = 500 HADIS_PER_CATEGORY = 10 + BULK_BATCH_SIZE = 100 # Batch size for bulk_create def add_arguments(self, parser): parser.add_argument( @@ -160,7 +157,7 @@ class Command(BaseCommand): ) def handle(self, *args, **options): - self.stdout.write(self.style.WARNING('=== Starting Russian Data Seeding ===')) + self.stdout.write(self.style.WARNING('=== Starting Russian Data Seeding (Optimized) ===')) # Check if data already exists (prevent re-running in loops) existing_hadis = Hadis.objects.count() @@ -185,11 +182,15 @@ class Command(BaseCommand): # Step 1: Create Sects sects = self.create_sects() - # Step 2: Create Categories (3 levels) - categories = self.create_categories(sects) + # Step 2: Create Categories (3 levels) - optimized with batch creation per level + leaf_categories = self.create_categories_optimized(sects) + + # Step 3: Create Hadis using bulk_create + self.create_hadis_bulk(leaf_categories) - # Step 3: Create Hadis (up to 500 records, distributed across categories) - self.create_hadis(categories) + # Step 4: Rebuild MPTT tree structure + self.stdout.write('Rebuilding MPTT tree...') + HadisCategory.objects.rebuild() self.print_statistics() self.stdout.write(self.style.SUCCESS('=== Russian Data Seeding Complete ===')) @@ -219,55 +220,96 @@ class Command(BaseCommand): return sects - def create_categories(self, sects): - """Create 3-level category tree for each sect and source type.""" - self.stdout.write('Creating category tree (3 levels)...') + def create_categories_optimized(self, sects): + """Create 3-level category tree using batch operations per level.""" + self.stdout.write('Creating category tree (3 levels) - optimized...') + + source_types = ['hadith', 'quran'] all_leaf_categories = [] + slug_counter = 0 - # Limit source types to control total number of categories - source_types = ['hadith', 'quran'] + # Level 1: Root categories - create in batch + level1_categories = [] + level1_metadata = [] # Store metadata for creating children for sect_type, sect in sects.items(): for source_type in source_types: - self.stdout.write(f" Creating categories for {sect_type}/{source_type}...") - - # Level 1: Root categories (2 per source type to limit total) for i in range(2): - level1_title = generate_category_title() - level1_cat = HadisCategory.objects.create( + title = generate_category_title() + slug_counter += 1 + slug = f"cat-l1-{slug_counter}-{uuid.uuid4().hex[:6]}" + + cat = HadisCategory( parent=None, sect=sect, source_type=source_type, - title=make_json_field(level1_title), - description=make_json_field(generate_category_description(level1_title)), + title=make_json_field(title), + description=make_json_field(generate_category_description(title)), order=i + 1, + slug=slug, ) - - # Level 2: Child categories (2 per level 1) - for j in range(2): - level2_title = generate_category_title() - level2_cat = HadisCategory.objects.create( - parent=level1_cat, - sect=sect, - source_type=source_type, - title=make_json_field(level2_title), - description=make_json_field(generate_category_description(level2_title)), - order=j + 1, - ) - - # Level 3: Leaf categories (2-3 per level 2) - num_level3 = random.randint(2, 3) - for k in range(num_level3): - level3_title = generate_category_title() - level3_cat = HadisCategory.objects.create( - parent=level2_cat, - sect=sect, - source_type=source_type, - title=make_json_field(level3_title), - description=make_json_field(generate_category_description(level3_title)), - order=k + 1, - ) - all_leaf_categories.append(level3_cat) + level1_categories.append(cat) + level1_metadata.append({ + 'sect': sect, + 'source_type': source_type, + }) + + # Bulk create level 1 - we need to save individually due to MPTT + # But we can disable MPTT signals temporarily + self.stdout.write(f' Creating {len(level1_categories)} level 1 categories...') + for cat in level1_categories: + cat.save() + + # Level 2: Child categories + level2_categories = [] + level2_metadata = [] + + for idx, parent in enumerate(level1_categories): + meta = level1_metadata[idx] + for j in range(2): + title = generate_category_title() + slug_counter += 1 + slug = f"cat-l2-{slug_counter}-{uuid.uuid4().hex[:6]}" + + cat = HadisCategory( + parent=parent, + sect=meta['sect'], + source_type=meta['source_type'], + title=make_json_field(title), + description=make_json_field(generate_category_description(title)), + order=j + 1, + slug=slug, + ) + level2_categories.append(cat) + level2_metadata.append({ + 'sect': meta['sect'], + 'source_type': meta['source_type'], + }) + + self.stdout.write(f' Creating {len(level2_categories)} level 2 categories...') + for cat in level2_categories: + cat.save() + + # Level 3: Leaf categories + for idx, parent in enumerate(level2_categories): + meta = level2_metadata[idx] + num_level3 = random.randint(2, 3) + for k in range(num_level3): + title = generate_category_title() + slug_counter += 1 + slug = f"cat-l3-{slug_counter}-{uuid.uuid4().hex[:6]}" + + cat = HadisCategory( + parent=parent, + sect=meta['sect'], + source_type=meta['source_type'], + title=make_json_field(title), + description=make_json_field(generate_category_description(title)), + order=k + 1, + slug=slug, + ) + cat.save() + all_leaf_categories.append(cat) total_categories = HadisCategory.objects.count() self.stdout.write(self.style.SUCCESS(f" Created {total_categories} categories total")) @@ -275,58 +317,61 @@ class Command(BaseCommand): return all_leaf_categories - def create_hadis(self, leaf_categories): - """Create hadis records distributed across categories, up to MAX_HADIS_RECORDS.""" - self.stdout.write(f'Creating hadis entries (max {self.MAX_HADIS_RECORDS})...') - hadis_count = 0 - hadis_number = 1 - - # Calculate hadis per category to reach ~500 total + def create_hadis_bulk(self, leaf_categories): + """Create hadis records using bulk_create for maximum performance.""" + self.stdout.write(f'Creating hadis entries using bulk_create (max {self.MAX_HADIS_RECORDS})...') + num_categories = len(leaf_categories) if num_categories == 0: self.stdout.write(self.style.WARNING('No leaf categories found!')) return + # Pre-generate all hadis objects + hadis_list = [] + hadis_number = 1 hadis_per_cat = max(self.HADIS_PER_CATEGORY, self.MAX_HADIS_RECORDS // num_categories) - for idx, category in enumerate(leaf_categories): - # Stop if we've reached the limit - if hadis_count >= self.MAX_HADIS_RECORDS: + for category in leaf_categories: + if len(hadis_list) >= self.MAX_HADIS_RECORDS: break - # Create hadis for this category - for i in range(hadis_per_cat): - if hadis_count >= self.MAX_HADIS_RECORDS: + for _ in range(hadis_per_cat): + if len(hadis_list) >= self.MAX_HADIS_RECORDS: break - title = generate_hadis_title() + title = random.choice(RUSSIAN_HADIS_TITLES) text = generate_hadis_text() - translation = text # Same as text since it's already in Russian - - # Generate unique slug to avoid duplicates + opening = random.choice(RUSSIAN_HADIS_OPENINGS).rstrip(':') + + # Pre-generate slug (bypass model's save method) slug = generate_unique_slug('hadis-ru', hadis_number) - Hadis.objects.create( + hadis = Hadis( category=category, number=hadis_number, slug=slug, title=make_json_field(title), - title_narrator=make_json_field(random.choice(RUSSIAN_HADIS_OPENINGS).rstrip(':')), + title_narrator=make_json_field(opening), description=make_json_field(f"Хадис номер {hadis_number} из категории {category.title[0]['text']}"), text=text, - translation=make_json_field(translation), + translation=make_json_field(text), status=True, address=make_json_field(f"Том {random.randint(1, 10)}, страница {random.randint(1, 500)}"), explanation=make_json_field(f"Этот хадис учит нас важности {title.lower()}."), + share_link=f"/hadis/{slug}", ) + hadis_list.append(hadis) hadis_number += 1 - hadis_count += 1 - # Progress indicator every 5 categories - if idx % 5 == 0: - self.stdout.write(f" Progress: {hadis_count}/{self.MAX_HADIS_RECORDS} hadis created...") + # Bulk create in batches + total_created = 0 + for i in range(0, len(hadis_list), self.BULK_BATCH_SIZE): + batch = hadis_list[i:i + self.BULK_BATCH_SIZE] + Hadis.objects.bulk_create(batch, batch_size=self.BULK_BATCH_SIZE) + total_created += len(batch) + self.stdout.write(f" Progress: {total_created}/{len(hadis_list)} hadis created...") - self.stdout.write(self.style.SUCCESS(f" Created {hadis_count} hadis entries")) + self.stdout.write(self.style.SUCCESS(f" Created {total_created} hadis entries")) def print_statistics(self): """Print final statistics.""" @@ -336,11 +381,12 @@ class Command(BaseCommand): self.stdout.write(f"Hadis: {Hadis.objects.count()}") # Show hadis per category stats - leaf_cats = [] - for cat in HadisCategory.objects.all(): - if not HadisCategory.objects.filter(parent=cat).exists(): - leaf_cats.append(cat) - - if leaf_cats: + leaf_cats = HadisCategory.objects.filter(children__isnull=True) + + if leaf_cats.exists(): hadis_counts = [Hadis.objects.filter(category=cat).count() for cat in leaf_cats] - self.stdout.write(f"Hadis per leaf category: min={min(hadis_counts)}, max={max(hadis_counts)}, avg={sum(hadis_counts)/len(hadis_counts):.1f}") + if hadis_counts: + self.stdout.write( + f"Hadis per leaf category: min={min(hadis_counts)}, " + f"max={max(hadis_counts)}, avg={sum(hadis_counts)/len(hadis_counts):.1f}" + )