Browse Source

Enhance Russian data seeding command with optimized category and hadis creation

- Added a new list of Russian hadis titles for random selection during seeding.
- Refactored category creation to use batch operations for improved performance.
- Implemented bulk creation of hadis records to optimize database interactions.
- Updated command output messages to reflect optimizations and progress during execution.
master
mortezaei 4 months ago
parent
commit
fbe5951729
  1. 236
      apps/hadis/management/commands/seed_russian_data.py

236
apps/hadis/management/commands/seed_russian_data.py

@ -1,7 +1,7 @@
import random import random
import uuid import uuid
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from django.db import transaction
from django.db import transaction, connection
from apps.hadis.models import Hadis, HadisCategory, HadisSect from apps.hadis.models import Hadis, HadisCategory, HadisSect
@ -68,6 +68,24 @@ RUSSIAN_HADIS_ENDINGS = [
"", "",
] ]
# Pool of Russian hadis titles; one entry is picked at random for each
# seeded Hadis record.
RUSSIAN_HADIS_TITLES = [
    "О терпении и награде",
    "О знании и мудрости",
    "О молитве и поклонении",
    "О справедливости и праведности",
    "О семье и воспитании",
    "О доброте и милосердии",
    "О вере и благочестии",
    "О покаянии и прощении",
    "О благодарности Аллаху",
    "О смирении и скромности",
    "О правде и честности",
    "О соседях и обществе",
    "О торговле и справедливости",
    "О чистоте и гигиене",
    "О намерении и искренности",
]
RUSSIAN_SECT_DATA = { RUSSIAN_SECT_DATA = {
'shia': { 'shia': {
'title': 'Шиизм', 'title': 'Шиизм',
@ -118,34 +136,13 @@ def generate_hadis_text():
return f"{opening} {body} {ending}".strip() return f"{opening} {body} {ending}".strip()
def generate_hadis_title():
    """Return a random Russian hadis title.

    The candidates live in the module-level ``RUSSIAN_HADIS_TITLES`` list so
    that the title pool is defined in exactly one place and is not rebuilt on
    every call.

    Returns:
        One of the strings in ``RUSSIAN_HADIS_TITLES``.
    """
    return random.choice(RUSSIAN_HADIS_TITLES)
class Command(BaseCommand): class Command(BaseCommand):
help = 'Seed Russian language data for HadisSect, HadisCategory (3 levels), and Hadis (up to 500 records)' help = 'Seed Russian language data for HadisSect, HadisCategory (3 levels), and Hadis (up to 500 records)'
# Configuration constants # Configuration constants
MAX_HADIS_RECORDS = 500 MAX_HADIS_RECORDS = 500
HADIS_PER_CATEGORY = 10 HADIS_PER_CATEGORY = 10
BULK_BATCH_SIZE = 100 # Batch size for bulk_create
def add_arguments(self, parser): def add_arguments(self, parser):
parser.add_argument( parser.add_argument(
@ -160,7 +157,7 @@ class Command(BaseCommand):
) )
def handle(self, *args, **options): def handle(self, *args, **options):
self.stdout.write(self.style.WARNING('=== Starting Russian Data Seeding ==='))
self.stdout.write(self.style.WARNING('=== Starting Russian Data Seeding (Optimized) ==='))
# Check if data already exists (prevent re-running in loops) # Check if data already exists (prevent re-running in loops)
existing_hadis = Hadis.objects.count() existing_hadis = Hadis.objects.count()
@ -185,11 +182,15 @@ class Command(BaseCommand):
# Step 1: Create Sects # Step 1: Create Sects
sects = self.create_sects() sects = self.create_sects()
# Step 2: Create Categories (3 levels)
categories = self.create_categories(sects)
# Step 2: Create Categories (3 levels) - optimized with batch creation per level
leaf_categories = self.create_categories_optimized(sects)
# Step 3: Create Hadis using bulk_create
self.create_hadis_bulk(leaf_categories)
# Step 3: Create Hadis (up to 500 records, distributed across categories)
self.create_hadis(categories)
# Step 4: Rebuild MPTT tree structure
self.stdout.write('Rebuilding MPTT tree...')
HadisCategory.objects.rebuild()
self.print_statistics() self.print_statistics()
self.stdout.write(self.style.SUCCESS('=== Russian Data Seeding Complete ===')) self.stdout.write(self.style.SUCCESS('=== Russian Data Seeding Complete ==='))
@ -219,55 +220,96 @@ class Command(BaseCommand):
return sects return sects
def create_categories_optimized(self, sects):
    """Create the 3-level category tree and return its leaf categories.

    For every sect in ``sects`` and for each of the source types
    ``'hadith'`` and ``'quran'`` this creates 2 root categories, 2 children
    under each root, and a random 2-3 leaf categories under each child.

    Categories are written with ``save()`` one row at a time, level by
    level, so that a parent is always stored before its children are built.
    NOTE(review): the previous comment said individual saves are required
    because of MPTT; no signals are disabled here, and the command's
    ``handle()`` rebuilds the tree after seeding.

    Args:
        sects: Mapping whose values are the sect objects to attach the
            categories to (the keys are not used).

    Returns:
        List of the level-3 (leaf) ``HadisCategory`` instances, in creation
        order.
    """
    self.stdout.write('Creating category tree (3 levels) - optimized...')

    # Limit source types to control the total number of categories.
    source_types = ['hadith', 'quran']
    slug_counter = 0

    def build_category(level, parent, sect, source_type, order):
        """Return an unsaved category with a pre-generated unique slug."""
        nonlocal slug_counter
        title = generate_category_title()
        slug_counter += 1
        return HadisCategory(
            parent=parent,
            sect=sect,
            source_type=source_type,
            title=make_json_field(title),
            description=make_json_field(generate_category_description(title)),
            order=order,
            # Running counter plus a random suffix keeps slugs unique.
            slug=f"cat-l{level}-{slug_counter}-{uuid.uuid4().hex[:6]}",
        )

    # Level 1: two root categories per (sect, source type) pair.
    level1_categories = [
        build_category(1, None, sect, source_type, i + 1)
        for sect in sects.values()
        for source_type in source_types
        for i in range(2)
    ]
    self.stdout.write(f' Creating {len(level1_categories)} level 1 categories...')
    for cat in level1_categories:
        cat.save()

    # Level 2: two children per root; sect and source type are inherited
    # from the parent instance instead of a parallel metadata list.
    level2_categories = [
        build_category(2, parent, parent.sect, parent.source_type, j + 1)
        for parent in level1_categories
        for j in range(2)
    ]
    self.stdout.write(f' Creating {len(level2_categories)} level 2 categories...')
    for cat in level2_categories:
        cat.save()

    # Level 3: 2-3 leaves per level-2 category, saved as they are built.
    all_leaf_categories = []
    for parent in level2_categories:
        for k in range(random.randint(2, 3)):
            leaf = build_category(3, parent, parent.sect, parent.source_type, k + 1)
            leaf.save()
            all_leaf_categories.append(leaf)

    # Reports every category row in the table, not only the ones made here.
    total_categories = HadisCategory.objects.count()
    self.stdout.write(self.style.SUCCESS(f" Created {total_categories} categories total"))

    return all_leaf_categories
def create_hadis_bulk(self, leaf_categories):
    """Create hadis records for the leaf categories via ``bulk_create``.

    All ``Hadis`` objects are built in memory first, then inserted in
    chunks of ``BULK_BATCH_SIZE`` with a progress line after each chunk.
    At most ``MAX_HADIS_RECORDS`` records are created in total.

    ``bulk_create`` does not call the model's ``save()``, so ``slug`` and
    ``share_link`` are filled in explicitly here.

    Args:
        leaf_categories: Sequence of leaf ``HadisCategory`` instances; each
            must expose ``title[0]['text']``. If empty, a warning is
            written and nothing is created.

    Returns:
        None.
    """
    self.stdout.write(f'Creating hadis entries using bulk_create (max {self.MAX_HADIS_RECORDS})...')

    num_categories = len(leaf_categories)
    if num_categories == 0:
        self.stdout.write(self.style.WARNING('No leaf categories found!'))
        return

    # Aim for MAX_HADIS_RECORDS overall, but never fewer than
    # HADIS_PER_CATEGORY for a category that receives records.
    # NOTE(review): when that floor applies, the cap is reached before the
    # last categories are visited, so they receive no hadis at all.
    hadis_per_cat = max(self.HADIS_PER_CATEGORY, self.MAX_HADIS_RECORDS // num_categories)

    # Pre-generate all hadis objects.
    hadis_list = []
    for category in leaf_categories:
        remaining = self.MAX_HADIS_RECORDS - len(hadis_list)
        if remaining <= 0:
            break

        category_name = category.title[0]['text']
        for _ in range(min(hadis_per_cat, remaining)):
            hadis_number = len(hadis_list) + 1  # 1-based running number
            title = random.choice(RUSSIAN_HADIS_TITLES)
            text = generate_hadis_text()
            opening = random.choice(RUSSIAN_HADIS_OPENINGS).rstrip(':')
            slug = generate_unique_slug('hadis-ru', hadis_number)

            hadis_list.append(Hadis(
                category=category,
                number=hadis_number,
                slug=slug,
                title=make_json_field(title),
                title_narrator=make_json_field(opening),
                description=make_json_field(f"Хадис номер {hadis_number} из категории {category_name}"),
                text=text,
                # The text is already Russian, so it doubles as the translation.
                translation=make_json_field(text),
                status=True,
                address=make_json_field(f"Том {random.randint(1, 10)}, страница {random.randint(1, 500)}"),
                explanation=make_json_field(f"Этот хадис учит нас важности {title.lower()}."),
                share_link=f"/hadis/{slug}",
            ))

    # Insert in fixed-size chunks; slicing here (rather than one call with
    # batch_size=...) lets progress be reported after every chunk.
    total_created = 0
    for start in range(0, len(hadis_list), self.BULK_BATCH_SIZE):
        batch = hadis_list[start:start + self.BULK_BATCH_SIZE]
        Hadis.objects.bulk_create(batch)
        total_created += len(batch)
        self.stdout.write(f" Progress: {total_created}/{len(hadis_list)} hadis created...")

    self.stdout.write(self.style.SUCCESS(f" Created {total_created} hadis entries"))
def print_statistics(self): def print_statistics(self):
"""Print final statistics.""" """Print final statistics."""
@ -336,11 +381,12 @@ class Command(BaseCommand):
self.stdout.write(f"Hadis: {Hadis.objects.count()}") self.stdout.write(f"Hadis: {Hadis.objects.count()}")
# Show hadis per category stats # Show hadis per category stats
leaf_cats = []
for cat in HadisCategory.objects.all():
if not HadisCategory.objects.filter(parent=cat).exists():
leaf_cats.append(cat)
leaf_cats = HadisCategory.objects.filter(children__isnull=True)
if leaf_cats:
if leaf_cats.exists():
hadis_counts = [Hadis.objects.filter(category=cat).count() for cat in leaf_cats] hadis_counts = [Hadis.objects.filter(category=cat).count() for cat in leaf_cats]
self.stdout.write(f"Hadis per leaf category: min={min(hadis_counts)}, max={max(hadis_counts)}, avg={sum(hadis_counts)/len(hadis_counts):.1f}")
if hadis_counts:
self.stdout.write(
f"Hadis per leaf category: min={min(hadis_counts)}, "
f"max={max(hadis_counts)}, avg={sum(hadis_counts)/len(hadis_counts):.1f}"
)
Loading…
Cancel
Save