Browse Source

Enhance Russian data seeding command with optimized category and hadis creation

- Added a new list of Russian hadis titles for random selection during seeding.
- Refactored category creation to use batch operations for improved performance.
- Implemented bulk creation of hadis records to optimize database interactions.
- Updated command output messages to reflect optimizations and progress during execution.
master
mortezaei 4 months ago
parent
commit
fbe5951729
  1. 242
      apps/hadis/management/commands/seed_russian_data.py

242
apps/hadis/management/commands/seed_russian_data.py

@ -1,7 +1,7 @@
import random
import uuid
from django.core.management.base import BaseCommand
from django.db import transaction
from django.db import transaction, connection
from apps.hadis.models import Hadis, HadisCategory, HadisSect
@ -68,6 +68,24 @@ RUSSIAN_HADIS_ENDINGS = [
"",
]
# Pool of Russian topic titles; the seeding code picks one at random per hadis.
RUSSIAN_HADIS_TITLES = [
"О терпении и награде",
"О знании и мудрости",
"О молитве и поклонении",
"О справедливости и праведности",
"О семье и воспитании",
"О доброте и милосердии",
"О вере и благочестии",
"О покаянии и прощении",
"О благодарности Аллаху",
"О смирении и скромности",
"О правде и честности",
"О соседях и обществе",
"О торговле и справедливости",
"О чистоте и гигиене",
"О намерении и искренности",
]
RUSSIAN_SECT_DATA = {
'shia': {
'title': 'Шиизм',
@ -118,34 +136,13 @@ def generate_hadis_text():
return f"{opening} {body} {ending}".strip()
def generate_hadis_title():
    """Return a random Russian hadis title.

    The title is drawn from the module-level ``RUSSIAN_HADIS_TITLES`` list
    instead of a private copy, so the set of topics is defined in exactly
    one place and cannot drift between callers.

    Returns:
        str: One of the entries of ``RUSSIAN_HADIS_TITLES``.
    """
    return random.choice(RUSSIAN_HADIS_TITLES)
class Command(BaseCommand):
help = 'Seed Russian language data for HadisSect, HadisCategory (3 levels), and Hadis (up to 500 records)'
# Configuration constants
MAX_HADIS_RECORDS = 500
HADIS_PER_CATEGORY = 10
BULK_BATCH_SIZE = 100 # Batch size for bulk_create
def add_arguments(self, parser):
parser.add_argument(
@ -160,7 +157,7 @@ class Command(BaseCommand):
)
def handle(self, *args, **options):
self.stdout.write(self.style.WARNING('=== Starting Russian Data Seeding ==='))
self.stdout.write(self.style.WARNING('=== Starting Russian Data Seeding (Optimized) ==='))
# Check if data already exists (prevent re-running in loops)
existing_hadis = Hadis.objects.count()
@ -185,11 +182,15 @@ class Command(BaseCommand):
# Step 1: Create Sects
sects = self.create_sects()
# Step 2: Create Categories (3 levels)
categories = self.create_categories(sects)
# Step 2: Create Categories (3 levels) - optimized with batch creation per level
leaf_categories = self.create_categories_optimized(sects)
# Step 3: Create Hadis using bulk_create
self.create_hadis_bulk(leaf_categories)
# Step 3: Create Hadis (up to 500 records, distributed across categories)
self.create_hadis(categories)
# Step 4: Rebuild MPTT tree structure
self.stdout.write('Rebuilding MPTT tree...')
HadisCategory.objects.rebuild()
self.print_statistics()
self.stdout.write(self.style.SUCCESS('=== Russian Data Seeding Complete ==='))
@ -219,55 +220,96 @@ class Command(BaseCommand):
return sects
def create_categories(self, sects):
"""Create 3-level category tree for each sect and source type."""
self.stdout.write('Creating category tree (3 levels)...')
def create_categories_optimized(self, sects):
"""Create 3-level category tree using batch operations per level."""
self.stdout.write('Creating category tree (3 levels) - optimized...')
source_types = ['hadith', 'quran']
all_leaf_categories = []
slug_counter = 0
# Limit source types to control total number of categories
source_types = ['hadith', 'quran']
# Level 1: Root categories - create in batch
level1_categories = []
level1_metadata = [] # Store metadata for creating children
for sect_type, sect in sects.items():
for source_type in source_types:
self.stdout.write(f" Creating categories for {sect_type}/{source_type}...")
# Level 1: Root categories (2 per source type to limit total)
for i in range(2):
level1_title = generate_category_title()
level1_cat = HadisCategory.objects.create(
title = generate_category_title()
slug_counter += 1
slug = f"cat-l1-{slug_counter}-{uuid.uuid4().hex[:6]}"
cat = HadisCategory(
parent=None,
sect=sect,
source_type=source_type,
title=make_json_field(level1_title),
description=make_json_field(generate_category_description(level1_title)),
title=make_json_field(title),
description=make_json_field(generate_category_description(title)),
order=i + 1,
slug=slug,
)
# Level 2: Child categories (2 per level 1)
for j in range(2):
level2_title = generate_category_title()
level2_cat = HadisCategory.objects.create(
parent=level1_cat,
sect=sect,
source_type=source_type,
title=make_json_field(level2_title),
description=make_json_field(generate_category_description(level2_title)),
order=j + 1,
)
# Level 3: Leaf categories (2-3 per level 2)
num_level3 = random.randint(2, 3)
for k in range(num_level3):
level3_title = generate_category_title()
level3_cat = HadisCategory.objects.create(
parent=level2_cat,
sect=sect,
source_type=source_type,
title=make_json_field(level3_title),
description=make_json_field(generate_category_description(level3_title)),
order=k + 1,
)
all_leaf_categories.append(level3_cat)
level1_categories.append(cat)
level1_metadata.append({
'sect': sect,
'source_type': source_type,
})
# Level 1 objects are built as a batch above but saved one at a time
# (not via bulk_create) because of MPTT; no MPTT signals are disabled here.
self.stdout.write(f' Creating {len(level1_categories)} level 1 categories...')
for cat in level1_categories:
cat.save()
# Level 2: Child categories
level2_categories = []
level2_metadata = []
for idx, parent in enumerate(level1_categories):
meta = level1_metadata[idx]
for j in range(2):
title = generate_category_title()
slug_counter += 1
slug = f"cat-l2-{slug_counter}-{uuid.uuid4().hex[:6]}"
cat = HadisCategory(
parent=parent,
sect=meta['sect'],
source_type=meta['source_type'],
title=make_json_field(title),
description=make_json_field(generate_category_description(title)),
order=j + 1,
slug=slug,
)
level2_categories.append(cat)
level2_metadata.append({
'sect': meta['sect'],
'source_type': meta['source_type'],
})
self.stdout.write(f' Creating {len(level2_categories)} level 2 categories...')
for cat in level2_categories:
cat.save()
# Level 3: Leaf categories
for idx, parent in enumerate(level2_categories):
meta = level2_metadata[idx]
num_level3 = random.randint(2, 3)
for k in range(num_level3):
title = generate_category_title()
slug_counter += 1
slug = f"cat-l3-{slug_counter}-{uuid.uuid4().hex[:6]}"
cat = HadisCategory(
parent=parent,
sect=meta['sect'],
source_type=meta['source_type'],
title=make_json_field(title),
description=make_json_field(generate_category_description(title)),
order=k + 1,
slug=slug,
)
cat.save()
all_leaf_categories.append(cat)
total_categories = HadisCategory.objects.count()
self.stdout.write(self.style.SUCCESS(f" Created {total_categories} categories total"))
@ -275,58 +317,61 @@ class Command(BaseCommand):
return all_leaf_categories
def create_hadis(self, leaf_categories):
"""Create hadis records distributed across categories, up to MAX_HADIS_RECORDS."""
self.stdout.write(f'Creating hadis entries (max {self.MAX_HADIS_RECORDS})...')
hadis_count = 0
hadis_number = 1
# Calculate hadis per category to reach ~500 total
def create_hadis_bulk(self, leaf_categories):
"""Create hadis records using bulk_create for maximum performance."""
self.stdout.write(f'Creating hadis entries using bulk_create (max {self.MAX_HADIS_RECORDS})...')
num_categories = len(leaf_categories)
if num_categories == 0:
self.stdout.write(self.style.WARNING('No leaf categories found!'))
return
# Pre-generate all hadis objects
hadis_list = []
hadis_number = 1
hadis_per_cat = max(self.HADIS_PER_CATEGORY, self.MAX_HADIS_RECORDS // num_categories)
for idx, category in enumerate(leaf_categories):
# Stop if we've reached the limit
if hadis_count >= self.MAX_HADIS_RECORDS:
for category in leaf_categories:
if len(hadis_list) >= self.MAX_HADIS_RECORDS:
break
# Create hadis for this category
for i in range(hadis_per_cat):
if hadis_count >= self.MAX_HADIS_RECORDS:
for _ in range(hadis_per_cat):
if len(hadis_list) >= self.MAX_HADIS_RECORDS:
break
title = generate_hadis_title()
title = random.choice(RUSSIAN_HADIS_TITLES)
text = generate_hadis_text()
translation = text # Same as text since it's already in Russian
# Generate unique slug to avoid duplicates
opening = random.choice(RUSSIAN_HADIS_OPENINGS).rstrip(':')
# Pre-generate slug (bypass model's save method)
slug = generate_unique_slug('hadis-ru', hadis_number)
Hadis.objects.create(
hadis = Hadis(
category=category,
number=hadis_number,
slug=slug,
title=make_json_field(title),
title_narrator=make_json_field(random.choice(RUSSIAN_HADIS_OPENINGS).rstrip(':')),
title_narrator=make_json_field(opening),
description=make_json_field(f"Хадис номер {hadis_number} из категории {category.title[0]['text']}"),
text=text,
translation=make_json_field(translation),
translation=make_json_field(text),
status=True,
address=make_json_field(f"Том {random.randint(1, 10)}, страница {random.randint(1, 500)}"),
explanation=make_json_field(f"Этот хадис учит нас важности {title.lower()}."),
share_link=f"/hadis/{slug}",
)
hadis_list.append(hadis)
hadis_number += 1
hadis_count += 1
# Progress indicator every 5 categories
if idx % 5 == 0:
self.stdout.write(f" Progress: {hadis_count}/{self.MAX_HADIS_RECORDS} hadis created...")
# Bulk create in batches
total_created = 0
for i in range(0, len(hadis_list), self.BULK_BATCH_SIZE):
batch = hadis_list[i:i + self.BULK_BATCH_SIZE]
Hadis.objects.bulk_create(batch, batch_size=self.BULK_BATCH_SIZE)
total_created += len(batch)
self.stdout.write(f" Progress: {total_created}/{len(hadis_list)} hadis created...")
self.stdout.write(self.style.SUCCESS(f" Created {hadis_count} hadis entries"))
self.stdout.write(self.style.SUCCESS(f" Created {total_created} hadis entries"))
def print_statistics(self):
"""Print final statistics."""
@ -336,11 +381,12 @@ class Command(BaseCommand):
self.stdout.write(f"Hadis: {Hadis.objects.count()}")
# Show hadis per category stats
leaf_cats = []
for cat in HadisCategory.objects.all():
if not HadisCategory.objects.filter(parent=cat).exists():
leaf_cats.append(cat)
if leaf_cats:
leaf_cats = HadisCategory.objects.filter(children__isnull=True)
if leaf_cats.exists():
hadis_counts = [Hadis.objects.filter(category=cat).count() for cat in leaf_cats]
self.stdout.write(f"Hadis per leaf category: min={min(hadis_counts)}, max={max(hadis_counts)}, avg={sum(hadis_counts)/len(hadis_counts):.1f}")
if hadis_counts:
self.stdout.write(
f"Hadis per leaf category: min={min(hadis_counts)}, "
f"max={max(hadis_counts)}, avg={sum(hadis_counts)/len(hadis_counts):.1f}"
)
Loading…
Cancel
Save