"""Django management command that seeds randomly generated Russian-language data.

Creates HadisSect, HadisCategory (three levels) and Hadis records.
"""

import random
import uuid
from django.core.management.base import BaseCommand
from django.db import transaction, connection
from apps.hadis.models import Hadis, HadisCategory, HadisSect
# Russian word patterns for random combination.
# A category title is built as "<prefix> <topic> [<suffix>]".
RUSSIAN_CATEGORY_PREFIXES = [
    "Основы",
    "Принципы",
    "Учение",
    "Заповеди",
    "Наставления",
    "Мудрость",
    "Знания",
    "Истина",
    "Путь",
    "Свет",
    "Благочестие",
    "Праведность",
    "Духовность",
    "Вера",
    "Надежда",
]

# Topics are in the genitive case so they read naturally after a prefix.
RUSSIAN_CATEGORY_TOPICS = [
    "молитвы",
    "поста",
    "милостыни",
    "паломничества",
    "джихада",
    "брака",
    "семьи",
    "торговли",
    "справедливости",
    "терпения",
    "благодарности",
    "покаяния",
    "прощения",
    "любви",
    "мира",
    "знания",
    "мудрости",
    "веры",
    "надежды",
    "смирения",
]

# Optional trailing phrase appended to a category title.
RUSSIAN_CATEGORY_SUFFIXES = [
    "в исламе",
    "для верующих",
    "и праведность",
    "и благочестие",
    "и духовность",
    "и истина",
    "и свет",
    "и путь",
]

# A hadis text is built as "<opening> <body> [<ending>]".
RUSSIAN_HADIS_OPENINGS = [
    "Пророк (мир ему) сказал:",
    "Имам Али (мир ему) сказал:",
    "Имам Садик (мир ему) сказал:",
    "Имам Хусейн (мир ему) сказал:",
    "Передается от Пророка (мир ему):",
    "Рассказывает имам Бакир (мир ему):",
    "Сообщается от имама Казима (мир ему):",
]

RUSSIAN_HADIS_BODIES = [
    "Лучший из вас тот, кто учится и учит других.",
    "Ищите знания от колыбели до могилы.",
    "Терпение - ключ к облегчению.",
    "Молитва - столп религии.",
    "Знание - свет для сердца.",
    "Доброта к родителям - обязанность верующего.",
    "Справедливость - основа власти.",
    "Правда ведет к благочестию.",
    "Смирение возвышает человека.",
    "Благодарность увеличивает благословения.",
    "Прощение - признак силы.",
    "Терпение приносит награду.",
    "Вера укрепляется добрыми делами.",
    "Знание без действия бесполезно.",
    "Лучшее богатство - довольство.",
    "Самый сильный - тот, кто владеет собой в гневе.",
    "Сосед имеет права на тебя.",
    "Улыбка - это милостыня.",
    "Чистота - половина веры.",
    "Намерение определяет ценность дела.",
]

# The final empty string makes "no ending" one of the possible choices.
RUSSIAN_HADIS_ENDINGS = [
    "И это истина от Всевышнего.",
    "Так учит нас ислам.",
    "Это путь праведных.",
    "Запомните это наставление.",
    "Следуйте этому пути.",
    "Это мудрость пророков.",
    "Берегите это знание.",
    "",
]

RUSSIAN_HADIS_TITLES = [
    "О терпении и награде",
    "О знании и мудрости",
    "О молитве и поклонении",
    "О справедливости и праведности",
    "О семье и воспитании",
    "О доброте и милосердии",
    "О вере и благочестии",
    "О покаянии и прощении",
    "О благодарности Аллаху",
    "О смирении и скромности",
    "О правде и честности",
    "О соседях и обществе",
    "О торговле и справедливости",
    "О чистоте и гигиене",
    "О намерении и искренности",
]

# Display data per sect, keyed by the value stored in HadisSect.sect_type.
RUSSIAN_SECT_DATA = {
    "shia": {
        "title": "Шиизм",
        "description": "Шиитское направление ислама, следующее за семьей Пророка (мир ему и его семье).",
    },
    "sunni": {
        "title": "Суннизм",
        "description": "Суннитское направление ислама, следующее сунне Пророка (мир ему).",
    },
}
def make_json_field(text: str, lang: str = 'ru') -> list:
    """Wrap ``text`` in the JSON structure used by the models.

    Args:
        text: The text to store.
        lang: Language code recorded alongside the text (defaults to ``'ru'``).

    Returns:
        A single-element list ``[{'text': text, 'language_code': lang}]``.
    """
    return [{'text': text, 'language_code': lang}]
def generate_unique_slug(prefix, number) -> str:
    """Build a slug of the form ``<prefix>-<number>-<8 hex chars>``.

    Args:
        prefix: Leading part of the slug.
        number: Sequence number embedded after the prefix.

    Returns:
        The slug string; the trailing eight hex characters come from a fresh
        ``uuid4`` so repeated calls with the same arguments differ.
    """
    unique_suffix = uuid.uuid4().hex[:8]
    return f"{prefix}-{number}-{unique_suffix}"
def generate_category_title() -> str:
    """Generate a random Russian category title.

    Returns:
        ``"<prefix> <topic>"`` and, in roughly half of the calls, a trailing
        ``" <suffix>"`` drawn from the module-level word lists.
    """
    prefix = random.choice(RUSSIAN_CATEGORY_PREFIXES)
    topic = random.choice(RUSSIAN_CATEGORY_TOPICS)
    # The suffix is optional: it is only drawn when the coin flip succeeds.
    suffix = random.choice(RUSSIAN_CATEGORY_SUFFIXES) if random.random() > 0.5 else ""
    # strip() removes the trailing space left behind by an empty suffix.
    return f"{prefix} {topic} {suffix}".strip()
def generate_category_description(title: str) -> str:
    """Return a random Russian description sentence built around ``title``.

    Args:
        title: Category title to embed; two of the four templates use it
            verbatim and two use its lower-cased form.

    Returns:
        One of four template sentences, chosen at random.
    """
    lowered = title.lower()
    descriptions = [
        f"Раздел посвящен теме: {title}. Здесь собраны важные хадисы и наставления.",
        f"В этом разделе вы найдете хадисы о {lowered}.",
        f"Категория содержит материалы по теме: {title}.",
        f"Изучайте {lowered} через достоверные хадисы.",
    ]
    return random.choice(descriptions)
def generate_hadis_text() -> str:
    """Generate a random hadis text in Russian.

    Returns:
        ``"<opening> <body> <ending>"`` assembled from the module-level
        phrase lists, with surrounding whitespace stripped.
    """
    opening = random.choice(RUSSIAN_HADIS_OPENINGS)
    body = random.choice(RUSSIAN_HADIS_BODIES)
    ending = random.choice(RUSSIAN_HADIS_ENDINGS)
    # The endings list contains "", so strip() drops the resulting trailing space.
    return f"{opening} {body} {ending}".strip()
class Command(BaseCommand):
    """Seed Russian-language HadisSect, HadisCategory and Hadis records.

    The command creates or updates one sect per entry in
    ``RUSSIAN_SECT_DATA``, builds a three-level category tree under every
    sect/source-type pair, and then bulk-inserts ``MAX_HADIS_RECORDS`` hadis
    spread over the leaf categories. All writes, including the optional
    ``--clear`` wipe, run inside a single transaction.
    """

    help = 'Seed Russian language data for HadisSect, HadisCategory (3 levels), and Hadis (up to 500 records)'

    # Configuration constants
    MAX_HADIS_RECORDS = 500  # Total number of hadis created per run
    HADIS_PER_CATEGORY = 10  # Preferred minimum number of hadis per leaf category
    BULK_BATCH_SIZE = 100  # Batch size for bulk_create

    def add_arguments(self, parser):
        """Register the ``--clear`` and ``--force`` flags."""
        parser.add_argument(
            '--clear',
            action='store_true',
            help='Clear existing data before seeding',
        )
        parser.add_argument(
            '--force',
            action='store_true',
            help='Force seeding even if data already exists',
        )

    def handle(self, *args, **options):
        """Run the seeding workflow.

        The run is skipped when at least ``MAX_HADIS_RECORDS`` hadis already
        exist, unless ``--force`` or ``--clear`` was given. Any exception is
        reported and re-raised, which rolls back everything done inside the
        transaction (including the ``--clear`` deletions).
        """
        self.stdout.write(self.style.WARNING('=== Starting Russian Data Seeding (Optimized) ==='))

        # Check if data already exists (prevent re-running in loops)
        existing_hadis = Hadis.objects.count()
        existing_categories = HadisCategory.objects.count()
        already_seeded = existing_hadis >= self.MAX_HADIS_RECORDS
        if already_seeded and not (options['force'] or options['clear']):
            self.stdout.write(self.style.SUCCESS(
                f'Data already seeded ({existing_hadis} hadis, {existing_categories} categories). '
                'Use --force to override or --clear to reset.'
            ))
            return

        try:
            with transaction.atomic():
                # Clearing happens inside the transaction so that a failed
                # seed does not leave the database emptied.
                if options['clear']:
                    self._clear_existing_data()

                # Step 1: Create Sects
                sects = self.create_sects()

                # Step 2: Create Categories (3 levels)
                leaf_categories = self.create_categories_optimized(sects)

                # Step 3: Create Hadis using bulk_create
                self.create_hadis_bulk(leaf_categories)

                # Step 4: Rebuild MPTT tree structure
                self.stdout.write('Rebuilding MPTT tree...')
                HadisCategory.objects.rebuild()

                self.print_statistics()
                self.stdout.write(self.style.SUCCESS('=== Russian Data Seeding Complete ==='))
        except Exception as e:
            self.stdout.write(self.style.ERROR(f'Error during seeding: {e}'))
            raise

    def _clear_existing_data(self):
        """Delete all Hadis, HadisCategory and HadisSect rows, in that order."""
        self.stdout.write(self.style.WARNING('Clearing existing data...'))
        Hadis.objects.all().delete()
        HadisCategory.objects.all().delete()
        HadisSect.objects.all().delete()
        self.stdout.write(self.style.SUCCESS('Existing data cleared.'))

    def create_sects(self):
        """Create or update one HadisSect per entry in ``RUSSIAN_SECT_DATA``.

        Returns:
            A dict mapping each sect type key to its HadisSect instance.
        """
        self.stdout.write('Creating sects...')
        sects = {}
        for sect_type, data in RUSSIAN_SECT_DATA.items():
            sect, created = HadisSect.objects.update_or_create(
                sect_type=sect_type,
                defaults={
                    'title': make_json_field(data['title']),
                    'description': make_json_field(data['description']),
                    'is_active': True,
                    'order': 1 if sect_type == 'shia' else 2,
                }
            )
            sects[sect_type] = sect
            status = 'Created' if created else 'Updated'
            self.stdout.write(f" {status} sect: {data['title']}")
        return sects

    def _build_category(self, level, counter, parent, sect, source_type, order):
        """Return an unsaved HadisCategory with a random title and unique slug.

        Args:
            level: Tree depth (1-3); only used in the slug.
            counter: Running category number; only used in the slug.
            parent: Parent category, or ``None`` for a root.
            sect: HadisSect the category belongs to.
            source_type: Source type value stored on the category.
            order: Position of the category among its siblings.
        """
        title = generate_category_title()
        slug = f"cat-l{level}-{counter}-{uuid.uuid4().hex[:6]}"
        return HadisCategory(
            parent=parent,
            sect=sect,
            source_type=source_type,
            title=make_json_field(title),
            description=make_json_field(generate_category_description(title)),
            order=order,
            slug=slug,
        )

    def create_categories_optimized(self, sects):
        """Create the three-level category tree, one level at a time.

        Every sect/source-type pair gets two roots, every root two children,
        and every child two or three leaves. Children inherit sect and source
        type from their parent.

        Args:
            sects: Mapping of sect type to HadisSect, as returned by
                ``create_sects``.

        Returns:
            The list of saved leaf (level 3) categories.
        """
        self.stdout.write('Creating category tree (3 levels) - optimized...')
        source_types = ['hadith', 'quran']
        slug_counter = 0

        # Level 1: Root categories
        level1_categories = []
        for sect in sects.values():
            for source_type in source_types:
                for order in (1, 2):
                    slug_counter += 1
                    level1_categories.append(
                        self._build_category(1, slug_counter, None, sect, source_type, order)
                    )

        # Categories are saved one by one rather than bulk-created; per the
        # original author's note this is required because of MPTT.
        self.stdout.write(f' Creating {len(level1_categories)} level 1 categories...')
        for cat in level1_categories:
            cat.save()

        # Level 2: Child categories
        level2_categories = []
        for parent in level1_categories:
            for order in (1, 2):
                slug_counter += 1
                level2_categories.append(
                    self._build_category(2, slug_counter, parent, parent.sect, parent.source_type, order)
                )

        self.stdout.write(f' Creating {len(level2_categories)} level 2 categories...')
        for cat in level2_categories:
            cat.save()

        # Level 3: Leaf categories
        all_leaf_categories = []
        for parent in level2_categories:
            num_level3 = random.randint(2, 3)
            for order in range(1, num_level3 + 1):
                slug_counter += 1
                cat = self._build_category(3, slug_counter, parent, parent.sect, parent.source_type, order)
                cat.save()
                all_leaf_categories.append(cat)

        # Report what this run created, not the table total, so the figure is
        # also correct when seeding on top of existing data with --force.
        total_categories = len(level1_categories) + len(level2_categories) + len(all_leaf_categories)
        self.stdout.write(self.style.SUCCESS(f" Created {total_categories} categories total"))
        self.stdout.write(self.style.SUCCESS(f" Leaf categories: {len(all_leaf_categories)}"))
        return all_leaf_categories

    def _build_distribution(self, num_categories):
        """Split ``MAX_HADIS_RECORDS`` across ``num_categories`` categories.

        Each category starts with ``HADIS_PER_CATEGORY`` records, or fewer if
        that would exceed the maximum. What is left is spread evenly, and any
        final leftover goes one record each to randomly chosen categories.

        Args:
            num_categories: Number of leaf categories; must be positive.

        Returns:
            A list of per-category counts that sums to ``MAX_HADIS_RECORDS``.
        """
        per_category = self.HADIS_PER_CATEGORY
        if per_category * num_categories > self.MAX_HADIS_RECORDS:
            # If minimum would exceed max, reduce proportionally
            per_category, remainder = divmod(self.MAX_HADIS_RECORDS, num_categories)
        else:
            # We can give minimum to all, distribute remainder
            remainder = self.MAX_HADIS_RECORDS - per_category * num_categories

        extra_for_all, leftover = divmod(remainder, num_categories)
        distribution = [per_category + extra_for_all] * num_categories
        for idx in random.sample(range(num_categories), leftover):
            distribution[idx] += 1
        return distribution

    def create_hadis_bulk(self, leaf_categories):
        """Create hadis records for the leaf categories using ``bulk_create``.

        Hadis numbers start at 1 on every run and increase across categories.
        Nothing is created when ``leaf_categories`` is empty.

        Args:
            leaf_categories: Saved HadisCategory instances to attach hadis to.
        """
        self.stdout.write(f'Creating hadis entries using bulk_create (target: {self.MAX_HADIS_RECORDS})...')
        num_categories = len(leaf_categories)
        if num_categories == 0:
            self.stdout.write(self.style.WARNING('No leaf categories found!'))
            return

        distribution = self._build_distribution(num_categories)
        self.stdout.write(f' Distribution: {num_categories} categories, '
                          f'min={min(distribution)}, max={max(distribution)}, '
                          f'total={sum(distribution)}')

        # Pre-generate all hadis objects
        hadis_list = []
        hadis_number = 1
        for category, count_for_this_category in zip(leaf_categories, distribution):
            category_title = category.title[0]['text']
            for _ in range(count_for_this_category):
                title = random.choice(RUSSIAN_HADIS_TITLES)
                text = generate_hadis_text()
                opening = random.choice(RUSSIAN_HADIS_OPENINGS).rstrip(':')
                # Pre-generate slug (bypass model's save method)
                slug = generate_unique_slug('hadis-ru', hadis_number)
                hadis_list.append(Hadis(
                    category=category,
                    number=hadis_number,
                    slug=slug,
                    title=make_json_field(title),
                    title_narrator=make_json_field(opening),
                    description=make_json_field(f"Хадис номер {hadis_number} из категории {category_title}"),
                    text=text,
                    translation=make_json_field(text),
                    status=True,
                    address=make_json_field(f"Том {random.randint(1, 10)}, страница {random.randint(1, 500)}"),
                    explanation=make_json_field(f"Этот хадис учит нас важности {title.lower()}."),
                    share_link=f"/hadis/{slug}",
                ))
                hadis_number += 1

        # Bulk create in batches so progress can be reported after each one
        total_created = 0
        for start in range(0, len(hadis_list), self.BULK_BATCH_SIZE):
            batch = hadis_list[start:start + self.BULK_BATCH_SIZE]
            Hadis.objects.bulk_create(batch, batch_size=self.BULK_BATCH_SIZE)
            total_created += len(batch)
            self.stdout.write(f" Progress: {total_created}/{len(hadis_list)} hadis created...")
        self.stdout.write(self.style.SUCCESS(f" Created {total_created} hadis entries"))

    def print_statistics(self):
        """Print row counts and the hadis-per-leaf-category min/max/average."""
        self.stdout.write("\n=== Statistics ===")
        self.stdout.write(f"Sects: {HadisSect.objects.count()}")
        self.stdout.write(f"Categories: {HadisCategory.objects.count()}")
        self.stdout.write(f"Hadis: {Hadis.objects.count()}")

        # Show hadis per category stats (one count query per leaf category)
        leaf_cats = HadisCategory.objects.filter(children__isnull=True)
        hadis_counts = [Hadis.objects.filter(category=cat).count() for cat in leaf_cats]
        if hadis_counts:
            self.stdout.write(
                f"Hadis per leaf category: min={min(hadis_counts)}, "
                f"max={max(hadis_counts)}, avg={sum(hadis_counts)/len(hadis_counts):.1f}"
            )