Dovodi_Backend/apps/hadis/management/commands/import_legacy_data.py


								import os

								import json

								import csv

								from django.core.management.base import BaseCommand

								from django.core.files import File

								from django.db import transaction

								from django.conf import settings


								# Import all necessary models

								from apps.hadis.models import (

								    HadisCategory, HadisSect, HadisStatus, HadisTag, Hadis,

								    HadisCorrection, HadisReference, ReferenceImage, HadisTransmitter,

								    BookReference, BookReferenceImage, BookReferenceDocument, BookAuthor, BookSubjectArea,

								    Transmitters, NarratorLayer, TransmitterReliability, OpinionStatus,

								    TransmitterOpinion, TransmitterOriginalText, OriginalTextReference, OriginalTextReferenceImage,

								    HadisInterpretation, InterpretationReference, InterpretationReferenceImage, CorrectionReference, CorrectionReferenceImage

								)


								class Command(BaseCommand):

								    help = 'Import legacy Hadith data from JSON, CSV, and Media folders'


								    def add_arguments(self, parser):
								        """Register the command's single positional argument, ``base_dir``.

								        Args:
								            parser: The argument parser supplied by the management framework.
								        """
								        base_dir_options = {
								            'type': str,
								            'help': 'Absolute path to the "тестовая база данных" directory',
								        }
								        parser.add_argument('base_dir', **base_dir_options)


								    def wrap_lang(self, text, lang="ru"):
								        """Wrap a value in the multilingual list schema used by the models.

								        Args:
								            text: Value to store; ``None`` is treated as an empty string and
								                anything else is converted with ``str()`` and stripped.
								            lang: Language code recorded next to the text.

								        Returns:
								            A one-element list ``[{"language_code": lang, "text": ...}]``.
								            A list is always returned, even for empty text.
								        """
								        raw = "" if text is None else text
								        entry = {
								            "language_code": lang,
								            "text": str(raw).strip(),
								        }
								        return [entry]


								    @transaction.atomic
								    def handle(self, *args, **kwargs):
								        """Run the three-phase legacy import inside one database transaction.

								        Phase 1 loads scholars (``AUT_UI.csv``) and books (``bib.csv`` plus the
								        ``books/`` media folder), phase 2 loads narrators (``narrators.json``)
								        and phase 3 loads hadiths, corrections and interpretations
								        (``tathir.json``). Each input file is optional; when it is missing the
								        corresponding load is skipped.

								        Args:
								            *args: Unused positional arguments.
								            **kwargs: Parsed command options; ``base_dir`` is required.
								        """
								        base_dir = kwargs['base_dir']

								        # isdir (not exists): a regular file is not a usable base directory.
								        if not os.path.isdir(base_dir):
								            self.stderr.write(self.style.ERROR(f'Directory not found: {base_dir}'))
								            return

								        self.stdout.write(self.style.SUCCESS(f'Starting import from: {base_dir}'))

								        # --- PHASE 1: SCHOLARS & BOOKS ---
								        self.stdout.write(self.style.WARNING('\n--- PHASE 1: Loading Scholars & Books ---'))
								        scholars_map = self._load_scholars(os.path.join(base_dir, 'AUT_UI.csv'))
								        self._import_books(base_dir, os.path.join(base_dir, 'bib.csv'))

								        # --- PHASE 2: NARRATORS ---
								        self.stdout.write(self.style.WARNING('\n--- PHASE 2: Loading Narrators ---'))
								        self._import_narrators(base_dir, os.path.join(base_dir, 'narrators.json'), scholars_map)

								        # --- PHASE 3: HADITHS (Arguments, Corrections, Interpretations) ---
								        self.stdout.write(self.style.WARNING('\n--- PHASE 3: Loading Hadiths ---'))
								        self._import_hadiths(base_dir, os.path.join(base_dir, 'tathir.json'))

								    @staticmethod
								    def _first_or_blank(values):
								        """Return the first element of ``values``, or ``''`` when it is empty or None."""
								        return values[0] if values else ''

								    @staticmethod
								    def _int_or_none(value):
								        """Return ``int(value)`` when its string form is all digits, else None."""
								        text = str(value)
								        return int(text) if text.isdigit() else None

								    def _load_scholars(self, aut_ui_path):
								        """Read the scholar CSV into ``{id: {"ar": name, "ru": name}}``.

								        Rows with fewer than three columns are ignored. Returns an empty
								        dict when the file does not exist.
								        """
								        scholars_map = {}
								        if os.path.exists(aut_ui_path):
								            # newline='' is what the csv module requires for correct handling
								            # of newlines embedded in quoted fields.
								            with open(aut_ui_path, 'r', encoding='utf-8', newline='') as f:
								                for row in csv.reader(f):
								                    if len(row) >= 3:
								                        scholars_map[row[0].strip()] = {
								                            "ar": row[1].strip(),
								                            "ru": row[2].strip(),
								                        }
								            self.stdout.write(f'Loaded {len(scholars_map)} scholars into memory.')
								        return scholars_map

								    def _import_books(self, base_dir, bib_path):
								        """Create one ``BookReference`` per volume for every row of ``bib.csv``.

								        Rows with fewer than five columns are skipped. Files found under
								        ``<base_dir>/books/<legacy id>`` are attached to the matching volume.
								        """
								        if not os.path.exists(bib_path):
								            return

								        with open(bib_path, 'r', encoding='utf-8', newline='') as f:
								            for row in csv.reader(f):
								                if len(row) < 5:
								                    continue

								                base_legacy_id = row[0].strip()
								                author_name = row[2].strip()
								                base_title = row[3].strip()

								                # Extract total volumes (Column 11 / Index 10)
								                vol_str = row[10].strip() if len(row) > 10 else ''
								                try:
								                    total_vols = int(vol_str) if vol_str.isdigit() else 1
								                except ValueError:
								                    # isdigit() accepts characters (e.g. superscripts) that int() rejects.
								                    total_vols = 1
								                # A literal "0" would otherwise produce an empty range and drop the book.
								                total_vols = max(total_vols, 1)

								                is_multi_vol = total_vols > 1
								                book_folder = os.path.join(base_dir, 'books', base_legacy_id)

								                # Create a BookReference for EVERY volume
								                for v in range(1, total_vols + 1):
								                    # Generate unique ID and Title for multi-volume books
								                    legacy_id = f"{base_legacy_id}-v{v}" if is_multi_vol else base_legacy_id
								                    title_text = f"{base_title} (Vol {v})" if is_multi_vol else base_title

								                    book, _ = BookReference.objects.update_or_create(
								                        legacy_id=legacy_id,
								                        defaults={
								                            'title': self.wrap_lang(title_text),
								                            'number_of_volumes': total_vols,
								                            'volume': str(v),
								                            'year_of_publication': row[9].strip() if len(row) > 9 else '',
								                            'source_url': row[11].strip() if len(row) > 11 else '',
								                            'description': self.wrap_lang(row[12].strip() if len(row) > 12 else ''),
								                            'publisher': self.wrap_lang(row[5].strip() if len(row) > 5 else ''),
								                            'language': self.wrap_lang(''),
								                        },
								                    )

								                    if author_name:
								                        author, _ = BookAuthor.objects.get_or_create(name=self.wrap_lang(author_name))
								                        book.authors.add(author)

								                    if os.path.exists(book_folder):
								                        self._attach_book_files(book, book_folder, v, is_multi_vol)

								        self.stdout.write(self.style.SUCCESS('Books (split by volumes) loaded successfully.'))

								    def _attach_book_files(self, book, book_folder, volume, is_multi_vol):
								        """Attach PDFs and images found under ``book_folder`` to one book volume.

								        For multi-volume books a PDF must be named ``<n>.pdf`` or ``<nn>.pdf``
								        and an image must sit in a folder named ``<n>`` or ``<nn>``; for
								        single-volume books every PDF and image in the tree is attached.
								        """
								        # NOTE(review): a new document/image row is created on every run, so
								        # re-running the import appears to duplicate attachments - confirm.
								        vol_num_str = str(volume)
								        vol_names = (vol_num_str, vol_num_str.zfill(2))  # "1" and "01"
								        pdf_names = tuple(f"{name}.pdf" for name in vol_names)

								        for root, _dirs, files in os.walk(book_folder):
								            folder_name = os.path.basename(root)

								            for file_name in files:
								                file_path = os.path.join(root, file_name)
								                file_lower = file_name.lower()

								                if file_lower.endswith('.pdf'):
								                    if file_lower in pdf_names or not is_multi_vol:
								                        with open(file_path, 'rb') as doc_f:
								                            doc = BookReferenceDocument(book_reference=book, volume=vol_num_str, title=file_name)
								                            doc.file.save(file_name, File(doc_f), save=True)

								                elif file_lower.endswith(('.png', '.jpg', '.jpeg', '.gif')):
								                    if folder_name in vol_names or not is_multi_vol:
								                        with open(file_path, 'rb') as img_f:
								                            img = BookReferenceImage(book_reference=book, volume=vol_num_str)
								                            img.image.save(file_name, File(img_f), save=True)

								    def _import_narrators(self, base_dir, narrators_path, scholars_map):
								        """Load transmitters with their opinions and original texts from JSON.

								        Args:
								            base_dir: Root of the legacy data; screenshots are read from
								                ``<base_dir>/screens_trx/<narrator id>/<folder>``.
								            narrators_path: Path to ``narrators.json``.
								            scholars_map: Scholar lookup built by ``_load_scholars``.
								        """
								        if not os.path.exists(narrators_path):
								            return

								        with open(narrators_path, 'r', encoding='utf-8') as f:
								            n_data_list = json.load(f).get('narrators', [])

								        # Model field -> key shared by the Russian ("info") and Arabic
								        # ("info.arabic") sections of a narrator record.
								        bilingual_fields = {
								            'full_name': 'name',
								            'known_as': 'known_name',
								            'kunya': 'kunya',
								            'nickname': 'nickname',
								            'origin': 'origin',
								            'lived_in': 'city_of_residence',
								            'died_in': 'city_of_death',
								        }

								        for n_data in n_data_list:
								            legacy_id = n_data.get('id')
								            legacy_number = self._int_or_none(n_data.get('narrator_number'))

								            info = n_data.get('info', {})
								            ar_info = info.get('arabic', {})

								            reliability, _ = TransmitterReliability.objects.get_or_create(
								                title=self.wrap_lang(n_data.get('reliability', 'Unknown'))
								            )

								            generation = self._int_or_none(n_data.get('generation'))
								            if generation:
								                NarratorLayer.objects.get_or_create(
								                    number=generation,
								                    defaults={
								                        'name': self.wrap_lang(f'Layer {generation}'),
								                        'description': self.wrap_lang(''),
								                    },
								                )

								            defaults = {
								                model_field: (
								                    self.wrap_lang(info.get(key, ''), 'ru')
								                    + self.wrap_lang(ar_info.get(key, ''), 'ar')
								                )
								                for model_field, key in bilingual_fields.items()
								            }
								            defaults.update({
								                'legacy_number': legacy_number,
								                'description': self.wrap_lang(''),
								                'generation': generation,
								                'reliability': reliability,
								                'in_sahih_bukhari': n_data.get('transmitted_to_bukhari', False),
								                'in_sahih_muslim': n_data.get('transmitted_to_muslim', False),
								                'relatives_raw': info.get('relatives', {}),
								            })

								            transmitter, _ = Transmitters.objects.update_or_create(
								                legacy_id=legacy_id,
								                defaults=defaults,
								            )

								            # Opinions
								            for op in n_data.get('strengthened_weakened', {}).get('review', []):
								                author_ui = op.get('author_ui')
								                # Unknown scholars fall back to their raw identifier in both languages.
								                scholar_data = scholars_map.get(author_ui, {"ar": author_ui, "ru": author_ui})
								                TransmitterOpinion.objects.get_or_create(
								                    transmitter=transmitter,
								                    opinion_text=self.wrap_lang(op.get('quote_original', ''), 'ar') + self.wrap_lang(op.get('quote_translated', ''), 'ru'),
								                    scholar_name=self.wrap_lang(scholar_data['ar'], 'ar') + self.wrap_lang(scholar_data['ru'], 'ru'),
								                )

								            # Original Texts
								            for text_data in n_data.get('excerpts', []):
								                orig_text, _ = TransmitterOriginalText.objects.get_or_create(
								                    transmitter=transmitter,
								                    title=self.wrap_lang(text_data.get('title')),
								                    text=self.wrap_lang(text_data.get('text'), 'ar'),
								                    translation=self.wrap_lang(text_data.get('translation'), 'ru'),
								                )

								                for ed in text_data.get('editions', []):
								                    book_ref = self._get_book_volume(ed.get('book_id'), ed.get('volume'))
								                    ref_obj, _ = OriginalTextReference.objects.get_or_create(
								                        original_text=orig_text, book_reference=book_ref,
								                        volume=ed.get('volume'), page=ed.get('page'), url=ed.get('url'),
								                    )

								                    folder = ed.get('screenshots_folder')
								                    if folder:
								                        self._attach_images(os.path.join(base_dir, 'screens_trx', legacy_id, folder), OriginalTextReferenceImage, ref_obj)

								        self.stdout.write(self.style.SUCCESS('Narrators loaded successfully.'))

								    def _import_hadiths(self, base_dir, tathir_path):
								        """Load hadiths, then their corrections and interpretations.

								        All ``arguments`` items are imported first so that corrections and
								        interpretations can find their parent hadith / category regardless
								        of the order of items in the file.
								        """
								        default_sect, _ = HadisSect.objects.get_or_create(
								            sect_type='sunni',
								            defaults={
								                'title': self.wrap_lang('Sunni'),
								                'description': self.wrap_lang(''),
								            },
								        )

								        if not os.path.exists(tathir_path):
								            return

								        with open(tathir_path, 'r', encoding='utf-8') as f:
								            materials = json.load(f).get('materials', [])

								        arguments = [item for item in materials if item.get('type') == 'arguments']

								        # Map corrections to their parent hadiths
								        correction_to_hadith_map = {
								            conf_id: item.get('id')
								            for item in arguments
								            for conf_id in item.get('confirmation', [])
								        }

								        # A: BASE HADITHS
								        for item in arguments:
								            self._import_argument(base_dir, item, default_sect)

								        for item in materials:
								            i_type = item.get('type')
								            # B: CORRECTIONS
								            if i_type == 'authenticity_analysis':
								                self._import_correction(base_dir, item, correction_to_hadith_map)
								            # C: INTERPRETATIONS
								            elif i_type == 'interpretation':
								                self._import_interpretation(base_dir, item)

								        self.stdout.write(self.style.SUCCESS('\nAll Hadiths, Corrections, and Interpretations Imported Successfully!'))

								    def _import_argument(self, base_dir, item, default_sect):
								        """Create or update one hadith with its chains, editions and images."""
								        # A missing, empty or null category list yields a blank category title.
								        cat_str = self._first_or_blank(item.get('category'))
								        category, _ = HadisCategory.objects.get_or_create(
								            title=self.wrap_lang(cat_str),
								            defaults={
								                'sect': default_sect,
								                'source_type': item.get('subtype', 'hadith') or 'hadith',
								                'description': self.wrap_lang(''),
								            },
								        )
								        status, _ = HadisStatus.objects.get_or_create(
								            title=self.wrap_lang(item.get('authenticity', '')),
								            defaults={'description': self.wrap_lang('')},
								        )

								        alias = self._first_or_blank(item.get('aliases'))
								        hadis, _ = Hadis.objects.update_or_create(
								            legacy_id=item.get('id'),
								            defaults={
								                'category': category,
								                'hadis_status': status,
								                'title': self.wrap_lang(alias),
								                'title_narrator': self.wrap_lang(alias),
								                'description': self.wrap_lang(''),
								                'explanation': self.wrap_lang(''),
								                'address': self.wrap_lang(''),
								                'hadis_status_text': self.wrap_lang(''),
								                'text': item.get('original_text', ''),
								                'translation': self.wrap_lang(item.get('translation', ''), 'ru'),
								            },
								        )

								        raw_chain = item.get('chain', [])
								        chain_arrays = []
								        if raw_chain:
								            # Normalize: a flat list of ints is a single chain; wrap it so
								            # that chain_arrays is always a list of chains.
								            chain_arrays = [raw_chain] if isinstance(raw_chain[0], int) else raw_chain

								        for chain_idx, narrator_ids in enumerate(chain_arrays):
								            for order_idx, n_id in enumerate(narrator_ids):
								                # filter(legacy_number=None) would match rows whose number is
								                # NULL and link an unrelated transmitter.
								                if n_id is None:
								                    continue
								                transmitter = Transmitters.objects.filter(legacy_number=n_id).first()
								                if transmitter:
								                    layer = NarratorLayer.objects.filter(number=transmitter.generation).first()
								                    HadisTransmitter.objects.get_or_create(
								                        hadis=hadis, transmitter=transmitter, chain_index=chain_idx, order=order_idx,
								                        defaults={'narrator_layer': layer, 'status': transmitter.reliability},
								                    )

								        # Editions & Images
								        for ed in item.get('editions', []):
								            book = self._get_book_volume(ed.get('book_id'), ed.get('volume'))
								            href, _ = HadisReference.objects.get_or_create(
								                hadis=hadis, book_reference=book,
								                defaults={'hadith_number': str(ed.get('hadith_number', '')), 'description': self.wrap_lang('')},
								            )
								            if ed.get('screenshots_folder'):
								                self._attach_images(os.path.join(base_dir, 'screens', item.get('id'), ed.get('screenshots_folder')), ReferenceImage, href, field_name='thumbnail')

								    def _import_correction(self, base_dir, item, correction_to_hadith_map):
								        """Create one correction under the hadith that lists it as a confirmation."""
								        parent_id = correction_to_hadith_map.get(item.get('id'))
								        # Only query with a real id: filter(legacy_id=None) matches NULL rows.
								        parent_hadith = Hadis.objects.filter(legacy_id=parent_id).first() if parent_id else None

								        if not parent_hadith:
								            self.stdout.write(self.style.WARNING(f"Skipping correction {item.get('id')}: parent hadith not found."))
								            return

								        corr, _ = HadisCorrection.objects.get_or_create(
								            hadis=parent_hadith, legacy_id=item.get('id'),
								            defaults={
								                'title': self.wrap_lang(''),
								                'text': item.get('original_text', ''),  # Directly mapped to TextField
								                'translation': self.wrap_lang(item.get('translation', ''), 'ru'),
								            },
								        )
								        for ed in item.get('editions', []):
								            book = self._get_book_volume(ed.get('book_id'), ed.get('volume'))
								            cref, _ = CorrectionReference.objects.get_or_create(correction=corr, book_reference=book, defaults={'hadith_number': str(ed.get('hadith_number', ''))})
								            if ed.get('screenshots_folder'):
								                self._attach_images(os.path.join(base_dir, 'screens', item.get('id'), ed.get('screenshots_folder')), CorrectionReferenceImage, cref)

								    def _import_interpretation(self, base_dir, item):
								        """Create one interpretation under the category named by the item."""
								        cat_str = self._first_or_blank(item.get('category'))
								        category = HadisCategory.objects.filter(title__contains=[{'text': cat_str}]).first()

								        if not category:
								            self.stdout.write(self.style.WARNING(f"Skipping interpretation {item.get('id')}: category not found."))
								            return

								        interp, _ = HadisInterpretation.objects.get_or_create(
								            category=category, legacy_id=item.get('id'),
								            defaults={
								                'title': self.wrap_lang(''),
								                'text': item.get('original_text', ''),
								                'translation': self.wrap_lang(item.get('translation', ''), 'ru'),
								            },
								        )
								        for ed in item.get('editions', []):
								            book = self._get_book_volume(ed.get('book_id'), ed.get('volume'))
								            iref, _ = InterpretationReference.objects.get_or_create(interpretation=interp, book_reference=book, defaults={'hadith_number': str(ed.get('hadith_number', ''))})
								            if ed.get('screenshots_folder'):
								                self._attach_images(os.path.join(base_dir, 'screens', item.get('id'), ed.get('screenshots_folder')), InterpretationReferenceImage, iref)


								    def _get_book_volume(self, book_id, volume_str):

								        """Finds the specific volume of a book, with fallbacks."""

								        if not book_id: return None


								        # 1. Try to find specific volume (e.g., uuid-v2)

								        if volume_str:

								            vol_clean = ''.join(filter(str.isdigit, str(volume_str))) # extracts "2" from "Vol 2"

								            if vol_clean:

								                book = BookReference.objects.filter(legacy_id=f"{book_id}-v{vol_clean}").first()

								                if book: return book


								        # 2. Fallback: Find the base book (single volume) or the first volume available

								        return BookReference.objects.filter(legacy_id__startswith=book_id).first()


								    def _attach_images(self, folder_path, ImageModelClass, reference_instance, field_name='image'):

								        """Helper to safely scan a folder and attach images to a specific reference instance."""

								        if os.path.exists(folder_path):

								            for i, filename in enumerate(sorted(os.listdir(folder_path))):

								                if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif')):

								                    file_path = os.path.join(folder_path, filename)

								                    with open(file_path, 'rb') as f:

								                        img_obj = ImageModelClass(reference=reference_instance, priority=i)


								                        # Dynamically grab the correct field ('image' or 'thumbnail')

								                        image_field = getattr(img_obj, field_name)

								                        image_field.save(filename, File(f), save=True)