import json
import re
import sys

# Compiled once at import time; detect_language() is called for every string
# field that gets converted.
# The whole Unicode Cyrillic block is used (not just U+0410-U+044F) so that
# letters such as "ё"/"Ё" are recognised as Cyrillic too.
_CYRILLIC_RE = re.compile(r'[\u0400-\u04FF]')
_ARABIC_RE = re.compile(r'[\u0600-\u06FF]')

# Model label -> names of the fields that are converted from a plain string
# to a list of {"text", "language_code"} entries.
TARGETS = {
    'hadis.narratorlayer': [
        'name',
        'description',
    ],
    'hadis.transmitters': [
        'full_name',
        'kunya',
        'known_as',
        'nickname',
        'origin',
        'lived_in',
        'died_in',
        'description',
    ],
    'hadis.transmitteropinion': [
        'scholar_name',
        'opinion_text',
    ],
    'hadis.transmitteroriginaltext': [
        'title',
        'text',
    ],
}


def detect_language(text):
    """
    Detect a language code from the characters found in *text*.

    - Cyrillic characters -> 'ru'
    - Arabic/Persian characters -> 'fa'
    - Default -> 'en'

    The Cyrillic check runs first, so text that mixes both scripts is
    reported as 'ru'. Empty values and non-string values yield 'en'.
    """
    if not text or not isinstance(text, str):
        return 'en'

    if _CYRILLIC_RE.search(text):
        return 'ru'

    if _ARABIC_RE.search(text):
        return 'fa'

    return 'en'


def reformat_data(input_file, output_file):
    """
    Convert selected string fields of a JSON dump to a multilingual format.

    *input_file* must contain a JSON list of records, each a dict with a
    'model' key and a 'fields' dict (presumably a Django ``dumpdata``
    fixture -- confirm against the producer of the file). For every record
    whose model is listed in ``TARGETS``, each listed field present in
    'fields' is rewritten in place:

    - ``None``   -> ``[]``
    - a string   -> ``[{"text": <value>, "language_code": <detected>}]``
    - a list     -> left untouched (treated as already converted)
    - other types are left untouched

    The result is written to *output_file* as UTF-8 JSON. Problems are
    reported on stdout and the function returns ``None`` in every case; it
    does not raise for unreadable, malformed or unexpectedly shaped input.
    """
    print(f"📖 Reading {input_file}...")

    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"❌ Error: File '{input_file}' not found.")
        return
    except json.JSONDecodeError as e:
        print(f"❌ Error: Failed to decode JSON. {e}")
        return
    except OSError as e:
        # Permission problems, a directory passed as the path, etc.
        print(f"❌ Error: Could not read '{input_file}'. {e}")
        return

    if not isinstance(data, list):
        # Iterating a dict/str/number here would fail on record.get() below.
        print("❌ Error: Expected a JSON list of records at the top level.")
        return

    processed_count = 0

    for record in data:
        if not isinstance(record, dict):
            continue

        model = record.get('model')
        # isinstance guard: an unhashable 'model' value (list/dict) cannot be
        # used for the TARGETS lookup.
        if not isinstance(model, str) or model not in TARGETS:
            continue

        fields = record.get('fields', {})
        if not isinstance(fields, dict):
            continue

        for field in TARGETS[model]:
            if field not in fields:
                continue

            original_value = fields[field]

            if original_value is None:
                fields[field] = []
            elif isinstance(original_value, str):
                fields[field] = [
                    {
                        "text": original_value,
                        "language_code": detect_language(original_value),
                    }
                ]
            # Lists (already converted) and any other type stay as they are.

        # Counts every record of a targeted model, whether or not any of its
        # fields actually needed conversion.
        processed_count += 1

    print(f"✅ Processed {processed_count} records.")

    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        print(f"💾 Saved reformatted data to: {output_file}")
    except Exception as e:
        # Deliberately broad: this is the script's outermost boundary and the
        # failure is reported rather than swallowed silently.
        print(f"❌ Error writing output file: {e}")


if __name__ == "__main__":
    # Input/Output filenames
    INPUT_FILE = "transmitters_backup.json"
    OUTPUT_FILE = "transmitters_reformatted.json"

    reformat_data(INPUT_FILE, OUTPUT_FILE)