import os
import pandas as pd
from dotenv import load_dotenv
from agno.knowledge.knowledge import Knowledge
from agno.vectordb.qdrant import Qdrant
from agno.vectordb.search import SearchType
import sys
from pathlib import Path

# -----------------------------------------------------------------------------
# DYNAMIC PATH SETUP
# Walk upward from this script until a directory containing 'src' is found,
# then expose that directory on sys.path so the `src.*` imports below resolve.
# -----------------------------------------------------------------------------
# Absolute, symlink-resolved location of this script
current_file = Path(__file__).resolve()

# Candidates: the script's own directory first, then every ancestor in turn,
# the last of which is the filesystem root.
for root_path in (current_file.parent, *current_file.parent.parents):
    if (root_path / 'src').exists():
        break
else:
    # Every candidate (including the filesystem root) lacked a 'src' folder
    raise FileNotFoundError("Could not find project root containing 'src' folder")

# Highest import priority: position 0 of the module search path
sys.path.insert(0, str(root_path))
print(f"🔧 Added project root to path: {root_path}")
# -----------------------------------------------------------------------------

from src.knowledge.embedding_factory import EmbeddingFactory
load_dotenv()


def _require_env(name: str) -> str:
    """Return the value of environment variable *name*, failing loudly if absent.

    ``os.getenv`` yields ``None`` for an unset variable, and an f-string would
    silently render that as the text "None" (e.g. ``http://None:None``), which
    only surfaces later as a confusing connection or collection error.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The non-empty value of the variable.

    Raises:
        RuntimeError: If the variable is unset or empty.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Required environment variable {name} is not set "
            "(define it in the environment or in the .env file)"
        )
    return value


# --- 1. CONFIGURATION ---
qdrant_host = _require_env("QDRANT_HOST")
qdrant_port = _require_env("QDRANT_PORT")
qdrant_url = f"http://{qdrant_host}:{qdrant_port}"
collection_name = _require_env("BASE_COLLECTION_NAME")
# The API key stays optional: None is passed through unchanged when it is unset.
qdrant_api_key = os.getenv("QDRANT_API_KEY")
# Matches the embedder used in app.py
embed_factory = EmbeddingFactory()
local_embedder = embed_factory.get_embedder()
# The final collection name is derived from the base name and the embedder id,
# with a "_hybrid" suffix.
collection_name = f"{collection_name}_{local_embedder.id}_hybrid"

print("****************************************************************")
print(f"Collection name: {collection_name}")

# Initialize Qdrant Vector DB
vector_db = Qdrant(
    collection=collection_name,
    url=qdrant_url,
    embedder=local_embedder,
    timeout=30.0,
    api_key=qdrant_api_key,
    search_type=SearchType.hybrid,
)

# Knowledge base used by the ingestion functions below to store documents
knowledge_base = Knowledge(vector_db=vector_db)


def ingest_hadiths(file_path: str):
    """Read Hadith rows from an Excel workbook and add each to the knowledge base.

    Every row becomes one text document made of labelled TITLE / ARABIC /
    TRANSLATION / SOURCE lines, handed to ``knowledge_base.add_content``.

    Args:
        file_path: Path of the workbook to read. The columns used are
            'Title', 'Arabic Text', 'Translation' and 'Source Info'; an
            absent column or a blank cell contributes an empty string.
    """
    def cell(row, column: str):
        # Blank Excel cells are loaded by pandas as NaN, which an f-string
        # would render as the literal text "nan". Map those to "" so that
        # placeholder noise is not embedded and indexed.
        value = row.get(column, '')
        return '' if pd.isna(value) else value

    print(f"📖 Processing Hadiths: {file_path}")
    df = pd.read_excel(file_path)
    count = 0

    for _, row in df.iterrows():
        content = (
            f"HADITH TYPE: HADITH\n"
            f"TITLE: {cell(row, 'Title')}\n"
            f"ARABIC: {cell(row, 'Arabic Text')}\n"
            f"TRANSLATION: {cell(row, 'Translation')}\n"
            f"SOURCE: {cell(row, 'Source Info')}"
        )
        knowledge_base.add_content(text_content=content)
        count += 1

    print(f"✅ Successfully ingested {count} Hadiths into Qdrant.")


def ingest_articles(file_path: str):
    """Read article rows from an Excel workbook and add each to the knowledge base.

    Every row becomes one text document made of labelled TITLE / AUTHOR /
    CONTENT / URL lines, handed to ``knowledge_base.add_content``.

    Args:
        file_path: Path of the workbook to read. The columns used are
            'Title', 'Author', 'Content' and 'URL'; an absent column or a
            blank cell contributes an empty string.
    """
    def cell(row, column: str):
        # Blank Excel cells are loaded by pandas as NaN, which an f-string
        # would render as the literal text "nan". Map those to "" so that
        # placeholder noise is not embedded and indexed.
        value = row.get(column, '')
        return '' if pd.isna(value) else value

    print(f"📄 Processing Articles: {file_path}")
    df = pd.read_excel(file_path)
    count = 0

    for _, row in df.iterrows():
        content = (
            f"ARTICLE TYPE: ARTICLE\n"
            f"TITLE: {cell(row, 'Title')}\n"
            f"AUTHOR: {cell(row, 'Author')}\n"
            f"CONTENT: {cell(row, 'Content')}\n"
            f"URL: {cell(row, 'URL')}"
        )
        knowledge_base.add_content(text_content=content)
        count += 1

    print(f"✅ Successfully ingested {count} Articles into Qdrant.")


def main() -> int:
    """Ingest the Hadith and article workbooks found under ``data/raw``.

    The data folder is resolved relative to the parent of this script's
    directory. A workbook that does not exist is reported and skipped.

    Returns:
        Process exit status: 0 when the run finished (missing workbooks only
        produce a warning), 1 when an ingestion step raised an exception.
    """
    print("--- 🚀 Starting Data Ingestion to Qdrant ---")

    # Directory holding this script, its parent, and the raw-data folder there
    scripts_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(scripts_dir)
    data_dir = os.path.join(project_root, "data", "raw")

    # Each workbook paired with the function that ingests it, in run order
    jobs = (
        (os.path.join(data_dir, "hadiths_data.xlsx"), ingest_hadiths),
        (os.path.join(data_dir, "dovodi_articles.xlsx"), ingest_articles),
    )

    try:
        for workbook, ingest in jobs:
            if os.path.exists(workbook):
                ingest(workbook)
            else:
                print(f"⚠️ {workbook} not found!")
    except Exception as e:
        # Top-level boundary: report the failure and signal it through the
        # exit status instead of finishing as if the run had succeeded.
        print(f"❌ Error during ingestion: {e}")
        return 1

    print("--- ✨ Ingestion Complete ---")
    return 0


if __name__ == "__main__":
    sys.exit(main())