imam-reza-agent/scripts/ingest_excel.py


								import os

								import pandas as pd

								from dotenv import load_dotenv

								from agno.knowledge.knowledge import Knowledge

								from agno.vectordb.qdrant import Qdrant

								from agno.vectordb.search import SearchType

								import sys

								from pathlib import Path


								# -----------------------------------------------------------------------------
								# PROJECT ROOT DISCOVERY
								# Walk upward from this script's directory until a directory containing a
								# 'src' folder is found, then put that directory first on sys.path so the
								# `src.*` imports below resolve.
								# -----------------------------------------------------------------------------
								current_file = Path(__file__).resolve()

								# Candidates are the script's own directory first, then each ancestor in
								# turn, ending at the filesystem root.
								for root_path in (current_file.parent, *current_file.parent.parents):
								    if (root_path / 'src').exists():
								        break
								else:
								    # Every directory up to the filesystem root was checked without a match.
								    raise FileNotFoundError("Could not find project root containing 'src' folder")

								sys.path.insert(0, str(root_path))
								print(f"🔧 Added project root to path: {root_path}")
								# -----------------------------------------------------------------------------


								from src.knowledge.embedding_factory import EmbeddingFactory

								# Load variables from a .env file into the process environment.
								load_dotenv()


								# --- 1. CONFIGURATION ---
								# All settings come from the environment. os.getenv returns None for an unset
								# variable, and interpolating None below would silently produce a URL such as
								# "http://None:None" or a collection named "None_<id>_hybrid", so the required
								# settings are validated before they are used.
								qdrant_host = os.getenv("QDRANT_HOST")
								qdrant_port = os.getenv("QDRANT_PORT")
								collection_name = os.getenv("BASE_COLLECTION_NAME")
								# Optional: forwarded unchanged to Qdrant (None when unset).
								qdrant_api_key = os.getenv("QDRANT_API_KEY")

								_missing_settings = [
								    name
								    for name, value in (
								        ("QDRANT_HOST", qdrant_host),
								        ("QDRANT_PORT", qdrant_port),
								        ("BASE_COLLECTION_NAME", collection_name),
								    )
								    if not value
								]
								if _missing_settings:
								    raise RuntimeError(
								        "Missing required environment variable(s): " + ", ".join(_missing_settings)
								    )

								qdrant_url = f"http://{qdrant_host}:{qdrant_port}"

								# Embedder comes from the project's EmbeddingFactory.
								# NOTE(review): the original comment says this matches the embedder used in
								# app.py; that cannot be confirmed from this file.
								embed_factory = EmbeddingFactory()
								local_embedder = embed_factory.get_embedder()

								# The final collection name is the base name suffixed with the embedder id
								# and the literal "hybrid".
								collection_name = f"{collection_name}_{local_embedder.id}_hybrid"

								print("****************************************************************")
								print(f"Collection name: {collection_name}")


								# Initialize Qdrant Vector DB
								vector_db = Qdrant(
								    collection=collection_name,
								    url=qdrant_url,
								    embedder=local_embedder,
								    timeout=30.0,
								    api_key=qdrant_api_key,
								    search_type=SearchType.hybrid
								)


								# Knowledge base used by the ingest functions below to store documents.
								knowledge_base = Knowledge(vector_db=vector_db)


								def ingest_hadiths(file_path: str) -> None:
								    """Read a Hadith workbook and add one text document per row to the knowledge base.

								    Each row is rendered as a labelled text block built from the 'Title',
								    'Arabic Text', 'Translation' and 'Source Info' columns and passed to
								    ``knowledge_base.add_content``. A column that is absent, or a cell that
								    is empty, is rendered as an empty string.

								    Args:
								        file_path: Path of the Excel workbook to read.
								    """

								    def cell_text(row, column):
								        # Empty Excel cells are loaded by pandas as NaN/NaT; formatting
								        # those directly would write the literal text "nan" into the document.
								        value = row.get(column, '')
								        return '' if pd.isna(value) else value

								    print(f"📖 Processing Hadiths: {file_path}")
								    df = pd.read_excel(file_path)
								    count = 0

								    for _, row in df.iterrows():
								        content = (
								            f"HADITH TYPE: HADITH\n"
								            f"TITLE: {cell_text(row, 'Title')}\n"
								            f"ARABIC: {cell_text(row, 'Arabic Text')}\n"
								            f"TRANSLATION: {cell_text(row, 'Translation')}\n"
								            f"SOURCE: {cell_text(row, 'Source Info')}"
								        )
								        knowledge_base.add_content(text_content=content)
								        count += 1

								    print(f"✅ Successfully ingested {count} Hadiths into Qdrant.")


								def ingest_articles(file_path: str) -> None:
								    """Read an article workbook and add one text document per row to the knowledge base.

								    Each row is rendered as a labelled text block built from the 'Title',
								    'Author', 'Content' and 'URL' columns and passed to
								    ``knowledge_base.add_content``. A column that is absent, or a cell that
								    is empty, is rendered as an empty string.

								    Args:
								        file_path: Path of the Excel workbook to read.
								    """

								    def cell_text(row, column):
								        # Empty Excel cells are loaded by pandas as NaN/NaT; formatting
								        # those directly would write the literal text "nan" into the document.
								        value = row.get(column, '')
								        return '' if pd.isna(value) else value

								    print(f"📄 Processing Articles: {file_path}")
								    df = pd.read_excel(file_path)
								    count = 0

								    for _, row in df.iterrows():
								        content = (
								            f"ARTICLE TYPE: ARTICLE\n"
								            f"TITLE: {cell_text(row, 'Title')}\n"
								            f"AUTHOR: {cell_text(row, 'Author')}\n"
								            f"CONTENT: {cell_text(row, 'Content')}\n"
								            f"URL: {cell_text(row, 'URL')}"
								        )
								        knowledge_base.add_content(text_content=content)
								        count += 1

								    print(f"✅ Successfully ingested {count} Articles into Qdrant.")


								if __name__ == "__main__":
								    print("--- 🚀 Starting Data Ingestion to Qdrant ---")

								    # Input workbooks are read from <project root>/data/raw, where the project
								    # root is taken to be the parent of the directory containing this script.
								    SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
								    PROJECT_ROOT = os.path.dirname(SCRIPTS_DIR)
								    DATA_DIR = os.path.join(PROJECT_ROOT, "data", "raw")

								    HADITH_FILE = os.path.join(DATA_DIR, "hadiths_data.xlsx")
								    ARTICLE_FILE = os.path.join(DATA_DIR, "dovodi_articles.xlsx")

								    try:
								        # A missing workbook is reported and skipped; it is not an error.
								        if os.path.exists(HADITH_FILE):
								            ingest_hadiths(HADITH_FILE)
								        else:
								            print(f"⚠️ {HADITH_FILE} not found!")

								        if os.path.exists(ARTICLE_FILE):
								            ingest_articles(ARTICLE_FILE)
								        else:
								            print(f"⚠️ {ARTICLE_FILE} not found!")

								        print("--- ✨ Ingestion Complete ---")
								    except Exception as e:
								        # Top-level boundary: report the failure, then exit with a non-zero
								        # status so shells, CI jobs and schedulers can tell the run failed.
								        print(f"❌ Error during ingestion: {e}")
								        sys.exit(1)