import csv
import os
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from typing import Optional

import requests
from fastapi import HTTPException, UploadFile, status
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session
from src.apps.files.models.cdn import CDN
from src.apps.files.models.file import File
from src.apps.files.services.file import create_file as create_file_service
from src.apps.web_crawler.models.web_crawler import WebCrawler, WebCrawlerFiles
from src.apps.web_crawler.schemas.web_crawler import (
    WebCrawlerCreate,
    WebCrawlerFilterSchema,
    WebCrawlerResponse,
    WebCrawlerUpdate,
)
from src.core.exceptions import APIException
from src.utils.enums import WebCrawlerFileType
from src.utils.helpers.functions import get_file_header
from src.utils.pagination import QueryPaginator

MATCH_CONFIG_CSV_PATH = "match_config_csv"


def get_web_crawler(db: Session, crawler_id: int):
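    """Return a non-deleted WebCrawler by id, together with its associated files, as a WebCrawlerResponse."""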
    crawler = db.query(WebCrawler).filter(WebCrawler.id == crawler_id, WebCrawler.is_deleted == False).first()

    if not crawler:
        raise APIException(
            module=__name__,
            error={},
            status_code=status.HTTP_400_BAD_REQUEST,
            message="Web Crawler not found",
        )
    # Fetch associated files and attach the related File record to each entry
    crawler_files = db.query(WebCrawlerFiles).filter(WebCrawlerFiles.web_crawler_id == crawler.id).all()
    files = [dict(crawler_file.__dict__) for crawler_file in crawler_files]
    for file in files:
        file["file"] = db.query(File).filter(File.id == file["file_id"]).first()
    response_data = {
        **crawler.__dict__,
        "files": files,
    }
    return WebCrawlerResponse.model_validate(response_data)


def get_web_crawlers(db: Session, page: int = 1, per_page: int = 10, filters: Optional[WebCrawlerFilterSchema] = None):
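    """Return a paginated list of non-deleted WebCrawlers, optionally filtered by exact matches on WebCrawler columns."""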
    offset = (page - 1) * per_page
    query = db.query(WebCrawler).filter(WebCrawler.is_deleted == False)

    if filters:
        for key, value in filters.model_dump(exclude_none=True).items():
            if hasattr(WebCrawler, key):
                query = query.filter(getattr(WebCrawler, key) == value)
            else:
                raise APIException(
                    module=__name__,
                    error={},
                    status_code=status.HTTP_400_BAD_REQUEST,
                    message=f"Invalid filter key: {key}",
                )

    paginator = QueryPaginator(
        query=query, schema=WebCrawlerResponse, url="/web-crawlers", offset=offset, limit=per_page, use_orm=True
    )

    return paginator.paginate()


def create_web_crawler(db: Session, crawler: WebCrawlerCreate) -> WebCrawler:
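    """Create and persist a new WebCrawler record."""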
    try:
        db_crawler = WebCrawler(**crawler.model_dump())
        db.add(db_crawler)
        db.commit()
        db.refresh(db_crawler)
        return db_crawler

    except SQLAlchemyError as e:
        db.rollback()
        raise APIException(
            module=__name__,
            error={},
            status_code=status.HTTP_400_BAD_REQUEST,
            message=f"Database error during creation: {str(e)}",
        )


def update_web_crawler(db: Session, crawler_id: int, crawler: WebCrawlerUpdate) -> WebCrawlerResponse:
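    """Apply the provided fields to an existing WebCrawler and return the updated record with its files."""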
    db_crawler = db.query(WebCrawler).filter(WebCrawler.id == crawler_id, WebCrawler.is_deleted == False).first()

    if not db_crawler:
        raise APIException(
            module=__name__,
            error={},
            status_code=status.HTTP_400_BAD_REQUEST,
            message="Web Crawler not found",
        )

    update_data = crawler.model_dump(exclude_unset=True)

    try:
        for key, value in update_data.items():
            if hasattr(db_crawler, key):
                setattr(db_crawler, key, value)

        db.commit()
        db.refresh(db_crawler)

        return get_web_crawler(db, db_crawler.id)

    except SQLAlchemyError as e:
        db.rollback()
        raise APIException(
            module=__name__,
            error={},
            status_code=status.HTTP_400_BAD_REQUEST,
            message=f"Database error during update: {str(e)}",
        )


def soft_delete_web_crawler(db: Session, crawler_id: int) -> bool:
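    """Soft-delete a WebCrawler by setting its is_deleted flag and deleted_at timestamp."""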
    db_crawler = db.query(WebCrawler).filter(WebCrawler.id == crawler_id, WebCrawler.is_deleted == False).first()

    if not db_crawler:
        raise APIException(
            module=__name__,
            error={},
            status_code=status.HTTP_400_BAD_REQUEST,
            message="Web Crawler not found",
        )

    db_crawler.is_deleted = True
    db_crawler.deleted_at = datetime.now(timezone.utc)

    try:
        db.commit()
        return True
    except SQLAlchemyError as e:
        db.rollback()
        raise APIException(
            module=__name__, error={}, status_code=status.HTTP_400_BAD_REQUEST, message=f"Error during delete: {str(e)}"
        )


def generate_csv_from_xml(db: Session, match_crawler_id: int, created_by_id: int):
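    """Fetch the crawler's XML feed, flatten element attributes and child text into a CSV file,
    store it as a File record, and link it to the crawler as its master file."""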
    # Get crawler
    crawler = db.query(WebCrawler).filter(WebCrawler.id == match_crawler_id, WebCrawler.is_deleted == False).first()
    if not crawler:
        raise HTTPException(status_code=404, detail="Web Crawler configuration not found")
    url = crawler.url
    if not url:
        raise HTTPException(status_code=400, detail="URL not set for this configuration")

    # Fetch XML
    response = requests.get(url, timeout=30)  # avoid hanging indefinitely on unresponsive hosts
    if response.status_code != 200:
        raise HTTPException(status_code=400, detail="Failed to fetch XML data from the provided URL.")

    xml_data = response.content

    try:
        root = ET.fromstring(xml_data)

        data = []
        all_columns = set()

        for item in root.findall(".//*"):
            item_data = dict(item.attrib)

            for child in item:
                if child.tag and child.text and child.text.strip():
                    item_data[child.tag] = child.text.strip()
                    all_columns.add(child.tag)

            all_columns.update(item.attrib.keys())

            if item_data:
                data.append(item_data)

        if not data:
            raise HTTPException(status_code=400, detail="No data rows found in XML.")

        columns = sorted(list(all_columns))

    except ET.ParseError as e:
        raise HTTPException(status_code=400, detail=f"Error parsing XML: {str(e)}")

    os.makedirs(MATCH_CONFIG_CSV_PATH, exist_ok=True)

    csv_file_name = f"{crawler.name}-{datetime.today()}.csv"
    csv_file_path = os.path.join(MATCH_CONFIG_CSV_PATH, csv_file_name)

    with open(csv_file_path, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=columns)
        writer.writeheader()
        writer.writerows(data)

    cdn = db.query(CDN).filter(CDN.is_active == True).first()
    if not cdn:
        raise HTTPException(status_code=404, detail="No active CDN found")

    # Create File record
    new_file = File(
        original_name=csv_file_name,
        name=csv_file_name,
        path=csv_file_path,
        mime="text/csv",
        cdn_id=cdn.id,
        created_by_id=created_by_id,
        created_at=datetime.now(timezone.utc),
    )
    db.add(new_file)
    db.commit()
    db.refresh(new_file)

    # Associate the generated CSV with the crawler and record the match date
    crawler_file = WebCrawlerFiles(
        web_crawler_id=crawler.id, file_id=new_file.id, file_type=WebCrawlerFileType.MASTER_FILE
    )
    db.add(crawler_file)
    crawler.match_date = datetime.now(timezone.utc).date()
    db.commit()
    db.refresh(new_file)

    return new_file


def get_column_indexes_service(db: Session, webcrawler_id: int):
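    """Return the header columns of the crawler's latest master CSV, each paired with its zero-based index."""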
    # Fetch WebCrawler
    crawler = db.query(WebCrawler).filter(WebCrawler.id == webcrawler_id, WebCrawler.is_deleted == False).first()
    if not crawler:
        raise APIException(
            module=__name__,
            error={},
            status_code=status.HTTP_400_BAD_REQUEST,
            message="WebCrawler not found",
        )

    if not crawler.files:
        raise APIException(
            module=__name__,
            error={},
            status_code=status.HTTP_400_BAD_REQUEST,
            message="No CSV file associated with this WebCrawler",
        )

    # Fetch the latest master file associated with this crawler
    try:
        latest_file = (
            db.query(WebCrawlerFiles)
            .filter(
                WebCrawlerFiles.web_crawler_id == crawler.id,
                WebCrawlerFiles.file_type == WebCrawlerFileType.MASTER_FILE,
            )
            .order_by(WebCrawlerFiles.created_at.desc())
            .first()
        )
    except Exception as e:
        raise APIException(
            module=__name__,
            error={},
            status_code=status.HTTP_400_BAD_REQUEST,
            message=f"Error fetching file: {str(e)}",
        )

    if not latest_file:
        raise APIException(
            module=__name__,
            error={},
            status_code=status.HTTP_400_BAD_REQUEST,
            message="No master CSV file found for this WebCrawler",
        )

    file_record = db.query(File).filter(File.id == latest_file.file_id).first()
    if not file_record:
        raise APIException(
            module=__name__,
            error={},
            status_code=status.HTTP_400_BAD_REQUEST,
            message="File not found",
        )

    csv_path = file_record.path

    try:
        headers = get_file_header(csv_path, custom_delimiter=crawler.delimiter)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to read CSV: {str(e)}")

    columns_with_indexes = [{"column_name": name, "index": idx} for idx, name in enumerate(headers)]

    return columns_with_indexes


async def create_file(db: Session, upload_file: UploadFile, crawler_id: int, is_history: bool = False) -> File:
    """
    Save an uploaded file via the file service, link it to the WebCrawler through a
    WebCrawlerFiles record (as a history or master file), and return the File record.
    """
    # Use original filename as original_name
    original_name = upload_file.filename

    crawler = db.query(WebCrawler).filter(WebCrawler.id == crawler_id, WebCrawler.is_deleted == False).first()
    if not crawler:
        raise HTTPException(status_code=404, detail="Web Crawler configuration not found")

    crawler_code = crawler.code

    # Use today's date for naming
    date = datetime.now(timezone.utc)
    today = date.date()
    today_str = today.strftime("%Y-%m-%d")

    if not is_history:
        if crawler_code not in original_name:
            raise HTTPException(status_code=400, detail=f"Uploaded file name must contain '{crawler_code}-{today_str}'")

    # Remove crawler_code and first '-' from original_name to get match_date
    name_without_code = original_name.replace(f"{crawler_code}-", "", 1)
    match_date = name_without_code.split(".")[0]
    if not match_date and not is_history:
        raise HTTPException(status_code=400, detail="Match date not found in the file name")

    created_file = await create_file_service(
        db=db,
        upload_file=upload_file,
        is_history=is_history,
        created_by_id=1,
        is_crawler=True,
    )

    if not created_file:
        raise HTTPException(status_code=500, detail="Failed to create file record")
    if not is_history:
        crawler.match_date = match_date
        db.commit()

    # Create new WebCrawlerFiles record
    file_type = WebCrawlerFileType.HISTORY if is_history else WebCrawlerFileType.MASTER_FILE
    web_crawler_file = WebCrawlerFiles(web_crawler_id=crawler_id, file_id=created_file.id, file_type=file_type)
    db.add(web_crawler_file)
    try:
        db.commit()
    except SQLAlchemyError as e:
        db.rollback()
        raise HTTPException(status_code=500, detail=f"Database error during file association: {str(e)}")

    return created_file


async def preview_file(db: Session, crawler_id: int, rows: int = 5):
    """
    Preview the first few rows of the Web Crawler's latest master file.
    """
    crawler = db.query(WebCrawler).filter(WebCrawler.id == crawler_id, WebCrawler.is_deleted == False).first()
    if not crawler:
        raise APIException(
            module="web_crawler",
            error={"crawler_id": "Web Crawler configuration not found"},
            status_code=status.HTTP_404_NOT_FOUND,
            message="Web Crawler configuration not found",
        )

    last_file = (
        db.query(WebCrawlerFiles)
        .filter(
            WebCrawlerFiles.web_crawler_id == crawler_id,
            WebCrawlerFiles.file_type == WebCrawlerFileType.MASTER_FILE
        )
        .order_by(WebCrawlerFiles.created_at.desc())
        .first()
    )
    
    if not last_file:
        raise APIException(
            module="web_crawler",
            error={"file": "No file found for this Web Crawler"},
            status_code=status.HTTP_404_NOT_FOUND,
            message="No file found for this Web Crawler"
        )
    
    file = db.query(File).filter(File.id == last_file.file_id).first()
    if not file:
        raise APIException(
            module="web_crawler",
            error={"file": "File not found"},
            status_code=status.HTTP_404_NOT_FOUND,
            message="File not found"
        )
    
    file_path = file.path
    
    if not os.path.exists(file_path):
        raise APIException(
            module="web_crawler",
            error={"file": "File does not exist on the server"},
            status_code=status.HTTP_404_NOT_FOUND,
            message="File does not exist on the server"
        )
    
    try:
        records = get_file_header(file_path, custom_delimiter=crawler.delimiter, rows=rows)
    except Exception as e:
        raise APIException(
            module="web_crawler",
            error={"file": f"Failed to read CSV: {str(e)}"},
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            message=f"Failed to read CSV: {str(e)}"
        )
    
    return {
        "file_name": file.original_name,
        "records": records,
        "rows": rows,
    }